|  | @@ -49,7 +49,7 @@ max_minute=20
 | 
	
		
			
				|  |  |  global_init() {
 | 
	
		
			
				|  |  |      # 获取当前小时,确定需要使用的数据分区范围
 | 
	
		
			
				|  |  |      local current_hour="$(date +%H)"
 | 
	
		
			
				|  |  | -    if [ $current_hour -lt 08 ]; then
 | 
	
		
			
				|  |  | +    if [ $current_hour -le 05 ]; then
 | 
	
		
			
				|  |  |          train_begin_str=${today_early_1}14
 | 
	
		
			
				|  |  |          train_end_str=${today_early_1}21
 | 
	
		
			
				|  |  |          predict_begin_str=${today_early_1}22
 | 
	
	
		
			
				|  | @@ -70,8 +70,8 @@ global_init() {
 | 
	
		
			
				|  |  |          trainBucketFeaturePath=${bucketFeatureSavePathHome}/${today}/train
 | 
	
		
			
				|  |  |          predictBucketFeaturePath=${bucketFeatureSavePathHome}/${today}/predict
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -        local_model_file_path=${MODEL_HOME}/${train_end_str}.txt
 | 
	
		
			
				|  |  | -        local_change_model_file_path=${MODEL_HOME}/${train_end_str}_change.txt
 | 
	
		
			
				|  |  | +        local_model_file_path=${MODEL_HOME}/${model_name}_${train_end_str}.txt
 | 
	
		
			
				|  |  | +        local_change_model_file_path=${MODEL_HOME}/${model_name}_${train_end_str}_change.txt
 | 
	
		
			
				|  |  |          max_hour=21
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      else
 | 
	
	
		
			
				|  | @@ -160,10 +160,8 @@ make_origin_data() {
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -# 特征分桶,训练用的数据和预测用的数据分不同的目录
 | 
	
		
			
				|  |  | -make_bucket_feature() {
 | 
	
		
			
				|  |  | -    local step_start_time=$(date +%s)
 | 
	
		
			
				|  |  | -    # 训练用的数据
 | 
	
		
			
				|  |  | +# 训练用数据分桶
 | 
	
		
			
				|  |  | +make_train_bucket_feature() {
 | 
	
		
			
				|  |  |      /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
 | 
	
		
			
				|  |  |      --class com.aliyun.odps.spark.zhp.makedata_ad.makedata_ad_33_bucketData_20240717 \
 | 
	
		
			
				|  |  |      --master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
 | 
	
	
		
			
				|  | @@ -172,11 +170,10 @@ make_bucket_feature() {
 | 
	
		
			
				|  |  |      filterNames:adid_,targeting_conversion_ \
 | 
	
		
			
				|  |  |      readPath:${originDataSavePath} \
 | 
	
		
			
				|  |  |      savePath:${trainBucketFeaturePath}
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    local return_code=$?
 | 
	
		
			
				|  |  | -    check_run_status $return_code $step_start_time "Spark特征分桶任务: 训练数据分桶"
 | 
	
		
			
				|  |  | -    
 | 
	
		
			
				|  |  | -    # 预测用的数据
 | 
	
		
			
				|  |  | +# 预测用数据分桶
 | 
	
		
			
				|  |  | +make_predict_bucket_feature() {
 | 
	
		
			
				|  |  |      /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
 | 
	
		
			
				|  |  |      --class com.aliyun.odps.spark.zhp.makedata_ad.makedata_ad_33_bucketData_20240717 \
 | 
	
		
			
				|  |  |      --master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
 | 
	
	
		
			
				|  | @@ -185,9 +182,31 @@ make_bucket_feature() {
 | 
	
		
			
				|  |  |      filterNames:adid_,targeting_conversion_ \
 | 
	
		
			
				|  |  |      readPath:${originDataSavePath} \
 | 
	
		
			
				|  |  |      savePath:${predictBucketFeaturePath}
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				# Feature bucketing: the training data and the prediction data are written
				# to separate directories (trainBucketFeaturePath / predictBucketFeaturePath).
				#
				# Runs make_train_bucket_feature and then make_predict_bucket_feature, each as
				# a background job that is waited on immediately, and hands each job's exit
				# status to check_run_status together with the step start time.
				#
				# NOTE(review): because every job is waited on right after it is launched,
				# the two Spark jobs still run one after the other. If the intent of the
				# backgrounding was to run them concurrently, both jobs must be started
				# before the first `wait`.
				# NOTE(review): both check_run_status calls receive the same start time, so
				# the value passed for the prediction step also spans the training step.
				make_bucket_feature() {
				    local step_start_time=$(date +%s)
				    # Keep the job pids function-local so they do not leak into the global
				    # scope (auc_compare declares its pid variables local in the same way).
				    local train_bucket_pid predict_bucket_pid

				    # Training data
				    make_train_bucket_feature &
				    train_bucket_pid=$!

				    wait $train_bucket_pid

				    # $? is expanded before `local` runs, so this is the status of `wait`,
				    # i.e. the exit status of the background job.
				    local train_return_code=$?
				    check_run_status $train_return_code $step_start_time "Spark特征分桶任务: 训练数据分桶"

				    # Prediction data
				    make_predict_bucket_feature &
				    predict_bucket_pid=$!

				    wait $predict_bucket_pid

				    local predict_return_code=$?
				    check_run_status $predict_return_code $step_start_time "Spark特征分桶任务: 预测数据分桶"
				}
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  # 模型训练
 | 
	
	
		
			
				|  | @@ -199,25 +218,44 @@ model_train() {
 | 
	
		
			
				|  |  |      check_run_status $return_code $step_start_time "模型训练"
 | 
	
		
			
				|  |  |  }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				# Compute the AUC of the online model on the prediction bucket data.
				#
				# Streams ${predictBucketFeaturePath}/*/* through fm_predict using
				# ${LAST_MODEL_HOME}/model_online.txt, writes the predictions to
				# ${PREDICT_PATH}/${model_name}_${train_end_str}_online.txt, then feeds that
				# file to the AUC tool and stores its output in the global `online_auc`.
				#
				# Returns: the first non-zero exit status of the read/predict pipeline if any
				#          stage fails; otherwise the exit status of the AUC tool.
				#
				# NOTE(review): auc_compare launches this function with `&`. A backgrounded
				# function runs in a child shell process, so the assignment to `online_auc`
				# made here is NOT visible to the caller's later "${online_auc}" expansion.
				# Call the function directly (without `&`) if the caller needs the value.
				calc_online_model_auc() {
				    local predict_out="${PREDICT_PATH}/${model_name}_${train_end_str}_online.txt"

				    # $HADOOP and the glob are left unquoted exactly as in the original
				    # (presumably the glob is resolved by hadoop itself; confirm).
				    $HADOOP fs -text ${predictBucketFeaturePath}/*/* \
				        | "${FM_HOME}/bin/fm_predict" -m "${LAST_MODEL_HOME}/model_online.txt" -dim 8 -core 8 -out "${predict_out}"
				    # PIPESTATUS (bash) must be captured right after the pipeline; without
				    # this check a failed prediction would go unnoticed and the AUC would be
				    # computed from a stale or missing output file.
				    local -a pipe_rc=("${PIPESTATUS[@]}")
				    local rc
				    for rc in "${pipe_rc[@]}"; do
				        [ "$rc" -eq 0 ] || return "$rc"
				    done

				    # The status of this assignment is the status of the AUC tool, which
				    # becomes the function's return status.
				    online_auc=$(/root/sunmingze/AUC/AUC < "${predict_out}")
				}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				# Compute the AUC of the newly trained model on the prediction bucket data.
				#
				# Streams ${predictBucketFeaturePath}/*/* through fm_predict using
				# ${local_model_file_path}, writes the predictions to
				# ${PREDICT_PATH}/${model_name}_${train_end_str}_new.txt, then feeds that file
				# to the AUC tool and stores its output in the global `new_auc`.
				#
				# Returns: the first non-zero exit status of the read/predict pipeline if any
				#          stage fails; otherwise the exit status of the AUC tool.
				#
				# NOTE(review): auc_compare launches this function with `&`. A backgrounded
				# function runs in a child shell process, so the assignment to `new_auc` made
				# here is NOT visible to the caller's later "${new_auc}" expansion. Call the
				# function directly (without `&`) if the caller needs the value.
				calc_new_model_auc() {
				    local predict_out="${PREDICT_PATH}/${model_name}_${train_end_str}_new.txt"

				    # $HADOOP and the glob are left unquoted exactly as in the original
				    # (presumably the glob is resolved by hadoop itself; confirm).
				    $HADOOP fs -text ${predictBucketFeaturePath}/*/* \
				        | "${FM_HOME}/bin/fm_predict" -m "${local_model_file_path}" -dim 8 -core 8 -out "${predict_out}"
				    # PIPESTATUS (bash) must be captured right after the pipeline; without
				    # this check a failed prediction would go unnoticed and the AUC would be
				    # computed from a stale or missing output file.
				    local -a pipe_rc=("${PIPESTATUS[@]}")
				    local rc
				    for rc in "${pipe_rc[@]}"; do
				        [ "$rc" -eq 0 ] || return "$rc"
				    done

				    # The status of this assignment is the status of the AUC tool, which
				    # becomes the function's return status.
				    new_auc=$(/root/sunmingze/AUC/AUC < "${predict_out}")
				}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |  # AUC对比
 | 
	
		
			
				|  |  |  auc_compare() {
 | 
	
		
			
				|  |  |      local step5_start_time=$(date +%s)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      # 5.1 计算线上模型的AUC
 | 
	
		
			
				|  |  |      local step_start_time=$(date +%s)
 | 
	
		
			
				|  |  | -    $HADOOP fs -text ${predictBucketFeaturePath}/*/* | ${FM_HOME}/bin/fm_predict -m ${LAST_MODEL_HOME}/model_online.txt -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${train_end_str}_online.txt
 | 
	
		
			
				|  |  | -    online_auc=`cat ${PREDICT_PATH}/${model_name}_${train_end_str}_online.txt | /root/sunmingze/AUC/AUC`
 | 
	
		
			
				|  |  | -    
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    calc_online_model_auc &
 | 
	
		
			
				|  |  | +    local calc_online_model_auc_pid=$!
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    wait $calc_online_model_auc_pid
 | 
	
		
			
				|  |  |      local return_code=$?
 | 
	
		
			
				|  |  |      check_run_status $return_code $step_start_time "线上模型AUC计算"
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      # 5.2 计算新模型的AUC
 | 
	
		
			
				|  |  |      step_start_time=$(date +%s)
 | 
	
		
			
				|  |  | -    $HADOOP fs -text ${predictBucketFeaturePath}/*/* | ${FM_HOME}/bin/fm_predict -m ${local_model_file_path} -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${train_end_str}_new.txt
 | 
	
		
			
				|  |  | -    new_auc=`cat ${PREDICT_PATH}/${model_name}_${train_end_str}_new.txt | /root/sunmingze/AUC/AUC`
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    return_code=$?
 | 
	
		
			
				|  |  | -    check_run_status $return_code $step_start_time "新模型的AUC计算"
 | 
	
		
			
				|  |  | +    calc_new_model_auc &
 | 
	
		
			
				|  |  | +    local calc_new_model_auc_pid=$!
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    wait $calc_new_model_auc_pid
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    local new_return_code=$?
 | 
	
		
			
				|  |  | +    check_run_status $new_return_code $step_start_time "新模型的AUC计算"
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      echo "AUC比对: 线上模型的AUC: ${online_auc}, 新模型的AUC: ${new_auc}"
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -322,11 +360,11 @@ main() {
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      model_to_online_format
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    model_upload_oss
 | 
	
		
			
				|  |  | +    # model_upload_oss
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    model_local_back
 | 
	
		
			
				|  |  | +    # model_local_back
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    success_inform
 | 
	
		
			
				|  |  | +    # success_inform
 | 
	
		
			
				|  |  |  }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 |