Parcourir la source

feat:广告模型定时更新脚本优化

zhaohaipeng il y a 9 mois
Parent
commit
d13e059860
1 fichiers modifiés avec 61 ajouts et 23 suppressions
  1. 61 23
      ad/02_ad_model_update_twice_daily.sh

+ 61 - 23
ad/02_ad_model_update_twice_daily.sh

@@ -49,7 +49,7 @@ max_minute=20
 global_init() {
     # 获取当前小时,确定需要使用的数据分区范围
     local current_hour="$(date +%H)"
-    if [ $current_hour -lt 08 ]; then
+    if [ $current_hour -le 05 ]; then
         train_begin_str=${today_early_1}14
         train_end_str=${today_early_1}21
         predict_begin_str=${today_early_1}22
@@ -70,8 +70,8 @@ global_init() {
         trainBucketFeaturePath=${bucketFeatureSavePathHome}/${today}/train
         predictBucketFeaturePath=${bucketFeatureSavePathHome}/${today}/predict
 
-        local_model_file_path=${MODEL_HOME}/${train_end_str}.txt
-        local_change_model_file_path=${MODEL_HOME}/${train_end_str}_change.txt
+        local_model_file_path=${MODEL_HOME}/${model_name}_${train_end_str}.txt
+        local_change_model_file_path=${MODEL_HOME}/${model_name}_${train_end_str}_change.txt
         max_hour=21
 
     else
@@ -160,10 +160,8 @@ make_origin_data() {
 
 }
 
-# 特征分桶,训练用的数据和预测用的数据分不同的目录
-make_bucket_feature() {
-    local step_start_time=$(date +%s)
-    # 训练用的数据
+# 训练用数据分桶
+make_train_bucket_feature() {
     /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
     --class com.aliyun.odps.spark.zhp.makedata_ad.makedata_ad_33_bucketData_20240717 \
     --master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
@@ -172,11 +170,10 @@ make_bucket_feature() {
     filterNames:adid_,targeting_conversion_ \
     readPath:${originDataSavePath} \
     savePath:${trainBucketFeaturePath}
+}
 
-    local return_code=$?
-    check_run_status $return_code $step_start_time "Spark特征分桶任务: 训练数据分桶"
-    
-    # 预测用的数据
+# 预测用数据分桶
+make_predict_bucket_feature() {
     /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
     --class com.aliyun.odps.spark.zhp.makedata_ad.makedata_ad_33_bucketData_20240717 \
     --master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
@@ -185,9 +182,31 @@ make_bucket_feature() {
     filterNames:adid_,targeting_conversion_ \
     readPath:${originDataSavePath} \
     savePath:${predictBucketFeaturePath}
+}
+
+
+# Feature bucketing: the training data and the prediction data are bucketed into separate directories.
+make_bucket_feature() {
+    local step_start_time=$(date +%s)  # epoch seconds; the same value is passed to both check_run_status calls below
+    
+    # Training data: launch the job in the background, then wait for it right away
+    make_train_bucket_feature &
+    train_bucket_pid=$!  # PID of the background job; not declared local, so it stays set after this function returns
+
+    wait $train_bucket_pid
+
+    local train_return_code=$?  # $? is expanded before `local` runs, so this is the status returned by wait
+    check_run_status $train_return_code $step_start_time "Spark特征分桶任务: 训练数据分桶"
+
+    
+    # Prediction data: launched only after the training step above has been waited on and checked
+    make_predict_bucket_feature &
+    predict_bucket_pid=$!  # PID of the background job; also not declared local
+
+    wait $predict_bucket_pid
 
-    return_code=$?
-    check_run_status $return_code $step_start_time "Spark特征分桶任务: 预测数据分桶"
+    local predict_return_code=$?  # status returned by the wait on the prediction job
+    check_run_status $predict_return_code $step_start_time "Spark特征分桶任务: 预测数据分桶"
 }
 
 # 模型训练
@@ -199,25 +218,44 @@ model_train() {
     check_run_status $return_code $step_start_time "模型训练"
 }
 
+
+# Compute the AUC of the online model: run fm_predict with ${LAST_MODEL_HOME}/model_online.txt, then feed its output file to the AUC tool.
+calc_online_model_auc() {  # NOTE(review): if launched with `&`, online_auc is assigned in a subshell and the parent shell will not see it - confirm how callers read it
+    $HADOOP fs -text ${predictBucketFeaturePath}/*/* | ${FM_HOME}/bin/fm_predict -m ${LAST_MODEL_HOME}/model_online.txt -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${train_end_str}_online.txt  # exit status of this pipeline is not checked here
+    online_auc=`cat ${PREDICT_PATH}/${model_name}_${train_end_str}_online.txt | /root/sunmingze/AUC/AUC`  # stores the AUC tool's stdout in global online_auc; its exit status becomes the function's return status
+}
+
+calc_new_model_auc() {  # Compute the AUC of the model at ${local_model_file_path}. NOTE(review): if launched with `&`, new_auc is assigned in a subshell and the parent shell will not see it - confirm how callers read it
+    $HADOOP fs -text ${predictBucketFeaturePath}/*/* | ${FM_HOME}/bin/fm_predict -m ${local_model_file_path} -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${train_end_str}_new.txt  # exit status of this pipeline is not checked here
+    new_auc=`cat ${PREDICT_PATH}/${model_name}_${train_end_str}_new.txt | /root/sunmingze/AUC/AUC`  # stores the AUC tool's stdout in global new_auc; its exit status becomes the function's return status
+}
+
 # AUC对比
 auc_compare() {
     local step5_start_time=$(date +%s)
 
     # 5.1 计算线上模型的AUC
     local step_start_time=$(date +%s)
-    $HADOOP fs -text ${predictBucketFeaturePath}/*/* | ${FM_HOME}/bin/fm_predict -m ${LAST_MODEL_HOME}/model_online.txt -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${train_end_str}_online.txt
-    online_auc=`cat ${PREDICT_PATH}/${model_name}_${train_end_str}_online.txt | /root/sunmingze/AUC/AUC`
-    
+
+    calc_online_model_auc &
+    local calc_online_model_auc_pid=$!
+
+    wait $calc_online_model_auc_pid
     local return_code=$?
     check_run_status $return_code $step_start_time "线上模型AUC计算"
 
     # 5.2 计算新模型的AUC
     step_start_time=$(date +%s)
-    $HADOOP fs -text ${predictBucketFeaturePath}/*/* | ${FM_HOME}/bin/fm_predict -m ${local_model_file_path} -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${train_end_str}_new.txt
-    new_auc=`cat ${PREDICT_PATH}/${model_name}_${train_end_str}_new.txt | /root/sunmingze/AUC/AUC`
 
-    return_code=$?
-    check_run_status $return_code $step_start_time "新模型的AUC计算"
+    calc_new_model_auc &
+    local calc_new_model_auc_pid=$!
+
+    wait $calc_new_model_auc_pid
+
+    local new_return_code=$?
+    check_run_status $new_return_code $step_start_time "新模型的AUC计算"
+
+
 
     echo "AUC比对: 线上模型的AUC: ${online_auc}, 新模型的AUC: ${new_auc}"
 
@@ -322,11 +360,11 @@ main() {
 
     model_to_online_format
 
-    model_upload_oss
+    # model_upload_oss
 
-    model_local_back
+    # model_local_back
 
-    success_inform
+    # success_inform
 }