ソースを参照

feat:添加测试脚本

zhaohaipeng 5 ヶ月 前
コミット
651c409a36
3 ファイル変更、26 行追加、17 行削除
  1. +16 -6
      ad/01_ad_model_update.sh
  2. +1 -1
      ad/02_ad_model_update_test.sh
  3. +9 -10
      ad/model_predict_analyse.py

+ 16 - 6
ad/01_ad_model_update.sh

@@ -29,6 +29,10 @@ PREDICT_RESULT_SAVE_PATH=/dw/recommend/model/34_ad_predict_data
 MODEL_OSS_PATH=oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/zhangbo/
 # 线上模型名,测试时修改为其他模型名,避免影响线上
 model_name=model_xgb_351_1000_v2
+# 线上校准文件名
+OSS_CALIBRATION_FILE_NAME=model_xgb_351_1000_v2_calibration
+
+
 # 本地保存HDFS模型路径文件,测试时修改为其他模型名,避免影响线上
 model_path_file=${model_local_home}/online_model_path.txt
 # 获取当前是星期几,1表示星期一
@@ -50,6 +54,9 @@ new_model_predict_result_path=""
 model_save_path=""
 # 评测结果保存路径,后续需要根据此文件评估是否要更新模型
 predict_analyse_file_path=""
+# 校准文件保存路径
+calibration_file_path=""
+
 # 保存模型评估的分析结果
 old_incr_rate_avg=0
 new_incr_rate_avg=0
@@ -95,8 +102,6 @@ send_success_upload_msg(){
   /root/anaconda3/bin/python ${sh_path}/ad_monitor_util.py --level info --msg "${msg}" --start "${start_time}" --elapsed "${elapsed}" --top10 "${top10_msg}"
 }
 
-
-
 init() {
   
   declare -a date_keys=()
@@ -132,6 +137,7 @@ init() {
   new_model_predict_result_path=${PREDICT_RESULT_SAVE_PATH}/${today_early_1}_351_1000_${train_first_day: -4}_${train_last_day: -4}
   online_model_predict_result_path=${PREDICT_RESULT_SAVE_PATH}/${today_early_1}_351_1000_${online_model_path: -9}
   predict_analyse_file_path=${model_local_home}/predict_analyse_file/${today_early_1}_351_1000_analyse.txt
+  calibration_file_path=${model_local_home}/${OSS_CALIBRATION_FILE_NAME}.txt
 
   echo "init param train_data_path: ${train_data_path}"
   echo "init param predict_date_path: ${predict_date_path}"
@@ -144,6 +150,7 @@ init() {
   echo "init param model_local_home: ${model_local_home}"
   echo "init param model_oss_path: ${MODEL_OSS_PATH}"
   echo "init param predict_analyse_file_path: ${predict_analyse_file_path}"
+  echo "init param calibration_file_path: ${calibration_file_path}"
   echo "init param current_day_of_week: ${current_day_of_week}"
 
   echo "当前Python环境安装的Python版本: $(python --version)"
@@ -291,7 +298,7 @@ model_predict() {
   check_run_status $return_code $step_start_time "线上模型评估${predict_date_path: -8}的数据" "线上模型评估${predict_date_path: -8}的数据失败"
 
   # 结果分析
-  local python_return_code=$(python ${sh_path}/model_predict_analyse.py -p ${online_model_predict_result_path} ${new_model_predict_result_path} -f ${predict_analyse_file_path})
+  local python_return_code=$(python ${sh_path}/model_predict_analyse.py -op ${online_model_predict_result_path} -np ${new_model_predict_result_path} -af ${predict_analyse_file_path} -cf ${calibration_file_path})
   check_run_status $python_return_code $step_start_time "分析线上模型评估${predict_date_path: -8}的数据" "分析线上模型评估${predict_date_path: -8}的数据失败"
 
   calc_model_predict
@@ -330,17 +337,20 @@ model_upload_oss() {
 
     rm -rf ${model_name}.tar.gz.crc
 
-    ${HADOOP} fs -rm -r -skipTrash ${MODEL_OSS_PATH}/${model_name}.tar.gz
+    # 从OSS中移除模型文件和校准文件
+    ${HADOOP} fs -rm -r -skipTrash ${MODEL_OSS_PATH}/${model_name}.tar.gz ${MODEL_OSS_PATH}/${OSS_CALIBRATION_FILE_NAME}.txt
     
-    ${HADOOP} fs -put ${model_name}.tar.gz ${MODEL_OSS_PATH}
+    # 将模型文件和校准文件推送到OSS上
+    ${HADOOP} fs -put ${model_name}.tar.gz ${OSS_CALIBRATION_FILE_NAME}.txt ${MODEL_OSS_PATH}
     local return_code=$?
     check_run_status $return_code $step_start_time "模型上传OSS任务" "模型上传OSS失败"
 
-
     echo ${model_save_path} > ${model_path_file}
 
+    # 删除本地的文件
     rm -f ./${model_name}.tar.gz
     rm -rf ./${model_name}
+    rm -rf ${OSS_CALIBRATION_FILE_NAME}.txt
   )
 
   local return_code=$?

+ 1 - 1
ad/02_ad_model_update_test.sh

@@ -129,7 +129,7 @@ init() {
 
   model_save_path=${MODEL_PATH}/${model_name}_${train_first_day: -4}_${train_last_day: -4}
   predict_date_path=${BUCKET_FEATURE_PATH}/${today_early_1}
-  new_model_predict_result_path=/dw/recommend/model/34_ad_predict_data/20241104_351_1000_1028_1102
+  new_model_predict_result_path=/dw/recommend/model/34_ad_predict_data/20241103_351_1000_1028_1102
   online_model_predict_result_path=/dw/recommend/model/34_ad_predict_data/20241104_351_1000_1028_1102
   predict_analyse_file_path=${model_local_home}/predict_analyse_file/${today_early_1}_351_1000_analyse.txt
   calibration_file_path=${model_local_home}/${OSS_CALIBRATION_FILE_NAME}.txt

+ 9 - 10
ad/model_predict_analyse.py

@@ -38,16 +38,15 @@ def read_predict_from_hdfs(hdfs_path: str) -> list:
                 for line in gz_file.read().decode("utf-8").split("\n"):
                     split = line.split("\t")
                     if len(split) == 4:
-                        continue
-                    cid = split[3].split("_")[0]
-                    label = int(split[0])
-                    score = float(split[2].replace("[", "").replace("]", "").split(",")[1])
-
-                    result.append({
-                        "cid": cid,
-                        "label": label,
-                        "score": score
-                    })
+                        cid = split[3].split("_")[0]
+                        label = int(split[0])
+                        score = float(split[2].replace("[", "").replace("]", "").split(",")[1])
+
+                        result.append({
+                            "cid": cid,
+                            "label": label,
+                            "score": score
+                        })
 
     return result