8 maanden geleden · 651c409a36
--- a/ad/01_ad_model_update.sh
+++ b/ad/01_ad_model_update.sh
@@ -29,6 +29,10 @@ PREDICT_RESULT_SAVE_PATH=/dw/recommend/model/34_ad_predict_data
 
				 MODEL_OSS_PATH=oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/zhangbo/
			
 
				 # 线上模型名，测试时修改为其他模型名，避免影响线上
			
 
				 model_name=model_xgb_351_1000_v2
			
 
				+# 线上校准文件名
			
 
				+OSS_CALIBRATION_FILE_NAME=model_xgb_351_1000_v2_calibration
			
 
				+
			
 
				+
			
 
				 # 本地保存HDFS模型路径文件，测试时修改为其他模型名，避免影响线上
			
 
				 model_path_file=${model_local_home}/online_model_path.txt
			
 
				 # 获取当前是星期几，1表示星期一
			
@@ -50,6 +54,9 @@ new_model_predict_result_path=""
 
				 model_save_path=""
			
 
				 # 评测结果保存路径，后续需要根据此文件评估是否要更新模型
			
 
				 predict_analyse_file_path=""
			
 
				+# 校准文件保存路径
			
 
				+calibration_file_path=""
			
 
				+
			
 
				 # 保存模型评估的分析结果
			
 
				 old_incr_rate_avg=0
			
 
				 new_incr_rate_avg=0
			
@@ -95,8 +102,6 @@ send_success_upload_msg(){
 
				   /root/anaconda3/bin/python ${sh_path}/ad_monitor_util.py --level info --msg "${msg}" --start "${start_time}" --elapsed "${elapsed}" --top10 "${top10_msg}"
			
 
				 }
			
 
				 
			
 
				-
			
 
				-
			
 
				 init() {
			
 
				   
			
 
				   declare -a date_keys=()
			
@@ -132,6 +137,7 @@ init() {
 
				   new_model_predict_result_path=${PREDICT_RESULT_SAVE_PATH}/${today_early_1}_351_1000_${train_first_day: -4}_${train_last_day: -4}
			
 
				   online_model_predict_result_path=${PREDICT_RESULT_SAVE_PATH}/${today_early_1}_351_1000_${online_model_path: -9}
			
 
				   predict_analyse_file_path=${model_local_home}/predict_analyse_file/${today_early_1}_351_1000_analyse.txt
			
 
				+  calibration_file_path=${model_local_home}/${OSS_CALIBRATION_FILE_NAME}.txt
			
 
				 
			
 
				   echo "init param train_data_path: ${train_data_path}"
			
 
				   echo "init param predict_date_path: ${predict_date_path}"
			
@@ -144,6 +150,7 @@ init() {
 
				   echo "init param model_local_home: ${model_local_home}"
			
 
				   echo "init param model_oss_path: ${MODEL_OSS_PATH}"
			
 
				   echo "init param predict_analyse_file_path: ${predict_analyse_file_path}"
			
 
				+  echo "init param calibration_file_path: ${calibration_file_path}"
			
 
				   echo "init param current_day_of_week: ${current_day_of_week}"
			
 
				 
			
 
				   echo "当前Python环境安装的Python版本: $(python --version)"
			
@@ -291,7 +298,7 @@ model_predict() {
 
				   check_run_status $return_code $step_start_time "线上模型评估${predict_date_path: -8}的数据" "线上模型评估${predict_date_path: -8}的数据失败"
			
 
				 
			
 
				   # 结果分析
			
 
				-  local python_return_code=$(python ${sh_path}/model_predict_analyse.py -p ${online_model_predict_result_path} ${new_model_predict_result_path} -f ${predict_analyse_file_path})
			
 
				+  local python_return_code=$(python ${sh_path}/model_predict_analyse.py -op ${online_model_predict_result_path} -np ${new_model_predict_result_path} -af ${predict_analyse_file_path} -cf ${calibration_file_path})
			
 
				   check_run_status $python_return_code $step_start_time "分析线上模型评估${predict_date_path: -8}的数据" "分析线上模型评估${predict_date_path: -8}的数据失败"
			
 
				 
			
 
				   calc_model_predict
			
@@ -330,17 +337,20 @@ model_upload_oss() {
 
				 
			
 
				     rm -rf ${model_name}.tar.gz.crc
			
 
				 
			
 
				-    ${HADOOP} fs -rm -r -skipTrash ${MODEL_OSS_PATH}/${model_name}.tar.gz
			
 
				+    # 从OSS中移除模型文件和校准文件
			
 
				+    ${HADOOP} fs -rm -r -skipTrash ${MODEL_OSS_PATH}/${model_name}.tar.gz ${MODEL_OSS_PATH}/${OSS_CALIBRATION_FILE_NAME}.txt
			
 
				     
			
 
				-    ${HADOOP} fs -put ${model_name}.tar.gz ${MODEL_OSS_PATH}
			
 
				+    # 将模型文件和校准文件推送到OSS上
			
 
				+    ${HADOOP} fs -put ${model_name}.tar.gz ${OSS_CALIBRATION_FILE_NAME}.txt ${MODEL_OSS_PATH}
			
 
				     local return_code=$?
			
 
				     check_run_status $return_code $step_start_time "模型上传OSS任务" "模型上传OSS失败"
			
 
				 
			
 
				-
			
 
				     echo ${model_save_path} > ${model_path_file}
			
 
				 
			
 
				+    # 删除本地的文件
			
 
				     rm -f ./${model_name}.tar.gz
			
 
				     rm -rf ./${model_name}
			
 
				+    rm -rf ${OSS_CALIBRATION_FILE_NAME}.txt
			
 
				   )
			
 
				 
			
 
				   local return_code=$?
			
--- a/ad/02_ad_model_update_test.sh
+++ b/ad/02_ad_model_update_test.sh
@@ -129,7 +129,7 @@ init() {
 
				 
			
 
				   model_save_path=${MODEL_PATH}/${model_name}_${train_first_day: -4}_${train_last_day: -4}
			
 
				   predict_date_path=${BUCKET_FEATURE_PATH}/${today_early_1}
			
 
				-  new_model_predict_result_path=/dw/recommend/model/34_ad_predict_data/20241104_351_1000_1028_1102
			
 
				+  new_model_predict_result_path=/dw/recommend/model/34_ad_predict_data/20241103_351_1000_1028_1102
			
 
				   online_model_predict_result_path=/dw/recommend/model/34_ad_predict_data/20241104_351_1000_1028_1102
			
 
				   predict_analyse_file_path=${model_local_home}/predict_analyse_file/${today_early_1}_351_1000_analyse.txt
			
 
				   calibration_file_path=${model_local_home}/${OSS_CALIBRATION_FILE_NAME}.txt
			
--- a/ad/model_predict_analyse.py
+++ b/ad/model_predict_analyse.py
@@ -38,16 +38,15 @@ def read_predict_from_hdfs(hdfs_path: str) -> list:
 
				                 for line in gz_file.read().decode("utf-8").split("\n"):
			
 
				                     split = line.split("\t")
			
 
				                     if len(split) == 4:
			
 
				-                        continue
			
 
				-                    cid = split[3].split("_")[0]
			
 
				-                    label = int(split[0])
			
 
				-                    score = float(split[2].replace("[", "").replace("]", "").split(",")[1])
			
 
				-
			
 
				-                    result.append({
			
 
				-                        "cid": cid,
			
 
				-                        "label": label,
			
 
				-                        "score": score
			
 
				-                    })
			
 
				+                        cid = split[3].split("_")[0]
			
 
				+                        label = int(split[0])
			
 
				+                        score = float(split[2].replace("[", "").replace("]", "").split(",")[1])
			
 
				+
			
 
				+                        result.append({
			
 
				+                            "cid": cid,
			
 
				+                            "label": label,
			
 
				+                            "score": score
			
 
				+                        })
			
 
				 
			
 
				     return result