Explorar o código

feat:计算AUC

zhaohaipeng hai 5 meses
pai
achega
0b5c06ba17
Modificáronse 2 ficheiros con 29 adicións e 4 borrados
  1. 26 1
      ad/01_ad_model_update.sh
  2. 3 3
      ad/model_predict_analyse.py

+ 26 - 1
ad/01_ad_model_update.sh

@@ -32,6 +32,8 @@ MODEL_OSS_PATH=oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/zhangbo/
 model_name=model_xgb_351_1000_v2
 # 线上校准文件名
 OSS_CALIBRATION_FILE_NAME=model_xgb_351_1000_v2_calibration
+# 用于存放一些临时的文件
+PREDICT_CACHE_PATH=/root/zhaohp/XGB/predict_cache
 
 
 # 本地保存HDFS模型路径文件,测试时修改为其他模型名,避免影响线上
@@ -61,8 +63,11 @@ calibration_file_path=""
 # 保存模型评估的分析结果
 old_incr_rate_avg=0
 new_incr_rate_avg=0
-
+# Top10的详情
 top10_msg=""
+# AUC值
+old_auc=0
+new_auc=0
 
 declare -A real_score_map
 declare -A old_score_map
@@ -78,6 +83,14 @@ check_run_status() {
     local step_end_time=$(date +%s)
     local step_elapsed=$(($step_end_time - $step_start_time))
 
+    if [[ -n "${old_auc}" && "${old_auc}" != "0" ]]; then
+      msg+="\n\t - 老模型AUC: ${old_auc}"
+    fi
+    if [[ -n "${new_auc}" && "${new_auc}" != "0" ]]; then
+      msg+="\n\t - 新模型AUC: ${new_auc}"
+    fi
+
+
     if [ ${status} -ne 0 ]; then
         echo "${LOG_PREFIX} -- ${step_name}失败: 耗时 ${step_elapsed}"
         local elapsed=$(($step_end_time - $start_time))
@@ -91,6 +104,8 @@ check_run_status() {
 send_success_upload_msg(){ 
   # 发送更新成功通知
   local msg=" 广告模型文件更新完成"
+  msg+="\n\t - 老模型AUC: ${old_auc}"
+  msg+="\n\t - 新模型AUC: ${new_auc}"
   msg+="\n\t - 老模型Top10差异平均值: ${old_incr_rate_avg}"
   msg+="\n\t - 新模型Top10差异平均值: ${new_incr_rate_avg}"
   msg+="\n\t - 模型在HDFS中的路径: ${model_save_path}"
@@ -277,6 +292,14 @@ calc_model_predict() {
   done
 }
 
+calc_auc() { # Compute AUC for the old and new model in parallel; sets globals old_auc and new_auc.
+  /root/sunmingze/AUC/AUC < "${PREDICT_CACHE_PATH}/old_1.txt" > "${PREDICT_CACHE_PATH}/old_auc.txt" &
+  /root/sunmingze/AUC/AUC < "${PREDICT_CACHE_PATH}/new_1.txt" > "${PREDICT_CACHE_PATH}/new_auc.txt" &
+  wait # results pass through files: a backgrounded `var=$(cmd) &` assigns inside a subshell and is lost
+  old_auc=$(cat "${PREDICT_CACHE_PATH}/old_auc.txt")
+  new_auc=$(cat "${PREDICT_CACHE_PATH}/new_auc.txt")
+}
+
 model_predict() {
 
   # 线上模型评估最新的数据
@@ -307,6 +330,8 @@ model_predict() {
 
   calc_model_predict
 
+  calc_auc
+
   if (( $(echo "${new_incr_rate_avg} > 0.100000" | bc -l ) ));then 
     echo "线上模型评估${predict_date_path: -8}的数据,绝对误差大于0.1,请检查"
     check_run_status 1 ${step_start_time} "${predict_date_path: -8}的数据,绝对误差大于0.1" "线上模型评估${predict_date_path: -8}的数据,绝对误差大于0.1,请检查"

+ 3 - 3
ad/model_predict_analyse.py

@@ -135,16 +135,16 @@ def predict_local_save_for_auc(old_df: pd.DataFrame, new_df: pd.DataFrame):
     d = {"old": old_df, "new": new_df}
     for key in d:
         df = d[key][['label', "score"]]
-        df.to_csv(f"{PREDICT_CACHE_PATH}/{key}_1.csv", sep="\t", index=False, header=False)
+        df.to_csv(f"{PREDICT_CACHE_PATH}/{key}_1.txt", sep="\t", index=False, header=False)
         df = d[key][['label', "score_2"]]
-        df.to_csv(f"{PREDICT_CACHE_PATH}/{key}_2.csv", sep="\t", index=False, header=False)
+        df.to_csv(f"{PREDICT_CACHE_PATH}/{key}_2.txt", sep="\t", index=False, header=False)
 
 
 def _main(old_predict_path: str, new_predict_path: str, calibration_file: str, analyse_file: str):
     old_df, old_group_df, old_segment_df = read_and_calibration_predict(old_predict_path)
     new_df, new_group_df, new_segment_df = read_and_calibration_predict(new_predict_path)
 
-    # predict_local_save_for_auc(old_df, new_df)
+    predict_local_save_for_auc(old_df, new_df)
 
     # 分段文件保存, 此处保留的最后使用的分段文件,不是所有的分段
     new_segment_df.to_csv(calibration_file, sep='\t', index=False, header=False)