|
@@ -2,8 +2,8 @@
|
|
|
# Trace each command as it is executed (shell xtrace).
set -x
|
|
|
|
|
|
# 0 全局变量/参数
|
|
|
-originDataSavePath=/dw/recommend/model/31_ad_sample_data_v3_auto/
|
|
|
-bucketFeatureSavePath=/dw/recommend/model/33_ad_train_data_v3_auto/
|
|
|
+originDataSavePath=/dw/recommend/model/31_ad_sample_data_v3_auto
|
|
|
+bucketFeatureSavePath=/dw/recommend/model/33_ad_train_data_v3_auto
|
|
|
# Name prefix used below when building model and prediction file names.
model_name=model_bkb8_v3
|
|
|
# Date of this run, formatted YYYYMMDD.
today="$(date +%Y%m%d)"
|
|
|
# The day before the run, formatted YYYYMMDD (relies on GNU `date -d`).
today_early_1="$(date -d '1 days ago' +%Y%m%d)"
|
|
@@ -14,7 +14,7 @@ MODEL_PATH=/root/zhaohp/recommend-emr-dataprocess/model
|
|
|
# Local directory that receives the fm_predict output files.
PREDICT_PATH=/root/zhaohp/recommend-emr-dataprocess/predict
|
|
|
# Hadoop CLI binary; invoked below as `$HADOOP fs -text`.
HADOOP=/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop
|
|
|
# alphaFM install root; ${FM_HOME}/bin/fm_predict is run from here.
FM_HOME=/root/sunmingze/alphaFM
|
|
|
-OSS_PATH=oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/zhangbo/
|
|
|
+OSS_PATH=oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/zhangbo
|
|
|
# NOTE(review): max_hour / max_minute are not referenced anywhere in this excerpt;
# presumably a wall-clock cutoff (17:00) used elsewhere in the script — confirm.
max_hour=17
|
|
|
max_minute=00
|
|
|
|
|
@@ -125,12 +125,7 @@ echo "$LOG_PREFIX -- 原始样本生产 -- 模型训练完成: 耗时 $step_elap
|
|
|
# 5 Compare AUC
|
|
|
# Wall-clock start of step 5; step5_elapsed is later measured from this value.
step5_start_time=$(date "+%Y-%m-%d %H:%M:%S")
|
|
|
|
|
|
-# 5.1 用昨天生成的模型,计算昨天一天的AUC
|
|
|
-# $HADOOP fs -text ${bucketFeatureSavePath}/${today_early_1}/* | ${FM_HOME}/bin/fm_predict -m ${MODEL_PATH}/${model_name}_${}.txt -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${today}_online.txt
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-# 5.2 计算线上模型的AUC
|
|
|
+# 5.1 计算线上模型的AUC
|
|
|
# Start timestamp for this sub-step.
step_start_time=$(date "+%Y-%m-%d %H:%M:%S")
|
|
|
# Stream every file under ${bucketFeatureSavePath}/${today} through fm_predict,
# scoring with ${LAST_MODEL_HOME}/model_online.txt; predictions are written to the *_online.txt file.
$HADOOP fs -text ${bucketFeatureSavePath}/${today}/* | ${FM_HOME}/bin/fm_predict -m ${LAST_MODEL_HOME}/model_online.txt -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${today}_online.txt
|
|
|
# Pipe the prediction file into the AUC binary and capture whatever it prints.
online_auc=`cat ${PREDICT_PATH}/${model_name}_${today}_online.txt | /root/sunmingze/AUC/AUC`
|
|
@@ -146,7 +141,7 @@ if [ $? -ne 0 ]; then
|
|
|
fi
|
|
|
echo "$LOG_PREFIX -- 线上模型AUC计算 -- 线上模型AUC计算完成: 耗时 $step_elapsed"
|
|
|
|
|
|
-# 5.3 计算新模型的AUC
|
|
|
+# 5.2 计算新模型的AUC
|
|
|
# Start timestamp for this sub-step.
step_start_time=$(date "+%Y-%m-%d %H:%M:%S")
|
|
|
# Same input as the online-model run (${bucketFeatureSavePath}/${today}), but scored with
# the model file dated ${today_early_1}; predictions are written to the *_new.txt file.
$HADOOP fs -text ${bucketFeatureSavePath}/${today}/* | ${FM_HOME}/bin/fm_predict -m ${MODEL_PATH}/${model_name}_${today_early_1}.txt -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${today}_new.txt
|
|
|
# Pipe the prediction file into the AUC binary and capture whatever it prints.
new_auc=`cat ${PREDICT_PATH}/${model_name}_${today}_new.txt | /root/sunmingze/AUC/AUC`
|
|
@@ -164,13 +159,13 @@ echo "$LOG_PREFIX -- 新模型AUC计算 -- 新模型AUC计算完成: 耗时 $ste
|
|
|
|
|
|
# Print both AUC values side by side before the comparison logic runs.
echo "AUC比对: 线上模型的AUC: ${online_auc}, 新模型的AUC: ${new_auc}"
|
|
|
|
|
|
-# 5.4 计算新模型与线上模型的AUC差值的绝对值
|
|
|
+# 5.3 计算新模型与线上模型的AUC差值的绝对值
|
|
|
# Signed difference (online - new), evaluated by bc because the values are not integers.
# NOTE(review): if either AUC variable is empty, bc receives a malformed expression — confirm
# that failures of the prediction/AUC steps are caught before this point.
auc_diff=$(echo "$online_auc - $new_auc" | bc -l)
|
|
|
# Absolute value of the difference, obtained as the square root of its square.
auc_diff_abs=$(echo "sqrt(($auc_diff)^2)" | bc -l)
|
|
|
|
|
|
# Elapsed seconds for the whole of step 5: both timestamps are converted to epoch
# seconds with `date +%s -d` and subtracted.
step_end_time=$(date "+%Y-%m-%d %H:%M:%S")
|
|
|
step5_elapsed=$(($(date +%s -d "$step_end_time") - $(date +%s -d "$step5_start_time")))
|
|
|
-# 5.5 如果差值的绝对值小于0.005且新模型的AUC大于0.73, 则更新模型
|
|
|
+# 5.4 如果差值的绝对值小于0.005且新模型的AUC大于0.73, 则更新模型
|
|
|
if (( $(echo "${online_auc} <= ${new_auc}" | bc -l) )); then
|
|
|
msg="新模型优于线上模型 \n\t线上模型AUC: ${online_auc} \n\t新模型AUC: ${new_auc}"
|
|
|
echo -e "$LOG_PREFIX -- AUC对比 -- $msg: 耗时 $step5_elapsed"
|
|
@@ -187,10 +182,10 @@ else
|
|
|
exit 1
|
|
|
fi
|
|
|
|
|
|
-# 5.6 使用前一天线上模型和前一天的新模型对前一天的数据进行预测并计算AUC
|
|
|
+# 5.5 使用前一天线上模型和前一天的新模型对前一天的数据进行预测并计算AUC
|
|
|
# Default to the current online model file; the check that follows may switch this to a dated copy.
yesterday_online_model=${LAST_MODEL_HOME}/model_online.txt
|
|
|
|
|
|
-# 5.6.1 判断model_online文件的生成时间,如果是昨天生成的则表示模型有更新
|
|
|
+# 5.5.1 判断model_online文件的生成时间,如果是昨天生成的则表示模型有更新
|
|
|
# ${MODEL_PATH}/${model_name}_${today_early_1}.txt and ${LAST_MODEL_HOME}/model_online_$(date +\%Y\%m\%d).txt
# NOTE(review): presumably these are the two files involved when the online model is replaced — confirm.
|
|
|
# Read the file's birth time as epoch seconds.
# NOTE(review): GNU stat reports 0 (or "-") for %W when the filesystem/kernel cannot supply a
# birth time; the formatted date would then never equal yesterday — confirm %W works on this host.
file_creation_date=$(stat -c %W "$yesterday_online_model")
|
|
|
# Convert the epoch value to YYYYMMDD so it can be compared with ${today_early_1}.
file_creation_date_format=$(date -d "@$file_creation_date" +%Y%m%d)
|
|
@@ -198,11 +193,12 @@ if [ "$file_creation_date_format" == "$today_early_1" ]; then
|
|
|
yesterday_online_model=${LAST_MODEL_HOME}/model_online_${today_early_1}.txt
|
|
|
fi
|
|
|
|
|
|
-# 5.6.2 使用昨天的线上模型,进行预测
|
|
|
+# 5.5.2 使用昨天的线上模型,进行预测
|
|
|
+echo "前一天的线上模型路径: $yesterday_online_model"
|
|
|
# Score the previous day's data (${bucketFeatureSavePath}/${today_early_1}) with the model path
# held in yesterday_online_model; predictions are written to the *_online_all.txt file.
$HADOOP fs -text ${bucketFeatureSavePath}/${today_early_1}/* | ${FM_HOME}/bin/fm_predict -m "$yesterday_online_model" -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${today_early_1}_online_all.txt
|
|
|
# Pipe the prediction file into the AUC binary and capture whatever it prints.
yesterday_online_auc=`cat ${PREDICT_PATH}/${model_name}_${today_early_1}_online_all.txt | /root/sunmingze/AUC/AUC`
|
|
|
|
|
|
-# 5.6.3 使用昨天的新模型,进行预测
|
|
|
+# 5.5.3 使用昨天的新模型,进行预测
|
|
|
# Score the previous day's data with the model file dated ${today_early_2}; predictions are
# written to the *_new_all.txt file.
# NOTE(review): today_early_2 is not assigned anywhere in this excerpt — confirm it is defined
# earlier in the script; if it is empty the model path collapses to ${model_name}_.txt.
$HADOOP fs -text ${bucketFeatureSavePath}/${today_early_1}/* | ${FM_HOME}/bin/fm_predict -m ${MODEL_PATH}/${model_name}_${today_early_2}.txt -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${today_early_1}_new_all.txt
|
|
|
# Pipe the prediction file into the AUC binary and capture whatever it prints.
yesterday_new_auc=`cat ${PREDICT_PATH}/${model_name}_${today_early_1}_new_all.txt | /root/sunmingze/AUC/AUC`
|
|
|
|