|
@@ -1,8 +1,13 @@
|
|
|
#!/bin/sh
|
|
|
-set -ex
|
|
|
+set -x
|
|
|
|
|
|
source /root/anaconda3/bin/activate py37
|
|
|
|
|
|
+export SPARK_HOME=/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8
|
|
|
+export PATH=$SPARK_HOME/bin:$PATH
|
|
|
+export HADOOP_CONF_DIR=/etc/taihao-apps/hadoop-conf
|
|
|
+export JAVA_HOME=/usr/lib/jvm/java-1.8.0
|
|
|
+
|
|
|
# nohup sh handle_rov.sh > "$(date +%Y%m%d_%H%M%S)_handle_rov.log" 2>&1 &
|
|
|
|
|
|
# 原始数据table name
|
|
@@ -55,7 +60,7 @@ while true; do
|
|
|
# shellcheck disable=SC2039
|
|
|
if (( current_hour > max_hour || (current_hour == max_hour && current_minute >= max_minute) )); then
|
|
|
echo "最长等待时间已到,失败:${current_hour}-${current_minute}"
|
|
|
- python FeishuBot.py "荐模型数据更新 \n【任务名称】:step0校验是否生产完数据\n【是否成功】:error\n【信息】:最长等待时间已到,失败:${current_hour}-${current_minute}"
|
|
|
+ /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step0校验是否生产完数据\n【是否成功】:error\n【信息】:最长等待时间已到,失败:${current_hour}-${current_minute}"
|
|
|
exit 1
|
|
|
fi
|
|
|
done
|
|
@@ -72,7 +77,7 @@ savePath:${originDataPath} \
|
|
|
table:${table}
|
|
|
if [ $? -ne 0 ]; then
|
|
|
echo "Spark原始样本生产任务执行失败"
|
|
|
- python FeishuBot.py "荐模型数据更新 \n【任务名称】:step1根据${table}生产原始数据\n【是否成功】:error\n【信息】:Spark原始样本生产任务执行失败"
|
|
|
+ /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step1根据${table}生产原始数据\n【是否成功】:error\n【信息】:Spark原始样本生产任务执行失败"
|
|
|
exit 1
|
|
|
else
|
|
|
echo "spark原始样本生产执行成功"
|
|
@@ -90,7 +95,7 @@ savePath:${valueDataPath} \
|
|
|
beginStr:${begin_early_2_Str} endStr:${end_early_2_Str} repartition:1000
|
|
|
if [ $? -ne 0 ]; then
|
|
|
echo "Spark特征值拼接处理任务执行失败"
|
|
|
- python FeishuBot.py "荐模型数据更新 \n【任务名称】:step2特征值拼接\n【是否成功】:error\n【信息】:Spark特征值拼接处理任务执行失败"
|
|
|
+ /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step2特征值拼接\n【是否成功】:error\n【信息】:Spark特征值拼接处理任务执行失败"
|
|
|
exit 1
|
|
|
else
|
|
|
echo "spark特征值拼接处理执行成功"
|
|
@@ -107,7 +112,7 @@ savePath:${bucketDataPath} \
|
|
|
beginStr:${begin_early_2_Str} endStr:${end_early_2_Str} repartition:1000
|
|
|
if [ $? -ne 0 ]; then
|
|
|
echo "Spark特征分桶处理任务执行失败"
|
|
|
- python FeishuBot.py "荐模型数据更新 \n【任务名称】:step3训练数据产出\n【是否成功】:error\n【信息】:Spark特征分桶处理任务执行失败"
|
|
|
+ /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step3训练数据产出\n【是否成功】:error\n【信息】:Spark特征分桶处理任务执行失败"
|
|
|
exit 1
|
|
|
else
|
|
|
echo "spark特征分桶处理执行成功"
|
|
@@ -117,86 +122,87 @@ fi
|
|
|
# 4 对比AUC 前置对比3日模型数据 与 线上模型数据效果对比,如果3日模型优于线上,更新线上模型
|
|
|
echo "$(date +%Y-%m-%d_%H-%M-%S)----------step4------------开始对比,新:${MODEL_PATH}/${model_name}_${today_early_3}.txt,与线上online模型数据auc效果"
|
|
|
$HADOOP fs -text ${bucketDataPath}/${begin_early_2_Str}/* | ${FM_HOME}/fm_predict -m ${LAST_MODEL_HOME}/model_online.txt -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${today}_online.txt
|
|
|
-$HADOOP fs -text ${bucketDataPath}/${begin_early_2_Str}/* | ${FM_HOME}/fm_predict -m ${MODEL_PATH}/${model_name}_${today_early_3}.txt -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${today}_new.txt
|
|
|
-
|
|
|
-online_auc=`cat ${PREDICT_PATH}/${model_name}_${today}_online.txt | /root/sunmingze/AUC/AUC`
|
|
|
-if [ $? -ne 0 ]; then
|
|
|
- echo "推荐线上模型AUC计算失败"
|
|
|
- python FeishuBot.py "荐模型数据更新 \n【任务名称】:step4新旧模型AUC对比\n【是否成功】:error\n【信息】:推荐线上模型AUC计算失败"
|
|
|
- exit 1
|
|
|
-fi
|
|
|
-
|
|
|
-new_auc=`cat ${PREDICT_PATH}/${model_name}_${today}_new.txt | /root/sunmingze/AUC/AUC`
|
|
|
if [ $? -ne 0 ]; then
|
|
|
- echo "推荐新模型AUC计算失败"
|
|
|
- python FeishuBot.py "荐模型数据更新 \n【任务名称】:step4新旧模型AUC对比\n【是否成功】:error\n【信息】:推荐新模型AUC计算失败${PREDICT_PATH}/${model_name}_${today}_new.txt"
|
|
|
- exit 1
|
|
|
-fi
|
|
|
-
|
|
|
-
|
|
|
-# 4.1 对比auc数据判断是否更新线上模型
|
|
|
-if [ "$online_auc" \< "$new_auc" ]; then
|
|
|
- echo "新模型优于线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}"
|
|
|
- # 4.1.1 模型格式转换
|
|
|
- cat ${MODEL_PATH}/${model_name}_${today_early_3}.txt |
|
|
|
- awk -F " " '{
|
|
|
- if (NR == 1) {
|
|
|
- print $1"\t"$2
|
|
|
- } else {
|
|
|
- split($0, fields, " ");
|
|
|
- OFS="\t";
|
|
|
- line=""
|
|
|
- for (i = 1; i <= 10 && i <= length(fields); i++) {
|
|
|
- line = (line ? line "\t" : "") fields[i];
|
|
|
- }
|
|
|
- print line
|
|
|
- }
|
|
|
- }' > ${MODEL_PATH}/${model_name}_${today_early_3}_change.txt
|
|
|
+ echo "推荐线上模型AUC计算失败"
|
|
|
+ /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step4新旧模型AUC对比\n【是否成功】:error\n【信息】:推荐线上模型AUC计算失败"
|
|
|
+else
|
|
|
+ $HADOOP fs -text ${bucketDataPath}/${begin_early_2_Str}/* | ${FM_HOME}/fm_predict -m ${MODEL_PATH}/${model_name}_${today_early_3}.txt -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${today}_new.txt
|
|
|
+ if [ $? -ne 0 ]; then
|
|
|
+ echo "推荐新模型AUC计算失败"
|
|
|
+ /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step4新旧模型AUC对比\n【是否成功】:error\n【信息】:推荐新模型AUC计算失败${PREDICT_PATH}/${model_name}_${today}_new.txt"
|
|
|
+ else
|
|
|
+ online_auc=`cat ${PREDICT_PATH}/${model_name}_${today}_online.txt | /root/sunmingze/AUC/AUC`
|
|
|
if [ $? -ne 0 ]; then
|
|
|
- echo "新模型文件格式转换失败"
|
|
|
- python FeishuBot.py "荐模型数据更新 \n【任务名称】:step4模型格式转换\n【是否成功】:error\n【信息】:新模型文件格式转换失败${MODEL_PATH}/${model_name}_${today_early_3}.txt"
|
|
|
- exit 1
|
|
|
- fi
|
|
|
- # 4.1.2 模型文件上传OSS
|
|
|
- online_model_path=${OSS_PATH}/${model_name}.txt
|
|
|
- $HADOOP fs -test -e ${online_model_path}
|
|
|
- if [ $? -eq 0 ]; then
|
|
|
- echo "数据存在, 先删除。"
|
|
|
- $HADOOP fs -rm -r -skipTrash ${online_model_path}
|
|
|
- else
|
|
|
- echo "数据不存在"
|
|
|
- fi
|
|
|
-
|
|
|
- $HADOOP fs -put ${MODEL_PATH}/${model_name}_${today_early_3}_change.txt ${online_model_path}
|
|
|
- if [ $? -eq 0 ]; then
|
|
|
- echo "推荐模型文件至OSS成功"
|
|
|
+ echo "推荐线上模型AUC计算失败"
|
|
|
+ /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step4新旧模型AUC对比\n【是否成功】:error\n【信息】:推荐线上模型AUC计算失败"
|
|
|
else
|
|
|
- echo "推荐模型文件至OSS失败"
|
|
|
- python FeishuBot.py "荐模型数据更新 \n【任务名称】:step4模型推送oss\n【是否成功】:error\n【信息】:推荐模型文件至OSS失败${MODEL_PATH}/${model_name}_${today_early_3}_change.txt --- ${online_model_path}"
|
|
|
- exit 1
|
|
|
- fi
|
|
|
- # 4.1.3 本地保存最新的线上使用的模型,用于下一次的AUC验证
|
|
|
- cp -f ${LAST_MODEL_HOME}/model_online.txt ${LAST_MODEL_HOME}/model_online_$(date +\%Y\%m\%d).txt
|
|
|
- cp -f ${MODEL_PATH}/${model_name}_${today_early_3}.txt ${LAST_MODEL_HOME}/model_online.txt
|
|
|
- if [ $? -ne 0 ]; then
|
|
|
- echo "模型备份失败"
|
|
|
- exit 1
|
|
|
+ new_auc=`cat ${PREDICT_PATH}/${model_name}_${today}_new.txt | /root/sunmingze/AUC/AUC`
|
|
|
+ if [ $? -ne 0 ]; then
|
|
|
+ echo "推荐新模型AUC计算失败"
|
|
|
+ /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step4新旧模型AUC对比\n【是否成功】:error\n【信息】:推荐新模型AUC计算失败${PREDICT_PATH}/${model_name}_${today}_new.txt"
|
|
|
+ else
|
|
|
+ # 4.1 对比auc数据判断是否更新线上模型
|
|
|
+ if [ "$online_auc" \< "$new_auc" ]; then
|
|
|
+ echo "新模型优于线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}"
|
|
|
+ # 4.1.1 模型格式转换
|
|
|
+ cat ${MODEL_PATH}/${model_name}_${today_early_3}.txt |
|
|
|
+ awk -F " " '{
|
|
|
+ if (NR == 1) {
|
|
|
+ print $1"\t"$2
|
|
|
+ } else {
|
|
|
+ split($0, fields, " ");
|
|
|
+ OFS="\t";
|
|
|
+ line=""
|
|
|
+ for (i = 1; i <= 10 && i <= length(fields); i++) {
|
|
|
+ line = (line ? line "\t" : "") fields[i];
|
|
|
+ }
|
|
|
+ print line
|
|
|
+ }
|
|
|
+ }' > ${MODEL_PATH}/${model_name}_${today_early_3}_change.txt
|
|
|
+ if [ $? -ne 0 ]; then
|
|
|
+ echo "新模型文件格式转换失败"
|
|
|
+ /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step4模型格式转换\n【是否成功】:error\n【信息】:新模型文件格式转换失败${MODEL_PATH}/${model_name}_${today_early_3}.txt"
|
|
|
+ else
|
|
|
+ # 4.1.2 模型文件上传OSS
|
|
|
+ online_model_path=${OSS_PATH}/${model_name}.txt
|
|
|
+ $HADOOP fs -test -e ${online_model_path}
|
|
|
+ if [ $? -eq 0 ]; then
|
|
|
+ echo "数据存在, 先删除。"
|
|
|
+ $HADOOP fs -rm -r -skipTrash ${online_model_path}
|
|
|
+ else
|
|
|
+ echo "数据不存在"
|
|
|
+ fi
|
|
|
+ $HADOOP fs -put ${MODEL_PATH}/${model_name}_${today_early_3}_change.txt ${online_model_path}
|
|
|
+ if [ $? -eq 0 ]; then
|
|
|
+ echo "推荐模型文件至OSS成功"
|
|
|
+ # 4.1.3 本地保存最新的线上使用的模型,用于下一次的AUC验证
|
|
|
+ cp -f ${LAST_MODEL_HOME}/model_online.txt ${LAST_MODEL_HOME}/model_online_$(date +\%Y\%m\%d).txt
|
|
|
+ cp -f ${MODEL_PATH}/${model_name}_${today_early_3}.txt ${LAST_MODEL_HOME}/model_online.txt
|
|
|
+ if [ $? -ne 0 ]; then
|
|
|
+ echo "模型备份失败"
|
|
|
+ fi
|
|
|
+ /root/anaconda3/bin/python monitor_util.py --level info --msg "荐模型数据更新 \n【任务名称】:step4模型更新\n【是否成功】:success\n【信息】:新模型优于线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc},已更新${model_name}_${today_early_3}.txt模型}"
|
|
|
+ else
|
|
|
+ echo "推荐模型文件至OSS失败"
|
|
|
+ /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step4模型推送oss\n【是否成功】:error\n【信息】:推荐模型文件至OSS失败${MODEL_PATH}/${model_name}_${today_early_3}_change.txt --- ${online_model_path}"
|
|
|
+ fi
|
|
|
+ fi
|
|
|
+ /root/anaconda3/bin/python monitor_util.py --level info --msg "荐模型数据更新 \n【任务名称】:step4模型更新\n【是否成功】:success\n【信息】:新模型优于线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc},已更新${model_name}_${today_early_3}.txt模型}"
|
|
|
+ else
|
|
|
+ echo "新模型不如线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}"
|
|
|
+ /root/anaconda3/bin/python monitor_util.py --level info --msg "荐模型数据更新 \n【任务名称】:step4模型更新\n【是否成功】:success\n【信息】:新模型不如线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}}"
|
|
|
+ fi
|
|
|
+ fi
|
|
|
fi
|
|
|
- python FeishuBot.py "荐模型数据更新 \n【任务名称】:step4模型更新\n【是否成功】:success\n【信息】:新模型优于线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc},已更新${model_name}_${today_early_3}.txt模型}"
|
|
|
-else
|
|
|
- echo "新模型不如线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}"
|
|
|
- python FeishuBot.py "荐模型数据更新 \n【任务名称】:step4模型更新\n【是否成功】:success\n【信息】:新模型不如线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}}"
|
|
|
+ fi
|
|
|
fi
|
|
|
|
|
|
-
|
|
|
-
|
|
|
# 5 模型训练
|
|
|
echo "$(date +%Y-%m-%d_%H-%M-%S)----------step5------------开始模型训练"
|
|
|
$HADOOP fs -text ${bucketDataPath}/${begin_early_2_Str}/* | ${FM_HOME}/fm_train -m ${MODEL_PATH}/${model_name}_${begin_early_2_Str}.txt -dim 1,1,8 -im ${LAST_MODEL_HOME}/model_online.txt -core 8
|
|
|
if [ $? -ne 0 ]; then
|
|
|
echo "模型训练失败"
|
|
|
- python FeishuBot.py "荐模型数据更新 \n【任务名称】:step5模型训练\n【是否成功】:error\n【信息】:${bucketDataPath}/${begin_early_2_Str}训练失败"
|
|
|
- exit 1
|
|
|
+ /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step5模型训练\n【是否成功】:error\n【信息】:${bucketDataPath}/${begin_early_2_Str}训练失败"
|
|
|
fi
|
|
|
|
|
|
echo "$(date +%Y-%m-%d_%H-%M-%S)----------step6------------模型训练完成:${MODEL_PATH}/${model_name}_${begin_early_2_Str}.txt"
|