Explorar o código

推荐模型自动化更新-脚本

Joe hai 9 meses
pai
achega
84cb854db1

+ 72 - 73
qiaojialiang/check_auc.sh

@@ -41,80 +41,79 @@ HADOOP=/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop
 
 
 # 0 对比AUC 前置对比2日模型数据 与 线上模型数据效果对比,如果2日模型优于线上,更新线上模型
-echo "$(date +%Y-%m-%d_%H-%M-%S)----------step0------------开始对比,新:${MODEL_PATH}/${model_name}_${today_early_3}.txt,与线上online模型数据auc效果"
-#$HADOOP fs -text ${bucketDataPath}/${begin_early_2_Str}/* | ${FM_HOME}/bin/fm_predict -m ${LAST_MODEL_HOME}/model_online.txt -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${today}_online.txt
-#$HADOOP fs -text ${bucketDataPath}/${begin_early_2_Str}/* | ${FM_HOME}/bin/fm_predict -m ${MODEL_PATH}/${model_name}_${today_early_3}.txt -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${today}_new.txt
-
-#$HADOOP fs -text ${bucketDataPath}/20240703/* | ${FM_HOME}/fm_predict -m ${LAST_MODEL_HOME}/model_online.txt -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${today}_online.txt
-#$HADOOP fs -text ${bucketDataPath}/20240703/* | ${FM_HOME}/fm_predict -m ${MODEL_PATH}/${model_name}_20240703.txt -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${today}_new.txt
-
-
-online_auc=`cat ${PREDICT_PATH}/${model_name}_${today}_online.txt | /root/sunmingze/AUC/AUC`
+echo "$(date +%Y-%m-%d_%H-%M-%S)----------step4------------开始对比,新:${MODEL_PATH}/${model_name}_20240703.txt,与线上online模型数据auc效果"
+$HADOOP fs -text ${bucketDataPath}/20240707/* | ${FM_HOME}/fm_predict -m ${LAST_MODEL_HOME}/model_online.txt -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${today}_online.txt
 if [ $? -ne 0 ]; then
-   echo "推荐线上模型AUC计算失败"
-#   /root/anaconda3/bin/python ad/ad_monitor_util.py "线上模型AUC计算失败"
-   exit 1
-fi
-
-new_auc=`cat ${PREDICT_PATH}/${model_name}_${today}_new.txt | /root/sunmingze/AUC/AUC`
-if [ $? -ne 0 ]; then
-   echo "推荐新模型AUC计算失败"
-#   /root/anaconda3/bin/python ad/ad_monitor_util.py "新模型AUC计算失败"
-   exit 1
-fi
-
-
-# 1 对比auc数据判断是否更新线上模型
-if [ "$online_auc" \< "$new_auc" ]; then
-    echo "新模型优于线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}"
-    # 模型格式转换
-    cat ${MODEL_PATH}/${model_name}_${today_early_3}.txt |
-    awk -F " " '{
-        if (NR == 1) {
-            print $1"\t"$2
-        } else {
-            split($0, fields, " ");
-            OFS="\t";
-            line=""
-            for (i = 1; i <= 10 && i <= length(fields); i++) {
-                line = (line ? line "\t" : "") fields[i];
-            }
-            print line
-        }
-    }' > ${MODEL_PATH}/${model_name}_${today_early_3}_change.txt
-    if [ $? -ne 0 ]; then
-       echo "新模型文件格式转换失败"
-#       /root/anaconda3/bin/python ad/ad_monitor_util.py "新模型文件格式转换失败"
-       exit 1
-    fi
-    # 模型文件上传OSS
-#    online_model_path=${OSS_PATH}/${model_name}.txt
-#    $HADOOP fs -test -e ${online_model_path}
-#    if [ $? -eq 0 ]; then
-#        echo "数据存在, 先删除。"
-#        $HADOOP fs -rm -r -skipTrash ${online_model_path}
-#    else
-#        echo "数据不存在"
-#    fi
-#
-#    $HADOOP fs -put ${MODEL_PATH}/${model_name}_${today_early_3}_change.txt ${online_model_path}
-#    if [ $? -eq 0 ]; then
-#       echo "推荐模型文件至OSS成功"
-#    else
-#       echo "推荐模型文件至OSS失败"
-##       /root/anaconda3/bin/python ad/ad_monitor_util.py "推荐模型文件至OSS失败"
-#       exit 1
-#    fi
-    # 本地保存最新的线上使用的模型,用于下一次的AUC验证
-    cp -f ${LAST_MODEL_HOME}/model_online.txt ${LAST_MODEL_HOME}/model_online_$(date +\%Y\%m\%d).txt
-    cp -f ${MODEL_PATH}/${model_name}_${today_early_3}.txt ${LAST_MODEL_HOME}/model_online.txt
+  echo "推荐线上模型AUC计算失败"
+  /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step4新旧模型AUC对比\n【是否成功】:error\n【信息】:推荐线上模型AUC计算失败"
+else
+  $HADOOP fs -text ${bucketDataPath}/20240707/* | ${FM_HOME}/fm_predict -m ${MODEL_PATH}/${model_name}_20240703.txt -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${today}_new.txt
+  if [ $? -ne 0 ]; then
+     echo "推荐新模型AUC计算失败"
+     /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step4新旧模型AUC对比\n【是否成功】:error\n【信息】:推荐新模型AUC计算失败${PREDICT_PATH}/${model_name}_${today}_new.txt"
+  else
+    online_auc=`cat ${PREDICT_PATH}/${model_name}_${today}_online.txt | /root/sunmingze/AUC/AUC`
     if [ $? -ne 0 ]; then
-       echo "模型备份失败"
-       /root/anaconda3/bin/python ad/ad_monitor_util.py "模型备份失败 - 最新模型地址: ${MODEL_PATH}/${model_name}_${today_early_1}.txt"
-       exit 1
+       echo "推荐线上模型AUC计算失败"
+       /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step4新旧模型AUC对比\n【是否成功】:error\n【信息】:推荐线上模型AUC计算失败"
+    else
+      new_auc=`cat ${PREDICT_PATH}/${model_name}_${today}_new.txt | /root/sunmingze/AUC/AUC`
+      if [ $? -ne 0 ]; then
+         echo "推荐新模型AUC计算失败"
+         /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step4新旧模型AUC对比\n【是否成功】:error\n【信息】:推荐新模型AUC计算失败${PREDICT_PATH}/${model_name}_${today}_new.txt"
+      else
+        # 4.1 Compare the AUC values to decide whether the online model gets replaced.
+        # NOTE(review): the \< operator inside [ ] compares the two values as strings
+        # (lexicographically), not as numbers - confirm the AUC tool always prints
+        # plain decimals of the same form.
+        if [ "$online_auc" \< "$new_auc" ]; then
+            echo "新模型优于线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}"
+            # 4.1.1 Convert the model file from space-separated to tab-separated:
+            # the first row keeps its first two fields, every other row keeps at
+            # most its first 10 fields.
+            cat ${MODEL_PATH}/${model_name}_20240703.txt |
+            awk -F " " '{
+                if (NR == 1) {
+                    print $1"\t"$2
+                } else {
+                    split($0, fields, " ");
+                    OFS="\t";
+                    line=""
+                    for (i = 1; i <= 10 && i <= length(fields); i++) {
+                        line = (line ? line "\t" : "") fields[i];
+                    }
+                    print line
+                }
+            }' > ${MODEL_PATH}/${model_name}_20240703_change.txt
+            if [ $? -ne 0 ]; then
+               echo "新模型文件格式转换失败"
+               /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step4模型格式转换\n【是否成功】:error\n【信息】:新模型文件格式转换失败${MODEL_PATH}/${model_name}_20240703.txt"
+            else
+#              # 4.1.2 Upload the model file to OSS (left disabled in this script)
+#              online_model_path=${OSS_PATH}/${model_name}.txt
+#              $HADOOP fs -test -e ${online_model_path}
+#              if [ $? -eq 0 ]; then
+#                  echo "数据存在, 先删除。"
+#                  $HADOOP fs -rm -r -skipTrash ${online_model_path}
+#              else
+#                  echo "数据不存在"
+#              fi
+#              $HADOOP fs -put ${MODEL_PATH}/${model_name}_20240703_change.txt ${online_model_path}
+#              if [ $? -eq 0 ]; then
+#                 echo "推荐模型文件至OSS成功"
+                 # 4.1.3 Keep a dated copy of the current online model, then make the
+                 # new model the local online model used by the next AUC check.
+                 cp -f ${LAST_MODEL_HOME}/model_online.txt ${LAST_MODEL_HOME}/model_online_$(date +\%Y\%m\%d).txt
+                 cp -f ${MODEL_PATH}/${model_name}_20240703.txt ${LAST_MODEL_HOME}/model_online.txt
+                 if [ $? -ne 0 ]; then
+                     echo "模型备份失败"
+                 fi
+                 # Report success only on this path; it used to be sent after the
+                 # whole if/else and therefore also followed a failed conversion.
+                 /root/anaconda3/bin/python monitor_util.py --level info --msg "荐模型数据更新 \n【任务名称】:step4模型更新\n【是否成功】:success\n【信息】:新模型优于线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc},已更新${model_name}_20240703.txt模型}"
+#              else
+#                 echo "推荐模型文件至OSS失败"
+#                 /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step4模型推送oss\n【是否成功】:error\n【信息】:推荐模型文件至OSS失败${MODEL_PATH}/${model_name}_20240703_change.txt --- ${online_model_path}"
+#              fi
+            fi
+        else
+            echo "新模型不如线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}"
+            /root/anaconda3/bin/python monitor_util.py --level info --msg "荐模型数据更新 \n【任务名称】:step4模型更新\n【是否成功】:success\n【信息】:新模型不如线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}}"
+        fi
+      fi
     fi
-#    /root/anaconda3/bin/python ad/ad_monitor_util.py "新模型优于线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}"
-else
-    echo "新模型不如线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}"
-#    /root/anaconda3/bin/python ad/ad_monitor_util.py "新模型不如线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}"
+  fi
 fi

+ 80 - 74
qiaojialiang/handle_rov.sh

@@ -1,8 +1,13 @@
 #!/bin/sh
-set -ex
+set -x
 
 source /root/anaconda3/bin/activate py37
 
+export SPARK_HOME=/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8
+export PATH=$SPARK_HOME/bin:$PATH
+export HADOOP_CONF_DIR=/etc/taihao-apps/hadoop-conf
+export JAVA_HOME=/usr/lib/jvm/java-1.8.0
+
 #  nohup sh handle_rov.sh > "$(date +%Y%m%d_%H%M%S)_handle_rov.log" 2>&1 &
 
 # 原始数据table name
@@ -55,7 +60,7 @@ while true; do
   # shellcheck disable=SC2039
   if (( current_hour > max_hour || (current_hour == max_hour && current_minute >= max_minute) )); then
     echo "最长等待时间已到,失败:${current_hour}-${current_minute}"
-    python FeishuBot.py "荐模型数据更新 \n【任务名称】:step0校验是否生产完数据\n【是否成功】:error\n【信息】:最长等待时间已到,失败:${current_hour}-${current_minute}"
+    /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step0校验是否生产完数据\n【是否成功】:error\n【信息】:最长等待时间已到,失败:${current_hour}-${current_minute}"
     exit 1
   fi
 done
@@ -72,7 +77,7 @@ savePath:${originDataPath} \
 table:${table}
 if [ $? -ne 0 ]; then
    echo "Spark原始样本生产任务执行失败"
-   python FeishuBot.py "荐模型数据更新 \n【任务名称】:step1根据${table}生产原始数据\n【是否成功】:error\n【信息】:Spark原始样本生产任务执行失败"
+   /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step1根据${table}生产原始数据\n【是否成功】:error\n【信息】:Spark原始样本生产任务执行失败"
    exit 1
 else
    echo "spark原始样本生产执行成功"
@@ -90,7 +95,7 @@ savePath:${valueDataPath} \
 beginStr:${begin_early_2_Str} endStr:${end_early_2_Str} repartition:1000
 if [ $? -ne 0 ]; then
    echo "Spark特征值拼接处理任务执行失败"
-   python FeishuBot.py "荐模型数据更新 \n【任务名称】:step2特征值拼接\n【是否成功】:error\n【信息】:Spark特征值拼接处理任务执行失败"
+   /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step2特征值拼接\n【是否成功】:error\n【信息】:Spark特征值拼接处理任务执行失败"
    exit 1
 else
    echo "spark特征值拼接处理执行成功"
@@ -107,7 +112,7 @@ savePath:${bucketDataPath} \
 beginStr:${begin_early_2_Str} endStr:${end_early_2_Str} repartition:1000
 if [ $? -ne 0 ]; then
    echo "Spark特征分桶处理任务执行失败"
-   python FeishuBot.py "荐模型数据更新 \n【任务名称】:step3训练数据产出\n【是否成功】:error\n【信息】:Spark特征分桶处理任务执行失败"
+   /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step3训练数据产出\n【是否成功】:error\n【信息】:Spark特征分桶处理任务执行失败"
    exit 1
 else
    echo "spark特征分桶处理执行成功"
@@ -117,86 +122,87 @@ fi
 # 4 对比AUC 前置对比3日模型数据 与 线上模型数据效果对比,如果3日模型优于线上,更新线上模型
 echo "$(date +%Y-%m-%d_%H-%M-%S)----------step4------------开始对比,新:${MODEL_PATH}/${model_name}_${today_early_3}.txt,与线上online模型数据auc效果"
 $HADOOP fs -text ${bucketDataPath}/${begin_early_2_Str}/* | ${FM_HOME}/fm_predict -m ${LAST_MODEL_HOME}/model_online.txt -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${today}_online.txt
-$HADOOP fs -text ${bucketDataPath}/${begin_early_2_Str}/* | ${FM_HOME}/fm_predict -m ${MODEL_PATH}/${model_name}_${today_early_3}.txt -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${today}_new.txt
-
-online_auc=`cat ${PREDICT_PATH}/${model_name}_${today}_online.txt | /root/sunmingze/AUC/AUC`
-if [ $? -ne 0 ]; then
-   echo "推荐线上模型AUC计算失败"
-   python FeishuBot.py "荐模型数据更新 \n【任务名称】:step4新旧模型AUC对比\n【是否成功】:error\n【信息】:推荐线上模型AUC计算失败"
-   exit 1
-fi
-
-new_auc=`cat ${PREDICT_PATH}/${model_name}_${today}_new.txt | /root/sunmingze/AUC/AUC`
 if [ $? -ne 0 ]; then
-   echo "推荐新模型AUC计算失败"
-   python FeishuBot.py "荐模型数据更新 \n【任务名称】:step4新旧模型AUC对比\n【是否成功】:error\n【信息】:推荐新模型AUC计算失败${PREDICT_PATH}/${model_name}_${today}_new.txt"
-   exit 1
-fi
-
-
-# 4.1 对比auc数据判断是否更新线上模型
-if [ "$online_auc" \< "$new_auc" ]; then
-    echo "新模型优于线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}"
-    # 4.1.1 模型格式转换
-    cat ${MODEL_PATH}/${model_name}_${today_early_3}.txt |
-    awk -F " " '{
-        if (NR == 1) {
-            print $1"\t"$2
-        } else {
-            split($0, fields, " ");
-            OFS="\t";
-            line="" 1; i <= 10 && i <= length(fields); i++) {
-                line
-            for (i = = (line ? line "\t" : "") fields[i];
-            }
-            print line
-        }
-    }' > ${MODEL_PATH}/${model_name}_${today_early_3}_change.txt
+  echo "推荐线上模型AUC计算失败"
+  /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step4新旧模型AUC对比\n【是否成功】:error\n【信息】:推荐线上模型AUC计算失败"
+else
+  $HADOOP fs -text ${bucketDataPath}/${begin_early_2_Str}/* | ${FM_HOME}/fm_predict -m ${MODEL_PATH}/${model_name}_${today_early_3}.txt -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${today}_new.txt
+  if [ $? -ne 0 ]; then
+     echo "推荐新模型AUC计算失败"
+     /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step4新旧模型AUC对比\n【是否成功】:error\n【信息】:推荐新模型AUC计算失败${PREDICT_PATH}/${model_name}_${today}_new.txt"
+  else
+    online_auc=`cat ${PREDICT_PATH}/${model_name}_${today}_online.txt | /root/sunmingze/AUC/AUC`
     if [ $? -ne 0 ]; then
-       echo "新模型文件格式转换失败"
-       python FeishuBot.py "荐模型数据更新 \n【任务名称】:step4模型格式转换\n【是否成功】:error\n【信息】:新模型文件格式转换失败${MODEL_PATH}/${model_name}_${today_early_3}.txt"
-       exit 1
-    fi
-    # 4.1.2 模型文件上传OSS
-    online_model_path=${OSS_PATH}/${model_name}.txt
-    $HADOOP fs -test -e ${online_model_path}
-    if [ $? -eq 0 ]; then
-        echo "数据存在, 先删除。"
-        $HADOOP fs -rm -r -skipTrash ${online_model_path}
-    else
-        echo "数据不存在"
-    fi
-
-    $HADOOP fs -put ${MODEL_PATH}/${model_name}_${today_early_3}_change.txt ${online_model_path}
-    if [ $? -eq 0 ]; then
-       echo "推荐模型文件至OSS成功"
+       echo "推荐线上模型AUC计算失败"
+       /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step4新旧模型AUC对比\n【是否成功】:error\n【信息】:推荐线上模型AUC计算失败"
     else
-       echo "推荐模型文件至OSS失败"
-       python FeishuBot.py "荐模型数据更新 \n【任务名称】:step4模型推送oss\n【是否成功】:error\n【信息】:推荐模型文件至OSS失败${MODEL_PATH}/${model_name}_${today_early_3}_change.txt --- ${online_model_path}"
-       exit 1
-    fi
-    # 4.1.3 本地保存最新的线上使用的模型,用于下一次的AUC验证
-    cp -f ${LAST_MODEL_HOME}/model_online.txt ${LAST_MODEL_HOME}/model_online_$(date +\%Y\%m\%d).txt
-    cp -f ${MODEL_PATH}/${model_name}_${today_early_3}.txt ${LAST_MODEL_HOME}/model_online.txt
-    if [ $? -ne 0 ]; then
-       echo "模型备份失败"
-       exit 1
+      new_auc=`cat ${PREDICT_PATH}/${model_name}_${today}_new.txt | /root/sunmingze/AUC/AUC`
+      if [ $? -ne 0 ]; then
+         echo "推荐新模型AUC计算失败"
+         /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step4新旧模型AUC对比\n【是否成功】:error\n【信息】:推荐新模型AUC计算失败${PREDICT_PATH}/${model_name}_${today}_new.txt"
+      else
+        # 4.1 Compare the AUC values to decide whether the online model gets replaced.
+        # NOTE(review): the \< operator inside [ ] compares the two values as strings
+        # (lexicographically), not as numbers - confirm the AUC tool always prints
+        # plain decimals of the same form.
+        if [ "$online_auc" \< "$new_auc" ]; then
+            echo "新模型优于线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}"
+            # 4.1.1 Convert the model file from space-separated to tab-separated:
+            # the first row keeps its first two fields, every other row keeps at
+            # most its first 10 fields.
+            cat ${MODEL_PATH}/${model_name}_${today_early_3}.txt |
+            awk -F " " '{
+                if (NR == 1) {
+                    print $1"\t"$2
+                } else {
+                    split($0, fields, " ");
+                    OFS="\t";
+                    line=""
+                    for (i = 1; i <= 10 && i <= length(fields); i++) {
+                        line = (line ? line "\t" : "") fields[i];
+                    }
+                    print line
+                }
+            }' > ${MODEL_PATH}/${model_name}_${today_early_3}_change.txt
+            if [ $? -ne 0 ]; then
+               echo "新模型文件格式转换失败"
+               /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step4模型格式转换\n【是否成功】:error\n【信息】:新模型文件格式转换失败${MODEL_PATH}/${model_name}_${today_early_3}.txt"
+            else
+              # 4.1.2 Upload the converted model file to OSS, removing any existing
+              # object at the target path first.
+              online_model_path=${OSS_PATH}/${model_name}.txt
+              $HADOOP fs -test -e ${online_model_path}
+              if [ $? -eq 0 ]; then
+                  echo "数据存在, 先删除。"
+                  $HADOOP fs -rm -r -skipTrash ${online_model_path}
+              else
+                  echo "数据不存在"
+              fi
+              $HADOOP fs -put ${MODEL_PATH}/${model_name}_${today_early_3}_change.txt ${online_model_path}
+              if [ $? -eq 0 ]; then
+                 echo "推荐模型文件至OSS成功"
+                 # 4.1.3 Keep a dated copy of the current online model, then make the
+                 # new model the local online model used by the next AUC check.
+                 cp -f ${LAST_MODEL_HOME}/model_online.txt ${LAST_MODEL_HOME}/model_online_$(date +\%Y\%m\%d).txt
+                 cp -f ${MODEL_PATH}/${model_name}_${today_early_3}.txt ${LAST_MODEL_HOME}/model_online.txt
+                 if [ $? -ne 0 ]; then
+                     echo "模型备份失败"
+                 fi
+                 # The success notification is sent here only; a second copy used to
+                 # follow the whole if/else and was also sent after a failed
+                 # conversion or upload.
+                 /root/anaconda3/bin/python monitor_util.py --level info --msg "荐模型数据更新 \n【任务名称】:step4模型更新\n【是否成功】:success\n【信息】:新模型优于线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc},已更新${model_name}_${today_early_3}.txt模型}"
+              else
+                 echo "推荐模型文件至OSS失败"
+                 /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step4模型推送oss\n【是否成功】:error\n【信息】:推荐模型文件至OSS失败${MODEL_PATH}/${model_name}_${today_early_3}_change.txt --- ${online_model_path}"
+              fi
+            fi
+        else
+            echo "新模型不如线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}"
+            /root/anaconda3/bin/python monitor_util.py --level info --msg "荐模型数据更新 \n【任务名称】:step4模型更新\n【是否成功】:success\n【信息】:新模型不如线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}}"
+        fi
+      fi
     fi
-    python FeishuBot.py "荐模型数据更新 \n【任务名称】:step4模型更新\n【是否成功】:success\n【信息】:新模型优于线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc},已更新${model_name}_${today_early_3}.txt模型}"
-else
-    echo "新模型不如线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}"
-    python FeishuBot.py "荐模型数据更新 \n【任务名称】:step4模型更新\n【是否成功】:success\n【信息】:新模型不如线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}}"
+  fi
 fi
 
-
-
 # 5 模型训练
 echo "$(date +%Y-%m-%d_%H-%M-%S)----------step5------------开始模型训练"
 $HADOOP fs -text ${bucketDataPath}/${begin_early_2_Str}/* | ${FM_HOME}/fm_train -m ${MODEL_PATH}/${model_name}_${begin_early_2_Str}.txt -dim 1,1,8 -im ${LAST_MODEL_HOME}/model_online.txt -core 8
 if [ $? -ne 0 ]; then
    echo "模型训练失败"
-   python FeishuBot.py "荐模型数据更新 \n【任务名称】:step5模型训练\n【是否成功】:error\n【信息】:${bucketDataPath}/${begin_early_2_Str}训练失败"
-   exit 1
+   /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step5模型训练\n【是否成功】:error\n【信息】:${bucketDataPath}/${begin_early_2_Str}训练失败"
 fi
 
 echo "$(date +%Y-%m-%d_%H-%M-%S)----------step6------------模型训练完成:${MODEL_PATH}/${model_name}_${begin_early_2_Str}.txt"

+ 104 - 0
qiaojialiang/monitor_util.py

@@ -0,0 +1,104 @@
+# -*- coding: utf-8 -*-
+import argparse
+import datetime
+import json
+
+import requests
+
+server_robot = {
+    'webhook': 'https://open.feishu.cn/open-apis/bot/v2/hook/926982f5-e7af-40f5-81fd-27d8f42718e4',
+}
+
+level_header_template_map = {
+    "info": "turquoise",
+    "error": "red",
+    "warn": "yellow"
+}
+
+level_header_title_content_map = {
+    "info": "推荐模型自动更新通知",
+    "error": "推荐模型自动更新告警",
+    "warn": "推荐模型自动更新告警"
+}
+
+
+def send_card_msg_to_feishu(webhook, card_json):
+    """发送消息到飞书"""
+    headers = {'Content-Type': 'application/json'}
+    payload_message = {
+        "msg_type": "interactive",
+        "card": card_json
+    }
+    print(f"推送飞书消息内容: {json.dumps(payload_message)}")
+    response = requests.request('POST', url=webhook, headers=headers, data=json.dumps(payload_message))
+    print(response.text)
+
+
+def seconds_convert(seconds):
+    hours = seconds // 3600
+    minutes = (seconds % 3600) // 60
+    seconds = seconds % 60
+    return f"{hours}小时 {minutes}分钟 {seconds}秒"
+
+
+def _monitor(level, msg: str, start, elapsed):
+    """消息推送"""
+    now = datetime.datetime.now()
+    msg = msg.replace("\\n", "\n").replace("\\t", "\t")
+    mgs_text = f"- 当前时间: {now.strftime('%Y-%m-%d %H:%M:%S')}" \
+               f"\n- 任务描述: {msg}"
+    # f"\n- 任务开始时间: {start}" \
+    # f"\n- 任务耗时: {seconds_convert(elapsed)}" \
+    card_json = {
+        "config": {},
+        "i18n_elements": {
+            "zh_cn": [
+                {
+                    "tag": "markdown",
+                    "content": "",
+                    "text_align": "left",
+                    "text_size": "normal"
+                },
+                {
+                    "tag": "markdown",
+                    "content": mgs_text,
+                    "text_align": "left",
+                    "text_size": "normal"
+                }
+            ]
+        },
+        "i18n_header": {
+            "zh_cn": {
+                "title": {
+                    "tag": "plain_text",
+                    "content": level_header_title_content_map[level]
+                },
+                "subtitle": {
+                    "tag": "plain_text",
+                    "content": ""
+                },
+                "template": level_header_template_map[level]
+            }
+        }
+    }
+
+    send_card_msg_to_feishu(
+        webhook=server_robot.get('webhook'),
+        card_json=card_json
+    )
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='告警Utils')
+    parser.add_argument('--level', type=str, help='通知级别, info, warn, error', required=True)
+    parser.add_argument('--msg', type=str, help='消息', required=True)
+    # parser.add_argument('--start', type=str, help='任务开始时间', required=True)
+    # parser.add_argument('--elapsed', type=int, help='任务耗时【秒】', required=True)
+    args = parser.parse_args()
+
+    _monitor(
+        level=args.level,
+        msg=args.msg,
+        start="",
+        elapsed=0
+    )

+ 202 - 0
qiaojialiang/test/handle_rov_bak.sh

@@ -0,0 +1,202 @@
+#!/bin/sh
+set -ex
+
+source /root/anaconda3/bin/activate py37
+
+#  nohup sh handle_rov.sh > "$(date +%Y%m%d_%H%M%S)_handle_rov.log" 2>&1 &
+
+# 原始数据table name
+table='alg_recsys_sample_all'
+today="$(date +%Y%m%d)"
+today_early_3="$(date -d '3 days ago' +%Y%m%d)"
+#table='alg_recsys_sample_all_test'
+# 处理分区配置 推荐数据间隔一天生产,所以5日0点使用3日0-23点数据生产new模型数据
+begin_early_2_Str="$(date -d '2 days ago' +%Y%m%d)"
+end_early_2_Str="$(date -d '2 days ago' +%Y%m%d)"
+beginHhStr=00
+endHhStr=23
+max_hour=05
+max_minute=00
+# 各节点产出hdfs文件绝对路径
+# 源数据文件
+originDataPath=/dw/recommend/model/13_sample_data/
+# 特征值
+valueDataPath=/dw/recommend/model/14_feature_data/
+# 特征分桶
+bucketDataPath=/dw/recommend/model/16_train_data/
+# 模型数据路径
+MODEL_PATH=/root/joe/recommend-emr-dataprocess/model
+# 预测路径
+PREDICT_PATH=/root/joe/recommend-emr-dataprocess/predict
+# 历史线上正在使用的模型数据路径
+LAST_MODEL_HOME=/root/joe/model_online
+# 模型数据文件前缀
+model_name=aka8
+# fm模型
+FM_HOME=/root/sunmingze/alphaFM/bin
+# hadoop
+HADOOP=/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop
+OSS_PATH=oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/zhangbo/
+
+# 0 判断上游表是否生产完成,最长等待到max_hour点
+# shellcheck disable=SC2154
+echo "$(date +%Y-%m-%d_%H-%M-%S)----------step0------------开始校验是否生产完数据,分区信息:beginStr:${begin_early_2_Str}${beginHhStr},endStr:${end_early_2_Str}${endHhStr}"
+while true; do
+  python_return_code=$(python /root/joe/recommend-emr-dataprocess/qiaojialiang/checkHiveDataUtil.py --table ${table} --beginStr ${begin_early_2_Str}${beginHhStr} --endStr ${end_early_2_Str}${endHhStr})
+  echo "python 返回值:${python_return_code}"
+  if [ $python_return_code -eq 0 ]; then
+    echo "Python程序返回0,校验存在数据,退出循环。"
+    break
+  fi
+  echo "Python程序返回非0值,不存在数据,等待五分钟后再次调用。"
+  sleep 300
+  current_hour=$(date +%H)
+  current_minute=$(date +%M)
+  # shellcheck disable=SC2039
+  if (( current_hour > max_hour || (current_hour == max_hour && current_minute >= max_minute) )); then
+    echo "最长等待时间已到,失败:${current_hour}-${current_minute}"
+    python FeishuBot.py "荐模型数据更新 \n【任务名称】:step0校验是否生产完数据\n【是否成功】:error\n【信息】:最长等待时间已到,失败:${current_hour}-${current_minute}"
+    exit 1
+  fi
+done
+
+# 1 生产原始数据
+echo "$(date +%Y-%m-%d_%H-%M-%S)----------step1------------开始根据${table}生产原始数据"
+/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+--class com.aliyun.odps.spark.examples.makedata_qiao.makedata_13_originData_20240705 \
+--master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
+../target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+tablePart:64 repartition:32 \
+beginStr:${begin_early_2_Str}${beginHhStr} endStr:${end_early_2_Str}${endHhStr} \
+savePath:${originDataPath} \
+table:${table}
+if [ $? -ne 0 ]; then
+   echo "Spark原始样本生产任务执行失败"
+   python FeishuBot.py "荐模型数据更新 \n【任务名称】:step1根据${table}生产原始数据\n【是否成功】:error\n【信息】:Spark原始样本生产任务执行失败"
+   exit 1
+else
+   echo "spark原始样本生产执行成功"
+fi
+
+
+# 2 特征值拼接
+echo "$(date +%Y-%m-%d_%H-%M-%S)----------step2------------开始特征值拼接"
+/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+--class com.aliyun.odps.spark.examples.makedata_qiao.makedata_14_valueData_20240705 \
+--master yarn --driver-memory 1G --executor-memory 3G --executor-cores 1 --num-executors 32 \
+../target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+readPath:${originDataPath} \
+savePath:${valueDataPath} \
+beginStr:${begin_early_2_Str} endStr:${end_early_2_Str} repartition:1000
+if [ $? -ne 0 ]; then
+   echo "Spark特征值拼接处理任务执行失败"
+   python FeishuBot.py "荐模型数据更新 \n【任务名称】:step2特征值拼接\n【是否成功】:error\n【信息】:Spark特征值拼接处理任务执行失败"
+   exit 1
+else
+   echo "spark特征值拼接处理执行成功"
+fi
+
+# 3 特征分桶
+echo "$(date +%Y-%m-%d_%H-%M-%S)----------step3------------根据特征分桶生产重打分特征数据"
+/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+--class com.aliyun.odps.spark.examples.makedata_qiao.makedata_16_bucketData_20240705 \
+--master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
+../target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+readPath:${valueDataPath} \
+savePath:${bucketDataPath} \
+beginStr:${begin_early_2_Str} endStr:${end_early_2_Str} repartition:1000
+if [ $? -ne 0 ]; then
+   echo "Spark特征分桶处理任务执行失败"
+   python FeishuBot.py "荐模型数据更新 \n【任务名称】:step3训练数据产出\n【是否成功】:error\n【信息】:Spark特征分桶处理任务执行失败"
+   exit 1
+else
+   echo "spark特征分桶处理执行成功"
+fi
+
+
+# 4 对比AUC 前置对比3日模型数据 与 线上模型数据效果对比,如果3日模型优于线上,更新线上模型
+echo "$(date +%Y-%m-%d_%H-%M-%S)----------step4------------开始对比,新:${MODEL_PATH}/${model_name}_${today_early_3}.txt,与线上online模型数据auc效果"
+$HADOOP fs -text ${bucketDataPath}/${begin_early_2_Str}/* | ${FM_HOME}/fm_predict -m ${LAST_MODEL_HOME}/model_online.txt -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${today}_online.txt
+$HADOOP fs -text ${bucketDataPath}/${begin_early_2_Str}/* | ${FM_HOME}/fm_predict -m ${MODEL_PATH}/${model_name}_${today_early_3}.txt -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${today}_new.txt
+
+online_auc=`cat ${PREDICT_PATH}/${model_name}_${today}_online.txt | /root/sunmingze/AUC/AUC`
+if [ $? -ne 0 ]; then
+   echo "推荐线上模型AUC计算失败"
+   python FeishuBot.py "荐模型数据更新 \n【任务名称】:step4新旧模型AUC对比\n【是否成功】:error\n【信息】:推荐线上模型AUC计算失败"
+   exit 1
+fi
+
+new_auc=`cat ${PREDICT_PATH}/${model_name}_${today}_new.txt | /root/sunmingze/AUC/AUC`
+if [ $? -ne 0 ]; then
+   echo "推荐新模型AUC计算失败"
+   python FeishuBot.py "荐模型数据更新 \n【任务名称】:step4新旧模型AUC对比\n【是否成功】:error\n【信息】:推荐新模型AUC计算失败${PREDICT_PATH}/${model_name}_${today}_new.txt"
+   exit 1
+fi
+
+
+# 4.1 对比auc数据判断是否更新线上模型
+if [ "$online_auc" \< "$new_auc" ]; then
+    echo "新模型优于线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}"
+    # 4.1.1 Convert the model file from space-separated to tab-separated:
+    # the first row keeps its first two fields, every other row keeps at most
+    # its first 10 fields.
+    cat ${MODEL_PATH}/${model_name}_${today_early_3}.txt |
+    awk -F " " '{
+        if (NR == 1) {
+            print $1"\t"$2
+        } else {
+            split($0, fields, " ");
+            OFS="\t";
+            line=""
+            for (i = 1; i <= 10 && i <= length(fields); i++) {
+                line = (line ? line "\t" : "") fields[i];
+            }
+            print line
+        }
+    }' > ${MODEL_PATH}/${model_name}_${today_early_3}_change.txt
+    if [ $? -ne 0 ]; then
+       echo "新模型文件格式转换失败"
+       python FeishuBot.py "荐模型数据更新 \n【任务名称】:step4模型格式转换\n【是否成功】:error\n【信息】:新模型文件格式转换失败${MODEL_PATH}/${model_name}_${today_early_3}.txt"
+       exit 1
+    fi
+    # 4.1.2 模型文件上传OSS
+    online_model_path=${OSS_PATH}/${model_name}.txt
+    $HADOOP fs -test -e ${online_model_path}
+    if [ $? -eq 0 ]; then
+        echo "数据存在, 先删除。"
+        $HADOOP fs -rm -r -skipTrash ${online_model_path}
+    else
+        echo "数据不存在"
+    fi
+
+    $HADOOP fs -put ${MODEL_PATH}/${model_name}_${today_early_3}_change.txt ${online_model_path}
+    if [ $? -eq 0 ]; then
+       echo "推荐模型文件至OSS成功"
+    else
+       echo "推荐模型文件至OSS失败"
+       python FeishuBot.py "荐模型数据更新 \n【任务名称】:step4模型推送oss\n【是否成功】:error\n【信息】:推荐模型文件至OSS失败${MODEL_PATH}/${model_name}_${today_early_3}_change.txt --- ${online_model_path}"
+       exit 1
+    fi
+    # 4.1.3 本地保存最新的线上使用的模型,用于下一次的AUC验证
+    cp -f ${LAST_MODEL_HOME}/model_online.txt ${LAST_MODEL_HOME}/model_online_$(date +\%Y\%m\%d).txt
+    cp -f ${MODEL_PATH}/${model_name}_${today_early_3}.txt ${LAST_MODEL_HOME}/model_online.txt
+    if [ $? -ne 0 ]; then
+       echo "模型备份失败"
+       exit 1
+    fi
+    python FeishuBot.py "荐模型数据更新 \n【任务名称】:step4模型更新\n【是否成功】:success\n【信息】:新模型优于线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc},已更新${model_name}_${today_early_3}.txt模型}"
+else
+    echo "新模型不如线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}"
+    python FeishuBot.py "荐模型数据更新 \n【任务名称】:step4模型更新\n【是否成功】:success\n【信息】:新模型不如线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}}"
+fi
+
+
+
+# 5 模型训练
+echo "$(date +%Y-%m-%d_%H-%M-%S)----------step5------------开始模型训练"
+$HADOOP fs -text ${bucketDataPath}/${begin_early_2_Str}/* | ${FM_HOME}/fm_train -m ${MODEL_PATH}/${model_name}_${begin_early_2_Str}.txt -dim 1,1,8 -im ${LAST_MODEL_HOME}/model_online.txt -core 8
+if [ $? -ne 0 ]; then
+   echo "模型训练失败"
+   python FeishuBot.py "荐模型数据更新 \n【任务名称】:step5模型训练\n【是否成功】:error\n【信息】:${bucketDataPath}/${begin_early_2_Str}训练失败"
+   exit 1
+fi
+
+echo "$(date +%Y-%m-%d_%H-%M-%S)----------step6------------模型训练完成:${MODEL_PATH}/${model_name}_${begin_early_2_Str}.txt"