|  | @@ -1,8 +1,13 @@
 | 
	
		
			
				|  |  |  #!/bin/sh
 | 
	
		
			
				|  |  | -set -ex
 | 
	
		
			
				|  |  | +set -x
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  source /root/anaconda3/bin/activate py37
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +export SPARK_HOME=/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8
 | 
	
		
			
				|  |  | +export PATH=$SPARK_HOME/bin:$PATH
 | 
	
		
			
				|  |  | +export HADOOP_CONF_DIR=/etc/taihao-apps/hadoop-conf
 | 
	
		
			
				|  |  | +export JAVA_HOME=/usr/lib/jvm/java-1.8.0
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |  #  nohup sh handle_rov.sh > "$(date +%Y%m%d_%H%M%S)_handle_rov.log" 2>&1 &
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  # 原始数据table name
 | 
	
	
		
			
				|  | @@ -55,7 +60,7 @@ while true; do
 | 
	
		
			
				|  |  |    # shellcheck disable=SC2039
 | 
	
		
			
				|  |  |    if (( current_hour > max_hour || (current_hour == max_hour && current_minute >= max_minute) )); then
 | 
	
		
			
				|  |  |      echo "最长等待时间已到,失败:${current_hour}-${current_minute}"
 | 
	
		
			
				|  |  | -    python FeishuBot.py "荐模型数据更新 \n【任务名称】:step0校验是否生产完数据\n【是否成功】:error\n【信息】:最长等待时间已到,失败:${current_hour}-${current_minute}"
 | 
	
		
			
				|  |  | +    /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step0校验是否生产完数据\n【是否成功】:error\n【信息】:最长等待时间已到,失败:${current_hour}-${current_minute}"
 | 
	
		
			
				|  |  |      exit 1
 | 
	
		
			
				|  |  |    fi
 | 
	
		
			
				|  |  |  done
 | 
	
	
		
			
				|  | @@ -72,7 +77,7 @@ savePath:${originDataPath} \
 | 
	
		
			
				|  |  |  table:${table}
 | 
	
		
			
				|  |  |  if [ $? -ne 0 ]; then
 | 
	
		
			
				|  |  |     echo "Spark原始样本生产任务执行失败"
 | 
	
		
			
				|  |  | -   python FeishuBot.py "荐模型数据更新 \n【任务名称】:step1根据${table}生产原始数据\n【是否成功】:error\n【信息】:Spark原始样本生产任务执行失败"
 | 
	
		
			
				|  |  | +   /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step1根据${table}生产原始数据\n【是否成功】:error\n【信息】:Spark原始样本生产任务执行失败"
 | 
	
		
			
				|  |  |     exit 1
 | 
	
		
			
				|  |  |  else
 | 
	
		
			
				|  |  |     echo "spark原始样本生产执行成功"
 | 
	
	
		
			
				|  | @@ -90,7 +95,7 @@ savePath:${valueDataPath} \
 | 
	
		
			
				|  |  |  beginStr:${begin_early_2_Str} endStr:${end_early_2_Str} repartition:1000
 | 
	
		
			
				|  |  |  if [ $? -ne 0 ]; then
 | 
	
		
			
				|  |  |     echo "Spark特征值拼接处理任务执行失败"
 | 
	
		
			
				|  |  | -   python FeishuBot.py "荐模型数据更新 \n【任务名称】:step2特征值拼接\n【是否成功】:error\n【信息】:Spark特征值拼接处理任务执行失败"
 | 
	
		
			
				|  |  | +   /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step2特征值拼接\n【是否成功】:error\n【信息】:Spark特征值拼接处理任务执行失败"
 | 
	
		
			
				|  |  |     exit 1
 | 
	
		
			
				|  |  |  else
 | 
	
		
			
				|  |  |     echo "spark特征值拼接处理执行成功"
 | 
	
	
		
			
				|  | @@ -107,7 +112,7 @@ savePath:${bucketDataPath} \
 | 
	
		
			
				|  |  |  beginStr:${begin_early_2_Str} endStr:${end_early_2_Str} repartition:1000
 | 
	
		
			
				|  |  |  if [ $? -ne 0 ]; then
 | 
	
		
			
				|  |  |     echo "Spark特征分桶处理任务执行失败"
 | 
	
		
			
				|  |  | -   python FeishuBot.py "荐模型数据更新 \n【任务名称】:step3训练数据产出\n【是否成功】:error\n【信息】:Spark特征分桶处理任务执行失败"
 | 
	
		
			
				|  |  | +   /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step3训练数据产出\n【是否成功】:error\n【信息】:Spark特征分桶处理任务执行失败"
 | 
	
		
			
				|  |  |     exit 1
 | 
	
		
			
				|  |  |  else
 | 
	
		
			
				|  |  |     echo "spark特征分桶处理执行成功"
 | 
	
	
		
			
				|  | @@ -117,86 +122,87 @@ fi
 | 
	
		
			
				|  |  |  # 4 对比AUC 前置对比3日模型数据 与 线上模型数据效果对比,如果3日模型优于线上,更新线上模型
 | 
	
		
			
				|  |  |  echo "$(date +%Y-%m-%d_%H-%M-%S)----------step4------------开始对比,新:${MODEL_PATH}/${model_name}_${today_early_3}.txt,与线上online模型数据auc效果"
 | 
	
		
			
				|  |  |  $HADOOP fs -text ${bucketDataPath}/${begin_early_2_Str}/* | ${FM_HOME}/fm_predict -m ${LAST_MODEL_HOME}/model_online.txt -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${today}_online.txt
 | 
	
		
			
				|  |  | -$HADOOP fs -text ${bucketDataPath}/${begin_early_2_Str}/* | ${FM_HOME}/fm_predict -m ${MODEL_PATH}/${model_name}_${today_early_3}.txt -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${today}_new.txt
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -online_auc=`cat ${PREDICT_PATH}/${model_name}_${today}_online.txt | /root/sunmingze/AUC/AUC`
 | 
	
		
			
				|  |  | -if [ $? -ne 0 ]; then
 | 
	
		
			
				|  |  | -   echo "推荐线上模型AUC计算失败"
 | 
	
		
			
				|  |  | -   python FeishuBot.py "荐模型数据更新 \n【任务名称】:step4新旧模型AUC对比\n【是否成功】:error\n【信息】:推荐线上模型AUC计算失败"
 | 
	
		
			
				|  |  | -   exit 1
 | 
	
		
			
				|  |  | -fi
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -new_auc=`cat ${PREDICT_PATH}/${model_name}_${today}_new.txt | /root/sunmingze/AUC/AUC`
 | 
	
		
			
				|  |  |  if [ $? -ne 0 ]; then
 | 
	
		
			
				|  |  | -   echo "推荐新模型AUC计算失败"
 | 
	
		
			
				|  |  | -   python FeishuBot.py "荐模型数据更新 \n【任务名称】:step4新旧模型AUC对比\n【是否成功】:error\n【信息】:推荐新模型AUC计算失败${PREDICT_PATH}/${model_name}_${today}_new.txt"
 | 
	
		
			
				|  |  | -   exit 1
 | 
	
		
			
				|  |  | -fi
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -# 4.1 对比auc数据判断是否更新线上模型
 | 
	
		
			
				|  |  | -if [ "$online_auc" \< "$new_auc" ]; then
 | 
	
		
			
				|  |  | -    echo "新模型优于线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}"
 | 
	
		
			
				|  |  | -    # 4.1.1 模型格式转换
 | 
	
		
			
				|  |  | -    cat ${MODEL_PATH}/${model_name}_${today_early_3}.txt |
 | 
	
		
			
				|  |  | -    awk -F " " '{
 | 
	
		
			
				|  |  | -        if (NR == 1) {
 | 
	
		
			
				|  |  | -            print $1"\t"$2
 | 
	
		
			
				|  |  | -        } else {
 | 
	
		
			
				|  |  | -            split($0, fields, " ");
 | 
	
		
			
				|  |  | -            OFS="\t";
 | 
	
		
			
				|  |  | -            line=""
 | 
	
		
			
				|  |  | -            for (i = 1; i <= 10 && i <= length(fields); i++) {
 | 
	
		
			
				|  |  | -                line = (line ? line "\t" : "") fields[i];
 | 
	
		
			
				|  |  | -            }
 | 
	
		
			
				|  |  | -            print line
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -    }' > ${MODEL_PATH}/${model_name}_${today_early_3}_change.txt
 | 
	
		
			
				|  |  | +  echo "推荐线上模型AUC计算失败"
 | 
	
		
			
				|  |  | +  /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step4新旧模型AUC对比\n【是否成功】:error\n【信息】:推荐线上模型AUC计算失败"
 | 
	
		
			
				|  |  | +else
 | 
	
		
			
				|  |  | +  $HADOOP fs -text ${bucketDataPath}/${begin_early_2_Str}/* | ${FM_HOME}/fm_predict -m ${MODEL_PATH}/${model_name}_${today_early_3}.txt -dim 8 -core 8 -out ${PREDICT_PATH}/${model_name}_${today}_new.txt
 | 
	
		
			
				|  |  | +  if [ $? -ne 0 ]; then
 | 
	
		
			
				|  |  | +     echo "推荐新模型AUC计算失败"
 | 
	
		
			
				|  |  | +     /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step4新旧模型AUC对比\n【是否成功】:error\n【信息】:推荐新模型AUC计算失败${PREDICT_PATH}/${model_name}_${today}_new.txt"
 | 
	
		
			
				|  |  | +  else
 | 
	
		
			
				|  |  | +    online_auc=`cat ${PREDICT_PATH}/${model_name}_${today}_online.txt | /root/sunmingze/AUC/AUC`
 | 
	
		
			
				|  |  |      if [ $? -ne 0 ]; then
 | 
	
		
			
				|  |  | -       echo "新模型文件格式转换失败"
 | 
	
		
			
				|  |  | -       python FeishuBot.py "荐模型数据更新 \n【任务名称】:step4模型格式转换\n【是否成功】:error\n【信息】:新模型文件格式转换失败${MODEL_PATH}/${model_name}_${today_early_3}.txt"
 | 
	
		
			
				|  |  | -       exit 1
 | 
	
		
			
				|  |  | -    fi
 | 
	
		
			
				|  |  | -    # 4.1.2 模型文件上传OSS
 | 
	
		
			
				|  |  | -    online_model_path=${OSS_PATH}/${model_name}.txt
 | 
	
		
			
				|  |  | -    $HADOOP fs -test -e ${online_model_path}
 | 
	
		
			
				|  |  | -    if [ $? -eq 0 ]; then
 | 
	
		
			
				|  |  | -        echo "数据存在, 先删除。"
 | 
	
		
			
				|  |  | -        $HADOOP fs -rm -r -skipTrash ${online_model_path}
 | 
	
		
			
				|  |  | -    else
 | 
	
		
			
				|  |  | -        echo "数据不存在"
 | 
	
		
			
				|  |  | -    fi
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    $HADOOP fs -put ${MODEL_PATH}/${model_name}_${today_early_3}_change.txt ${online_model_path}
 | 
	
		
			
				|  |  | -    if [ $? -eq 0 ]; then
 | 
	
		
			
				|  |  | -       echo "推荐模型文件至OSS成功"
 | 
	
		
			
				|  |  | +       echo "推荐线上模型AUC计算失败"
 | 
	
		
			
				|  |  | +       /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step4新旧模型AUC对比\n【是否成功】:error\n【信息】:推荐线上模型AUC计算失败"
 | 
	
		
			
				|  |  |      else
 | 
	
		
			
				|  |  | -       echo "推荐模型文件至OSS失败"
 | 
	
		
			
				|  |  | -       python FeishuBot.py "荐模型数据更新 \n【任务名称】:step4模型推送oss\n【是否成功】:error\n【信息】:推荐模型文件至OSS失败${MODEL_PATH}/${model_name}_${today_early_3}_change.txt --- ${online_model_path}"
 | 
	
		
			
				|  |  | -       exit 1
 | 
	
		
			
				|  |  | -    fi
 | 
	
		
			
				|  |  | -    # 4.1.3 本地保存最新的线上使用的模型,用于下一次的AUC验证
 | 
	
		
			
				|  |  | -    cp -f ${LAST_MODEL_HOME}/model_online.txt ${LAST_MODEL_HOME}/model_online_$(date +\%Y\%m\%d).txt
 | 
	
		
			
				|  |  | -    cp -f ${MODEL_PATH}/${model_name}_${today_early_3}.txt ${LAST_MODEL_HOME}/model_online.txt
 | 
	
		
			
				|  |  | -    if [ $? -ne 0 ]; then
 | 
	
		
			
				|  |  | -       echo "模型备份失败"
 | 
	
		
			
				|  |  | -       exit 1
 | 
	
		
			
				|  |  | +      new_auc=`cat ${PREDICT_PATH}/${model_name}_${today}_new.txt | /root/sunmingze/AUC/AUC`
 | 
	
		
			
				|  |  | +      if [ $? -ne 0 ]; then
 | 
	
		
			
				|  |  | +         echo "推荐新模型AUC计算失败"
 | 
	
		
			
				|  |  | +         /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step4新旧模型AUC对比\n【是否成功】:error\n【信息】:推荐新模型AUC计算失败${PREDICT_PATH}/${model_name}_${today}_new.txt"
 | 
	
		
			
				|  |  | +      else
 | 
	
		
			
				|  |  | +        # 4.1 对比auc数据判断是否更新线上模型
 | 
	
		
			
				|  |  | +        if [ "$online_auc" \< "$new_auc" ]; then
 | 
	
		
			
				|  |  | +            echo "新模型优于线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}"
 | 
	
		
			
				|  |  | +            # 4.1.1 模型格式转换
 | 
	
		
			
				|  |  | +            cat ${MODEL_PATH}/${model_name}_${today_early_3}.txt |
 | 
	
		
			
				|  |  | +            awk -F " " '{
 | 
	
		
			
				|  |  | +                if (NR == 1) {
 | 
	
		
			
				|  |  | +                    print $1"\t"$2
 | 
	
		
			
				|  |  | +                } else {
 | 
	
		
			
				|  |  | +                    split($0, fields, " ");
 | 
	
		
			
				|  |  | +                    OFS="\t";
 | 
	
		
			
				|  |  | +                    line=""
 | 
	
		
			
				|  |  | +                    for (i = 1; i <= 10 && i <= length(fields); i++) {
 | 
	
		
			
				|  |  | +                        line = (line ? line "\t" : "") fields[i];
 | 
	
		
			
				|  |  | +                    }
 | 
	
		
			
				|  |  | +                    print line
 | 
	
		
			
				|  |  | +                }
 | 
	
		
			
				|  |  | +            }' > ${MODEL_PATH}/${model_name}_${today_early_3}_change.txt
 | 
	
		
			
				|  |  | +            if [ $? -ne 0 ]; then
 | 
	
		
			
				|  |  | +               echo "新模型文件格式转换失败"
 | 
	
		
			
				|  |  | +               /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step4模型格式转换\n【是否成功】:error\n【信息】:新模型文件格式转换失败${MODEL_PATH}/${model_name}_${today_early_3}.txt"
 | 
	
		
			
				|  |  | +            else
 | 
	
		
			
				|  |  | +              # 4.1.2 模型文件上传OSS
 | 
	
		
			
				|  |  | +              online_model_path=${OSS_PATH}/${model_name}.txt
 | 
	
		
			
				|  |  | +              $HADOOP fs -test -e ${online_model_path}
 | 
	
		
			
				|  |  | +              if [ $? -eq 0 ]; then
 | 
	
		
			
				|  |  | +                  echo "数据存在, 先删除。"
 | 
	
		
			
				|  |  | +                  $HADOOP fs -rm -r -skipTrash ${online_model_path}
 | 
	
		
			
				|  |  | +              else
 | 
	
		
			
				|  |  | +                  echo "数据不存在"
 | 
	
		
			
				|  |  | +              fi
 | 
	
		
			
				|  |  | +              $HADOOP fs -put ${MODEL_PATH}/${model_name}_${today_early_3}_change.txt ${online_model_path}
 | 
	
		
			
				|  |  | +              if [ $? -eq 0 ]; then
 | 
	
		
			
				|  |  | +                 echo "推荐模型文件至OSS成功"
 | 
	
		
			
				|  |  | +                  # 4.1.3 本地保存最新的线上使用的模型,用于下一次的AUC验证
 | 
	
		
			
				|  |  | +                 cp -f ${LAST_MODEL_HOME}/model_online.txt ${LAST_MODEL_HOME}/model_online_$(date +\%Y\%m\%d).txt
 | 
	
		
			
				|  |  | +                 cp -f ${MODEL_PATH}/${model_name}_${today_early_3}.txt ${LAST_MODEL_HOME}/model_online.txt
 | 
	
		
			
				|  |  | +                 if [ $? -ne 0 ]; then
 | 
	
		
			
				|  |  | +                     echo "模型备份失败"
 | 
	
		
			
				|  |  | +                 fi
 | 
	
		
			
				|  |  | +                 /root/anaconda3/bin/python monitor_util.py --level info --msg "荐模型数据更新 \n【任务名称】:step4模型更新\n【是否成功】:success\n【信息】:新模型优于线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc},已更新${model_name}_${today_early_3}.txt模型}"
 | 
	
		
			
				|  |  | +              else
 | 
	
		
			
				|  |  | +                 echo "推荐模型文件至OSS失败"
 | 
	
		
			
				|  |  | +                 /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step4模型推送oss\n【是否成功】:error\n【信息】:推荐模型文件至OSS失败${MODEL_PATH}/${model_name}_${today_early_3}_change.txt --- ${online_model_path}"
 | 
	
		
			
				|  |  | +              fi
 | 
	
		
			
				|  |  | +            fi
 | 
	
		
			
				|  |  | +            /root/anaconda3/bin/python monitor_util.py --level info --msg "荐模型数据更新 \n【任务名称】:step4模型更新\n【是否成功】:success\n【信息】:新模型优于线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc},已更新${model_name}_${today_early_3}.txt模型}"
 | 
	
		
			
				|  |  | +        else
 | 
	
		
			
				|  |  | +            echo "新模型不如线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}"
 | 
	
		
			
				|  |  | +            /root/anaconda3/bin/python monitor_util.py --level info --msg "荐模型数据更新 \n【任务名称】:step4模型更新\n【是否成功】:success\n【信息】:新模型不如线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}}"
 | 
	
		
			
				|  |  | +        fi
 | 
	
		
			
				|  |  | +      fi
 | 
	
		
			
				|  |  |      fi
 | 
	
		
			
				|  |  | -    python FeishuBot.py "荐模型数据更新 \n【任务名称】:step4模型更新\n【是否成功】:success\n【信息】:新模型优于线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc},已更新${model_name}_${today_early_3}.txt模型}"
 | 
	
		
			
				|  |  | -else
 | 
	
		
			
				|  |  | -    echo "新模型不如线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}"
 | 
	
		
			
				|  |  | -    python FeishuBot.py "荐模型数据更新 \n【任务名称】:step4模型更新\n【是否成功】:success\n【信息】:新模型不如线上模型: 线上模型AUC: ${online_auc}, 新模型AUC: ${new_auc}}"
 | 
	
		
			
				|  |  | +  fi
 | 
	
		
			
				|  |  |  fi
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  |  # 5 模型训练
 | 
	
		
			
				|  |  |  echo "$(date +%Y-%m-%d_%H-%M-%S)----------step5------------开始模型训练"
 | 
	
		
			
				|  |  |  $HADOOP fs -text ${bucketDataPath}/${begin_early_2_Str}/* | ${FM_HOME}/fm_train -m ${MODEL_PATH}/${model_name}_${begin_early_2_Str}.txt -dim 1,1,8 -im ${LAST_MODEL_HOME}/model_online.txt -core 8
 | 
	
		
			
				|  |  |  if [ $? -ne 0 ]; then
 | 
	
		
			
				|  |  |     echo "模型训练失败"
 | 
	
		
			
				|  |  | -   python FeishuBot.py "荐模型数据更新 \n【任务名称】:step5模型训练\n【是否成功】:error\n【信息】:${bucketDataPath}/${begin_early_2_Str}训练失败"
 | 
	
		
			
				|  |  | -   exit 1
 | 
	
		
			
				|  |  | +   /root/anaconda3/bin/python monitor_util.py --level error --msg "荐模型数据更新 \n【任务名称】:step5模型训练\n【是否成功】:error\n【信息】:${bucketDataPath}/${begin_early_2_Str}训练失败"
 | 
	
		
			
				|  |  |  fi
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  echo "$(date +%Y-%m-%d_%H-%M-%S)----------step6------------模型训练完成:${MODEL_PATH}/${model_name}_${begin_early_2_Str}.txt"
 |