|
@@ -52,53 +52,53 @@ echo "$LOG_PREFIX -- 大数据数据生产校验 -- 大数据数据生产校验
|
|
|
|
|
|
|
|
|
|
|
|
-## 2 原始特征生成
|
|
|
-#step_start_time=$(date "+%Y-%m-%d %H:%M:%S")
|
|
|
-#/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
|
|
|
-#--class com.aliyun.odps.spark.zhp.makedata_ad.makedata_ad_31_originData_20240620 \
|
|
|
-#--master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
|
|
|
-#./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
|
|
|
-#tablePart:64 repartition:16 \
|
|
|
-#beginStr:${today_early_1}00 endStr:${today}10 \
|
|
|
-#savePath:${originDataSavePath} \
|
|
|
-#table:alg_recsys_ad_sample_all filterHours:00,01,02,03,04,05,06,07 \
|
|
|
-#idDefaultValue:0.01
|
|
|
-#
|
|
|
-#step_end_time=$(date "+%Y-%m-%d %H:%M:%S")
|
|
|
-#step_elapsed=$(($(date +%s -d "$step_end_time") - $(date +%s -d "$step_start_time")))
|
|
|
-#if [ $? -ne 0 ]; then
|
|
|
-# msg="Spark原始样本生产任务执行失败"
|
|
|
-# echo "$LOG_PREFIX -- 原始样本生产 -- $msg: 耗时 $step_elapsed"
|
|
|
-# elapsed=$(($(date +%s -d "$step_end_time") - $(date +%s -d "$start_time")))
|
|
|
-# /root/anaconda3/bin/python ad/ad_monitor_util.py --level error --msg "$msg" --start "$start_time" --elapsed "$elapsed"
|
|
|
-# exit 1
|
|
|
-#fi
|
|
|
-#echo "$LOG_PREFIX -- 原始样本生产 -- Spark原始样本生产任务执行成功: 耗时 $step_elapsed"
|
|
|
-#
|
|
|
-#
|
|
|
-#
|
|
|
-#
|
|
|
-## 3 特征分桶
|
|
|
-#step_start_time=$(date "+%Y-%m-%d %H:%M:%S")
|
|
|
-#/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
|
|
|
-#--class com.aliyun.odps.spark.zhp.makedata_ad.makedata_ad_33_bucketData_20240622 \
|
|
|
-#--master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
|
|
|
-#./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
|
|
|
-#beginStr:${today_early_1} endStr:${today} repartition:100 \
|
|
|
-#filterNames:adid_,targeting_conversion_ \
|
|
|
-#readPath:${originDataSavePath} \
|
|
|
-#savePath:${bucketFeatureSavePath}
|
|
|
-#
|
|
|
-#step_end_time=$(date "+%Y-%m-%d %H:%M:%S")
|
|
|
-#step_elapsed=$(($(date +%s -d "$step_end_time") - $(date +%s -d "$step_start_time")))
|
|
|
-#if [ $? -ne 0 ]; then
|
|
|
-# msg="Spark特征分桶处理任务执行失败"
|
|
|
-# echo "$LOG_PREFIX -- 特征分桶处理任务 -- $msg: 耗时 $step_elapsed"
|
|
|
-# elapsed=$(($(date +%s -d "$step_end_time") - $(date +%s -d "$start_time")))
|
|
|
-# /root/anaconda3/bin/python ad/ad_monitor_util.py ${msg}
|
|
|
-# exit 1
|
|
|
-#fi
|
|
|
-#echo "$LOG_PREFIX -- 特征分桶处理任务 -- spark特征分桶处理执行成功: 耗时 $step_elapsed"
|
|
|
# 2 Raw feature generation
# Submits the Spark job that produces the raw ad samples into
# ${originDataSavePath}. On a non-zero Spark exit status the failure is logged,
# reported through ad/ad_monitor_util.py, and the script exits with status 1.
# Reads: today_early_1, today, originDataSavePath, LOG_PREFIX, start_time.
step_start_time=$(date "+%Y-%m-%d %H:%M:%S")
# The status is captured on the command itself. Checking $? after the
# step_elapsed arithmetic below would test the exit status of `date`, not of
# the Spark job, so a failed job would be logged as a success.
step_status=0
/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
--class com.aliyun.odps.spark.zhp.makedata_ad.makedata_ad_31_originData_20240620 \
--master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
tablePart:64 repartition:16 \
"beginStr:${today_early_1}00" "endStr:${today}10" \
"savePath:${originDataSavePath}" \
table:alg_recsys_ad_sample_all filterHours:00,01,02,03,04,05,06,07 \
idDefaultValue:0.01 || step_status=$?

step_end_time=$(date "+%Y-%m-%d %H:%M:%S")
step_elapsed=$(($(date +%s -d "$step_end_time") - $(date +%s -d "$step_start_time")))
if [ "$step_status" -ne 0 ]; then
    msg="Spark原始样本生产任务执行失败"
    echo "$LOG_PREFIX -- 原始样本生产 -- $msg: 耗时 $step_elapsed"
    # Elapsed time since the start of the whole script (start_time is set
    # outside this section), as opposed to step_elapsed for this step only.
    elapsed=$(($(date +%s -d "$step_end_time") - $(date +%s -d "$start_time")))
    /root/anaconda3/bin/python ad/ad_monitor_util.py --level error --msg "$msg" --start "$start_time" --elapsed "$elapsed"
    exit 1
fi
echo "$LOG_PREFIX -- 原始样本生产 -- Spark原始样本生产任务执行成功: 耗时 $step_elapsed"
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# 3 Feature bucketing
# Submits the Spark job that reads the raw samples from ${originDataSavePath}
# and writes bucketed features to ${bucketFeatureSavePath}. On a non-zero Spark
# exit status the failure is logged, reported through ad/ad_monitor_util.py,
# and the script exits with status 1.
# Reads: today_early_1, today, originDataSavePath, bucketFeatureSavePath,
#        LOG_PREFIX, start_time.
step_start_time=$(date "+%Y-%m-%d %H:%M:%S")
# The status is captured on the command itself. Checking $? after the
# step_elapsed arithmetic below would test the exit status of `date`, not of
# the Spark job, so a failed job would be logged as a success.
step_status=0
/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
--class com.aliyun.odps.spark.zhp.makedata_ad.makedata_ad_33_bucketData_20240622 \
--master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
"beginStr:${today_early_1}" "endStr:${today}" repartition:100 \
filterNames:adid_,targeting_conversion_ \
"readPath:${originDataSavePath}" \
"savePath:${bucketFeatureSavePath}" || step_status=$?

step_end_time=$(date "+%Y-%m-%d %H:%M:%S")
step_elapsed=$(($(date +%s -d "$step_end_time") - $(date +%s -d "$step_start_time")))
if [ "$step_status" -ne 0 ]; then
    msg="Spark特征分桶处理任务执行失败"
    echo "$LOG_PREFIX -- 特征分桶处理任务 -- $msg: 耗时 $step_elapsed"
    # Elapsed time since the start of the whole script (start_time is set
    # outside this section), as opposed to step_elapsed for this step only.
    elapsed=$(($(date +%s -d "$step_end_time") - $(date +%s -d "$start_time")))
    # Same argument form as the other failure reports in this script; the bare
    # `${msg}` call used here before left $elapsed computed but unused.
    # NOTE(review): ad_monitor_util.py's CLI is not visible here - confirm it
    # expects --level/--msg/--start/--elapsed for this step as well.
    /root/anaconda3/bin/python ad/ad_monitor_util.py --level error --msg "$msg" --start "$start_time" --elapsed "$elapsed"
    exit 1
fi
echo "$LOG_PREFIX -- 特征分桶处理任务 -- spark特征分桶处理执行成功: 耗时 $step_elapsed"
|
|
|
|
|
|
|
|
|
|
|
@@ -213,47 +213,47 @@ echo -e "$LOG_PREFIX -- 模型文件格式转换 -- 转换后的路径为 [$chan
|
|
|
|
|
|
|
|
|
|
|
|
-## 7 模型文件上传OSS
|
|
|
-#step_start_time=$(date "+%Y-%m-%d %H:%M:%S")
|
|
|
-#online_model_path=${OSS_PATH}/${model_name}.txt
|
|
|
-#$HADOOP fs -test -e ${online_model_path}
|
|
|
-#if [ $? -eq 0 ]; then
|
|
|
-# echo "数据存在, 先删除。"
|
|
|
-# $HADOOP fs -rm -r -skipTrash ${online_model_path}
|
|
|
-#else
|
|
|
-# echo "数据不存在"
|
|
|
-#fi
|
|
|
-#$HADOOP fs -put ${MODEL_PATH}/${model_name}_${today_early_1}_change.txt ${online_model_path}
|
|
|
-#
|
|
|
-#step_end_time=$(date "+%Y-%m-%d %H:%M:%S")
|
|
|
-#step_elapsed=$(($(date +%s -d "$step_end_time") - $(date +%s -d "$step_start_time")))
|
|
|
-#if [ $? -ne 0 ]; then
|
|
|
-# msg="广告模型文件至OSS失败, OSS模型文件路径: $online_model_path"
|
|
|
-# echo -e "$LOG_PREFIX -- 模型文件推送至OSS -- $msg: 耗时 $step_elapsed"
|
|
|
-# elapsed=$(($(date +%s -d "$step_end_time") - $(date +%s -d "$start_time")))
|
|
|
-# /root/anaconda3/bin/python ad/ad_monitor_util.py --level error --msg "$msg" --start "$start_time" --elapsed "$elapsed"
|
|
|
-# exit 1
|
|
|
-#fi
|
|
|
-#echo -e "$LOG_PREFIX -- 模型文件推送至OSS -- 广告模型文件至OSS成功, OSS模型文件路径 $online_model_path: 耗时 $step_elapsed"
|
|
|
-#
|
|
|
-#
|
|
|
-#
|
|
|
-#
|
|
|
-## 8 本地保存最新的线上使用的模型,用于下一次的AUC验证
|
|
|
-#step_start_time=$(date "+%Y-%m-%d %H:%M:%S")
|
|
|
-#cp -f ${LAST_MODEL_HOME}/model_online.txt ${LAST_MODEL_HOME}/model_online_$(date +\%Y\%m\%d).txt
|
|
|
-#cp -f ${MODEL_PATH}/${model_name}_${today_early_1}.txt ${LAST_MODEL_HOME}/model_online.txt
|
|
|
-#
|
|
|
-#step_end_time=$(date "+%Y-%m-%d %H:%M:%S")
|
|
|
-#step_elapsed=$(($(date +%s -d "$step_end_time") - $(date +%s -d "$step_start_time")))
|
|
|
-#if [ $? -ne 0 ]; then
|
|
|
-# msg="模型备份失败"
|
|
|
-# echo -e "$LOG_PREFIX -- 模型备份 -- $msg: 耗时 $step_elapsed"
|
|
|
-# elapsed=$(($(date +%s -d "$step_end_time") - $(date +%s -d "$start_time")))
|
|
|
-# /root/anaconda3/bin/python ad/ad_monitor_util.py --level error --msg "$msg" --start "$start_time" --elapsed "$elapsed"
|
|
|
-# exit 1
|
|
|
-#fi
|
|
|
-#echo -e "$LOG_PREFIX -- 模型备份 -- 模型备份完成: 耗时 $step_elapsed"
|
|
|
# 7 Upload the model file to OSS
# Replaces ${OSS_PATH}/${model_name}.txt with the converted model file
# ${MODEL_PATH}/${model_name}_${today_early_1}_change.txt. If the upload fails,
# the failure is logged, reported through ad/ad_monitor_util.py, and the script
# exits with status 1.
# Reads: HADOOP, OSS_PATH, MODEL_PATH, model_name, today_early_1, LOG_PREFIX,
#        start_time.
step_start_time=$(date "+%Y-%m-%d %H:%M:%S")
online_model_path=${OSS_PATH}/${model_name}.txt
# $HADOOP is left unquoted on purpose: it may hold a command plus arguments.
if $HADOOP fs -test -e "${online_model_path}"; then
    echo "数据存在, 先删除。"
    $HADOOP fs -rm -r -skipTrash "${online_model_path}"
else
    echo "数据不存在"
fi
# The status is captured on the upload itself. Checking $? after the
# step_elapsed arithmetic below would test the exit status of `date`, not of
# the upload, so a failed upload would be logged as a success.
step_status=0
$HADOOP fs -put "${MODEL_PATH}/${model_name}_${today_early_1}_change.txt" "${online_model_path}" || step_status=$?

step_end_time=$(date "+%Y-%m-%d %H:%M:%S")
step_elapsed=$(($(date +%s -d "$step_end_time") - $(date +%s -d "$step_start_time")))
if [ "$step_status" -ne 0 ]; then
    msg="广告模型文件至OSS失败, OSS模型文件路径: $online_model_path"
    echo -e "$LOG_PREFIX -- 模型文件推送至OSS -- $msg: 耗时 $step_elapsed"
    # Elapsed time since the start of the whole script (start_time is set
    # outside this section), as opposed to step_elapsed for this step only.
    elapsed=$(($(date +%s -d "$step_end_time") - $(date +%s -d "$start_time")))
    /root/anaconda3/bin/python ad/ad_monitor_util.py --level error --msg "$msg" --start "$start_time" --elapsed "$elapsed"
    exit 1
fi
echo -e "$LOG_PREFIX -- 模型文件推送至OSS -- 广告模型文件至OSS成功, OSS模型文件路径 $online_model_path: 耗时 $step_elapsed"
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# 8 Keep a local copy of the model now used online, for the next AUC check
# First copies the current ${LAST_MODEL_HOME}/model_online.txt to a file
# suffixed with today's date, then replaces model_online.txt with the newly
# trained model. If a copy fails, the failure is logged, reported through
# ad/ad_monitor_util.py, and the script exits with status 1.
# Reads: LAST_MODEL_HOME, MODEL_PATH, model_name, today_early_1, LOG_PREFIX,
#        start_time.
step_start_time=$(date "+%Y-%m-%d %H:%M:%S")
# The status is tracked on the cp commands themselves. Checking $? after the
# step_elapsed arithmetic below would test the exit status of `date`, so a
# failed copy would never be detected.
step_status=0
# Nothing to back up when no previous online model exists (e.g. first run).
if [ -f "${LAST_MODEL_HOME}/model_online.txt" ]; then
    cp -f "${LAST_MODEL_HOME}/model_online.txt" "${LAST_MODEL_HOME}/model_online_$(date +%Y%m%d).txt" || step_status=$?
fi
# Only overwrite the previous model once its backup succeeded, so a failed
# backup cannot lose it.
if [ "$step_status" -eq 0 ]; then
    cp -f "${MODEL_PATH}/${model_name}_${today_early_1}.txt" "${LAST_MODEL_HOME}/model_online.txt" || step_status=$?
fi

step_end_time=$(date "+%Y-%m-%d %H:%M:%S")
step_elapsed=$(($(date +%s -d "$step_end_time") - $(date +%s -d "$step_start_time")))
if [ "$step_status" -ne 0 ]; then
    msg="模型备份失败"
    echo -e "$LOG_PREFIX -- 模型备份 -- $msg: 耗时 $step_elapsed"
    # Elapsed time since the start of the whole script (start_time is set
    # outside this section), as opposed to step_elapsed for this step only.
    elapsed=$(($(date +%s -d "$step_end_time") - $(date +%s -d "$start_time")))
    /root/anaconda3/bin/python ad/ad_monitor_util.py --level error --msg "$msg" --start "$start_time" --elapsed "$elapsed"
    exit 1
fi
echo -e "$LOG_PREFIX -- 模型备份 -- 模型备份完成: 耗时 $step_elapsed"
|
|
|
|
|
|
|
|
|
|