@@ -51,51 +51,51 @@ echo "$LOG_PREFIX -- Big data production check -- Big data production check
 
 
 
-# 2 Raw feature generation
-step_start_time=$(date "+%Y-%m-%d %H:%M:%S")
-/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
---class com.aliyun.odps.spark.zhp.makedata_ad.makedata_ad_31_originData_20240620 \
---master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
-./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-tablePart:64 repartition:16 \
-beginStr:${today_early_1}00 endStr:${today}10 \
-savePath:${originDataSavePath} \
-table:alg_recsys_ad_sample_all filterHours:00,01,02,03,04,05,06,07 \
-idDefaultValue:0.01
-
-step_elapsed=$(($(date +%s -d "$step_start_time") - $(date +%s -d "+%Y-%m-%d %H:%M:%S")))
-if [ $? -ne 0 ]; then
-   msg="Spark raw sample generation job failed"
-   echo "$LOG_PREFIX -- Raw sample generation -- $msg: elapsed $step_elapsed"
-   elapsed=$(($(date +%s -d "$start_time") - $(date +%s -d "+%Y-%m-%d %H:%M:%S")))
-   /root/anaconda3/bin/python ad/ad_monitor_util.py --level error --msg "$msg" --start "$start_time" --elapsed "$elapsed"
-   exit 1
-fi
-echo "$LOG_PREFIX -- Raw sample generation -- Spark raw sample generation job succeeded: elapsed $step_elapsed"
-
-
-
-
-# 3 Feature bucketing
-step_start_time=$(date "+%Y-%m-%d %H:%M:%S")
-/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
---class com.aliyun.odps.spark.zhp.makedata_ad.makedata_ad_33_bucketData_20240622 \
---master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
-./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-beginStr:${today_early_1} endStr:${today} repartition:100 \
-filterNames:adid_,targeting_conversion_ \
-readPath:${originDataSavePath} \
-savePath:${bucketFeatureSavePath}
-
-step_elapsed=$(($(date +%s -d "$step_start_time") - $(date +%s -d "+%Y-%m-%d %H:%M:%S")))
-if [ $? -ne 0 ]; then
-   msg="Spark feature bucketing job failed"
-   echo "$LOG_PREFIX -- Feature bucketing job -- $msg: elapsed $step_elapsed"
-   elapsed=$(($(date +%s -d "$start_time") - $(date +%s -d "+%Y-%m-%d %H:%M:%S")))
-   /root/anaconda3/bin/python ad/ad_monitor_util.py ${msg}
-   exit 1
-fi
-echo "$LOG_PREFIX -- Feature bucketing job -- Spark feature bucketing succeeded: elapsed $step_elapsed"
+## 2 Raw feature generation
+#step_start_time=$(date "+%Y-%m-%d %H:%M:%S")
+#/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+#--class com.aliyun.odps.spark.zhp.makedata_ad.makedata_ad_31_originData_20240620 \
+#--master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
+#./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+#tablePart:64 repartition:16 \
+#beginStr:${today_early_1}00 endStr:${today}10 \
+#savePath:${originDataSavePath} \
+#table:alg_recsys_ad_sample_all filterHours:00,01,02,03,04,05,06,07 \
+#idDefaultValue:0.01
+#
+#step_elapsed=$(($(date +%s -d "$step_start_time") - $(date +%s -d "+%Y-%m-%d %H:%M:%S")))
+#if [ $? -ne 0 ]; then
+#   msg="Spark raw sample generation job failed"
+#   echo "$LOG_PREFIX -- Raw sample generation -- $msg: elapsed $step_elapsed"
+#   elapsed=$(($(date +%s -d "$start_time") - $(date +%s -d "+%Y-%m-%d %H:%M:%S")))
+#   /root/anaconda3/bin/python ad/ad_monitor_util.py --level error --msg "$msg" --start "$start_time" --elapsed "$elapsed"
+#   exit 1
+#fi
+#echo "$LOG_PREFIX -- Raw sample generation -- Spark raw sample generation job succeeded: elapsed $step_elapsed"
+#
+#
+#
+#
+## 3 Feature bucketing
+#step_start_time=$(date "+%Y-%m-%d %H:%M:%S")
+#/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+#--class com.aliyun.odps.spark.zhp.makedata_ad.makedata_ad_33_bucketData_20240622 \
+#--master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
+#./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+#beginStr:${today_early_1} endStr:${today} repartition:100 \
+#filterNames:adid_,targeting_conversion_ \
+#readPath:${originDataSavePath} \
+#savePath:${bucketFeatureSavePath}
+#
+#step_elapsed=$(($(date +%s -d "$step_start_time") - $(date +%s -d "+%Y-%m-%d %H:%M:%S")))
+#if [ $? -ne 0 ]; then
+#   msg="Spark feature bucketing job failed"
+#   echo "$LOG_PREFIX -- Feature bucketing job -- $msg: elapsed $step_elapsed"
+#   elapsed=$(($(date +%s -d "$start_time") - $(date +%s -d "+%Y-%m-%d %H:%M:%S")))
+#   /root/anaconda3/bin/python ad/ad_monitor_util.py ${msg}
+#   exit 1
+#fi
+#echo "$LOG_PREFIX -- Feature bucketing job -- Spark feature bucketing succeeded: elapsed $step_elapsed"
 
 
 
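Note on the two disabled steps above: in both, `step_elapsed` is assigned between the spark-class2 call and the `if [ $? -ne 0 ]` test, so `$?` reflects the date arithmetic rather than the Spark job, and `date +%s -d "+%Y-%m-%d %H:%M:%S"` passes a format string where `-d` expects a date value. A minimal sketch of the intended timing and exit-code handling for such a step, assuming GNU date, reusing the script's variable names, and abbreviating the job arguments; `spark_rc` is a hypothetical helper variable:

step_start_time=$(date "+%Y-%m-%d %H:%M:%S")
/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
    --class com.aliyun.odps.spark.zhp.makedata_ad.makedata_ad_31_originData_20240620 \
    --master yarn ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar    # remaining job arguments as in the block above
spark_rc=$?                                                            # capture the exit code before any other command overwrites $?
step_elapsed=$(( $(date +%s) - $(date +%s -d "$step_start_time") ))    # elapsed seconds: now minus start
if [ "$spark_rc" -ne 0 ]; then
   msg="Spark raw sample generation job failed"
   echo "$LOG_PREFIX -- Raw sample generation -- $msg: elapsed ${step_elapsed}s"
   exit 1
fi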
@@ -244,6 +244,7 @@ echo -e "$LOG_PREFIX -- Model file format conversion -- converted path is [$chan
 
 
 msg="Ad model file update finished \n\t \n\t new model AUC: $new_auc \n\t online model AUC: $online_auc AUC diff: $auc_diff_abs \n\t model upload path: $online_model_path \n\t"
+echo -e "$LOG_PREFIX -- Model update finished -- $msg: elapsed $step_elapsed"
 elapsed=$(($(date +%s -d "$start_time") - $(date +%s -d "+%Y-%m-%d %H:%M:%S")))
 /root/anaconda3/bin/python ad/ad_monitor_util.py --level error --msg "$msg" --start "$start_time" --elapsed "$elapsed"
 
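In the hunk above, the success message is pushed through the same `ad_monitor_util.py --level error` invocation used on the failure paths; only `--level error` appears in this script, so any other level value would be an assumption about that utility. A hypothetical wrapper, sketched with only the flags visible in this diff, that keeps the log line and the monitor call together:

notify() {                                    # hypothetical helper, not part of the original script
   local level="$1" stage="$2" message="$3"
   local elapsed=$(( $(date +%s) - $(date +%s -d "$start_time") ))    # assumes GNU date and the script's $start_time format
   echo -e "$LOG_PREFIX -- $stage -- $message: elapsed ${elapsed}s"
   /root/anaconda3/bin/python ad/ad_monitor_util.py --level "$level" --msg "$message" --start "$start_time" --elapsed "$elapsed"
}

notify error "Model update finished" "$msg"   # usage mirroring the lines above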