
Automated recommendation-model update: feature-bucketing data production

Joe 9 months ago
parent
commit
6af6fd6d9a

+ 11 - 10
qiaojialiang/handle_rov.sh

@@ -6,11 +6,11 @@ set -ex
 # source data table name
 #table='alg_recsys_sample_all'
 table='alg_recsys_sample_all_test'
-# partition range to process
-beginStr="$(date -d '1 days ago' +%Y%m%d)"
-endStr="$(date -d '1 days ago' +%Y%m%d)"
-beginHhStr=08
-endHhStr=08
+# Partition range to process. Recommendation data lands with a one-day lag, so a run at 00:00 on the 5th uses the 3rd's hour 00-23 data to produce the new model data.
+begin_early_2_Str="$(date -d '2 days ago' +%Y%m%d)"
+end_early_2_Str="$(date -d '2 days ago' +%Y%m%d)"
+beginHhStr=00
+endHhStr=23
 # absolute HDFS output paths for each stage
 originDataPath=/dw/recommend/model/13_sample_data/
 valueDataPath=/dw/recommend/model/14_feature_data/
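
With the renamed variables, both dates shift back two days while the hour bounds now span the whole day. A minimal sketch of how the partition strings resolve, assuming a run at 00:00 on 20240505:

    $ date -d '2 days ago' +%Y%m%d     # evaluated on 20240505
    20240503
    # so the downstream steps see:
    #   begin partition: 2024050300
    #   end partition:   2024050323
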
@@ -19,9 +19,10 @@ bucketDataPath=/dw/recommend/model/16_train_data/
 # 0 Wait until the upstream table has finished producing, giving up at 12:00 at the latest
 # shellcheck disable=SC2039
 source /root/anaconda3/bin/activate py37
-echo "----------step1------------开始校验是否生产完数据,分区信息:beginStr:${beginStr}${beginHhStr},endStr:${endStr}${endHhStr}"
+# shellcheck disable=SC2154
+echo "----------step1------------开始校验是否生产完数据,分区信息:begin_early_2_Str:${begin_early_2_Str}${beginHhStr},end_early_2_Str:${end_early_2_Str}${endHhStr}"
 while true; do
-  python_return_code=$(python /root/joe/recommend-emr-dataprocess/qiaojialiang/checkHiveDataUtil.py --table ${table} --beginStr ${beginStr}${beginHhStr} --endStr ${endStr}${endHhStr})
+  python_return_code=$(python /root/joe/recommend-emr-dataprocess/qiaojialiang/checkHiveDataUtil.py --table ${table} --begin_early_2_Str ${begin_early_2_Str}${beginHhStr} --end_early_2_Str ${end_early_2_Str}${endHhStr})
   echo "python 返回值:${python_return_code}"
   if [ $python_return_code -eq 0 ]; then
     echo "Python程序返回0,校验存在数据,退出循环。"
@@ -45,7 +46,7 @@ echo "----------step2------------start producing raw data from ${table}"
 --master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
 ../target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
 tablePart:64 repartition:32 \
-beginStr:${beginStr}${beginHhStr} endStr:${endStr}${endHhStr} \
+beginStr:${begin_early_2_Str}${beginHhStr} endStr:${end_early_2_Str}${endHhStr} \
 savePath:${originDataPath} \
 table:${table}
 if [ $? -ne 0 ]; then
@@ -64,7 +65,7 @@ echo "----------step3------------start feature-value joining"
 ../target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
 readPath:${originDataPath} \
 savePath:${valueDataPath} \
-beginStr:${beginStr} endStr:${endStr} repartition:1000
+beginStr:${begin_early_2_Str} endStr:${end_early_2_Str} repartition:1000
 if [ $? -ne 0 ]; then
    echo "Spark特征值拼接处理任务执行失败"
    exit 1
@@ -80,7 +81,7 @@ echo "----------step4------------start producing re-scoring feature data from the feature buckets"
 ../target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
 readPath:${valueDataPath} \
 savePath:${bucketDataPath} \
-beginStr:${beginStr} endStr:${endStr} repartition:1000
+beginStr:${begin_early_2_Str} endStr:${end_early_2_Str} repartition:1000
 if [ $? -ne 0 ]; then
    echo "Spark特征分桶处理任务执行失败"
    exit 1
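
Nothing in this commit shows how the script is triggered; the comment's "00:00 on the 5th" example implies a midnight schedule. A hypothetical crontab entry consistent with that (the schedule and log path are assumptions):

    # assumed scheduling, not part of this commit
    0 0 * * * bash /root/joe/recommend-emr-dataprocess/qiaojialiang/handle_rov.sh >> /root/joe/handle_rov.log 2>&1
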

+ 2 - 2
qiaojialiang/demo01.py → qiaojialiang/test/demo01.py

@@ -12,8 +12,8 @@ n1 = 1
 n2 = 5
 
 # paths to the shell scripts
-script1_path = "./script1.sh"
-script2_path = "./script2.sh"
+script1_path = "script1.sh"
+script2_path = "script2.sh"
 
 # open the log file for writing
 with open(log_file, 'w') as f:
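
Whether dropping the ./ prefix is safe depends on how demo01.py launches the scripts, which this hunk does not show. A quick shell illustration of the lookup rules, assuming the new qiaojialiang/test/ location:

    cd qiaojialiang/test
    bash script1.sh    # interpreter operand: resolved against the current directory
    ./script1.sh       # explicit relative path: resolved against the current directory
    script1.sh         # bare name: exec-style lookup searches PATH, not the current directory
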

+ 0 - 0
qiaojialiang/script1.sh → qiaojialiang/test/script1.sh


+ 0 - 0
qiaojialiang/script2.sh → qiaojialiang/test/script2.sh