
Automated recommendation model update - feature bucketing data production

Joe 9 months ago
parent
commit
8f7a5eb772
2 changed files with 5 additions and 5 deletions
  1. qiaojialiang/checkHiveDataUtil.py (+1, -1)
  2. qiaojialiang/handle_rov.sh (+4, -4)

+ 1 - 1
qiaojialiang/checkHiveDataUtil.py

@@ -30,7 +30,7 @@ def check_origin_hive(args):
             # msg = (
             #     f'推荐模型数据更新 \n【任务名称】:step1校验hive数据源\n【是否成功】:success\n【信息】:table:{table},beginStr:{beginStr},endStr:{endStr}\n【详细日志】:{exist_partition}')
             msg = (
-                f'推荐模型数据更新 \n【任务名称】:step1校验hive数据源\n【是否成功】:error\n【信息】:table:{table},beginStr:{beginStr},endStr:{endStr}分区数据不存在,继续检查')
+                f'推荐模型数据更新 \n【任务名称】:step1校验hive数据源\n【是否成功】:error\n【信息】:table:{table},{time_str}分区数据不存在,继续检查')
             bot.send_message(msg)
             print('1')
             exit(1)
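
For context, the message fix makes the alert name the exact partition being probed: `time_str` is presumably the loop variable that walks the partitions between `beginStr` and `endStr`, so on failure the alert reports the one missing partition instead of repeating the whole range. A minimal sketch of what the surrounding loop plausibly looks like, assuming hourly yyyymmddHH partition keys; only `table`, `beginStr`, `endStr`, `time_str`, and `exist_partition` appear in the diff, and `hive_partition_exists` is a hypothetical stand-in for the real metastore query:

    # Hypothetical reconstruction; not the actual checkHiveDataUtil.py body.
    from datetime import datetime, timedelta

    def iter_partitions(begin_str, end_str):
        """Yield hourly partition keys (yyyymmddHH) from begin to end, inclusive."""
        cur = datetime.strptime(begin_str, '%Y%m%d%H')
        end = datetime.strptime(end_str, '%Y%m%d%H')
        while cur <= end:
            yield cur.strftime('%Y%m%d%H')
            cur += timedelta(hours=1)

    def hive_partition_exists(table, partition):
        """Placeholder: the real script would query the Hive metastore here."""
        raise NotImplementedError

    def check_origin_hive(table, begin_str, end_str):
        for time_str in iter_partitions(begin_str, end_str):
            exist_partition = hive_partition_exists(table, time_str)  # assumed helper
            if not exist_partition:
                # After this commit the alert interpolates time_str, i.e. the
                # specific partition that is missing, not the whole range.
                print('1')   # handle_rov.sh reads "1" as "not ready, retry"
                exit(1)
        print('0')           # every partition exists; the shell moves on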

+ 4 - 4
qiaojialiang/handle_rov.sh

@@ -20,7 +20,7 @@ bucketDataPath=/dw/recommend/model/16_train_data/
 # shellcheck disable=SC2039
 source /root/anaconda3/bin/activate py37
 # shellcheck disable=SC2154
-echo "----------step1------------开始校验是否生产完数据,分区信息:beginStr:${begin_early_2_Str}${beginHhStr},endStr:${end_early_2_Str}${endHhStr}"
+echo "$(date +%Y-%m-%d_%H-%M-%S)----------step1------------开始校验是否生产完数据,分区信息:beginStr:${begin_early_2_Str}${beginHhStr},endStr:${end_early_2_Str}${endHhStr}"
 while true; do
   python_return_code=$(python /root/joe/recommend-emr-dataprocess/qiaojialiang/checkHiveDataUtil.py --table ${table} --beginStr ${begin_early_2_Str}${beginHhStr} --endStr ${end_early_2_Str}${endHhStr})
   echo "python 返回值:${python_return_code}"
@@ -40,7 +40,7 @@ while true; do
 done
 
 # 1 生产原始数据
-echo "----------step2------------开始根据${table}生产原始数据"
+echo "$(date +%Y-%m-%d_%H-%M-%S)----------step2------------开始根据${table}生产原始数据"
 /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
 --class com.aliyun.odps.spark.examples.makedata_qiao.makedata_13_originData_20240705 \
 --master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
@@ -58,7 +58,7 @@ fi
 
 
 # 2 特征值拼接
-echo "----------step3------------开始特征值拼接"
+echo "$(date +%Y-%m-%d_%H-%M-%S)----------step3------------开始特征值拼接"
 /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
 --class com.aliyun.odps.spark.examples.makedata_qiao.makedata_14_valueData_20240705 \
 --master yarn --driver-memory 1G --executor-memory 3G --executor-cores 1 --num-executors 32 \
@@ -74,7 +74,7 @@ else
 fi
 
 # 3 特征分桶
-echo "----------step4------------根据特征分桶生产重打分特征数据"
+echo "$(date +%Y-%m-%d_%H-%M-%S)----------step4------------根据特征分桶生产重打分特征数据"
 /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
 --class com.aliyun.odps.spark.examples.makedata_qiao.makedata_16_bucketData_20240705 \
 --master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
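
All four shell edits apply the same pattern: each step banner gains a `$(date +%Y-%m-%d_%H-%M-%S)` prefix, so the log records when each stage (partition check, raw-data production, feature-value joining, feature bucketing) starts and stage durations can be read straight off the log. A minimal sketch of the pattern together with the step1 polling loop it annotates; `table`, `beginStr`, `endStr`, the `log_step` helper, and the sleep interval are placeholders of mine, while the echo format, the checkHiveDataUtil.py call, and the return-code convention come from the diff:

    #!/bin/bash
    # Sketch only: handle_rov.sh inlines $(date ...) in each echo rather
    # than using a helper, and sets these variables from its arguments.
    table=some_hive_table
    beginStr=2024070500
    endStr=2024070523

    log_step() {  # hypothetical helper wrapping the repeated echo pattern
      echo "$(date +%Y-%m-%d_%H-%M-%S)----------$1------------$2"
    }

    log_step step1 "start checking data readiness, beginStr:${beginStr},endStr:${endStr}"
    while true; do
      # checkHiveDataUtil.py prints 0 when every partition exists, 1 otherwise.
      python_return_code=$(python checkHiveDataUtil.py \
          --table "${table}" --beginStr "${beginStr}" --endStr "${endStr}")
      echo "python return value: ${python_return_code}"
      [ "${python_return_code}" -eq 0 ] && break   # data ready, continue pipeline
      sleep 300                                    # assumed retry interval
    done
    log_step step2 "start producing raw data from ${table}"

Routing the banner through one helper keeps the four step markers consistent; the committed script simply repeats the inline `$(date ...)` call in each echo, which is equivalent.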