1 年之前 · 9d169bb63f
--- a/zhangbo/01_train.sh
+++ b/zhangbo/01_train.sh
--- a/zhangbo/02_train_go.sh
+++ b/zhangbo/02_train_go.sh
--- a/zhangbo/03_predict.sh
+++ b/zhangbo/03_predict.sh
--- a/zhangbo/04_upload.sh
+++ b/zhangbo/04_upload.sh
--- a/zhangbo/05_update_everyday_2model.sh
+++ b/zhangbo/05_update_everyday_2model.sh
--- a/zhangbo/05_update_everyday_str.sh
+++ b/zhangbo/05_update_everyday_str.sh
--- a/zhangbo/06_update_everyday_feature.sh
+++ b/zhangbo/06_update_everyday_feature.sh
--- a/zhangbo/07_ad_model_update_everyday.sh
+++ b/zhangbo/07_ad_model_update_everyday.sh
@@ -0,0 +1,73 @@
 
															#!/bin/bash
#
# Daily update job for the ad model (model_name=ad_model_lr).
#
# Only stage 1 is active: it polls utils.py until the upstream partition for
# yesterday is reported ready, and gives up once the local clock reaches
# max_hour:max_minute. Stages 2-3 are kept below as disabled code and stages
# 4-7 are placeholders.
#
# bash is required: the script uses `source` and `(( ))` arithmetic, which a
# strict POSIX /bin/sh (e.g. dash) does not provide.

set -ex

# 0 Global variables / parameters
originDataSavePath=/dw/recommend/model/31_ad_sample_data_fix/
bucketFeatureSavePath=/dw/recommend/model/12_ros_data_v3/
model_name=ad_model_lr
today="$(date +%Y%m%d)"
today_early_1="$(date -d '1 days ago' +%Y%m%d)"
# Yesterday's date followed by a two-digit hour: 08 through 23.
beginStr=${today_early_1}08
endStr=${today_early_1}23

# Succeeds when the time $1:$2 is at or past the cut-off $3:$4.
# All four values may be zero-padded. The 10# prefix forces base 10 so that
# "08" and "09" are not rejected by bash arithmetic as invalid octal literals.
deadline_reached() {
  local hour=$1 minute=$2 limit_hour=$3 limit_minute=$4
  (( 10#$hour > 10#$limit_hour || (10#$hour == 10#$limit_hour && 10#$minute >= 10#$limit_minute) ))
}

# 1 Wait until the upstream data table has been produced
source /root/anaconda3/bin/activate py37
max_hour=15
max_minute=00
while true; do
  # The readiness code is read from utils.py's stdout, not its exit status.
  # NOTE(review): because of `set -e`, a non-zero *exit status* from utils.py
  # aborts the script here instead of retrying - confirm this is intended.
  python_return_code=$(python utils.py --excute_program check_ad_origin_hive --partition "${endStr}")
  if [ "${python_return_code}" -eq 0 ]; then
    echo "Python程序返回0，退出循环。"
    break
  fi
  echo "Python程序返回非0值，等待五分钟后再次调用。"
  sleep 300
  current_hour=$(date +%H)
  current_minute=$(date +%M)
  if deadline_reached "${current_hour}" "${current_minute}" "${max_hour}" "${max_minute}"; then
    echo "最长等待时间已到，失败:${current_hour}-${current_minute}"
    exit 1
  fi
done

# 2 Raw feature generation (disabled)
# NOTE(review): with `set -e` active, a failing spark-class2 call would abort
# the script before the `$?` test below runs, and `-eq 1` would miss other
# non-zero exit codes - revisit before re-enabling.
# /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
# --class com.aliyun.odps.spark.examples.makedata_ad.makedata_ad_31_originData_20240620 \
# --master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
# ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
# tablePart:64 repartition:32 \
# beginStr:${beginStr} endStr:${endStr} \
# savePath:/dw/recommend/model/31_ad_sample_data_auto/ \
# table:alg_recsys_ad_sample_all_new
# if [ $? -eq 1 ]; then
#    echo "Spark原始样本生产任务执行失败"
#    exit 1
# else
#     echo "spark原始样本生产执行成功"
# fi

# 3 Feature bucketing (disabled)
# The same `set -e` / `$?` caveat as stage 2 applies here.
# /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
# --class com.aliyun.odps.spark.examples.makedata_ad.makedata_ad_33_bucketData_20240622 \
# --master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
# ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
# beginStr:${today_early_1} endStr:${today_early_1} repartition:400
# if [ $? -eq 1 ]; then
#     echo "Spark特征分桶任务执行失败"
#    exit 1
# else
#     echo "spark特征分桶执行成功"
# fi

# 4 Model training

# 5 Compare AUC

# 6 Model format conversion

# 7 Upload model file to OSS
														
--- a/zhangbo/50_delete_hdfs.sh
+++ b/zhangbo/50_delete_hdfs.sh
--- a/zhangbo/train.sh
+++ b/zhangbo/train.sh
--- a/zhangbo/up.sh
+++ b/zhangbo/up.sh
--- a/zhangbo/up2.sh
+++ b/zhangbo/up2.sh