11 месяцев назад · 5968a936e1
--- a/ad/02_ad_model_dnn_v11_update.sh
+++ b/ad/02_ad_model_dnn_v11_update.sh
@@ -10,7 +10,7 @@ export SEGMENT_BASE_PATH=/dw/recommend/model/36_model_attachment/score_calibrati
 
				 sh_path=$(cd $(dirname $0); pwd)
			
 
				 source ${sh_path}/00_common.sh
			
 
				 
			
 
				-# source /root/anaconda3/bin/activate py37
			
 
				+source /root/anaconda3/bin/activate py37
			
 
				 
			
 
				 
			
 
				 # 全局常量
			
@@ -207,17 +207,11 @@ check_ad_hive() {
 
				   echo "${LOG_PREFIX} -- 大数据数据生产校验 -- 大数据数据生产校验通过: 耗时 ${elapsed}"
			
 
				 }
			
 
				 
			
 
				-origin_data() {
			
 
				+bucket_feature_from_origin_to_hive() {
			
 
				   (
			
 
				+    export outputTable=ad_easyrec_train_data_v3_sampled
			
 
				     source ${sh_path}/25_xgb_make_data_origin_bucket.sh
			
 
				-    make_origin_data
			
 
				-  )
			
 
				-}
			
 
				-
			
 
				-bucket_feature() {
			
 
				-  (
			
 
				-    source ${sh_path}/25_xgb_make_data_origin_bucket.sh
			
 
				-    make_bucket_feature
			
 
				+    make_bucket_feature_from_origin_to_hive
			
 
				   )
			
 
				 }
			
 
				 
			
@@ -409,34 +403,9 @@ model_upload_oss() {
 
				   rm -rf ${OSS_CALIBRATION_FILE_NAME}.txt
			
 
				 }
			
 
				 
			
 
				-get_feature_score() {
			
 
				-  # 线上模型评估最新的数据
			
 
				-  local step_start_time=$(date +%s)
			
 
				-  /opt/apps/SPARK3/spark-3.3.1-hadoop3.2-1.0.5/bin/spark-class org.apache.spark.deploy.SparkSubmit \
			
 
				-  --class com.tzld.piaoquan.recommend.model.pred_01_xgb_ad_hdfsfile_20240813 \
			
 
				-  --master yarn --driver-memory 1G --executor-memory 3G --executor-cores 1 --num-executors 3 \
			
 
				-  --conf spark.yarn.executor.memoryoverhead=1024 \
			
 
				-  --conf spark.shuffle.service.enabled=true \
			
 
				-  --conf spark.shuffle.service.port=7337 \
			
 
				-  --conf spark.shuffle.consolidateFiles=true \
			
 
				-  --conf spark.shuffle.manager=sort \
			
 
				-  --conf spark.storage.memoryFraction=0.4 \
			
 
				-  --conf spark.shuffle.memoryFraction=0.5 \
			
 
				-  --conf spark.default.parallelism=200 \
			
 
				-  /root/fengzhoutian/recommend-model/recommend-model-produce/target/recommend-model-produce-jar-with-dependencies.jar \
			
 
				-  featureFile:20240703_ad_feature_name.txt \
			
 
				-  saveFeatureScoresOnly:true \
			
 
				-  savePath:"/dw/recommend/model/37_model_feature_scores/${model_name}" \
			
 
				-  modelPath:"/dw/recommend/model/35_ad_model/${model_name}"
			
 
				-}
			
 
				-
			
 
				-make_data() {
			
 
				-  origin_data
			
 
				-  bucket_feature
			
 
				-}
			
 
				-
			
 
				 make_train_node_conf() {
			
 
				-  TABLE_PART_PREFIX="odps://loghubods/tables/ad_easyrec_train_data_v3_sampled/dt="
			
 
				+  train_data_path=''
			
 
				+  TABLE_PART_PREFIX="odps://loghubods/tables/ad_easyrec_train_data_v3_sampled/dt"
			
 
				   declare -a date_keys=()
			
 
				   local count=1
			
 
				   local current_data="$(date -d "${today_early_1} -1 day" +%Y%m%d)"
			
@@ -472,17 +441,15 @@ make_train_node_conf() {
 
				 main() {
			
 
				   init
			
 
				 
			
 
				+  check_ad_hive
			
 
				+  bucket_feature_from_origin_to_hive
			
 
				   make_train_node_conf
			
 
				   exit
			
 
				 
			
 
				-  check_ad_hive
			
 
				-  make_data
			
 
				-
			
 
				   if [ "${current_day_of_week}" -eq 1 ] || [ "${current_day_of_week}" -eq 3 ] || [ "${current_day_of_week}" -eq 5 ]; then
			
 
				     echo "当前是周一，周三或周五，开始训练并更新模型"
			
 
				     xgb_train
			
 
				     model_predict
			
 
				-    # get_feature_score
			
 
				     compare_predictions
			
 
				     draw_q_distribution
			
 
				     model_upload_oss