浏览代码

Update 25_xgb_make_data_origin_bucket: add outputTable2

fengzhoutian 5 天之前
父节点
当前提交
a3ab079190
共有 2 个文件被更改，包括 5 次插入和 0 次删除
  1. +1 -0
      ad/02_ad_model_dnn_v11_update.sh
  2. +4 -0
      ad/25_xgb_make_data_origin_bucket.sh

+ 1 - 0
ad/02_ad_model_dnn_v11_update.sh

@@ -210,6 +210,7 @@ check_ad_hive() {
 bucket_feature_from_origin_to_hive() {
   (
     export outputTable=ad_easyrec_train_data_v3_sampled
+    export outputTable2=ad_easyrec_eval_data_v3_sampled
     source ${sh_path}/25_xgb_make_data_origin_bucket.sh
     make_bucket_feature_from_origin_to_hive
   )

+ 4 - 0
ad/25_xgb_make_data_origin_bucket.sh

@@ -113,12 +113,16 @@ make_bucket_feature_from_origin_to_hive() {
   /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
   --class com.aliyun.odps.spark.examples.makedata_ad.v20240718.makedata_ad_33_bucketDataFromOriginToHive_20250228 \
   --master yarn --driver-memory 2G --executor-memory 3G --executor-cores 1 --num-executors 30 \
+  --conf spark.dynamicAllocation.enabled=true \
+  --conf spark.shuffle.service.enabled=true \
+  --conf spark.dynamicAllocation.maxExecutors=100 \
   ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
   beginStr:${today_early_1} endStr:${today_early_1} \
   filterHours:${FILTER_HOURS:-00,01,02,03,04,05,06,07} \
   filterAdverIds:${FILTER_ADVER_IDS} \
   filterNames:_4h_,_5h_,adid_,targeting_conversion_ \
   outputTable:${outputTable} \
+  outputTable2:${outputTable2} \
   inputTable:alg_recsys_ad_sample_all \
   negSampleRate:${neg_sample_rate}