Ver Fonte

新table

jch há 12 horas
pai
commit
26992bfb88
1 ficheiro alterado com 32 adições e 15 exclusões
  1. 32 15
      ad/25_xgb_make_data_origin_bucket.sh

+ 32 - 15
ad/25_xgb_make_data_origin_bucket.sh

@@ -111,21 +111,38 @@ make_bucket_feature_from_origin_to_hive() {
   neg_sample_rate=${NEG_SAMPLE_RATE:-0.04}
   mask_feature_rate=0.0005
   
-  /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
-  --class com.aliyun.odps.spark.examples.makedata_ad.v20240718.makedata_ad_33_bucketDataFromOriginToHive_20250228 \
-  --master yarn --driver-memory 2G --executor-memory 3G --executor-cores 1 --num-executors 30 \
-  --conf spark.dynamicAllocation.enabled=true \
-  --conf spark.shuffle.service.enabled=true \
-  --conf spark.dynamicAllocation.maxExecutors=100 \
-  ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-  beginStr:${today_early_1} endStr:${today_early_1} \
-  filterHours:${FILTER_HOURS:-00,01,02,03,04,05,06,07} \
-  filterAdverIds:${FILTER_ADVER_IDS} \
-  filterNames:_4h_,_5h_,adid_,targeting_conversion_ \
-  outputTable:${outputTable} \
-  inputTable:alg_recsys_ad_sample_all \
-  negSampleRate:${neg_sample_rate}
-  local task1=$!
+#  /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+#  --class com.aliyun.odps.spark.examples.makedata_ad.v20240718.makedata_ad_33_bucketDataFromOriginToHive_20250228 \
+#  --master yarn --driver-memory 2G --executor-memory 3G --executor-cores 1 --num-executors 30 \
+#  --conf spark.dynamicAllocation.enabled=true \
+#  --conf spark.shuffle.service.enabled=true \
+#  --conf spark.dynamicAllocation.maxExecutors=100 \
+#  ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+#  beginStr:${today_early_1} endStr:${today_early_1} \
+#  filterHours:${FILTER_HOURS:-00,01,02,03,04,05,06,07} \
+#  filterAdverIds:${FILTER_ADVER_IDS} \
+#  filterNames:_4h_,_5h_,adid_,targeting_conversion_ \
+#  outputTable:${outputTable} \
+#  inputTable:alg_recsys_ad_sample_all \
+#  negSampleRate:${neg_sample_rate}
+#  local task1=$!
+
+   /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+   --class com.aliyun.odps.spark.examples.makedata_ad.v20240718.makedata_ad_33_bucketDataFromOriginToHive_20250522 \
+   --master yarn --driver-memory 2G --executor-memory 3G --executor-cores 1 --num-executors 30 \
+   --conf spark.dynamicAllocation.enabled=true \
+   --conf spark.shuffle.service.enabled=true \
+   --conf spark.dynamicAllocation.maxExecutors=100 \
+   ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+   beginStr:${today_early_1} endStr:${today_early_1} \
+   filterHours:${FILTER_HOURS:-00,01,02,03,04,05} \
+   filterAdverIds:${FILTER_ADVER_IDS} \
+   filterNames:_4h_,_5h_,adid_,targeting_conversion_ \
+   outputTable:ad_easyrec_train_realtime_data_v3_sampled_v2 \
+   inputTable:alg_recsys_ad_sample_all \
+   negSampleRate:${neg_sample_rate} \
+   maskFeatureRate:${mask_feature_rate}
+   local task1=$!
 
   /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
   --class com.aliyun.odps.spark.examples.makedata_ad.v20240718.makedata_ad_33_bucketDataFromOriginToHive_20250522 \