|
@@ -17,7 +17,7 @@ make_origin_data() {
|
|
|
local step_start_time=$(date +%s)
|
|
|
|
|
|
/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
|
|
|
- --class com.aliyun.odps.spark.examples.makedata_ad.v20240718.makedata_ad_31_originData_20240718 \
|
|
|
+ --class com.aliyun.odps.spark.examples.makedata_ad.v20240718.makedata_ad_31_originData_20250110 \
|
|
|
--master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
|
|
|
./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
|
|
|
tablePart:64 repartition:32 \
|
|
@@ -29,7 +29,7 @@ make_origin_data() {
|
|
|
local task1=$!
|
|
|
|
|
|
/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
|
|
|
- --class com.aliyun.odps.spark.examples.makedata_ad.v20240718.makedata_ad_31_originData_20240718 \
|
|
|
+ --class com.aliyun.odps.spark.examples.makedata_ad.v20240718.makedata_ad_31_originData_20250110 \
|
|
|
--master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
|
|
|
./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
|
|
|
tablePart:64 repartition:32 \
|
|
@@ -41,7 +41,7 @@ make_origin_data() {
|
|
|
local task2=$!
|
|
|
|
|
|
/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
|
|
|
- --class com.aliyun.odps.spark.examples.makedata_ad.v20240718.makedata_ad_31_originData_20240718 \
|
|
|
+ --class com.aliyun.odps.spark.examples.makedata_ad.v20240718.makedata_ad_31_originData_20250110 \
|
|
|
--master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
|
|
|
./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
|
|
|
tablePart:64 repartition:32 \
|
|
@@ -74,14 +74,51 @@ make_bucket_feature() {
|
|
|
local step_start_time=$(date +%s)
|
|
|
|
|
|
/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
|
|
|
- --class com.aliyun.odps.spark.examples.makedata_ad.v20240718.makedata_ad_33_bucketData_20240718 \
|
|
|
- --master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
|
|
|
+ --class com.aliyun.odps.spark.examples.makedata_ad.v20240718.makedata_ad_33_bucketData_20250110 \
|
|
|
+ --master yarn --driver-memory 2G --executor-memory 3G --executor-cores 1 --num-executors 16 \
|
|
|
./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
|
|
|
- beginStr:${today_early_1} endStr:${today_early_1} repartition:100 \
|
|
|
+ beginStr:${today_early_1} endStr:${today_early_1} repartition:64 \
|
|
|
filterNames:_4h_,_5h_,adid_,targeting_conversion_ \
|
|
|
+ bucketFileName:20250217_ad_bucket_688.txt \
|
|
|
readPath:${TRAIN_PATH} \
|
|
|
savePath:${BUCKET_FEATURE_PATH}
|
|
|
|
|
|
local return_code=$?
|
|
|
check_run_status ${return_code} ${step_start_time} "spark特征分桶任务"
|
|
|
-}
|
|
|
+}
|
|
|
+
|
|
|
+#######################################
+# Submits the makedata_ad_33_bucketDataToHive_20250110 Spark job to YARN
+# (via spark-class2 / SparkSubmit) for the single day ${today_early_1}, then
+# passes the job's exit status to check_run_status.
+# Globals:   today_early_1 (read), TRAIN_PATH (read)
+# Arguments: none
+# Returns:   whatever check_run_status returns
+# NOTE(review): the status label passed to check_run_status is the same
+# string the other bucket-feature steps in this file pass, so the steps
+# cannot be told apart by label alone - consider a distinct label.
+#######################################
+make_bucket_feature_to_hive() {
+
+  # Declared separately from the assignment so `local` does not mask the
+  # exit status of the command substitution.
+  local step_start_time
+  step_start_time=$(date +%s)
+
+  # Every key:value argument that embeds a variable is quoted so the value is
+  # handed to the job as exactly one word (no word splitting, no globbing).
+  # `table:` is presumably the destination Hive table - confirm against the
+  # Spark job.
+  /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+    --class com.aliyun.odps.spark.examples.makedata_ad.v20240718.makedata_ad_33_bucketDataToHive_20250110 \
+    --master yarn --driver-memory 2G --executor-memory 3G --executor-cores 1 --num-executors 16 \
+    ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+    "beginStr:${today_early_1}" "endStr:${today_early_1}" repartition:64 \
+    filterNames:_4h_,_5h_,adid_,targeting_conversion_ \
+    table:ad_easyrec_eval_data_v2_sampled \
+    partition:"dt=${today_early_1}" \
+    "readPath:${TRAIN_PATH}" \
+    negSampleRate:0.04
+
+  # Must stay the first command after the Spark invocation: $? is expanded
+  # before `local` runs, so it still holds the Spark exit status here.
+  local return_code=$?
+  check_run_status "${return_code}" "${step_start_time}" "spark特征分桶任务"
+}
|
|
|
+
|
|
|
+#######################################
+# Submits the makedata_ad_33_bucketDataFromOriginToHive_20250228 Spark job to
+# YARN (via spark-class2 / SparkSubmit) for the single day ${today_early_1},
+# then passes the job's exit status to check_run_status.
+# Globals:   today_early_1 (read), outputTable (read)
+# Arguments: none
+# Returns:   whatever check_run_status returns
+# NOTE(review): outputTable is neither assigned nor declared local in this
+# function; presumably the caller sets it as a global - confirm. If it is
+# unset or empty the job receives a bare `outputTable:` argument.
+# NOTE(review): the status label passed to check_run_status is the same
+# string the other bucket-feature steps in this file pass, so the steps
+# cannot be told apart by label alone - consider a distinct label.
+#######################################
+make_bucket_feature_from_origin_to_hive() {
+  # Declared separately from the assignment so `local` does not mask the
+  # exit status of the command substitution.
+  local step_start_time
+  step_start_time=$(date +%s)
+
+  # Every key:value argument that embeds a variable is quoted so the value is
+  # handed to the job as exactly one word (no word splitting, no globbing).
+  /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+    --class com.aliyun.odps.spark.examples.makedata_ad.v20240718.makedata_ad_33_bucketDataFromOriginToHive_20250228 \
+    --master yarn --driver-memory 2G --executor-memory 3G --executor-cores 1 --num-executors 30 \
+    ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+    "beginStr:${today_early_1}" "endStr:${today_early_1}" \
+    filterNames:_4h_,_5h_,adid_,targeting_conversion_ \
+    "outputTable:${outputTable}" \
+    inputTable:alg_recsys_ad_sample_all \
+    negSampleRate:0.04
+
+  # Must stay the first command after the Spark invocation: $? is expanded
+  # before `local` runs, so it still holds the Spark exit status here.
+  local return_code=$?
+  check_run_status "${return_code}" "${step_start_time}" "spark特征分桶任务"
+}
|