Преглед на файлове

feat: 添加广告原始样本与特征分桶数据生产脚本

zhaohaipeng преди 6 месеца
родител
ревизия
272d4f2070
променени са 1 файла, в които са добавени 77 реда и са изтрити 0 реда
  1. 77 0
      ad/25_xgb_make_data_origin_bucket.sh

+ 77 - 0
ad/25_xgb_make_data_origin_bucket.sh

@@ -0,0 +1,77 @@
#!/bin/bash
# Ad XGB pipeline step 25: produce raw origin samples and bucketed feature
# data on the EMR cluster. Uses `source`, so run under bash (not POSIX sh).
set -x

# Cluster environment. SPARK_HOME/JAVA_HOME must be defined BEFORE they are
# referenced in PATH (the original exported PATH first, prepending an empty
# "/bin" entry because SPARK_HOME was still unset).
export HADOOP_CONF_DIR=/etc/taihao-apps/hadoop-conf
export JAVA_HOME=/usr/lib/jvm/java-1.8.0
export SPARK_HOME=/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8
export PATH="$SPARK_HOME/bin:$PATH"

# Shared helpers — presumably defines check_run_status, today_early_1,
# TRAIN_PATH, TABLE, BUCKET_FEATURE_PATH (NOTE(review): confirm in 00_common.sh).
sh_path=$(dirname "$0")
source "${sh_path}/00_common.sh"

# Activate the py37 conda environment used by downstream steps.
source /root/anaconda3/bin/activate py37
+
#######################################
# Produce the raw ad origin samples for yesterday by launching three
# concurrent Spark jobs, one per hour slice (00-12, 13-18, 19-23).
# Globals:  today_early_1, TRAIN_PATH, TABLE (from 00_common.sh)
# Outputs:  writes samples under ${TRAIN_PATH} via the Spark job
# Returns:  reports aggregated status through check_run_status
#######################################
make_origin_data() {

  local step_start_time
  step_start_time=$(date +%s)

  local slice pids=""
  # Each slice is "beginHour endHour"; the three jobs run in parallel.
  for slice in "00 12" "13 18" "19 23"; do
    # shellcheck disable=SC2086 — intentional word-split of the slice pair.
    set -- $slice
    /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
    --class com.aliyun.odps.spark.examples.makedata_ad.v20240718.makedata_ad_31_originData_20240718 \
    --master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
    ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
    tablePart:64 repartition:32 \
    beginStr:${today_early_1}$1 endStr:${today_early_1}$2 \
    savePath:${TRAIN_PATH} \
    table:${TABLE} \
    filterHours:00,01,02,03,04,05,06,07 \
    idDefaultValue:0.1 &
    pids="$pids $!"
  done

  # BUG FIX: the original did `wait` (no args) then read $?, which is always
  # 0 no matter how the background jobs exited — failures were reported as
  # success. Wait on each PID and keep the last non-zero exit code.
  local return_code=0 pid
  for pid in $pids; do
    wait "$pid" || return_code=$?
  done

  check_run_status $return_code $step_start_time "spark原始样本生产任务"
}
+
+
+
#######################################
# Bucketize yesterday's raw samples into the feature-bucket dataset,
# dropping the feature groups listed in filterNames.
# Globals:  today_early_1, TRAIN_PATH, BUCKET_FEATURE_PATH (from 00_common.sh)
# Returns:  reports the Spark job's status through check_run_status
#######################################
make_bucket_feature() {

  local step_start_time=$(date +%s)

  local spark_bin=/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2
  local job_class=com.aliyun.odps.spark.examples.makedata_ad.v20240718.makedata_ad_33_bucketData_20240718

  "$spark_bin" org.apache.spark.deploy.SparkSubmit \
  --class "$job_class" \
  --master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
  ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  beginStr:${today_early_1} endStr:${today_early_1} repartition:100 \
  filterNames:_4h_,_5h_,adid_,targeting_conversion_ \
  readPath:${TRAIN_PATH} \
  savePath:${BUCKET_FEATURE_PATH}
  local return_code=$?

  check_run_status $return_code $step_start_time "spark特征分桶任务"
}