#!/usr/bin/env bash
# =============================================================================
# Reference sheet of spark-submit commands for the recommend data pipeline.
# NOT meant to be executed top-to-bottom — copy/paste the snippet you need.
# Every job runs detached via `nohup ... &` and logs to the per-job *.log file.
#
# NOTE(review): Spark conf keys are case-sensitive. The original snippets used
# `spark.yarn.executor.memoryoverhead`, which Spark silently ignores; it has
# been corrected to `spark.yarn.executor.memoryOverhead` below.
# =============================================================================

# --- [New upstream samples] (新 上游样本) ------------------------------------
nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  --class com.aliyun.odps.spark.examples.makedata.makedata_10_originData_v3 \
  --master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 64 \
  ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  tablePart:64 savePath:/dw/recommend/model/10_sample_data_v3/ beginStr:20240227 endStr:20240227 \
  > p10_.log 2>&1 &

# --- [ROS sample production] (ros样本生产) -----------------------------------
nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  --class com.aliyun.odps.spark.examples.makedata.makedata_12_rosData_v3 \
  --master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 32 \
  ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  savePath:/dw/recommend/model/12_ros_data_v3/ beginStr:20240228 endStr:20240228 ifRepart:10 \
  > p12_1.log 2>&1 &

# Same job, no sample weighting.
nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  --class com.aliyun.odps.spark.examples.makedata.makedata_12_rosData_v3_noweight \
  --master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 32 \
  ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  savePath:/dw/recommend/model/12_ros_data_v3_noweight/ beginStr:20240222 endStr:20240226 ifRepart:10 \
  > p12_2.log 2>&1 &

# --- [STR sample production] (str样本生产) -----------------------------------
nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  --class com.aliyun.odps.spark.examples.makedata.makedata_11_strData_v3 \
  --master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 64 \
  ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  savePath:/dw/recommend/model/11_str_data_v3/ beginStr:20240227 endStr:20240227 ifRepart:100 \
  > p11.log 2>&1 &

# --- [Write user features to Redis] (user写redis) ----------------------------
# Note: this one uses the absolute jar path, not ./target/.
nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  --class com.aliyun.odps.spark.examples.makedata.makedata_09_user2redis_freq \
  --name makedata_09_user2redis_freq \
  --master yarn --driver-memory 1G --executor-memory 4G --executor-cores 1 --num-executors 32 \
  --conf spark.yarn.executor.memoryOverhead=1024 \
  /root/zhangbo/recommend-emr-dataprocess/target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  date:20240302 tablePart:96 expireDay:3 ifWriteRedisUser:True ifUser:True midDays:14 redisLimit:80000000 \
  savePathUser:/dw/recommend/model/09_feature/user/ \
  > p09.log 2>&1 &

# =============================================================================
# --- [Legacy STR upstream samples] (旧STR 上游样本) ---------------------------
nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  --class com.aliyun.odps.spark.examples.makedata.makedata_06_originData \
  --master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 32 \
  --conf spark.yarn.executor.memoryOverhead=1024 \
  --conf spark.shuffle.service.enabled=true \
  --conf spark.shuffle.service.port=7337 \
  --conf spark.shuffle.consolidateFiles=true \
  --conf spark.shuffle.manager=sort \
  --conf spark.storage.memoryFraction=0.4 \
  --conf spark.shuffle.memoryFraction=0.5 \
  --conf spark.default.parallelism=200 \
  ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  tablePart:64 savePath:/dw/recommend/model/00_sample_data/ beginStr:20240311 endStr:20240312 \
  > p6.log 2>&1 &

# --- [Legacy STR training data] (旧STR 训练数据) ------------------------------
nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  --class com.aliyun.odps.spark.examples.makedata.makedata_07_strData \
  --master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 32 \
  ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  savePath:/dw/recommend/model/04_str_data/ beginStr:20240311 endStr:20240312 featureVersion:v4 ifRepart:100 \
  > p7.log 2>&1 &

# =============================================================================
# --- Hourly origin-data extraction (step 13) ---------------------------------
nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  --class com.aliyun.odps.spark.examples.makedata.makedata_13_originData_20240529 \
  --master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
  ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  tablePart:64 repartition:32 \
  beginStr:2024061600 endStr:2024061623 \
  savePath:/dw/recommend/model/13_sample_data/ \
  table:alg_recsys_sample_all \
  > p13_2024061600.log 2>&1 &

# --- Feature-value extraction (step 14) --------------------------------------
nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  --class com.aliyun.odps.spark.examples.makedata.makedata_14_valueData_20240608 \
  --master yarn --driver-memory 1G --executor-memory 3G --executor-cores 1 --num-executors 32 \
  ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  readPath:/dw/recommend/model/13_sample_data/ \
  savePath:/dw/recommend/model/14_feature_data/ \
  beginStr:20240615 endStr:20240615 repartition:1000 \
  > p14_data_check.log 2>&1 &

# --- Bucket-boundary computation (step 15) -----------------------------------
# Driver-heavy: collects sampled values to the driver (16G + maxResultSize).
nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  --class com.aliyun.odps.spark.examples.makedata.makedata_15_bucket_20240608 \
  --master yarn --driver-memory 16G --executor-memory 1G --executor-cores 1 --num-executors 16 \
  --conf spark.driver.maxResultSize=16G \
  ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  readPath:/dw/recommend/model/14_feature_data/20240606/ fileName:20240606_200_v3 \
  bucketNum:200 sampleRate:0.1 \
  > p15_data2.log 2>&1 &

# --- Bucketized training data (step 16) --------------------------------------
nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  --class com.aliyun.odps.spark.examples.makedata.makedata_16_bucketData_20240609 \
  --master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
  ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  beginStr:20240615 endStr:20240615 repartition:1000 \
  > p16_data.log 2>&1 &

# Output directories for the 13 -> 14 -> 16 chain:
#   /dw/recommend/model/13_sample_data/
#   /dw/recommend/model/14_feature_data/
#   /dw/recommend/model/16_train_data/

# =============================================================================
# Run only ONE of these: execute only when using online-printed features.
# (一个执行:只有用线上打印特征的才执行)
nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  --class com.aliyun.odps.spark.examples.makedata.makedata_13_originData_20240529_check \
  --master yarn \
  --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
  ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  tablePart:64 repartition:32 \
  beginStr:2024061500 endStr:2024061523 \
  savePath:/dw/recommend/model/13_sample_data_check_print/ \
  table:alg_recsys_sample_all_new \
  > p13_2024061500_check.log 2>&1 &

# BOTH of these must be executed: filter out unneeded samples.
# (两个都要执行:过滤不需要的样本)
nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  --class com.aliyun.odps.spark.examples.makedata.makedata_16_bucketData_20240609_check \
  --master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
  ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  readPath:/dw/recommend/model/14_feature_data_check_print/ \
  savePath:/dw/recommend/model/16_train_data_check_print/ \
  beginStr:20240615 endStr:20240615 repartition:1000 \
  > p16_data_check.log 2>&1 &

# Check-pipeline directories:
#   /dw/recommend/model/13_sample_data_check/
#   /dw/recommend/model/13_sample_data_check_print/
#   /dw/recommend/model/14_feature_data_check/
#   /dw/recommend/model/14_feature_data_check_print/
#   /dw/recommend/model/16_train_data_check/
#   /dw/recommend/model/16_train_data_check_print/

# --- Bucket-data print check (step 17), no-arg invocation --------------------
nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  --class com.aliyun.odps.spark.examples.makedata.makedata_17_bucketDataPrint_20240617 \
  --master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
  ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  > p17_data_check.log 2>&1 &

# --- Merge hourly partitions into daily (step 18) ----------------------------
nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  --class com.aliyun.odps.spark.examples.makedata.makedata_18_mergehour2day_20240617 \
  --master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
  ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  > p18_data_check.log 2>&1 &

# --- Bucket-data print check (step 17), parameterized invocation -------------
# NOTE(review): this reuses p17_data_check.log and will overwrite the log of
# the no-arg step-17 run above if both are launched from the same directory.
nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  --class com.aliyun.odps.spark.examples.makedata.makedata_17_bucketDataPrint_20240617 \
  --master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
  ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  beginStr:2024061800 endStr:2024061814 \
  readDate:20240618 \
  > p17_data_check.log 2>&1 &