# make_data.sh (1.3 KB)
  1. #!/bin/bash
  2. run_mode=""
  3. if(($#==1))
  4. then
  5. run_mode=$1
  6. else
  7. exit -1
  8. fi
  9. if [[ "$run_mode" != "run" ]]
  10. then
  11. exit -1
  12. fi
  13. set -x
  14. export SPARK_HOME=/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8
  15. export PATH=$SPARK_HOME/bin:$PATH
  16. export HADOOP_CONF_DIR=/etc/taihao-apps/hadoop-conf
  17. export JAVA_HOME=/usr/lib/jvm/java-1.8.0
  18. # params
  19. data_date=$(date +%Y%m%d -d "-2 $days day")
  20. start_date=${data_date}
  21. end_date=${data_date}
  22. start_hour=00
  23. end_hour=23
  24. sampleRate=0.01
  25. table=dwd_recsys_alg_sample_all_20250212
  26. savePath=/dw/recommend/model/82_origin_data/
  27. # 1 生产原始数据
  28. echo "$(date +%Y-%m-%d_%H-%M-%S)----------step1------------开始根据${table}生产原始数据"
  29. /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  30. --class com.aliyun.odps.spark.examples.makedata_recsys_r_rate.makedata_recsys_82_originData_20250221 \
  31. --master yarn --driver-memory 3G --executor-memory 4G --executor-cores 1 --num-executors 16 \
  32. /mnt/disk1/jch/recommend-emr-dataprocess/target/spark-examples-1.0.0-SNAPSHOT-jar-with-dependencies.jar \
  33. table:${table} tablePart:64 \
  34. beginStr:${start_date}${start_hour} endStr:${end_date}${end_hour} \
  35. whatLabel:is_return_n_noself \
  36. fuSampleRate:${sampleRate} \
  37. repartition:8 \
  38. savePath:${savePath} \