# make_data.sh
  1. #!/bin/bash
  2. start_date=""
  3. end_date=""
  4. start_hour=""
  5. end_hour=""
  6. table=""
  7. if(($#==5))
  8. then
  9. start_date=$1
  10. end_date=$2
  11. start_hour=$3
  12. end_hour=$4
  13. table=$5
  14. else
  15. start_date=$(date +%Y%m%d -d "-1 $days day")
  16. end_date=$start_date
  17. start_hour=00
  18. end_hour=23
  19. table=dwd_recsys_alg_sample_all_20250212
  20. fi
  21. set -x
  22. export SPARK_HOME=/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8
  23. export PATH=$SPARK_HOME/bin:$PATH
  24. export HADOOP_CONF_DIR=/etc/taihao-apps/hadoop-conf
  25. export JAVA_HOME=/usr/lib/jvm/java-1.8.0
  26. # params
  27. sampleRate=0.036
  28. label=is_share
  29. savePath=/dw/recommend/model/83_origin_data/
  30. # 1 生产原始数据
  31. echo "$(date +%Y-%m-%d_%H-%M-%S)----------step1------------开始根据${table}生产原始数据"
  32. /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  33. --class com.aliyun.odps.spark.examples.makedata_recsys_r_rate.makedata_recsys_83_originData_20250317 \
  34. --master yarn --driver-memory 6G --executor-memory 10G --executor-cores 1 --num-executors 16 \
  35. --conf spark.yarn.executor.memoryoverhead=2048 \
  36. /mnt/disk1/jch/recommend-emr-dataprocess/target/spark-examples-1.0.0-SNAPSHOT-jar-with-dependencies.jar \
  37. table:${table} tablePart:96 \
  38. beginStr:${start_date}${start_hour} endStr:${end_date}${end_hour} \
  39. whatPages:"详情后沉浸页,回流后沉浸页&内页feed,首页feed,详情页,回流页" \
  40. whatLabel:${label} \
  41. fuSampleRate:${sampleRate} \
  42. repartition:8 \
  43. savePath:${savePath} \