01-脚本记录 7.0 KB

  1. spark-submit --class com.tzld.piaoquan.recommend.model.produce.xgboost.XGBoostTrain --master yarn --driver-memory 512M --executor-memory 512M --executor-cores 1 --num-executors 4 /root/recommend-model/recommend-model-produce-new.jar > ~/recommend-model/log 2>&1 &
  2. recommend-model-produce-jar-with-dependencies.jar
  3. nohup /opt/apps/SPARK3/spark-3.3.1-hadoop3.2-1.0.5/bin/spark-class org.apache.spark.deploy.SparkSubmit \
  4. --class com.tzld.piaoquan.recommend.model.produce.xgboost.XGBoostTrain \
  5. --master yarn --driver-memory 512M --executor-memory 512M --executor-cores 1 --num-executors 2 \
  6. ./target/recommend-model-produce-jar-with-dependencies.jar \
  7. > p.log 2>&1 &
  8. nohup /opt/apps/SPARK3/spark-3.3.1-hadoop3.2-1.0.5/bin/spark-class org.apache.spark.deploy.SparkSubmit --class com.tzld.piaoquan.recommend.model.train_01_xgb_ad_20240808 --master yarn --driver-memory 6G --executor-memory 6G --executor-cores 1 --num-executors 32 --conf spark.yarn.executor.memoryoverhead=1024 --conf spark.shuffle.service.enabled=true --conf spark.shuffle.service.port=7337 --conf spark.shuffle.consolidateFiles=true --conf spark.shuffle.manager=sort --conf spark.storage.memoryFraction=0.4 --conf spark.shuffle.memoryFraction=0.5 --conf spark.default.parallelism=200 ./target/recommend-model-produce-jar-with-dependencies.jar > p.log 2>&1 &
  9. nohup /opt/apps/SPARK3/spark-3.3.1-hadoop3.2-1.0.5/bin/spark-class org.apache.spark.deploy.SparkSubmit \
  10. --class com.tzld.piaoquan.recommend.model.train_01_xgb_ad_20240808 \
  11. --master yarn --driver-memory 6G --executor-memory 3G --executor-cores 1 --num-executors 160 \
  12. --conf spark.yarn.executor.memoryoverhead=1000 \
  13. --conf spark.shuffle.service.enabled=true \
  14. --conf spark.shuffle.service.port=7337 \
  15. --conf spark.shuffle.consolidateFiles=true \
  16. --conf spark.shuffle.manager=sort \
  17. --conf spark.storage.memoryFraction=0.4 \
  18. --conf spark.shuffle.memoryFraction=0.5 \
  19. --conf spark.default.parallelism=200 \
  20. ./target/recommend-model-produce-jar-with-dependencies.jar \
  21. featureFile:20240809_ad_feature_name_517.txt \
  22. trainPath:/dw/recommend/model/33_ad_train_data_v4/2024080[6-9],/dw/recommend/model/33_ad_train_data_v4/2024081[0-2] \
  23. testPath:/dw/recommend/model/33_ad_train_data_v4/20240813/ \
  24. savePath:/dw/recommend/model/34_ad_predict_data/20240813_1000/ \
  25. modelPath:/dw/recommend/model/35_ad_model/model_xgb_7day \
  26. eta:0.01 gamma:0.0 max_depth:5 num_round:1000 num_worker:63 \
  27. repartition:20 \
  28. > p5.log 2>&1 &
  29. 0.7316512679739304 1000
  30. 2024072[5-9],2024073[0-1],2024080[1-4]
  31. /dw/recommend/model/33_ad_train_data_v4/(20240725|20240726|20240727|20240728|20240729|20240730|20240731|20240801|20240802|20240803|20240804)
  32. nohup /opt/apps/SPARK3/spark-3.3.1-hadoop3.2-1.0.5/bin/spark-class org.apache.spark.deploy.SparkSubmit \
  33. --class com.tzld.piaoquan.recommend.model.ana_01_xgb_ad_20240809 \
  34. --master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 32 \
  35. --conf spark.yarn.executor.memoryoverhead=1024 \
  36. --conf spark.shuffle.service.enabled=true \
  37. --conf spark.shuffle.service.port=7337 \
  38. --conf spark.shuffle.consolidateFiles=true \
  39. --conf spark.shuffle.manager=sort \
  40. --conf spark.storage.memoryFraction=0.4 \
  41. --conf spark.shuffle.memoryFraction=0.5 \
  42. --conf spark.default.parallelism=200 \
  43. ./target/recommend-model-produce-jar-with-dependencies.jar \
  44. savePath:/dw/recommend/model/34_ad_predict_data/20240805_1000/ \
  45. > p1.log 2>&1 &
  46. dfs -get /dw/recommend/model/35_ad_model/model_xgb_1000 ./
  47. tar -czvf model_xgb_1000.tar.gz -C model_xgb_1000 .
  48. dfs -put model_xgb_1000.tar.gz oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/zhangbo/
  49. oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/zhangbo/model_xgb_1000.tar.gz
  50. nohup /opt/apps/SPARK3/spark-3.3.1-hadoop3.2-1.0.5/bin/spark-class org.apache.spark.deploy.SparkSubmit \
  51. --class com.tzld.piaoquan.recommend.model.pred_01_xgb_ad_jsonfile_20240813 \
  52. --master yarn --driver-memory 6G --executor-memory 6G --executor-cores 1 --num-executors 32 \
  53. --conf spark.yarn.executor.memoryoverhead=1024 \
  54. --conf spark.shuffle.service.enabled=true \
  55. --conf spark.shuffle.service.port=7337 \
  56. --conf spark.shuffle.consolidateFiles=true \
  57. --conf spark.shuffle.manager=sort \
  58. --conf spark.storage.memoryFraction=0.4 \
  59. --conf spark.shuffle.memoryFraction=0.5 \
  60. --conf spark.default.parallelism=200 \
  61. ./target/recommend-model-produce-jar-with-dependencies.jar \
  62. featureFile:20240809_ad_feature_name_517.txt \
  63. savePath:/dw/recommend/model/34_ad_predict_data/case_tmp/ \
  64. modelPath:/dw/recommend/model/35_ad_model/model_xgb_1000 \
  65. > p5.log 2>&1 &
  66. -------------------------------预测-----------------------------------------
  67. nohup /opt/apps/SPARK3/spark-3.3.1-hadoop3.2-1.0.5/bin/spark-class org.apache.spark.deploy.SparkSubmit \
  68. --class com.tzld.piaoquan.recommend.model.pred_01_xgb_ad_hdfsfile_20240813 \
  69. --master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 30 \
  70. --conf spark.yarn.executor.memoryoverhead=1024 \
  71. --conf spark.shuffle.service.enabled=true \
  72. --conf spark.shuffle.service.port=7337 \
  73. --conf spark.shuffle.consolidateFiles=true \
  74. --conf spark.shuffle.manager=sort \
  75. --conf spark.storage.memoryFraction=0.4 \
  76. --conf spark.shuffle.memoryFraction=0.5 \
  77. --conf spark.default.parallelism=200 \
  78. ./target/recommend-model-produce-jar-with-dependencies.jar \
  79. featureFile:20240809_ad_feature_name_517.txt \
  80. testPath:/dw/recommend/model/33_ad_train_data_v4/20240815/ \
  81. savePath:/dw/recommend/model/34_ad_predict_data/20240815_new2/ \
  82. modelPath:/dw/recommend/model/35_ad_model/model_xgb_7day_2000 \
  83. > p1_pred_20240815_new2.log 2>&1 &
  84. cat p1_pred_20240815_new2.log | grep -E "^3353|^3606|^2670|^3585|^2912|^3869|^3857" | grep "实验组"
  85. -------------------------------训练-----------------------------------------
  86. nohup /opt/apps/SPARK3/spark-3.3.1-hadoop3.2-1.0.5/bin/spark-class org.apache.spark.deploy.SparkSubmit \
  87. --class com.tzld.piaoquan.recommend.model.train_01_xgb_ad_20240808 \
  88. --master yarn --driver-memory 6G --executor-memory 9G --executor-cores 1 --num-executors 31 \
  89. --conf spark.yarn.executor.memoryoverhead=1000 \
  90. --conf spark.shuffle.service.enabled=true \
  91. --conf spark.shuffle.service.port=7337 \
  92. --conf spark.shuffle.consolidateFiles=true \
  93. --conf spark.shuffle.manager=sort \
  94. --conf spark.storage.memoryFraction=0.4 \
  95. --conf spark.shuffle.memoryFraction=0.5 \
  96. --conf spark.default.parallelism=200 \
  97. ./target/recommend-model-produce-jar-with-dependencies.jar \
  98. featureFile:20240809_ad_feature_name_517.txt \
  99. trainPath:/dw/recommend/model/33_ad_train_data_v4/2024080[7-9],/dw/recommend/model/33_ad_train_data_v4/2024081[0-3] \
  100. testPath:/dw/recommend/model/33_ad_train_data_v4/20240814/ \
  101. savePath:/dw/recommend/model/34_ad_predict_data/20240814_2000/ \
  102. modelPath:/dw/recommend/model/35_ad_model/model_xgb_7day_2000 \
  103. eta:0.01 gamma:0.0 max_depth:5 num_round:2000 num_worker:30 \
  104. repartition:20 \
  105. > p2_train_0814.log 2>&1 &
  106. dfs -get /dw/recommend/model/35_ad_model/model_xgb_7day ./
  107. tar -czvf model_xgb_1000.tar.gz -C model_xgb_7day .
  108. rm -rf .model.tar.gz.crc
  109. dfs -rm -r -skipTrash oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/zhangbo/model.tar.gz
  110. dfs -put model_xgb_1000.tar.gz oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/zhangbo/