临时记录的脚本-推荐 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209
  1. 【新 上游样本】
  2. nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  3. --class com.aliyun.odps.spark.examples.makedata.makedata_10_originData_v3 \
  4. --master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 64 \
  5. ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  6. tablePart:64 savePath:/dw/recommend/model/10_sample_data_v3/ beginStr:20240227 endStr:20240227 > p10_.log 2>&1 &
  7. [ros样本生产]
  8. nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  9. --class com.aliyun.odps.spark.examples.makedata.makedata_12_rosData_v3 \
  10. --master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 32 \
  11. ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  12. savePath:/dw/recommend/model/12_ros_data_v3/ beginStr:20240228 endStr:20240228 ifRepart:10 \
  13. > p12_1.log 2>&1 &
  14. nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  15. --class com.aliyun.odps.spark.examples.makedata.makedata_12_rosData_v3_noweight \
  16. --master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 32 \
  17. ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  18. savePath:/dw/recommend/model/12_ros_data_v3_noweight/ beginStr:20240222 endStr:20240226 ifRepart:10 \
  19. > p12_2.log 2>&1 &
  20. [str样本生产]
  21. nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  22. --class com.aliyun.odps.spark.examples.makedata.makedata_11_strData_v3 \
  23. --master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 64 \
  24. ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  25. savePath:/dw/recommend/model/11_str_data_v3/ beginStr:20240227 endStr:20240227 ifRepart:100 \
  26. > p11.log 2>&1 &
  27. [user写redis]
  28. nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  29. --class com.aliyun.odps.spark.examples.makedata.makedata_09_user2redis_freq \
  30. --name makedata_09_user2redis_freq \
  31. --master yarn --driver-memory 1G --executor-memory 4G --executor-cores 1 --num-executors 32 \
  32. --conf spark.yarn.executor.memoryOverhead=1024 \
  33. /root/zhangbo/recommend-emr-dataprocess/target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  34. date:20240302 tablePart:96 expireDay:3 ifWriteRedisUser:True ifUser:True midDays:14 redisLimit:80000000 \
  35. savePathUser:/dw/recommend/model/09_feature/user/ > p09.log 2>&1 &
  36. --------------
  37. 【旧STR 上游样本】
  38. nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  39. --class com.aliyun.odps.spark.examples.makedata.makedata_06_originData \
  40. --master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 32 \
  41. --conf spark.yarn.executor.memoryOverhead=1024 \
  42. --conf spark.shuffle.service.enabled=true \
  43. --conf spark.shuffle.service.port=7337 \
  44. --conf spark.shuffle.consolidateFiles=true \
  45. --conf spark.shuffle.manager=sort \
  46. --conf spark.storage.memoryFraction=0.4 \
  47. --conf spark.shuffle.memoryFraction=0.5 \
  48. --conf spark.default.parallelism=200 \
  49. ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  50. tablePart:64 savePath:/dw/recommend/model/00_sample_data/ beginStr:20240311 endStr:20240312 > p6.log 2>&1 &
  51. 【旧STR 训练数据】
  52. nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  53. --class com.aliyun.odps.spark.examples.makedata.makedata_07_strData \
  54. --master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 32 \
  55. ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  56. savePath:/dw/recommend/model/04_str_data/ beginStr:20240311 endStr:20240312 featureVersion:v4 ifRepart:100 \
  57. > p7.log 2>&1 &
  58. ---------------------------------------------------------------------------------------------
  59. ---------------------------------------------------------------------------------------------
  60. --------------------------------下-----------------------------------------------------------
  61. --------------------------------面-----------------------------------------------------------
  62. --------------------------------为-----------------------------------------------------------
  63. --------------------------------准-----------------------------------------------------------
  64. ---------------------------------------------------------------------------------------------
  65. ---------------------------------------------------------------------------------------------
  66. nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  67. --class com.aliyun.odps.spark.examples.makedata.makedata_13_originData_20240529 \
  68. --master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
  69. ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  70. tablePart:64 repartition:32 \
  71. beginStr:2024061600 endStr:2024061623 \
  72. savePath:/dw/recommend/model/13_sample_data/ \
  73. table:alg_recsys_sample_all \
  74. > p13_2024061600.log 2>&1 &
  75. nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  76. --class com.aliyun.odps.spark.examples.makedata.makedata_14_valueData_20240608 \
  77. --master yarn --driver-memory 1G --executor-memory 3G --executor-cores 1 --num-executors 32 \
  78. ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  79. readPath:/dw/recommend/model/13_sample_data/ \
  80. savePath:/dw/recommend/model/14_feature_data/ \
  81. beginStr:20240615 endStr:20240615 repartition:1000 \
  82. > p14_data_check.log 2>&1 &
  83. nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  84. --class com.aliyun.odps.spark.examples.makedata.makedata_15_bucket_20240608 \
  85. --master yarn --driver-memory 16G --executor-memory 1G --executor-cores 1 --num-executors 16 \
  86. --conf spark.driver.maxResultSize=16G \
  87. ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  88. readPath:/dw/recommend/model/14_feature_data/20240606/ fileName:20240606_200_v3 \
  89. bucketNum:200 sampleRate:0.1 \
  90. > p15_data2.log 2>&1 &
  91. nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  92. --class com.aliyun.odps.spark.examples.makedata.makedata_16_bucketData_20240609 \
  93. --master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
  94. ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  95. beginStr:20240615 endStr:20240615 repartition:1000 \
  96. > p16_data.log 2>&1 &
  97. /dw/recommend/model/13_sample_data/
  98. /dw/recommend/model/14_feature_data/
  99. /dw/recommend/model/16_train_data/
  100. -----
  101. 一个执行:只有用线上打印特征的才执行
  102. nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  103. --class com.aliyun.odps.spark.examples.makedata.makedata_13_originData_20240529_check \
  104. --master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
  105. ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  106. tablePart:64 repartition:32 \
  107. beginStr:2024061500 endStr:2024061523 \
  108. savePath:/dw/recommend/model/13_sample_data_check_print/ \
  109. table:alg_recsys_sample_all_new \
  110. > p13_2024061500_check.log 2>&1 &
  111. 两个都要执行:过滤不需要的样本
  112. nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  113. --class com.aliyun.odps.spark.examples.makedata.makedata_16_bucketData_20240609_check \
  114. --master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
  115. ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  116. readPath:/dw/recommend/model/14_feature_data_check_print/ \
  117. savePath:/dw/recommend/model/16_train_data_check_print/ \
  118. beginStr:20240615 endStr:20240615 repartition:1000 \
  119. > p16_data_check.log 2>&1 &
  120. /dw/recommend/model/13_sample_data_check/
  121. /dw/recommend/model/13_sample_data_check_print/
  122. /dw/recommend/model/14_feature_data_check/
  123. /dw/recommend/model/14_feature_data_check_print/
  124. /dw/recommend/model/16_train_data_check/
  125. /dw/recommend/model/16_train_data_check_print/
  126. nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  127. --class com.aliyun.odps.spark.examples.makedata.makedata_17_bucketDataPrint_20240617 \
  128. --master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
  129. ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  130. > p17_data_check.log 2>&1 &
  131. nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  132. --class com.aliyun.odps.spark.examples.makedata.makedata_18_mergehour2day_20240617 \
  133. --master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
  134. ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  135. > p18_data_check.log 2>&1 &
  136. nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  137. --class com.aliyun.odps.spark.examples.makedata.makedata_17_bucketDataPrint_20240617 \
  138. --master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
  139. ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  140. beginStr:2024062600 endStr:2024062623 \
  141. readDate:20240626 \
  142. > p17_20240626.log 2>&1 &
  143. /dw/recommend/model/17_for_check/
  144. ------------------------------------------------------------------------------------------------------------------------
  145. nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  146. --class com.aliyun.odps.spark.examples.makedata_recsys.makedata_recsys_41_originData_20240709 \
  147. --master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
  148. ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  149. tablePart:64 repartition:32 \
  150. beginStr:2024070508 endStr:2024070508 \
  151. savePath:/dw/recommend/model/41_recsys_sample_data/ \
  152. table:alg_recsys_sample_all \
  153. > p41_2024070508.log 2>&1 &
  154. nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  155. --class com.aliyun.odps.spark.examples.makedata_recsys.makedata_recsys_42_bucket_20240709 \
  156. --master yarn --driver-memory 16G --executor-memory 1G --executor-cores 1 --num-executors 16 \
  157. --conf spark.driver.maxResultSize=16G \
  158. ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  159. readPath:/dw/recommend/model/41_recsys_sample_data_v1/20240705* \
  160. savePath:/dw/recommend/model/42_recsys_bucket/ \
  161. fileName:20240705_314_200 \
  162. bucketNum:200 sampleRate:1.0 \
  163. > p42.log 2>&1 &
  164. nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
  165. --class com.aliyun.odps.spark.examples.makedata_recsys.makedata_recsys_43_bucketData_20240709 \
  166. --master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
  167. ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
  168. readPath:/dw/recommend/model/41_recsys_sample_data/ \
  169. savePath:/dw/recommend/model/43_recsys_train_data/ \
  170. beginStr:20240705 endStr:20240705 repartition:100 \
  171. filterNames:XXXXXXXXX \
  172. fileName:20240609_bucket_314.txt \
  173. whatLabel:is_return whatApps:0,4,21,3,6,17,23 \
  174. > p43_20240705.log 2>&1 &
  175. ------------- 20240709_recsys_bucket_314.txt ------------ 20240609_bucket_274.txt -------------
  176. ------------- filterNames:b123_1h_ROS,b123_2h_ROS,b123_3h_ROS,b123_4h_ROS,b123_12h_ROS,b123_1d_ROS,b123_3d_ROS,b123_7d_ROS,b167_1h_ROS,b167_2h_ROS,b167_3h_ROS,b167_4h_ROS,b167_12h_ROS,b167_1d_ROS,b167_3d_ROS,b167_7d_ROS,b8910_1h_ROS,b8910_2h_ROS,b8910_3h_ROS,b8910_4h_ROS,b8910_12h_ROS,b8910_1d_ROS,b8910_3d_ROS,b8910_7d_ROS,b111213_1h_ROS,b111213_2h_ROS,b111213_3h_ROS,b111213_4h_ROS,b111213_12h_ROS,b111213_1d_ROS,b111213_3d_ROS,b111213_7d_ROS,b171819_1h_ROS,b171819_2h_ROS,b171819_3h_ROS,b171819_4h_ROS,b171819_12h_ROS,b171819_1d_ROS,b171819_3d_ROS,b171819_7d_ROS \
  177. ------------- filterNames:XXXXXXXXX \