Forráskód Böngészése

更新bucket文件

zhangbo 9 hónapja
szülő
commit
f90eea2d8f

A különbségek nem kerülnek megjelenítésre, a fájl túl nagy
+ 0 - 2
src/main/resources/20240609_bucket_274_old.txt


A különbségek nem kerülnek megjelenítésre, a fájl túl nagy
+ 0 - 0
src/main/resources/20240609_bucket_314.txt


+ 14 - 11
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys/makedata_recsys_43_bucketData_20240709.scala

@@ -16,6 +16,18 @@ import scala.io.Source
 object makedata_recsys_43_bucketData_20240709 {
   def main(args: Array[String]): Unit = {
 
+    // 1 读取参数
+    val param = ParamUtils.parseArgs(args)
+    val readPath = param.getOrElse("readPath", "/dw/recommend/model/41_recsys_sample_data_v1/")
+    val savePath = param.getOrElse("savePath", "/dw/recommend/model/43_recsys_train_data_v1/")
+    val beginStr = param.getOrElse("beginStr", "20240703")
+    val endStr = param.getOrElse("endStr", "20240703")
+    val repartition = param.getOrElse("repartition", "100").toInt
+    val filterNames = param.getOrElse("filterNames", "").split(",").toSet
+    val whatLabel = param.getOrElse("whatLabel", "is_return")
+    val whatApps = param.getOrElse("whatApps", "0,4,5,21,3,6").split(",").toSet
+    val fileName = param.getOrElse("fileName", "20240709_recsys_bucket_314.txt")
+
     val spark = SparkSession
       .builder()
       .appName(this.getClass.getName)
@@ -24,7 +36,7 @@ object makedata_recsys_43_bucketData_20240709 {
 
     val loader = getClass.getClassLoader
 
-    val resourceUrlBucket = loader.getResource("20240709_recsys_bucket_314.txt")
+    val resourceUrlBucket = loader.getResource(fileName)
     val buckets =
       if (resourceUrlBucket != null) {
         val buckets = Source.fromURL(resourceUrlBucket).getLines().mkString("\n")
@@ -44,16 +56,7 @@ object makedata_recsys_43_bucketData_20240709 {
     val bucketsMap_br = sc.broadcast(bucketsMap)
 
 
-    // 1 读取参数
-    val param = ParamUtils.parseArgs(args)
-    val readPath = param.getOrElse("readPath", "/dw/recommend/model/41_recsys_sample_data_v1/")
-    val savePath = param.getOrElse("savePath", "/dw/recommend/model/43_recsys_train_data_v1/")
-    val beginStr = param.getOrElse("beginStr", "20240703")
-    val endStr = param.getOrElse("endStr", "20240703")
-    val repartition = param.getOrElse("repartition", "100").toInt
-    val filterNames = param.getOrElse("filterNames", "").split(",").toSet
-    val whatLabel = param.getOrElse("whatLabel", "is_return")
-    val whatApps = param.getOrElse("whatApps", "0,4,5,21,3,6").split(",").toSet
+
 
     val dateRange = MyDateUtils.getDateRange(beginStr, endStr)
     for (date <- dateRange) {

+ 31 - 5
src/main/scala/com/aliyun/odps/spark/examples/临时记录的脚本-推荐

@@ -169,15 +169,41 @@ readDate:20240626 \
 /dw/recommend/model/17_for_check/
 
 
-
-------------------------------------------
+------------------------------------------------------------------------------------------------------------------------
 
 nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
 --class com.aliyun.odps.spark.examples.makedata_recsys.makedata_recsys_41_originData_20240709 \
 --master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
 tablePart:64 repartition:32 \
-beginStr:2024070500 endStr:2024070508 \
-savePath:/dw/recommend/model/41_recsys_sample_data_v1/ \
+beginStr:2024070508 endStr:2024070508 \
+savePath:/dw/recommend/model/41_recsys_sample_data/ \
 table:alg_recsys_sample_all \
-> p41_2024070500.log 2>&1 &
+> p41_2024070508.log 2>&1 &
+
+nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+--class com.aliyun.odps.spark.examples.makedata_recsys.makedata_recsys_42_bucket_20240709 \
+--master yarn --driver-memory 16G --executor-memory 1G --executor-cores 1 --num-executors 16 \
+--conf spark.driver.maxResultSize=16G \
+./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+readPath:/dw/recommend/model/41_recsys_sample_data_v1/20240705* \
+savePath:/dw/recommend/model/42_recsys_bucket/ \
+fileName:20240705_314_200 \
+bucketNum:200 sampleRate:1.0 \
+> p42.log 2>&1 &
+
+nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+--class com.aliyun.odps.spark.examples.makedata_recsys.makedata_recsys_43_bucketData_20240709 \
+--master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
+./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+readPath:/dw/recommend/model/41_recsys_sample_data/ \
+savePath:/dw/recommend/model/43_recsys_train_data/ \
+beginStr:20240705 endStr:20240705 repartition:100 \
+filterNames:XXXXXXXXX \
+fileName:20240609_bucket_314.txt \
+whatLabel:is_return whatApps:0,4,21,3,6,17,23 \
+> p43_20240705.log 2>&1 &
+
+------------- 20240709_recsys_bucket_314.txt ------------ 20240609_bucket_274.txt -------------
+------------- filterNames:b123_1h_ROS,b123_2h_ROS,b123_3h_ROS,b123_4h_ROS,b123_12h_ROS,b123_1d_ROS,b123_3d_ROS,b123_7d_ROS,b167_1h_ROS,b167_2h_ROS,b167_3h_ROS,b167_4h_ROS,b167_12h_ROS,b167_1d_ROS,b167_3d_ROS,b167_7d_ROS,b8910_1h_ROS,b8910_2h_ROS,b8910_3h_ROS,b8910_4h_ROS,b8910_12h_ROS,b8910_1d_ROS,b8910_3d_ROS,b8910_7d_ROS,b111213_1h_ROS,b111213_2h_ROS,b111213_3h_ROS,b111213_4h_ROS,b111213_12h_ROS,b111213_1d_ROS,b111213_3d_ROS,b111213_7d_ROS,b171819_1h_ROS,b171819_2h_ROS,b171819_3h_ROS,b171819_4h_ROS,b171819_12h_ROS,b171819_1d_ROS,b171819_3d_ROS,b171819_7d_ROS \
+------------- filterNames:XXXXXXXXX \

Nem az összes módosított fájl került megjelenítésre, mert túl sok fájl változott