Browse Source

add 71 rov sample

jch 5 months ago
parent
commit
5ef8c1e991

+ 3 - 3
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_recsys_71_originData_20250109.scala

@@ -27,14 +27,14 @@ object makedata_recsys_71_originData_20250109 {
 
     // 1 读取参数
     val param = ParamUtils.parseArgs(args)
-    val beginStr = param.getOrElse("beginStr", "2025010723")
-    val endStr = param.getOrElse("endStr", "2025010723")
     val project = param.getOrElse("project", "loghubods")
     val table = param.getOrElse("table", "alg_recsys_sample_tmp_20250109")
     val tablePart = param.getOrElse("tablePart", "64").toInt
-    val repartition = param.getOrElse("repartition", "32").toInt
+    val beginStr = param.getOrElse("beginStr", "2025010723")
+    val endStr = param.getOrElse("endStr", "2025010723")
     val whatLabel = param.getOrElse("whatLabel", "is_return_1")
     val fuSampleRate = param.getOrElse("fuSampleRate", "0.1").toDouble
+    val repartition = param.getOrElse("repartition", "32").toInt
     val savePath = param.getOrElse("savePath", "/dw/recommend/model/71_origin_data/")
 
     // 2 odps

+ 4 - 4
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_recsys_71_rov_sample_20250109.scala

@@ -18,16 +18,16 @@ object makedata_recsys_71_rov_sample_20250109 {
     // 1 读取参数
     val param = ParamUtils.parseArgs(args)
     val readPath = param.getOrElse("readPath", "/dw/recommend/model/71_origin_data/")
-    val savePath = param.getOrElse("savePath", "/dw/recommend/model/71_recsys_rov_train_data/")
     val beginStr = param.getOrElse("beginStr", "20250103")
     val endStr = param.getOrElse("endStr", "20250103")
-    val repartition = param.getOrElse("repartition", "100").toInt
-    val whatLabel = param.getOrElse("whatLabel", "is_return_1")
     val whatApps = param.getOrElse("whatApps", "0,3,4,21,17").split(",").toSet
+    val whatLabel = param.getOrElse("whatLabel", "is_return_1")
     val fuSampleRate = param.getOrElse("fuSampleRate", "1.0").toDouble
     val notUseBucket = param.getOrElse("notUseBucket", "0").toInt
     val featureNameFile = param.getOrElse("featureName", "20241209_recsys_rov_name.txt")
     val featureBucketFile = param.getOrElse("featureBucket", "20241209_recsys_rov_bucket.txt")
+    val repartition = param.getOrElse("repartition", "100").toInt
+    val savePath = param.getOrElse("savePath", "/dw/recommend/model/71_recsys_rov_train_data/")
 
     val spark = SparkSession
       .builder()
@@ -113,7 +113,7 @@ object makedata_recsys_71_rov_sample_20250109 {
 
   private def loadUseFeatureBuckets(loader: ClassLoader, notUseBucket: Int, nameFile: String): Map[String, (Double, Array[Double])] = {
     if (notUseBucket > 0) {
-      return scala.collection.mutable.Map[String, (Double, Array[Double])]
+      return Map[String, (Double, Array[Double])]()
     }
     val buckets = loadFileData(loader, nameFile)
     println(buckets)