jch 3 napja
szülő
commit
088bc35507

+ 21 - 2
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_profile_gender_sample_20251114.scala

@@ -33,9 +33,8 @@ object makedata_profile_gender_sample_20251114 {
     val sc = spark.sparkContext
 
     // 3. 处理数据
-    val loader = getClass.getClassLoader
     val featureSet = loadFeatureNames(featureFile)
-    val featureBucketMap = DataUtils.loadUseFeatureBuckets(loader, notUseBucket, featureBucketFile)
+    val featureBucketMap = loadUseFeatureBuckets(notUseBucket, featureBucketFile)
     val bucketsMap_br = sc.broadcast(featureBucketMap)
     for (suffix <- suffixSet) {
       val partition = "%s_%s".format(year, suffix)
@@ -101,4 +100,24 @@ object makedata_profile_gender_sample_20251114 {
     println(featSet)
     featSet
   }
+
+  /** Loads feature buckets from a local TSV file with rows `name<TAB>number<TAB>v1,v2,...`.
+    * Returns an empty map when `notUseBucket > 0`; all spaces are stripped before parsing. */
+  def loadUseFeatureBuckets(notUseBucket: Int, bucketFile: String): Map[String, (Double, Array[Double])] = {
+    if (notUseBucket > 0) {
+      Map.empty[String, (Double, Array[Double])]
+    } else {
+      val source = Source.fromFile(bucketFile)
+      // try/finally so the file handle is released even if reading or parsing throws.
+      val bucketMap = try {
+        source.getLines().map(_.replace(" ", "")).filter(_.nonEmpty).map { row =>
+          val fields = row.split("\t")
+          (fields(0), (fields(1).toDouble, fields(2).split(",").map(_.toDouble)))
+        }.toMap // strict: the whole file is consumed here, before the source is closed
+      } finally source.close()
+      println("bucketMap.size=" + bucketMap.size)
+      println(bucketMap)
+      bucketMap
+    }
+  }
 }