il y a 1 mois · 088bc35507
--- a/src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_profile_gender_sample_20251114.scala
+++ b/src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_profile_gender_sample_20251114.scala
@@ -33,9 +33,8 @@ object makedata_profile_gender_sample_20251114 {
 
				     val sc = spark.sparkContext
			
 
				 
			
 
				     // 3. 处理数据
			
 
				-    val loader = getClass.getClassLoader
			
 
				     val featureSet = loadFeatureNames(featureFile)
			
 
				-    val featureBucketMap = DataUtils.loadUseFeatureBuckets(loader, notUseBucket, featureBucketFile)
			
 
				+    val featureBucketMap = loadUseFeatureBuckets(notUseBucket, featureBucketFile)
			
 
				     val bucketsMap_br = sc.broadcast(featureBucketMap)
			
 
				     for (suffix <- suffixSet) {
			
 
				       val partition = "%s_%s".format(year, suffix)
			
@@ -101,4 +100,24 @@ object makedata_profile_gender_sample_20251114 {
 
				     println(featSet)
			
 
				     featSet
			
 
				   }
			
 
				+
			
 
				/**
				 * Loads per-feature bucket definitions from a tab-separated text file.
				 *
				 * Every non-empty line must hold at least three tab-separated fields: a feature
				 * name, a numeric value, and a comma-separated list of numbers. All space
				 * characters are stripped from a line before it is parsed, and any fields after
				 * the third are ignored. When a feature name occurs more than once, the last
				 * occurrence wins.
				 *
				 * @param notUseBucket when greater than 0 the file is not read and an empty map is returned
				 * @param bucketFile   path of the bucket definition file, opened with `Source.fromFile`
				 * @return map from feature name to (second field as Double, third field parsed into an Array[Double])
				 * @throws IllegalArgumentException if a non-empty line has fewer than three tab-separated fields
				 */
				def loadUseFeatureBuckets(notUseBucket: Int, bucketFile: String): Map[String, (Double, Array[Double])] = {
				  if (notUseBucket > 0) {
				    Map.empty[String, (Double, Array[Double])]
				  } else {
				    val source = Source.fromFile(bucketFile)
				    // Materialise the lines inside try/finally so the file handle is released even if reading fails.
				    val rawLines = try source.getLines().toList finally source.close()
				    val bucketMap = rawLines
				      .map(_.replace(" ", ""))
				      .filter(_.nonEmpty)
				      .map { line =>
				        line.split("\t") match {
				          case Array(name, ratio, splits, _*) =>
				            (name, (ratio.toDouble, splits.split(",").map(_.toDouble)))
				          case _ =>
				            // Fail with the offending line instead of a bare ArrayIndexOutOfBoundsException.
				            throw new IllegalArgumentException(
				              s"Malformed bucket line in $bucketFile (expected name<TAB>value<TAB>v1,v2,...): $line")
				        }
				      }
				      .toMap
				    println("bucketMap.size=" + bucketMap.size)
				    println(bucketMap)
				    bucketMap
				  }
				}
			
 
				 }