jch 2 недели назад
Родитель
Коммит
aeb5204408

+ 16 - 2
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_recsys_83_fm_sample_20250317.scala

@@ -6,6 +6,7 @@ import org.apache.spark.sql.SparkSession
 
 import scala.collection.JavaConversions._
 import scala.collection.mutable.ArrayBuffer
+import scala.io.Source
 import scala.util.Random
 
 object makedata_recsys_83_fm_sample_20250317 {
@@ -19,7 +20,7 @@ object makedata_recsys_83_fm_sample_20250317 {
     val whatLabel = param.getOrElse("whatLabel", "is_return_n_noself")
     val fuSampleRate = param.getOrElse("fuSampleRate", "-1.0").toDouble
     val notUseBucket = param.getOrElse("notUseBucket", "0").toInt
-    val featureNameFile = param.getOrElse("featureName", "20241209_recsys_nor_name.txt")
+    val featureNameFile = param.getOrElse("featureName", "20250317_recsys_rov_name.txt")
     val featureBucketFile = param.getOrElse("featureBucket", "20241209_recsys_nor_bucket.txt")
     val repartition = param.getOrElse("repartition", "100").toInt
     val savePath = param.getOrElse("savePath", "/dw/recommend/model/83_recsys_rov_train_data/")
@@ -32,7 +33,7 @@ object makedata_recsys_83_fm_sample_20250317 {
 
     // 2. 加载特征
     val loader = getClass.getClassLoader
-    val featureNameSet = DataUtils.loadUseFeatureNames(loader, featureNameFile)
+    val featureNameSet = loadFeatureNames(featureNameFile)
     val featureBucketMap = DataUtils.loadUseFeatureBuckets(loader, notUseBucket, featureBucketFile)
     val bucketsMap_br = sc.broadcast(featureBucketMap)
 
@@ -113,4 +114,17 @@ object makedata_recsys_83_fm_sample_20250317 {
     }
     false
   }
+
+  /** Loads feature names from the local file `nameFile`: one per line, spaces removed, blank lines dropped.
+    * Prints the set size and contents to stdout, then returns the distinct names. */
+  def loadFeatureNames(nameFile: String): Set[String] = {
+    val source = Source.fromFile(nameFile)
+    // try/finally: release the file handle even if reading fails part-way (it used to leak on exceptions).
+    val featSet =
+      try source.getLines().map(_.replace(" ", "")).filter(_.nonEmpty).toSet
+      finally source.close()
+    println("featSet.size=" + featSet.size)
+    println(featSet)
+    featSet
+  }
 }