|
@@ -6,6 +6,7 @@ import org.apache.spark.sql.SparkSession
|
|
|
|
|
|
import scala.collection.JavaConversions._
|
|
import scala.collection.JavaConversions._
|
|
import scala.collection.mutable.ArrayBuffer
|
|
import scala.collection.mutable.ArrayBuffer
|
|
|
|
+import scala.io.Source
|
|
import scala.util.Random
|
|
import scala.util.Random
|
|
|
|
|
|
object makedata_recsys_83_fm_sample_20250317 {
|
|
object makedata_recsys_83_fm_sample_20250317 {
|
|
@@ -19,7 +20,7 @@ object makedata_recsys_83_fm_sample_20250317 {
|
|
val whatLabel = param.getOrElse("whatLabel", "is_return_n_noself")
|
|
val whatLabel = param.getOrElse("whatLabel", "is_return_n_noself")
|
|
val fuSampleRate = param.getOrElse("fuSampleRate", "-1.0").toDouble
|
|
val fuSampleRate = param.getOrElse("fuSampleRate", "-1.0").toDouble
|
|
val notUseBucket = param.getOrElse("notUseBucket", "0").toInt
|
|
val notUseBucket = param.getOrElse("notUseBucket", "0").toInt
|
|
- val featureNameFile = param.getOrElse("featureName", "20241209_recsys_nor_name.txt")
|
|
|
|
|
|
+ val featureNameFile = param.getOrElse("featureName", "20250317_recsys_rov_name.txt")
|
|
val featureBucketFile = param.getOrElse("featureBucket", "20241209_recsys_nor_bucket.txt")
|
|
val featureBucketFile = param.getOrElse("featureBucket", "20241209_recsys_nor_bucket.txt")
|
|
val repartition = param.getOrElse("repartition", "100").toInt
|
|
val repartition = param.getOrElse("repartition", "100").toInt
|
|
val savePath = param.getOrElse("savePath", "/dw/recommend/model/83_recsys_rov_train_data/")
|
|
val savePath = param.getOrElse("savePath", "/dw/recommend/model/83_recsys_rov_train_data/")
|
|
@@ -32,7 +33,7 @@ object makedata_recsys_83_fm_sample_20250317 {
|
|
|
|
|
|
// 2. 加载特征
|
|
// 2. 加载特征
|
|
val loader = getClass.getClassLoader
|
|
val loader = getClass.getClassLoader
|
|
- val featureNameSet = DataUtils.loadUseFeatureNames(loader, featureNameFile)
|
|
|
|
|
|
+ val featureNameSet = loadFeatureNames(featureNameFile)
|
|
val featureBucketMap = DataUtils.loadUseFeatureBuckets(loader, notUseBucket, featureBucketFile)
|
|
val featureBucketMap = DataUtils.loadUseFeatureBuckets(loader, notUseBucket, featureBucketFile)
|
|
val bucketsMap_br = sc.broadcast(featureBucketMap)
|
|
val bucketsMap_br = sc.broadcast(featureBucketMap)
|
|
|
|
|
|
@@ -113,4 +114,17 @@ object makedata_recsys_83_fm_sample_20250317 {
|
|
}
|
|
}
|
|
false
|
|
false
|
|
}
|
|
}
|
|
|
|
+
|
|
|
|
/**
  * Loads the set of feature names listed in a text file, one name per line.
  *
  * The file is opened with `Source.fromFile`, so `nameFile` is resolved as a
  * filesystem path (not a classpath resource) and decoded with the implicit
  * default codec. Every space character is removed from each line, lines that
  * are empty after that are dropped, and duplicates collapse because the
  * result is a `Set`.
  *
  * The resulting size and contents are printed to stdout.
  *
  * @param nameFile path of the feature-name file to read
  * @return the distinct, non-empty feature names found in the file
  */
def loadFeatureNames(nameFile: String): Set[String] = {
  val source = Source.fromFile(nameFile)
  val featSet =
    try {
      // getLines() already strips line terminators, so each element is one
      // raw line; materialise into a Set before the handle is closed because
      // the iterator is lazy.
      source.getLines()
        .map(line => line.replace(" ", ""))
        .filter(line => line.nonEmpty)
        .toSet
    } finally {
      // Always release the file handle, even when reading/decoding fails.
      source.close()
    }
  println("featSet.size=" + featSet.size)
  println(featSet)
  featSet
}
|
|
}
|
|
}
|