|
@@ -8,12 +8,12 @@ import org.apache.spark.sql.SparkSession
|
|
|
|
|
|
import scala.collection.JavaConversions._
|
|
|
import scala.collection.mutable.ArrayBuffer
|
|
|
+import examples.utils.RosUtil
|
|
|
|
|
|
-/*
|
|
|
-
|
|
|
+/**
|
|
|
+ * ros 多分类特征分桶
|
|
|
*/
|
|
|
-
|
|
|
-object makedata_recsys_43_ros_data_bucket_20250304 {
|
|
|
+object makedata_recsys_43_ros_multi_data_bucket_20250304 {
|
|
|
def main(args: Array[String]): Unit = {
|
|
|
|
|
|
// 1 读取参数
|
|
@@ -25,7 +25,7 @@ object makedata_recsys_43_ros_data_bucket_20250304 {
|
|
|
val repartition = param.getOrElse("repartition", "100").toInt
|
|
|
val filterNames = param.getOrElse("filterNames", "").split(",").filter(_.nonEmpty).toSet
|
|
|
val noBucketFeature = param.getOrElse("noBucketFeature", "hour,is_greeting,day_of_week").split(",").filter(_.nonEmpty).toSet
|
|
|
- val whatLabel = param.getOrElse("whatLabel", "is_share")
|
|
|
+ val whatLabel = param.getOrElse("whatLabel", "return_n_uv_noself")
|
|
|
val whatApps = param.getOrElse("whatApps", "0,4,2,32,17,18,21,22,24,25,26,27,28,29,3,30,31,33,34,35,36").split(",").toSet
|
|
|
val fileName = param.getOrElse("fileName", "20250218_bucket_322.txt")
|
|
|
|
|
@@ -73,7 +73,8 @@ object makedata_recsys_43_ros_data_bucket_20250304 {
|
|
|
}
|
|
|
.map {
|
|
|
case (logKey, labelKey, features) =>
|
|
|
- val label = JSON.parseObject(labelKey).getOrDefault(whatLabel, "0").toString
|
|
|
+ val labelJson = JSON.parseObject(labelKey)
|
|
|
+ val label = RosUtil.multiClassLabel(labelJson, labelKey)
|
|
|
(logKey, label, features)
|
|
|
}
|
|
|
.mapPartitions(row => {
|