Преглед на файлове

feat:添加ros多分类模型的label分组

zhaohaipeng преди 1 месец
родител
ревизия
52b0307acb

+ 32 - 0
src/main/java/examples/utils/RosUtil.java

@@ -0,0 +1,32 @@
+package examples.utils;
+
+import com.alibaba.fastjson.JSONObject;
+import org.apache.commons.lang3.StringUtils;
+
+public class RosUtil {
+
+    public static int multiClassLabel(JSONObject labelJson, String labelKey) {
+        if (!labelJson.containsKey(labelKey) && StringUtils.isBlank(labelJson.getString(labelKey))) {
+            return 0;
+        }
+        int labelValue = labelJson.getInteger(labelKey);
+        if (labelValue <= 0) {
+            return 0;
+        } else if (labelValue == 1) {
+            return 1;
+        } else if (labelValue <= 4) {
+            return 2;
+        } else if (labelValue <= 8) {
+            return 3;
+        } else if (labelValue <= 14) {
+            return 4;
+        } else if (labelValue <= 21) {
+            return 5;
+        } else if (labelValue <= 50) {
+            return 6;
+        } else {
+            return 7;
+        }
+    }
+
+}

+ 7 - 6
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys/v20250218/makedata_recsys_43_ros_data_bucket_20250304.scala → src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys/v20250218/makedata_recsys_43_ros_multi_data_bucket_20250304.scala

@@ -8,12 +8,12 @@ import org.apache.spark.sql.SparkSession
 
 import scala.collection.JavaConversions._
 import scala.collection.mutable.ArrayBuffer
+import examples.utils.RosUtil
 
-/*
-
+/**
+ * ros 多分类特征分桶
  */
-
-object makedata_recsys_43_ros_data_bucket_20250304 {
+object makedata_recsys_43_ros_multi_data_bucket_20250304 {
   def main(args: Array[String]): Unit = {
 
     // 1 读取参数
@@ -25,7 +25,7 @@ object makedata_recsys_43_ros_data_bucket_20250304 {
     val repartition = param.getOrElse("repartition", "100").toInt
     val filterNames = param.getOrElse("filterNames", "").split(",").filter(_.nonEmpty).toSet
     val noBucketFeature = param.getOrElse("noBucketFeature", "hour,is_greeting,day_of_week").split(",").filter(_.nonEmpty).toSet
-    val whatLabel = param.getOrElse("whatLabel", "is_share")
+    val whatLabel = param.getOrElse("whatLabel", "return_n_uv_noself")
     val whatApps = param.getOrElse("whatApps", "0,4,2,32,17,18,21,22,24,25,26,27,28,29,3,30,31,33,34,35,36").split(",").toSet
     val fileName = param.getOrElse("fileName", "20250218_bucket_322.txt")
 
@@ -73,7 +73,8 @@ object makedata_recsys_43_ros_data_bucket_20250304 {
         }
         .map {
           case (logKey, labelKey, features) =>
-            val label = JSON.parseObject(labelKey).getOrDefault(whatLabel, "0").toString
+            val labelJson = JSON.parseObject(labelKey)
+            val label = RosUtil.multiClassLabel(labelJson, whatLabel) // look up by the configured label name; labelKey is the raw JSON text, not a key
             (logKey, label, features)
         }
         .mapPartitions(row => {