Selaa lähdekoodia

添加类目特征

jch 2 kuukautta sitten
vanhempi
commit
a2b3b1d8d9

+ 14 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_recsys_61_originData_20241209.scala

@@ -3,7 +3,7 @@ package com.aliyun.odps.spark.examples.makedata_recsys_r_rate
 import com.alibaba.fastjson.{JSON, JSONObject}
 import com.aliyun.odps.TableSchema
 import com.aliyun.odps.data.Record
-import com.aliyun.odps.spark.examples.myUtils.{MyDateUtils, MyHdfsUtils, ParamUtils, env}
+import com.aliyun.odps.spark.examples.myUtils._
 import examples.extractor.RankExtractorFeature_20240530
 import examples.utils.SimilarityUtils
 import org.apache.hadoop.io.compress.GzipCodec
@@ -283,6 +283,19 @@ object makedata_recsys_61_originData_20241209 {
               }
             }
 
+            // category
+            if (rankVideo.nonEmpty) {
+              val attrList = List(FeatureUtils.cate1Attr, FeatureUtils.cate2Attr, FeatureUtils.festive1Attr)
+              for (attr <- attrList) {
+                val attrVal = if (rankVideo.containsKey(attr)) rankVideo.getString(attr) else ""
+                val attrId = FeatureUtils.getAttrId(attr, attrVal)
+                if (attrId > 0) {
+                  val key = "%s@%s@%d".format("r", attr, attrId)
+                  featureMap.put(key, 1)
+                }
+              }
+            }
+
             // time
             val ts = record.getString("ts")
             val calendar = tsToCalendar(ts)

+ 78 - 0
src/main/scala/com/aliyun/odps/spark/examples/myUtils/FeatureUtils.java

@@ -0,0 +1,78 @@
+package com.aliyun.odps.spark.examples.myUtils;
+
+import java.util.HashMap;
+import java.util.Map;
+
+public class FeatureUtils { // Static lookup tables that turn category / festive attribute strings into small integer ids.
+    public static final String cate1Attr = "cate1_list"; // attribute name that selects cate1MAP in getAttrId. NOTE(review): the key name suggests a list-valued field, but the lookup is an exact string match - confirm the upstream value format
+    public static final String cate2Attr = "cate2"; // attribute name that selects cate2MAP in getAttrId
+    public static final String festive1Attr = "festive_label1"; // attribute name that selects festive1Map in getAttrId
+    private static final Map<String, Integer> cate1MAP = new HashMap<>(); // value -> id table for cate1Attr
+    private static final Map<String, Integer> cate2MAP = new HashMap<>(); // value -> id table for cate2Attr
+    private static final Map<String, Integer> festive1Map = new HashMap<>(); // value -> id table for festive1Attr
+
+    static { // fills the three tables once, when the class is initialized; nothing else in this class writes to them
+        cate1MAP.put("情感", 1); // cate1 table: 15 entries, ids 1..15
+        cate1MAP.put("音乐", 2);
+        cate1MAP.put("搞笑", 3);
+        cate1MAP.put("生活记录", 4);
+        cate1MAP.put("医疗健康", 5);
+        cate1MAP.put("文化", 6);
+        cate1MAP.put("旅行", 7);
+        cate1MAP.put("美食", 8);
+        cate1MAP.put("历史", 9);
+        cate1MAP.put("科普", 10);
+        cate1MAP.put("艺术", 11);
+        cate1MAP.put("综艺", 12);
+        cate1MAP.put("时政", 13);
+        cate1MAP.put("三农", 14);
+        cate1MAP.put("舞蹈", 15);
+
+        cate2MAP.put("人生忠告", 1); // cate2 table: 26 entries, ids 1..26
+        cate2MAP.put("知识科普", 2);
+        cate2MAP.put("祝福音乐", 3);
+        cate2MAP.put("生活小妙招", 4);
+        cate2MAP.put("节日祝福", 5);
+        cate2MAP.put("搞笑段子", 6);
+        cate2MAP.put("怀念时光", 7);
+        cate2MAP.put("早中晚好", 8);
+        cate2MAP.put("风景实拍", 9);
+        cate2MAP.put("动物萌宠", 10);
+        cate2MAP.put("美食教程", 11);
+        cate2MAP.put("国家力量", 12);
+        cate2MAP.put("中国历史影像", 13);
+        cate2MAP.put("杂技柔术", 14);
+        cate2MAP.put("健康知识", 15);
+        cate2MAP.put("传统文化", 16);
+        cate2MAP.put("长寿知识", 17);
+        cate2MAP.put("社会风气", 18);
+        cate2MAP.put("大型集体艺术", 19);
+        cate2MAP.put("省份城市亮点", 20);
+        cate2MAP.put("民生政策", 21);
+        cate2MAP.put("健身操", 22);
+        cate2MAP.put("亲情音乐", 23);
+        cate2MAP.put("天气变化", 24);
+        cate2MAP.put("人财诈骗", 25);
+        cate2MAP.put("正能量剧情", 26);
+
+        festive1Map.put("节假日", 1); // festive1 table: 6 entries, ids 1..6
+        festive1Map.put("问候语", 2);
+        festive1Map.put("祝福语", 3);
+        festive1Map.put("节气", 4);
+        festive1Map.put("热点", 5);
+        festive1Map.put("人物", 6);
+    }
+
+    public static int getAttrId(String cate, String value) { // returns the id registered for value under attribute cate, or 0 when either is unknown
+        switch (cate) { // NOTE(review): a null cate throws NullPointerException here (String switch); the caller shown in this commit only passes the three constants above
+            case cate1Attr:
+                return cate1MAP.getOrDefault(value, 0); // 0 = value not in the table (a null value also yields 0)
+            case cate2Attr:
+                return cate2MAP.getOrDefault(value, 0); // same 0-for-missing convention
+            case festive1Attr:
+                return festive1Map.getOrDefault(value, 0); // same 0-for-missing convention
+            default: // attribute name is not one of the three constants
+                return 0;
+        }
+    }
+}