浏览代码

特征调整

jch 2 月之前
父节点
当前提交
db3add12c3

+ 7 - 0
src/main/scala/com/aliyun/odps/spark/examples/myUtils/FeatureTransform.java

@@ -134,6 +134,13 @@ public class FeatureTransform {
 
         // cate
         getVideoCateFeature(prefix, videoInfo, featMap);
+        if (videoInfo.containsKey("title")) {
+            int id = FeatureUtils.judgeVideoTimeType(videoInfo.get("title"));
+            if (id > 0) {
+                String key = String.format("%s@%s@%d", prefix, "tt", id);
+                featMap.put(key, 1.0);
+            }
+        }
 
         // time
         try {

+ 27 - 0
src/main/scala/com/aliyun/odps/spark/examples/myUtils/FeatureUtils.java

@@ -6,6 +6,8 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 public class FeatureUtils {
     public static final String cate1Attr = "cate1_list";
@@ -16,6 +18,13 @@ public class FeatureUtils {
     private static final Map<String, Integer> festive1Map = new HashMap<>();
     public static final double twoMonthMs = 2 * 30 * 24 * 3600 * 1000.0;
 
+    private static final String goodMorningRegex = "(早安|早上好|早晨好|上午好)"; // alternation of morning greeting phrases
+    private static final String goodAfternoonRegex = "(午安|中午好|下午好)"; // alternation of noon/afternoon greeting phrases
+    private static final String goodEveningRegex = "(晚安|晚上好)"; // alternation of evening/night greeting phrases
+    private static final Pattern goodMorningPattern = Pattern.compile(goodMorningRegex); // compiled once here and reused by judgeVideoTimeType
+    private static final Pattern goodAfternoonPattern = Pattern.compile(goodAfternoonRegex); // compiled once here and reused by judgeVideoTimeType
+    private static final Pattern goodEveningPattern = Pattern.compile(goodEveningRegex); // compiled once here and reused by judgeVideoTimeType
+
     static {
         cate1MAP.put("情感", 1);
         cate1MAP.put("音乐", 2);
@@ -81,6 +90,24 @@ public class FeatureUtils {
         }
     }
 
+    public static int judgeVideoTimeType(String s) { // Classifies s by the greeting phrase it contains: 1 = morning, 2 = afternoon, 3 = evening, 0 = none.
+        if (null != s && !s.isEmpty()) { // null or empty input skips matching and falls through to the 0 result
+            Matcher morning = goodMorningPattern.matcher(s);
+            if (morning.find()) { // find() accepts a match anywhere in s, not only a whole-string match
+                return 1;
+            }
+            Matcher afternoon = goodAfternoonPattern.matcher(s);
+            if (afternoon.find()) {
+                return 2; // reached only when no morning phrase matched, so morning takes precedence
+            }
+            Matcher evening = goodEveningPattern.matcher(s);
+            if (evening.find()) {
+                return 3; // reached only when neither a morning nor an afternoon phrase matched
+            }
+        }
+        return 0; // no greeting phrase found
+    }
+
     public static double getTimeDiff(long currentMs, long historyMs) {
         double diff = (currentMs - historyMs) / twoMonthMs;
         if (diff > 1.0) {