Ver código fonte

特征调整

jch 8 meses atrás
pai
commit
c83a3d8a8c

+ 2 - 3
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_recsys_81_originData_20250217.scala

@@ -302,13 +302,12 @@ object makedata_recsys_81_originData_20250217 {
             }
 
             // createTime
-            var createTime = 1D
             if (null != ts && ts.nonEmpty && rankVideo.containsKey("gmt_create_timestamp")) {
               val currentMs = ts.toLong * 1000L
               val createMs = rankVideo.getString("gmt_create_timestamp").toLong
-              createTime = FeatureUtils.getTimeDiff(currentMs, createMs)
+              val createTime = FeatureUtils.getTimeDiff(currentMs, createMs)
+              featureMap.put("createTime", 1 - createTime)
             }
-            featureMap.put("createTime", createTime)
 
             // week & hour
             val calendar = tsToCalendar(ts)

+ 28 - 27
src/main/scala/com/aliyun/odps/spark/examples/myUtils/FeatureTransform.java

@@ -6,7 +6,8 @@ import java.util.*;
 
 public class FeatureTransform {
     private static final double smoothPlus = 5.0;
-    private static final List<String> periods = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h", "168h");
+    private static final List<String> hourPeriods = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h", "168h");
+    private static final List<String> dayPeriods = Arrays.asList("7d", "14d", "30d", "60d");
     private static final List<String> videoCateAttrs = Arrays.asList(FeatureUtils.cate1Attr, FeatureUtils.cate2Attr, FeatureUtils.festive1Attr);
     private static final List<String> videoSimAttrs = Arrays.asList("title", "cate2", "cate2_list", "keywords", "cate1_list", "topic");
     private static final List<String> hVideoSimAttrs = Arrays.asList("title");
@@ -25,7 +26,10 @@ public class FeatureTransform {
         oneTypeStatFeature("c1", "return_1_uv", userOriginInfo.get("mid_global_feature_20250212"), featMap);
         oneTypeStatFeature("c2", "return_n_uv", userOriginInfo.get("mid_merge_cate1_feature_20250212"), featMap);
         oneTypeStatFeature("c3", "return_n_uv", userOriginInfo.get("mid_merge_cate2_feature_20250212"), featMap);
-        u2uFeature("c4", userOriginInfo.get("mid_u2u_friend_index_feature_20250212"), featMap);
+        Map<String, String> c4Map = userOriginInfo.get("mid_u2u_friend_index_feature_20250212");
+        for (String calType : Arrays.asList("avg_", "max_", "min_")) {
+            getRateStatFeature("c4", calType, hourPeriods, c4Map, featMap);
+        }
     }
 
     public static void getUserProfileFeature(UserShareReturnProfile profile, Map<String, Double> featMap) {
@@ -110,7 +114,7 @@ public class FeatureTransform {
         oneTypeStatFeature("b9", "return_n_uv", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_merge_cate2_recommend_exp_feature_20250212"), featMap);
         oneTypeStatFeature("b10", "return_n_uv", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_channel_recommend_exp_feature_20250212"), featMap);
         oneTypeStatFeature("b11", "return_n_uv", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_festive_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b12", "return_n_uv", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_long_period_recommend_exp_feature_20250212"), featMap);
+        getRateStatFeature("b12", "", dayPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_long_period_recommend_exp_feature_20250212"), featMap);
         oneTypeStatFeature("b13", "return_n_uv", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_video_unionid_recommend_exp_feature_20250212"), featMap);
 
         // head video cf
@@ -130,16 +134,15 @@ public class FeatureTransform {
         getVideoCateFeature(prefix, videoInfo, featMap);
 
         // time
-        double createTime = 1D;
         try {
             if (videoInfo.containsKey("gmt_create_timestamp")) {
                 String createMsStr = videoInfo.get("gmt_create_timestamp");
                 long createMs = Long.parseLong(createMsStr);
-                createTime = FeatureUtils.getTimeDiff(currentMs, createMs);
+                double createTime = FeatureUtils.getTimeDiff(currentMs, createMs);
+                featMap.put(prefix + "@ts", 1 - createTime);
             }
         } catch (Exception ignored) {
         }
-        featMap.put(prefix + "@ts", createTime);
     }
 
     public static void getHeadRankVideoCrossFeature(Map<String, String> headInfo, Map<String, String> rankInfo, Map<String, Double> featMap) {
@@ -185,7 +188,7 @@ public class FeatureTransform {
                             featMap.put(baseKey + "@uv", FeatureUtils.log1(uv));
                         }
                         if (ts > 0) {
-                            featMap.put(baseKey + "@ts", FeatureUtils.getTimeDiff(currentMs, ts * 1000));
+                            featMap.put(baseKey + "@ts", 1 - FeatureUtils.getTimeDiff(currentMs, ts * 1000));
                         }
                         if (null != hVideoMap && hVideoMap.containsKey(vid)) {
                             Map<String, String> hVideo = hVideoMap.get(vid);
@@ -314,28 +317,26 @@ public class FeatureTransform {
         return allScoresMap;
     }
 
-    private static void u2uFeature(String prefix, Map<String, String> infoMap, Map<String, Double> featMap) {
+    private static void getRateStatFeature(String prefix, String calType, List<String> periods, Map<String, String> infoMap, Map<String, Double> featMap) {
         if (null == infoMap || infoMap.isEmpty()) {
             return;
         }
-        for (String calType : Arrays.asList("avg", "max", "min")) {
-            for (String period : periods) {
-                double str_one = getOneInfo(calType + "_str_one_" + period, infoMap);
-                double ros_one = getOneInfo(calType + "_ros_one_" + period, infoMap);
-                double str = getOneInfo(calType + "_str_" + period, infoMap);
-                double ros = getOneInfo(calType + "_ros_" + period, infoMap);
-                double str_plus = getOneInfo(calType + "_str_plus_" + period, infoMap);
-                double ros_minus = getOneInfo(calType + "_ros_minus_" + period, infoMap);
-                double rovn = getOneInfo(calType + "_rovn_" + period, infoMap);
-
-                featMap.put(prefix + "_" + period + "_" + calType + "_str_one", str_one);
-                featMap.put(prefix + "_" + period + "_" + calType + "_ros_one", ros_one);
-                featMap.put(prefix + "_" + period + "_" + calType + "_str", str);
-                featMap.put(prefix + "_" + period + "_" + calType + "_ros", ros);
-                featMap.put(prefix + "_" + period + "_" + calType + "_str_plus", str_plus);
-                featMap.put(prefix + "_" + period + "_" + calType + "_ros_minus", ros_minus);
-                featMap.put(prefix + "_" + period + "_" + calType + "_rovn", rovn);
-            }
+        for (String period : periods) {
+            double str_one = getOneInfo(calType + "str_one_" + period, infoMap);
+            double ros_one = getOneInfo(calType + "ros_one_" + period, infoMap);
+            double str = getOneInfo(calType + "str_" + period, infoMap);
+            double ros = getOneInfo(calType + "ros_" + period, infoMap);
+            double str_plus = getOneInfo(calType + "str_plus_" + period, infoMap);
+            double ros_minus = getOneInfo(calType + "ros_minus_" + period, infoMap);
+            double rovn = getOneInfo(calType + "rovn_" + period, infoMap);
+
+            featMap.put(prefix + "_" + period + "_" + calType + "str_one", str_one);
+            featMap.put(prefix + "_" + period + "_" + calType + "ros_one", ros_one);
+            featMap.put(prefix + "_" + period + "_" + calType + "str", str);
+            featMap.put(prefix + "_" + period + "_" + calType + "ros", ros);
+            featMap.put(prefix + "_" + period + "_" + calType + "str_plus", str_plus);
+            featMap.put(prefix + "_" + period + "_" + calType + "ros_minus", ros_minus);
+            featMap.put(prefix + "_" + period + "_" + calType + "rovn", rovn);
         }
     }
 
@@ -343,7 +344,7 @@ public class FeatureTransform {
         if (null == infoMap || infoMap.isEmpty()) {
             return;
         }
-        for (String period : periods) {
+        for (String period : hourPeriods) {
             double exp = getOneInfo("exp_" + period, infoMap);
             double is_share = getOneInfo("is_share_" + period, infoMap);
             double share_cnt = getOneInfo("share_cnt_" + period, infoMap);

+ 2 - 2
src/main/scala/com/aliyun/odps/spark/examples/myUtils/FeatureUtils.java

@@ -14,7 +14,7 @@ public class FeatureUtils {
     private static final Map<String, Integer> cate1MAP = new HashMap<>();
     private static final Map<String, Integer> cate2MAP = new HashMap<>();
     private static final Map<String, Integer> festive1Map = new HashMap<>();
-    public static final double oneYearMs = 365 * 24 * 3600 * 1000.0;
+    public static final double twoMonthMs = 2 * 30 * 24 * 3600 * 1000.0;
 
     static {
         cate1MAP.put("情感", 1);
@@ -82,7 +82,7 @@ public class FeatureUtils {
     }
 
     public static double getTimeDiff(long currentMs, long historyMs) {
-        double diff = (currentMs - historyMs) / oneYearMs;
+        double diff = (currentMs - historyMs) / twoMonthMs;
         if (diff > 1.0) {
             diff = 1.0;
         }