Browse Source

特征调整

jch 2 months ago
parent
commit
c14f818d96

+ 3 - 2
src/main/scala/com/aliyun/odps/spark/examples/myUtils/ConvertUtils.java

@@ -64,8 +64,6 @@ public class ConvertUtils {
     private static Map<String, Map<String, String>> getUserOriginInfo(Map<String, String> record) {
         Map<String, Map<String, String>> map = new HashMap<>();
         map.put("mid_global_feature_20250212", getRecordCol(record, "c1_feature"));
-        map.put("mid_merge_cate1_feature_20250212", getRecordCol(record, "c2_feature"));
-        map.put("mid_merge_cate2_feature_20250212", getRecordCol(record, "c3_feature"));
         map.put("mid_u2u_friend_index_feature_20250212", getRecordCol(record, "c4_feature"));
         map.put("alg_mid_feature_return_tags", getRecordCol(record, "c5_feature"));
         map.put("alg_mid_feature_share_tags", getRecordCol(record, "c6_feature"));
@@ -93,6 +91,9 @@ public class ConvertUtils {
         map.put("scene_type_vid_cf_feature_20250212", getRecordCol(record, "d1_feature"));
         map.put("vid_click_cf_feature_20250212", getRecordCol(record, "d2_feature"));
         map.put("alg_recsys_feature_cf_i2i_v2", getRecordCol(record, "d3_feature"));
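+        // mid x cate1/cate2 merge features (c2/c3): stored in this map now, no longer in the map built by getUserOriginInfo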
+        map.put("mid_merge_cate1_feature_20250212", getRecordCol(record, "c2_feature"));
+        map.put("mid_merge_cate2_feature_20250212", getRecordCol(record, "c3_feature"));
 
         Map<String, Map<String, Map<String, String>>> allMap = new HashMap<>();
         String vid = record.get("vid");

+ 32 - 18
src/main/scala/com/aliyun/odps/spark/examples/myUtils/FeatureTransform.java

@@ -8,8 +8,20 @@ public class FeatureTransform {
     private static final int seqMaxN = 2;
     private static final int seqLastN = 2;
     private static final double smoothPlus = 5.0;
-    private static final List<String> userHourPeriods = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h", "168h");
-    private static final List<String> videoHourPeriods = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h", "168h");
+    private static final List<String> c1Periods = Arrays.asList("72h", "168h");
+    private static final List<String> c4Periods = Arrays.asList("72h", "168h");
+    private static final List<String> b1Periods = Arrays.asList("1h", "3h", "24h", "72h", "168h");
+    private static final List<String> b2Periods = Arrays.asList("1h", "3h", "24h");
+    private static final List<String> b3Periods = Arrays.asList("24h", "168h");
+    private static final List<String> b4Periods = Arrays.asList("1h", "12h");
+    private static final List<String> b5Periods = Arrays.asList("72h", "168h");
+    private static final List<String> b6Periods = Arrays.asList("1h", "24h");
+    private static final List<String> b7Periods = Arrays.asList("24h", "168h");
+    private static final List<String> b8Periods = Arrays.asList("24h");
+    private static final List<String> b9Periods = Arrays.asList("24h");
+    private static final List<String> b10Periods = Arrays.asList("1h", "12h");
+    private static final List<String> b11Periods = Arrays.asList("12h", "168h");
+    private static final List<String> b13Periods = Arrays.asList("24h", "168h");
     private static final List<String> dayPeriods = Arrays.asList("7d", "14d", "30d", "60d");
     private static final List<String> videoCateAttrs = Arrays.asList(FeatureUtils.cate1Attr, FeatureUtils.cate2Attr, FeatureUtils.festive1Attr);
     private static final List<String> videoSimAttrs = Arrays.asList("title", "cate2", "cate2_list", "keywords");
@@ -26,12 +38,10 @@ public class FeatureTransform {
     }
 
     public static void getUserFeature(Map<String, Map<String, String>> userOriginInfo, Map<String, Double> featMap) {
-        oneTypeStatFeature("c1", "return_1_uv", userHourPeriods, userOriginInfo.get("mid_global_feature_20250212"), featMap);
-        oneTypeStatFeature("c2", "return_n_uv", userHourPeriods, userOriginInfo.get("mid_merge_cate1_feature_20250212"), featMap);
-        oneTypeStatFeature("c3", "return_n_uv", userHourPeriods, userOriginInfo.get("mid_merge_cate2_feature_20250212"), featMap);
+        oneTypeStatFeature("c1", "return_1_uv", c1Periods, userOriginInfo.get("mid_global_feature_20250212"), featMap);
         Map<String, String> c4Map = userOriginInfo.get("mid_u2u_friend_index_feature_20250212");
         for (String calType : Arrays.asList("avg_", "max_", "min_")) {
-            getRateStatFeature("c4", calType, userHourPeriods, c4Map, featMap);
+            getRateStatFeature("c4", calType, c4Periods, c4Map, featMap);
         }
     }
 
@@ -106,24 +116,28 @@ public class FeatureTransform {
     }
 
     public static void getVideoFeature(String vid, Map<String, Map<String, Map<String, String>>> videoOriginInfo, Map<String, Double> featMap) {
-        oneTypeStatFeature("b1", "return_1_uv", videoHourPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_global_feature_20250212"), featMap);
-        oneTypeStatFeature("b2", "return_n_uv", videoHourPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b3", "return_n_uv", videoHourPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_recommend_flowpool_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b4", "return_n_uv", videoHourPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_apptype_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b5", "return_n_uv", videoHourPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_province_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b6", "return_n_uv", videoHourPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_brand_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b7", "return_n_uv", videoHourPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_hotsencetype_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b8", "return_n_uv", videoHourPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_merge_cate1_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b9", "return_n_uv", videoHourPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_merge_cate2_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b10", "return_n_uv", videoHourPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_channel_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b11", "return_n_uv", videoHourPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_festive_recommend_exp_feature_20250212"), featMap);
+        oneTypeStatFeature("b1", "return_1_uv", b1Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_global_feature_20250212"), featMap);
+        oneTypeStatFeature("b2", "return_n_uv", b2Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_recommend_exp_feature_20250212"), featMap);
+        oneTypeStatFeature("b3", "return_n_uv", b3Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_recommend_flowpool_exp_feature_20250212"), featMap);
+        oneTypeStatFeature("b4", "return_n_uv", b4Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_apptype_recommend_exp_feature_20250212"), featMap);
+        oneTypeStatFeature("b5", "return_n_uv", b5Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_province_recommend_exp_feature_20250212"), featMap);
+        oneTypeStatFeature("b6", "return_n_uv", b6Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_brand_recommend_exp_feature_20250212"), featMap);
+        oneTypeStatFeature("b7", "return_n_uv", b7Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_hotsencetype_recommend_exp_feature_20250212"), featMap);
+        oneTypeStatFeature("b8", "return_n_uv", b8Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_merge_cate1_recommend_exp_feature_20250212"), featMap);
+        oneTypeStatFeature("b9", "return_n_uv", b9Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_merge_cate2_recommend_exp_feature_20250212"), featMap);
+        oneTypeStatFeature("b10", "return_n_uv", b10Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_channel_recommend_exp_feature_20250212"), featMap);
+        oneTypeStatFeature("b11", "return_n_uv", b11Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_festive_recommend_exp_feature_20250212"), featMap);
         //getRateStatFeature("b12", "", dayPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_long_period_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b13", "return_n_uv", videoHourPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_video_unionid_recommend_exp_feature_20250212"), featMap);
+        oneTypeStatFeature("b13", "return_n_uv", b13Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_video_unionid_recommend_exp_feature_20250212"), featMap);
 
         // head video cf
         headVideoCFD1Feature("d1", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("scene_type_vid_cf_feature_20250212"), featMap);
         headVideoCFD2Feature("d2", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("vid_click_cf_feature_20250212"), featMap);
         headVideoCFD3Feature("d3", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_recsys_feature_cf_i2i_v2"), featMap);
+
+        // Special case: mid x cate features (c2/c3) are looked up in the per-vid map and reuse c1Periods
+        oneTypeStatFeature("c2", "return_n_uv", c1Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("mid_merge_cate1_feature_20250212"), featMap);
+        oneTypeStatFeature("c3", "return_n_uv", c1Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("mid_merge_cate2_feature_20250212"), featMap);
     }
 
     public static void getVideoBaseFeature(String prefix, long currentMs, Map<String, String> videoInfo, Map<String, Double> featMap) {