ソースを参照

nor样本生成

jch 2 ヶ月 前
コミット
c4275015d4

+ 25 - 24
src/main/scala/com/aliyun/odps/spark/examples/myUtils/FeatureTransform.java

@@ -8,11 +8,12 @@ public class FeatureTransform {
     private static final int seqMaxN = 2;
     private static final int seqLastN = 2;
     private static final double smoothPlus = 5.0;
-    private static final List<String> hourPeriods = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h", "168h");
+    private static final List<String> userHourPeriods = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h", "168h");
+    private static final List<String> videoHourPeriods = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h", "168h");
     private static final List<String> dayPeriods = Arrays.asList("7d", "14d", "30d", "60d");
     private static final List<String> videoCateAttrs = Arrays.asList(FeatureUtils.cate1Attr, FeatureUtils.cate2Attr, FeatureUtils.festive1Attr);
-    private static final List<String> videoSimAttrs = Arrays.asList("title", "cate2", "cate2_list", "keywords", "cate1_list", "topic");
-    private static final List<String> hVideoSimAttrs = Arrays.asList("title", "keywords", "topic");
+    private static final List<String> videoSimAttrs = Arrays.asList("title", "cate2", "cate2_list", "keywords");
+    private static final List<String> hVideoSimAttrs = Arrays.asList("title");
 
     public static void getContextFeature(long currentMs, Map<String, Double> featureMap) {
         Calendar calendar = Calendar.getInstance();
@@ -25,12 +26,12 @@ public class FeatureTransform {
     }
 
     public static void getUserFeature(Map<String, Map<String, String>> userOriginInfo, Map<String, Double> featMap) {
-        oneTypeStatFeature("c1", "return_1_uv", userOriginInfo.get("mid_global_feature_20250212"), featMap);
-        oneTypeStatFeature("c2", "return_n_uv", userOriginInfo.get("mid_merge_cate1_feature_20250212"), featMap);
-        oneTypeStatFeature("c3", "return_n_uv", userOriginInfo.get("mid_merge_cate2_feature_20250212"), featMap);
+        oneTypeStatFeature("c1", "return_1_uv", userHourPeriods, userOriginInfo.get("mid_global_feature_20250212"), featMap);
+        oneTypeStatFeature("c2", "return_n_uv", userHourPeriods, userOriginInfo.get("mid_merge_cate1_feature_20250212"), featMap);
+        oneTypeStatFeature("c3", "return_n_uv", userHourPeriods, userOriginInfo.get("mid_merge_cate2_feature_20250212"), featMap);
         Map<String, String> c4Map = userOriginInfo.get("mid_u2u_friend_index_feature_20250212");
         for (String calType : Arrays.asList("avg_", "max_", "min_")) {
-            getRateStatFeature("c4", calType, hourPeriods, c4Map, featMap);
+            getRateStatFeature("c4", calType, userHourPeriods, c4Map, featMap);
         }
     }
 
@@ -105,19 +106,19 @@ public class FeatureTransform {
     }
 
     public static void getVideoFeature(String vid, Map<String, Map<String, Map<String, String>>> videoOriginInfo, Map<String, Double> featMap) {
-        oneTypeStatFeature("b1", "return_1_uv", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_global_feature_20250212"), featMap);
-        oneTypeStatFeature("b2", "return_n_uv", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b3", "return_n_uv", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_recommend_flowpool_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b4", "return_n_uv", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_apptype_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b5", "return_n_uv", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_province_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b6", "return_n_uv", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_brand_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b7", "return_n_uv", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_hotsencetype_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b8", "return_n_uv", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_merge_cate1_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b9", "return_n_uv", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_merge_cate2_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b10", "return_n_uv", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_channel_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b11", "return_n_uv", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_festive_recommend_exp_feature_20250212"), featMap);
-        getRateStatFeature("b12", "", dayPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_long_period_recommend_exp_feature_20250212"), featMap);
-        oneTypeStatFeature("b13", "return_n_uv", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_video_unionid_recommend_exp_feature_20250212"), featMap);
+        oneTypeStatFeature("b1", "return_1_uv", videoHourPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_global_feature_20250212"), featMap);
+        oneTypeStatFeature("b2", "return_n_uv", videoHourPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_recommend_exp_feature_20250212"), featMap);
+        oneTypeStatFeature("b3", "return_n_uv", videoHourPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_recommend_flowpool_exp_feature_20250212"), featMap);
+        oneTypeStatFeature("b4", "return_n_uv", videoHourPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_apptype_recommend_exp_feature_20250212"), featMap);
+        oneTypeStatFeature("b5", "return_n_uv", videoHourPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_province_recommend_exp_feature_20250212"), featMap);
+        oneTypeStatFeature("b6", "return_n_uv", videoHourPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_brand_recommend_exp_feature_20250212"), featMap);
+        oneTypeStatFeature("b7", "return_n_uv", videoHourPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_hotsencetype_recommend_exp_feature_20250212"), featMap);
+        oneTypeStatFeature("b8", "return_n_uv", videoHourPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_merge_cate1_recommend_exp_feature_20250212"), featMap);
+        oneTypeStatFeature("b9", "return_n_uv", videoHourPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_merge_cate2_recommend_exp_feature_20250212"), featMap);
+        oneTypeStatFeature("b10", "return_n_uv", videoHourPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_channel_recommend_exp_feature_20250212"), featMap);
+        oneTypeStatFeature("b11", "return_n_uv", videoHourPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_festive_recommend_exp_feature_20250212"), featMap);
+        //getRateStatFeature("b12", "", dayPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_long_period_recommend_exp_feature_20250212"), featMap);
+        oneTypeStatFeature("b13", "return_n_uv", videoHourPeriods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_video_unionid_recommend_exp_feature_20250212"), featMap);
 
         // head video cf
         headVideoCFD1Feature("d1", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("scene_type_vid_cf_feature_20250212"), featMap);
@@ -133,7 +134,7 @@ public class FeatureTransform {
         featMap.put(prefix + "@bit_rate", Double.parseDouble(videoInfo.getOrDefault("bit_rate", "0")));
 
         // cate
-        getVideoCateFeature(prefix, videoInfo, featMap);
+        // getVideoCateFeature(prefix, videoInfo, featMap);
         if (videoInfo.containsKey("title")) {
             int id = FeatureUtils.judgeVideoTimeType(videoInfo.get("title"));
             if (id > 0) {
@@ -199,7 +200,7 @@ public class FeatureTransform {
                         }
                         if (null != hVideoMap && hVideoMap.containsKey(vid)) {
                             Map<String, String> hVideo = hVideoMap.get(vid);
-                            getVideoCateFeature(baseKey, hVideo, featMap);
+                            //getVideoCateFeature(baseKey, hVideo, featMap);
                             getTwoVideoCrossFeature(baseKey, hVideoSimAttrs, hVideo, rankVideo, featMap);
                         }
                     }
@@ -347,11 +348,11 @@ public class FeatureTransform {
         }
     }
 
-    private static void oneTypeStatFeature(String prefix, String uvPrefix, Map<String, String> infoMap, Map<String, Double> featMap) {
+    private static void oneTypeStatFeature(String prefix, String uvPrefix, List<String> periods, Map<String, String> infoMap, Map<String, Double> featMap) {
         if (null == infoMap || infoMap.isEmpty()) {
             return;
         }
-        for (String period : hourPeriods) {
+        for (String period : periods) {
             double exp = getOneInfo("exp_" + period, infoMap);
             if (!FeatureUtils.greaterThanZero(exp)) {
                 continue;