Prechádzať zdrojové kódy

扩展用户渠道特征

jch 5 dní pred
rodič
commit
24bfa83ca4

+ 3 - 5
src/main/scala/com/aliyun/odps/spark/examples/myUtils/ConvertV2.java

@@ -17,6 +17,9 @@ public class ConvertV2 {
         String vid = record.get("vid");
         String appType = record.getOrDefault("apptype", "");
         String hotSencetype = record.getOrDefault("hotsencetype", "");
+        Map<String, String> extendMap = ConvertUtils.getRecordCol(record, "extend");
+        record.put("user_channel", FeatureUtils.getUserChannel(record.get("rootsourceid"), extendMap.get("group_name")));
+        record.put("user_level", FeatureUtils.getUserLevel(extendMap.get("rootsessionid"), record.get("subsessionid")));
         Map<String, String> headInfo = ConvertUtils.getRecordCol(record, "v2_feature");
         Map<String, String> rankInfo = ConvertUtils.getRecordCol(record, "v1_feature");
         Map<String, Map<String, String>> userOriginInfo = getUserOriginInfo(record);
@@ -62,7 +65,6 @@ public class ConvertV2 {
     private static Map<String, Map<String, String>> getUserOriginInfo(Map<String, String> record) {
         Map<String, Map<String, String>> map = new HashMap<>();
         map.put("mid_global_feature_20250212", ConvertUtils.getRecordCol(record, "c1_feature"));
-        map.put("mid_u2u_friend_index_feature_20250212", ConvertUtils.getRecordCol(record, "c4_feature"));
         map.put("alg_mid_feature_return_tags", ConvertUtils.getRecordCol(record, "c5_feature"));
         map.put("alg_mid_feature_share_tags", ConvertUtils.getRecordCol(record, "c6_feature"));
         map.put("alg_mid_feature_sharecf", ConvertUtils.getRecordCol(record, "c7_feature"));
@@ -85,14 +87,10 @@ public class ConvertV2 {
         map.put("alg_merge_cate2_recommend_exp_feature_20250212", ConvertUtils.getRecordCol(record, "b9_feature"));
         map.put("alg_channel_recommend_exp_feature_20250212", ConvertUtils.getRecordCol(record, "b10_feature"));
         map.put("alg_festive_recommend_exp_feature_20250212", ConvertUtils.getRecordCol(record, "b11_feature"));
-        //map.put("alg_vid_long_period_recommend_exp_feature_20250212", ConvertUtils.getRecordCol(record, "b12_feature"));
         map.put("alg_video_unionid_recommend_exp_feature_20250212", ConvertUtils.getRecordCol(record, "b13_feature"));
         map.put("scene_type_vid_cf_feature_20250212", ConvertUtils.getRecordCol(record, "d1_feature"));
         map.put("vid_click_cf_feature_20250212", ConvertUtils.getRecordCol(record, "d2_feature"));
         map.put("alg_recsys_feature_cf_i2i_v2", ConvertUtils.getRecordCol(record, "d3_feature"));
-        // 特征
-        map.put("mid_merge_cate1_feature_20250212", ConvertUtils.getRecordCol(record, "c2_feature"));
-        map.put("mid_merge_cate2_feature_20250212", ConvertUtils.getRecordCol(record, "c3_feature"));
 
         Map<String, Map<String, Map<String, String>>> allMap = new HashMap<>();
         String vid = record.get("vid");

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/myUtils/FeatureTransformV2.java

@@ -27,7 +27,7 @@ public class FeatureTransformV2 {
     private static final List<String> videoSimAttrs = Arrays.asList("title", "cate2", "cate2_list", "keywords");
     private static final List<String> hVideoSimAttrs = Arrays.asList("title");
     private static final List<String> cfList = Arrays.asList("share", "return");
-    private static final List<String> userAttrList = Arrays.asList("province", "city", "model", "brand", "system");
+    private static final List<String> userAttrList = Arrays.asList("province", "city", "model", "brand", "system", "user_channel", "user_level");
     private static final Set<String> hotSceneSet = new HashSet<>(Arrays.asList("1008", "1007", "1058", "1074", "1010"));
 
     public static void getContextFeature(long currentMs, String appType, String hotSceneType, Map<String, Double> featureMap) {

+ 47 - 72
src/main/scala/com/aliyun/odps/spark/examples/myUtils/FeatureUtils.java

@@ -1,6 +1,7 @@
 package com.aliyun.odps.spark.examples.myUtils;
 
 import examples.utils.SimilarityUtils;
+import org.apache.commons.lang3.StringUtils;
 
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -14,9 +15,6 @@ public class FeatureUtils {
     public static final String cate2Attr = "cate2";
     public static final String festive1Attr = "festive_label1";
     public static final String channelAttr = "channel";
-    private static final Map<String, Integer> cate1MAP = new HashMap<>();
-    private static final Map<String, Integer> cate2MAP = new HashMap<>();
-    private static final Map<String, Integer> festive1Map = new HashMap<>();
     public static final double twoMonthMs = 2 * 30 * 24 * 3600 * 1000.0;
 
     private static final String goodMorningRegex = "(早安|早上好|早晨好|上午好)";
@@ -25,70 +23,18 @@ public class FeatureUtils {
     private static final Pattern goodMorningPattern = Pattern.compile(goodMorningRegex);
     private static final Pattern goodAfternoonPattern = Pattern.compile(goodAfternoonRegex);
     private static final Pattern goodEveningPattern = Pattern.compile(goodEveningRegex);
+    private static final Map<String, String[]> userChannelPrefixMap = new HashMap<>();
 
     static {
-        cate1MAP.put("情感", 1);
-        cate1MAP.put("音乐", 2);
-        cate1MAP.put("搞笑", 3);
-        cate1MAP.put("生活记录", 4);
-        cate1MAP.put("医疗健康", 5);
-        cate1MAP.put("文化", 6);
-        cate1MAP.put("旅行", 7);
-        cate1MAP.put("美食", 8);
-        cate1MAP.put("历史", 9);
-        cate1MAP.put("科普", 10);
-        cate1MAP.put("艺术", 11);
-        cate1MAP.put("综艺", 12);
-        cate1MAP.put("时政", 13);
-        cate1MAP.put("三农", 14);
-        cate1MAP.put("舞蹈", 15);
-
-        cate2MAP.put("人生忠告", 1);
-        cate2MAP.put("知识科普", 2);
-        cate2MAP.put("祝福音乐", 3);
-        cate2MAP.put("生活小妙招", 4);
-        cate2MAP.put("节日祝福", 5);
-        cate2MAP.put("搞笑段子", 6);
-        cate2MAP.put("怀念时光", 7);
-        cate2MAP.put("早中晚好", 8);
-        cate2MAP.put("风景实拍", 9);
-        cate2MAP.put("动物萌宠", 10);
-        cate2MAP.put("美食教程", 11);
-        cate2MAP.put("国家力量", 12);
-        cate2MAP.put("中国历史影像", 13);
-        cate2MAP.put("杂技柔术", 14);
-        cate2MAP.put("健康知识", 15);
-        cate2MAP.put("传统文化", 16);
-        cate2MAP.put("长寿知识", 17);
-        cate2MAP.put("社会风气", 18);
-        cate2MAP.put("大型集体艺术", 19);
-        cate2MAP.put("省份城市亮点", 20);
-        cate2MAP.put("民生政策", 21);
-        cate2MAP.put("健身操", 22);
-        cate2MAP.put("亲情音乐", 23);
-        cate2MAP.put("天气变化", 24);
-        cate2MAP.put("人财诈骗", 25);
-        cate2MAP.put("正能量剧情", 26);
-
-        festive1Map.put("节假日", 1);
-        festive1Map.put("问候语", 2);
-        festive1Map.put("祝福语", 3);
-        festive1Map.put("节气", 4);
-        festive1Map.put("热点", 5);
-        festive1Map.put("人物", 6);
-    }
-
-    public static int getAttrId(String cate, String value) {
-        switch (cate) {
-            case cate1Attr:
-                return cate1MAP.getOrDefault(value, 0);
-            case cate2Attr:
-                return cate2MAP.getOrDefault(value, 0);
-            case festive1Attr:
-                return festive1Map.getOrDefault(value, 0);
-            default:
-                return 0;
-        }
+        // "公众号买号" and "公众号代运营-Daily" share the "公众号mix" prefix entry below and need a second check to tell them apart
+        userChannelPrefixMap.put("公众号mix", new String[]{"longArticles_"});
+        userChannelPrefixMap.put("公众号代运营-即转", new String[]{"dyyjs_"});
+        userChannelPrefixMap.put("小程序投流", new String[]{"touliu_tencent_"});
+        userChannelPrefixMap.put("公众号投流", new String[]{"touliu_tencentgzh_", "touliu_tencentGzhArticle_", "GzhTouLiu_Articles_gh"});
+        userChannelPrefixMap.put("测-企微投放", new String[]{"touliu_tencentqw_", "WeCom_"});
+        userChannelPrefixMap.put("测-企微合作", new String[]{"touliu_tencentwbqw_", "dyyqw_"});
+        userChannelPrefixMap.put("停-公众号合作", new String[]{"gzhhz_"});
+        userChannelPrefixMap.put("测-公众号完全代投放", new String[]{"daitou_tencentgzh", "DaiTou_gh"});
     }
 
     public static int judgeVideoTimeType(String s) {
@@ -117,13 +63,6 @@ public class FeatureUtils {
         return diff;
     }
 
-    public static double log1(double data) {
-        if (data <= 0) {
-            return 0D;
-        }
-        return Math.log(data + 1.0);
-    }
-
     public static double log1(double data, double scale) {
         if (data <= 0) {
             return 0D;
@@ -184,4 +123,40 @@ public class FeatureUtils {
         Double[] result = {(double) d1, d3, d4};
         return result;
     }
+
+
+    /**
+     * Resolves the user channel from the root source id, then refines the shared
+     * "公众号mix" bucket using the group name.
+     *
+     * @param rootSourceId root source id matched against the registered channel prefixes; may be null
+     * @param groupName    group name consulted only when the prefix lookup yields "公众号mix"; may be null
+     * @return the result of {@link #getUserChannel(String)}, except that "公众号mix" is replaced by
+     *         "公众号买号" when {@code groupName} equals that value and by "公众号代运营-Daily" otherwise
+     */
+    public static String getUserChannel(String rootSourceId, String groupName) {
+        String prefixChannel = getUserChannel(rootSourceId);
+        if (!"公众号mix".equals(prefixChannel)) {
+            return prefixChannel;
+        }
+        // The mixed bucket is split by group name; a null or different group name falls to the Daily channel.
+        return "公众号买号".equals(groupName) ? "公众号买号" : "公众号代运营-Daily";
+    }
+
+    /**
+     * Looks up the channel whose registered prefixes match the given root source id.
+     *
+     * @param rootSourceId root source id to classify; may be null or empty
+     * @return the key of the first {@code userChannelPrefixMap} entry encountered during iteration
+     *         that has a prefix {@code rootSourceId} starts with, or "" when the id is null,
+     *         empty, or matches no registered prefix
+     */
+    public static String getUserChannel(String rootSourceId) {
+        if (null == rootSourceId || rootSourceId.isEmpty()) {
+            return "";
+        }
+        for (String channelName : userChannelPrefixMap.keySet()) {
+            String[] prefixes = userChannelPrefixMap.get(channelName);
+            if (StringUtils.startsWithAny(rootSourceId, prefixes)) {
+                return channelName;
+            }
+        }
+        return "";
+    }
+
+    public static String getUserLevel(String rootSessionId, String subSessionId) {
+        if (null != rootSessionId && null != subSessionId && !rootSessionId.isEmpty() && !subSessionId.isEmpty()) {
+            if (rootSessionId.equals(subSessionId)) {
+                return "1st";
+            }
+        }
+        return "";
+    }
 }