|  | @@ -1,6 +1,7 @@
 | 
	
		
			
				|  |  |  package com.aliyun.odps.spark.examples.myUtils;
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  import examples.utils.SimilarityUtils;
 | 
	
		
			
				|  |  | +import org.apache.commons.lang3.StringUtils;
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  import java.util.ArrayList;
 | 
	
		
			
				|  |  |  import java.util.HashMap;
 | 
	
	
		
			
				|  | @@ -14,9 +15,6 @@ public class FeatureUtils {
 | 
	
		
			
				|  |  |      public static final String cate2Attr = "cate2";
 | 
	
		
			
				|  |  |      public static final String festive1Attr = "festive_label1";
 | 
	
		
			
				|  |  |      public static final String channelAttr = "channel";
 | 
	
		
			
				|  |  | -    private static final Map<String, Integer> cate1MAP = new HashMap<>();
 | 
	
		
			
				|  |  | -    private static final Map<String, Integer> cate2MAP = new HashMap<>();
 | 
	
		
			
				|  |  | -    private static final Map<String, Integer> festive1Map = new HashMap<>();
 | 
	
		
			
				|  |  |      public static final double twoMonthMs = 2 * 30 * 24 * 3600 * 1000.0;
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      private static final String goodMorningRegex = "(早安|早上好|早晨好|上午好)";
 | 
	
	
		
			
				|  | @@ -25,70 +23,18 @@ public class FeatureUtils {
 | 
	
		
			
				|  |  |      private static final Pattern goodMorningPattern = Pattern.compile(goodMorningRegex);
 | 
	
		
			
				|  |  |      private static final Pattern goodAfternoonPattern = Pattern.compile(goodAfternoonRegex);
 | 
	
		
			
				|  |  |      private static final Pattern goodEveningPattern = Pattern.compile(goodEveningRegex);
 | 
	
		
			
				|  |  | +    private static final Map<String, String[]> userChannelPrefixMap = new HashMap<>();
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      static {
 | 
	
		
			
				|  |  | -        cate1MAP.put("情感", 1);
 | 
	
		
			
				|  |  | -        cate1MAP.put("音乐", 2);
 | 
	
		
			
				|  |  | -        cate1MAP.put("搞笑", 3);
 | 
	
		
			
				|  |  | -        cate1MAP.put("生活记录", 4);
 | 
	
		
			
				|  |  | -        cate1MAP.put("医疗健康", 5);
 | 
	
		
			
				|  |  | -        cate1MAP.put("文化", 6);
 | 
	
		
			
				|  |  | -        cate1MAP.put("旅行", 7);
 | 
	
		
			
				|  |  | -        cate1MAP.put("美食", 8);
 | 
	
		
			
				|  |  | -        cate1MAP.put("历史", 9);
 | 
	
		
			
				|  |  | -        cate1MAP.put("科普", 10);
 | 
	
		
			
				|  |  | -        cate1MAP.put("艺术", 11);
 | 
	
		
			
				|  |  | -        cate1MAP.put("综艺", 12);
 | 
	
		
			
				|  |  | -        cate1MAP.put("时政", 13);
 | 
	
		
			
				|  |  | -        cate1MAP.put("三农", 14);
 | 
	
		
			
				|  |  | -        cate1MAP.put("舞蹈", 15);
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -        cate2MAP.put("人生忠告", 1);
 | 
	
		
			
				|  |  | -        cate2MAP.put("知识科普", 2);
 | 
	
		
			
				|  |  | -        cate2MAP.put("祝福音乐", 3);
 | 
	
		
			
				|  |  | -        cate2MAP.put("生活小妙招", 4);
 | 
	
		
			
				|  |  | -        cate2MAP.put("节日祝福", 5);
 | 
	
		
			
				|  |  | -        cate2MAP.put("搞笑段子", 6);
 | 
	
		
			
				|  |  | -        cate2MAP.put("怀念时光", 7);
 | 
	
		
			
				|  |  | -        cate2MAP.put("早中晚好", 8);
 | 
	
		
			
				|  |  | -        cate2MAP.put("风景实拍", 9);
 | 
	
		
			
				|  |  | -        cate2MAP.put("动物萌宠", 10);
 | 
	
		
			
				|  |  | -        cate2MAP.put("美食教程", 11);
 | 
	
		
			
				|  |  | -        cate2MAP.put("国家力量", 12);
 | 
	
		
			
				|  |  | -        cate2MAP.put("中国历史影像", 13);
 | 
	
		
			
				|  |  | -        cate2MAP.put("杂技柔术", 14);
 | 
	
		
			
				|  |  | -        cate2MAP.put("健康知识", 15);
 | 
	
		
			
				|  |  | -        cate2MAP.put("传统文化", 16);
 | 
	
		
			
				|  |  | -        cate2MAP.put("长寿知识", 17);
 | 
	
		
			
				|  |  | -        cate2MAP.put("社会风气", 18);
 | 
	
		
			
				|  |  | -        cate2MAP.put("大型集体艺术", 19);
 | 
	
		
			
				|  |  | -        cate2MAP.put("省份城市亮点", 20);
 | 
	
		
			
				|  |  | -        cate2MAP.put("民生政策", 21);
 | 
	
		
			
				|  |  | -        cate2MAP.put("健身操", 22);
 | 
	
		
			
				|  |  | -        cate2MAP.put("亲情音乐", 23);
 | 
	
		
			
				|  |  | -        cate2MAP.put("天气变化", 24);
 | 
	
		
			
				|  |  | -        cate2MAP.put("人财诈骗", 25);
 | 
	
		
			
				|  |  | -        cate2MAP.put("正能量剧情", 26);
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -        festive1Map.put("节假日", 1);
 | 
	
		
			
				|  |  | -        festive1Map.put("问候语", 2);
 | 
	
		
			
				|  |  | -        festive1Map.put("祝福语", 3);
 | 
	
		
			
				|  |  | -        festive1Map.put("节气", 4);
 | 
	
		
			
				|  |  | -        festive1Map.put("热点", 5);
 | 
	
		
			
				|  |  | -        festive1Map.put("人物", 6);
 | 
	
		
			
				|  |  | -    }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    public static int getAttrId(String cate, String value) {
 | 
	
		
			
				|  |  | -        switch (cate) {
 | 
	
		
			
				|  |  | -            case cate1Attr:
 | 
	
		
			
				|  |  | -                return cate1MAP.getOrDefault(value, 0);
 | 
	
		
			
				|  |  | -            case cate2Attr:
 | 
	
		
			
				|  |  | -                return cate2MAP.getOrDefault(value, 0);
 | 
	
		
			
				|  |  | -            case festive1Attr:
 | 
	
		
			
				|  |  | -                return festive1Map.getOrDefault(value, 0);
 | 
	
		
			
				|  |  | -            default:
 | 
	
		
			
				|  |  | -                return 0;
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
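				|  |  | +        // "公众号买号" and "公众号代运营-Daily" are both registered under "公众号mix" (prefix longArticles_); getUserChannel(rootSourceId, groupName) tells them apart by groupName.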
 | 
	
		
			
				|  |  | +        userChannelPrefixMap.put("公众号mix", new String[]{"longArticles_"});
 | 
	
		
			
				|  |  | +        userChannelPrefixMap.put("公众号代运营-即转", new String[]{"dyyjs_"});
 | 
	
		
			
				|  |  | +        userChannelPrefixMap.put("小程序投流", new String[]{"touliu_tencent_"});
 | 
	
		
			
				|  |  | +        userChannelPrefixMap.put("公众号投流", new String[]{"touliu_tencentgzh_", "touliu_tencentGzhArticle_", "GzhTouLiu_Articles_gh"});
 | 
	
		
			
				|  |  | +        userChannelPrefixMap.put("测-企微投放", new String[]{"touliu_tencentqw_", "WeCom_"});
 | 
	
		
			
				|  |  | +        userChannelPrefixMap.put("测-企微合作", new String[]{"touliu_tencentwbqw_", "dyyqw_"});
 | 
	
		
			
				|  |  | +        userChannelPrefixMap.put("停-公众号合作", new String[]{"gzhhz_"});
 | 
	
		
			
				|  |  | +        userChannelPrefixMap.put("测-公众号完全代投放", new String[]{"daitou_tencentgzh", "DaiTou_gh"});
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      public static int judgeVideoTimeType(String s) {
 | 
	
	
		
			
				|  | @@ -117,13 +63,6 @@ public class FeatureUtils {
 | 
	
		
			
				|  |  |          return diff;
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    public static double log1(double data) {
 | 
	
		
			
				|  |  | -        if (data <= 0) {
 | 
	
		
			
				|  |  | -            return 0D;
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -        return Math.log(data + 1.0);
 | 
	
		
			
				|  |  | -    }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  |      public static double log1(double data, double scale) {
 | 
	
		
			
				|  |  |          if (data <= 0) {
 | 
	
		
			
				|  |  |              return 0D;
 | 
	
	
		
			
				|  | @@ -184,4 +123,40 @@ public class FeatureUtils {
 | 
	
		
			
				|  |  |          Double[] result = {(double) d1, d3, d4};
 | 
	
		
			
				|  |  |          return result;
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    public static String getUserChannel(String rootSourceId, String groupName) {
 | 
	
		
			
				|  |  | +        String channel = getUserChannel(rootSourceId);
 | 
	
		
			
				|  |  | +        if (channel.equals("公众号mix")) {
 | 
	
		
			
				|  |  | +            if (null != groupName && groupName.equals("公众号买号")) {
 | 
	
		
			
				|  |  | +                return "公众号买号";
 | 
	
		
			
				|  |  | +            } else {
 | 
	
		
			
				|  |  | +                return "公众号代运营-Daily";
 | 
	
		
			
				|  |  | +            }
 | 
	
		
			
				|  |  | +        } else {
 | 
	
		
			
				|  |  | +            return channel;
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    public static String getUserChannel(String rootSourceId) {
 | 
	
		
			
				|  |  | +        if (null != rootSourceId && !rootSourceId.isEmpty()) {
 | 
	
		
			
				|  |  | +            for (Map.Entry<String, String[]> entry : userChannelPrefixMap.entrySet()) {
 | 
	
		
			
				|  |  | +                String channel = entry.getKey();
 | 
	
		
			
				|  |  | +                String[] prefixArray = entry.getValue();
 | 
	
		
			
				|  |  | +                if (StringUtils.startsWithAny(rootSourceId, prefixArray)) {
 | 
	
		
			
				|  |  | +                    return channel;
 | 
	
		
			
				|  |  | +                }
 | 
	
		
			
				|  |  | +            }
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +        return "";
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    public static String getUserLevel(String rootSessionId, String subSessionId) {
 | 
	
		
			
				|  |  | +        if (null != rootSessionId && null != subSessionId && !rootSessionId.isEmpty() && !subSessionId.isEmpty()) {
 | 
	
		
			
				|  |  | +            if (rootSessionId.equals(subSessionId)) {
 | 
	
		
			
				|  |  | +                return "1st";
 | 
	
		
			
				|  |  | +            }
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +        return "";
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  |  }
 |