|
@@ -1,6 +1,7 @@
|
|
package com.aliyun.odps.spark.examples.myUtils;
|
|
package com.aliyun.odps.spark.examples.myUtils;
|
|
|
|
|
|
import examples.utils.SimilarityUtils;
|
|
import examples.utils.SimilarityUtils;
|
|
|
|
+import org.apache.commons.lang3.StringUtils;
|
|
|
|
|
|
import java.util.ArrayList;
|
|
import java.util.ArrayList;
|
|
import java.util.HashMap;
|
|
import java.util.HashMap;
|
|
@@ -14,9 +15,6 @@ public class FeatureUtils {
|
|
public static final String cate2Attr = "cate2";
|
|
public static final String cate2Attr = "cate2";
|
|
public static final String festive1Attr = "festive_label1";
|
|
public static final String festive1Attr = "festive_label1";
|
|
public static final String channelAttr = "channel";
|
|
public static final String channelAttr = "channel";
|
|
- private static final Map<String, Integer> cate1MAP = new HashMap<>();
|
|
|
|
- private static final Map<String, Integer> cate2MAP = new HashMap<>();
|
|
|
|
- private static final Map<String, Integer> festive1Map = new HashMap<>();
|
|
|
|
public static final double twoMonthMs = 2 * 30 * 24 * 3600 * 1000.0;
|
|
public static final double twoMonthMs = 2 * 30 * 24 * 3600 * 1000.0;
|
|
|
|
|
|
private static final String goodMorningRegex = "(早安|早上好|早晨好|上午好)";
|
|
private static final String goodMorningRegex = "(早安|早上好|早晨好|上午好)";
|
|
@@ -25,70 +23,18 @@ public class FeatureUtils {
|
|
private static final Pattern goodMorningPattern = Pattern.compile(goodMorningRegex);
|
|
private static final Pattern goodMorningPattern = Pattern.compile(goodMorningRegex);
|
|
private static final Pattern goodAfternoonPattern = Pattern.compile(goodAfternoonRegex);
|
|
private static final Pattern goodAfternoonPattern = Pattern.compile(goodAfternoonRegex);
|
|
private static final Pattern goodEveningPattern = Pattern.compile(goodEveningRegex);
|
|
private static final Pattern goodEveningPattern = Pattern.compile(goodEveningRegex);
|
|
|
|
+ private static final Map<String, String[]> userChannelPrefixMap = new HashMap<>();
|
|
|
|
|
|
static {
|
|
static {
|
|
- cate1MAP.put("情感", 1);
|
|
|
|
- cate1MAP.put("音乐", 2);
|
|
|
|
- cate1MAP.put("搞笑", 3);
|
|
|
|
- cate1MAP.put("生活记录", 4);
|
|
|
|
- cate1MAP.put("医疗健康", 5);
|
|
|
|
- cate1MAP.put("文化", 6);
|
|
|
|
- cate1MAP.put("旅行", 7);
|
|
|
|
- cate1MAP.put("美食", 8);
|
|
|
|
- cate1MAP.put("历史", 9);
|
|
|
|
- cate1MAP.put("科普", 10);
|
|
|
|
- cate1MAP.put("艺术", 11);
|
|
|
|
- cate1MAP.put("综艺", 12);
|
|
|
|
- cate1MAP.put("时政", 13);
|
|
|
|
- cate1MAP.put("三农", 14);
|
|
|
|
- cate1MAP.put("舞蹈", 15);
|
|
|
|
-
|
|
|
|
- cate2MAP.put("人生忠告", 1);
|
|
|
|
- cate2MAP.put("知识科普", 2);
|
|
|
|
- cate2MAP.put("祝福音乐", 3);
|
|
|
|
- cate2MAP.put("生活小妙招", 4);
|
|
|
|
- cate2MAP.put("节日祝福", 5);
|
|
|
|
- cate2MAP.put("搞笑段子", 6);
|
|
|
|
- cate2MAP.put("怀念时光", 7);
|
|
|
|
- cate2MAP.put("早中晚好", 8);
|
|
|
|
- cate2MAP.put("风景实拍", 9);
|
|
|
|
- cate2MAP.put("动物萌宠", 10);
|
|
|
|
- cate2MAP.put("美食教程", 11);
|
|
|
|
- cate2MAP.put("国家力量", 12);
|
|
|
|
- cate2MAP.put("中国历史影像", 13);
|
|
|
|
- cate2MAP.put("杂技柔术", 14);
|
|
|
|
- cate2MAP.put("健康知识", 15);
|
|
|
|
- cate2MAP.put("传统文化", 16);
|
|
|
|
- cate2MAP.put("长寿知识", 17);
|
|
|
|
- cate2MAP.put("社会风气", 18);
|
|
|
|
- cate2MAP.put("大型集体艺术", 19);
|
|
|
|
- cate2MAP.put("省份城市亮点", 20);
|
|
|
|
- cate2MAP.put("民生政策", 21);
|
|
|
|
- cate2MAP.put("健身操", 22);
|
|
|
|
- cate2MAP.put("亲情音乐", 23);
|
|
|
|
- cate2MAP.put("天气变化", 24);
|
|
|
|
- cate2MAP.put("人财诈骗", 25);
|
|
|
|
- cate2MAP.put("正能量剧情", 26);
|
|
|
|
-
|
|
|
|
- festive1Map.put("节假日", 1);
|
|
|
|
- festive1Map.put("问候语", 2);
|
|
|
|
- festive1Map.put("祝福语", 3);
|
|
|
|
- festive1Map.put("节气", 4);
|
|
|
|
- festive1Map.put("热点", 5);
|
|
|
|
- festive1Map.put("人物", 6);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- public static int getAttrId(String cate, String value) {
|
|
|
|
- switch (cate) {
|
|
|
|
- case cate1Attr:
|
|
|
|
- return cate1MAP.getOrDefault(value, 0);
|
|
|
|
- case cate2Attr:
|
|
|
|
- return cate2MAP.getOrDefault(value, 0);
|
|
|
|
- case festive1Attr:
|
|
|
|
- return festive1Map.getOrDefault(value, 0);
|
|
|
|
- default:
|
|
|
|
- return 0;
|
|
|
|
- }
|
|
|
|
|
|
+ // "公众号买号" and "公众号代运营-Daily" share the "公众号mix" entry below and need a second check to tell them apart
|
|
|
|
+ userChannelPrefixMap.put("公众号mix", new String[]{"longArticles_"});
|
|
|
|
+ userChannelPrefixMap.put("公众号代运营-即转", new String[]{"dyyjs_"});
|
|
|
|
+ userChannelPrefixMap.put("小程序投流", new String[]{"touliu_tencent_"});
|
|
|
|
+ userChannelPrefixMap.put("公众号投流", new String[]{"touliu_tencentgzh_", "touliu_tencentGzhArticle_", "GzhTouLiu_Articles_gh"});
|
|
|
|
+ userChannelPrefixMap.put("测-企微投放", new String[]{"touliu_tencentqw_", "WeCom_"});
|
|
|
|
+ userChannelPrefixMap.put("测-企微合作", new String[]{"touliu_tencentwbqw_", "dyyqw_"});
|
|
|
|
+ userChannelPrefixMap.put("停-公众号合作", new String[]{"gzhhz_"});
|
|
|
|
+ userChannelPrefixMap.put("测-公众号完全代投放", new String[]{"daitou_tencentgzh", "DaiTou_gh"});
|
|
}
|
|
}
|
|
|
|
|
|
public static int judgeVideoTimeType(String s) {
|
|
public static int judgeVideoTimeType(String s) {
|
|
@@ -117,13 +63,6 @@ public class FeatureUtils {
|
|
return diff;
|
|
return diff;
|
|
}
|
|
}
|
|
|
|
|
|
- public static double log1(double data) {
|
|
|
|
- if (data <= 0) {
|
|
|
|
- return 0D;
|
|
|
|
- }
|
|
|
|
- return Math.log(data + 1.0);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
public static double log1(double data, double scale) {
|
|
public static double log1(double data, double scale) {
|
|
if (data <= 0) {
|
|
if (data <= 0) {
|
|
return 0D;
|
|
return 0D;
|
|
@@ -184,4 +123,40 @@ public class FeatureUtils {
|
|
Double[] result = {(double) d1, d3, d4};
|
|
Double[] result = {(double) d1, d3, d4};
|
|
return result;
|
|
return result;
|
|
}
|
|
}
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ public static String getUserChannel(String rootSourceId, String groupName) {
|
|
|
|
+ String channel = getUserChannel(rootSourceId);
|
|
|
|
+ if (channel.equals("公众号mix")) {
|
|
|
|
+ if (null != groupName && groupName.equals("公众号买号")) {
|
|
|
|
+ return "公众号买号";
|
|
|
|
+ } else {
|
|
|
|
+ return "公众号代运营-Daily";
|
|
|
|
+ }
|
|
|
|
+ } else {
|
|
|
|
+ return channel;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ public static String getUserChannel(String rootSourceId) {
|
|
|
|
+ if (null != rootSourceId && !rootSourceId.isEmpty()) {
|
|
|
|
+ for (Map.Entry<String, String[]> entry : userChannelPrefixMap.entrySet()) {
|
|
|
|
+ String channel = entry.getKey();
|
|
|
|
+ String[] prefixArray = entry.getValue();
|
|
|
|
+ if (StringUtils.startsWithAny(rootSourceId, prefixArray)) {
|
|
|
|
+ return channel;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ return "";
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ public static String getUserLevel(String rootSessionId, String subSessionId) {
|
|
|
|
+ if (null != rootSessionId && null != subSessionId && !rootSessionId.isEmpty() && !subSessionId.isEmpty()) {
|
|
|
|
+ if (rootSessionId.equals(subSessionId)) {
|
|
|
|
+ return "1st";
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ return "";
|
|
|
|
+ }
|
|
}
|
|
}
|