|
@@ -0,0 +1,303 @@
|
|
|
+package com.tzld.piaoquan.recommend.server.util;
|
|
|
+
|
|
|
+import com.tzld.piaoquan.recommend.server.service.rank.extractor.ExtractorUtils;
|
|
|
+import org.apache.commons.lang3.StringUtils;
|
|
|
+
|
|
|
+import java.time.Instant;
|
|
|
+import java.time.LocalDateTime;
|
|
|
+import java.time.ZoneId;
|
|
|
+import java.time.format.DateTimeFormatter;
|
|
|
+import java.util.Arrays;
|
|
|
+import java.util.HashMap;
|
|
|
+import java.util.List;
|
|
|
+import java.util.Map;
|
|
|
+
|
|
|
+public class ExtractFeature20250218 {
|
|
|
+
|
|
|
+ private ExtractFeature20250218() {
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void handleB1(Map<String, Object> b1Feature, Map<String, Object> featureMap) {
|
|
|
+ List<String> times = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h", "168h");
|
|
|
+ List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_1_uv", "str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
|
|
|
+ for (String time : times) {
|
|
|
+ for (String index : indexList) {
|
|
|
+ double value = Double.parseDouble(b1Feature.getOrDefault(index + "_" + time, "0").toString());
|
|
|
+ featureMap.put("b1_" + index + "_" + time, value);
|
|
|
+ }
|
|
|
+
|
|
|
+ double rovn = Double.parseDouble(b1Feature.getOrDefault("rovn_" + time, "0").toString());
|
|
|
+ double returnNUv = Double.parseDouble(b1Feature.getOrDefault("return_1_uv_" + time, "0").toString());
|
|
|
+
|
|
|
+ featureMap.put("b1_rovn*log(r)_" + time, rovn * ExtractorUtils.calLog(returnNUv));
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void handleB2ToB11AndB13(Map<String, Map<String, Object>> videoFeature, Map<String, Object> featureMap) {
|
|
|
+ List<String> times = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h", "168h");
|
|
|
+ List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_n_uv", "str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
|
|
|
+ for (Map.Entry<String, Map<String, Object>> entry : videoFeature.entrySet()) {
|
|
|
+ String key = entry.getKey();
|
|
|
+ Map<String, Object> feature = entry.getValue();
|
|
|
+ for (String time : times) {
|
|
|
+ for (String index : indexList) {
|
|
|
+ double value = Double.parseDouble(feature.getOrDefault(index + "_" + time, "0").toString());
|
|
|
+ featureMap.put(key + "_" + index + "_" + time, value);
|
|
|
+ }
|
|
|
+
|
|
|
+ double rovn = Double.parseDouble(feature.getOrDefault("rovn_" + time, "0").toString());
|
|
|
+ double returnNUv = Double.parseDouble(feature.getOrDefault("return_n_uv_" + time, "0").toString());
|
|
|
+
|
|
|
+ featureMap.put(key + "_rovn*log(r)_" + time, rovn * ExtractorUtils.calLog(returnNUv));
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void handleB12(Map<String, Object> b12Feature, Map<String, Object> featureMap) {
|
|
|
+ List<String> times = Arrays.asList("7d", "14d", "30d", "60d");
|
|
|
+ List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_n_uv", "str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
|
|
|
+ for (String time : times) {
|
|
|
+ for (String index : indexList) {
|
|
|
+ double value = Double.parseDouble(b12Feature.getOrDefault(index + "_" + time, "0").toString());
|
|
|
+ featureMap.put("b12_" + index + "_" + time, value);
|
|
|
+ }
|
|
|
+ double rovn = Double.parseDouble(b12Feature.getOrDefault("rovn_" + time, "0").toString());
|
|
|
+ double returnNUv = Double.parseDouble(b12Feature.getOrDefault("return_n_uv_" + time, "0").toString());
|
|
|
+ featureMap.put("b12_rovn*log(r)_" + time, rovn * ExtractorUtils.calLog(returnNUv));
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void handleVideoBasicFeature(Map<String, Object> videoFeature, long ts, Map<String, Object> featureMap) {
|
|
|
+ Double totalTime = Double.parseDouble(videoFeature.getOrDefault("total_time", "0").toString());
|
|
|
+ Double width = Double.parseDouble(videoFeature.getOrDefault("width", "0d").toString());
|
|
|
+ Double height = Double.parseDouble(videoFeature.getOrDefault("height", "0d").toString());
|
|
|
+ Double size = Double.parseDouble(videoFeature.getOrDefault("size", "0d").toString());
|
|
|
+ Double bit_rate = Double.parseDouble(videoFeature.getOrDefault("bit_rate", "0d").toString());
|
|
|
+ String festiveLabel1 = videoFeature.getOrDefault("festive_label1", "").toString();
|
|
|
+ String festiveLabel2 = videoFeature.getOrDefault("festive_label2", "").toString();
|
|
|
+
|
|
|
+
|
|
|
+ featureMap.put("total_time", totalTime);
|
|
|
+ featureMap.put("width", width);
|
|
|
+ featureMap.put("height", height);
|
|
|
+ featureMap.put("size", size);
|
|
|
+ featureMap.put("bit_rate", bit_rate);
|
|
|
+ featureMap.put("width/height", ExtractorUtils.divisionDouble(width, height));
|
|
|
+ featureMap.put("is_festive", 0);
|
|
|
+ featureMap.put("is_greeting", 0);
|
|
|
+ if (StringUtils.equals(festiveLabel1, "节假日")) {
|
|
|
+ featureMap.put("is_festive", 1);
|
|
|
+ } else if (StringUtils.equals(festiveLabel1, "问候语")) {
|
|
|
+ featureMap.put("is_greeting", 1);
|
|
|
+ }
|
|
|
+
|
|
|
+ featureMap.put("hour", ExtractorUtils.getHourByTimestamp(ts));
|
|
|
+ featureMap.put("day_of_week", ExtractorUtils.getDayOfWeekByTimestamp(ts));
|
|
|
+
|
|
|
+ long createTs = Long.parseLong(videoFeature.getOrDefault("gmt_create_timestamp", "0").toString()) / 1000;
|
|
|
+ featureMap.put("create_ts_diff", ExtractorUtils.getDaysBetween(createTs, ts));
|
|
|
+
|
|
|
+ String date = LocalDateTime.ofInstant(Instant.ofEpochSecond(ts), ZoneId.systemDefault()).format(DateTimeFormatter.ofPattern("yyyy-MM-dd"));
|
|
|
+ String festiveByDate = FestiveUtil.getFestiveByDate(date);
|
|
|
+ featureMap.put("today_is_fes", 0);
|
|
|
+ featureMap.put("video_fes_eq", 0);
|
|
|
+ if (StringUtils.isNotBlank(festiveByDate)) {
|
|
|
+ featureMap.put("today_is_fes", 1);
|
|
|
+ if (StringUtils.equals(festiveByDate, festiveLabel2)) {
|
|
|
+ featureMap.put("video_today_fes_eq", 1);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void handleC1(Map<String, Object> c1Feature, Map<String, Object> featureMap) {
|
|
|
+ List<String> times = Arrays.asList("12h", "24h", "72h", "168h");
|
|
|
+ List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_1_uv", "click", "str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
|
|
|
+ for (String time : times) {
|
|
|
+ for (String index : indexList) {
|
|
|
+ double value = Double.parseDouble(c1Feature.getOrDefault(index + "_" + time, "0").toString());
|
|
|
+ featureMap.put("c1_" + index + "_" + time, value);
|
|
|
+ }
|
|
|
+ double rovn = Double.parseDouble(c1Feature.getOrDefault("rovn_" + time, "0").toString());
|
|
|
+ double returnNUv = Double.parseDouble(c1Feature.getOrDefault("return_1_uv_" + time, "0").toString());
|
|
|
+ featureMap.put("c1_rovn*log(r)_" + time, rovn * ExtractorUtils.calLog(returnNUv));
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void handleC2ToC3(Map<String, Object> c2Feature, Map<String, Object> c3Feature, Map<String, Object> featureMap) {
|
|
|
+ Map<String, Map<String, Object>> featureMaps = new HashMap<>();
|
|
|
+ featureMaps.put("c2", c2Feature);
|
|
|
+ featureMaps.put("c3", c3Feature);
|
|
|
+
|
|
|
+ List<String> times = Arrays.asList("12h", "24h", "72h", "168h");
|
|
|
+ List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_n_uv", "click");
|
|
|
+
|
|
|
+ for (Map.Entry<String, Map<String, Object>> entry : featureMaps.entrySet()) {
|
|
|
+ String key = entry.getKey();
|
|
|
+ Map<String, Object> feature = entry.getValue();
|
|
|
+ for (String time : times) {
|
|
|
+ for (String index : indexList) {
|
|
|
+ double value = Double.parseDouble(feature.getOrDefault(index + "_" + time, "0").toString());
|
|
|
+ featureMap.put(key + "_" + index + "_" + time, value);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void handleC4(Map<String, Object> c4Feature, Map<String, Object> featureMap) {
|
|
|
+ List<String> times = Arrays.asList("24h", "72h", "168h");
|
|
|
+ List<String> indexList = Arrays.asList("str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
|
|
|
+
|
|
|
+ for (String time : times) {
|
|
|
+ for (String index : indexList) {
|
|
|
+ double value = Double.parseDouble(c4Feature.getOrDefault("avg_" + index + "_" + time, "0").toString());
|
|
|
+ featureMap.put("c4_avg_" + index + "_" + time, value);
|
|
|
+
|
|
|
+ double max = Double.parseDouble(c4Feature.getOrDefault("max_" + index + "_" + time, "0").toString());
|
|
|
+ double min = Double.parseDouble(c4Feature.getOrDefault("min_" + index + "_" + time, "0").toString());
|
|
|
+
|
|
|
+ featureMap.put("c4_diff_" + index + "_" + time, max - min);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void handleC5ToC6(Map<String, Object> c5Feature, Map<String, Object> c6Feature, Map<String, Object> videoMap, Map<String, Object> featureMap) {
|
|
|
+ Map<String, Map<String, Object>> featureMaps = new HashMap<>();
|
|
|
+ featureMaps.put("c5", c5Feature);
|
|
|
+ featureMaps.put("c6", c6Feature);
|
|
|
+ List<String> times = Arrays.asList("tags_1d", "tags_3d", "tags_7d");
|
|
|
+
|
|
|
+ String title = videoMap.getOrDefault("title", "").toString();
|
|
|
+
|
|
|
+ for (Map.Entry<String, Map<String, Object>> entry : featureMaps.entrySet()) {
|
|
|
+ String key = entry.getKey();
|
|
|
+ Map<String, Object> feature = entry.getValue();
|
|
|
+ for (String time : times) {
|
|
|
+ String tags = feature.getOrDefault(time, "").toString();
|
|
|
+ Double[] scores = ExtractorUtils.funcC34567ForTagsNew(tags, title);
|
|
|
+ featureMap.put(key + "_matchnum" + "_" + time, scores[0]);
|
|
|
+ featureMap.put(key + "_maxscore" + "_" + time, scores[1]);
|
|
|
+ featureMap.put(key + "_avgscore" + "_" + time, scores[2]);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ public static Map<String, Map<String, String[]>> handleC7ToC8(Map<String, Object> c7Feature, Map<String, Object> c8Feature) {
|
|
|
+ Map<String, Map<String, String[]>> resultMap = new HashMap<>();
|
|
|
+
|
|
|
+ Map<String, Map<String, Object>> featureMaps = new HashMap<>();
|
|
|
+ featureMaps.put("c7", c7Feature);
|
|
|
+ featureMaps.put("c8", c8Feature);
|
|
|
+ List<String> indexList = Arrays.asList("share", "return");
|
|
|
+ for (Map.Entry<String, Map<String, Object>> entry : featureMaps.entrySet()) {
|
|
|
+ String key = entry.getKey();
|
|
|
+ Map<String, Object> feature = entry.getValue();
|
|
|
+ for (String index : indexList) {
|
|
|
+ if (feature.containsKey(index)) {
|
|
|
+ Map<String, String[]> cfMap = new HashMap<>();
|
|
|
+ String[] entries = feature.get(index).toString().split(",");
|
|
|
+ for (String e : entries) {
|
|
|
+ String[] rList = e.split(":");
|
|
|
+ if (rList.length >= 4) {
|
|
|
+ String vid = rList[0];
|
|
|
+ String value1 = rList[1];
|
|
|
+ String value2 = rList[2];
|
|
|
+ String value3 = rList[3];
|
|
|
+ String[] strs = {value1, value2, value3};
|
|
|
+ cfMap.put(vid, strs);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ resultMap.put(key, cfMap);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return resultMap;
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void useC7ToC8(Map<String, Map<String, String[]>> map, String vid, Map<String, Object> featureMap) {
|
|
|
+ if (StringUtils.isBlank(vid)) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ for (String key : Arrays.asList("c6", "c7")) {
|
|
|
+ for (String action : Arrays.asList("share", "return")) {
|
|
|
+ String featureKey = key + "_" + action;
|
|
|
+ if (map.containsKey(featureKey)) {
|
|
|
+ Map<String, String[]> cfMap = map.get(featureKey);
|
|
|
+ String[] scores = cfMap.get(vid);
|
|
|
+ featureMap.put(featureKey + "_score", Double.parseDouble(scores[0]));
|
|
|
+ featureMap.put(featureKey + "_num", Double.parseDouble(scores[1]));
|
|
|
+ featureMap.put(featureKey + "_rank", ExtractorUtils.reciprocal(Double.parseDouble(scores[2])));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void handleD3(Map<String, Object> d3Feature, Map<String, Object> featureMap) {
|
|
|
+ for (String index : Arrays.asList("exp", "return_n", "rovn")) {
|
|
|
+ double value = Double.parseDouble(d3Feature.getOrDefault(index, "0").toString());
|
|
|
+ featureMap.put("d3_" + index, value);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void handleD1(Map<String, Object> d4Feature, Map<String, Object> featureMap) {
|
|
|
+ double rosCfScores = Double.parseDouble(d4Feature.getOrDefault("ros_cf_score", "0").toString());
|
|
|
+ featureMap.put("d1_ros_cf_score", rosCfScores);
|
|
|
+ double rovCfScores = Double.parseDouble(d4Feature.getOrDefault("rov_cf_score", "0").toString());
|
|
|
+ featureMap.put("d1_rov_cf_score", rovCfScores);
|
|
|
+
|
|
|
+ double rosCfRank = Double.parseDouble(d4Feature.getOrDefault("ros_cf_rank", "0").toString());
|
|
|
+ featureMap.put("d1_ros_cf_rank", ExtractorUtils.reciprocal(rosCfRank));
|
|
|
+ double rovCfRank = Double.parseDouble(d4Feature.getOrDefault("rov_cf_rank", "0").toString());
|
|
|
+ featureMap.put("d1_rov_cf_rank", ExtractorUtils.reciprocal(rovCfRank));
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void handleD2(Map<String, Object> d5Feature, Map<String, Object> featureMap) {
|
|
|
+ double score = Double.parseDouble(d5Feature.getOrDefault("score", "0").toString());
|
|
|
+ featureMap.put("d2_score", score);
|
|
|
+
|
|
|
+ double rank = Double.parseDouble(d5Feature.getOrDefault("rank", "0").toString());
|
|
|
+ featureMap.put("d2_rank", ExtractorUtils.reciprocal(rank));
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void handleVideoSimilarity(Map<String, Object> videoFeature, Map<String, Object> headVideoFeature, Map<String, Object> featureMap) {
|
|
|
+ String headVideoTitle = headVideoFeature.getOrDefault("title", "").toString();
|
|
|
+ String headVideoMergeCate2 = headVideoFeature.getOrDefault("merge_second_level_cate", "").toString();
|
|
|
+ String headVideoMergeCate1 = headVideoFeature.getOrDefault("merge_first_level_cate", "").toString();
|
|
|
+ String headVideoFestiveLabel2 = headVideoFeature.getOrDefault("festive_label2", "").toString();
|
|
|
+
|
|
|
+
|
|
|
+ String videoTitle = videoFeature.getOrDefault("title", "").toString();
|
|
|
+ String videoMergeCate2 = videoFeature.getOrDefault("merge_second_level_cate", "").toString();
|
|
|
+ String videoMergeCate1 = videoFeature.getOrDefault("merge_first_level_cate", "").toString();
|
|
|
+ String videoFestiveLabel2 = videoFeature.getOrDefault("festive_label2", "").toString();
|
|
|
+
|
|
|
+ double titleSimilarity = ExtractFeature20250218.calcTxtSimilarity(headVideoTitle, videoTitle);
|
|
|
+ double headTitleAndMerge1Similarity = ExtractFeature20250218.calcTxtSimilarity(headVideoTitle, videoMergeCate1);
|
|
|
+ double headTitleAndMerge2Similarity = ExtractFeature20250218.calcTxtSimilarity(headVideoTitle, videoMergeCate2);
|
|
|
+ double headTitleAndFestiveSimilarity = ExtractFeature20250218.calcTxtSimilarity(headVideoTitle, videoFestiveLabel2);
|
|
|
+ double merge1Similarity = ExtractFeature20250218.calcTxtSimilarity(headVideoMergeCate1, videoMergeCate1);
|
|
|
+ double merge2Similarity = ExtractFeature20250218.calcTxtSimilarity(headVideoMergeCate2, videoMergeCate2);
|
|
|
+ double festiveSimilarity = ExtractFeature20250218.calcTxtSimilarity(headVideoFestiveLabel2, videoFestiveLabel2);
|
|
|
+
|
|
|
+ featureMap.put("title_sim", titleSimilarity);
|
|
|
+ featureMap.put("head_title_merge1_sim", headTitleAndMerge1Similarity);
|
|
|
+ featureMap.put("head_title_merge2_sim", headTitleAndMerge2Similarity);
|
|
|
+ featureMap.put("head_title_festive_sim", headTitleAndFestiveSimilarity);
|
|
|
+ featureMap.put("merge1_sim", merge1Similarity);
|
|
|
+ featureMap.put("merge2_sim", merge2Similarity);
|
|
|
+ featureMap.put("festive_sim", festiveSimilarity);
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ private static double calcTxtSimilarity(String txt1, String txt2) {
|
|
|
+ if (StringUtils.isBlank(txt1) || StringUtils.isBlank(txt2)) {
|
|
|
+ return 0d;
|
|
|
+ }
|
|
|
+ return SimilarityUtils.word2VecSimilarity(txt1, txt2);
|
|
|
+ }
|
|
|
+}
|