123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307 |
- package examples.extractor.v20250218;
- import examples.extractor.ExtractorUtils;
- import examples.extractor.RankExtractorFeature_20240530;
- import examples.utils.FestiveUtil;
- import examples.utils.SimilarityUtils;
- import org.apache.commons.lang3.StringUtils;
- import java.time.Instant;
- import java.time.LocalDateTime;
- import java.time.ZoneId;
- import java.time.format.DateTimeFormatter;
- import java.util.Arrays;
- import java.util.HashMap;
- import java.util.List;
- import java.util.Map;
- public class ExtractFeature20250218 {
- private ExtractFeature20250218() {
- }
- public static void handleB1(Map<String, Object> b1Feature, Map<String, Object> featureMap) {
- List<String> times = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h", "168h");
- List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_1_uv", "str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
- for (String time : times) {
- for (String index : indexList) {
- double value = Double.parseDouble(b1Feature.getOrDefault(index + "_" + time, "0").toString());
- featureMap.put("b1_" + index + "_" + time, value);
- }
- double rovn = Double.parseDouble(b1Feature.getOrDefault("rovn_" + time, "0").toString());
- double returnNUv = Double.parseDouble(b1Feature.getOrDefault("return_n_uv", "0").toString());
- featureMap.put("b1_rovn*log(r)_" + time, rovn * RankExtractorFeature_20240530.calLog(returnNUv));
- }
- }
- public static void handleB2ToB11AndB13(Map<String, Map<String, Object>> videoFeature, Map<String, Object> featureMap) {
- List<String> times = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h", "168h");
- List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_n_uv", "str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
- for (Map.Entry<String, Map<String, Object>> entry : videoFeature.entrySet()) {
- String key = entry.getKey();
- Map<String, Object> feature = entry.getValue();
- for (String time : times) {
- for (String index : indexList) {
- double value = Double.parseDouble(feature.getOrDefault(index + "_" + time, "0").toString());
- featureMap.put(key + "_" + index + "_" + time, value);
- }
- double rovn = Double.parseDouble(feature.getOrDefault("rovn_" + time, "0").toString());
- double returnNUv = Double.parseDouble(feature.getOrDefault("return_n_uv", "0").toString());
- featureMap.put(key + "_rovn*log(r)_" + time, rovn * RankExtractorFeature_20240530.calLog(returnNUv));
- }
- }
- }
- public static void handleB12(Map<String, Object> b12Feature, Map<String, Object> featureMap) {
- List<String> times = Arrays.asList("7d", "14d", "30d", "60d");
- List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_n_uv", "str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
- for (String time : times) {
- for (String index : indexList) {
- double value = Double.parseDouble(b12Feature.getOrDefault(index + "_" + time, "0").toString());
- featureMap.put("b12_" + index + "_" + time, value);
- }
- double rovn = Double.parseDouble(b12Feature.getOrDefault("rovn_" + time, "0").toString());
- double returnNUv = Double.parseDouble(b12Feature.getOrDefault("return_n_uv", "0").toString());
- featureMap.put("b12_rovn*log(r)_" + time, rovn * RankExtractorFeature_20240530.calLog(returnNUv));
- }
- }
- public static void handleVideoBasicFeature(Map<String, Object> videoFeature, long ts, Map<String, Object> featureMap) {
- Double totalTime = Double.parseDouble(videoFeature.getOrDefault("total_time", "0").toString());
- Double width = Double.parseDouble(videoFeature.getOrDefault("width", "0d").toString());
- Double height = Double.parseDouble(videoFeature.getOrDefault("height", "0d").toString());
- Double size = Double.parseDouble(videoFeature.getOrDefault("size", "0d").toString());
- Double bit_rate = Double.parseDouble(videoFeature.getOrDefault("bit_rate", "0d").toString());
- String festiveLabel1 = videoFeature.getOrDefault("festive_label1", "").toString();
- String festiveLabel2 = videoFeature.getOrDefault("festive_label2", "").toString();
- featureMap.put("total_time", totalTime);
- featureMap.put("width", width);
- featureMap.put("height", height);
- featureMap.put("size", size);
- featureMap.put("bit_rate", bit_rate);
- featureMap.put("width/height", ExtractorUtils.divisionDouble(width, height));
- featureMap.put("is_festive", 0);
- featureMap.put("is_greeting", 0);
- if (StringUtils.equals(festiveLabel1, "节假日")) {
- featureMap.put("is_festive", 1);
- } else if (StringUtils.equals(festiveLabel1, "问候语")) {
- featureMap.put("is_greeting", 1);
- }
- LocalDateTime now = LocalDateTime.ofInstant(Instant.ofEpochSecond(ts), ZoneId.systemDefault());
- featureMap.put("hour", now.getHour() + 1);
- featureMap.put("day_of_week", now.getDayOfWeek());
- long createTs = Long.parseLong(videoFeature.getOrDefault("gmt_create_timestamp", "0").toString());
- featureMap.put("create_ts_diff", ExtractorUtils.getDaysBetween(createTs, ts));
- String date = LocalDateTime.ofInstant(Instant.ofEpochSecond(ts), ZoneId.systemDefault()).format(DateTimeFormatter.ofPattern("yyyy-MM-dd"));
- String festiveByDate = FestiveUtil.getFestiveByDate(date);
- featureMap.put("today_is_fes", 0);
- featureMap.put("video_fes_eq", 0);
- if (StringUtils.isNotBlank(festiveByDate)) {
- featureMap.put("today_is_fes", 1);
- if (StringUtils.equals(festiveByDate, festiveLabel2)) {
- featureMap.put("video_fes_eq", 1);
- }
- }
- }
- public static void handleC1(Map<String, Object> c1Feature, Map<String, Object> featureMap) {
- List<String> times = Arrays.asList("12h", "24h", "72h", "168h");
- List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_1_uv", "click", "str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
- for (String time : times) {
- for (String index : indexList) {
- double value = Double.parseDouble(featureMap.getOrDefault(index + "_" + time, "0").toString());
- featureMap.put("c1_" + index + "_" + time, value);
- }
- double rovn = Double.parseDouble(c1Feature.getOrDefault("rovn_" + time, "0").toString());
- double returnNUv = Double.parseDouble(c1Feature.getOrDefault("return_n_uv", "0").toString());
- featureMap.put("c1_rovn*log(r)_" + time, rovn * RankExtractorFeature_20240530.calLog(returnNUv));
- }
- }
- public static void handleC2ToC3(Map<String, Object> c2Feature, Map<String, Object> c3Feature, Map<String, Object> featureMap) {
- Map<String, Map<String, Object>> featureMaps = new HashMap<>();
- featureMaps.put("c2", c2Feature);
- featureMaps.put("c3", c3Feature);
- List<String> times = Arrays.asList("12h", "24h", "72h", "168h");
- List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_n_uv", "click");
- for (Map.Entry<String, Map<String, Object>> entry : featureMaps.entrySet()) {
- String key = entry.getKey();
- Map<String, Object> feature = entry.getValue();
- for (String time : times) {
- for (String index : indexList) {
- double value = Double.parseDouble(feature.getOrDefault(index + "_" + time, "0").toString());
- featureMap.put(key + "_" + index + "_" + time, value);
- }
- }
- }
- }
- public static void handleC4(Map<String, Object> c4Feature, Map<String, Object> featureMap) {
- List<String> times = Arrays.asList("24h", "72h", "168h");
- List<String> indexList = Arrays.asList("str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
- for (String time : times) {
- for (String index : indexList) {
- double value = Double.parseDouble(c4Feature.getOrDefault("avg_" + index + "_" + time, "0").toString());
- featureMap.put("c4_avg_" + index + "_" + time, value);
- double max = Double.parseDouble(c4Feature.getOrDefault("max_" + index + "_" + time, "0").toString());
- double min = Double.parseDouble(c4Feature.getOrDefault("min_" + index + "_" + time, "0").toString());
- featureMap.put("c4_diff_" + index + "_" + time, max - min);
- }
- }
- }
- public static void handleC5ToC6(Map<String, Object> c5Feature, Map<String, Object> c6Feature, Map<String, Object> videoMap, Map<String, Object> featureMap) {
- Map<String, Map<String, Object>> featureMaps = new HashMap<>();
- featureMaps.put("c5", c5Feature);
- featureMaps.put("c6", c6Feature);
- List<String> times = Arrays.asList("tags_1d", "tags_3d", "tags_7d");
- String title = videoMap.getOrDefault("title", "").toString();
- for (Map.Entry<String, Map<String, Object>> entry : featureMaps.entrySet()) {
- String key = entry.getKey();
- Map<String, Object> feature = entry.getValue();
- for (String time : times) {
- String tags = feature.getOrDefault(time, "").toString();
- Double[] scores = ExtractorUtils.funcC34567ForTagsNew(tags, title);
- featureMap.put(key + "_matchnum" + "_" + time, scores[0]);
- featureMap.put(key + "_maxscore" + "_" + time, scores[1]);
- featureMap.put(key + "_avgscore" + "_" + time, scores[2]);
- }
- }
- }
- public static Map<String, Map<String, String[]>> handleC7ToC8(Map<String, Object> c7Feature, Map<String, Object> c8Feature) {
- Map<String, Map<String, String[]>> resultMap = new HashMap<>();
- Map<String, Map<String, Object>> featureMaps = new HashMap<>();
- featureMaps.put("c7", c7Feature);
- featureMaps.put("c8", c8Feature);
- List<String> indexList = Arrays.asList("share", "return");
- for (Map.Entry<String, Map<String, Object>> entry : featureMaps.entrySet()) {
- String key = entry.getKey();
- Map<String, Object> feature = entry.getValue();
- for (String index : indexList) {
- if (feature.containsKey(index)) {
- Map<String, String[]> cfMap = new HashMap<>();
- String[] entries = feature.get(index).toString().split(",");
- for (String e : entries) {
- String[] rList = e.split(":");
- if (rList.length >= 4) {
- String vid = rList[0];
- String value1 = rList[1];
- String value2 = rList[2];
- String value3 = rList[3];
- String[] strs = {value1, value2, value3};
- cfMap.put(vid, strs);
- }
- }
- resultMap.put(key, cfMap);
- }
- }
- }
- return resultMap;
- }
- public static void useC7ToC8(Map<String, Map<String, String[]>> map, String vid, Map<String, Object> featureMap) {
- if (StringUtils.isBlank(vid)) {
- return;
- }
- for (String key : Arrays.asList("c6", "c7")) {
- for (String action : Arrays.asList("share", "return")) {
- String featureKey = key + "_" + action;
- if (map.containsKey(featureKey)) {
- Map<String, String[]> cfMap = map.get(featureKey);
- String[] scores = cfMap.get(vid);
- featureMap.put(featureKey + "_score", Double.parseDouble(scores[0]));
- featureMap.put(featureKey + "_num", Double.parseDouble(scores[1]));
- featureMap.put(featureKey + "_rank", ExtractorUtils.reciprocal(Double.parseDouble(scores[2])));
- }
- }
- }
- }
- public static void handleD3(Map<String, Object> d3Feature, Map<String, Object> featureMap) {
- for (String index : Arrays.asList("exp", "return_n", "rovn")) {
- double value = Double.parseDouble(d3Feature.getOrDefault(index, "0").toString());
- featureMap.put("d3_" + index, value);
- }
- }
- public static void handleD1(Map<String, Object> d4Feature, Map<String, Object> featureMap) {
- double rosCfScores = Double.parseDouble(d4Feature.getOrDefault("ros_cf_score", "0").toString());
- featureMap.put("d1_ros_cf_score", rosCfScores);
- double rovCfScores = Double.parseDouble(d4Feature.getOrDefault("rov_cf_score", "0").toString());
- featureMap.put("d1_rov_cf_score", rovCfScores);
- double rosCfRank = Double.parseDouble(d4Feature.getOrDefault("ros_cf_rank", "0").toString());
- featureMap.put("d1_ros_cf_rank", ExtractorUtils.reciprocal(rosCfRank));
- double rovCfRank = Double.parseDouble(d4Feature.getOrDefault("rov_cf_rank", "0").toString());
- featureMap.put("d1_rov_cf_rank", ExtractorUtils.reciprocal(rovCfRank));
- }
- public static void handleD2(Map<String, Object> d5Feature, Map<String, Object> featureMap) {
- double score = Double.parseDouble(d5Feature.getOrDefault("score", "0").toString());
- featureMap.put("d2_score", score);
- double rank = Double.parseDouble(d5Feature.getOrDefault("rank", "0").toString());
- featureMap.put("d2_rank", ExtractorUtils.reciprocal(rank));
- }
- public static void handleVideoSimilarity(Map<String, Object> videoFeature, Map<String, Object> headVideoFeature, Map<String, Object> featureMap) {
- String headVideoTitle = headVideoFeature.getOrDefault("title", "").toString();
- String headVideoMergeCate2 = headVideoFeature.getOrDefault("merge_second_level_cate", "").toString();
- String headVideoMergeCate1 = headVideoFeature.getOrDefault("merge_first_level_cate", "").toString();
- String headVideoFestiveLabel2 = headVideoFeature.getOrDefault("festive_label2", "").toString();
- String videoTitle = videoFeature.getOrDefault("title", "").toString();
- String videoMergeCate2 = videoFeature.getOrDefault("merge_second_level_cate", "").toString();
- String videoMergeCate1 = videoFeature.getOrDefault("merge_first_level_cate", "").toString();
- String videoFestiveLabel2 = videoFeature.getOrDefault("festive_label2", "").toString();
- double titleSimilarity = ExtractFeature20250218.calcTxtSimilarity(headVideoTitle, videoTitle);
- double headTitleAndMerge1Similarity = ExtractFeature20250218.calcTxtSimilarity(headVideoTitle, videoMergeCate1);
- double headTitleAndMerge2Similarity = ExtractFeature20250218.calcTxtSimilarity(headVideoTitle, videoMergeCate2);
- double headTitleAndFestiveSimilarity = ExtractFeature20250218.calcTxtSimilarity(headVideoTitle, videoFestiveLabel2);
- double merge1Similarity = ExtractFeature20250218.calcTxtSimilarity(headVideoMergeCate1, videoMergeCate1);
- double merge2Similarity = ExtractFeature20250218.calcTxtSimilarity(headVideoMergeCate2, videoMergeCate2);
- double festiveSimilarity = ExtractFeature20250218.calcTxtSimilarity(headVideoFestiveLabel2, videoFestiveLabel2);
- featureMap.put("title_sim", titleSimilarity);
- featureMap.put("head_title_merge1_sim", headTitleAndMerge1Similarity);
- featureMap.put("head_title_merge2_sim", headTitleAndMerge2Similarity);
- featureMap.put("head_title_festive_sim", headTitleAndFestiveSimilarity);
- featureMap.put("merge1_sim", merge1Similarity);
- featureMap.put("merge2_sim", merge2Similarity);
- featureMap.put("festive_sim", festiveSimilarity);
- }
- private static double calcTxtSimilarity(String txt1, String txt2) {
- if (StringUtils.isBlank(txt1) || StringUtils.isBlank(txt2)) {
- return 0d;
- }
- return SimilarityUtils.word2VecSimilarity(txt1, txt2);
- }
- }
|