package examples.extractor.v20250218;

import examples.extractor.ExtractorUtils;
import examples.extractor.RankExtractorFeature_20240530;
import examples.utils.FestiveUtil;
import examples.utils.SimilarityUtils;
import org.apache.commons.lang3.StringUtils;

import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Static helpers that copy raw feature groups (b1..b13, c1..c8, d1..d3 and basic video
 * attributes) into one flat feature map keyed by feature name.
 *
 * <p>Every {@code handle*} method reads from its input map(s) and writes into the supplied
 * {@code featureMap}. A key that is missing from an input map, or mapped to {@code null},
 * is treated as {@code 0} for numeric features and as the empty string for text features.
 * A value that is present but not parseable as a double still raises
 * {@link NumberFormatException}.
 *
 * <p>NOTE(review): input maps are declared {@code Map<String, ?>} because they are only read;
 * {@code featureMap} is declared {@code Map<String, Object>} because both {@code Double} and
 * {@code Integer} values are stored in it. Confirm these type arguments against the callers.
 */
public class ExtractFeature20250218 {

    /** Day pattern handed to {@code FestiveUtil.getFestiveByDate}; the formatter is immutable and reusable. */
    private static final DateTimeFormatter DAY_FORMAT = DateTimeFormatter.ofPattern("yyyy-MM-dd");

    /** Hour-based windows used by the b1..b11 and b13 groups. */
    private static final List<String> HOUR_WINDOWS =
            Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h", "168h");

    /** Day-based windows used by the b12 group. */
    private static final List<String> DAY_WINDOWS = Arrays.asList("7d", "14d", "30d", "60d");

    /** Hour-based windows used by the c1..c3 groups. */
    private static final List<String> C_WINDOWS = Arrays.asList("12h", "24h", "72h", "168h");

    /** Metrics of the b1 group (its return metric is {@code return_1_uv}). */
    private static final List<String> B1_METRICS = Arrays.asList(
            "is_share", "share_cnt", "is_return_1", "return_1_uv",
            "str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");

    /** Metrics of the b2..b13 groups (their return metric is {@code return_n_uv}). */
    private static final List<String> BN_METRICS = Arrays.asList(
            "is_share", "share_cnt", "is_return_1", "return_n_uv",
            "str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");

    /** Metrics of the c1 group. */
    private static final List<String> C1_METRICS = Arrays.asList(
            "is_share", "share_cnt", "is_return_1", "return_1_uv", "click",
            "str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");

    private ExtractFeature20250218() {
    }

    /**
     * Copies the b1 group into {@code featureMap} as {@code b1_<metric>_<window>} and adds
     * {@code b1_rovn*log(r)_<window>} for every hour window.
     *
     * @param b1Feature  raw b1 values keyed by {@code <metric>_<window>}
     * @param featureMap output map that receives the features
     */
    public static void handleB1(Map<String, ?> b1Feature, Map<String, Object> featureMap) {
        // NOTE(review): b1 uses "return_1_uv" while b2..b13 use "return_n_uv" - confirm this is intended.
        putWindowedFeatures("b1", b1Feature, HOUR_WINDOWS, B1_METRICS, "return_1_uv", featureMap);
    }

    /**
     * Copies every group contained in {@code videoFeature} into {@code featureMap} as
     * {@code <group>_<metric>_<window>} and adds {@code <group>_rovn*log(r)_<window>}.
     *
     * @param videoFeature raw values per group; the outer key is used as the feature-name prefix
     * @param featureMap   output map that receives the features
     */
    public static void handleB2ToB11AndB13(Map<String, ? extends Map<String, ?>> videoFeature,
                                           Map<String, Object> featureMap) {
        for (Map.Entry<String, ? extends Map<String, ?>> group : videoFeature.entrySet()) {
            putWindowedFeatures(group.getKey(), group.getValue(), HOUR_WINDOWS, BN_METRICS,
                    "return_n_uv", featureMap);
        }
    }

    /**
     * Copies the b12 group into {@code featureMap} as {@code b12_<metric>_<window>} and adds
     * {@code b12_rovn*log(r)_<window>} for every day window.
     *
     * @param b12Feature raw b12 values keyed by {@code <metric>_<window>}
     * @param featureMap output map that receives the features
     */
    public static void handleB12(Map<String, ?> b12Feature, Map<String, Object> featureMap) {
        putWindowedFeatures("b12", b12Feature, DAY_WINDOWS, BN_METRICS, "return_n_uv", featureMap);
    }

    /**
     * Writes basic video attributes plus time- and festival-related features derived from
     * {@code ts}.
     *
     * @param videoFeature raw video attributes
     * @param ts           timestamp in epoch seconds (it is passed to {@link Instant#ofEpochSecond(long)})
     * @param featureMap   output map that receives the features
     */
    public static void handleVideoBasicFeature(Map<String, ?> videoFeature, long ts,
                                               Map<String, Object> featureMap) {
        // Boxed on purpose: the values are handed to ExtractorUtils exactly as before.
        Double totalTime = readDouble(videoFeature, "total_time");
        Double width = readDouble(videoFeature, "width");
        Double height = readDouble(videoFeature, "height");
        Double size = readDouble(videoFeature, "size");
        Double bitRate = readDouble(videoFeature, "bit_rate");
        String festiveLabel1 = readString(videoFeature, "festive_label1");
        String festiveLabel2 = readString(videoFeature, "festive_label2");

        featureMap.put("total_time", totalTime);
        featureMap.put("width", width);
        featureMap.put("height", height);
        featureMap.put("size", size);
        featureMap.put("bit_rate", bitRate);
        featureMap.put("width/height", ExtractorUtils.divisionDouble(width, height));

        // NOTE(review): the 0/1 flags below are stored as Integer while most other features
        // are Double - confirm that consumers of featureMap accept both.
        featureMap.put("is_festive", 0);
        featureMap.put("is_greeting", 0);
        if (StringUtils.equals(festiveLabel1, "节假日")) { // label text: "holiday"
            featureMap.put("is_festive", 1);
        } else if (StringUtils.equals(festiveLabel1, "问候语")) { // label text: "greeting"
            featureMap.put("is_greeting", 1);
        }

        featureMap.put("hour", ExtractorUtils.getHourByTimestamp(ts));
        featureMap.put("day_of_week", ExtractorUtils.getDayOfWeekByTimestamp(ts));

        Object rawCreateTs = videoFeature.get("gmt_create_timestamp");
        long createTs = rawCreateTs == null ? 0L : Long.parseLong(rawCreateTs.toString());
        featureMap.put("create_ts_diff", ExtractorUtils.getDaysBetween(createTs, ts));

        // The calendar day is resolved in the JVM default time zone.
        String date = LocalDateTime.ofInstant(Instant.ofEpochSecond(ts), ZoneId.systemDefault())
                .format(DAY_FORMAT);
        String festiveByDate = FestiveUtil.getFestiveByDate(date);
        featureMap.put("today_is_fes", 0);
        featureMap.put("video_fes_eq", 0);
        if (StringUtils.isNotBlank(festiveByDate)) {
            featureMap.put("today_is_fes", 1);
            if (StringUtils.equals(festiveByDate, festiveLabel2)) {
                featureMap.put("video_fes_eq", 1);
            }
        }
    }

    /**
     * Copies the c1 group into {@code featureMap} as {@code c1_<metric>_<window>} and adds
     * {@code c1_rovn*log(r)_<window>}.
     *
     * <p>Values are read from {@code c1Feature}. An earlier revision read them from
     * {@code featureMap}, so the copied features never reflected the c1 input.
     *
     * @param c1Feature  raw c1 values keyed by {@code <metric>_<window>}
     * @param featureMap output map that receives the features
     */
    public static void handleC1(Map<String, ?> c1Feature, Map<String, Object> featureMap) {
        putWindowedFeatures("c1", c1Feature, C_WINDOWS, C1_METRICS, "return_1_uv", featureMap);
    }

    /**
     * Copies the c2 and c3 groups into {@code featureMap} as {@code c2_<metric>_<window>} and
     * {@code c3_<metric>_<window>}.
     *
     * @param c2Feature  raw c2 values keyed by {@code <metric>_<window>}
     * @param c3Feature  raw c3 values keyed by {@code <metric>_<window>}
     * @param featureMap output map that receives the features
     */
    public static void handleC2ToC3(Map<String, ?> c2Feature, Map<String, ?> c3Feature,
                                    Map<String, Object> featureMap) {
        Map<String, Map<String, ?>> groups = new HashMap<>();
        groups.put("c2", c2Feature);
        groups.put("c3", c3Feature);

        List<String> metrics = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_n_uv", "click");
        for (Map.Entry<String, Map<String, ?>> group : groups.entrySet()) {
            String prefix = group.getKey();
            Map<String, ?> source = group.getValue();
            for (String window : C_WINDOWS) {
                for (String metric : metrics) {
                    featureMap.put(prefix + "_" + metric + "_" + window,
                            readDouble(source, metric + "_" + window));
                }
            }
        }
    }

    /**
     * Writes, for every metric and window of the c4 group, the average
     * ({@code c4_avg_<metric>_<window>}) and the max-min spread
     * ({@code c4_diff_<metric>_<window>}).
     *
     * @param c4Feature  raw c4 values keyed by {@code avg_|max_|min_<metric>_<window>}
     * @param featureMap output map that receives the features
     */
    public static void handleC4(Map<String, ?> c4Feature, Map<String, Object> featureMap) {
        List<String> windows = Arrays.asList("24h", "72h", "168h");
        List<String> metrics = Arrays.asList("str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
        for (String window : windows) {
            for (String metric : metrics) {
                String suffix = metric + "_" + window;
                featureMap.put("c4_avg_" + suffix, readDouble(c4Feature, "avg_" + suffix));
                double max = readDouble(c4Feature, "max_" + suffix);
                double min = readDouble(c4Feature, "min_" + suffix);
                featureMap.put("c4_diff_" + suffix, max - min);
            }
        }
    }

    /**
     * Scores the tag strings of the c5 and c6 groups against the video title and writes the
     * three returned scores as {@code <group>_matchnum_<window>}, {@code <group>_maxscore_<window>}
     * and {@code <group>_avgscore_<window>}.
     *
     * @param c5Feature  raw c5 values keyed by {@code tags_1d}, {@code tags_3d}, {@code tags_7d}
     * @param c6Feature  raw c6 values keyed like {@code c5Feature}
     * @param videoMap   video attributes; only {@code title} is read
     * @param featureMap output map that receives the features
     */
    public static void handleC5ToC6(Map<String, ?> c5Feature, Map<String, ?> c6Feature,
                                    Map<String, ?> videoMap, Map<String, Object> featureMap) {
        Map<String, Map<String, ?>> groups = new HashMap<>();
        groups.put("c5", c5Feature);
        groups.put("c6", c6Feature);

        List<String> windows = Arrays.asList("tags_1d", "tags_3d", "tags_7d");
        String title = readString(videoMap, "title");
        for (Map.Entry<String, Map<String, ?>> group : groups.entrySet()) {
            String prefix = group.getKey();
            Map<String, ?> source = group.getValue();
            for (String window : windows) {
                String tags = readString(source, window);
                Double[] scores = ExtractorUtils.funcC34567ForTagsNew(tags, title);
                featureMap.put(prefix + "_matchnum" + "_" + window, scores[0]);
                featureMap.put(prefix + "_maxscore" + "_" + window, scores[1]);
                featureMap.put(prefix + "_avgscore" + "_" + window, scores[2]);
            }
        }
    }

    /**
     * Parses the {@code share} and {@code return} entries of the c7 and c8 groups into lookup
     * tables.
     *
     * <p>Each raw entry is a comma-separated list of {@code vid:v1:v2:v3} items; items with
     * fewer than four colon-separated fields are ignored. The result is keyed by
     * {@code <group>_<action>} (for example {@code c7_share}), which is the key that
     * {@link #useC7ToC8} looks up. An earlier revision keyed the result by the group alone, so
     * the {@code return} table replaced the {@code share} table.
     *
     * @param c7Feature raw c7 values; only {@code share} and {@code return} are read
     * @param c8Feature raw c8 values; only {@code share} and {@code return} are read
     * @return tables mapping vid to its three raw values, keyed by {@code <group>_<action>}
     */
    public static Map<String, Map<String, String[]>> handleC7ToC8(Map<String, ?> c7Feature,
                                                                  Map<String, ?> c8Feature) {
        Map<String, Map<String, String[]>> resultMap = new HashMap<>();
        Map<String, Map<String, ?>> groups = new HashMap<>();
        groups.put("c7", c7Feature);
        groups.put("c8", c8Feature);

        for (Map.Entry<String, Map<String, ?>> group : groups.entrySet()) {
            String prefix = group.getKey();
            Map<String, ?> source = group.getValue();
            for (String action : Arrays.asList("share", "return")) {
                Object raw = source.get(action);
                if (raw == null) {
                    continue;
                }
                Map<String, String[]> cfMap = new HashMap<>();
                for (String item : raw.toString().split(",")) {
                    String[] fields = item.split(":");
                    if (fields.length >= 4) {
                        cfMap.put(fields[0], new String[] {fields[1], fields[2], fields[3]});
                    }
                }
                resultMap.put(prefix + "_" + action, cfMap);
            }
        }
        return resultMap;
    }

    /**
     * Looks up {@code vid} in the tables produced by {@link #handleC7ToC8} and writes
     * {@code <group>_<action>_score}, {@code _num} and {@code _rank} (the reciprocal of the
     * stored rank) for every table that contains the vid.
     *
     * <p>Nothing is written when {@code vid} is blank, or for a table that is missing or does
     * not contain the vid.
     *
     * @param map        tables keyed by {@code <group>_<action>}
     * @param vid        video id to look up
     * @param featureMap output map that receives the features
     */
    public static void useC7ToC8(Map<String, Map<String, String[]>> map, String vid,
                                 Map<String, Object> featureMap) {
        if (StringUtils.isBlank(vid)) {
            return;
        }
        for (String key : Arrays.asList("c7", "c8")) {
            for (String action : Arrays.asList("share", "return")) {
                String featureKey = key + "_" + action;
                Map<String, String[]> cfMap = map.get(featureKey);
                if (cfMap == null) {
                    continue;
                }
                String[] scores = cfMap.get(vid);
                if (scores == null || scores.length < 3) {
                    // The vid has no entry in this table.
                    continue;
                }
                featureMap.put(featureKey + "_score", Double.parseDouble(scores[0]));
                featureMap.put(featureKey + "_num", Double.parseDouble(scores[1]));
                featureMap.put(featureKey + "_rank",
                        ExtractorUtils.reciprocal(Double.parseDouble(scores[2])));
            }
        }
    }

    /**
     * Copies {@code exp}, {@code return_n} and {@code rovn} into {@code featureMap} with a
     * {@code d3_} prefix.
     *
     * @param d3Feature  raw d3 values
     * @param featureMap output map that receives the features
     */
    public static void handleD3(Map<String, ?> d3Feature, Map<String, Object> featureMap) {
        for (String metric : Arrays.asList("exp", "return_n", "rovn")) {
            featureMap.put("d3_" + metric, readDouble(d3Feature, metric));
        }
    }

    /**
     * Writes the d1 scores as-is and the d1 ranks as reciprocals.
     *
     * @param d4Feature  raw values holding {@code ros_cf_score}, {@code rov_cf_score},
     *                   {@code ros_cf_rank} and {@code rov_cf_rank}
     * @param featureMap output map that receives the features
     */
    public static void handleD1(Map<String, ?> d4Feature, Map<String, Object> featureMap) {
        featureMap.put("d1_ros_cf_score", readDouble(d4Feature, "ros_cf_score"));
        featureMap.put("d1_rov_cf_score", readDouble(d4Feature, "rov_cf_score"));
        featureMap.put("d1_ros_cf_rank", ExtractorUtils.reciprocal(readDouble(d4Feature, "ros_cf_rank")));
        featureMap.put("d1_rov_cf_rank", ExtractorUtils.reciprocal(readDouble(d4Feature, "rov_cf_rank")));
    }

    /**
     * Writes the d2 score as-is and the d2 rank as a reciprocal.
     *
     * @param d5Feature  raw values holding {@code score} and {@code rank}
     * @param featureMap output map that receives the features
     */
    public static void handleD2(Map<String, ?> d5Feature, Map<String, Object> featureMap) {
        featureMap.put("d2_score", readDouble(d5Feature, "score"));
        featureMap.put("d2_rank", ExtractorUtils.reciprocal(readDouble(d5Feature, "rank")));
    }

    /**
     * Writes text-similarity features between a candidate video and the head video, computed
     * over title, merged first/second level category and festive label.
     *
     * @param videoFeature     attributes of the candidate video
     * @param headVideoFeature attributes of the head video
     * @param featureMap       output map that receives the features
     */
    public static void handleVideoSimilarity(Map<String, ?> videoFeature, Map<String, ?> headVideoFeature,
                                             Map<String, Object> featureMap) {
        String headVideoTitle = readString(headVideoFeature, "title");
        String headVideoMergeCate2 = readString(headVideoFeature, "merge_second_level_cate");
        String headVideoMergeCate1 = readString(headVideoFeature, "merge_first_level_cate");
        String headVideoFestiveLabel2 = readString(headVideoFeature, "festive_label2");

        String videoTitle = readString(videoFeature, "title");
        String videoMergeCate2 = readString(videoFeature, "merge_second_level_cate");
        String videoMergeCate1 = readString(videoFeature, "merge_first_level_cate");
        String videoFestiveLabel2 = readString(videoFeature, "festive_label2");

        double titleSimilarity = calcTxtSimilarity(headVideoTitle, videoTitle);
        double headTitleAndMerge1Similarity = calcTxtSimilarity(headVideoTitle, videoMergeCate1);
        double headTitleAndMerge2Similarity = calcTxtSimilarity(headVideoTitle, videoMergeCate2);
        double headTitleAndFestiveSimilarity = calcTxtSimilarity(headVideoTitle, videoFestiveLabel2);
        double merge1Similarity = calcTxtSimilarity(headVideoMergeCate1, videoMergeCate1);
        double merge2Similarity = calcTxtSimilarity(headVideoMergeCate2, videoMergeCate2);
        double festiveSimilarity = calcTxtSimilarity(headVideoFestiveLabel2, videoFestiveLabel2);

        featureMap.put("title_sim", titleSimilarity);
        featureMap.put("head_title_merge1_sim", headTitleAndMerge1Similarity);
        featureMap.put("head_title_merge2_sim", headTitleAndMerge2Similarity);
        featureMap.put("head_title_festive_sim", headTitleAndFestiveSimilarity);
        featureMap.put("merge1_sim", merge1Similarity);
        featureMap.put("merge2_sim", merge2Similarity);
        featureMap.put("festive_sim", festiveSimilarity);
    }

    /** Returns 0 when either text is blank, otherwise the word2vec similarity of the two texts. */
    private static double calcTxtSimilarity(String txt1, String txt2) {
        if (StringUtils.isBlank(txt1) || StringUtils.isBlank(txt2)) {
            return 0d;
        }
        return SimilarityUtils.word2VecSimilarity(txt1, txt2);
    }

    /**
     * Copies {@code <metric>_<window>} for every metric and window into {@code featureMap}
     * under {@code <prefix>_<metric>_<window>} and, per window, adds
     * {@code <prefix>_rovn*log(r)_<window>} computed from {@code rovn} and the given return metric.
     */
    private static void putWindowedFeatures(String prefix, Map<String, ?> source, List<String> windows,
                                            List<String> metrics, String returnUvMetric,
                                            Map<String, Object> featureMap) {
        for (String window : windows) {
            for (String metric : metrics) {
                featureMap.put(prefix + "_" + metric + "_" + window,
                        readDouble(source, metric + "_" + window));
            }
            double rovn = readDouble(source, "rovn_" + window);
            double returnUv = readDouble(source, returnUvMetric + "_" + window);
            featureMap.put(prefix + "_rovn*log(r)_" + window,
                    rovn * RankExtractorFeature_20240530.calLog(returnUv));
        }
    }

    /** Parses the value stored under {@code key} as a double; a missing or null value yields 0. */
    private static double readDouble(Map<String, ?> source, String key) {
        Object raw = source.get(key);
        return raw == null ? 0d : Double.parseDouble(raw.toString());
    }

    /** Returns the string form of the value stored under {@code key}; a missing or null value yields "". */
    private static String readString(Map<String, ?> source, String key) {
        Object raw = source.get(key);
        return raw == null ? "" : raw.toString();
    }
}