// ExtractFeature20250218.java (16 KB)
  1. package examples.extractor.v20250218;
  2. import examples.extractor.ExtractorUtils;
  3. import examples.extractor.RankExtractorFeature_20240530;
  4. import examples.utils.FestiveUtil;
  5. import examples.utils.SimilarityUtils;
  6. import org.apache.commons.lang3.StringUtils;
  7. import java.time.Instant;
  8. import java.time.LocalDateTime;
  9. import java.time.ZoneId;
  10. import java.time.format.DateTimeFormatter;
  11. import java.util.Arrays;
  12. import java.util.HashMap;
  13. import java.util.List;
  14. import java.util.Map;
  15. public class ExtractFeature20250218 {
  16. private ExtractFeature20250218() {
  17. }
  18. public static void handleB1(Map<String, Object> b1Feature, Map<String, Object> featureMap) {
  19. List<String> times = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h", "168h");
  20. List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_1_uv", "str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
  21. for (String time : times) {
  22. for (String index : indexList) {
  23. double value = Double.parseDouble(b1Feature.getOrDefault(index + "_" + time, "0").toString());
  24. featureMap.put("b1_" + index + "_" + time, value);
  25. }
  26. double rovn = Double.parseDouble(b1Feature.getOrDefault("rovn_" + time, "0").toString());
  27. double returnNUv = Double.parseDouble(b1Feature.getOrDefault("return_1_uv_" + time, "0").toString());
  28. featureMap.put("b1_rovn*log(r)_" + time, rovn * RankExtractorFeature_20240530.calLog(returnNUv));
  29. }
  30. }
  31. public static void handleB2ToB11AndB13(Map<String, Map<String, Object>> videoFeature, Map<String, Object> featureMap) {
  32. List<String> times = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h", "168h");
  33. List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_n_uv", "str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
  34. for (Map.Entry<String, Map<String, Object>> entry : videoFeature.entrySet()) {
  35. String key = entry.getKey();
  36. Map<String, Object> feature = entry.getValue();
  37. for (String time : times) {
  38. for (String index : indexList) {
  39. double value = Double.parseDouble(feature.getOrDefault(index + "_" + time, "0").toString());
  40. featureMap.put(key + "_" + index + "_" + time, value);
  41. }
  42. double rovn = Double.parseDouble(feature.getOrDefault("rovn_" + time, "0").toString());
  43. double returnNUv = Double.parseDouble(feature.getOrDefault("return_n_uv_" + time, "0").toString());
  44. featureMap.put(key + "_rovn*log(r)_" + time, rovn * RankExtractorFeature_20240530.calLog(returnNUv));
  45. }
  46. }
  47. }
  48. public static void handleB12(Map<String, Object> b12Feature, Map<String, Object> featureMap) {
  49. List<String> times = Arrays.asList("7d", "14d", "30d", "60d");
  50. List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_n_uv", "str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
  51. for (String time : times) {
  52. for (String index : indexList) {
  53. double value = Double.parseDouble(b12Feature.getOrDefault(index + "_" + time, "0").toString());
  54. featureMap.put("b12_" + index + "_" + time, value);
  55. }
  56. double rovn = Double.parseDouble(b12Feature.getOrDefault("rovn_" + time, "0").toString());
  57. double returnNUv = Double.parseDouble(b12Feature.getOrDefault("return_n_uv_"+time, "0").toString());
  58. featureMap.put("b12_rovn*log(r)_" + time, rovn * RankExtractorFeature_20240530.calLog(returnNUv));
  59. }
  60. }
  61. public static void handleVideoBasicFeature(Map<String, Object> videoFeature, long ts, Map<String, Object> featureMap) {
  62. Double totalTime = Double.parseDouble(videoFeature.getOrDefault("total_time", "0").toString());
  63. Double width = Double.parseDouble(videoFeature.getOrDefault("width", "0d").toString());
  64. Double height = Double.parseDouble(videoFeature.getOrDefault("height", "0d").toString());
  65. Double size = Double.parseDouble(videoFeature.getOrDefault("size", "0d").toString());
  66. Double bit_rate = Double.parseDouble(videoFeature.getOrDefault("bit_rate", "0d").toString());
  67. String festiveLabel1 = videoFeature.getOrDefault("festive_label1", "").toString();
  68. String festiveLabel2 = videoFeature.getOrDefault("festive_label2", "").toString();
  69. featureMap.put("total_time", totalTime);
  70. featureMap.put("width", width);
  71. featureMap.put("height", height);
  72. featureMap.put("size", size);
  73. featureMap.put("bit_rate", bit_rate);
  74. featureMap.put("width/height", ExtractorUtils.divisionDouble(width, height));
  75. featureMap.put("is_festive", 0);
  76. featureMap.put("is_greeting", 0);
  77. if (StringUtils.equals(festiveLabel1, "节假日")) {
  78. featureMap.put("is_festive", 1);
  79. } else if (StringUtils.equals(festiveLabel1, "问候语")) {
  80. featureMap.put("is_greeting", 1);
  81. }
  82. featureMap.put("hour", ExtractorUtils.getHourByTimestamp(ts));
  83. featureMap.put("day_of_week", ExtractorUtils.getDayOfWeekByTimestamp(ts));
  84. long createTs = Long.parseLong(videoFeature.getOrDefault("gmt_create_timestamp", "0").toString());
  85. featureMap.put("create_ts_diff", ExtractorUtils.getDaysBetween(createTs, ts));
  86. String date = LocalDateTime.ofInstant(Instant.ofEpochSecond(ts), ZoneId.systemDefault()).format(DateTimeFormatter.ofPattern("yyyy-MM-dd"));
  87. String festiveByDate = FestiveUtil.getFestiveByDate(date);
  88. featureMap.put("today_is_fes", 0);
  89. featureMap.put("video_fes_eq", 0);
  90. if (StringUtils.isNotBlank(festiveByDate)) {
  91. featureMap.put("today_is_fes", 1);
  92. if (StringUtils.equals(festiveByDate, festiveLabel2)) {
  93. featureMap.put("video_fes_eq", 1);
  94. }
  95. }
  96. }
  97. public static void handleC1(Map<String, Object> c1Feature, Map<String, Object> featureMap) {
  98. List<String> times = Arrays.asList("12h", "24h", "72h", "168h");
  99. List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_1_uv", "click", "str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
  100. for (String time : times) {
  101. for (String index : indexList) {
  102. double value = Double.parseDouble(featureMap.getOrDefault(index + "_" + time, "0").toString());
  103. featureMap.put("c1_" + index + "_" + time, value);
  104. }
  105. double rovn = Double.parseDouble(c1Feature.getOrDefault("rovn_" + time, "0").toString());
  106. double returnNUv = Double.parseDouble(c1Feature.getOrDefault("return_1_uv_"+time, "0").toString());
  107. featureMap.put("c1_rovn*log(r)_" + time, rovn * RankExtractorFeature_20240530.calLog(returnNUv));
  108. }
  109. }
  110. public static void handleC2ToC3(Map<String, Object> c2Feature, Map<String, Object> c3Feature, Map<String, Object> featureMap) {
  111. Map<String, Map<String, Object>> featureMaps = new HashMap<>();
  112. featureMaps.put("c2", c2Feature);
  113. featureMaps.put("c3", c3Feature);
  114. List<String> times = Arrays.asList("12h", "24h", "72h", "168h");
  115. List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_n_uv", "click");
  116. for (Map.Entry<String, Map<String, Object>> entry : featureMaps.entrySet()) {
  117. String key = entry.getKey();
  118. Map<String, Object> feature = entry.getValue();
  119. for (String time : times) {
  120. for (String index : indexList) {
  121. double value = Double.parseDouble(feature.getOrDefault(index + "_" + time, "0").toString());
  122. featureMap.put(key + "_" + index + "_" + time, value);
  123. }
  124. }
  125. }
  126. }
  127. public static void handleC4(Map<String, Object> c4Feature, Map<String, Object> featureMap) {
  128. List<String> times = Arrays.asList("24h", "72h", "168h");
  129. List<String> indexList = Arrays.asList("str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
  130. for (String time : times) {
  131. for (String index : indexList) {
  132. double value = Double.parseDouble(c4Feature.getOrDefault("avg_" + index + "_" + time, "0").toString());
  133. featureMap.put("c4_avg_" + index + "_" + time, value);
  134. double max = Double.parseDouble(c4Feature.getOrDefault("max_" + index + "_" + time, "0").toString());
  135. double min = Double.parseDouble(c4Feature.getOrDefault("min_" + index + "_" + time, "0").toString());
  136. featureMap.put("c4_diff_" + index + "_" + time, max - min);
  137. }
  138. }
  139. }
  140. public static void handleC5ToC6(Map<String, Object> c5Feature, Map<String, Object> c6Feature, Map<String, Object> videoMap, Map<String, Object> featureMap) {
  141. Map<String, Map<String, Object>> featureMaps = new HashMap<>();
  142. featureMaps.put("c5", c5Feature);
  143. featureMaps.put("c6", c6Feature);
  144. List<String> times = Arrays.asList("tags_1d", "tags_3d", "tags_7d");
  145. String title = videoMap.getOrDefault("title", "").toString();
  146. for (Map.Entry<String, Map<String, Object>> entry : featureMaps.entrySet()) {
  147. String key = entry.getKey();
  148. Map<String, Object> feature = entry.getValue();
  149. for (String time : times) {
  150. String tags = feature.getOrDefault(time, "").toString();
  151. Double[] scores = ExtractorUtils.funcC34567ForTagsNew(tags, title);
  152. featureMap.put(key + "_matchnum" + "_" + time, scores[0]);
  153. featureMap.put(key + "_maxscore" + "_" + time, scores[1]);
  154. featureMap.put(key + "_avgscore" + "_" + time, scores[2]);
  155. }
  156. }
  157. }
  158. public static Map<String, Map<String, String[]>> handleC7ToC8(Map<String, Object> c7Feature, Map<String, Object> c8Feature) {
  159. Map<String, Map<String, String[]>> resultMap = new HashMap<>();
  160. Map<String, Map<String, Object>> featureMaps = new HashMap<>();
  161. featureMaps.put("c7", c7Feature);
  162. featureMaps.put("c8", c8Feature);
  163. List<String> indexList = Arrays.asList("share", "return");
  164. for (Map.Entry<String, Map<String, Object>> entry : featureMaps.entrySet()) {
  165. String key = entry.getKey();
  166. Map<String, Object> feature = entry.getValue();
  167. for (String index : indexList) {
  168. if (feature.containsKey(index)) {
  169. Map<String, String[]> cfMap = new HashMap<>();
  170. String[] entries = feature.get(index).toString().split(",");
  171. for (String e : entries) {
  172. String[] rList = e.split(":");
  173. if (rList.length >= 4) {
  174. String vid = rList[0];
  175. String value1 = rList[1];
  176. String value2 = rList[2];
  177. String value3 = rList[3];
  178. String[] strs = {value1, value2, value3};
  179. cfMap.put(vid, strs);
  180. }
  181. }
  182. resultMap.put(key, cfMap);
  183. }
  184. }
  185. }
  186. return resultMap;
  187. }
  188. public static void useC7ToC8(Map<String, Map<String, String[]>> map, String vid, Map<String, Object> featureMap) {
  189. if (StringUtils.isBlank(vid)) {
  190. return;
  191. }
  192. for (String key : Arrays.asList("c6", "c7")) {
  193. for (String action : Arrays.asList("share", "return")) {
  194. String featureKey = key + "_" + action;
  195. if (map.containsKey(featureKey)) {
  196. Map<String, String[]> cfMap = map.get(featureKey);
  197. String[] scores = cfMap.get(vid);
  198. featureMap.put(featureKey + "_score", Double.parseDouble(scores[0]));
  199. featureMap.put(featureKey + "_num", Double.parseDouble(scores[1]));
  200. featureMap.put(featureKey + "_rank", ExtractorUtils.reciprocal(Double.parseDouble(scores[2])));
  201. }
  202. }
  203. }
  204. }
  205. public static void handleD3(Map<String, Object> d3Feature, Map<String, Object> featureMap) {
  206. for (String index : Arrays.asList("exp", "return_n", "rovn")) {
  207. double value = Double.parseDouble(d3Feature.getOrDefault(index, "0").toString());
  208. featureMap.put("d3_" + index, value);
  209. }
  210. }
  211. public static void handleD1(Map<String, Object> d4Feature, Map<String, Object> featureMap) {
  212. double rosCfScores = Double.parseDouble(d4Feature.getOrDefault("ros_cf_score", "0").toString());
  213. featureMap.put("d1_ros_cf_score", rosCfScores);
  214. double rovCfScores = Double.parseDouble(d4Feature.getOrDefault("rov_cf_score", "0").toString());
  215. featureMap.put("d1_rov_cf_score", rovCfScores);
  216. double rosCfRank = Double.parseDouble(d4Feature.getOrDefault("ros_cf_rank", "0").toString());
  217. featureMap.put("d1_ros_cf_rank", ExtractorUtils.reciprocal(rosCfRank));
  218. double rovCfRank = Double.parseDouble(d4Feature.getOrDefault("rov_cf_rank", "0").toString());
  219. featureMap.put("d1_rov_cf_rank", ExtractorUtils.reciprocal(rovCfRank));
  220. }
  221. public static void handleD2(Map<String, Object> d5Feature, Map<String, Object> featureMap) {
  222. double score = Double.parseDouble(d5Feature.getOrDefault("score", "0").toString());
  223. featureMap.put("d2_score", score);
  224. double rank = Double.parseDouble(d5Feature.getOrDefault("rank", "0").toString());
  225. featureMap.put("d2_rank", ExtractorUtils.reciprocal(rank));
  226. }
  227. public static void handleVideoSimilarity(Map<String, Object> videoFeature, Map<String, Object> headVideoFeature, Map<String, Object> featureMap) {
  228. String headVideoTitle = headVideoFeature.getOrDefault("title", "").toString();
  229. String headVideoMergeCate2 = headVideoFeature.getOrDefault("merge_second_level_cate", "").toString();
  230. String headVideoMergeCate1 = headVideoFeature.getOrDefault("merge_first_level_cate", "").toString();
  231. String headVideoFestiveLabel2 = headVideoFeature.getOrDefault("festive_label2", "").toString();
  232. String videoTitle = videoFeature.getOrDefault("title", "").toString();
  233. String videoMergeCate2 = videoFeature.getOrDefault("merge_second_level_cate", "").toString();
  234. String videoMergeCate1 = videoFeature.getOrDefault("merge_first_level_cate", "").toString();
  235. String videoFestiveLabel2 = videoFeature.getOrDefault("festive_label2", "").toString();
  236. double titleSimilarity = ExtractFeature20250218.calcTxtSimilarity(headVideoTitle, videoTitle);
  237. double headTitleAndMerge1Similarity = ExtractFeature20250218.calcTxtSimilarity(headVideoTitle, videoMergeCate1);
  238. double headTitleAndMerge2Similarity = ExtractFeature20250218.calcTxtSimilarity(headVideoTitle, videoMergeCate2);
  239. double headTitleAndFestiveSimilarity = ExtractFeature20250218.calcTxtSimilarity(headVideoTitle, videoFestiveLabel2);
  240. double merge1Similarity = ExtractFeature20250218.calcTxtSimilarity(headVideoMergeCate1, videoMergeCate1);
  241. double merge2Similarity = ExtractFeature20250218.calcTxtSimilarity(headVideoMergeCate2, videoMergeCate2);
  242. double festiveSimilarity = ExtractFeature20250218.calcTxtSimilarity(headVideoFestiveLabel2, videoFestiveLabel2);
  243. featureMap.put("title_sim", titleSimilarity);
  244. featureMap.put("head_title_merge1_sim", headTitleAndMerge1Similarity);
  245. featureMap.put("head_title_merge2_sim", headTitleAndMerge2Similarity);
  246. featureMap.put("head_title_festive_sim", headTitleAndFestiveSimilarity);
  247. featureMap.put("merge1_sim", merge1Similarity);
  248. featureMap.put("merge2_sim", merge2Similarity);
  249. featureMap.put("festive_sim", festiveSimilarity);
  250. }
  251. private static double calcTxtSimilarity(String txt1, String txt2) {
  252. if (StringUtils.isBlank(txt1) || StringUtils.isBlank(txt2)) {
  253. return 0d;
  254. }
  255. return SimilarityUtils.word2VecSimilarity(txt1, txt2);
  256. }
  257. }