ExtractFeature20250218.java 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. package examples.extractor.v20250218;
  2. import examples.extractor.ExtractorUtils;
  3. import examples.extractor.RankExtractorFeature_20240530;
  4. import examples.utils.FestiveUtil;
  5. import examples.utils.SimilarityUtils;
  6. import org.apache.commons.lang3.StringUtils;
  7. import java.time.Instant;
  8. import java.time.LocalDateTime;
  9. import java.time.ZoneId;
  10. import java.time.format.DateTimeFormatter;
  11. import java.util.Arrays;
  12. import java.util.HashMap;
  13. import java.util.List;
  14. import java.util.Map;
  15. public class ExtractFeature20250218 {
  16. private ExtractFeature20250218() {
  17. }
  18. public static void handleB1(Map<String, Object> b1Feature, Map<String, Object> featureMap) {
  19. List<String> times = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h", "168h");
  20. List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_1_uv", "str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
  21. for (String time : times) {
  22. for (String index : indexList) {
  23. double value = Double.parseDouble(b1Feature.getOrDefault(index + "_" + time, "0").toString());
  24. featureMap.put("b1_" + index + "_" + time, value);
  25. }
  26. double rovn = Double.parseDouble(b1Feature.getOrDefault("rovn_" + time, "0").toString());
  27. double returnNUv = Double.parseDouble(b1Feature.getOrDefault("return_n_uv", "0").toString());
  28. featureMap.put("b1_rovn*log(r)_" + time, rovn * RankExtractorFeature_20240530.calLog(returnNUv));
  29. }
  30. }
  31. public static void handleB2ToB11AndB13(Map<String, Map<String, Object>> videoFeature, Map<String, Object> featureMap) {
  32. List<String> times = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h", "168h");
  33. List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_n_uv", "str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
  34. for (Map.Entry<String, Map<String, Object>> entry : videoFeature.entrySet()) {
  35. String key = entry.getKey();
  36. Map<String, Object> feature = entry.getValue();
  37. for (String time : times) {
  38. for (String index : indexList) {
  39. double value = Double.parseDouble(feature.getOrDefault(index + "_" + time, "0").toString());
  40. featureMap.put(key + "_" + index + "_" + time, value);
  41. }
  42. double rovn = Double.parseDouble(feature.getOrDefault("rovn_" + time, "0").toString());
  43. double returnNUv = Double.parseDouble(feature.getOrDefault("return_n_uv", "0").toString());
  44. featureMap.put(key + "_rovn*log(r)_" + time, rovn * RankExtractorFeature_20240530.calLog(returnNUv));
  45. }
  46. }
  47. }
  48. public static void handleB12(Map<String, Object> b12Feature, Map<String, Object> featureMap) {
  49. List<String> times = Arrays.asList("7d", "14d", "30d", "60d");
  50. List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_n_uv", "str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
  51. for (String time : times) {
  52. for (String index : indexList) {
  53. double value = Double.parseDouble(b12Feature.getOrDefault(index + "_" + time, "0").toString());
  54. featureMap.put("b12_" + index + "_" + time, value);
  55. }
  56. double rovn = Double.parseDouble(b12Feature.getOrDefault("rovn_" + time, "0").toString());
  57. double returnNUv = Double.parseDouble(b12Feature.getOrDefault("return_n_uv", "0").toString());
  58. featureMap.put("b12_rovn*log(r)_" + time, rovn * RankExtractorFeature_20240530.calLog(returnNUv));
  59. }
  60. }
  61. public static void handleVideoBasicFeature(Map<String, Object> videoFeature, long ts, Map<String, Object> featureMap) {
  62. Double totalTime = Double.parseDouble(videoFeature.getOrDefault("total_time", "0").toString());
  63. Double width = Double.parseDouble(videoFeature.getOrDefault("width", "0d").toString());
  64. Double height = Double.parseDouble(videoFeature.getOrDefault("height", "0d").toString());
  65. Double size = Double.parseDouble(videoFeature.getOrDefault("size", "0d").toString());
  66. Double bit_rate = Double.parseDouble(videoFeature.getOrDefault("bit_rate", "0d").toString());
  67. String festiveLabel1 = videoFeature.getOrDefault("festive_label1", "").toString();
  68. String festiveLabel2 = videoFeature.getOrDefault("festive_label2", "").toString();
  69. featureMap.put("total_time", totalTime);
  70. featureMap.put("width", width);
  71. featureMap.put("height", height);
  72. featureMap.put("size", size);
  73. featureMap.put("bit_rate", bit_rate);
  74. featureMap.put("width/height", ExtractorUtils.divisionDouble(width, height));
  75. featureMap.put("is_festive", 0);
  76. featureMap.put("is_greeting", 0);
  77. if (StringUtils.equals(festiveLabel1, "节假日")) {
  78. featureMap.put("is_festive", 1);
  79. } else if (StringUtils.equals(festiveLabel1, "问候语")) {
  80. featureMap.put("is_greeting", 1);
  81. }
  82. LocalDateTime now = LocalDateTime.ofInstant(Instant.ofEpochSecond(ts), ZoneId.systemDefault());
  83. featureMap.put("hour", now.getHour() + 1);
  84. featureMap.put("day_of_week", now.getDayOfWeek());
  85. long createTs = Long.parseLong(videoFeature.getOrDefault("gmt_create_timestamp", "0").toString());
  86. featureMap.put("create_ts_diff", ExtractorUtils.getDaysBetween(createTs, ts));
  87. String date = LocalDateTime.ofInstant(Instant.ofEpochSecond(ts), ZoneId.systemDefault()).format(DateTimeFormatter.ofPattern("yyyy-MM-dd"));
  88. String festiveByDate = FestiveUtil.getFestiveByDate(date);
  89. featureMap.put("today_is_fes", 0);
  90. featureMap.put("video_fes_eq", 0);
  91. if (StringUtils.isNotBlank(festiveByDate)) {
  92. featureMap.put("today_is_fes", 1);
  93. if (StringUtils.equals(festiveByDate, festiveLabel2)) {
  94. featureMap.put("video_fes_eq", 1);
  95. }
  96. }
  97. }
  98. public static void handleC1(Map<String, Object> c1Feature, Map<String, Object> featureMap) {
  99. List<String> times = Arrays.asList("12h", "24h", "72h", "168h");
  100. List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_1_uv", "click", "str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
  101. for (String time : times) {
  102. for (String index : indexList) {
  103. double value = Double.parseDouble(featureMap.getOrDefault(index + "_" + time, "0").toString());
  104. featureMap.put("c1_" + index + "_" + time, value);
  105. }
  106. double rovn = Double.parseDouble(c1Feature.getOrDefault("rovn_" + time, "0").toString());
  107. double returnNUv = Double.parseDouble(c1Feature.getOrDefault("return_n_uv", "0").toString());
  108. featureMap.put("c1_rovn*log(r)_" + time, rovn * RankExtractorFeature_20240530.calLog(returnNUv));
  109. }
  110. }
  111. public static void handleC2ToC3(Map<String, Object> c2Feature, Map<String, Object> c3Feature, Map<String, Object> featureMap) {
  112. Map<String, Map<String, Object>> featureMaps = new HashMap<>();
  113. featureMaps.put("c2", c2Feature);
  114. featureMaps.put("c3", c3Feature);
  115. List<String> times = Arrays.asList("12h", "24h", "72h", "168h");
  116. List<String> indexList = Arrays.asList("is_share", "share_cnt", "is_return_1", "return_n_uv", "click");
  117. for (Map.Entry<String, Map<String, Object>> entry : featureMaps.entrySet()) {
  118. String key = entry.getKey();
  119. Map<String, Object> feature = entry.getValue();
  120. for (String time : times) {
  121. for (String index : indexList) {
  122. double value = Double.parseDouble(feature.getOrDefault(index + "_" + time, "0").toString());
  123. featureMap.put(key + "_" + index + "_" + time, value);
  124. }
  125. }
  126. }
  127. }
  128. public static void handleC4(Map<String, Object> c4Feature, Map<String, Object> featureMap) {
  129. List<String> times = Arrays.asList("24h", "72h", "168h");
  130. List<String> indexList = Arrays.asList("str_one", "ros_one", "str", "ros", "str_plus", "ros_minus", "rovn");
  131. for (String time : times) {
  132. for (String index : indexList) {
  133. double value = Double.parseDouble(c4Feature.getOrDefault("avg_" + index + "_" + time, "0").toString());
  134. featureMap.put("c4_avg_" + index + "_" + time, value);
  135. double max = Double.parseDouble(c4Feature.getOrDefault("max_" + index + "_" + time, "0").toString());
  136. double min = Double.parseDouble(c4Feature.getOrDefault("min_" + index + "_" + time, "0").toString());
  137. featureMap.put("c4_diff_" + index + "_" + time, max - min);
  138. }
  139. }
  140. }
  141. public static void handleC5ToC6(Map<String, Object> c5Feature, Map<String, Object> c6Feature, Map<String, Object> videoMap, Map<String, Object> featureMap) {
  142. Map<String, Map<String, Object>> featureMaps = new HashMap<>();
  143. featureMaps.put("c5", c5Feature);
  144. featureMaps.put("c6", c6Feature);
  145. List<String> times = Arrays.asList("tags_1d", "tags_3d", "tags_7d");
  146. String title = videoMap.getOrDefault("title", "").toString();
  147. for (Map.Entry<String, Map<String, Object>> entry : featureMaps.entrySet()) {
  148. String key = entry.getKey();
  149. Map<String, Object> feature = entry.getValue();
  150. for (String time : times) {
  151. String tags = feature.getOrDefault(time, "").toString();
  152. Double[] scores = ExtractorUtils.funcC34567ForTagsNew(tags, title);
  153. featureMap.put(key + "_matchnum" + "_" + time, scores[0]);
  154. featureMap.put(key + "_maxscore" + "_" + time, scores[1]);
  155. featureMap.put(key + "_avgscore" + "_" + time, scores[2]);
  156. }
  157. }
  158. }
  159. public static Map<String, Map<String, String[]>> handleC7ToC8(Map<String, Object> c7Feature, Map<String, Object> c8Feature) {
  160. Map<String, Map<String, String[]>> resultMap = new HashMap<>();
  161. Map<String, Map<String, Object>> featureMaps = new HashMap<>();
  162. featureMaps.put("c7", c7Feature);
  163. featureMaps.put("c8", c8Feature);
  164. List<String> indexList = Arrays.asList("share", "return");
  165. for (Map.Entry<String, Map<String, Object>> entry : featureMaps.entrySet()) {
  166. String key = entry.getKey();
  167. Map<String, Object> feature = entry.getValue();
  168. for (String index : indexList) {
  169. if (feature.containsKey(index)) {
  170. Map<String, String[]> cfMap = new HashMap<>();
  171. String[] entries = feature.get(index).toString().split(",");
  172. for (String e : entries) {
  173. String[] rList = e.split(":");
  174. if (rList.length >= 4) {
  175. String vid = rList[0];
  176. String value1 = rList[1];
  177. String value2 = rList[2];
  178. String value3 = rList[3];
  179. String[] strs = {value1, value2, value3};
  180. cfMap.put(vid, strs);
  181. }
  182. }
  183. resultMap.put(key, cfMap);
  184. }
  185. }
  186. }
  187. return resultMap;
  188. }
  189. public static void useC7ToC8(Map<String, Map<String, String[]>> map, String vid, Map<String, Object> featureMap) {
  190. if (StringUtils.isBlank(vid)) {
  191. return;
  192. }
  193. for (String key : Arrays.asList("c6", "c7")) {
  194. for (String action : Arrays.asList("share", "return")) {
  195. String featureKey = key + "_" + action;
  196. if (map.containsKey(featureKey)) {
  197. Map<String, String[]> cfMap = map.get(featureKey);
  198. String[] scores = cfMap.get(vid);
  199. featureMap.put(featureKey + "_score", Double.parseDouble(scores[0]));
  200. featureMap.put(featureKey + "_num", Double.parseDouble(scores[1]));
  201. featureMap.put(featureKey + "_rank", ExtractorUtils.reciprocal(Double.parseDouble(scores[2])));
  202. }
  203. }
  204. }
  205. }
  206. public static void handleD3(Map<String, Object> d3Feature, Map<String, Object> featureMap) {
  207. for (String index : Arrays.asList("exp", "return_n", "rovn")) {
  208. double value = Double.parseDouble(d3Feature.getOrDefault(index, "0").toString());
  209. featureMap.put("d3_" + index, value);
  210. }
  211. }
  212. public static void handleD1(Map<String, Object> d4Feature, Map<String, Object> featureMap) {
  213. double rosCfScores = Double.parseDouble(d4Feature.getOrDefault("ros_cf_score", "0").toString());
  214. featureMap.put("d1_ros_cf_score", rosCfScores);
  215. double rovCfScores = Double.parseDouble(d4Feature.getOrDefault("rov_cf_score", "0").toString());
  216. featureMap.put("d1_rov_cf_score", rovCfScores);
  217. double rosCfRank = Double.parseDouble(d4Feature.getOrDefault("ros_cf_rank", "0").toString());
  218. featureMap.put("d1_ros_cf_rank", ExtractorUtils.reciprocal(rosCfRank));
  219. double rovCfRank = Double.parseDouble(d4Feature.getOrDefault("rov_cf_rank", "0").toString());
  220. featureMap.put("d1_rov_cf_rank", ExtractorUtils.reciprocal(rovCfRank));
  221. }
  222. public static void handleD2(Map<String, Object> d5Feature, Map<String, Object> featureMap) {
  223. double score = Double.parseDouble(d5Feature.getOrDefault("score", "0").toString());
  224. featureMap.put("d2_score", score);
  225. double rank = Double.parseDouble(d5Feature.getOrDefault("rank", "0").toString());
  226. featureMap.put("d2_rank", ExtractorUtils.reciprocal(rank));
  227. }
  228. public static void handleVideoSimilarity(Map<String, Object> videoFeature, Map<String, Object> headVideoFeature, Map<String, Object> featureMap) {
  229. String headVideoTitle = headVideoFeature.getOrDefault("title", "").toString();
  230. String headVideoMergeCate2 = headVideoFeature.getOrDefault("merge_second_level_cate", "").toString();
  231. String headVideoMergeCate1 = headVideoFeature.getOrDefault("merge_first_level_cate", "").toString();
  232. String headVideoFestiveLabel2 = headVideoFeature.getOrDefault("festive_label2", "").toString();
  233. String videoTitle = videoFeature.getOrDefault("title", "").toString();
  234. String videoMergeCate2 = videoFeature.getOrDefault("merge_second_level_cate", "").toString();
  235. String videoMergeCate1 = videoFeature.getOrDefault("merge_first_level_cate", "").toString();
  236. String videoFestiveLabel2 = videoFeature.getOrDefault("festive_label2", "").toString();
  237. double titleSimilarity = ExtractFeature20250218.calcTxtSimilarity(headVideoTitle, videoTitle);
  238. double headTitleAndMerge1Similarity = ExtractFeature20250218.calcTxtSimilarity(headVideoTitle, videoMergeCate1);
  239. double headTitleAndMerge2Similarity = ExtractFeature20250218.calcTxtSimilarity(headVideoTitle, videoMergeCate2);
  240. double headTitleAndFestiveSimilarity = ExtractFeature20250218.calcTxtSimilarity(headVideoTitle, videoFestiveLabel2);
  241. double merge1Similarity = ExtractFeature20250218.calcTxtSimilarity(headVideoMergeCate1, videoMergeCate1);
  242. double merge2Similarity = ExtractFeature20250218.calcTxtSimilarity(headVideoMergeCate2, videoMergeCate2);
  243. double festiveSimilarity = ExtractFeature20250218.calcTxtSimilarity(headVideoFestiveLabel2, videoFestiveLabel2);
  244. featureMap.put("title_sim", titleSimilarity);
  245. featureMap.put("head_title_merge1_sim", headTitleAndMerge1Similarity);
  246. featureMap.put("head_title_merge2_sim", headTitleAndMerge2Similarity);
  247. featureMap.put("head_title_festive_sim", headTitleAndFestiveSimilarity);
  248. featureMap.put("merge1_sim", merge1Similarity);
  249. featureMap.put("merge2_sim", merge2Similarity);
  250. featureMap.put("festive_sim", festiveSimilarity);
  251. }
  252. private static double calcTxtSimilarity(String txt1, String txt2) {
  253. if (StringUtils.isBlank(txt1) || StringUtils.isBlank(txt2)) {
  254. return 0d;
  255. }
  256. return SimilarityUtils.word2VecSimilarity(txt1, txt2);
  257. }
  258. }