FeatureTransformV2.java 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622
  1. package examples.myUtils;
  2. import com.tzld.piaoquan.recommend.similarity.word2vec.Segment;
  3. import examples.utils.SimilarityUtils;
  4. import java.util.*;
  5. public class FeatureTransformV2 {
  // ---- sequence limits and smoothing parameters ----

  /** Max entries read from the profile's "max share" / "max return" sequences. */
  private static final int seqMaxN = 2;
  /** Max entries read from the profile's "last share" / "last return" sequences. */
  private static final int seqLastN = 2;
  /** Additive smoothing constant passed to {@code FeatureUtils.plusSmooth}. */
  private static final double smoothPlus = 5.0;
  /** Larger additive smoothing constant, used for the features whose key ends in {@code #}. */
  private static final double largerSmoothPlus = 30.0;
  /** Scale argument passed to {@code FeatureUtils.log1}. */
  private static final double log1Scale = 10.0;
  /** 14 days in milliseconds; the long literal keeps the product out of int arithmetic. */
  private static final long midValidTs = 14L * 24 * 3600 * 1000;

  // ---- statistic windows per feature group (suffixes of the raw stat keys) ----

  private static final List<String> c1Periods = Arrays.asList("72h", "168h");
  private static final List<String> b0Periods = Arrays.asList("1h", "3h", "6h", "12h");
  private static final List<String> b1Periods = Arrays.asList("1h", "3h", "6h", "24h", "72h", "168h");
  private static final List<String> b2Periods = Arrays.asList("1h", "3h", "6h", "24h");
  private static final List<String> b3Periods = Arrays.asList("1h", "3h", "6h", "24h", "72h");
  private static final List<String> b4Periods = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h");
  private static final List<String> b5Periods = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h");
  private static final List<String> b6Periods = Arrays.asList("1h", "24h");
  private static final List<String> b7Periods = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h");
  private static final List<String> b8Periods = Arrays.asList("1h", "3h", "6h", "12h", "24h");
  private static final List<String> b9Periods = Arrays.asList("1h", "3h", "6h", "12h", "24h");
  private static final List<String> b10Periods = Arrays.asList("1h", "12h");
  private static final List<String> b11Periods = Arrays.asList("1h", "12h");
  private static final List<String> b13Periods = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h");
  private static final List<String> b14Periods = Arrays.asList("1h", "2h", "3h", "6h", "12h");
  private static final List<String> b15Periods = Arrays.asList("1h", "2h", "3h", "6h", "12h");

  // ---- attribute lists ----

  /** Video attributes emitted as one-hot category features by {@code getVideoCateFeature}. */
  private static final List<String> videoCateAttrs = Arrays.asList(
      FeatureUtils.cate1Attr,
      FeatureUtils.cate2Attr,
      FeatureUtils.festive1Attr,
      FeatureUtils.channelAttr,
      FeatureUtils.sourceAttr,
      FeatureUtils.uidAttr,
      FeatureUtils.mergeCate1Attr,
      FeatureUtils.mergeCate2Attr);
  /** Attributes compared between the head video and the ranked video. */
  private static final List<String> videoSimAttrs = Arrays.asList("title", "cate2", "cate2_list", "keywords");
  /** Attributes compared between a creative and the ranked video. */
  private static final List<String> creativeSimAttrs = Arrays.asList("title");
  /** Attributes compared between a history video and the ranked video. */
  private static final List<String> hVideoSimAttrs = Arrays.asList("title");
  /** Collaborative-filtering score types read by {@code parseUCFScore} / {@code getUserCFFeature}. */
  private static final List<String> cfList = Arrays.asList("share", "return");
  /** User attributes emitted as one-hot features by {@code getUserProfileFeature}. */
  private static final List<String> userAttrList = Arrays.asList("province", "city", "model", "brand", "system", "user_channel", "user_level");

  // ---- known categorical values; anything else is bucketed as "other" ----

  private static final Set<String> appSet = new HashSet<>(Arrays.asList("0", "2", "4"));
  private static final Set<String> hotSceneSet = new HashSet<>(Arrays.asList("1008", "1007", "1058", "1074", "1010"));

  /** Maps a history-video attribute name to the short name used in the emitted feature key. */
  private static final Map<String, String> histotyVideoAttrMAP = new HashMap<>();

  static {
      histotyVideoAttrMAP.put("merge_first_level_cate", "cate1");
      histotyVideoAttrMAP.put("merge_second_level_cate", "cate2");
  }
  42. public static void getContextFeature(long currentMs, String appType, String hotSceneType, Map<String, Double> featureMap) {
  43. Calendar calendar = Calendar.getInstance();
  44. calendar.setTimeInMillis(currentMs);
  45. int week = calendar.get(Calendar.DAY_OF_WEEK);
  46. int hour = calendar.get(Calendar.HOUR_OF_DAY) + 1;
  47. featureMap.put(String.format("%s@%d", "week", week), 1.0);
  48. featureMap.put(String.format("%s@%d", "hour", hour), 1.0);
  49. featureMap.put("hour", hour * 1.0);
  50. String app;
  51. if (appSet.contains(appType)) {
  52. app = appType;
  53. } else {
  54. app = "other";
  55. }
  56. featureMap.put(String.format("%s@%s", "app", app), 1.0);
  57. String hot;
  58. if (hotSceneSet.contains(hotSceneType)) {
  59. hot = hotSceneType;
  60. } else {
  61. hot = "other";
  62. }
  63. featureMap.put(String.format("%s@%s", "hot", hot), 1.0);
  64. }
  65. public static void getUserFeature(Map<String, Map<String, String>> userOriginInfo, Map<String, Double> featMap) {
  66. oneTypeStatFeature("c1", "return_1_uv", c1Periods, userOriginInfo.get("mid_global_feature_20250212"), featMap);
  67. }
  68. public static void getMid(String uid, String mid, long currentMs, UserShareReturnProfile profile, Map<String, Double> featMap) {
  69. if (null == profile) {
  70. return;
  71. }
  72. String key = "";
  73. if (null != uid && !uid.isEmpty() && !uid.equals("null")) {
  74. key = uid;
  75. } else if (null != mid && !mid.isEmpty() && !mid.equals("null")) {
  76. key = mid;
  77. }
  78. if (!key.isEmpty()) {
  79. List<UserSRBO> l_r_s = profile.getL_r_s();
  80. if (null != l_r_s && !l_r_s.isEmpty()) {
  81. UserSRBO user = l_r_s.get(0);
  82. if (null != user) {
  83. long ts = user.getTs() * 1000;
  84. if (ts > currentMs - midValidTs) {
  85. featMap.put(String.format("%s@%s", "mid", key), 1.0);
  86. }
  87. }
  88. }
  89. }
  90. }
  91. public static void getUserProfileFeature(UserShareReturnProfile profile, Map<String, String> userInfo, Map<String, Double> featMap) {
  92. if (null != profile) {
  93. long s_pv = profile.getS_pv(); // share_pv(分享pv)
  94. long s_cnt = profile.getS_cnt(); // share_cnt(分享次数)
  95. long r_pv = profile.getR_pv(); // return_pv(回流pv)
  96. long r_uv = profile.getR_uv(); // return_uv(回流uv)
  97. long m_s_cnt = profile.getM_s_cnt(); // max_share_cnt(最大分享次数)
  98. long m_r_uv = profile.getM_r_uv(); // max_return_uv(最大回流uv)
  99. if (s_pv > 0) {
  100. double s_pv_s = FeatureUtils.log1(s_pv, log1Scale);
  101. double s_cnt_s = FeatureUtils.log1(s_cnt, log1Scale);
  102. double r_pv_s = FeatureUtils.log1(r_pv, log1Scale);
  103. double r_uv_s = FeatureUtils.log1(r_uv, log1Scale);
  104. double m_s_cnt_s = FeatureUtils.log1(m_s_cnt, log1Scale);
  105. double m_r_uv_s = FeatureUtils.log1(m_r_uv, log1Scale);
  106. double ros_one = FeatureUtils.wilsonScore(r_pv, s_pv);
  107. double ros = FeatureUtils.plusSmooth(r_uv, s_pv, smoothPlus);
  108. double ros_minus = FeatureUtils.plusSmooth(r_uv, r_pv, smoothPlus);
  109. double l_ros = FeatureUtils.plusSmooth(r_uv / 5.0, s_pv, largerSmoothPlus, 2);
  110. double l_ros_minus = FeatureUtils.plusSmooth(r_uv / 5.0, r_pv, largerSmoothPlus, 2);
  111. featMap.put("c9@s_pv", s_pv_s);
  112. featMap.put("c9@s_cnt", s_cnt_s);
  113. featMap.put("c9@r_pv", r_pv_s);
  114. featMap.put("c9@r_uv", r_uv_s);
  115. featMap.put("c9@m_s_cnt", m_s_cnt_s);
  116. featMap.put("c9@m_r_uv", m_r_uv_s);
  117. featMap.put("c9@ros_one", ros_one);
  118. featMap.put("c9@ros", ros);
  119. featMap.put("c9@ros_minus", ros_minus);
  120. featMap.put("c9@ros_#", l_ros);
  121. featMap.put("c9@ros_minus_#", l_ros_minus);
  122. }
  123. }
  124. if (null != userInfo && !userInfo.isEmpty()) {
  125. for (String attr : userAttrList) {
  126. if (userInfo.containsKey(attr)) {
  127. String value = userInfo.get(attr).trim().replaceAll("(\\s+|\\t|:)", "_");
  128. if (!value.isEmpty()) {
  129. String key = String.format("%s@%s", attr, value.toLowerCase());
  130. featMap.put(key, 1.0);
  131. }
  132. }
  133. }
  134. String page = userInfo.getOrDefault("page", "");
  135. if (page.equals("回流页")) {
  136. featMap.put("page@return", 1.0);
  137. }
  138. }
  139. }
  140. public static void getUserTagsCrossVideoFeature(String prefix, Map<String, String> videoInfo, Map<String, String> infoMap, Map<String, Double> featMap) {
  141. if (null == videoInfo || videoInfo.isEmpty() || null == infoMap || infoMap.isEmpty()) {
  142. return;
  143. }
  144. String title = videoInfo.getOrDefault("title", "");
  145. if (title.isEmpty()) {
  146. return;
  147. }
  148. for (String period : Arrays.asList("tags_1d", "tags_3d", "tags_7d")) {
  149. String tags = infoMap.getOrDefault(period, "");
  150. if (!tags.isEmpty()) {
  151. Double[] doubles = FeatureUtils.funcC34567ForTagsNew(tags, title);
  152. featMap.put(prefix + "_" + period + "@matchnum", doubles[0]);
  153. featMap.put(prefix + "_" + period + "@maxscore", doubles[1]);
  154. featMap.put(prefix + "_" + period + "@avgscore", doubles[2]);
  155. }
  156. }
  157. }
  158. public static void getUserCFFeature(String prefix, String vid, Map<String, Map<String, String[]>> infoMap, Map<String, Double> featMap) {
  159. if (vid.isEmpty() || null == infoMap || infoMap.isEmpty()) {
  160. return;
  161. }
  162. for (String cfType : cfList) {
  163. if (infoMap.containsKey(cfType)) {
  164. Map<String, String[]> cfScoresMap = infoMap.get(cfType);
  165. if (null != cfScoresMap && cfScoresMap.containsKey(vid)) {
  166. String[] scores = cfScoresMap.get(vid);
  167. Double score1 = Double.parseDouble(scores[0]);
  168. Double score2 = Double.parseDouble(scores[1]);
  169. Double score3 = Double.parseDouble(scores[2]) <= 0 ? 0D : 1.0 / Double.parseDouble(scores[2]);
  170. featMap.put(prefix + "_" + cfType + "@score", score1);
  171. featMap.put(prefix + "_" + cfType + "@num", score2);
  172. featMap.put(prefix + "_" + cfType + "@rank", score3);
  173. }
  174. }
  175. }
  176. }
  177. public static void getVideoFeature(String vid, Map<String, Map<String, Map<String, String>>> videoOriginInfo, Map<String, Double> featMap) {
  178. oneTypeStatFeature("b0", b0Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_recsys_feature_video_clean_stat"), featMap);
  179. oneTypeStatFeature("b1", "return_1_uv", b1Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_global_feature_20250212"), featMap);
  180. oneTypeStatFeature("b2", "return_n_uv", b2Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_recommend_exp_feature_20250212"), featMap);
  181. oneTypeStatFeature("b3", "return_n_uv", b3Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_recommend_flowpool_exp_feature_20250212"), featMap);
  182. oneTypeStatFeature("b4", "return_n_uv", b4Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_apptype_recommend_exp_feature_20250212"), featMap);
  183. oneTypeStatFeature("b5", "return_n_uv", b5Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_province_recommend_exp_feature_20250212"), featMap);
  184. oneTypeStatFeature("b6", "return_n_uv", b6Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_brand_recommend_exp_feature_20250212"), featMap);
  185. oneTypeStatFeature("b7", "return_n_uv", b7Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_hotsencetype_recommend_exp_feature_20250212"), featMap);
  186. oneTypeStatFeature("b8", "return_n_uv", b8Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_merge_cate1_recommend_exp_feature_20250212"), featMap);
  187. oneTypeStatFeature("b9", "return_n_uv", b9Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_merge_cate2_recommend_exp_feature_20250212"), featMap);
  188. oneTypeStatFeature("b10", "return_n_uv", b10Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_channel_recommend_exp_feature_20250212"), featMap);
  189. oneTypeStatFeature("b11", "return_n_uv", b11Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_festive_recommend_exp_feature_20250212"), featMap);
  190. oneTypeStatFeature("b13", "return_n_uv", b13Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_video_unionid_recommend_exp_feature_20250212"), featMap);
  191. oneTypeStatFeature("b14", b14Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_recsys_feature_video_recommend_channel_layer"), featMap);
  192. oneTypeStatFeature("b15", b15Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_recsys_feature_video_recommend_channel_layer_head"), featMap);
  193. // head video cf
  194. headVideoCFD1Feature("d1", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("scene_type_vid_cf_feature_20250212"), featMap);
  195. headVideoCFD2Feature("d2", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("vid_click_cf_feature_20250212"), featMap);
  196. headVideoCFD3Feature("d3", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_recsys_feature_cf_i2i_v2"), featMap);
  197. }
  198. public static void getVideoBaseFeature(String prefix, long currentMs, Map<String, String> videoInfo, Map<String, Double> featMap) {
  199. if (null == videoInfo || videoInfo.isEmpty()) {
  200. return;
  201. }
  202. featMap.put(prefix + "@total_time", FeatureUtils.log1(Double.parseDouble(videoInfo.getOrDefault("total_time", "0")), log1Scale));
  203. featMap.put(prefix + "@bit_rate", FeatureUtils.log1(Double.parseDouble(videoInfo.getOrDefault("bit_rate", "0")), log1Scale));
  204. String resolution = FeatureUtils.getResolution(videoInfo);
  205. if (!resolution.isEmpty()) {
  206. String resKey = String.format("%s@%s@%s", prefix, "res", resolution);
  207. featMap.put(resKey, 1.0);
  208. }
  209. // cate
  210. getVideoCateFeature(prefix, videoInfo, featMap);
  211. if (videoInfo.containsKey("title")) {
  212. int id = FeatureUtils.judgeVideoTimeType(videoInfo.get("title"));
  213. if (id > 0) {
  214. String key = String.format("%s@%s@%d", prefix, "tt", id);
  215. featMap.put(key, 1.0);
  216. }
  217. }
  218. // time
  219. try {
  220. if (videoInfo.containsKey("gmt_create_timestamp")) {
  221. String createMsStr = videoInfo.get("gmt_create_timestamp");
  222. long createMs = Long.parseLong(createMsStr);
  223. double createTime = FeatureUtils.getTimeDiff(currentMs, createMs);
  224. featMap.put(prefix + "@ts", 1 - createTime);
  225. }
  226. } catch (Exception ignored) {
  227. }
  228. }
  229. public static void getHeadRankVideoCrossFeature(Map<String, String> headInfo, Map<String, String> rankInfo, Map<String, Double> featMap) {
  230. getTwoVideoCrossFeature("hr_sim", videoSimAttrs, headInfo, rankInfo, featMap);
  231. }
  232. public static void getCreativeBaseFeature(String prefix, Map<String, String> creativeInfo, Map<String, Double> featMap) {
  233. if (null == creativeInfo || creativeInfo.isEmpty()) {
  234. return;
  235. }
  236. if (creativeInfo.containsKey("ghId")) {
  237. String ghId = creativeInfo.get("ghId");
  238. if (null != ghId && !ghId.isEmpty()) {
  239. String key = String.format("%s@gid@%s", prefix, ghId);
  240. featMap.put(key, 1.0);
  241. }
  242. }
  243. if (creativeInfo.containsKey("name")) {
  244. String name = creativeInfo.get("name");
  245. if (null != name && !name.isEmpty()) {
  246. List<String> words = Segment.getWords(name);
  247. for (String word : words) {
  248. if (null != word && word.length() > 1) {
  249. String key = String.format("%s@name@%s", prefix, word);
  250. featMap.put(key, 1.0);
  251. }
  252. }
  253. }
  254. }
  255. }
  256. public static void getCreativeCrossFeature(String prefix, Map<String, String> creativeInfo, Map<String, String> rankInfo, Map<String, Double> featMap) {
  257. getTwoVideoCrossFeature(prefix, creativeSimAttrs, creativeInfo, rankInfo, featMap);
  258. }
  259. public static void getProfileVideoCrossFeature(long currentMs, UserShareReturnProfile profile, Map<String, String> rankVideo, Map<String, Map<String, String>> hVideoMap, Map<String, Double> featMap) {
  260. if (null == profile) {
  261. return;
  262. }
  263. getRSCrossFeature(false, "c9_mss", currentMs, seqMaxN, profile.getM_s_s(), rankVideo, hVideoMap, featMap);
  264. getRSCrossFeature(false, "c9_mrs", currentMs, seqMaxN, profile.getM_r_s(), rankVideo, hVideoMap, featMap);
  265. getRSCrossFeature(true, "c9_lss", currentMs, seqLastN, profile.getL_s_s(), rankVideo, hVideoMap, featMap);
  266. getRSCrossFeature(false, "c9_lrs", currentMs, seqLastN, profile.getL_r_s(), rankVideo, hVideoMap, featMap);
  267. getRSCrossFeature(true, "c9_lr1s", currentMs, seqLastN, profile.getL_r1_s(), rankVideo, hVideoMap, featMap);
  268. if (null == rankVideo || rankVideo.isEmpty()) {
  269. return;
  270. }
  271. getVideoAttrSRCrossFeature("c9_c1s", rankVideo.getOrDefault("merge_first_level_cate", ""), profile.getC1_s(), featMap);
  272. getVideoAttrSRCrossFeature("c9_c2s", rankVideo.getOrDefault("merge_second_level_cate", ""), profile.getC2_s(), featMap);
  273. getVideoAttrSRCrossFeature("c9_l1s", rankVideo.getOrDefault("festive_label1", ""), profile.getL1_s(), featMap);
  274. getVideoAttrSRCrossFeature("c9_l2s", rankVideo.getOrDefault("festive_label2", ""), profile.getL2_s(), featMap);
  275. // 视频解构的关键词
  276. if (rankVideo.containsKey("dk_keywords")) {
  277. String dkKeywords = rankVideo.get("dk_keywords");
  278. if (Objects.isNull(dkKeywords) || dkKeywords.isEmpty()) {
  279. return;
  280. }
  281. for (String kw : dkKeywords.split("[,,、]")) {
  282. kw = kw.replaceAll("(\\s+|\\t|:)", "");
  283. getVideoAttrSRCrossFeature("c9_dks", kw, profile.getD_k_s(), featMap);
  284. }
  285. }
  286. }
  287. private static void getRSCrossFeature(boolean flag, String prefix, long currentMs, int maxN, List<UserSRBO> list, Map<String, String> rankVideo, Map<String, Map<String, String>> hVideoMap, Map<String, Double> featMap) {
  288. if (null != list && !list.isEmpty()) {
  289. for (int i = 0; i < list.size() && i < maxN; i++) {
  290. UserSRBO u = list.get(i);
  291. if (null != u) {
  292. long id = u.getId();
  293. long cnt = u.getCnt();
  294. long uv = u.getUv();
  295. long ts = u.getTs();
  296. if (id > 0) {
  297. String vid = id + "";
  298. String baseKey = String.format("%s@%d", prefix, i + 1);
  299. if (cnt > 0) {
  300. featMap.put(baseKey + "@cnt", FeatureUtils.log1(cnt, log1Scale));
  301. }
  302. if (uv > 0) {
  303. featMap.put(baseKey + "@uv", FeatureUtils.log1(uv, log1Scale));
  304. }
  305. if (ts > 0) {
  306. long historyMs = ts * 1000;
  307. featMap.put(baseKey + "@ts", 1 - FeatureUtils.getTimeDiff(currentMs, historyMs));
  308. // history week & hour
  309. Calendar calendar = Calendar.getInstance();
  310. calendar.setTimeInMillis(historyMs);
  311. featMap.put(String.format("%s_week@%d", baseKey, calendar.get(Calendar.DAY_OF_WEEK)), 1.0);
  312. featMap.put(String.format("%s_hour@%d", baseKey, calendar.get(Calendar.HOUR_OF_DAY) + 1), 1.0);
  313. }
  314. if (null != hVideoMap && hVideoMap.containsKey(vid)) {
  315. Map<String, String> hVideo = hVideoMap.get(vid);
  316. getTwoVideoCrossFeature(baseKey, hVideoSimAttrs, hVideo, rankVideo, featMap);
  317. if (flag) {
  318. getHistoryVideoCateFeature(baseKey, hVideo, featMap);
  319. }
  320. }
  321. }
  322. }
  323. }
  324. }
  325. }
  326. private static void getVideoAttrSRCrossFeature(String prefix, String attr, Map<String, VideoAttrSRBO> attrMap, Map<String, Double> featMap) {
  327. if (null == attrMap || attrMap.isEmpty()) {
  328. return;
  329. }
  330. attr = attr.trim();
  331. if (attrMap.containsKey(attr)) {
  332. VideoAttrSRBO bo = attrMap.get(attr);
  333. if (null != bo) {
  334. long sp = bo.getSp(); // share_pv
  335. long rp = bo.getRp(); // return_n_pv_noself
  336. long ru = bo.getRu(); // return_n_uv_noself
  337. long mu = bo.getMu(); // max_return_uv
  338. if (sp > 0) {
  339. double sp_s = FeatureUtils.log1(sp, log1Scale);
  340. double rp_s = FeatureUtils.log1(rp, log1Scale);
  341. double ru_s = FeatureUtils.log1(ru, log1Scale);
  342. double mu_s = FeatureUtils.log1(mu, log1Scale);
  343. double ros_one = FeatureUtils.wilsonScore(rp, sp);
  344. double ros = FeatureUtils.plusSmooth(ru, sp, smoothPlus);
  345. double ros_minus = FeatureUtils.plusSmooth(ru, rp, smoothPlus);
  346. double l_ros = FeatureUtils.plusSmooth(ru / 5.0, sp, largerSmoothPlus, 2);
  347. double l_ros_minus = FeatureUtils.plusSmooth(ru / 5.0, rp, largerSmoothPlus, 2);
  348. featMap.put(prefix + "@sp", sp_s);
  349. featMap.put(prefix + "@rp", rp_s);
  350. featMap.put(prefix + "@ru", ru_s);
  351. featMap.put(prefix + "@mu", mu_s);
  352. featMap.put(prefix + "@ros_one", ros_one);
  353. featMap.put(prefix + "@ros", ros);
  354. featMap.put(prefix + "@ros_minus", ros_minus);
  355. featMap.put(prefix + "@ros_#", l_ros);
  356. featMap.put(prefix + "@ros_minus_#", l_ros_minus);
  357. }
  358. }
  359. }
  360. }
  361. private static void getVideoCateFeature(String prefix, Map<String, String> videoInfo, Map<String, Double> featMap) {
  362. if (null == videoInfo || videoInfo.isEmpty()) {
  363. return;
  364. }
  365. for (String attr : videoCateAttrs) {
  366. String attrVal = videoInfo.getOrDefault(attr, "");
  367. attrVal = attrVal.trim();
  368. if (!attrVal.isEmpty() && !attrVal.equals("unknown")) {
  369. String key = String.format("%s@%s@%s", prefix, attr, attrVal);
  370. featMap.put(key, 1.0);
  371. }
  372. }
  373. if (videoInfo.containsKey("keywords")) {
  374. String keywords = videoInfo.get("keywords");
  375. if (null != keywords && !keywords.isEmpty()) {
  376. for (String kw : keywords.split("[,,、]")) {
  377. kw = kw.replaceAll("(\\s+|\\t|:)", "");
  378. if (!kw.isEmpty()) {
  379. String featKey = String.format("%s@kw@%s", prefix, kw);
  380. featMap.put(featKey, 1.0);
  381. }
  382. }
  383. }
  384. }
  385. // 视频解构的关键词 ID特征
  386. if (videoInfo.containsKey("dk_keywords")) {
  387. String dkKeywords = videoInfo.get("dk_keywords");
  388. if (Objects.nonNull(dkKeywords) && !dkKeywords.isEmpty()) {
  389. for (String kw : dkKeywords.split("[,,、]")) {
  390. kw = kw.replaceAll("(\\s+|\\t|:)", "");
  391. if (!kw.isEmpty()) {
  392. String featKey = String.format("%s@dkw@%s", prefix, kw);
  393. featMap.put(featKey, 1.0);
  394. }
  395. }
  396. }
  397. }
  398. }
  399. private static void getTwoVideoCrossFeature(String prefix, List<String> attrs, Map<String, String> video1, Map<String, String> video2, Map<String, Double> featMap) {
  400. if (null == video1 || video1.isEmpty() || null == video2 || video2.isEmpty()) {
  401. return;
  402. }
  403. for (String attr : attrs) {
  404. String attr1 = video1.getOrDefault(attr, "");
  405. String attr2 = video2.getOrDefault(attr, "");
  406. if (!"".equals(attr1) && !"unknown".equals(attr1) && !"".equals(attr2) && !"unknown".equals(attr2)) {
  407. double simScore = SimilarityUtils.word2VecSimilarity(attr1, attr2);
  408. featMap.put(prefix + "@" + attr, simScore);
  409. }
  410. }
  411. }
  412. private static void headVideoCFD1Feature(String prefix, Map<String, String> infoMap, Map<String, Double> featMap) {
  413. double ros_cf_score = getOneInfo("ros_cf_score", infoMap);
  414. double ros_cf_rank = getOneInfo("ros_cf_rank", infoMap);
  415. double rov_cf_score = getOneInfo("rov_cf_score", infoMap);
  416. double rov_cf_rank = getOneInfo("rov_cf_rank", infoMap);
  417. featMap.put(prefix + "@ros_cf_score", ros_cf_score);
  418. featMap.put(prefix + "@ros_cf_rank", ros_cf_rank);
  419. featMap.put(prefix + "@rov_cf_score", rov_cf_score);
  420. featMap.put(prefix + "@rov_cf_rank", rov_cf_rank);
  421. }
  422. private static void headVideoCFD2Feature(String prefix, Map<String, String> infoMap, Map<String, Double> featMap) {
  423. double score = getOneInfo("score", infoMap);
  424. double rank = getOneInfo("rank", infoMap);
  425. featMap.put(prefix + "@score", score);
  426. featMap.put(prefix + "@rank", rank);
  427. }
  428. private static void headVideoCFD3Feature(String prefix, Map<String, String> infoMap, Map<String, Double> featMap) {
  429. double exp = getOneInfo("exp", infoMap);
  430. double return_n = getOneInfo("return_n", infoMap);
  431. double rovn = FeatureUtils.plusSmooth(return_n, exp, smoothPlus);
  432. featMap.put(prefix + "@exp", FeatureUtils.log1(exp, log1Scale));
  433. featMap.put(prefix + "@return_n", FeatureUtils.log1(return_n, log1Scale));
  434. featMap.put(prefix + "@rovn", rovn);
  435. }
  436. public static Map<String, Map<String, String[]>> parseUCFScore(Map<String, String> mapInfo) {
  437. Map<String, Map<String, String[]>> allScoresMap = new HashMap<>();
  438. for (String cfType : cfList) {
  439. String data = mapInfo.getOrDefault(cfType, "");
  440. if (!data.isEmpty()) {
  441. Map<String, String[]> oneScoresMap = new HashMap<>();
  442. String[] entries = data.split(",");
  443. for (String entry : entries) {
  444. String[] rList = entry.split(":");
  445. if (rList.length >= 4) { // 确保分割后有四个元素
  446. String key = rList[0];
  447. String value1 = rList[1];
  448. String value2 = rList[2];
  449. String value3 = rList[3];
  450. String[] strs = {value1, value2, value3};
  451. oneScoresMap.put(key, strs);
  452. }
  453. }
  454. if (!oneScoresMap.isEmpty()) {
  455. allScoresMap.put(cfType, oneScoresMap);
  456. }
  457. }
  458. }
  459. return allScoresMap;
  460. }
  461. private static void oneTypeStatFeature(String prefix, String uvPrefix, List<String> periods, Map<String, String> infoMap, Map<String, Double> featMap) {
  462. if (null == infoMap || infoMap.isEmpty()) {
  463. return;
  464. }
  465. for (String period : periods) {
  466. double exp = getOneInfo("exp_" + period, infoMap);
  467. if (!FeatureUtils.greaterThanZero(exp)) {
  468. continue;
  469. }
  470. double is_share = getOneInfo("is_share_" + period, infoMap);
  471. double share_cnt = getOneInfo("share_cnt_" + period, infoMap);
  472. double is_return_1 = getOneInfo("is_return_1_" + period, infoMap);
  473. double return_n_uv = getOneInfo(uvPrefix + "_" + period, infoMap);
  474. double exp_s = FeatureUtils.log1(exp, log1Scale);
  475. double is_share_s = FeatureUtils.log1(is_share, log1Scale);
  476. double share_cnt_s = FeatureUtils.log1(share_cnt, log1Scale);
  477. double is_return_1_s = FeatureUtils.log1(is_return_1, log1Scale);
  478. double return_n_uv_s = FeatureUtils.log1(return_n_uv, log1Scale);
  479. double str = FeatureUtils.wilsonScore(is_share, exp);
  480. double str_plus = FeatureUtils.wilsonScore(is_return_1, exp);
  481. double ros_one = FeatureUtils.wilsonScore(is_return_1, is_share);
  482. // larger smooth
  483. double l_rovn = FeatureUtils.plusSmooth(return_n_uv, exp, largerSmoothPlus, 1);
  484. double l_ros = FeatureUtils.plusSmooth(return_n_uv / 5, is_share, largerSmoothPlus, 2);
  485. double l_ros_n = FeatureUtils.plusSmooth(return_n_uv / 5, share_cnt, largerSmoothPlus, 2);
  486. double l_ros_minus = FeatureUtils.plusSmooth(return_n_uv / 5, is_return_1, largerSmoothPlus, 2);
  487. featMap.put(prefix + "_" + period + "@" + "exp", exp_s);
  488. featMap.put(prefix + "_" + period + "@" + "is_share", is_share_s);
  489. featMap.put(prefix + "_" + period + "@" + "share_cnt", share_cnt_s);
  490. featMap.put(prefix + "_" + period + "@" + "is_return_1", is_return_1_s);
  491. featMap.put(prefix + "_" + period + "@" + "return_n_uv", return_n_uv_s);
  492. featMap.put(prefix + "_" + period + "@" + "str", str);
  493. featMap.put(prefix + "_" + period + "@" + "str_plus", str_plus);
  494. featMap.put(prefix + "_" + period + "@" + "ros_one", ros_one);
  495. // larger smooth
  496. featMap.put(prefix + "_" + period + "@" + "rovn_#", l_rovn);
  497. featMap.put(prefix + "_" + period + "@" + "ros_#", l_ros);
  498. featMap.put(prefix + "_" + period + "@" + "ros_n_#", l_ros_n);
  499. featMap.put(prefix + "_" + period + "@" + "ros_minus_#", l_ros_minus);
  500. }
  501. }
  502. private static void oneTypeStatFeature(String prefix, List<String> periods, Map<String, String> infoMap, Map<String, Double> featMap) {
  503. if (null == infoMap || infoMap.isEmpty()) {
  504. return;
  505. }
  506. for (String period : periods) {
  507. double exp = getOneInfo("exp_" + period, infoMap);
  508. if (!FeatureUtils.greaterThanZero(exp)) {
  509. continue;
  510. }
  511. double is_share = getOneInfo("is_share_" + period, infoMap);
  512. double share_cnt = getOneInfo("share_cnt_" + period, infoMap);
  513. double is_return_1 = getOneInfo("is_return_1_" + period, infoMap);
  514. double return_1_uv = getOneInfo("return_1_uv_" + period, infoMap);
  515. double return_n_uv = getOneInfo("return_n_uv_" + period, infoMap);
  516. double exp_s = FeatureUtils.log1(exp, log1Scale);
  517. double is_share_s = FeatureUtils.log1(is_share, log1Scale);
  518. double share_cnt_s = FeatureUtils.log1(share_cnt, log1Scale);
  519. double is_return_1_s = FeatureUtils.log1(is_return_1, log1Scale);
  520. double return_1_uv_s = FeatureUtils.log1(return_1_uv, log1Scale);
  521. double return_n_uv_s = FeatureUtils.log1(return_n_uv, log1Scale);
  522. double str = FeatureUtils.wilsonScore(is_share, exp);
  523. double str_plus = FeatureUtils.wilsonScore(is_return_1, exp);
  524. double ros_one = FeatureUtils.wilsonScore(is_return_1, is_share);
  525. // larger smooth
  526. double l_rovn1 = FeatureUtils.plusSmooth(return_1_uv, exp, largerSmoothPlus, 1);
  527. double l_ros1 = FeatureUtils.plusSmooth(return_1_uv / 5, is_share, largerSmoothPlus, 2);
  528. double l_ros_n1 = FeatureUtils.plusSmooth(return_1_uv / 5, share_cnt, largerSmoothPlus, 2);
  529. double l_ros_minus1 = FeatureUtils.plusSmooth(return_1_uv / 5, is_return_1, largerSmoothPlus, 2);
  530. double l_rovn = FeatureUtils.plusSmooth(return_n_uv, exp, largerSmoothPlus, 1);
  531. double l_ros = FeatureUtils.plusSmooth(return_n_uv / 5, is_share, largerSmoothPlus, 2);
  532. double l_ros_n = FeatureUtils.plusSmooth(return_n_uv / 5, share_cnt, largerSmoothPlus, 2);
  533. double l_ros_minus = FeatureUtils.plusSmooth(return_n_uv / 5, is_return_1, largerSmoothPlus, 2);
  534. featMap.put(prefix + "_" + period + "@" + "exp", exp_s);
  535. featMap.put(prefix + "_" + period + "@" + "is_share", is_share_s);
  536. featMap.put(prefix + "_" + period + "@" + "share_cnt", share_cnt_s);
  537. featMap.put(prefix + "_" + period + "@" + "is_return_1", is_return_1_s);
  538. featMap.put(prefix + "_" + period + "@" + "return_1_uv", return_1_uv_s);
  539. featMap.put(prefix + "_" + period + "@" + "return_n_uv", return_n_uv_s);
  540. featMap.put(prefix + "_" + period + "@" + "str", str);
  541. featMap.put(prefix + "_" + period + "@" + "str_plus", str_plus);
  542. featMap.put(prefix + "_" + period + "@" + "ros_one", ros_one);
  543. // larger smooth
  544. featMap.put(prefix + "_" + period + "@" + "rovn1_#", l_rovn1);
  545. featMap.put(prefix + "_" + period + "@" + "ros1_#", l_ros1);
  546. featMap.put(prefix + "_" + period + "@" + "ros_n1_#", l_ros_n1);
  547. featMap.put(prefix + "_" + period + "@" + "ros_minus1_#", l_ros_minus1);
  548. featMap.put(prefix + "_" + period + "@" + "rovn_#", l_rovn);
  549. featMap.put(prefix + "_" + period + "@" + "ros_#", l_ros);
  550. featMap.put(prefix + "_" + period + "@" + "ros_n_#", l_ros_n);
  551. featMap.put(prefix + "_" + period + "@" + "ros_minus_#", l_ros_minus);
  552. }
  553. }
  554. private static double getOneInfo(String name, Map<String, String> map) {
  555. if (null == map) {
  556. return 0.0;
  557. }
  558. return map.isEmpty() ? 0 : Double.parseDouble(map.getOrDefault(name, "0.0"));
  559. }
  560. private static void getHistoryVideoCateFeature(String prefix, Map<String, String> videoInfo, Map<String, Double> featMap) {
  561. if (null == videoInfo || videoInfo.isEmpty()) {
  562. return;
  563. }
  564. for (Map.Entry<String, String> entry : histotyVideoAttrMAP.entrySet()) {
  565. String attr = entry.getKey();
  566. String attrVal = videoInfo.getOrDefault(attr, "");
  567. attrVal = attrVal.trim();
  568. if (!attrVal.isEmpty() && !attrVal.equals("unknown")) {
  569. String key = String.format("%s@%s@%s", prefix, entry.getValue(), attrVal);
  570. featMap.put(key, 1.0);
  571. }
  572. }
  573. }
  574. }