package examples.myUtils;

import com.tzld.piaoquan.recommend.similarity.word2vec.Segment;
import examples.utils.SimilarityUtils;

import java.util.*;
import java.util.regex.Pattern;

/**
 * Static helpers that turn raw context / user / video / creative data into a flat
 * feature map ({@code feature name -> value}).
 *
 * <p>Every {@code get*Feature} method writes into the {@code featMap} passed by the caller and
 * returns nothing. Missing, {@code null} or empty inputs are treated as "no data" and simply
 * produce no features. Numeric strings that are present but malformed still raise
 * {@link NumberFormatException}, unless noted otherwise.
 */
public class FeatureTransformV2 {

    /** Maximum number of entries read from the "max" share/return sequences of a profile. */
    private static final int seqMaxN = 2;
    /** Maximum number of entries read from the "last" share/return sequences of a profile. */
    private static final int seqLastN = 2;
    private static final double smoothPlus = 5.0;
    private static final double largerSmoothPlus = 30.0;
    private static final double log1Scale = 10.0;
    /** 14 days in milliseconds; computed with long arithmetic so a larger window cannot overflow int. */
    private static final long midValidTs = 14L * 24 * 3600 * 1000;

    private static final List<String> c1Periods = Arrays.asList("72h", "168h");
    private static final List<String> b0Periods = Arrays.asList("1h", "3h", "6h", "12h");
    private static final List<String> b1Periods = Arrays.asList("1h", "3h", "6h", "24h", "72h", "168h");
    private static final List<String> b2Periods = Arrays.asList("1h", "3h", "6h", "24h");
    private static final List<String> b3Periods = Arrays.asList("1h", "3h", "6h", "24h", "72h");
    private static final List<String> b4Periods = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h");
    private static final List<String> b5Periods = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h");
    private static final List<String> b6Periods = Arrays.asList("1h", "24h");
    private static final List<String> b7Periods = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h");
    private static final List<String> b8Periods = Arrays.asList("1h", "3h", "6h", "12h", "24h");
    private static final List<String> b9Periods = Arrays.asList("1h", "3h", "6h", "12h", "24h");
    private static final List<String> b10Periods = Arrays.asList("1h", "12h");
    private static final List<String> b11Periods = Arrays.asList("1h", "12h");
    private static final List<String> b13Periods = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h");
    private static final List<String> b14Periods = Arrays.asList("1h", "2h", "3h", "6h", "12h");
    private static final List<String> b15Periods = Arrays.asList("1h", "2h", "3h", "6h", "12h");

    private static final List<String> videoCateAttrs = Arrays.asList(
            FeatureUtils.cate1Attr, FeatureUtils.cate2Attr, FeatureUtils.festive1Attr,
            FeatureUtils.channelAttr, FeatureUtils.sourceAttr, FeatureUtils.uidAttr,
            FeatureUtils.mergeCate1Attr, FeatureUtils.mergeCate2Attr);
    private static final List<String> videoSimAttrs = Arrays.asList("title", "cate2", "cate2_list", "keywords");
    private static final List<String> creativeSimAttrs = Arrays.asList("title");
    private static final List<String> hVideoSimAttrs = Arrays.asList("title");
    private static final List<String> cfList = Arrays.asList("share", "return");
    private static final List<String> userAttrList =
            Arrays.asList("province", "city", "model", "brand", "system", "user_channel", "user_level");
    private static final Set<String> appSet = new HashSet<>(Arrays.asList("0", "2", "4"));
    private static final Set<String> hotSceneSet =
            new HashSet<>(Arrays.asList("1008", "1007", "1058", "1074", "1010"));

    /** Maps a history-video attribute name to the short name used in the emitted feature key. */
    private static final Map<String, String> historyVideoAttrMap = new HashMap<>();

    /**
     * Separator used to split keyword lists.
     * NOTE(review): the character class contains the ASCII comma twice; the second one may
     * originally have been a full-width comma lost in transcoding - confirm against upstream.
     */
    private static final Pattern keywordSeparator = Pattern.compile("[,,、]");
    /** Characters that are replaced/removed before a value is embedded in a feature key. */
    private static final Pattern keyUnsafeChars = Pattern.compile("(\\s+|\\t|:)");

    static {
        historyVideoAttrMap.put("merge_first_level_cate", "cate1");
        historyVideoAttrMap.put("merge_second_level_cate", "cate2");
    }

    /**
     * Adds request-context features: day of week, hour of day, app type and hot-scene type.
     *
     * <p>The time is interpreted with {@link Calendar#getInstance()}, i.e. the JVM default time
     * zone. {@code week} is {@link Calendar#DAY_OF_WEEK}; {@code hour} is
     * {@link Calendar#HOUR_OF_DAY} plus one. App / scene values outside the known sets are
     * bucketed as {@code "other"}.
     *
     * @param currentMs    request time in epoch milliseconds
     * @param appType      app type identifier; may be {@code null}
     * @param hotSceneType hot scene identifier; may be {@code null}
     * @param featureMap   output map the features are written to
     */
    public static void getContextFeature(long currentMs, String appType, String hotSceneType,
                                         Map<String, Double> featureMap) {
        Calendar calendar = Calendar.getInstance();
        calendar.setTimeInMillis(currentMs);
        int week = calendar.get(Calendar.DAY_OF_WEEK);
        int hour = calendar.get(Calendar.HOUR_OF_DAY) + 1;
        featureMap.put(String.format("%s@%d", "week", week), 1.0);
        featureMap.put(String.format("%s@%d", "hour", hour), 1.0);
        featureMap.put("hour", hour * 1.0);

        String app = appSet.contains(appType) ? appType : "other";
        featureMap.put(String.format("%s@%s", "app", app), 1.0);

        String hot = hotSceneSet.contains(hotSceneType) ? hotSceneType : "other";
        featureMap.put(String.format("%s@%s", "hot", hot), 1.0);
    }

    /**
     * Adds the {@code c1} statistic features from the {@code mid_global_feature_20250212} table.
     *
     * @param userOriginInfo table name -> (column -> value); {@code null} yields no features
     * @param featMap        output map the features are written to
     */
    public static void getUserFeature(Map<String, Map<String, String>> userOriginInfo,
                                      Map<String, Double> featMap) {
        if (null == userOriginInfo) {
            return;
        }
        oneTypeStatFeature("c1", "return_1_uv", c1Periods,
                userOriginInfo.get("mid_global_feature_20250212"), featMap);
    }

    /**
     * Adds a {@code mid@<id>} ID feature when the first entry of the profile's {@code l_r_s}
     * sequence has a timestamp within the last {@code midValidTs} milliseconds.
     *
     * <p>{@code uid} is preferred over {@code mid}; values that are {@code null}, empty or the
     * literal string {@code "null"} are ignored.
     *
     * @param uid       user id, may be {@code null}
     * @param mid       machine id, used when {@code uid} is unusable; may be {@code null}
     * @param currentMs request time in epoch milliseconds
     * @param profile   share/return profile; {@code null} yields no feature
     * @param featMap   output map the feature is written to
     */
    public static void getMid(String uid, String mid, long currentMs, UserShareReturnProfile profile,
                              Map<String, Double> featMap) {
        if (null == profile) {
            return;
        }
        String key = "";
        if (isUsableId(uid)) {
            key = uid;
        } else if (isUsableId(mid)) {
            key = mid;
        }
        if (key.isEmpty()) {
            return;
        }
        List<UserSRBO> lastReturns = profile.getL_r_s();
        if (null == lastReturns || lastReturns.isEmpty()) {
            return;
        }
        UserSRBO user = lastReturns.get(0);
        if (null == user) {
            return;
        }
        // getTs() is multiplied by 1000 before being compared with millisecond values.
        long ts = user.getTs() * 1000;
        if (ts > currentMs - midValidTs) {
            featMap.put(String.format("%s@%s", "mid", key), 1.0);
        }
    }

    /**
     * Adds the {@code c9@*} share/return statistics of a user profile plus one-hot features for
     * the user attributes listed in {@code userAttrList}.
     *
     * <p>Profile statistics are only emitted when the profile's share PV is positive. Attribute
     * values are trimmed, whitespace and {@code ':'} are replaced by {@code '_'}, and the result
     * is lower-cased with {@link Locale#ROOT} so that keys do not depend on the JVM locale.
     *
     * @param profile  share/return profile; may be {@code null}
     * @param userInfo user attribute map; may be {@code null}; {@code null} values are skipped
     * @param featMap  output map the features are written to
     */
    public static void getUserProfileFeature(UserShareReturnProfile profile, Map<String, String> userInfo,
                                             Map<String, Double> featMap) {
        if (null != profile) {
            long s_pv = profile.getS_pv();       // share PV
            long s_cnt = profile.getS_cnt();     // share count
            long r_pv = profile.getR_pv();       // return PV
            long r_uv = profile.getR_uv();       // return UV
            long m_s_cnt = profile.getM_s_cnt(); // max share count
            long m_r_uv = profile.getM_r_uv();   // max return UV
            if (s_pv > 0) {
                featMap.put("c9@s_pv", FeatureUtils.log1(s_pv, log1Scale));
                featMap.put("c9@s_cnt", FeatureUtils.log1(s_cnt, log1Scale));
                featMap.put("c9@r_pv", FeatureUtils.log1(r_pv, log1Scale));
                featMap.put("c9@r_uv", FeatureUtils.log1(r_uv, log1Scale));
                featMap.put("c9@m_s_cnt", FeatureUtils.log1(m_s_cnt, log1Scale));
                featMap.put("c9@m_r_uv", FeatureUtils.log1(m_r_uv, log1Scale));
                featMap.put("c9@ros_one", FeatureUtils.wilsonScore(r_pv, s_pv));
                featMap.put("c9@ros", FeatureUtils.plusSmooth(r_uv, s_pv, smoothPlus));
                featMap.put("c9@ros_minus", FeatureUtils.plusSmooth(r_uv, r_pv, smoothPlus));
                featMap.put("c9@ros_#", FeatureUtils.plusSmooth(r_uv / 5.0, s_pv, largerSmoothPlus, 2));
                featMap.put("c9@ros_minus_#", FeatureUtils.plusSmooth(r_uv / 5.0, r_pv, largerSmoothPlus, 2));
            }
        }

        if (null == userInfo || userInfo.isEmpty()) {
            return;
        }
        for (String attr : userAttrList) {
            String raw = userInfo.get(attr);
            if (null == raw) {
                // Absent key or key mapped to null: nothing to encode.
                continue;
            }
            String value = keyUnsafeChars.matcher(raw.trim()).replaceAll("_");
            if (!value.isEmpty()) {
                featMap.put(String.format("%s@%s", attr, value.toLowerCase(Locale.ROOT)), 1.0);
            }
        }
        // Constant-first equals tolerates a null "page" value.
        if ("回流页".equals(userInfo.get("page"))) {
            featMap.put("page@return", 1.0);
        }
    }

    /**
     * Adds match-count / max-score / average-score features between the user's tag lists
     * ({@code tags_1d}, {@code tags_3d}, {@code tags_7d}) and the video title.
     *
     * @param prefix    feature-name prefix
     * @param videoInfo video attribute map; must contain a non-empty {@code title} to emit anything
     * @param infoMap   user tag map keyed by period name
     * @param featMap   output map the features are written to
     */
    public static void getUserTagsCrossVideoFeature(String prefix, Map<String, String> videoInfo,
                                                    Map<String, String> infoMap, Map<String, Double> featMap) {
        if (null == videoInfo || videoInfo.isEmpty() || null == infoMap || infoMap.isEmpty()) {
            return;
        }
        String title = getOrEmpty(videoInfo, "title");
        if (title.isEmpty()) {
            return;
        }
        for (String period : Arrays.asList("tags_1d", "tags_3d", "tags_7d")) {
            String tags = getOrEmpty(infoMap, period);
            if (tags.isEmpty()) {
                continue;
            }
            Double[] scores = FeatureUtils.funcC34567ForTagsNew(tags, title);
            if (null == scores || scores.length < 3) {
                continue;
            }
            String base = prefix + "_" + period;
            featMap.put(base + "@matchnum", scores[0]);
            featMap.put(base + "@maxscore", scores[1]);
            featMap.put(base + "@avgscore", scores[2]);
        }
    }

    /**
     * Adds user collaborative-filtering features ({@code score}, {@code num}, {@code rank}) for
     * one video, for each CF type in {@code cfList}.
     *
     * <p>The third score is emitted as its reciprocal, or {@code 0} when it is not positive.
     *
     * @param prefix  feature-name prefix
     * @param vid     video id; {@code null} or empty yields no features
     * @param infoMap CF type -> (video id -> three score strings), e.g. from {@link #parseUCFScore}
     * @param featMap output map the features are written to
     * @throws NumberFormatException if a score string is not a valid double
     */
    public static void getUserCFFeature(String prefix, String vid, Map<String, Map<String, String[]>> infoMap,
                                        Map<String, Double> featMap) {
        if (null == vid || vid.isEmpty() || null == infoMap || infoMap.isEmpty()) {
            return;
        }
        for (String cfType : cfList) {
            Map<String, String[]> cfScoresMap = infoMap.get(cfType);
            if (null == cfScoresMap) {
                continue;
            }
            String[] scores = cfScoresMap.get(vid);
            if (null == scores || scores.length < 3) {
                continue;
            }
            double score = Double.parseDouble(scores[0]);
            double num = Double.parseDouble(scores[1]);
            double rawRank = Double.parseDouble(scores[2]);
            double rank = rawRank <= 0 ? 0D : 1.0 / rawRank;
            String base = prefix + "_" + cfType;
            featMap.put(base + "@score", score);
            featMap.put(base + "@num", num);
            featMap.put(base + "@rank", rank);
        }
    }

    /**
     * Adds all per-video statistic features ({@code b0}..{@code b15}) and the head-video CF
     * features ({@code d1}..{@code d3}) for one video.
     *
     * <p>The per-video tables are looked up once. A missing video (or a {@code null} outer map) is
     * treated like a video with no tables: the {@code b*} groups emit nothing, while the
     * {@code d*} groups emit their zero defaults.
     *
     * @param vid             video id
     * @param videoOriginInfo video id -> (table name -> (column -> value))
     * @param featMap         output map the features are written to
     */
    public static void getVideoFeature(String vid, Map<String, Map<String, Map<String, String>>> videoOriginInfo,
                                       Map<String, Double> featMap) {
        Map<String, Map<String, String>> tables = null == videoOriginInfo ? null : videoOriginInfo.get(vid);
        if (null == tables) {
            tables = Collections.emptyMap();
        }

        oneTypeStatFeature("b0", b0Periods, tables.get("alg_recsys_feature_video_clean_stat"), featMap);
        oneTypeStatFeature("b1", "return_1_uv", b1Periods, tables.get("alg_vid_global_feature_20250212"), featMap);
        oneTypeStatFeature("b2", "return_n_uv", b2Periods, tables.get("alg_vid_recommend_exp_feature_20250212"), featMap);
        oneTypeStatFeature("b3", "return_n_uv", b3Periods, tables.get("alg_vid_recommend_flowpool_exp_feature_20250212"), featMap);
        oneTypeStatFeature("b4", "return_n_uv", b4Periods, tables.get("alg_vid_apptype_recommend_exp_feature_20250212"), featMap);
        oneTypeStatFeature("b5", "return_n_uv", b5Periods, tables.get("alg_vid_province_recommend_exp_feature_20250212"), featMap);
        oneTypeStatFeature("b6", "return_n_uv", b6Periods, tables.get("alg_vid_brand_recommend_exp_feature_20250212"), featMap);
        oneTypeStatFeature("b7", "return_n_uv", b7Periods, tables.get("alg_vid_hotsencetype_recommend_exp_feature_20250212"), featMap);
        oneTypeStatFeature("b8", "return_n_uv", b8Periods, tables.get("alg_merge_cate1_recommend_exp_feature_20250212"), featMap);
        oneTypeStatFeature("b9", "return_n_uv", b9Periods, tables.get("alg_merge_cate2_recommend_exp_feature_20250212"), featMap);
        oneTypeStatFeature("b10", "return_n_uv", b10Periods, tables.get("alg_channel_recommend_exp_feature_20250212"), featMap);
        oneTypeStatFeature("b11", "return_n_uv", b11Periods, tables.get("alg_festive_recommend_exp_feature_20250212"), featMap);
        oneTypeStatFeature("b13", "return_n_uv", b13Periods, tables.get("alg_video_unionid_recommend_exp_feature_20250212"), featMap);
        oneTypeStatFeature("b14", b14Periods, tables.get("alg_recsys_feature_video_recommend_channel_layer"), featMap);
        oneTypeStatFeature("b15", b15Periods, tables.get("alg_recsys_feature_video_recommend_channel_layer_head"), featMap);

        // head video cf
        headVideoCFD1Feature("d1", tables.get("scene_type_vid_cf_feature_20250212"), featMap);
        headVideoCFD2Feature("d2", tables.get("vid_click_cf_feature_20250212"), featMap);
        headVideoCFD3Feature("d3", tables.get("alg_recsys_feature_cf_i2i_v2"), featMap);
    }

    /**
     * Adds basic features of one video: duration, bit rate, resolution, category / keyword ID
     * features, title time-type and age.
     *
     * @param prefix    feature-name prefix
     * @param currentMs request time in epoch milliseconds
     * @param videoInfo video attribute map; {@code null} or empty yields no features
     * @param featMap   output map the features are written to
     * @throws NumberFormatException if {@code total_time} or {@code bit_rate} is present,
     *                               non-blank and not a valid double
     */
    public static void getVideoBaseFeature(String prefix, long currentMs, Map<String, String> videoInfo,
                                           Map<String, Double> featMap) {
        if (null == videoInfo || videoInfo.isEmpty()) {
            return;
        }
        featMap.put(prefix + "@total_time",
                FeatureUtils.log1(parseDoubleOrZero(videoInfo.get("total_time")), log1Scale));
        featMap.put(prefix + "@bit_rate",
                FeatureUtils.log1(parseDoubleOrZero(videoInfo.get("bit_rate")), log1Scale));

        String resolution = FeatureUtils.getResolution(videoInfo);
        if (null != resolution && !resolution.isEmpty()) {
            featMap.put(String.format("%s@%s@%s", prefix, "res", resolution), 1.0);
        }

        // cate
        getVideoCateFeature(prefix, videoInfo, featMap);

        String title = videoInfo.get("title");
        if (null != title) {
            int id = FeatureUtils.judgeVideoTimeType(title);
            if (id > 0) {
                featMap.put(String.format("%s@%s@%d", prefix, "tt", id), 1.0);
            }
        }

        // time
        try {
            if (videoInfo.containsKey("gmt_create_timestamp")) {
                long createMs = Long.parseLong(videoInfo.get("gmt_create_timestamp"));
                double createTime = FeatureUtils.getTimeDiff(currentMs, createMs);
                featMap.put(prefix + "@ts", 1 - createTime);
            }
        } catch (Exception ignored) {
            // Best effort: an unparsable creation timestamp only drops the "@ts" feature.
        }
    }

    /**
     * Adds {@code hr_sim@<attr>} similarity features between the head video and the rank video.
     *
     * @param headInfo attribute map of the head video
     * @param rankInfo attribute map of the rank video
     * @param featMap  output map the features are written to
     */
    public static void getHeadRankVideoCrossFeature(Map<String, String> headInfo, Map<String, String> rankInfo,
                                                    Map<String, Double> featMap) {
        getTwoVideoCrossFeature("hr_sim", videoSimAttrs, headInfo, rankInfo, featMap);
    }

    /**
     * Adds ID features of a creative: its {@code ghId} and every segmented word of its
     * {@code name} that is longer than one character.
     *
     * @param prefix       feature-name prefix
     * @param creativeInfo creative attribute map; {@code null} or empty yields no features
     * @param featMap      output map the features are written to
     */
    public static void getCreativeBaseFeature(String prefix, Map<String, String> creativeInfo,
                                              Map<String, Double> featMap) {
        if (null == creativeInfo || creativeInfo.isEmpty()) {
            return;
        }
        String ghId = creativeInfo.get("ghId");
        if (null != ghId && !ghId.isEmpty()) {
            featMap.put(String.format("%s@gid@%s", prefix, ghId), 1.0);
        }

        String name = creativeInfo.get("name");
        if (null == name || name.isEmpty()) {
            return;
        }
        List<String> words = Segment.getWords(name);
        if (null == words) {
            return;
        }
        for (String word : words) {
            if (null != word && word.length() > 1) {
                featMap.put(String.format("%s@name@%s", prefix, word), 1.0);
            }
        }
    }

    /**
     * Adds similarity features between a creative and the rank video.
     *
     * @param prefix       feature-name prefix
     * @param creativeInfo creative attribute map
     * @param rankInfo     attribute map of the rank video
     * @param featMap      output map the features are written to
     */
    public static void getCreativeCrossFeature(String prefix, Map<String, String> creativeInfo,
                                               Map<String, String> rankInfo, Map<String, Double> featMap) {
        getTwoVideoCrossFeature(prefix, creativeSimAttrs, creativeInfo, rankInfo, featMap);
    }

    /**
     * Adds cross features between the user's share/return profile and the rank video: the
     * history-sequence features ({@code c9_mss}, {@code c9_mrs}, {@code c9_lss}, {@code c9_lrs},
     * {@code c9_lr1s}) and the per-attribute statistics ({@code c9_c1s}, {@code c9_c2s},
     * {@code c9_l1s}, {@code c9_l2s}, {@code c9_dks}).
     *
     * @param currentMs request time in epoch milliseconds
     * @param profile   share/return profile; {@code null} yields no features
     * @param rankVideo attribute map of the rank video; when {@code null} or empty only the
     *                  sequence features are attempted
     * @param hVideoMap history video id -> attribute map; may be {@code null}
     * @param featMap   output map the features are written to
     */
    public static void getProfileVideoCrossFeature(long currentMs, UserShareReturnProfile profile,
                                                   Map<String, String> rankVideo,
                                                   Map<String, Map<String, String>> hVideoMap,
                                                   Map<String, Double> featMap) {
        if (null == profile) {
            return;
        }
        getRSCrossFeature(false, "c9_mss", currentMs, seqMaxN, profile.getM_s_s(), rankVideo, hVideoMap, featMap);
        getRSCrossFeature(false, "c9_mrs", currentMs, seqMaxN, profile.getM_r_s(), rankVideo, hVideoMap, featMap);
        getRSCrossFeature(true, "c9_lss", currentMs, seqLastN, profile.getL_s_s(), rankVideo, hVideoMap, featMap);
        getRSCrossFeature(false, "c9_lrs", currentMs, seqLastN, profile.getL_r_s(), rankVideo, hVideoMap, featMap);
        getRSCrossFeature(true, "c9_lr1s", currentMs, seqLastN, profile.getL_r1_s(), rankVideo, hVideoMap, featMap);

        if (null == rankVideo || rankVideo.isEmpty()) {
            return;
        }
        getVideoAttrSRCrossFeature("c9_c1s", getOrEmpty(rankVideo, "merge_first_level_cate"), profile.getC1_s(), featMap);
        getVideoAttrSRCrossFeature("c9_c2s", getOrEmpty(rankVideo, "merge_second_level_cate"), profile.getC2_s(), featMap);
        getVideoAttrSRCrossFeature("c9_l1s", getOrEmpty(rankVideo, "festive_label1"), profile.getL1_s(), featMap);
        getVideoAttrSRCrossFeature("c9_l2s", getOrEmpty(rankVideo, "festive_label2"), profile.getL2_s(), featMap);

        // Keywords produced by video deconstruction.
        String dkKeywords = rankVideo.get("dk_keywords");
        if (null == dkKeywords || dkKeywords.isEmpty()) {
            return;
        }
        for (String kw : keywordSeparator.split(dkKeywords)) {
            String cleaned = keyUnsafeChars.matcher(kw).replaceAll("");
            // Every keyword writes the same "c9_dks@*" keys, so the last matching keyword wins.
            getVideoAttrSRCrossFeature("c9_dks", cleaned, profile.getD_k_s(), featMap);
        }
    }

    /**
     * Emits features for the first {@code maxN} entries of one share/return history sequence.
     *
     * <p>For each entry with a positive id this writes the log-scaled count and UV, the recency,
     * one-hot weekday/hour of the event, title similarity with the rank video and - when
     * {@code flag} is set - the category ID features of the history video.
     *
     * @param flag      whether to also emit the history video's category features
     * @param prefix    feature-name prefix; the 1-based position is appended
     * @param currentMs request time in epoch milliseconds
     * @param maxN      maximum number of entries to read
     * @param list      history sequence; may be {@code null}
     * @param rankVideo attribute map of the rank video
     * @param hVideoMap history video id -> attribute map; may be {@code null}
     * @param featMap   output map the features are written to
     */
    private static void getRSCrossFeature(boolean flag, String prefix, long currentMs, int maxN,
                                          List<UserSRBO> list, Map<String, String> rankVideo,
                                          Map<String, Map<String, String>> hVideoMap,
                                          Map<String, Double> featMap) {
        if (null == list || list.isEmpty()) {
            return;
        }
        int limit = Math.min(list.size(), maxN);
        for (int i = 0; i < limit; i++) {
            UserSRBO u = list.get(i);
            if (null == u) {
                continue;
            }
            long id = u.getId();
            if (id <= 0) {
                continue;
            }
            long cnt = u.getCnt();
            long uv = u.getUv();
            long ts = u.getTs();
            String vid = String.valueOf(id);
            String baseKey = String.format("%s@%d", prefix, i + 1);

            if (cnt > 0) {
                featMap.put(baseKey + "@cnt", FeatureUtils.log1(cnt, log1Scale));
            }
            if (uv > 0) {
                featMap.put(baseKey + "@uv", FeatureUtils.log1(uv, log1Scale));
            }
            if (ts > 0) {
                long historyMs = ts * 1000;
                featMap.put(baseKey + "@ts", 1 - FeatureUtils.getTimeDiff(currentMs, historyMs));
                // history week & hour
                Calendar calendar = Calendar.getInstance();
                calendar.setTimeInMillis(historyMs);
                featMap.put(String.format("%s_week@%d", baseKey, calendar.get(Calendar.DAY_OF_WEEK)), 1.0);
                featMap.put(String.format("%s_hour@%d", baseKey, calendar.get(Calendar.HOUR_OF_DAY) + 1), 1.0);
            }
            if (null != hVideoMap && hVideoMap.containsKey(vid)) {
                Map<String, String> hVideo = hVideoMap.get(vid);
                getTwoVideoCrossFeature(baseKey, hVideoSimAttrs, hVideo, rankVideo, featMap);
                if (flag) {
                    getHistoryVideoCateFeature(baseKey, hVideo, featMap);
                }
            }
        }
    }

    /**
     * Emits the share/return statistics the profile holds for one attribute value of the rank
     * video. Nothing is written when the value is unknown to the profile or its share PV is not
     * positive.
     *
     * @param prefix  feature-name prefix
     * @param attr    attribute value of the rank video (trimmed before lookup); may be {@code null}
     * @param attrMap attribute value -> statistics; may be {@code null}
     * @param featMap output map the features are written to
     */
    private static void getVideoAttrSRCrossFeature(String prefix, String attr,
                                                   Map<String, VideoAttrSRBO> attrMap,
                                                   Map<String, Double> featMap) {
        if (null == attr || null == attrMap || attrMap.isEmpty()) {
            return;
        }
        VideoAttrSRBO bo = attrMap.get(attr.trim());
        if (null == bo) {
            return;
        }
        long sp = bo.getSp(); // share_pv
        long rp = bo.getRp(); // return_n_pv_noself
        long ru = bo.getRu(); // return_n_uv_noself
        long mu = bo.getMu(); // max_return_uv
        if (sp <= 0) {
            return;
        }
        featMap.put(prefix + "@sp", FeatureUtils.log1(sp, log1Scale));
        featMap.put(prefix + "@rp", FeatureUtils.log1(rp, log1Scale));
        featMap.put(prefix + "@ru", FeatureUtils.log1(ru, log1Scale));
        featMap.put(prefix + "@mu", FeatureUtils.log1(mu, log1Scale));
        featMap.put(prefix + "@ros_one", FeatureUtils.wilsonScore(rp, sp));
        featMap.put(prefix + "@ros", FeatureUtils.plusSmooth(ru, sp, smoothPlus));
        featMap.put(prefix + "@ros_minus", FeatureUtils.plusSmooth(ru, rp, smoothPlus));
        featMap.put(prefix + "@ros_#", FeatureUtils.plusSmooth(ru / 5.0, sp, largerSmoothPlus, 2));
        featMap.put(prefix + "@ros_minus_#", FeatureUtils.plusSmooth(ru / 5.0, rp, largerSmoothPlus, 2));
    }

    /**
     * Emits one-hot ID features for the video's category attributes and for each keyword in
     * {@code keywords} and {@code dk_keywords}.
     *
     * @param prefix    feature-name prefix
     * @param videoInfo video attribute map; {@code null} or empty yields no features
     * @param featMap   output map the features are written to
     */
    private static void getVideoCateFeature(String prefix, Map<String, String> videoInfo,
                                            Map<String, Double> featMap) {
        if (null == videoInfo || videoInfo.isEmpty()) {
            return;
        }
        for (String attr : videoCateAttrs) {
            String attrVal = getOrEmpty(videoInfo, attr).trim();
            if (!attrVal.isEmpty() && !attrVal.equals("unknown")) {
                featMap.put(String.format("%s@%s@%s", prefix, attr, attrVal), 1.0);
            }
        }
        putKeywordFeatures(prefix, "kw", videoInfo.get("keywords"), featMap);
        // Keywords produced by video deconstruction, as ID features.
        putKeywordFeatures(prefix, "dkw", videoInfo.get("dk_keywords"), featMap);
    }

    /** Splits a keyword list and writes one {@code prefix@tag@keyword} ID feature per non-empty keyword. */
    private static void putKeywordFeatures(String prefix, String tag, String keywords,
                                           Map<String, Double> featMap) {
        if (null == keywords || keywords.isEmpty()) {
            return;
        }
        for (String kw : keywordSeparator.split(keywords)) {
            String cleaned = keyUnsafeChars.matcher(kw).replaceAll("");
            if (!cleaned.isEmpty()) {
                featMap.put(String.format("%s@%s@%s", prefix, tag, cleaned), 1.0);
            }
        }
    }

    /**
     * Emits a word2vec similarity feature for each attribute that both videos carry with a
     * usable value (non-null, non-empty and not {@code "unknown"}).
     *
     * @param prefix  feature-name prefix
     * @param attrs   attribute names to compare
     * @param video1  first video's attribute map
     * @param video2  second video's attribute map
     * @param featMap output map the features are written to
     */
    private static void getTwoVideoCrossFeature(String prefix, List<String> attrs, Map<String, String> video1,
                                                Map<String, String> video2, Map<String, Double> featMap) {
        if (null == video1 || video1.isEmpty() || null == video2 || video2.isEmpty()) {
            return;
        }
        for (String attr : attrs) {
            String attr1 = video1.get(attr);
            String attr2 = video2.get(attr);
            if (isUsableAttr(attr1) && isUsableAttr(attr2)) {
                featMap.put(prefix + "@" + attr, SimilarityUtils.word2VecSimilarity(attr1, attr2));
            }
        }
    }

    /** Writes the four {@code ros}/{@code rov} CF score and rank values; missing values default to 0. */
    private static void headVideoCFD1Feature(String prefix, Map<String, String> infoMap,
                                             Map<String, Double> featMap) {
        featMap.put(prefix + "@ros_cf_score", getOneInfo("ros_cf_score", infoMap));
        featMap.put(prefix + "@ros_cf_rank", getOneInfo("ros_cf_rank", infoMap));
        featMap.put(prefix + "@rov_cf_score", getOneInfo("rov_cf_score", infoMap));
        featMap.put(prefix + "@rov_cf_rank", getOneInfo("rov_cf_rank", infoMap));
    }

    /** Writes the click-CF {@code score} and {@code rank}; missing values default to 0. */
    private static void headVideoCFD2Feature(String prefix, Map<String, String> infoMap,
                                             Map<String, Double> featMap) {
        featMap.put(prefix + "@score", getOneInfo("score", infoMap));
        featMap.put(prefix + "@rank", getOneInfo("rank", infoMap));
    }

    /** Writes log-scaled {@code exp} / {@code return_n} and their smoothed ratio; missing values default to 0. */
    private static void headVideoCFD3Feature(String prefix, Map<String, String> infoMap,
                                             Map<String, Double> featMap) {
        double exp = getOneInfo("exp", infoMap);
        double return_n = getOneInfo("return_n", infoMap);
        double rovn = FeatureUtils.plusSmooth(return_n, exp, smoothPlus);
        featMap.put(prefix + "@exp", FeatureUtils.log1(exp, log1Scale));
        featMap.put(prefix + "@return_n", FeatureUtils.log1(return_n, log1Scale));
        featMap.put(prefix + "@rovn", rovn);
    }

    /**
     * Parses the serialized user-CF scores for each CF type in {@code cfList}.
     *
     * <p>Each value is a comma-separated list of {@code key:v1:v2:v3} entries; entries with fewer
     * than four colon-separated fields are skipped, and fields beyond the fourth are ignored.
     *
     * @param mapInfo CF type -> serialized scores; {@code null} yields an empty result
     * @return CF type -> (key -> {@code [v1, v2, v3]}); never {@code null}; CF types without any
     *         valid entry are omitted
     */
    public static Map<String, Map<String, String[]>> parseUCFScore(Map<String, String> mapInfo) {
        Map<String, Map<String, String[]>> allScoresMap = new HashMap<>();
        if (null == mapInfo) {
            return allScoresMap;
        }
        for (String cfType : cfList) {
            String data = mapInfo.get(cfType);
            if (null == data || data.isEmpty()) {
                continue;
            }
            Map<String, String[]> oneScoresMap = new HashMap<>();
            for (String entry : data.split(",")) {
                String[] fields = entry.split(":");
                // Make sure the split produced at least four fields.
                if (fields.length >= 4) {
                    oneScoresMap.put(fields[0], new String[]{fields[1], fields[2], fields[3]});
                }
            }
            if (!oneScoresMap.isEmpty()) {
                allScoresMap.put(cfType, oneScoresMap);
            }
        }
        return allScoresMap;
    }

    /**
     * Emits the per-period statistic features of one table, using a single configurable UV column.
     *
     * <p>Periods whose {@code exp_<period>} value is not greater than zero are skipped.
     *
     * @param prefix   feature-name prefix
     * @param uvPrefix column-name prefix of the UV value read as {@code <uvPrefix>_<period>}
     * @param periods  period suffixes to read
     * @param infoMap  column -> value; {@code null} or empty yields no features
     * @param featMap  output map the features are written to
     */
    private static void oneTypeStatFeature(String prefix, String uvPrefix, List<String> periods,
                                           Map<String, String> infoMap, Map<String, Double> featMap) {
        if (null == infoMap || infoMap.isEmpty()) {
            return;
        }
        for (String period : periods) {
            double exp = getOneInfo("exp_" + period, infoMap);
            if (!FeatureUtils.greaterThanZero(exp)) {
                continue;
            }
            double is_share = getOneInfo("is_share_" + period, infoMap);
            double share_cnt = getOneInfo("share_cnt_" + period, infoMap);
            double is_return_1 = getOneInfo("is_return_1_" + period, infoMap);
            double return_n_uv = getOneInfo(uvPrefix + "_" + period, infoMap);

            String base = prefix + "_" + period + "@";
            featMap.put(base + "exp", FeatureUtils.log1(exp, log1Scale));
            featMap.put(base + "is_share", FeatureUtils.log1(is_share, log1Scale));
            featMap.put(base + "share_cnt", FeatureUtils.log1(share_cnt, log1Scale));
            featMap.put(base + "is_return_1", FeatureUtils.log1(is_return_1, log1Scale));
            featMap.put(base + "return_n_uv", FeatureUtils.log1(return_n_uv, log1Scale));
            featMap.put(base + "str", FeatureUtils.wilsonScore(is_share, exp));
            featMap.put(base + "str_plus", FeatureUtils.wilsonScore(is_return_1, exp));
            featMap.put(base + "ros_one", FeatureUtils.wilsonScore(is_return_1, is_share));
            // larger smooth
            featMap.put(base + "rovn_#", FeatureUtils.plusSmooth(return_n_uv, exp, largerSmoothPlus, 1));
            featMap.put(base + "ros_#", FeatureUtils.plusSmooth(return_n_uv / 5, is_share, largerSmoothPlus, 2));
            featMap.put(base + "ros_n_#", FeatureUtils.plusSmooth(return_n_uv / 5, share_cnt, largerSmoothPlus, 2));
            featMap.put(base + "ros_minus_#", FeatureUtils.plusSmooth(return_n_uv / 5, is_return_1, largerSmoothPlus, 2));
        }
    }

    /**
     * Emits the per-period statistic features of one table, reading both the
     * {@code return_1_uv} and {@code return_n_uv} columns.
     *
     * <p>Periods whose {@code exp_<period>} value is not greater than zero are skipped.
     *
     * @param prefix  feature-name prefix
     * @param periods period suffixes to read
     * @param infoMap column -> value; {@code null} or empty yields no features
     * @param featMap output map the features are written to
     */
    private static void oneTypeStatFeature(String prefix, List<String> periods, Map<String, String> infoMap,
                                           Map<String, Double> featMap) {
        if (null == infoMap || infoMap.isEmpty()) {
            return;
        }
        for (String period : periods) {
            double exp = getOneInfo("exp_" + period, infoMap);
            if (!FeatureUtils.greaterThanZero(exp)) {
                continue;
            }
            double is_share = getOneInfo("is_share_" + period, infoMap);
            double share_cnt = getOneInfo("share_cnt_" + period, infoMap);
            double is_return_1 = getOneInfo("is_return_1_" + period, infoMap);
            double return_1_uv = getOneInfo("return_1_uv_" + period, infoMap);
            double return_n_uv = getOneInfo("return_n_uv_" + period, infoMap);

            String base = prefix + "_" + period + "@";
            featMap.put(base + "exp", FeatureUtils.log1(exp, log1Scale));
            featMap.put(base + "is_share", FeatureUtils.log1(is_share, log1Scale));
            featMap.put(base + "share_cnt", FeatureUtils.log1(share_cnt, log1Scale));
            featMap.put(base + "is_return_1", FeatureUtils.log1(is_return_1, log1Scale));
            featMap.put(base + "return_1_uv", FeatureUtils.log1(return_1_uv, log1Scale));
            featMap.put(base + "return_n_uv", FeatureUtils.log1(return_n_uv, log1Scale));
            featMap.put(base + "str", FeatureUtils.wilsonScore(is_share, exp));
            featMap.put(base + "str_plus", FeatureUtils.wilsonScore(is_return_1, exp));
            featMap.put(base + "ros_one", FeatureUtils.wilsonScore(is_return_1, is_share));
            // larger smooth
            featMap.put(base + "rovn1_#", FeatureUtils.plusSmooth(return_1_uv, exp, largerSmoothPlus, 1));
            featMap.put(base + "ros1_#", FeatureUtils.plusSmooth(return_1_uv / 5, is_share, largerSmoothPlus, 2));
            featMap.put(base + "ros_n1_#", FeatureUtils.plusSmooth(return_1_uv / 5, share_cnt, largerSmoothPlus, 2));
            featMap.put(base + "ros_minus1_#", FeatureUtils.plusSmooth(return_1_uv / 5, is_return_1, largerSmoothPlus, 2));
            featMap.put(base + "rovn_#", FeatureUtils.plusSmooth(return_n_uv, exp, largerSmoothPlus, 1));
            featMap.put(base + "ros_#", FeatureUtils.plusSmooth(return_n_uv / 5, is_share, largerSmoothPlus, 2));
            featMap.put(base + "ros_n_#", FeatureUtils.plusSmooth(return_n_uv / 5, share_cnt, largerSmoothPlus, 2));
            featMap.put(base + "ros_minus_#", FeatureUtils.plusSmooth(return_n_uv / 5, is_return_1, largerSmoothPlus, 2));
        }
    }

    /**
     * Reads one numeric column from a table.
     *
     * @param name column name
     * @param map  column -> value; may be {@code null}
     * @return the parsed value, or {@code 0.0} when the map is {@code null} or the column is
     *         absent, {@code null} or blank
     * @throws NumberFormatException if the column is present, non-blank and not a valid double
     */
    private static double getOneInfo(String name, Map<String, String> map) {
        if (null == map) {
            return 0.0;
        }
        return parseDoubleOrZero(map.get(name));
    }

    /**
     * Emits one-hot category features of a history video, using the short attribute names from
     * {@code historyVideoAttrMap}.
     *
     * @param prefix    feature-name prefix
     * @param videoInfo history video attribute map; {@code null} or empty yields no features
     * @param featMap   output map the features are written to
     */
    private static void getHistoryVideoCateFeature(String prefix, Map<String, String> videoInfo,
                                                   Map<String, Double> featMap) {
        if (null == videoInfo || videoInfo.isEmpty()) {
            return;
        }
        for (Map.Entry<String, String> entry : historyVideoAttrMap.entrySet()) {
            String attrVal = getOrEmpty(videoInfo, entry.getKey()).trim();
            if (!attrVal.isEmpty() && !attrVal.equals("unknown")) {
                featMap.put(String.format("%s@%s@%s", prefix, entry.getValue(), attrVal), 1.0);
            }
        }
    }

    /** Returns true when the id is non-null, non-empty and not the literal string {@code "null"}. */
    private static boolean isUsableId(String id) {
        return null != id && !id.isEmpty() && !id.equals("null");
    }

    /** Returns true when the attribute value is non-null, non-empty and not {@code "unknown"}. */
    private static boolean isUsableAttr(String value) {
        return null != value && !value.isEmpty() && !"unknown".equals(value);
    }

    /**
     * Returns the value mapped to {@code key}, or {@code ""} when the key is absent or mapped to
     * {@code null} ({@code Map.getOrDefault} alone would hand the {@code null} value through).
     */
    private static String getOrEmpty(Map<String, String> map, String key) {
        String value = map.get(key);
        return null == value ? "" : value;
    }

    /**
     * Parses a double, treating {@code null} and blank input as {@code 0.0}.
     *
     * @throws NumberFormatException if the input is non-blank and not a valid double
     */
    private static double parseDoubleOrZero(String raw) {
        if (null == raw || raw.trim().isEmpty()) {
            return 0.0;
        }
        return Double.parseDouble(raw);
    }
}