| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622 |
- package examples.myUtils;
- import com.tzld.piaoquan.recommend.similarity.word2vec.Segment;
- import examples.utils.SimilarityUtils;
- import java.util.*;
- public class FeatureTransformV2 {
/** Maximum number of entries read from the profile's m_s_s / m_r_s sequences. */
private static final int seqMaxN = 2;
/** Maximum number of entries read from the profile's l_s_s / l_r_s / l_r1_s sequences. */
private static final int seqLastN = 2;
/** Smoothing constant handed to the three-argument FeatureUtils.plusSmooth. */
private static final double smoothPlus = 5.0;
/** Larger smoothing constant handed to the four-argument FeatureUtils.plusSmooth. */
private static final double largerSmoothPlus = 30.0;
/** Scale argument handed to FeatureUtils.log1. */
private static final double log1Scale = 10.0;
/**
 * Validity window used by getMid: 14 days expressed in milliseconds.
 * The leading long literal forces 64-bit arithmetic, so enlarging the window
 * can never silently overflow int before the value is widened.
 */
private static final long midValidTs = 14L * 24 * 3600 * 1000;

// Period suffixes read per statistics source; the raw keys look like "exp_<period>".
private static final List<String> c1Periods = Arrays.asList("72h", "168h");
private static final List<String> b0Periods = Arrays.asList("1h", "3h", "6h", "12h");
private static final List<String> b1Periods = Arrays.asList("1h", "3h", "6h", "24h", "72h", "168h");
private static final List<String> b2Periods = Arrays.asList("1h", "3h", "6h", "24h");
private static final List<String> b3Periods = Arrays.asList("1h", "3h", "6h", "24h", "72h");
private static final List<String> b4Periods = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h");
private static final List<String> b5Periods = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h");
private static final List<String> b6Periods = Arrays.asList("1h", "24h");
private static final List<String> b7Periods = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h");
private static final List<String> b8Periods = Arrays.asList("1h", "3h", "6h", "12h", "24h");
private static final List<String> b9Periods = Arrays.asList("1h", "3h", "6h", "12h", "24h");
private static final List<String> b10Periods = Arrays.asList("1h", "12h");
private static final List<String> b11Periods = Arrays.asList("1h", "12h");
private static final List<String> b13Periods = Arrays.asList("1h", "3h", "6h", "12h", "24h", "72h");
private static final List<String> b14Periods = Arrays.asList("1h", "2h", "3h", "6h", "12h");
private static final List<String> b15Periods = Arrays.asList("1h", "2h", "3h", "6h", "12h");

/** Video attributes turned into one-hot category features by getVideoCateFeature. */
private static final List<String> videoCateAttrs = Arrays.asList(FeatureUtils.cate1Attr, FeatureUtils.cate2Attr, FeatureUtils.festive1Attr,
        FeatureUtils.channelAttr, FeatureUtils.sourceAttr, FeatureUtils.uidAttr, FeatureUtils.mergeCate1Attr, FeatureUtils.mergeCate2Attr);
/** Attributes compared between the head video and the ranked video. */
private static final List<String> videoSimAttrs = Arrays.asList("title", "cate2", "cate2_list", "keywords");
/** Attributes compared between a creative and the ranked video. */
private static final List<String> creativeSimAttrs = Arrays.asList("title");
/** Attributes compared between a history video and the ranked video. */
private static final List<String> hVideoSimAttrs = Arrays.asList("title");
/** Collaborative-filtering types handled by parseUCFScore and getUserCFFeature. */
private static final List<String> cfList = Arrays.asList("share", "return");
/** User attributes turned into one-hot features by getUserProfileFeature. */
private static final List<String> userAttrList = Arrays.asList("province", "city", "model", "brand", "system", "user_channel", "user_level");
/** App types that get their own bucket in getContextFeature; everything else becomes "other". */
private static final Set<String> appSet = new HashSet<>(Arrays.asList("0", "2", "4"));
/** Hot-scene types that get their own bucket in getContextFeature; everything else becomes "other". */
private static final Set<String> hotSceneSet = new HashSet<>(Arrays.asList("1008", "1007", "1058", "1074", "1010"));
/** Maps a history-video attribute name to the short name used inside the emitted feature key. */
private static final Map<String, String> histotyVideoAttrMAP = new HashMap<>();

static {
    histotyVideoAttrMAP.put("merge_first_level_cate", "cate1");
    histotyVideoAttrMAP.put("merge_second_level_cate", "cate2");
}
- public static void getContextFeature(long currentMs, String appType, String hotSceneType, Map<String, Double> featureMap) {
- Calendar calendar = Calendar.getInstance();
- calendar.setTimeInMillis(currentMs);
- int week = calendar.get(Calendar.DAY_OF_WEEK);
- int hour = calendar.get(Calendar.HOUR_OF_DAY) + 1;
- featureMap.put(String.format("%s@%d", "week", week), 1.0);
- featureMap.put(String.format("%s@%d", "hour", hour), 1.0);
- featureMap.put("hour", hour * 1.0);
- String app;
- if (appSet.contains(appType)) {
- app = appType;
- } else {
- app = "other";
- }
- featureMap.put(String.format("%s@%s", "app", app), 1.0);
- String hot;
- if (hotSceneSet.contains(hotSceneType)) {
- hot = hotSceneType;
- } else {
- hot = "other";
- }
- featureMap.put(String.format("%s@%s", "hot", hot), 1.0);
- }
- public static void getUserFeature(Map<String, Map<String, String>> userOriginInfo, Map<String, Double> featMap) {
- oneTypeStatFeature("c1", "return_1_uv", c1Periods, userOriginInfo.get("mid_global_feature_20250212"), featMap);
- }
- public static void getMid(String uid, String mid, long currentMs, UserShareReturnProfile profile, Map<String, Double> featMap) {
- if (null == profile) {
- return;
- }
- String key = "";
- if (null != uid && !uid.isEmpty() && !uid.equals("null")) {
- key = uid;
- } else if (null != mid && !mid.isEmpty() && !mid.equals("null")) {
- key = mid;
- }
- if (!key.isEmpty()) {
- List<UserSRBO> l_r_s = profile.getL_r_s();
- if (null != l_r_s && !l_r_s.isEmpty()) {
- UserSRBO user = l_r_s.get(0);
- if (null != user) {
- long ts = user.getTs() * 1000;
- if (ts > currentMs - midValidTs) {
- featMap.put(String.format("%s@%s", "mid", key), 1.0);
- }
- }
- }
- }
- }
- public static void getUserProfileFeature(UserShareReturnProfile profile, Map<String, String> userInfo, Map<String, Double> featMap) {
- if (null != profile) {
- long s_pv = profile.getS_pv(); // share_pv(分享pv)
- long s_cnt = profile.getS_cnt(); // share_cnt(分享次数)
- long r_pv = profile.getR_pv(); // return_pv(回流pv)
- long r_uv = profile.getR_uv(); // return_uv(回流uv)
- long m_s_cnt = profile.getM_s_cnt(); // max_share_cnt(最大分享次数)
- long m_r_uv = profile.getM_r_uv(); // max_return_uv(最大回流uv)
- if (s_pv > 0) {
- double s_pv_s = FeatureUtils.log1(s_pv, log1Scale);
- double s_cnt_s = FeatureUtils.log1(s_cnt, log1Scale);
- double r_pv_s = FeatureUtils.log1(r_pv, log1Scale);
- double r_uv_s = FeatureUtils.log1(r_uv, log1Scale);
- double m_s_cnt_s = FeatureUtils.log1(m_s_cnt, log1Scale);
- double m_r_uv_s = FeatureUtils.log1(m_r_uv, log1Scale);
- double ros_one = FeatureUtils.wilsonScore(r_pv, s_pv);
- double ros = FeatureUtils.plusSmooth(r_uv, s_pv, smoothPlus);
- double ros_minus = FeatureUtils.plusSmooth(r_uv, r_pv, smoothPlus);
- double l_ros = FeatureUtils.plusSmooth(r_uv / 5.0, s_pv, largerSmoothPlus, 2);
- double l_ros_minus = FeatureUtils.plusSmooth(r_uv / 5.0, r_pv, largerSmoothPlus, 2);
- featMap.put("c9@s_pv", s_pv_s);
- featMap.put("c9@s_cnt", s_cnt_s);
- featMap.put("c9@r_pv", r_pv_s);
- featMap.put("c9@r_uv", r_uv_s);
- featMap.put("c9@m_s_cnt", m_s_cnt_s);
- featMap.put("c9@m_r_uv", m_r_uv_s);
- featMap.put("c9@ros_one", ros_one);
- featMap.put("c9@ros", ros);
- featMap.put("c9@ros_minus", ros_minus);
- featMap.put("c9@ros_#", l_ros);
- featMap.put("c9@ros_minus_#", l_ros_minus);
- }
- }
- if (null != userInfo && !userInfo.isEmpty()) {
- for (String attr : userAttrList) {
- if (userInfo.containsKey(attr)) {
- String value = userInfo.get(attr).trim().replaceAll("(\\s+|\\t|:)", "_");
- if (!value.isEmpty()) {
- String key = String.format("%s@%s", attr, value.toLowerCase());
- featMap.put(key, 1.0);
- }
- }
- }
- String page = userInfo.getOrDefault("page", "");
- if (page.equals("回流页")) {
- featMap.put("page@return", 1.0);
- }
- }
- }
- public static void getUserTagsCrossVideoFeature(String prefix, Map<String, String> videoInfo, Map<String, String> infoMap, Map<String, Double> featMap) {
- if (null == videoInfo || videoInfo.isEmpty() || null == infoMap || infoMap.isEmpty()) {
- return;
- }
- String title = videoInfo.getOrDefault("title", "");
- if (title.isEmpty()) {
- return;
- }
- for (String period : Arrays.asList("tags_1d", "tags_3d", "tags_7d")) {
- String tags = infoMap.getOrDefault(period, "");
- if (!tags.isEmpty()) {
- Double[] doubles = FeatureUtils.funcC34567ForTagsNew(tags, title);
- featMap.put(prefix + "_" + period + "@matchnum", doubles[0]);
- featMap.put(prefix + "_" + period + "@maxscore", doubles[1]);
- featMap.put(prefix + "_" + period + "@avgscore", doubles[2]);
- }
- }
- }
- public static void getUserCFFeature(String prefix, String vid, Map<String, Map<String, String[]>> infoMap, Map<String, Double> featMap) {
- if (vid.isEmpty() || null == infoMap || infoMap.isEmpty()) {
- return;
- }
- for (String cfType : cfList) {
- if (infoMap.containsKey(cfType)) {
- Map<String, String[]> cfScoresMap = infoMap.get(cfType);
- if (null != cfScoresMap && cfScoresMap.containsKey(vid)) {
- String[] scores = cfScoresMap.get(vid);
- Double score1 = Double.parseDouble(scores[0]);
- Double score2 = Double.parseDouble(scores[1]);
- Double score3 = Double.parseDouble(scores[2]) <= 0 ? 0D : 1.0 / Double.parseDouble(scores[2]);
- featMap.put(prefix + "_" + cfType + "@score", score1);
- featMap.put(prefix + "_" + cfType + "@num", score2);
- featMap.put(prefix + "_" + cfType + "@rank", score3);
- }
- }
- }
- }
- public static void getVideoFeature(String vid, Map<String, Map<String, Map<String, String>>> videoOriginInfo, Map<String, Double> featMap) {
- oneTypeStatFeature("b0", b0Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_recsys_feature_video_clean_stat"), featMap);
- oneTypeStatFeature("b1", "return_1_uv", b1Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_global_feature_20250212"), featMap);
- oneTypeStatFeature("b2", "return_n_uv", b2Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_recommend_exp_feature_20250212"), featMap);
- oneTypeStatFeature("b3", "return_n_uv", b3Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_recommend_flowpool_exp_feature_20250212"), featMap);
- oneTypeStatFeature("b4", "return_n_uv", b4Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_apptype_recommend_exp_feature_20250212"), featMap);
- oneTypeStatFeature("b5", "return_n_uv", b5Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_province_recommend_exp_feature_20250212"), featMap);
- oneTypeStatFeature("b6", "return_n_uv", b6Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_brand_recommend_exp_feature_20250212"), featMap);
- oneTypeStatFeature("b7", "return_n_uv", b7Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_vid_hotsencetype_recommend_exp_feature_20250212"), featMap);
- oneTypeStatFeature("b8", "return_n_uv", b8Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_merge_cate1_recommend_exp_feature_20250212"), featMap);
- oneTypeStatFeature("b9", "return_n_uv", b9Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_merge_cate2_recommend_exp_feature_20250212"), featMap);
- oneTypeStatFeature("b10", "return_n_uv", b10Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_channel_recommend_exp_feature_20250212"), featMap);
- oneTypeStatFeature("b11", "return_n_uv", b11Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_festive_recommend_exp_feature_20250212"), featMap);
- oneTypeStatFeature("b13", "return_n_uv", b13Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_video_unionid_recommend_exp_feature_20250212"), featMap);
- oneTypeStatFeature("b14", b14Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_recsys_feature_video_recommend_channel_layer"), featMap);
- oneTypeStatFeature("b15", b15Periods, videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_recsys_feature_video_recommend_channel_layer_head"), featMap);
- // head video cf
- headVideoCFD1Feature("d1", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("scene_type_vid_cf_feature_20250212"), featMap);
- headVideoCFD2Feature("d2", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("vid_click_cf_feature_20250212"), featMap);
- headVideoCFD3Feature("d3", videoOriginInfo.getOrDefault(vid, new HashMap<>()).get("alg_recsys_feature_cf_i2i_v2"), featMap);
- }
- public static void getVideoBaseFeature(String prefix, long currentMs, Map<String, String> videoInfo, Map<String, Double> featMap) {
- if (null == videoInfo || videoInfo.isEmpty()) {
- return;
- }
- featMap.put(prefix + "@total_time", FeatureUtils.log1(Double.parseDouble(videoInfo.getOrDefault("total_time", "0")), log1Scale));
- featMap.put(prefix + "@bit_rate", FeatureUtils.log1(Double.parseDouble(videoInfo.getOrDefault("bit_rate", "0")), log1Scale));
- String resolution = FeatureUtils.getResolution(videoInfo);
- if (!resolution.isEmpty()) {
- String resKey = String.format("%s@%s@%s", prefix, "res", resolution);
- featMap.put(resKey, 1.0);
- }
- // cate
- getVideoCateFeature(prefix, videoInfo, featMap);
- if (videoInfo.containsKey("title")) {
- int id = FeatureUtils.judgeVideoTimeType(videoInfo.get("title"));
- if (id > 0) {
- String key = String.format("%s@%s@%d", prefix, "tt", id);
- featMap.put(key, 1.0);
- }
- }
- // time
- try {
- if (videoInfo.containsKey("gmt_create_timestamp")) {
- String createMsStr = videoInfo.get("gmt_create_timestamp");
- long createMs = Long.parseLong(createMsStr);
- double createTime = FeatureUtils.getTimeDiff(currentMs, createMs);
- featMap.put(prefix + "@ts", 1 - createTime);
- }
- } catch (Exception ignored) {
- }
- }
- public static void getHeadRankVideoCrossFeature(Map<String, String> headInfo, Map<String, String> rankInfo, Map<String, Double> featMap) {
- getTwoVideoCrossFeature("hr_sim", videoSimAttrs, headInfo, rankInfo, featMap);
- }
- public static void getCreativeBaseFeature(String prefix, Map<String, String> creativeInfo, Map<String, Double> featMap) {
- if (null == creativeInfo || creativeInfo.isEmpty()) {
- return;
- }
- if (creativeInfo.containsKey("ghId")) {
- String ghId = creativeInfo.get("ghId");
- if (null != ghId && !ghId.isEmpty()) {
- String key = String.format("%s@gid@%s", prefix, ghId);
- featMap.put(key, 1.0);
- }
- }
- if (creativeInfo.containsKey("name")) {
- String name = creativeInfo.get("name");
- if (null != name && !name.isEmpty()) {
- List<String> words = Segment.getWords(name);
- for (String word : words) {
- if (null != word && word.length() > 1) {
- String key = String.format("%s@name@%s", prefix, word);
- featMap.put(key, 1.0);
- }
- }
- }
- }
- }
- public static void getCreativeCrossFeature(String prefix, Map<String, String> creativeInfo, Map<String, String> rankInfo, Map<String, Double> featMap) {
- getTwoVideoCrossFeature(prefix, creativeSimAttrs, creativeInfo, rankInfo, featMap);
- }
- public static void getProfileVideoCrossFeature(long currentMs, UserShareReturnProfile profile, Map<String, String> rankVideo, Map<String, Map<String, String>> hVideoMap, Map<String, Double> featMap) {
- if (null == profile) {
- return;
- }
- getRSCrossFeature(false, "c9_mss", currentMs, seqMaxN, profile.getM_s_s(), rankVideo, hVideoMap, featMap);
- getRSCrossFeature(false, "c9_mrs", currentMs, seqMaxN, profile.getM_r_s(), rankVideo, hVideoMap, featMap);
- getRSCrossFeature(true, "c9_lss", currentMs, seqLastN, profile.getL_s_s(), rankVideo, hVideoMap, featMap);
- getRSCrossFeature(false, "c9_lrs", currentMs, seqLastN, profile.getL_r_s(), rankVideo, hVideoMap, featMap);
- getRSCrossFeature(true, "c9_lr1s", currentMs, seqLastN, profile.getL_r1_s(), rankVideo, hVideoMap, featMap);
- if (null == rankVideo || rankVideo.isEmpty()) {
- return;
- }
- getVideoAttrSRCrossFeature("c9_c1s", rankVideo.getOrDefault("merge_first_level_cate", ""), profile.getC1_s(), featMap);
- getVideoAttrSRCrossFeature("c9_c2s", rankVideo.getOrDefault("merge_second_level_cate", ""), profile.getC2_s(), featMap);
- getVideoAttrSRCrossFeature("c9_l1s", rankVideo.getOrDefault("festive_label1", ""), profile.getL1_s(), featMap);
- getVideoAttrSRCrossFeature("c9_l2s", rankVideo.getOrDefault("festive_label2", ""), profile.getL2_s(), featMap);
- // 视频解构的关键词
- if (rankVideo.containsKey("dk_keywords")) {
- String dkKeywords = rankVideo.get("dk_keywords");
- if (Objects.isNull(dkKeywords) || dkKeywords.isEmpty()) {
- return;
- }
- for (String kw : dkKeywords.split("[,,、]")) {
- kw = kw.replaceAll("(\\s+|\\t|:)", "");
- getVideoAttrSRCrossFeature("c9_dks", kw, profile.getD_k_s(), featMap);
- }
- }
- }
- private static void getRSCrossFeature(boolean flag, String prefix, long currentMs, int maxN, List<UserSRBO> list, Map<String, String> rankVideo, Map<String, Map<String, String>> hVideoMap, Map<String, Double> featMap) {
- if (null != list && !list.isEmpty()) {
- for (int i = 0; i < list.size() && i < maxN; i++) {
- UserSRBO u = list.get(i);
- if (null != u) {
- long id = u.getId();
- long cnt = u.getCnt();
- long uv = u.getUv();
- long ts = u.getTs();
- if (id > 0) {
- String vid = id + "";
- String baseKey = String.format("%s@%d", prefix, i + 1);
- if (cnt > 0) {
- featMap.put(baseKey + "@cnt", FeatureUtils.log1(cnt, log1Scale));
- }
- if (uv > 0) {
- featMap.put(baseKey + "@uv", FeatureUtils.log1(uv, log1Scale));
- }
- if (ts > 0) {
- long historyMs = ts * 1000;
- featMap.put(baseKey + "@ts", 1 - FeatureUtils.getTimeDiff(currentMs, historyMs));
- // history week & hour
- Calendar calendar = Calendar.getInstance();
- calendar.setTimeInMillis(historyMs);
- featMap.put(String.format("%s_week@%d", baseKey, calendar.get(Calendar.DAY_OF_WEEK)), 1.0);
- featMap.put(String.format("%s_hour@%d", baseKey, calendar.get(Calendar.HOUR_OF_DAY) + 1), 1.0);
- }
- if (null != hVideoMap && hVideoMap.containsKey(vid)) {
- Map<String, String> hVideo = hVideoMap.get(vid);
- getTwoVideoCrossFeature(baseKey, hVideoSimAttrs, hVideo, rankVideo, featMap);
- if (flag) {
- getHistoryVideoCateFeature(baseKey, hVideo, featMap);
- }
- }
- }
- }
- }
- }
- }
- private static void getVideoAttrSRCrossFeature(String prefix, String attr, Map<String, VideoAttrSRBO> attrMap, Map<String, Double> featMap) {
- if (null == attrMap || attrMap.isEmpty()) {
- return;
- }
- attr = attr.trim();
- if (attrMap.containsKey(attr)) {
- VideoAttrSRBO bo = attrMap.get(attr);
- if (null != bo) {
- long sp = bo.getSp(); // share_pv
- long rp = bo.getRp(); // return_n_pv_noself
- long ru = bo.getRu(); // return_n_uv_noself
- long mu = bo.getMu(); // max_return_uv
- if (sp > 0) {
- double sp_s = FeatureUtils.log1(sp, log1Scale);
- double rp_s = FeatureUtils.log1(rp, log1Scale);
- double ru_s = FeatureUtils.log1(ru, log1Scale);
- double mu_s = FeatureUtils.log1(mu, log1Scale);
- double ros_one = FeatureUtils.wilsonScore(rp, sp);
- double ros = FeatureUtils.plusSmooth(ru, sp, smoothPlus);
- double ros_minus = FeatureUtils.plusSmooth(ru, rp, smoothPlus);
- double l_ros = FeatureUtils.plusSmooth(ru / 5.0, sp, largerSmoothPlus, 2);
- double l_ros_minus = FeatureUtils.plusSmooth(ru / 5.0, rp, largerSmoothPlus, 2);
- featMap.put(prefix + "@sp", sp_s);
- featMap.put(prefix + "@rp", rp_s);
- featMap.put(prefix + "@ru", ru_s);
- featMap.put(prefix + "@mu", mu_s);
- featMap.put(prefix + "@ros_one", ros_one);
- featMap.put(prefix + "@ros", ros);
- featMap.put(prefix + "@ros_minus", ros_minus);
- featMap.put(prefix + "@ros_#", l_ros);
- featMap.put(prefix + "@ros_minus_#", l_ros_minus);
- }
- }
- }
- }
- private static void getVideoCateFeature(String prefix, Map<String, String> videoInfo, Map<String, Double> featMap) {
- if (null == videoInfo || videoInfo.isEmpty()) {
- return;
- }
- for (String attr : videoCateAttrs) {
- String attrVal = videoInfo.getOrDefault(attr, "");
- attrVal = attrVal.trim();
- if (!attrVal.isEmpty() && !attrVal.equals("unknown")) {
- String key = String.format("%s@%s@%s", prefix, attr, attrVal);
- featMap.put(key, 1.0);
- }
- }
- if (videoInfo.containsKey("keywords")) {
- String keywords = videoInfo.get("keywords");
- if (null != keywords && !keywords.isEmpty()) {
- for (String kw : keywords.split("[,,、]")) {
- kw = kw.replaceAll("(\\s+|\\t|:)", "");
- if (!kw.isEmpty()) {
- String featKey = String.format("%s@kw@%s", prefix, kw);
- featMap.put(featKey, 1.0);
- }
- }
- }
- }
- // 视频解构的关键词 ID特征
- if (videoInfo.containsKey("dk_keywords")) {
- String dkKeywords = videoInfo.get("dk_keywords");
- if (Objects.nonNull(dkKeywords) && !dkKeywords.isEmpty()) {
- for (String kw : dkKeywords.split("[,,、]")) {
- kw = kw.replaceAll("(\\s+|\\t|:)", "");
- if (!kw.isEmpty()) {
- String featKey = String.format("%s@dkw@%s", prefix, kw);
- featMap.put(featKey, 1.0);
- }
- }
- }
- }
- }
- private static void getTwoVideoCrossFeature(String prefix, List<String> attrs, Map<String, String> video1, Map<String, String> video2, Map<String, Double> featMap) {
- if (null == video1 || video1.isEmpty() || null == video2 || video2.isEmpty()) {
- return;
- }
- for (String attr : attrs) {
- String attr1 = video1.getOrDefault(attr, "");
- String attr2 = video2.getOrDefault(attr, "");
- if (!"".equals(attr1) && !"unknown".equals(attr1) && !"".equals(attr2) && !"unknown".equals(attr2)) {
- double simScore = SimilarityUtils.word2VecSimilarity(attr1, attr2);
- featMap.put(prefix + "@" + attr, simScore);
- }
- }
- }
- private static void headVideoCFD1Feature(String prefix, Map<String, String> infoMap, Map<String, Double> featMap) {
- double ros_cf_score = getOneInfo("ros_cf_score", infoMap);
- double ros_cf_rank = getOneInfo("ros_cf_rank", infoMap);
- double rov_cf_score = getOneInfo("rov_cf_score", infoMap);
- double rov_cf_rank = getOneInfo("rov_cf_rank", infoMap);
- featMap.put(prefix + "@ros_cf_score", ros_cf_score);
- featMap.put(prefix + "@ros_cf_rank", ros_cf_rank);
- featMap.put(prefix + "@rov_cf_score", rov_cf_score);
- featMap.put(prefix + "@rov_cf_rank", rov_cf_rank);
- }
- private static void headVideoCFD2Feature(String prefix, Map<String, String> infoMap, Map<String, Double> featMap) {
- double score = getOneInfo("score", infoMap);
- double rank = getOneInfo("rank", infoMap);
- featMap.put(prefix + "@score", score);
- featMap.put(prefix + "@rank", rank);
- }
- private static void headVideoCFD3Feature(String prefix, Map<String, String> infoMap, Map<String, Double> featMap) {
- double exp = getOneInfo("exp", infoMap);
- double return_n = getOneInfo("return_n", infoMap);
- double rovn = FeatureUtils.plusSmooth(return_n, exp, smoothPlus);
- featMap.put(prefix + "@exp", FeatureUtils.log1(exp, log1Scale));
- featMap.put(prefix + "@return_n", FeatureUtils.log1(return_n, log1Scale));
- featMap.put(prefix + "@rovn", rovn);
- }
- public static Map<String, Map<String, String[]>> parseUCFScore(Map<String, String> mapInfo) {
- Map<String, Map<String, String[]>> allScoresMap = new HashMap<>();
- for (String cfType : cfList) {
- String data = mapInfo.getOrDefault(cfType, "");
- if (!data.isEmpty()) {
- Map<String, String[]> oneScoresMap = new HashMap<>();
- String[] entries = data.split(",");
- for (String entry : entries) {
- String[] rList = entry.split(":");
- if (rList.length >= 4) { // 确保分割后有四个元素
- String key = rList[0];
- String value1 = rList[1];
- String value2 = rList[2];
- String value3 = rList[3];
- String[] strs = {value1, value2, value3};
- oneScoresMap.put(key, strs);
- }
- }
- if (!oneScoresMap.isEmpty()) {
- allScoresMap.put(cfType, oneScoresMap);
- }
- }
- }
- return allScoresMap;
- }
- private static void oneTypeStatFeature(String prefix, String uvPrefix, List<String> periods, Map<String, String> infoMap, Map<String, Double> featMap) {
- if (null == infoMap || infoMap.isEmpty()) {
- return;
- }
- for (String period : periods) {
- double exp = getOneInfo("exp_" + period, infoMap);
- if (!FeatureUtils.greaterThanZero(exp)) {
- continue;
- }
- double is_share = getOneInfo("is_share_" + period, infoMap);
- double share_cnt = getOneInfo("share_cnt_" + period, infoMap);
- double is_return_1 = getOneInfo("is_return_1_" + period, infoMap);
- double return_n_uv = getOneInfo(uvPrefix + "_" + period, infoMap);
- double exp_s = FeatureUtils.log1(exp, log1Scale);
- double is_share_s = FeatureUtils.log1(is_share, log1Scale);
- double share_cnt_s = FeatureUtils.log1(share_cnt, log1Scale);
- double is_return_1_s = FeatureUtils.log1(is_return_1, log1Scale);
- double return_n_uv_s = FeatureUtils.log1(return_n_uv, log1Scale);
- double str = FeatureUtils.wilsonScore(is_share, exp);
- double str_plus = FeatureUtils.wilsonScore(is_return_1, exp);
- double ros_one = FeatureUtils.wilsonScore(is_return_1, is_share);
- // larger smooth
- double l_rovn = FeatureUtils.plusSmooth(return_n_uv, exp, largerSmoothPlus, 1);
- double l_ros = FeatureUtils.plusSmooth(return_n_uv / 5, is_share, largerSmoothPlus, 2);
- double l_ros_n = FeatureUtils.plusSmooth(return_n_uv / 5, share_cnt, largerSmoothPlus, 2);
- double l_ros_minus = FeatureUtils.plusSmooth(return_n_uv / 5, is_return_1, largerSmoothPlus, 2);
- featMap.put(prefix + "_" + period + "@" + "exp", exp_s);
- featMap.put(prefix + "_" + period + "@" + "is_share", is_share_s);
- featMap.put(prefix + "_" + period + "@" + "share_cnt", share_cnt_s);
- featMap.put(prefix + "_" + period + "@" + "is_return_1", is_return_1_s);
- featMap.put(prefix + "_" + period + "@" + "return_n_uv", return_n_uv_s);
- featMap.put(prefix + "_" + period + "@" + "str", str);
- featMap.put(prefix + "_" + period + "@" + "str_plus", str_plus);
- featMap.put(prefix + "_" + period + "@" + "ros_one", ros_one);
- // larger smooth
- featMap.put(prefix + "_" + period + "@" + "rovn_#", l_rovn);
- featMap.put(prefix + "_" + period + "@" + "ros_#", l_ros);
- featMap.put(prefix + "_" + period + "@" + "ros_n_#", l_ros_n);
- featMap.put(prefix + "_" + period + "@" + "ros_minus_#", l_ros_minus);
- }
- }
- private static void oneTypeStatFeature(String prefix, List<String> periods, Map<String, String> infoMap, Map<String, Double> featMap) {
- if (null == infoMap || infoMap.isEmpty()) {
- return;
- }
- for (String period : periods) {
- double exp = getOneInfo("exp_" + period, infoMap);
- if (!FeatureUtils.greaterThanZero(exp)) {
- continue;
- }
- double is_share = getOneInfo("is_share_" + period, infoMap);
- double share_cnt = getOneInfo("share_cnt_" + period, infoMap);
- double is_return_1 = getOneInfo("is_return_1_" + period, infoMap);
- double return_1_uv = getOneInfo("return_1_uv_" + period, infoMap);
- double return_n_uv = getOneInfo("return_n_uv_" + period, infoMap);
- double exp_s = FeatureUtils.log1(exp, log1Scale);
- double is_share_s = FeatureUtils.log1(is_share, log1Scale);
- double share_cnt_s = FeatureUtils.log1(share_cnt, log1Scale);
- double is_return_1_s = FeatureUtils.log1(is_return_1, log1Scale);
- double return_1_uv_s = FeatureUtils.log1(return_1_uv, log1Scale);
- double return_n_uv_s = FeatureUtils.log1(return_n_uv, log1Scale);
- double str = FeatureUtils.wilsonScore(is_share, exp);
- double str_plus = FeatureUtils.wilsonScore(is_return_1, exp);
- double ros_one = FeatureUtils.wilsonScore(is_return_1, is_share);
- // larger smooth
- double l_rovn1 = FeatureUtils.plusSmooth(return_1_uv, exp, largerSmoothPlus, 1);
- double l_ros1 = FeatureUtils.plusSmooth(return_1_uv / 5, is_share, largerSmoothPlus, 2);
- double l_ros_n1 = FeatureUtils.plusSmooth(return_1_uv / 5, share_cnt, largerSmoothPlus, 2);
- double l_ros_minus1 = FeatureUtils.plusSmooth(return_1_uv / 5, is_return_1, largerSmoothPlus, 2);
- double l_rovn = FeatureUtils.plusSmooth(return_n_uv, exp, largerSmoothPlus, 1);
- double l_ros = FeatureUtils.plusSmooth(return_n_uv / 5, is_share, largerSmoothPlus, 2);
- double l_ros_n = FeatureUtils.plusSmooth(return_n_uv / 5, share_cnt, largerSmoothPlus, 2);
- double l_ros_minus = FeatureUtils.plusSmooth(return_n_uv / 5, is_return_1, largerSmoothPlus, 2);
- featMap.put(prefix + "_" + period + "@" + "exp", exp_s);
- featMap.put(prefix + "_" + period + "@" + "is_share", is_share_s);
- featMap.put(prefix + "_" + period + "@" + "share_cnt", share_cnt_s);
- featMap.put(prefix + "_" + period + "@" + "is_return_1", is_return_1_s);
- featMap.put(prefix + "_" + period + "@" + "return_1_uv", return_1_uv_s);
- featMap.put(prefix + "_" + period + "@" + "return_n_uv", return_n_uv_s);
- featMap.put(prefix + "_" + period + "@" + "str", str);
- featMap.put(prefix + "_" + period + "@" + "str_plus", str_plus);
- featMap.put(prefix + "_" + period + "@" + "ros_one", ros_one);
- // larger smooth
- featMap.put(prefix + "_" + period + "@" + "rovn1_#", l_rovn1);
- featMap.put(prefix + "_" + period + "@" + "ros1_#", l_ros1);
- featMap.put(prefix + "_" + period + "@" + "ros_n1_#", l_ros_n1);
- featMap.put(prefix + "_" + period + "@" + "ros_minus1_#", l_ros_minus1);
- featMap.put(prefix + "_" + period + "@" + "rovn_#", l_rovn);
- featMap.put(prefix + "_" + period + "@" + "ros_#", l_ros);
- featMap.put(prefix + "_" + period + "@" + "ros_n_#", l_ros_n);
- featMap.put(prefix + "_" + period + "@" + "ros_minus_#", l_ros_minus);
- }
- }
- private static double getOneInfo(String name, Map<String, String> map) {
- if (null == map) {
- return 0.0;
- }
- return map.isEmpty() ? 0 : Double.parseDouble(map.getOrDefault(name, "0.0"));
- }
- private static void getHistoryVideoCateFeature(String prefix, Map<String, String> videoInfo, Map<String, Double> featMap) {
- if (null == videoInfo || videoInfo.isEmpty()) {
- return;
- }
- for (Map.Entry<String, String> entry : histotyVideoAttrMAP.entrySet()) {
- String attr = entry.getKey();
- String attrVal = videoInfo.getOrDefault(attr, "");
- attrVal = attrVal.trim();
- if (!attrVal.isEmpty() && !attrVal.equals("unknown")) {
- String key = String.format("%s@%s@%s", prefix, entry.getValue(), attrVal);
- featMap.put(key, 1.0);
- }
- }
- }
- }
|