Browse Source

567添加cf召回&nor排序

jch 10 tháng trước
mục cha
commit
490b954fba

+ 157 - 32
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/rank/strategy/RankStrategy4RegionMergeModelV567.java

@@ -10,6 +10,8 @@ import com.tzld.piaoquan.recommend.server.service.rank.extractor.ExtractorUtils;
 import com.tzld.piaoquan.recommend.server.service.recall.strategy.*;
 import com.tzld.piaoquan.recommend.server.service.score.ScorerUtils;
 import com.tzld.piaoquan.recommend.server.util.CommonCollectionUtils;
+import com.tzld.piaoquan.recommend.server.util.FeatureBucketUtils;
+import com.tzld.piaoquan.recommend.server.util.SimilarityUtils;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.collections4.MapUtils;
 import org.apache.commons.math3.util.Pair;
@@ -35,6 +37,14 @@ public class RankStrategy4RegionMergeModelV567 extends RankStrategy4RegionMergeM
     @Value("${similarity.concurrent: true}")
     private boolean similarityConcurrent;
 
+    private static final List<String> shortPeriod = Arrays.asList("1h", "2h", "4h", "6h", "12h", "24h", "7d"); // period suffixes read for the b20/b21/b22/b28 stat tables in addVideoStatFeature
+    private static final List<String> middlePeriod = Arrays.asList("14d", "30d"); // period suffixes read for the b23/b24/b25 "_day" stat tables in addVideoStatFeature
+    private static final List<String> longPeriod = Arrays.asList("7d", "35d", "90d", "365d"); // period suffixes read for the b26/b27 stat tables in addVideoStatFeature
+    private static final List<String> cfRosList = Collections.singletonList("rosn"); // value column read from the *_scene_ros CF tables (d2/d3) in addVideoCFFeature
+    private static final List<String> cfRovList = Collections.singletonList("rovn"); // value column read from the *_scene_rov CF tables (d4/d5) in addVideoCFFeature
+    private static final List<String> videoSimAttrs = Arrays.asList("cate1_list", "cate2", "cate2_list",
+            "keywords", "style", "theme", "title", "topic", "user_value"); // basic-info attributes compared between head video and candidate in addVideoSimFeature
+
     @Override
     public List<Video> mergeAndRankRovRecall(RankParam param) {
         Map<String, Double> mergeWeight = this.mergeWeight != null ? this.mergeWeight : new HashMap<>(0);
@@ -70,6 +80,18 @@ public class RankStrategy4RegionMergeModelV567 extends RankStrategy4RegionMergeM
         v1 = v1.subList(0, Math.min(mergeWeight.getOrDefault("v1", 5.0).intValue(), v1.size()));
         rovRecallRank.addAll(v1);
         setVideo.addAll(v1.stream().map(Video::getVideoId).collect(Collectors.toSet()));
+        //-------------------scene cf rovn------------------
+        List<Video> sceneCFRovn = extractAndSort(param, SceneCFRovnRecallStrategy.PUSH_FORM);
+        sceneCFRovn = sceneCFRovn.stream().filter(r -> !setVideo.contains(r.getVideoId())).collect(Collectors.toList());
+        sceneCFRovn = sceneCFRovn.subList(0, Math.min(mergeWeight.getOrDefault("sceneCFRovn", 5.0).intValue(), sceneCFRovn.size()));
+        rovRecallRank.addAll(sceneCFRovn);
+        setVideo.addAll(sceneCFRovn.stream().map(Video::getVideoId).collect(Collectors.toSet()));
+        //-------------------scene cf rosn------------------
+        List<Video> sceneCFRosn = extractAndSort(param, SceneCFRosnRecallStrategy.PUSH_FORM);
+        sceneCFRosn = sceneCFRosn.stream().filter(r -> !setVideo.contains(r.getVideoId())).collect(Collectors.toList());
+        sceneCFRosn = sceneCFRosn.subList(0, Math.min(mergeWeight.getOrDefault("sceneCFRosn", 5.0).intValue(), sceneCFRosn.size()));
+        rovRecallRank.addAll(sceneCFRosn);
+        setVideo.addAll(sceneCFRosn.stream().map(Video::getVideoId).collect(Collectors.toSet()));
 
         //-------------------排-------------------
         //-------------------序-------------------
@@ -82,11 +104,12 @@ public class RankStrategy4RegionMergeModelV567 extends RankStrategy4RegionMergeM
         // k1:视频、k2:表、k3:特征、v:特征值
         String provinceCn = param.getProvince().replaceAll("省$", "");
         String headVid = String.valueOf(param.getHeadVid());
-        FeatureService.Feature feature = featureService.getFeature(param.getMid(), vids,
-                String.valueOf(param.getAppType()), provinceCn, headVid);
+        String sceneType = String.valueOf(param.getHotSceneType());
+        Map<String, Map<String, Map<String, String>>> videoBaseInfoMap = featureService.getVideoBaseInfo(headVid, vids);
+        FeatureService.Feature feature = featureService.getNewFeature(provinceCn, param.getMid(), sceneType, headVid, videoBaseInfoMap, vids);
         Map<String, Map<String, String>> featureOriginUser = feature.getUserFeature();
         Map<String, Map<String, Map<String, String>>> featureOriginVideo = feature.getVideoFeature();
-
+        Map<String, String> headVideoInfo = videoBaseInfoMap.getOrDefault(headVid, new HashMap<>()).getOrDefault("alg_vid_feature_basic_info", new HashMap<>());
 
         // 2 特征处理
         Map<String, Double> userFeatureMapDouble = new HashMap<>();
@@ -217,7 +240,7 @@ public class RankStrategy4RegionMergeModelV567 extends RankStrategy4RegionMergeM
                 }
             }
 
-            Map<String, String> videoInfo = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_basic_info", new HashMap<>());
+            Map<String, String> videoInfo = videoBaseInfoMap.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_basic_info", new HashMap<>());
             featureMap.put("total_time", Double.parseDouble(videoInfo.getOrDefault("total_time", "0")));
             featureMap.put("bit_rate", Double.parseDouble(videoInfo.getOrDefault("bit_rate", "0")));
 
@@ -231,12 +254,7 @@ public class RankStrategy4RegionMergeModelV567 extends RankStrategy4RegionMergeM
                             String tags = c34567Map.getOrDefault(key, "");
                             if (!tags.isEmpty()) {
                                 Future<Pair<String, Double[]>> future = ThreadPoolFactory.defaultPool().submit(() -> {
-                                    Double[] doubles = null;
-                                    if (param.getAbExpCodes().contains(word2vecExp)) {
-                                        doubles = ExtractorUtils.funcC34567ForTagsNew(tags, title);
-                                    } else {
-                                        doubles = ExtractorUtils.funcC34567ForTags(tags, title);
-                                    }
+                                    Double[] doubles = ExtractorUtils.funcC34567ForTagsNew(tags, title);
                                     return Pair.create(key, doubles);
                                 });
                                 futures.add(future);
@@ -295,12 +313,18 @@ public class RankStrategy4RegionMergeModelV567 extends RankStrategy4RegionMergeM
                 featureMap.put("d1_return_n", Double.parseDouble(d1.getOrDefault("return_n", "0")));
                 featureMap.put("d1_rovn", Double.parseDouble(d1.getOrDefault("rovn", "0")));
             }
+            // ******************** new feature ********************
+            addVideoStatFeature(vid, featureOriginVideo, featureMap);
+            //addVideoCFFeature(vid, featureOriginVideo, featureMap);
+            addVideoSimFeature(headVideoInfo, videoInfo, featureMap);
+
             item.featureMapDouble = featureMap;
         }
 
         // 3 连续值特征分桶
         readBucketFile();
         Map<String, String> userFeatureMap = new HashMap<>(userFeatureMapDouble.size());
+        Map<String, String> norUserFeatureMap = FeatureBucketUtils.bucketFeature("20241209_nor_bucket.txt", userFeatureMapDouble);
         for (Map.Entry<String, Double> entry : userFeatureMapDouble.entrySet()) {
             String name = entry.getKey();
             Double score = entry.getValue();
@@ -312,7 +336,6 @@ public class RankStrategy4RegionMergeModelV567 extends RankStrategy4RegionMergeM
                 userFeatureMap.put(name, String.valueOf(scoreNew));
             }
         }
-
         for (RankItem item : rankItems) {
             Map<String, String> featureMap = new HashMap<>();
             Map<String, Double> featureMapDouble = item.featureMapDouble;
@@ -329,45 +352,38 @@ public class RankStrategy4RegionMergeModelV567 extends RankStrategy4RegionMergeM
                 }
             }
             item.featureMap = featureMap;
+            item.norFeatureMap = FeatureBucketUtils.bucketFeature("20241209_nor_bucket.txt", featureMapDouble);
         }
         // 4 排序模型计算
         Map<String, String> sceneFeatureMap = new HashMap<>(0);
-        List<RankItem> items = ScorerUtils.getScorerPipeline("feeds_score_config_20240807.conf").scoring(sceneFeatureMap, userFeatureMap, rankItems);
+        List<RankItem> items = ScorerUtils.getScorerPipeline("feeds_score_config_fm_xgb_20241209.conf").scoring(sceneFeatureMap, userFeatureMap, norUserFeatureMap, rankItems);
         // 5 排序公式特征
-        Map<String, Map<String, String>> vid2MapFeature = this.getVideoRedisFeature(vids, "redis:vid_hasreturn_rov:");
-        Map<String, Map<String, String>> vid2VovFeatureMap = this.getVideoRedisFeature(vids, "redis:vid_vovh24pred:");
-        double alpha_vov = mergeWeight.getOrDefault("alpha_vov", 0.05);
-        double func = mergeWeight.getOrDefault("func", 1.0);
+        Map<String, Map<String, String>> vid2MapFeature = this.getVideoRedisFeature(vids, "redis:vid_hasreturn_vor:");
         List<Video> result = new ArrayList<>();
         for (RankItem item : items) {
-            item.getScoresMap().put("alpha_vov", alpha_vov);
-            double score = 0.0;
+            double score;
             double fmRovOrigin = item.getScoreRov();
             item.getScoresMap().put("fmRovOrigin", fmRovOrigin);
             double fmRov = restoreScore(fmRovOrigin);
             item.getScoresMap().put("fmRov", fmRov);
-            double hasReturnRovScore = Double.parseDouble(vid2MapFeature.getOrDefault(item.getVideoId() + "", new HashMap<>()).getOrDefault("rate_n", "0"));
+            double hasReturnRovScore = Double.parseDouble(vid2MapFeature.getOrDefault(item.getVideoId() + "", new HashMap<>()).getOrDefault("rov", "0"));
             item.getScoresMap().put("hasReturnRovScore", hasReturnRovScore);
-            double vovScore = this.calVovScore(item, vid2VovFeatureMap);
-            item.getScoresMap().put("vovScore", vovScore);
-            if (func == 1){
-                score = fmRov * (1 + hasReturnRovScore) + alpha_vov * vovScore;
-            }else{
-                score = fmRov * (1 + hasReturnRovScore) * (1.0 + alpha_vov * vovScore);
-            }
-
+            double norXGBScore = item.getScoresMap().getOrDefault("NorXGBScore", 0d);
+            double vor = Double.parseDouble(vid2MapFeature.getOrDefault(item.getVideoId() + "", new HashMap<>()).getOrDefault("vor", "0"));
+            item.getScoresMap().put("vor", vor);
+            score = fmRov * (0.1 + norXGBScore) * (0.1 + vor);
             Video video = item.getVideo();
             video.setScore(score);
             video.setSortScore(score);
             video.setScoresMap(item.getScoresMap());
             video.setAllFeatureMap(item.getAllFeatureMap());
-            if (feature != null
-                    && MapUtils.isNotEmpty(feature.getVideoFeature())
-                    && MapUtils.isNotEmpty(feature.getVideoFeature().get(item.getVideoId() + ""))) {
+            if (MapUtils.isNotEmpty(feature.getVideoFeature()) && MapUtils.isNotEmpty(feature.getVideoFeature().get(item.getVideoId() + ""))) {
                 video.getMetaFeatureMap().putAll(feature.getVideoFeature().get(item.getVideoId() + ""));
             }
-            if (feature != null
-                    && MapUtils.isNotEmpty(feature.getUserFeature())) {
+            if (MapUtils.isNotEmpty(videoBaseInfoMap) && MapUtils.isNotEmpty(videoBaseInfoMap.get(item.getVideoId() + ""))) {
+                video.getMetaFeatureMap().putAll(videoBaseInfoMap.get(item.getVideoId() + ""));
+            }
+            if (MapUtils.isNotEmpty(feature.getUserFeature())) {
                 video.getMetaFeatureMap().putAll(feature.getUserFeature());
             }
             result.add(video);
@@ -376,4 +392,113 @@ public class RankStrategy4RegionMergeModelV567 extends RankStrategy4RegionMergeM
         return result;
     }
 
+    /**
+     * Looks up one named feature table for a single video.
+     *
+     * @param vid             video id, used as the first-level key
+     * @param name            feature table name, used as the second-level key
+     * @param videoAllInfoMap video id -> table name -> feature name -> raw value; may be null
+     * @return the table stored under {@code vid} / {@code name}, or a new empty map when the
+     *         outer map is null or either key is absent
+     */
+    private Map<String, String> getVideoOneTypeInfo(String vid, String name,
+                                                    Map<String, Map<String, Map<String, String>>> videoAllInfoMap) {
+        if (videoAllInfoMap != null) {
+            Map<String, Map<String, String>> tablesOfVideo = videoAllInfoMap.getOrDefault(vid, new HashMap<>());
+            return tablesOfVideo.getOrDefault(name, new HashMap<>());
+        }
+        return new HashMap<>();
+    }
+
+    /**
+     * Reads one numeric feature from a raw string feature table.
+     *
+     * <p>Feature extraction is best-effort: a missing table, a missing key, a key mapped to
+     * {@code null}, or a value that is not a parseable number all yield {@code 0.0} instead of
+     * throwing, so one bad value cannot abort ranking for the whole request.
+     *
+     * @param name    feature name to look up
+     * @param infoMap feature name -> raw string value; may be null or empty
+     * @return the parsed value, or {@code 0.0} when it is absent or unparseable
+     */
+    private double getVideoOneInfo(String name, Map<String, String> infoMap) {
+        if (infoMap == null) {
+            return 0.0;
+        }
+        String raw = infoMap.get(name);
+        if (raw == null) {
+            // Covers both an absent key and a key explicitly mapped to null.
+            return 0.0;
+        }
+        try {
+            return Double.parseDouble(raw);
+        } catch (NumberFormatException e) {
+            // Blank or malformed upstream value: fall back to the same default as a missing key.
+            return 0.0;
+        }
+    }
+
+    private void addVideoStatFeature(String vid, Map<String, Map<String, Map<String, String>>> videoAllInfoMap,
+                                     Map<String, Double> featureMap) {
+        List<Tuple3> vidStatInfo = Arrays.asList(
+                new Tuple3("b20", shortPeriod, getVideoOneTypeInfo(vid, "alg_cate2_feature", videoAllInfoMap)),
+                new Tuple3("b21", shortPeriod, getVideoOneTypeInfo(vid, "alg_cate1_feature", videoAllInfoMap)),
+                new Tuple3("b22", shortPeriod, getVideoOneTypeInfo(vid, "alg_vid_source_feature", videoAllInfoMap)),
+                new Tuple3("b28", shortPeriod, getVideoOneTypeInfo(vid, "alg_sence_type_feature", videoAllInfoMap)),
+                new Tuple3("b23", middlePeriod, getVideoOneTypeInfo(vid, "alg_cate2_feature_day", videoAllInfoMap)),
+                new Tuple3("b24", middlePeriod, getVideoOneTypeInfo(vid, "alg_cate1_feature_day", videoAllInfoMap)),
+                new Tuple3("b25", middlePeriod, getVideoOneTypeInfo(vid, "alg_video_source_feature_day", videoAllInfoMap)),
+                new Tuple3("b26", longPeriod, getVideoOneTypeInfo(vid, "alg_video_unionid_feature_day", videoAllInfoMap)),
+                new Tuple3("b27", longPeriod, getVideoOneTypeInfo(vid, "alg_vid_feature_day", videoAllInfoMap))
+        );
+        for (Tuple3 tuple3 : vidStatInfo) {
+            String infoType = tuple3.first;
+            List<String> infoPeriod = tuple3.second;
+            Map<String, String> infoMap = tuple3.third;
+            for (String period : infoPeriod) {
+                double share = getVideoOneInfo("share_" + period, infoMap);
+                double return_ = getVideoOneInfo("return_" + period, infoMap);
+                double view_hasreturn = getVideoOneInfo("view_hasreturn_" + period, infoMap);
+                double share_hasreturn = getVideoOneInfo("share_hasreturn_" + period, infoMap);
+                double ros = getVideoOneInfo("ros_" + period, infoMap);
+                double rov = getVideoOneInfo("rov_" + period, infoMap);
+                double r_cnt = getVideoOneInfo("r_cnt_" + period, infoMap);
+                double r_rate = getVideoOneInfo("r_rate_" + period, infoMap);
+                double r_cnt4s = getVideoOneInfo("r_cnt4s_" + period, infoMap);
+                double str = getVideoOneInfo("str_" + period, infoMap);
+
+                featureMap.put(infoType + "_" + period + "_" + "share", ExtractorUtils.calLog(share));
+                featureMap.put(infoType + "_" + period + "_" + "return", ExtractorUtils.calLog(return_));
+                featureMap.put(infoType + "_" + period + "_" + "view_hasreturn", ExtractorUtils.calLog(view_hasreturn));
+                featureMap.put(infoType + "_" + period + "_" + "share_hasreturn", ExtractorUtils.calLog(share_hasreturn));
+                featureMap.put(infoType + "_" + period + "_" + "ros", ros);
+                featureMap.put(infoType + "_" + period + "_" + "rov", rov);
+                featureMap.put(infoType + "_" + period + "_" + "r_cnt", r_cnt);
+                featureMap.put(infoType + "_" + period + "_" + "r_rate", r_rate);
+                featureMap.put(infoType + "_" + period + "_" + "r_cnt4s", r_cnt4s);
+                featureMap.put(infoType + "_" + period + "_" + "str", str);
+            }
+        }
+    }
+
+    /**
+     * Adds collaborative-filtering features for one video to {@code featureMap}.
+     *
+     * <p>For each non-empty CF table, writes {@code <prefix>_exp} and {@code <prefix>_return_n}
+     * (both through {@link ExtractorUtils#calLog}) plus {@code <prefix>_<valueName>} as read.
+     * NOTE(review): the only call site visible in this change is commented out.
+     *
+     * @param vid             id of the video being ranked
+     * @param videoAllInfoMap video id -> table name -> feature name -> raw value
+     * @param featureMap      output map that receives the generated features
+     */
+    private void addVideoCFFeature(String vid, Map<String, Map<String, Map<String, String>>> videoAllInfoMap,
+                                   Map<String, Double> featureMap) {
+        // Each entry: output prefix, value columns to copy, source table for this video.
+        List<Tuple3> cfSources = Arrays.asList(
+                new Tuple3("d2", cfRosList, getVideoOneTypeInfo(vid, "alg_recsys_feature_weak_cf_i2i_scene_ros", videoAllInfoMap)),
+                new Tuple3("d3", cfRosList, getVideoOneTypeInfo(vid, "alg_recsys_feature_cf_i2i_scene_ros", videoAllInfoMap)),
+                new Tuple3("d4", cfRovList, getVideoOneTypeInfo(vid, "alg_recsys_feature_weak_cf_i2i_scene_rov", videoAllInfoMap)),
+                new Tuple3("d5", cfRovList, getVideoOneTypeInfo(vid, "alg_recsys_feature_cf_i2i_scene_rov", videoAllInfoMap))
+        );
+        for (Tuple3 source : cfSources) {
+            String prefix = source.first;
+            List<String> valueNames = source.second;
+            Map<String, String> cfStats = source.third;
+            if (cfStats.isEmpty()) {
+                // No CF data for this table: emit nothing for this prefix.
+                continue;
+            }
+            for (String valueName : valueNames) {
+                double exposure = getVideoOneInfo("exp", cfStats);
+                double returnCount = getVideoOneInfo("return_n", cfStats);
+                double cfValue = getVideoOneInfo(valueName, cfStats);
+                featureMap.put(prefix + "_exp", ExtractorUtils.calLog(exposure));
+                featureMap.put(prefix + "_return_n", ExtractorUtils.calLog(returnCount));
+                featureMap.put(prefix + "_" + valueName, cfValue);
+            }
+        }
+    }
+
+    private void addVideoSimFeature(Map<String, String> headInfo, Map<String, String> rankInfo, Map<String, Double> featureMap) {
+        if (!headInfo.isEmpty() && !rankInfo.isEmpty()) {
+            List<Future<Pair<String, Double>>> futures = new ArrayList<>();
+            for (String attr : videoSimAttrs) {
+                String headAttr = headInfo.getOrDefault(attr, "");
+                String rankAttr = rankInfo.getOrDefault(attr, "");
+                if (!"".equals(headAttr) && !"".equals(rankAttr)) {
+                    String key = "video_sim_" + attr;
+                    Future<Pair<String, Double>> future = ThreadPoolFactory.defaultPool().submit(() -> {
+                        double simScore = SimilarityUtils.word2VecSimilarity(headAttr, rankAttr);
+                        return Pair.create(key, simScore);
+                    });
+                    futures.add(future);
+                }
+            }
+            try {
+                for (Future<Pair<String, Double>> future : futures) {
+                    Pair<String, Double> pair = future.get(1000, TimeUnit.MILLISECONDS);
+                    featureMap.put(pair.getFirst(), pair.getSecond());
+                }
+            } catch (Exception e) {
+                log.error("video attr similarity error", e);
+            }
+        }
+    }
 }

+ 1 - 1
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/recall/RecallService.java

@@ -165,7 +165,6 @@ public class RecallService implements ApplicationContextAware {
             case "60105": // 551
             case "60106": // 552
             case "60112": // 562
-            case "60117": // 567
             case "60114": // 564
             case "60115": // 565
             case "60119": // 569
@@ -174,6 +173,7 @@ public class RecallService implements ApplicationContextAware {
                 strategies.add(strategyMap.get(ReturnVideoRecallStrategy.class.getSimpleName()));
                 break;
             case "60113": // 563
+            case "60117": // 567
                 strategies.add(strategyMap.get(RegionRealtimeRecallStrategyV1.class.getSimpleName()));
                 strategies.addAll(getRegionRecallStrategy(param));
                 strategies.add(strategyMap.get(ReturnVideoRecallStrategy.class.getSimpleName()));