Selaa lähdekoodia

新模型-修复特征

zhangbo 8 kuukautta sitten
vanhempi
commit
a7ee1cb218

+ 3 - 0
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/rank/strategy/RankStrategy4RegionMergeModelBasic.java

@@ -349,6 +349,9 @@ public class RankStrategy4RegionMergeModelBasic extends RankService {
         }
 
     }
+    protected double restoreScore(double score){
+        return (0.1 * score) / (1- 0.9 * score);
+    }
 
 
     public static void main(String[] args) {

+ 378 - 0
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/rank/strategy/RankStrategy4RegionMergeModelV552.java

@@ -0,0 +1,378 @@
+package com.tzld.piaoquan.recommend.server.service.rank.strategy;
+
+import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
+import com.tzld.piaoquan.recommend.server.common.base.RankItem;
+import com.tzld.piaoquan.recommend.server.model.Video;
+import com.tzld.piaoquan.recommend.server.service.FeatureService;
+import com.tzld.piaoquan.recommend.server.service.rank.RankParam;
+import com.tzld.piaoquan.recommend.server.service.rank.extractor.ExtractorUtils;
+import com.tzld.piaoquan.recommend.server.service.recall.strategy.*;
+import com.tzld.piaoquan.recommend.server.service.score.ScorerUtils;
+import com.tzld.piaoquan.recommend.server.util.CommonCollectionUtils;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.collections4.MapUtils;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Service;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.*;
+import java.util.stream.Collectors;
+
+@Service
+@Slf4j
+public class RankStrategy4RegionMergeModelV552 extends RankStrategy4RegionMergeModelBasic {
+    @ApolloJsonValue("${rank.score.merge.weightv552:}") // value is injected from the Apollo config key rank.score.merge.weightv552
+    private Map<String, Double> mergeWeight; // knobs read by mergeAndRankRovRecall ("v6", "v1", "redisScoreKey", "hasReturnRovKey", "chooseFunction"); a null map is tolerated there
+
+    @Autowired
+    private FeatureService featureService; // supplies the user and video features fetched per request
+
+    Map<String, double[]> bucketsMap = new HashMap<>(); // feature name -> values parsed from the third column of the bucket file
+    Map<String, Double> bucketsLen = new HashMap<>(); // feature name -> number parsed from the second column of the bucket file; used as the divisor when bucketing
+
+    @Override
+    public List<Video> mergeAndRankRovRecall(RankParam param) { // merges recall candidates, builds and buckets features, scores them and returns the videos sorted by descending sort score
+        Map<String, Double> mergeWeight = this.mergeWeight != null ? this.mergeWeight : new HashMap<>(0); // fall back to an empty map when no config was injected
+        //------------------- Merge logic -------------------
+        // Concatenate the five region recall lists, run removeDuplicate on them and keep at most
+        // param.getSize() videos; then append up to "v6" candidates from the return-video recall and up to
+        // "v1" candidates from the realtime region recall (default 5 each) whose ids are not yet in setVideo.
+
+        List<Video> oldRovs = new ArrayList<>();
+        oldRovs.addAll(extractAndSort(param, RegionHRecallStrategy.PUSH_FORM));
+        oldRovs.addAll(extractAndSort(param, RegionHDupRecallStrategy.PUSH_FORM));
+        oldRovs.addAll(extractAndSort(param, Region24HRecallStrategy.PUSH_FORM));
+        oldRovs.addAll(extractAndSort(param, RegionRelative24HRecallStrategy.PUSH_FORM));
+        oldRovs.addAll(extractAndSort(param, RegionRelative24HDupRecallStrategy.PUSH_FORM));
+        removeDuplicate(oldRovs);
+        int sizeReturn = param.getSize();
+        List<Video> v0 = oldRovs.size() <= sizeReturn
+                ? oldRovs
+                : oldRovs.subList(0, sizeReturn);
+        Set<Long> setVideo = new HashSet<>();
+        this.duplicate(setVideo, v0); // NOTE(review): duplicate() is defined outside this view; confirm what it does to setVideo and v0
+        setVideo.addAll(v0.stream().map(Video::getVideoId).collect(Collectors.toSet()));
+        List<Video> rovRecallRank = new ArrayList<>(v0);
+        //------------------- "return"-similar recall ------------------
+        List<Video> v6 = extractAndSort(param, ReturnVideoRecallStrategy.PUSH_FORM);
+        v6 = v6.stream().filter(r-> !setVideo.contains(r.getVideoId())).collect(Collectors.toList());
+        v6 = v6.subList(0, Math.min(mergeWeight.getOrDefault("v6", 5.0).intValue(), v6.size()));
+        rovRecallRank.addAll(v6);
+        setVideo.addAll(v6.stream().map(Video::getVideoId).collect(Collectors.toSet()));
+        //------------------- new region recall ------------------
+        List<Video> v1 = extractAndSort(param, RegionRealtimeRecallStrategyV1.PUSH_FORM);
+        v1 = v1.stream().filter(r-> !setVideo.contains(r.getVideoId())).collect(Collectors.toList());
+        v1 = v1.subList(0, Math.min(mergeWeight.getOrDefault("v1", 5.0).intValue(), v1.size()));
+        rovRecallRank.addAll(v1);
+        setVideo.addAll(v1.stream().map(Video::getVideoId).collect(Collectors.toSet()));
+
+        //------------------- Ranking logic -------------------
+        // 1) fetch user/video features, 2) derive numeric features for the user and for each candidate,
+        // 3) bucket the continuous values, 4) score with the model pipeline and combine the restored
+        // model score with a rate read through getVideoRedisFeature before sorting.
+
+        // TODO 1 fetch features in batch; the province parameter must be aligned; headvid must be passed in!
+        List<String> vids = CommonCollectionUtils.toListDistinct(rovRecallRank, v -> String.valueOf(v.getVideoId()));
+
+        // k1: video, k2: table, k3: feature, v: feature value
+        String provinceCn = param.getProvince().replaceAll("省$", ""); // strips the trailing one-character suffix matched by the regex; NOTE(review): throws if getProvince() returns null
+        String headVid = String.valueOf(param.getHeadVid());
+        FeatureService.Feature feature = featureService.getFeature(param.getMid(), vids,
+                String.valueOf(param.getAppType()), provinceCn, headVid);
+        Map<String, Map<String, String>> featureOriginUser = feature.getUserFeature();
+        Map<String, Map<String, Map<String, String>>> featureOriginVideo = feature.getVideoFeature();
+
+
+        // TODO 2 feature processing
+        Map<String, Double> userFeatureMapDouble = new HashMap<>();
+        String mid = param.getMid();
+        Map<String, String> c1 = featureOriginUser.getOrDefault("alg_mid_feature_play", new HashMap<>());
+        Map<String, String> c2 = featureOriginUser.getOrDefault("alg_mid_feature_share_and_return", new HashMap<>());
+        Map<String, String> c3 = featureOriginUser.getOrDefault("alg_mid_feature_play_tags", new HashMap<>());
+        Map<String, String> c4 = featureOriginUser.getOrDefault("alg_mid_feature_return_tags", new HashMap<>());
+        Map<String, String> c5 = featureOriginUser.getOrDefault("alg_mid_feature_share_tags", new HashMap<>());
+        Map<String, String> c6 = featureOriginUser.getOrDefault("alg_mid_feature_feed_exp_share_tags_v2", new HashMap<>());
+        Map<String, String> c7 = featureOriginUser.getOrDefault("alg_mid_feature_feed_exp_return_tags_v2", new HashMap<>());
+        Map<String, String> c8 = featureOriginUser.getOrDefault("alg_mid_feature_sharecf", new HashMap<>());
+        Map<String, String> c9 = featureOriginUser.getOrDefault("alg_mid_feature_returncf", new HashMap<>());
+
+        if (!c1.isEmpty()) { // user play counts per time window; a missing key defaults to 0
+            userFeatureMapDouble.put("playcnt_6h", Double.parseDouble(c1.getOrDefault("playcnt_6h", "0")));
+            userFeatureMapDouble.put("playcnt_1d", Double.parseDouble(c1.getOrDefault("playcnt_1d", "0")));
+            userFeatureMapDouble.put("playcnt_3d", Double.parseDouble(c1.getOrDefault("playcnt_3d", "0")));
+            userFeatureMapDouble.put("playcnt_7d", Double.parseDouble(c1.getOrDefault("playcnt_7d", "0")));
+        }
+        if (!c2.isEmpty()) { // user share_pv / return_uv counts per time window; a missing key defaults to 0
+            userFeatureMapDouble.put("share_pv_12h", Double.parseDouble(c2.getOrDefault("share_pv_12h", "0")));
+            userFeatureMapDouble.put("share_pv_1d", Double.parseDouble(c2.getOrDefault("share_pv_1d", "0")));
+            userFeatureMapDouble.put("share_pv_3d", Double.parseDouble(c2.getOrDefault("share_pv_3d", "0")));
+            userFeatureMapDouble.put("share_pv_7d", Double.parseDouble(c2.getOrDefault("share_pv_7d", "0")));
+            userFeatureMapDouble.put("return_uv_12h", Double.parseDouble(c2.getOrDefault("return_uv_12h", "0")));
+            userFeatureMapDouble.put("return_uv_1d", Double.parseDouble(c2.getOrDefault("return_uv_1d", "0")));
+            userFeatureMapDouble.put("return_uv_3d", Double.parseDouble(c2.getOrDefault("return_uv_3d", "0")));
+            userFeatureMapDouble.put("return_uv_7d", Double.parseDouble(c2.getOrDefault("return_uv_7d", "0")));
+        }
+
+        Map<String, String> c34567Map = new HashMap<>(15); // "<cN_feature>_<tags_window>" -> non-empty tag string of the user
+        List<Tuple2> tmpList0 = Arrays.asList(
+                new Tuple2(c3, "c3_feature"),
+                new Tuple2(c4, "c4_feature"),
+                new Tuple2(c5, "c5_feature"),
+                new Tuple2(c6, "c6_feature"),
+                new Tuple2(c7, "c7_feature")
+        );
+        for (Tuple2 tuple2 : tmpList0) {
+            for (String key_time : Arrays.asList("tags_1d", "tags_3d", "tags_7d")) {
+                String tags = tuple2.first.getOrDefault(key_time, "");
+                if (!tags.isEmpty()) {
+                    c34567Map.put(tuple2.name + "_" + key_time, tags);
+                }
+            }
+        }
+
+        Map<String, Map<String, String[]>> c89Map = new HashMap<>(4); // "<c8|c9_feature>_<action>" -> (entry key -> three value strings)
+        List<Tuple2> tmpList1 = Arrays.asList(
+                new Tuple2(c8, "c8_feature"),
+                new Tuple2(c9, "c9_feature")
+        );
+        for (Tuple2 tuple2 : tmpList1) {
+            for (String key_action : Arrays.asList("share", "return")) {
+                String cfListStr = tuple2.first.getOrDefault(key_action, ""); // comma-separated entries, each entry colon-separated
+                if (!cfListStr.isEmpty()) {
+                    Map<String, String[]> cfMap = new HashMap<>();
+                    String[] entries = cfListStr.split(",");
+                    for (String entry : entries) {
+                        String[] rList = entry.split(":");
+                        if (rList.length >= 4) { // make sure the split yields at least four elements
+                            String key = rList[0];
+                            String value1 = rList[1];
+                            String value2 = rList[2];
+                            String value3 = rList[3];
+                            String[] strs = {value1, value2, value3};
+                            cfMap.put(key, strs);
+                        }
+                    }
+                    c89Map.put(tuple2.name + "_" + key_action, cfMap);
+                }
+            }
+        }
+
+
+        List<RankItem> rankItems = CommonCollectionUtils.toList(rovRecallRank, RankItem::new);
+        for (RankItem item : rankItems) { // build the numeric feature map of every candidate
+            Map<String, Double> featureMap = new HashMap<>();
+            String vid = item.getVideoId() + "";
+            Map<String, String> b1 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_all_exp_v2", new HashMap<>());
+            Map<String, String> b2 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_all_share", new HashMap<>());
+            Map<String, String> b3 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_all_return", new HashMap<>());
+            Map<String, String> b6 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_exp2share_v2", new HashMap<>());
+            Map<String, String> b7 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_share2return", new HashMap<>());
+
+            Map<String, String> b8 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_noflow_exp_v2", new HashMap<>());
+            Map<String, String> b9 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_noflow_root_share_v2", new HashMap<>());
+            Map<String, String> b10 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_noflow_root_return_v2", new HashMap<>());
+            Map<String, String> b11 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_flow_exp_v2", new HashMap<>());
+            Map<String, String> b12 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_flow_root_share_v2", new HashMap<>());
+            Map<String, String> b13 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_flow_root_return_v2", new HashMap<>());
+            Map<String, String> b17 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_province_exp_v2", new HashMap<>());
+            Map<String, String> b18 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_province_root_share_v2", new HashMap<>());
+            Map<String, String> b19 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_province_root_return_v2", new HashMap<>());
+
+            List<Tuple4> originData = Arrays.asList( // each tuple: (exposure table, share table, return table, feature-name prefix)
+                    new Tuple4(b1, b2, b3, "b123"),
+                    new Tuple4(b1, b6, b7, "b167"),
+                    new Tuple4(b8, b9, b10, "b8910"),
+                    new Tuple4(b11, b12, b13, "b111213"),
+                    new Tuple4(b17, b18, b19, "b171819")
+            );
+
+            for (Tuple4 tuple4 : originData) {
+                for (String prefix2 : Arrays.asList("1h", "2h", "3h", "4h", "12h", "1d", "3d", "7d")) { // one feature group per time window
+                    double exp = tuple4.first.isEmpty() ? 0 : Double.parseDouble(tuple4.first.getOrDefault("exp_pv_" + prefix2, "0.0"));
+                    double share = tuple4.second.isEmpty() ? 0 : Double.parseDouble(tuple4.second.getOrDefault("share_pv_" + prefix2, "0.0"));
+                    double returns = tuple4.third.isEmpty() ? 0 : Double.parseDouble(tuple4.third.getOrDefault("return_uv_" + prefix2, "0.0"));
+
+                    double f1 = ExtractorUtils.calDiv(share, exp); // stored under the "STR" key
+                    double f2 = ExtractorUtils.calLog(share);
+                    double f3 = ExtractorUtils.calDiv(returns, exp); // stored under the "ROV" key
+                    double f4 = ExtractorUtils.calLog(returns);
+                    double f5 = f3 * f4;
+
+                    String key1 = tuple4.name + "_" + prefix2 + "_" + "STR";
+                    String key2 = tuple4.name + "_" + prefix2 + "_" + "log(share)";
+                    String key3 = tuple4.name + "_" + prefix2 + "_" + "ROV";
+                    String key4 = tuple4.name + "_" + prefix2 + "_" + "log(return)";
+                    String key5 = tuple4.name + "_" + prefix2 + "_" + "ROV*log(return)";
+
+                    featureMap.put(key1, f1);
+                    featureMap.put(key2, f2);
+                    featureMap.put(key3, f3);
+                    featureMap.put(key4, f4);
+                    featureMap.put(key5, f5);
+                }
+            }
+
+            Map<String, String> videoInfo = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_basic_info", new HashMap<>());
+            featureMap.put("total_time", Double.parseDouble(videoInfo.getOrDefault("total_time", "0")));
+            featureMap.put("bit_rate", Double.parseDouble(videoInfo.getOrDefault("bit_rate", "0")));
+
+            String title = videoInfo.getOrDefault("title", "");
+            if (!title.isEmpty()) { // match the user's tag strings against the video title
+                for (String name : Arrays.asList("c3_feature", "c4_feature", "c5_feature", "c6_feature", "c7_feature")) {
+                    for (String key_time : Arrays.asList("tags_1d", "tags_3d", "tags_7d")) {
+                        String tags = c34567Map.getOrDefault(name + "_" + key_time, "");
+                        if (!tags.isEmpty()) {
+                            Double[] doubles = ExtractorUtils.funcC34567ForTags(tags, title); // indexes 0..2 are stored as matchnum, maxscore and avgscore
+                            featureMap.put(name + "_" + key_time + "_matchnum", doubles[0]);
+                            featureMap.put(name + "_" + key_time + "_maxscore", doubles[1]);
+                            featureMap.put(name + "_" + key_time + "_avgscore", doubles[2]);
+                        }
+                    }
+                }
+            }
+
+            if (!vid.isEmpty()) { // NOTE(review): vid is built by string concatenation above, so this is presumably always true
+                for (String key_feature : Arrays.asList("c8_feature", "c9_feature")) {
+                    for (String key_action : Arrays.asList("share", "return")) {
+                        Map<String, String[]> cfMap = c89Map.getOrDefault(key_feature + "_" + key_action, new HashMap<>());
+                        if (cfMap.containsKey(vid)) {
+                            String[] scores = cfMap.get(vid);
+                            Double score1 = Double.parseDouble(scores[0]);
+                            Double score2 = Double.parseDouble(scores[1]);
+                            Double score3 = Double.parseDouble(scores[2]) <= 0 ? 0D : 1.0 / Double.parseDouble(scores[2]); // reciprocal of the third value, 0 when it is not positive
+                            featureMap.put(key_feature + "_" + key_action + "_score", score1);
+                            featureMap.put(key_feature + "_" + key_action + "_num", score2);
+                            featureMap.put(key_feature + "_" + key_action + "_rank", score3);
+                        }
+                    }
+                }
+            }
+            Map<String, String> d1 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_recsys_feature_cf_i2i_new_v2", new HashMap<>());
+            if (!d1.isEmpty()) {
+                featureMap.put("d1_exp", Double.parseDouble(d1.getOrDefault("exp", "0")));
+                featureMap.put("d1_return_n", Double.parseDouble(d1.getOrDefault("return_n", "0")));
+                featureMap.put("d1_rovn", Double.parseDouble(d1.getOrDefault("rovn", "0")));
+            }
+            item.featureMapDouble = featureMap;
+        }
+
+        // 3 bucket the continuous-valued features
+        readBucketFile(); // populates this.bucketsMap and this.bucketsLen
+        Map<String, String> userFeatureMap = new HashMap<>(userFeatureMapDouble.size());
+        for (Map.Entry<String, Double> entry : userFeatureMapDouble.entrySet()) {
+            String name = entry.getKey();
+            Double score = entry.getValue();
+            // Note: zero-valued features and features missing from the bucket file are filtered out.
+            if (score > 1E-8 && this.bucketsLen.containsKey(name) && this.bucketsMap.containsKey(name)) {
+                Double bucketNum = this.bucketsLen.get(name);
+                double[] buckets = this.bucketsMap.get(name);
+                Double scoreNew = 1.0 / bucketNum * (ExtractorUtils.findInsertPosition(buckets, score) + 1.0); // (insert position + 1) / bucketNum
+                userFeatureMap.put(name, String.valueOf(scoreNew));
+            }
+        }
+
+        for (RankItem item : rankItems) { // same bucketing, applied to every candidate's features
+            Map<String, String> featureMap = new HashMap<>();
+            Map<String, Double> featureMapDouble = item.featureMapDouble;
+
+            for (Map.Entry<String, Double> entry : featureMapDouble.entrySet()) {
+                String name = entry.getKey();
+                Double score = entry.getValue();
+                // Note: zero-valued features and features missing from the bucket file are filtered out.
+                if (score > 1E-8 && this.bucketsLen.containsKey(name) && this.bucketsMap.containsKey(name)) {
+                    Double bucketNum = this.bucketsLen.get(name);
+                    double[] buckets = this.bucketsMap.get(name);
+                    Double scoreNew = 1.0 / bucketNum * (ExtractorUtils.findInsertPosition(buckets, score) + 1.0);
+                    featureMap.put(name, String.valueOf(scoreNew));
+                }
+            }
+            item.featureMap = featureMap;
+        }
+
+        // TODO 3 ranking
+        Map<String, String> sceneFeatureMap = new HashMap<>(0);
+
+        List<RankItem> items = ScorerUtils.getScorerPipeline("feeds_score_config_20240807.conf")
+                .scoring(sceneFeatureMap, userFeatureMap, rankItems);
+        String redisScoreKey =  mergeWeight.getOrDefault("redisScoreKey", 0.0) < 0.5 ? "redis:vid_hasreturn_rov:" : "redis:vid_hasreturn_rov_7d:"; // config value below 0.5 (the default) selects the first key prefix
+        Map<String, Map<String, String>> vid2MapFeature = this.getVideoRedisFeature(vids, redisScoreKey);
+        List<Video> result = new ArrayList<>();
+        String hasReturnRovKey = mergeWeight.getOrDefault("hasReturnRovKey", 1.0) < 0.5 ? "rate_1" : "rate_n"; // default 1.0 selects "rate_n"
+        Double chooseFunction = mergeWeight.getOrDefault("chooseFunction", 0.0); // selects how the two scores are combined below
+
+        for (RankItem item : items) {
+            double score = 0.0;
+            double hasReturnRovScore = Double.parseDouble(vid2MapFeature.getOrDefault(item.getVideoId() + "", new HashMap<>())
+                    .getOrDefault(hasReturnRovKey, "0"));
+            item.getScoresMap().put("hasReturnRovScore", hasReturnRovScore);
+            double fmRovOrigin = item.getScoreRov();
+            item.getScoresMap().put("fmRovOrigin", fmRovOrigin);
+            double fmRov = restoreScore(fmRovOrigin);
+            item.getScoresMap().put("fmRov", fmRov);
+            if (chooseFunction == 0){ // linear boost
+                score = fmRov * (1 + hasReturnRovScore);
+            }else if (chooseFunction == 1){ // logarithmic boost
+                score = fmRov * (1 + Math.log(hasReturnRovScore + 1));
+            }else { // any other value: weight by ExtractorUtils.sigmoid of the rate
+                score = fmRov * ExtractorUtils.sigmoid(hasReturnRovScore);
+            }
+
+            Video video = item.getVideo();
+            video.setScore(score);
+            video.setSortScore(score);
+            video.setScoresMap(item.getScoresMap());
+            video.setAllFeatureMap(item.getAllFeatureMap());
+            if (feature != null // NOTE(review): feature was already dereferenced above, so this null check cannot fail here
+                    && MapUtils.isNotEmpty(feature.getVideoFeature())
+                    && MapUtils.isNotEmpty(feature.getVideoFeature().get(item.getVideoId() + ""))) {
+                video.getMetaFeatureMap().putAll(feature.getVideoFeature().get(item.getVideoId() + ""));
+            }
+            if (feature != null
+                    && MapUtils.isNotEmpty(feature.getUserFeature())) {
+                video.getMetaFeatureMap().putAll(feature.getUserFeature());
+            }
+            result.add(video);
+        }
+        result.sort(Comparator.comparingDouble(o -> -o.getSortScore())); // descending by sort score
+
+        return result;
+    }
+
+    /**
+     * Loads the classpath resource {@code 20240609_bucket_274.txt} into {@link #bucketsMap}
+     * and {@link #bucketsLen}.
+     *
+     * <p>Each useful line has exactly three tab-separated columns: the feature name, a number
+     * stored in {@code bucketsLen}, and a comma-separated list of doubles stored in
+     * {@code bucketsMap}. Spaces are removed before splitting; blank lines and lines with a
+     * different column count are skipped.
+     *
+     * <p>The resource is bundled with the application, so it is parsed only until one load has
+     * produced a non-empty result; later calls return immediately. The method is synchronized
+     * so that concurrent requests never observe one map filled and the other still empty.
+     * When the resource is missing or unreadable an error is logged and the maps are left
+     * unchanged.
+     */
+    private synchronized void readBucketFile() {
+        // Already loaded by an earlier call: nothing to do.
+        if (!this.bucketsMap.isEmpty()) {
+            return;
+        }
+        InputStream resourceStream = RankStrategy4RegionMergeModelV552.class.getClassLoader().getResourceAsStream("20240609_bucket_274.txt");
+        if (resourceStream == null) {
+            log.error("no bucket file");
+            return;
+        }
+        // Decode explicitly as UTF-8 instead of relying on the platform default charset.
+        try (BufferedReader reader = new BufferedReader(
+                new InputStreamReader(resourceStream, java.nio.charset.StandardCharsets.UTF_8))) {
+            Map<String, double[]> bucketsMap = new HashMap<>();
+            Map<String, Double> bucketsLen = new HashMap<>();
+            String line;
+            while ((line = reader.readLine()) != null) {
+                // readLine() already strips the line terminator; only spaces need removing.
+                line = line.replace(" ", "");
+                if (line.isEmpty()) {
+                    continue;
+                }
+                String[] rList = line.split("\t");
+                if (rList.length != 3) {
+                    continue;
+                }
+                String key = rList[0];
+                bucketsLen.put(key, Double.parseDouble(rList[1]));
+                double[] boundaries = Arrays.stream(rList[2].split(","))
+                        .mapToDouble(Double::parseDouble)
+                        .toArray();
+                bucketsMap.put(key, boundaries);
+            }
+            // Publish both maps only after the whole file has been parsed.
+            this.bucketsMap = bucketsMap;
+            this.bucketsLen = bucketsLen;
+        } catch (IOException e) {
+            // Pass the exception as the last argument so the stack trace is logged.
+            log.error("something is wrong in parse bucket file:", e);
+        }
+    }
+
+
+}