فهرست منبع

fm模型增加有回流的rov分值

zhangbo 10 ماه پیش
والد
کامیت
5f8c14c7a0

+ 24 - 0
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/rank/strategy/RankStrategy4RegionMergeModelBasic.java

@@ -298,6 +298,30 @@ public class RankStrategy4RegionMergeModelBasic extends RankService {
         }
     }
 
+    /**
+     * Batch-loads per-video feature maps from Redis.
+     *
+     * <p>One key is built per video id as {@code redisKeyPrefix + vid}, all keys are fetched with a
+     * single {@code multiGet}, and every non-null value is parsed as a JSON object of string pairs.
+     * Values are paired with {@code vids} by position.
+     *
+     * @param vids           video ids to look up; {@code null} or empty yields an empty map
+     * @param redisKeyPrefix prefix prepended to each video id to form the Redis key
+     * @return mutable map from video id to its parsed feature map; ids whose Redis value is
+     *         {@code null} are absent, and an id whose value makes the parser throw is mapped
+     *         to an empty map
+     */
+    public Map<String, Map<String, String>> getVideoRedisFeature(List<String> vids, String redisKeyPrefix){
+        if (vids == null || vids.isEmpty()) {
+            // Nothing to look up: avoid the NPE / pointless Redis round trip.
+            return new HashMap<>();
+        }
+        List<String> keys = vids.stream().map(r -> redisKeyPrefix + r).collect(Collectors.toList());
+        List<String> key2Values = this.redisTemplate.opsForValue().multiGet(keys);
+        Map<String, Map<String, String>> result = new HashMap<>(vids.size());
+        if (key2Values == null) {
+            return result;
+        }
+        int j = 0;
+        for (String vid : vids) {
+            if (j >= key2Values.size()) {
+                // Defensive: a reply shorter than the request must not overrun the list.
+                break;
+            }
+            String vF = key2Values.get(j);
+            ++j;
+            if (vF == null) {
+                continue;
+            }
+            Map<String, String> vfMap = new HashMap<>();
+            try {
+                vfMap = JSONUtils.fromJson(vF, new TypeToken<Map<String, String>>() {}, vfMap);
+            } catch (Exception e) {
+                // Use SLF4J placeholders directly (String.format ignores "{}") and pass the
+                // exception as the last argument so its stack trace is logged.
+                log.error("parse video json is wrong on redisKeyPrefix in {} with {}", this.CLASS_NAME, redisKeyPrefix, e);
+            }
+            result.put(vid, vfMap);
+        }
+        return result;
+    }
+
     public static void main(String[] args) {
         // No-op entry point: the body is empty.
     }

+ 320 - 302
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/rank/strategy/RankStrategy4RegionMergeModelV569.java

@@ -1,62 +1,36 @@
 package com.tzld.piaoquan.recommend.server.service.rank.strategy;
 
 import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
-import com.google.common.reflect.TypeToken;
 import com.tzld.piaoquan.recommend.server.common.base.RankItem;
 import com.tzld.piaoquan.recommend.server.model.Video;
+import com.tzld.piaoquan.recommend.server.service.FeatureService;
 import com.tzld.piaoquan.recommend.server.service.rank.RankParam;
-import com.tzld.piaoquan.recommend.server.service.rank.RankResult;
-import com.tzld.piaoquan.recommend.server.service.rank.RankService;
 import com.tzld.piaoquan.recommend.server.service.rank.extractor.ExtractorUtils;
-import com.tzld.piaoquan.recommend.server.service.rank.extractor.RankExtractorItemFeature;
-import com.tzld.piaoquan.recommend.server.service.rank.extractor.RankExtractorItemTags;
-import com.tzld.piaoquan.recommend.server.service.rank.extractor.RankExtractorUserFeature;
-import com.tzld.piaoquan.recommend.server.service.rank.processor.RankProcessorBoost;
-import com.tzld.piaoquan.recommend.server.service.rank.processor.RankProcessorDensity;
-import com.tzld.piaoquan.recommend.server.service.rank.processor.RankProcessorInsert;
-import com.tzld.piaoquan.recommend.server.service.rank.processor.RankProcessorTagFilter;
 import com.tzld.piaoquan.recommend.server.service.recall.strategy.*;
 import com.tzld.piaoquan.recommend.server.service.score.ScorerUtils;
 import com.tzld.piaoquan.recommend.server.util.CommonCollectionUtils;
-import com.tzld.piaoquan.recommend.server.util.JSONUtils;
 import lombok.extern.slf4j.Slf4j;
-import org.apache.commons.collections4.CollectionUtils;
-import org.apache.commons.lang3.RandomUtils;
-import org.springframework.data.redis.connection.RedisConnectionFactory;
-import org.springframework.data.redis.connection.RedisStandaloneConfiguration;
-import org.springframework.data.redis.connection.jedis.JedisConnectionFactory;
-import org.springframework.data.redis.core.RedisTemplate;
-import org.springframework.data.redis.serializer.StringRedisSerializer;
+import org.apache.commons.collections4.MapUtils;
+import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Service;
 
-import java.text.SimpleDateFormat;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
 import java.util.*;
-import java.util.stream.Collectors;
 
-/**
- * @author zhangbo
- * @desc 地域召回融合 流量池汤姆森
- */
 @Service
 @Slf4j
-public class RankStrategy4RegionMergeModelV569 extends RankService {
+public class RankStrategy4RegionMergeModelV569 extends RankStrategy4RegionMergeModelBasic {
     @ApolloJsonValue("${rank.score.merge.weightv569:}")
     private Map<String, Double> mergeWeight;
-    @ApolloJsonValue("${RankStrategy4DensityFilterV2:}")
-    private Map<String, Map<String, Map<String, String>>> filterRules = new HashMap<>();
-    final private String CLASS_NAME = this.getClass().getSimpleName();
-
-    public void duplicate(Set<Long> setVideo, List<Video> videos) {
-        Iterator<Video> iterator = videos.iterator();
-        while (iterator.hasNext()) {
-            Video v = iterator.next();
-            if (setVideo.contains(v.getVideoId())) {
-                iterator.remove();
-            } else {
-                setVideo.add(v.getVideoId());
-            }
-        }
-    }
+
+    @Autowired
+    private FeatureService featureService;
+
+    Map<String, double[]> bucketsMap = new HashMap<>();
+    Map<String, Double> bucketsLen = new HashMap<>();
 
     @Override
     public List<Video> mergeAndRankRovRecall(RankParam param) {
@@ -66,9 +40,6 @@ public class RankStrategy4RegionMergeModelV569 extends RankService {
         //-------------------逻-------------------
         //-------------------辑-------------------
 
-        List<Video> rovRecallRank = new ArrayList<>();
-        Set<Long> setVideo = new HashSet<>();
-        //-------------------老地域召回-------------------
         List<Video> oldRovs = new ArrayList<>();
         oldRovs.addAll(extractAndSort(param, RegionHRecallStrategy.PUSH_FORM));
         oldRovs.addAll(extractAndSort(param, RegionHDupRecallStrategy.PUSH_FORM));
@@ -80,320 +51,367 @@ public class RankStrategy4RegionMergeModelV569 extends RankService {
         List<Video> v0 = oldRovs.size() <= sizeReturn
                 ? oldRovs
                 : oldRovs.subList(0, sizeReturn);
+        Set<Long> setVideo = new HashSet<>();
         this.duplicate(setVideo, v0);
-        rovRecallRank.addAll(v0);
-        setVideo.addAll(v0.stream().map(Video::getVideoId).collect(Collectors.toSet()));
-        //-------------------sim相似召回------------------
+
+
+        //-------------------相关性召回 融合+去重-------------------
         List<Video> v5 = extractAndSort(param, SimHotVideoRecallStrategy.PUSH_FORM);
-        v5 = v5.stream().filter(r-> !setVideo.contains(r.getVideoId())).collect(Collectors.toList());
-        v5 = v5.subList(0, Math.min(mergeWeight.getOrDefault("v5", 5.0).intValue(), v5.size()));
-        rovRecallRank.addAll(v5);
-        setVideo.addAll(v5.stream().map(Video::getVideoId).collect(Collectors.toSet()));
-        //-------------------return相似召回------------------
         List<Video> v6 = extractAndSort(param, ReturnVideoRecallStrategy.PUSH_FORM);
-        v6 = v6.stream().filter(r-> !setVideo.contains(r.getVideoId())).collect(Collectors.toList());
-        v6 = v6.subList(0, Math.min(mergeWeight.getOrDefault("v6", 5.0).intValue(), v6.size()));
-        rovRecallRank.addAll(v6);
-        setVideo.addAll(v6.stream().map(Video::getVideoId).collect(Collectors.toSet()));
-        //-------------------新地域召回------------------
+        this.duplicate(setVideo, v5);
+        this.duplicate(setVideo, v6);
+        //-------------------流量池直接送 融合+去重-------------------
+        List<Video> v9 = extractAndSort(param, FlowPoolLastDayTopRecallStrategy.PUSH_FORM);
+        this.duplicate(setVideo, v9);
+        //-------------------地域相关召回 融合+去重-------------------
         List<Video> v1 = extractAndSort(param, RegionRealtimeRecallStrategyV1.PUSH_FORM);
-        v1 = v1.stream().filter(r-> !setVideo.contains(r.getVideoId())).collect(Collectors.toList());
-        v1 = v1.subList(0, Math.min(mergeWeight.getOrDefault("v1", 5.0).intValue(), v1.size()));
-        rovRecallRank.addAll(v1);
-        setVideo.addAll(v1.stream().map(Video::getVideoId).collect(Collectors.toSet()));
-        //-------------------节日特殊召回-------------------
+        this.duplicate(setVideo, v1);
+        //-------------------节日扶持召回 融合+去重-------------------
         List<Video> v7 = extractAndSort(param, FestivalRecallStrategyV1.PUSH_FORM);
-        v7 = v7.stream().filter(r-> !setVideo.contains(r.getVideoId())).collect(Collectors.toList());
-        v7 = v7.subList(0, Math.min(mergeWeight.getOrDefault("v7", 5.0).intValue(), v7.size()));
-        rovRecallRank.addAll(v7);
-        setVideo.addAll(v7.stream().map(Video::getVideoId).collect(Collectors.toSet()));
-
+        this.duplicate(setVideo, v7);
+        List<Video> rovRecallRank = new ArrayList<>();
+        rovRecallRank.addAll(v0);
+        rovRecallRank.addAll(v5.subList(0, Math.min(mergeWeight.getOrDefault("v5", 5.0).intValue(), v5.size())));
+        rovRecallRank.addAll(v6.subList(0, Math.min(mergeWeight.getOrDefault("v6", 5.0).intValue(), v6.size())));
+        rovRecallRank.addAll(v9.subList(0, Math.min(mergeWeight.getOrDefault("v9", 0.0).intValue(), v9.size())));
+        rovRecallRank.addAll(v1.subList(0, Math.min(mergeWeight.getOrDefault("v1", 5.0).intValue(), v1.size())));
+        rovRecallRank.addAll(v7.subList(0, Math.min(mergeWeight.getOrDefault("v7", 5.0).intValue(), v7.size())));
 
         //-------------------排-------------------
         //-------------------序-------------------
         //-------------------逻-------------------
         //-------------------辑-------------------
 
-        // 1 模型分
-        List<String> rtFeaPart = new ArrayList<>();
-        List<RankItem> items = model(rovRecallRank, param, rtFeaPart);
-        List<String> rtFeaPartKey = new ArrayList<>(Arrays.asList("item_rt_fea_1day_partition", "item_rt_fea_1h_partition"));
-        List<String> rtFeaPartKeyResult = this.redisTemplate.opsForValue().multiGet(rtFeaPartKey);
-        Calendar calendar = Calendar.getInstance();
-        String date = new SimpleDateFormat("yyyyMMdd").format(calendar.getTime());
-        String hour = new SimpleDateFormat("HH").format(calendar.getTime());
-        String rtFeaPart1h = date + hour;
-        if (rtFeaPartKeyResult != null) {
-            if (rtFeaPartKeyResult.get(1) != null) {
-                rtFeaPart1h = rtFeaPartKeyResult.get(1);
+        // TODO 1 批量获取特征  省份参数要对齐  headvid  要传递过来!
+        List<String> vids = CommonCollectionUtils.toListDistinct(rovRecallRank, v -> String.valueOf(v.getVideoId()));
+
+        // k1:视频、k2:表、k3:特征、v:特征值
+        String provinceCn = param.getProvince().replaceAll("省$", "");
+        String headVid = String.valueOf(param.getHeadVid());
+        FeatureService.Feature feature = featureService.getFeature(param.getMid(), vids,
+                String.valueOf(param.getAppType()), provinceCn, headVid);
+        Map<String, Map<String, String>> featureOriginUser = feature.getUserFeature();
+        Map<String, Map<String, Map<String, String>>> featureOriginVideo = feature.getVideoFeature();
+
+
+        // TODO 2 特征处理
+        Map<String, Double> userFeatureMapDouble = new HashMap<>();
+        String mid = param.getMid();
+        Map<String, String> c1 = featureOriginUser.getOrDefault("alg_mid_feature_play", new HashMap<>());
+        Map<String, String> c2 = featureOriginUser.getOrDefault("alg_mid_feature_share_and_return", new HashMap<>());
+        Map<String, String> c3 = featureOriginUser.getOrDefault("alg_mid_feature_play_tags", new HashMap<>());
+        Map<String, String> c4 = featureOriginUser.getOrDefault("alg_mid_feature_return_tags", new HashMap<>());
+        Map<String, String> c5 = featureOriginUser.getOrDefault("alg_mid_feature_share_tags", new HashMap<>());
+        Map<String, String> c6 = featureOriginUser.getOrDefault("alg_mid_feature_feed_exp_share_tags", new HashMap<>());
+        Map<String, String> c7 = featureOriginUser.getOrDefault("alg_mid_feature_feed_exp_return_tags", new HashMap<>());
+        Map<String, String> c8 = featureOriginUser.getOrDefault("alg_mid_feature_sharecf", new HashMap<>());
+        Map<String, String> c9 = featureOriginUser.getOrDefault("alg_mid_feature_returncf", new HashMap<>());
+
+        if (!c1.isEmpty()) {
+            userFeatureMapDouble.put("playcnt_6h", Double.parseDouble(c1.getOrDefault("playcnt_6h", "0")));
+            userFeatureMapDouble.put("playcnt_1d", Double.parseDouble(c1.getOrDefault("playcnt_1d", "0")));
+            userFeatureMapDouble.put("playcnt_3d", Double.parseDouble(c1.getOrDefault("playcnt_3d", "0")));
+            userFeatureMapDouble.put("playcnt_7d", Double.parseDouble(c1.getOrDefault("playcnt_7d", "0")));
+        }
+        if (!c2.isEmpty()) {
+            userFeatureMapDouble.put("share_pv_12h", Double.parseDouble(c2.getOrDefault("share_pv_12h", "0")));
+            userFeatureMapDouble.put("share_pv_1d", Double.parseDouble(c2.getOrDefault("share_pv_1d", "0")));
+            userFeatureMapDouble.put("share_pv_3d", Double.parseDouble(c2.getOrDefault("share_pv_3d", "0")));
+            userFeatureMapDouble.put("share_pv_7d", Double.parseDouble(c2.getOrDefault("share_pv_7d", "0")));
+            userFeatureMapDouble.put("return_uv_12h", Double.parseDouble(c2.getOrDefault("return_uv_12h", "0")));
+            userFeatureMapDouble.put("return_uv_1d", Double.parseDouble(c2.getOrDefault("return_uv_1d", "0")));
+            userFeatureMapDouble.put("return_uv_3d", Double.parseDouble(c2.getOrDefault("return_uv_3d", "0")));
+            userFeatureMapDouble.put("return_uv_7d", Double.parseDouble(c2.getOrDefault("return_uv_7d", "0")));
+        }
+
+        Map<String, String> c34567Map = new HashMap<>(15);
+        List<Tuple2> tmpList0 = Arrays.asList(
+                new Tuple2(c3, "c3_feature"),
+                new Tuple2(c4, "c4_feature"),
+                new Tuple2(c5, "c5_feature"),
+                new Tuple2(c6, "c6_feature"),
+                new Tuple2(c7, "c7_feature")
+        );
+        for (Tuple2 tuple2 : tmpList0) {
+            for (String key_time : Arrays.asList("tags_1d", "tags_3d", "tags_7d")) {
+                String tags = tuple2.first.getOrDefault(key_time, "");
+                if (!tags.isEmpty()) {
+                    c34567Map.put(tuple2.name + "_" + key_time, tags);
+                }
             }
         }
-        // 2 统计分
-        String cur = rtFeaPart1h;
-        List<String> datehours = new LinkedList<>(); // 时间是倒叙的
-        for (int i = 0; i < 24; ++i) {
-            datehours.add(cur);
-            cur = ExtractorUtils.subtractHours(cur, 1);
+
+        Map<String, Map<String, String[]>> c89Map = new HashMap<>(4);
+        List<Tuple2> tmpList1 = Arrays.asList(
+                new Tuple2(c8, "c8_feature"),
+                new Tuple2(c9, "c9_feature")
+        );
+        for (Tuple2 tuple2 : tmpList1) {
+            for (String key_action : Arrays.asList("share", "return")) {
+                String cfListStr = tuple2.first.getOrDefault(key_action, "");
+                if (!cfListStr.isEmpty()) {
+                    Map<String, String[]> cfMap = new HashMap<>();
+                    String[] entries = cfListStr.split(",");
+                    for (String entry : entries) {
+                        String[] rList = entry.split(":");
+                        if (rList.length >= 4) { // 确保分割后有四个元素
+                            String key = rList[0];
+                            String value1 = rList[1];
+                            String value2 = rList[2];
+                            String value3 = rList[3];
+                            String[] strs = {value1, value2, value3};
+                            cfMap.put(key, strs);
+                        }
+                    }
+                    c89Map.put(tuple2.name + "_" + key_action, cfMap);
+                }
+            }
         }
-        for (RankItem item : items) {
-            Map<String, Map<String, Double>> itemRealMap = item.getItemRealTimeFeature();
-            List<Double> views = getStaticData(itemRealMap, datehours, "view_uv_list_1h");
-            List<Double> shares = getStaticData(itemRealMap, datehours, "share_uv_list_1h");
-            List<Double> allreturns = getStaticData(itemRealMap, datehours, "return_uv_list_1h");
 
-            // 全部回流的rov和ros
-            List<Double> share2allreturn = getRateData(allreturns, shares, 0.0, 0.0);
-            Double share2allreturnScore = calScoreWeightNoTimeDecay(share2allreturn);
-            item.scoresMap.put("share2allreturnScore", share2allreturnScore);
-            List<Double> view2allreturn = getRateData(allreturns, views, 0.0, 0.0);
-            Double view2allreturnScore = calScoreWeightNoTimeDecay(view2allreturn);
-            item.scoresMap.put("view2allreturnScore", view2allreturnScore);
 
-            // 全部回流
-            Double allreturnsScore = calScoreWeightNoTimeDecay(allreturns);
-            item.scoresMap.put("allreturnsScore", allreturnsScore);
+        List<RankItem> rankItems = CommonCollectionUtils.toList(rovRecallRank, RankItem::new);
+        for (RankItem item : rankItems) {
+            Map<String, Double> featureMap = new HashMap<>();
+            String vid = item.getVideoId() + "";
+            Map<String, String> b1 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_all_exp", new HashMap<>());
+            Map<String, String> b2 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_all_share", new HashMap<>());
+            Map<String, String> b3 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_all_return", new HashMap<>());
+            Map<String, String> b6 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_exp2share", new HashMap<>());
+            Map<String, String> b7 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_share2return", new HashMap<>());
+
+            Map<String, String> b8 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_noflow_exp", new HashMap<>());
+            Map<String, String> b9 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_noflow_root_share", new HashMap<>());
+            Map<String, String> b10 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_noflow_root_return", new HashMap<>());
+            Map<String, String> b11 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_flow_exp", new HashMap<>());
+            Map<String, String> b12 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_flow_root_share", new HashMap<>());
+            Map<String, String> b13 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_flow_root_return", new HashMap<>());
+            Map<String, String> b17 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_province_exp", new HashMap<>());
+            Map<String, String> b18 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_province_root_share", new HashMap<>());
+            Map<String, String> b19 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_feed_province_root_return", new HashMap<>());
+
+            List<Tuple4> originData = Arrays.asList(
+                    new Tuple4(b1, b2, b3, "b123"),
+                    new Tuple4(b1, b6, b7, "b167"),
+                    new Tuple4(b8, b9, b10, "b8910"),
+                    new Tuple4(b11, b12, b13, "b111213"),
+                    new Tuple4(b17, b18, b19, "b171819")
+            );
+
+            for (Tuple4 tuple4 : originData) {
+                for (String prefix2 : Arrays.asList("1h", "2h", "3h", "4h", "12h", "1d", "3d", "7d")) {
+                    double exp = tuple4.first.isEmpty() ? 0 : Double.parseDouble(tuple4.first.getOrDefault("exp_pv_" + prefix2, "0.0"));
+                    double share = tuple4.second.isEmpty() ? 0 : Double.parseDouble(tuple4.second.getOrDefault("share_pv_" + prefix2, "0.0"));
+                    double returns = tuple4.third.isEmpty() ? 0 : Double.parseDouble(tuple4.third.getOrDefault("return_uv_" + prefix2, "0.0"));
+
+                    double f1 = ExtractorUtils.calDiv(share, exp);
+                    double f2 = ExtractorUtils.calLog(share);
+                    double f3 = ExtractorUtils.calDiv(returns, exp);
+                    double f4 = ExtractorUtils.calLog(returns);
+                    double f5 = f3 * f4;
+
+                    String key1 = tuple4.name + "_" + prefix2 + "_" + "STR";
+                    String key2 = tuple4.name + "_" + prefix2 + "_" + "log(share)";
+                    String key3 = tuple4.name + "_" + prefix2 + "_" + "ROV";
+                    String key4 = tuple4.name + "_" + prefix2 + "_" + "log(return)";
+                    String key5 = tuple4.name + "_" + prefix2 + "_" + "ROV*log(return)";
+
+                    featureMap.put(key1, f1);
+                    featureMap.put(key2, f2);
+                    featureMap.put(key3, f3);
+                    featureMap.put(key4, f4);
+                    featureMap.put(key5, f5);
+                }
+            }
 
+            Map<String, String> videoInfo = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_basic_info", new HashMap<>());
+            featureMap.put("total_time", Double.parseDouble(videoInfo.getOrDefault("total_time", "0")));
+            featureMap.put("bit_rate", Double.parseDouble(videoInfo.getOrDefault("bit_rate", "0")));
+
+            String title = videoInfo.getOrDefault("title", "");
+            if (!title.isEmpty()) {
+                for (String name : Arrays.asList("c3_feature", "c4_feature", "c5_feature", "c6_feature", "c7_feature")) {
+                    for (String key_time : Arrays.asList("tags_1d", "tags_3d", "tags_7d")) {
+                        String tags = c34567Map.getOrDefault(name + "_" + key_time, "");
+                        if (!tags.isEmpty()) {
+                            Double[] doubles = ExtractorUtils.funcC34567ForTags(tags, title);
+                            featureMap.put(name + "_" + key_time + "_matchnum", doubles[0]);
+                            featureMap.put(name + "_" + key_time + "_maxscore", doubles[1]);
+                            featureMap.put(name + "_" + key_time + "_avgscore", doubles[2]);
+                        }
+                    }
+                }
+            }
 
+            if (!vid.isEmpty()) {
+                for (String key_feature : Arrays.asList("c8_feature", "c9_feature")) {
+                    for (String key_action : Arrays.asList("share", "return")) {
+                        Map<String, String[]> cfMap = c89Map.getOrDefault(key_feature + "_" + key_action, new HashMap<>());
+                        if (cfMap.containsKey(vid)) {
+                            String[] scores = cfMap.get(vid);
+                            Double score1 = Double.parseDouble(scores[0]);
+                            Double score2 = Double.parseDouble(scores[1]);
+                            Double score3 = Double.parseDouble(scores[2]) <= 0 ? 0D : 1.0 / Double.parseDouble(scores[2]);
+                            featureMap.put(key_feature + "_" + key_action + "_score", score1);
+                            featureMap.put(key_feature + "_" + key_action + "_num", score2);
+                            featureMap.put(key_feature + "_" + key_action + "_rank", score3);
+                        }
+                    }
+                }
+            }
+            Map<String, String> d1 = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_recsys_feature_cf_i2i_new", new HashMap<>());
+            if (!d1.isEmpty()) {
+                featureMap.put("d1_exp", Double.parseDouble(d1.getOrDefault("exp", "0")));
+                featureMap.put("d1_return_n", Double.parseDouble(d1.getOrDefault("return_n", "0")));
+                featureMap.put("d1_rovn", Double.parseDouble(d1.getOrDefault("rovn", "0")));
+            }
+            item.featureMapDouble = featureMap;
+        }
+
+        // 3 连续值特征分桶
+        readBucketFile();
+        Map<String, String> userFeatureMap = new HashMap<>(userFeatureMapDouble.size());
+        for (Map.Entry<String, Double> entry : userFeatureMapDouble.entrySet()) {
+            String name = entry.getKey();
+            Double score = entry.getValue();
+            // 注意:0值、不在分桶文件中的特征,会被过滤掉。
+            if (score > 1E-8 && this.bucketsLen.containsKey(name) && this.bucketsMap.containsKey(name)) {
+                Double bucketNum = this.bucketsLen.get(name);
+                double[] buckets = this.bucketsMap.get(name);
+                Double scoreNew = 1.0 / bucketNum * (ExtractorUtils.findInsertPosition(buckets, score) + 1.0);
+                userFeatureMap.put(name, String.valueOf(scoreNew));
+            }
         }
-        // 3 融合公式
+
+        for (RankItem item : rankItems) {
+            Map<String, String> featureMap = new HashMap<>();
+            Map<String, Double> featureMapDouble = item.featureMapDouble;
+
+            for (Map.Entry<String, Double> entry : featureMapDouble.entrySet()) {
+                String name = entry.getKey();
+                Double score = entry.getValue();
+                // 注意:0值、不在分桶文件中的特征,会被过滤掉。
+                if (score > 1E-8 && this.bucketsLen.containsKey(name) && this.bucketsMap.containsKey(name)) {
+                    Double bucketNum = this.bucketsLen.get(name);
+                    double[] buckets = this.bucketsMap.get(name);
+                    Double scoreNew = 1.0 / bucketNum * (ExtractorUtils.findInsertPosition(buckets, score) + 1.0);
+                    featureMap.put(name, String.valueOf(scoreNew));
+                }
+            }
+            item.featureMap = featureMap;
+        }
+
+        // TODO 3 排序
+        Map<String, String> sceneFeatureMap = new HashMap<>(0);
+
+        List<RankItem> items = ScorerUtils.getScorerPipeline("feeds_score_config_20240609.conf")
+                .scoring(sceneFeatureMap, userFeatureMap, rankItems);
+        Map<String, Map<String, String>> vid2MapFeature =  this.getVideoRedisFeature(vids, "redis:vid_hasreturn_rov:");
         List<Video> result = new ArrayList<>();
-        double f = mergeWeight.getOrDefault("f", 0.1);
-        double g = mergeWeight.getOrDefault("g", 1.0);
+        String hasReturnRovKey = mergeWeight.getOrDefault("hasReturnRovKey", 0.0) < 0.5? "rate_1" : "rate_n";
         for (RankItem item : items) {
-            double share2allreturnScore = item.scoresMap.getOrDefault("share2allreturnScore", 0.0);
-            double view2allreturnScore = item.scoresMap.getOrDefault("view2allreturnScore", 0.0);
             double score = 0.0;
-            double allreturnsScore = item.scoresMap.getOrDefault("allreturnsScore", 0.0);
-            if (allreturnsScore > 50) {
-                score += (f * share2allreturnScore + g * view2allreturnScore);
-            }else{
-                score += (f * share2allreturnScore + g * view2allreturnScore) * 0.01;
-            }
+            double hasReturnRovScore = Double.parseDouble(vid2MapFeature.getOrDefault(item.getVideoId() + "", new HashMap<>())
+                    .getOrDefault(hasReturnRovKey, "0"));
+            item.getScoresMap().put("hasReturnRovScore", hasReturnRovScore);
+            double fmRov = item.getScoreRov();
+            item.getScoresMap().put("fmRov", fmRov);
+            score = fmRov * Math.log1p(hasReturnRovScore<=0?0:hasReturnRovScore);
             Video video = item.getVideo();
             video.setScore(score);
             video.setSortScore(score);
-            video.setScoreStr(item.getScoreStr());
             video.setScoresMap(item.getScoresMap());
+            video.setAllFeatureMap(item.getAllFeatureMap());
+            if (feature != null
+                    && MapUtils.isNotEmpty(feature.getVideoFeature())
+                    && MapUtils.isNotEmpty(feature.getVideoFeature().get(item.getVideoId()))) {
+                video.getMetaFeatureMap().putAll(feature.getVideoFeature().get(item.getVideoId()));
+            }
+            if (feature != null
+                    && MapUtils.isNotEmpty(feature.getUserFeature())) {
+                video.getMetaFeatureMap().putAll(feature.getUserFeature());
+            }
             result.add(video);
         }
         result.sort(Comparator.comparingDouble(o -> -o.getSortScore()));
+
         return result;
     }
 
-    public Double calScoreWeightNoTimeDecay(List<Double> data) {
-        Double up = 0.0;
-        Double down = 0.0;
-        for (int i = 0; i < data.size(); ++i) {
-            up += 1.0 * data.get(i);
-            down += 1.0;
-        }
-        return down > 1E-8 ? up / down : 0.0;
+    private Map<String, Map<String, String>> extractVideoFeature(Map<String, Map<String, Map<String, String>>> featureMap) {
+        // TODO: not implemented yet; this stub ignores featureMap and always returns null.
+        return null;
     }
 
-    public List<Double> getRateData(List<Double> ups, List<Double> downs, Double up, Double down) {
-        List<Double> data = new LinkedList<>();
-        for (int i = 0; i < ups.size(); ++i) {
-            if (ExtractorUtils.isDoubleEqualToZero(downs.get(i) + down)) {
-                data.add(0.0);
-            } else {
-                data.add(
-                        (ups.get(i) + up) / (downs.get(i) + down)
-                );
-            }
-        }
-        return data;
+    private Map<String, String> extractSceneFeature(Map<String, Map<String, Map<String, String>>> featureMap) {
+        // TODO: not implemented yet; this stub ignores featureMap and always returns null.
+        return null;
     }
 
-    public List<Double> getStaticData(Map<String, Map<String, Double>> itemRealMap,
-                                      List<String> datehours, String key) {
-        List<Double> views = new LinkedList<>();
-        Map<String, Double> tmp = itemRealMap.getOrDefault(key, new HashMap<>());
-        for (String dh : datehours) {
-            views.add(tmp.getOrDefault(dh, 0.0D) +
-                    (views.isEmpty() ? 0.0 : views.get(views.size() - 1))
-            );
-        }
-        return views;
+    private Map<String, String> extractUserFeature(Map<String, Map<String, Map<String, String>>> featureMap) {
+        // TODO: not implemented yet; this stub ignores featureMap and always returns null.
+        return null;
     }
 
-    public List<RankItem> model(List<Video> videos, RankParam param,
-                                List<String> rtFeaPart) {
-        List<RankItem> result = new ArrayList<>();
-        if (videos.isEmpty()) {
-            return result;
-        }
-
-
-        List<RankItem> rankItems = CommonCollectionUtils.toList(videos, RankItem::new);
-        List<Long> videoIds = CommonCollectionUtils.toListDistinct(videos, Video::getVideoId);
-
-        // 2-2: item 实时特征处理
-        List<String> videoRtKeys2 = videoIds.stream().map(r -> "item_rt_fea_1h_" + r)
-                .collect(Collectors.toList());
-        List<String> videoRtFeatures = this.redisTemplate.opsForValue().multiGet(videoRtKeys2);
-
-
-        if (videoRtFeatures != null) {
-            int j = 0;
-            for (RankItem item : rankItems) {
-                String vF = videoRtFeatures.get(j);
-                ++j;
-                if (vF == null) {
-                    continue;
-                }
-                Map<String, String> vfMap = new HashMap<>();
-                Map<String, Map<String, Double>> vfMapNew = new HashMap<>();
-                try {
-                    vfMap = JSONUtils.fromJson(vF, new TypeToken<Map<String, String>>() {
-                    }, vfMap);
-
-                    for (Map.Entry<String, String> entry : vfMap.entrySet()) {
-                        String value = entry.getValue();
-                        if (value == null) {
-                            continue;
-                        }
-                        String[] var1 = value.split(",");
-                        Map<String, Double> tmp = new HashMap<>();
-                        for (String var2 : var1) {
-                            String[] var3 = var2.split(":");
-                            tmp.put(var3[0], Double.valueOf(var3[1]));
+    private void readBucketFile() { // Loads the bundled bucket file from the classpath and, on success, replaces this.bucketsMap / this.bucketsLen with its contents.
+        InputStream resourceStream = RankStrategy4RegionMergeModelV999.class.getClassLoader().getResourceAsStream("20240609_bucket_274.txt"); // NOTE(review): looks up the resource via RankStrategy4RegionMergeModelV999's class loader; confirm referencing that class here is intended
+        if (resourceStream != null) {
+            try (BufferedReader reader = new BufferedReader(new InputStreamReader(resourceStream))) { // NOTE(review): no explicit charset is passed, so the reader falls back to the default charset
+                Map<String, double[]> bucketsMap = new HashMap<>();
+                Map<String, Double> bucketsLen = new HashMap<>();
+                String line;
+                while ((line = reader.readLine()) != null) {
+                    // Strip spaces (and any newline characters), then skip empty lines
+                    line = line.replace(" ", "").replaceAll("\n", "");
+                    if (!line.isEmpty()) {
+                        String[] rList = line.split("\t"); // a data line is expected to have exactly three tab-separated fields; any other line is silently ignored
+                        if (rList.length == 3) {
+                            String key = rList[0];
+                            double value1 = Double.parseDouble(rList[1]); // NOTE(review): a malformed number throws NumberFormatException, which the IOException catch below does not handle
+                            bucketsLen.put(key, value1); // second field: a single double, stored under the key in bucketsLen
+                            double[] value2 = Arrays.stream(rList[2].split(",")) // third field: comma-separated doubles, stored under the key in bucketsMap
+                                    .mapToDouble(Double::valueOf)
+                                    .toArray();
+                            bucketsMap.put(key, value2);
                         }
-                        vfMapNew.put(entry.getKey(), tmp);
                     }
-                    item.setItemRealTimeFeature(vfMapNew);
-                } catch (Exception e) {
-                    log.error(String.format("parse video item_rt_fea_1h_ json is wrong in {} with {}", this.CLASS_NAME, e));
                 }
+                this.bucketsMap = bucketsMap; // the fields are assigned only after the whole file has been read without an exception
+                this.bucketsLen = bucketsLen;
+            } catch (IOException e) {
+                log.error("something is wrong in parse bucket file:" + e); // NOTE(review): concatenating e logs only its toString(), not the stack trace
             }
+        } else {
+            log.error("no bucket file"); // the resource was not found on the classpath
         }
 
-        return rankItems;
     }
 
-    @Override
-    public RankResult mergeAndSort(RankParam param, List<Video> rovVideos, List<Video> flowVideos) {
-
-        //1 兜底策略,rov池子不足时,用冷启池填补。直接返回。
-        if (CollectionUtils.isEmpty(rovVideos)) {
-            if (param.getSize() < flowVideos.size()) {
-                return new RankResult(flowVideos.subList(0, param.getSize()));
-            } else {
-                return new RankResult(flowVideos);
-            }
-        }
-
-        //2 根据实验号解析阿波罗参数。
-        String abCode = param.getAbCode();
-        Map<String, Map<String, String>> rulesMap = this.filterRules.getOrDefault(abCode, new HashMap<>(0));
-
-        //3 标签读取
-        if (rulesMap != null && !rulesMap.isEmpty()) {
-            RankExtractorItemTags extractorItemTags = new RankExtractorItemTags(this.redisTemplate);
-            extractorItemTags.processor(rovVideos, flowVideos);
-        }
-        //6 合并结果时间卡控
-        if (rulesMap != null && !rulesMap.isEmpty()) {
-            RankProcessorTagFilter.processor(rovVideos, flowVideos, rulesMap);
-        }
-
-        //4 rov池提权功能
-        RankProcessorBoost.boostByTag(rovVideos, rulesMap);
+    static class Tuple4 { // Simple mutable holder bundling three Map<String, String> values with a name.
+        public Map<String, String> first;
+        public Map<String, String> second;
+        public Map<String, String> third;
 
-        //5 rov池强插功能
-        RankProcessorInsert.insertByTag(param, rovVideos, rulesMap);
+        public String name;
 
-        //7 流量池按比例强插
-        List<Video> result = new ArrayList<>();
-        for (int i = 0; i < param.getTopK() && i < rovVideos.size(); i++) {
-            result.add(rovVideos.get(i));
-        }
-        double flowPoolP = getFlowPoolP(param);
-        int flowPoolIndex = 0;
-        int rovPoolIndex = param.getTopK();
-        for (int i = 0; i < param.getSize() - param.getTopK(); i++) {
-            double rand = RandomUtils.nextDouble(0, 1);
-            if (rand < flowPoolP) {
-                if (flowPoolIndex < flowVideos.size()) {
-                    result.add(flowVideos.get(flowPoolIndex++));
-                } else {
-                    break;
-                }
-            } else {
-                if (rovPoolIndex < rovVideos.size()) {
-                    result.add(rovVideos.get(rovPoolIndex++));
-                } else {
-                    break;
-                }
-            }
-        }
-        if (rovPoolIndex >= rovVideos.size()) {
-            for (int i = flowPoolIndex; i < flowVideos.size() && result.size() < param.getSize(); i++) {
-                result.add(flowVideos.get(i));
-            }
-        }
-        if (flowPoolIndex >= flowVideos.size()) {
-            for (int i = rovPoolIndex; i < rovVideos.size() && result.size() < param.getSize(); i++) {
-                result.add(rovVideos.get(i));
-            }
+        public Tuple4(Map<String, String> first, Map<String, String> second, Map<String, String> third, String name) { // stores the given references as-is (no defensive copies)
+            this.first = first;
+            this.second = second;
+            this.third = third;
+            this.name = name;
         }
 
-        //8 合并结果密度控制
-        Map<String, Integer> densityRules = new HashMap<>();
-        if (rulesMap != null && !rulesMap.isEmpty()) {
-            for (Map.Entry<String, Map<String, String>> entry : rulesMap.entrySet()) {
-                String key = entry.getKey();
-                Map<String, String> value = entry.getValue();
-                if (value.containsKey("density")) {
-                    densityRules.put(key, Integer.valueOf(value.get("density")));
-                }
-            }
-        }
-        Set<Long> videosSet = result.stream().map(Video::getVideoId).collect(Collectors.toSet());
-        List<Video> rovRecallRankNew = rovVideos.stream().filter(r -> !videosSet.contains(r.getVideoId())).collect(Collectors.toList());
-        List<Video> flowPoolRankNew = flowVideos.stream().filter(r -> !videosSet.contains(r.getVideoId())).collect(Collectors.toList());
-        List<Video> resultWithDensity = RankProcessorDensity.mergeDensityControl(result,
-                rovRecallRankNew, flowPoolRankNew, densityRules);
-
-        return new RankResult(resultWithDensity);
     }
 
-    public static void main(String[] args) {
-//        String up1 = "2024031012:513,2024031013:456,2024031014:449,2024031015:262,2024031016:414,2024031017:431,2024031018:643,2024031019:732,2024031020:927,2024031021:859,2024031022:866,2024031023:358,2024031100:133,2024031101:28,2024031102:22,2024031103:15,2024031104:21,2024031105:36,2024031106:157,2024031107:371,2024031108:378,2024031109:216,2024031110:269,2024031111:299,2024031112:196,2024031113:186,2024031114:85,2024031115:82";
-        String up1 = "2024031012:1167,2024031013:1023,2024031014:947,2024031015:664,2024031016:842,2024031017:898,2024031018:1170,2024031019:1439,2024031020:2010,2024031021:1796,2024031022:1779,2024031023:722,2024031100:226,2024031101:50,2024031102:31,2024031103:30,2024031104:38,2024031105:63,2024031106:293,2024031107:839,2024031108:1250,2024031109:858,2024031110:767,2024031111:697,2024031112:506,2024031113:534,2024031114:381,2024031115:278";
-        String down1 = "2024031012:2019,2024031013:1676,2024031014:1626,2024031015:1458,2024031016:1508,2024031017:1510,2024031018:1713,2024031019:1972,2024031020:2500,2024031021:2348,2024031022:2061,2024031023:1253,2024031100:659,2024031101:243,2024031102:191,2024031103:282,2024031104:246,2024031105:439,2024031106:1079,2024031107:1911,2024031108:2023,2024031109:1432,2024031110:1632,2024031111:1183,2024031112:1024,2024031113:938,2024031114:701,2024031115:541";
-
-//        String up2 = "2024031012:215,2024031013:242,2024031014:166,2024031015:194,2024031016:209,2024031017:245,2024031018:320,2024031019:332,2024031020:400,2024031021:375,2024031022:636,2024031023:316,2024031100:167,2024031101:45,2024031102:22,2024031103:26,2024031104:12,2024031105:22,2024031106:24,2024031107:143,2024031108:181,2024031109:199,2024031110:194,2024031111:330,2024031112:423,2024031113:421,2024031114:497,2024031115:424";
-        String up2 = "2024031012:409,2024031013:464,2024031014:354,2024031015:474,2024031016:436,2024031017:636,2024031018:709,2024031019:741,2024031020:802,2024031021:904,2024031022:1112,2024031023:639,2024031100:378,2024031101:78,2024031102:47,2024031103:37,2024031104:17,2024031105:49,2024031106:103,2024031107:293,2024031108:457,2024031109:488,2024031110:558,2024031111:711,2024031112:785,2024031113:830,2024031114:974,2024031115:850";
-        String down2 = "2024031012:748,2024031013:886,2024031014:788,2024031015:1029,2024031016:957,2024031017:1170,2024031018:1208,2024031019:1181,2024031020:1275,2024031021:1265,2024031022:1512,2024031023:1190,2024031100:1127,2024031101:486,2024031102:289,2024031103:254,2024031104:197,2024031105:310,2024031106:344,2024031107:693,2024031108:976,2024031109:1045,2024031110:1039,2024031111:1257,2024031112:1202,2024031113:1454,2024031114:1785,2024031115:1544";
-
-        RankStrategy4RegionMergeModelV569 job = new RankStrategy4RegionMergeModelV569();
-        List<Double> l1 = job.getRateData(job.help(up1, "2024031115", 24), job.help(down1, "2024031115", 24), 1., 10.);
-        Double d1 = job.calScoreWeightNoTimeDecay(l1);
-
-        System.out.println(d1);
+    static class Tuple2 { // Simple mutable holder pairing one Map<String, String> with a name.
+        public Map<String, String> first;
 
-        List<Double> l2 = job.getRateData(job.help(up2, "2024031115", 24), job.help(down2, "2024031115", 24), 1., 10.);
-        Double d2 = job.calScoreWeightNoTimeDecay(l2);
+        public String name;
 
-        System.out.println(d2);
-
-    }
-
-    List<Double> help(String s, String date, Integer h) {
-        Map<String, Double> maps = Arrays.stream(s.split(",")).map(pair -> pair.split(":"))
-                .collect(Collectors.toMap(
-                        arr -> arr[0],
-                        arr -> Double.valueOf(arr[1])
-                ));
-        List<String> datehours = new LinkedList<>(); // 时间是倒叙的
-        List<Double> result = new ArrayList<>();
-        for (int i = 0; i < h; ++i) {
-            Double d = (result.isEmpty() ? 0.0 : result.get(result.size() - 1));
-            result.add(d + maps.getOrDefault(date, 0D));
-            datehours.add(date);
-            date = ExtractorUtils.subtractHours(date, 1);
+        public Tuple2(Map<String, String> first, String name) { // stores the given references as-is (no defensive copies)
+            this.first = first;
+            this.name = name;
         }
-        return result;
+
     }
 
 }

+ 2 - 9
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/recall/RecallService.java

@@ -163,13 +163,10 @@ public class RecallService implements ApplicationContextAware {
                 strategies.add(strategyMap.get(RegionRealtimeRecallStrategyV1.class.getSimpleName()));
                 strategies.addAll(getRegionRecallStrategy(param));
             case "60118": // 568
-                strategies.add(strategyMap.get(RegionRealtimeRecallStrategyV1.class.getSimpleName()));
-                strategies.addAll(getRegionRecallStrategy(param));
-                strategies.add(strategyMap.get(ReturnVideoRecallStrategy.class.getSimpleName()));
             case "60119": // 569
                 strategies.add(strategyMap.get(RegionRealtimeRecallStrategyV1.class.getSimpleName()));
-                strategies.add(strategyMap.get(TitleTagRecallStrategyV1.class.getSimpleName()));
                 strategies.addAll(getRegionRecallStrategy(param));
+                strategies.add(strategyMap.get(ReturnVideoRecallStrategy.class.getSimpleName()));
             default:
                 strategies.addAll(getRegionRecallStrategy(param));
         }
@@ -281,11 +278,6 @@ public class RecallService implements ApplicationContextAware {
             case "60106": // 552
             case "60107": // 553
             case "60116": // 566
-            case "60119": // 569
-                strategies.add(strategyMap.get(SimHotVideoRecallStrategy.class.getSimpleName()));
-                strategies.add(strategyMap.get(ReturnVideoRecallStrategy.class.getSimpleName()));
-                strategies.add(strategyMap.get(FestivalRecallStrategyV1.class.getSimpleName()));
-                break;
             case "60113": // 563
             case "60114": // 564
             case "60115": // 565
@@ -294,6 +286,7 @@ public class RecallService implements ApplicationContextAware {
                 strategies.add(strategyMap.get(ReturnVideoRecallStrategy.class.getSimpleName()));
                 break;
             case "60118": // 568
+            case "60119": // 569
                 break;
             case "60110": // 新内容的召回(流量池的Top内容)
                 strategies.add(strategyMap.get(TopGoodPerformanceVideoRecallStrategy.class.getSimpleName()));