Bladeren bron

refactor: dk_elements 两路召回 scoresMap 改用 Redis rovn 真实分 (替代 positionScores)

跟项目其他 multiGet 召回 (positionScores 位置分) 不同, dk_elements 倒排 value
就是 rovn 真实分 (0-1 概率类), 直接用更有信号:
- recall() 签名 List<Long> -> Map<Long, Double>, 解析时同 vid 取 max score
- 主流程从 scoresMap 排序得到 ids, 不再走 FilterParamFactory.positionScores
- Video.rovScore 写入真实 rovn 分

真实分影响链:
- filter 阶段 FilterParam.scoresMap = 真实分 (filter 内部 cap/截断更准)
- 粗排截断 coarseMap miss 的 vid -> fallback 用 Video.rovScore 排序, 真实分更有价值
- funnel 漏斗 entry.score = 真实分 (filter 通过的 entry)

跟 AbstractRedisRecallStrategy 基类范式一致 (Return1Cate2Ros 也是真实分).
两路 strategy 现在 recall() 实现字节级一致, 仅"挑 kw"逻辑不同.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
yangxiaohui 3 dagen geleden
bovenliggende
commit
77d727540a

+ 42 - 32
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/recall/strategy/UserProfileDkElementsRecallStrategy.java

@@ -80,9 +80,15 @@ public class UserProfileDkElementsRecallStrategy implements RecallStrategy {
 
             List<String> keys = getRedisKey(topElements);
             List<String> values = redisTemplate.opsForValue().multiGet(keys);
-            List<Long> ids = recall(param.getVideoId(), values);
 
-            Map<Long, Double> scoresMap = FilterParamFactory.positionScores(ids);
+            // 保留 Redis 倒排的真实 rovn 分 (而非位置分): scoresMap 的 score 会写到 Video.rovScore,
+            // 粗排截断 coarseMap miss 的 vid 会 fallback 用 Video.rovScore 排序, 真实分更有信号.
+            Map<Long, Double> scoresMap = recall(param.getVideoId(), values);
+            List<Long> ids = scoresMap.entrySet().stream()
+                    .sorted(Comparator.comparingDouble((Map.Entry<Long, Double> e) -> e.getValue()).reversed())
+                    .map(Map.Entry::getKey)
+                    .collect(Collectors.toList());
+
             FilterParam filterParam = FilterParamFactory.create(param, ids, pushFrom(), scoresMap);
             FilterResult filterResult = filterService.filter(filterParam);
             if (filterResult != null && CollectionUtils.isNotEmpty(filterResult.getVideoIds())) {
@@ -133,39 +139,43 @@ public class UserProfileDkElementsRecallStrategy implements RecallStrategy {
         return keys;
     }
 
-    private List<Long> recall(Long headVid, List<String> values) {
-        List<Long> vidList = new ArrayList<>();
-        if (null != values && !values.isEmpty()) {
-            Set<Long> hits = new HashSet<>();
-            hits.add(headVid);
-            List<org.apache.commons.math3.util.Pair<Long, Double>> list = new ArrayList<>();
-            for (String value : values) {
-                if (null != value && !value.isEmpty()) {
-                    String[] cells = value.split("\t");
-                    if (2 == cells.length) {
-                        List<Long> ids = Arrays.stream(cells[0].split(",")).map(Long::valueOf).collect(Collectors.toList());
-                        List<Double> scores = Arrays.stream(cells[1].split(",")).map(Double::valueOf).collect(Collectors.toList());
-                        if (!ids.isEmpty() && ids.size() == scores.size()) {
-                            for (int i = 0; i < ids.size(); ++i) {
-                                long id = ids.get(i);
-                                double score = scores.get(i);
-                                if (hits.contains(id)) {
-                                    continue;
-                                }
-                                hits.add(id);
-                                list.add(org.apache.commons.math3.util.Pair.create(id, score));
-                            }
-                        }
-                    }
-                }
+    /**
+     * Parses the Redis values returned by multiGet into a vid -> score map; blank, mis-shaped or non-numeric values are skipped.
+     * Value format: vid1,vid2,...\tscore1,score2,...  (the scores are the real rovn scores stored in Redis)
+     * A vid present in several values keeps its max score (per the author, consistent with AbstractRedisRecallStrategy); ids equal to headVid are left out.
+     */
+    private Map<Long, Double> recall(Long headVid, List<String> values) {
+        Map<Long, Double> scoresMap = new HashMap<>();
+        if (CollectionUtils.isEmpty(values)) {
+            return scoresMap;
+        }
+        for (String value : values) {
+            if (StringUtils.isBlank(value)) {
+                continue;
+            }
+            String[] cells = value.split("\t");
+            if (cells.length != 2) {
+                continue;
+            }
+            List<Long> ids;
+            List<Double> scores;
+            try {
+                ids = Arrays.stream(cells[0].split(",")).map(Long::valueOf).collect(Collectors.toList());
+                scores = Arrays.stream(cells[1].split(",")).map(Double::valueOf).collect(Collectors.toList());
+            } catch (NumberFormatException nfe) { // one unparsable token drops this whole value, not the whole recall
+                continue;
+            }
+            if (ids.isEmpty() || ids.size() != scores.size()) {
+                continue;
             }
-            if (!list.isEmpty()) {
-                list.sort(Comparator.comparingDouble(o -> -o.getSecond()));
-                for (org.apache.commons.math3.util.Pair<Long, Double> pair : list) {
-                    vidList.add(pair.getFirst());
+            for (int i = 0; i < ids.size(); i++) {
+                long id = ids.get(i);
+                if (headVid != null && headVid == id) { // headVid is unboxed for the comparison; the null check guards that
+                    continue;
                 }
+                scoresMap.merge(id, scores.get(i), Math::max); // keep the highest score seen for this vid
             }
         }
-        return vidList;
+        return scoresMap;
     }
 }

+ 42 - 32
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/recall/strategy/YearShareDkElementsRecallStrategy.java

@@ -81,9 +81,15 @@ public class YearShareDkElementsRecallStrategy implements RecallStrategy {
 
             List<String> keys = this.getRedisKey(allElements);
             List<String> values = redisTemplate.opsForValue().multiGet(keys);
-            List<Long> ids = recall(param.getVideoId(), values);
 
-            Map<Long, Double> scoresMap = FilterParamFactory.positionScores(ids);
+            // 保留 Redis 倒排的真实 rovn 分 (而非位置分): scoresMap 的 score 会写到 Video.rovScore,
+            // 粗排截断 coarseMap miss 的 vid 会 fallback 用 Video.rovScore 排序, 真实分更有信号.
+            Map<Long, Double> scoresMap = recall(param.getVideoId(), values);
+            List<Long> ids = scoresMap.entrySet().stream()
+                    .sorted(Comparator.comparingDouble((Map.Entry<Long, Double> e) -> e.getValue()).reversed())
+                    .map(Map.Entry::getKey)
+                    .collect(Collectors.toList());
+
             FilterParam filterParam = FilterParamFactory.create(param, ids, pushFrom(), scoresMap);
             FilterResult filterResult = filterService.filter(filterParam);
             if (filterResult != null && CollectionUtils.isNotEmpty(filterResult.getVideoIds())) {
@@ -144,40 +150,44 @@ public class YearShareDkElementsRecallStrategy implements RecallStrategy {
         return keys;
     }
 
-    private List<Long> recall(Long headVid, List<String> values) {
-        List<Long> vidList = new ArrayList<>();
-        if (null != values && !values.isEmpty()) {
-            Set<Long> hits = new HashSet<>();
-            hits.add(headVid);
-            List<org.apache.commons.math3.util.Pair<Long, Double>> list = new ArrayList<>();
-            for (String value : values) {
-                if (null != value && !value.isEmpty()) {
-                    String[] cells = value.split("\t");
-                    if (2 == cells.length) {
-                        List<Long> ids = Arrays.stream(cells[0].split(",")).map(Long::valueOf).collect(Collectors.toList());
-                        List<Double> scores = Arrays.stream(cells[1].split(",")).map(Double::valueOf).collect(Collectors.toList());
-                        if (!ids.isEmpty() && ids.size() == scores.size()) {
-                            for (int i = 0; i < ids.size(); ++i) {
-                                long id = ids.get(i);
-                                double score = scores.get(i);
-                                if (hits.contains(id)) {
-                                    continue;
-                                }
-                                hits.add(id);
-                                list.add(org.apache.commons.math3.util.Pair.create(id, score));
-                            }
-                        }
-                    }
-                }
+    /**
+     * Parses the Redis values returned by multiGet into a vid -> score map; blank, mis-shaped or non-numeric values are skipped.
+     * Value format: vid1,vid2,...\tscore1,score2,...  (the scores are the real rovn scores stored in Redis)
+     * A vid present in several values keeps its max score (per the author, consistent with AbstractRedisRecallStrategy); ids equal to headVid are left out.
+     */
+    private Map<Long, Double> recall(Long headVid, List<String> values) {
+        Map<Long, Double> scoresMap = new HashMap<>();
+        if (CollectionUtils.isEmpty(values)) {
+            return scoresMap;
+        }
+        for (String value : values) {
+            if (StringUtils.isBlank(value)) {
+                continue;
+            }
+            String[] cells = value.split("\t");
+            if (cells.length != 2) {
+                continue;
+            }
+            List<Long> ids;
+            List<Double> scores;
+            try {
+                ids = Arrays.stream(cells[0].split(",")).map(Long::valueOf).collect(Collectors.toList());
+                scores = Arrays.stream(cells[1].split(",")).map(Double::valueOf).collect(Collectors.toList());
+            } catch (NumberFormatException nfe) { // one unparsable token drops this whole value, not the whole recall
+                continue;
+            }
+            if (ids.isEmpty() || ids.size() != scores.size()) {
+                continue;
             }
-            if (!list.isEmpty()) {
-                list.sort(Comparator.comparingDouble(o -> -o.getSecond()));
-                for (org.apache.commons.math3.util.Pair<Long, Double> pair : list) {
-                    vidList.add(pair.getFirst());
+            for (int i = 0; i < ids.size(); i++) {
+                long id = ids.get(i);
+                if (headVid != null && headVid == id) { // headVid is unboxed for the comparison; the null check guards that
+                    continue;
                 }
+                scoresMap.merge(id, scores.get(i), Math::max); // keep the highest score seen for this vid
             }
         }
-        return vidList;
+        return scoresMap;
     }
 
     @Override