فهرست منبع

feat: V569 实验同步成 V564 (粗排分统一截断)

将 V569 从"19 路 extractRecall + recallQuotaScale 整体倍数"模式
改为 V564 当前的"粗排分统一截断 (个性化 6 + 非个性化 17 = 23 路,
两配额动态补足)"模式, 两实验业务逻辑完全一致, 仅 expCode 不同。

公共基础设施 (RecallUtils.extractAllAndTruncateByCoarseRank /
FeatureService.getVideoCoarseRankFeature) 已在主干, 直接复用。
RecallService 不开 isHit569Exp gate, 跟 V564 一致。

附带 apollo/rank.score.merge.weightv569.json 同步成 V564 内容
(untracked, 不进 PR, 需运维同步到 Apollo 后台)。

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
yangxiaohui 1 هفته پیش
والد
کامیت
5e6ae4ac7c

+ 141 - 46
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/rank/strategy/RankStrategy4RegionMergeModelV569.java

@@ -36,11 +36,61 @@ public class RankStrategy4RegionMergeModelV569 extends RankStrategy4RegionMergeM
     @Autowired
     private FeatureService featureService;
 
+    /**
+     * V569 whitelist of personalized recall channels (6 entries); per the original note, their recall keys contain mid/uid and rely on the user's own behavior signals.
+     * Note from the original comment: YearReturnCate2 was moved to the non-personalized whitelist as of 2026-06-04 because of poor online results.
+     */
+    private static final Set<String> PERSONAL_RECALL_PUSH_FROMS = new HashSet<>(Arrays.asList(
+            UserCate1RecallStrategy.PUSH_FORM,
+            UserCate2RecallStrategy.PUSH_FORM,
+            Return1Cate2RosRecallStrategy.PUSH_FORM,
+            Return1Cate2StrRecallStrategy.PUSH_FORM,
+            YearShareCate1RecallStrategy.PUSH_FROM,
+            YearShareCate2RecallStrategy.PUSH_FROM
+    ));
+
+    /**
+     * V569 whitelist of non-personalized recall channels (17 entries); per the original note, they depend only on headVid plus region / category / similarity signals (vid-vid CF is counted here too).
+     * Contents: the 5 legacy region channels, the new (realtime) region channel, city, head province/cate, priori province, return-similar, scene CF, and YearReturnCate2.
+     */
+    private static final Set<String> NON_PERSONAL_RECALL_PUSH_FROMS = new HashSet<>(Arrays.asList(
+            RegionHRecallStrategy.PUSH_FORM,
+            RegionHDupRecallStrategy.PUSH_FORM,
+            Region24HRecallStrategy.PUSH_FORM,
+            RegionRelative24HRecallStrategy.PUSH_FORM,
+            RegionRelative24HDupRecallStrategy.PUSH_FORM,
+            RegionRealtimeRecallStrategyV1.PUSH_FORM,
+            CityRovnRecallStrategy.PUSH_FROM,
+            HeadProvinceCate1RecallStrategy.PUSH_FORM,
+            HeadProvinceCate2RecallStrategy.PUSH_FORM,
+            HeadCate2RovRecallStrategy.PUSH_FROM,
+            PrioriProvinceRovnRecallStrategy.PUSH_FROM,
+            PrioriProvinceStrRecallStrategy.PUSH_FROM,
+            PrioriProvinceRosRecallStrategy.PUSH_FROM,
+            ReturnVideoRecallStrategy.PUSH_FORM,
+            SceneCFRovnRecallStrategy.PUSH_FORM,
+            SceneCFRosnRecallStrategy.PUSH_FORM,
+            YearReturnCate2RecallStrategy.PUSH_FROM
+    ));
+
+    /** Unmodifiable union of the PERSONAL and NON_PERSONAL whitelists (6 + 17 = 23 channels); fetchCoarseRankScores uses it to skip vids from channels outside the truncation, such as the flow pools. */
+    private static final Set<String> ALL_ROV_PUSH_FROMS;
+    static {
+        Set<String> all = new HashSet<>(PERSONAL_RECALL_PUSH_FROMS);
+        all.addAll(NON_PERSONAL_RECALL_PUSH_FROMS);
+        ALL_ROV_PUSH_FROMS = Collections.unmodifiableSet(all);
+    }
+
+    /*
+     * Design notes (translated from the original author's comment):
+     *   - Fail-closed whitelists: a recall channel added to RecallService later does not enter V569 automatically, which avoids polluting the AB comparison against V568.
+     *   - The 3 flow-pool channels (flow_pool / quick_flow_pool / recall_strategy_hotspot) are in neither list; they go through a separate path.
+     *   - Call order is personalized first: a vid hit by both categories is attributed to the personalized one, protecting the user-interest signal.
+     */
+
     @Override
     public List<Video> mergeAndRankRovRecall(RankParam param) {
         Map<String, Double> mergeWeight = this.mergeWeight != null ? this.mergeWeight : new HashMap<>(0);
-        // V569 新增: 整体召回配额倍数, 每一路 recallNum 都乘以这个 scale, Apollo 热调
-        double recallQuotaScale = mergeWeight.getOrDefault("recallQuotaScale", 1.2);
 
         //-------------------融-------------------
         //-------------------合-------------------
@@ -51,42 +101,24 @@ public class RankStrategy4RegionMergeModelV569 extends RankStrategy4RegionMergeM
         Set<Long> setVideo = new HashSet<>();
         setVideo.add(param.getHeadVid());
         List<Video> rovRecallRank = new ArrayList<>();
-        // -------------------5路特殊旧召回------------------
-        RecallUtils.extractOldSpecialRecall(scaledQuota(mergeWeight, "oldSpecialN", (double) param.getSize(), recallQuotaScale), param, setVideo, rovRecallRank);
-        //-------------------return相似召回------------------
-        RecallUtils.extractRecall(scaledQuota(mergeWeight, "v6", 5.0, recallQuotaScale), param, ReturnVideoRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-        //-------------------新地域召回------------------
-        RecallUtils.extractRecall(scaledQuota(mergeWeight, "v1", 5.0, recallQuotaScale), param, RegionRealtimeRecallStrategyV1.PUSH_FORM, setVideo, rovRecallRank);
-        //-------------------scene cf rovn------------------
-        RecallUtils.extractRecall(scaledQuota(mergeWeight, "sceneCFRovn", 5.0, recallQuotaScale), param, SceneCFRovnRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-        //-------------------scene cf rosn------------------
-        RecallUtils.extractRecall(scaledQuota(mergeWeight, "sceneCFRosn", 5.0, recallQuotaScale), param, SceneCFRosnRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-        // -------------------user cate1------------------
-        RecallUtils.extractRecall(scaledQuota(mergeWeight, "cate1RecallN", 5.0, recallQuotaScale), param, UserCate1RecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-        // -------------------user cate2------------------
-        RecallUtils.extractRecall(scaledQuota(mergeWeight, "cate2RecallN", 5.0, recallQuotaScale), param, UserCate2RecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-        // -------------------head province cate1------------------
-        RecallUtils.extractRecall(scaledQuota(mergeWeight, "headCate1RecallN", 3.0, recallQuotaScale), param, HeadProvinceCate1RecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-        // -------------------head province cate2------------------
-        RecallUtils.extractRecall(scaledQuota(mergeWeight, "headCate2RecallN", 3.0, recallQuotaScale), param, HeadProvinceCate2RecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-        //-------------------head cate2 of rovn------------------
-        RecallUtils.extractRecall(scaledQuota(mergeWeight, "headCate2Rov", 5.0, recallQuotaScale), param, HeadCate2RovRecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
-        //-------------------city rovn------------------
-        RecallUtils.extractRecall(scaledQuota(mergeWeight, "cityRov", 5.0, recallQuotaScale), param, CityRovnRecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
-        //-------------------priori province rovn------------------
-        RecallUtils.extractRecall(scaledQuota(mergeWeight, "prioriProvinceRov", 3.0, recallQuotaScale), param, PrioriProvinceRovnRecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
-        //-------------------priori province str------------------
-        RecallUtils.extractRecall(scaledQuota(mergeWeight, "prioriProvinceStr", 1.0, recallQuotaScale), param, PrioriProvinceStrRecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
-        //-------------------priori province ros------------------
-        RecallUtils.extractRecall(scaledQuota(mergeWeight, "prioriProvinceRos", 1.0, recallQuotaScale), param, PrioriProvinceRosRecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
-        //-------------------return1 cate2 ros------------------
-        RecallUtils.extractRecall(scaledQuota(mergeWeight, "return1Cate2Ros", 5.0, recallQuotaScale), param, Return1Cate2RosRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-        //-------------------return1 cate2 str------------------
-        RecallUtils.extractRecall(scaledQuota(mergeWeight, "return1Cate2Str", 5.0, recallQuotaScale), param, Return1Cate2StrRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-
-        RecallUtils.extractRecall(scaledQuota(mergeWeight, "yearShareCate1", 5.0, recallQuotaScale), param, YearShareCate1RecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
-        RecallUtils.extractRecall(scaledQuota(mergeWeight, "yearShareCate2", 5.0, recallQuotaScale), param, YearShareCate2RecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
-        RecallUtils.extractRecall(scaledQuota(mergeWeight, "yearReturnCate2", 5.0, recallQuotaScale), param, YearReturnCate2RecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
+
+        // ============================================================
+        // V569 实验:统一粗排分截断 (个性化 / 非个性化 两配额, 动态补足)
+        // 总配额 coarseRankTopN,个性化占 personalRatio。先个性化按上限抢位,
+        // 个性化不足时剩余名额转给非个性化,保证精排算力满载。
+        //
+        // 粗排分 = rovn_1h / rovn_24h 加权平均 (默认各 0.5); rovn 由 alg_vid_recommend_exp_feature_20250212 的原子字段 (exp / return_n_uv) 经 plusSmooth 算出, 表内无现成 rovn 字段
+        // ============================================================
+        int totalTopN = mergeWeight.getOrDefault("coarseRankTopN", 80.0).intValue();
+        double personalRatio = mergeWeight.getOrDefault("personalRatio", 0.4);
+        int personalTopN = (int) Math.round(totalTopN * personalRatio);
+        Map<Long, Double> coarseRankMap = fetchCoarseRankScores(param);
+
+        int sizeBeforePersonal = rovRecallRank.size();
+        RecallUtils.extractAllAndTruncateByCoarseRank(personalTopN, param, setVideo, rovRecallRank, coarseRankMap, PERSONAL_RECALL_PUSH_FROMS);
+        int personalActual = rovRecallRank.size() - sizeBeforePersonal;
+        int nonPersonalBudget = totalTopN - personalActual;  // 个性化不足时, 名额转给非个性化
+        RecallUtils.extractAllAndTruncateByCoarseRank(nonPersonalBudget, param, setVideo, rovRecallRank, coarseRankMap, NON_PERSONAL_RECALL_PUSH_FROMS);
 
         // 记录召回源中的视频
         this.rankBeforePostProcessor(rovRecallRank);
@@ -291,6 +323,77 @@ public class RankStrategy4RegionMergeModelV569 extends RankStrategy4RegionMergeM
         return result;
     }
 
+    /**
+     * V569 experiment: builds the coarse-rank score map (video id to score) for the vids that take part in the unified truncation.
+     *
+     * Data source: table alg_vid_recommend_exp_feature_20250212, read via featureService.getVideoCoarseRankFeature; per the original note it has no ready-made rovn column, so computeRovn derives rovn per period from the exp and return_n_uv fields.
+     * The original comment states the formula matches FeatureV6.oneTypeStatFeature and the default plus=30 matches FeatureV6.largerSmoothPlus (neither is visible here; confirm before relying on it).
+     *
+     * Tunable keys read from mergeWeight (described as Apollo-tunable by the original author):
+     *   - coarseRovn1hW / coarseRovn24hW: weights of the 1h and 24h rovn (default 0.5 each)
+     *   - coarseRovn1hSmoothPlus / coarseRovn24hSmoothPlus: smoothing constants passed to computeRovn (default 30 each)
+     *
+     * Missing values: the weighted average is renormalized over the periods that are present; a vid with no usable period gets no entry (the original comment says the caller then falls back to RovScore; not visible here).
+     * @param param rank request; only videos under recall entries whose pushFrom is in ALL_ROV_PUSH_FROMS are scored
+     * @return map from video id to coarse-rank score; empty when param, its recall result or its data is null/empty, or when no whitelisted vid is found
+     */
+    private Map<Long, Double> fetchCoarseRankScores(RankParam param) {
+        if (param == null || param.getRecallResult() == null
+                || CollectionUtils.isEmpty(param.getRecallResult().getData())) {
+            return Collections.emptyMap();
+        }
+        Map<String, Double> mergeWeight = this.mergeWeight != null ? this.mergeWeight : Collections.emptyMap();
+        double w1h = mergeWeight.getOrDefault("coarseRovn1hW", 0.5);
+        double w24h = mergeWeight.getOrDefault("coarseRovn24hW", 0.5);
+        double plus1h = mergeWeight.getOrDefault("coarseRovn1hSmoothPlus", 30.0);
+        double plus24h = mergeWeight.getOrDefault("coarseRovn24hSmoothPlus", 30.0);
+        // Only fetch scores for vids from the 23 whitelisted channels (per the original note, skipping the 3 flow-pool channels saves proto + RPC latency)
+        List<String> vids = param.getRecallResult().getData().stream()
+                .filter(d -> d != null && CollectionUtils.isNotEmpty(d.getVideos()))
+                .filter(d -> ALL_ROV_PUSH_FROMS.contains(d.getPushFrom()))
+                .flatMap(d -> d.getVideos().stream())
+                .map(v -> String.valueOf(v.getVideoId()))
+                .distinct()
+                .collect(Collectors.toList());
+        if (vids.isEmpty()) return Collections.emptyMap();
+
+        Map<String, Map<String, Map<String, String>>> feats = featureService.getVideoCoarseRankFeature(vids); // NOTE(review): result is used without a null check; confirm the service never returns null
+        Map<Long, Double> result = new HashMap<>(vids.size());
+        for (String vid : vids) {
+            Map<String, String> row = feats.getOrDefault(vid, Collections.emptyMap())
+                    .getOrDefault("alg_vid_recommend_exp_feature_20250212", Collections.emptyMap());
+            Double rovn1h = computeRovn(row, "1h", plus1h);
+            Double rovn24h = computeRovn(row, "24h", plus24h);
+            // Weighted average, renormalized over the periods that are present
+            double sumW = (rovn1h != null ? w1h : 0) + (rovn24h != null ? w24h : 0);
+            if (sumW <= 0) continue; // no usable period (or non-positive weights): leave this vid out of the map
+            double sumWS = (rovn1h != null ? rovn1h * w1h : 0) + (rovn24h != null ? rovn24h * w24h : 0);
+            try {
+                result.put(Long.parseLong(vid), sumWS / sumW);
+            } catch (NumberFormatException ignore) { } // a vid string that is not a valid long is skipped silently
+        }
+        return result;
+    }
+
+    /**
+     * Computes the smoothed rovn of one period as FeatureUtils.plusSmooth(return_n_uv, exp, smoothPlus, 1); the original comment states this follows FeatureV6.oneTypeStatFeature (not visible here).
+     * @param row        feature row keyed by field name; must not be null because it is read directly
+     * @param period     suffix appended to "exp_" and "return_n_uv_" to pick the fields (callers in this file pass "1h" and "24h")
+     * @param smoothPlus smoothing constant forwarded to plusSmooth
+     * @return the plusSmooth result, or null when the period has no valid exposure value
+     */
+    private static Double computeRovn(Map<String, String> row, String period, double smoothPlus) {
+        Double exp = parseDoubleOrNull(row.get("exp_" + period));
+        if (exp == null || exp <= 0) return null; // exp anchors period validity: missing, unparsable or <= 0 invalidates the whole period
+        Double returnNuv = parseDoubleOrNull(row.get("return_n_uv_" + period));
+        return FeatureUtils.plusSmooth(returnNuv != null ? returnNuv : 0, exp, smoothPlus, 1); // missing return_n_uv counts as 0, so the period still joins the weighted average instead of deferring to the other period
+    }
+
+    private static Double parseDoubleOrNull(String s) { // lenient parse: yields null instead of throwing
+        if (StringUtils.isBlank(s)) return null; // input judged blank by StringUtils.isBlank means "value absent"
+        try { return Double.parseDouble(s); } catch (NumberFormatException e) { return null; } // a malformed number is treated as absent too
+    }
+
     private UserShareReturnProfile parseUserProfile(Map<String, Map<String, String>> userOriginInfo) {
         if (null != userOriginInfo) {
             Map<String, String> c9 = userOriginInfo.get("alg_recsys_feature_user_share_return_stat");
@@ -460,12 +563,4 @@ public class RankStrategy4RegionMergeModelV569 extends RankStrategy4RegionMergeM
         }
         return newScore;
     }
-
-    /**
-     * V569: 读 Apollo 单路 recallNum, 再乘整体配额倍数 scale, 向上取整。
-     * ceil 保证小值路 (default=1/3) 在 scale=1.2 时也能扩 (1->2, 3->4); scale=1.0 时整数路径下 ceil 跟 cast 等价。
-     */
-    private static int scaledQuota(Map<String, Double> mergeWeight, String key, double defaultVal, double scale) {
-        return (int) Math.ceil(mergeWeight.getOrDefault(key, defaultVal) * scale);
-    }
 }