Prechádzať zdrojové kódy

feat: V566 实验同步成 V564 (粗排分统一截断)

将 V566 从"19 路 extractRecall + supplyType/drivingStrategy 乘数加权"
模式改为 V564 当前的"粗排分统一截断 (个性化 6 + 非个性化 17 = 23 路,
两配额动态补足)"模式, 两实验业务逻辑完全一致, 仅 expCode 不同。

公共基础设施 (RecallUtils.extractAllAndTruncateByCoarseRank /
FeatureService.getVideoCoarseRankFeature) 已在主干, 直接复用。
RecallService 不开 isHit566Exp gate, 跟 V564 一致。

同步顺手删 RecommendService 里 V566 专属的 userSocialRecallInfo
拉取 if 块 (对应的 SocialI2I* 召回策略本就在 RecallService 注释,
是死代码)。空 map fallback 保留, 对其它实验无副作用。

附带 apollo/rank.score.merge.weightv566.json 同步成 V564 内容
(untracked, 不进 PR, 需运维同步到 Apollo 后台)。

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
yangxiaohui 1 týždeň pred
rodič
commit
9a4778f816

+ 0 - 3
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/RecommendService.java

@@ -570,9 +570,6 @@ public class RecommendService {
         Map<String, Map<String, String>> behaviorVideos = featureService.getBehaviorVideos(vid, userProfile);
 
         Map<String, String> userSocialRecallInfo = new HashMap<>();
-        if (CollectionUtils.isNotEmpty(param.getAbExpCodes()) && param.getAbExpCodes().contains("566")) {
-            userSocialRecallInfo = featureService.getUserSocialRecallInfo(param.getMid());
-        }
 
         Map<String, Map<String, String>> unionIdFeature = featureService.getUnionIdFeature(param.getUnionId());
         Map<String, String> userNetworkSeqFeature = unionIdFeature.getOrDefault("alg_user_network_seq_feature", new HashMap<>());

+ 140 - 82
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/rank/strategy/RankStrategy4RegionMergeModelV566.java

@@ -29,15 +29,6 @@ import java.util.stream.Stream;
 
 @Service
 @Slf4j
-/**
- * V566 实验:基于 V563,按"供给类型(supply_type) + 驱动策略(driving_strategy)"对 item 加权/降权。
- * 数据来源: alg_vid_feature_basic_info.feature JSON。
- * 规则(硬编码,见 {@link #getSupplyWeight}):
- *   - supply_type ∈ {UGC, 垂直spider}                          → 0.8
- *   - supply_type = 自动AGC AND driving_strategy = 当下供需gap → 1.5
- *   - supply_type = 人工AGC AND driving_strategy ∈ {人工历史需求, 当下供需gap} → 1.2
- *   - 其他                                                       → 1.0
- */
 public class RankStrategy4RegionMergeModelV566 extends RankStrategy4RegionMergeModelBasic {
     @ApolloJsonValue("${rank.score.merge.weightv566:}")
     private Map<String, Double> mergeWeight;
@@ -45,9 +36,57 @@ public class RankStrategy4RegionMergeModelV566 extends RankStrategy4RegionMergeM
     @Autowired
     private FeatureService featureService;
 
-    // V566 供给加权规则用到的常量
-    private static final Set<String> SUPPLY_DEMOTE_TYPES = new HashSet<>(Arrays.asList("UGC", "垂直spider"));
-    private static final Set<String> AGC_BOOST_DRIVINGS = new HashSet<>(Arrays.asList("人工历史需求", "当下供需gap"));
+    /**
+     * V566 个性化召回白名单 (6 路):召回 key 含 mid/uid,依赖该用户行为信号。
+     * 注:YearReturnCate2 因线上效果不佳, 2026-06-04 起移到非个性化白名单。
+     */
+    private static final Set<String> PERSONAL_RECALL_PUSH_FROMS = new HashSet<>(Arrays.asList(
+            UserCate1RecallStrategy.PUSH_FORM,
+            UserCate2RecallStrategy.PUSH_FORM,
+            Return1Cate2RosRecallStrategy.PUSH_FORM,
+            Return1Cate2StrRecallStrategy.PUSH_FORM,
+            YearShareCate1RecallStrategy.PUSH_FROM,
+            YearShareCate2RecallStrategy.PUSH_FROM
+    ));
+
+    /**
+     * V566 非个性化召回白名单 (17 路):只依赖 headVid + 地域/品类/相似度(vid-vid CF 也归此类)。
+     * 含 5 路旧地域、新地域、城市、head province/cate、先验省份、return 相似、scene CF、YearReturnCate2。
+     */
+    private static final Set<String> NON_PERSONAL_RECALL_PUSH_FROMS = new HashSet<>(Arrays.asList(
+            RegionHRecallStrategy.PUSH_FORM,
+            RegionHDupRecallStrategy.PUSH_FORM,
+            Region24HRecallStrategy.PUSH_FORM,
+            RegionRelative24HRecallStrategy.PUSH_FORM,
+            RegionRelative24HDupRecallStrategy.PUSH_FORM,
+            RegionRealtimeRecallStrategyV1.PUSH_FORM,
+            CityRovnRecallStrategy.PUSH_FROM,
+            HeadProvinceCate1RecallStrategy.PUSH_FORM,
+            HeadProvinceCate2RecallStrategy.PUSH_FORM,
+            HeadCate2RovRecallStrategy.PUSH_FROM,
+            PrioriProvinceRovnRecallStrategy.PUSH_FROM,
+            PrioriProvinceStrRecallStrategy.PUSH_FROM,
+            PrioriProvinceRosRecallStrategy.PUSH_FROM,
+            ReturnVideoRecallStrategy.PUSH_FORM,
+            SceneCFRovnRecallStrategy.PUSH_FORM,
+            SceneCFRosnRecallStrategy.PUSH_FORM,
+            YearReturnCate2RecallStrategy.PUSH_FROM
+    ));
+
+    /** PERSONAL ∪ NON_PERSONAL = 23 路。用于 fetchCoarseRankScores 跳过流量池等不参与截断的 vid。 */
+    private static final Set<String> ALL_ROV_PUSH_FROMS;
+    static {
+        Set<String> all = new HashSet<>(PERSONAL_RECALL_PUSH_FROMS);
+        all.addAll(NON_PERSONAL_RECALL_PUSH_FROMS);
+        ALL_ROV_PUSH_FROMS = Collections.unmodifiableSet(all);
+    }
+
+    /*
+     * 设计要点:
+     *   - fail-closed 白名单:RecallService 未来加新路不会自动进 V566,避免污染 vs V568 AB 对比
+     *   - 流量池 3 路 (flow_pool / quick_flow_pool / recall_strategy_hotspot) 不在任何名单——独立通道
+     *   - 调用顺序 = 个性化优先:同 vid 双类命中时归个性化,保护用户兴趣信号
+     */
 
     @Override
     public List<Video> mergeAndRankRovRecall(RankParam param) {
@@ -62,42 +101,24 @@ public class RankStrategy4RegionMergeModelV566 extends RankStrategy4RegionMergeM
         Set<Long> setVideo = new HashSet<>();
         setVideo.add(param.getHeadVid());
         List<Video> rovRecallRank = new ArrayList<>();
-        // -------------------5路特殊旧召回------------------
-        RecallUtils.extractOldSpecialRecall(mergeWeight.getOrDefault("oldSpecialN", (double) param.getSize()).intValue(), param, setVideo, rovRecallRank);
-        //-------------------return相似召回------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("v6", 5.0).intValue(), param, ReturnVideoRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-        //-------------------新地域召回------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("v1", 5.0).intValue(), param, RegionRealtimeRecallStrategyV1.PUSH_FORM, setVideo, rovRecallRank);
-        //-------------------scene cf rovn------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("sceneCFRovn", 5.0).intValue(), param, SceneCFRovnRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-        //-------------------scene cf rosn------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("sceneCFRosn", 5.0).intValue(), param, SceneCFRosnRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-        // -------------------user cate1------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("cate1RecallN", 5.0).intValue(), param, UserCate1RecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-        // -------------------user cate2------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("cate2RecallN", 5.0).intValue(), param, UserCate2RecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-        // -------------------head province cate1------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("headCate1RecallN", 3.0).intValue(), param, HeadProvinceCate1RecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-        // -------------------head province cate2------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("headCate2RecallN", 3.0).intValue(), param, HeadProvinceCate2RecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-        //-------------------head cate2 of rovn------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("headCate2Rov", 5.0).intValue(), param, HeadCate2RovRecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
-        //-------------------city rovn------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("cityRov", 5.0).intValue(), param, CityRovnRecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
-        //-------------------priori province rovn------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("prioriProvinceRov", 3.0).intValue(), param, PrioriProvinceRovnRecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
-        //-------------------priori province str------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("prioriProvinceStr", 1.0).intValue(), param, PrioriProvinceStrRecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
-        //-------------------priori province ros------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("prioriProvinceRos", 1.0).intValue(), param, PrioriProvinceRosRecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
-        //-------------------return1 cate2 ros------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("return1Cate2Ros", 5.0).intValue(), param, Return1Cate2RosRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-        //-------------------return1 cate2 str------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("return1Cate2Str", 5.0).intValue(), param, Return1Cate2StrRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("yearShareCate1", 5.0).intValue(), param, YearShareCate1RecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("yearShareCate2", 5.0).intValue(), param, YearShareCate2RecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("yearReturnCate2", 5.0).intValue(), param, YearReturnCate2RecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
+
+        // ============================================================
+        // V566 实验:统一粗排分截断 (个性化 / 非个性化 两配额, 动态补足)
+        // 总配额 coarseRankTopN,个性化占 personalRatio。先个性化按上限抢位,
+        // 个性化不足时剩余名额转给非个性化,保证精排算力满载。
+        //
+        // 粗排分 = alg_vid_recommend_exp_feature_20250212.rovn_1h / rovn_24h 平均
+        // ============================================================
+        int totalTopN = mergeWeight.getOrDefault("coarseRankTopN", 80.0).intValue();
+        double personalRatio = mergeWeight.getOrDefault("personalRatio", 0.4);
+        int personalTopN = (int) Math.round(totalTopN * personalRatio);
+        Map<Long, Double> coarseRankMap = fetchCoarseRankScores(param);
+
+        int sizeBeforePersonal = rovRecallRank.size();
+        RecallUtils.extractAllAndTruncateByCoarseRank(personalTopN, param, setVideo, rovRecallRank, coarseRankMap, PERSONAL_RECALL_PUSH_FROMS);
+        int personalActual = rovRecallRank.size() - sizeBeforePersonal;
+        int nonPersonalBudget = totalTopN - personalActual;  // 个性化不足时, 名额转给非个性化
+        RecallUtils.extractAllAndTruncateByCoarseRank(nonPersonalBudget, param, setVideo, rovRecallRank, coarseRankMap, NON_PERSONAL_RECALL_PUSH_FROMS);
 
         // 记录召回源中的视频
         this.rankBeforePostProcessor(rovRecallRank);
@@ -259,24 +280,7 @@ public class RankStrategy4RegionMergeModelV566 extends RankStrategy4RegionMergeM
 
             score = fmRov * (rosAdd + rosW * newNorDNNScore) * (vorAdd + vorW * vor) + c1RovnScore + b0StrScore + b0RorScore + cnRovnScore + dnRovnScore;
 
-            // ============== V566: 按供给类型 + 驱动策略加权 ==============
-            // 数据来源: alg_vid_feature_basic_info.feature.{supply_type, driving_strategy}
-            // 规则硬编码在 getSupplyWeight() 中
             Video video = item.getVideo();
-            Map<String, String> vidBaseInfo = videoBaseInfoMap
-                    .getOrDefault(String.valueOf(item.getVideoId()), Collections.emptyMap())
-                    .getOrDefault("alg_vid_feature_basic_info", Collections.emptyMap());
-            String supplyType = vidBaseInfo.getOrDefault("supply_type", "unknown");
-            String drivingStrategy = vidBaseInfo.getOrDefault("driving_strategy", "unknown");
-            String sceneDimension = vidBaseInfo.getOrDefault("scene_dimension", "unknown");
-            double supplyTypeWeightValue = getSupplyWeight(supplyType, drivingStrategy);
-            item.getScoresMap().put("supplyType_" + supplyType, 1.0);
-            item.getScoresMap().put("drivingStrategy_" + drivingStrategy, 1.0);
-            item.getScoresMap().put("sceneDimension_" + sceneDimension, 1.0);
-            item.getScoresMap().put("supplyTypeWeight", supplyTypeWeightValue);
-            score = score * supplyTypeWeightValue;
-            // ============== V566 加权块结束 ==============
-
             video.setScore(score);
             video.setSortScore(score);
             video.setScoresMap(item.getScoresMap());
@@ -319,6 +323,77 @@ public class RankStrategy4RegionMergeModelV566 extends RankStrategy4RegionMergeM
         return result;
     }
 
+    /**
+     * V566 实验:拉取粗排分(按 vid → score 返回)。
+     *
+     * 数据源:alg_vid_recommend_exp_feature_20250212。
+     * 表里没有现成 rovn 字段,需要从原子字段 (return_n_uv_*, exp_*) 用 plusSmooth 算出来。
+     * 公式 = FeatureV6.oneTypeStatFeature 同口径:rovn = plusSmooth(return_n_uv, exp, plus, 1)
+     * 默认 plus=30 与 FeatureV6.largerSmoothPlus 对齐,AB 对比不会因口径不同污染结论。
+     *
+     * Apollo 可调维度:
+     *   - coarseRovn1hW / coarseRovn24hW:1h 和 24h 的加权(默认 0.5/0.5)
+     *   - coarseRovn1hSmoothPlus / coarseRovn24hSmoothPlus:贝叶斯平滑系数(默认 30/30)
+     *
+     * 缺失自动归一化:单值缺失时剩下的撑起全部权重;两值都缺失则 caller 兜底 RovScore。
+     */
+    private Map<Long, Double> fetchCoarseRankScores(RankParam param) {
+        if (param == null || param.getRecallResult() == null
+                || CollectionUtils.isEmpty(param.getRecallResult().getData())) {
+            return Collections.emptyMap();
+        }
+        Map<String, Double> mergeWeight = this.mergeWeight != null ? this.mergeWeight : Collections.emptyMap();
+        double w1h = mergeWeight.getOrDefault("coarseRovn1hW", 0.5);
+        double w24h = mergeWeight.getOrDefault("coarseRovn24hW", 0.5);
+        double plus1h = mergeWeight.getOrDefault("coarseRovn1hSmoothPlus", 30.0);
+        double plus24h = mergeWeight.getOrDefault("coarseRovn24hSmoothPlus", 30.0);
+        // 只对参与统一截断的 23 路 vid 拉粗排分(跳过流量池 3 路,省 proto + RPC 延迟)
+        List<String> vids = param.getRecallResult().getData().stream()
+                .filter(d -> d != null && CollectionUtils.isNotEmpty(d.getVideos()))
+                .filter(d -> ALL_ROV_PUSH_FROMS.contains(d.getPushFrom()))
+                .flatMap(d -> d.getVideos().stream())
+                .map(v -> String.valueOf(v.getVideoId()))
+                .distinct()
+                .collect(Collectors.toList());
+        if (vids.isEmpty()) return Collections.emptyMap();
+
+        Map<String, Map<String, Map<String, String>>> feats = featureService.getVideoCoarseRankFeature(vids);
+        Map<Long, Double> result = new HashMap<>(vids.size());
+        for (String vid : vids) {
+            Map<String, String> row = feats.getOrDefault(vid, Collections.emptyMap())
+                    .getOrDefault("alg_vid_recommend_exp_feature_20250212", Collections.emptyMap());
+            Double rovn1h = computeRovn(row, "1h", plus1h);
+            Double rovn24h = computeRovn(row, "24h", plus24h);
+            // 加权平均,缺失自动归一化
+            double sumW = (rovn1h != null ? w1h : 0) + (rovn24h != null ? w24h : 0);
+            if (sumW <= 0) continue;
+            double sumWS = (rovn1h != null ? rovn1h * w1h : 0) + (rovn24h != null ? rovn24h * w24h : 0);
+            try {
+                result.put(Long.parseLong(vid), sumWS / sumW);
+            } catch (NumberFormatException ignore) { }
+        }
+        return result;
+    }
+
+    /**
+     * 与 FeatureV6.oneTypeStatFeature 同口径:rovn = plusSmooth(return_n_uv, exp, plus, 1)
+     *
+     * 字段语义(区分 0 vs null):
+     *   - exp 是 period 有效性 anchor:null 或 ≤0 → 整个 period 无效(return null)
+     *   - return_n_uv 缺失视为 0(真实信号"无回访"):rovn=0,参与加权(不会让另一时段兜底)
+     */
+    private static Double computeRovn(Map<String, String> row, String period, double smoothPlus) {
+        Double exp = parseDoubleOrNull(row.get("exp_" + period));
+        if (exp == null || exp <= 0) return null;
+        Double returnNuv = parseDoubleOrNull(row.get("return_n_uv_" + period));
+        return FeatureUtils.plusSmooth(returnNuv != null ? returnNuv : 0, exp, smoothPlus, 1);
+    }
+
+    private static Double parseDoubleOrNull(String s) {
+        if (StringUtils.isBlank(s)) return null;
+        try { return Double.parseDouble(s); } catch (NumberFormatException e) { return null; }
+    }
+
     private UserShareReturnProfile parseUserProfile(Map<String, Map<String, String>> userOriginInfo) {
         if (null != userOriginInfo) {
             Map<String, String> c9 = userOriginInfo.get("alg_recsys_feature_user_share_return_stat");
@@ -488,21 +563,4 @@ public class RankStrategy4RegionMergeModelV566 extends RankStrategy4RegionMergeM
         }
         return newScore;
     }
-
-    /**
-     * V566 供给加权规则(硬编码)。
-     * 规则顺序固定:命中即返回,按业务优先级从高到低。
-     */
-    private double getSupplyWeight(String supplyType, String drivingStrategy) {
-        if (SUPPLY_DEMOTE_TYPES.contains(supplyType)) {
-            return 0.8;
-        }
-        if ("自动AGC".equals(supplyType) && "当下供需gap".equals(drivingStrategy)) {
-            return 1.5;
-        }
-        if ("人工AGC".equals(supplyType) && AGC_BOOST_DRIVINGS.contains(drivingStrategy)) {
-            return 1.2;
-        }
-        return 1.0;
-    }
 }