|
|
@@ -36,6 +36,58 @@ public class RankStrategy4RegionMergeModelV562 extends RankStrategy4RegionMergeM
|
|
|
@Autowired
|
|
|
private FeatureService featureService;
|
|
|
|
|
|
+ /**
|
|
|
+ * V562 个性化召回白名单 (6 路):召回 key 含 mid/uid,依赖该用户行为信号。
|
|
|
+ * 注:YearReturnCate2 因线上效果不佳, 2026-06-04 起移到非个性化白名单。
|
|
|
+ */
|
|
|
+ private static final Set<String> PERSONAL_RECALL_PUSH_FROMS = new HashSet<>(Arrays.asList(
|
|
|
+ UserCate1RecallStrategy.PUSH_FORM,
|
|
|
+ UserCate2RecallStrategy.PUSH_FORM,
|
|
|
+ Return1Cate2RosRecallStrategy.PUSH_FORM,
|
|
|
+ Return1Cate2StrRecallStrategy.PUSH_FORM,
|
|
|
+ YearShareCate1RecallStrategy.PUSH_FROM,
|
|
|
+ YearShareCate2RecallStrategy.PUSH_FROM
|
|
|
+ ));
|
|
|
+
|
|
|
+ /**
|
|
|
+ * V562 非个性化召回白名单 (17 路):只依赖 headVid + 地域/品类/相似度(vid-vid CF 也归此类)。
|
|
|
+ * 含 5 路旧地域、新地域、城市、head province/cate、先验省份、return 相似、scene CF、YearReturnCate2。
|
|
|
+ */
|
|
|
+ private static final Set<String> NON_PERSONAL_RECALL_PUSH_FROMS = new HashSet<>(Arrays.asList(
|
|
|
+ RegionHRecallStrategy.PUSH_FORM,
|
|
|
+ RegionHDupRecallStrategy.PUSH_FORM,
|
|
|
+ Region24HRecallStrategy.PUSH_FORM,
|
|
|
+ RegionRelative24HRecallStrategy.PUSH_FORM,
|
|
|
+ RegionRelative24HDupRecallStrategy.PUSH_FORM,
|
|
|
+ RegionRealtimeRecallStrategyV1.PUSH_FORM,
|
|
|
+ CityRovnRecallStrategy.PUSH_FROM,
|
|
|
+ HeadProvinceCate1RecallStrategy.PUSH_FORM,
|
|
|
+ HeadProvinceCate2RecallStrategy.PUSH_FORM,
|
|
|
+ HeadCate2RovRecallStrategy.PUSH_FROM,
|
|
|
+ PrioriProvinceRovnRecallStrategy.PUSH_FROM,
|
|
|
+ PrioriProvinceStrRecallStrategy.PUSH_FROM,
|
|
|
+ PrioriProvinceRosRecallStrategy.PUSH_FROM,
|
|
|
+ ReturnVideoRecallStrategy.PUSH_FORM,
|
|
|
+ SceneCFRovnRecallStrategy.PUSH_FORM,
|
|
|
+ SceneCFRosnRecallStrategy.PUSH_FORM,
|
|
|
+ YearReturnCate2RecallStrategy.PUSH_FROM
|
|
|
+ ));
|
|
|
+
|
|
|
+ /** PERSONAL ∪ NON_PERSONAL = 23 路。用于 fetchCoarseRankScores 跳过流量池等不参与截断的 vid。 */
|
|
|
+ private static final Set<String> ALL_ROV_PUSH_FROMS;
|
|
|
+ static {
|
|
|
+ Set<String> all = new HashSet<>(PERSONAL_RECALL_PUSH_FROMS);
|
|
|
+ all.addAll(NON_PERSONAL_RECALL_PUSH_FROMS);
|
|
|
+ ALL_ROV_PUSH_FROMS = Collections.unmodifiableSet(all);
|
|
|
+ }
|
|
|
+
|
|
|
+ /*
|
|
|
+ * 设计要点:
|
|
|
+ * - fail-closed 白名单:RecallService 未来加新路不会自动进 V562,避免污染 vs V568 AB 对比
|
|
|
+ * - 流量池 3 路 (flow_pool / quick_flow_pool / recall_strategy_hotspot) 不在任何名单——独立通道
|
|
|
+ * - 调用顺序 = 个性化优先:同 vid 双类命中时归个性化,保护用户兴趣信号
|
|
|
+ */
|
|
|
+
|
|
|
@Override
|
|
|
public List<Video> mergeAndRankRovRecall(RankParam param) {
|
|
|
Map<String, Double> mergeWeight = this.mergeWeight != null ? this.mergeWeight : new HashMap<>(0);
|
|
|
@@ -49,42 +101,30 @@ public class RankStrategy4RegionMergeModelV562 extends RankStrategy4RegionMergeM
|
|
|
Set<Long> setVideo = new HashSet<>();
|
|
|
setVideo.add(param.getHeadVid());
|
|
|
List<Video> rovRecallRank = new ArrayList<>();
|
|
|
- // -------------------5路特殊旧召回------------------
|
|
|
- RecallUtils.extractOldSpecialRecall(mergeWeight.getOrDefault("oldSpecialN", (double) param.getSize()).intValue(), param, setVideo, rovRecallRank);
|
|
|
- //-------------------return相似召回------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("v6", 5.0).intValue(), param, ReturnVideoRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
|
|
|
- //-------------------新地域召回 (V562: all_rov, V568 base 用 V1)------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("v1", 5.0).intValue(), param, RegionRealtimeRecallStrategyV1AllRov.PUSH_FROM, setVideo, rovRecallRank);
|
|
|
- //-------------------scene cf rovn------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("sceneCFRovn", 5.0).intValue(), param, SceneCFRovnRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
|
|
|
- //-------------------scene cf rosn------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("sceneCFRosn", 5.0).intValue(), param, SceneCFRosnRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
|
|
|
- // -------------------user cate1------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("cate1RecallN", 5.0).intValue(), param, UserCate1RecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
|
|
|
- // -------------------user cate2------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("cate2RecallN", 5.0).intValue(), param, UserCate2RecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
|
|
|
- // -------------------head province cate1------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("headCate1RecallN", 3.0).intValue(), param, HeadProvinceCate1RecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
|
|
|
- // -------------------head province cate2------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("headCate2RecallN", 3.0).intValue(), param, HeadProvinceCate2RecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
|
|
|
- //-------------------head cate2 of rovn------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("headCate2Rov", 5.0).intValue(), param, HeadCate2RovRecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
|
|
|
- //-------------------city rovn (V562: all_rov, V568 base 用 v1)------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("cityRov", 5.0).intValue(), param, CityRovnAllRovRecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
|
|
|
- //-------------------priori province rovn------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("prioriProvinceRov", 3.0).intValue(), param, PrioriProvinceRovnRecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
|
|
|
- //-------------------priori province str------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("prioriProvinceStr", 1.0).intValue(), param, PrioriProvinceStrRecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
|
|
|
- //-------------------priori province ros------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("prioriProvinceRos", 1.0).intValue(), param, PrioriProvinceRosRecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
|
|
|
- //-------------------return1 cate2 ros------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("return1Cate2Ros", 5.0).intValue(), param, Return1Cate2RosRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
|
|
|
- //-------------------return1 cate2 str------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("return1Cate2Str", 5.0).intValue(), param, Return1Cate2StrRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
|
|
|
-
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("yearShareCate1", 5.0).intValue(), param, YearShareCate1RecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("yearShareCate2", 5.0).intValue(), param, YearShareCate2RecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("yearReturnCate2", 5.0).intValue(), param, YearReturnCate2RecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
|
|
|
+
|
|
|
+ // ============================================================
|
|
|
+ // V562 实验:统一粗排分截断 (个性化 / 非个性化 两配额, 动态补足)
|
|
|
+ // 总配额 coarseRankTopN,个性化占 personalRatio。先个性化按上限抢位,
|
|
|
+ // 个性化不足时剩余名额转给非个性化,保证精排算力满载。
|
|
|
+ //
|
|
|
+ // 粗排分 = alg_vid_recommend_exp_feature_20250212.rovn_1h / rovn_24h 平均
|
|
|
+ // ============================================================
|
|
|
+ int totalTopN = mergeWeight.getOrDefault("coarseRankTopN", 80.0).intValue();
|
|
|
+ double personalRatio = mergeWeight.getOrDefault("personalRatio", 0.4);
|
|
|
+ int personalTopN = (int) Math.round(totalTopN * personalRatio);
|
|
|
+ Map<Long, Double> coarseRankMap = fetchCoarseRankScores(param);
|
|
|
+
|
|
|
+ int personalCandidates = RecallUtils.countDistinctCandidates(param, setVideo, PERSONAL_RECALL_PUSH_FROMS);
|
|
|
+ int sizeBeforePersonal = rovRecallRank.size();
|
|
|
+ RecallUtils.extractAllAndTruncateByCoarseRank(personalTopN, param, setVideo, rovRecallRank, coarseRankMap, PERSONAL_RECALL_PUSH_FROMS);
|
|
|
+ int personalActual = rovRecallRank.size() - sizeBeforePersonal;
|
|
|
+ int nonPersonalBudget = totalTopN - personalActual; // 个性化不足时, 名额转给非个性化
|
|
|
+ int nonPersonalCandidates = RecallUtils.countDistinctCandidates(param, setVideo, NON_PERSONAL_RECALL_PUSH_FROMS);
|
|
|
+ int sizeBeforeNonPersonal = rovRecallRank.size();
|
|
|
+ RecallUtils.extractAllAndTruncateByCoarseRank(nonPersonalBudget, param, setVideo, rovRecallRank, coarseRankMap, NON_PERSONAL_RECALL_PUSH_FROMS);
|
|
|
+ int nonPersonalActual = rovRecallRank.size() - sizeBeforeNonPersonal;
|
|
|
+ log.info("coarse_rank_summary exp=562 quota={} pc={} ps={} nc={} ns={}",
|
|
|
+ totalTopN, personalCandidates, personalActual, nonPersonalCandidates, nonPersonalActual);
|
|
|
|
|
|
// 记录召回源中的视频
|
|
|
this.rankBeforePostProcessor(rovRecallRank);
|
|
|
@@ -289,6 +329,77 @@ public class RankStrategy4RegionMergeModelV562 extends RankStrategy4RegionMergeM
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * V562 实验:拉取粗排分(按 vid → score 返回)。
|
|
|
+ *
|
|
|
+ * 数据源:alg_vid_recommend_exp_feature_20250212。
|
|
|
+ * 表里没有现成 rovn 字段,需要从原子字段 (return_n_uv_*, exp_*) 用 plusSmooth 算出来。
|
|
|
+ * 公式 = FeatureV6.oneTypeStatFeature 同口径:rovn = plusSmooth(return_n_uv, exp, plus, 1)
|
|
|
+ * 默认 plus=30 与 FeatureV6.largerSmoothPlus 对齐,AB 对比不会因口径不同污染结论。
|
|
|
+ *
|
|
|
+ * Apollo 可调维度:
|
|
|
+ * - coarseRovn1hW / coarseRovn24hW:1h 和 24h 的加权(默认 0.5/0.5)
|
|
|
+ * - coarseRovn1hSmoothPlus / coarseRovn24hSmoothPlus:贝叶斯平滑系数(默认 30/30)
|
|
|
+ *
|
|
|
+ * 缺失自动归一化:单值缺失时剩下的撑起全部权重;两值都缺失则 caller 兜底 RovScore。
|
|
|
+ */
|
|
|
+ private Map<Long, Double> fetchCoarseRankScores(RankParam param) {
|
|
|
+ if (param == null || param.getRecallResult() == null
|
|
|
+ || CollectionUtils.isEmpty(param.getRecallResult().getData())) {
|
|
|
+ return Collections.emptyMap();
|
|
|
+ }
|
|
|
+ Map<String, Double> mergeWeight = this.mergeWeight != null ? this.mergeWeight : Collections.emptyMap();
|
|
|
+ double w1h = mergeWeight.getOrDefault("coarseRovn1hW", 0.5);
|
|
|
+ double w24h = mergeWeight.getOrDefault("coarseRovn24hW", 0.5);
|
|
|
+ double plus1h = mergeWeight.getOrDefault("coarseRovn1hSmoothPlus", 30.0);
|
|
|
+ double plus24h = mergeWeight.getOrDefault("coarseRovn24hSmoothPlus", 30.0);
|
|
|
+ // 只对参与统一截断的 23 路 vid 拉粗排分(跳过流量池 3 路,省 proto + RPC 延迟)
|
|
|
+ List<String> vids = param.getRecallResult().getData().stream()
|
|
|
+ .filter(d -> d != null && CollectionUtils.isNotEmpty(d.getVideos()))
|
|
|
+ .filter(d -> ALL_ROV_PUSH_FROMS.contains(d.getPushFrom()))
|
|
|
+ .flatMap(d -> d.getVideos().stream())
|
|
|
+ .map(v -> String.valueOf(v.getVideoId()))
|
|
|
+ .distinct()
|
|
|
+ .collect(Collectors.toList());
|
|
|
+ if (vids.isEmpty()) return Collections.emptyMap();
|
|
|
+
|
|
|
+ Map<String, Map<String, Map<String, String>>> feats = featureService.getVideoCoarseRankFeature(vids);
|
|
|
+ Map<Long, Double> result = new HashMap<>(vids.size());
|
|
|
+ for (String vid : vids) {
|
|
|
+ Map<String, String> row = feats.getOrDefault(vid, Collections.emptyMap())
|
|
|
+ .getOrDefault("alg_vid_recommend_exp_feature_20250212", Collections.emptyMap());
|
|
|
+ Double rovn1h = computeRovn(row, "1h", plus1h);
|
|
|
+ Double rovn24h = computeRovn(row, "24h", plus24h);
|
|
|
+ // 加权平均,缺失自动归一化
|
|
|
+ double sumW = (rovn1h != null ? w1h : 0) + (rovn24h != null ? w24h : 0);
|
|
|
+ if (sumW <= 0) continue;
|
|
|
+ double sumWS = (rovn1h != null ? rovn1h * w1h : 0) + (rovn24h != null ? rovn24h * w24h : 0);
|
|
|
+ try {
|
|
|
+ result.put(Long.parseLong(vid), sumWS / sumW);
|
|
|
+ } catch (NumberFormatException ignore) { }
|
|
|
+ }
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 与 FeatureV6.oneTypeStatFeature 同口径:rovn = plusSmooth(return_n_uv, exp, plus, 1)
|
|
|
+ *
|
|
|
+ * 字段语义(区分 0 vs null):
|
|
|
+ * - exp 是 period 有效性 anchor:null 或 ≤0 → 整个 period 无效(return null)
|
|
|
+ * - return_n_uv 缺失视为 0(真实信号"无回访"):rovn=0,参与加权(不会让另一时段兜底)
|
|
|
+ */
|
|
|
+ private static Double computeRovn(Map<String, String> row, String period, double smoothPlus) {
|
|
|
+ Double exp = parseDoubleOrNull(row.get("exp_" + period));
|
|
|
+ if (exp == null || exp <= 0) return null;
|
|
|
+ Double returnNuv = parseDoubleOrNull(row.get("return_n_uv_" + period));
|
|
|
+ return FeatureUtils.plusSmooth(returnNuv != null ? returnNuv : 0, exp, smoothPlus, 1);
|
|
|
+ }
|
|
|
+
|
|
|
+ private static Double parseDoubleOrNull(String s) {
|
|
|
+ if (StringUtils.isBlank(s)) return null;
|
|
|
+ try { return Double.parseDouble(s); } catch (NumberFormatException e) { return null; }
|
|
|
+ }
|
|
|
+
|
|
|
private UserShareReturnProfile parseUserProfile(Map<String, Map<String, String>> userOriginInfo) {
|
|
|
if (null != userOriginInfo) {
|
|
|
Map<String, String> c9 = userOriginInfo.get("alg_recsys_feature_user_share_return_stat");
|