|
|
@@ -7,7 +7,6 @@ import com.tzld.piaoquan.recommend.server.common.base.RankItem;
|
|
|
import com.tzld.piaoquan.recommend.server.model.MachineInfo;
|
|
|
import com.tzld.piaoquan.recommend.server.model.Video;
|
|
|
import com.tzld.piaoquan.recommend.server.service.FeatureService;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.funnel.ColdStartAction;
|
|
|
import com.tzld.piaoquan.recommend.server.service.funnel.FunnelContext;
|
|
|
import com.tzld.piaoquan.recommend.server.service.rank.RankParam;
|
|
|
import com.tzld.piaoquan.recommend.server.service.rank.RankResult;
|
|
|
@@ -31,20 +30,6 @@ import java.util.concurrent.TimeUnit;
|
|
|
import java.util.stream.Collectors;
|
|
|
import java.util.stream.Stream;
|
|
|
|
|
|
-/**
|
|
|
- * V536 实验(2026-05-29 复写):基于 V568 召回 + DNN 打分,fusion 只保留"流量池相关 + 兜底相关"
|
|
|
- *
|
|
|
- * 与 V568 唯一差异在 fusion 阶段(mergeAndSort):
|
|
|
- * - 保留:rov 空兜底 + 流量池按比例强插(topK 头部锁 + flowPoolP/newFlowPoolSelectRate 概率门 + 一侧用光兜底回填)
|
|
|
- * - 删除:标签 filter / rov boost / 强插 / 品类降权 / 节日降权 / 密度控制(Basic 的段 2/3/4/5/6/8)
|
|
|
- *
|
|
|
- * Apollo key 保留 ${rank.score.merge.weightv536},召回/打分逻辑与 V568 同源但参数独立可调,仅 fusion 策略不同。
|
|
|
- * 召回侧:与 V568 基线召回完全一致,RecallService 不需要 isHit536Exp 门控。
|
|
|
- *
|
|
|
- * 历史:
|
|
|
- * - 原 V536(V569 + 4 路召回提权)在 2026-05-27 被复写为 V565 base + 极简 fusion
|
|
|
- * - 2026-05-29 再次复写:base 从 V565 切到 V568(加回 5 路 oldSpecial + 3 路 prioriProvince,v1/cityRov 换回普通版)
|
|
|
- */
|
|
|
@Service
|
|
|
@Slf4j
|
|
|
public class RankStrategy4RegionMergeModelV536 extends RankStrategy4RegionMergeModelBasic {
|
|
|
@@ -54,6 +39,60 @@ public class RankStrategy4RegionMergeModelV536 extends RankStrategy4RegionMergeM
|
|
|
@Autowired
|
|
|
private FeatureService featureService;
|
|
|
|
|
|
+ /**
|
|
|
+ * V536 个性化召回白名单 (6 路):召回 key 含 mid/uid,依赖该用户行为信号。
|
|
|
+ * 注:YearReturnCate2 因线上效果不佳, 2026-06-04 起移到非个性化白名单。
|
|
|
+ * 注:YearShareDkElements 在 V536 中移到非个性化白名单 (相对 V562 的实验差异点)。
|
|
|
+ */
|
|
|
+ private static final Set<String> PERSONAL_RECALL_PUSH_FROMS = new HashSet<>(Arrays.asList(
|
|
|
+ UserCate1RecallStrategy.PUSH_FORM,
|
|
|
+ UserCate2RecallStrategy.PUSH_FORM,
|
|
|
+ Return1Cate2RosRecallStrategy.PUSH_FORM,
|
|
|
+ Return1Cate2StrRecallStrategy.PUSH_FORM,
|
|
|
+ YearShareCate1RecallStrategy.PUSH_FROM,
|
|
|
+ YearShareCate2RecallStrategy.PUSH_FROM
|
|
|
+ ));
|
|
|
+
|
|
|
+ /**
|
|
|
+ * V536 非个性化召回白名单 (18 路):只依赖 headVid + 地域/品类/相似度(vid-vid CF 也归此类)。
|
|
|
+ * 含 5 路旧地域、新地域、城市、head province/cate、先验省份、return 相似、scene CF、YearReturnCate2、YearShareDkElements。
|
|
|
+ */
|
|
|
+ private static final Set<String> NON_PERSONAL_RECALL_PUSH_FROMS = new HashSet<>(Arrays.asList(
|
|
|
+ RegionHRecallStrategy.PUSH_FORM,
|
|
|
+ RegionHDupRecallStrategy.PUSH_FORM,
|
|
|
+ Region24HRecallStrategy.PUSH_FORM,
|
|
|
+ RegionRelative24HRecallStrategy.PUSH_FORM,
|
|
|
+ RegionRelative24HDupRecallStrategy.PUSH_FORM,
|
|
|
+ RegionRealtimeRecallStrategyV1.PUSH_FORM,
|
|
|
+ CityRovnRecallStrategy.PUSH_FROM,
|
|
|
+ HeadProvinceCate1RecallStrategy.PUSH_FORM,
|
|
|
+ HeadProvinceCate2RecallStrategy.PUSH_FORM,
|
|
|
+ HeadCate2RovRecallStrategy.PUSH_FROM,
|
|
|
+ PrioriProvinceRovnRecallStrategy.PUSH_FROM,
|
|
|
+ PrioriProvinceStrRecallStrategy.PUSH_FROM,
|
|
|
+ PrioriProvinceRosRecallStrategy.PUSH_FROM,
|
|
|
+ ReturnVideoRecallStrategy.PUSH_FORM,
|
|
|
+ SceneCFRovnRecallStrategy.PUSH_FORM,
|
|
|
+ SceneCFRosnRecallStrategy.PUSH_FORM,
|
|
|
+ YearReturnCate2RecallStrategy.PUSH_FROM,
|
|
|
+ YearShareDkElementsRecallStrategy.PUSH_FROM
|
|
|
+ ));
|
|
|
+
|
|
|
+ /** PERSONAL ∪ NON_PERSONAL = 24 路 (6 + 18)。用于 fetchCoarseRankScores 跳过流量池等不参与截断的 vid。 */
|
|
|
+ private static final Set<String> ALL_ROV_PUSH_FROMS;
|
|
|
+ static {
|
|
|
+ Set<String> all = new HashSet<>(PERSONAL_RECALL_PUSH_FROMS);
|
|
|
+ all.addAll(NON_PERSONAL_RECALL_PUSH_FROMS);
|
|
|
+ ALL_ROV_PUSH_FROMS = Collections.unmodifiableSet(all);
|
|
|
+ }
|
|
|
+
|
|
|
+ /*
|
|
|
+ * 设计要点:
|
|
|
+ * - fail-closed 白名单:RecallService 未来加新路不会自动进 V536,避免污染 vs V568 AB 对比
|
|
|
+ * - 流量池 3 路 (flow_pool / quick_flow_pool / recall_strategy_hotspot) 不在任何名单——独立通道
|
|
|
+ * - 调用顺序 = 个性化优先:同 vid 双类命中时归个性化,保护用户兴趣信号
|
|
|
+ */
|
|
|
+
|
|
|
@Override
|
|
|
public List<Video> mergeAndRankRovRecall(RankParam param) {
|
|
|
Map<String, Double> mergeWeight = this.mergeWeight != null ? this.mergeWeight : new HashMap<>(0);
|
|
|
@@ -67,42 +106,30 @@ public class RankStrategy4RegionMergeModelV536 extends RankStrategy4RegionMergeM
|
|
|
Set<Long> setVideo = new HashSet<>();
|
|
|
setVideo.add(param.getHeadVid());
|
|
|
List<Video> rovRecallRank = new ArrayList<>();
|
|
|
- // -------------------5路特殊旧召回------------------
|
|
|
- RecallUtils.extractOldSpecialRecall(mergeWeight.getOrDefault("oldSpecialN", (double) param.getSize()).intValue(), param, setVideo, rovRecallRank);
|
|
|
- //-------------------return相似召回------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("v6", 5.0).intValue(), param, ReturnVideoRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
|
|
|
- //-------------------新地域召回------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("v1", 5.0).intValue(), param, RegionRealtimeRecallStrategyV1.PUSH_FORM, setVideo, rovRecallRank);
|
|
|
- //-------------------scene cf rovn------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("sceneCFRovn", 5.0).intValue(), param, SceneCFRovnRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
|
|
|
- //-------------------scene cf rosn------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("sceneCFRosn", 5.0).intValue(), param, SceneCFRosnRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
|
|
|
- // -------------------user cate1------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("cate1RecallN", 5.0).intValue(), param, UserCate1RecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
|
|
|
- // -------------------user cate2------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("cate2RecallN", 5.0).intValue(), param, UserCate2RecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
|
|
|
- // -------------------head province cate1------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("headCate1RecallN", 3.0).intValue(), param, HeadProvinceCate1RecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
|
|
|
- // -------------------head province cate2------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("headCate2RecallN", 3.0).intValue(), param, HeadProvinceCate2RecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
|
|
|
- //-------------------head cate2 of rovn------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("headCate2Rov", 5.0).intValue(), param, HeadCate2RovRecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
|
|
|
- //-------------------city rovn------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("cityRov", 5.0).intValue(), param, CityRovnRecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
|
|
|
- //-------------------priori province rovn------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("prioriProvinceRov", 3.0).intValue(), param, PrioriProvinceRovnRecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
|
|
|
- //-------------------priori province str------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("prioriProvinceStr", 1.0).intValue(), param, PrioriProvinceStrRecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
|
|
|
- //-------------------priori province ros------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("prioriProvinceRos", 1.0).intValue(), param, PrioriProvinceRosRecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
|
|
|
- //-------------------return1 cate2 ros------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("return1Cate2Ros", 5.0).intValue(), param, Return1Cate2RosRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
|
|
|
- //-------------------return1 cate2 str------------------
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("return1Cate2Str", 5.0).intValue(), param, Return1Cate2StrRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
|
|
|
-
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("yearShareCate1", 5.0).intValue(), param, YearShareCate1RecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("yearShareCate2", 5.0).intValue(), param, YearShareCate2RecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
|
|
|
- RecallUtils.extractRecall(mergeWeight.getOrDefault("yearReturnCate2", 5.0).intValue(), param, YearReturnCate2RecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
|
|
|
+
|
|
|
+ // ============================================================
|
|
|
+ // V536 实验:统一粗排分截断 (个性化 / 非个性化 两配额, 动态补足)
|
|
|
+ // 总配额 coarseRankTopN,个性化占 personalRatio。先个性化按上限抢位,
|
|
|
+ // 个性化不足时剩余名额转给非个性化,保证精排算力满载。
|
|
|
+ //
|
|
|
+ // 粗排分 = rovn_1h / rovn_24h 的加权平均 (默认权重 0.5/0.5);rovn 由 alg_vid_recommend_exp_feature_20250212 的原子字段 (return_n_uv_*, exp_*) 经 plusSmooth 计算
|
|
|
+ // ============================================================
|
|
|
+ int totalTopN = mergeWeight.getOrDefault("coarseRankTopN", 80.0).intValue();
|
|
|
+ double personalRatio = mergeWeight.getOrDefault("personalRatio", 0.4);
|
|
|
+ int personalTopN = (int) Math.round(totalTopN * personalRatio);
|
|
|
+ Map<Long, Double> coarseRankMap = fetchCoarseRankScores(param);
|
|
|
+
|
|
|
+ int personalCandidates = RecallUtils.countDistinctCandidates(param, setVideo, PERSONAL_RECALL_PUSH_FROMS);
|
|
|
+ int sizeBeforePersonal = rovRecallRank.size();
|
|
|
+ RecallUtils.extractAllAndTruncateByCoarseRank(personalTopN, param, setVideo, rovRecallRank, coarseRankMap, PERSONAL_RECALL_PUSH_FROMS);
|
|
|
+ int personalActual = rovRecallRank.size() - sizeBeforePersonal;
|
|
|
+ int nonPersonalBudget = totalTopN - personalActual; // 个性化不足时, 名额转给非个性化
|
|
|
+ int nonPersonalCandidates = RecallUtils.countDistinctCandidates(param, setVideo, NON_PERSONAL_RECALL_PUSH_FROMS);
|
|
|
+ int sizeBeforeNonPersonal = rovRecallRank.size();
|
|
|
+ RecallUtils.extractAllAndTruncateByCoarseRank(nonPersonalBudget, param, setVideo, rovRecallRank, coarseRankMap, NON_PERSONAL_RECALL_PUSH_FROMS);
|
|
|
+ int nonPersonalActual = rovRecallRank.size() - sizeBeforeNonPersonal;
|
|
|
+ log.info("coarse_rank_summary exp=536 quota={} pc={} ps={} nc={} ns={}",
|
|
|
+ totalTopN, personalCandidates, personalActual, nonPersonalCandidates, nonPersonalActual);
|
|
|
|
|
|
// 记录召回源中的视频
|
|
|
this.rankBeforePostProcessor(rovRecallRank);
|
|
|
@@ -308,18 +335,12 @@ public class RankStrategy4RegionMergeModelV536 extends RankStrategy4RegionMergeM
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * V536 fusion: 只保留"流量池相关 + 兜底相关"逻辑
|
|
|
+ * 同 V567 极简 fusion:只保留"流量池相关 + 兜底相关"逻辑
|
|
|
* 1. rov 空兜底:rov 池为空时流量池直接顶上 (Basic 段 1)
|
|
|
* 7. 流量池按比例强插:topK 头部锁 rov + topK..size 按 flowPoolP / newFlowPoolSelectRate 概率门
|
|
|
* 混入 flowVideos / douHotFlowPoolVideos,否则用 rov 中段;一侧用光时另一侧兜底回填 (Basic 段 7)
|
|
|
*
|
|
|
- * 删除(相对 Basic):
|
|
|
- * - 段 2: Apollo 实验参数解析 (filterRules / rankReduceConfig) — 解析出来只给 boost/filter 用,不需要了
|
|
|
- * - 段 3: 标签读取 RankExtractorItemTags
|
|
|
- * - 段 4: 时间卡控 RankProcessorTagFilter
|
|
|
- * - 段 5: rov 池提权 RankProcessorBoost.boostByTag
|
|
|
- * - 段 6: rov 池强插 RankProcessorInsert.insertByTag + 品类降权 boostByMergeCate + 节日降权 boostByFestive
|
|
|
- * - 段 8: 密度控制 RankProcessorDensity.mergeDensityControl
|
|
|
+ * 删除(相对 Basic):标签 filter / rov boost / 强插 / 品类降权 / 节日降权 / 密度控制
|
|
|
*/
|
|
|
@Override
|
|
|
public RankResult mergeAndSort(RankParam param, List<Video> rovVideos, List<Video> flowVideos, List<Video> douHotFlowPoolVideos) {
|
|
|
@@ -384,6 +405,77 @@ public class RankStrategy4RegionMergeModelV536 extends RankStrategy4RegionMergeM
|
|
|
return new RankResult(result);
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * V536 实验:拉取粗排分(按 vid → score 返回)。
|
|
|
+ *
|
|
|
+ * 数据源:alg_vid_recommend_exp_feature_20250212。
|
|
|
+ * 表里没有现成 rovn 字段,需要从原子字段 (return_n_uv_*, exp_*) 用 plusSmooth 算出来。
|
|
|
+ * 公式 = FeatureV6.oneTypeStatFeature 同口径:rovn = plusSmooth(return_n_uv, exp, plus, 1)
|
|
|
+ * 默认 plus=30 与 FeatureV6.largerSmoothPlus 对齐,AB 对比不会因口径不同污染结论。
|
|
|
+ *
|
|
|
+ * Apollo 可调维度:
|
|
|
+ * - coarseRovn1hW / coarseRovn24hW:1h 和 24h 的加权(默认 0.5/0.5)
|
|
|
+ * - coarseRovn1hSmoothPlus / coarseRovn24hSmoothPlus:贝叶斯平滑系数(默认 30/30)
|
|
|
+ *
|
|
|
+ * 缺失自动归一化:单值缺失时剩下的撑起全部权重;两值都缺失则 caller 兜底 RovScore。
|
|
|
+ */
|
|
|
+ private Map<Long, Double> fetchCoarseRankScores(RankParam param) {
|
|
|
+ if (param == null || param.getRecallResult() == null
|
|
|
+ || CollectionUtils.isEmpty(param.getRecallResult().getData())) {
|
|
|
+ return Collections.emptyMap();
|
|
|
+ }
|
|
|
+ Map<String, Double> mergeWeight = this.mergeWeight != null ? this.mergeWeight : Collections.emptyMap();
|
|
|
+ double w1h = mergeWeight.getOrDefault("coarseRovn1hW", 0.5);
|
|
|
+ double w24h = mergeWeight.getOrDefault("coarseRovn24hW", 0.5);
|
|
|
+ double plus1h = mergeWeight.getOrDefault("coarseRovn1hSmoothPlus", 30.0);
|
|
|
+ double plus24h = mergeWeight.getOrDefault("coarseRovn24hSmoothPlus", 30.0);
|
|
|
+ // 只对参与统一截断的 24 路 vid 拉粗排分(跳过流量池 3 路,省 proto + RPC 延迟)
|
|
|
+ List<String> vids = param.getRecallResult().getData().stream()
|
|
|
+ .filter(d -> d != null && CollectionUtils.isNotEmpty(d.getVideos()))
|
|
|
+ .filter(d -> ALL_ROV_PUSH_FROMS.contains(d.getPushFrom()))
|
|
|
+ .flatMap(d -> d.getVideos().stream())
|
|
|
+ .map(v -> String.valueOf(v.getVideoId()))
|
|
|
+ .distinct()
|
|
|
+ .collect(Collectors.toList());
|
|
|
+ if (vids.isEmpty()) return Collections.emptyMap();
|
|
|
+
|
|
|
+ Map<String, Map<String, Map<String, String>>> feats = featureService.getVideoCoarseRankFeature(vids);
|
|
|
+ Map<Long, Double> result = new HashMap<>(vids.size());
|
|
|
+ for (String vid : vids) {
|
|
|
+ Map<String, String> row = feats.getOrDefault(vid, Collections.emptyMap())
|
|
|
+ .getOrDefault("alg_vid_recommend_exp_feature_20250212", Collections.emptyMap());
|
|
|
+ Double rovn1h = computeRovn(row, "1h", plus1h);
|
|
|
+ Double rovn24h = computeRovn(row, "24h", plus24h);
|
|
|
+ // 加权平均,缺失自动归一化
|
|
|
+ double sumW = (rovn1h != null ? w1h : 0) + (rovn24h != null ? w24h : 0);
|
|
|
+ if (sumW <= 0) continue;
|
|
|
+ double sumWS = (rovn1h != null ? rovn1h * w1h : 0) + (rovn24h != null ? rovn24h * w24h : 0);
|
|
|
+ try {
|
|
|
+ result.put(Long.parseLong(vid), sumWS / sumW);
|
|
|
+ } catch (NumberFormatException ignore) { }
|
|
|
+ }
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 与 FeatureV6.oneTypeStatFeature 同口径:rovn = plusSmooth(return_n_uv, exp, plus, 1)
|
|
|
+ *
|
|
|
+ * 字段语义(区分 0 vs null):
|
|
|
+ * - exp 是 period 有效性 anchor:null 或 ≤0 → 整个 period 无效(return null)
|
|
|
+ * - return_n_uv 缺失视为 0(真实信号"无回访"):rovn=0,参与加权(不会让另一时段兜底)
|
|
|
+ */
|
|
|
+ private static Double computeRovn(Map<String, String> row, String period, double smoothPlus) {
|
|
|
+ Double exp = parseDoubleOrNull(row.get("exp_" + period));
|
|
|
+ if (exp == null || exp <= 0) return null;
|
|
|
+ Double returnNuv = parseDoubleOrNull(row.get("return_n_uv_" + period));
|
|
|
+ return FeatureUtils.plusSmooth(returnNuv != null ? returnNuv : 0, exp, smoothPlus, 1);
|
|
|
+ }
|
|
|
+
|
|
|
+ private static Double parseDoubleOrNull(String s) {
|
|
|
+ if (StringUtils.isBlank(s)) return null;
|
|
|
+ try { return Double.parseDouble(s); } catch (NumberFormatException e) { return null; }
|
|
|
+ }
|
|
|
+
|
|
|
private UserShareReturnProfile parseUserProfile(Map<String, Map<String, String>> userOriginInfo) {
|
|
|
if (null != userOriginInfo) {
|
|
|
Map<String, String> c9 = userOriginInfo.get("alg_recsys_feature_user_share_return_stat");
|