|
|
@@ -0,0 +1,207 @@
|
|
|
+package com.tzld.videoVector.service.rank;
|
|
|
+
|
|
|
+import com.tzld.videoVector.common.enums.Modality;
|
|
|
+import com.tzld.videoVector.model.vo.recall.RecallSignalsVO;
|
|
|
+import com.tzld.videoVector.model.vo.recall.RecallSignalsVO.QualitySignal;
|
|
|
+import com.tzld.videoVector.model.vo.recall.VideoMatchEnrichedVO;
|
|
|
+import org.springframework.stereotype.Service;
|
|
|
+
|
|
|
+import java.util.ArrayList;
|
|
|
+import java.util.Comparator;
|
|
|
+import java.util.List;
|
|
|
+import java.util.stream.Collectors;
|
|
|
+
|
|
|
+/**
|
|
|
+ * 精排纯函数——逐行移植前端 scoring.ts 的 computeCompositeScore。
|
|
|
+ *
|
|
|
+ * 关键修正(相对于旧实现):
|
|
|
+ * - deconstructBoost 按 modality===VIDEO 判定,不按 configCode.startsWith("VIDEO_")
|
|
|
+ * - ARTICLE 无 rov 时退化为纯 sim 排序
|
|
|
+ * - MATERIAL 质量缺失按 materialMissingStrategy 处理
|
|
|
+ */
|
|
|
+@Service
|
|
|
+public class RankServiceImpl implements RankService {
|
|
|
+
|
|
|
+ private static double clip01(double x) {
|
|
|
+ return Math.max(0, Math.min(1, x));
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public ScoreBreakdown rank(VideoMatchEnrichedVO item, RankingParams params) {
|
|
|
+ RecallSignalsVO signals = item.getSignals();
|
|
|
+ if (signals == null || signals.getSim() == null || !Double.isFinite(signals.getSim())) {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+ double sim = signals.getSim();
|
|
|
+ double lowerBound = effectiveSimThreshold(item.getConfigCode(), params);
|
|
|
+ double denom = 1.0 - lowerBound;
|
|
|
+ double simNorm = denom > 0 ? clip01((sim - lowerBound) / denom) : 0;
|
|
|
+ boolean passesThreshold = sim >= lowerBound;
|
|
|
+
|
|
|
+ Modality modality = item.getModality();
|
|
|
+
|
|
|
+ // 素材模态:多维质量加权
|
|
|
+ if (modality == Modality.MATERIAL) {
|
|
|
+ return rankMaterial(simNorm, sim, lowerBound, passesThreshold, signals.getQuality(), params);
|
|
|
+ }
|
|
|
+
|
|
|
+ // VIDEO / ARTICLE 模态:ROV 公式
|
|
|
+ return rankVideoArticle(simNorm, sim, lowerBound, passesThreshold, signals, modality, params);
|
|
|
+ }
|
|
|
+
|
|
|
+ private ScoreBreakdown rankMaterial(double simNorm, double sim, double lowerBound,
|
|
|
+ boolean passesThreshold, QualitySignal qs, RankingParams params) {
|
|
|
+ double alpha = params.getAlpha();
|
|
|
+
|
|
|
+ // WP2: 质量缺失策略
|
|
|
+ if (qs == null || !qs.isHasData()) {
|
|
|
+ if ("shrink".equals(params.getMaterialMissingStrategy())
|
|
|
+ && params.getPriorCtr() != null && params.getPriorViral() != null
|
|
|
+ && params.getPriorRoi() != null) {
|
|
|
+ // shrink:回退先验均值 + 低置信度权重
|
|
|
+ double qualTotalW = params.getWCtr() + params.getWViral() + params.getWRoi();
|
|
|
+ double priorQuality = (params.getWCtr() * params.getPriorCtr()
|
|
|
+ + params.getWViral() * params.getPriorViral()
|
|
|
+ + params.getWRoi() * params.getPriorRoi()) / qualTotalW;
|
|
|
+ double conf = 0.3;
|
|
|
+ double composite = conf * (alpha * simNorm + (1 - alpha) * priorQuality);
|
|
|
+ return ScoreBreakdown.of(composite, simNorm, 0, 1, lowerBound, passesThreshold);
|
|
|
+ }
|
|
|
+ // group(默认):无质量数据,仅依赖相关性
|
|
|
+ double composite = alpha * simNorm;
|
|
|
+ ScoreBreakdown b = ScoreBreakdown.of(composite, simNorm, 0, 1, lowerBound, passesThreshold);
|
|
|
+ b.setQualityMissing(true);
|
|
|
+ return b;
|
|
|
+ }
|
|
|
+
|
|
|
+ double ctr = qs.getCtr() != null ? qs.getCtr() : 0;
|
|
|
+ double viral = qs.getViral() != null ? qs.getViral() : 0;
|
|
|
+ double roi = qs.getRoi() != null ? qs.getRoi() : 0;
|
|
|
+ double qualTotalW = params.getWCtr() + params.getWViral() + params.getWRoi();
|
|
|
+ if (qualTotalW <= 0) qualTotalW = 1;
|
|
|
+ double qualityScore = (params.getWCtr() * ctr + params.getWViral() * viral
|
|
|
+ + params.getWRoi() * roi) / qualTotalW;
|
|
|
+ double composite = alpha * simNorm + (1 - alpha) * qualityScore;
|
|
|
+ return ScoreBreakdown.of(composite, simNorm, 0, 1, lowerBound, passesThreshold);
|
|
|
+ }
|
|
|
+
|
|
|
+ private ScoreBreakdown rankVideoArticle(double simNorm, double sim, double lowerBound,
|
|
|
+ boolean passesThreshold, RecallSignalsVO signals,
|
|
|
+ Modality modality, RankingParams params) {
|
|
|
+ Double rov = signals.getRov();
|
|
|
+
|
|
|
+ // 按维度独立 boost:优先取 boostsByCode[configCode],回退 deconstructBoost
|
|
|
+ Double codeBoost = params.getBoostsByCode() != null
|
|
|
+ ? params.getBoostsByCode().getOrDefault(signals.getProvenance() != null
|
|
|
+ ? signals.getProvenance().getConfigCode() : null, params.getDeconstructBoost())
|
|
|
+ : params.getDeconstructBoost();
|
|
|
+ if (codeBoost == null) codeBoost = params.getDeconstructBoost();
|
|
|
+
|
|
|
+ boolean hasRov = rov != null && Double.isFinite(rov);
|
|
|
+ double boost = (modality == Modality.VIDEO && hasRov) ? codeBoost : 1.0;
|
|
|
+
|
|
|
+ if (!hasRov) {
|
|
|
+ double composite = boost * simNorm;
|
|
|
+ return ScoreBreakdown.of(composite, simNorm, 0, boost, lowerBound, passesThreshold);
|
|
|
+ }
|
|
|
+
|
|
|
+ double rovDenom = params.getRovClipHigh() - params.getRovClipLow();
|
|
|
+ double rovNorm = rovDenom > 0 ? clip01((rov - params.getRovClipLow()) / rovDenom) : 0;
|
|
|
+ double composite = boost * (params.getAlpha() * simNorm + (1 - params.getAlpha()) * rovNorm);
|
|
|
+ return ScoreBreakdown.of(composite, simNorm, rovNorm, boost, lowerBound, passesThreshold);
|
|
|
+ }
|
|
|
+
|
|
|
+ private double effectiveSimThreshold(String configCode, RankingParams params) {
|
|
|
+ if (configCode != null && params.getSimThresholdsByCode().containsKey(configCode)) {
|
|
|
+ return params.getSimThresholdsByCode().get(configCode);
|
|
|
+ }
|
|
|
+ return params.getSimThreshold();
|
|
|
+ }
|
|
|
+
|
|
|
+ // ================================================================
|
|
|
+ // WP3 T3.4 自适应批量精排
|
|
|
+ // ================================================================
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public AdaptiveRankResult adaptiveRank(List<VideoMatchEnrichedVO> items, RankingParams baseParams) {
|
|
|
+ if (items == null || items.isEmpty()) {
|
|
|
+ return AdaptiveRankResult.of(items != null ? items : new ArrayList<>(),
|
|
|
+ baseParams, baseParams.getRovClipLow(), baseParams.getRovClipHigh(),
|
|
|
+ baseParams.getSimThreshold(), 0);
|
|
|
+ }
|
|
|
+
|
|
|
+ // 收集 sim 和 ROV 样本
|
|
|
+ List<Double> sims = new ArrayList<>();
|
|
|
+ List<Double> rovs = new ArrayList<>();
|
|
|
+ for (VideoMatchEnrichedVO item : items) {
|
|
|
+ RecallSignalsVO signals = item.getSignals();
|
|
|
+ if (signals == null || signals.getSim() == null) continue;
|
|
|
+ sims.add(signals.getSim());
|
|
|
+ if (signals.getRov() != null && Double.isFinite(signals.getRov()) && signals.getRov() > 0) {
|
|
|
+ rovs.add(signals.getRov());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (sims.isEmpty()) {
|
|
|
+ return AdaptiveRankResult.of(items, baseParams,
|
|
|
+ baseParams.getRovClipLow(), baseParams.getRovClipHigh(),
|
|
|
+ baseParams.getSimThreshold(), 0);
|
|
|
+ }
|
|
|
+
|
|
|
+ double maxSim = sims.stream().max(Double::compare).orElse(0.0);
|
|
|
+
|
|
|
+ // 自适应 ROV 分位(从候选集估计 P5/P95,样本需足够大才有统计意义)
|
|
|
+ double rovLow = baseParams.getRovClipLow();
|
|
|
+ double rovHigh = baseParams.getRovClipHigh();
|
|
|
+ if (rovs.size() >= 30) {
|
|
|
+ rovs.sort(Double::compare);
|
|
|
+ int p5Idx = Math.max(0, (int) (rovs.size() * 0.05));
|
|
|
+ int p95Idx = Math.min(rovs.size() - 1, (int) (rovs.size() * 0.95));
|
|
|
+ rovLow = rovs.get(p5Idx);
|
|
|
+ rovHigh = rovs.get(p95Idx);
|
|
|
+ // 防止上下界过于接近
|
|
|
+ if (rovHigh - rovLow < 0.001) {
|
|
|
+ rovHigh = rovLow + 0.01;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 自适应 sim 下界:max(绝对兜底, 相对阈值 max_sim - delta)
|
|
|
+ double absFloor = baseParams.getSimThreshold();
|
|
|
+ double relFloor = maxSim - 0.15;
|
|
|
+ double simFloor = Math.max(absFloor, relFloor);
|
|
|
+ // 安全钳:不高于 0.9,不低于 0.4
|
|
|
+ simFloor = Math.max(0.4, Math.min(0.9, simFloor));
|
|
|
+
|
|
|
+ // 构建自适应参数
|
|
|
+ RankingParams adaptiveParams = new RankingParams();
|
|
|
+ adaptiveParams.setSimThreshold(simFloor);
|
|
|
+ adaptiveParams.setSimThresholdsByCode(baseParams.getSimThresholdsByCode());
|
|
|
+ adaptiveParams.setRovClipLow(rovLow);
|
|
|
+ adaptiveParams.setRovClipHigh(rovHigh);
|
|
|
+ adaptiveParams.setAlpha(baseParams.getAlpha());
|
|
|
+ adaptiveParams.setDeconstructBoost(baseParams.getDeconstructBoost());
|
|
|
+ if (baseParams.getBoostsByCode() != null) {
|
|
|
+ adaptiveParams.setBoostsByCode(baseParams.getBoostsByCode());
|
|
|
+ }
|
|
|
+ adaptiveParams.setWCtr(baseParams.getWCtr());
|
|
|
+ adaptiveParams.setWViral(baseParams.getWViral());
|
|
|
+ adaptiveParams.setWRoi(baseParams.getWRoi());
|
|
|
+ adaptiveParams.setMaterialMissingStrategy(baseParams.getMaterialMissingStrategy());
|
|
|
+
|
|
|
+ // 逐条打分 + 回填 rankScore
|
|
|
+ List<VideoMatchEnrichedVO> scored = items.stream()
|
|
|
+ .peek(item -> {
|
|
|
+ ScoreBreakdown b = rank(item, adaptiveParams);
|
|
|
+ if (b != null && b.isPassesThreshold()) {
|
|
|
+ item.setRankScore(b.getComposite());
|
|
|
+ }
|
|
|
+ })
|
|
|
+ .filter(item -> item.getRankScore() != null)
|
|
|
+ .sorted(Comparator.comparing(VideoMatchEnrichedVO::getRankScore,
|
|
|
+ Comparator.nullsLast(Comparator.reverseOrder())))
|
|
|
+ .collect(Collectors.toList());
|
|
|
+
|
|
|
+ return AdaptiveRankResult.of(scored, adaptiveParams, rovLow, rovHigh, simFloor, maxSim);
|
|
|
+ }
|
|
|
+}
|