Parcourir la source

优化现有召排

luojunhui il y a 23 heures
Parent
commit
bd4870b1c3
20 fichiers modifiés avec 1304 ajouts et 74 suppressions
  1. 1 0
      core/pom.xml
  2. 4 1
      core/src/main/java/com/tzld/videoVector/common/constant/VectorConstants.java
  3. 43 0
      core/src/main/java/com/tzld/videoVector/model/param/recall/BatchByTextParam.java
  4. 17 0
      core/src/main/java/com/tzld/videoVector/model/param/recall/MatchByArticleIdParam.java
  5. 17 0
      core/src/main/java/com/tzld/videoVector/model/param/recall/MatchByMaterialIdParam.java
  6. 26 0
      core/src/main/java/com/tzld/videoVector/model/param/recall/MatchByTextParam.java
  7. 6 0
      core/src/main/java/com/tzld/videoVector/model/param/recall/MatchByVideoIdParam.java
  8. 46 0
      core/src/main/java/com/tzld/videoVector/model/param/recall/RankingSpec.java
  9. 9 0
      core/src/main/java/com/tzld/videoVector/model/vo/recall/RecallResultVO.java
  10. 41 0
      core/src/main/java/com/tzld/videoVector/model/vo/recall/RecallSignalsVO.java
  11. 6 0
      core/src/main/java/com/tzld/videoVector/model/vo/recall/VideoMatchEnrichedVO.java
  12. 4 5
      core/src/main/java/com/tzld/videoVector/service/impl/MaterialSearchServiceImpl.java
  13. 43 0
      core/src/main/java/com/tzld/videoVector/service/rank/AdaptiveRankResult.java
  14. 31 0
      core/src/main/java/com/tzld/videoVector/service/rank/RankService.java
  15. 207 0
      core/src/main/java/com/tzld/videoVector/service/rank/RankServiceImpl.java
  16. 73 0
      core/src/main/java/com/tzld/videoVector/service/rank/RankingParams.java
  17. 43 0
      core/src/main/java/com/tzld/videoVector/service/rank/ScoreBreakdown.java
  18. 10 0
      core/src/main/java/com/tzld/videoVector/service/recall/VectorRecallTestService.java
  19. 664 68
      core/src/main/java/com/tzld/videoVector/service/recall/impl/VectorRecallTestServiceImpl.java
  20. 13 0
      server/src/main/java/com/tzld/videoVector/controller/VectorRecallTestController.java

+ 1 - 0
core/pom.xml

@@ -17,4 +17,5 @@
         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     </properties>
 
+
 </project>

+ 4 - 1
core/src/main/java/com/tzld/videoVector/common/constant/VectorConstants.java

@@ -73,8 +73,11 @@ public interface VectorConstants {
      * 多点向量召回候选倍数:单素材有多个点向量命中时,需要在应用层按 materialId 去重。
      * 实际拉取数 = max(topN * MULTI_POINT_RECALL_CANDIDATE_FACTOR, MULTI_POINT_RECALL_MIN_CANDIDATES)
      */
-    int MULTI_POINT_RECALL_CANDIDATE_FACTOR = 3;
+    int MULTI_POINT_RECALL_CANDIDATE_FACTOR = 2;
 
     /** 多点向量召回候选最小数(避免极端小 topN 时候选不足) */
     int MULTI_POINT_RECALL_MIN_CANDIDATES = 30;
+
+    /** recallK 上限,防止 displayK 调大时候选爆炸 */
+    int RECALL_K_CAP = 200;
 }

+ 43 - 0
core/src/main/java/com/tzld/videoVector/model/param/recall/BatchByTextParam.java

@@ -0,0 +1,43 @@
+package com.tzld.videoVector.model.param.recall;
+
+import lombok.Data;
+
+import java.util.List;
+
+/**
+ * Batch text-recall parameters: a single embedding followed by parallel ANN over several configCodes.
+ * Replaces N front-end matchByText calls; the server de-duplicates by (modality, id) and keeps the max sim.
+ */
+@Data
+public class BatchByTextParam {
+
+    /** Query text (required). */
+    private String queryText;
+
+    /** Recall dimensions (configCodes); empty means all enabled dimensions. */
+    private List<String> configCodes;
+
+    /** Desired number of displayed items; defaults to 50. */
+    private Integer displayK = 50;
+
+    /** ANN candidate count; when omitted it is computed as min(displayK * factor, CAP). */
+    private Integer recallK;
+
+    /** Desired number of displayed videos; falls back to displayK when omitted. */
+    private Integer videoDisplayK;
+
+    /** Desired number of displayed materials; falls back to displayK when omitted. */
+    private Integer materialDisplayK;
+
+    /** Desired number of displayed articles; falls back to displayK when omitted. */
+    private Integer articleDisplayK;
+
+    /** Modality filter: empty means all; allowed values are VIDEO / MATERIAL / ARTICLE. */
+    private List<String> modalities;
+
+    /** Material source filter: empty means all; allowed values are "内部素材" / "外部合作". */
+    private List<String> sourceLabels;
+
+    /** Ranking parameters supplied by the front end; they override the back-end defaults. */
+    private RankingSpec ranking;
+}

+ 17 - 0
core/src/main/java/com/tzld/videoVector/model/param/recall/MatchByArticleIdParam.java

@@ -2,6 +2,8 @@ package com.tzld.videoVector.model.param.recall;
 
 import lombok.Data;
 
+import java.util.List;
+
 /**
  * 长文ID召回参数 (matchByArticleId)
  */
@@ -16,4 +18,19 @@ public class MatchByArticleIdParam {
 
     /** 返回 Top-N,默认 50 */
     private Integer topN = 50;
+
+    /** 期望展示条数(不传回落到 topN) */
+    private Integer displayK;
+
+    /** ANN 候选数(不传按 min(displayK*factor, CAP) 计算) */
+    private Integer recallK;
+
+    /** 模态筛选:空=全部 */
+    private List<String> modalities;
+
+    /** 素材来源筛选:空=全部 */
+    private List<String> sourceLabels;
+
+    /** 精排参数(从前端传入,覆盖后端默认值) */
+    private RankingSpec ranking;
 }

+ 17 - 0
core/src/main/java/com/tzld/videoVector/model/param/recall/MatchByMaterialIdParam.java

@@ -2,6 +2,8 @@ package com.tzld.videoVector.model.param.recall;
 
 import lombok.Data;
 
+import java.util.List;
+
 /**
  * 素材ID召回参数 (matchByMaterialId)
  */
@@ -16,4 +18,19 @@ public class MatchByMaterialIdParam {
 
     /** 返回 Top-N,默认 50 */
     private Integer topN = 50;
+
+    /** 期望展示条数(不传回落到 topN) */
+    private Integer displayK;
+
+    /** ANN 候选数(不传按 min(displayK*factor, CAP) 计算) */
+    private Integer recallK;
+
+    /** 模态筛选:空=全部 */
+    private List<String> modalities;
+
+    /** 素材来源筛选:空=全部 */
+    private List<String> sourceLabels;
+
+    /** 精排参数(从前端传入,覆盖后端默认值) */
+    private RankingSpec ranking;
 }

+ 26 - 0
core/src/main/java/com/tzld/videoVector/model/param/recall/MatchByTextParam.java

@@ -2,6 +2,8 @@ package com.tzld.videoVector.model.param.recall;
 
 import lombok.Data;
 
+import java.util.List;
+
 /**
  * 文本召回参数 (Tab2 文本输入)
  */
@@ -29,4 +31,28 @@ public class MatchByTextParam {
 
     /** 文章返回条数;不传则与 topN 相同 */
     private Integer articleTopN;
+
+    /** 期望展示条数(不传回落到 topN) */
+    private Integer displayK;
+
+    /** ANN 候选数(不传按 min(displayK*factor, CAP) 计算) */
+    private Integer recallK;
+
+    /** 视频期望展示条数(同义可选,不传回落 displayK/topN) */
+    private Integer videoDisplayK;
+
+    /** 素材期望展示条数(同义可选,不传回落 displayK/topN) */
+    private Integer materialDisplayK;
+
+    /** 文章期望展示条数(同义可选,不传回落 displayK/topN) */
+    private Integer articleDisplayK;
+
+    /** 模态筛选:空=全部,可选 VIDEO / MATERIAL / ARTICLE */
+    private List<String> modalities;
+
+    /** 素材来源筛选:空=全部,可选 "内部素材" / "外部合作" */
+    private List<String> sourceLabels;
+
+    /** 精排参数(从前端传入,覆盖后端默认值) */
+    private RankingSpec ranking;
 }

+ 6 - 0
core/src/main/java/com/tzld/videoVector/model/param/recall/MatchByVideoIdParam.java

@@ -20,4 +20,10 @@ public class MatchByVideoIdParam {
 
     /** 返回 Top-N,默认 10 */
     private Integer topN = 10;
+
+    /** 期望展示条数(不传回落到 topN) */
+    private Integer displayK;
+
+    /** ANN 候选数(不传按 min(displayK*factor, CAP) 计算) */
+    private Integer recallK;
 }

+ 46 - 0
core/src/main/java/com/tzld/videoVector/model/param/recall/RankingSpec.java

@@ -0,0 +1,46 @@
+package com.tzld.videoVector.model.param.recall;
+
+import lombok.Data;
+
+import java.util.Map;
+
+/**
+ * Ranking parameters sent by the front end with each request; they override the back-end default RankingParams.
+ * The fields mirror RankingParams in the front-end scoring.ts.
+ */
+@Data
+public class RankingSpec {
+
+    /** Hard filter threshold: items with sim < simThreshold are dropped; documented default 0.65. */
+    private Double simThreshold;
+
+    /** Per-configCode overrides of simThreshold. */
+    private Map<String, Double> simThresholdsByCode;
+
+    /** Lower bound for ROV normalisation; documented default 0. */
+    private Double rovClipLow;
+
+    /** Upper bound for ROV normalisation; documented default 0.07. */
+    private Double rovClipHigh;
+
+    /** Relevance-vs-quality weight in [0,1], shared by VIDEO/ARTICLE/MATERIAL; documented default 0.6. */
+    private Double alpha;
+
+    /** Fallback boost, documented default 1.2. NOTE(review): RankingParams initialises it to 1.0 - confirm. */
+    private Double deconstructBoost;
+
+    /** Per-dimension boost, keyed by configCode. */
+    private Map<String, Double> boostsByCode;
+
+    /** Material quality weight for open rate; documented default 0.5 (sums to 1 with wViral and wRoi). */
+    private Double wCtr;
+
+    /** Material quality weight for viral rate; documented default 0.3. */
+    private Double wViral;
+
+    /** Material quality weight for ROI; documented default 0.2. */
+    private Double wRoi;
+
+    /** Strategy for materials without quality data: "group" | "shrink"; documented default "group". */
+    private String materialMissingStrategy;
+}

+ 9 - 0
core/src/main/java/com/tzld/videoVector/model/vo/recall/RecallResultVO.java

@@ -25,4 +25,13 @@ public class RecallResultVO {
 
     /** 命中长文数 */
     private int articleCount;
+
+    /** 期望展示条数 */
+    private Integer displayK;
+
+    /** ANN 候选数 */
+    private Integer recallK;
+
+    /** 候选放大倍数 */
+    private Integer factor;
 }

+ 41 - 0
core/src/main/java/com/tzld/videoVector/model/vo/recall/RecallSignalsVO.java

@@ -0,0 +1,41 @@
+package com.tzld.videoVector.model.vo.recall;
+
+import lombok.Data;
+
+/**
+ * Structured recall signals (replaces the overloaded score field that used to carry several meanings).
+ *
+ * The score field is kept as a compatibility alias equal to signals.sim; new code should read signals.
+ */
+@Data
+public class RecallSignalsVO {
+
+    /** Always the cosine similarity; null when missing. */
+    private Double sim;
+
+    /** Marks the query item itself (pinned on top); replaces the former fake score=1.0. */
+    private Boolean isSelf;
+
+    /** Operational metric for videos/articles; null (not 0) when missing. */
+    private Double rov;
+
+    /** Material quality; hasData=false when no quality data is available. */
+    private QualitySignal quality;
+
+    private Provenance provenance;
+
+    @Data
+    public static class QualitySignal {
+        private boolean hasData;
+        private Double ctr;   // conversionEfficiencyScore
+        private Double viral; // viralScore
+        private Double roi;   // revenueScore
+    }
+
+    @Data
+    public static class Provenance {
+        private String configCode;
+        /** "ann" | "self" */
+        private String source;
+    }
+}

+ 6 - 0
core/src/main/java/com/tzld/videoVector/model/vo/recall/VideoMatchEnrichedVO.java

@@ -81,4 +81,10 @@ public class VideoMatchEnrichedVO {
      * 长文详情,modality=ARTICLE 时下发;当前未实现,预留占位
      */
     private Object articleDetail;
+
+    /** 结构化召回信号(WP0 新增,替代被重载的 score) */
+    private RecallSignalsVO signals;
+
+    /** 精排综合分,未精排时为 null */
+    private Double rankScore;
 }

+ 4 - 5
core/src/main/java/com/tzld/videoVector/service/impl/MaterialSearchServiceImpl.java

@@ -87,10 +87,9 @@ public class MaterialSearchServiceImpl implements MaterialSearchService {
     @Resource
     private MaterialDeconstructResultMapperExt materialDeconstructResultMapperExt;
 
-    // 默认参数
-    private static final double DEFAULT_ALPHA = 0.7;
-    private static final double DEFAULT_SIM_MIN = 0.7;
-    private static final int DEFAULT_EXPANSION_FACTOR = 3;
+    // 默认参数——WP2 收敛:统一使用 RankingParams 和 VectorConstants
+    private static final double DEFAULT_ALPHA = 0.6;        // 对齐 RankingParams.alpha
+    private static final double DEFAULT_SIM_MIN = 0.65;     // 对齐 RankingParams.simThreshold
 
     // ================================================================ 入库
     @Override
@@ -310,7 +309,7 @@ public class MaterialSearchServiceImpl implements MaterialSearchService {
 
         int topN = param.getTopN() != null && param.getTopN() > 0 ? param.getTopN() : 10;
         int expansionFactor = param.getExpansionFactor() != null && param.getExpansionFactor() > 0
-                ? param.getExpansionFactor() : DEFAULT_EXPANSION_FACTOR;
+                ? param.getExpansionFactor() : MULTI_POINT_RECALL_CANDIDATE_FACTOR;
         double alpha = param.getAlpha() != null ? param.getAlpha() : DEFAULT_ALPHA;
         double simMin = param.getSimMin() != null ? param.getSimMin() : DEFAULT_SIM_MIN;
         String configCode = param.getConfigCode();

+ 43 - 0
core/src/main/java/com/tzld/videoVector/service/rank/AdaptiveRankResult.java

@@ -0,0 +1,43 @@
+package com.tzld.videoVector.service.rank;
+
+import com.tzld.videoVector.model.vo.recall.VideoMatchEnrichedVO;
+import lombok.Data;
+
+import java.util.List;
+
+/**
+ * Outcome of an adaptive ranking pass: the ordered items plus the parameters
+ * that were derived from the candidate set.
+ */
+@Data
+public class AdaptiveRankResult {
+
+    /** Items in ranked order, each carrying its rankScore. */
+    private List<VideoMatchEnrichedVO> items;
+
+    /** Parameters actually used for scoring (can be shown by the front end). */
+    private RankingParams effectiveParams;
+
+    /** ROV lower bound used for normalisation (P5 of the candidate set when learned). */
+    private double effectiveRovLow;
+
+    /** ROV upper bound used for normalisation (P95 of the candidate set when learned). */
+    private double effectiveRovHigh;
+
+    /** Similarity floor used for filtering (max of the relative threshold and the absolute fallback). */
+    private double effectiveSimFloor;
+
+    /** Largest similarity found in the candidate set. */
+    private double maxSim;
+
+    /**
+     * Builds a result from its six components.
+     *
+     * @param items    ranked items
+     * @param params   parameters used for scoring
+     * @param rovLow   effective ROV lower bound
+     * @param rovHigh  effective ROV upper bound
+     * @param simFloor effective similarity floor
+     * @param maxSim   largest similarity in the candidate set
+     * @return a populated result
+     */
+    public static AdaptiveRankResult of(List<VideoMatchEnrichedVO> items, RankingParams params,
+                                         double rovLow, double rovHigh, double simFloor, double maxSim) {
+        AdaptiveRankResult result = new AdaptiveRankResult();
+        // Learned statistics first, then the payload; the assignments are independent.
+        result.maxSim = maxSim;
+        result.effectiveSimFloor = simFloor;
+        result.effectiveRovHigh = rovHigh;
+        result.effectiveRovLow = rovLow;
+        result.effectiveParams = params;
+        result.items = items;
+        return result;
+    }
+}

+ 31 - 0
core/src/main/java/com/tzld/videoVector/service/rank/RankService.java

@@ -0,0 +1,31 @@
+package com.tzld.videoVector.service.rank;
+
+import com.tzld.videoVector.model.vo.recall.VideoMatchEnrichedVO;
+
+import java.util.List;
+
+/**
+ * Ranking service: a pure function with the same shape as the front-end scoring.ts computeCompositeScore.
+ * Computes rankScore item by item without relying on external state.
+ */
+public interface RankService {
+
+    /**
+     * Computes the composite ranking score of a single recall item.
+     *
+     * @param item   recall item (must carry signals)
+     * @param params ranking parameters
+     * @return the score breakdown, or null when sim is missing
+     */
+    ScoreBreakdown rank(VideoMatchEnrichedVO item, RankingParams params);
+
+    /**
+     * Adaptive batch ranking (WP3 T3.4).
+     * Learns ROV quantile bounds and a relative sim threshold from the candidate set instead of fixed cut-offs.
+     *
+     * @param items      coarse-ranked candidate set
+     * @param baseParams base parameters (simThreshold is demoted to a fallback floor)
+     * @return the ranked items together with the adaptive parameters
+     */
+    AdaptiveRankResult adaptiveRank(List<VideoMatchEnrichedVO> items, RankingParams baseParams);
+}

+ 207 - 0
core/src/main/java/com/tzld/videoVector/service/rank/RankServiceImpl.java

@@ -0,0 +1,207 @@
+package com.tzld.videoVector.service.rank;
+
+import com.tzld.videoVector.common.enums.Modality;
+import com.tzld.videoVector.model.vo.recall.RecallSignalsVO;
+import com.tzld.videoVector.model.vo.recall.RecallSignalsVO.QualitySignal;
+import com.tzld.videoVector.model.vo.recall.VideoMatchEnrichedVO;
+import org.springframework.stereotype.Service;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Ranking implementation, ported from the front-end scoring.ts computeCompositeScore.
+ *
+ * Key points:
+ * - the boost is decided by modality == VIDEO, not by configCode.startsWith("VIDEO_")
+ * - VIDEO/ARTICLE items without a finite rov are scored on normalised sim alone
+ * - MATERIAL items without quality data are handled per materialMissingStrategy
+ */
+@Service
+public class RankServiceImpl implements RankService {
+
+    /** Confidence factor applied to the composite score under the "shrink" strategy. */
+    private static final double SHRINK_CONFIDENCE = 0.3;
+
+    /** Minimum number of positive ROV samples required before quantile bounds are learned. */
+    private static final int MIN_ROV_SAMPLES = 10;
+
+    /** Quantiles used as the adaptive ROV lower / upper bounds. */
+    private static final double ROV_LOW_QUANTILE = 0.05;
+    private static final double ROV_HIGH_QUANTILE = 0.95;
+
+    /** When the learned ROV bounds are closer than this, the upper bound is widened. */
+    private static final double MIN_ROV_SPREAD = 0.001;
+    private static final double FALLBACK_ROV_SPREAD = 0.01;
+
+    /** Relative sim floor = maxSim - REL_SIM_DELTA. */
+    private static final double REL_SIM_DELTA = 0.15;
+
+    /** Safety clamp for the adaptive sim floor. */
+    private static final double SIM_FLOOR_MIN = 0.4;
+    private static final double SIM_FLOOR_MAX = 0.9;
+
+    private static double clip01(double x) {
+        return Math.max(0, Math.min(1, x));
+    }
+
+    /**
+     * Scores one recall item.
+     *
+     * @param item   recall item; its signals carry sim, rov and quality
+     * @param params ranking parameters (thresholds, weights, boosts)
+     * @return the score breakdown, or {@code null} when the item, its signals or a finite sim is missing
+     */
+    @Override
+    public ScoreBreakdown rank(VideoMatchEnrichedVO item, RankingParams params) {
+        RecallSignalsVO signals = item != null ? item.getSignals() : null;
+        if (signals == null || signals.getSim() == null || !Double.isFinite(signals.getSim())) {
+            return null;
+        }
+
+        double sim = signals.getSim();
+        double lowerBound = effectiveSimThreshold(item.getConfigCode(), params);
+        double denom = 1.0 - lowerBound;
+        // A threshold >= 1 leaves no room to normalise; relevance is then treated as 0.
+        double simNorm = denom > 0 ? clip01((sim - lowerBound) / denom) : 0;
+        boolean passesThreshold = sim >= lowerBound;
+
+        Modality modality = item.getModality();
+
+        // MATERIAL: weighted multi-dimension quality.
+        if (modality == Modality.MATERIAL) {
+            return rankMaterial(simNorm, lowerBound, passesThreshold, signals.getQuality(), params);
+        }
+
+        // VIDEO / ARTICLE: ROV formula.
+        return rankVideoArticle(simNorm, lowerBound, passesThreshold, signals, modality, params);
+    }
+
+    /** Scores a MATERIAL item from normalised sim and its quality signal (or the missing-data strategy). */
+    private ScoreBreakdown rankMaterial(double simNorm, double lowerBound, boolean passesThreshold,
+                                        QualitySignal qs, RankingParams params) {
+        double alpha = params.getAlpha();
+        double totalWeight = qualityWeightTotal(params);
+
+        if (qs == null || !qs.isHasData()) {
+            boolean canShrink = "shrink".equals(params.getMaterialMissingStrategy())
+                    && params.getPriorCtr() != null && params.getPriorViral() != null
+                    && params.getPriorRoi() != null;
+            if (canShrink) {
+                // shrink: fall back to the prior means and damp the result by a low confidence.
+                double priorQuality = (params.getWCtr() * params.getPriorCtr()
+                        + params.getWViral() * params.getPriorViral()
+                        + params.getWRoi() * params.getPriorRoi()) / totalWeight;
+                double composite = SHRINK_CONFIDENCE * (alpha * simNorm + (1 - alpha) * priorQuality);
+                return ScoreBreakdown.of(composite, simNorm, 0, 1, lowerBound, passesThreshold);
+            }
+            // group (default): no quality data, relevance only; flagged so the caller can group it.
+            ScoreBreakdown b = ScoreBreakdown.of(alpha * simNorm, simNorm, 0, 1, lowerBound, passesThreshold);
+            b.setQualityMissing(true);
+            return b;
+        }
+
+        double ctr = qs.getCtr() != null ? qs.getCtr() : 0;
+        double viral = qs.getViral() != null ? qs.getViral() : 0;
+        double roi = qs.getRoi() != null ? qs.getRoi() : 0;
+        double qualityScore = (params.getWCtr() * ctr + params.getWViral() * viral
+                + params.getWRoi() * roi) / totalWeight;
+        double composite = alpha * simNorm + (1 - alpha) * qualityScore;
+        return ScoreBreakdown.of(composite, simNorm, 0, 1, lowerBound, passesThreshold);
+    }
+
+    /** Sum of the three quality weights, replaced by 1 when it is not positive to avoid dividing by zero. */
+    private static double qualityWeightTotal(RankingParams params) {
+        double total = params.getWCtr() + params.getWViral() + params.getWRoi();
+        return total <= 0 ? 1 : total;
+    }
+
+    /** Scores a VIDEO or ARTICLE item from normalised sim and, when present, normalised ROV. */
+    private ScoreBreakdown rankVideoArticle(double simNorm, double lowerBound, boolean passesThreshold,
+                                            RecallSignalsVO signals, Modality modality,
+                                            RankingParams params) {
+        Double rov = signals.getRov();
+        boolean hasRov = rov != null && Double.isFinite(rov);
+
+        // Per-dimension boost: boostsByCode[configCode] when configured, otherwise deconstructBoost.
+        // The key is looked up only when non-null so null-hostile Map implementations cannot throw.
+        double codeBoost = params.getDeconstructBoost();
+        String code = signals.getProvenance() != null ? signals.getProvenance().getConfigCode() : null;
+        if (code != null && params.getBoostsByCode() != null) {
+            Double override = params.getBoostsByCode().get(code);
+            if (override != null) {
+                codeBoost = override;
+            }
+        }
+
+        // Only VIDEO items that carry a ROV are boosted.
+        double boost = (modality == Modality.VIDEO && hasRov) ? codeBoost : 1.0;
+
+        if (!hasRov) {
+            return ScoreBreakdown.of(boost * simNorm, simNorm, 0, boost, lowerBound, passesThreshold);
+        }
+
+        double rovDenom = params.getRovClipHigh() - params.getRovClipLow();
+        double rovNorm = rovDenom > 0 ? clip01((rov - params.getRovClipLow()) / rovDenom) : 0;
+        double composite = boost * (params.getAlpha() * simNorm + (1 - params.getAlpha()) * rovNorm);
+        return ScoreBreakdown.of(composite, simNorm, rovNorm, boost, lowerBound, passesThreshold);
+    }
+
+    /** Returns the per-configCode threshold when one is configured, otherwise the global simThreshold. */
+    private double effectiveSimThreshold(String configCode, RankingParams params) {
+        if (configCode != null && params.getSimThresholdsByCode() != null) {
+            Double override = params.getSimThresholdsByCode().get(configCode);
+            if (override != null) {
+                return override;
+            }
+        }
+        return params.getSimThreshold();
+    }
+
+    // ================================================================
+    // WP3 T3.4 adaptive batch ranking
+    // ================================================================
+
+    /**
+     * Ranks a candidate set with bounds learned from the set itself.
+     *
+     * <p>Items whose sim is missing or non-finite are ignored when learning the bounds. Every item
+     * gets its rankScore rewritten: the composite when it passes the threshold, {@code null} otherwise,
+     * so a score left over from an earlier pass cannot leak into the result.
+     *
+     * @param items      candidate set; {@code null} or empty yields an empty/unchanged result
+     * @param baseParams base parameters; its simThreshold acts as the absolute floor
+     * @return items that passed the threshold, sorted by rankScore descending, plus the effective parameters
+     */
+    @Override
+    public AdaptiveRankResult adaptiveRank(List<VideoMatchEnrichedVO> items, RankingParams baseParams) {
+        if (items == null || items.isEmpty()) {
+            return AdaptiveRankResult.of(items != null ? items : new ArrayList<>(),
+                    baseParams, baseParams.getRovClipLow(), baseParams.getRovClipHigh(),
+                    baseParams.getSimThreshold(), 0);
+        }
+
+        // Collect the max sim and the positive ROV samples from items with a usable sim.
+        boolean hasSim = false;
+        double maxSim = Double.NEGATIVE_INFINITY;
+        List<Double> rovs = new ArrayList<>();
+        for (VideoMatchEnrichedVO item : items) {
+            RecallSignalsVO signals = item != null ? item.getSignals() : null;
+            if (signals == null || signals.getSim() == null || !Double.isFinite(signals.getSim())) {
+                continue;
+            }
+            hasSim = true;
+            maxSim = Math.max(maxSim, signals.getSim());
+            Double rov = signals.getRov();
+            if (rov != null && Double.isFinite(rov) && rov > 0) {
+                rovs.add(rov);
+            }
+        }
+
+        if (!hasSim) {
+            return AdaptiveRankResult.of(items, baseParams,
+                    baseParams.getRovClipLow(), baseParams.getRovClipHigh(),
+                    baseParams.getSimThreshold(), 0);
+        }
+
+        // Adaptive ROV bounds: P5 / P95 of the sample when it is large enough.
+        double rovLow = baseParams.getRovClipLow();
+        double rovHigh = baseParams.getRovClipHigh();
+        if (rovs.size() >= MIN_ROV_SAMPLES) {
+            rovs.sort(Double::compare);
+            int lowIdx = Math.max(0, (int) (rovs.size() * ROV_LOW_QUANTILE));
+            int highIdx = Math.min(rovs.size() - 1, (int) (rovs.size() * ROV_HIGH_QUANTILE));
+            rovLow = rovs.get(lowIdx);
+            rovHigh = rovs.get(highIdx);
+            // Keep the bounds from collapsing onto each other.
+            if (rovHigh - rovLow < MIN_ROV_SPREAD) {
+                rovHigh = rovLow + FALLBACK_ROV_SPREAD;
+            }
+        }
+
+        // Adaptive sim floor: max(absolute fallback, maxSim - delta), clamped to a safe range.
+        double simFloor = Math.max(baseParams.getSimThreshold(), maxSim - REL_SIM_DELTA);
+        simFloor = Math.max(SIM_FLOOR_MIN, Math.min(SIM_FLOOR_MAX, simFloor));
+
+        RankingParams adaptiveParams = copyWithBounds(baseParams, simFloor, rovLow, rovHigh);
+
+        // Score every item and keep those that pass the threshold.
+        List<VideoMatchEnrichedVO> scored = new ArrayList<>(items.size());
+        for (VideoMatchEnrichedVO item : items) {
+            if (item == null) {
+                continue;
+            }
+            ScoreBreakdown b = rank(item, adaptiveParams);
+            if (b != null && b.isPassesThreshold()) {
+                item.setRankScore(b.getComposite());
+                scored.add(item);
+            } else {
+                // Clear any score left by a previous ranking pass.
+                item.setRankScore(null);
+            }
+        }
+        scored.sort(Comparator.comparing(VideoMatchEnrichedVO::getRankScore, Comparator.reverseOrder()));
+
+        return AdaptiveRankResult.of(scored, adaptiveParams, rovLow, rovHigh, simFloor, maxSim);
+    }
+
+    /** Copies every base parameter (including the shrink priors) and installs the adaptive bounds. */
+    private static RankingParams copyWithBounds(RankingParams base, double simFloor,
+                                                double rovLow, double rovHigh) {
+        RankingParams copy = new RankingParams();
+        copy.setSimThreshold(simFloor);
+        copy.setRovClipLow(rovLow);
+        copy.setRovClipHigh(rovHigh);
+        // Null maps are skipped so the copy keeps its own empty defaults.
+        if (base.getSimThresholdsByCode() != null) {
+            copy.setSimThresholdsByCode(base.getSimThresholdsByCode());
+        }
+        if (base.getBoostsByCode() != null) {
+            copy.setBoostsByCode(base.getBoostsByCode());
+        }
+        copy.setAlpha(base.getAlpha());
+        copy.setDeconstructBoost(base.getDeconstructBoost());
+        copy.setWCtr(base.getWCtr());
+        copy.setWViral(base.getWViral());
+        copy.setWRoi(base.getWRoi());
+        copy.setMaterialMissingStrategy(base.getMaterialMissingStrategy());
+        // Without the priors the "shrink" strategy would silently degrade to "group".
+        copy.setPriorCtr(base.getPriorCtr());
+        copy.setPriorViral(base.getPriorViral());
+        copy.setPriorRoi(base.getPriorRoi());
+        return copy;
+    }
+}

+ 73 - 0
core/src/main/java/com/tzld/videoVector/service/rank/RankingParams.java

@@ -0,0 +1,73 @@
+package com.tzld.videoVector.service.rank;
+
+import lombok.Data;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Ranking parameters: the single source shared by the front end and the back end.
+ * The front-end scoring.ts DEFAULT_RANKING_PARAMS is the authoritative set of defaults.
+ *
+ * Formula (VIDEO/ARTICLE):
+ *   sim_norm = clip((sim - simThreshold) / (1 - simThreshold), 0, 1)
+ *   rov_norm = clip((rov - rovClipLow) / (rovClipHigh - rovClipLow), 0, 1)
+ *   composite = boost × (alpha × sim_norm + (1 - alpha) × rov_norm)
+ *
+ * Formula (MATERIAL):
+ *   sim_norm = clip((sim - simThreshold) / (1 - simThreshold), 0, 1)
+ *   qualityScore = (wCtr × ctr + wViral × viral + wRoi × roi) / qualTotalW
+ *   composite = alpha × sim_norm + (1 - alpha) × qualityScore
+ *   materialMissingStrategy selects how materials without quality data are handled.
+ */
+@Data
+public class RankingParams {
+
+    /** Hard filter threshold: items with sim < simThreshold are dropped. */
+    private double simThreshold = 0.65;
+
+    /** Per-configCode overrides of simThreshold. */
+    private Map<String, Double> simThresholdsByCode = new HashMap<>();
+
+    /** Lower bound for ROV normalisation (low clip value). */
+    private double rovClipLow = 0;
+
+    /** Upper bound for ROV normalisation (high clip value). */
+    private double rovClipHigh = 0.07;
+
+    /** Relevance-vs-quality weight in [0, 1], shared by VIDEO/ARTICLE/MATERIAL. */
+    private double alpha = 0.6;
+
+    /** Fallback boost, used for dimensions that have no entry in boostsByCode. */
+    private double deconstructBoost = 1.0;
+
+    /** Per-dimension boost: each configCode may carry its own value, overriding deconstructBoost. */
+    private Map<String, Double> boostsByCode = new HashMap<>();
+
+    /** Material quality weight for open rate, default 0.5 (sums to 1 with wViral and wRoi). */
+    private double wCtr = 0.5;
+
+    /** Material quality weight for viral rate, default 0.3. */
+    private double wViral = 0.3;
+
+    /** Material quality weight for ROI, default 0.2. */
+    private double wRoi = 0.2;
+
+    /**
+     * Strategy for materials without quality data: "group" (shown as a separate group) or "shrink" (confidence shrinkage).
+     * Defaults to "group".
+     */
+    private String materialMissingStrategy = "group";
+
+    /** Prior means for the shrink strategy (only meaningful when materialMissingStrategy="shrink"). */
+    private Double priorCtr;
+    private Double priorViral;
+    private Double priorRoi;
+
+    /**
+     * Returns a RankingParams carrying the field initialisers above (intended to match the front-end DEFAULT_RANKING_PARAMS).
+     */
+    public static RankingParams defaults() {
+        return new RankingParams();
+    }
+}

+ 43 - 0
core/src/main/java/com/tzld/videoVector/service/rank/ScoreBreakdown.java

@@ -0,0 +1,43 @@
+package com.tzld.videoVector.service.rank;
+
+import lombok.Data;
+
+/**
+ * Per-item decomposition of the ranking score; mirrors the front-end ScoreBreakdown.
+ */
+@Data
+public class ScoreBreakdown {
+
+    /** Final composite score. */
+    private double composite;
+
+    /** Normalised similarity. */
+    private double simNorm;
+
+    /** Normalised ROV. */
+    private double rovNorm;
+
+    /** Boost multiplier that was applied. */
+    private double boost;
+
+    /** Similarity lower bound the item was judged against. */
+    private double lowerBound;
+
+    /** Whether sim reached the hard threshold. */
+    private boolean passesThreshold;
+
+    /** Set to true when a material has no quality data, so the front end can group it separately. */
+    private boolean qualityMissing;
+
+    /**
+     * Builds a breakdown from its components; {@code qualityMissing} is left at its default of false.
+     *
+     * @param composite       final composite score
+     * @param simNorm         normalised similarity
+     * @param rovNorm         normalised ROV
+     * @param boost           boost multiplier
+     * @param lowerBound      similarity lower bound
+     * @param passesThreshold whether sim reached the lower bound
+     * @return a populated breakdown
+     */
+    public static ScoreBreakdown of(double composite, double simNorm, double rovNorm,
+                                    double boost, double lowerBound, boolean passesThreshold) {
+        ScoreBreakdown breakdown = new ScoreBreakdown();
+        // Threshold information first, then the score parts; the assignments are independent.
+        breakdown.passesThreshold = passesThreshold;
+        breakdown.lowerBound = lowerBound;
+        breakdown.boost = boost;
+        breakdown.rovNorm = rovNorm;
+        breakdown.simNorm = simNorm;
+        breakdown.composite = composite;
+        return breakdown;
+    }
+}

+ 10 - 0
core/src/main/java/com/tzld/videoVector/service/recall/VectorRecallTestService.java

@@ -1,5 +1,6 @@
 package com.tzld.videoVector.service.recall;
 
+import com.tzld.videoVector.model.param.recall.BatchByTextParam;
 import com.tzld.videoVector.model.param.recall.MatchByArticleIdParam;
 import com.tzld.videoVector.model.param.recall.MatchByMaterialIdParam;
 import com.tzld.videoVector.model.param.recall.MatchByTextParam;
@@ -94,4 +95,13 @@ public interface VectorRecallTestService {
      * @return 长文基础信息,不存在返回 null
      */
     ArticleBasicVO getArticleDetail(String articleId);
+
+    /**
+     * Batch text recall: a single embedding followed by parallel ANN over several configCodes.
+     * Replaces N front-end matchByText calls; the server de-duplicates by (modality, id) and keeps the max sim.
+     *
+     * @param param batch recall parameters
+     * @return the merged, de-duplicated recall result
+     */
+    RecallResultVO batchByText(BatchByTextParam param);
 }

Fichier diff supprimé car celui-ci est trop grand
+ 664 - 68
core/src/main/java/com/tzld/videoVector/service/recall/impl/VectorRecallTestServiceImpl.java


+ 13 - 0
server/src/main/java/com/tzld/videoVector/controller/VectorRecallTestController.java

@@ -1,6 +1,7 @@
 package com.tzld.videoVector.controller;
 
 import com.tzld.videoVector.common.base.CommonResponse;
+import com.tzld.videoVector.model.param.recall.BatchByTextParam;
 import com.tzld.videoVector.model.param.recall.MatchByArticleIdParam;
 import com.tzld.videoVector.model.param.recall.MatchByMaterialIdParam;
 import com.tzld.videoVector.model.param.recall.MatchByTextParam;
@@ -118,4 +119,16 @@ public class VectorRecallTestController {
     public CommonResponse<ArticleBasicVO> articleDetail(@RequestParam("articleId") String articleId) {
         return CommonResponse.success(vectorRecallTestService.getArticleDetail(articleId));
     }
+
+    /**
+     * Batch text recall (WP3): one embedding, then parallel ANN over several configCodes.
+     * Stands in for N front-end matchByText calls; the server de-duplicates by (modality, id), keeping the max sim.
+     *
+     * POST /videoVector/recallTest/batchByText
+     * body: { "queryText": "...", "configCodes": ["VIDEO_TOPIC", ...], "displayK": 50 }
+     */
+    @PostMapping("/batchByText")
+    public CommonResponse<RecallResultVO> batchByText(@RequestBody BatchByTextParam param) {
+        RecallResultVO recallResult = vectorRecallTestService.batchByText(param);
+        return CommonResponse.success(recallResult);
+    }
 }

Certains fichiers n'ont pas été affichés car il y a eu trop de fichiers modifiés dans ce diff