Просмотр исходного кода

Merge branch 'feature_vector_recall_0429' into master

刘立冬 3 часов назад
Родитель
Сommit
b76fd61143
17 измененных файлов с 1184 добавлено и 10 удалено
  1. 6 1
      .gitignore
  2. 35 0
      core/src/main/java/com/tzld/videoVector/common/enums/Modality.java
  3. 22 0
      core/src/main/java/com/tzld/videoVector/dao/mapper/videoVector/VideoAiUnderstandingMapper.java
  4. 23 0
      core/src/main/java/com/tzld/videoVector/model/param/recall/MatchByTextParam.java
  5. 23 0
      core/src/main/java/com/tzld/videoVector/model/param/recall/MatchByVideoIdParam.java
  6. 41 0
      core/src/main/java/com/tzld/videoVector/model/po/videoVector/VideoAiUnderstanding.java
  7. 34 0
      core/src/main/java/com/tzld/videoVector/model/vo/recall/AIUnderstandingVO.java
  8. 58 0
      core/src/main/java/com/tzld/videoVector/model/vo/recall/DeconstructPointsVO.java
  9. 28 0
      core/src/main/java/com/tzld/videoVector/model/vo/recall/RecallResultVO.java
  10. 28 0
      core/src/main/java/com/tzld/videoVector/model/vo/recall/VideoBasicVO.java
  11. 58 0
      core/src/main/java/com/tzld/videoVector/model/vo/recall/VideoMatchEnrichedVO.java
  12. 61 0
      core/src/main/java/com/tzld/videoVector/service/recall/VectorRecallTestService.java
  13. 402 0
      core/src/main/java/com/tzld/videoVector/service/recall/impl/VectorRecallTestServiceImpl.java
  14. 24 0
      core/src/main/resources/sql/video_ai_understanding.sql
  15. 253 0
      script/recall_regression_test.sh
  16. 8 9
      server/src/main/java/com/tzld/videoVector/Application.java
  17. 80 0
      server/src/main/java/com/tzld/videoVector/controller/VectorRecallTestController.java

+ 6 - 1
.gitignore

@@ -34,4 +34,9 @@ build/
 ### Mac OS ###
 .DS_Store
 
-*.log
+*.log
+### Runtime caches ###
+config-cache/
+
+### 回归测试生成的快照 ###
+script/recall_test_results/

+ 35 - 0
core/src/main/java/com/tzld/videoVector/common/enums/Modality.java

@@ -0,0 +1,35 @@
+package com.tzld.videoVector.common.enums;
+
+/**
+ * Modality of a recall result.
+ *
+ * Mapping from deconstruct_content.content_type:
+ *   content_type = 3 (video)         -> VIDEO
+ *   content_type = 2 (image + text)  -> MATERIAL
+ *   content_type = 1 (long article)  -> ARTICLE
+ *   content_type missing / unknown   -> VIDEO (default agreed with the product owner)
+ */
+public enum Modality {
+    VIDEO,
+    MATERIAL,
+    ARTICLE;
+
+    /**
+     * Resolves the modality for a raw content_type value.
+     *
+     * The parameter is typed as Number so that PO classes declaring the column as
+     * Byte, Short or any other numeric wrapper can all be passed in directly.
+     *
+     * @param contentType raw content_type value, may be null
+     * @return ARTICLE for 1, MATERIAL for 2, VIDEO for everything else (including null)
+     */
+    public static Modality fromContentType(Number contentType) {
+        if (contentType != null) {
+            int code = contentType.intValue();
+            if (code == 1) {
+                return ARTICLE;
+            }
+            if (code == 2) {
+                return MATERIAL;
+            }
+        }
+        // 3, null and every unrecognised code all resolve to VIDEO.
+        return VIDEO;
+    }
+}

+ 22 - 0
core/src/main/java/com/tzld/videoVector/dao/mapper/videoVector/VideoAiUnderstandingMapper.java

@@ -0,0 +1,22 @@
+package com.tzld.videoVector.dao.mapper.videoVector;
+
+import com.tzld.videoVector.model.po.videoVector.VideoAiUnderstanding;
+import org.apache.ibatis.annotations.Param;
+import org.apache.ibatis.annotations.Select;
+
+/**
+ * Mapper for video AI-understanding results.
+ * Per the author's notes, rows are written by a DataWorks offline sync job that extracts them from ODPS loghubods.result_log.
+ *
+ * During the MVP the table is expected to be empty (sync job not implemented yet), so selectByVideoId yields null,
+ * and the service layer passes that null on so the front end can render a "not ready" placeholder.
+ */
+public interface VideoAiUnderstandingMapper {
+
+    @Select("SELECT video_id AS videoId, content_topic AS contentTopic, " +
+            "video_theme AS videoTheme, video_keywords AS videoKeywords, " +
+            "video_narration AS videoNarration, raw_data AS rawData, " +
+            "dt, sync_time AS syncTime " +
+            "FROM video_ai_understanding WHERE video_id = #{videoId} LIMIT 1")
+    VideoAiUnderstanding selectByVideoId(@Param("videoId") Long videoId);
+}

+ 23 - 0
core/src/main/java/com/tzld/videoVector/model/param/recall/MatchByTextParam.java

@@ -0,0 +1,23 @@
+package com.tzld.videoVector.model.param.recall;
+
+import lombok.Data;
+
+/**
+ * Request parameters for text-based recall (Tab 2, free-text input).
+ */
+@Data
+public class MatchByTextParam {
+
+    /** Query text. */
+    private String queryText;
+
+    /**
+     * Vector configuration code.
+     * Currently supported: VIDEO_TOPIC (topic) / VIDEO_INSPIRATION (inspiration point).
+     * Per the author's notes VIDEO_TOPIC is used when omitted; the fallback itself is not implemented in this class.
+     */
+    private String configCode;
+
+    /** Number of results to return (Top-N); defaults to 10. */
+    private Integer topN = 10;
+}

+ 23 - 0
core/src/main/java/com/tzld/videoVector/model/param/recall/MatchByVideoIdParam.java

@@ -0,0 +1,23 @@
+package com.tzld.videoVector.model.param.recall;
+
+import lombok.Data;
+
+/**
+ * Request parameters for recalling items similar to a video/material ID (triggered by clicking a deconstruction-tree node in Tab 1, or by "recall with this video").
+ */
+@Data
+public class MatchByVideoIdParam {
+
+    /** Video ID, or the numeric form of a channelContentId. */
+    private Long videoId;
+
+    /**
+     * Vector configuration code.
+     * Currently supported: VIDEO_TOPIC / VIDEO_INSPIRATION.
+     * Per the author's notes VIDEO_TOPIC is used when omitted; the fallback itself is not implemented in this class.
+     */
+    private String configCode;
+
+    /** Number of results to return (Top-N); defaults to 10. */
+    private Integer topN = 10;
+}

+ 41 - 0
core/src/main/java/com/tzld/videoVector/model/po/videoVector/VideoAiUnderstanding.java

@@ -0,0 +1,41 @@
+package com.tzld.videoVector.model.po.videoVector;
+
+import lombok.Data;
+
+import java.util.Date;
+
+/**
+ * Video AI-understanding result (local cache table).
+ *
+ * Data source (per the author's notes): Alibaba Cloud MaxCompute loghubods.result_log; a DataWorks sync job
+ * extracts the key fields into the local MySQL. The table structure is created first; data arrives once the job exists.
+ *
+ * Table DDL: core/src/main/resources/sql/video_ai_understanding.sql
+ */
+@Data
+public class VideoAiUnderstanding {
+
+    /** Video ID, primary key. */
+    private Long videoId;
+
+    /** Content topic. */
+    private String contentTopic;
+
+    /** Video theme. */
+    private String videoTheme;
+
+    /** Video keywords. */
+    private String videoKeywords;
+
+    /** Video narration (voice-over). */
+    private String videoNarration;
+
+    /** Original "data" field as a JSON string (kept in full). */
+    private String rawData;
+
+    /** Partition the row belongs to, usually formatted yyyyMMddHH. */
+    private String dt;
+
+    /** Sync time. */
+    private Date syncTime;
+}

+ 34 - 0
core/src/main/java/com/tzld/videoVector/model/vo/recall/AIUnderstandingVO.java

@@ -0,0 +1,34 @@
+package com.tzld.videoVector.model.vo.recall;
+
+import lombok.Data;
+
+/**
+ * AI-understanding result VO.
+ *
+ * Data source (per the author's notes): ODPS loghubods.result_log (large and slow) -> DataWorks sync job
+ * -> local table video_ai_understanding.
+ *
+ * During the MVP the local table is empty, so no data is returned and the front end shows an "AI understanding data not ready, waiting for sync job" notice.
+ * The backend must never fabricate any of these fields.
+ */
+@Data
+public class AIUnderstandingVO {
+
+    /** Video ID. */
+    private Long videoId;
+
+    /** Content topic. */
+    private String contentTopic;
+
+    /** Video theme. */
+    private String videoTheme;
+
+    /** Video keywords. */
+    private String videoKeywords;
+
+    /** Video narration (voice-over). */
+    private String videoNarration;
+
+    /** Partition the data belongs to (yyyyMMddHH). */
+    private String dt;
+}

+ 58 - 0
core/src/main/java/com/tzld/videoVector/model/vo/recall/DeconstructPointsVO.java

@@ -0,0 +1,58 @@
+package com.tzld.videoVector.model.vo.recall;
+
+import lombok.Data;
+
+import java.util.List;
+
+/**
+ * Video deconstruction response (filtered to high-value points whose essence score is >= 0.8).
+ *
+ * Data flow (per the author's notes): Singapore RDS table aigc_topic_decode_task_result
+ *           -> parsed and filtered by a local Python script -> written to the mainland-China Redis
+ *           -> backend reads Redis directly by vid (key = recall:vid_decode:{vid})
+ */
+@Data
+public class DeconstructPointsVO {
+
+    /** Video ID. */
+    private Long vid;
+
+    /** Video title. */
+    private String title;
+
+    /** Video URL. */
+    private String videoUrl;
+
+    /** URL of the weighted visualisation page. */
+    private String htmlUrl;
+
+    /** Topic (the "topic" field of the final topic selection). */
+    private String topic;
+
+    /** High-value points whose essence score is >= 0.8. */
+    private List<HighValuePoint> highValuePoints;
+
+    @Data
+    public static class HighValuePoint {
+        /** Business-side ID, e.g. inspiration_1 / purpose_1 / kp_xxxxxx. */
+        private String id;
+
+        /** Point type; the stored values are the Chinese labels for inspiration point / purpose point / key point. */
+        private String type;
+
+        /** Descriptive name. */
+        private String name;
+
+        /** "Essence" words extracted from the description, each with score >= 0.8. */
+        private List<EssenceWord> essences;
+    }
+
+    @Data
+    public static class EssenceWord {
+        /** Essence word. */
+        private String word;
+
+        /** Word-level contribution, in the range 0 to 1. */
+        private Double score;
+    }
+}

+ 28 - 0
core/src/main/java/com/tzld/videoVector/model/vo/recall/RecallResultVO.java

@@ -0,0 +1,28 @@
+package com.tzld.videoVector.model.vo.recall;
+
+import lombok.Data;
+
+import java.util.List;
+
+/**
+ * Wrapper for recall results.
+ * The front end can group items by their modality field to render one tab per modality.
+ */
+@Data
+public class RecallResultVO {
+
+    /** Recall results (already enriched, carrying modality information). */
+    private List<VideoMatchEnrichedVO> items;
+
+    /** Number of video hits. */
+    private int videoCount;
+
+    /** Number of material hits. */
+    private int materialCount;
+
+    /** Number of long-article hits. */
+    private int articleCount;
+
+    /** Total number of items. */
+    private int total;
+}

+ 28 - 0
core/src/main/java/com/tzld/videoVector/model/vo/recall/VideoBasicVO.java

@@ -0,0 +1,28 @@
+package com.tzld.videoVector.model.vo.recall;
+
+import lombok.Data;
+
+/**
+ * Basic video details VO (returned by the Tab 1 video-ID lookup).
+ */
+@Data
+public class VideoBasicVO {
+
+    /** Video ID. */
+    private Long videoId;
+
+    /** Title. */
+    private String title;
+
+    /** Video playback URL. */
+    private String videoUrl;
+
+    /** Cover image. */
+    private String cover;
+
+    /**
+     * Play count.
+     * Per the author's notes the long-video API does not return a play count yet, so the honest placeholder "--" is used.
+     */
+    private String playCount = "--";
+}

+ 58 - 0
core/src/main/java/com/tzld/videoVector/model/vo/recall/VideoMatchEnrichedVO.java

@@ -0,0 +1,58 @@
+package com.tzld.videoVector.model.vo.recall;
+
+import com.tzld.videoVector.common.enums.Modality;
+import lombok.Data;
+
+import java.util.List;
+
+/**
+ * A single recall result after modality-aware enrichment.
+ *
+ * Recall is a symmetric many-to-many design: one recall may return videos and materials mixed together.
+ * The modality field tells them apart so the front end can group items by modality.
+ */
+@Data
+public class VideoMatchEnrichedVO {
+
+    /** Business ID (wx_video.id for videos; the numeric form of channelContentId for materials). */
+    private Long id;
+
+    /** Modality. */
+    private Modality modality;
+
+    /** Code of the vector configuration that produced the hit, e.g. VIDEO_TOPIC / VIDEO_INSPIRATION. */
+    private String configCode;
+
+    /** Similarity score. */
+    private Double score;
+
+    /** Title. */
+    private String title;
+
+    /** Cover / thumbnail. */
+    private String cover;
+
+    /** Video URL (only meaningful when modality = VIDEO). */
+    private String videoUrl;
+
+    /** Image list (only meaningful when modality = MATERIAL). */
+    private List<String> imageList;
+
+    /** Body text (only meaningful when modality = ARTICLE). */
+    private String bodyText;
+
+    /** Play count; placeholder "--". */
+    private String playCount;
+
+    /** Exposure count; placeholder "--". */
+    private String exposure;
+
+    /** CTR; placeholder "--". */
+    private String ctr;
+
+    /** Read count; placeholder "--". */
+    private String readCount;
+
+    /** ROV; placeholder "--". */
+    private String rov;
+}

+ 61 - 0
core/src/main/java/com/tzld/videoVector/service/recall/VectorRecallTestService.java

@@ -0,0 +1,61 @@
+package com.tzld.videoVector.service.recall;
+
+import com.tzld.videoVector.model.param.recall.MatchByTextParam;
+import com.tzld.videoVector.model.param.recall.MatchByVideoIdParam;
+import com.tzld.videoVector.model.vo.recall.AIUnderstandingVO;
+import com.tzld.videoVector.model.vo.recall.DeconstructPointsVO;
+import com.tzld.videoVector.model.vo.recall.RecallResultVO;
+import com.tzld.videoVector.model.vo.recall.VideoBasicVO;
+
+/**
+ * Vector recall test service.
+ * Reuses existing services (e.g. VideoSearchService / VideoApiService) and assembles their data at this layer.
+ */
+public interface VectorRecallTestService {
+
+    /**
+     * Returns basic video details (used by Tab 1).
+     * Backed by the long-video API.
+     *
+     * @param videoId video ID
+     * @return VideoBasicVO, or null when the video does not exist
+     */
+    VideoBasicVO getVideoDetail(Long videoId);
+
+    /**
+     * Text recall followed by modality-aware enrichment (used by Tab 2).
+     *
+     * @param param text recall parameters
+     * @return recall result (already enriched)
+     */
+    RecallResultVO matchByText(MatchByTextParam param);
+
+    /**
+     * Returns the deconstruction points of a video/material.
+     * VectorRecallTestServiceImpl reads a pre-computed JSON value from Redis (key recall:vid_decode:{vid}) and deserialises it.
+     *
+     * @param videoId video ID, or the numeric form of a channelContentId
+     * @return deconstruction VO, or null when no record exists
+     */
+    DeconstructPointsVO getDeconstructPoints(Long videoId);
+
+    /**
+     * Recalls items similar to a video ID (triggered by clicking a deconstruction node in Tab 1).
+     * Internally delegates to matchTopNVideo, passing the ID as a channelContentId.
+     *
+     * @param param parameters
+     * @return recall result (already enriched)
+     */
+    RecallResultVO matchByVideoId(MatchByVideoIdParam param);
+
+    /**
+     * Returns the AI-understanding result of a video (used by Tab 1).
+     *
+     * Data source: local MySQL table video_ai_understanding (to be filled by a DataWorks sync job).
+     * During the MVP the table is empty, so the real query returns null and the front end shows a "not ready" placeholder.
+     *
+     * @param videoId video ID
+     * @return AI-understanding result, or null when there is no data (mocking is strictly forbidden)
+     */
+    AIUnderstandingVO getAiUnderstanding(Long videoId);
+}

+ 402 - 0
core/src/main/java/com/tzld/videoVector/service/recall/impl/VectorRecallTestServiceImpl.java

@@ -0,0 +1,402 @@
+package com.tzld.videoVector.service.recall.impl;
+
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONArray;
+import com.tzld.videoVector.api.VideoApiService;
+import com.tzld.videoVector.common.enums.Modality;
+import com.tzld.videoVector.dao.mapper.videoVector.VideoAiUnderstandingMapper;
+import com.tzld.videoVector.dao.mapper.videoVector.deconstruct.MysqlDeconstructContentMapper;
+import com.tzld.videoVector.model.entity.VideoDetail;
+import com.tzld.videoVector.model.param.MatchTopNVideoParam;
+import com.tzld.videoVector.model.param.recall.MatchByTextParam;
+import com.tzld.videoVector.model.param.recall.MatchByVideoIdParam;
+import com.tzld.videoVector.model.po.videoVector.VideoAiUnderstanding;
+import com.tzld.videoVector.model.po.videoVector.deconstruct.MysqlDeconstructContent;
+import com.tzld.videoVector.model.po.videoVector.deconstruct.MysqlDeconstructContentExample;
+import com.tzld.videoVector.model.vo.VideoMatchResult;
+import com.tzld.videoVector.model.vo.recall.AIUnderstandingVO;
+import com.tzld.videoVector.model.vo.recall.DeconstructPointsVO;
+import com.tzld.videoVector.model.vo.recall.RecallResultVO;
+import com.tzld.videoVector.model.vo.recall.VideoBasicVO;
+import com.tzld.videoVector.model.vo.recall.VideoMatchEnrichedVO;
+import com.tzld.videoVector.service.VideoSearchService;
+import com.tzld.videoVector.service.recall.VectorRecallTestService;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.data.redis.core.StringRedisTemplate;
+import org.springframework.stereotype.Service;
+import org.springframework.util.CollectionUtils;
+import org.springframework.util.StringUtils;
+
+import javax.annotation.Resource;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * 向量召回测试 Service 实现
+ * 核心职责: 调度现有 Service + 数据组装 + 模态感知 enrich
+ * 严禁 mock 任何业务数据。
+ */
+@Slf4j
+@Service
+public class VectorRecallTestServiceImpl implements VectorRecallTestService {
+
+    @Autowired
+    private VideoSearchService videoSearchService;
+
+    @Resource
+    private VideoApiService videoApiService;
+
+    @Autowired
+    private MysqlDeconstructContentMapper mysqlDeconstructContentMapper;
+
+    @Autowired(required = false)
+    private VideoAiUnderstandingMapper videoAiUnderstandingMapper;
+
+    @Autowired(required = false)
+    private StringRedisTemplate stringRedisTemplate;
+
+    private static final String PLACEHOLDER = "--";
+
+    /**
+     * Redis Key: recall:vid_decode:{vid}
+     * Value: 本地脚本(script/sync_decode_to_redis.py)解析后的瘦身 JSON,
+     *        含 vid/title/videoUrl/htmlUrl/topic/highValuePoints
+     */
+    private static final String REDIS_KEY_DECODE_PREFIX = "recall:vid_decode:";
+
+    /**
+     * Loads the basic details of one video through the video API.
+     *
+     * @param videoId video id; null or non-positive values are rejected
+     * @return the populated VO, or null when the id is invalid or the API has no entry for it
+     */
+    @Override
+    public VideoBasicVO getVideoDetail(Long videoId) {
+        if (videoId == null || videoId <= 0L) {
+            return null;
+        }
+        Map<Long, VideoDetail> detailsById = videoApiService.getVideoDetail(Collections.singleton(videoId));
+        // A null map from the downstream API is treated as "not found" instead of raising an NPE.
+        VideoDetail detail = (detailsById == null) ? null : detailsById.get(videoId);
+        if (detail == null) {
+            log.info("getVideoDetail: video not found, videoId={}", videoId);
+            return null;
+        }
+        VideoBasicVO vo = new VideoBasicVO();
+        vo.setVideoId(videoId);
+        vo.setTitle(detail.getTitle());
+        vo.setVideoUrl(detail.getVideoPath());
+        vo.setCover(detail.getCover());
+        // playCount is left at the VO's own default placeholder; no play count is read from the detail here.
+        return vo;
+    }
+
+    @Override
+    public RecallResultVO matchByText(MatchByTextParam param) {
+        RecallResultVO empty = emptyResult();
+        if (param == null || !StringUtils.hasText(param.getQueryText())) {
+            log.warn("matchByText: queryText 为空");
+            return empty;
+        }
+
+        // 1. 调用现有召回 Service
+        MatchTopNVideoParam matchParam = new MatchTopNVideoParam();
+        matchParam.setQueryText(param.getQueryText());
+        matchParam.setConfigCode(param.getConfigCode());
+        matchParam.setTopN(param.getTopN() != null && param.getTopN() > 0 ? param.getTopN() : 10);
+
+        List<VideoMatchResult> rawMatches = videoSearchService.matchTopNVideo(matchParam);
+        if (CollectionUtils.isEmpty(rawMatches)) {
+            return empty;
+        }
+
+        // 2. 解析并 enrich
+        return enrich(rawMatches);
+    }
+
+    @Override
+    public RecallResultVO matchByVideoId(MatchByVideoIdParam param) {
+        RecallResultVO empty = emptyResult();
+        if (param == null || param.getVideoId() == null || param.getVideoId() <= 0L) {
+            return empty;
+        }
+
+        // 转调现有 matchTopNVideo,以 channelContentId 字符串形式传入
+        MatchTopNVideoParam matchParam = new MatchTopNVideoParam();
+        matchParam.setChannelContentId(String.valueOf(param.getVideoId()));
+        matchParam.setConfigCode(param.getConfigCode());
+        matchParam.setTopN(param.getTopN() != null && param.getTopN() > 0 ? param.getTopN() : 10);
+
+        List<VideoMatchResult> rawMatches = videoSearchService.matchTopNVideo(matchParam);
+        if (CollectionUtils.isEmpty(rawMatches)) {
+            return empty;
+        }
+        return enrich(rawMatches);
+    }
+
+    @Override
+    public DeconstructPointsVO getDeconstructPoints(Long videoId) {
+        if (videoId == null || videoId <= 0L) {
+            return null;
+        }
+        if (stringRedisTemplate == null) {
+            log.warn("getDeconstructPoints: stringRedisTemplate 未注入");
+            return null;
+        }
+        String vid = String.valueOf(videoId);
+        String key = REDIS_KEY_DECODE_PREFIX + vid;
+        String json;
+        try {
+            json = stringRedisTemplate.opsForValue().get(key);
+        } catch (Exception e) {
+            log.error("getDeconstructPoints: read redis fail, vid={}, err={}", vid, e.getMessage(), e);
+            return null;
+        }
+        if (!StringUtils.hasText(json)) {
+            log.info("getDeconstructPoints: Redis 无 vid={} 的解构记录(脚本未同步或非视频内容)", vid);
+            return null;
+        }
+        try {
+            return JSON.parseObject(json, DeconstructPointsVO.class);
+        } catch (Exception e) {
+            log.error("getDeconstructPoints: parse redis value fail, vid={}, err={}", vid, e.getMessage(), e);
+            return null;
+        }
+    }
+
+    /**
+     * Returns the AI-understanding record of a video, or null when none is available.
+     *
+     * Null is returned for a null/non-positive id, when the mapper bean is not wired, when no row
+     * exists, and when the query throws (for example because the table has not been created yet).
+     * No placeholder data is ever fabricated.
+     *
+     * @param videoId video id
+     * @return the mapped VO, or null when no data is available
+     */
+    @Override
+    public AIUnderstandingVO getAiUnderstanding(Long videoId) {
+        if (videoId == null || videoId <= 0L) {
+            return null;
+        }
+        if (videoAiUnderstandingMapper == null) {
+            // Mapper bean is optional (required = false); without it there is nothing to query.
+            log.info("getAiUnderstanding: mapper not available, returning null. videoId={}", videoId);
+            return null;
+        }
+        try {
+            VideoAiUnderstanding po = videoAiUnderstandingMapper.selectByVideoId(videoId);
+            if (po == null) {
+                return null;
+            }
+            AIUnderstandingVO vo = new AIUnderstandingVO();
+            vo.setVideoId(po.getVideoId());
+            vo.setContentTopic(po.getContentTopic());
+            vo.setVideoTheme(po.getVideoTheme());
+            vo.setVideoKeywords(po.getVideoKeywords());
+            vo.setVideoNarration(po.getVideoNarration());
+            vo.setDt(po.getDt());
+            return vo;
+        } catch (Exception e) {
+            // Deliberate best-effort: any query failure is reported as "not ready" (null).
+            // The exception is passed as the trailing argument so the stack trace is logged and
+            // failures other than a missing table can still be diagnosed.
+            log.warn("getAiUnderstanding: query failed, table may not exist yet. videoId={}, err={}",
+                    videoId, e.getMessage(), e);
+            return null;
+        }
+    }
+
+    private String getStatusDesc(Number status) {
+        if (status == null) return "未知";
+        switch (status.intValue()) {
+            case 0: return "待处理";
+            case 1: return "处理中";
+            case 2: return "成功";
+            case 3: return "失败";
+            default: return "未知";
+        }
+    }
+
+    /**
+     * Enriches raw recall hits with modality-specific display data.
+     *
+     * Steps:
+     *  - drop null hits and hits without an id
+     *  - look up deconstruct_content rows by channel_content_id to obtain each hit's content_type
+     *  - hits classified as VIDEO are resolved through VideoApiService; when the API has no entry,
+     *    title and URL fall back to the deconstruct_content row if one exists
+     *  - MATERIAL / ARTICLE hits are filled from the deconstruct_content row only
+     *  - hits without a deconstruct_content row are treated as VIDEO
+     *
+     * Metric fields (play count, exposure, CTR, read count, ROV) are always set to the "--" placeholder.
+     *
+     * @param rawMatches hit list returned by the search service (callers pass a non-empty list)
+     * @return the enriched items in the original hit order, plus per-modality counts
+     */
+    private RecallResultVO enrich(List<VideoMatchResult> rawMatches) {
+        // Convert to the internal MatchItem form, skipping null hits and hits without an id.
+        List<MatchItem> matches = new ArrayList<>(rawMatches.size());
+        for (VideoMatchResult r : rawMatches) {
+            if (r == null || r.getVideoId() == null) {
+                continue;
+            }
+            MatchItem mi = new MatchItem();
+            mi.id = r.getVideoId();
+            mi.configCode = r.getConfigCode();
+            mi.score = r.getScore();
+            matches.add(mi);
+        }
+
+        if (matches.isEmpty()) {
+            return emptyResult();
+        }
+
+        // Distinct ids in string form, used to match channel_content_id.
+        Set<Long> allIds = matches.stream().map(m -> m.id).collect(Collectors.toSet());
+        List<String> idStrings = allIds.stream().map(String::valueOf).collect(Collectors.toList());
+
+        Map<String, MysqlDeconstructContent> contentByCcid = queryDeconstructContent(idStrings);
+
+        // Classify every hit and collect the ids that must be resolved through the video API.
+        Set<Long> videoIds = new HashSet<>();
+        Map<Long, Modality> modalityMap = new HashMap<>();
+
+        for (MatchItem m : matches) {
+            MysqlDeconstructContent c = contentByCcid.get(String.valueOf(m.id));
+            Modality modality = (c == null) ? Modality.VIDEO : Modality.fromContentType(c.getContentType());
+            modalityMap.put(m.id, modality);
+            if (modality == Modality.VIDEO) {
+                videoIds.add(m.id);
+            }
+        }
+
+        // Batch-load video details. A null map from the API is treated as "no details" so the
+        // VIDEO branch below degrades to the deconstruct_content fallback instead of throwing an NPE.
+        Map<Long, VideoDetail> videoDetails = Collections.emptyMap();
+        if (!videoIds.isEmpty()) {
+            Map<Long, VideoDetail> fetched = videoApiService.getVideoDetail(videoIds);
+            if (fetched != null) {
+                videoDetails = fetched;
+            }
+        }
+
+        // Assemble the VOs in hit order.
+        List<VideoMatchEnrichedVO> items = new ArrayList<>(matches.size());
+        int videoCount = 0;
+        int materialCount = 0;
+        int articleCount = 0;
+
+        for (MatchItem m : matches) {
+            Modality modality = modalityMap.get(m.id);
+            MysqlDeconstructContent content = contentByCcid.get(String.valueOf(m.id));
+
+            VideoMatchEnrichedVO vo = new VideoMatchEnrichedVO();
+            vo.setId(m.id);
+            vo.setModality(modality);
+            vo.setConfigCode(m.configCode);
+            vo.setScore(m.score);
+            vo.setPlayCount(PLACEHOLDER);
+            vo.setExposure(PLACEHOLDER);
+            vo.setCtr(PLACEHOLDER);
+            vo.setReadCount(PLACEHOLDER);
+            vo.setRov(PLACEHOLDER);
+
+            switch (modality) {
+                case VIDEO: {
+                    VideoDetail vd = videoDetails.get(m.id);
+                    if (vd != null) {
+                        vo.setTitle(vd.getTitle());
+                        vo.setVideoUrl(vd.getVideoPath());
+                        vo.setCover(vd.getCover());
+                    } else if (content != null) {
+                        // Not found through the video API: degrade to the local deconstruct_content row.
+                        vo.setTitle(content.getTitle());
+                        vo.setVideoUrl(content.getVideoUrl());
+                    }
+                    videoCount++;
+                    break;
+                }
+                case MATERIAL:
+                    if (content != null) {
+                        vo.setTitle(content.getTitle());
+                        vo.setImageList(parseImages(content.getImages()));
+                        // The first image doubles as the cover.
+                        if (!CollectionUtils.isEmpty(vo.getImageList())) {
+                            vo.setCover(vo.getImageList().get(0));
+                        }
+                    }
+                    materialCount++;
+                    break;
+                case ARTICLE:
+                    if (content != null) {
+                        vo.setTitle(content.getTitle());
+                        vo.setBodyText(content.getBodyText());
+                    }
+                    articleCount++;
+                    break;
+                default:
+                    // Any modality added later is counted as video, matching the default mapping.
+                    videoCount++;
+                    break;
+            }
+
+            items.add(vo);
+        }
+
+        RecallResultVO result = new RecallResultVO();
+        result.setItems(items);
+        result.setVideoCount(videoCount);
+        result.setMaterialCount(materialCount);
+        result.setArticleCount(articleCount);
+        result.setTotal(items.size());
+        return result;
+    }
+
+    /**
+     * Batch-loads deconstruct_content rows by channelContentId.
+     *
+     * When several rows share a channel_content_id, the row with the largest id is kept.
+     * Any failure is logged and reported as an empty map.
+     *
+     * @param channelContentIds ids to look up; null or empty yields an empty map
+     * @return rows keyed by channelContentId
+     */
+    private Map<String, MysqlDeconstructContent> queryDeconstructContent(List<String> channelContentIds) {
+        if (CollectionUtils.isEmpty(channelContentIds)) {
+            return Collections.emptyMap();
+        }
+        try {
+            MysqlDeconstructContentExample example = new MysqlDeconstructContentExample();
+            example.createCriteria().andChannelContentIdIn(channelContentIds);
+            List<MysqlDeconstructContent> rows = mysqlDeconstructContentMapper.selectByExample(example);
+
+            Map<String, MysqlDeconstructContent> latestByCcid = new HashMap<>();
+            for (MysqlDeconstructContent row : rows) {
+                String ccid = row.getChannelContentId();
+                if (ccid == null) {
+                    continue;
+                }
+                MysqlDeconstructContent existing = latestByCcid.get(ccid);
+                boolean replace;
+                if (existing == null) {
+                    // First row seen for this key.
+                    replace = true;
+                } else if (row.getId() == null) {
+                    // A row without an id never displaces one already stored.
+                    replace = false;
+                } else {
+                    replace = existing.getId() == null || row.getId() > existing.getId();
+                }
+                if (replace) {
+                    latestByCcid.put(ccid, row);
+                }
+            }
+            return latestByCcid;
+        } catch (Exception e) {
+            log.error("queryDeconstructContent error: {}", e.getMessage(), e);
+            return Collections.emptyMap();
+        }
+    }
+
+    /**
+     * Parses a JSON array string into a list of its non-blank string elements.
+     *
+     * @param imagesJson JSON array text, may be null or blank
+     * @return the non-blank entries in array order; an empty list for blank input,
+     *         a JSON null, or any parse failure
+     */
+    private List<String> parseImages(String imagesJson) {
+        if (!StringUtils.hasText(imagesJson)) {
+            return Collections.emptyList();
+        }
+        List<String> urls = Collections.emptyList();
+        try {
+            JSONArray parsed = JSON.parseArray(imagesJson);
+            if (parsed != null) {
+                urls = new ArrayList<>(parsed.size());
+                for (int idx = 0, count = parsed.size(); idx < count; idx++) {
+                    String url = parsed.getString(idx);
+                    if (StringUtils.hasText(url)) {
+                        urls.add(url);
+                    }
+                }
+            }
+        } catch (Exception e) {
+            log.warn("parseImages fail, json={}, err={}", imagesJson, e.getMessage());
+            // Discard anything collected before the failure.
+            urls = Collections.emptyList();
+        }
+        return urls;
+    }
+
+    /**
+     * Builds a result that carries no items and zero for every counter.
+     *
+     * @return a fresh, empty RecallResultVO
+     */
+    private RecallResultVO emptyResult() {
+        RecallResultVO blank = new RecallResultVO();
+        blank.setTotal(0);
+        blank.setArticleCount(0);
+        blank.setMaterialCount(0);
+        blank.setVideoCount(0);
+        blank.setItems(Collections.emptyList());
+        return blank;
+    }
+
+    /**
+     * One recall hit reduced to the fields enrich() needs: id, config code and score.
+     */
+    private static class MatchItem {
+        Long id;
+        String configCode;
+        Double score;
+    }
+}

+ 24 - 0
core/src/main/resources/sql/video_ai_understanding.sql

@@ -0,0 +1,24 @@
+-- Video AI-understanding results (local cache table)
+--
+-- Data source: Alibaba Cloud MaxCompute loghubods.result_log
+-- Sync method: DataWorks offline sync job; incremental extraction per dt (yyyyMMddHH) partition, parsing the "data" field
+-- Parse paths (see the JSON of the ODPS "data" field):
+--   content_topic    <- data."一、基础信息"."内容选题"
+--   video_theme      <- data."一、基础信息"."视频主题"
+--   video_keywords   <- data."一、基础信息"."视频关键词"
+--   video_narration  <- data."五、音画细节"."视频口播"
+--
+-- Created in the deconstruct-vector MySQL database (already used by video-vector-server)
+
+CREATE TABLE IF NOT EXISTS video_ai_understanding (
+    video_id        BIGINT       NOT NULL COMMENT '视频ID',
+    content_topic   TEXT         COMMENT '内容选题',
+    video_theme     TEXT         COMMENT '视频主题',
+    video_keywords  TEXT         COMMENT '视频关键词',
+    video_narration TEXT         COMMENT '视频口播',
+    raw_data        JSON         COMMENT '原始 data 字段 JSON 完整保留',
+    dt              VARCHAR(16)  COMMENT '数据所属分区 yyyyMMddHH',
+    sync_time       DATETIME     DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '同步时间',
+    PRIMARY KEY (video_id),
+    KEY idx_dt (dt)
+) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4 COMMENT = '视频AI理解结果缓存(同步自 ODPS result_log)';

+ 253 - 0
script/recall_regression_test.sh

@@ -0,0 +1,253 @@
+#!/usr/bin/env bash
+# recall_regression_test.sh
+#
+# Recall test endpoints — regression comparison script for before/after a deployment
+#
+# Usage:
+#   ./recall_regression_test.sh run baseline       # before deploy: run all cases, save to $RESULTS_DIR/baseline/
+#   ./recall_regression_test.sh run verify         # after deploy: run the same cases, save to $RESULTS_DIR/verify/
+#   ./recall_regression_test.sh diff baseline verify   # compare the two runs
+#
+# Optional environment variables:
+#   BASE_URL      default https://api-internal.piaoquantv.com/videoVector
+#   RESULTS_DIR   default <directory containing this script>/recall_test_results
+#
+# Dependencies: curl, python3 (used for JSON formatting), diff
+
+set -euo pipefail
+
+BASE_URL="${BASE_URL:-https://api-internal.piaoquantv.com/videoVector}"
+RESULTS_DIR="${RESULTS_DIR:-$(cd "$(dirname "$0")" && pwd)/recall_test_results}"
+
+# ===== Test inputs =====
+# Real video ID (confirmed by the user)
+VIDEO_ID_PRIMARY=64632804
+# Fallback ID (taken from an actual prod matchByText response)
+VIDEO_ID_SECONDARY=67688956
+
+# Query texts for text recall
+QUERY_TEXTS=(
+  "夏季减肥小妙招"
+  "宝宝辅食做法"
+  "婆婆和儿媳"
+)
+
+# Config codes
+CONFIG_CODES=(VIDEO_TOPIC VIDEO_INSPIRATION)
+
+# Top-N (fixed, so that differing parameters cannot cause result-size differences)
+TOP_N=10
+
+# ===== Helper functions =====
+# Print a message to stderr, prefixed with the current time as [HH:MM:SS].
+log() {
+  printf '%s\n' "[$(date +%H:%M:%S)] $*" >&2
+}
+
+# Pretty-print and normalize JSON read from stdin (sorted keys, UTF-8 left
+# unescaped, fixed 2-space indent) so that two snapshots diff stably.
+# Exits with status 1 and a message on stderr when the input cannot be processed.
+prettify() {
+  python3 -c '
+import json
+import sys
+
+try:
+    payload = json.load(sys.stdin)
+    rendered = json.dumps(payload, indent=2, ensure_ascii=False, sort_keys=True)
+    print(rendered)
+except Exception as err:
+    sys.stderr.write("JSON parse failed: %s\n" % err)
+    sys.exit(1)
+'
+}
+
+# 调 GET
+do_get() {
+  local name="$1"; shift
+  local path="$1"; shift
+  local out_dir="$1"; shift
+  local raw="$out_dir/${name}.raw.txt"
+  local pretty="$out_dir/${name}.json"
+
+  log "GET  $name -> $path"
+  if curl -sS --max-time 30 "${BASE_URL}${path}" > "$raw" 2>"$raw.err"; then
+    if prettify < "$raw" > "$pretty" 2>/dev/null; then
+      rm -f "$raw" "$raw.err"
+    else
+      log "  WARN: $name 返回非 JSON,保留 .raw.txt"
+      mv "$raw" "$pretty"
+      rm -f "$raw.err"
+    fi
+  else
+    log "  ERROR: $name curl 失败"
+    mv "$raw.err" "$pretty"
+    rm -f "$raw"
+  fi
+}
+
+# 调 POST
+do_post() {
+  local name="$1"; shift
+  local path="$1"; shift
+  local body="$1"; shift
+  local out_dir="$1"; shift
+  local raw="$out_dir/${name}.raw.txt"
+  local pretty="$out_dir/${name}.json"
+
+  log "POST $name -> $path  body=$body"
+  if curl -sS --max-time 60 -X POST \
+       -H "Content-Type: application/json" \
+       -d "$body" \
+       "${BASE_URL}${path}" > "$raw" 2>"$raw.err"; then
+    if prettify < "$raw" > "$pretty" 2>/dev/null; then
+      rm -f "$raw" "$raw.err"
+    else
+      log "  WARN: $name 返回非 JSON,保留 .raw.txt"
+      mv "$raw" "$pretty"
+      rm -f "$raw.err"
+    fi
+  else
+    log "  ERROR: $name curl 失败"
+    mv "$raw.err" "$pretty"
+    rm -f "$raw"
+  fi
+}
+
+# ===== run mode =====
+# Run every test case once and store one snapshot file per case under
+# "$RESULTS_DIR/<label>/".
+#   $1 label  name of the sub-directory that receives the snapshots
+run_tests() {
+  local label="$1"
+  local out_dir="$RESULTS_DIR/$label"
+  mkdir -p "$out_dir"
+
+  log "==> 输出目录: $out_dir"
+  log "==> BASE_URL: $BASE_URL"
+
+  # --- videoDetail ---
+  do_get "videoDetail__primary"   "/recallTest/videoDetail?videoId=$VIDEO_ID_PRIMARY"   "$out_dir"
+  do_get "videoDetail__secondary" "/recallTest/videoDetail?videoId=$VIDEO_ID_SECONDARY" "$out_dir"
+  do_get "videoDetail__missing"   "/recallTest/videoDetail?videoId=1"                   "$out_dir"
+
+  # --- aiUnderstanding ---
+  do_get "aiUnderstanding__primary" "/recallTest/aiUnderstanding?videoId=$VIDEO_ID_PRIMARY" "$out_dir"
+
+  # --- deconstructPoints ---
+  do_get "deconstructPoints__primary"   "/recallTest/deconstructPoints?videoId=$VIDEO_ID_PRIMARY"   "$out_dir"
+  do_get "deconstructPoints__secondary" "/recallTest/deconstructPoints?videoId=$VIDEO_ID_SECONDARY" "$out_dir"
+
+  # --- matchByText × (queryText × configCode) ---
+  # idx is the 1-based position of the query text; it is used in the file name
+  # instead of the query text itself.
+  local idx=0
+  for qt in "${QUERY_TEXTS[@]}"; do
+    idx=$((idx+1))
+    for cc in "${CONFIG_CODES[@]}"; do
+      do_post \
+        "matchByText__q${idx}__${cc}" \
+        "/recallTest/matchByText" \
+        "{\"queryText\":\"$qt\",\"configCode\":\"$cc\",\"topN\":$TOP_N}" \
+        "$out_dir"
+    done
+    # Default configCode (field not sent)
+    do_post \
+      "matchByText__q${idx}__DEFAULT" \
+      "/recallTest/matchByText" \
+      "{\"queryText\":\"$qt\",\"topN\":$TOP_N}" \
+      "$out_dir"
+  done
+
+  # --- matchByText edge case: empty text ---
+  do_post "matchByText__empty" "/recallTest/matchByText" \
+    "{\"queryText\":\"\",\"configCode\":\"VIDEO_TOPIC\",\"topN\":$TOP_N}" "$out_dir"
+
+  # --- matchByVideoId × configCode ---
+  for cc in "${CONFIG_CODES[@]}"; do
+    do_post \
+      "matchByVideoId__primary__${cc}" \
+      "/recallTest/matchByVideoId" \
+      "{\"videoId\":$VIDEO_ID_PRIMARY,\"configCode\":\"$cc\",\"topN\":$TOP_N}" \
+      "$out_dir"
+    do_post \
+      "matchByVideoId__secondary__${cc}" \
+      "/recallTest/matchByVideoId" \
+      "{\"videoId\":$VIDEO_ID_SECONDARY,\"configCode\":\"$cc\",\"topN\":$TOP_N}" \
+      "$out_dir"
+  done
+  # Default configCode
+  do_post \
+    "matchByVideoId__primary__DEFAULT" \
+    "/recallTest/matchByVideoId" \
+    "{\"videoId\":$VIDEO_ID_PRIMARY,\"topN\":$TOP_N}" \
+    "$out_dir"
+
+  # --- matchByVideoId edge case: non-existent ID ---
+  do_post "matchByVideoId__missing" "/recallTest/matchByVideoId" \
+    "{\"videoId\":1,\"configCode\":\"VIDEO_TOPIC\",\"topN\":$TOP_N}" "$out_dir"
+
+  log "==> 完成,共生成 $(ls "$out_dir" | wc -l | tr -d ' ') 个文件"
+  log "==> 路径: $out_dir"
+}
+
+# ===== diff 模式 =====
+diff_results() {
+  local a="$1"
+  local b="$2"
+  local dir_a="$RESULTS_DIR/$a"
+  local dir_b="$RESULTS_DIR/$b"
+
+  if [[ ! -d "$dir_a" ]]; then echo "目录不存在: $dir_a" >&2; exit 2; fi
+  if [[ ! -d "$dir_b" ]]; then echo "目录不存在: $dir_b" >&2; exit 2; fi
+
+  echo "==> 比对 $dir_a   vs   $dir_b"
+  echo
+
+  local files_a files_b
+  files_a=$(cd "$dir_a" && ls *.json 2>/dev/null | sort)
+  files_b=$(cd "$dir_b" && ls *.json 2>/dev/null | sort)
+
+  if [[ "$files_a" != "$files_b" ]]; then
+    echo "!! 文件清单不一致:"
+    diff <(echo "$files_a") <(echo "$files_b") || true
+    echo
+  fi
+
+  local total=0 changed=0 same=0
+  for f in $files_a; do
+    if [[ ! -f "$dir_b/$f" ]]; then continue; fi
+    total=$((total+1))
+    if diff -q "$dir_a/$f" "$dir_b/$f" >/dev/null 2>&1; then
+      same=$((same+1))
+    else
+      changed=$((changed+1))
+      echo "----- DIFF: $f -----"
+      diff -u "$dir_a/$f" "$dir_b/$f" || true
+      echo
+    fi
+  done
+
+  echo "==> 统计: 共 $total 个文件, 一致 $same, 有差异 $changed"
+}
+
+# ===== Entry point =====
+# Dispatch on the first argument; anything other than `run` / `diff`
+# (including no argument) prints the usage text and exits with status 1.
+cmd="${1:-}"
+case "$cmd" in
+  run)
+    # The label defaults to "baseline" when omitted.
+    label="${2:-baseline}"
+    run_tests "$label"
+    ;;
+  diff)
+    # The labels default to "baseline" and "verify" when omitted.
+    a="${2:-baseline}"
+    b="${3:-verify}"
+    diff_results "$a" "$b"
+    ;;
+  *)
+    cat <<EOF
+用法:
+  $0 run [LABEL]              # 跑全部测试,默认 LABEL=baseline
+  $0 diff LABEL_A LABEL_B     # 比对两次输出
+
+示例:
+  # 部署前
+  $0 run baseline
+  # (你部署新版本)
+  $0 run verify
+  # 比对差异
+  $0 diff baseline verify
+
+环境变量:
+  BASE_URL=$BASE_URL
+  RESULTS_DIR=$RESULTS_DIR
+EOF
+    exit 1
+    ;;
+esac

+ 8 - 9
server/src/main/java/com/tzld/videoVector/Application.java

@@ -1,6 +1,5 @@
 package com.tzld.videoVector;
 
-import com.tzld.videoVector.interceptor.CrosDomainAllowInterceptor;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.springframework.boot.SpringApplication;
@@ -8,14 +7,19 @@ import org.springframework.boot.autoconfigure.SpringBootApplication;
 import org.springframework.boot.web.servlet.ServletComponentScan;
 import org.springframework.cloud.client.discovery.EnableDiscoveryClient;
 import org.springframework.cloud.openfeign.EnableFeignClients;
-import org.springframework.web.servlet.config.annotation.InterceptorRegistry;
-import org.springframework.web.servlet.config.annotation.WebMvcConfigurer;
 
+/**
+ * CORS 由 K8s Ingress / 阿里云 Tengine 网关统一处理,后端不再重复加 CORS 头,
+ * 否则浏览器会因 Access-Control-Allow-Origin 出现多个值而拦截响应。
+ *
+ * @MapperScan 由各 DBConfig (PgVectorDBConfig / VideoVectorDBConfig) 分包配置,
+ * 此处不再放全局 scan。
+ */
 @SpringBootApplication
 @ServletComponentScan("com.tzld.videoVector.controller")
 @EnableDiscoveryClient
 @EnableFeignClients
-public class Application implements WebMvcConfigurer {
+public class Application {
 
     private static final Logger LOGGER = LoggerFactory.getLogger(Application.class);
 
@@ -23,9 +27,4 @@ public class Application implements WebMvcConfigurer {
         SpringApplication.run(Application.class, args);
         LOGGER.info("video-vector-server Start Success");
     }
-
-    @Override
-    public void addInterceptors(InterceptorRegistry registry) {
-        registry.addInterceptor(new CrosDomainAllowInterceptor()).addPathPatterns("/**");
-    }
 }

+ 80 - 0
server/src/main/java/com/tzld/videoVector/controller/VectorRecallTestController.java

@@ -0,0 +1,80 @@
+package com.tzld.videoVector.controller;
+
+import com.tzld.videoVector.common.base.CommonResponse;
+import com.tzld.videoVector.model.param.recall.MatchByTextParam;
+import com.tzld.videoVector.model.param.recall.MatchByVideoIdParam;
+import com.tzld.videoVector.model.vo.recall.AIUnderstandingVO;
+import com.tzld.videoVector.model.vo.recall.DeconstructPointsVO;
+import com.tzld.videoVector.model.vo.recall.RecallResultVO;
+import com.tzld.videoVector.model.vo.recall.VideoBasicVO;
+import com.tzld.videoVector.service.recall.VectorRecallTestService;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestBody;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RequestParam;
+import org.springframework.web.bind.annotation.RestController;
+
+/**
+ * Vector recall test controller.
+ * Serves the front-end test page; it only adds new endpoints and does not change existing endpoint logic.
+ *
+ * No authentication in the MVP. This service does not add CORS headers itself: the global
+ * CrosDomainAllowInterceptor registration was removed from Application, and CORS is handled
+ * by the K8s Ingress / Aliyun Tengine gateway.
+ */
+@RestController
+@RequestMapping("/recallTest")
+public class VectorRecallTestController {
+
+    @Autowired
+    private VectorRecallTestService vectorRecallTestService;
+
+    /**
+     * Fetch the basic details of a video (Tab1).
+     * GET /videoVector/recallTest/videoDetail?videoId=12345
+     */
+    @GetMapping("/videoDetail")
+    public CommonResponse<VideoBasicVO> videoDetail(@RequestParam("videoId") Long videoId) {
+        return CommonResponse.success(vectorRecallTestService.getVideoDetail(videoId));
+    }
+
+    /**
+     * Recall by text (Tab2).
+     * POST /videoVector/recallTest/matchByText
+     * body: { "queryText": "...", "configCode": "VIDEO_TOPIC", "topN": 10 }
+     */
+    @PostMapping("/matchByText")
+    public CommonResponse<RecallResultVO> matchByText(@RequestBody MatchByTextParam param) {
+        return CommonResponse.success(vectorRecallTestService.matchByText(param));
+    }
+
+    /**
+     * Deconstruction hierarchy of a video/material (Tab1 deconstruction tree).
+     * GET /videoVector/recallTest/deconstructPoints?videoId=12345
+     * Reads deconstruct_content.result_json directly and passes it through for the front end to render recursively.
+     */
+    @GetMapping("/deconstructPoints")
+    public CommonResponse<DeconstructPointsVO> deconstructPoints(@RequestParam("videoId") Long videoId) {
+        return CommonResponse.success(vectorRecallTestService.getDeconstructPoints(videoId));
+    }
+
+    /**
+     * Recall similar items by video/material ID (triggered by clicking a Tab1 deconstruction node).
+     * POST /videoVector/recallTest/matchByVideoId
+     * body: { "videoId": 12345, "configCode": "VIDEO_TOPIC", "topN": 10 }
+     */
+    @PostMapping("/matchByVideoId")
+    public CommonResponse<RecallResultVO> matchByVideoId(@RequestBody MatchByVideoIdParam param) {
+        return CommonResponse.success(vectorRecallTestService.matchByVideoId(param));
+    }
+
+    /**
+     * AI-understanding result of a video (Tab1).
+     * GET /videoVector/recallTest/aiUnderstanding?videoId=12345
+     * Returns data:null when the data is not ready; the front end shows a "not ready" placeholder. Mocking is strictly forbidden.
+     */
+    @GetMapping("/aiUnderstanding")
+    public CommonResponse<AIUnderstandingVO> aiUnderstanding(@RequestParam("videoId") Long videoId) {
+        return CommonResponse.success(vectorRecallTestService.getAiUnderstanding(videoId));
+    }
+}