Преглед изворни кода

Merge branch 'feature/luojunhui/20260529-match-by-video' of Server/video-vector-server into master

luojunhui пре 5 дана
родитељ
комит
c1980a2ce5

+ 5 - 0
core/src/main/java/com/tzld/videoVector/dao/mapper/pgVector/ext/ArticleVectorMapperExt.java

@@ -41,4 +41,9 @@ public interface ArticleVectorMapperExt {
     int deleteAbovePointIndex(@Param("articleId") String articleId,
                                @Param("configCode") String configCode,
                                @Param("minPointIndex") int minPointIndex);
+
+    /**
+     * Lists the distinct {@code config_code} values stored in {@code article_vectors}
+     * for the given article.
+     *
+     * @param articleId value matched against the {@code article_id} column
+     * @return the distinct config codes found for that article
+     */
+    List<String> selectDistinctConfigCodesByArticleId(@Param("articleId") String articleId);
+
+    /**
+     * Loads the vector rows of one article under one config code, ordered by
+     * {@code point_index} ascending. The {@code embedding} column is selected as text.
+     *
+     * @param articleId  value matched against the {@code article_id} column
+     * @param configCode value matched against the {@code config_code} column
+     * @return the matching rows
+     */
+    List<ArticleVector> selectVectorsByArticleIdAndConfigCode(@Param("articleId") String articleId,
+                                                               @Param("configCode") String configCode);
 }

+ 19 - 0
core/src/main/java/com/tzld/videoVector/model/param/recall/MatchByArticleIdParam.java

@@ -0,0 +1,19 @@
+package com.tzld.videoVector.model.param.recall;
+
+import lombok.Data;
+
+/**
+ * Request parameters for recall by long-form article ID (matchByArticleId).
+ */
+@Data
+public class MatchByArticleIdParam {
+
+    /** Long-form article ID (required). */
+    private String articleId;
+
+    /** Recall dimension; when omitted, every dimension the article has already been vectorized under is used. */
+    private String configCode;
+
+    /** Number of top results to return; defaults to 50. */
+    private Integer topN = 50;
+}

+ 28 - 0
core/src/main/java/com/tzld/videoVector/model/vo/recall/ArticleBasicVO.java

@@ -0,0 +1,28 @@
+package com.tzld.videoVector.model.vo.recall;
+
+import lombok.Data;
+
+/**
+ * Basic information of a long-form article (used by the articleDetail endpoint).
+ */
+@Data
+public class ArticleBasicVO {
+
+    /** Long-form article ID. */
+    private String articleId;
+
+    /** Article title. */
+    private String title;
+
+    /** Summary (first 120 characters of body_text). */
+    private String summary;
+
+    /** Cover image CDN URL. */
+    private String cover;
+
+    /** Link to the original article. */
+    private String url;
+
+    /** Source / official-account name. */
+    private String source;
+}

+ 7 - 0
core/src/main/java/com/tzld/videoVector/service/ArticleVectorStoreService.java

@@ -1,6 +1,7 @@
 package com.tzld.videoVector.service;
 
 import com.tzld.videoVector.model.entity.ArticleMatch;
+import com.tzld.videoVector.model.po.pgVector.ArticleVector;
 
 import java.util.Collection;
 import java.util.List;
@@ -39,4 +40,10 @@ public interface ArticleVectorStoreService {
     List<ArticleMatch> searchTopN(String configCode, List<Float> queryVector, int topN);
 
     List<ArticleMatch> searchTopNByRawVector(String configCode, String rawVector, int topN);
+
+    /** Returns every configCode under which the given long-form article has been vectorized. */
+    List<String> getDistinctConfigCodes(String articleId);
+
+    /**
+     * Returns all vector rows (raw embedding) of the given long-form article under the given configCode;
+     * used to obtain the article's own vectors for cross-modal recall.
+     */
+    List<ArticleVector> getVectorsByArticleId(String articleId, String configCode);
 }

+ 18 - 0
core/src/main/java/com/tzld/videoVector/service/impl/PgArticleVectorStoreServiceImpl.java

@@ -231,6 +231,24 @@ public class PgArticleVectorStoreServiceImpl implements ArticleVectorStoreServic
         return matches;
     }
 
+    /**
+     * Looks up the config codes the article has vectors for.
+     *
+     * @param articleId article ID; a blank value short-circuits to an empty list
+     * @return the mapper's result, or an empty list when the ID is blank or the mapper returns null
+     */
+    @Override
+    public List<String> getDistinctConfigCodes(String articleId) {
+        if (StringUtils.hasText(articleId)) {
+            List<String> found = articleVectorMapperExt.selectDistinctConfigCodesByArticleId(articleId);
+            if (found != null) {
+                return found;
+            }
+        }
+        return Collections.emptyList();
+    }
+
+    @Override
+    public List<ArticleVector> getVectorsByArticleId(String articleId, String configCode) {
+        if (!StringUtils.hasText(articleId) || !StringUtils.hasText(configCode)) {
+            return Collections.emptyList();
+        }
+        List<ArticleVector> vectors = articleVectorMapperExt.selectVectorsByArticleIdAndConfigCode(articleId, configCode);
+        return vectors != null ? vectors : Collections.emptyList();
+    }
+
     private List<ArticleMatch> convertToMatch(List<ArticleVector> results, String configCode) {
         return results.stream()
                 .map(av -> {

+ 18 - 0
core/src/main/java/com/tzld/videoVector/service/recall/VectorRecallTestService.java

@@ -1,9 +1,11 @@
 package com.tzld.videoVector.service.recall;
 
+import com.tzld.videoVector.model.param.recall.MatchByArticleIdParam;
 import com.tzld.videoVector.model.param.recall.MatchByMaterialIdParam;
 import com.tzld.videoVector.model.param.recall.MatchByTextParam;
 import com.tzld.videoVector.model.param.recall.MatchByVideoIdParam;
 import com.tzld.videoVector.model.vo.recall.AIUnderstandingVO;
+import com.tzld.videoVector.model.vo.recall.ArticleBasicVO;
 import com.tzld.videoVector.model.vo.recall.DeconstructPointsVO;
 import com.tzld.videoVector.model.vo.recall.MaterialBasicVO;
 import com.tzld.videoVector.model.vo.recall.RecallResultVO;
@@ -76,4 +78,20 @@ public interface VectorRecallTestService {
      * @return 素材基础信息,不存在返回 null
      */
     MaterialBasicVO getMaterialDetail(String materialId);
+
+    /**
+     * Recall by long-form article ID — runs a cross-modal similarity search using the article's own vectors.
+     *
+     * @param param parameters for recall by long-form article ID
+     * @return recall result (videos, materials and articles mixed)
+     */
+    RecallResultVO matchByArticleId(MatchByArticleIdParam param);
+
+    /**
+     * Long-form article detail preview — returns the article's title, summary, cover, original link and source.
+     *
+     * @param articleId long-form article ID
+     * @return basic article information, or null when the article does not exist
+     */
+    ArticleBasicVO getArticleDetail(String articleId);
 }

+ 246 - 0
core/src/main/java/com/tzld/videoVector/service/recall/impl/VectorRecallTestServiceImpl.java

@@ -14,6 +14,7 @@ import com.tzld.videoVector.model.entity.MaterialMatch;
 import com.tzld.videoVector.model.entity.VideoDetail;
 import com.tzld.videoVector.model.entity.VideoMatch;
 import com.tzld.videoVector.model.param.MatchTopNVideoParam;
+import com.tzld.videoVector.model.param.recall.MatchByArticleIdParam;
 import com.tzld.videoVector.model.param.recall.MatchByMaterialIdParam;
 import com.tzld.videoVector.model.param.recall.MatchByTextParam;
 import com.tzld.videoVector.model.param.recall.MatchByVideoIdParam;
@@ -23,6 +24,7 @@ import com.tzld.videoVector.model.po.pgVector.DeconstructVectorConfigExample;
 import com.tzld.videoVector.model.po.pgVector.MaterialDeconstructResult;
 import com.tzld.videoVector.model.vo.VideoMatchResult;
 import com.tzld.videoVector.model.vo.recall.AIUnderstandingVO;
+import com.tzld.videoVector.model.vo.recall.ArticleBasicVO;
 import com.tzld.videoVector.model.vo.recall.DeconstructPointsVO;
 import com.tzld.videoVector.model.vo.recall.ArticleDetailVO;
 import com.tzld.videoVector.model.vo.recall.MaterialBasicVO;
@@ -39,6 +41,7 @@ import com.tzld.videoVector.service.recall.VectorRecallTestService;
 import com.tzld.videoVector.util.Md5Util;
 import com.tzld.videoVector.util.RedisUtils;
 import com.tzld.videoVector.util.VectorUtils;
+import com.tzld.videoVector.model.po.pgVector.ArticleVector;
 import com.tzld.videoVector.model.po.pgVector.MaterialVector;
 import lombok.extern.slf4j.Slf4j;
 import org.springframework.beans.factory.annotation.Autowired;
@@ -663,6 +666,8 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
                 detail.setSummary(basic.summary);
                 detail.setCover(basic.cover);
                 detail.setImages(basic.images);
+                detail.setUrl(basic.url);
+                detail.setSource(basic.source);
             }
             detail.setDeconstruct(deconstructFlat);
             vo.setArticleDetail(detail);
@@ -738,6 +743,18 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
             }
         }
 
+        String url = targetPost.getString("url");
+        if (!StringUtils.hasText(url)) {
+            url = targetPost.getString("source_url");
+        }
+        meta.url = url;
+
+        String source = targetPost.getString("source");
+        if (!StringUtils.hasText(source)) {
+            source = targetPost.getString("account_name");
+        }
+        meta.source = source;
+
         if (!StringUtils.hasText(meta.title)) {
             return null;
         }
@@ -1441,6 +1458,234 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
         return vo;
     }
 
+    // ====================================================================
+    // articleDetail — 长文详情预览
+    // ====================================================================
+
+    /**
+     * Builds the preview information for one long-form article.
+     *
+     * @param articleId article ID; surrounding whitespace is trimmed before the lookup
+     * @return the preview, or {@code null} when the ID is blank or no deconstruct row is found;
+     *         only {@code articleId} is populated when no metadata can be extracted from the row
+     */
+    @Override
+    public ArticleBasicVO getArticleDetail(String articleId) {
+        if (!StringUtils.hasText(articleId)) {
+            return null;
+        }
+
+        final String id = articleId.trim();
+        ArticleDeconstructResult found = loadArticleDeconstructRows(Collections.singletonList(id)).get(id);
+        if (found == null) {
+            log.info("getArticleDetail: articleId={} 不存在", id);
+            return null;
+        }
+
+        ArticleBasicMeta meta = null;
+        JSONObject parsed = parseArticleResultJson(found);
+        if (parsed != null) {
+            meta = extractArticleBasicMeta(parsed);
+        }
+
+        ArticleBasicVO preview = new ArticleBasicVO();
+        preview.setArticleId(id);
+        if (meta == null) {
+            return preview;
+        }
+        preview.setTitle(meta.title);
+        preview.setSummary(meta.summary);
+        preview.setCover(meta.cover);
+        preview.setUrl(meta.url);
+        preview.setSource(meta.source);
+        return preview;
+    }
+
+    // ====================================================================
+    // matchByArticleId — 长文ID跨模态召回
+    // ====================================================================
+
+    @Override
+    public RecallResultVO matchByArticleId(MatchByArticleIdParam param) {
+        RecallResultVO empty = emptyResult();
+        if (param == null || !StringUtils.hasText(param.getArticleId())) {
+            log.info("matchByArticleId: articleId 为空");
+            return empty;
+        }
+
+        String articleId = param.getArticleId().trim();
+        int topN = param.getTopN() != null && param.getTopN() > 0 ? param.getTopN() : 50;
+
+        List<String> configCodes;
+        if (StringUtils.hasText(param.getConfigCode())) {
+            configCodes = Collections.singletonList(param.getConfigCode().trim());
+        } else {
+            configCodes = articleVectorStoreService.getDistinctConfigCodes(articleId);
+            if (configCodes.isEmpty()) {
+                log.info("matchByArticleId: articleId={} 无向量数据", articleId);
+                return empty;
+            }
+        }
+        log.info("matchByArticleId: articleId={}, topN={}, configCodes={}", articleId, topN, configCodes);
+
+        // 收集所有有效向量点(支持多点模式)
+        List<MaterialVectorQuery> vectorQueries = new ArrayList<>();
+        for (String configCode : configCodes) {
+            List<ArticleVector> vectors = articleVectorStoreService.getVectorsByArticleId(articleId, configCode);
+            for (ArticleVector vector : vectors) {
+                if (vector != null && StringUtils.hasText(vector.getEmbedding())) {
+                    vectorQueries.add(new MaterialVectorQuery(configCode, vector.getEmbedding()));
+                }
+            }
+        }
+        if (vectorQueries.isEmpty()) {
+            log.info("matchByArticleId: articleId={} 无有效向量 embedding", articleId);
+            return empty;
+        }
+
+        int candidateTopN = Math.max(topN * VectorConstants.MULTI_POINT_RECALL_CANDIDATE_FACTOR,
+                VectorConstants.MULTI_POINT_RECALL_MIN_CANDIDATES);
+
+        List<VideoMatchEnrichedVO> allResults = Collections.synchronizedList(new ArrayList<>());
+        List<CompletableFuture<Void>> allFutures = new ArrayList<>();
+
+        for (MaterialVectorQuery query : vectorQueries) {
+            final String cc = query.configCode;
+            final String rawEmbedding = query.rawEmbedding;
+            final int ctn = candidateTopN;
+            final int tn = topN;
+
+            allFutures.add(CompletableFuture.runAsync(() -> {
+                try {
+                    List<VideoMatch> matches = vectorStoreService.searchTopNByRawVector(cc, rawEmbedding, ctn);
+                    List<VideoMatch> deduped = deduplicateVideoMatches(matches, tn);
+                    log.info("matchByArticleId 视频搜索 cc={}: {} 条, 去重后 {} 条",
+                            cc, matches != null ? matches.size() : 0, deduped.size());
+                    if (!deduped.isEmpty()) {
+                        List<VideoMatchResult> videoResults = toVideoMatchResults(deduped, cc);
+                        populateVideoMatchResultDetails(videoResults);
+                        allResults.addAll(enrichVideoMatches(videoResults, cc));
+                    }
+                } catch (Exception e) {
+                    log.error("matchByArticleId 视频搜索失败 configCode={}: {}", cc, e.getMessage(), e);
+                }
+            }, RECALL_EXECUTOR));
+
+            allFutures.add(CompletableFuture.runAsync(() -> {
+                try {
+                    List<MaterialMatch> matches = materialVectorStoreService.searchTopNByRawVector(cc, rawEmbedding, ctn);
+                    List<MaterialMatch> deduped = deduplicateMaterialMatches(matches, tn);
+                    if (!deduped.isEmpty()) {
+                        allResults.addAll(enrichMaterialMatches(deduped, cc));
+                    }
+                } catch (Exception e) {
+                    log.error("matchByArticleId 素材搜索失败 configCode={}: {}", cc, e.getMessage(), e);
+                }
+            }, RECALL_EXECUTOR));
+
+            allFutures.add(CompletableFuture.runAsync(() -> {
+                try {
+                    List<ArticleMatch> matches = articleVectorStoreService.searchTopNByRawVector(cc, rawEmbedding, ctn);
+                    matches = matches.stream()
+                            .filter(m -> !articleId.equals(m.getArticleId()))
+                            .collect(Collectors.toList());
+                    List<ArticleMatch> deduped = deduplicateArticleMatches(matches, tn);
+                    if (!deduped.isEmpty()) {
+                        allResults.addAll(enrichArticleMatches(deduped, cc));
+                    }
+                } catch (Exception e) {
+                    log.error("matchByArticleId 文章搜索失败 configCode={}: {}", cc, e.getMessage(), e);
+                }
+            }, RECALL_EXECUTOR));
+        }
+
+        for (CompletableFuture<Void> future : allFutures) {
+            try {
+                future.get(30, TimeUnit.SECONDS);
+            } catch (Exception e) {
+                log.error("matchByArticleId 并行搜索等待异常: {}", e.getMessage(), e);
+            }
+        }
+
+        List<VideoMatchEnrichedVO> merged = deduplicateCrossConfigCode(allResults);
+
+        List<VideoMatchEnrichedVO> videoItems = merged.stream()
+                .filter(it -> it.getModality() == Modality.VIDEO)
+                .sorted(Comparator.comparing(VideoMatchEnrichedVO::getScore,
+                        Comparator.nullsLast(Comparator.reverseOrder())))
+                .limit(topN)
+                .collect(Collectors.toList());
+        List<VideoMatchEnrichedVO> materialItems = merged.stream()
+                .filter(it -> it.getModality() == Modality.MATERIAL)
+                .sorted(Comparator.comparing(VideoMatchEnrichedVO::getScore,
+                        Comparator.nullsLast(Comparator.reverseOrder())))
+                .limit(topN)
+                .collect(Collectors.toList());
+        List<VideoMatchEnrichedVO> articleItems = merged.stream()
+                .filter(it -> it.getModality() == Modality.ARTICLE)
+                .sorted(Comparator.comparing(VideoMatchEnrichedVO::getScore,
+                        Comparator.nullsLast(Comparator.reverseOrder())))
+                .limit(topN)
+                .collect(Collectors.toList());
+
+        log.info("matchByArticleId 按模态截断后: video={}, material={}, article={}",
+                videoItems.size(), materialItems.size(), articleItems.size());
+
+        String selfConfigCode = StringUtils.hasText(param.getConfigCode())
+                ? param.getConfigCode().trim()
+                : vectorQueries.get(0).configCode;
+        ensureSelfArticleInResults(articleItems, articleId, selfConfigCode, topN);
+
+        RecallResultVO result = buildResult(videoItems, materialItems, articleItems);
+        log.info("matchByArticleId 完成: total={}, video={}, material={}, article={}",
+                result.getTotal(), result.getVideoCount(), result.getMaterialCount(), result.getArticleCount());
+        return result;
+    }
+
+    /**
+     * Builds the enriched entry that represents the input article itself (score=1.0).
+     * Title, cover, images and detail fields are filled only when metadata can be extracted
+     * from the article's deconstruct row.
+     */
+    private VideoMatchEnrichedVO enrichSelfArticle(String articleId, String configCode) {
+        ArticleDeconstructResult row =
+                loadArticleDeconstructRows(Collections.singletonList(articleId)).get(articleId);
+
+        JSONObject raw = null;
+        if (row != null) {
+            raw = parseArticleResultJson(row);
+        }
+        ArticleBasicMeta basic = null;
+        Map<String, Object> deconstructFlat = null;
+        if (raw != null) {
+            basic = extractArticleBasicMeta(raw);
+            deconstructFlat = buildDeconstructFromRaw(raw);
+        }
+
+        VideoMatchEnrichedVO self = new VideoMatchEnrichedVO();
+        self.setArticleId(articleId);
+        self.setModality(Modality.ARTICLE);
+        if (StringUtils.hasText(configCode)) {
+            self.setConfigCode(configCode);
+        } else {
+            self.setConfigCode(null);
+        }
+        self.setScore(1.0);
+
+        ArticleDetailVO detail = new ArticleDetailVO();
+        if (basic != null) {
+            self.setTitle(basic.title);
+            self.setCover(basic.cover);
+            boolean hasImages = basic.images != null && !basic.images.isEmpty();
+            if (hasImages) {
+                self.setImageList(basic.images);
+            }
+
+            detail.setTitle(basic.title);
+            detail.setSummary(basic.summary);
+            detail.setCover(basic.cover);
+            detail.setImages(basic.images);
+            detail.setUrl(basic.url);
+            detail.setSource(basic.source);
+        }
+        detail.setDeconstruct(deconstructFlat);
+        self.setArticleDetail(detail);
+
+        applyCompatibilityFields(self);
+        return self;
+    }
+
+    /**
+     * Puts the input article at the head of {@code articleItems} unless an entry with the same
+     * article ID is already there, then drops the last entry if the list grew past {@code topN}.
+     */
+    private void ensureSelfArticleInResults(List<VideoMatchEnrichedVO> articleItems,
+                                             String articleId, String configCode, int topN) {
+        for (VideoMatchEnrichedVO item : articleItems) {
+            if (articleId.equals(item.getArticleId())) {
+                return;
+            }
+        }
+
+        articleItems.add(0, enrichSelfArticle(articleId, configCode));
+
+        int size = articleItems.size();
+        if (size > topN) {
+            articleItems.remove(size - 1);
+        }
+    }
+
     @Override
     public DeconstructPointsVO getDeconstructPoints(Long videoId) {
         if (videoId == null || videoId <= 0L) {
@@ -1598,6 +1843,7 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
         List<String> tags;
         String cover;
         String url;
+        String source;
         List<String> images;
     }
 }

+ 23 - 0
core/src/main/resources/mapper/pgVector/ext/ArticleVectorMapperExt.xml

@@ -119,4 +119,27 @@
           AND point_index >= #{minPointIndex}
     </delete>
 
+    <!-- Distinct config_code values stored in article_vectors for one article_id. -->
+    <select id="selectDistinctConfigCodesByArticleId" resultType="java.lang.String">
+        SELECT DISTINCT config_code
+        FROM article_vectors
+        WHERE article_id = #{articleId}
+    </select>
+
+    <!-- All rows of one article under one config_code, ordered by point_index ascending;
+         the embedding column is cast to text. -->
+    <select id="selectVectorsByArticleIdAndConfigCode" resultMap="ArticleVectorResultMap">
+        SELECT
+            id,
+            article_id,
+            config_code,
+            embedding::text AS embedding,
+            created_at,
+            updated_at,
+            point_index,
+            text,
+            text_hash
+        FROM article_vectors
+        WHERE article_id = #{articleId}
+          AND config_code = #{configCode}
+        ORDER BY point_index ASC
+    </select>
+
 </mapper>

+ 20 - 0
server/src/main/java/com/tzld/videoVector/controller/VectorRecallTestController.java

@@ -1,10 +1,12 @@
 package com.tzld.videoVector.controller;
 
 import com.tzld.videoVector.common.base.CommonResponse;
+import com.tzld.videoVector.model.param.recall.MatchByArticleIdParam;
 import com.tzld.videoVector.model.param.recall.MatchByMaterialIdParam;
 import com.tzld.videoVector.model.param.recall.MatchByTextParam;
 import com.tzld.videoVector.model.param.recall.MatchByVideoIdParam;
 import com.tzld.videoVector.model.vo.recall.AIUnderstandingVO;
+import com.tzld.videoVector.model.vo.recall.ArticleBasicVO;
 import com.tzld.videoVector.model.vo.recall.DeconstructPointsVO;
 import com.tzld.videoVector.model.vo.recall.MaterialBasicVO;
 import com.tzld.videoVector.model.vo.recall.RecallResultVO;
@@ -98,4 +100,22 @@ public class VectorRecallTestController {
     public CommonResponse<MaterialBasicVO> materialDetail(@RequestParam("materialId") String materialId) {
         return CommonResponse.success(vectorRecallTestService.getMaterialDetail(materialId));
     }
+
+    /**
+     * 长文ID召回 (长文相似度召回 Tab)
+     * POST /videoVector/recallTest/matchByArticleId
+     */
+    @PostMapping("/matchByArticleId")
+    public CommonResponse<RecallResultVO> matchByArticleId(@RequestBody MatchByArticleIdParam param) {
+        return CommonResponse.success(vectorRecallTestService.matchByArticleId(param));
+    }
+
+    /**
+     * 长文详情预览 (长文相似度召回 Tab — 输入防抖预览)
+     * GET /videoVector/recallTest/articleDetail?articleId=xxx
+     */
+    @GetMapping("/articleDetail")
+    public CommonResponse<ArticleBasicVO> articleDetail(@RequestParam("articleId") String articleId) {
+        return CommonResponse.success(vectorRecallTestService.getArticleDetail(articleId));
+    }
 }