Просмотр исходного кода

Merge branch 'feature/luojunhui/20260526-add-daily' of Server/video-vector-server into master

luojunhui 1 неделя назад
Родитель
Коммит
8ef0d24ed3
19 измененных файлов с 1943 добавлено и 346 удалено
  1. 24 0
      core/src/main/java/com/tzld/videoVector/dao/mapper/pgVector/ext/ArticleDeconstructResultMapperExt.java
  2. 44 0
      core/src/main/java/com/tzld/videoVector/dao/mapper/pgVector/ext/ArticleVectorMapperExt.java
  3. 430 0
      core/src/main/java/com/tzld/videoVector/job/ArticleVectorJob.java
  4. 38 305
      core/src/main/java/com/tzld/videoVector/job/MaterialVectorJob.java
  5. 84 0
      core/src/main/java/com/tzld/videoVector/model/entity/ArticleMatch.java
  6. 3 0
      core/src/main/java/com/tzld/videoVector/model/param/recall/MatchByTextParam.java
  7. 66 0
      core/src/main/java/com/tzld/videoVector/model/po/pgVector/ArticleDeconstructResult.java
  8. 104 0
      core/src/main/java/com/tzld/videoVector/model/po/pgVector/ArticleVector.java
  9. 42 0
      core/src/main/java/com/tzld/videoVector/model/vo/recall/ArticleDetailVO.java
  10. 6 0
      core/src/main/java/com/tzld/videoVector/model/vo/recall/VideoMatchEnrichedVO.java
  11. 42 0
      core/src/main/java/com/tzld/videoVector/service/ArticleVectorStoreService.java
  12. 245 0
      core/src/main/java/com/tzld/videoVector/service/impl/PgArticleVectorStoreServiceImpl.java
  13. 3 20
      core/src/main/java/com/tzld/videoVector/service/impl/PgMaterialVectorStoreServiceImpl.java
  14. 307 21
      core/src/main/java/com/tzld/videoVector/service/recall/impl/VectorRecallTestServiceImpl.java
  15. 237 0
      core/src/main/java/com/tzld/videoVector/util/DeconstructTextExtractor.java
  16. 72 0
      core/src/main/java/com/tzld/videoVector/util/VectorUtils.java
  17. 51 0
      core/src/main/resources/mapper/pgVector/ext/ArticleDeconstructResultMapperExt.xml
  18. 122 0
      core/src/main/resources/mapper/pgVector/ext/ArticleVectorMapperExt.xml
  19. 23 0
      server/src/main/java/com/tzld/videoVector/controller/XxlJobController.java

+ 24 - 0
core/src/main/java/com/tzld/videoVector/dao/mapper/pgVector/ext/ArticleDeconstructResultMapperExt.java

@@ -0,0 +1,24 @@
+package com.tzld.videoVector.dao.mapper.pgVector.ext;
+
+import com.tzld.videoVector.model.po.pgVector.ArticleDeconstructResult;
+import org.apache.ibatis.annotations.Param;
+
+import java.util.List;
+
+/**
+ * Custom MyBatis mapper for {@code ArticleDeconstructResult} rows; the article-side
+ * counterpart of MaterialDeconstructResultMapperExt.
+ * <p>
+ * NOTE(review): the SQL behind these methods lives in the mapper XML, which is not part of
+ * this file. The per-method notes below are derived from method/parameter names and from
+ * how ArticleVectorJob calls them; confirm details against the XML.
+ */
+public interface ArticleDeconstructResultMapperExt {
+
+    // Used by the sync job to find which of the given ids are already stored for a source;
+    // presumably returns only the ids that exist -- confirm in the mapper XML.
+    List<String> selectExistingArticleIds(@Param("source") String source,
+                                           @Param("articleIds") List<String> articleIds);
+
+    // Batch insert; the caller adds the returned int to its "inserted" counter.
+    // Presumably conflicting rows are ignored (per the method name) -- confirm in the mapper XML.
+    int batchInsertIgnore(@Param("list") List<ArticleDeconstructResult> list);
+
+    // Offset/limit page of article ids for a source; the vectorize job walks pages until one
+    // comes back empty or short. NOTE(review): stable paging needs a deterministic ORDER BY -- confirm.
+    List<String> selectArticleIdsBySourcePaged(@Param("source") String source,
+                                                @Param("offset") int offset,
+                                                @Param("limit") int limit);
+
+    // Loads the stored rows (including the raw result JSON read by the caller) for the given ids.
+    List<ArticleDeconstructResult> selectResultsByArticleIds(@Param("source") String source,
+                                                               @Param("articleIds") List<String> articleIds);
+}

+ 44 - 0
core/src/main/java/com/tzld/videoVector/dao/mapper/pgVector/ext/ArticleVectorMapperExt.java

@@ -0,0 +1,44 @@
+package com.tzld.videoVector.dao.mapper.pgVector.ext;
+
+import com.tzld.videoVector.model.po.pgVector.ArticleVector;
+import org.apache.ibatis.annotations.Param;
+
+import java.util.List;
+
+/**
+ * Custom MyBatis mapper for {@code ArticleVector} rows; the article-side counterpart of
+ * MaterialVectorMapperExt.
+ * <p>
+ * NOTE(review): the SQL behind these methods lives in the mapper XML, which is not part of
+ * this file. The per-method notes below are inferred from names and signatures only and
+ * should be confirmed against the XML.
+ */
+public interface ArticleVectorMapperExt {
+
+    // Presumably insert-or-update of one vector point identified by
+    // (articleId, configCode, pointIndex) -- confirm the conflict key in the XML.
+    // The embedding is passed as a String; presumably a textual vector literal -- TODO confirm format.
+    int upsertVector(@Param("articleId") String articleId,
+                     @Param("configCode") String configCode,
+                     @Param("pointIndex") int pointIndex,
+                     @Param("embedding") String embedding,
+                     @Param("text") String text,
+                     @Param("textHash") String textHash);
+
+    // Returns an int rather than a boolean; presumably a count or 0/1 flag -- confirm in the XML.
+    int existsByArticleIdAndConfigCode(@Param("articleId") String articleId,
+                                        @Param("configCode") String configCode);
+
+    // Presumably the subset of articleIds that already have at least one vector for configCode -- confirm.
+    List<String> selectExistingArticleIds(@Param("articleIds") List<String> articleIds,
+                                           @Param("configCode") String configCode);
+
+    // Presumably every article id that has vectors under configCode -- confirm (may be large).
+    List<String> selectAllArticleIds(@Param("configCode") String configCode);
+
+    // Presumably a nearest-neighbour search limited to topN rows for configCode;
+    // queryVector is passed as a String -- TODO confirm the distance operator and format in the XML.
+    List<ArticleVector> searchTopN(@Param("configCode") String configCode,
+                                    @Param("queryVector") String queryVector,
+                                    @Param("topN") int topN);
+
+    // Looks up a row by text hash within a config; returns a single ArticleVector.
+    // NOTE(review): if several rows share a hash the XML must limit to one -- confirm.
+    ArticleVector selectByTextHashAndConfigCode(@Param("textHash") String textHash,
+                                                  @Param("configCode") String configCode);
+
+    // Presumably removes all points of one article under configCode; returns an int (likely row count).
+    int deleteByArticleIdAndConfigCode(@Param("articleId") String articleId,
+                                        @Param("configCode") String configCode);
+
+    // Batch variant of the delete above for several article ids.
+    int deleteBatchByArticleIds(@Param("articleIds") List<String> articleIds,
+                                 @Param("configCode") String configCode);
+
+    // Presumably removes points whose index is at or above minPointIndex -- confirm the
+    // comparison (>= vs >) in the XML; callers rely on it to trim stale trailing points.
+    int deleteAbovePointIndex(@Param("articleId") String articleId,
+                               @Param("configCode") String configCode,
+                               @Param("minPointIndex") int minPointIndex);
+}

+ 430 - 0
core/src/main/java/com/tzld/videoVector/job/ArticleVectorJob.java

@@ -0,0 +1,430 @@
+package com.tzld.videoVector.job;
+
+import com.alibaba.fastjson.JSON;
+
+import com.alibaba.fastjson.JSONObject;
+import com.google.common.collect.Lists;
+import com.tzld.videoVector.api.AigcApiService;
+import com.tzld.videoVector.common.constant.VectorConstants;
+import com.tzld.videoVector.dao.mapper.pgVector.DeconstructVectorConfigMapper;
+import com.tzld.videoVector.dao.mapper.pgVector.ext.ArticleDeconstructResultMapperExt;
+import com.tzld.videoVector.model.po.pgVector.ArticleDeconstructResult;
+import com.tzld.videoVector.model.po.pgVector.DeconstructVectorConfig;
+import com.tzld.videoVector.model.po.pgVector.DeconstructVectorConfigExample;
+import com.tzld.videoVector.service.ArticleVectorStoreService;
+import com.tzld.videoVector.service.EmbeddingService;
+import com.tzld.videoVector.util.DeconstructTextExtractor;
+import com.tzld.videoVector.util.Md5Util;
+import com.tzld.videoVector.util.VectorUtils;
+import com.xxl.job.core.biz.model.ReturnT;
+import com.xxl.job.core.handler.annotation.XxlJob;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Component;
+import org.springframework.util.CollectionUtils;
+import org.springframework.util.StringUtils;
+
+import javax.annotation.Resource;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.stream.Collectors;
+
+/**
+ * 文章向量化定时任务(对称 MaterialVectorJob)
+ * <p>
+ * 数据流:
+ * <ul>
+ *   <li>{@link #syncArticleDeconstructJob(String)}:从 AIGC API 拉取文章解构结果,写入 article_deconstruct_result</li>
+ *   <li>{@link #vectorArticleJob(String)}:扫描 article_deconstruct_result,按配置提取文本并向量化,写入 article_vectors</li>
+ *   <li>{@link #articleJob(String)}:编排前两步串行执行</li>
+ * </ul>
+ */
+@Slf4j
+@Component
+public class ArticleVectorJob {
+
+    private static final String SOURCE_AIGC = "aigc_deconstruct";
+
+    @Resource
+    private DeconstructVectorConfigMapper vectorConfigMapper;
+
+    @Resource
+    private ArticleDeconstructResultMapperExt articleDeconstructResultMapperExt;
+
+    @Resource
+    private ArticleVectorStoreService articleVectorStoreService;
+
+    @Resource
+    private EmbeddingService embeddingService;
+
+    @Resource
+    private AigcApiService aigcApiService;
+
+    /**
+     * 文章 AIGC 任务 ID
+     */
+    @Value("${aigc.article.task.id:66}")
+    private int articleTaskId;
+
+    // ====================================================================
+    // 入口 1:同步文章解构结果
+    // ====================================================================
+
+    /**
+     * XXL-Job entry point that synchronises article deconstruct results.
+     * Delegates to {@code syncAigcArticleSource} and reports the inserted / skipped totals.
+     *
+     * @param param job parameter; only logged here
+     * @return {@code ReturnT.SUCCESS}, or a FAIL_CODE result carrying the exception message
+     */
+    @XxlJob("syncArticleDeconstructJob")
+    public ReturnT<String> syncArticleDeconstructJob(String param) {
+        log.info("开始执行文章解构同步任务, param: {}", param);
+        final AtomicInteger inserted = new AtomicInteger();
+        final AtomicInteger skipped = new AtomicInteger();
+        try {
+            syncAigcArticleSource(inserted, skipped);
+        } catch (Exception e) {
+            log.error("文章解构同步任务失败: {}", e.getMessage(), e);
+            return new ReturnT<>(ReturnT.FAIL_CODE, "任务执行失败: " + e.getMessage());
+        }
+        log.info("文章解构同步完成 新增={}, 已存在跳过={}", inserted.get(), skipped.get());
+        return ReturnT.SUCCESS;
+    }
+
+    /**
+     * Pulls article deconstruct results for the AIGC task {@code articleTaskId} and stores the
+     * ones that are not yet present.
+     * <p>
+     * Flow: list the task inputs, map each trimmed bizUniqueId to its task-instance id, then for
+     * every batch of ids: skip the ids the mapper reports as existing, fetch the callback detail
+     * of the remaining ones in parallel, and insert the fetched results in sub-batches of 200.
+     *
+     * @param insertCount incremented by the value returned from each batch insert
+     * @param skipCount   incremented by the number of ids reported as already stored
+     */
+    private void syncAigcArticleSource(AtomicInteger insertCount, AtomicInteger skipCount) {
+        log.info("开始从 AIGC taskId={} 拉取文章数据", articleTaskId);
+
+        List<AigcApiService.AigcTaskInput> taskInputList = aigcApiService.getTaskInputList(articleTaskId);
+        if (CollectionUtils.isEmpty(taskInputList)) {
+            log.info("AIGC taskId={} 无文章数据", articleTaskId);
+            return;
+        }
+
+        // When the same article id appears more than once, the last task instance wins.
+        Map<String, Long> articleIdToTaskInstanceId = new HashMap<>();
+        for (AigcApiService.AigcTaskInput input : taskInputList) {
+            String articleId = normalizeArticleId(input.getBizUniqueId());
+            if (articleId == null) {
+                log.info("跳过空 bizUniqueId, taskId={}", articleTaskId);
+                continue;
+            }
+            articleIdToTaskInstanceId.put(articleId, input.getTaskInstanceId());
+        }
+        log.info("taskId={} 拉到 {} 篇文章", articleTaskId, articleIdToTaskInstanceId.size());
+
+        if (articleIdToTaskInstanceId.isEmpty()) {
+            log.info("AIGC 任务无有效文章数据");
+            return;
+        }
+
+        List<String> allArticleIds = new ArrayList<>(articleIdToTaskInstanceId.keySet());
+        for (List<String> batchIds : Lists.partition(allArticleIds, VectorConstants.ODPS_IN_BATCH_SIZE)) {
+            Set<String> existingIds = new HashSet<>(
+                    articleDeconstructResultMapperExt.selectExistingArticleIds(SOURCE_AIGC, batchIds));
+            skipCount.addAndGet(existingIds.size());
+
+            List<String> needSyncIds = batchIds.stream()
+                    .filter(id -> !existingIds.contains(id))
+                    .collect(Collectors.toList());
+
+            if (needSyncIds.isEmpty()) {
+                continue;
+            }
+
+            // Worker threads append concurrently, hence the synchronized wrapper.
+            List<ArticleDeconstructResult> batch = Collections.synchronizedList(new ArrayList<>());
+            ExecutorService executor = Executors.newFixedThreadPool(VectorConstants.AIGC_DETAIL_PARALLELISM);
+            try {
+                for (String articleId : needSyncIds) {
+                    executor.submit(() -> {
+                        try {
+                            Long taskInstanceId = articleIdToTaskInstanceId.get(articleId);
+                            if (taskInstanceId == null) return;
+                            JSONObject dataContent = aigcApiService.getTaskCallbackDetail(taskInstanceId);
+                            if (dataContent != null) {
+                                ArticleDeconstructResult r = new ArticleDeconstructResult();
+                                r.setArticleId(articleId);
+                                r.setSource(SOURCE_AIGC);
+                                r.setResult(dataContent.toJSONString());
+                                batch.add(r);
+                            }
+                        } catch (Exception e) {
+                            // Pass the exception itself so the stack trace is kept, like the other error logs here.
+                            log.error("同步 articleId={} 失败: {}", articleId, e.getMessage(), e);
+                        }
+                    });
+                }
+                VectorUtils.awaitAndShutdown(executor, 30, "文章同步");
+            } finally {
+                // Safety net: releases the pool threads even if submit/await throws.
+                // A no-op when the pool has already terminated.
+                executor.shutdownNow();
+            }
+
+            // Snapshot before partitioning so the insert never iterates a list that a straggling
+            // worker could still be appending to. An empty snapshot yields no partitions.
+            List<ArticleDeconstructResult> fetched = new ArrayList<>(batch);
+            for (List<ArticleDeconstructResult> subBatch : Lists.partition(fetched, 200)) {
+                insertCount.addAndGet(articleDeconstructResultMapperExt.batchInsertIgnore(subBatch));
+            }
+        }
+    }
+
+    // ====================================================================
+    // 入口 2:文章向量化
+    // ====================================================================
+
+    /**
+     * XXL-Job entry point for article vectorization.
+     *
+     * @param param job parameter, handed to {@code VectorUtils.parseMaxCount} to obtain the
+     *              optional article cap
+     * @return the result of {@code doVectorize}
+     */
+    @XxlJob("vectorArticleJob")
+    public ReturnT<String> vectorArticleJob(String param) {
+        log.info("开始执行文章向量化任务, param: {}", param);
+        return doVectorize(VectorUtils.parseMaxCount(param));
+    }
+
+    /**
+     * Scans stored article deconstruct results page by page and vectorizes them under every
+     * enabled config whose source_field equals {@code SOURCE_AIGC}; configs run concurrently
+     * within a page.
+     * <p>
+     * Note: {@code maxArticleCount} caps the number of articles <em>scanned</em>; articles that
+     * already have vectors still count towards it, because the counter grows by the page size.
+     *
+     * @param maxArticleCount optional cap; {@code null} or a non-positive value means no cap
+     * @return {@code ReturnT.SUCCESS}, or a FAIL_CODE result carrying the exception message
+     */
+    private ReturnT<String> doVectorize(Integer maxArticleCount) {
+        try {
+            List<DeconstructVectorConfig> configs = getEnabledConfigsBySourceField(SOURCE_AIGC);
+            if (CollectionUtils.isEmpty(configs)) {
+                log.info("未找到 source_field={} 的向量化配置", SOURCE_AIGC);
+                return ReturnT.SUCCESS;
+            }
+            log.info("加载 {} 个文章向量化配置: {}", configs.size(),
+                    configs.stream().map(DeconstructVectorConfig::getConfigCode).collect(Collectors.toList()));
+
+            boolean capped = maxArticleCount != null && maxArticleCount > 0;
+            AtomicInteger totalSuccessCount = new AtomicInteger(0);
+            AtomicInteger totalFailCount = new AtomicInteger(0);
+            // Only touched by this thread, so a plain int is enough.
+            int totalProcessed = 0;
+            int pageNum = 0;
+
+            while (true) {
+                int offset = pageNum * VectorConstants.PAGE_SIZE;
+                int limit = VectorConstants.PAGE_SIZE;
+                if (capped) {
+                    int remaining = maxArticleCount - totalProcessed;
+                    if (remaining <= 0) break;
+                    limit = Math.min(limit, remaining);
+                }
+
+                List<String> articleIds = articleDeconstructResultMapperExt
+                        .selectArticleIdsBySourcePaged(SOURCE_AIGC, offset, limit);
+                if (CollectionUtils.isEmpty(articleIds)) {
+                    log.info("第 {} 页没有查询到数据,分页查询结束", pageNum);
+                    break;
+                }
+                log.info("第 {} 页查询到 {} 个 articleId", pageNum, articleIds.size());
+
+                Map<String, ParsedArticle> parsedById = loadParsedArticles(articleIds);
+
+                ExecutorService configExecutor = Executors.newFixedThreadPool(configs.size());
+                try {
+                    for (DeconstructVectorConfig config : configs) {
+                        configExecutor.submit(() ->
+                                processConfigForArticle(config, articleIds, parsedById, totalSuccessCount, totalFailCount)
+                        );
+                    }
+                    VectorUtils.awaitAndShutdown(configExecutor, 30, "文章向量化配置并发");
+                } finally {
+                    // Safety net: the outer catch swallows exceptions, so without this a failure in
+                    // submit/await would leave the pool threads alive. No-op if already terminated.
+                    configExecutor.shutdownNow();
+                }
+
+                totalProcessed += articleIds.size();
+
+                if (capped && totalProcessed >= maxArticleCount) {
+                    log.info("已达到 maxArticleCount={} 限制,结束扫描", maxArticleCount);
+                    break;
+                }
+
+                if (articleIds.size() < limit) {
+                    log.info("第 {} 页数据量 {} 小于 limit {},分页结束", pageNum, articleIds.size(), limit);
+                    break;
+                }
+                pageNum++;
+            }
+
+            log.info("文章向量化任务完成 总处理文章={}, 成功={}, 失败={}",
+                    totalProcessed, totalSuccessCount.get(), totalFailCount.get());
+            return ReturnT.SUCCESS;
+        } catch (Exception e) {
+            log.error("文章向量化任务失败: {}", e.getMessage(), e);
+            return new ReturnT<>(ReturnT.FAIL_CODE, "任务执行失败: " + e.getMessage());
+        }
+    }
+
+    /**
+     * Loads the stored deconstruct results for the given ids and parses each result string
+     * into a JSON object. Rows that are null, blank, unparsable or that parse to null are
+     * left out of the returned map.
+     *
+     * @param articleIds ids to load for {@code SOURCE_AIGC}
+     * @return parsed results keyed by article id
+     */
+    private Map<String, ParsedArticle> loadParsedArticles(List<String> articleIds) {
+        List<ArticleDeconstructResult> rows = articleDeconstructResultMapperExt
+                .selectResultsByArticleIds(SOURCE_AIGC, articleIds);
+        Map<String, ParsedArticle> parsedByArticleId = new HashMap<>(articleIds.size());
+        for (ArticleDeconstructResult row : rows) {
+            boolean usable = row != null && StringUtils.hasText(row.getResult());
+            if (usable) {
+                JSONObject content = null;
+                try {
+                    content = JSON.parseObject(row.getResult());
+                } catch (Exception e) {
+                    log.error("articleId={} result JSON 解析失败: {}", row.getArticleId(), e.getMessage());
+                }
+                if (content != null) {
+                    parsedByArticleId.put(row.getArticleId(), new ParsedArticle(content));
+                }
+            }
+        }
+        return parsedByArticleId;
+    }
+
+    /**
+     * Vectorizes, under one config, every article of the page that the store does not yet
+     * report as existing. Each article ends up in exactly one of the two counters; a failure
+     * of the whole config (for example the existence lookup) is only logged.
+     *
+     * @param config            vectorization config to apply
+     * @param articleIds        article ids of the current page
+     * @param parsedById        parsed deconstruct results keyed by article id
+     * @param totalSuccessCount incremented per article stored successfully
+     * @param totalFailCount    incremented per article skipped or failed
+     */
+    private void processConfigForArticle(DeconstructVectorConfig config, List<String> articleIds,
+                                          Map<String, ParsedArticle> parsedById,
+                                          AtomicInteger totalSuccessCount, AtomicInteger totalFailCount) {
+        final String configCode = config.getConfigCode();
+        try {
+            Set<String> alreadyStored = articleVectorStoreService.existsByIds(configCode, articleIds);
+            List<String> pending = new ArrayList<>();
+            for (String id : articleIds) {
+                if (!alreadyStored.contains(id)) {
+                    pending.add(id);
+                }
+            }
+            if (pending.isEmpty()) {
+                log.info("配置 {} 下所有文章已有向量,跳过", configCode);
+                return;
+            }
+            log.info("配置 {} 需要处理 {} 篇文章", configCode, pending.size());
+
+            for (String articleId : pending) {
+                ParsedArticle parsed = parsedById.get(articleId);
+                if (parsed == null) {
+                    log.info("articleId={} 配置 {} 无解构结果,跳过", articleId, configCode);
+                    totalFailCount.incrementAndGet();
+                    continue;
+                }
+
+                boolean stored;
+                try {
+                    List<String> texts = DeconstructTextExtractor.extractTextsFromDataContent(parsed.dataContent, config);
+                    if (CollectionUtils.isEmpty(texts)) {
+                        log.info("articleId={} 配置 {} 未提取到文本,跳过", articleId, configCode);
+                        totalFailCount.incrementAndGet();
+                        continue;
+                    }
+                    stored = vectorizeAndStoreArticle(config, articleId, texts);
+                } catch (Exception e) {
+                    log.error("处理 articleId={} 配置 {} 时发生异常: {}", articleId, configCode, e.getMessage(), e);
+                    totalFailCount.incrementAndGet();
+                    continue;
+                }
+
+                AtomicInteger counter = stored ? totalSuccessCount : totalFailCount;
+                counter.incrementAndGet();
+            }
+        } catch (Exception e) {
+            log.error("配置 {} 处理异常: {}", configCode, e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Embeds the extracted texts of one article under one config and stores the vectors.
+     * <p>
+     * Single-point configs store the first non-blank text only. Multi-point configs store every
+     * non-blank text at a compact point index (0..n-1): all texts are embedded first and saved
+     * only when every embedding succeeded, then trailing points beyond n are removed.
+     * <p>
+     * If a save fails or throws midway through a multi-point write, the points already written
+     * are removed again. The caller skips articles that the store reports as existing, so a
+     * partial set left behind would otherwise be treated as complete and never retried.
+     *
+     * @param config    vectorization config (config code, max length, single/multi point)
+     * @param articleId article being vectorized
+     * @param texts     texts extracted for this article; blank entries are ignored
+     * @return {@code true} when all vectors were stored, {@code false} otherwise
+     */
+    private boolean vectorizeAndStoreArticle(DeconstructVectorConfig config, String articleId,
+                                              List<String> texts) {
+        String configCode = config.getConfigCode();
+        Integer maxLength = config.getMaxLength();
+        boolean multiPoint = VectorUtils.isMultiPointConfig(config);
+
+        if (multiPoint) {
+            // Pre-clean old vectors so leftovers of an earlier partial write cannot survive.
+            articleVectorStoreService.deleteAbovePointIndex(configCode, articleId, 0);
+
+            List<String> validTexts = new ArrayList<>(texts.size());
+            for (String raw : texts) {
+                if (StringUtils.hasText(raw)) validTexts.add(raw);
+            }
+            if (validTexts.isEmpty()) {
+                log.info("articleId={} 配置 {} 无有效文本", articleId, configCode);
+                return false;
+            }
+
+            // Embed everything before writing anything, so an embedding failure leaves no rows.
+            List<List<Float>> vectors = new ArrayList<>(validTexts.size());
+            List<String> truncated = new ArrayList<>(validTexts.size());
+            for (int i = 0; i < validTexts.size(); i++) {
+                String text = truncateToMaxLength(validTexts.get(i), maxLength);
+                List<Float> vector = getOrEmbed(text, config);
+                if (vector == null || vector.isEmpty()) {
+                    log.error("articleId={} 配置 {} 第{}个文本向量化失败,本文章本轮放弃",
+                            articleId, configCode, i);
+                    return false;
+                }
+                vectors.add(vector);
+                truncated.add(text);
+            }
+
+            boolean allSaved = false;
+            try {
+                for (int i = 0; i < vectors.size(); i++) {
+                    if (!articleVectorStoreService.save(configCode, articleId, i, vectors.get(i), truncated.get(i))) {
+                        log.error("articleId={} 配置 {} 第{}个点 save 返回 false", articleId, configCode, i);
+                        return false;
+                    }
+                }
+                allSaved = true;
+            } finally {
+                // Runs on both "save returned false" and "save threw": undo the partial write.
+                if (!allSaved) {
+                    rollbackPartialPoints(configCode, articleId);
+                }
+            }
+
+            // Drop stale points left from a previous run that had more texts.
+            articleVectorStoreService.deleteAbovePointIndex(configCode, articleId, vectors.size());
+            log.debug("articleId={} 配置 {} 多点向量化存储成功,共 {} 个点", articleId, configCode, vectors.size());
+            return true;
+        } else {
+            String text = null;
+            for (String t : texts) {
+                if (StringUtils.hasText(t)) {
+                    text = t;
+                    break;
+                }
+            }
+            if (text == null) {
+                log.info("articleId={} 配置 {} 无有效文本,跳过", articleId, configCode);
+                return false;
+            }
+            text = truncateToMaxLength(text, maxLength);
+            List<Float> vector = getOrEmbed(text, config);
+            if (vector == null || vector.isEmpty()) {
+                log.error("articleId={} 配置 {} 文本向量化失败", articleId, configCode);
+                return false;
+            }
+            if (!articleVectorStoreService.save(configCode, articleId, vector, text)) {
+                log.error("articleId={} 配置 {} save 返回 false", articleId, configCode);
+                return false;
+            }
+            log.debug("articleId={} 配置 {} 向量化存储成功", articleId, configCode);
+            return true;
+        }
+    }
+
+    /** Cuts {@code text} to {@code maxLength} characters when a positive limit is configured. */
+    private String truncateToMaxLength(String text, Integer maxLength) {
+        if (maxLength != null && maxLength > 0 && text.length() > maxLength) {
+            return text.substring(0, maxLength);
+        }
+        return text;
+    }
+
+    /**
+     * Best-effort removal of the points written for an article after a failed multi-point save.
+     * A cleanup failure is logged and not rethrown, so it cannot mask the original error.
+     */
+    private void rollbackPartialPoints(String configCode, String articleId) {
+        try {
+            articleVectorStoreService.deleteAbovePointIndex(configCode, articleId, 0);
+        } catch (Exception e) {
+            log.error("articleId={} 配置 {} 清理残缺向量失败: {}", articleId, configCode, e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Returns the vector for {@code text}, reusing a stored vector with the same text hash
+     * under the same config when one exists, and calling the embedding service otherwise.
+     *
+     * @param text   text to embed
+     * @param config config whose code scopes the hash lookup and which is passed to the embedder
+     * @return the cached or freshly computed vector (whatever the embedding service returns on a miss)
+     */
+    private List<Float> getOrEmbed(String text, DeconstructVectorConfig config) {
+        String configCode = config.getConfigCode();
+        String textHash = Md5Util.encoderByMd5(text);
+
+        List<Float> cached = null;
+        if (StringUtils.hasText(textHash)) {
+            cached = articleVectorStoreService.getVectorByTextHash(textHash, configCode);
+        }
+
+        boolean cacheHit = cached != null && !cached.isEmpty();
+        if (!cacheHit) {
+            return embeddingService.embed(text, config);
+        }
+        log.debug("命中 text_hash 缓存(article),hash={}, configCode={}", textHash, configCode);
+        return cached;
+    }
+
+    // ====================================================================
+    // 入口 3:编排
+    // ====================================================================
+
+    /**
+     * XXL-Job entry point that runs the full article pipeline: sync first, then vectorize.
+     * Vectorization is skipped when the sync step does not return the success code.
+     *
+     * @param param job parameter, passed unchanged to both steps
+     * @return the failed sync result, or the result of the vectorize step
+     */
+    @XxlJob("articleJob")
+    public ReturnT<String> articleJob(String param) {
+        log.info("开始执行文章完整链路, param: {}", param);
+        ReturnT<String> syncOutcome = syncArticleDeconstructJob(param);
+        if (syncOutcome.getCode() == ReturnT.SUCCESS_CODE) {
+            return vectorArticleJob(param);
+        }
+        log.error("文章同步阶段失败: {}", syncOutcome.getMsg());
+        return syncOutcome;
+    }
+
+    /**
+     * Queries the vector configs that are enabled (enabled = 1) for the given source field,
+     * ordered by ascending priority.
+     *
+     * @param sourceField value matched against the config's source field
+     * @return matching configs as returned by the mapper
+     */
+    private List<DeconstructVectorConfig> getEnabledConfigsBySourceField(String sourceField) {
+        DeconstructVectorConfigExample query = new DeconstructVectorConfigExample();
+        query.setOrderByClause("priority ASC");
+        query.createCriteria()
+                .andEnabledEqualTo((short) 1)
+                .andSourceFieldEqualTo(sourceField);
+        return vectorConfigMapper.selectByExample(query);
+    }
+
+    /**
+     * Normalises a bizUniqueId into an article id.
+     *
+     * @param bizUniqueId raw id from the AIGC task input
+     * @return the trimmed id, or {@code null} when the input is null or blank
+     */
+    private String normalizeArticleId(String bizUniqueId) {
+        return StringUtils.hasText(bizUniqueId) ? bizUniqueId.trim() : null;
+    }
+
+    // Holder for one article's parsed deconstruct JSON, so a page's results are parsed once
+    // and then shared by the per-config workers.
+    private static final class ParsedArticle {
+        // Parsed result JSON; read directly by processConfigForArticle.
+        final JSONObject dataContent;
+
+        ParsedArticle(JSONObject dataContent) {
+            this.dataContent = dataContent;
+        }
+    }
+}

+ 38 - 305
core/src/main/java/com/tzld/videoVector/job/MaterialVectorJob.java

@@ -1,7 +1,7 @@
 package com.tzld.videoVector.job;
 
 import com.alibaba.fastjson.JSON;
-import com.alibaba.fastjson.JSONArray;
+
 import com.alibaba.fastjson.JSONObject;
 import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
 import com.google.common.collect.Lists;
@@ -14,6 +14,7 @@ import com.tzld.videoVector.model.po.pgVector.DeconstructVectorConfigExample;
 import com.tzld.videoVector.model.po.pgVector.MaterialDeconstructResult;
 import com.tzld.videoVector.service.EmbeddingService;
 import com.tzld.videoVector.service.MaterialVectorStoreService;
+import com.tzld.videoVector.util.DeconstructTextExtractor;
 import com.tzld.videoVector.util.Md5Util;
 import com.tzld.videoVector.util.VectorUtils;
 import com.xxl.job.core.biz.model.ReturnT;
@@ -35,8 +36,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.TimeUnit;
+
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.stream.Collectors;
 
@@ -198,38 +198,33 @@ public class MaterialVectorJob {
 
             // 并发调 detail 接口
             ExecutorService executor = Executors.newFixedThreadPool(VectorConstants.AIGC_DETAIL_PARALLELISM);
-            try {
-                List<Future<?>> futures = new ArrayList<>();
-                List<MaterialDeconstructResult> batch = Collections.synchronizedList(new ArrayList<>());
-
-                for (String materialId : needSyncIds) {
-                    futures.add(executor.submit(() -> {
-                        try {
-                            Long taskInstanceId = materialIdToTaskInstanceId.get(materialId);
-                            if (taskInstanceId == null) return;
-                            JSONObject dataContent = aigcApiService.getTaskCallbackDetail(taskInstanceId);
-                            if (dataContent != null) {
-                                MaterialDeconstructResult r = new MaterialDeconstructResult();
-                                r.setMaterialId(materialId);
-                                r.setSource(SOURCE_AIGC);
-                                r.setResult(dataContent.toJSONString());
-                                r.setSourceType(materialIdToSourceType.getOrDefault(materialId, defaultSourceType));
-                                batch.add(r);
-                            }
-                        } catch (Exception e) {
-                            log.error("同步 materialId={} 失败: {}", materialId, e.getMessage());
+            List<MaterialDeconstructResult> batch = Collections.synchronizedList(new ArrayList<>());
+
+            for (String materialId : needSyncIds) {
+                executor.submit(() -> {
+                    try {
+                        Long taskInstanceId = materialIdToTaskInstanceId.get(materialId);
+                        if (taskInstanceId == null) return;
+                        JSONObject dataContent = aigcApiService.getTaskCallbackDetail(taskInstanceId);
+                        if (dataContent != null) {
+                            MaterialDeconstructResult r = new MaterialDeconstructResult();
+                            r.setMaterialId(materialId);
+                            r.setSource(SOURCE_AIGC);
+                            r.setResult(dataContent.toJSONString());
+                            r.setSourceType(materialIdToSourceType.getOrDefault(materialId, defaultSourceType));
+                            batch.add(r);
                         }
-                    }));
-                }
-                awaitAndShutdown(futures, executor, 30, "素材同步");
-
-                if (!batch.isEmpty()) {
-                    for (List<MaterialDeconstructResult> subBatch : Lists.partition(batch, 200)) {
-                        insertCount.addAndGet(materialDeconstructResultMapperExt.batchInsertIgnore(subBatch));
+                    } catch (Exception e) {
+                        log.error("同步 materialId={} 失败: {}", materialId, e.getMessage());
                     }
+                });
+            }
+            VectorUtils.awaitAndShutdown(executor, 30, "素材同步");
+
+            if (!batch.isEmpty()) {
+                for (List<MaterialDeconstructResult> subBatch : Lists.partition(batch, 200)) {
+                    insertCount.addAndGet(materialDeconstructResultMapperExt.batchInsertIgnore(subBatch));
                 }
-            } finally {
-                executor.shutdownNow();
             }
         }
     }
@@ -246,7 +241,7 @@ public class MaterialVectorJob {
     @XxlJob("vectorMaterialJob")
     public ReturnT<String> vectorMaterialJob(String param) {
         log.info("开始执行素材向量化任务, param: {}", param);
-        Integer maxMaterialCount = parseMaxCount(param);
+        Integer maxMaterialCount = VectorUtils.parseMaxCount(param);
         return doVectorize(maxMaterialCount);
     }
 
@@ -293,17 +288,12 @@ public class MaterialVectorJob {
 
                 // 3. 对每个配置并发处理
                 ExecutorService configExecutor = Executors.newFixedThreadPool(configs.size());
-                try {
-                    List<Future<?>> configFutures = new ArrayList<>();
-                    for (DeconstructVectorConfig config : configs) {
-                        configFutures.add(configExecutor.submit(() ->
-                                processConfigForMaterial(config, materialIds, parsedById, totalSuccessCount, totalFailCount)
-                        ));
-                    }
-                    awaitAndShutdown(configFutures, configExecutor, 30, "素材向量化配置并发");
-                } finally {
-                    configExecutor.shutdownNow();
-                }
+            for (DeconstructVectorConfig config : configs) {
+                configExecutor.submit(() ->
+                        processConfigForMaterial(config, materialIds, parsedById, totalSuccessCount, totalFailCount)
+                );
+            }
+            VectorUtils.awaitAndShutdown(configExecutor, 30, "素材向量化配置并发");
 
                 totalProcessed.addAndGet(materialIds.size());
 
@@ -380,7 +370,7 @@ public class MaterialVectorJob {
                     continue;
                 }
                 try {
-                    List<String> texts = extractTextsFromDataContent(parsed.dataContent, config);
+                    List<String> texts = DeconstructTextExtractor.extractTextsFromDataContent(parsed.dataContent, config);
                     if (CollectionUtils.isEmpty(texts)) {
                         log.info("materialId={} 配置 {} 未提取到文本,跳过", materialId, configCode);
                         totalFailCount.incrementAndGet();
@@ -414,9 +404,9 @@ public class MaterialVectorJob {
         boolean multiPoint = VectorUtils.isMultiPointConfig(config);
 
         if (multiPoint) {
-            // 1) 先压缩掉空文本,pointIndex 用紧凑下标
-            // 2) 全部 embed 成功后再统一 save,避免出现"部分点写入、existsByIds 误判已完成"的中间态
-            //    (existsByIds 仅按 materialId 判存,留下"洞"后下一轮会跳过整个素材)
+            // 预清理旧向量,防止上一轮 partial write 留下残缺数据导致 existsByIds 误判已完成
+            materialVectorStoreService.deleteAbovePointIndex(configCode, materialId, 0);
+
             List<String> validTexts = new ArrayList<>(texts.size());
             for (String raw : texts) {
                 if (StringUtils.hasText(raw)) validTexts.add(raw);
@@ -535,226 +525,6 @@ public class MaterialVectorJob {
         return doVectorize(maxMaterialCount);
     }
 
-    // ====================================================================
-    // TODO: 与 VideoVectorJob 的提取逻辑统一抽取到 VectorUtils / ExtractionUtils,避免两边各自维护
-    // ====================================================================
-
-    /**
-     * Extracts the texts to vectorize from dataContent (the original note says this
-     * mirrors the extraction in VideoVectorJob).
-     *
-     * A null dataContent yields an empty list. When the config carries an extract rule,
-     * the rule is parsed as JSON: type "point_decomposition" is routed to
-     * extractTextsFromPointDecomposition; any other rule, including one that fails to
-     * parse, is routed to extractTextsWithConfidence together with the config's source
-     * path. Without a rule, the values at the source path are returned through
-     * VectorUtils.extractFromJson.
-     */
-    private List<String> extractTextsFromDataContent(JSONObject dataContent, DeconstructVectorConfig config) {
-        if (dataContent == null) {
-            return Collections.emptyList();
-        }
-        String extractRule = config.getExtractRule();
-        if (StringUtils.hasText(extractRule)) {
-            try {
-                JSONObject rule = JSON.parseObject(extractRule);
-                if ("point_decomposition".equals(rule.getString("type"))) {
-                    return extractTextsFromPointDecomposition(dataContent, rule);
-                }
-            } catch (Exception e) {
-                // Not JSON, or no type field: deliberately ignored so the original logic below runs
-            }
-            return extractTextsWithConfidence(dataContent, config.getSourcePath(), extractRule);
-        } else {
-            return VectorUtils.extractFromJson(dataContent, config.getSourcePath());
-        }
-    }
-
-    private List<String> extractTextsWithConfidence(JSONObject json, String sourcePath, String extractRule) { // Returns text_field values under sourcePath that pass the rule's confidence check.
-        List<String> texts = new ArrayList<>();
-        try {
-            JSONObject rule = JSON.parseObject(extractRule);
-            String textField = rule.getString("text_field");
-            String confidenceField = rule.getString("confidence_field");
-            double confidenceThreshold = rule.getDoubleValue("confidence_threshold");
-            if (!StringUtils.hasText(textField) || !StringUtils.hasText(confidenceField)) { // Both field names are mandatory; without them nothing is extracted.
-                log.error("extract_rule 缺少必要字段: text_field={}, confidence_field={}", textField, confidenceField);
-                return texts;
-            }
-            if (sourcePath.endsWith("[*]")) { // Array path: every element is filtered on its own.
-                List<JSONObject> items = VectorUtils.extractArrayItemsFromJson(json, sourcePath);
-                for (JSONObject item : items) {
-                    if (isConfidenceQualified(item, confidenceField, confidenceThreshold)) {
-                        String text = item.getString(textField);
-                        if (StringUtils.hasText(text)) {
-                            texts.add(text);
-                        }
-                    }
-                }
-            } else { // Single-object path: only the one object the path resolves to is checked.
-                List<String> pathValues = VectorUtils.extractFromJson(json, sourcePath);
-                if (!pathValues.isEmpty()) {
-                    JSONObject targetObj = navigateToObject(json, sourcePath);
-                    if (targetObj != null && isConfidenceQualified(targetObj, confidenceField, confidenceThreshold)) {
-                        String text = targetObj.getString(textField);
-                        if (StringUtils.hasText(text)) {
-                            texts.add(text);
-                        }
-                    }
-                }
-            }
-        } catch (Exception e) { // Best effort: the failure is logged and whatever was collected so far is returned.
-            log.error("置信度过滤提取失败: path={}, error={}", sourcePath, e.getMessage());
-        }
-        return texts;
-    }
-
-    private List<String> extractTextsFromPointDecomposition(JSONObject dataContent, JSONObject rule) { // Returns item names of confidence-qualified points whose contribution reaches the rule's threshold.
-        List<String> texts = new ArrayList<>();
-        try {
-            String pointArrayPath = rule.getString("point_array_path");
-            String finalResultPath = rule.getString("final_result_path");
-            String pointNameField = rule.getString("point_name_field");
-            String confidenceField = rule.getString("confidence_field");
-            double confidenceThreshold = rule.getDoubleValue("confidence_threshold");
-            String target = rule.getString("target");
-            String contributionPath = rule.getString("contribution_path");
-            double contributionThreshold = rule.getDoubleValue("contribution_threshold");
-
-            List<JSONObject> finalPoints = VectorUtils.extractArrayItemsFromJson(dataContent, finalResultPath + "[*]"); // Step 1: collect the names of final-result points that pass the confidence check.
-            List<String> qualifiedPointNames = new ArrayList<>();
-            for (JSONObject fp : finalPoints) {
-                if (isConfidenceQualified(fp, confidenceField, confidenceThreshold)) {
-                    String pointName = fp.getString(pointNameField);
-                    if (StringUtils.hasText(pointName)) {
-                        qualifiedPointNames.add(pointName);
-                    }
-                }
-            }
-            if (qualifiedPointNames.isEmpty()) return texts; // No qualified point, so there is nothing to emit.
-
-            List<JSONObject> pointDetails = VectorUtils.extractArrayItemsFromJson(dataContent, pointArrayPath + "[*]"); // Step 2: load the per-point details and the word -> contribution table.
-            Map<String, Double> contributionMap = buildContributionMap(dataContent, contributionPath);
-
-            for (String pointName : qualifiedPointNames) {
-                try {
-                    JSONObject matchedPoint = null;
-                    for (JSONObject detail : pointDetails) {
-                        if (pointName.equals(detail.getString("点"))) { // Detail entries are matched on their "点" field; the first match wins.
-                            matchedPoint = detail;
-                            break;
-                        }
-                    }
-                    if (matchedPoint == null) continue; // No detail entry exists for this point.
-
-                    List<String> itemNames = "substance".equals(target) // Any target other than "substance" falls back to the form names.
-                            ? extractSubstanceNames(matchedPoint)
-                            : extractFormNames(matchedPoint);
-                    for (String name : itemNames) {
-                        Double contribution = contributionMap.get(name);
-                        if (contribution != null && contribution >= contributionThreshold) { // Names missing from the table are dropped.
-                            texts.add(name);
-                        }
-                    }
-                } catch (Exception e) { // One bad point is logged at debug level and does not stop the others.
-                    log.debug("extractTextsFromPointDecomposition 单点处理异常 pointName={}: {}", pointName, e.getMessage());
-                }
-            }
-        } catch (Exception e) {
-            log.error("extractTextsFromPointDecomposition 失败: {}", e.getMessage(), e);
-        }
-        return texts;
-    }
-
-    private List<String> extractSubstanceNames(JSONObject point) { // Collects item names from the three arrays under the point's "实质" object.
-        List<String> names = new ArrayList<>();
-        JSONObject substance = point.getJSONObject("实质");
-        if (substance == null) return names; // A point without a "实质" object yields no names.
-        for (String key : new String[]{"具体元素", "具象概念", "抽象概念"}) {
-            try {
-                collectNamesFromArray(substance.getJSONArray(key), names);
-            } catch (Exception e) { // A failure on one key is logged at debug level; the remaining keys are still read.
-                log.debug("extractSubstanceNames key={} 异常: {}", key, e.getMessage());
-            }
-        }
-        return names;
-    }
-
-    private List<String> extractFormNames(JSONObject point) { // Collects item names from the three arrays under the point's "形式" object.
-        List<String> names = new ArrayList<>();
-        JSONObject form = point.getJSONObject("形式");
-        if (form == null) return names; // A point without a "形式" object yields no names.
-        for (String key : new String[]{"具体元素形式", "具象概念形式", "整体形式"}) {
-            try {
-                collectNamesFromArray(form.getJSONArray(key), names);
-            } catch (Exception e) { // A failure on one key is logged at debug level; the remaining keys are still read.
-                log.debug("extractFormNames key={} 异常: {}", key, e.getMessage());
-            }
-        }
-        return names;
-    }
-
-    private void collectNamesFromArray(JSONArray array, List<String> names) { // Appends every non-blank "名称" value found in the array's object elements to names.
-        if (array == null || array.isEmpty()) return; // A missing or empty array contributes nothing.
-        for (int i = 0; i < array.size(); i++) {
-            try {
-                JSONObject item = array.getJSONObject(i);
-                if (item != null) {
-                    String name = item.getString("名称");
-                    if (StringUtils.hasText(name)) {
-                        names.add(name);
-                    }
-                }
-            } catch (Exception e) { // An element that cannot be read is skipped; the loop continues.
-                log.debug("collectNamesFromArray 单元素解析异常: {}", e.getMessage());
-            }
-        }
-    }
-
-    private Map<String, Double> buildContributionMap(JSONObject dataContent, String contributionPath) { // Builds a word ("词") -> contribution ("贡献度") lookup from the array at contributionPath.
-        Map<String, Double> map = new HashMap<>();
-        try {
-            List<JSONObject> contributions = VectorUtils.extractArrayItemsFromJson(dataContent, contributionPath + "[*]");
-            for (JSONObject c : contributions) {
-                try {
-                    String word = c.getString("词");
-                    Double contribution = c.getDouble("贡献度");
-                    if (StringUtils.hasText(word) && contribution != null) {
-                        map.put(word, contribution); // A repeated word overwrites the earlier value.
-                    }
-                } catch (Exception e) { // One unreadable entry is skipped; the rest are still processed.
-                    log.debug("buildContributionMap 单元素解析异常: {}", e.getMessage());
-                }
-            }
-        } catch (Exception e) { // On failure, the entries gathered so far (possibly none) are returned.
-            log.error("构建贡献度查找表失败: {}", e.getMessage());
-        }
-        return map;
-    }
-
-    private JSONObject navigateToObject(JSONObject json, String path) { // Resolves a "$.a.b" style path to a JSONObject; returns null whenever it cannot.
-        if (json == null || !StringUtils.hasText(path) || !path.startsWith("$.")) return null; // Only paths rooted at "$." are accepted.
-        try {
-            String pathContent = path.substring(2); // Drop the leading "$.".
-            String[] parts = pathContent.split("\\."); // Plain dot-separated keys only; array segments are not interpreted here.
-            Object current = json;
-            for (String part : parts) {
-                if (current instanceof JSONObject) {
-                    current = ((JSONObject) current).get(part);
-                } else {
-                    return null;
-                }
-            }
-            return current instanceof JSONObject ? (JSONObject) current : null; // A leaf that is not an object yields null.
-        } catch (Exception e) {
-            return null;
-        }
-    }
-
-    private boolean isConfidenceQualified(JSONObject item, String confidenceField, double threshold) { // Tells whether the item's confidence value passes; strings and numbers are judged differently.
-        Object value = item.get(confidenceField);
-        if (value == null) return false; // A missing confidence never qualifies.
-        if (value instanceof String) return "high".equalsIgnoreCase((String) value); // String form: only "high" (any case) qualifies; threshold is not consulted.
-        if (value instanceof Number) return ((Number) value).doubleValue() >= threshold; // Numeric form: compared with threshold, inclusive.
-        return false; // Any other value type does not qualify.
-    }
-
-    // ====================================================================
-    // 通用辅助
-    // ====================================================================
-
     private List<DeconstructVectorConfig> getEnabledConfigsBySourceField(String sourceField) {
         DeconstructVectorConfigExample example = new DeconstructVectorConfigExample();
         example.createCriteria()
@@ -764,43 +534,6 @@ public class MaterialVectorJob {
         return vectorConfigMapper.selectByExample(example);
     }
 
-    private void awaitAndShutdown(List<Future<?>> futures, ExecutorService executor,
-                                  long timeoutMinutes, String taskDesc) { // Waits for the futures under one overall deadline, then shuts the executor down.
-        long deadline = System.currentTimeMillis() + TimeUnit.MINUTES.toMillis(timeoutMinutes); // One shared deadline for all futures, not a per-future timeout.
-        int completed = 0;
-        for (Future<?> future : futures) {
-            long remaining = deadline - System.currentTimeMillis();
-            if (remaining <= 0) { // Deadline passed: cancel every future and stop waiting.
-                log.error("{} 整体超时({}分钟),已取消剩余任务 (已完成 {}/{})",
-                        taskDesc, timeoutMinutes, completed, futures.size());
-                for (Future<?> f : futures) {
-                    f.cancel(true);
-                }
-                break;
-            }
-            try {
-                future.get(remaining, TimeUnit.MILLISECONDS);
-                completed++;
-            } catch (Exception e) { // Timeout, interruption and task failure are all logged the same way. NOTE(review): the interrupt flag is not restored here.
-                log.error("{} 并发任务等待异常: {}", taskDesc, e.getMessage());
-            }
-        }
-        executor.shutdown(); // Orderly shutdown only; shutdownNow() is not called in this method.
-    }
-
-    /**
-     * Parses the job parameter N into maxMaterialCount.
-     *
-     * Returns the trimmed parameter as an integer when it is strictly positive.
-     * Returns null when the parameter is blank, is not an integer, or is zero or negative.
-     */
-    private Integer parseMaxCount(String param) {
-        if (!StringUtils.hasText(param)) return null;
-        try {
-            int v = Integer.parseInt(param.trim());
-            return v > 0 ? v : null;
-        } catch (NumberFormatException e) {
-            return null;
-        }
-    }
-
     /**
      * 归一化 AIGC bizUniqueId 为 materialId 字符串。
      * 外部合作素材为文件 MD5(32 位 hex),内部素材通常为数字字符串。

+ 84 - 0
core/src/main/java/com/tzld/videoVector/model/entity/ArticleMatch.java

@@ -0,0 +1,84 @@
+package com.tzld.videoVector.model.entity;
+
+/**
+ * Result entity for an article vector match.
+ * Mirrors MaterialMatch; used for article vector recall results.
+ *
+ * A plain mutable bean: a no-arg constructor, two convenience constructors, and
+ * getters/setters for every field.
+ */
+public class ArticleMatch {
+
+    /** Article ID */
+    private String articleId;
+
+    /** Cosine similarity score (-1 to 1; larger means more similar) */
+    private double score;
+
+    /** Code of the config that was hit */
+    private String configCode;
+
+    /** Vector point index (in multi-point mode it tells apart the vector points of one article) */
+    private Integer pointIndex;
+
+    /** Original text that was vectorized */
+    private String text;
+
+    public ArticleMatch() {
+    }
+
+    public ArticleMatch(String articleId, double score) {
+        this.articleId = articleId;
+        this.score = score;
+    }
+
+    public ArticleMatch(String articleId, double score, String configCode) {
+        this.articleId = articleId;
+        this.score = score;
+        this.configCode = configCode;
+    }
+
+    public String getArticleId() {
+        return articleId;
+    }
+
+    public void setArticleId(String articleId) {
+        this.articleId = articleId;
+    }
+
+    public double getScore() {
+        return score;
+    }
+
+    public void setScore(double score) {
+        this.score = score;
+    }
+
+    public String getConfigCode() {
+        return configCode;
+    }
+
+    public void setConfigCode(String configCode) {
+        this.configCode = configCode;
+    }
+
+    public Integer getPointIndex() {
+        return pointIndex;
+    }
+
+    public void setPointIndex(Integer pointIndex) {
+        this.pointIndex = pointIndex;
+    }
+
+    public String getText() {
+        return text;
+    }
+
+    public void setText(String text) {
+        this.text = text;
+    }
+
+    @Override
+    public String toString() { // The text field is not part of the output.
+        return "ArticleMatch{articleId=" + articleId + ", score=" + score +
+                ", configCode='" + configCode + "'" +
+                ", pointIndex=" + pointIndex + "}";
+    }
+}

+ 3 - 0
core/src/main/java/com/tzld/videoVector/model/param/recall/MatchByTextParam.java

@@ -26,4 +26,7 @@ public class MatchByTextParam {
 
     /** 素材返回条数;不传则与 topN 相同 */
     private Integer materialTopN;
+
+    /** 文章返回条数;不传则与 topN 相同 */
+    private Integer articleTopN;
 }

+ 66 - 0
core/src/main/java/com/tzld/videoVector/model/po/pgVector/ArticleDeconstructResult.java

@@ -0,0 +1,66 @@
+package com.tzld.videoVector.model.po.pgVector;
+
+import java.util.Date;
+
+/**
+ * Database Table Remarks:
+ *   Cache of article deconstruct results (mirrors material_deconstruct_result)
+ *
+ * This class corresponds to the database table article_deconstruct_result
+ *
+ * A plain mutable bean with one getter/setter pair per field. The per-field notes
+ * below are inferred from the field names and should be confirmed against the table
+ * definition.
+ */
+public class ArticleDeconstructResult {
+    private Long id; // Row identifier (presumably the table's primary key; confirm against the schema).
+    private String articleId; // ID of the article this row belongs to.
+    private String source; // NOTE(review): meaning of "source" is not visible here; confirm against the writer of this table.
+    private String result; // Deconstruct result held as a string (presumably serialized JSON; confirm).
+    private Date createTime; // Presumably the row creation time; confirm.
+    private Date updateTime; // Presumably the last update time; confirm.
+
+    public Long getId() {
+        return id;
+    }
+
+    public void setId(Long id) {
+        this.id = id;
+    }
+
+    public String getArticleId() {
+        return articleId;
+    }
+
+    public void setArticleId(String articleId) {
+        this.articleId = articleId;
+    }
+
+    public String getSource() {
+        return source;
+    }
+
+    public void setSource(String source) {
+        this.source = source;
+    }
+
+    public String getResult() {
+        return result;
+    }
+
+    public void setResult(String result) {
+        this.result = result;
+    }
+
+    public Date getCreateTime() {
+        return createTime;
+    }
+
+    public void setCreateTime(Date createTime) {
+        this.createTime = createTime;
+    }
+
+    public Date getUpdateTime() {
+        return updateTime;
+    }
+
+    public void setUpdateTime(Date updateTime) {
+        this.updateTime = updateTime;
+    }
+}

+ 104 - 0
core/src/main/java/com/tzld/videoVector/model/po/pgVector/ArticleVector.java

@@ -0,0 +1,104 @@
+package com.tzld.videoVector.model.po.pgVector;
+
+import java.util.Date;
+
+/**
+ * Database Table Remarks:
+ *   Article vector storage table (mirrors material_vectors)
+ *
+ * This class corresponds to the database table article_vectors
+ *
+ * A plain mutable bean with one getter/setter pair per field. The field notes that
+ * mention PgArticleVectorStoreServiceImpl describe how that service fills and reads
+ * the field.
+ */
+public class ArticleVector {
+    private Long id; // Row identifier (presumably the table's primary key; confirm against the schema).
+    private String articleId; // ID of the article the vector belongs to.
+    private String configCode; // Code of the vector config the vector was produced under.
+    private String embedding; // Vector in textual form; PgArticleVectorStoreServiceImpl writes it with VectorUtils.vectorToString and reads it back with VectorUtils.parseVectorString.
+    private Date createdAt; // Presumably the row creation time; confirm.
+    private Date updatedAt; // Presumably the last update time; confirm.
+    private Integer pointIndex; // Index of the vector point for one article and config; the single-vector save in PgArticleVectorStoreServiceImpl uses 0.
+    private String text; // Text that was vectorized.
+    private String textHash; // Hash of text; PgArticleVectorStoreServiceImpl.save computes it with Md5Util.encoderByMd5 and leaves it null for null or empty text.
+
+    /** Cosine similarity score (used only by searches; not a persisted field) */
+    private Double score;
+
+    public Double getScore() {
+        return score;
+    }
+
+    public void setScore(Double score) {
+        this.score = score;
+    }
+
+    public Long getId() {
+        return id;
+    }
+
+    public void setId(Long id) {
+        this.id = id;
+    }
+
+    public String getArticleId() {
+        return articleId;
+    }
+
+    public void setArticleId(String articleId) {
+        this.articleId = articleId;
+    }
+
+    public String getConfigCode() {
+        return configCode;
+    }
+
+    public void setConfigCode(String configCode) {
+        this.configCode = configCode;
+    }
+
+    public String getEmbedding() {
+        return embedding;
+    }
+
+    public void setEmbedding(String embedding) {
+        this.embedding = embedding;
+    }
+
+    public Date getCreatedAt() {
+        return createdAt;
+    }
+
+    public void setCreatedAt(Date createdAt) {
+        this.createdAt = createdAt;
+    }
+
+    public Date getUpdatedAt() {
+        return updatedAt;
+    }
+
+    public void setUpdatedAt(Date updatedAt) {
+        this.updatedAt = updatedAt;
+    }
+
+    public Integer getPointIndex() {
+        return pointIndex;
+    }
+
+    public void setPointIndex(Integer pointIndex) {
+        this.pointIndex = pointIndex;
+    }
+
+    public String getText() {
+        return text;
+    }
+
+    public void setText(String text) {
+        this.text = text;
+    }
+
+    public String getTextHash() {
+        return textHash;
+    }
+
+    public void setTextHash(String textHash) {
+        this.textHash = textHash;
+    }
+}

+ 42 - 0
core/src/main/java/com/tzld/videoVector/model/vo/recall/ArticleDetailVO.java

@@ -0,0 +1,42 @@
+package com.tzld.videoVector.model.vo.recall;
+
+import lombok.Data;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Article detail VO (sent when modality=ARTICLE; mutually exclusive with the detail
+ * objects of the other modalities)
+ *
+ * Mirrors MaterialDetailVO. Accessors are generated by Lombok's @Data.
+ */
+@Data
+public class ArticleDetailVO {
+
+    /** Article title */
+    private String title;
+
+    /** Body text */
+    private String content;
+
+    /** Summary */
+    private String summary;
+
+    /** Tag list */
+    private List<String> tags;
+
+    /** Cover image */
+    private String cover;
+
+    /** Link to the original article */
+    private String url;
+
+    /** List of illustration images */
+    private List<String> images;
+
+    /** Source label */
+    private String source;
+
+    /** Deconstruct data (aligned with the video deconstruct sub-structure) */
+    private Map<String, Object> deconstruct;
+}

+ 6 - 0
core/src/main/java/com/tzld/videoVector/model/vo/recall/VideoMatchEnrichedVO.java

@@ -25,6 +25,12 @@ public class VideoMatchEnrichedVO {
      */
     private String materialId;
 
+    /**
+     * 文章原始 ID(modality=ARTICLE 时下发)
+     * 前端:id 为空时取本字段展示。
+     */
+    private String articleId;
+
     /** 模态:VIDEO / MATERIAL / ARTICLE */
     private Modality modality;
 

+ 42 - 0
core/src/main/java/com/tzld/videoVector/service/ArticleVectorStoreService.java

@@ -0,0 +1,42 @@
+package com.tzld.videoVector.service;
+
+import com.tzld.videoVector.model.entity.ArticleMatch;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Storage service interface for article vectors (mirrors MaterialVectorStoreService).
+ *
+ * Every operation is scoped by a configCode. The notes on the methods describe what
+ * the pgvector implementation, PgArticleVectorStoreServiceImpl, does; other
+ * implementations may differ.
+ */
+public interface ArticleVectorStoreService {
+
+    boolean save(String configCode, String articleId, List<Float> vector, String text); // Single-vector save; the pgvector implementation stores it at pointIndex 0.
+
+    boolean save(String configCode, String articleId, int pointIndex, List<Float> vector, String text); // Stores one vector point; returns false when articleId, vector or configCode is rejected.
+
+    boolean exists(String configCode, String articleId); // True when the mapper reports at least one row for the article under configCode.
+
+    Set<String> existsByIds(String configCode, Collection<String> articleIds); // Returns the IDs among articleIds that the mapper reports as existing; queried in batches.
+
+    List<Float> getVector(String configCode, String articleId); // Not supported by the pgvector implementation (throws UnsupportedOperationException).
+
+    Map<String, List<Float>> getVectors(String configCode, Collection<String> articleIds); // Not supported by the pgvector implementation (throws UnsupportedOperationException).
+
+    Set<String> getAllArticleIds(String configCode); // All article IDs the mapper returns for configCode; empty set when there are none.
+
+    void delete(String configCode, String articleId); // Deletes by article ID and configCode.
+
+    void deleteBatch(String configCode, Collection<String> articleIds); // Batched form of delete.
+
+    void deleteAbovePointIndex(String configCode, String articleId, int minPointIndex); // NOTE(review): whether minPointIndex itself is deleted depends on the mapper SQL; confirm.
+
+    List<Float> getVectorByTextHash(String textHash, String configCode); // Looks a stored vector up by text hash; null on a miss, an unusable embedding or a failure.
+
+    String getRawVectorByTextHash(String textHash, String configCode); // Same lookup, but returns the stored embedding string unparsed.
+
+    List<ArticleMatch> searchTopN(String configCode, List<Float> queryVector, int topN); // Similarity search for a query vector; empty list for invalid arguments or no match.
+
+    List<ArticleMatch> searchTopNByRawVector(String configCode, String rawVector, int topN); // Same search, taking the query vector already in its string form.
+}

+ 245 - 0
core/src/main/java/com/tzld/videoVector/service/impl/PgArticleVectorStoreServiceImpl.java

@@ -0,0 +1,245 @@
+package com.tzld.videoVector.service.impl;
+
+import com.tzld.videoVector.common.constant.VectorConstants;
+import com.tzld.videoVector.dao.mapper.pgVector.ext.ArticleVectorMapperExt;
+import com.tzld.videoVector.model.entity.ArticleMatch;
+import com.tzld.videoVector.model.po.pgVector.ArticleVector;
+import com.tzld.videoVector.service.ArticleVectorStoreService;
+import com.tzld.videoVector.util.Md5Util;
+import com.tzld.videoVector.util.VectorUtils;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Service;
+import org.springframework.util.StringUtils;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * pgvector implementation of the article vector storage service
+ * (mirrors PgMaterialVectorStoreServiceImpl).
+ *
+ * All persistence goes through {@link ArticleVectorMapperExt}. Rejected arguments are
+ * answered with a neutral result (false, an empty collection or null) rather than an
+ * exception. The only methods that throw on purpose are the two unsupported bulk
+ * vector getters.
+ */
+@Slf4j
+@Service
+public class PgArticleVectorStoreServiceImpl implements ArticleVectorStoreService {
+
+    /** Number of leading characters of a query vector shown in search logs. */
+    private static final int SEARCH_PREVIEW_LENGTH = 100;
+
+    /** Number of leading characters of a stored embedding shown in lookup logs. */
+    private static final int EMBEDDING_PREVIEW_LENGTH = 80;
+
+    /** Stored embeddings shorter than this are treated as malformed by the raw lookup. */
+    private static final int MIN_RAW_EMBEDDING_LENGTH = 10;
+
+    @Autowired
+    private ArticleVectorMapperExt articleVectorMapperExt;
+
+    /**
+     * Saves a single vector for the article at point index 0.
+     *
+     * @return true when the vector was handed to the mapper, false when an argument was rejected
+     */
+    @Override
+    public boolean save(String configCode, String articleId, List<Float> vector, String text) {
+        return save(configCode, articleId, 0, vector, text);
+    }
+
+    /**
+     * Saves one vector point through the mapper's upsertVector.
+     *
+     * The text hash is the MD5 of {@code text}; it is left null when the text is null or empty.
+     *
+     * @return true when the vector was handed to the mapper, false when articleId is blank,
+     *         the vector is null or empty, or configCode is null or empty
+     */
+    @Override
+    public boolean save(String configCode, String articleId, int pointIndex, List<Float> vector, String text) {
+        if (!StringUtils.hasText(articleId) || vector == null || vector.isEmpty()) {
+            log.error("save 参数非法,configCode={}, articleId={}", configCode, articleId);
+            return false;
+        }
+        if (isNullOrEmpty(configCode)) {
+            log.error("save configCode 不能为空");
+            return false;
+        }
+
+        String embedding = VectorUtils.vectorToString(vector);
+        String textHash = isNullOrEmpty(text) ? null : Md5Util.encoderByMd5(text);
+        articleVectorMapperExt.upsertVector(articleId, configCode, pointIndex, embedding, text, textHash);
+        log.debug("保存文章向量成功,configCode={}, articleId={}, pointIndex={}, 维度={}",
+                configCode, articleId, pointIndex, vector.size());
+        return true;
+    }
+
+    /** Returns true when the mapper reports at least one row for the article under configCode. */
+    @Override
+    public boolean exists(String configCode, String articleId) {
+        if (!StringUtils.hasText(articleId) || isNullOrEmpty(configCode)) return false;
+        return articleVectorMapperExt.existsByArticleIdAndConfigCode(articleId, configCode) > 0;
+    }
+
+    /**
+     * Returns the IDs among {@code articleIds} that the mapper reports as existing under configCode.
+     *
+     * The IDs are queried in slices of VectorConstants.ODPS_IN_BATCH_SIZE to bound the
+     * size of each IN list.
+     */
+    @Override
+    public Set<String> existsByIds(String configCode, Collection<String> articleIds) {
+        if (articleIds == null || articleIds.isEmpty() || isNullOrEmpty(configCode)) {
+            return Collections.emptySet();
+        }
+
+        List<String> idList = new ArrayList<>(articleIds);
+        Set<String> existing = new HashSet<>();
+        for (int i = 0; i < idList.size(); i += VectorConstants.ODPS_IN_BATCH_SIZE) {
+            int end = Math.min(i + VectorConstants.ODPS_IN_BATCH_SIZE, idList.size());
+            List<String> batch = idList.subList(i, end);
+            List<String> found = articleVectorMapperExt.selectExistingArticleIds(batch, configCode);
+            if (found != null) {
+                existing.addAll(found);
+            }
+        }
+        return existing;
+    }
+
+    /** Not supported by this implementation. */
+    @Override
+    public List<Float> getVector(String configCode, String articleId) {
+        throw new UnsupportedOperationException("getVector 暂不支持,请使用 searchTopN");
+    }
+
+    /** Not supported by this implementation. */
+    @Override
+    public Map<String, List<Float>> getVectors(String configCode, Collection<String> articleIds) {
+        throw new UnsupportedOperationException("getVectors 暂不支持,请使用 searchTopN");
+    }
+
+    /** Returns every article ID the mapper lists for configCode; never null. */
+    @Override
+    public Set<String> getAllArticleIds(String configCode) {
+        if (isNullOrEmpty(configCode)) {
+            return Collections.emptySet();
+        }
+        List<String> ids = articleVectorMapperExt.selectAllArticleIds(configCode);
+        if (ids == null) return Collections.emptySet();
+        return new HashSet<>(ids);
+    }
+
+    /** Deletes by article ID and configCode; does nothing when either argument is rejected. */
+    @Override
+    public void delete(String configCode, String articleId) {
+        if (!StringUtils.hasText(articleId) || isNullOrEmpty(configCode)) return;
+        articleVectorMapperExt.deleteByArticleIdAndConfigCode(articleId, configCode);
+        log.info("删除文章向量成功,configCode={}, articleId={}", configCode, articleId);
+    }
+
+    /** Batched form of {@link #delete}; the IDs are sent in slices of VectorConstants.ODPS_IN_BATCH_SIZE. */
+    @Override
+    public void deleteBatch(String configCode, Collection<String> articleIds) {
+        if (articleIds == null || articleIds.isEmpty() || isNullOrEmpty(configCode)) return;
+
+        List<String> idList = new ArrayList<>(articleIds);
+        for (int i = 0; i < idList.size(); i += VectorConstants.ODPS_IN_BATCH_SIZE) {
+            int end = Math.min(i + VectorConstants.ODPS_IN_BATCH_SIZE, idList.size());
+            List<String> batch = idList.subList(i, end);
+            articleVectorMapperExt.deleteBatchByArticleIds(batch, configCode);
+        }
+        log.info("批量删除文章向量成功,configCode={}, 数量={}", configCode, articleIds.size());
+    }
+
+    /**
+     * Delegates to the mapper's deleteAbovePointIndex.
+     *
+     * NOTE(review): whether the row at {@code minPointIndex} itself is removed is decided by
+     * the mapper SQL, which is not visible from this class; confirm before relying on it.
+     */
+    @Override
+    public void deleteAbovePointIndex(String configCode, String articleId, int minPointIndex) {
+        if (!StringUtils.hasText(articleId) || isNullOrEmpty(configCode)) return;
+        articleVectorMapperExt.deleteAbovePointIndex(articleId, configCode, minPointIndex);
+    }
+
+    /**
+     * Looks a stored vector up by text hash and parses it.
+     *
+     * @return the parsed vector, or null on a miss, a null embedding, an unparsable embedding
+     *         or any exception (which is logged with its stack trace)
+     */
+    @Override
+    public List<Float> getVectorByTextHash(String textHash, String configCode) {
+        if (isNullOrEmpty(textHash) || isNullOrEmpty(configCode)) return null;
+        try {
+            ArticleVector av = articleVectorMapperExt.selectByTextHashAndConfigCode(textHash, configCode);
+            if (av == null) {
+                log.info("getVectorByTextHash MISS: textHash={}, configCode={}", textHash, configCode);
+                return null;
+            }
+            if (av.getEmbedding() == null) {
+                log.info("getVectorByTextHash HIT but embedding IS NULL: textHash={}, configCode={}, articleId={}",
+                        textHash, configCode, av.getArticleId());
+                return null;
+            }
+            List<Float> vector = VectorUtils.parseVectorString(av.getEmbedding());
+            if (vector == null || vector.isEmpty()) {
+                log.info("getVectorByTextHash HIT but parseVectorString FAILED: textHash={}, configCode={}, embeddingLen={}",
+                        textHash, configCode, av.getEmbedding().length());
+                return null;
+            }
+            log.info("getVectorByTextHash HIT OK: textHash={}, configCode={}, articleId={}, dim={}",
+                    textHash, configCode, av.getArticleId(), vector.size());
+            return vector;
+        } catch (Exception e) {
+            // The trailing throwable keeps the stack trace; the message text is unchanged.
+            log.error("根据 text_hash 查询文章向量失败,hash={}, configCode={}, error={}",
+                    textHash, configCode, e.getMessage(), e);
+            return null;
+        }
+    }
+
+    /**
+     * Looks a stored vector up by text hash and returns the embedding string unparsed.
+     *
+     * @return the stored string, or null on a miss, a null or empty embedding, an embedding that
+     *         is shorter than {@link #MIN_RAW_EMBEDDING_LENGTH} or does not start with "[" after
+     *         trimming, or any exception (which is logged with its stack trace)
+     */
+    @Override
+    public String getRawVectorByTextHash(String textHash, String configCode) {
+        if (isNullOrEmpty(textHash) || isNullOrEmpty(configCode)) return null;
+        try {
+            ArticleVector av = articleVectorMapperExt.selectByTextHashAndConfigCode(textHash, configCode);
+            if (av == null) {
+                log.info("getRawVectorByTextHash MISS: textHash={}, configCode={}", textHash, configCode);
+                return null;
+            }
+            String raw = av.getEmbedding();
+            if (isNullOrEmpty(raw)) {
+                log.info("getRawVectorByTextHash HIT but embedding IS NULL: textHash={}, configCode={}, articleId={}",
+                        textHash, configCode, av.getArticleId());
+                return null;
+            }
+            if (raw.length() < MIN_RAW_EMBEDDING_LENGTH || !raw.trim().startsWith("[")) {
+                log.info("getRawVectorByTextHash HIT but format SUSPECT: textHash={}, configCode={}, len={}, preview={}",
+                        textHash, configCode, raw.length(), preview(raw, EMBEDDING_PREVIEW_LENGTH));
+                return null;
+            }
+            log.info("getRawVectorByTextHash HIT OK: textHash={}, configCode={}, articleId={}, len={}, preview={}",
+                    textHash, configCode, av.getArticleId(), raw.length(),
+                    preview(raw, EMBEDDING_PREVIEW_LENGTH));
+            return raw;
+        } catch (Exception e) {
+            // The trailing throwable keeps the stack trace; the message text is unchanged.
+            log.error("getRawVectorByTextHash 异常,hash={}, configCode={}, error={}",
+                    textHash, configCode, e.getMessage(), e);
+            return null;
+        }
+    }
+
+    /**
+     * Runs the similarity search with a query vector that is already in string form.
+     *
+     * @return the matches, or an empty list when rawVector is null or empty, topN is not
+     *         positive, configCode is null or empty, or the mapper returns nothing
+     */
+    @Override
+    public List<ArticleMatch> searchTopNByRawVector(String configCode, String rawVector, int topN) {
+        if (isNullOrEmpty(rawVector) || topN <= 0) {
+            return Collections.emptyList();
+        }
+        if (isNullOrEmpty(configCode)) {
+            log.error("searchTopNByRawVector configCode 不能为空");
+            return Collections.emptyList();
+        }
+        log.info("searchTopNByRawVector raw前100字符: {}, topN={}, configCode={}",
+                preview(rawVector, SEARCH_PREVIEW_LENGTH), topN, configCode);
+        return queryMatches("searchTopNByRawVector", configCode, rawVector, topN);
+    }
+
+    /**
+     * Runs the similarity search for a query vector given as a list of floats.
+     *
+     * @return the matches, or an empty list when queryVector is null or empty, topN is not
+     *         positive, configCode is null or empty, or the mapper returns nothing
+     */
+    @Override
+    public List<ArticleMatch> searchTopN(String configCode, List<Float> queryVector, int topN) {
+        if (queryVector == null || queryVector.isEmpty() || topN <= 0) {
+            return Collections.emptyList();
+        }
+        if (isNullOrEmpty(configCode)) {
+            log.error("searchTopN configCode 不能为空");
+            return Collections.emptyList();
+        }
+
+        String queryVectorStr = VectorUtils.vectorToString(queryVector);
+        log.info("searchTopN SQL vector前100字符: {}, topN={}, configCode={}",
+                preview(queryVectorStr, SEARCH_PREVIEW_LENGTH), topN, configCode);
+        return queryMatches("searchTopN", configCode, queryVectorStr, topN);
+    }
+
+    /**
+     * Shared tail of both search entry points: queries the mapper, converts the rows and
+     * logs the row count under the caller's name.
+     */
+    private List<ArticleMatch> queryMatches(String caller, String configCode, String vectorLiteral, int topN) {
+        List<ArticleVector> results = articleVectorMapperExt.searchTopN(configCode, vectorLiteral, topN);
+        if (results == null || results.isEmpty()) {
+            log.info("文章向量库为空或无匹配结果,configCode={}", configCode);
+            return Collections.emptyList();
+        }
+        List<ArticleMatch> matches = convertToMatch(results, configCode);
+        log.info("{} DB返回 {} 行, configCode={}", caller, results.size(), configCode);
+        return matches;
+    }
+
+    /** Maps mapper rows to match entities; a row without a score is given 0.0. */
+    private List<ArticleMatch> convertToMatch(List<ArticleVector> results, String configCode) {
+        return results.stream()
+                .map(av -> {
+                    double scoreVal = av.getScore() != null ? av.getScore() : 0.0;
+                    ArticleMatch m = new ArticleMatch(av.getArticleId(), scoreVal, configCode);
+                    m.setPointIndex(av.getPointIndex());
+                    m.setText(av.getText());
+                    return m;
+                })
+                .collect(Collectors.toList());
+    }
+
+    /** Null-or-empty check used for configCode, hashes and raw strings; whitespace counts as content. */
+    private static boolean isNullOrEmpty(String value) {
+        return value == null || value.isEmpty();
+    }
+
+    /** Returns at most the first {@code maxLength} characters of a non-null string, for logging. */
+    private static String preview(String value, int maxLength) {
+        return value.substring(0, Math.min(maxLength, value.length()));
+    }
+}

+ 3 - 20
core/src/main/java/com/tzld/videoVector/service/impl/PgMaterialVectorStoreServiceImpl.java

@@ -45,7 +45,7 @@ public class PgMaterialVectorStoreServiceImpl implements MaterialVectorStoreServ
             return false;
         }
 
-        String embedding = vectorToString(vector);
+        String embedding = VectorUtils.vectorToString(vector);
         String textHash = (text != null && !text.isEmpty()) ? Md5Util.encoderByMd5(text) : null;
         materialVectorMapperExt.upsertVector(materialId, configCode, pointIndex, embedding, text, textHash, sourceType);
         log.debug("保存素材向量成功,configCode={}, materialId={}, pointIndex={}, sourceType={}, 维度={}",
@@ -215,7 +215,7 @@ public class PgMaterialVectorStoreServiceImpl implements MaterialVectorStoreServ
             return Collections.emptyList();
         }
 
-        String queryVectorStr = vectorToString(queryVector);
+        String queryVectorStr = VectorUtils.vectorToString(queryVector);
         log.info("searchTopN SQL vector前100字符: {}, topN={}, configCode={}",
                 queryVectorStr.substring(0, Math.min(100, queryVectorStr.length())), topN, configCode);
         List<MaterialVector> results = materialVectorMapperExt.searchTopN(configCode, queryVectorStr, topN);
@@ -242,7 +242,7 @@ public class PgMaterialVectorStoreServiceImpl implements MaterialVectorStoreServ
             return searchTopN(configCode, queryVector, topN);
         }
 
-        String queryVectorStr = vectorToString(queryVector);
+        String queryVectorStr = VectorUtils.vectorToString(queryVector);
         List<MaterialVector> results = materialVectorMapperExt.searchTopNBySource(configCode, queryVectorStr, topN, sourceType);
         if (results == null || results.isEmpty()) {
             log.info("素材向量库无匹配结果,configCode={}, sourceType={}", configCode, sourceType);
@@ -264,21 +264,4 @@ public class PgMaterialVectorStoreServiceImpl implements MaterialVectorStoreServ
                 })
                 .collect(Collectors.toList());
     }
-
-    private String vectorToString(List<Float> vector) {
-        StringBuilder sb = new StringBuilder("[");
-        for (int i = 0; i < vector.size(); i++) {
-            if (i > 0) sb.append(",");
-            // Float.toString() 对 |v| < 1e-3 的值会输出科学计数法(如 6.399564E-4)
-            // pgvector 的 ::vector 只认标准十进制格式, 必须用 BigDecimal.toPlainString() 兜底
-            float v = vector.get(i);
-            String s = Float.toString(v);
-            if (s.indexOf('E') >= 0 || s.indexOf('e') >= 0) {
-                s = new java.math.BigDecimal(s).toPlainString();
-            }
-            sb.append(s);
-        }
-        sb.append("]");
-        return sb.toString();
-    }
 }

+ 307 - 21
core/src/main/java/com/tzld/videoVector/service/recall/impl/VectorRecallTestServiceImpl.java

@@ -7,22 +7,27 @@ import com.tzld.videoVector.api.VideoApiService;
 import com.tzld.videoVector.common.constant.VectorConstants;
 import com.tzld.videoVector.common.enums.Modality;
 import com.tzld.videoVector.dao.mapper.pgVector.DeconstructVectorConfigMapper;
+import com.tzld.videoVector.dao.mapper.pgVector.ext.ArticleDeconstructResultMapperExt;
 import com.tzld.videoVector.dao.mapper.pgVector.ext.MaterialDeconstructResultMapperExt;
+import com.tzld.videoVector.model.entity.ArticleMatch;
 import com.tzld.videoVector.model.entity.MaterialMatch;
 import com.tzld.videoVector.model.entity.VideoDetail;
 import com.tzld.videoVector.model.param.MatchTopNVideoParam;
 import com.tzld.videoVector.model.param.recall.MatchByTextParam;
 import com.tzld.videoVector.model.param.recall.MatchByVideoIdParam;
+import com.tzld.videoVector.model.po.pgVector.ArticleDeconstructResult;
 import com.tzld.videoVector.model.po.pgVector.DeconstructVectorConfig;
 import com.tzld.videoVector.model.po.pgVector.DeconstructVectorConfigExample;
 import com.tzld.videoVector.model.po.pgVector.MaterialDeconstructResult;
 import com.tzld.videoVector.model.vo.VideoMatchResult;
 import com.tzld.videoVector.model.vo.recall.AIUnderstandingVO;
 import com.tzld.videoVector.model.vo.recall.DeconstructPointsVO;
+import com.tzld.videoVector.model.vo.recall.ArticleDetailVO;
 import com.tzld.videoVector.model.vo.recall.MaterialDetailVO;
 import com.tzld.videoVector.model.vo.recall.RecallResultVO;
 import com.tzld.videoVector.model.vo.recall.VideoBasicVO;
 import com.tzld.videoVector.model.vo.recall.VideoMatchEnrichedVO;
+import com.tzld.videoVector.service.ArticleVectorStoreService;
 import com.tzld.videoVector.service.EmbeddingService;
 import com.tzld.videoVector.service.MaterialVectorStoreService;
 import com.tzld.videoVector.service.VideoSearchService;
@@ -76,6 +81,12 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
     @Autowired
     private MaterialDeconstructResultMapperExt materialDeconstructResultMapperExt;
 
+    @Autowired
+    private ArticleVectorStoreService articleVectorStoreService;
+
+    @Autowired
+    private ArticleDeconstructResultMapperExt articleDeconstructResultMapperExt;
+
     @Autowired
     private DeconstructVectorConfigMapper deconstructVectorConfigMapper;
 
@@ -150,12 +161,15 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
         int videoTopN = param.getVideoTopN() != null && param.getVideoTopN() > 0 ? param.getVideoTopN() : defaultTopN;
         int materialTopN = param.getMaterialTopN() != null && param.getMaterialTopN() > 0
                 ? param.getMaterialTopN() : defaultTopN;
+        int articleTopN = param.getArticleTopN() != null && param.getArticleTopN() > 0
+                ? param.getArticleTopN() : defaultTopN;
         String configCode = StringUtils.hasText(param.getConfigCode())
                 ? param.getConfigCode() : VectorConstants.DEFAULT_CONFIG_CODE;
 
-        // 并行召回:视频、素材各自独立 topN
+        // 并行召回:视频、素材、文章各自独立 topN
         final int finalVideoTopN = videoTopN;
         final int finalMaterialTopN = materialTopN;
+        final int finalArticleTopN = articleTopN;
         final String finalConfigCode = configCode;
         CompletableFuture<List<VideoMatchResult>> videoFuture = CompletableFuture.supplyAsync(() -> {
             try {
@@ -175,8 +189,13 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
                 () -> recallMaterialItems(param.getQueryText(), finalConfigCode, finalMaterialTopN),
                 RECALL_EXECUTOR);
 
+        CompletableFuture<List<VideoMatchEnrichedVO>> articleFuture = CompletableFuture.supplyAsync(
+                () -> recallArticleItems(param.getQueryText(), finalConfigCode, finalArticleTopN),
+                RECALL_EXECUTOR);
+
         List<VideoMatchResult> videoMatches;
         List<VideoMatchEnrichedVO> materialItems;
+        List<VideoMatchEnrichedVO> articleItems;
         try {
             videoMatches = videoFuture.get(30, TimeUnit.SECONDS);
         } catch (Exception e) {
@@ -189,9 +208,15 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
             log.error("素材召回等待超时/异常: {}", e.getMessage(), e);
             materialItems = Collections.emptyList();
         }
+        try {
+            articleItems = articleFuture.get(30, TimeUnit.SECONDS);
+        } catch (Exception e) {
+            log.error("文章召回等待超时/异常: {}", e.getMessage(), e);
+            articleItems = Collections.emptyList();
+        }
 
         List<VideoMatchEnrichedVO> videoItems = enrichVideoMatches(videoMatches, configCode);
-        return buildResult(videoItems, materialItems);
+        return buildResult(videoItems, materialItems, articleItems);
     }
 
     private List<VideoMatchResult> limitVideoMatchesByScore(List<VideoMatchResult> matches, int topN) {
@@ -246,7 +271,27 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
                     log.info("素材召回(rawVector) 无结果, configCode={}", configCode);
                     return Collections.emptyList();
                 }
-                log.info("素材召回 text_hash 缓存未命中, textHash={}, 降级到 embedding API", textHash);
+                log.info("素材召回 rawVector 缓存未命中, textHash={}, 尝试 parsed vector 缓存", textHash);
+            }
+
+            if (StringUtils.hasText(textHash)) {
+                List<Float> cachedVector = materialVectorStoreService.getVectorByTextHash(textHash, configCode);
+                if (cachedVector != null && !cachedVector.isEmpty()) {
+                    log.info("素材召回 使用缓存的 parsed vector, dim={}", cachedVector.size());
+                    List<MaterialMatch> raw = materialVectorStoreService.searchTopN(configCode, cachedVector, candidate);
+                    List<MaterialMatch> matches = deduplicateMaterialMatches(raw, topN);
+                    if (!CollectionUtils.isEmpty(matches)) {
+                        List<String> matchSample = new ArrayList<>();
+                        for (MaterialMatch m : matches) {
+                            matchSample.add(m.getMaterialId() + ":" + String.format("%.4f", m.getScore()));
+                        }
+                        log.info("素材召回(parsed vector缓存) 去重后({}条): {}, configCode={}",
+                                matches.size(), matchSample, configCode);
+                        return limitEnrichedItemsByScore(enrichMaterialMatches(matches, configCode), topN);
+                    }
+                    log.info("素材召回(parsed vector缓存) 无结果, configCode={}", configCode);
+                    return Collections.emptyList();
+                }
             }
 
             // 降级:embedding API → Float 向量 → 搜索(非缓存路径,容忍精度损失)
@@ -289,19 +334,6 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
         log.info("resolveQueryVectorForMaterial: queryText={}, configCode={}, model={}, dim={}",
                 queryText, configCode, config.getEmbeddingModel(), config.getDimension());
 
-        // 1. 先查 material_vectors 的 text_hash 缓存
-        String textHash = Md5Util.encoderByMd5(queryText);
-        if (StringUtils.hasText(textHash)) {
-            log.info("resolveQueryVectorForMaterial textHash={}, 开始查 text_hash 缓存", textHash);
-            List<Float> cached = materialVectorStoreService.getVectorByTextHash(textHash, configCode);
-            if (cached != null && !cached.isEmpty()) {
-                log.info("resolveQueryVectorForMaterial 命中 text_hash 缓存,dim={}", cached.size());
-                return cached;
-            }
-            log.info("resolveQueryVectorForMaterial text_hash 缓存未命中,降级到 embedding API");
-        }
-
-        // 2. 调用 embedding API(与入库时相同的 model / dimension)
         try {
             log.info("resolveQueryVectorForMaterial 调用 embedding API: text={}, model={}, dim={}",
                     queryText, config.getEmbeddingModel(), config.getDimension());
@@ -467,6 +499,245 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
         return null;
     }
 
+    // ====================================================================
+    // 文章召回(对称素材召回)
+    // ====================================================================
+
+    /**
+     * Recalls articles for a query text and returns them as enriched items.
+     *
+     * <p>The query vector is resolved in three steps, stopping at the first hit:
+     * the cached raw vector string for the text hash, the cached parsed vector for
+     * the same hash, and finally the embedding API. A cache hit whose search returns
+     * nothing ends the recall with an empty list; it does not fall through.
+     *
+     * @param queryText  text to recall articles for
+     * @param configCode vector config used for cache lookup, search and enrichment
+     * @param topN       maximum number of distinct articles to return
+     * @return enriched article items, or an empty list when nothing matches or any step throws
+     */
+    private List<VideoMatchEnrichedVO> recallArticleItems(String queryText, String configCode, int topN) {
+        try {
+            // Over-fetch candidates because several rows can belong to the same article.
+            int candidate = Math.max(topN * VectorConstants.MULTI_POINT_RECALL_CANDIDATE_FACTOR,
+                    VectorConstants.MULTI_POINT_RECALL_MIN_CANDIDATES);
+
+            String textHash = Md5Util.encoderByMd5(queryText);
+            if (StringUtils.hasText(textHash)) {
+                String rawVector = articleVectorStoreService.getRawVectorByTextHash(textHash, configCode);
+                if (rawVector != null && !rawVector.isEmpty()) {
+                    log.info("文章召回 使用缓存的原始向量字符串, configCode={}", configCode);
+                    List<ArticleMatch> raw = articleVectorStoreService.searchTopNByRawVector(
+                            configCode, rawVector, candidate);
+                    return finishArticleRecall(raw, topN, configCode,
+                            "文章召回(rawVector) 去重后({}条): {}, configCode={}",
+                            "文章召回(rawVector) 无结果, configCode={}");
+                }
+                log.info("文章召回 rawVector 缓存未命中, textHash={}, 尝试 parsed vector 缓存", textHash);
+
+                List<Float> cachedVector = articleVectorStoreService.getVectorByTextHash(textHash, configCode);
+                if (cachedVector != null && !cachedVector.isEmpty()) {
+                    log.info("文章召回 使用缓存的 parsed vector, dim={}", cachedVector.size());
+                    List<ArticleMatch> raw = articleVectorStoreService.searchTopN(configCode, cachedVector, candidate);
+                    return finishArticleRecall(raw, topN, configCode,
+                            "文章召回(parsed vector缓存) 去重后({}条): {}, configCode={}",
+                            "文章召回(parsed vector缓存) 无结果, configCode={}");
+                }
+            }
+
+            List<Float> queryVector = resolveQueryVectorForArticle(queryText, configCode);
+            if (queryVector == null || queryVector.isEmpty()) {
+                log.info("文章召回: 无法获取查询向量, queryText={}", queryText);
+                return Collections.emptyList();
+            }
+            log.info("文章召回 使用 embedding API 向量, dim={}", queryVector.size());
+            List<ArticleMatch> raw = articleVectorStoreService.searchTopN(configCode, queryVector, candidate);
+            return finishArticleRecall(raw, topN, configCode,
+                    "文章召回(embedding API) 去重后({}条): {}, configCode={}",
+                    "文章召回 article_vectors 无结果, configCode={}");
+        } catch (Exception e) {
+            log.error("文章召回 article_vectors 异常: {}", e.getMessage(), e);
+            return Collections.emptyList();
+        }
+    }
+
+    /**
+     * Shared tail of every article recall path: de-duplicates the raw matches,
+     * logs an id:score sample, then enriches and truncates the result.
+     *
+     * @param raw            matches returned by the vector store
+     * @param topN           maximum number of distinct articles to keep
+     * @param configCode     config code used for enrichment and logging
+     * @param hitLogFormat   log format used when matches remain (count, sample, configCode)
+     * @param emptyLogFormat log format used when nothing remains (configCode)
+     * @return enriched items, or an empty list when de-duplication leaves nothing
+     */
+    private List<VideoMatchEnrichedVO> finishArticleRecall(List<ArticleMatch> raw, int topN, String configCode,
+                                                           String hitLogFormat, String emptyLogFormat) {
+        List<ArticleMatch> matches = deduplicateArticleMatches(raw, topN);
+        if (CollectionUtils.isEmpty(matches)) {
+            log.info(emptyLogFormat, configCode);
+            return Collections.emptyList();
+        }
+        List<String> matchSample = new ArrayList<>(matches.size());
+        for (ArticleMatch m : matches) {
+            matchSample.add(m.getArticleId() + ":" + String.format("%.4f", m.getScore()));
+        }
+        log.info(hitLogFormat, matches.size(), matchSample, configCode);
+        return limitEnrichedItemsByScore(enrichArticleMatches(matches, configCode), topN);
+    }
+
+    /**
+     * Obtains the query vector for an article recall by calling the embedding service.
+     *
+     * @param queryText  text to embed; blank text yields {@code null}
+     * @param configCode config whose embedding settings are used; when no config is
+     *                   found, a config carrying only this code is passed to the service
+     * @return the embedding, or {@code null} when the text is blank or the call fails
+     */
+    private List<Float> resolveQueryVectorForArticle(String queryText, String configCode) {
+        if (!StringUtils.hasText(queryText)) {
+            return null;
+        }
+        DeconstructVectorConfig config = getVectorConfigByCode(configCode);
+        if (config == null) {
+            config = new DeconstructVectorConfig();
+            config.setConfigCode(configCode);
+        }
+
+        try {
+            log.info("resolveQueryVectorForArticle 调用 embedding API: text={}, model={}, dim={}",
+                    queryText, config.getEmbeddingModel(), config.getDimension());
+            return embeddingService.embed(queryText, config);
+        } catch (Exception e) {
+            // Pass the throwable as the trailing argument so the stack trace is logged too.
+            log.error("文章召回 embedding 失败: queryText={}, error={}", queryText, e.getMessage(), e);
+            return null;
+        }
+    }
+
+    /**
+     * Collapses matches to one entry per article id, keeping the highest-scoring
+     * entry, and returns at most {@code topN} of them ordered by score descending.
+     *
+     * <p>Sorting happens before truncation. A {@link LinkedHashMap} keeps the slot of
+     * the first occurrence even when a later duplicate replaces it with a higher
+     * score, so insertion order alone is not a reliable ranking.
+     *
+     * @param matches raw matches; null entries and entries without an article id are skipped
+     * @param topN    maximum number of distinct articles to return
+     * @return the de-duplicated matches, best score first; empty when the input is empty
+     */
+    private List<ArticleMatch> deduplicateArticleMatches(List<ArticleMatch> matches, int topN) {
+        if (CollectionUtils.isEmpty(matches)) {
+            return Collections.emptyList();
+        }
+        Map<String, ArticleMatch> deduped = new LinkedHashMap<>();
+        for (ArticleMatch m : matches) {
+            if (m == null || !StringUtils.hasText(m.getArticleId())) {
+                continue;
+            }
+            ArticleMatch existing = deduped.get(m.getArticleId());
+            if (existing == null || m.getScore() > existing.getScore()) {
+                deduped.put(m.getArticleId(), m);
+            }
+        }
+        List<ArticleMatch> ranked = new ArrayList<>(deduped.values());
+        // The sort is stable, so equal scores keep their first-seen order.
+        ranked.sort((left, right) -> Double.compare(right.getScore(), left.getScore()));
+        return ranked.stream().limit(topN).collect(Collectors.toList());
+    }
+
+    /**
+     * Turns article matches into enriched items by joining them with their
+     * stored deconstruct rows.
+     *
+     * <p>Matches that are null or have no article id are skipped. A match without
+     * a stored row, or whose row cannot be parsed, still produces an item that
+     * carries the id, the score and an article detail with no metadata.
+     *
+     * @param matches           de-duplicated matches to enrich
+     * @param requestConfigCode config code copied onto every item
+     * @return one item per usable match, in input order; empty when the input is empty
+     */
+    private List<VideoMatchEnrichedVO> enrichArticleMatches(List<ArticleMatch> matches, String requestConfigCode) {
+        if (CollectionUtils.isEmpty(matches)) {
+            return Collections.emptyList();
+        }
+        // Collect ids with the same null guards as the loop below, so a null
+        // element cannot fail here before the per-match guard gets a chance to skip it.
+        List<String> articleIds = new ArrayList<>(matches.size());
+        for (ArticleMatch m : matches) {
+            if (m != null && m.getArticleId() != null) {
+                articleIds.add(m.getArticleId());
+            }
+        }
+        Map<String, ArticleDeconstructResult> rowByArticleId = loadArticleDeconstructRows(articleIds);
+
+        List<VideoMatchEnrichedVO> items = new ArrayList<>(matches.size());
+        for (ArticleMatch m : matches) {
+            if (m == null || m.getArticleId() == null) {
+                continue;
+            }
+            VideoMatchEnrichedVO vo = new VideoMatchEnrichedVO();
+            vo.setModality(Modality.ARTICLE);
+            vo.setConfigCode(requestConfigCode);
+            vo.setScore(m.getScore());
+
+            ArticleDeconstructResult row = rowByArticleId.get(m.getArticleId());
+            JSONObject raw = parseArticleResultJson(row);
+            ArticleBasicMeta basic = raw != null ? extractArticleBasicMeta(raw) : null;
+            Map<String, Object> deconstructFlat = raw != null ? buildDeconstructFromRaw(raw) : null;
+
+            // Prefer the id stored inside the deconstruct result over the vector row id.
+            String displayArticleId = (basic != null && StringUtils.hasText(basic.articleId))
+                    ? basic.articleId : m.getArticleId();
+            vo.setArticleId(displayArticleId);
+            try {
+                vo.setId(Long.parseLong(displayArticleId));
+            } catch (NumberFormatException ignored) {
+                // Non-numeric article ids are expected; the numeric id is simply left unset.
+            }
+
+            ArticleDetailVO detail = new ArticleDetailVO();
+            if (basic != null) {
+                vo.setTitle(basic.title);
+                vo.setCover(basic.cover);
+                if (basic.images != null && !basic.images.isEmpty()) {
+                    vo.setImageList(basic.images);
+                }
+                detail.setTitle(basic.title);
+                detail.setSummary(basic.summary);
+                detail.setCover(basic.cover);
+                detail.setImages(basic.images);
+            }
+            detail.setDeconstruct(deconstructFlat);
+            vo.setArticleDetail(detail);
+
+            applyCompatibilityFields(vo);
+            items.add(vo);
+        }
+        return items;
+    }
+
+    /**
+     * Loads the deconstruct rows for the given article ids in one mapper call and
+     * indexes them by article id; the first row seen for an id wins.
+     *
+     * <p>A mapper failure is logged and whatever was collected so far is returned.
+     */
+    private Map<String, ArticleDeconstructResult> loadArticleDeconstructRows(List<String> articleIds) {
+        if (CollectionUtils.isEmpty(articleIds)) {
+            return Collections.emptyMap();
+        }
+        Map<String, ArticleDeconstructResult> byArticleId = new HashMap<>();
+        try {
+            List<ArticleDeconstructResult> fetched = articleDeconstructResultMapperExt
+                    .selectResultsByArticleIds(SOURCE_AIGC, articleIds);
+            if (!CollectionUtils.isEmpty(fetched)) {
+                for (ArticleDeconstructResult candidate : fetched) {
+                    boolean usable = candidate != null && StringUtils.hasText(candidate.getArticleId());
+                    if (usable) {
+                        byArticleId.putIfAbsent(candidate.getArticleId(), candidate);
+                    }
+                }
+            }
+        } catch (Exception e) {
+            log.error("批量加载 article_deconstruct_result 失败: {}", e.getMessage(), e);
+        }
+        return byArticleId;
+    }
+
+    /**
+     * Parses the {@code result} column of a deconstruct row as a JSON object.
+     *
+     * @return the parsed object, or {@code null} when the row is missing, its
+     *         result is blank, or parsing throws
+     */
+    private JSONObject parseArticleResultJson(ArticleDeconstructResult row) {
+        boolean hasPayload = row != null && StringUtils.hasText(row.getResult());
+        if (!hasPayload) {
+            return null;
+        }
+        JSONObject parsed;
+        try {
+            parsed = JSON.parseObject(row.getResult());
+        } catch (Exception e) {
+            log.info("解析 article_deconstruct_result.result 失败 articleId={}: {}",
+                    row.getArticleId(), e.getMessage());
+            parsed = null;
+        }
+        return parsed;
+    }
+
+    /**
+     * Reads display metadata from the {@code target_post} object of a parsed
+     * deconstruct result.
+     *
+     * <p>The summary is the first 120 characters of {@code body_text}. Blank image
+     * entries are dropped, and the cover is the first non-blank image, so the cover
+     * always agrees with the filtered image list.
+     *
+     * @param raw parsed deconstruct result
+     * @return the metadata, or {@code null} when {@code raw} or {@code target_post}
+     *         is missing or the post has no title
+     */
+    private ArticleBasicMeta extractArticleBasicMeta(JSONObject raw) {
+        if (raw == null) {
+            return null;
+        }
+        JSONObject targetPost = raw.getJSONObject("target_post");
+        if (targetPost == null) {
+            return null;
+        }
+
+        // A post without a title yields no metadata, so check it before any other parsing.
+        String title = targetPost.getString("title");
+        if (!StringUtils.hasText(title)) {
+            return null;
+        }
+
+        ArticleBasicMeta meta = new ArticleBasicMeta();
+        meta.title = title;
+        String bodyText = targetPost.getString("body_text");
+        if (StringUtils.hasText(bodyText)) {
+            meta.summary = bodyText.length() > 120 ? bodyText.substring(0, 120) : bodyText;
+        }
+        meta.articleId = targetPost.getString("channel_content_id");
+
+        JSONArray imagesArr = targetPost.getJSONArray("images");
+        if (imagesArr != null && !imagesArr.isEmpty()) {
+            meta.images = new ArrayList<>(imagesArr.size());
+            for (int i = 0; i < imagesArr.size(); i++) {
+                String img = imagesArr.getString(i);
+                if (StringUtils.hasText(img)) {
+                    meta.images.add(img);
+                }
+            }
+            // Take the cover from the filtered list rather than element 0 of the raw
+            // array, which may be blank.
+            if (!meta.images.isEmpty()) {
+                meta.cover = meta.images.get(0);
+            }
+        }
+        return meta;
+    }
+
     /**
      * 批量加载 material_deconstruct_result 原始行(保留 source_type / result)
      */
@@ -665,21 +936,26 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
     }
 
     /**
-     * 组装返回结果:视频 + 素材合并为 items。
-     * 各模态在前置链路已按 videoTopN / materialTopN 各自截断,此处仅拼接 + 计数,不做合并截断。
+     * 组装返回结果:视频 + 素材 + 文章合并为 items。
+     * 各模态在前置链路已按各自 topN 截断,此处仅拼接 + 计数,不做合并截断。
      */
     private RecallResultVO buildResult(List<VideoMatchEnrichedVO> videoItems,
-                                       List<VideoMatchEnrichedVO> materialItems) {
+                                       List<VideoMatchEnrichedVO> materialItems,
+                                       List<VideoMatchEnrichedVO> articleItems) {
         if (videoItems == null) {
             videoItems = Collections.emptyList();
         }
         if (materialItems == null) {
             materialItems = Collections.emptyList();
         }
+        if (articleItems == null) {
+            articleItems = Collections.emptyList();
+        }
 
-        List<VideoMatchEnrichedVO> all = new ArrayList<>(videoItems.size() + materialItems.size());
+        List<VideoMatchEnrichedVO> all = new ArrayList<>(videoItems.size() + materialItems.size() + articleItems.size());
         all.addAll(videoItems);
         all.addAll(materialItems);
+        all.addAll(articleItems);
 
         int videoCount = 0;
         int materialCount = 0;
@@ -743,7 +1019,7 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
         String configCode = StringUtils.hasText(param.getConfigCode())
                 ? param.getConfigCode() : VectorConstants.DEFAULT_CONFIG_CODE;
         List<VideoMatchEnrichedVO> videoItems = enrichVideoMatches(rawMatches, configCode);
-        return buildResult(videoItems, Collections.emptyList());
+        return buildResult(videoItems, Collections.emptyList(), Collections.emptyList());
     }
 
     @Override
@@ -882,4 +1158,14 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
         String title;
         String imagesJson;
     }
+
+    private static class ArticleBasicMeta { // display metadata read from an article deconstruct result's target_post
+        String articleId; // target_post.channel_content_id
+        String title; // target_post.title; extraction returns no meta when it is blank
+        String summary; // first 120 characters of target_post.body_text
+        List<String> tags; // NOTE(review): not assigned anywhere in the visible changes - confirm it is needed
+        String cover; // first entry of target_post.images
+        String url; // NOTE(review): not assigned anywhere in the visible changes - confirm it is needed
+        List<String> images; // non-blank entries of target_post.images
+    }
 }

+ 237 - 0
core/src/main/java/com/tzld/videoVector/util/DeconstructTextExtractor.java

@@ -0,0 +1,237 @@
+package com.tzld.videoVector.util;
+
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONArray;
+import com.alibaba.fastjson.JSONObject;
+import com.tzld.videoVector.model.po.pgVector.DeconstructVectorConfig;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.util.StringUtils;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * 解构文本提取工具,从 AIGC 解构结果的 dataContent 中按配置规则提取文本。
+ * MaterialVectorJob / ArticleVectorJob / VideoVectorJob 共享。
+ */
+@Slf4j
+public final class DeconstructTextExtractor {
+
+    private DeconstructTextExtractor() { // static-only utility class; the private constructor prevents instantiation
+    }
+
+    /**
+     * Extracts texts from {@code dataContent} according to the config.
+     *
+     * <p>Without an extract rule, the values at the config's source path are
+     * returned as they are. A rule of type {@code point_decomposition} selects
+     * point-decomposition extraction; any other rule is handled as a
+     * confidence-filter rule.
+     *
+     * @param dataContent deconstruct payload; {@code null} yields an empty list
+     * @param config      vector config providing the source path and the extract rule
+     * @return the extracted texts
+     */
+    public static List<String> extractTextsFromDataContent(JSONObject dataContent, DeconstructVectorConfig config) {
+        if (dataContent == null) {
+            return Collections.emptyList();
+        }
+        String ruleText = config.getExtractRule();
+        if (!StringUtils.hasText(ruleText)) {
+            return VectorUtils.extractFromJson(dataContent, config.getSourcePath());
+        }
+
+        try {
+            JSONObject parsedRule = JSON.parseObject(ruleText);
+            String ruleType = parsedRule.getString("type");
+            if ("point_decomposition".equals(ruleType)) {
+                return extractTextsFromPointDecomposition(dataContent, parsedRule);
+            }
+        } catch (Exception e) {
+            // Not JSON, or no usable type field: fall through to the confidence-filter path.
+        }
+        return extractTextsWithConfidence(dataContent, config.getSourcePath(), ruleText);
+    }
+
+    public static List<String> extractTextsWithConfidence(JSONObject json, String sourcePath, String extractRule) {
+        List<String> texts = new ArrayList<>();
+        try {
+            JSONObject rule = JSON.parseObject(extractRule);
+            String textField = rule.getString("text_field");
+            String confidenceField = rule.getString("confidence_field");
+            double confidenceThreshold = rule.getDoubleValue("confidence_threshold");
+            if (!StringUtils.hasText(textField) || !StringUtils.hasText(confidenceField)) {
+                log.error("extract_rule 缺少必要字段: text_field={}, confidence_field={}", textField, confidenceField);
+                return texts;
+            }
+            if (sourcePath.endsWith("[*]")) {
+                List<JSONObject> items = VectorUtils.extractArrayItemsFromJson(json, sourcePath);
+                for (JSONObject item : items) {
+                    if (isConfidenceQualified(item, confidenceField, confidenceThreshold)) {
+                        String text = item.getString(textField);
+                        if (StringUtils.hasText(text)) {
+                            texts.add(text);
+                        }
+                    }
+                }
+            } else {
+                List<String> pathValues = VectorUtils.extractFromJson(json, sourcePath);
+                if (!pathValues.isEmpty()) {
+                    JSONObject targetObj = navigateToObject(json, sourcePath);
+                    if (targetObj != null && isConfidenceQualified(targetObj, confidenceField, confidenceThreshold)) {
+                        String text = targetObj.getString(textField);
+                        if (StringUtils.hasText(text)) {
+                            texts.add(text);
+                        }
+                    }
+                }
+            }
+        } catch (Exception e) {
+            log.error("置信度过滤提取失败: path={}, error={}", sourcePath, e.getMessage());
+        }
+        return texts;
+    }
+
+    /**
+     * Extracts item names for a {@code point_decomposition} rule.
+     *
+     * <p>Steps: collect the names of final-result points whose confidence qualifies;
+     * find each name among the point details; take that point's substance names when
+     * the rule target is {@code substance}, otherwise its form names; keep the names
+     * whose contribution reaches the contribution threshold.
+     *
+     * @param dataContent deconstruct payload
+     * @param rule        parsed rule supplying the paths, field names and thresholds
+     * @return the qualifying names; a failure in one point is logged at debug level and skipped
+     */
+    static List<String> extractTextsFromPointDecomposition(JSONObject dataContent, JSONObject rule) {
+        List<String> texts = new ArrayList<>();
+        try {
+            String detailArrayPath = rule.getString("point_array_path");
+            String finalPath = rule.getString("final_result_path");
+            String nameKey = rule.getString("point_name_field");
+            String confidenceKey = rule.getString("confidence_field");
+            double minConfidence = rule.getDoubleValue("confidence_threshold");
+            String target = rule.getString("target");
+            String contributionPath = rule.getString("contribution_path");
+            double minContribution = rule.getDoubleValue("contribution_threshold");
+
+            List<String> qualifiedNames = new ArrayList<>();
+            for (JSONObject finalPoint : VectorUtils.extractArrayItemsFromJson(dataContent, finalPath + "[*]")) {
+                if (!isConfidenceQualified(finalPoint, confidenceKey, minConfidence)) {
+                    continue;
+                }
+                String qualified = finalPoint.getString(nameKey);
+                if (StringUtils.hasText(qualified)) {
+                    qualifiedNames.add(qualified);
+                }
+            }
+            if (qualifiedNames.isEmpty()) {
+                return texts;
+            }
+
+            List<JSONObject> details = VectorUtils.extractArrayItemsFromJson(dataContent, detailArrayPath + "[*]");
+            Map<String, Double> contributions = buildContributionMap(dataContent, contributionPath);
+
+            for (String pointName : qualifiedNames) {
+                try {
+                    // The first detail whose point label equals the qualified name wins.
+                    JSONObject pointDetail = details.stream()
+                            .filter(detail -> pointName.equals(detail.getString("点")))
+                            .findFirst()
+                            .orElse(null);
+                    if (pointDetail != null) {
+                        List<String> candidates;
+                        if ("substance".equals(target)) {
+                            candidates = extractSubstanceNames(pointDetail);
+                        } else {
+                            candidates = extractFormNames(pointDetail);
+                        }
+                        for (String candidate : candidates) {
+                            Double weight = contributions.get(candidate);
+                            if (weight != null && weight >= minContribution) {
+                                texts.add(candidate);
+                            }
+                        }
+                    }
+                } catch (Exception e) {
+                    log.debug("extractTextsFromPointDecomposition 单点处理异常 pointName={}: {}", pointName, e.getMessage());
+                }
+            }
+        } catch (Exception e) {
+            log.error("extractTextsFromPointDecomposition 失败: {}", e.getMessage(), e);
+        }
+        return texts;
+    }
+
+    /**
+     * Builds a word-to-contribution lookup from the array at {@code contributionPath}.
+     *
+     * <p>Entries without a word or without a contribution value are skipped. When a
+     * word occurs more than once, the last occurrence wins.
+     *
+     * @return the lookup; possibly partial or empty when parsing fails
+     */
+    static Map<String, Double> buildContributionMap(JSONObject dataContent, String contributionPath) {
+        Map<String, Double> lookup = new HashMap<>();
+        try {
+            String arrayPath = contributionPath + "[*]";
+            for (JSONObject entry : VectorUtils.extractArrayItemsFromJson(dataContent, arrayPath)) {
+                try {
+                    String term = entry.getString("词");
+                    Double weight = entry.getDouble("贡献度");
+                    boolean complete = StringUtils.hasText(term) && weight != null;
+                    if (complete) {
+                        lookup.put(term, weight);
+                    }
+                } catch (Exception e) {
+                    log.debug("buildContributionMap 单元素解析异常: {}", e.getMessage());
+                }
+            }
+        } catch (Exception e) {
+            log.error("构建贡献度查找表失败: {}", e.getMessage());
+        }
+        return lookup;
+    }
+
+    /**
+     * Collects the item names under a point's substance section, visiting its
+     * three sub-arrays in a fixed order.
+     *
+     * @return the names found; empty when the point has no substance section
+     */
+    static List<String> extractSubstanceNames(JSONObject point) {
+        List<String> collected = new ArrayList<>();
+        JSONObject section = point.getJSONObject("实质");
+        if (section != null) {
+            String[] arrayKeys = {"具体元素", "具象概念", "抽象概念"};
+            for (String arrayKey : arrayKeys) {
+                try {
+                    JSONArray entries = section.getJSONArray(arrayKey);
+                    collectNamesFromArray(entries, collected);
+                } catch (Exception e) {
+                    log.debug("extractSubstanceNames key={} 异常: {}", arrayKey, e.getMessage());
+                }
+            }
+        }
+        return collected;
+    }
+
+    /**
+     * Collects the item names under a point's form section, visiting its three
+     * sub-arrays in a fixed order.
+     *
+     * @return the names found; empty when the point has no form section
+     */
+    static List<String> extractFormNames(JSONObject point) {
+        List<String> collected = new ArrayList<>();
+        JSONObject section = point.getJSONObject("形式");
+        if (section != null) {
+            String[] arrayKeys = {"具体元素形式", "具象概念形式", "整体形式"};
+            for (String arrayKey : arrayKeys) {
+                try {
+                    JSONArray entries = section.getJSONArray(arrayKey);
+                    collectNamesFromArray(entries, collected);
+                } catch (Exception e) {
+                    log.debug("extractFormNames key={} 异常: {}", arrayKey, e.getMessage());
+                }
+            }
+        }
+        return collected;
+    }
+
+    /**
+     * Appends the non-blank name of every object in {@code array} to {@code names}.
+     * An element that cannot be read is logged at debug level and skipped.
+     */
+    static void collectNamesFromArray(JSONArray array, List<String> names) {
+        if (array == null || array.isEmpty()) {
+            return;
+        }
+        int total = array.size();
+        for (int index = 0; index < total; index++) {
+            try {
+                JSONObject element = array.getJSONObject(index);
+                if (element == null) {
+                    continue;
+                }
+                String label = element.getString("名称");
+                if (StringUtils.hasText(label)) {
+                    names.add(label);
+                }
+            } catch (Exception e) {
+                log.debug("collectNamesFromArray 单元素解析异常: {}", e.getMessage());
+            }
+        }
+    }
+
+    /**
+     * Walks a dotted path of the form {@code $.a.b.c} through nested JSON objects.
+     *
+     * @return the object at the end of the path, or {@code null} when the path is
+     *         blank, does not start with {@code $.}, passes through a non-object,
+     *         ends on a non-object, or walking throws
+     */
+    static JSONObject navigateToObject(JSONObject json, String path) {
+        boolean walkable = json != null && StringUtils.hasText(path) && path.startsWith("$.");
+        if (!walkable) {
+            return null;
+        }
+        try {
+            Object cursor = json;
+            for (String segment : path.substring(2).split("\\.")) {
+                if (!(cursor instanceof JSONObject)) {
+                    return null;
+                }
+                cursor = ((JSONObject) cursor).get(segment);
+            }
+            if (cursor instanceof JSONObject) {
+                return (JSONObject) cursor;
+            }
+            return null;
+        } catch (Exception e) {
+            return null;
+        }
+    }
+
+    /**
+     * Decides whether an item's confidence field qualifies.
+     *
+     * <p>A string value qualifies only when it equals {@code high}, ignoring case.
+     * A numeric value qualifies when it is at least {@code threshold}. A missing
+     * value, or a value of any other type, does not qualify.
+     */
+    static boolean isConfidenceQualified(JSONObject item, String confidenceField, double threshold) {
+        Object confidence = item.get(confidenceField);
+        if (confidence instanceof Number) {
+            return ((Number) confidence).doubleValue() >= threshold;
+        }
+        if (confidence instanceof String) {
+            return "high".equalsIgnoreCase((String) confidence);
+        }
+        // Null and unsupported types both end up here.
+        return false;
+    }
+}

+ 72 - 0
core/src/main/java/com/tzld/videoVector/util/VectorUtils.java

@@ -4,15 +4,19 @@ import com.alibaba.fastjson.JSON;
 import com.alibaba.fastjson.JSONArray;
 import com.alibaba.fastjson.JSONObject;
 import com.tzld.videoVector.model.po.pgVector.DeconstructVectorConfig;
+import lombok.extern.slf4j.Slf4j;
 import org.springframework.util.StringUtils;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.TimeUnit;
 
 /**
  * 向量化公共工具方法
  * 集中管理 parseVectorString、extractFromJson、parseJsonPath、isMultiPointConfig 等通用逻辑
  */
+@Slf4j
 public final class VectorUtils {
 
     private VectorUtils() {
@@ -307,4 +311,72 @@ public final class VectorUtils {
 
         return parts;
     }
+
+    // ========================== 向量字符串序列化 ==========================
+
+    /**
+     * Serializes a float vector into the bracketed, comma-separated text form
+     * {@code "[0.1,0.2,...]"} intended for pgvector.
+     * <p>
+     * Components whose {@link Float#toString(float)} form uses scientific notation are rewritten
+     * with {@link java.math.BigDecimal#toPlainString()} so the output contains plain decimals only.
+     *
+     * @param vector components to serialize; must not be null and must not contain null elements
+     * @return the serialized vector; an empty list yields {@code "[]"}
+     */
+    public static String vectorToString(List<Float> vector) {
+        StringBuilder out = new StringBuilder();
+        out.append("[");
+        boolean first = true;
+        for (float component : vector) {
+            if (!first) {
+                out.append(",");
+            }
+            first = false;
+            String text = Float.toString(component);
+            boolean scientific = text.indexOf('E') >= 0 || text.indexOf('e') >= 0;
+            if (scientific) {
+                // Expand the exponent form, e.g. "1.0E-5" becomes "0.000010".
+                text = new java.math.BigDecimal(text).toPlainString();
+            }
+            out.append(text);
+        }
+        out.append("]");
+        return out.toString();
+    }
+
+    // ========================== 并发工具 ==========================
+
+    /**
+     * Shuts the executor down and waits for the already submitted tasks to finish.
+     * <p>
+     * Calls {@code shutdown()} and then {@code awaitTermination()} for the given timeout. If the
+     * timeout elapses, {@code shutdownNow()} is called and the method waits up to 60 more seconds,
+     * logging an error at each step. Waiting on the pool as a whole replaces calling
+     * {@code Future.get()} one by one, where a slow task delayed the timeout check of the tasks
+     * behind it.
+     * <p>
+     * If the calling thread is interrupted while waiting, {@code shutdownNow()} is called, the
+     * interrupt flag is restored and an error is logged.
+     *
+     * @param executor       pool to shut down (no new tasks can be submitted afterwards)
+     * @param timeoutMinutes how long to wait for normal completion, in minutes
+     * @param taskDesc       task description used in log messages
+     */
+    public static void awaitAndShutdown(ExecutorService executor,
+                                         long timeoutMinutes, String taskDesc) {
+        executor.shutdown();
+        try {
+            boolean finishedInTime = executor.awaitTermination(timeoutMinutes, TimeUnit.MINUTES);
+            if (finishedInTime) {
+                return;
+            }
+            log.error("{} 整体超时({}分钟),强制取消剩余任务", taskDesc, timeoutMinutes);
+            executor.shutdownNow();
+            boolean stoppedAfterForce = executor.awaitTermination(60, TimeUnit.SECONDS);
+            if (!stoppedAfterForce) {
+                log.error("{} 强制终止未在60秒内完成", taskDesc);
+            }
+        } catch (InterruptedException interrupted) {
+            executor.shutdownNow();
+            // Restore the interrupt flag so callers can still observe the interruption.
+            Thread.currentThread().interrupt();
+            log.error("{} 等待被中断", taskDesc);
+        }
+    }
+
+    // ========================== 参数解析 ==========================
+
+    /**
+     * Parses a job parameter string into a maximum item count.
+     *
+     * @param param raw parameter, expected to be a positive integer (surrounding whitespace is trimmed)
+     * @return the parsed positive value, or {@code null} (meaning "no limit") when the parameter is
+     *         null, blank, not a valid {@code int}, or not greater than zero
+     */
+    public static Integer parseMaxCount(String param) {
+        if (StringUtils.hasText(param)) {
+            try {
+                int parsed = Integer.parseInt(param.trim());
+                if (parsed > 0) {
+                    return parsed;
+                }
+            } catch (NumberFormatException ignored) {
+                // Not a number: treated the same as "no limit" below.
+            }
+        }
+        return null;
+    }
 }

+ 51 - 0
core/src/main/resources/mapper/pgVector/ext/ArticleDeconstructResultMapperExt.xml

@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
+<mapper namespace="com.tzld.videoVector.dao.mapper.pgVector.ext.ArticleDeconstructResultMapperExt">
+
+    <!-- Maps article_deconstruct_result columns onto ArticleDeconstructResult properties. -->
+    <resultMap id="BaseResultMap" type="com.tzld.videoVector.model.po.pgVector.ArticleDeconstructResult">
+        <id column="id" jdbcType="BIGINT" property="id"/>
+        <result column="article_id" jdbcType="VARCHAR" property="articleId"/>
+        <result column="source" jdbcType="VARCHAR" property="source"/>
+        <result column="result" jdbcType="VARCHAR" property="result"/>
+        <result column="create_time" jdbcType="TIMESTAMP" property="createTime"/>
+        <result column="update_time" jdbcType="TIMESTAMP" property="updateTime"/>
+    </resultMap>
+
+    <!--
+        Returns the article ids, among those supplied, that already have a row for the given source.
+        A null or empty id list matches no rows; without the guard the statement would render
+        "IN ()", which is not valid SQL.
+        NOTE(review): the size() test assumes articleIds is bound as a java.util.Collection.
+    -->
+    <select id="selectExistingArticleIds" resultType="java.lang.String">
+        SELECT article_id
+        FROM article_deconstruct_result
+        WHERE source = #{source}
+        <choose>
+            <when test="articleIds != null and articleIds.size() > 0">
+                AND article_id IN
+                <foreach collection="articleIds" item="id" open="(" separator="," close=")">
+                    #{id}
+                </foreach>
+            </when>
+            <otherwise>
+                AND 1 = 0
+            </otherwise>
+        </choose>
+    </select>
+
+    <!--
+        Inserts all rows in one statement; rows that collide on (article_id, source) are skipped.
+        The list must contain at least one element, otherwise the VALUES clause is empty.
+    -->
+    <insert id="batchInsertIgnore">
+        INSERT INTO article_deconstruct_result (article_id, source, result)
+        VALUES
+        <foreach collection="list" item="item" separator=",">
+            (#{item.articleId}, #{item.source}, #{item.result})
+        </foreach>
+        ON CONFLICT (article_id, source) DO NOTHING
+    </insert>
+
+    <!-- Pages through the article ids of one source, ordered by article_id, using LIMIT/OFFSET. -->
+    <select id="selectArticleIdsBySourcePaged" resultType="java.lang.String">
+        SELECT article_id
+        FROM article_deconstruct_result
+        WHERE source = #{source}
+        ORDER BY article_id
+        LIMIT #{limit} OFFSET #{offset}
+    </select>
+
+    <!--
+        Loads article_id, source and result for the supplied article ids of one source; the other
+        mapped columns are not selected. A null or empty id list matches no rows instead of
+        rendering the invalid "IN ()".
+        NOTE(review): the size() test assumes articleIds is bound as a java.util.Collection.
+    -->
+    <select id="selectResultsByArticleIds" resultMap="BaseResultMap">
+        SELECT article_id, source, result
+        FROM article_deconstruct_result
+        WHERE source = #{source}
+        <choose>
+            <when test="articleIds != null and articleIds.size() > 0">
+                AND article_id IN
+                <foreach collection="articleIds" item="id" open="(" separator="," close=")">
+                    #{id}
+                </foreach>
+            </when>
+            <otherwise>
+                AND 1 = 0
+            </otherwise>
+        </choose>
+    </select>
+
+</mapper>

+ 122 - 0
core/src/main/resources/mapper/pgVector/ext/ArticleVectorMapperExt.xml

@@ -0,0 +1,122 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
+<mapper namespace="com.tzld.videoVector.dao.mapper.pgVector.ext.ArticleVectorMapperExt">
+
+    <!-- Maps article_vectors columns (plus the computed score column) onto ArticleVector properties. -->
+    <resultMap id="ArticleVectorResultMap" type="com.tzld.videoVector.model.po.pgVector.ArticleVector">
+        <id column="id" property="id" jdbcType="BIGINT"/>
+        <result column="article_id" jdbcType="VARCHAR" property="articleId"/>
+        <result column="config_code" property="configCode" jdbcType="VARCHAR"/>
+        <result column="embedding" property="embedding" jdbcType="VARCHAR"/>
+        <result column="created_at" property="createdAt" jdbcType="TIMESTAMP"/>
+        <result column="updated_at" property="updatedAt" jdbcType="TIMESTAMP"/>
+        <result column="point_index" property="pointIndex" jdbcType="INTEGER"/>
+        <result column="text" property="text" jdbcType="VARCHAR"/>
+        <result column="text_hash" property="textHash" jdbcType="VARCHAR"/>
+        <result column="score" property="score" jdbcType="DOUBLE"/>
+    </resultMap>
+
+    <!--
+        Inserts one vector row, or on a (config_code, article_id, point_index) conflict overwrites
+        embedding, text and text_hash and refreshes updated_at. The embedding parameter is passed
+        as text and cast to the vector type.
+    -->
+    <insert id="upsertVector">
+        INSERT INTO article_vectors (
+            article_id,
+            config_code,
+            point_index,
+            embedding,
+            text,
+            text_hash,
+            created_at,
+            updated_at
+        ) VALUES (
+            #{articleId},
+            #{configCode},
+            #{pointIndex},
+            #{embedding}::vector,
+            #{text},
+            #{textHash},
+            NOW(),
+            NOW()
+        )
+        ON CONFLICT (config_code, article_id, point_index)
+        DO UPDATE SET
+            embedding = EXCLUDED.embedding,
+            text = EXCLUDED.text,
+            text_hash = EXCLUDED.text_hash,
+            updated_at = NOW()
+    </insert>
+
+    <!--
+        Existence probe: returns 1 when at least one row exists for the article and config, else 0.
+        The LIMIT sits inside the subquery so the scan can stop at the first matching row; a LIMIT
+        placed after COUNT would be applied to the single aggregate row and would not stop the scan.
+    -->
+    <select id="existsByArticleIdAndConfigCode" resultType="int">
+        SELECT COUNT(1)
+        FROM (
+            SELECT 1
+            FROM article_vectors
+            WHERE article_id = #{articleId}
+              AND config_code = #{configCode}
+            LIMIT 1
+        ) AS hit
+    </select>
+
+    <!--
+        Returns the distinct article ids, among those supplied, that already have vectors for the
+        config. A null or empty id list matches no rows instead of rendering the invalid "IN ()".
+        NOTE(review): the size() test assumes articleIds is bound as a java.util.Collection.
+    -->
+    <select id="selectExistingArticleIds" resultType="java.lang.String">
+        SELECT DISTINCT article_id
+        FROM article_vectors
+        WHERE config_code = #{configCode}
+        <choose>
+            <when test="articleIds != null and articleIds.size() > 0">
+                AND article_id IN
+                <foreach collection="articleIds" item="id" open="(" separator="," close=")">
+                    #{id}
+                </foreach>
+            </when>
+            <otherwise>
+                AND 1 = 0
+            </otherwise>
+        </choose>
+    </select>
+
+    <!-- Returns every distinct article id stored for the config, ordered by article_id. -->
+    <select id="selectAllArticleIds" resultType="java.lang.String">
+        SELECT DISTINCT article_id
+        FROM article_vectors
+        WHERE config_code = #{configCode}
+        ORDER BY article_id
+    </select>
+
+    <!--
+        Nearest-neighbour search within one config using the pgvector cosine distance operator.
+        Rows are ordered by ascending distance and score is reported as 1 minus that distance.
+        Only article_id, config_code, text and score are selected.
+    -->
+    <select id="searchTopN" resultMap="ArticleVectorResultMap">
+        SELECT
+            article_id,
+            config_code,
+            text,
+            1 - (embedding &lt;=&gt; #{queryVector}::vector) AS score
+        FROM article_vectors
+        WHERE config_code = #{configCode}
+        ORDER BY embedding &lt;=&gt; #{queryVector}::vector
+        LIMIT #{topN}
+    </select>
+
+    <!--
+        Looks up one row by text hash within a config, returning the embedding as text.
+        There is no ORDER BY, so when several rows share the hash any one of them may be returned.
+    -->
+    <select id="selectByTextHashAndConfigCode" resultMap="ArticleVectorResultMap">
+        SELECT
+            id,
+            article_id,
+            config_code,
+            embedding::text AS embedding,
+            created_at,
+            updated_at,
+            point_index,
+            text,
+            text_hash
+        FROM article_vectors
+        WHERE text_hash = #{textHash}
+          AND config_code = #{configCode}
+        LIMIT 1
+    </select>
+
+    <!-- Deletes every vector row of one article within one config. -->
+    <delete id="deleteByArticleIdAndConfigCode">
+        DELETE FROM article_vectors
+        WHERE article_id = #{articleId}
+          AND config_code = #{configCode}
+    </delete>
+
+    <!--
+        Deletes the vector rows of the supplied articles within one config.
+        A null or empty id list deletes nothing instead of rendering the invalid "IN ()".
+        NOTE(review): the size() test assumes articleIds is bound as a java.util.Collection.
+    -->
+    <delete id="deleteBatchByArticleIds">
+        DELETE FROM article_vectors
+        WHERE config_code = #{configCode}
+        <choose>
+            <when test="articleIds != null and articleIds.size() > 0">
+                AND article_id IN
+                <foreach collection="articleIds" item="id" open="(" separator="," close=")">
+                    #{id}
+                </foreach>
+            </when>
+            <otherwise>
+                AND 1 = 0
+            </otherwise>
+        </choose>
+    </delete>
+
+    <!--
+        Deletes the rows of one article and config whose point_index is at or above minPointIndex.
+        Despite the statement name the comparison is inclusive: the row at minPointIndex is removed too.
+    -->
+    <delete id="deleteAbovePointIndex">
+        DELETE FROM article_vectors
+        WHERE article_id = #{articleId}
+          AND config_code = #{configCode}
+          AND point_index >= #{minPointIndex}
+    </delete>
+
+</mapper>

+ 23 - 0
server/src/main/java/com/tzld/videoVector/controller/XxlJobController.java

@@ -26,6 +26,9 @@ public class XxlJobController {
     @Autowired
     private AiUnderstandingSyncJob aiUnderstandingSyncJob;
 
+    @Autowired
+    private ArticleVectorJob articleVectorJob;
+
     @Autowired
     private ChannelDemandMatchJob channelDemandMatchJob;
 
@@ -81,6 +84,26 @@ public class XxlJobController {
         return CommonResponse.success();
     }
 
+    // ==================== 文章相关任务 ====================
+
+    @GetMapping("/syncArticleDeconstructJob") // HTTP GET trigger for ArticleVectorJob.syncArticleDeconstructJob
+    public CommonResponse<Void> syncArticleDeconstructJob() {
+        articleVectorJob.syncArticleDeconstructJob(null); // invoked in the request thread with a null job parameter
+        return CommonResponse.success(); // reached only after the job call returns
+    }
+
+    @GetMapping("/vectorArticleJob") // HTTP GET trigger for ArticleVectorJob.vectorArticleJob
+    public CommonResponse<Void> vectorArticleJob() {
+        articleVectorJob.vectorArticleJob(null); // invoked in the request thread with a null job parameter
+        return CommonResponse.success(); // reached only after the job call returns
+    }
+
+    @GetMapping("/articleJob") // HTTP GET trigger for ArticleVectorJob.articleJob
+    public CommonResponse<Void> articleJob() {
+        articleVectorJob.articleJob(null); // invoked in the request thread with a null job parameter
+        return CommonResponse.success(); // reached only after the job call returns
+    }
+
     // ==================== 视频详情同步任务 ====================
 
     @GetMapping("/syncVideoDetailJob")