فهرست منبع

新增长文匹配

luojunhui 1 هفته پیش
والد
کامیت
2a71181f60

+ 36 - 295
core/src/main/java/com/tzld/videoVector/job/ArticleVectorJob.java

@@ -1,7 +1,7 @@
 package com.tzld.videoVector.job;
 
 import com.alibaba.fastjson.JSON;
-import com.alibaba.fastjson.JSONArray;
+
 import com.alibaba.fastjson.JSONObject;
 import com.google.common.collect.Lists;
 import com.tzld.videoVector.api.AigcApiService;
@@ -13,6 +13,7 @@ import com.tzld.videoVector.model.po.pgVector.DeconstructVectorConfig;
 import com.tzld.videoVector.model.po.pgVector.DeconstructVectorConfigExample;
 import com.tzld.videoVector.service.ArticleVectorStoreService;
 import com.tzld.videoVector.service.EmbeddingService;
+import com.tzld.videoVector.util.DeconstructTextExtractor;
 import com.tzld.videoVector.util.Md5Util;
 import com.tzld.videoVector.util.VectorUtils;
 import com.xxl.job.core.biz.model.ReturnT;
@@ -33,8 +34,6 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.stream.Collectors;
 
@@ -134,37 +133,32 @@ public class ArticleVectorJob {
             }
 
             ExecutorService executor = Executors.newFixedThreadPool(VectorConstants.AIGC_DETAIL_PARALLELISM);
-            try {
-                List<Future<?>> futures = new ArrayList<>();
-                List<ArticleDeconstructResult> batch = Collections.synchronizedList(new ArrayList<>());
-
-                for (String articleId : needSyncIds) {
-                    futures.add(executor.submit(() -> {
-                        try {
-                            Long taskInstanceId = articleIdToTaskInstanceId.get(articleId);
-                            if (taskInstanceId == null) return;
-                            JSONObject dataContent = aigcApiService.getTaskCallbackDetail(taskInstanceId);
-                            if (dataContent != null) {
-                                ArticleDeconstructResult r = new ArticleDeconstructResult();
-                                r.setArticleId(articleId);
-                                r.setSource(SOURCE_AIGC);
-                                r.setResult(dataContent.toJSONString());
-                                batch.add(r);
-                            }
-                        } catch (Exception e) {
-                            log.error("同步 articleId={} 失败: {}", articleId, e.getMessage());
+            List<ArticleDeconstructResult> batch = Collections.synchronizedList(new ArrayList<>());
+
+            for (String articleId : needSyncIds) {
+                executor.submit(() -> {
+                    try {
+                        Long taskInstanceId = articleIdToTaskInstanceId.get(articleId);
+                        if (taskInstanceId == null) return;
+                        JSONObject dataContent = aigcApiService.getTaskCallbackDetail(taskInstanceId);
+                        if (dataContent != null) {
+                            ArticleDeconstructResult r = new ArticleDeconstructResult();
+                            r.setArticleId(articleId);
+                            r.setSource(SOURCE_AIGC);
+                            r.setResult(dataContent.toJSONString());
+                            batch.add(r);
                         }
-                    }));
-                }
-                awaitAndShutdown(futures, executor, 30, "文章同步");
-
-                if (!batch.isEmpty()) {
-                    for (List<ArticleDeconstructResult> subBatch : Lists.partition(batch, 200)) {
-                        insertCount.addAndGet(articleDeconstructResultMapperExt.batchInsertIgnore(subBatch));
+                    } catch (Exception e) {
+                        log.error("同步 articleId={} 失败: {}", articleId, e.getMessage());
                     }
+                });
+            }
+            VectorUtils.awaitAndShutdown(executor, 30, "文章同步");
+
+            if (!batch.isEmpty()) {
+                for (List<ArticleDeconstructResult> subBatch : Lists.partition(batch, 200)) {
+                    insertCount.addAndGet(articleDeconstructResultMapperExt.batchInsertIgnore(subBatch));
                 }
-            } finally {
-                executor.shutdownNow();
             }
         }
     }
@@ -176,7 +170,7 @@ public class ArticleVectorJob {
     @XxlJob("vectorArticleJob")
     public ReturnT<String> vectorArticleJob(String param) {
         log.info("开始执行文章向量化任务, param: {}", param);
-        Integer maxArticleCount = parseMaxCount(param);
+        Integer maxArticleCount = VectorUtils.parseMaxCount(param);
         return doVectorize(maxArticleCount);
     }
 
@@ -215,17 +209,12 @@ public class ArticleVectorJob {
                 Map<String, ParsedArticle> parsedById = loadParsedArticles(articleIds);
 
                 ExecutorService configExecutor = Executors.newFixedThreadPool(configs.size());
-                try {
-                    List<Future<?>> configFutures = new ArrayList<>();
-                    for (DeconstructVectorConfig config : configs) {
-                        configFutures.add(configExecutor.submit(() ->
-                                processConfigForArticle(config, articleIds, parsedById, totalSuccessCount, totalFailCount)
-                        ));
-                    }
-                    awaitAndShutdown(configFutures, configExecutor, 30, "文章向量化配置并发");
-                } finally {
-                    configExecutor.shutdownNow();
-                }
+            for (DeconstructVectorConfig config : configs) {
+                configExecutor.submit(() ->
+                        processConfigForArticle(config, articleIds, parsedById, totalSuccessCount, totalFailCount)
+                );
+            }
+            VectorUtils.awaitAndShutdown(configExecutor, 30, "文章向量化配置并发");
 
                 totalProcessed.addAndGet(articleIds.size());
 
@@ -293,7 +282,7 @@ public class ArticleVectorJob {
                     continue;
                 }
                 try {
-                    List<String> texts = extractTextsFromDataContent(parsed.dataContent, config);
+                    List<String> texts = DeconstructTextExtractor.extractTextsFromDataContent(parsed.dataContent, config);
                     if (CollectionUtils.isEmpty(texts)) {
                         log.info("articleId={} 配置 {} 未提取到文本,跳过", articleId, configCode);
                         totalFailCount.incrementAndGet();
@@ -322,6 +311,9 @@ public class ArticleVectorJob {
         boolean multiPoint = VectorUtils.isMultiPointConfig(config);
 
         if (multiPoint) {
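+            // Pre-clean vectors left by an earlier run so a partial write cannot make existsByIds treat this article as done. NOTE(review): confirm the 0 argument also removes point index 0.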
+            articleVectorStoreService.deleteAbovePointIndex(configCode, articleId, 0);
+
             List<String> validTexts = new ArrayList<>(texts.size());
             for (String raw : texts) {
                 if (StringUtils.hasText(raw)) validTexts.add(raw);
@@ -412,223 +404,6 @@ public class ArticleVectorJob {
         return vectorArticleJob(param);
     }
 
-    // ====================================================================
-    // 文本提取(与 MaterialVectorJob 共用同一套逻辑)
-    // ====================================================================
-
-    private List<String> extractTextsFromDataContent(JSONObject dataContent, DeconstructVectorConfig config) {
-        if (dataContent == null) {
-            return Collections.emptyList();
-        }
-        String extractRule = config.getExtractRule();
-        if (StringUtils.hasText(extractRule)) {
-            try {
-                JSONObject rule = JSON.parseObject(extractRule);
-                if ("point_decomposition".equals(rule.getString("type"))) {
-                    return extractTextsFromPointDecomposition(dataContent, rule);
-                }
-            } catch (Exception e) {
-                // 不是 JSON 或无 type 字段,走原有逻辑
-            }
-            return extractTextsWithConfidence(dataContent, config.getSourcePath(), extractRule);
-        } else {
-            return VectorUtils.extractFromJson(dataContent, config.getSourcePath());
-        }
-    }
-
-    private List<String> extractTextsWithConfidence(JSONObject json, String sourcePath, String extractRule) {
-        List<String> texts = new ArrayList<>();
-        try {
-            JSONObject rule = JSON.parseObject(extractRule);
-            String textField = rule.getString("text_field");
-            String confidenceField = rule.getString("confidence_field");
-            double confidenceThreshold = rule.getDoubleValue("confidence_threshold");
-            if (!StringUtils.hasText(textField) || !StringUtils.hasText(confidenceField)) {
-                log.error("extract_rule 缺少必要字段: text_field={}, confidence_field={}", textField, confidenceField);
-                return texts;
-            }
-            if (sourcePath.endsWith("[*]")) {
-                List<JSONObject> items = VectorUtils.extractArrayItemsFromJson(json, sourcePath);
-                for (JSONObject item : items) {
-                    if (isConfidenceQualified(item, confidenceField, confidenceThreshold)) {
-                        String text = item.getString(textField);
-                        if (StringUtils.hasText(text)) {
-                            texts.add(text);
-                        }
-                    }
-                }
-            } else {
-                List<String> pathValues = VectorUtils.extractFromJson(json, sourcePath);
-                if (!pathValues.isEmpty()) {
-                    JSONObject targetObj = navigateToObject(json, sourcePath);
-                    if (targetObj != null && isConfidenceQualified(targetObj, confidenceField, confidenceThreshold)) {
-                        String text = targetObj.getString(textField);
-                        if (StringUtils.hasText(text)) {
-                            texts.add(text);
-                        }
-                    }
-                }
-            }
-        } catch (Exception e) {
-            log.error("置信度过滤提取失败: path={}, error={}", sourcePath, e.getMessage());
-        }
-        return texts;
-    }
-
-    private List<String> extractTextsFromPointDecomposition(JSONObject dataContent, JSONObject rule) {
-        List<String> texts = new ArrayList<>();
-        try {
-            String pointArrayPath = rule.getString("point_array_path");
-            String finalResultPath = rule.getString("final_result_path");
-            String pointNameField = rule.getString("point_name_field");
-            String confidenceField = rule.getString("confidence_field");
-            double confidenceThreshold = rule.getDoubleValue("confidence_threshold");
-            String target = rule.getString("target");
-            String contributionPath = rule.getString("contribution_path");
-            double contributionThreshold = rule.getDoubleValue("contribution_threshold");
-
-            List<JSONObject> finalPoints = VectorUtils.extractArrayItemsFromJson(dataContent, finalResultPath + "[*]");
-            List<String> qualifiedPointNames = new ArrayList<>();
-            for (JSONObject fp : finalPoints) {
-                if (isConfidenceQualified(fp, confidenceField, confidenceThreshold)) {
-                    String pointName = fp.getString(pointNameField);
-                    if (StringUtils.hasText(pointName)) {
-                        qualifiedPointNames.add(pointName);
-                    }
-                }
-            }
-            if (qualifiedPointNames.isEmpty()) return texts;
-
-            List<JSONObject> pointDetails = VectorUtils.extractArrayItemsFromJson(dataContent, pointArrayPath + "[*]");
-            Map<String, Double> contributionMap = buildContributionMap(dataContent, contributionPath);
-
-            for (String pointName : qualifiedPointNames) {
-                try {
-                    JSONObject matchedPoint = null;
-                    for (JSONObject detail : pointDetails) {
-                        if (pointName.equals(detail.getString("点"))) {
-                            matchedPoint = detail;
-                            break;
-                        }
-                    }
-                    if (matchedPoint == null) continue;
-
-                    List<String> itemNames = "substance".equals(target)
-                            ? extractSubstanceNames(matchedPoint)
-                            : extractFormNames(matchedPoint);
-                    for (String name : itemNames) {
-                        Double contribution = contributionMap.get(name);
-                        if (contribution != null && contribution >= contributionThreshold) {
-                            texts.add(name);
-                        }
-                    }
-                } catch (Exception e) {
-                    log.debug("extractTextsFromPointDecomposition 单点处理异常 pointName={}: {}", pointName, e.getMessage());
-                }
-            }
-        } catch (Exception e) {
-            log.error("extractTextsFromPointDecomposition 失败: {}", e.getMessage(), e);
-        }
-        return texts;
-    }
-
-    private List<String> extractSubstanceNames(JSONObject point) {
-        List<String> names = new ArrayList<>();
-        JSONObject substance = point.getJSONObject("实质");
-        if (substance == null) return names;
-        for (String key : new String[]{"具体元素", "具象概念", "抽象概念"}) {
-            try {
-                collectNamesFromArray(substance.getJSONArray(key), names);
-            } catch (Exception e) {
-                log.debug("extractSubstanceNames key={} 异常: {}", key, e.getMessage());
-            }
-        }
-        return names;
-    }
-
-    private List<String> extractFormNames(JSONObject point) {
-        List<String> names = new ArrayList<>();
-        JSONObject form = point.getJSONObject("形式");
-        if (form == null) return names;
-        for (String key : new String[]{"具体元素形式", "具象概念形式", "整体形式"}) {
-            try {
-                collectNamesFromArray(form.getJSONArray(key), names);
-            } catch (Exception e) {
-                log.debug("extractFormNames key={} 异常: {}", key, e.getMessage());
-            }
-        }
-        return names;
-    }
-
-    private void collectNamesFromArray(JSONArray array, List<String> names) {
-        if (array == null || array.isEmpty()) return;
-        for (int i = 0; i < array.size(); i++) {
-            try {
-                JSONObject item = array.getJSONObject(i);
-                if (item != null) {
-                    String name = item.getString("名称");
-                    if (StringUtils.hasText(name)) {
-                        names.add(name);
-                    }
-                }
-            } catch (Exception e) {
-                log.debug("collectNamesFromArray 单元素解析异常: {}", e.getMessage());
-            }
-        }
-    }
-
-    private Map<String, Double> buildContributionMap(JSONObject dataContent, String contributionPath) {
-        Map<String, Double> map = new HashMap<>();
-        try {
-            List<JSONObject> contributions = VectorUtils.extractArrayItemsFromJson(dataContent, contributionPath + "[*]");
-            for (JSONObject c : contributions) {
-                try {
-                    String word = c.getString("词");
-                    Double contribution = c.getDouble("贡献度");
-                    if (StringUtils.hasText(word) && contribution != null) {
-                        map.put(word, contribution);
-                    }
-                } catch (Exception e) {
-                    log.debug("buildContributionMap 单元素解析异常: {}", e.getMessage());
-                }
-            }
-        } catch (Exception e) {
-            log.error("构建贡献度查找表失败: {}", e.getMessage());
-        }
-        return map;
-    }
-
-    private JSONObject navigateToObject(JSONObject json, String path) {
-        if (json == null || !StringUtils.hasText(path) || !path.startsWith("$.")) return null;
-        try {
-            String pathContent = path.substring(2);
-            String[] parts = pathContent.split("\\.");
-            Object current = json;
-            for (String part : parts) {
-                if (current instanceof JSONObject) {
-                    current = ((JSONObject) current).get(part);
-                } else {
-                    return null;
-                }
-            }
-            return current instanceof JSONObject ? (JSONObject) current : null;
-        } catch (Exception e) {
-            return null;
-        }
-    }
-
-    private boolean isConfidenceQualified(JSONObject item, String confidenceField, double threshold) {
-        Object value = item.get(confidenceField);
-        if (value == null) return false;
-        if (value instanceof String) return "high".equalsIgnoreCase((String) value);
-        if (value instanceof Number) return ((Number) value).doubleValue() >= threshold;
-        return false;
-    }
-
-    // ====================================================================
-    // 通用辅助
-    // ====================================================================
-
     private List<DeconstructVectorConfig> getEnabledConfigsBySourceField(String sourceField) {
         DeconstructVectorConfigExample example = new DeconstructVectorConfigExample();
         example.createCriteria()
@@ -638,40 +413,6 @@ public class ArticleVectorJob {
         return vectorConfigMapper.selectByExample(example);
     }
 
-    private void awaitAndShutdown(List<Future<?>> futures, ExecutorService executor,
-                                  long timeoutMinutes, String taskDesc) {
-        long deadline = System.currentTimeMillis() + TimeUnit.MINUTES.toMillis(timeoutMinutes);
-        int completed = 0;
-        for (Future<?> future : futures) {
-            long remaining = deadline - System.currentTimeMillis();
-            if (remaining <= 0) {
-                log.error("{} 整体超时({}分钟),已取消剩余任务 (已完成 {}/{})",
-                        taskDesc, timeoutMinutes, completed, futures.size());
-                for (Future<?> f : futures) {
-                    f.cancel(true);
-                }
-                break;
-            }
-            try {
-                future.get(remaining, TimeUnit.MILLISECONDS);
-                completed++;
-            } catch (Exception e) {
-                log.error("{} 并发任务等待异常: {}", taskDesc, e.getMessage());
-            }
-        }
-        executor.shutdown();
-    }
-
-    private Integer parseMaxCount(String param) {
-        if (!StringUtils.hasText(param)) return null;
-        try {
-            int v = Integer.parseInt(param.trim());
-            return v > 0 ? v : null;
-        } catch (NumberFormatException e) {
-            return null;
-        }
-    }
-
     private String normalizeArticleId(String bizUniqueId) {
         if (!StringUtils.hasText(bizUniqueId)) {
             return null;

+ 38 - 305
core/src/main/java/com/tzld/videoVector/job/MaterialVectorJob.java

@@ -1,7 +1,7 @@
 package com.tzld.videoVector.job;
 
 import com.alibaba.fastjson.JSON;
-import com.alibaba.fastjson.JSONArray;
+
 import com.alibaba.fastjson.JSONObject;
 import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
 import com.google.common.collect.Lists;
@@ -14,6 +14,7 @@ import com.tzld.videoVector.model.po.pgVector.DeconstructVectorConfigExample;
 import com.tzld.videoVector.model.po.pgVector.MaterialDeconstructResult;
 import com.tzld.videoVector.service.EmbeddingService;
 import com.tzld.videoVector.service.MaterialVectorStoreService;
+import com.tzld.videoVector.util.DeconstructTextExtractor;
 import com.tzld.videoVector.util.Md5Util;
 import com.tzld.videoVector.util.VectorUtils;
 import com.xxl.job.core.biz.model.ReturnT;
@@ -35,8 +36,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.TimeUnit;
+
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.stream.Collectors;
 
@@ -198,38 +198,33 @@ public class MaterialVectorJob {
 
             // 并发调 detail 接口
             ExecutorService executor = Executors.newFixedThreadPool(VectorConstants.AIGC_DETAIL_PARALLELISM);
-            try {
-                List<Future<?>> futures = new ArrayList<>();
-                List<MaterialDeconstructResult> batch = Collections.synchronizedList(new ArrayList<>());
-
-                for (String materialId : needSyncIds) {
-                    futures.add(executor.submit(() -> {
-                        try {
-                            Long taskInstanceId = materialIdToTaskInstanceId.get(materialId);
-                            if (taskInstanceId == null) return;
-                            JSONObject dataContent = aigcApiService.getTaskCallbackDetail(taskInstanceId);
-                            if (dataContent != null) {
-                                MaterialDeconstructResult r = new MaterialDeconstructResult();
-                                r.setMaterialId(materialId);
-                                r.setSource(SOURCE_AIGC);
-                                r.setResult(dataContent.toJSONString());
-                                r.setSourceType(materialIdToSourceType.getOrDefault(materialId, defaultSourceType));
-                                batch.add(r);
-                            }
-                        } catch (Exception e) {
-                            log.error("同步 materialId={} 失败: {}", materialId, e.getMessage());
+            List<MaterialDeconstructResult> batch = Collections.synchronizedList(new ArrayList<>());
+
+            for (String materialId : needSyncIds) {
+                executor.submit(() -> {
+                    try {
+                        Long taskInstanceId = materialIdToTaskInstanceId.get(materialId);
+                        if (taskInstanceId == null) return;
+                        JSONObject dataContent = aigcApiService.getTaskCallbackDetail(taskInstanceId);
+                        if (dataContent != null) {
+                            MaterialDeconstructResult r = new MaterialDeconstructResult();
+                            r.setMaterialId(materialId);
+                            r.setSource(SOURCE_AIGC);
+                            r.setResult(dataContent.toJSONString());
+                            r.setSourceType(materialIdToSourceType.getOrDefault(materialId, defaultSourceType));
+                            batch.add(r);
                         }
-                    }));
-                }
-                awaitAndShutdown(futures, executor, 30, "素材同步");
-
-                if (!batch.isEmpty()) {
-                    for (List<MaterialDeconstructResult> subBatch : Lists.partition(batch, 200)) {
-                        insertCount.addAndGet(materialDeconstructResultMapperExt.batchInsertIgnore(subBatch));
+                    } catch (Exception e) {
+                        log.error("同步 materialId={} 失败: {}", materialId, e.getMessage());
                     }
+                });
+            }
+            VectorUtils.awaitAndShutdown(executor, 30, "素材同步");
+
+            if (!batch.isEmpty()) {
+                for (List<MaterialDeconstructResult> subBatch : Lists.partition(batch, 200)) {
+                    insertCount.addAndGet(materialDeconstructResultMapperExt.batchInsertIgnore(subBatch));
                 }
-            } finally {
-                executor.shutdownNow();
             }
         }
     }
@@ -246,7 +241,7 @@ public class MaterialVectorJob {
     @XxlJob("vectorMaterialJob")
     public ReturnT<String> vectorMaterialJob(String param) {
         log.info("开始执行素材向量化任务, param: {}", param);
-        Integer maxMaterialCount = parseMaxCount(param);
+        Integer maxMaterialCount = VectorUtils.parseMaxCount(param);
         return doVectorize(maxMaterialCount);
     }
 
@@ -293,17 +288,12 @@ public class MaterialVectorJob {
 
                 // 3. 对每个配置并发处理
                 ExecutorService configExecutor = Executors.newFixedThreadPool(configs.size());
-                try {
-                    List<Future<?>> configFutures = new ArrayList<>();
-                    for (DeconstructVectorConfig config : configs) {
-                        configFutures.add(configExecutor.submit(() ->
-                                processConfigForMaterial(config, materialIds, parsedById, totalSuccessCount, totalFailCount)
-                        ));
-                    }
-                    awaitAndShutdown(configFutures, configExecutor, 30, "素材向量化配置并发");
-                } finally {
-                    configExecutor.shutdownNow();
-                }
+            for (DeconstructVectorConfig config : configs) {
+                configExecutor.submit(() ->
+                        processConfigForMaterial(config, materialIds, parsedById, totalSuccessCount, totalFailCount)
+                );
+            }
+            VectorUtils.awaitAndShutdown(configExecutor, 30, "素材向量化配置并发");
 
                 totalProcessed.addAndGet(materialIds.size());
 
@@ -380,7 +370,7 @@ public class MaterialVectorJob {
                     continue;
                 }
                 try {
-                    List<String> texts = extractTextsFromDataContent(parsed.dataContent, config);
+                    List<String> texts = DeconstructTextExtractor.extractTextsFromDataContent(parsed.dataContent, config);
                     if (CollectionUtils.isEmpty(texts)) {
                         log.info("materialId={} 配置 {} 未提取到文本,跳过", materialId, configCode);
                         totalFailCount.incrementAndGet();
@@ -414,9 +404,9 @@ public class MaterialVectorJob {
         boolean multiPoint = VectorUtils.isMultiPointConfig(config);
 
         if (multiPoint) {
-            // 1) 先压缩掉空文本,pointIndex 用紧凑下标
-            // 2) 全部 embed 成功后再统一 save,避免出现"部分点写入、existsByIds 误判已完成"的中间态
-            //    (existsByIds 仅按 materialId 判存,留下"洞"后下一轮会跳过整个素材)
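+            // Pre-clean vectors left by an earlier run so a partial write cannot make existsByIds treat this material as done. NOTE(review): confirm the 0 argument also removes point index 0.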
+            materialVectorStoreService.deleteAbovePointIndex(configCode, materialId, 0);
+
             List<String> validTexts = new ArrayList<>(texts.size());
             for (String raw : texts) {
                 if (StringUtils.hasText(raw)) validTexts.add(raw);
@@ -535,226 +525,6 @@ public class MaterialVectorJob {
         return doVectorize(maxMaterialCount);
     }
 
-    // ====================================================================
-    // TODO: 与 VideoVectorJob 的提取逻辑统一抽取到 VectorUtils / ExtractionUtils,避免两边各自维护
-    // ====================================================================
-
-    /**
-     * 从 dataContent 中提取文本(与 VideoVectorJob 完全对称)
-     */
-    private List<String> extractTextsFromDataContent(JSONObject dataContent, DeconstructVectorConfig config) {
-        if (dataContent == null) {
-            return Collections.emptyList();
-        }
-        String extractRule = config.getExtractRule();
-        if (StringUtils.hasText(extractRule)) {
-            try {
-                JSONObject rule = JSON.parseObject(extractRule);
-                if ("point_decomposition".equals(rule.getString("type"))) {
-                    return extractTextsFromPointDecomposition(dataContent, rule);
-                }
-            } catch (Exception e) {
-                // 不是 JSON 或无 type 字段,走原有逻辑
-            }
-            return extractTextsWithConfidence(dataContent, config.getSourcePath(), extractRule);
-        } else {
-            return VectorUtils.extractFromJson(dataContent, config.getSourcePath());
-        }
-    }
-
-    private List<String> extractTextsWithConfidence(JSONObject json, String sourcePath, String extractRule) {
-        List<String> texts = new ArrayList<>();
-        try {
-            JSONObject rule = JSON.parseObject(extractRule);
-            String textField = rule.getString("text_field");
-            String confidenceField = rule.getString("confidence_field");
-            double confidenceThreshold = rule.getDoubleValue("confidence_threshold");
-            if (!StringUtils.hasText(textField) || !StringUtils.hasText(confidenceField)) {
-                log.error("extract_rule 缺少必要字段: text_field={}, confidence_field={}", textField, confidenceField);
-                return texts;
-            }
-            if (sourcePath.endsWith("[*]")) {
-                List<JSONObject> items = VectorUtils.extractArrayItemsFromJson(json, sourcePath);
-                for (JSONObject item : items) {
-                    if (isConfidenceQualified(item, confidenceField, confidenceThreshold)) {
-                        String text = item.getString(textField);
-                        if (StringUtils.hasText(text)) {
-                            texts.add(text);
-                        }
-                    }
-                }
-            } else {
-                List<String> pathValues = VectorUtils.extractFromJson(json, sourcePath);
-                if (!pathValues.isEmpty()) {
-                    JSONObject targetObj = navigateToObject(json, sourcePath);
-                    if (targetObj != null && isConfidenceQualified(targetObj, confidenceField, confidenceThreshold)) {
-                        String text = targetObj.getString(textField);
-                        if (StringUtils.hasText(text)) {
-                            texts.add(text);
-                        }
-                    }
-                }
-            }
-        } catch (Exception e) {
-            log.error("置信度过滤提取失败: path={}, error={}", sourcePath, e.getMessage());
-        }
-        return texts;
-    }
-
-    private List<String> extractTextsFromPointDecomposition(JSONObject dataContent, JSONObject rule) {
-        List<String> texts = new ArrayList<>();
-        try {
-            String pointArrayPath = rule.getString("point_array_path");
-            String finalResultPath = rule.getString("final_result_path");
-            String pointNameField = rule.getString("point_name_field");
-            String confidenceField = rule.getString("confidence_field");
-            double confidenceThreshold = rule.getDoubleValue("confidence_threshold");
-            String target = rule.getString("target");
-            String contributionPath = rule.getString("contribution_path");
-            double contributionThreshold = rule.getDoubleValue("contribution_threshold");
-
-            List<JSONObject> finalPoints = VectorUtils.extractArrayItemsFromJson(dataContent, finalResultPath + "[*]");
-            List<String> qualifiedPointNames = new ArrayList<>();
-            for (JSONObject fp : finalPoints) {
-                if (isConfidenceQualified(fp, confidenceField, confidenceThreshold)) {
-                    String pointName = fp.getString(pointNameField);
-                    if (StringUtils.hasText(pointName)) {
-                        qualifiedPointNames.add(pointName);
-                    }
-                }
-            }
-            if (qualifiedPointNames.isEmpty()) return texts;
-
-            List<JSONObject> pointDetails = VectorUtils.extractArrayItemsFromJson(dataContent, pointArrayPath + "[*]");
-            Map<String, Double> contributionMap = buildContributionMap(dataContent, contributionPath);
-
-            for (String pointName : qualifiedPointNames) {
-                try {
-                    JSONObject matchedPoint = null;
-                    for (JSONObject detail : pointDetails) {
-                        if (pointName.equals(detail.getString("点"))) {
-                            matchedPoint = detail;
-                            break;
-                        }
-                    }
-                    if (matchedPoint == null) continue;
-
-                    List<String> itemNames = "substance".equals(target)
-                            ? extractSubstanceNames(matchedPoint)
-                            : extractFormNames(matchedPoint);
-                    for (String name : itemNames) {
-                        Double contribution = contributionMap.get(name);
-                        if (contribution != null && contribution >= contributionThreshold) {
-                            texts.add(name);
-                        }
-                    }
-                } catch (Exception e) {
-                    log.debug("extractTextsFromPointDecomposition 单点处理异常 pointName={}: {}", pointName, e.getMessage());
-                }
-            }
-        } catch (Exception e) {
-            log.error("extractTextsFromPointDecomposition 失败: {}", e.getMessage(), e);
-        }
-        return texts;
-    }
-
-    private List<String> extractSubstanceNames(JSONObject point) {
-        List<String> names = new ArrayList<>();
-        JSONObject substance = point.getJSONObject("实质");
-        if (substance == null) return names;
-        for (String key : new String[]{"具体元素", "具象概念", "抽象概念"}) {
-            try {
-                collectNamesFromArray(substance.getJSONArray(key), names);
-            } catch (Exception e) {
-                log.debug("extractSubstanceNames key={} 异常: {}", key, e.getMessage());
-            }
-        }
-        return names;
-    }
-
-    private List<String> extractFormNames(JSONObject point) {
-        List<String> names = new ArrayList<>();
-        JSONObject form = point.getJSONObject("形式");
-        if (form == null) return names;
-        for (String key : new String[]{"具体元素形式", "具象概念形式", "整体形式"}) {
-            try {
-                collectNamesFromArray(form.getJSONArray(key), names);
-            } catch (Exception e) {
-                log.debug("extractFormNames key={} 异常: {}", key, e.getMessage());
-            }
-        }
-        return names;
-    }
-
-    private void collectNamesFromArray(JSONArray array, List<String> names) {
-        if (array == null || array.isEmpty()) return;
-        for (int i = 0; i < array.size(); i++) {
-            try {
-                JSONObject item = array.getJSONObject(i);
-                if (item != null) {
-                    String name = item.getString("名称");
-                    if (StringUtils.hasText(name)) {
-                        names.add(name);
-                    }
-                }
-            } catch (Exception e) {
-                log.debug("collectNamesFromArray 单元素解析异常: {}", e.getMessage());
-            }
-        }
-    }
-
-    private Map<String, Double> buildContributionMap(JSONObject dataContent, String contributionPath) {
-        Map<String, Double> map = new HashMap<>();
-        try {
-            List<JSONObject> contributions = VectorUtils.extractArrayItemsFromJson(dataContent, contributionPath + "[*]");
-            for (JSONObject c : contributions) {
-                try {
-                    String word = c.getString("词");
-                    Double contribution = c.getDouble("贡献度");
-                    if (StringUtils.hasText(word) && contribution != null) {
-                        map.put(word, contribution);
-                    }
-                } catch (Exception e) {
-                    log.debug("buildContributionMap 单元素解析异常: {}", e.getMessage());
-                }
-            }
-        } catch (Exception e) {
-            log.error("构建贡献度查找表失败: {}", e.getMessage());
-        }
-        return map;
-    }
-
-    private JSONObject navigateToObject(JSONObject json, String path) {
-        if (json == null || !StringUtils.hasText(path) || !path.startsWith("$.")) return null;
-        try {
-            String pathContent = path.substring(2);
-            String[] parts = pathContent.split("\\.");
-            Object current = json;
-            for (String part : parts) {
-                if (current instanceof JSONObject) {
-                    current = ((JSONObject) current).get(part);
-                } else {
-                    return null;
-                }
-            }
-            return current instanceof JSONObject ? (JSONObject) current : null;
-        } catch (Exception e) {
-            return null;
-        }
-    }
-
-    private boolean isConfidenceQualified(JSONObject item, String confidenceField, double threshold) {
-        Object value = item.get(confidenceField);
-        if (value == null) return false;
-        if (value instanceof String) return "high".equalsIgnoreCase((String) value);
-        if (value instanceof Number) return ((Number) value).doubleValue() >= threshold;
-        return false;
-    }
-
-    // ====================================================================
-    // 通用辅助
-    // ====================================================================
-
     private List<DeconstructVectorConfig> getEnabledConfigsBySourceField(String sourceField) {
         DeconstructVectorConfigExample example = new DeconstructVectorConfigExample();
         example.createCriteria()
@@ -764,43 +534,6 @@ public class MaterialVectorJob {
         return vectorConfigMapper.selectByExample(example);
     }
 
-    private void awaitAndShutdown(List<Future<?>> futures, ExecutorService executor,
-                                  long timeoutMinutes, String taskDesc) {
-        long deadline = System.currentTimeMillis() + TimeUnit.MINUTES.toMillis(timeoutMinutes);
-        int completed = 0;
-        for (Future<?> future : futures) {
-            long remaining = deadline - System.currentTimeMillis();
-            if (remaining <= 0) {
-                log.error("{} 整体超时({}分钟),已取消剩余任务 (已完成 {}/{})",
-                        taskDesc, timeoutMinutes, completed, futures.size());
-                for (Future<?> f : futures) {
-                    f.cancel(true);
-                }
-                break;
-            }
-            try {
-                future.get(remaining, TimeUnit.MILLISECONDS);
-                completed++;
-            } catch (Exception e) {
-                log.error("{} 并发任务等待异常: {}", taskDesc, e.getMessage());
-            }
-        }
-        executor.shutdown();
-    }
-
-    /**
-     * 入参 N 解析为 maxMaterialCount
-     */
-    private Integer parseMaxCount(String param) {
-        if (!StringUtils.hasText(param)) return null;
-        try {
-            int v = Integer.parseInt(param.trim());
-            return v > 0 ? v : null;
-        } catch (NumberFormatException e) {
-            return null;
-        }
-    }
-
     /**
      * 归一化 AIGC bizUniqueId 为 materialId 字符串。
      * 外部合作素材为文件 MD5(32 位 hex),内部素材通常为数字字符串。

+ 2 - 17
core/src/main/java/com/tzld/videoVector/service/impl/PgArticleVectorStoreServiceImpl.java

@@ -48,7 +48,7 @@ public class PgArticleVectorStoreServiceImpl implements ArticleVectorStoreServic
             return false;
         }
 
-        String embedding = vectorToString(vector);
+        String embedding = VectorUtils.vectorToString(vector);
         String textHash = (text != null && !text.isEmpty()) ? Md5Util.encoderByMd5(text) : null;
         articleVectorMapperExt.upsertVector(articleId, configCode, pointIndex, embedding, text, textHash);
         log.debug("保存文章向量成功,configCode={}, articleId={}, pointIndex={}, 维度={}",
@@ -217,7 +217,7 @@ public class PgArticleVectorStoreServiceImpl implements ArticleVectorStoreServic
             return Collections.emptyList();
         }
 
-        String queryVectorStr = vectorToString(queryVector);
+        String queryVectorStr = VectorUtils.vectorToString(queryVector);
         log.info("searchTopN SQL vector前100字符: {}, topN={}, configCode={}",
                 queryVectorStr.substring(0, Math.min(100, queryVectorStr.length())), topN, configCode);
         List<ArticleVector> results = articleVectorMapperExt.searchTopN(configCode, queryVectorStr, topN);
@@ -242,19 +242,4 @@ public class PgArticleVectorStoreServiceImpl implements ArticleVectorStoreServic
                 })
                 .collect(Collectors.toList());
     }
-
-    private String vectorToString(List<Float> vector) {
-        StringBuilder sb = new StringBuilder("[");
-        for (int i = 0; i < vector.size(); i++) {
-            if (i > 0) sb.append(",");
-            float v = vector.get(i);
-            String s = Float.toString(v);
-            if (s.indexOf('E') >= 0 || s.indexOf('e') >= 0) {
-                s = new java.math.BigDecimal(s).toPlainString();
-            }
-            sb.append(s);
-        }
-        sb.append("]");
-        return sb.toString();
-    }
 }

+ 3 - 20
core/src/main/java/com/tzld/videoVector/service/impl/PgMaterialVectorStoreServiceImpl.java

@@ -45,7 +45,7 @@ public class PgMaterialVectorStoreServiceImpl implements MaterialVectorStoreServ
             return false;
         }
 
-        String embedding = vectorToString(vector);
+        String embedding = VectorUtils.vectorToString(vector);
         String textHash = (text != null && !text.isEmpty()) ? Md5Util.encoderByMd5(text) : null;
         materialVectorMapperExt.upsertVector(materialId, configCode, pointIndex, embedding, text, textHash, sourceType);
         log.debug("保存素材向量成功,configCode={}, materialId={}, pointIndex={}, sourceType={}, 维度={}",
@@ -215,7 +215,7 @@ public class PgMaterialVectorStoreServiceImpl implements MaterialVectorStoreServ
             return Collections.emptyList();
         }
 
-        String queryVectorStr = vectorToString(queryVector);
+        String queryVectorStr = VectorUtils.vectorToString(queryVector);
         log.info("searchTopN SQL vector前100字符: {}, topN={}, configCode={}",
                 queryVectorStr.substring(0, Math.min(100, queryVectorStr.length())), topN, configCode);
         List<MaterialVector> results = materialVectorMapperExt.searchTopN(configCode, queryVectorStr, topN);
@@ -242,7 +242,7 @@ public class PgMaterialVectorStoreServiceImpl implements MaterialVectorStoreServ
             return searchTopN(configCode, queryVector, topN);
         }
 
-        String queryVectorStr = vectorToString(queryVector);
+        String queryVectorStr = VectorUtils.vectorToString(queryVector);
         List<MaterialVector> results = materialVectorMapperExt.searchTopNBySource(configCode, queryVectorStr, topN, sourceType);
         if (results == null || results.isEmpty()) {
             log.info("素材向量库无匹配结果,configCode={}, sourceType={}", configCode, sourceType);
@@ -264,21 +264,4 @@ public class PgMaterialVectorStoreServiceImpl implements MaterialVectorStoreServ
                 })
                 .collect(Collectors.toList());
     }
-
-    private String vectorToString(List<Float> vector) {
-        StringBuilder sb = new StringBuilder("[");
-        for (int i = 0; i < vector.size(); i++) {
-            if (i > 0) sb.append(",");
-            // Float.toString() 对 |v| < 1e-3 的值会输出科学计数法(如 6.399564E-4)
-            // pgvector 的 ::vector 只认标准十进制格式, 必须用 BigDecimal.toPlainString() 兜底
-            float v = vector.get(i);
-            String s = Float.toString(v);
-            if (s.indexOf('E') >= 0 || s.indexOf('e') >= 0) {
-                s = new java.math.BigDecimal(s).toPlainString();
-            }
-            sb.append(s);
-        }
-        sb.append("]");
-        return sb.toString();
-    }
 }

+ 43 - 27
core/src/main/java/com/tzld/videoVector/service/recall/impl/VectorRecallTestServiceImpl.java

@@ -271,7 +271,27 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
                     log.info("素材召回(rawVector) 无结果, configCode={}", configCode);
                     return Collections.emptyList();
                 }
-                log.info("素材召回 text_hash 缓存未命中, textHash={}, 降级到 embedding API", textHash);
+                log.info("素材召回 rawVector 缓存未命中, textHash={}, 尝试 parsed vector 缓存", textHash);
+            }
+
+            if (StringUtils.hasText(textHash)) {
+                List<Float> cachedVector = materialVectorStoreService.getVectorByTextHash(textHash, configCode);
+                if (cachedVector != null && !cachedVector.isEmpty()) {
+                    log.info("素材召回 使用缓存的 parsed vector, dim={}", cachedVector.size());
+                    List<MaterialMatch> raw = materialVectorStoreService.searchTopN(configCode, cachedVector, candidate);
+                    List<MaterialMatch> matches = deduplicateMaterialMatches(raw, topN);
+                    if (!CollectionUtils.isEmpty(matches)) {
+                        List<String> matchSample = new ArrayList<>();
+                        for (MaterialMatch m : matches) {
+                            matchSample.add(m.getMaterialId() + ":" + String.format("%.4f", m.getScore()));
+                        }
+                        log.info("素材召回(parsed vector缓存) 去重后({}条): {}, configCode={}",
+                                matches.size(), matchSample, configCode);
+                        return limitEnrichedItemsByScore(enrichMaterialMatches(matches, configCode), topN);
+                    }
+                    log.info("素材召回(parsed vector缓存) 无结果, configCode={}", configCode);
+                    return Collections.emptyList();
+                }
             }
 
             // 降级:embedding API → Float 向量 → 搜索(非缓存路径,容忍精度损失)
@@ -314,19 +334,6 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
         log.info("resolveQueryVectorForMaterial: queryText={}, configCode={}, model={}, dim={}",
                 queryText, configCode, config.getEmbeddingModel(), config.getDimension());
 
-        // 1. 先查 material_vectors 的 text_hash 缓存
-        String textHash = Md5Util.encoderByMd5(queryText);
-        if (StringUtils.hasText(textHash)) {
-            log.info("resolveQueryVectorForMaterial textHash={}, 开始查 text_hash 缓存", textHash);
-            List<Float> cached = materialVectorStoreService.getVectorByTextHash(textHash, configCode);
-            if (cached != null && !cached.isEmpty()) {
-                log.info("resolveQueryVectorForMaterial 命中 text_hash 缓存,dim={}", cached.size());
-                return cached;
-            }
-            log.info("resolveQueryVectorForMaterial text_hash 缓存未命中,降级到 embedding API");
-        }
-
-        // 2. 调用 embedding API(与入库时相同的 model / dimension)
         try {
             log.info("resolveQueryVectorForMaterial 调用 embedding API: text={}, model={}, dim={}",
                     queryText, config.getEmbeddingModel(), config.getDimension());
@@ -521,7 +528,27 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
                     log.info("文章召回(rawVector) 无结果, configCode={}", configCode);
                     return Collections.emptyList();
                 }
-                log.info("文章召回 text_hash 缓存未命中, textHash={}, 降级到 embedding API", textHash);
+                log.info("文章召回 rawVector 缓存未命中, textHash={}, 尝试 parsed vector 缓存", textHash);
+            }
+
+            if (StringUtils.hasText(textHash)) {
+                List<Float> cachedVector = articleVectorStoreService.getVectorByTextHash(textHash, configCode);
+                if (cachedVector != null && !cachedVector.isEmpty()) {
+                    log.info("文章召回 使用缓存的 parsed vector, dim={}", cachedVector.size());
+                    List<ArticleMatch> raw = articleVectorStoreService.searchTopN(configCode, cachedVector, candidate);
+                    List<ArticleMatch> matches = deduplicateArticleMatches(raw, topN);
+                    if (!CollectionUtils.isEmpty(matches)) {
+                        List<String> matchSample = new ArrayList<>();
+                        for (ArticleMatch m : matches) {
+                            matchSample.add(m.getArticleId() + ":" + String.format("%.4f", m.getScore()));
+                        }
+                        log.info("文章召回(parsed vector缓存) 去重后({}条): {}, configCode={}",
+                                matches.size(), matchSample, configCode);
+                        return limitEnrichedItemsByScore(enrichArticleMatches(matches, configCode), topN);
+                    }
+                    log.info("文章召回(parsed vector缓存) 无结果, configCode={}", configCode);
+                    return Collections.emptyList();
+                }
             }
 
             List<Float> queryVector = resolveQueryVectorForArticle(queryText, configCode);
@@ -558,16 +585,6 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
             config.setConfigCode(configCode);
         }
 
-        String textHash = Md5Util.encoderByMd5(queryText);
-        if (StringUtils.hasText(textHash)) {
-            List<Float> cached = articleVectorStoreService.getVectorByTextHash(textHash, configCode);
-            if (cached != null && !cached.isEmpty()) {
-                log.info("resolveQueryVectorForArticle 命中 text_hash 缓存,dim={}", cached.size());
-                return cached;
-            }
-            log.info("resolveQueryVectorForArticle text_hash 缓存未命中,降级到 embedding API");
-        }
-
         try {
             log.info("resolveQueryVectorForArticle 调用 embedding API: text={}, model={}, dim={}",
                     queryText, config.getEmbeddingModel(), config.getDimension());
@@ -715,7 +732,7 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
             }
         }
 
-        if (!StringUtils.hasText(meta.title) && !StringUtils.hasText(meta.content)) {
+        if (!StringUtils.hasText(meta.title)) {
             return null;
         }
         return meta;
@@ -1145,7 +1162,6 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
     private static class ArticleBasicMeta {
         String articleId;
         String title;
-        String content;
         String summary;
         List<String> tags;
         String cover;

+ 237 - 0
core/src/main/java/com/tzld/videoVector/util/DeconstructTextExtractor.java

@@ -0,0 +1,237 @@
+package com.tzld.videoVector.util;
+
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONArray;
+import com.alibaba.fastjson.JSONObject;
+import com.tzld.videoVector.model.po.pgVector.DeconstructVectorConfig;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.util.StringUtils;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Deconstruction text extraction utility: pulls texts out of the dataContent of an AIGC deconstruction result according to the configured rule.
+ * Shared by MaterialVectorJob / ArticleVectorJob / VideoVectorJob.
+ */
+@Slf4j
+public final class DeconstructTextExtractor {
+
+    private DeconstructTextExtractor() { // static-only utility holder: not instantiable
+    }
+
+    /**
+     * Extracts texts from dataContent: a "point_decomposition" rule or a confidence rule is chosen from config.getExtractRule(); without a rule the sourcePath is read directly. Returns an empty list for null dataContent.
+     */
+    public static List<String> extractTextsFromDataContent(JSONObject dataContent, DeconstructVectorConfig config) {
+        if (dataContent == null) {
+            return Collections.emptyList();
+        }
+        String extractRule = config.getExtractRule();
+        if (StringUtils.hasText(extractRule)) {
+            try {
+                JSONObject rule = JSON.parseObject(extractRule);
+                if ("point_decomposition".equals(rule.getString("type"))) {
+                    return extractTextsFromPointDecomposition(dataContent, rule);
+                }
+            } catch (Exception e) {
+                // Rule could not be read as a JSON object: deliberately ignored so the confidence-rule path below handles it
+            }
+            return extractTextsWithConfidence(dataContent, config.getSourcePath(), extractRule);
+        } else {
+            return VectorUtils.extractFromJson(dataContent, config.getSourcePath());
+        }
+    }
+
+    public static List<String> extractTextsWithConfidence(JSONObject json, String sourcePath, String extractRule) { // collects text_field of confidence-qualified objects; empty list on any failure
+        List<String> texts = new ArrayList<>();
+        try {
+            JSONObject rule = JSON.parseObject(extractRule);
+            String textField = rule.getString("text_field");
+            String confidenceField = rule.getString("confidence_field");
+            double confidenceThreshold = rule.getDoubleValue("confidence_threshold"); // NOTE(review): presumably 0.0 when the key is absent - confirm against fastjson
+            if (!StringUtils.hasText(textField) || !StringUtils.hasText(confidenceField)) {
+                log.error("extract_rule 缺少必要字段: text_field={}, confidence_field={}", textField, confidenceField);
+                return texts;
+            }
+            if (sourcePath.endsWith("[*]")) { // array path: every item is checked on its own
+                List<JSONObject> items = VectorUtils.extractArrayItemsFromJson(json, sourcePath);
+                for (JSONObject item : items) {
+                    if (isConfidenceQualified(item, confidenceField, confidenceThreshold)) {
+                        String text = item.getString(textField);
+                        if (StringUtils.hasText(text)) {
+                            texts.add(text);
+                        }
+                    }
+                }
+            } else { // single-object path
+                List<String> pathValues = VectorUtils.extractFromJson(json, sourcePath);
+                if (!pathValues.isEmpty()) {
+                    JSONObject targetObj = navigateToObject(json, sourcePath);
+                    if (targetObj != null && isConfidenceQualified(targetObj, confidenceField, confidenceThreshold)) {
+                        String text = targetObj.getString(textField);
+                        if (StringUtils.hasText(text)) {
+                            texts.add(text);
+                        }
+                    }
+                }
+            }
+        } catch (Exception e) { // also reached when sourcePath is null (endsWith throws) or the rule fails to parse
+            log.error("置信度过滤提取失败: path={}, error={}", sourcePath, e.getMessage());
+        }
+        return texts;
+    }
+
+    static List<String> extractTextsFromPointDecomposition(JSONObject dataContent, JSONObject rule) { // item names of confidence-qualified points whose contribution reaches the threshold
+        List<String> texts = new ArrayList<>();
+        try {
+            String pointArrayPath = rule.getString("point_array_path");
+            String finalResultPath = rule.getString("final_result_path");
+            String pointNameField = rule.getString("point_name_field");
+            String confidenceField = rule.getString("confidence_field");
+            double confidenceThreshold = rule.getDoubleValue("confidence_threshold");
+            String target = rule.getString("target");
+            String contributionPath = rule.getString("contribution_path");
+            double contributionThreshold = rule.getDoubleValue("contribution_threshold");
+
+            List<JSONObject> finalPoints = VectorUtils.extractArrayItemsFromJson(dataContent, finalResultPath + "[*]");
+            List<String> qualifiedPointNames = new ArrayList<>();
+            for (JSONObject fp : finalPoints) {
+                if (isConfidenceQualified(fp, confidenceField, confidenceThreshold)) {
+                    String pointName = fp.getString(pointNameField);
+                    if (StringUtils.hasText(pointName)) {
+                        qualifiedPointNames.add(pointName);
+                    }
+                }
+            }
+            if (qualifiedPointNames.isEmpty()) return texts; // no point passed the confidence filter
+
+            List<JSONObject> pointDetails = VectorUtils.extractArrayItemsFromJson(dataContent, pointArrayPath + "[*]");
+            Map<String, Double> contributionMap = buildContributionMap(dataContent, contributionPath);
+
+            for (String pointName : qualifiedPointNames) {
+                try {
+                    JSONObject matchedPoint = null;
+                    for (JSONObject detail : pointDetails) {
+                        if (pointName.equals(detail.getString("点"))) { // first detail with this name wins
+                            matchedPoint = detail;
+                            break;
+                        }
+                    }
+                    if (matchedPoint == null) continue; // qualified point without a detail entry: skip it
+
+                    List<String> itemNames = "substance".equals(target) // any other target value falls back to form names
+                            ? extractSubstanceNames(matchedPoint)
+                            : extractFormNames(matchedPoint);
+                    for (String name : itemNames) {
+                        Double contribution = contributionMap.get(name);
+                        if (contribution != null && contribution >= contributionThreshold) { // names absent from the map are dropped
+                            texts.add(name);
+                        }
+                    }
+                } catch (Exception e) { // best-effort: one bad point must not abort the remaining points
+                    log.debug("extractTextsFromPointDecomposition 单点处理异常 pointName={}: {}", pointName, e.getMessage());
+                }
+            }
+        } catch (Exception e) {
+            log.error("extractTextsFromPointDecomposition 失败: {}", e.getMessage(), e);
+        }
+        return texts;
+    }
+
+    static Map<String, Double> buildContributionMap(JSONObject dataContent, String contributionPath) { // lookup from "词" to "贡献度" over the items under contributionPath
+        Map<String, Double> map = new HashMap<>();
+        try {
+            List<JSONObject> contributions = VectorUtils.extractArrayItemsFromJson(dataContent, contributionPath + "[*]");
+            for (JSONObject c : contributions) {
+                try {
+                    String word = c.getString("词");
+                    Double contribution = c.getDouble("贡献度");
+                    if (StringUtils.hasText(word) && contribution != null) {
+                        map.put(word, contribution); // a repeated word overwrites the earlier value
+                    }
+                } catch (Exception e) {
+                    log.debug("buildContributionMap 单元素解析异常: {}", e.getMessage());
+                }
+            }
+        } catch (Exception e) {
+            log.error("构建贡献度查找表失败: {}", e.getMessage());
+        }
+        return map;
+    }
+
+    static List<String> extractSubstanceNames(JSONObject point) { // gathers "名称" values from the three arrays under the point's "实质" object
+        List<String> names = new ArrayList<>();
+        JSONObject substance = point.getJSONObject("实质");
+        if (substance == null) return names;
+        for (String key : new String[]{"具体元素", "具象概念", "抽象概念"}) {
+            try {
+                collectNamesFromArray(substance.getJSONArray(key), names);
+            } catch (Exception e) { // a malformed array under one key does not block the other keys
+                log.debug("extractSubstanceNames key={} 异常: {}", key, e.getMessage());
+            }
+        }
+        return names;
+    }
+
+    static List<String> extractFormNames(JSONObject point) { // gathers "名称" values from the three arrays under the point's "形式" object
+        List<String> names = new ArrayList<>();
+        JSONObject form = point.getJSONObject("形式");
+        if (form == null) return names;
+        for (String key : new String[]{"具体元素形式", "具象概念形式", "整体形式"}) {
+            try {
+                collectNamesFromArray(form.getJSONArray(key), names);
+            } catch (Exception e) { // a malformed array under one key does not block the other keys
+                log.debug("extractFormNames key={} 异常: {}", key, e.getMessage());
+            }
+        }
+        return names;
+    }
+
+    static void collectNamesFromArray(JSONArray array, List<String> names) { // appends each element's non-blank "名称" to names (mutates the caller's list)
+        if (array == null || array.isEmpty()) return;
+        for (int i = 0; i < array.size(); i++) {
+            try {
+                JSONObject item = array.getJSONObject(i);
+                if (item != null) {
+                    String name = item.getString("名称");
+                    if (StringUtils.hasText(name)) {
+                        names.add(name);
+                    }
+                }
+            } catch (Exception e) { // element could not be read as an object: skip just this element
+                log.debug("collectNamesFromArray 单元素解析异常: {}", e.getMessage());
+            }
+        }
+    }
+
+    static JSONObject navigateToObject(JSONObject json, String path) { // walks a "$.a.b.c" path through nested JSONObjects; null unless the walk ends on a JSONObject
+        if (json == null || !StringUtils.hasText(path) || !path.startsWith("$.")) return null;
+        try {
+            String pathContent = path.substring(2);
+            String[] parts = pathContent.split("\\."); // plain dot split: index or wildcard segments are treated as literal keys
+            Object current = json;
+            for (String part : parts) {
+                if (current instanceof JSONObject) {
+                    current = ((JSONObject) current).get(part);
+                } else {
+                    return null;
+                }
+            }
+            return current instanceof JSONObject ? (JSONObject) current : null;
+        } catch (Exception e) {
+            return null;
+        }
+    }
+
+    static boolean isConfidenceQualified(JSONObject item, String confidenceField, double threshold) { // String confidence qualifies only as "high" (case-insensitive); numeric confidence must be >= threshold
+        Object value = item.get(confidenceField);
+        if (value == null) return false; // missing confidence never qualifies
+        if (value instanceof String) return "high".equalsIgnoreCase((String) value);
+        if (value instanceof Number) return ((Number) value).doubleValue() >= threshold;
+        return false; // any other value type is rejected
+    }
+}

+ 72 - 0
core/src/main/java/com/tzld/videoVector/util/VectorUtils.java

@@ -4,15 +4,19 @@ import com.alibaba.fastjson.JSON;
 import com.alibaba.fastjson.JSONArray;
 import com.alibaba.fastjson.JSONObject;
 import com.tzld.videoVector.model.po.pgVector.DeconstructVectorConfig;
+import lombok.extern.slf4j.Slf4j;
 import org.springframework.util.StringUtils;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.TimeUnit;
 
 /**
  * 向量化公共工具方法
  * 集中管理 parseVectorString、extractFromJson、parseJsonPath、isMultiPointConfig 等通用逻辑
  */
+@Slf4j
 public final class VectorUtils {
 
     private VectorUtils() {
@@ -307,4 +311,72 @@ public final class VectorUtils {
 
         return parts;
     }
+
+    // ========================== 向量字符串序列化 ==========================
+
+    /**
+     * Serializes a float vector into the pgvector-style text form "[0.1,0.2,...]".
+     * Components that Float.toString() prints in scientific notation are rewritten with
+     * BigDecimal.toPlainString(), which the original author notes is to avoid pgvector parse failures.
+     */
+    public static String vectorToString(List<Float> vector) {
+        StringBuilder out = new StringBuilder("[");
+        String separator = "";
+        for (Float boxed : vector) {
+            float component = boxed;
+            String text = Float.toString(component);
+            boolean scientific = text.indexOf('E') >= 0 || text.indexOf('e') >= 0;
+            if (scientific) text = new java.math.BigDecimal(text).toPlainString();
+            out.append(separator).append(text);
+            separator = ",";
+        }
+        return out.append("]").toString();
+    }
+
+    // ========================== 并发工具 ==========================
+
+    /**
+     * Shuts the pool down and waits for already-submitted tasks to finish, force-stopping them on timeout.
+     * Waits once on the whole pool (shutdown() + awaitTermination()) instead of calling Future.get() per task,
+     * so one slow task cannot delay the timeout check for the tasks behind it.
+     *
+     * @param executor       pool to shut down; it accepts no new tasks after this call
+     * @param timeoutMinutes how long to wait for normal completion, in minutes
+     * @param taskDesc       task description, used only in log messages
+     */
+    public static void awaitAndShutdown(ExecutorService executor, long timeoutMinutes, String taskDesc) {
+        executor.shutdown();
+        try {
+            if (executor.awaitTermination(timeoutMinutes, TimeUnit.MINUTES)) {
+                return;
+            }
+            log.error("{} 整体超时({}分钟),强制取消剩余任务", taskDesc, timeoutMinutes);
+            executor.shutdownNow();
+            boolean stopped = executor.awaitTermination(60, TimeUnit.SECONDS);
+            if (!stopped) log.error("{} 强制终止未在60秒内完成", taskDesc);
+        } catch (InterruptedException e) {
+            // Stop the workers, then restore the interrupt flag for the caller.
+            executor.shutdownNow();
+            Thread.currentThread().interrupt();
+            log.error("{} 等待被中断", taskDesc);
+        }
+    }
+
+    // ========================== 参数解析 ==========================
+
+    /**
+     * Parses a job parameter string into a maximum item count; surrounding whitespace is trimmed.
+     * @return the parsed positive int, or null (no limit) if param is null, blank, non-numeric or not positive
+     */
+    public static Integer parseMaxCount(String param) {
+        if (!StringUtils.hasText(param)) {
+            return null;
+        }
+        try {
+            int parsed = Integer.parseInt(param.trim());
+            return parsed <= 0 ? null : parsed;
+        } catch (NumberFormatException ignored) {
+            return null;
+        }
+    }
 }