|
|
@@ -27,6 +27,7 @@ import org.springframework.util.StringUtils;
|
|
|
import javax.annotation.Resource;
|
|
|
import java.util.ArrayList;
|
|
|
import java.util.Collections;
|
|
|
+import java.util.Comparator;
|
|
|
import java.util.HashMap;
|
|
|
import java.util.HashSet;
|
|
|
import java.util.List;
|
|
|
@@ -138,11 +139,13 @@ public class MaterialVectorJob {
|
|
|
log.info("aigc.material.task.source.map: {}", aigcMaterialTaskSourceMap);
|
|
|
|
|
|
// 1. 收集 (materialId, taskInstanceId) 和 (materialId, sourceType) 映射
|
|
|
- // 同一 materialId 若在多个 task 中出现,后处理 task 的 sourceType 会覆盖前者
|
|
|
+ // 按 taskId 自然序遍历保证确定性;同一 materialId 在多个 task 中出现时,taskId 较大的覆盖前者
|
|
|
Map<String, Long> materialIdToTaskInstanceId = new HashMap<>();
|
|
|
Map<String, Short> materialIdToSourceType = new HashMap<>();
|
|
|
|
|
|
- for (Map.Entry<String, Short> entry : aigcMaterialTaskSourceMap.entrySet()) {
|
|
|
+ List<Map.Entry<String, Short>> sortedEntries = new ArrayList<>(aigcMaterialTaskSourceMap.entrySet());
|
|
|
+ sortedEntries.sort(Comparator.comparingInt(e -> Integer.parseInt(e.getKey())));
|
|
|
+ for (Map.Entry<String, Short> entry : sortedEntries) {
|
|
|
Integer taskId;
|
|
|
try {
|
|
|
taskId = Integer.parseInt(entry.getKey());
|
|
|
@@ -163,7 +166,11 @@ public class MaterialVectorJob {
|
|
|
continue;
|
|
|
}
|
|
|
materialIdToTaskInstanceId.put(materialId, input.getTaskInstanceId());
|
|
|
- materialIdToSourceType.put(materialId, sourceType);
|
|
|
+ Short prevSourceType = materialIdToSourceType.put(materialId, sourceType);
|
|
|
+ if (prevSourceType != null && !prevSourceType.equals(sourceType)) {
|
|
|
+ log.warn("materialId={} 在 taskId={}(sourceType={}) 中 sourceType 被覆盖,原值={}",
|
|
|
+ materialId, taskId, sourceType, prevSourceType);
|
|
|
+ }
|
|
|
}
|
|
|
log.info("taskId={} sourceType={} 拉到 {} 条素材", taskId, sourceType, taskInputList.size());
|
|
|
}
|
|
|
@@ -191,34 +198,38 @@ public class MaterialVectorJob {
|
|
|
|
|
|
// 并发调 detail 接口
|
|
|
ExecutorService executor = Executors.newFixedThreadPool(VectorConstants.AIGC_DETAIL_PARALLELISM);
|
|
|
- List<Future<?>> futures = new ArrayList<>();
|
|
|
- List<MaterialDeconstructResult> batch = Collections.synchronizedList(new ArrayList<>());
|
|
|
-
|
|
|
- for (String materialId : needSyncIds) {
|
|
|
- futures.add(executor.submit(() -> {
|
|
|
- try {
|
|
|
- Long taskInstanceId = materialIdToTaskInstanceId.get(materialId);
|
|
|
- if (taskInstanceId == null) return;
|
|
|
- JSONObject dataContent = aigcApiService.getTaskCallbackDetail(taskInstanceId);
|
|
|
- if (dataContent != null) {
|
|
|
- MaterialDeconstructResult r = new MaterialDeconstructResult();
|
|
|
- r.setMaterialId(materialId);
|
|
|
- r.setSource(SOURCE_AIGC);
|
|
|
- r.setResult(dataContent.toJSONString());
|
|
|
- r.setSourceType(materialIdToSourceType.getOrDefault(materialId, defaultSourceType));
|
|
|
- batch.add(r);
|
|
|
+ try {
|
|
|
+ List<Future<?>> futures = new ArrayList<>();
|
|
|
+ List<MaterialDeconstructResult> batch = Collections.synchronizedList(new ArrayList<>());
|
|
|
+
|
|
|
+ for (String materialId : needSyncIds) {
|
|
|
+ futures.add(executor.submit(() -> {
|
|
|
+ try {
|
|
|
+ Long taskInstanceId = materialIdToTaskInstanceId.get(materialId);
|
|
|
+ if (taskInstanceId == null) return;
|
|
|
+ JSONObject dataContent = aigcApiService.getTaskCallbackDetail(taskInstanceId);
|
|
|
+ if (dataContent != null) {
|
|
|
+ MaterialDeconstructResult r = new MaterialDeconstructResult();
|
|
|
+ r.setMaterialId(materialId);
|
|
|
+ r.setSource(SOURCE_AIGC);
|
|
|
+ r.setResult(dataContent.toJSONString());
|
|
|
+ r.setSourceType(materialIdToSourceType.getOrDefault(materialId, defaultSourceType));
|
|
|
+ batch.add(r);
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("同步 materialId={} 失败: {}", materialId, e.getMessage());
|
|
|
}
|
|
|
- } catch (Exception e) {
|
|
|
- log.error("同步 materialId={} 失败: {}", materialId, e.getMessage());
|
|
|
- }
|
|
|
- }));
|
|
|
- }
|
|
|
- awaitAndShutdown(futures, executor, 30, "素材同步");
|
|
|
+ }));
|
|
|
+ }
|
|
|
+ awaitAndShutdown(futures, executor, 30, "素材同步");
|
|
|
|
|
|
- if (!batch.isEmpty()) {
|
|
|
- for (List<MaterialDeconstructResult> subBatch : Lists.partition(batch, 200)) {
|
|
|
- insertCount.addAndGet(materialDeconstructResultMapperExt.batchInsertIgnore(subBatch));
|
|
|
+ if (!batch.isEmpty()) {
|
|
|
+ for (List<MaterialDeconstructResult> subBatch : Lists.partition(batch, 200)) {
|
|
|
+ insertCount.addAndGet(materialDeconstructResultMapperExt.batchInsertIgnore(subBatch));
|
|
|
+ }
|
|
|
}
|
|
|
+ } finally {
|
|
|
+ executor.shutdownNow();
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
@@ -282,13 +293,17 @@ public class MaterialVectorJob {
|
|
|
|
|
|
// 3. 对每个配置并发处理
|
|
|
ExecutorService configExecutor = Executors.newFixedThreadPool(configs.size());
|
|
|
- List<Future<?>> configFutures = new ArrayList<>();
|
|
|
- for (DeconstructVectorConfig config : configs) {
|
|
|
- configFutures.add(configExecutor.submit(() ->
|
|
|
- processConfigForMaterial(config, materialIds, parsedById, totalSuccessCount, totalFailCount)
|
|
|
- ));
|
|
|
+ try {
|
|
|
+ List<Future<?>> configFutures = new ArrayList<>();
|
|
|
+ for (DeconstructVectorConfig config : configs) {
|
|
|
+ configFutures.add(configExecutor.submit(() ->
|
|
|
+ processConfigForMaterial(config, materialIds, parsedById, totalSuccessCount, totalFailCount)
|
|
|
+ ));
|
|
|
+ }
|
|
|
+ awaitAndShutdown(configFutures, configExecutor, 30, "素材向量化配置并发");
|
|
|
+ } finally {
|
|
|
+ configExecutor.shutdownNow();
|
|
|
}
|
|
|
- awaitAndShutdown(configFutures, configExecutor, 30, "素材向量化配置并发");
|
|
|
|
|
|
totalProcessed.addAndGet(materialIds.size());
|
|
|
|
|
|
@@ -320,7 +335,7 @@ public class MaterialVectorJob {
|
|
|
private Map<String, ParsedMaterial> loadParsedMaterials(List<String> materialIds) {
|
|
|
List<MaterialDeconstructResult> results = materialDeconstructResultMapperExt
|
|
|
.selectResultsByMaterialIds(SOURCE_AIGC, materialIds);
|
|
|
- Map<String, ParsedMaterial> map = new HashMap<>(results.size() * 2);
|
|
|
+ Map<String, ParsedMaterial> map = new HashMap<>(materialIds.size());
|
|
|
for (MaterialDeconstructResult r : results) {
|
|
|
if (r == null || !StringUtils.hasText(r.getResult())) continue;
|
|
|
JSONObject dataContent;
|
|
|
@@ -428,9 +443,14 @@ public class MaterialVectorJob {
|
|
|
truncated.add(text);
|
|
|
}
|
|
|
for (int i = 0; i < vectors.size(); i++) {
|
|
|
- materialVectorStoreService.save(configCode, materialId, i, vectors.get(i), truncated.get(i), sourceType);
|
|
|
+ if (!materialVectorStoreService.save(configCode, materialId, i, vectors.get(i), truncated.get(i), sourceType)) {
|
|
|
+ log.error("materialId={} 配置 {} 第{}个点 save 返回 false", materialId, configCode, i);
|
|
|
+ return false;
|
|
|
+ }
|
|
|
}
|
|
|
- log.info("materialId={} 配置 {} 多点向量化存储成功,共 {} 个点", materialId, configCode, vectors.size());
|
|
|
+ // 清理不再需要的旧点(例如上次 5 个点,本次只有 3 个)
|
|
|
+ materialVectorStoreService.deleteAbovePointIndex(configCode, materialId, vectors.size());
|
|
|
+ log.debug("materialId={} 配置 {} 多点向量化存储成功,共 {} 个点", materialId, configCode, vectors.size());
|
|
|
return true;
|
|
|
} else {
|
|
|
String text = null;
|
|
|
@@ -452,8 +472,11 @@ public class MaterialVectorJob {
|
|
|
log.error("materialId={} 配置 {} 文本向量化失败", materialId, configCode);
|
|
|
return false;
|
|
|
}
|
|
|
- materialVectorStoreService.save(configCode, materialId, vector, text, sourceType);
|
|
|
- log.info("materialId={} 配置 {} 向量化存储成功", materialId, configCode);
|
|
|
+ if (!materialVectorStoreService.save(configCode, materialId, vector, text, sourceType)) {
|
|
|
+ log.error("materialId={} 配置 {} save 返回 false", materialId, configCode);
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ log.debug("materialId={} 配置 {} 向量化存储成功", materialId, configCode);
|
|
|
return true;
|
|
|
}
|
|
|
}
|
|
|
@@ -468,7 +491,7 @@ public class MaterialVectorJob {
|
|
|
if (StringUtils.hasText(textHash)) {
|
|
|
List<Float> cached = materialVectorStoreService.getVectorByTextHash(textHash, configCode);
|
|
|
if (cached != null && !cached.isEmpty()) {
|
|
|
- log.info("命中 text_hash 缓存(material),hash={}, configCode={}", textHash, configCode);
|
|
|
+ log.debug("命中 text_hash 缓存(material),hash={}, configCode={}", textHash, configCode);
|
|
|
return cached;
|
|
|
}
|
|
|
}
|
|
|
@@ -513,7 +536,7 @@ public class MaterialVectorJob {
|
|
|
}
|
|
|
|
|
|
// ====================================================================
|
|
|
- // 复用 VideoVectorJob 的私有方法(直接 copy 一份,未来再统一抽工具类)
|
|
|
+ // TODO: 与 VideoVectorJob 的提取逻辑统一抽取到 VectorUtils / ExtractionUtils,避免两边各自维护
|
|
|
// ====================================================================
|
|
|
|
|
|
/**
|
|
|
@@ -625,7 +648,8 @@ public class MaterialVectorJob {
|
|
|
texts.add(name);
|
|
|
}
|
|
|
}
|
|
|
- } catch (Exception ignored) {
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.debug("extractTextsFromPointDecomposition 单点处理异常 pointName={}: {}", pointName, e.getMessage());
|
|
|
}
|
|
|
}
|
|
|
} catch (Exception e) {
|
|
|
@@ -641,7 +665,8 @@ public class MaterialVectorJob {
|
|
|
for (String key : new String[]{"具体元素", "具象概念", "抽象概念"}) {
|
|
|
try {
|
|
|
collectNamesFromArray(substance.getJSONArray(key), names);
|
|
|
- } catch (Exception ignored) {
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.debug("extractSubstanceNames key={} 异常: {}", key, e.getMessage());
|
|
|
}
|
|
|
}
|
|
|
return names;
|
|
|
@@ -654,7 +679,8 @@ public class MaterialVectorJob {
|
|
|
for (String key : new String[]{"具体元素形式", "具象概念形式", "整体形式"}) {
|
|
|
try {
|
|
|
collectNamesFromArray(form.getJSONArray(key), names);
|
|
|
- } catch (Exception ignored) {
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.debug("extractFormNames key={} 异常: {}", key, e.getMessage());
|
|
|
}
|
|
|
}
|
|
|
return names;
|
|
|
@@ -671,7 +697,8 @@ public class MaterialVectorJob {
|
|
|
names.add(name);
|
|
|
}
|
|
|
}
|
|
|
- } catch (Exception ignored) {
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.debug("collectNamesFromArray 单元素解析异常: {}", e.getMessage());
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
@@ -687,7 +714,8 @@ public class MaterialVectorJob {
|
|
|
if (StringUtils.hasText(word) && contribution != null) {
|
|
|
map.put(word, contribution);
|
|
|
}
|
|
|
- } catch (Exception ignored) {
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.debug("buildContributionMap 单元素解析异常: {}", e.getMessage());
|
|
|
}
|
|
|
}
|
|
|
} catch (Exception e) {
|
|
|
@@ -738,11 +766,23 @@ public class MaterialVectorJob {
|
|
|
|
|
|
private void awaitAndShutdown(List<Future<?>> futures, ExecutorService executor,
|
|
|
long timeoutMinutes, String taskDesc) {
|
|
|
+ long deadline = System.currentTimeMillis() + TimeUnit.MINUTES.toMillis(timeoutMinutes);
|
|
|
+ int completed = 0;
|
|
|
for (Future<?> future : futures) {
|
|
|
+ long remaining = deadline - System.currentTimeMillis();
|
|
|
+ if (remaining <= 0) {
|
|
|
+ log.error("{} 整体超时({}分钟),已取消剩余任务 (已完成 {}/{})",
|
|
|
+ taskDesc, timeoutMinutes, completed, futures.size());
|
|
|
+ for (Future<?> f : futures) {
|
|
|
+ f.cancel(true);
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ }
|
|
|
try {
|
|
|
- future.get(timeoutMinutes, TimeUnit.MINUTES);
|
|
|
+ future.get(remaining, TimeUnit.MILLISECONDS);
|
|
|
+ completed++;
|
|
|
} catch (Exception e) {
|
|
|
- log.error("{} 并发任务等待异常: {}", taskDesc, e.getMessage(), e);
|
|
|
+ log.error("{} 并发任务等待异常: {}", taskDesc, e.getMessage());
|
|
|
}
|
|
|
}
|
|
|
executor.shutdown();
|