|
|
@@ -176,10 +176,8 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
|
|
|
materialItems = Collections.emptyList();
|
|
|
}
|
|
|
|
|
|
- List<VideoMatchEnrichedVO> videoItems = limitEnrichedItemsByScore(
|
|
|
- enrichVideoMatches(videoMatches, configCode), videoTopN);
|
|
|
- materialItems = limitEnrichedItemsByScore(materialItems, materialTopN);
|
|
|
- return buildResult(videoItems, materialItems, defaultTopN);
|
|
|
+ List<VideoMatchEnrichedVO> videoItems = enrichVideoMatches(videoMatches, configCode);
|
|
|
+ return buildResult(videoItems, materialItems);
|
|
|
}
|
|
|
|
|
|
private List<VideoMatchResult> limitVideoMatchesByScore(List<VideoMatchResult> matches, int topN) {
|
|
|
@@ -205,25 +203,56 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * 素材文本召回:material_vectors → material_deconstruct_result(已废弃 deconstruct_content)
|
|
|
+ * 素材文本召回:material_vectors → material_deconstruct_result
|
|
|
*/
|
|
|
private List<VideoMatchEnrichedVO> recallMaterialItems(String queryText, String configCode, int topN) {
|
|
|
try {
|
|
|
+ int candidate = Math.max(topN * VectorConstants.MULTI_POINT_RECALL_CANDIDATE_FACTOR,
|
|
|
+ VectorConstants.MULTI_POINT_RECALL_MIN_CANDIDATES);
|
|
|
+
|
|
|
+ // 优先尝试 text_hash 缓存:直接用 PG 返回的原始 embedding 字符串搜索,
|
|
|
+ // 绕过 Java Float.parseFloat/Float.toString 回环的精度损失
|
|
|
+ String textHash = Md5Util.encoderByMd5(queryText);
|
|
|
+ if (StringUtils.hasText(textHash)) {
|
|
|
+ String rawVector = materialVectorStoreService.getRawVectorByTextHash(textHash, configCode);
|
|
|
+ if (rawVector != null && !rawVector.isEmpty()) {
|
|
|
+ log.info("素材召回 使用缓存的原始向量字符串,跳过 Float 回环, configCode={}", configCode);
|
|
|
+ List<MaterialMatch> raw = materialVectorStoreService.searchTopNByRawVector(
|
|
|
+ configCode, rawVector, candidate);
|
|
|
+ List<MaterialMatch> matches = deduplicateMaterialMatches(raw, topN);
|
|
|
+ if (!CollectionUtils.isEmpty(matches)) {
|
|
|
+ List<String> matchSample = new ArrayList<>();
|
|
|
+ for (MaterialMatch m : matches) {
|
|
|
+ matchSample.add(m.getMaterialId() + ":" + String.format("%.4f", m.getScore()));
|
|
|
+ }
|
|
|
+ log.info("素材召回(rawVector) 去重后({}条): {}, configCode={}",
|
|
|
+ matches.size(), matchSample, configCode);
|
|
|
+ return limitEnrichedItemsByScore(enrichMaterialMatches(matches, configCode), topN);
|
|
|
+ }
|
|
|
+ log.info("素材召回(rawVector) 无结果, configCode={}", configCode);
|
|
|
+ return Collections.emptyList();
|
|
|
+ }
|
|
|
+ log.info("素材召回 text_hash 缓存未命中, textHash={}, 降级到 embedding API", textHash);
|
|
|
+ }
|
|
|
+
|
|
|
+ // 降级:embedding API → Float 向量 → 搜索(非缓存路径,容忍精度损失)
|
|
|
List<Float> queryVector = resolveQueryVectorForMaterial(queryText, configCode);
|
|
|
if (queryVector == null || queryVector.isEmpty()) {
|
|
|
log.info("素材召回: 无法获取查询向量, queryText={}", queryText);
|
|
|
return Collections.emptyList();
|
|
|
}
|
|
|
- // 多点配置下同一素材会被多个 point 命中,需取 topN * 倍数 候选后在应用层按 materialId 去重
|
|
|
- int candidate = Math.max(topN * VectorConstants.MULTI_POINT_RECALL_CANDIDATE_FACTOR,
|
|
|
- VectorConstants.MULTI_POINT_RECALL_MIN_CANDIDATES);
|
|
|
+ log.info("素材召回 使用 embedding API 向量, dim={}", queryVector.size());
|
|
|
List<MaterialMatch> raw = materialVectorStoreService.searchTopN(configCode, queryVector, candidate);
|
|
|
List<MaterialMatch> matches = deduplicateMaterialMatches(raw, topN);
|
|
|
if (CollectionUtils.isEmpty(matches)) {
|
|
|
log.info("素材召回 material_vectors 无结果, configCode={}", configCode);
|
|
|
return Collections.emptyList();
|
|
|
}
|
|
|
- log.info("素材召回 material_vectors 命中 {} 条, configCode={}", matches.size(), configCode);
|
|
|
+ List<String> matchSample = new ArrayList<>();
|
|
|
+ for (MaterialMatch m : matches) {
|
|
|
+ matchSample.add(m.getMaterialId() + ":" + String.format("%.4f", m.getScore()));
|
|
|
+ }
|
|
|
+ log.info("素材召回(embedding API) 去重后({}条): {}, configCode={}", matches.size(), matchSample, configCode);
|
|
|
return limitEnrichedItemsByScore(enrichMaterialMatches(matches, configCode), topN);
|
|
|
} catch (Exception e) {
|
|
|
log.error("素材召回 material_vectors 异常: {}", e.getMessage(), e);
|
|
|
@@ -243,19 +272,28 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
|
|
|
config = new DeconstructVectorConfig();
|
|
|
config.setConfigCode(configCode);
|
|
|
}
|
|
|
+ log.info("resolveQueryVectorForMaterial: queryText={}, configCode={}, model={}, dim={}",
|
|
|
+ queryText, configCode, config.getEmbeddingModel(), config.getDimension());
|
|
|
|
|
|
// 1. 先查 material_vectors 的 text_hash 缓存
|
|
|
String textHash = Md5Util.encoderByMd5(queryText);
|
|
|
if (StringUtils.hasText(textHash)) {
|
|
|
+ log.info("resolveQueryVectorForMaterial textHash={}, 开始查 text_hash 缓存", textHash);
|
|
|
List<Float> cached = materialVectorStoreService.getVectorByTextHash(textHash, configCode);
|
|
|
if (cached != null && !cached.isEmpty()) {
|
|
|
+ log.info("resolveQueryVectorForMaterial 命中 text_hash 缓存,dim={}", cached.size());
|
|
|
return cached;
|
|
|
}
|
|
|
+ log.info("resolveQueryVectorForMaterial text_hash 缓存未命中,降级到 embedding API");
|
|
|
}
|
|
|
|
|
|
// 2. 调用 embedding API(与入库时相同的 model / dimension)
|
|
|
try {
|
|
|
- return embeddingService.embed(queryText, config);
|
|
|
+ log.info("resolveQueryVectorForMaterial 调用 embedding API: text={}, model={}, dim={}",
|
|
|
+ queryText, config.getEmbeddingModel(), config.getDimension());
|
|
|
+ List<Float> result = embeddingService.embed(queryText, config);
|
|
|
+ log.info("resolveQueryVectorForMaterial embedding API 返回, dim={}", result != null ? result.size() : 0);
|
|
|
+ return result;
|
|
|
} catch (Exception e) {
|
|
|
log.error("素材召回 embedding 失败: queryText={}, error={}", queryText, e.getMessage());
|
|
|
return null;
|
|
|
@@ -614,13 +652,11 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * 组装返回结果:视频 + 素材合并为 items
|
|
|
- * 各模态在前置链路里已按各自 topN 截断,这里仅做拼接 + 计数,
|
|
|
- * 不再合并截断(否则视频分数普遍较高会把素材全部挤掉)。
|
|
|
+ * 组装返回结果:视频 + 素材合并为 items。
|
|
|
+ * 各模态在前置链路已按 videoTopN / materialTopN 各自截断,此处仅拼接 + 计数,不做合并截断。
|
|
|
*/
|
|
|
private RecallResultVO buildResult(List<VideoMatchEnrichedVO> videoItems,
|
|
|
- List<VideoMatchEnrichedVO> materialItems,
|
|
|
- int topN) {
|
|
|
+ List<VideoMatchEnrichedVO> materialItems) {
|
|
|
if (videoItems == null) {
|
|
|
videoItems = Collections.emptyList();
|
|
|
}
|
|
|
@@ -694,7 +730,7 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
|
|
|
String configCode = StringUtils.hasText(param.getConfigCode())
|
|
|
? param.getConfigCode() : VectorConstants.DEFAULT_CONFIG_CODE;
|
|
|
List<VideoMatchEnrichedVO> videoItems = enrichVideoMatches(rawMatches, configCode);
|
|
|
- return buildResult(videoItems, Collections.emptyList(), matchParam.getTopN());
|
|
|
+ return buildResult(videoItems, Collections.emptyList());
|
|
|
}
|
|
|
|
|
|
@Override
|