|
@@ -14,6 +14,7 @@ import com.tzld.videoVector.model.entity.MaterialMatch;
|
|
|
import com.tzld.videoVector.model.entity.VideoDetail;
|
|
import com.tzld.videoVector.model.entity.VideoDetail;
|
|
|
import com.tzld.videoVector.model.entity.VideoMatch;
|
|
import com.tzld.videoVector.model.entity.VideoMatch;
|
|
|
import com.tzld.videoVector.model.param.MatchTopNVideoParam;
|
|
import com.tzld.videoVector.model.param.MatchTopNVideoParam;
|
|
|
|
|
+import com.tzld.videoVector.model.param.recall.MatchByArticleIdParam;
|
|
|
import com.tzld.videoVector.model.param.recall.MatchByMaterialIdParam;
|
|
import com.tzld.videoVector.model.param.recall.MatchByMaterialIdParam;
|
|
|
import com.tzld.videoVector.model.param.recall.MatchByTextParam;
|
|
import com.tzld.videoVector.model.param.recall.MatchByTextParam;
|
|
|
import com.tzld.videoVector.model.param.recall.MatchByVideoIdParam;
|
|
import com.tzld.videoVector.model.param.recall.MatchByVideoIdParam;
|
|
@@ -23,6 +24,7 @@ import com.tzld.videoVector.model.po.pgVector.DeconstructVectorConfigExample;
|
|
|
import com.tzld.videoVector.model.po.pgVector.MaterialDeconstructResult;
|
|
import com.tzld.videoVector.model.po.pgVector.MaterialDeconstructResult;
|
|
|
import com.tzld.videoVector.model.vo.VideoMatchResult;
|
|
import com.tzld.videoVector.model.vo.VideoMatchResult;
|
|
|
import com.tzld.videoVector.model.vo.recall.AIUnderstandingVO;
|
|
import com.tzld.videoVector.model.vo.recall.AIUnderstandingVO;
|
|
|
|
|
+import com.tzld.videoVector.model.vo.recall.ArticleBasicVO;
|
|
|
import com.tzld.videoVector.model.vo.recall.DeconstructPointsVO;
|
|
import com.tzld.videoVector.model.vo.recall.DeconstructPointsVO;
|
|
|
import com.tzld.videoVector.model.vo.recall.ArticleDetailVO;
|
|
import com.tzld.videoVector.model.vo.recall.ArticleDetailVO;
|
|
|
import com.tzld.videoVector.model.vo.recall.MaterialBasicVO;
|
|
import com.tzld.videoVector.model.vo.recall.MaterialBasicVO;
|
|
@@ -39,6 +41,7 @@ import com.tzld.videoVector.service.recall.VectorRecallTestService;
|
|
|
import com.tzld.videoVector.util.Md5Util;
|
|
import com.tzld.videoVector.util.Md5Util;
|
|
|
import com.tzld.videoVector.util.RedisUtils;
|
|
import com.tzld.videoVector.util.RedisUtils;
|
|
|
import com.tzld.videoVector.util.VectorUtils;
|
|
import com.tzld.videoVector.util.VectorUtils;
|
|
|
|
|
+import com.tzld.videoVector.model.po.pgVector.ArticleVector;
|
|
|
import com.tzld.videoVector.model.po.pgVector.MaterialVector;
|
|
import com.tzld.videoVector.model.po.pgVector.MaterialVector;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
@@ -663,6 +666,8 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
|
|
|
detail.setSummary(basic.summary);
|
|
detail.setSummary(basic.summary);
|
|
|
detail.setCover(basic.cover);
|
|
detail.setCover(basic.cover);
|
|
|
detail.setImages(basic.images);
|
|
detail.setImages(basic.images);
|
|
|
|
|
+ detail.setUrl(basic.url);
|
|
|
|
|
+ detail.setSource(basic.source);
|
|
|
}
|
|
}
|
|
|
detail.setDeconstruct(deconstructFlat);
|
|
detail.setDeconstruct(deconstructFlat);
|
|
|
vo.setArticleDetail(detail);
|
|
vo.setArticleDetail(detail);
|
|
@@ -738,6 +743,18 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ String url = targetPost.getString("url");
|
|
|
|
|
+ if (!StringUtils.hasText(url)) {
|
|
|
|
|
+ url = targetPost.getString("source_url");
|
|
|
|
|
+ }
|
|
|
|
|
+ meta.url = url;
|
|
|
|
|
+
|
|
|
|
|
+ String source = targetPost.getString("source");
|
|
|
|
|
+ if (!StringUtils.hasText(source)) {
|
|
|
|
|
+ source = targetPost.getString("account_name");
|
|
|
|
|
+ }
|
|
|
|
|
+ meta.source = source;
|
|
|
|
|
+
|
|
|
if (!StringUtils.hasText(meta.title)) {
|
|
if (!StringUtils.hasText(meta.title)) {
|
|
|
return null;
|
|
return null;
|
|
|
}
|
|
}
|
|
@@ -1441,6 +1458,234 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
|
|
|
return vo;
|
|
return vo;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ // ====================================================================
|
|
|
|
|
+ // articleDetail — 长文详情预览
|
|
|
|
|
+ // ====================================================================
|
|
|
|
|
+
|
|
|
|
|
+ @Override
|
|
|
|
|
+ public ArticleBasicVO getArticleDetail(String articleId) {
|
|
|
|
|
+ if (!StringUtils.hasText(articleId)) {
|
|
|
|
|
+ return null;
|
|
|
|
|
+ }
|
|
|
|
|
+ String trimmed = articleId.trim();
|
|
|
|
|
+ Map<String, ArticleDeconstructResult> rows = loadArticleDeconstructRows(Collections.singletonList(trimmed));
|
|
|
|
|
+ ArticleDeconstructResult row = rows.get(trimmed);
|
|
|
|
|
+ if (row == null) {
|
|
|
|
|
+ log.info("getArticleDetail: articleId={} 不存在", trimmed);
|
|
|
|
|
+ return null;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ JSONObject raw = parseArticleResultJson(row);
|
|
|
|
|
+ ArticleBasicMeta basic = raw != null ? extractArticleBasicMeta(raw) : null;
|
|
|
|
|
+
|
|
|
|
|
+ ArticleBasicVO vo = new ArticleBasicVO();
|
|
|
|
|
+ vo.setArticleId(trimmed);
|
|
|
|
|
+ if (basic != null) {
|
|
|
|
|
+ vo.setTitle(basic.title);
|
|
|
|
|
+ vo.setSummary(basic.summary);
|
|
|
|
|
+ vo.setCover(basic.cover);
|
|
|
|
|
+ vo.setUrl(basic.url);
|
|
|
|
|
+ vo.setSource(basic.source);
|
|
|
|
|
+ }
|
|
|
|
|
+ return vo;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // ====================================================================
|
|
|
|
|
+ // matchByArticleId — 长文ID跨模态召回
|
|
|
|
|
+ // ====================================================================
|
|
|
|
|
+
|
|
|
|
|
+ @Override
|
|
|
|
|
+ public RecallResultVO matchByArticleId(MatchByArticleIdParam param) {
|
|
|
|
|
+ RecallResultVO empty = emptyResult();
|
|
|
|
|
+ if (param == null || !StringUtils.hasText(param.getArticleId())) {
|
|
|
|
|
+ log.info("matchByArticleId: articleId 为空");
|
|
|
|
|
+ return empty;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ String articleId = param.getArticleId().trim();
|
|
|
|
|
+ int topN = param.getTopN() != null && param.getTopN() > 0 ? param.getTopN() : 50;
|
|
|
|
|
+
|
|
|
|
|
+ List<String> configCodes;
|
|
|
|
|
+ if (StringUtils.hasText(param.getConfigCode())) {
|
|
|
|
|
+ configCodes = Collections.singletonList(param.getConfigCode().trim());
|
|
|
|
|
+ } else {
|
|
|
|
|
+ configCodes = articleVectorStoreService.getDistinctConfigCodes(articleId);
|
|
|
|
|
+ if (configCodes.isEmpty()) {
|
|
|
|
|
+ log.info("matchByArticleId: articleId={} 无向量数据", articleId);
|
|
|
|
|
+ return empty;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ log.info("matchByArticleId: articleId={}, topN={}, configCodes={}", articleId, topN, configCodes);
|
|
|
|
|
+
|
|
|
|
|
+ // 收集所有有效向量点(支持多点模式)
|
|
|
|
|
+ List<MaterialVectorQuery> vectorQueries = new ArrayList<>();
|
|
|
|
|
+ for (String configCode : configCodes) {
|
|
|
|
|
+ List<ArticleVector> vectors = articleVectorStoreService.getVectorsByArticleId(articleId, configCode);
|
|
|
|
|
+ for (ArticleVector vector : vectors) {
|
|
|
|
|
+ if (vector != null && StringUtils.hasText(vector.getEmbedding())) {
|
|
|
|
|
+ vectorQueries.add(new MaterialVectorQuery(configCode, vector.getEmbedding()));
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ if (vectorQueries.isEmpty()) {
|
|
|
|
|
+ log.info("matchByArticleId: articleId={} 无有效向量 embedding", articleId);
|
|
|
|
|
+ return empty;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ int candidateTopN = Math.max(topN * VectorConstants.MULTI_POINT_RECALL_CANDIDATE_FACTOR,
|
|
|
|
|
+ VectorConstants.MULTI_POINT_RECALL_MIN_CANDIDATES);
|
|
|
|
|
+
|
|
|
|
|
+ List<VideoMatchEnrichedVO> allResults = Collections.synchronizedList(new ArrayList<>());
|
|
|
|
|
+ List<CompletableFuture<Void>> allFutures = new ArrayList<>();
|
|
|
|
|
+
|
|
|
|
|
+ for (MaterialVectorQuery query : vectorQueries) {
|
|
|
|
|
+ final String cc = query.configCode;
|
|
|
|
|
+ final String rawEmbedding = query.rawEmbedding;
|
|
|
|
|
+ final int ctn = candidateTopN;
|
|
|
|
|
+ final int tn = topN;
|
|
|
|
|
+
|
|
|
|
|
+ allFutures.add(CompletableFuture.runAsync(() -> {
|
|
|
|
|
+ try {
|
|
|
|
|
+ List<VideoMatch> matches = vectorStoreService.searchTopNByRawVector(cc, rawEmbedding, ctn);
|
|
|
|
|
+ List<VideoMatch> deduped = deduplicateVideoMatches(matches, tn);
|
|
|
|
|
+ log.info("matchByArticleId 视频搜索 cc={}: {} 条, 去重后 {} 条",
|
|
|
|
|
+ cc, matches != null ? matches.size() : 0, deduped.size());
|
|
|
|
|
+ if (!deduped.isEmpty()) {
|
|
|
|
|
+ List<VideoMatchResult> videoResults = toVideoMatchResults(deduped, cc);
|
|
|
|
|
+ populateVideoMatchResultDetails(videoResults);
|
|
|
|
|
+ allResults.addAll(enrichVideoMatches(videoResults, cc));
|
|
|
|
|
+ }
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ log.error("matchByArticleId 视频搜索失败 configCode={}: {}", cc, e.getMessage(), e);
|
|
|
|
|
+ }
|
|
|
|
|
+ }, RECALL_EXECUTOR));
|
|
|
|
|
+
|
|
|
|
|
+ allFutures.add(CompletableFuture.runAsync(() -> {
|
|
|
|
|
+ try {
|
|
|
|
|
+ List<MaterialMatch> matches = materialVectorStoreService.searchTopNByRawVector(cc, rawEmbedding, ctn);
|
|
|
|
|
+ List<MaterialMatch> deduped = deduplicateMaterialMatches(matches, tn);
|
|
|
|
|
+ if (!deduped.isEmpty()) {
|
|
|
|
|
+ allResults.addAll(enrichMaterialMatches(deduped, cc));
|
|
|
|
|
+ }
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ log.error("matchByArticleId 素材搜索失败 configCode={}: {}", cc, e.getMessage(), e);
|
|
|
|
|
+ }
|
|
|
|
|
+ }, RECALL_EXECUTOR));
|
|
|
|
|
+
|
|
|
|
|
+ allFutures.add(CompletableFuture.runAsync(() -> {
|
|
|
|
|
+ try {
|
|
|
|
|
+ List<ArticleMatch> matches = articleVectorStoreService.searchTopNByRawVector(cc, rawEmbedding, ctn);
|
|
|
|
|
+ matches = matches.stream()
|
|
|
|
|
+ .filter(m -> !articleId.equals(m.getArticleId()))
|
|
|
|
|
+ .collect(Collectors.toList());
|
|
|
|
|
+ List<ArticleMatch> deduped = deduplicateArticleMatches(matches, tn);
|
|
|
|
|
+ if (!deduped.isEmpty()) {
|
|
|
|
|
+ allResults.addAll(enrichArticleMatches(deduped, cc));
|
|
|
|
|
+ }
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ log.error("matchByArticleId 文章搜索失败 configCode={}: {}", cc, e.getMessage(), e);
|
|
|
|
|
+ }
|
|
|
|
|
+ }, RECALL_EXECUTOR));
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ for (CompletableFuture<Void> future : allFutures) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ future.get(30, TimeUnit.SECONDS);
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ log.error("matchByArticleId 并行搜索等待异常: {}", e.getMessage(), e);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ List<VideoMatchEnrichedVO> merged = deduplicateCrossConfigCode(allResults);
|
|
|
|
|
+
|
|
|
|
|
+ List<VideoMatchEnrichedVO> videoItems = merged.stream()
|
|
|
|
|
+ .filter(it -> it.getModality() == Modality.VIDEO)
|
|
|
|
|
+ .sorted(Comparator.comparing(VideoMatchEnrichedVO::getScore,
|
|
|
|
|
+ Comparator.nullsLast(Comparator.reverseOrder())))
|
|
|
|
|
+ .limit(topN)
|
|
|
|
|
+ .collect(Collectors.toList());
|
|
|
|
|
+ List<VideoMatchEnrichedVO> materialItems = merged.stream()
|
|
|
|
|
+ .filter(it -> it.getModality() == Modality.MATERIAL)
|
|
|
|
|
+ .sorted(Comparator.comparing(VideoMatchEnrichedVO::getScore,
|
|
|
|
|
+ Comparator.nullsLast(Comparator.reverseOrder())))
|
|
|
|
|
+ .limit(topN)
|
|
|
|
|
+ .collect(Collectors.toList());
|
|
|
|
|
+ List<VideoMatchEnrichedVO> articleItems = merged.stream()
|
|
|
|
|
+ .filter(it -> it.getModality() == Modality.ARTICLE)
|
|
|
|
|
+ .sorted(Comparator.comparing(VideoMatchEnrichedVO::getScore,
|
|
|
|
|
+ Comparator.nullsLast(Comparator.reverseOrder())))
|
|
|
|
|
+ .limit(topN)
|
|
|
|
|
+ .collect(Collectors.toList());
|
|
|
|
|
+
|
|
|
|
|
+ log.info("matchByArticleId 按模态截断后: video={}, material={}, article={}",
|
|
|
|
|
+ videoItems.size(), materialItems.size(), articleItems.size());
|
|
|
|
|
+
|
|
|
|
|
+ String selfConfigCode = StringUtils.hasText(param.getConfigCode())
|
|
|
|
|
+ ? param.getConfigCode().trim()
|
|
|
|
|
+ : vectorQueries.get(0).configCode;
|
|
|
|
|
+ ensureSelfArticleInResults(articleItems, articleId, selfConfigCode, topN);
|
|
|
|
|
+
|
|
|
|
|
+ RecallResultVO result = buildResult(videoItems, materialItems, articleItems);
|
|
|
|
|
+ log.info("matchByArticleId 完成: total={}, video={}, material={}, article={}",
|
|
|
|
|
+ result.getTotal(), result.getVideoCount(), result.getMaterialCount(), result.getArticleCount());
|
|
|
|
|
+ return result;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * 构建输入长文自身的 enriched 条目(score=1.0)
|
|
|
|
|
+ */
|
|
|
|
|
+ private VideoMatchEnrichedVO enrichSelfArticle(String articleId, String configCode) {
|
|
|
|
|
+ Map<String, ArticleDeconstructResult> rows = loadArticleDeconstructRows(Collections.singletonList(articleId));
|
|
|
|
|
+ ArticleDeconstructResult row = rows.get(articleId);
|
|
|
|
|
+
|
|
|
|
|
+ JSONObject raw = row != null ? parseArticleResultJson(row) : null;
|
|
|
|
|
+ ArticleBasicMeta basic = raw != null ? extractArticleBasicMeta(raw) : null;
|
|
|
|
|
+ Map<String, Object> deconstructFlat = raw != null ? buildDeconstructFromRaw(raw) : null;
|
|
|
|
|
+
|
|
|
|
|
+ VideoMatchEnrichedVO vo = new VideoMatchEnrichedVO();
|
|
|
|
|
+ vo.setArticleId(articleId);
|
|
|
|
|
+ vo.setModality(Modality.ARTICLE);
|
|
|
|
|
+ vo.setConfigCode(StringUtils.hasText(configCode) ? configCode : null);
|
|
|
|
|
+ vo.setScore(1.0);
|
|
|
|
|
+
|
|
|
|
|
+ if (basic != null) {
|
|
|
|
|
+ vo.setTitle(basic.title);
|
|
|
|
|
+ vo.setCover(basic.cover);
|
|
|
|
|
+ if (basic.images != null && !basic.images.isEmpty()) {
|
|
|
|
|
+ vo.setImageList(basic.images);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ ArticleDetailVO detail = new ArticleDetailVO();
|
|
|
|
|
+ if (basic != null) {
|
|
|
|
|
+ detail.setTitle(basic.title);
|
|
|
|
|
+ detail.setSummary(basic.summary);
|
|
|
|
|
+ detail.setCover(basic.cover);
|
|
|
|
|
+ detail.setImages(basic.images);
|
|
|
|
|
+ detail.setUrl(basic.url);
|
|
|
|
|
+ detail.setSource(basic.source);
|
|
|
|
|
+ }
|
|
|
|
|
+ detail.setDeconstruct(deconstructFlat);
|
|
|
|
|
+ vo.setArticleDetail(detail);
|
|
|
|
|
+
|
|
|
|
|
+ applyCompatibilityFields(vo);
|
|
|
|
|
+ return vo;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private void ensureSelfArticleInResults(List<VideoMatchEnrichedVO> articleItems,
|
|
|
|
|
+ String articleId, String configCode, int topN) {
|
|
|
|
|
+ boolean exists = articleItems.stream()
|
|
|
|
|
+ .anyMatch(it -> articleId.equals(it.getArticleId()));
|
|
|
|
|
+ if (exists) {
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+ VideoMatchEnrichedVO self = enrichSelfArticle(articleId, configCode);
|
|
|
|
|
+ articleItems.add(0, self);
|
|
|
|
|
+ if (articleItems.size() > topN) {
|
|
|
|
|
+ articleItems.remove(articleItems.size() - 1);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
@Override
|
|
@Override
|
|
|
public DeconstructPointsVO getDeconstructPoints(Long videoId) {
|
|
public DeconstructPointsVO getDeconstructPoints(Long videoId) {
|
|
|
if (videoId == null || videoId <= 0L) {
|
|
if (videoId == null || videoId <= 0L) {
|
|
@@ -1598,6 +1843,7 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
|
|
|
List<String> tags;
|
|
List<String> tags;
|
|
|
String cover;
|
|
String cover;
|
|
|
String url;
|
|
String url;
|
|
|
|
|
+ String source;
|
|
|
List<String> images;
|
|
List<String> images;
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|