14 ساعت پیش · 02640039bf
--- a/core/src/main/java/com/tzld/videoVector/api/RecommendApiService.java
+++ b/core/src/main/java/com/tzld/videoVector/api/RecommendApiService.java
@@ -0,0 +1,122 @@
 
															+package com.tzld.videoVector.api;
														
 
															+
														
 
															+import com.alibaba.fastjson.JSONArray;
														
 
															+import com.alibaba.fastjson.JSONObject;
														
 
															+import lombok.extern.slf4j.Slf4j;
														
 
															+import okhttp3.*;
														
 
															+import org.springframework.beans.factory.annotation.Value;
														
 
															+import org.springframework.stereotype.Service;
														
 
															+
														
 
															+import javax.annotation.PostConstruct;
														
 
															+import java.io.IOException;
														
 
															+import java.io.UnsupportedEncodingException;
														
 
															+import java.net.URLEncoder;
														
 
															+import java.util.*;
														
 
															+import java.util.concurrent.TimeUnit;
														
 
															+
														
 
															+/**
														
 
															+ * 外部投放系统 API 服务。
														
 
															+ * <p>用于查询各账号下已投放素材/文章列表，辅助匹配结果去重。
														
 
															+ */
														
 
															+@Slf4j
														
 
															+@Service
														
 
															+public class RecommendApiService {
														
 
															+
														
 
															+    private OkHttpClient client;
														
 
															+
														
 
															+    /** 已发送素材查询接口 */
														
 
															+    @Value("${external.api.get-source-ids.url:http://101.37.174.139:80/api/getSourceIdsByAccount}")
														
 
															+    private String getSourceIdsUrl;
														
 
															+
														
 
															+    /** HTTP 超时（秒） */
														
 
															+    @Value("${external.api.timeout:30}")
														
 
															+    private int timeout;
														
 
															+
														
 
															+    @PostConstruct
														
 
															+    public void init() {
														
 
															+        this.client = new OkHttpClient.Builder()
														
 
															+                .connectTimeout(timeout, TimeUnit.SECONDS)
														
 
															+                .readTimeout(timeout, TimeUnit.SECONDS)
														
 
															+                .writeTimeout(timeout, TimeUnit.SECONDS)
														
 
															+                .build();
														
 
															+    }
														
 
															+
														
 
															+    /**
														
 
															+     * 获取指定账号已发送的素材 ID 集合。
														
 
															+     *
														
 
															+     * @param accountName 账号名称（channelLevel3）
														
 
															+     * @return 已发送素材 ID 集合，为空或失败时返回空集合
														
 
															+     */
														
 
															+    public Set<String> getSentSourceIds(String accountName) {
														
 
															+        if (accountName == null || accountName.isEmpty()) {
														
 
															+            return Collections.emptySet();
														
 
															+        }
														
 
															+
														
 
															+        try {
														
 
															+            String encodedName = URLEncoder.encode(accountName, "UTF-8");
														
 
															+            String url = getSourceIdsUrl + "?accountName=" + encodedName + "&type=9&position=1";
														
 
															+
														
 
															+            Request request = new Request.Builder()
														
 
															+                    .url(url)
														
 
															+                    .get()
														
 
															+                    .build();
														
 
															+
														
 
															+            try (Response response = client.newCall(request).execute()) {
														
 
															+                if (!response.isSuccessful()) {
														
 
															+                    log.error("获取已发送素材失败: accountName={}, HTTP {}, body={}",
														
 
															+                            accountName, response.code(),
														
 
															+                            response.body() != null ? response.body().string() : "");
														
 
															+                    return Collections.emptySet();
														
 
															+                }
														
 
															+
														
 
															+                String body = response.body() != null ? response.body().string() : "";
														
 
															+                JSONObject res = JSONObject.parseObject(body);
														
 
															+                if (res == null || res.getInteger("code") == null || res.getInteger("code") != 0) {
														
 
															+                    log.error("获取已发送素材接口返回异常: accountName={}, response={}", accountName, body);
														
 
															+                    return Collections.emptySet();
														
 
															+                }
														
 
															+
														
 
															+                JSONArray data = res.getJSONArray("data");
														
 
															+                if (data == null || data.isEmpty()) {
														
 
															+                    return Collections.emptySet();
														
 
															+                }
														
 
															+
														
 
															+                Set<String> result = new LinkedHashSet<>(data.size());
														
 
															+                for (int i = 0; i < data.size(); i++) {
														
 
															+                    String id = data.getString(i);
														
 
															+                    if (id != null && !id.isEmpty()) {
														
 
															+                        result.add(id);
														
 
															+                    }
														
 
															+                }
														
 
															+                log.info("获取已发送素材: accountName={}, count={}", accountName, result.size());
														
 
															+                return result;
														
 
															+            }
														
 
															+        } catch (UnsupportedEncodingException e) {
														
 
															+            log.error("URL 编码失败: accountName={}, {}", accountName, e.getMessage());
														
 
															+            return Collections.emptySet();
														
 
															+        } catch (IOException e) {
														
 
															+            log.error("获取已发送素材网络异常: accountName={}, {}", accountName, e.getMessage());
														
 
															+            return Collections.emptySet();
														
 
															+        } catch (Exception e) {
														
 
															+            log.error("获取已发送素材异常: accountName={}, {}", accountName, e.getMessage());
														
 
															+            return Collections.emptySet();
														
 
															+        }
														
 
															+    }
														
 
															+
														
 
															+    /**
														
 
															+     * 批量获取多个账号的已发送素材 ID 集合。
														
 
															+     *
														
 
															+     * @param accountNames 账号名称列表
														
 
															+     * @return accountName → 已发送素材 ID 集合
														
 
															+     */
														
 
															+    public Map<String, Set<String>> getAllSentSourceIds(Collection<String> accountNames) {
														
 
															+        Map<String, Set<String>> result = new LinkedHashMap<>();
														
 
															+        if (accountNames == null || accountNames.isEmpty()) {
														
 
															+            return result;
														
 
															+        }
														
 
															+        for (String name : accountNames) {
														
 
															+            result.put(name, getSentSourceIds(name));
														
 
															+        }
														
 
															+        return result;
														
 
															+    }
														
 
															+}
														
--- a/core/src/main/java/com/tzld/videoVector/job/VideoArticleMatchJob.java
+++ b/core/src/main/java/com/tzld/videoVector/job/VideoArticleMatchJob.java
@@ -2,6 +2,7 @@ package com.tzld.videoVector.job;
 
															 import com.alibaba.fastjson.JSON;
														
 
															 import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
														
 
															+import com.tzld.videoVector.api.RecommendApiService;
														
 
															 import com.tzld.videoVector.api.VideoApiService;
														
 
															 import com.tzld.videoVector.common.constant.VectorConstants;
														
 
															 import com.tzld.videoVector.dao.mapper.pgVector.ChannelDemandMatchResultMapper;
														
@@ -69,6 +70,9 @@ public class VideoArticleMatchJob {
 
															     @Resource
														
 
															     private VideoApiService videoApiService;
														
 
															+    @Resource
														
 
															+    private RecommendApiService recommendApiService;
														
 
															+
														
 
															     @Resource
														
 
															     private RedisUtils redisUtils;
														
@@ -173,11 +177,18 @@ public class VideoArticleMatchJob {
 
															                 return ReturnT.SUCCESS;
														
 
															             }
														
 
															+            // 2.1 获取各账号已发送素材 ID（用于过滤已投文章）
														
 
															+            Map<String, Set<String>> sentSourceIds = fetchSentSourceIds(records);
														
 
															+
														
 
															             // 3. 批量获取视频标题
														
 
															             Map<Long, String> videoTitleMap = fetchVideoTitles(records);
														
 
															-            // 4. 视频标题 → 长文向量召回
														
 
															-            Map<Long, List<ArticleMatchItem>> videoArticleMatches = matchArticlesByTitles(videoTitleMap);
														
 
															+            // 3.1 构建 videoId → channelLevel3 映射（用于匹配时过滤已发送文章）
														
 
															+            Map<Long, String> videoChannelMap = buildVideoChannelMap(records);
														
 
															+
														
 
															+            // 4. 视频标题 → 长文向量召回（过滤已发送文章）
														
 
															+            Map<Long, List<ArticleMatchItem>> videoArticleMatches =
														
 
															+                    matchArticlesByTitles(videoTitleMap, videoChannelMap, sentSourceIds);
														
 
															             // 5. 1v1 去重配对
														
 
															             Map<Long, ArticleMatchItem> finalPairs = dedupOneToOne(videoArticleMatches);
														
@@ -233,6 +244,30 @@ public class VideoArticleMatchJob {
 
															         return records;
														
 
															     }
														
 
															+    // =====================================================
														
 
															+    // 步骤 2.1: 获取各账号已发送素材 ID
														
 
															+    // =====================================================
														
 
															+
														
 
															+    /**
														
 
															+     * 调用外部 API 获取各账号已发送的素材 ID 集合，用于过滤已投放内容。
														
 
															+     *
														
 
															+     * @param records 需求匹配记录列表
														
 
															+     * @return channelLevel3 → 已发送素材 ID 集合
														
 
															+     */
														
 
															+    private Map<String, Set<String>> fetchSentSourceIds(List<ChannelDemandMatchResult> records) {
														
 
															+        // 提取所有唯一的 channelLevel3
														
 
															+        Set<String> uniqueAccounts = records.stream()
														
 
															+                .map(ChannelDemandMatchResult::getChannelLevel3)
														
 
															+                .filter(StringUtils::hasText)
														
 
															+                .collect(Collectors.toSet());
														
 
															+
														
 
															+        Map<String, Set<String>> result = recommendApiService.getAllSentSourceIds(uniqueAccounts);
														
 
															+
														
 
															+        int totalSent = result.values().stream().mapToInt(Set::size).sum();
														
 
															+        log.info("获取各账号已发送素材: {} 个账号, 共 {} 条", result.size(), totalSent);
														
 
															+        return result;
														
 
															+    }
														
 
															+
														
 
															     // =====================================================
														
 
															     // 步骤 3: 批量获取视频标题
														
 
															     // =====================================================
														
@@ -309,6 +344,22 @@ public class VideoArticleMatchJob {
 
															         return videoTitleMap;
														
 
															     }
														
 
															+    // =====================================================
														
 
															+    // 步骤 3.1: 构建 videoId → channelLevel3 映射
														
 
															+    // =====================================================
														
 
															+
														
 
															+    /**
														
 
															+     * 从匹配记录中提取 videoId → channelLevel3 映射。
														
 
															+     * <p>同一视频可能对应多条记录，取第一条的 channelLevel3。
														
 
															+     */
														
 
															+    private Map<Long, String> buildVideoChannelMap(List<ChannelDemandMatchResult> records) {
														
 
															+        Map<Long, String> map = new LinkedHashMap<>();
														
 
															+        for (ChannelDemandMatchResult r : records) {
														
 
															+            map.putIfAbsent(r.getMatchVideoId(), r.getChannelLevel3());
														
 
															+        }
														
 
															+        return map;
														
 
															+    }
														
 
															+
														
 
															     // =====================================================
														
 
															     // 步骤 4: 视频标题 → 长文向量召回
														
 
															     // =====================================================
														
@@ -319,23 +370,31 @@ public class VideoArticleMatchJob {
 
															      * <p>使用线程池并发执行，单条失败不影响整体流程。
														
 
															      * 每个标题使用 configCodes=[ARTICLE_TITLE, ARTICLE_SUMMARY] 进行并行 ANN 查询，
														
 
															      * 结果只保留 modality=ARTICLE 的条目，按 score 降序排列。
														
 
															+     * 召回结果从上往下（按 score 降序）排除该账号已发送的文章，取剩余的第一条。
														
 
															      *
														
 
															-     * @param videoTitleMap videoId → title 映射
														
 
															-     * @return videoId → 文章匹配列表 映射（按 score 降序）
														
 
															+     * @param videoTitleMap  videoId → title 映射
														
 
															+     * @param videoChannelMap videoId → channelLevel3 映射
														
 
															+     * @param sentSourceIds   channelLevel3 → 已发送素材 ID 集合
														
 
															+     * @return videoId → 文章匹配列表 映射（已过滤已发送，按 score 降序）
														
 
															      */
														
 
															-    private Map<Long, List<ArticleMatchItem>> matchArticlesByTitles(Map<Long, String> videoTitleMap) {
														
 
															+    private Map<Long, List<ArticleMatchItem>> matchArticlesByTitles(
														
 
															+            Map<Long, String> videoTitleMap,
														
 
															+            Map<Long, String> videoChannelMap,
														
 
															+            Map<String, Set<String>> sentSourceIds) {
														
 
															         ConcurrentHashMap<Long, List<ArticleMatchItem>> resultMap = new ConcurrentHashMap<>();
														
 
															         RankingSpec ranking = buildRankingSpec();
														
 
															         int totalVideos = videoTitleMap.size();
														
 
															         AtomicInteger processed = new AtomicInteger(0);
														
 
															         AtomicInteger matchedCount = new AtomicInteger(0);
														
 
															+        AtomicInteger skippedSentCount = new AtomicInteger(0);
														
 
															         // 构建并发任务
														
 
															         List<CompletableFuture<Void>> futures = new ArrayList<>(totalVideos);
														
 
															         for (Map.Entry<Long, String> entry : videoTitleMap.entrySet()) {
														
 
															             Long videoId = entry.getKey();
														
 
															             String title = entry.getValue();
														
 
															+            String channelLevel3 = videoChannelMap.get(videoId);
														
 
															             CompletableFuture<Void> future = CompletableFuture.runAsync(() -> {
														
 
															                 try {
														
@@ -344,17 +403,24 @@ public class VideoArticleMatchJob {
 
															                     List<ArticleMatchItem> articles = extractArticleItems(recallResult);
														
 
															                     if (!articles.isEmpty()) {
														
 
															-                        resultMap.put(videoId, articles);
														
 
															-                        matchedCount.incrementAndGet();
														
 
															+                        // 从上往下（score 降序）排除已发送的文章，保留剩余列表
														
 
															+                        List<ArticleMatchItem> filtered = filterSentArticles(articles, channelLevel3, sentSourceIds);
														
 
															+                        int skipped = articles.size() - filtered.size();
														
 
															+                        if (skipped > 0) {
														
 
															+                            skippedSentCount.addAndGet(skipped);
														
 
															+                        }
														
 
															+                        if (!filtered.isEmpty()) {
														
 
															+                            resultMap.put(videoId, filtered);
														
 
															+                            matchedCount.incrementAndGet();
														
 
															+                        }
														
 
															                     }
														
 
															                 } catch (Exception e) {
														
 
															                     log.error("视频 {} (标题: {}) 长文匹配失败: {}", videoId, title, e.getMessage());
														
 
															                 } finally {
														
 
															                     int done = processed.incrementAndGet();
														
 
															-                    // 每 10 条或每 50 条倍数的进度输出一次
														
 
															                     if (done % 10 == 0 || done == totalVideos) {
														
 
															-                        log.info("长文匹配进度: {}/{} 视频已处理, {} 个命中",
														
 
															-                                done, totalVideos, matchedCount.get());
														
 
															+                        log.info("长文匹配进度: {}/{} 视频已处理, {} 个命中, 跳过已发送 {} 个",
														
 
															+                                done, totalVideos, matchedCount.get(), skippedSentCount.get());
														
 
															                     }
														
 
															                 }
														
 
															             }, matchExecutor);
														
@@ -362,21 +428,48 @@ public class VideoArticleMatchJob {
 
															             futures.add(future);
														
 
															         }
														
 
															-        // 等待所有任务完成（每个 future 内部已 catch 异常，不会失败）
														
 
															+        // 等待所有任务完成
														
 
															         CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join();
														
 
															-        // 二次校验：确保所有任务都已执行完毕
														
 
															         int finalProcessed = processed.get();
														
 
															         if (finalProcessed != totalVideos) {
														
 
															             log.warn("长文匹配未完全完成: 预期 {} 个, 实际完成 {} 个", totalVideos, finalProcessed);
														
 
															         }
														
 
															-        // 转换回 LinkedHashMap 保持顺序
														
 
															         Map<Long, List<ArticleMatchItem>> result = new LinkedHashMap<>(resultMap);
														
 
															-        log.info("长文匹配完成: {}/{} 个视频命中长文", matchedCount.get(), totalVideos);
														
 
															+        log.info("长文匹配完成: {}/{} 个视频命中长文, 跳过已发送 {} 篇",
														
 
															+                matchedCount.get(), totalVideos, skippedSentCount.get());
														
 
															         return result;
														
 
															     }
														
 
															+    /**
														
 
															+     * 过滤已发送的文章：从上往下（按原始 score 降序）遍历，跳过在已发送集合中的文章。
														
 
															+     * <p>注意：batchByText 结果已按 score 降序排列，跳过已发送后自动取下一个最优的。
														
 
															+     *
														
 
															+     * @param articles       原始匹配文章列表（已按 score 降序）
														
 
															+     * @param channelLevel3  该视频所属账号
														
 
															+     * @param sentSourceIds  channelLevel3 → 已发送素材 ID 集合
														
 
															+     * @return 过滤后的文章列表（仍按 score 降序）
														
 
															+     */
														
 
															+    private List<ArticleMatchItem> filterSentArticles(
														
 
															+            List<ArticleMatchItem> articles,
														
 
															+            String channelLevel3,
														
 
															+            Map<String, Set<String>> sentSourceIds) {
														
 
															+        Set<String> sentIds = (channelLevel3 != null) ? sentSourceIds.get(channelLevel3) : null;
														
 
															+        if (sentIds == null || sentIds.isEmpty()) {
														
 
															+            return articles;
														
 
															+        }
														
 
															+
														
 
															+        List<ArticleMatchItem> filtered = new ArrayList<>(articles.size());
														
 
															+        for (ArticleMatchItem item : articles) {
														
 
															+            if (item.articleId == null || sentIds.contains(item.articleId)) {
														
 
															+                continue;
														
 
															+            }
														
 
															+            filtered.add(item);
														
 
															+        }
														
 
															+        return filtered;
														
 
															+    }
														
 
															+
														
 
															     /**
														
 
															      * 从 batchByText 返回结果中提取 Article 模态的匹配条目。
														
 
															      *
														
@@ -467,7 +560,7 @@ public class VideoArticleMatchJob {
 
															      *
														
 
															      * <p>每对 (video, article) 只产生一条记录（不关联需求维度），
														
 
															      * channelLevel3 / account 取自原始匹配记录。
														
 
															-     * 先清理同日 dt 旧数据（幂等重跑），再批量插入新结果。
														
 
															+     * 先清理同日 dt 旧数据（幂等重跑），再分批插入新结果。
														
 
															      */
														
 
															     private void saveResults(String dt,
														
 
															                              List<ChannelDemandMatchResult> records,