Просмотр исходного кода

增加需求匹配长文内容池

wangyunpeng 5 часов назад
Родитель
Сommit
9dca8d1e45

+ 10 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/common/constant/RedisConstants.java

@@ -23,4 +23,14 @@ public class RedisConstants {
     public static String buildContentPreFilterJobKey(String dateStr, String planId, String accountId) { // Builds the pre-filter job key.
         return CONTENT_PRE_FILTER_JOB_KEY_PREFIX + dateStr + ":" + planId + "-" + accountId; // Layout: {prefix}{dateStr}:{planId}-{accountId}
     }
+
+    /**
+     * Key prefix for the VideoArticleMatch source-ID cache.
+     * Full key format: VideoArticleMatch:{yyyyMMdd}:{channelLevel3}
+     */
+    private static final String VIDEO_ARTICLE_MATCH_KEY_PREFIX = "VideoArticleMatch:";
+
+    public static String buildVideoArticleMatchKey(String dateStr, String channelLevel3) { // Builds the cache key: prefix + dateStr + ":" + channelLevel3.
+        return VIDEO_ARTICLE_MATCH_KEY_PREFIX + dateStr + ":" + channelLevel3; // No validation or escaping is applied to either argument.
+    }
 }

+ 14 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/cgi/VideoArticleMatchData.java

@@ -0,0 +1,14 @@
+package com.tzld.longarticle.recommend.server.model.cgi;
+
+import lombok.Data;
+
+import java.util.List;
+
+@Data
+public class VideoArticleMatchData { // Type of the "data" member of VideoArticleMatchResponse.
+    private Integer pageNum; // presumably paging metadata echoed by the remote API (pageNum/pageSize/total/totalPages) — TODO confirm
+    private Integer pageSize;
+    private Integer total;
+    private Integer totalPages; // NOTE(review): VideoArticleMatchService.post sends only channelLevel3, no paging parameters — confirm a single response carries every row
+    private List<VideoArticleMatchRecord> records; // Rows iterated by RecommendService.fetchSourceIdToExperimentId.
+}

+ 23 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/cgi/VideoArticleMatchRecord.java

@@ -0,0 +1,23 @@
+package com.tzld.longarticle.recommend.server.model.cgi;
+
+import lombok.Data;
+
+@Data
+public class VideoArticleMatchRecord { // One element of VideoArticleMatchData.records.
+    private Long id;
+    private String dt;
+    private String channelName;
+    private String channelLevel3; // Same name as the request parameter sent by VideoArticleMatchService.post.
+    private String account;
+    private Long matchVideoId;
+    private String videoTitle;
+    private String matchedArticleId; // Used by RecommendService as the map key and compared against Content.getSourceId(); blank values are skipped there.
+    private String matchedArticleTitle;
+    private Double matchScore;
+    private String matchConfigCode;
+    private String queryText;
+    private String configCodes;
+    private String rankingParams;
+    private String createTime;
+    private String experimentId; // Copied by RecommendService onto ArticleSortResponseDataItem.experimentId when non-null.
+}

+ 11 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/cgi/VideoArticleMatchResponse.java

@@ -0,0 +1,11 @@
+package com.tzld.longarticle.recommend.server.model.cgi;
+
+import lombok.Data;
+
+@Data
+public class VideoArticleMatchResponse { // Target type for the JSON body parsed in VideoArticleMatchService.post.
+    private long code; // VideoArticleMatchService.query accepts a response only when code == 0 ...
+    private String msg;
+    private boolean success; // ... and success is true; otherwise the call is retried.
+    private VideoArticleMatchData data; // May be null; RecommendService null-checks it before reading records.
+}

+ 1 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/vo/ArticleSortResponseDataItem.java

@@ -17,6 +17,7 @@ public class ArticleSortResponseDataItem {
     private String title;
     private String producePlanName;
     private String filterReason;
+    private String experimentId;
 
     private Map<String, Double> scoreMap;
     private double score;

+ 66 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/remote/pq/VideoArticleMatchService.java

@@ -0,0 +1,66 @@
+package com.tzld.longarticle.recommend.server.remote.pq;
+
+import com.alibaba.fastjson.JSONObject;
+import com.tzld.longarticle.recommend.server.common.HttpPoolFactory;
+import com.tzld.longarticle.recommend.server.model.cgi.VideoArticleMatchResponse;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.http.HttpEntity;
+import org.apache.http.StatusLine;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.entity.StringEntity;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.util.EntityUtils;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Service;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Objects;
+
+/**
+ * HTTP client for the video-article match query endpoint.
+ *
+ * <p>All failures (transport error, non-200 status, empty or malformed body,
+ * unsuccessful business code) are treated as "no result": {@link #query(String)}
+ * retries and finally returns {@code null} instead of throwing.
+ */
+@Service
+@Slf4j
+public class VideoArticleMatchService {
+
+    /** Maximum number of attempts made by {@link #query(String)}; attempts run back to back with no delay. */
+    private static final int MAX_ATTEMPTS = 3;
+    private static final String URL = "/videoVector/videoArticleMatch/query";
+
+    @Value("${pq.videoVector.host:http://api-internal.piaoquantv.com}")
+    private String host;
+
+    private final CloseableHttpClient client = HttpPoolFactory.defaultPool();
+
+    /**
+     * Queries the match endpoint for the given {@code channelLevel3} value.
+     *
+     * @param channelLevel3 value sent as the {@code channelLevel3} request field
+     * @return the first response with {@code code == 0} and {@code success == true},
+     *         or {@code null} when every attempt failed
+     */
+    public VideoArticleMatchResponse query(String channelLevel3) {
+        for (int attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
+            VideoArticleMatchResponse result = post(channelLevel3);
+            if (Objects.nonNull(result) && result.getCode() == 0 && result.isSuccess()) {
+                return result;
+            }
+        }
+        log.warn("VideoArticleMatchService query failed after {} attempts, channelLevel3: {}",
+                MAX_ATTEMPTS, channelLevel3);
+        return null;
+    }
+
+    /**
+     * Performs a single POST and parses the JSON body.
+     *
+     * @return the parsed response, or {@code null} on a transport error, a non-200 status,
+     *         a missing entity or an unparseable body
+     */
+    private VideoArticleMatchResponse post(String channelLevel3) {
+        JSONObject params = new JSONObject();
+        params.put("channelLevel3", channelLevel3);
+
+        HttpPost httpPost = new HttpPost(host + URL);
+        httpPost.setHeader("Content-Type", "application/json;charset=UTF-8");
+        httpPost.setEntity(new StringEntity(params.toJSONString(), StandardCharsets.UTF_8));
+
+        // try-with-resources: the response must always be closed, otherwise the pooled
+        // connection is never released and the pool is eventually exhausted.
+        try (CloseableHttpResponse response = client.execute(httpPost)) {
+            StatusLine statusLine = response.getStatusLine();
+            HttpEntity responseEntity = response.getEntity();
+            if (statusLine.getStatusCode() != 200 || Objects.isNull(responseEntity)) {
+                // Drain the body so the connection can be reused rather than discarded.
+                EntityUtils.consumeQuietly(responseEntity);
+                log.warn("VideoArticleMatchService unexpected response, channelLevel3: {}, status: {}",
+                        channelLevel3, statusLine.getStatusCode());
+                return null;
+            }
+            String responseBody = EntityUtils.toString(responseEntity, StandardCharsets.UTF_8);
+            return JSONObject.parseObject(responseBody, VideoArticleMatchResponse.class);
+        } catch (IOException | com.alibaba.fastjson.JSONException e) {
+            // A malformed body is handled like a transport failure so the caller's retry loop still applies.
+            log.error("VideoArticleMatchService error, channelLevel3: {}", channelLevel3, e);
+        }
+        return null;
+    }
+
+}

+ 119 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/RecommendService.java

@@ -1,6 +1,8 @@
 package com.tzld.longarticle.recommend.server.service.recommend;
 
+import com.alibaba.fastjson.JSON;
 import com.alibaba.fastjson.JSONObject;
+import com.alibaba.fastjson.TypeReference;
 import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
 import com.google.common.collect.Lists;
 import com.tzld.longarticle.recommend.server.common.CostMonitor;
@@ -8,9 +10,12 @@ import com.tzld.longarticle.recommend.server.common.constant.RedisConstants;
 import com.tzld.longarticle.recommend.server.common.constant.SceneConstants;
 import com.tzld.longarticle.recommend.server.common.enums.aigc.PushTypeEnum;
 import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticleTypeEnum;
+import com.tzld.longarticle.recommend.server.common.enums.recommend.ContentPoolEnum;
 import com.tzld.longarticle.recommend.server.common.enums.recommend.RankStrategyEnum;
 import com.tzld.longarticle.recommend.server.mapper.crawler.ext.ArticleUserGroupMapperExt;
 import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
+import com.tzld.longarticle.recommend.server.model.cgi.VideoArticleMatchRecord;
+import com.tzld.longarticle.recommend.server.model.cgi.VideoArticleMatchResponse;
 import com.tzld.longarticle.recommend.server.model.dto.Content;
 import com.tzld.longarticle.recommend.server.model.dto.UserGroupCountDTO;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountAvgInfo;
@@ -21,6 +26,7 @@ import com.tzld.longarticle.recommend.server.model.vo.ArticleSortResponseData;
 import com.tzld.longarticle.recommend.server.model.vo.ArticleSortResponseDataItem;
 import com.tzld.longarticle.recommend.server.model.vo.RecommendResponse;
 import com.tzld.longarticle.recommend.server.model.vo.RecommendWithUserGroupResponse;
+import com.tzld.longarticle.recommend.server.remote.pq.VideoArticleMatchService;
 import com.tzld.longarticle.recommend.server.repository.aigc.PublishPlanRepository;
 import com.tzld.longarticle.recommend.server.repository.crawler.AccountAvgInfoRepository;
 import com.tzld.longarticle.recommend.server.repository.crawler.ArticleRepository;
@@ -35,6 +41,7 @@ import com.tzld.longarticle.recommend.server.service.recommend.recall.RecallServ
 import com.tzld.longarticle.recommend.server.util.CommonCollectionUtils;
 import com.tzld.longarticle.recommend.server.util.DateUtils;
 import com.tzld.longarticle.recommend.server.util.JSONUtils;
+import com.tzld.longarticle.recommend.server.util.RedisUtil;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.collections4.CollectionUtils;
 import org.springframework.beans.BeanUtils;
@@ -45,6 +52,8 @@ import org.springframework.stereotype.Service;
 import org.springframework.util.StringUtils;
 
 import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.temporal.ChronoUnit;
 import java.util.*;
 import java.util.concurrent.CompletableFuture;
 import java.util.stream.Collectors;
@@ -78,11 +87,17 @@ public class RecommendService {
     private LongArticleBaseMapper longArticleBaseMapper;
     @Autowired
     private RedisTemplate<String, String> redisTemplate;
+    @Autowired
+    private RedisUtil redisUtil;
+    @Autowired
+    private VideoArticleMatchService videoArticleMatchService;
 
     @ApolloJsonValue("${accountStrategyConfig:{}}")
     private Map<String, String> accountStrategyConfigMap;
     @ApolloJsonValue("${accountHisJumpStrategyConfig:[]]}")
     private List<String> accountHisJumpStrategyList;
+    @ApolloJsonValue("${videoArticleMatch.accounts:[\"趣味生活方式\",\"生活慢时光\",\"趣味生活漫谈\",\"趣味生活漫时光\",\"史趣探秘\",\"银发生活畅谈\",\"银发乐活驿站\",\"时光趣味生活\",\"历史长河流淌\",\"小阳看天下\"]}")
+    private List<String> videoArticleMatchAccountList;
     @Value("${spring.profiles.active}")
     private String env;
 
@@ -94,7 +109,15 @@ public class RecommendService {
         // 获取账号排序设置
         setStrategy(request, param);
 
+        // 获取 video-article match 结果(仅配置中的账号,当日缓存)
+        Map<String, String> sourceIdToExperimentId = videoArticleMatchAccountList.contains(request.getAccountName())
+                ? fetchSourceIdToExperimentId(request.getAccountName())
+                : Collections.emptyMap();
+
         RecallResult recallResult = recallService.recall(convertToRecallParam(param));
+
+        filterRecallBySourceId(recallResult, sourceIdToExperimentId);
+
         if (CollectionUtils.isEmpty(recallResult.getData())) {
             RecommendResponse response = new RecommendResponse();
             response.setCode(0);
@@ -110,6 +133,15 @@ public class RecommendService {
         saveSortLog(param, rankResult);
 
         RecommendResponse response = buildRecommendResponse(recallResult, rankResult, param.getPublishNum());
+        // 设置每个 item 的 experimentId
+        if (CollectionUtils.isNotEmpty(sourceIdToExperimentId.keySet())) {
+            for (ArticleSortResponseDataItem item : response.getData().getRank_list()) {
+                String expId = sourceIdToExperimentId.get(item.getSourceId());
+                if (expId != null) {
+                    item.setExperimentId(expId);
+                }
+            }
+        }
         long t4 = System.currentTimeMillis();
         log.info("recommendCost param:{} total cost:{} recall:{} rank:{} response: {}", JSONObject.toJSONString(request),
                 t4 - start, t2 - start, t3 - t2, JSONObject.toJSONString(response));
@@ -117,6 +149,93 @@ public class RecommendService {
         return response;
     }
 
+    /**
+     * Returns the sourceId -> experimentId mapping produced by the video-article match
+     * service for the given account, using a per-day Redis cache so the remote endpoint
+     * is not called again once a non-empty result has been stored.
+     *
+     * <p>Empty results are not cached, so the remote endpoint is queried again on the
+     * next request.
+     *
+     * @param accountName account name; also passed to the remote service as its query value
+     * @return map keyed by matched article ID (values may be {@code null}); never {@code null},
+     *         empty when the account name is blank or no match data is available
+     */
+    private Map<String, String> fetchSourceIdToExperimentId(String accountName) {
+        if (!StringUtils.hasText(accountName)) {
+            return Collections.emptyMap();
+        }
+        String dateStr = DateUtils.getCurrentDateStr("yyyyMMdd");
+        String cacheKey = RedisConstants.buildVideoArticleMatchKey(dateStr, accountName);
+
+        // Prefer the cached value; a payload that parses to null is ignored so the caller never sees null.
+        String cached = redisUtil.get(cacheKey);
+        if (StringUtils.hasText(cached)) {
+            Map<String, String> cachedMap = JSON.parseObject(cached, new TypeReference<Map<String, String>>() {});
+            if (cachedMap != null) {
+                log.info("VideoArticleMatch cache hit, key: {}", cacheKey);
+                return cachedMap;
+            }
+        }
+
+        // Cache miss: call the remote endpoint.
+        VideoArticleMatchResponse matchResponse = videoArticleMatchService.query(accountName);
+        if (matchResponse == null || !matchResponse.isSuccess()
+                || matchResponse.getData() == null
+                || CollectionUtils.isEmpty(matchResponse.getData().getRecords())) {
+            return Collections.emptyMap();
+        }
+        Map<String, String> result = new HashMap<>();
+        for (VideoArticleMatchRecord record : matchResponse.getData().getRecords()) {
+            if (record != null && StringUtils.hasText(record.getMatchedArticleId())) {
+                result.put(record.getMatchedArticleId(), record.getExperimentId());
+            }
+        }
+        if (result.isEmpty()) {
+            return result;
+        }
+
+        // Cache until the end of the current day. A single clock read keeps the TTL consistent,
+        // and the lower bound of 1 avoids a zero expiry in the last second before midnight.
+        LocalDateTime now = LocalDateTime.now();
+        long secondsToEndOfDay = Math.max(1L,
+                now.until(now.toLocalDate().plusDays(1).atStartOfDay(), ChronoUnit.SECONDS));
+        // WriteMapNullValue: fastjson omits null-valued entries by default, which would drop
+        // every article whose experimentId is null from the cached key set and make cache hits
+        // filter differently from the first (uncached) request.
+        String payload = JSON.toJSONString(result,
+                com.alibaba.fastjson.serializer.SerializerFeature.WriteMapNullValue);
+        redisUtil.set(cacheKey, payload, secondsToEndOfDay);
+        log.info("VideoArticleMatch cached, key: {}, expire: {}s", cacheKey, secondsToEndOfDay);
+
+        return result;
+    }
+
+    /**
+     * Restricts recalled content to the articles matched by the video-article match service.
+     *
+     * <p>Only contents whose pool type equals {@code ContentPoolEnum.autoArticlePoolLevel1}
+     * are filtered by source ID; contents from other pools are always kept. If filtering
+     * would leave no content at all, the recall result is left untouched.
+     *
+     * @param recallResult           recall result to filter in place (its data list is replaced)
+     * @param sourceIdToExperimentId matched source IDs (keys); nothing is filtered when null or empty
+     */
+    private void filterRecallBySourceId(RecallResult recallResult, Map<String, String> sourceIdToExperimentId) {
+        if (sourceIdToExperimentId == null || sourceIdToExperimentId.isEmpty()) {
+            return;
+        }
+        // This runs before the caller's empty-recall check, so a missing data list must be tolerated here.
+        if (recallResult == null || CollectionUtils.isEmpty(recallResult.getData())) {
+            return;
+        }
+        Set<String> matchedSourceIds = sourceIdToExperimentId.keySet();
+        String toutiaoPool = ContentPoolEnum.autoArticlePoolLevel1.getContentPool();
+        List<RecallResult.RecallData> filteredDataList = new ArrayList<>();
+        boolean hasContentAfterFilter = false;
+        for (RecallResult.RecallData rd : recallResult.getData()) {
+            // NOTE(review): only backup, contents and filterContents are copied — confirm RecallData has no other fields.
+            RecallResult.RecallData filteredRd = new RecallResult.RecallData();
+            filteredRd.setBackup(rd.isBackup());
+
+            if (CollectionUtils.isNotEmpty(rd.getContents())) {
+                List<Content> filteredContents = rd.getContents().stream()
+                        // Only the first-level pool is filtered by sourceId; every other pool is kept as is.
+                        .filter(c -> !toutiaoPool.equals(c.getContentPoolType())
+                                || matchedSourceIds.contains(c.getSourceId()))
+                        .collect(Collectors.toList());
+                filteredRd.setContents(filteredContents);
+                if (CollectionUtils.isNotEmpty(filteredContents)) {
+                    hasContentAfterFilter = true;
+                }
+            } else {
+                filteredRd.setContents(rd.getContents());
+            }
+
+            // Content removed by this match filter is not added to the filter list; the original one is kept.
+            filteredRd.setFilterContents(rd.getFilterContents());
+
+            filteredDataList.add(filteredRd);
+        }
+
+        if (hasContentAfterFilter) {
+            recallResult.setData(filteredDataList);
+            log.info("VideoArticleMatch filter applied, sourceId count: {}", matchedSourceIds.size());
+        } else {
+            log.info("VideoArticleMatch filter would eliminate all content, using original recall");
+        }
+    }
+
     private void excludeArticleIndex(RankResult rankResult, List<Integer> excludeContentIndex) {
         if (CollectionUtils.isEmpty(excludeContentIndex)) {
             return;

+ 0 - 2
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/score/ScoreService.java

@@ -86,8 +86,6 @@ public class ScoreService implements ApplicationContextAware {
                 for (Score score : data) {
                     Double scoreValue = score.getScore();
                     if (scoreValue == null || Double.isNaN(scoreValue) || Double.isInfinite(scoreValue)) {
-                        log.warn("Invalid score detected: strategy={}, contentId={}, score={}",
-                                score.getStrategy(), score.getContentId(), scoreValue);
                         continue;
                     }
                     Map<String, Double> map