Bläddra i källkod

召回内容历史表现增加缓存

wangyunpeng 7 månader sedan
förälder
incheckning
c821f21306

+ 4 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/longArticle/ArticleTitleHisCache.java

@@ -23,6 +23,10 @@ public class ArticleTitleHisCache implements Serializable {
     private String type;
     @Column(name = "title")
     private String title;
+    @Column(name = "crawler_title")
+    private String crawlerTitle;
+    @Column(name = "category")
+    private String category;
     @Column(name = "his_publish_article_list")
     private String hisPublishArticleList;
     @Column(name = "t0_fission_by_fans_mean")

+ 13 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/param/TitleHisCacheParam.java

@@ -0,0 +1,13 @@
+package com.tzld.longarticle.recommend.server.model.param;
+
+import lombok.Data;
+
+import java.util.List;
+
+@Data
+public class TitleHisCacheParam { // One per-title input element of the list passed to RecallService.getArticleTitleHisCacheMap.
+    private String title; // First key tried when looking up the historical articles for this entry.
+    private String crawlerTitle; // Fallback lookup key when title has no match; its matches are also consulted when it differs from title.
+    private String titleMd5; // Key under which the resulting Content is put into the returned map.
+    private List<String> category; // May be null or empty; only the first element is read, to look up the publishing account's category weight.
+}

+ 13 - 1
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/XxlJobService.java

@@ -24,6 +24,7 @@ import com.tzld.longarticle.recommend.server.model.entity.crawler.GetOffVideoCra
 import com.tzld.longarticle.recommend.server.model.entity.crawler.LongArticlesVideo;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.*;
 import com.tzld.longarticle.recommend.server.model.param.ArticleFindSourceParam;
+import com.tzld.longarticle.recommend.server.model.param.TitleHisCacheParam;
 import com.tzld.longarticle.recommend.server.remote.ODPSManager;
 import com.tzld.longarticle.recommend.server.repository.crawler.GetOffVideoCrawlerRepository;
 import com.tzld.longarticle.recommend.server.repository.crawler.LongArticlesVideoRepository;
@@ -532,7 +533,18 @@ public class XxlJobService {
             String type = typeEntry.getKey();
             Map<String, ArticleTitleHisCache> titleMap = typeEntry.getValue();
             Set<String> titleMd5List = titleMap.keySet();
-            Map<String, Content> hisCacheMap = recallService.getArticleTitleHisCacheMap(titleMd5List, type);
+            List<TitleHisCacheParam> paramList = titleMd5List.stream().map(titleMd5 -> {
+                ArticleTitleHisCache cache = cacheMap.get(type).get(titleMd5);
+                TitleHisCacheParam cacheParam = new TitleHisCacheParam();
+                cacheParam.setTitleMd5(titleMd5);
+                cacheParam.setTitle(cache.getTitle());
+                cacheParam.setCrawlerTitle(cache.getCrawlerTitle());
+                if (StringUtils.hasText(cache.getCategory())) {
+                    cacheParam.setCategory(JSONArray.parseArray(cache.getCategory(), String.class));
+                }
+                return cacheParam;
+            }).collect(Collectors.toList());
+            Map<String, Content> hisCacheMap = recallService.getArticleTitleHisCacheMap(paramList, type);
             for (String titleMd5 : titleMd5List) {
                 Content content = hisCacheMap.get(titleMd5);
                 ArticleTitleHisCache cache = titleMap.get(titleMd5);

+ 23 - 10
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/recall/RecallService.java

@@ -23,6 +23,7 @@ import com.tzld.longarticle.recommend.server.model.entity.longArticle.AccountCat
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleCategory;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticlePoolPromotionSource;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleTitleHisCache;
+import com.tzld.longarticle.recommend.server.model.param.TitleHisCacheParam;
 import com.tzld.longarticle.recommend.server.remote.aigc.AIGCWaitingPublishContentService;
 import com.tzld.longarticle.recommend.server.repository.aigc.CrawlerMetaArticleRepository;
 import com.tzld.longarticle.recommend.server.repository.aigc.PublishContentRepository;
@@ -274,6 +275,8 @@ public class RecallService implements ApplicationContextAware {
     public void setTitleAvgViewCount(List<Content> contentList, String ghId, String type) {
         long start = System.currentTimeMillis();
         Set<String> titleMd5List = contentList.stream().map(Content::getTitleMd5).collect(Collectors.toSet());
+        Map<String, Content> md5ContentMap = contentList.stream().collect(
+                Collectors.toMap(Content::getTitleMd5, Function.identity(), (o1, o2) -> o2));
         // 根据titleMd5查询数据库获取数据
         List<ArticleTitleHisCache> articleTitleHisCacheList = articleTitleHisCacheRepository
                 .getByTitleMd5InAndType(titleMd5List, type);
@@ -285,7 +288,16 @@ public class RecallService implements ApplicationContextAware {
         List<AccountCorrelation> accountCorrelationList = accountCorrelationRepository.findByGhIdAndStatus(ghId, 1);
         Map<String, Double> accountCorrelationMap = accountCorrelationList.stream().collect(
                 Collectors.toMap(AccountCorrelation::getRelGhId, AccountCorrelation::getCorrelation));
-        Map<String, Content> hisArticleCacheMap = getArticleTitleHisCacheMap(titleMd5List, type);
+        List<TitleHisCacheParam> paramList = titleMd5List.stream().map(titleMd5 -> {
+            Content content = md5ContentMap.get(titleMd5);
+            TitleHisCacheParam cacheParam = new TitleHisCacheParam();
+            cacheParam.setTitleMd5(titleMd5);
+            cacheParam.setTitle(content.getTitle());
+            cacheParam.setCrawlerTitle(content.getCrawlerTitle());
+            cacheParam.setCategory(content.getCategory());
+            return cacheParam;
+        }).collect(Collectors.toList());
+        Map<String, Content> hisArticleCacheMap = getArticleTitleHisCacheMap(paramList, type);
         for (Content content : contentList) {
             if (articleTitleHisCacheMap.containsKey(content.getTitleMd5())) {
                 ArticleTitleHisCache cache = articleTitleHisCacheMap.get(content.getTitleMd5());
@@ -333,8 +345,9 @@ public class RecallService implements ApplicationContextAware {
         articleTitleHisCacheRepository.save(cache);
     }
 
-    public Map<String, Content> getArticleTitleHisCacheMap(Set<String> titleMd5List, String type) {
+    public Map<String, Content> getArticleTitleHisCacheMap(List<TitleHisCacheParam> paramList, String type) {
         Map<String, Content> result = new HashMap<>();
+        List<String> titleMd5List = paramList.stream().map(TitleHisCacheParam::getTitleMd5).collect(Collectors.toList());
         // 获取历史已发布文章
         List<Article> hisArticleList = new ArrayList<>();
         List<List<String>> titleMd5Partition = Lists.partition(new ArrayList<>(titleMd5List), 1000);
@@ -371,14 +384,14 @@ public class RecallService implements ApplicationContextAware {
         List<AccountCategory> accountCategoryList = accountCategoryRepository.getByStatus(StatusEnum.ONE.getCode());
         Map<String, JSONObject> accountCategoryMap = accountCategoryList.stream().filter(o -> StringUtils.hasText(o.getCategoryMap()))
                 .collect(Collectors.toMap(AccountCategory::getGhId, o -> JSONObject.parseObject(o.getCategoryMap())));
-        for (String titleMd5 : titleMd5List) {
+        for (TitleHisCacheParam cacheParam : paramList) {
             Content res = new Content();
             List<Article> hisArticles = new ArrayList<>();
-            Map<Integer, List<Article>> indexArticleMap = map.get(content.getTitle());
+            Map<Integer, List<Article>> indexArticleMap = map.get(cacheParam.getTitle());
             if (Objects.isNull(indexArticleMap)) {
-                indexArticleMap = map.get(content.getCrawlerTitle());
-            } else if (!content.getTitle().equals(content.getCrawlerTitle())) {
-                Map<Integer, List<Article>> crawlerTitleIndexArticleMap = map.get(content.getCrawlerTitle());
+                indexArticleMap = map.get(cacheParam.getCrawlerTitle());
+            } else if (!cacheParam.getTitle().equals(cacheParam.getCrawlerTitle())) {
+                Map<Integer, List<Article>> crawlerTitleIndexArticleMap = map.get(cacheParam.getCrawlerTitle());
                 if (Objects.nonNull(crawlerTitleIndexArticleMap)) {
                     for (Map.Entry<Integer, List<Article>> entry : crawlerTitleIndexArticleMap.entrySet()) {
                         if (indexArticleMap.containsKey(entry.getKey())) {
@@ -404,8 +417,8 @@ public class RecallService implements ApplicationContextAware {
                 }
                 // 历史表现 文章品类如果与历史发布账号负相关 则过滤,不计算该历史发布表现
                 JSONObject categoryWeightMap = accountCategoryMap.get(hisArticle.getGhId());
-                if (Objects.nonNull(categoryWeightMap) && CollectionUtils.isNotEmpty(content.getCategory())) {
-                    String category = content.getCategory().get(0);
+                if (Objects.nonNull(categoryWeightMap) && CollectionUtils.isNotEmpty(cacheParam.getCategory())) {
+                    String category = cacheParam.getCategory().get(0);
                     if (categoryWeightMap.containsKey(category)) {
                         double weight = categoryWeightMap.getDoubleValue(category);
                         if (weight < 0) {
@@ -468,7 +481,7 @@ public class RecallService implements ApplicationContextAware {
             }
             // 设置头条阅读均值
             setT0Data(res);
-            result.put(titleMd5, res);
+            result.put(cacheParam.getTitleMd5(), res);
         }
         return result;
     }