Przeglądaj źródła

召回内容历史表现增加缓存

wangyunpeng 7 miesięcy temu
rodzic
commit
6fb3ab693a

+ 1 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/Content.java

@@ -20,6 +20,7 @@ public class Content {
     private String id;
     private String sourceId;
     private String title;
+    private String titleMd5;
     private Long createTimestamp;
     private String producePlanName;
     private String contentPoolType; // 内容池类别

+ 58 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/longArticle/ArticleTitleHisCache.java

@@ -0,0 +1,58 @@
+package com.tzld.longarticle.recommend.server.model.entity.longArticle;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+import javax.persistence.*;
+import java.io.Serializable;
+/**
+ * JPA entity mapped to the {@code article_title_his_cache} table.
+ *
+ * <p>Each row is identified by the composite key ({@code title_md5}, {@code type}), declared via
+ * {@link PK}. The non-key columns store a title's historical publish list and its T0 fission
+ * metrics so they can be read back later instead of being recomputed.
+ */
+@Data
+@AllArgsConstructor
+@NoArgsConstructor
+@Entity
+@Table(name = "article_title_his_cache")
+@IdClass(ArticleTitleHisCache.PK.class)
+public class ArticleTitleHisCache implements Serializable {
+
+    @Id
+    @Column(name = "title_md5")
+    private String titleMd5; // key part 1: hash of the title
+    @Id
+    @Column(name = "type")
+    private String type; // key part 2: the "type" value callers query with (meaning defined by callers)
+    @Column(name = "title")
+    private String title;
+    @Column(name = "his_publish_article_list")
+    private String hisPublishArticleList; // JSON text; writers serialize a list into it, readers parse it back
+    @Column(name = "t0_fission_by_fans_mean")
+    private Double t0FissionByFansMean; // this and the T0 fields below mirror the same-named Content properties
+    @Column(name = "t0_fission_by_read_avg_mean")
+    private Double t0FissionByReadAvgMean;
+    @Column(name = "t0_fission_by_read_avg_correlation_mean")
+    private Double t0FissionByReadAvgCorrelationMean;
+    @Column(name = "t0_fission_by_fans_sum_avg")
+    private Double t0FissionByFansSumAvg;
+    @Column(name = "t0_fission_by_read_avg_sum_avg")
+    private Double t0FissionByReadAvgSumAvg;
+    @Column(name = "t0_fission_de_weight_by_read_avg_sum_avg")
+    private Double t0FissionDeWeightByReadAvgSumAvg;
+    @Column(name = "create_timestamp")
+    private Long createTimestamp; // writers store System.currentTimeMillis() when the row is first saved
+    @Column(name = "update_timestamp")
+    private Long updateTimestamp; // writers store System.currentTimeMillis() when the row is refreshed
+
+    /**
+     * Composite primary-key class referenced by {@code @IdClass} on the entity.
+     *
+     * <p>Its property names and types match the entity's {@code @Id} fields; Lombok's
+     * {@code @Data} generates the {@code equals}/{@code hashCode} pair over both parts.
+     */
+    @Data
+    public static class PK implements Serializable {
+
+        @Column(name = "title_md5")
+        private String titleMd5;
+        @Column(name = "type")
+        private String type;
+
+        public PK() {
+        }
+
+    }
+}

+ 14 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/longArticle/ArticleTitleHisCacheRepository.java

@@ -0,0 +1,14 @@
+package com.tzld.longarticle.recommend.server.repository.longArticle;
+
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleTitleHisCache;
+import org.springframework.data.jpa.repository.JpaRepository;
+import org.springframework.stereotype.Repository;
+
+import java.util.List;
+import java.util.Set;
+/**
+ * Spring Data JPA repository for {@link ArticleTitleHisCache} rows, keyed by
+ * {@link ArticleTitleHisCache.PK}.
+ */
+@Repository
+public interface ArticleTitleHisCacheRepository extends JpaRepository<ArticleTitleHisCache, ArticleTitleHisCache.PK> {
+    /**
+     * Derived query: selects the rows whose {@code titleMd5} is one of the given hashes and whose
+     * {@code type} equals the given value.
+     *
+     * <p>NOTE(review): how an empty {@code titleMd5List} is handled depends on the JPA provider and
+     * database — confirm before calling with no hashes.
+     *
+     * @param titleMd5List title hashes to look up (a set, despite the name)
+     * @param type         value the {@code type} key column must equal
+     * @return the matching rows
+     */
+    List<ArticleTitleHisCache> getByTitleMd5InAndType(Set<String> titleMd5List, String type);
+}

+ 36 - 9
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/XxlJobService.java

@@ -15,27 +15,23 @@ import com.tzld.longarticle.recommend.server.mapper.crawler.CrawlerBaseMapper;
 import com.tzld.longarticle.recommend.server.mapper.growth.NewPushMessageCallbackMapper;
 import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
 import com.tzld.longarticle.recommend.server.model.dto.AccountTypeFansDTO;
+import com.tzld.longarticle.recommend.server.model.dto.Content;
 import com.tzld.longarticle.recommend.server.model.dto.NotPublishPlan;
 import com.tzld.longarticle.recommend.server.model.dto.PublishPlanAccountNotifyDTO;
 import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishAccount;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountAvgInfo;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.GetOffVideoCrawler;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.LongArticlesVideo;
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.GetOffVideoArticle;
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.LongArticlesMatchVideo;
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.LongArticlesReadRate;
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.LongArticlesRootSourceId;
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.*;
 import com.tzld.longarticle.recommend.server.model.param.ArticleFindSourceParam;
 import com.tzld.longarticle.recommend.server.remote.ODPSManager;
 import com.tzld.longarticle.recommend.server.repository.crawler.GetOffVideoCrawlerRepository;
 import com.tzld.longarticle.recommend.server.repository.crawler.LongArticlesVideoRepository;
-import com.tzld.longarticle.recommend.server.repository.longArticle.GetOffVideoArticleRepository;
-import com.tzld.longarticle.recommend.server.repository.longArticle.LongArticlesMatchVideoRepository;
-import com.tzld.longarticle.recommend.server.repository.longArticle.LongArticlesReadRateRepository;
-import com.tzld.longarticle.recommend.server.repository.longArticle.LongArticlesRootSourceIdRepository;
+import com.tzld.longarticle.recommend.server.repository.longArticle.*;
 import com.tzld.longarticle.recommend.server.repository.model.PushMessageCallbackExample;
 import com.tzld.longarticle.recommend.server.service.recommend.ArticlePromotionService;
 import com.tzld.longarticle.recommend.server.service.recommend.ArticleService;
+import com.tzld.longarticle.recommend.server.service.recommend.recall.RecallService;
 import com.tzld.longarticle.recommend.server.util.DateUtils;
 import com.tzld.longarticle.recommend.server.util.LarkRobotUtil;
 import com.tzld.longarticle.recommend.server.util.feishu.FeishuMessageSender;
@@ -53,6 +49,7 @@ import org.springframework.util.StringUtils;
 import java.time.LocalTime;
 import java.util.*;
 import java.util.concurrent.*;
+import java.util.function.Function;
 import java.util.stream.Collectors;
 
 import static com.tzld.longarticle.recommend.server.common.constant.TimeConstant.MILLISECOND_DAY;
@@ -88,6 +85,10 @@ public class XxlJobService {
     private ArticleService articleService;
     @Autowired
     private ArticlePromotionService articlePromotionService;
+    @Autowired
+    private ArticleTitleHisCacheRepository articleTitleHisCacheRepository;
+    @Autowired
+    private RecallService recallService;
 
     ExecutorService thread = new CommonThreadPoolExecutor(
             5,
@@ -522,7 +523,33 @@ public class XxlJobService {
 
     @XxlJob("refreshArticleHisCache")
     public ReturnT<String> refreshArticleHisCache(String param) {
-        // todo 刷新历史表现缓存
+        // 刷新历史表现缓存
+        List<ArticleTitleHisCache> cacheList = articleTitleHisCacheRepository.findAll();
+        Map<String, Map<String, ArticleTitleHisCache>> cacheMap = cacheList.stream().collect(
+                Collectors.groupingBy(ArticleTitleHisCache::getType,
+                        Collectors.toMap(ArticleTitleHisCache::getTitleMd5, Function.identity())));
+        for (Map.Entry<String, Map<String, ArticleTitleHisCache>> typeEntry : cacheMap.entrySet()) {
+            String type = typeEntry.getKey();
+            Map<String, ArticleTitleHisCache> titleMap = typeEntry.getValue();
+            Set<String> titleMd5List = titleMap.keySet();
+            Map<String, Content> hisCacheMap = recallService.getArticleTitleHisCacheMap(titleMd5List, type);
+            for (String titleMd5 : titleMd5List) {
+                Content content = hisCacheMap.get(titleMd5);
+                ArticleTitleHisCache cache = titleMap.get(titleMd5);
+                if (content != null) {
+                    cache.setHisPublishArticleList(JSONObject.toJSONString(content.getHisPublishArticleList()));
+                    cache.setT0FissionByFansMean(content.getT0FissionByFansMean());
+                    cache.setT0FissionByReadAvgMean(content.getT0FissionByReadAvgMean());
+                    cache.setT0FissionByReadAvgCorrelationMean(content.getT0FissionByReadAvgCorrelationMean());
+                    cache.setT0FissionByFansSumAvg(content.getT0FissionByFansSumAvg());
+                    cache.setT0FissionByReadAvgSumAvg(content.getT0FissionByReadAvgSumAvg());
+                    cache.setT0FissionDeWeightByReadAvgSumAvg(content.getT0FissionDeWeightByReadAvgSumAvg());
+                    cache.setUpdateTimestamp(System.currentTimeMillis());
+                    articleTitleHisCacheRepository.save(cache);
+                }
+            }
+        }
+
         return ReturnT.SUCCESS;
     }
 

+ 72 - 16
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/recall/RecallService.java

@@ -1,5 +1,6 @@
 package com.tzld.longarticle.recommend.server.service.recommend.recall;
 
+import com.alibaba.fastjson.JSONArray;
 import com.alibaba.fastjson.JSONObject;
 import com.google.common.collect.Lists;
 import com.tzld.longarticle.recommend.server.common.CostMonitor;
@@ -21,6 +22,7 @@ import com.tzld.longarticle.recommend.server.model.entity.crawler.ArticleDetailI
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.AccountCategory;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleCategory;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticlePoolPromotionSource;
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleTitleHisCache;
 import com.tzld.longarticle.recommend.server.remote.aigc.AIGCWaitingPublishContentService;
 import com.tzld.longarticle.recommend.server.repository.aigc.CrawlerMetaArticleRepository;
 import com.tzld.longarticle.recommend.server.repository.aigc.PublishContentRepository;
@@ -31,6 +33,7 @@ import com.tzld.longarticle.recommend.server.repository.crawler.ArticleRepositor
 import com.tzld.longarticle.recommend.server.repository.longArticle.AccountCategoryRepository;
 import com.tzld.longarticle.recommend.server.repository.longArticle.ArticleCategoryRepository;
 import com.tzld.longarticle.recommend.server.repository.longArticle.ArticlePoolPromotionSourceRepository;
+import com.tzld.longarticle.recommend.server.repository.longArticle.ArticleTitleHisCacheRepository;
 import com.tzld.longarticle.recommend.server.service.recommend.config.AccountIndexAvgViewCountService;
 import com.tzld.longarticle.recommend.server.service.recommend.recall.strategy.DefaultRecallStrategy;
 import com.tzld.longarticle.recommend.server.service.recommend.score.ScoreStrategy;
@@ -90,6 +93,8 @@ public class RecallService implements ApplicationContextAware {
     PublishContentRepository publishContentRepository;
     @Autowired
     AccountCategoryRepository accountCategoryRepository;
+    @Autowired
+    ArticleTitleHisCacheRepository articleTitleHisCacheRepository;
 
     private final Map<String, RecallStrategy> strategyMap = new HashMap<>();
     private ApplicationContext applicationContext;
@@ -188,6 +193,7 @@ public class RecallService implements ApplicationContextAware {
     }
 
     public void setContentCategory(List<Content> contentList) {
+        contentList.forEach(content -> content.setTitleMd5(Md5Util.encoderByMd5(content.getTitle())));
         List<String> channelContentIds = contentList.stream().map(Content::getCrawlerChannelContentId)
                 .collect(Collectors.toList());
         // 查询晋升rootProduceContentId
@@ -223,8 +229,7 @@ public class RecallService implements ApplicationContextAware {
                 category = coldStartCategoryMap.get(content.getCrawlerChannelContentId());
             }
             if (Objects.isNull(category)) {
-                String titleMd5 = Md5Util.encoderByMd5(content.getTitle());
-                category = titleCategoryMap.get(titleMd5);
+                category = titleCategoryMap.get(content.getTitleMd5());
             }
             if (Objects.nonNull(category)) {
                 content.setCategory(Collections.singletonList(category.getCategory()));
@@ -268,17 +273,68 @@ public class RecallService implements ApplicationContextAware {
 
     public void setTitleAvgViewCount(List<Content> contentList, String ghId, String type) {
         long start = System.currentTimeMillis();
-
-        Set<String> titleMd5List = contentList.stream().map(o -> Md5Util.encoderByMd5(o.getTitle())).collect(Collectors.toSet());
-        // todo 根据titleMd5查询数据库获取数据
-        // todo titleMd5 进行过滤 排除缓存中数据 重新走下方查询
+        Set<String> titleMd5List = contentList.stream().map(Content::getTitleMd5).collect(Collectors.toSet());
+        // 根据titleMd5查询数据库获取数据
+        List<ArticleTitleHisCache> articleTitleHisCacheList = articleTitleHisCacheRepository
+                .getByTitleMd5InAndType(titleMd5List, type);
+        Map<String, ArticleTitleHisCache> articleTitleHisCacheMap = articleTitleHisCacheList.stream()
+                .collect(Collectors.toMap(ArticleTitleHisCache::getTitleMd5, Function.identity()));
+        // titleMd5 进行过滤 排除缓存中数据 重新走下方查询
+        titleMd5List.removeIf(articleTitleHisCacheMap::containsKey);
         // 获取账号相关性
         List<AccountCorrelation> accountCorrelationList = accountCorrelationRepository.findByGhIdAndStatus(ghId, 1);
         Map<String, Double> accountCorrelationMap = accountCorrelationList.stream().collect(
                 Collectors.toMap(AccountCorrelation::getRelGhId, AccountCorrelation::getCorrelation));
-//        Set<String> titleList = contentList.stream().map(Content::getTitle).collect(Collectors.toSet());
-//        Set<String> crawlerTitleList = contentList.stream().map(Content::getCrawlerTitle).collect(Collectors.toSet());
-//        titleList.addAll(crawlerTitleList);
+        Map<String, Content> hisArticleCacheMap = getArticleTitleHisCacheMap(titleMd5List, type);
+        for (Content content : contentList) {
+            if (articleTitleHisCacheMap.containsKey(content.getTitleMd5())) {
+                ArticleTitleHisCache cache = articleTitleHisCacheMap.get(content.getTitleMd5());
+                List<ContentHisPublishArticle> hisPublishArticleList =
+                        JSONArray.parseArray(cache.getHisPublishArticleList(), ContentHisPublishArticle.class);
+                for (ContentHisPublishArticle article : hisPublishArticleList) {
+                    article.setCorrelation(Optional.ofNullable(accountCorrelationMap.get(article.getGhId())).orElse(0.0));
+                }
+                content.setHisPublishArticleList(hisPublishArticleList);
+                content.setT0FissionByFansMean(cache.getT0FissionByFansMean());
+                content.setT0FissionByReadAvgMean(cache.getT0FissionByReadAvgMean());
+                content.setT0FissionByReadAvgCorrelationMean(cache.getT0FissionByReadAvgCorrelationMean());
+                content.setT0FissionDeWeightByReadAvgSumAvg(cache.getT0FissionDeWeightByReadAvgSumAvg());
+                content.setT0FissionByFansSumAvg(cache.getT0FissionByFansSumAvg());
+                content.setT0FissionByReadAvgSumAvg(cache.getT0FissionByReadAvgSumAvg());
+                continue;
+            }
+            if (hisArticleCacheMap.containsKey(content.getTitleMd5())) {
+                Content cache = hisArticleCacheMap.get(content.getTitleMd5());
+                content.setHisPublishArticleList(cache.getHisPublishArticleList());
+                for (ContentHisPublishArticle article : content.getHisPublishArticleList()) {
+                    article.setCorrelation(Optional.ofNullable(accountCorrelationMap.get(article.getGhId())).orElse(0.0));
+                }
+                content.setT0FissionByFansMean(cache.getT0FissionByFansMean());
+                content.setT0FissionByReadAvgMean(cache.getT0FissionByReadAvgMean());
+                content.setT0FissionByReadAvgCorrelationMean(cache.getT0FissionByReadAvgCorrelationMean());
+                content.setT0FissionDeWeightByReadAvgSumAvg(cache.getT0FissionDeWeightByReadAvgSumAvg());
+                content.setT0FissionByFansSumAvg(cache.getT0FissionByFansSumAvg());
+                content.setT0FissionByReadAvgSumAvg(cache.getT0FissionByReadAvgSumAvg());
+            }
+            // 写入缓存
+            saveArticleTitleHisCache(content);
+        }
+        log.info("setTitleAvgViewCount cost:{}", System.currentTimeMillis() - start);
+    }
+
+    private void saveArticleTitleHisCache(Content content) {
+        if (CollectionUtils.isEmpty(content.getHisPublishArticleList())) {
+            return;
+        }
+        ArticleTitleHisCache cache = new ArticleTitleHisCache();
+        BeanUtils.copyProperties(content, cache);
+        cache.setHisPublishArticleList(JSONObject.toJSONString(content.getHisPublishArticleList()));
+        cache.setCreateTimestamp(System.currentTimeMillis());
+        articleTitleHisCacheRepository.save(cache);
+    }
+
+    public Map<String, Content> getArticleTitleHisCacheMap(Set<String> titleMd5List, String type) {
+        Map<String, Content> result = new HashMap<>();
         // 获取历史已发布文章
         List<Article> hisArticleList = new ArrayList<>();
         List<List<String>> titleMd5Partition = Lists.partition(new ArrayList<>(titleMd5List), 1000);
@@ -315,7 +371,8 @@ public class RecallService implements ApplicationContextAware {
         List<AccountCategory> accountCategoryList = accountCategoryRepository.getByStatus(StatusEnum.ONE.getCode());
         Map<String, JSONObject> accountCategoryMap = accountCategoryList.stream().filter(o -> StringUtils.hasText(o.getCategoryMap()))
                 .collect(Collectors.toMap(AccountCategory::getGhId, o -> JSONObject.parseObject(o.getCategoryMap())));
-        for (Content content : contentList) {
+        for (String titleMd5 : titleMd5List) {
+            Content res = new Content();
             List<Article> hisArticles = new ArrayList<>();
             Map<Integer, List<Article>> indexArticleMap = map.get(content.getTitle());
             if (Objects.isNull(indexArticleMap)) {
@@ -340,7 +397,7 @@ public class RecallService implements ApplicationContextAware {
                     hisArticles.addAll(indexArticleList);
                 }
             }
-            content.setHisPublishArticleList(new ArrayList<>());
+            res.setHisPublishArticleList(new ArrayList<>());
             for (Article hisArticle : hisArticles) {
                 if (ScoreStrategy.hisContentLateFilter(hisArticle.getPublishTimestamp())) {
                     continue;
@@ -407,14 +464,13 @@ public class RecallService implements ApplicationContextAware {
                         article.setFirstViewCountRate((firstArticle.getShowViewCount() * 1.0) / firstIndexAvgInfo.getReadAvg());
                     }
                 }
-                article.setCorrelation(Optional.ofNullable(accountCorrelationMap.get(article.getGhId())).orElse(0.0));
-                content.getHisPublishArticleList().add(article);
+                res.getHisPublishArticleList().add(article);
             }
             // 设置头条阅读均值
-            setT0Data(content);
-            // todo 写入缓存
+            setT0Data(res);
+            result.put(titleMd5, res);
         }
-        log.info("setTitleAvgViewCount cost:{}", System.currentTimeMillis() - start);
+        return result;
     }
 
     private void setT0Data(Content content) {