فهرست منبع

Merge branch 'wyp/1206-titleHisCache' of Server/long-article-recommend into master

wangyunpeng 7 ماه پیش
والد
کامیت
f03b50236d
11 فایل تغییر یافته به همراه 431 افزوده شده و 106 حذف شده
  1. 2 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/longArticle/LongArticleBaseMapper.java
  2. 1 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/Content.java
  3. 1 1
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/longArticle/ArticleCategory.java
  4. 56 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/longArticle/ArticleTitleHisCache.java
  5. 16 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/param/TitleHisCacheParam.java
  6. 13 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/longArticle/ArticleTitleHisCacheRepository.java
  7. 64 4
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/XxlJobService.java
  8. 74 5
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/ArticleService.java
  9. 185 94
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/recall/RecallService.java
  10. 4 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/web/XxlJobController.java
  11. 15 2
      long-article-recommend-service/src/main/resources/mapper/longArticle/LongArticleBaseMapper.xml

+ 2 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/longArticle/LongArticleBaseMapper.java

@@ -21,6 +21,8 @@ public interface LongArticleBaseMapper {
 
     void batchInsertArticlePoolPromotionSource(List<ArticlePoolPromotionSource> list);
 
+    void batchInsertArticleTitleHisCache(List<ArticleTitleHisCache> list);
+
     void updateRootProduceContentLevel(String rootProduceContentId, String level);
 
     void deleteDatastatScoreByDtIn(List<String> dateStrList);

+ 1 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/Content.java

@@ -21,6 +21,7 @@ public class Content {
     private Integer sourceType;
     private String sourceId;
     private String title;
+    private String titleMd5;
     private Long createTimestamp;
     private String producePlanName;
     private String contentPoolType; // 内容池类别

+ 1 - 1
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/longArticle/ArticleCategory.java

@@ -39,7 +39,7 @@ public class ArticleCategory {
     private String kimiResult;
 
     @Column(name = "status")
-    private Integer status;
+    private Integer status = 0;
 
     @Column(name = "fail_reason")
     private String failReason;

+ 56 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/longArticle/ArticleTitleHisCache.java

@@ -0,0 +1,56 @@
+package com.tzld.longarticle.recommend.server.model.entity.longArticle;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+import javax.persistence.*;
+import java.io.Serializable;
+
+/**
+ * JPA entity caching a content item's historical title performance,
+ * keyed by the composite (sourceId, type). The {@code category} and
+ * {@code hisPublishArticleList} columns hold JSON strings — callers
+ * serialize with fastjson before persisting and parse on read.
+ */
+@Data
+@AllArgsConstructor
+@NoArgsConstructor
+@Entity
+@Table(name = "article_title_his_cache")
+@IdClass(ArticleTitleHisCache.PK.class)
+public class ArticleTitleHisCache implements Serializable {
+
+    // Composite primary key part 1: id of the source content.
+    @Id
+    @Column(name = "source_id")
+    private String sourceId;
+    // Composite primary key part 2: cache type discriminator.
+    @Id
+    @Column(name = "type")
+    private String type;
+    @Column(name = "title")
+    private String title;
+    // MD5 digest of the title; used by callers for title-based lookups.
+    @Column(name = "title_md5")
+    private String titleMd5;
+    @Column(name = "crawler_title")
+    private String crawlerTitle;
+    @Column(name = "channel_content_id")
+    private String channelContentId;
+    @Column(name = "root_publish_timestamp")
+    private Long rootPublishTimestamp;
+    // JSON array of category names (parsed via JSONArray.parseArray at read sites).
+    @Column(name = "category")
+    private String category;
+    // JSON-serialized list of historical publish records for this title.
+    @Column(name = "his_publish_article_list")
+    private String hisPublishArticleList;
+    @Column(name = "create_timestamp")
+    private Long createTimestamp;
+    @Column(name = "update_timestamp")
+    private Long updateTimestamp;
+
+
+    /** Composite-key class; equals/hashCode come from Lombok's @Data. */
+    @Data
+    public static class PK implements Serializable {
+
+        @Column(name = "source_id")
+        private String sourceId;
+        @Column(name = "type")
+        private String type;
+
+        // JPA requires a public no-arg constructor on IdClass types.
+        public PK() {
+        }
+
+    }
+}

+ 16 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/param/TitleHisCacheParam.java

@@ -0,0 +1,16 @@
+package com.tzld.longarticle.recommend.server.model.param;
+
+import lombok.Data;
+
+import java.util.List;
+
+/**
+ * Parameter object handed to RecallService.getArticleWithHistory when
+ * (re)computing a title's historical publish data for one source item.
+ */
+@Data
+public class TitleHisCacheParam {
+
+    private String sourceId;
+    private String title;
+    private String crawlerTitle;
+    private String crawlerChannelContentId;
+    // MD5 of the title, precomputed by the caller.
+    private String titleMd5;
+    // Category names; populated from the cache row's JSON `category` column when present.
+    private List<String> category;
+}

+ 13 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/longArticle/ArticleTitleHisCacheRepository.java

@@ -0,0 +1,13 @@
+package com.tzld.longarticle.recommend.server.repository.longArticle;
+
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleTitleHisCache;
+import org.springframework.data.jpa.repository.JpaRepository;
+import org.springframework.stereotype.Repository;
+
+import java.util.List;
+
+/** Spring Data JPA repository for the article_title_his_cache table. */
+@Repository
+public interface ArticleTitleHisCacheRepository extends JpaRepository<ArticleTitleHisCache, ArticleTitleHisCache.PK> {
+
+    /** Derived query: cache rows whose sourceId is in the given list, for one type. */
+    List<ArticleTitleHisCache> getBySourceIdInAndType(List<String> sourceIdList, String type);
+}

+ 64 - 4
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/XxlJobService.java

@@ -15,6 +15,7 @@ import com.tzld.longarticle.recommend.server.mapper.crawler.CrawlerBaseMapper;
 import com.tzld.longarticle.recommend.server.mapper.growth.NewPushMessageCallbackMapper;
 import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
 import com.tzld.longarticle.recommend.server.model.dto.AccountTypeFansDTO;
+import com.tzld.longarticle.recommend.server.model.dto.Content;
 import com.tzld.longarticle.recommend.server.model.dto.NotPublishPlan;
 import com.tzld.longarticle.recommend.server.model.dto.PublishPlanAccountNotifyDTO;
 import com.tzld.longarticle.recommend.server.model.entity.aigc.CrawlerPlan;
@@ -22,11 +23,9 @@ import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishAccount;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountAvgInfo;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.GetOffVideoCrawler;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.LongArticlesVideo;
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.GetOffVideoArticle;
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.LongArticlesMatchVideo;
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.LongArticlesReadRate;
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.LongArticlesRootSourceId;
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.*;
 import com.tzld.longarticle.recommend.server.model.param.ArticleFindSourceParam;
+import com.tzld.longarticle.recommend.server.model.param.TitleHisCacheParam;
 import com.tzld.longarticle.recommend.server.remote.ODPSManager;
 import com.tzld.longarticle.recommend.server.repository.crawler.GetOffVideoCrawlerRepository;
 import com.tzld.longarticle.recommend.server.repository.crawler.LongArticlesVideoRepository;
@@ -34,6 +33,7 @@ import com.tzld.longarticle.recommend.server.repository.longArticle.*;
 import com.tzld.longarticle.recommend.server.repository.model.PushMessageCallbackExample;
 import com.tzld.longarticle.recommend.server.service.recommend.ArticlePromotionService;
 import com.tzld.longarticle.recommend.server.service.recommend.ArticleService;
+import com.tzld.longarticle.recommend.server.service.recommend.recall.RecallService;
 import com.tzld.longarticle.recommend.server.util.DateUtils;
 import com.tzld.longarticle.recommend.server.util.LarkRobotUtil;
 import com.tzld.longarticle.recommend.server.util.feishu.FeishuMessageSender;
@@ -43,14 +43,18 @@ import lombok.Getter;
 import lombok.Setter;
 import lombok.experimental.Accessors;
 import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.collections4.CollectionUtils;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.dao.DuplicateKeyException;
+import org.springframework.data.domain.Page;
+import org.springframework.data.domain.PageRequest;
 import org.springframework.stereotype.Service;
 import org.springframework.util.StringUtils;
 
 import java.time.LocalTime;
 import java.util.*;
 import java.util.concurrent.*;
+import java.util.function.Function;
 import java.util.stream.Collectors;
 
 import static com.tzld.longarticle.recommend.server.common.constant.TimeConstant.MILLISECOND_DAY;
@@ -86,6 +90,10 @@ public class XxlJobService {
     private ArticleService articleService;
     @Autowired
     private ArticlePromotionService articlePromotionService;
+    @Autowired
+    private ArticleTitleHisCacheRepository articleTitleHisCacheRepository;
+    @Autowired
+    private RecallService recallService;
 
     ExecutorService thread = new CommonThreadPoolExecutor(
             5,
@@ -556,4 +564,56 @@ public class XxlJobService {
         return ReturnT.SUCCESS;
     }
 
+    /**
+     * XXL-Job entry point: walks the whole article_title_his_cache table page by
+     * page and refreshes each row's historical publish data via RecallService.
+     *
+     * @param param unused job parameter (required by the XXL-Job handler contract)
+     * @return ReturnT.SUCCESS unconditionally
+     */
+    @XxlJob("refreshArticleHisCache")
+    public ReturnT<String> refreshArticleHisCache(String param) {
+        // 刷新历史表现缓存 (refresh the cached historical performance rows)
+        long count = articleTitleHisCacheRepository.count();
+        int pageSize = 100;
+        // Ceiling division: the original (count / pageSize) + 1 issued one extra
+        // empty query whenever count was an exact multiple of pageSize.
+        long page = (count + pageSize - 1) / pageSize;
+        for (int i = 0; i < page; i++) {
+            Page<ArticleTitleHisCache> articleTitleHisCachePage = articleTitleHisCacheRepository.findAll(
+                    PageRequest.of(i, pageSize));
+            List<ArticleTitleHisCache> cacheList = articleTitleHisCachePage.getContent();
+            if (CollectionUtils.isEmpty(cacheList)) {
+                continue;
+            }
+            // Group the page by cache type, then index each group by sourceId.
+            Map<String, Map<String, ArticleTitleHisCache>> cacheMap = cacheList.stream().collect(
+                    Collectors.groupingBy(ArticleTitleHisCache::getType,
+                            Collectors.toMap(ArticleTitleHisCache::getSourceId, Function.identity())));
+            for (Map.Entry<String, Map<String, ArticleTitleHisCache>> typeEntry : cacheMap.entrySet()) {
+                String type = typeEntry.getKey();
+                Map<String, ArticleTitleHisCache> sourceIdToCacheMap = typeEntry.getValue();
+                Set<String> sourceIdList = sourceIdToCacheMap.keySet();
+                List<TitleHisCacheParam> paramList = sourceIdList.stream().map(sourceId -> {
+                    // Direct lookup in this type's map (the original re-traversed cacheMap).
+                    ArticleTitleHisCache cache = sourceIdToCacheMap.get(sourceId);
+                    TitleHisCacheParam cacheParam = new TitleHisCacheParam();
+                    cacheParam.setSourceId(sourceId);
+                    cacheParam.setTitleMd5(cache.getTitleMd5());
+                    cacheParam.setTitle(cache.getTitle());
+                    cacheParam.setCrawlerTitle(cache.getCrawlerTitle());
+                    cacheParam.setCrawlerChannelContentId(cache.getChannelContentId());
+                    if (StringUtils.hasText(cache.getCategory())) {
+                        cacheParam.setCategory(JSONArray.parseArray(cache.getCategory(), String.class));
+                    }
+                    return cacheParam;
+                }).collect(Collectors.toList());
+                Map<String, Content> articlesWithHistory = recallService.getArticleWithHistory(paramList, type);
+                // Collect updated rows and persist them in one batch instead of one save() per row.
+                List<ArticleTitleHisCache> toSave = new ArrayList<>(sourceIdList.size());
+                for (String sourceId : sourceIdList) {
+                    Content content = articlesWithHistory.get(sourceId);
+                    if (Objects.nonNull(content) && CollectionUtils.isNotEmpty(content.getHisPublishArticleList())) {
+                        ArticleTitleHisCache cache = sourceIdToCacheMap.get(sourceId);
+                        cache.setHisPublishArticleList(JSONObject.toJSONString(content.getHisPublishArticleList()));
+                        // Unified on commons-collections4 CollectionUtils (imported above);
+                        // the original mixed in hutool's CollectionUtil on this one check.
+                        if (CollectionUtils.isNotEmpty(content.getCategory())) {
+                            cache.setCategory(JSONObject.toJSONString(content.getCategory()));
+                        }
+                        cache.setUpdateTimestamp(System.currentTimeMillis());
+                        toSave.add(cache);
+                    }
+                }
+                if (CollectionUtils.isNotEmpty(toSave)) {
+                    articleTitleHisCacheRepository.saveAll(toSave);
+                }
+            }
+        }
+
+        return ReturnT.SUCCESS;
+    }
+
 }

+ 74 - 5
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/ArticleService.java

@@ -92,6 +92,9 @@ public class ArticleService {
     @Value("${kimiCategoryPrompt:}")
     private String kimiCategoryPrompt;
 
+    @ApolloJsonValue("${articlePromotionProduceConfig:{}}")
+    private Map<String, Map<String, Map<String, String>>> produceConfig;
+
     private final static ExecutorService pool = new CommonThreadPoolExecutor(
             32,
             128,
@@ -380,8 +383,13 @@ public class ArticleService {
 
 
     public void articleCategory() {
+        // 根据抓取计划 添加品类处理任务 (queue category tasks from crawler plans)
         addArticleCategoryByCrawlerPlan();
-        addArticleCategoryByProducePlan();
+        // 冷启层 生成计划 添加品类处理任务 (queue tasks for cold-start produce plans)
+        addColdArticleCategoryByProducePlan();
+        // 晋级 生成计划 添加品类处理任务 (queue tasks for promotion produce plans)
+        addPromotionArticleCategoryByProducePlan();
+        // 调用kimi进行内容分类 (classify the remaining content via kimi)
+        dealArticleCategory();
     }
 
@@ -449,7 +457,70 @@ public class ArticleService {
         }
     }
 
-    private void addArticleCategoryByProducePlan() {
+    /**
+     * Cold-start pool: builds category tasks for the configured produce plans
+     * and bulk-inserts any that were not already recorded.
+     */
+    private void addColdArticleCategoryByProducePlan() {
+        List<ArticleCategory> saveList = addArticleCategoryByProducePlan(producePlanIds);
+        if (CollectionUtils.isNotEmpty(saveList)) {
+            longArticleBaseMapper.batchInsertArticleCategory(saveList);
+        }
+    }
+
+    /**
+     * Promotion pool: builds category tasks for promotion produce plans taken
+     * from the Apollo config, then tries to inherit each article's category from
+     * its traced source content before falling back to kimi classification.
+     */
+    private void addPromotionArticleCategoryByProducePlan() {
+        List<String> articlePromotionProducePlanIds = new ArrayList<>();
+        // 获取晋级生成计划Id (collect promotion produce-plan ids; skip blanks and
+        // ids already covered by the cold-start plan list)
+        for (Map.Entry<String, Map<String, Map<String, String>>> oneEntry : produceConfig.entrySet()) {
+            for (Map.Entry<String, Map<String, String>> twoEntry : oneEntry.getValue().entrySet()) {
+                twoEntry.getValue().forEach((key, value) -> {
+                    if (StringUtils.hasText(value) && !producePlanIds.contains(value)) {
+                        articlePromotionProducePlanIds.add(value);
+                    }
+                });
+            }
+        }
+        List<ArticleCategory> saveList = addArticleCategoryByProducePlan(articlePromotionProducePlanIds);
+        // 已晋级文章 先溯源查找源内容品类,查询不到再用kimi进行分类
+        if (CollectionUtils.isNotEmpty(saveList)) {
+            List<String> channelContentIds = saveList.stream().map(ArticleCategory::getChannelContentId)
+                    .collect(Collectors.toList());
+            // 查询晋升rootProduceContentId
+            List<ArticlePoolPromotionSource> sourceList = articlePoolPromotionSourceRepository
+                    .getByChannelContentIdInAndStatusAndDeleted(channelContentIds,
+                            ArticlePoolPromotionSourceStatusEnum.FINISH.getCode(), 0);
+            // NOTE(review): merge functions added below — Collectors.toMap without one
+            // throws IllegalStateException on duplicate keys; only two of the original
+            // four maps guarded against that.
+            Map<String, ArticlePoolPromotionSource> sourceMap = sourceList.stream()
+                    .collect(Collectors.toMap(ArticlePoolPromotionSource::getChannelContentId, Function.identity(),
+                            (a, b) -> a));
+            // 根据produceContentId查询category
+            List<ArticleCategory> articleCategoryList = articleCategoryRepository.getByStatus(ArticleCategoryStatusEnum.SUCCESS.getCode());
+            Map<String, ArticleCategory> categoryMap = articleCategoryList.stream()
+                    .collect(Collectors.toMap(ArticleCategory::getProduceContentId, Function.identity(), (a, b) -> a));
+            Map<String, ArticleCategory> coldStartCategoryMap = articleCategoryList.stream()
+                    .collect(Collectors.toMap(ArticleCategory::getChannelContentId, Function.identity(), (a, b) -> a));
+            Map<String, ArticleCategory> titleCategoryMap = articleCategoryList.stream()
+                    .collect(Collectors.toMap(ArticleCategory::getTitleMd5, Function.identity(), (a, b) -> a));
+            for (ArticleCategory articleCategory : saveList) {
+                ArticlePoolPromotionSource source = sourceMap.get(articleCategory.getChannelContentId());
+                ArticleCategory category = null;
+                // Resolution order: traced root produce content -> channel id -> title md5.
+                if (Objects.nonNull(source) && Objects.nonNull(source.getRootProduceContentId())) {
+                    category = categoryMap.get(source.getRootProduceContentId());
+                }
+                if (Objects.isNull(category)) {
+                    category = coldStartCategoryMap.get(articleCategory.getChannelContentId());
+                }
+                if (Objects.isNull(category)) {
+                    category = titleCategoryMap.get(articleCategory.getTitleMd5());
+                }
+                if (Objects.nonNull(category) && StringUtils.hasText(category.getCategory())) {
+                    articleCategory.setCategory(category.getCategory());
+                    articleCategory.setKimiResult(category.getKimiResult());
+                    articleCategory.setStatus(ArticleCategoryStatusEnum.SUCCESS.getCode());
+                }
+            }
+            longArticleBaseMapper.batchInsertArticleCategory(saveList);
+        }
+    }
+
+    /**
+     * 根据生成计划获取需要进行分类内容
+     */
+    private List<ArticleCategory> addArticleCategoryByProducePlan(List<String> producePlanIds) {
         List<ProducePlanExeRecord> produceContentList = aigcBaseMapper.getAllByProducePlanId(producePlanIds);
         List<String> channelContentIds = produceContentList.stream().map(ProducePlanExeRecord::getChannelContentId).distinct().collect(Collectors.toList());
         List<ArticleCategory> articleCategoryList = articleCategoryRepository.getAllByChannelContentIdIn(channelContentIds);
@@ -479,9 +550,7 @@ public class ArticleService {
                 saveList.add(item);
             }
         }
-        if (CollectionUtils.isNotEmpty(saveList)) {
-            longArticleBaseMapper.batchInsertArticleCategory(saveList);
-        }
+        return saveList;
     }
 
     private String buildKimiPrompt(List<String> titleList) {

+ 185 - 94
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/recall/RecallService.java

@@ -1,17 +1,15 @@
 package com.tzld.longarticle.recommend.server.service.recommend.recall;
 
+import com.alibaba.fastjson.JSONArray;
 import com.alibaba.fastjson.JSONObject;
 import com.google.common.collect.Lists;
 import com.tzld.longarticle.recommend.server.common.CostMonitor;
 import com.tzld.longarticle.recommend.server.common.ThreadPoolFactory;
 import com.tzld.longarticle.recommend.server.common.enums.StatusEnum;
 import com.tzld.longarticle.recommend.server.common.enums.aigc.PublishPlanInputSourceTypesEnum;
-import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticleCategoryStatusEnum;
-import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticlePoolPromotionSourceStatusEnum;
-import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticleTypeEnum;
-import com.tzld.longarticle.recommend.server.common.enums.recommend.ContentPoolEnum;
-import com.tzld.longarticle.recommend.server.common.enums.recommend.FeishuRobotIdEnum;
+import com.tzld.longarticle.recommend.server.common.enums.recommend.*;
 import com.tzld.longarticle.recommend.server.mapper.crawler.CrawlerBaseMapper;
+import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
 import com.tzld.longarticle.recommend.server.model.dto.Content;
 import com.tzld.longarticle.recommend.server.model.dto.ContentHisPublishArticle;
 import com.tzld.longarticle.recommend.server.model.entity.aigc.CrawlerMetaArticle;
@@ -20,10 +18,8 @@ import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountAvgInfo
 import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountCorrelation;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.Article;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.ArticleDetailInfo;
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.AccountCategory;
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleCategory;
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticlePoolPromotionSource;
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.PublishSingleVideoSource;
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.*;
+import com.tzld.longarticle.recommend.server.model.param.TitleHisCacheParam;
 import com.tzld.longarticle.recommend.server.remote.aigc.AIGCWaitingPublishContentService;
 import com.tzld.longarticle.recommend.server.repository.aigc.CrawlerMetaArticleRepository;
 import com.tzld.longarticle.recommend.server.repository.aigc.PublishContentRepository;
@@ -31,10 +27,7 @@ import com.tzld.longarticle.recommend.server.repository.crawler.AccountAvgInfoRe
 import com.tzld.longarticle.recommend.server.repository.crawler.AccountCorrelationRepository;
 import com.tzld.longarticle.recommend.server.repository.crawler.ArticleDetailInfoRepository;
 import com.tzld.longarticle.recommend.server.repository.crawler.ArticleRepository;
-import com.tzld.longarticle.recommend.server.repository.longArticle.AccountCategoryRepository;
-import com.tzld.longarticle.recommend.server.repository.longArticle.ArticleCategoryRepository;
-import com.tzld.longarticle.recommend.server.repository.longArticle.ArticlePoolPromotionSourceRepository;
-import com.tzld.longarticle.recommend.server.repository.longArticle.PublishSingleVideoSourceRepository;
+import com.tzld.longarticle.recommend.server.repository.longArticle.*;
 import com.tzld.longarticle.recommend.server.service.recommend.config.AccountIndexAvgViewCountService;
 import com.tzld.longarticle.recommend.server.service.recommend.recall.strategy.DefaultRecallStrategy;
 import com.tzld.longarticle.recommend.server.service.recommend.score.ScoreStrategy;
@@ -96,6 +89,10 @@ public class RecallService implements ApplicationContextAware {
     AccountCategoryRepository accountCategoryRepository;
     @Autowired
     PublishSingleVideoSourceRepository publishSingleVideoSourceRepository;
+    @Autowired
+    ArticleTitleHisCacheRepository articleTitleHisCacheRepository;
+    @Autowired
+    LongArticleBaseMapper longArticleBaseMapper;
 
     private final Map<String, RecallStrategy> strategyMap = new HashMap<>();
     private ApplicationContext applicationContext;
@@ -182,10 +179,7 @@ public class RecallService implements ApplicationContextAware {
                             + "账号名称: " + param.getAccountName());
             return content;
         }
-        // category 查询
-        setContentCategory(content);
         long t3 = System.currentTimeMillis();
-        CostMonitor.logCost("Recall", "GetCategory", t3 - t2);
         // 标题历史均值
         setTitleAvgViewCount(content, param.getGhId(), param.getType());
         long t4 = System.currentTimeMillis();
@@ -203,10 +197,10 @@ public class RecallService implements ApplicationContextAware {
             return;
         }
         Map<String, Content> contentMap = contentList.stream()
-               .collect(Collectors.toMap(Content::getSourceId, Function.identity()));
+                .collect(Collectors.toMap(Content::getSourceId, Function.identity()));
         List<PublishSingleVideoSource> sourceList = publishSingleVideoSourceRepository.getByContentTraceIdIn(contentTraceIds);
         Map<String, PublishSingleVideoSource> sourceMap = sourceList.stream()
-              .collect(Collectors.toMap(PublishSingleVideoSource::getContentTraceId, Function.identity()));
+                .collect(Collectors.toMap(PublishSingleVideoSource::getContentTraceId, Function.identity()));
         for (String contentTraceId : contentTraceIds) {
             Content content = contentMap.get(contentTraceId);
             PublishSingleVideoSource source = sourceMap.get(contentTraceId);
@@ -219,53 +213,6 @@ public class RecallService implements ApplicationContextAware {
         }
     }
 
-    public void setContentCategory(List<Content> contentList) {
-        List<String> channelContentIds = contentList.stream().map(Content::getCrawlerChannelContentId)
-                .collect(Collectors.toList());
-        // 查询晋升rootProduceContentId
-        List<ArticlePoolPromotionSource> sourceList = articlePoolPromotionSourceRepository
-                .getByChannelContentIdInAndStatusAndDeleted(channelContentIds,
-                        ArticlePoolPromotionSourceStatusEnum.FINISH.getCode(), 0);
-        Map<String, ArticlePoolPromotionSource> sourceMap = sourceList.stream()
-                .collect(Collectors.toMap(ArticlePoolPromotionSource::getChannelContentId, Function.identity()));
-        List<String> publishContentIds = sourceList.stream().
-                map(ArticlePoolPromotionSource::getRootPublishContentId).collect(Collectors.toList());
-        List<PublishContent> publishContentList = publishContentRepository.getByIdIn(publishContentIds);
-        Map<String, PublishContent> publishContentMap = publishContentList.stream()
-                .collect(Collectors.toMap(PublishContent::getId, Function.identity()));
-        // 根据produceContentId查询category
-        List<ArticleCategory> articleCategoryList = articleCategoryRepository.getByStatus(ArticleCategoryStatusEnum.SUCCESS.getCode());
-        Map<String, ArticleCategory> categoryMap = articleCategoryList.stream()
-                .collect(Collectors.toMap(ArticleCategory::getProduceContentId, Function.identity()));
-        Map<String, ArticleCategory> coldStartCategoryMap = articleCategoryList.stream()
-                .collect(Collectors.toMap(ArticleCategory::getChannelContentId, Function.identity(), (a, b) -> a));
-        Map<String, ArticleCategory> titleCategoryMap = articleCategoryList.stream()
-                .collect(Collectors.toMap(ArticleCategory::getTitleMd5, Function.identity(), (a, b) -> a));
-        for (Content content : contentList) {
-            ArticlePoolPromotionSource source = sourceMap.get(content.getCrawlerChannelContentId());
-            ArticleCategory category = null;
-            if (Objects.nonNull(source) && Objects.nonNull(source.getRootProduceContentId())) {
-                category = categoryMap.get(source.getRootProduceContentId());
-                PublishContent publishContent = publishContentMap.get(source.getRootPublishContentId());
-                if (Objects.nonNull(publishContent)) {
-                    content.setRootPublishTimestamp(publishContent.getPublishTimestamp());
-                }
-            }
-            if (Objects.isNull(category)) {
-                category = coldStartCategoryMap.get(content.getCrawlerChannelContentId());
-            }
-            if (Objects.isNull(category)) {
-                String titleMd5 = Md5Util.encoderByMd5(content.getTitle());
-                category = titleCategoryMap.get(titleMd5);
-            }
-            if (Objects.nonNull(category)) {
-                content.setCategory(Collections.singletonList(category.getCategory()));
-                continue;
-            }
-//            log.error("setContentCategory NullError channelContentId:{}", content.getCrawlerChannelContentId());
-        }
-    }
-
     private List<CrawlerMetaArticle> getByUniqueIndexIn(List<String> md5List) {
         if (CollectionUtils.isEmpty(md5List)) {
             return new ArrayList<>();
@@ -300,15 +247,109 @@ public class RecallService implements ApplicationContextAware {
 
    /**
     * Enriches each candidate content with its historical publish performance
     * (per-article avg view counts), category and root publish timestamp.
     * Results are served from article_title_his_cache when present; cache misses
     * go through {@link #getArticleWithHistory} and are then written back to the cache.
     *
     * @param contentList candidate contents to enrich (mutated in place)
     * @param ghId        gh id of the target account; used to load account correlations
     *                    that are applied to every historical article
     * @param type        article type; scopes both the cache lookup and the cache write
     */
    public void setTitleAvgViewCount(List<Content> contentList, String ghId, String type) {
        long start = System.currentTimeMillis();
        // Pre-compute the title MD5 for every content; it is the key used for history lookups.
        contentList.forEach(content -> content.setTitleMd5(Md5Util.encoderByMd5(content.getTitle())));
        List<String> sourceIdList = contentList.stream().map(Content::getSourceId).distinct().collect(Collectors.toList());
        // (o1, o2) -> o2: on duplicate sourceIds keep the last content seen.
        Map<String, Content> sourceIdToContentMap = contentList.stream().collect(
                Collectors.toMap(Content::getSourceId, Function.identity(), (o1, o2) -> o2));
        // Load cached history rows by sourceId, chunked to keep the SQL IN clause bounded.
        List<ArticleTitleHisCache> articleTitleHisCacheList = new ArrayList<>(sourceIdList.size());
        for (List<String> partition : Lists.partition(sourceIdList, 1000)) {
            articleTitleHisCacheList.addAll(articleTitleHisCacheRepository.getBySourceIdInAndType(partition, type));
        }
        Map<String, ArticleTitleHisCache> articleTitleHisCacheMap = articleTitleHisCacheList.stream()
                .collect(Collectors.toMap(ArticleTitleHisCache::getSourceId, Function.identity()));
        // Drop sourceIds already served by the cache; only the remainder takes the full query path below.
        sourceIdList.removeIf(articleTitleHisCacheMap::containsKey);
        // Account correlation per related ghId; applied to each historical article in both branches.
        List<AccountCorrelation> accountCorrelationList = accountCorrelationRepository.findByGhIdAndStatus(ghId, 1);
        Map<String, Double> accountCorrelationMap = accountCorrelationList.stream().collect(
                Collectors.toMap(AccountCorrelation::getRelGhId, AccountCorrelation::getCorrelation));
        // Build lookup params for the uncached contents only.
        List<TitleHisCacheParam> paramList = sourceIdList.stream().map(sourceId -> {
            Content content = sourceIdToContentMap.get(sourceId);
            TitleHisCacheParam cacheParam = new TitleHisCacheParam();
            cacheParam.setSourceId(sourceId);
            cacheParam.setTitleMd5(content.getTitleMd5());
            cacheParam.setTitle(content.getTitle());
            cacheParam.setCrawlerTitle(content.getCrawlerTitle());
            cacheParam.setCrawlerChannelContentId(content.getCrawlerChannelContentId());
            cacheParam.setCategory(content.getCategory());
            return cacheParam;
        }).collect(Collectors.toList());
        Map<String, Content> articlesWithHistory = getArticleWithHistory(paramList, type);
        List<Content> newCacheSaveList = new ArrayList<>();
        Set<String> newCacheSourceIdSet = new HashSet<>();
        for (Content content : contentList) {
            if (articleTitleHisCacheMap.containsKey(content.getSourceId())) {
                // Cache hit: deserialize the stored history. Correlations depend on the
                // target ghId, so they are NOT cached and must be re-applied here.
                ArticleTitleHisCache cache = articleTitleHisCacheMap.get(content.getSourceId());
                List<ContentHisPublishArticle> hisPublishArticleList =
                        JSONArray.parseArray(cache.getHisPublishArticleList(), ContentHisPublishArticle.class);
                for (ContentHisPublishArticle article : hisPublishArticleList) {
                    article.setCorrelation(Optional.ofNullable(accountCorrelationMap.get(article.getGhId())).orElse(0.0));
                }
                if (StringUtils.hasText(cache.getCategory())) {
                    content.setCategory(JSONArray.parseArray(cache.getCategory(), String.class));
                }
                content.setRootPublishTimestamp(cache.getRootPublishTimestamp());
                content.setHisPublishArticleList(hisPublishArticleList);
                setT0Data(content);
                // Cache hits are never re-saved: continue skips the save-list collection below.
                continue;
            }
            if (articlesWithHistory.containsKey(content.getSourceId())) {
                // Freshly computed: copy history/category/root timestamp onto the content.
                Content articleWithHistory = articlesWithHistory.get(content.getSourceId());
                content.setHisPublishArticleList(articleWithHistory.getHisPublishArticleList());
                if (CollectionUtils.isNotEmpty(articleWithHistory.getCategory())) {
                    content.setCategory(articleWithHistory.getCategory());
                }
                content.setRootPublishTimestamp(articleWithHistory.getRootPublishTimestamp());
                for (ContentHisPublishArticle article : content.getHisPublishArticleList()) {
                    article.setCorrelation(Optional.ofNullable(accountCorrelationMap.get(article.getGhId())).orElse(0.0));
                }
                setT0Data(content);
            }
            // Collect each uncached sourceId once for cache persistence (contents with no
            // history are filtered out inside saveArticleTitleHisCache).
            if (!newCacheSourceIdSet.contains(content.getSourceId())) {
                newCacheSaveList.add(content);
                newCacheSourceIdSet.add(content.getSourceId());
            }
        }
        // Persist newly computed histories to the cache table.
        saveArticleTitleHisCache(newCacheSaveList, type);
        log.info("setTitleAvgViewCount cost:{}", System.currentTimeMillis() - start);
    }
+
+    private void saveArticleTitleHisCache(List<Content> saveList, String type) {
+        if (CollectionUtils.isEmpty(saveList)) {
+            return;
+        }
+        List<ArticleTitleHisCache> cacheList = new ArrayList<>();
+        try {
+            for (Content content : saveList) {
+                if (CollectionUtils.isEmpty(content.getHisPublishArticleList())) {
+                    continue;
+                }
+                ArticleTitleHisCache cache = new ArticleTitleHisCache();
+                BeanUtils.copyProperties(content, cache);
+                cache.setType(type);
+                cache.setChannelContentId(content.getCrawlerChannelContentId());
+                if (CollectionUtils.isNotEmpty(content.getCategory())) {
+                    cache.setCategory(JSONObject.toJSONString(content.getCategory()));
+                }
+                cache.setRootPublishTimestamp(content.getRootPublishTimestamp());
+                cache.setHisPublishArticleList(JSONObject.toJSONString(content.getHisPublishArticleList()));
+                cache.setCreateTimestamp(System.currentTimeMillis());
+                cacheList.add(cache);
+            }
+            if (CollectionUtils.isEmpty(cacheList)) {
+                return;
+            }
+            longArticleBaseMapper.batchInsertArticleTitleHisCache(cacheList);
+        } catch (Exception e) {
+            log.error("saveArticleTitleHisCache error:{}", e.getMessage());
+        }
+    }
 
-        Set<String> titleMd5List = contentList.stream().map(o -> Md5Util.encoderByMd5(o.getTitle())).collect(Collectors.toSet());
-//        Set<String> titleList = contentList.stream().map(Content::getTitle).collect(Collectors.toSet());
-//        Set<String> crawlerTitleList = contentList.stream().map(Content::getCrawlerTitle).collect(Collectors.toSet());
-//        titleList.addAll(crawlerTitleList);
+    public Map<String, Content> getArticleWithHistory(List<TitleHisCacheParam> paramList, String type) {
+        Map<String, Content> result = new HashMap<>();
+        List<String> titleMd5List = paramList.stream().map(TitleHisCacheParam::getTitleMd5).collect(Collectors.toList());
         // 获取历史已发布文章
         List<Article> hisArticleList = new ArrayList<>();
         List<List<String>> titleMd5Partition = Lists.partition(new ArrayList<>(titleMd5List), 1000);
@@ -345,13 +386,58 @@ public class RecallService implements ApplicationContextAware {
         List<AccountCategory> accountCategoryList = accountCategoryRepository.getByStatus(StatusEnum.ONE.getCode());
         Map<String, JSONObject> accountCategoryMap = accountCategoryList.stream().filter(o -> StringUtils.hasText(o.getCategoryMap()))
                 .collect(Collectors.toMap(AccountCategory::getGhId, o -> JSONObject.parseObject(o.getCategoryMap())));
-        for (Content content : contentList) {
+
+        // 获取品类
+        List<String> channelContentIds = paramList.stream().map(TitleHisCacheParam::getCrawlerChannelContentId)
+                .collect(Collectors.toList());
+        // 查询晋升rootProduceContentId
+        List<ArticlePoolPromotionSource> sourceList = articlePoolPromotionSourceRepository
+                .getByChannelContentIdInAndStatusAndDeleted(channelContentIds,
+                        ArticlePoolPromotionSourceStatusEnum.FINISH.getCode(), 0);
+        Map<String, ArticlePoolPromotionSource> sourceMap = sourceList.stream()
+                .collect(Collectors.toMap(ArticlePoolPromotionSource::getChannelContentId, Function.identity()));
+        List<String> publishContentIds = sourceList.stream().
+                map(ArticlePoolPromotionSource::getRootPublishContentId).collect(Collectors.toList());
+        List<PublishContent> publishContentList = publishContentRepository.getByIdIn(publishContentIds);
+        Map<String, PublishContent> publishContentMap = publishContentList.stream()
+                .collect(Collectors.toMap(PublishContent::getId, Function.identity()));
+        // 根据produceContentId查询category
+        List<ArticleCategory> articleCategoryList = articleCategoryRepository.getByStatus(ArticleCategoryStatusEnum.SUCCESS.getCode());
+        Map<String, ArticleCategory> categoryMap = articleCategoryList.stream()
+                .collect(Collectors.toMap(ArticleCategory::getProduceContentId, Function.identity()));
+        Map<String, ArticleCategory> coldStartCategoryMap = articleCategoryList.stream()
+                .collect(Collectors.toMap(ArticleCategory::getChannelContentId, Function.identity(), (a, b) -> a));
+        Map<String, ArticleCategory> titleCategoryMap = articleCategoryList.stream()
+                .collect(Collectors.toMap(ArticleCategory::getTitleMd5, Function.identity(), (a, b) -> a));
+
+        for (TitleHisCacheParam cacheParam : paramList) {
+            Content res = new Content();
+            // 设置品类
+            ArticleCategory category = categoryMap.get(cacheParam.getSourceId());
+            if (Objects.isNull(category)) {
+                category = coldStartCategoryMap.get(cacheParam.getCrawlerChannelContentId());
+            }
+            if (Objects.isNull(category)) {
+                category = titleCategoryMap.get(cacheParam.getTitleMd5());
+            }
+            if (Objects.nonNull(category)) {
+                res.setCategory(Collections.singletonList(category.getCategory()));
+            }
+            // 溯源查找源发布时间
+            ArticlePoolPromotionSource source = sourceMap.get(cacheParam.getCrawlerChannelContentId());
+            if (Objects.nonNull(source) && Objects.nonNull(source.getRootProduceContentId())) {
+                PublishContent publishContent = publishContentMap.get(source.getRootPublishContentId());
+                if (Objects.nonNull(publishContent)) {
+                    res.setRootPublishTimestamp(publishContent.getPublishTimestamp());
+                }
+            }
+            // 设置历史表现
             List<Article> hisArticles = new ArrayList<>();
-            Map<Integer, List<Article>> indexArticleMap = map.get(content.getTitle());
+            Map<Integer, List<Article>> indexArticleMap = map.get(cacheParam.getTitle());
             if (Objects.isNull(indexArticleMap)) {
-                indexArticleMap = map.get(content.getCrawlerTitle());
-            } else if (!content.getTitle().equals(content.getCrawlerTitle())) {
-                Map<Integer, List<Article>> crawlerTitleIndexArticleMap = map.get(content.getCrawlerTitle());
+                indexArticleMap = map.get(cacheParam.getCrawlerTitle());
+            } else if (!cacheParam.getTitle().equals(cacheParam.getCrawlerTitle())) {
+                Map<Integer, List<Article>> crawlerTitleIndexArticleMap = map.get(cacheParam.getCrawlerTitle());
                 if (Objects.nonNull(crawlerTitleIndexArticleMap)) {
                     for (Map.Entry<Integer, List<Article>> entry : crawlerTitleIndexArticleMap.entrySet()) {
                         if (indexArticleMap.containsKey(entry.getKey())) {
@@ -370,17 +456,17 @@ public class RecallService implements ApplicationContextAware {
                     hisArticles.addAll(indexArticleList);
                 }
             }
-            content.setHisPublishArticleList(new ArrayList<>());
+            res.setHisPublishArticleList(new ArrayList<>());
             for (Article hisArticle : hisArticles) {
                 if (ScoreStrategy.hisContentLateFilter(hisArticle.getPublishTimestamp())) {
                     continue;
                 }
                 // 历史表现 文章品类如果与历史发布账号负相关 则过滤,不计算该历史发布表现
                 JSONObject categoryWeightMap = accountCategoryMap.get(hisArticle.getGhId());
-                if (Objects.nonNull(categoryWeightMap) && CollectionUtils.isNotEmpty(content.getCategory())) {
-                    String category = content.getCategory().get(0);
-                    if (categoryWeightMap.containsKey(category)) {
-                        double weight = categoryWeightMap.getDoubleValue(category);
+                if (Objects.nonNull(categoryWeightMap) && CollectionUtils.isNotEmpty(cacheParam.getCategory())) {
+                    String hisCategory = cacheParam.getCategory().get(0);
+                    if (categoryWeightMap.containsKey(hisCategory)) {
+                        double weight = categoryWeightMap.getDoubleValue(hisCategory);
                         if (weight < 0) {
                             continue;
                         }
@@ -403,15 +489,18 @@ public class RecallService implements ApplicationContextAware {
                         article.setInnerAccount(true);
                         avgViewCount = Optional.ofNullable(indexMap.get(hisArticle.getItemIndex().toString()).getReadAvg())
                                 .orElse(0.0).intValue();
-//                    } else {
-//                        if (ArticleTypeEnum.QUNFA.getVal().equals(type)) {
+                    } else {
+                        if (ArticleTypeEnum.QUNFA.getVal().equals(type)) {
+                            log.error("历史表现阅读均值获取失败 ghId:{} accountName:{} date:{} index:{}",
+                                    hisArticle.getGhId(), hisArticle.getAccountName(), hisPublishDate,
+                                    hisArticle.getItemIndex());
 //                            FeishuMessageSender.sendWebHookMessage(FeishuRobotIdEnum.RECOMMEND.getRobotId(),
 //                                    "历史表现阅读均值获取失败\n"
 //                                            + "ghId: " + hisArticle.getGhId() + "\n"
 //                                            + "账号名称: " + hisArticle.getAccountName() + "\n"
 //                                            + "日期: " + hisPublishDate + "\n"
 //                                            + "位置: " + hisArticle.getItemIndex());
-//                        }
+                        }
                     }
                 }
                 article.setAvgViewCount(avgViewCount);
@@ -437,13 +526,12 @@ public class RecallService implements ApplicationContextAware {
                         article.setFirstViewCountRate((firstArticle.getShowViewCount() * 1.0) / firstIndexAvgInfo.getReadAvg());
                     }
                 }
-                article.setCorrelation(Optional.ofNullable(accountCorrelationMap.get(article.getGhId())).orElse(0.0));
-                content.getHisPublishArticleList().add(article);
+                res.getHisPublishArticleList().add(article);
             }
             // 设置头条阅读均值
-            setT0Data(content);
+            result.put(cacheParam.getSourceId(), res);
         }
-        log.info("setTitleAvgViewCount cost:{}", System.currentTimeMillis() - start);
+        return result;
     }
 
     private void setT0Data(Content content) {
@@ -465,14 +553,17 @@ public class RecallService implements ApplicationContextAware {
             if (CollectionUtils.isEmpty(article.getArticleDetailInfoList())) {
                 // 仅判断7.12以后发布文章
                 if (article.getPublishTimestamp() > 1720713600 && contentHisFeishuEnable) {
-                    FeishuMessageSender.sendWebHookMessage(FeishuRobotIdEnum.RECOMMEND.getRobotId(),
-                            "历史表现裂变特征获取失败\n"
-                                    + "ghId: " + article.getGhId() + "\n"
-                                    + "账号名称: " + article.getAccountName() + "\n"
-                                    + "位置: " + article.getItemIndex() + "\n"
-                                    + "标题: " + article.getTitle() + "\n"
-                                    + "发布时间: " + DateUtils.timestampToYMDStr(article.getPublishTimestamp(), "yyyyMMdd") + "\n"
-                                    + "wxsn: " + article.getWxSn());
+                    log.error("历史表现裂变特征获取失败 ghId:{} accountName:{} itemIndex:{} title:{} date:{} wxsn:{}",
+                            article.getGhId(), article.getAccountName(), article.getItemIndex(), article.getTitle(),
+                            DateUtils.timestampToYMDStr(article.getPublishTimestamp(), "yyyyMMdd"), article.getWxSn());
+//                    FeishuMessageSender.sendWebHookMessage(FeishuRobotIdEnum.RECOMMEND.getRobotId(),
+//                            "历史表现裂变特征获取失败\n"
+//                                    + "ghId: " + article.getGhId() + "\n"
+//                                    + "账号名称: " + article.getAccountName() + "\n"
+//                                    + "位置: " + article.getItemIndex() + "\n"
+//                                    + "标题: " + article.getTitle() + "\n"
+//                                    + "发布时间: " + DateUtils.timestampToYMDStr(article.getPublishTimestamp(), "yyyyMMdd") + "\n"
+//                                    + "wxsn: " + article.getWxSn());
                 }
                 continue;
             }

+ 4 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/web/XxlJobController.java

@@ -74,5 +74,9 @@ public class XxlJobController {
         service.articleCategoryJobRetry(null);
     }
 
    /**
     * Manual trigger for the article title history cache refresh job.
     * Delegates to the XXL-Job service method with a null job parameter
     * (presumably the param is unused for a full refresh — TODO confirm).
     */
    @GetMapping("/refreshArticleHisCache")
    public void refreshArticleHisCache() {
        service.refreshArticleHisCache(null);
    }
 
 }

+ 15 - 2
long-article-recommend-service/src/main/resources/mapper/longArticle/LongArticleBaseMapper.xml

@@ -87,11 +87,12 @@
 
    <!-- Bulk insert of article categories. This revision also persists the
         classification result columns (category, kimi_result, status) alongside
         the original identifying columns. -->
    <insert id="batchInsertArticleCategory">
        INSERT INTO article_category
        (produce_content_id, channel_content_id, crawler_plan_id, title, title_md5, category,
         kimi_result, status, create_timestamp)
        VALUES
        <foreach collection="list" item="item" separator=",">
            (#{item.produceContentId}, #{item.channelContentId}, #{item.crawlerPlanId}, #{item.title}, #{item.titleMd5},
            #{item.category}, #{item.kimiResult}, #{item.status}, #{item.createTimestamp})
        </foreach>
    </insert>
     </insert>
 
@@ -243,4 +244,16 @@
         select title from cold_start_title_pool where status in (-1, 1)
     </select>
 
    <!-- Batch insert into article_title_his_cache. The category and
         his_publish_article_list columns hold JSON strings serialized by the
         service layer (see RecallService#saveArticleTitleHisCache). -->
    <insert id="batchInsertArticleTitleHisCache">
        insert into article_title_his_cache
        (source_id, type, title, title_md5, channel_content_id, root_publish_timestamp, crawler_title,
         category, his_publish_article_list, create_timestamp)
        values
        <foreach collection="list" item="item" separator=",">
            (#{item.sourceId}, #{item.type}, #{item.title}, #{item.titleMd5}, #{item.channelContentId},
             #{item.rootPublishTimestamp}, #{item.crawlerTitle}, #{item.category}, #{item.hisPublishArticleList},
             #{item.createTimestamp})
        </foreach>
    </insert>
+
 </mapper>