فهرست منبع

召回内容历史表现增加缓存

wangyunpeng 7 ماه پیش
والد
کامیت
2188995a0e

+ 2 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/longArticle/LongArticleBaseMapper.java

@@ -21,6 +21,8 @@ public interface LongArticleBaseMapper {
 
     void batchInsertArticlePoolPromotionSource(List<ArticlePoolPromotionSource> list);
 
+    /**
+     * Inserts every element of {@code list} into {@code article_title_his_cache} using the
+     * multi-row statement {@code batchInsertArticleTitleHisCache} declared in
+     * LongArticleBaseMapper.xml.
+     *
+     * <p>The mapped statement is a plain {@code insert}; an empty list would leave its
+     * {@code values} clause without any row, so callers are expected to pass a non-empty list.
+     *
+     * @param list cache rows to insert
+     */
+    void batchInsertArticleTitleHisCache(List<ArticleTitleHisCache> list);
+
     void updateRootProduceContentLevel(String rootProduceContentId, String level);
 
     void deleteDatastatScoreByDtIn(List<String> dateStrList);

+ 0 - 12
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/longArticle/ArticleTitleHisCache.java

@@ -29,18 +29,6 @@ public class ArticleTitleHisCache implements Serializable {
     private String category;
     @Column(name = "his_publish_article_list")
     private String hisPublishArticleList;
-    @Column(name = "t0_fission_by_fans_mean")
-    private Double t0FissionByFansMean;
-    @Column(name = "t0_fission_by_read_avg_mean")
-    private Double t0FissionByReadAvgMean;
-    @Column(name = "t0_fission_by_read_avg_correlation_mean")
-    private Double t0FissionByReadAvgCorrelationMean;
-    @Column(name = "t0_fission_by_fans_sum_avg")
-    private Double t0FissionByFansSumAvg;
-    @Column(name = "t0_fission_by_read_avg_sum_avg")
-    private Double t0FissionByReadAvgSumAvg;
-    @Column(name = "t0_fission_de_weight_by_read_avg_sum_avg")
-    private Double t0FissionDeWeightByReadAvgSumAvg;
     @Column(name = "create_timestamp")
     private Long createTimestamp;
     @Column(name = "update_timestamp")

+ 1 - 2
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/longArticle/ArticleTitleHisCacheRepository.java

@@ -5,10 +5,9 @@ import org.springframework.data.jpa.repository.JpaRepository;
 import org.springframework.stereotype.Repository;
 
 import java.util.List;
-import java.util.Set;
 
 @Repository
 public interface ArticleTitleHisCacheRepository extends JpaRepository<ArticleTitleHisCache, ArticleTitleHisCache.PK> {
 
-    List<ArticleTitleHisCache> getByTitleMd5InAndType(Set<String> titleMd5List, String type);
+    /**
+     * Spring Data derived query: returns the cache rows whose {@code titleMd5} is contained in
+     * {@code titleMd5List} and whose {@code type} equals {@code type}.
+     *
+     * <p>The parameter is a {@code List} so that callers can hand over one slice of a larger,
+     * partitioned list per call.
+     *
+     * @param titleMd5List title MD5 values to look up
+     * @param type         value matched against the {@code type} property
+     * @return the matching cache rows
+     */
+    List<ArticleTitleHisCache> getByTitleMd5InAndType(List<String> titleMd5List, String type);
 }

+ 0 - 6
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/XxlJobService.java

@@ -589,12 +589,6 @@ public class XxlJobService {
                 ArticleTitleHisCache cache = titleMap.get(titleMd5);
                 if (content != null) {
                     cache.setHisPublishArticleList(JSONObject.toJSONString(content.getHisPublishArticleList()));
-                    cache.setT0FissionByFansMean(content.getT0FissionByFansMean());
-                    cache.setT0FissionByReadAvgMean(content.getT0FissionByReadAvgMean());
-                    cache.setT0FissionByReadAvgCorrelationMean(content.getT0FissionByReadAvgCorrelationMean());
-                    cache.setT0FissionByFansSumAvg(content.getT0FissionByFansSumAvg());
-                    cache.setT0FissionByReadAvgSumAvg(content.getT0FissionByReadAvgSumAvg());
-                    cache.setT0FissionDeWeightByReadAvgSumAvg(content.getT0FissionDeWeightByReadAvgSumAvg());
                     cache.setUpdateTimestamp(System.currentTimeMillis());
                     articleTitleHisCacheRepository.save(cache);
                 }

+ 61 - 53
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/recall/RecallService.java

@@ -7,12 +7,9 @@ import com.tzld.longarticle.recommend.server.common.CostMonitor;
 import com.tzld.longarticle.recommend.server.common.ThreadPoolFactory;
 import com.tzld.longarticle.recommend.server.common.enums.StatusEnum;
 import com.tzld.longarticle.recommend.server.common.enums.aigc.PublishPlanInputSourceTypesEnum;
-import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticleCategoryStatusEnum;
-import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticlePoolPromotionSourceStatusEnum;
-import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticleTypeEnum;
-import com.tzld.longarticle.recommend.server.common.enums.recommend.ContentPoolEnum;
-import com.tzld.longarticle.recommend.server.common.enums.recommend.FeishuRobotIdEnum;
+import com.tzld.longarticle.recommend.server.common.enums.recommend.*;
 import com.tzld.longarticle.recommend.server.mapper.crawler.CrawlerBaseMapper;
+import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
 import com.tzld.longarticle.recommend.server.model.dto.Content;
 import com.tzld.longarticle.recommend.server.model.dto.ContentHisPublishArticle;
 import com.tzld.longarticle.recommend.server.model.entity.aigc.CrawlerMetaArticle;
@@ -21,11 +18,7 @@ import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountAvgInfo
 import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountCorrelation;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.Article;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.ArticleDetailInfo;
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.AccountCategory;
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleCategory;
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticlePoolPromotionSource;
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.PublishSingleVideoSource;
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleTitleHisCache;
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.*;
 import com.tzld.longarticle.recommend.server.model.param.TitleHisCacheParam;
 import com.tzld.longarticle.recommend.server.remote.aigc.AIGCWaitingPublishContentService;
 import com.tzld.longarticle.recommend.server.repository.aigc.CrawlerMetaArticleRepository;
@@ -34,11 +27,7 @@ import com.tzld.longarticle.recommend.server.repository.crawler.AccountAvgInfoRe
 import com.tzld.longarticle.recommend.server.repository.crawler.AccountCorrelationRepository;
 import com.tzld.longarticle.recommend.server.repository.crawler.ArticleDetailInfoRepository;
 import com.tzld.longarticle.recommend.server.repository.crawler.ArticleRepository;
-import com.tzld.longarticle.recommend.server.repository.longArticle.AccountCategoryRepository;
-import com.tzld.longarticle.recommend.server.repository.longArticle.ArticleCategoryRepository;
-import com.tzld.longarticle.recommend.server.repository.longArticle.ArticlePoolPromotionSourceRepository;
-import com.tzld.longarticle.recommend.server.repository.longArticle.PublishSingleVideoSourceRepository;
-import com.tzld.longarticle.recommend.server.repository.longArticle.ArticleTitleHisCacheRepository;
+import com.tzld.longarticle.recommend.server.repository.longArticle.*;
 import com.tzld.longarticle.recommend.server.service.recommend.config.AccountIndexAvgViewCountService;
 import com.tzld.longarticle.recommend.server.service.recommend.recall.strategy.DefaultRecallStrategy;
 import com.tzld.longarticle.recommend.server.service.recommend.score.ScoreStrategy;
@@ -102,6 +91,8 @@ public class RecallService implements ApplicationContextAware {
     PublishSingleVideoSourceRepository publishSingleVideoSourceRepository;
     @Autowired
     ArticleTitleHisCacheRepository articleTitleHisCacheRepository;
+    @Autowired
+    LongArticleBaseMapper longArticleBaseMapper;
 
     private final Map<String, RecallStrategy> strategyMap = new HashMap<>();
     private ApplicationContext applicationContext;
@@ -209,10 +200,10 @@ public class RecallService implements ApplicationContextAware {
             return;
         }
         Map<String, Content> contentMap = contentList.stream()
-               .collect(Collectors.toMap(Content::getSourceId, Function.identity()));
+                .collect(Collectors.toMap(Content::getSourceId, Function.identity()));
         List<PublishSingleVideoSource> sourceList = publishSingleVideoSourceRepository.getByContentTraceIdIn(contentTraceIds);
         Map<String, PublishSingleVideoSource> sourceMap = sourceList.stream()
-              .collect(Collectors.toMap(PublishSingleVideoSource::getContentTraceId, Function.identity()));
+                .collect(Collectors.toMap(PublishSingleVideoSource::getContentTraceId, Function.identity()));
         for (String contentTraceId : contentTraceIds) {
             Content content = contentMap.get(contentTraceId);
             PublishSingleVideoSource source = sourceMap.get(contentTraceId);
@@ -306,12 +297,14 @@ public class RecallService implements ApplicationContextAware {
 
     public void setTitleAvgViewCount(List<Content> contentList, String ghId, String type) {
         long start = System.currentTimeMillis();
-        Set<String> titleMd5List = contentList.stream().map(Content::getTitleMd5).collect(Collectors.toSet());
+        List<String> titleMd5List = contentList.stream().map(Content::getTitleMd5).distinct().collect(Collectors.toList());
         Map<String, Content> md5ContentMap = contentList.stream().collect(
                 Collectors.toMap(Content::getTitleMd5, Function.identity(), (o1, o2) -> o2));
         // 根据titleMd5查询数据库获取数据
-        List<ArticleTitleHisCache> articleTitleHisCacheList = articleTitleHisCacheRepository
-                .getByTitleMd5InAndType(titleMd5List, type);
+        List<ArticleTitleHisCache> articleTitleHisCacheList = new ArrayList<>();
+        for (List<String> partition : Lists.partition(titleMd5List, 1000)) {
+            articleTitleHisCacheList.addAll(articleTitleHisCacheRepository.getByTitleMd5InAndType(partition, type));
+        }
         Map<String, ArticleTitleHisCache> articleTitleHisCacheMap = articleTitleHisCacheList.stream()
                 .collect(Collectors.toMap(ArticleTitleHisCache::getTitleMd5, Function.identity()));
         // titleMd5 进行过滤 排除缓存中数据 重新走下方查询
@@ -330,6 +323,7 @@ public class RecallService implements ApplicationContextAware {
             return cacheParam;
         }).collect(Collectors.toList());
         Map<String, Content> hisArticleCacheMap = getArticleTitleHisCacheMap(paramList, type);
+        List<Content> saveList = new ArrayList<>();
         for (Content content : contentList) {
             if (articleTitleHisCacheMap.containsKey(content.getTitleMd5())) {
                 ArticleTitleHisCache cache = articleTitleHisCacheMap.get(content.getTitleMd5());
@@ -339,12 +333,7 @@ public class RecallService implements ApplicationContextAware {
                     article.setCorrelation(Optional.ofNullable(accountCorrelationMap.get(article.getGhId())).orElse(0.0));
                 }
                 content.setHisPublishArticleList(hisPublishArticleList);
-                content.setT0FissionByFansMean(cache.getT0FissionByFansMean());
-                content.setT0FissionByReadAvgMean(cache.getT0FissionByReadAvgMean());
-                content.setT0FissionByReadAvgCorrelationMean(cache.getT0FissionByReadAvgCorrelationMean());
-                content.setT0FissionDeWeightByReadAvgSumAvg(cache.getT0FissionDeWeightByReadAvgSumAvg());
-                content.setT0FissionByFansSumAvg(cache.getT0FissionByFansSumAvg());
-                content.setT0FissionByReadAvgSumAvg(cache.getT0FissionByReadAvgSumAvg());
+                setT0Data(content);
                 continue;
             }
             if (hisArticleCacheMap.containsKey(content.getTitleMd5())) {
@@ -353,28 +342,42 @@ public class RecallService implements ApplicationContextAware {
                 for (ContentHisPublishArticle article : content.getHisPublishArticleList()) {
                     article.setCorrelation(Optional.ofNullable(accountCorrelationMap.get(article.getGhId())).orElse(0.0));
                 }
-                content.setT0FissionByFansMean(cache.getT0FissionByFansMean());
-                content.setT0FissionByReadAvgMean(cache.getT0FissionByReadAvgMean());
-                content.setT0FissionByReadAvgCorrelationMean(cache.getT0FissionByReadAvgCorrelationMean());
-                content.setT0FissionDeWeightByReadAvgSumAvg(cache.getT0FissionDeWeightByReadAvgSumAvg());
-                content.setT0FissionByFansSumAvg(cache.getT0FissionByFansSumAvg());
-                content.setT0FissionByReadAvgSumAvg(cache.getT0FissionByReadAvgSumAvg());
+                setT0Data(content);
             }
-            // 写入缓存
-            saveArticleTitleHisCache(content);
+            saveList.add(content);
         }
+        // 写入缓存
+        saveArticleTitleHisCache(saveList, type);
         log.info("setTitleAvgViewCount cost:{}", System.currentTimeMillis() - start);
     }
 
-    private void saveArticleTitleHisCache(Content content) {
-        if (CollectionUtils.isEmpty(content.getHisPublishArticleList())) {
+    /**
+     * Persists history-performance cache rows for the given contents with a single batch insert.
+     *
+     * <p>Contents without any history publish articles are skipped. When several contents share
+     * the same {@code titleMd5}, only the first one is written, so one batch never carries two
+     * rows for the same title and type. Failures are logged and not propagated: writing the
+     * cache is best-effort and must not break the calling flow.
+     *
+     * @param saveList contents that were not served from the cache; may be empty or null
+     * @param type     article type stored on every cache row
+     */
+    private void saveArticleTitleHisCache(List<Content> saveList, String type) {
+        if (CollectionUtils.isEmpty(saveList)) {
             return;
         }
-        ArticleTitleHisCache cache = new ArticleTitleHisCache();
-        BeanUtils.copyProperties(content, cache);
-        cache.setHisPublishArticleList(JSONObject.toJSONString(content.getHisPublishArticleList()));
-        cache.setCreateTimestamp(System.currentTimeMillis());
-        articleTitleHisCacheRepository.save(cache);
+        // Keyed by titleMd5: the input may repeat a title, and a repeated (title_md5, type) row in
+        // one multi-row insert would presumably violate the table key and fail the whole batch.
+        Map<String, ArticleTitleHisCache> cacheByTitleMd5 = new HashMap<>();
+        try {
+            for (Content content : saveList) {
+                if (CollectionUtils.isEmpty(content.getHisPublishArticleList())
+                        || cacheByTitleMd5.containsKey(content.getTitleMd5())) {
+                    continue;
+                }
+                ArticleTitleHisCache cache = new ArticleTitleHisCache();
+                BeanUtils.copyProperties(content, cache);
+                cache.setType(type);
+                // category and the history list are set explicitly as JSON strings
+                if (CollectionUtils.isNotEmpty(content.getCategory())) {
+                    cache.setCategory(JSONObject.toJSONString(content.getCategory()));
+                }
+                cache.setHisPublishArticleList(JSONObject.toJSONString(content.getHisPublishArticleList()));
+                cache.setCreateTimestamp(System.currentTimeMillis());
+                cacheByTitleMd5.put(content.getTitleMd5(), cache);
+            }
+            if (cacheByTitleMd5.isEmpty()) {
+                return;
+            }
+            longArticleBaseMapper.batchInsertArticleTitleHisCache(new ArrayList<>(cacheByTitleMd5.values()));
+        } catch (Exception e) {
+            // Pass the exception as the trailing argument so the stack trace is logged as well.
+            log.error("saveArticleTitleHisCache error:{}", e.getMessage(), e);
+        }
     }
 
     public Map<String, Content> getArticleTitleHisCacheMap(List<TitleHisCacheParam> paramList, String type) {
@@ -475,15 +478,18 @@ public class RecallService implements ApplicationContextAware {
                         article.setInnerAccount(true);
                         avgViewCount = Optional.ofNullable(indexMap.get(hisArticle.getItemIndex().toString()).getReadAvg())
                                 .orElse(0.0).intValue();
-//                    } else {
-//                        if (ArticleTypeEnum.QUNFA.getVal().equals(type)) {
+                    } else {
+                        if (ArticleTypeEnum.QUNFA.getVal().equals(type)) {
+                            log.error("历史表现阅读均值获取失败 ghId:{} accountName:{} date:{} index:{}",
+                                    hisArticle.getGhId(), hisArticle.getAccountName(), hisPublishDate,
+                                    hisArticle.getItemIndex());
 //                            FeishuMessageSender.sendWebHookMessage(FeishuRobotIdEnum.RECOMMEND.getRobotId(),
 //                                    "历史表现阅读均值获取失败\n"
 //                                            + "ghId: " + hisArticle.getGhId() + "\n"
 //                                            + "账号名称: " + hisArticle.getAccountName() + "\n"
 //                                            + "日期: " + hisPublishDate + "\n"
 //                                            + "位置: " + hisArticle.getItemIndex());
-//                        }
+                        }
                     }
                 }
                 article.setAvgViewCount(avgViewCount);
@@ -512,7 +518,6 @@ public class RecallService implements ApplicationContextAware {
                 res.getHisPublishArticleList().add(article);
             }
             // 设置头条阅读均值
-            setT0Data(res);
             result.put(cacheParam.getTitleMd5(), res);
         }
         return result;
@@ -537,14 +542,17 @@ public class RecallService implements ApplicationContextAware {
             if (CollectionUtils.isEmpty(article.getArticleDetailInfoList())) {
                 // 仅判断7.12以后发布文章
                 if (article.getPublishTimestamp() > 1720713600 && contentHisFeishuEnable) {
-                    FeishuMessageSender.sendWebHookMessage(FeishuRobotIdEnum.RECOMMEND.getRobotId(),
-                            "历史表现裂变特征获取失败\n"
-                                    + "ghId: " + article.getGhId() + "\n"
-                                    + "账号名称: " + article.getAccountName() + "\n"
-                                    + "位置: " + article.getItemIndex() + "\n"
-                                    + "标题: " + article.getTitle() + "\n"
-                                    + "发布时间: " + DateUtils.timestampToYMDStr(article.getPublishTimestamp(), "yyyyMMdd") + "\n"
-                                    + "wxsn: " + article.getWxSn());
+                    log.error("历史表现裂变特征获取失败 ghId:{} accountName:{} itemIndex:{} title:{} date:{} wxsn:{}",
+                            article.getGhId(), article.getAccountName(), article.getItemIndex(), article.getTitle(),
+                            DateUtils.timestampToYMDStr(article.getPublishTimestamp(), "yyyyMMdd"), article.getWxSn());
+//                    FeishuMessageSender.sendWebHookMessage(FeishuRobotIdEnum.RECOMMEND.getRobotId(),
+//                            "历史表现裂变特征获取失败\n"
+//                                    + "ghId: " + article.getGhId() + "\n"
+//                                    + "账号名称: " + article.getAccountName() + "\n"
+//                                    + "位置: " + article.getItemIndex() + "\n"
+//                                    + "标题: " + article.getTitle() + "\n"
+//                                    + "发布时间: " + DateUtils.timestampToYMDStr(article.getPublishTimestamp(), "yyyyMMdd") + "\n"
+//                                    + "wxsn: " + article.getWxSn());
                 }
                 continue;
             }

+ 10 - 0
long-article-recommend-service/src/main/resources/mapper/longArticle/LongArticleBaseMapper.xml

@@ -243,4 +243,14 @@
         select title from cold_start_title_pool where status in (-1, 1)
     </select>
 
+    <!--
+        Multi-row insert into article_title_his_cache: one VALUES tuple per element of "list".
+        Only title_md5, type, title, crawler_title, category, his_publish_article_list and
+        create_timestamp are written; update_timestamp is not set by this statement.
+        The list must be non-empty, otherwise the VALUES clause has no tuple.
+        NOTE(review): this is a plain insert. If (title_md5, type) is the table key, a duplicate
+        inside the list or an already existing row fails the whole statement. TODO confirm the
+        key and whether an ignore/upsert variant is wanted.
+    -->
+    <insert id="batchInsertArticleTitleHisCache">
+        insert into article_title_his_cache
+        (title_md5, type, title, crawler_title, category, his_publish_article_list, create_timestamp)
+        values
+        <foreach collection="list" item="item" separator=",">
+            (#{item.titleMd5}, #{item.type}, #{item.title}, #{item.crawlerTitle}, #{item.category},
+             #{item.hisPublishArticleList}, #{item.createTimestamp})
+        </foreach>
+    </insert>
+
 </mapper>