|
@@ -1,5 +1,6 @@
|
|
|
package com.tzld.longarticle.recommend.server.service.recommend.recall;
|
|
|
|
|
|
+import com.alibaba.fastjson.JSONArray;
|
|
|
import com.alibaba.fastjson.JSONObject;
|
|
|
import com.google.common.collect.Lists;
|
|
|
import com.tzld.longarticle.recommend.server.common.CostMonitor;
|
|
@@ -21,6 +22,7 @@ import com.tzld.longarticle.recommend.server.model.entity.crawler.ArticleDetailI
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.longArticle.AccountCategory;
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleCategory;
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticlePoolPromotionSource;
|
|
|
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleTitleHisCache;
|
|
|
import com.tzld.longarticle.recommend.server.remote.aigc.AIGCWaitingPublishContentService;
|
|
|
import com.tzld.longarticle.recommend.server.repository.aigc.CrawlerMetaArticleRepository;
|
|
|
import com.tzld.longarticle.recommend.server.repository.aigc.PublishContentRepository;
|
|
@@ -31,6 +33,7 @@ import com.tzld.longarticle.recommend.server.repository.crawler.ArticleRepositor
|
|
|
import com.tzld.longarticle.recommend.server.repository.longArticle.AccountCategoryRepository;
|
|
|
import com.tzld.longarticle.recommend.server.repository.longArticle.ArticleCategoryRepository;
|
|
|
import com.tzld.longarticle.recommend.server.repository.longArticle.ArticlePoolPromotionSourceRepository;
|
|
|
+import com.tzld.longarticle.recommend.server.repository.longArticle.ArticleTitleHisCacheRepository;
|
|
|
import com.tzld.longarticle.recommend.server.service.recommend.config.AccountIndexAvgViewCountService;
|
|
|
import com.tzld.longarticle.recommend.server.service.recommend.recall.strategy.DefaultRecallStrategy;
|
|
|
import com.tzld.longarticle.recommend.server.service.recommend.score.ScoreStrategy;
|
|
@@ -90,6 +93,8 @@ public class RecallService implements ApplicationContextAware {
|
|
|
PublishContentRepository publishContentRepository;
|
|
|
@Autowired
|
|
|
AccountCategoryRepository accountCategoryRepository;
|
|
|
+ @Autowired
|
|
|
+ ArticleTitleHisCacheRepository articleTitleHisCacheRepository;
|
|
|
|
|
|
private final Map<String, RecallStrategy> strategyMap = new HashMap<>();
|
|
|
private ApplicationContext applicationContext;
|
|
@@ -188,6 +193,7 @@ public class RecallService implements ApplicationContextAware {
|
|
|
}
|
|
|
|
|
|
public void setContentCategory(List<Content> contentList) {
|
|
|
+ contentList.forEach(content -> content.setTitleMd5(Md5Util.encoderByMd5(content.getTitle())));
|
|
|
List<String> channelContentIds = contentList.stream().map(Content::getCrawlerChannelContentId)
|
|
|
.collect(Collectors.toList());
|
|
|
// 查询晋升rootProduceContentId
|
|
@@ -223,8 +229,7 @@ public class RecallService implements ApplicationContextAware {
|
|
|
category = coldStartCategoryMap.get(content.getCrawlerChannelContentId());
|
|
|
}
|
|
|
if (Objects.isNull(category)) {
|
|
|
- String titleMd5 = Md5Util.encoderByMd5(content.getTitle());
|
|
|
- category = titleCategoryMap.get(titleMd5);
|
|
|
+ category = titleCategoryMap.get(content.getTitleMd5());
|
|
|
}
|
|
|
if (Objects.nonNull(category)) {
|
|
|
content.setCategory(Collections.singletonList(category.getCategory()));
|
|
@@ -268,17 +273,68 @@ public class RecallService implements ApplicationContextAware {
|
|
|
|
|
|
public void setTitleAvgViewCount(List<Content> contentList, String ghId, String type) {
|
|
|
long start = System.currentTimeMillis();
|
|
|
-
|
|
|
- Set<String> titleMd5List = contentList.stream().map(o -> Md5Util.encoderByMd5(o.getTitle())).collect(Collectors.toSet());
|
|
|
- // todo 根据titleMd5查询数据库获取数据
|
|
|
- // todo titleMd5 进行过滤 排除缓存中数据 重新走下方查询
|
|
|
+ Set<String> titleMd5List = contentList.stream().map(Content::getTitleMd5).collect(Collectors.toSet());
|
|
|
+ // 根据titleMd5查询数据库获取数据
|
|
|
+ List<ArticleTitleHisCache> articleTitleHisCacheList = articleTitleHisCacheRepository
|
|
|
+ .getByTitleMd5InAndType(titleMd5List, type);
|
|
|
+ Map<String, ArticleTitleHisCache> articleTitleHisCacheMap = articleTitleHisCacheList.stream()
|
|
|
+ .collect(Collectors.toMap(ArticleTitleHisCache::getTitleMd5, Function.identity()));
|
|
|
+ // titleMd5 进行过滤 排除缓存中数据 重新走下方查询
|
|
|
+ titleMd5List.removeIf(articleTitleHisCacheMap::containsKey);
|
|
|
// 获取账号相关性
|
|
|
List<AccountCorrelation> accountCorrelationList = accountCorrelationRepository.findByGhIdAndStatus(ghId, 1);
|
|
|
Map<String, Double> accountCorrelationMap = accountCorrelationList.stream().collect(
|
|
|
Collectors.toMap(AccountCorrelation::getRelGhId, AccountCorrelation::getCorrelation));
|
|
|
-// Set<String> titleList = contentList.stream().map(Content::getTitle).collect(Collectors.toSet());
|
|
|
-// Set<String> crawlerTitleList = contentList.stream().map(Content::getCrawlerTitle).collect(Collectors.toSet());
|
|
|
-// titleList.addAll(crawlerTitleList);
|
|
|
+ Map<String, Content> hisArticleCacheMap = getArticleTitleHisCacheMap(titleMd5List, type);
|
|
|
+ for (Content content : contentList) {
|
|
|
+ if (articleTitleHisCacheMap.containsKey(content.getTitleMd5())) {
|
|
|
+ ArticleTitleHisCache cache = articleTitleHisCacheMap.get(content.getTitleMd5());
|
|
|
+ List<ContentHisPublishArticle> hisPublishArticleList =
|
|
|
+ JSONArray.parseArray(cache.getHisPublishArticleList(), ContentHisPublishArticle.class);
|
|
|
+ for (ContentHisPublishArticle article : hisPublishArticleList) {
|
|
|
+ article.setCorrelation(Optional.ofNullable(accountCorrelationMap.get(article.getGhId())).orElse(0.0));
|
|
|
+ }
|
|
|
+ content.setHisPublishArticleList(hisPublishArticleList);
|
|
|
+ content.setT0FissionByFansMean(cache.getT0FissionByFansMean());
|
|
|
+ content.setT0FissionByReadAvgMean(cache.getT0FissionByReadAvgMean());
|
|
|
+ content.setT0FissionByReadAvgCorrelationMean(cache.getT0FissionByReadAvgCorrelationMean());
|
|
|
+ content.setT0FissionDeWeightByReadAvgSumAvg(cache.getT0FissionDeWeightByReadAvgSumAvg());
|
|
|
+ content.setT0FissionByFansSumAvg(cache.getT0FissionByFansSumAvg());
|
|
|
+ content.setT0FissionByReadAvgSumAvg(cache.getT0FissionByReadAvgSumAvg());
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ if (hisArticleCacheMap.containsKey(content.getTitleMd5())) {
|
|
|
+ Content cache = hisArticleCacheMap.get(content.getTitleMd5());
|
|
|
+ content.setHisPublishArticleList(cache.getHisPublishArticleList());
|
|
|
+ for (ContentHisPublishArticle article : content.getHisPublishArticleList()) {
|
|
|
+ article.setCorrelation(Optional.ofNullable(accountCorrelationMap.get(article.getGhId())).orElse(0.0));
|
|
|
+ }
|
|
|
+ content.setT0FissionByFansMean(cache.getT0FissionByFansMean());
|
|
|
+ content.setT0FissionByReadAvgMean(cache.getT0FissionByReadAvgMean());
|
|
|
+ content.setT0FissionByReadAvgCorrelationMean(cache.getT0FissionByReadAvgCorrelationMean());
|
|
|
+ content.setT0FissionDeWeightByReadAvgSumAvg(cache.getT0FissionDeWeightByReadAvgSumAvg());
|
|
|
+ content.setT0FissionByFansSumAvg(cache.getT0FissionByFansSumAvg());
|
|
|
+ content.setT0FissionByReadAvgSumAvg(cache.getT0FissionByReadAvgSumAvg());
|
|
|
+ }
|
|
|
+ // 写入缓存
|
|
|
+ saveArticleTitleHisCache(content);
|
|
|
+ }
|
|
|
+ log.info("setTitleAvgViewCount cost:{}", System.currentTimeMillis() - start);
|
|
|
+ }
|
|
|
+
|
|
|
+ private void saveArticleTitleHisCache(Content content) {
|
|
|
+ if (CollectionUtils.isEmpty(content.getHisPublishArticleList())) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ ArticleTitleHisCache cache = new ArticleTitleHisCache();
|
|
|
+ BeanUtils.copyProperties(content, cache);
|
|
|
+ cache.setHisPublishArticleList(JSONObject.toJSONString(content.getHisPublishArticleList()));
|
|
|
+ cache.setCreateTimestamp(System.currentTimeMillis());
|
|
|
+ articleTitleHisCacheRepository.save(cache);
|
|
|
+ }
|
|
|
+
|
|
|
+ public Map<String, Content> getArticleTitleHisCacheMap(Set<String> titleMd5List, String type) {
|
|
|
+ Map<String, Content> result = new HashMap<>();
|
|
|
// 获取历史已发布文章
|
|
|
List<Article> hisArticleList = new ArrayList<>();
|
|
|
List<List<String>> titleMd5Partition = Lists.partition(new ArrayList<>(titleMd5List), 1000);
|
|
@@ -315,7 +371,8 @@ public class RecallService implements ApplicationContextAware {
|
|
|
List<AccountCategory> accountCategoryList = accountCategoryRepository.getByStatus(StatusEnum.ONE.getCode());
|
|
|
Map<String, JSONObject> accountCategoryMap = accountCategoryList.stream().filter(o -> StringUtils.hasText(o.getCategoryMap()))
|
|
|
.collect(Collectors.toMap(AccountCategory::getGhId, o -> JSONObject.parseObject(o.getCategoryMap())));
|
|
|
- for (Content content : contentList) {
|
|
|
+ for (String titleMd5 : titleMd5List) {
|
|
|
+ Content res = new Content();
|
|
|
List<Article> hisArticles = new ArrayList<>();
|
|
|
Map<Integer, List<Article>> indexArticleMap = map.get(content.getTitle());
|
|
|
if (Objects.isNull(indexArticleMap)) {
|
|
@@ -340,7 +397,7 @@ public class RecallService implements ApplicationContextAware {
|
|
|
hisArticles.addAll(indexArticleList);
|
|
|
}
|
|
|
}
|
|
|
- content.setHisPublishArticleList(new ArrayList<>());
|
|
|
+ res.setHisPublishArticleList(new ArrayList<>());
|
|
|
for (Article hisArticle : hisArticles) {
|
|
|
if (ScoreStrategy.hisContentLateFilter(hisArticle.getPublishTimestamp())) {
|
|
|
continue;
|
|
@@ -407,14 +464,13 @@ public class RecallService implements ApplicationContextAware {
|
|
|
article.setFirstViewCountRate((firstArticle.getShowViewCount() * 1.0) / firstIndexAvgInfo.getReadAvg());
|
|
|
}
|
|
|
}
|
|
|
- article.setCorrelation(Optional.ofNullable(accountCorrelationMap.get(article.getGhId())).orElse(0.0));
|
|
|
- content.getHisPublishArticleList().add(article);
|
|
|
+ res.getHisPublishArticleList().add(article);
|
|
|
}
|
|
|
// 设置头条阅读均值
|
|
|
- setT0Data(content);
|
|
|
- // todo 写入缓存
|
|
|
+ setT0Data(res);
|
|
|
+ result.put(titleMd5, res);
|
|
|
}
|
|
|
- log.info("setTitleAvgViewCount cost:{}", System.currentTimeMillis() - start);
|
|
|
+ return result;
|
|
|
}
|
|
|
|
|
|
private void setT0Data(Content content) {
|