|
@@ -23,6 +23,7 @@ import com.tzld.longarticle.recommend.server.model.entity.longArticle.AccountCat
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleCategory;
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticlePoolPromotionSource;
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleTitleHisCache;
|
|
|
+import com.tzld.longarticle.recommend.server.model.param.TitleHisCacheParam;
|
|
|
import com.tzld.longarticle.recommend.server.remote.aigc.AIGCWaitingPublishContentService;
|
|
|
import com.tzld.longarticle.recommend.server.repository.aigc.CrawlerMetaArticleRepository;
|
|
|
import com.tzld.longarticle.recommend.server.repository.aigc.PublishContentRepository;
|
|
@@ -274,6 +275,8 @@ public class RecallService implements ApplicationContextAware {
|
|
|
public void setTitleAvgViewCount(List<Content> contentList, String ghId, String type) {
|
|
|
long start = System.currentTimeMillis();
|
|
|
Set<String> titleMd5List = contentList.stream().map(Content::getTitleMd5).collect(Collectors.toSet());
|
|
|
+ Map<String, Content> md5ContentMap = contentList.stream().collect(
|
|
|
+ Collectors.toMap(Content::getTitleMd5, Function.identity(), (o1, o2) -> o2));
|
|
|
// 根据titleMd5查询数据库获取数据
|
|
|
List<ArticleTitleHisCache> articleTitleHisCacheList = articleTitleHisCacheRepository
|
|
|
.getByTitleMd5InAndType(titleMd5List, type);
|
|
@@ -285,7 +288,16 @@ public class RecallService implements ApplicationContextAware {
|
|
|
List<AccountCorrelation> accountCorrelationList = accountCorrelationRepository.findByGhIdAndStatus(ghId, 1);
|
|
|
Map<String, Double> accountCorrelationMap = accountCorrelationList.stream().collect(
|
|
|
Collectors.toMap(AccountCorrelation::getRelGhId, AccountCorrelation::getCorrelation));
|
|
|
- Map<String, Content> hisArticleCacheMap = getArticleTitleHisCacheMap(titleMd5List, type);
|
|
|
+ List<TitleHisCacheParam> paramList = titleMd5List.stream().map(titleMd5 -> {
|
|
|
+ Content content = md5ContentMap.get(titleMd5);
|
|
|
+ TitleHisCacheParam cacheParam = new TitleHisCacheParam();
|
|
|
+ cacheParam.setTitleMd5(titleMd5);
|
|
|
+ cacheParam.setTitle(content.getTitle());
|
|
|
+ cacheParam.setCrawlerTitle(content.getCrawlerTitle());
|
|
|
+ cacheParam.setCategory(content.getCategory());
|
|
|
+ return cacheParam;
|
|
|
+ }).collect(Collectors.toList());
|
|
|
+ Map<String, Content> hisArticleCacheMap = getArticleTitleHisCacheMap(paramList, type);
|
|
|
for (Content content : contentList) {
|
|
|
if (articleTitleHisCacheMap.containsKey(content.getTitleMd5())) {
|
|
|
ArticleTitleHisCache cache = articleTitleHisCacheMap.get(content.getTitleMd5());
|
|
@@ -333,8 +345,9 @@ public class RecallService implements ApplicationContextAware {
|
|
|
articleTitleHisCacheRepository.save(cache);
|
|
|
}
|
|
|
|
|
|
- public Map<String, Content> getArticleTitleHisCacheMap(Set<String> titleMd5List, String type) {
|
|
|
+ public Map<String, Content> getArticleTitleHisCacheMap(List<TitleHisCacheParam> paramList, String type) {
|
|
|
Map<String, Content> result = new HashMap<>();
|
|
|
+ List<String> titleMd5List = paramList.stream().map(TitleHisCacheParam::getTitleMd5).collect(Collectors.toList());
|
|
|
// 获取历史已发布文章
|
|
|
List<Article> hisArticleList = new ArrayList<>();
|
|
|
List<List<String>> titleMd5Partition = Lists.partition(new ArrayList<>(titleMd5List), 1000);
|
|
@@ -371,14 +384,14 @@ public class RecallService implements ApplicationContextAware {
|
|
|
List<AccountCategory> accountCategoryList = accountCategoryRepository.getByStatus(StatusEnum.ONE.getCode());
|
|
|
Map<String, JSONObject> accountCategoryMap = accountCategoryList.stream().filter(o -> StringUtils.hasText(o.getCategoryMap()))
|
|
|
.collect(Collectors.toMap(AccountCategory::getGhId, o -> JSONObject.parseObject(o.getCategoryMap())));
|
|
|
- for (String titleMd5 : titleMd5List) {
|
|
|
+ for (TitleHisCacheParam cacheParam : paramList) {
|
|
|
Content res = new Content();
|
|
|
List<Article> hisArticles = new ArrayList<>();
|
|
|
- Map<Integer, List<Article>> indexArticleMap = map.get(content.getTitle());
|
|
|
+ Map<Integer, List<Article>> indexArticleMap = map.get(cacheParam.getTitle());
|
|
|
if (Objects.isNull(indexArticleMap)) {
|
|
|
- indexArticleMap = map.get(content.getCrawlerTitle());
|
|
|
- } else if (!content.getTitle().equals(content.getCrawlerTitle())) {
|
|
|
- Map<Integer, List<Article>> crawlerTitleIndexArticleMap = map.get(content.getCrawlerTitle());
|
|
|
+ indexArticleMap = map.get(cacheParam.getCrawlerTitle());
|
|
|
+ } else if (!cacheParam.getTitle().equals(cacheParam.getCrawlerTitle())) {
|
|
|
+ Map<Integer, List<Article>> crawlerTitleIndexArticleMap = map.get(cacheParam.getCrawlerTitle());
|
|
|
if (Objects.nonNull(crawlerTitleIndexArticleMap)) {
|
|
|
for (Map.Entry<Integer, List<Article>> entry : crawlerTitleIndexArticleMap.entrySet()) {
|
|
|
if (indexArticleMap.containsKey(entry.getKey())) {
|
|
@@ -404,8 +417,8 @@ public class RecallService implements ApplicationContextAware {
|
|
|
}
|
|
|
// 历史表现 文章品类如果与历史发布账号负相关 则过滤,不计算该历史发布表现
|
|
|
JSONObject categoryWeightMap = accountCategoryMap.get(hisArticle.getGhId());
|
|
|
- if (Objects.nonNull(categoryWeightMap) && CollectionUtils.isNotEmpty(content.getCategory())) {
|
|
|
- String category = content.getCategory().get(0);
|
|
|
+ if (Objects.nonNull(categoryWeightMap) && CollectionUtils.isNotEmpty(cacheParam.getCategory())) {
|
|
|
+ String category = cacheParam.getCategory().get(0);
|
|
|
if (categoryWeightMap.containsKey(category)) {
|
|
|
double weight = categoryWeightMap.getDoubleValue(category);
|
|
|
if (weight < 0) {
|
|
@@ -468,7 +481,7 @@ public class RecallService implements ApplicationContextAware {
|
|
|
}
|
|
|
// 设置头条阅读均值
|
|
|
setT0Data(res);
|
|
|
- result.put(titleMd5, res);
|
|
|
+ result.put(cacheParam.getTitleMd5(), res);
|
|
|
}
|
|
|
return result;
|
|
|
}
|