|
@@ -1,7 +1,9 @@
|
|
|
package com.tzld.longarticle.recommend.server.service.recommend.recall;
|
|
|
|
|
|
+import com.alibaba.fastjson.JSONObject;
|
|
|
import com.google.common.collect.Lists;
|
|
|
import com.tzld.longarticle.recommend.server.common.ThreadPoolFactory;
|
|
|
+import com.tzld.longarticle.recommend.server.common.enums.StatusEnum;
|
|
|
import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticleTypeEnum;
|
|
|
import com.tzld.longarticle.recommend.server.common.enums.recommend.FeishuRobotIdEnum;
|
|
|
import com.tzld.longarticle.recommend.server.mapper.crawler.CrawlerBaseMapper;
|
|
@@ -13,6 +15,7 @@ import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountAvgInfo
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountCorrelation;
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.crawler.Article;
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.crawler.ArticleDetailInfo;
|
|
|
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.AccountCategory;
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleCategory;
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticlePoolPromotionSource;
|
|
|
import com.tzld.longarticle.recommend.server.remote.aigc.AIGCWaitingPublishContentService;
|
|
@@ -22,6 +25,7 @@ import com.tzld.longarticle.recommend.server.repository.crawler.AccountAvgInfoRe
|
|
|
import com.tzld.longarticle.recommend.server.repository.crawler.AccountCorrelationRepository;
|
|
|
import com.tzld.longarticle.recommend.server.repository.crawler.ArticleDetailInfoRepository;
|
|
|
import com.tzld.longarticle.recommend.server.repository.crawler.ArticleRepository;
|
|
|
+import com.tzld.longarticle.recommend.server.repository.longArticle.AccountCategoryRepository;
|
|
|
import com.tzld.longarticle.recommend.server.repository.longArticle.ArticleCategoryRepository;
|
|
|
import com.tzld.longarticle.recommend.server.repository.longArticle.ArticlePoolPromotionSourceRepository;
|
|
|
import com.tzld.longarticle.recommend.server.service.recommend.config.AccountIndexAvgViewCountService;
|
|
@@ -41,6 +45,7 @@ import org.springframework.beans.factory.annotation.Value;
|
|
|
import org.springframework.context.ApplicationContext;
|
|
|
import org.springframework.context.ApplicationContextAware;
|
|
|
import org.springframework.stereotype.Service;
|
|
|
+import org.springframework.util.StringUtils;
|
|
|
|
|
|
import javax.annotation.PostConstruct;
|
|
|
import java.util.*;
|
|
@@ -80,6 +85,8 @@ public class RecallService implements ApplicationContextAware {
|
|
|
ArticleCategoryRepository articleCategoryRepository;
|
|
|
@Autowired
|
|
|
PublishContentRepository publishContentRepository;
|
|
|
+ @Autowired
|
|
|
+ AccountCategoryRepository accountCategoryRepository;
|
|
|
|
|
|
private final Map<String, RecallStrategy> strategyMap = new HashMap<>();
|
|
|
private ApplicationContext applicationContext;
|
|
@@ -163,31 +170,14 @@ public class RecallService implements ApplicationContextAware {
|
|
|
+ "账号名称: " + param.getAccountName());
|
|
|
return content;
|
|
|
}
|
|
|
- // 标题历史均值
|
|
|
- setTitleAvgViewCount(content, param.getGhId(), param.getType());
|
|
|
// category 查询
|
|
|
setContentCategory(content);
|
|
|
+ // 标题历史均值
|
|
|
+ setTitleAvgViewCount(content, param.getGhId(), param.getType());
|
|
|
return content;
|
|
|
}
|
|
|
|
|
|
public void setContentCategory(List<Content> contentList) {
|
|
|
-// Map<String, String> articleMd5Map = new HashMap<>();
|
|
|
-// List<String> md5List = new ArrayList<>();
|
|
|
-// for (Content content : contentList) {
|
|
|
-// String md5 = generateArticleUniqueMd5(content.getCrawlerLink());
|
|
|
-// md5List.add(md5);
|
|
|
-// articleMd5Map.put(content.getId(), md5);
|
|
|
-// }
|
|
|
-// List<CrawlerMetaArticle> categoryList = getByUniqueIndexIn(md5List);
|
|
|
-// if (CollectionUtils.isEmpty(categoryList)) {
|
|
|
-// return;
|
|
|
-// }
|
|
|
-// Map<String, List<String>> categoryMap = categoryList.stream().collect(Collectors.groupingBy(CrawlerMetaArticle::getUniqueIndex,
|
|
|
-// Collectors.mapping(CrawlerMetaArticle::getCategory, Collectors.toList())));
|
|
|
-// for (Content content : contentList) {
|
|
|
-// String md5 = articleMd5Map.get(content.getId());
|
|
|
-// content.setCategory(categoryMap.get(md5));
|
|
|
-// }
|
|
|
List<String> channelContentIds = contentList.stream().map(Content::getCrawlerChannelContentId).collect(Collectors.toList());
|
|
|
// 查询晋升rootProduceContentId
|
|
|
List<ArticlePoolPromotionSource> sourceList = articlePoolPromotionSourceRepository.getByChannelContentIdIn(channelContentIds);
|
|
@@ -195,7 +185,6 @@ public class RecallService implements ApplicationContextAware {
|
|
|
List<String> publishContentIds = sourceList.stream().map(ArticlePoolPromotionSource::getRootPublishContentId).collect(Collectors.toList());
|
|
|
List<PublishContent> publishContentList = publishContentRepository.getByIdIn(publishContentIds);
|
|
|
Map<String, PublishContent> publishContentMap = publishContentList.stream().collect(Collectors.toMap(PublishContent::getId, Function.identity()));
|
|
|
- List<String> produceContentIds = sourceMap.values().stream().map(ArticlePoolPromotionSource::getRootProduceContentId).collect(Collectors.toList());
|
|
|
// 根据produceContentId查询category
|
|
|
List<ArticleCategory> articleCategoryList = articleCategoryRepository.findAll();
|
|
|
Map<String, ArticleCategory> categoryMap = articleCategoryList.stream().collect(Collectors.toMap(ArticleCategory::getProduceContentId, Function.identity()));
|
|
@@ -302,6 +291,9 @@ public class RecallService implements ApplicationContextAware {
|
|
|
.filter(o -> "1".equals(o.getPosition()))
|
|
|
.filter(o -> Objects.nonNull(o.getReadAvg()) && o.getReadAvg() > 0 && o.getFans() > 1000)
|
|
|
.collect(Collectors.groupingBy(AccountAvgInfo::getGhId, Collectors.toMap(AccountAvgInfo::getUpdateTime, o -> o)));
|
|
|
+ List<AccountCategory> accountCategoryList = accountCategoryRepository.getByStatus(StatusEnum.ONE.getCode());
|
|
|
+ Map<String, JSONObject> accountCategoryMap = accountCategoryList.stream().filter(o -> StringUtils.hasText(o.getCategoryMap()))
|
|
|
+ .collect(Collectors.toMap(AccountCategory::getGhId, o -> JSONObject.parseObject(o.getCategoryMap())));
|
|
|
for (Content content : contentList) {
|
|
|
List<Article> hisArticles = new ArrayList<>();
|
|
|
Map<Integer, List<Article>> indexArticleMap = map.get(content.getTitle());
|
|
@@ -332,6 +324,17 @@ public class RecallService implements ApplicationContextAware {
|
|
|
if (ScoreStrategy.hisContentLateFilter(hisArticle.getPublishTimestamp())) {
|
|
|
continue;
|
|
|
}
|
|
|
+ // 历史表现 文章品类如果与历史发布账号负相关 则过滤,不计算该历史发布表现
|
|
|
+ JSONObject categoryWeightMap = accountCategoryMap.get(hisArticle.getGhId());
|
|
|
+ if (Objects.nonNull(categoryWeightMap) && CollectionUtils.isNotEmpty(content.getCategory())) {
|
|
|
+ String category = content.getCategory().get(0);
|
|
|
+ if (categoryWeightMap.containsKey(category)) {
|
|
|
+ double weight = categoryWeightMap.getDoubleValue(category);
|
|
|
+ if (weight < 0) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
ContentHisPublishArticle article = new ContentHisPublishArticle();
|
|
|
BeanUtils.copyProperties(hisArticle, article);
|
|
|
article.setViewCount(hisArticle.getShowViewCount());
|