|
@@ -5,12 +5,16 @@ import com.tzld.longarticle.recommend.server.model.Content;
|
|
|
import com.tzld.longarticle.recommend.server.model.ContentHisPublishArticle;
|
|
|
import com.tzld.longarticle.recommend.server.remote.AIGCRemoteService;
|
|
|
import com.tzld.longarticle.recommend.server.repository.aigc.CrawlerMetaArticleRepository;
|
|
|
+import com.tzld.longarticle.recommend.server.repository.crawler.AccountAvgInfoRepository;
|
|
|
+import com.tzld.longarticle.recommend.server.repository.crawler.ArticleDetailInfoRepository;
|
|
|
import com.tzld.longarticle.recommend.server.repository.crawler.ArticleRepository;
|
|
|
import com.tzld.longarticle.recommend.server.repository.entity.aigc.CrawlerMetaArticle;
|
|
|
+import com.tzld.longarticle.recommend.server.repository.entity.crawler.AccountAvgInfo;
|
|
|
import com.tzld.longarticle.recommend.server.repository.entity.crawler.Article;
|
|
|
+import com.tzld.longarticle.recommend.server.repository.entity.crawler.ArticleDetailInfo;
|
|
|
+import com.tzld.longarticle.recommend.server.repository.mapper.crawler.CrawlerBaseMapper;
|
|
|
import com.tzld.longarticle.recommend.server.service.AccountIndexAvgViewCountService;
|
|
|
import com.tzld.longarticle.recommend.server.service.recall.strategy.DefaultRecallStrategy;
|
|
|
-import com.tzld.longarticle.recommend.server.service.score.AvgReadDTO;
|
|
|
import com.tzld.longarticle.recommend.server.util.CommonCollectionUtils;
|
|
|
import com.tzld.longarticle.recommend.server.util.JSONUtils;
|
|
|
import com.tzld.longarticle.recommend.server.util.Md5Util;
|
|
@@ -41,11 +45,17 @@ public class RecallService implements ApplicationContextAware {
|
|
|
@Autowired
|
|
|
ArticleRepository articleRepository;
|
|
|
@Autowired
|
|
|
+ AccountAvgInfoRepository accountAvgInfoRepository;
|
|
|
+ @Autowired
|
|
|
+ ArticleDetailInfoRepository articleDetailInfoRepository;
|
|
|
+ @Autowired
|
|
|
CrawlerMetaArticleRepository crawlerMetaArticleRepository;
|
|
|
@Autowired
|
|
|
AIGCRemoteService aigcRemoteService;
|
|
|
@Autowired
|
|
|
AccountIndexAvgViewCountService accountIndexAvgViewCountService;
|
|
|
+ @Autowired
|
|
|
+ CrawlerBaseMapper crawlerBaseMapper;
|
|
|
|
|
|
private final Map<String, RecallStrategy> strategyMap = new HashMap<>();
|
|
|
private ApplicationContext applicationContext;
|
|
@@ -177,12 +187,29 @@ public class RecallService implements ApplicationContextAware {
|
|
|
public void setTitleAvgViewCount(List<Content> contentList) {
|
|
|
long start = System.currentTimeMillis();
|
|
|
|
|
|
- List<String> titleList = contentList.stream().map(Content::getTitle).collect(Collectors.toList());
|
|
|
- List<String> crawlerTitleList = contentList.stream().map(Content::getCrawlerTitle).collect(Collectors.toList());
|
|
|
+ Set<String> titleList = contentList.stream().map(Content::getTitle).collect(Collectors.toSet());
|
|
|
+ Set<String> crawlerTitleList = contentList.stream().map(Content::getCrawlerTitle).collect(Collectors.toSet());
|
|
|
titleList.addAll(crawlerTitleList);
|
|
|
+ // 获取历史已发布文章
|
|
|
List<Article> hisArticleList = articleRepository.getByTitleIn(titleList);
|
|
|
Map<String, Map<Integer, List<Article>>> map = hisArticleList.stream()
|
|
|
.collect(Collectors.groupingBy(Article::getTitle, Collectors.groupingBy(Article::getItemIndex)));
|
|
|
+ Set<String> snList = hisArticleList.stream().map(Article::getWxSn).collect(Collectors.toSet());
|
|
|
+ List<ArticleDetailInfo> articleDetailInfoList = articleDetailInfoRepository.getAllByWxSnIn(snList);
|
|
|
+ Map<String, List<ArticleDetailInfo>> articleDetailInfoMap = articleDetailInfoList.stream()
|
|
|
+ .collect(Collectors.groupingBy(ArticleDetailInfo::getWxSn));
|
|
|
+ // 获取历史已发布文章所属头条内容
|
|
|
+ Set<String> ghIds = hisArticleList.stream().map(Article::getGhId).collect(Collectors.toSet());
|
|
|
+ Set<String> appMsgIds = hisArticleList.stream().map(Article::getAppMsgId).collect(Collectors.toSet());
|
|
|
+ List<Article> firstIndexHisArticleList = articleRepository.getByGhIdInAndAppMsgIdInAndItemIndex(ghIds, appMsgIds, 1);
|
|
|
+ Map<String, Map<String, Article>> firstIndexHisArticleMap = firstIndexHisArticleList.stream().collect(
|
|
|
+ Collectors.groupingBy(Article::getGhId, Collectors.toMap(Article::getAppMsgId, o -> o)));
|
|
|
+ // 获取发布账号 位置历史均值
|
|
|
+ List<AccountAvgInfo> accountAvgInfoList = accountAvgInfoRepository.getAllByGhIdIn(ghIds);
|
|
|
+ Map<String, Map<String, AccountAvgInfo>> accountAvgInfoIndexMap = accountAvgInfoList.stream().collect(
|
|
|
+ Collectors.groupingBy(AccountAvgInfo::getGhId, Collectors.toMap(AccountAvgInfo::getPosition, o -> o)));
|
|
|
+ Map<String, AccountAvgInfo> firstIndexAvgInfoMap = accountAvgInfoList.stream().filter(o -> "1".equals(o.getPosition()))
|
|
|
+ .collect(Collectors.toMap(AccountAvgInfo::getGhId, o -> o));
|
|
|
for (Content content : contentList) {
|
|
|
List<Article> hisArticles = new ArrayList<>();
|
|
|
Map<Integer, List<Article>> indexArticleMap = map.get(content.getTitle());
|
|
@@ -212,22 +239,94 @@ public class RecallService implements ApplicationContextAware {
|
|
|
for (Article hisArticle : hisArticles) {
|
|
|
ContentHisPublishArticle article = new ContentHisPublishArticle();
|
|
|
BeanUtils.copyProperties(hisArticle, article);
|
|
|
- AvgReadDTO dto = accountIndexAvgViewCountService.getAvgReadDto(hisArticle.getGhId() + "_" + hisArticle.getItemIndex());
|
|
|
+ article.setViewCount(hisArticle.getShowViewCount());
|
|
|
+ article.setArticleDetailInfoList(articleDetailInfoMap.get(hisArticle.getWxSn()));
|
|
|
+ // 设置账号位置阅读均值
|
|
|
int avgViewCount = 0;
|
|
|
- if (Objects.nonNull(dto)) {
|
|
|
+ Map<String, AccountAvgInfo> indexMap = accountAvgInfoIndexMap.get(hisArticle.getGhId());
|
|
|
+ if (Objects.nonNull(indexMap) && indexMap.containsKey(hisArticle.getItemIndex().toString())) {
|
|
|
article.setInnerAccount(true);
|
|
|
- avgViewCount = (int) dto.getReadAvg();
|
|
|
+ avgViewCount = Optional.ofNullable(indexMap.get(hisArticle.getItemIndex().toString()).getReadAvg())
|
|
|
+ .orElse(0.0).intValue();
|
|
|
}
|
|
|
article.setAvgViewCount(avgViewCount);
|
|
|
- if (Objects.nonNull(article.getAvgViewCount()) && article.getAvgViewCount() > 0) {
|
|
|
- article.setViewCountRate((article.getShowViewCount() * 1.0) / article.getAvgViewCount());
|
|
|
+ if (Objects.nonNull(article.getAvgViewCount()) && article.getAvgViewCount() > 0
|
|
|
+ && Objects.nonNull(article.getViewCount())) {
|
|
|
+ article.setViewCountRate((article.getViewCount() * 1.0) / article.getAvgViewCount());
|
|
|
+ }
|
|
|
+ // 设置头条阅读均值
|
|
|
+ AccountAvgInfo firstIndexAvgInfo = firstIndexAvgInfoMap.get(hisArticle.getGhId());
|
|
|
+ if (Objects.nonNull(firstIndexAvgInfo)) {
|
|
|
+ article.setFans(firstIndexAvgInfo.getFans());
|
|
|
+ }
|
|
|
+ Map<String, Article> firstIndexArticle = firstIndexHisArticleMap.get(hisArticle.getGhId());
|
|
|
+ if (Objects.nonNull(firstIndexArticle) && firstIndexArticle.containsKey(hisArticle.getAppMsgId())) {
|
|
|
+ Article firstArticle = firstIndexArticle.get(hisArticle.getAppMsgId());
|
|
|
+ article.setFirstViewCount(firstArticle.getShowViewCount());
|
|
|
+ if (Objects.nonNull(firstIndexAvgInfo) && Objects.nonNull(firstIndexAvgInfo.getReadAvg())
|
|
|
+ && firstIndexAvgInfo.getReadAvg() > 0 && Objects.nonNull(firstArticle.getShowViewCount())) {
|
|
|
+ article.setFirstViewCountRate((firstArticle.getShowViewCount() * 1.0) / firstIndexAvgInfo.getReadAvg());
|
|
|
+ }
|
|
|
}
|
|
|
content.getHisPublishArticleList().add(article);
|
|
|
}
|
|
|
+ // 设置头条阅读均值
|
|
|
+ setT0Data(content);
|
|
|
}
|
|
|
log.info("setTitleAvgViewCount cost:{}", System.currentTimeMillis() - start);
|
|
|
}
|
|
|
|
|
|
+ private void setT0Data(Content content) {
|
|
|
+ if (CollectionUtils.isEmpty(content.getHisPublishArticleList())) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ int firstLevelSize = 0;
|
|
|
+ int fissionSum = 0;
|
|
|
+ int fansSum = 0;
|
|
|
+ int avgReadCountSum = 0;
|
|
|
+ Double t0FissionByFansSum = 0.0;
|
|
|
+ Double t0FissionByReadAvgSum = 0.0;
|
|
|
+ for (ContentHisPublishArticle article : content.getHisPublishArticleList()) {
|
|
|
+ if (article.getItemIndex() != 1
|
|
|
+ || CollectionUtils.isEmpty(article.getArticleDetailInfoList())) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ int sumFission = 0;
|
|
|
+ Date minDate = article.getArticleDetailInfoList().stream().map(ArticleDetailInfo::getRecallDt).min(Date::compareTo).orElse(new Date());
|
|
|
+ for (ArticleDetailInfo articleDetailInfo : article.getArticleDetailInfoList()) {
|
|
|
+ if (articleDetailInfo.getRecallDt().equals(minDate) && Objects.nonNull(articleDetailInfo.getFission0())) {
|
|
|
+ sumFission += articleDetailInfo.getFission0();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (sumFission == 0) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ article.setT0FissionSum(sumFission);
|
|
|
+ if (article.getFans() > 0) {
|
|
|
+ article.setT0FissionByFans(sumFission * 1.0 / article.getFans());
|
|
|
+ fansSum += (int) article.getFans();
|
|
|
+ t0FissionByFansSum += article.getT0FissionByFans();
|
|
|
+ }
|
|
|
+ if (Objects.nonNull(article.getAvgViewCount()) && article.getAvgViewCount() > 0) {
|
|
|
+ article.setT0FissionByReadAvg(sumFission * 1.0 / article.getAvgViewCount());
|
|
|
+ avgReadCountSum += article.getAvgViewCount();
|
|
|
+ t0FissionByReadAvgSum += article.getT0FissionByReadAvg();
|
|
|
+ }
|
|
|
+ fissionSum += sumFission;
|
|
|
+ firstLevelSize++;
|
|
|
+ }
|
|
|
+ if (firstLevelSize > 0) {
|
|
|
+ content.setT0FissionByFansMean(t0FissionByFansSum / firstLevelSize);
|
|
|
+ content.setT0FissionByReadAvgMean(t0FissionByReadAvgSum / firstLevelSize);
|
|
|
+ if (fansSum > 0) {
|
|
|
+ content.setT0FissionByFansSumAvg(fissionSum * 1.0 / fansSum);
|
|
|
+ }
|
|
|
+ if (avgReadCountSum > 0) {
|
|
|
+ content.setT0FissionByReadAvgSumAvg(fissionSum * 1.0 / avgReadCountSum);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
public static void main(String[] args) {
|
|
|
String url = "http://mp.weixin.qq.com/s?__biz=Mzg2ODk4MTg3OQ==&mid=2247488306&idx=1&sn=93ebadc5bc7161a0dee48355013d3bc4&chksm=cfb6c1cb2bcdd80dd16d5d604d741a0019ae791125265a042d26100ba21ddb9e5c643ecc2264&scene=126&sessionid=1679649075#rd";
|
|
|
String md5 = generateArticleUniqueMd5(url);
|