|
@@ -1,17 +1,15 @@
|
|
|
package com.tzld.longarticle.recommend.server.service.recommend.recall;
|
|
|
|
|
|
+import com.alibaba.fastjson.JSONArray;
|
|
|
import com.alibaba.fastjson.JSONObject;
|
|
|
import com.google.common.collect.Lists;
|
|
|
import com.tzld.longarticle.recommend.server.common.CostMonitor;
|
|
|
import com.tzld.longarticle.recommend.server.common.ThreadPoolFactory;
|
|
|
import com.tzld.longarticle.recommend.server.common.enums.StatusEnum;
|
|
|
import com.tzld.longarticle.recommend.server.common.enums.aigc.PublishPlanInputSourceTypesEnum;
|
|
|
-import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticleCategoryStatusEnum;
|
|
|
-import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticlePoolPromotionSourceStatusEnum;
|
|
|
-import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticleTypeEnum;
|
|
|
-import com.tzld.longarticle.recommend.server.common.enums.recommend.ContentPoolEnum;
|
|
|
-import com.tzld.longarticle.recommend.server.common.enums.recommend.FeishuRobotIdEnum;
|
|
|
+import com.tzld.longarticle.recommend.server.common.enums.recommend.*;
|
|
|
import com.tzld.longarticle.recommend.server.mapper.crawler.CrawlerBaseMapper;
|
|
|
+import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
|
|
|
import com.tzld.longarticle.recommend.server.model.dto.Content;
|
|
|
import com.tzld.longarticle.recommend.server.model.dto.ContentHisPublishArticle;
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.aigc.CrawlerMetaArticle;
|
|
@@ -20,10 +18,8 @@ import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountAvgInfo
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountCorrelation;
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.crawler.Article;
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.crawler.ArticleDetailInfo;
|
|
|
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.AccountCategory;
|
|
|
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleCategory;
|
|
|
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticlePoolPromotionSource;
|
|
|
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.PublishSingleVideoSource;
|
|
|
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.*;
|
|
|
+import com.tzld.longarticle.recommend.server.model.param.TitleHisCacheParam;
|
|
|
import com.tzld.longarticle.recommend.server.remote.aigc.AIGCWaitingPublishContentService;
|
|
|
import com.tzld.longarticle.recommend.server.repository.aigc.CrawlerMetaArticleRepository;
|
|
|
import com.tzld.longarticle.recommend.server.repository.aigc.PublishContentRepository;
|
|
@@ -31,10 +27,7 @@ import com.tzld.longarticle.recommend.server.repository.crawler.AccountAvgInfoRe
|
|
|
import com.tzld.longarticle.recommend.server.repository.crawler.AccountCorrelationRepository;
|
|
|
import com.tzld.longarticle.recommend.server.repository.crawler.ArticleDetailInfoRepository;
|
|
|
import com.tzld.longarticle.recommend.server.repository.crawler.ArticleRepository;
|
|
|
-import com.tzld.longarticle.recommend.server.repository.longArticle.AccountCategoryRepository;
|
|
|
-import com.tzld.longarticle.recommend.server.repository.longArticle.ArticleCategoryRepository;
|
|
|
-import com.tzld.longarticle.recommend.server.repository.longArticle.ArticlePoolPromotionSourceRepository;
|
|
|
-import com.tzld.longarticle.recommend.server.repository.longArticle.PublishSingleVideoSourceRepository;
|
|
|
+import com.tzld.longarticle.recommend.server.repository.longArticle.*;
|
|
|
import com.tzld.longarticle.recommend.server.service.recommend.config.AccountIndexAvgViewCountService;
|
|
|
import com.tzld.longarticle.recommend.server.service.recommend.recall.strategy.DefaultRecallStrategy;
|
|
|
import com.tzld.longarticle.recommend.server.service.recommend.score.ScoreStrategy;
|
|
@@ -96,6 +89,10 @@ public class RecallService implements ApplicationContextAware {
|
|
|
AccountCategoryRepository accountCategoryRepository;
|
|
|
@Autowired
|
|
|
PublishSingleVideoSourceRepository publishSingleVideoSourceRepository;
|
|
|
+ @Autowired
|
|
|
+ ArticleTitleHisCacheRepository articleTitleHisCacheRepository;
|
|
|
+ @Autowired
|
|
|
+ LongArticleBaseMapper longArticleBaseMapper;
|
|
|
|
|
|
private final Map<String, RecallStrategy> strategyMap = new HashMap<>();
|
|
|
private ApplicationContext applicationContext;
|
|
@@ -182,10 +179,7 @@ public class RecallService implements ApplicationContextAware {
|
|
|
+ "账号名称: " + param.getAccountName());
|
|
|
return content;
|
|
|
}
|
|
|
- // category 查询
|
|
|
- setContentCategory(content);
|
|
|
long t3 = System.currentTimeMillis();
|
|
|
- CostMonitor.logCost("Recall", "GetCategory", t3 - t2);
|
|
|
// 标题历史均值
|
|
|
setTitleAvgViewCount(content, param.getGhId(), param.getType());
|
|
|
long t4 = System.currentTimeMillis();
|
|
@@ -203,10 +197,10 @@ public class RecallService implements ApplicationContextAware {
|
|
|
return;
|
|
|
}
|
|
|
Map<String, Content> contentMap = contentList.stream()
|
|
|
- .collect(Collectors.toMap(Content::getSourceId, Function.identity()));
|
|
|
+ .collect(Collectors.toMap(Content::getSourceId, Function.identity()));
|
|
|
List<PublishSingleVideoSource> sourceList = publishSingleVideoSourceRepository.getByContentTraceIdIn(contentTraceIds);
|
|
|
Map<String, PublishSingleVideoSource> sourceMap = sourceList.stream()
|
|
|
- .collect(Collectors.toMap(PublishSingleVideoSource::getContentTraceId, Function.identity()));
|
|
|
+ .collect(Collectors.toMap(PublishSingleVideoSource::getContentTraceId, Function.identity()));
|
|
|
for (String contentTraceId : contentTraceIds) {
|
|
|
Content content = contentMap.get(contentTraceId);
|
|
|
PublishSingleVideoSource source = sourceMap.get(contentTraceId);
|
|
@@ -219,53 +213,6 @@ public class RecallService implements ApplicationContextAware {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- public void setContentCategory(List<Content> contentList) {
|
|
|
- List<String> channelContentIds = contentList.stream().map(Content::getCrawlerChannelContentId)
|
|
|
- .collect(Collectors.toList());
|
|
|
- // 查询晋升rootProduceContentId
|
|
|
- List<ArticlePoolPromotionSource> sourceList = articlePoolPromotionSourceRepository
|
|
|
- .getByChannelContentIdInAndStatusAndDeleted(channelContentIds,
|
|
|
- ArticlePoolPromotionSourceStatusEnum.FINISH.getCode(), 0);
|
|
|
- Map<String, ArticlePoolPromotionSource> sourceMap = sourceList.stream()
|
|
|
- .collect(Collectors.toMap(ArticlePoolPromotionSource::getChannelContentId, Function.identity()));
|
|
|
- List<String> publishContentIds = sourceList.stream().
|
|
|
- map(ArticlePoolPromotionSource::getRootPublishContentId).collect(Collectors.toList());
|
|
|
- List<PublishContent> publishContentList = publishContentRepository.getByIdIn(publishContentIds);
|
|
|
- Map<String, PublishContent> publishContentMap = publishContentList.stream()
|
|
|
- .collect(Collectors.toMap(PublishContent::getId, Function.identity()));
|
|
|
- // 根据produceContentId查询category
|
|
|
- List<ArticleCategory> articleCategoryList = articleCategoryRepository.getByStatus(ArticleCategoryStatusEnum.SUCCESS.getCode());
|
|
|
- Map<String, ArticleCategory> categoryMap = articleCategoryList.stream()
|
|
|
- .collect(Collectors.toMap(ArticleCategory::getProduceContentId, Function.identity()));
|
|
|
- Map<String, ArticleCategory> coldStartCategoryMap = articleCategoryList.stream()
|
|
|
- .collect(Collectors.toMap(ArticleCategory::getChannelContentId, Function.identity(), (a, b) -> a));
|
|
|
- Map<String, ArticleCategory> titleCategoryMap = articleCategoryList.stream()
|
|
|
- .collect(Collectors.toMap(ArticleCategory::getTitleMd5, Function.identity(), (a, b) -> a));
|
|
|
- for (Content content : contentList) {
|
|
|
- ArticlePoolPromotionSource source = sourceMap.get(content.getCrawlerChannelContentId());
|
|
|
- ArticleCategory category = null;
|
|
|
- if (Objects.nonNull(source) && Objects.nonNull(source.getRootProduceContentId())) {
|
|
|
- category = categoryMap.get(source.getRootProduceContentId());
|
|
|
- PublishContent publishContent = publishContentMap.get(source.getRootPublishContentId());
|
|
|
- if (Objects.nonNull(publishContent)) {
|
|
|
- content.setRootPublishTimestamp(publishContent.getPublishTimestamp());
|
|
|
- }
|
|
|
- }
|
|
|
- if (Objects.isNull(category)) {
|
|
|
- category = coldStartCategoryMap.get(content.getCrawlerChannelContentId());
|
|
|
- }
|
|
|
- if (Objects.isNull(category)) {
|
|
|
- String titleMd5 = Md5Util.encoderByMd5(content.getTitle());
|
|
|
- category = titleCategoryMap.get(titleMd5);
|
|
|
- }
|
|
|
- if (Objects.nonNull(category)) {
|
|
|
- content.setCategory(Collections.singletonList(category.getCategory()));
|
|
|
- continue;
|
|
|
- }
|
|
|
-// log.error("setContentCategory NullError channelContentId:{}", content.getCrawlerChannelContentId());
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
private List<CrawlerMetaArticle> getByUniqueIndexIn(List<String> md5List) {
|
|
|
if (CollectionUtils.isEmpty(md5List)) {
|
|
|
return new ArrayList<>();
|
|
@@ -300,15 +247,109 @@ public class RecallService implements ApplicationContextAware {
|
|
|
|
|
|
+    /**
+     * Fills every {@link Content} in {@code contentList} with its historical publish articles,
+     * category and root publish timestamp, then derives the T0 data through {@code setT0Data}.
+     *
+     * <p>Each sourceId is first looked up in the title-history cache (keyed by sourceId and
+     * {@code type}). Only the source ids that miss the cache are recomputed through
+     * {@link #getArticleWithHistory}; those contents are afterwards handed to
+     * {@code saveArticleTitleHisCache}. The per-account correlation is not taken from the
+     * cache: it is re-applied from the correlations of {@code ghId} on every call.
+     *
+     * @param contentList candidate contents, mutated in place; a null or empty list is a no-op
+     * @param ghId        account whose correlations are applied to the history articles
+     * @param type        article type, used for the cache lookup and passed to the history query
+     */
     public void setTitleAvgViewCount(List<Content> contentList, String ghId, String type) {
         long start = System.currentTimeMillis();
+        if (CollectionUtils.isEmpty(contentList)) {
+            // Nothing to enrich; do not issue any of the lookups below.
+            return;
+        }
+        contentList.forEach(content -> content.setTitleMd5(Md5Util.encoderByMd5(content.getTitle())));
+        List<String> sourceIdList = contentList.stream().map(Content::getSourceId).distinct().collect(Collectors.toList());
+        // When several contents share a sourceId, the last one supplies the query parameters.
+        Map<String, Content> sourceIdToContentMap = contentList.stream().collect(
+                Collectors.toMap(Content::getSourceId, Function.identity(), (o1, o2) -> o2));
+        // Read the cached title history by sourceId, 1000 ids per query.
+        List<ArticleTitleHisCache> articleTitleHisCacheList = new ArrayList<>(sourceIdList.size());
+        for (List<String> partition : Lists.partition(sourceIdList, 1000)) {
+            articleTitleHisCacheList.addAll(articleTitleHisCacheRepository.getBySourceIdInAndType(partition, type));
+        }
+        // Keep the first row per sourceId: without a merge function a duplicated cache row
+        // makes toMap throw IllegalStateException and fails the whole call.
+        Map<String, ArticleTitleHisCache> articleTitleHisCacheMap = articleTitleHisCacheList.stream()
+                .collect(Collectors.toMap(ArticleTitleHisCache::getSourceId, Function.identity(), (a, b) -> a));
+        // Only source ids missing from the cache go through the history query below. A new list
+        // is built instead of calling removeIf, because Collectors.toList() does not guarantee
+        // that the list it returns is mutable.
+        List<String> uncachedSourceIds = sourceIdList.stream()
+                .filter(sourceId -> !articleTitleHisCacheMap.containsKey(sourceId))
+                .collect(Collectors.toList());
+        // Load the correlations between ghId and other accounts
         List<AccountCorrelation> accountCorrelationList = accountCorrelationRepository.findByGhIdAndStatus(ghId, 1);
         Map<String, Double> accountCorrelationMap = accountCorrelationList.stream().collect(
                 Collectors.toMap(AccountCorrelation::getRelGhId, AccountCorrelation::getCorrelation));
+        List<TitleHisCacheParam> paramList = uncachedSourceIds.stream().map(sourceId -> {
+            Content content = sourceIdToContentMap.get(sourceId);
+            TitleHisCacheParam cacheParam = new TitleHisCacheParam();
+            cacheParam.setSourceId(sourceId);
+            cacheParam.setTitleMd5(content.getTitleMd5());
+            cacheParam.setTitle(content.getTitle());
+            cacheParam.setCrawlerTitle(content.getCrawlerTitle());
+            cacheParam.setCrawlerChannelContentId(content.getCrawlerChannelContentId());
+            cacheParam.setCategory(content.getCategory());
+            return cacheParam;
+        }).collect(Collectors.toList());
+        // When every sourceId was served from the cache, skip getArticleWithHistory entirely:
+        // it runs its account-category and article-category lookups even for an empty list.
+        Map<String, Content> articlesWithHistory = Collections.emptyMap();
+        if (!paramList.isEmpty()) {
+            articlesWithHistory = getArticleWithHistory(paramList, type);
+        }
+        List<Content> newCacheSaveList = new ArrayList<>();
+        Set<String> newCacheSourceIdSet = new HashSet<>();
+        for (Content content : contentList) {
+            ArticleTitleHisCache cache = articleTitleHisCacheMap.get(content.getSourceId());
+            if (Objects.nonNull(cache)) {
+                // Cache hit: restore the stored history and re-apply the correlation for ghId.
+                List<ContentHisPublishArticle> hisPublishArticleList =
+                        JSONArray.parseArray(cache.getHisPublishArticleList(), ContentHisPublishArticle.class);
+                if (Objects.isNull(hisPublishArticleList)) {
+                    // NOTE(review): fastjson returns null for a null text; treat that as "no history"
+                    // instead of failing with a NullPointerException in the loop below.
+                    hisPublishArticleList = new ArrayList<>();
+                }
+                for (ContentHisPublishArticle article : hisPublishArticleList) {
+                    article.setCorrelation(Optional.ofNullable(accountCorrelationMap.get(article.getGhId())).orElse(0.0));
+                }
+                if (StringUtils.hasText(cache.getCategory())) {
+                    content.setCategory(JSONArray.parseArray(cache.getCategory(), String.class));
+                }
+                content.setRootPublishTimestamp(cache.getRootPublishTimestamp());
+                content.setHisPublishArticleList(hisPublishArticleList);
+                setT0Data(content);
+                continue;
+            }
+            Content articleWithHistory = articlesWithHistory.get(content.getSourceId());
+            if (Objects.nonNull(articleWithHistory)) {
+                // Cache miss: copy the freshly computed history, category and root publish time.
+                content.setHisPublishArticleList(articleWithHistory.getHisPublishArticleList());
+                if (CollectionUtils.isNotEmpty(articleWithHistory.getCategory())) {
+                    content.setCategory(articleWithHistory.getCategory());
+                }
+                content.setRootPublishTimestamp(articleWithHistory.getRootPublishTimestamp());
+                for (ContentHisPublishArticle article : content.getHisPublishArticleList()) {
+                    article.setCorrelation(Optional.ofNullable(accountCorrelationMap.get(article.getGhId())).orElse(0.0));
+                }
+                setT0Data(content);
+            }
+            // Queue each uncached sourceId only once for the cache write.
+            if (newCacheSourceIdSet.add(content.getSourceId())) {
+                newCacheSaveList.add(content);
+            }
+        }
+        // Write the newly computed entries to the cache
+        saveArticleTitleHisCache(newCacheSaveList, type);
+        log.info("setTitleAvgViewCount cost:{}", System.currentTimeMillis() - start);
+    }
|
|
|
+
|
|
|
+    /**
+     * Writes freshly computed title-history data to the cache table in one batch insert.
+     *
+     * <p>Contents without any historical publish article are skipped. The write is
+     * best-effort: an exception raised while building or inserting the rows is caught and
+     * logged with its stack trace, and is not rethrown to the caller.
+     *
+     * @param saveList contents whose history was computed in this call; may be null or empty
+     * @param type     article type stored on every cache row
+     */
+    private void saveArticleTitleHisCache(List<Content> saveList, String type) {
+        if (CollectionUtils.isEmpty(saveList)) {
+            return;
+        }
+        List<ArticleTitleHisCache> cacheList = new ArrayList<>();
+        try {
+            for (Content content : saveList) {
+                if (CollectionUtils.isEmpty(content.getHisPublishArticleList())) {
+                    // No history to cache for this content.
+                    continue;
+                }
+                ArticleTitleHisCache cache = new ArticleTitleHisCache();
+                // NOTE(review): assumes a (source, target) copyProperties such as Spring's BeanUtils - confirm.
+                BeanUtils.copyProperties(content, cache);
+                cache.setType(type);
+                cache.setChannelContentId(content.getCrawlerChannelContentId());
+                // Category and history list are stored as JSON strings on the cache row.
+                if (CollectionUtils.isNotEmpty(content.getCategory())) {
+                    cache.setCategory(JSONObject.toJSONString(content.getCategory()));
+                }
+                cache.setRootPublishTimestamp(content.getRootPublishTimestamp());
+                cache.setHisPublishArticleList(JSONObject.toJSONString(content.getHisPublishArticleList()));
+                cache.setCreateTimestamp(System.currentTimeMillis());
+                cacheList.add(cache);
+            }
+            if (CollectionUtils.isEmpty(cacheList)) {
+                return;
+            }
+            longArticleBaseMapper.batchInsertArticleTitleHisCache(cacheList);
+        } catch (Exception e) {
+            // Pass the throwable as the trailing argument so the exception type and stack trace
+            // are logged; e.getMessage() alone is "null" for exceptions such as NPE.
+            log.error("saveArticleTitleHisCache error:{}", e.getMessage(), e);
+        }
+    }
|
|
|
|
|
|
- Set<String> titleMd5List = contentList.stream().map(o -> Md5Util.encoderByMd5(o.getTitle())).collect(Collectors.toSet());
|
|
|
-// Set<String> titleList = contentList.stream().map(Content::getTitle).collect(Collectors.toSet());
|
|
|
-// Set<String> crawlerTitleList = contentList.stream().map(Content::getCrawlerTitle).collect(Collectors.toSet());
|
|
|
-// titleList.addAll(crawlerTitleList);
|
|
|
+ public Map<String, Content> getArticleWithHistory(List<TitleHisCacheParam> paramList, String type) {
|
|
|
+ Map<String, Content> result = new HashMap<>();
|
|
|
+ List<String> titleMd5List = paramList.stream().map(TitleHisCacheParam::getTitleMd5).collect(Collectors.toList());
|
|
|
// 获取历史已发布文章
|
|
|
List<Article> hisArticleList = new ArrayList<>();
|
|
|
List<List<String>> titleMd5Partition = Lists.partition(new ArrayList<>(titleMd5List), 1000);
|
|
@@ -345,13 +386,58 @@ public class RecallService implements ApplicationContextAware {
|
|
|
List<AccountCategory> accountCategoryList = accountCategoryRepository.getByStatus(StatusEnum.ONE.getCode());
|
|
|
Map<String, JSONObject> accountCategoryMap = accountCategoryList.stream().filter(o -> StringUtils.hasText(o.getCategoryMap()))
|
|
|
.collect(Collectors.toMap(AccountCategory::getGhId, o -> JSONObject.parseObject(o.getCategoryMap())));
|
|
|
- for (Content content : contentList) {
|
|
|
+
|
|
|
+ // 获取品类
|
|
|
+ List<String> channelContentIds = paramList.stream().map(TitleHisCacheParam::getCrawlerChannelContentId)
|
|
|
+ .collect(Collectors.toList());
|
|
|
+ // 查询晋升rootProduceContentId
|
|
|
+ List<ArticlePoolPromotionSource> sourceList = articlePoolPromotionSourceRepository
|
|
|
+ .getByChannelContentIdInAndStatusAndDeleted(channelContentIds,
|
|
|
+ ArticlePoolPromotionSourceStatusEnum.FINISH.getCode(), 0);
|
|
|
+ Map<String, ArticlePoolPromotionSource> sourceMap = sourceList.stream()
|
|
|
+ .collect(Collectors.toMap(ArticlePoolPromotionSource::getChannelContentId, Function.identity()));
|
|
|
+ List<String> publishContentIds = sourceList.stream().
|
|
|
+ map(ArticlePoolPromotionSource::getRootPublishContentId).collect(Collectors.toList());
|
|
|
+ List<PublishContent> publishContentList = publishContentRepository.getByIdIn(publishContentIds);
|
|
|
+ Map<String, PublishContent> publishContentMap = publishContentList.stream()
|
|
|
+ .collect(Collectors.toMap(PublishContent::getId, Function.identity()));
|
|
|
+ // 根据produceContentId查询category
|
|
|
+ List<ArticleCategory> articleCategoryList = articleCategoryRepository.getByStatus(ArticleCategoryStatusEnum.SUCCESS.getCode());
|
|
|
+ Map<String, ArticleCategory> categoryMap = articleCategoryList.stream()
|
|
|
+ .collect(Collectors.toMap(ArticleCategory::getProduceContentId, Function.identity()));
|
|
|
+ Map<String, ArticleCategory> coldStartCategoryMap = articleCategoryList.stream()
|
|
|
+ .collect(Collectors.toMap(ArticleCategory::getChannelContentId, Function.identity(), (a, b) -> a));
|
|
|
+ Map<String, ArticleCategory> titleCategoryMap = articleCategoryList.stream()
|
|
|
+ .collect(Collectors.toMap(ArticleCategory::getTitleMd5, Function.identity(), (a, b) -> a));
|
|
|
+
|
|
|
+ for (TitleHisCacheParam cacheParam : paramList) {
|
|
|
+ Content res = new Content();
|
|
|
+ // 设置品类
|
|
|
+ ArticleCategory category = categoryMap.get(cacheParam.getSourceId());
|
|
|
+ if (Objects.isNull(category)) {
|
|
|
+ category = coldStartCategoryMap.get(cacheParam.getCrawlerChannelContentId());
|
|
|
+ }
|
|
|
+ if (Objects.isNull(category)) {
|
|
|
+ category = titleCategoryMap.get(cacheParam.getTitleMd5());
|
|
|
+ }
|
|
|
+ if (Objects.nonNull(category)) {
|
|
|
+ res.setCategory(Collections.singletonList(category.getCategory()));
|
|
|
+ }
|
|
|
+ // 溯源查找源发布时间
|
|
|
+ ArticlePoolPromotionSource source = sourceMap.get(cacheParam.getCrawlerChannelContentId());
|
|
|
+ if (Objects.nonNull(source) && Objects.nonNull(source.getRootProduceContentId())) {
|
|
|
+ PublishContent publishContent = publishContentMap.get(source.getRootPublishContentId());
|
|
|
+ if (Objects.nonNull(publishContent)) {
|
|
|
+ res.setRootPublishTimestamp(publishContent.getPublishTimestamp());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // 设置历史表现
|
|
|
List<Article> hisArticles = new ArrayList<>();
|
|
|
- Map<Integer, List<Article>> indexArticleMap = map.get(content.getTitle());
|
|
|
+ Map<Integer, List<Article>> indexArticleMap = map.get(cacheParam.getTitle());
|
|
|
if (Objects.isNull(indexArticleMap)) {
|
|
|
- indexArticleMap = map.get(content.getCrawlerTitle());
|
|
|
- } else if (!content.getTitle().equals(content.getCrawlerTitle())) {
|
|
|
- Map<Integer, List<Article>> crawlerTitleIndexArticleMap = map.get(content.getCrawlerTitle());
|
|
|
+ indexArticleMap = map.get(cacheParam.getCrawlerTitle());
|
|
|
+ } else if (!cacheParam.getTitle().equals(cacheParam.getCrawlerTitle())) {
|
|
|
+ Map<Integer, List<Article>> crawlerTitleIndexArticleMap = map.get(cacheParam.getCrawlerTitle());
|
|
|
if (Objects.nonNull(crawlerTitleIndexArticleMap)) {
|
|
|
for (Map.Entry<Integer, List<Article>> entry : crawlerTitleIndexArticleMap.entrySet()) {
|
|
|
if (indexArticleMap.containsKey(entry.getKey())) {
|
|
@@ -370,17 +456,17 @@ public class RecallService implements ApplicationContextAware {
|
|
|
hisArticles.addAll(indexArticleList);
|
|
|
}
|
|
|
}
|
|
|
- content.setHisPublishArticleList(new ArrayList<>());
|
|
|
+ res.setHisPublishArticleList(new ArrayList<>());
|
|
|
for (Article hisArticle : hisArticles) {
|
|
|
if (ScoreStrategy.hisContentLateFilter(hisArticle.getPublishTimestamp())) {
|
|
|
continue;
|
|
|
}
|
|
|
// 历史表现 文章品类如果与历史发布账号负相关 则过滤,不计算该历史发布表现
|
|
|
JSONObject categoryWeightMap = accountCategoryMap.get(hisArticle.getGhId());
|
|
|
- if (Objects.nonNull(categoryWeightMap) && CollectionUtils.isNotEmpty(content.getCategory())) {
|
|
|
- String category = content.getCategory().get(0);
|
|
|
- if (categoryWeightMap.containsKey(category)) {
|
|
|
- double weight = categoryWeightMap.getDoubleValue(category);
|
|
|
+ if (Objects.nonNull(categoryWeightMap) && CollectionUtils.isNotEmpty(cacheParam.getCategory())) {
|
|
|
+ String hisCategory = cacheParam.getCategory().get(0);
|
|
|
+ if (categoryWeightMap.containsKey(hisCategory)) {
|
|
|
+ double weight = categoryWeightMap.getDoubleValue(hisCategory);
|
|
|
if (weight < 0) {
|
|
|
continue;
|
|
|
}
|
|
@@ -403,15 +489,18 @@ public class RecallService implements ApplicationContextAware {
|
|
|
article.setInnerAccount(true);
|
|
|
avgViewCount = Optional.ofNullable(indexMap.get(hisArticle.getItemIndex().toString()).getReadAvg())
|
|
|
.orElse(0.0).intValue();
|
|
|
-// } else {
|
|
|
-// if (ArticleTypeEnum.QUNFA.getVal().equals(type)) {
|
|
|
+ } else {
|
|
|
+ if (ArticleTypeEnum.QUNFA.getVal().equals(type)) {
|
|
|
+ log.error("历史表现阅读均值获取失败 ghId:{} accountName:{} date:{} index:{}",
|
|
|
+ hisArticle.getGhId(), hisArticle.getAccountName(), hisPublishDate,
|
|
|
+ hisArticle.getItemIndex());
|
|
|
// FeishuMessageSender.sendWebHookMessage(FeishuRobotIdEnum.RECOMMEND.getRobotId(),
|
|
|
// "历史表现阅读均值获取失败\n"
|
|
|
// + "ghId: " + hisArticle.getGhId() + "\n"
|
|
|
// + "账号名称: " + hisArticle.getAccountName() + "\n"
|
|
|
// + "日期: " + hisPublishDate + "\n"
|
|
|
// + "位置: " + hisArticle.getItemIndex());
|
|
|
-// }
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
article.setAvgViewCount(avgViewCount);
|
|
@@ -437,13 +526,12 @@ public class RecallService implements ApplicationContextAware {
|
|
|
article.setFirstViewCountRate((firstArticle.getShowViewCount() * 1.0) / firstIndexAvgInfo.getReadAvg());
|
|
|
}
|
|
|
}
|
|
|
- article.setCorrelation(Optional.ofNullable(accountCorrelationMap.get(article.getGhId())).orElse(0.0));
|
|
|
- content.getHisPublishArticleList().add(article);
|
|
|
+ res.getHisPublishArticleList().add(article);
|
|
|
}
|
|
|
// 设置头条阅读均值
|
|
|
- setT0Data(content);
|
|
|
+ result.put(cacheParam.getSourceId(), res);
|
|
|
}
|
|
|
- log.info("setTitleAvgViewCount cost:{}", System.currentTimeMillis() - start);
|
|
|
+ return result;
|
|
|
}
|
|
|
|
|
|
private void setT0Data(Content content) {
|
|
@@ -465,14 +553,17 @@ public class RecallService implements ApplicationContextAware {
|
|
|
if (CollectionUtils.isEmpty(article.getArticleDetailInfoList())) {
|
|
|
// 仅判断7.12以后发布文章
|
|
|
if (article.getPublishTimestamp() > 1720713600 && contentHisFeishuEnable) {
|
|
|
- FeishuMessageSender.sendWebHookMessage(FeishuRobotIdEnum.RECOMMEND.getRobotId(),
|
|
|
- "历史表现裂变特征获取失败\n"
|
|
|
- + "ghId: " + article.getGhId() + "\n"
|
|
|
- + "账号名称: " + article.getAccountName() + "\n"
|
|
|
- + "位置: " + article.getItemIndex() + "\n"
|
|
|
- + "标题: " + article.getTitle() + "\n"
|
|
|
- + "发布时间: " + DateUtils.timestampToYMDStr(article.getPublishTimestamp(), "yyyyMMdd") + "\n"
|
|
|
- + "wxsn: " + article.getWxSn());
|
|
|
+ log.error("历史表现裂变特征获取失败 ghId:{} accountName:{} itemIndex:{} title:{} date:{} wxsn:{}",
|
|
|
+ article.getGhId(), article.getAccountName(), article.getItemIndex(), article.getTitle(),
|
|
|
+ DateUtils.timestampToYMDStr(article.getPublishTimestamp(), "yyyyMMdd"), article.getWxSn());
|
|
|
+// FeishuMessageSender.sendWebHookMessage(FeishuRobotIdEnum.RECOMMEND.getRobotId(),
|
|
|
+// "历史表现裂变特征获取失败\n"
|
|
|
+// + "ghId: " + article.getGhId() + "\n"
|
|
|
+// + "账号名称: " + article.getAccountName() + "\n"
|
|
|
+// + "位置: " + article.getItemIndex() + "\n"
|
|
|
+// + "标题: " + article.getTitle() + "\n"
|
|
|
+// + "发布时间: " + DateUtils.timestampToYMDStr(article.getPublishTimestamp(), "yyyyMMdd") + "\n"
|
|
|
+// + "wxsn: " + article.getWxSn());
|
|
|
}
|
|
|
continue;
|
|
|
}
|