|
@@ -7,12 +7,9 @@ import com.tzld.longarticle.recommend.server.common.CostMonitor;
|
|
|
import com.tzld.longarticle.recommend.server.common.ThreadPoolFactory;
|
|
|
import com.tzld.longarticle.recommend.server.common.enums.StatusEnum;
|
|
|
import com.tzld.longarticle.recommend.server.common.enums.aigc.PublishPlanInputSourceTypesEnum;
|
|
|
-import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticleCategoryStatusEnum;
|
|
|
-import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticlePoolPromotionSourceStatusEnum;
|
|
|
-import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticleTypeEnum;
|
|
|
-import com.tzld.longarticle.recommend.server.common.enums.recommend.ContentPoolEnum;
|
|
|
-import com.tzld.longarticle.recommend.server.common.enums.recommend.FeishuRobotIdEnum;
|
|
|
+import com.tzld.longarticle.recommend.server.common.enums.recommend.*;
|
|
|
import com.tzld.longarticle.recommend.server.mapper.crawler.CrawlerBaseMapper;
|
|
|
+import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
|
|
|
import com.tzld.longarticle.recommend.server.model.dto.Content;
|
|
|
import com.tzld.longarticle.recommend.server.model.dto.ContentHisPublishArticle;
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.aigc.CrawlerMetaArticle;
|
|
@@ -21,11 +18,7 @@ import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountAvgInfo
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountCorrelation;
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.crawler.Article;
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.crawler.ArticleDetailInfo;
|
|
|
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.AccountCategory;
|
|
|
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleCategory;
|
|
|
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticlePoolPromotionSource;
|
|
|
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.PublishSingleVideoSource;
|
|
|
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleTitleHisCache;
|
|
|
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.*;
|
|
|
import com.tzld.longarticle.recommend.server.model.param.TitleHisCacheParam;
|
|
|
import com.tzld.longarticle.recommend.server.remote.aigc.AIGCWaitingPublishContentService;
|
|
|
import com.tzld.longarticle.recommend.server.repository.aigc.CrawlerMetaArticleRepository;
|
|
@@ -34,11 +27,7 @@ import com.tzld.longarticle.recommend.server.repository.crawler.AccountAvgInfoRe
|
|
|
import com.tzld.longarticle.recommend.server.repository.crawler.AccountCorrelationRepository;
|
|
|
import com.tzld.longarticle.recommend.server.repository.crawler.ArticleDetailInfoRepository;
|
|
|
import com.tzld.longarticle.recommend.server.repository.crawler.ArticleRepository;
|
|
|
-import com.tzld.longarticle.recommend.server.repository.longArticle.AccountCategoryRepository;
|
|
|
-import com.tzld.longarticle.recommend.server.repository.longArticle.ArticleCategoryRepository;
|
|
|
-import com.tzld.longarticle.recommend.server.repository.longArticle.ArticlePoolPromotionSourceRepository;
|
|
|
-import com.tzld.longarticle.recommend.server.repository.longArticle.PublishSingleVideoSourceRepository;
|
|
|
-import com.tzld.longarticle.recommend.server.repository.longArticle.ArticleTitleHisCacheRepository;
|
|
|
+import com.tzld.longarticle.recommend.server.repository.longArticle.*;
|
|
|
import com.tzld.longarticle.recommend.server.service.recommend.config.AccountIndexAvgViewCountService;
|
|
|
import com.tzld.longarticle.recommend.server.service.recommend.recall.strategy.DefaultRecallStrategy;
|
|
|
import com.tzld.longarticle.recommend.server.service.recommend.score.ScoreStrategy;
|
|
@@ -102,6 +91,8 @@ public class RecallService implements ApplicationContextAware {
|
|
|
PublishSingleVideoSourceRepository publishSingleVideoSourceRepository;
|
|
|
@Autowired
|
|
|
ArticleTitleHisCacheRepository articleTitleHisCacheRepository;
|
|
|
+ @Autowired
|
|
|
+ LongArticleBaseMapper longArticleBaseMapper;
|
|
|
|
|
|
private final Map<String, RecallStrategy> strategyMap = new HashMap<>();
|
|
|
private ApplicationContext applicationContext;
|
|
@@ -209,10 +200,10 @@ public class RecallService implements ApplicationContextAware {
|
|
|
return;
|
|
|
}
|
|
|
Map<String, Content> contentMap = contentList.stream()
|
|
|
- .collect(Collectors.toMap(Content::getSourceId, Function.identity()));
|
|
|
+ .collect(Collectors.toMap(Content::getSourceId, Function.identity()));
|
|
|
List<PublishSingleVideoSource> sourceList = publishSingleVideoSourceRepository.getByContentTraceIdIn(contentTraceIds);
|
|
|
Map<String, PublishSingleVideoSource> sourceMap = sourceList.stream()
|
|
|
- .collect(Collectors.toMap(PublishSingleVideoSource::getContentTraceId, Function.identity()));
|
|
|
+ .collect(Collectors.toMap(PublishSingleVideoSource::getContentTraceId, Function.identity()));
|
|
|
for (String contentTraceId : contentTraceIds) {
|
|
|
Content content = contentMap.get(contentTraceId);
|
|
|
PublishSingleVideoSource source = sourceMap.get(contentTraceId);
|
|
@@ -306,12 +297,14 @@ public class RecallService implements ApplicationContextAware {
|
|
|
|
|
|
public void setTitleAvgViewCount(List<Content> contentList, String ghId, String type) {
|
|
|
long start = System.currentTimeMillis();
|
|
|
- Set<String> titleMd5List = contentList.stream().map(Content::getTitleMd5).collect(Collectors.toSet());
|
|
|
+ List<String> titleMd5List = contentList.stream().map(Content::getTitleMd5).distinct().collect(Collectors.toList());
|
|
|
Map<String, Content> md5ContentMap = contentList.stream().collect(
|
|
|
Collectors.toMap(Content::getTitleMd5, Function.identity(), (o1, o2) -> o2));
|
|
|
// 根据titleMd5查询数据库获取数据
|
|
|
- List<ArticleTitleHisCache> articleTitleHisCacheList = articleTitleHisCacheRepository
|
|
|
- .getByTitleMd5InAndType(titleMd5List, type);
|
|
|
+ List<ArticleTitleHisCache> articleTitleHisCacheList = new ArrayList<>();
|
|
|
+ for (List<String> partition : Lists.partition(titleMd5List, 1000)) {
|
|
|
+ articleTitleHisCacheList.addAll(articleTitleHisCacheRepository.getByTitleMd5InAndType(partition, type));
|
|
|
+ }
|
|
|
Map<String, ArticleTitleHisCache> articleTitleHisCacheMap = articleTitleHisCacheList.stream()
|
|
|
.collect(Collectors.toMap(ArticleTitleHisCache::getTitleMd5, Function.identity()));
|
|
|
// titleMd5 进行过滤 排除缓存中数据 重新走下方查询
|
|
@@ -330,6 +323,7 @@ public class RecallService implements ApplicationContextAware {
|
|
|
return cacheParam;
|
|
|
}).collect(Collectors.toList());
|
|
|
Map<String, Content> hisArticleCacheMap = getArticleTitleHisCacheMap(paramList, type);
|
|
|
+ List<Content> saveList = new ArrayList<>();
|
|
|
for (Content content : contentList) {
|
|
|
if (articleTitleHisCacheMap.containsKey(content.getTitleMd5())) {
|
|
|
ArticleTitleHisCache cache = articleTitleHisCacheMap.get(content.getTitleMd5());
|
|
@@ -339,12 +333,7 @@ public class RecallService implements ApplicationContextAware {
|
|
|
article.setCorrelation(Optional.ofNullable(accountCorrelationMap.get(article.getGhId())).orElse(0.0));
|
|
|
}
|
|
|
content.setHisPublishArticleList(hisPublishArticleList);
|
|
|
- content.setT0FissionByFansMean(cache.getT0FissionByFansMean());
|
|
|
- content.setT0FissionByReadAvgMean(cache.getT0FissionByReadAvgMean());
|
|
|
- content.setT0FissionByReadAvgCorrelationMean(cache.getT0FissionByReadAvgCorrelationMean());
|
|
|
- content.setT0FissionDeWeightByReadAvgSumAvg(cache.getT0FissionDeWeightByReadAvgSumAvg());
|
|
|
- content.setT0FissionByFansSumAvg(cache.getT0FissionByFansSumAvg());
|
|
|
- content.setT0FissionByReadAvgSumAvg(cache.getT0FissionByReadAvgSumAvg());
|
|
|
+ setT0Data(content);
|
|
|
continue;
|
|
|
}
|
|
|
if (hisArticleCacheMap.containsKey(content.getTitleMd5())) {
|
|
@@ -353,28 +342,42 @@ public class RecallService implements ApplicationContextAware {
|
|
|
for (ContentHisPublishArticle article : content.getHisPublishArticleList()) {
|
|
|
article.setCorrelation(Optional.ofNullable(accountCorrelationMap.get(article.getGhId())).orElse(0.0));
|
|
|
}
|
|
|
- content.setT0FissionByFansMean(cache.getT0FissionByFansMean());
|
|
|
- content.setT0FissionByReadAvgMean(cache.getT0FissionByReadAvgMean());
|
|
|
- content.setT0FissionByReadAvgCorrelationMean(cache.getT0FissionByReadAvgCorrelationMean());
|
|
|
- content.setT0FissionDeWeightByReadAvgSumAvg(cache.getT0FissionDeWeightByReadAvgSumAvg());
|
|
|
- content.setT0FissionByFansSumAvg(cache.getT0FissionByFansSumAvg());
|
|
|
- content.setT0FissionByReadAvgSumAvg(cache.getT0FissionByReadAvgSumAvg());
|
|
|
+ setT0Data(content);
|
|
|
}
|
|
|
- // 写入缓存
|
|
|
- saveArticleTitleHisCache(content);
|
|
|
+ saveList.add(content);
|
|
|
}
|
|
|
+ // 写入缓存
|
|
|
+ saveArticleTitleHisCache(saveList, type);
|
|
|
log.info("setTitleAvgViewCount cost:{}", System.currentTimeMillis() - start);
|
|
|
}
|
|
|
|
|
|
- private void saveArticleTitleHisCache(Content content) {
|
|
|
- if (CollectionUtils.isEmpty(content.getHisPublishArticleList())) {
|
|
|
+ private void saveArticleTitleHisCache(List<Content> saveList, String type) {
|
|
|
+ if (CollectionUtils.isEmpty(saveList)) {
|
|
|
return;
|
|
|
}
|
|
|
- ArticleTitleHisCache cache = new ArticleTitleHisCache();
|
|
|
- BeanUtils.copyProperties(content, cache);
|
|
|
- cache.setHisPublishArticleList(JSONObject.toJSONString(content.getHisPublishArticleList()));
|
|
|
- cache.setCreateTimestamp(System.currentTimeMillis());
|
|
|
- articleTitleHisCacheRepository.save(cache);
|
|
|
+ List<ArticleTitleHisCache> cacheList = new ArrayList<>();
|
|
|
+ try {
|
|
|
+ for (Content content : saveList) {
|
|
|
+ if (CollectionUtils.isEmpty(content.getHisPublishArticleList())) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ ArticleTitleHisCache cache = new ArticleTitleHisCache();
|
|
|
+ BeanUtils.copyProperties(content, cache);
|
|
|
+ cache.setType(type);
|
|
|
+ if (CollectionUtils.isNotEmpty(content.getCategory())) {
|
|
|
+ cache.setCategory(JSONObject.toJSONString(content.getCategory()));
|
|
|
+ }
|
|
|
+ cache.setHisPublishArticleList(JSONObject.toJSONString(content.getHisPublishArticleList()));
|
|
|
+ cache.setCreateTimestamp(System.currentTimeMillis());
|
|
|
+ cacheList.add(cache);
|
|
|
+ }
|
|
|
+ if (CollectionUtils.isEmpty(cacheList)) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ longArticleBaseMapper.batchInsertArticleTitleHisCache(cacheList);
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("saveArticleTitleHisCache error:{}", e.getMessage());
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
public Map<String, Content> getArticleTitleHisCacheMap(List<TitleHisCacheParam> paramList, String type) {
|
|
@@ -475,15 +478,18 @@ public class RecallService implements ApplicationContextAware {
|
|
|
article.setInnerAccount(true);
|
|
|
avgViewCount = Optional.ofNullable(indexMap.get(hisArticle.getItemIndex().toString()).getReadAvg())
|
|
|
.orElse(0.0).intValue();
|
|
|
-// } else {
|
|
|
-// if (ArticleTypeEnum.QUNFA.getVal().equals(type)) {
|
|
|
+ } else {
|
|
|
+ if (ArticleTypeEnum.QUNFA.getVal().equals(type)) {
|
|
|
+ log.error("历史表现阅读均值获取失败 ghId:{} accountName:{} date:{} index:{}",
|
|
|
+ hisArticle.getGhId(), hisArticle.getAccountName(), hisPublishDate,
|
|
|
+ hisArticle.getItemIndex());
|
|
|
// FeishuMessageSender.sendWebHookMessage(FeishuRobotIdEnum.RECOMMEND.getRobotId(),
|
|
|
// "历史表现阅读均值获取失败\n"
|
|
|
// + "ghId: " + hisArticle.getGhId() + "\n"
|
|
|
// + "账号名称: " + hisArticle.getAccountName() + "\n"
|
|
|
// + "日期: " + hisPublishDate + "\n"
|
|
|
// + "位置: " + hisArticle.getItemIndex());
|
|
|
-// }
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
article.setAvgViewCount(avgViewCount);
|
|
@@ -512,7 +518,6 @@ public class RecallService implements ApplicationContextAware {
|
|
|
res.getHisPublishArticleList().add(article);
|
|
|
}
|
|
|
// 设置头条阅读均值
|
|
|
- setT0Data(res);
|
|
|
result.put(cacheParam.getTitleMd5(), res);
|
|
|
}
|
|
|
return result;
|
|
@@ -537,14 +542,17 @@ public class RecallService implements ApplicationContextAware {
|
|
|
if (CollectionUtils.isEmpty(article.getArticleDetailInfoList())) {
|
|
|
// 仅判断7.12以后发布文章
|
|
|
if (article.getPublishTimestamp() > 1720713600 && contentHisFeishuEnable) {
|
|
|
- FeishuMessageSender.sendWebHookMessage(FeishuRobotIdEnum.RECOMMEND.getRobotId(),
|
|
|
- "历史表现裂变特征获取失败\n"
|
|
|
- + "ghId: " + article.getGhId() + "\n"
|
|
|
- + "账号名称: " + article.getAccountName() + "\n"
|
|
|
- + "位置: " + article.getItemIndex() + "\n"
|
|
|
- + "标题: " + article.getTitle() + "\n"
|
|
|
- + "发布时间: " + DateUtils.timestampToYMDStr(article.getPublishTimestamp(), "yyyyMMdd") + "\n"
|
|
|
- + "wxsn: " + article.getWxSn());
|
|
|
+ log.error("历史表现裂变特征获取失败 ghId:{} accountName:{} itemIndex:{} title:{} date:{} wxsn:{}",
|
|
|
+ article.getGhId(), article.getAccountName(), article.getItemIndex(), article.getTitle(),
|
|
|
+ DateUtils.timestampToYMDStr(article.getPublishTimestamp(), "yyyyMMdd"), article.getWxSn());
|
|
|
+// FeishuMessageSender.sendWebHookMessage(FeishuRobotIdEnum.RECOMMEND.getRobotId(),
|
|
|
+// "历史表现裂变特征获取失败\n"
|
|
|
+// + "ghId: " + article.getGhId() + "\n"
|
|
|
+// + "账号名称: " + article.getAccountName() + "\n"
|
|
|
+// + "位置: " + article.getItemIndex() + "\n"
|
|
|
+// + "标题: " + article.getTitle() + "\n"
|
|
|
+// + "发布时间: " + DateUtils.timestampToYMDStr(article.getPublishTimestamp(), "yyyyMMdd") + "\n"
|
|
|
+// + "wxsn: " + article.getWxSn());
|
|
|
}
|
|
|
continue;
|
|
|
}
|