|
@@ -0,0 +1,310 @@
|
|
|
+package com.tzld.longarticle.recommend.server.service.recommend;
|
|
|
+
|
|
|
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
|
|
+import com.tzld.longarticle.recommend.server.common.CommonThreadPoolExecutor;
|
|
|
+import com.tzld.longarticle.recommend.server.common.enums.StatusEnum;
|
|
|
+import com.tzld.longarticle.recommend.server.common.enums.aigc.PublishContentTypeEnum;
|
|
|
+import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticlePoolPromotionSourceStatusEnum;
|
|
|
+import com.tzld.longarticle.recommend.server.mapper.aigc.AigcBaseMapper;
|
|
|
+import com.tzld.longarticle.recommend.server.mapper.crawler.CrawlerBaseMapper;
|
|
|
+import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
|
|
|
+import com.tzld.longarticle.recommend.server.model.dto.CrawlerContent;
|
|
|
+import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishAccount;
|
|
|
+import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishContent;
|
|
|
+import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishContentOutput;
|
|
|
+import com.tzld.longarticle.recommend.server.model.entity.crawler.Article;
|
|
|
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticlePoolPromotionSource;
|
|
|
+import com.tzld.longarticle.recommend.server.model.param.ArticleFindSourceParam;
|
|
|
+import com.tzld.longarticle.recommend.server.model.vo.RootPublishContentVO;
|
|
|
+import com.tzld.longarticle.recommend.server.repository.aigc.PublishAccountRepository;
|
|
|
+import com.tzld.longarticle.recommend.server.repository.aigc.PublishContentOutputRepository;
|
|
|
+import com.tzld.longarticle.recommend.server.repository.crawler.AccountAvgInfoRepository;
|
|
|
+import com.tzld.longarticle.recommend.server.repository.crawler.ArticleRepository;
|
|
|
+import com.tzld.longarticle.recommend.server.repository.longArticle.ArticlePoolPromotionSourceRepository;
|
|
|
+import com.tzld.longarticle.recommend.server.service.recommend.config.AccountIndexAvgViewCountService;
|
|
|
+import com.tzld.longarticle.recommend.server.util.DateUtils;
|
|
|
+import com.tzld.longarticle.recommend.server.util.TitleSimilarCheckUtil;
|
|
|
+import lombok.extern.slf4j.Slf4j;
|
|
|
+import org.apache.commons.collections4.CollectionUtils;
|
|
|
+import org.springframework.beans.BeanUtils;
|
|
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
+import org.springframework.stereotype.Service;
|
|
|
+import org.springframework.util.StringUtils;
|
|
|
+
|
|
|
+import java.util.ArrayList;
|
|
|
+import java.util.List;
|
|
|
+import java.util.Map;
|
|
|
+import java.util.Objects;
|
|
|
+import java.util.concurrent.*;
|
|
|
+import java.util.stream.Collectors;
|
|
|
+
|
|
|
+/**
|
|
|
+ * @author dyp
|
|
|
+ */
|
|
|
+@Service
|
|
|
+@Slf4j
|
|
|
+public class ArticleService {
|
|
|
+
|
|
|
+ @Autowired
|
|
|
+ AccountIndexAvgViewCountService accountIndexAvgViewCountService;
|
|
|
+ @Autowired
|
|
|
+ AccountAvgInfoRepository accountAvgInfoRepository;
|
|
|
+ @Autowired
|
|
|
+ ArticleRepository articleRepository;
|
|
|
+ @Autowired
|
|
|
+ PublishContentOutputRepository publishContentOutputRepository;
|
|
|
+ @Autowired
|
|
|
+ PublishAccountRepository publishAccountRepository;
|
|
|
+ @Autowired
|
|
|
+ AigcBaseMapper aigcBaseMapper;
|
|
|
+ @Autowired
|
|
|
+ CrawlerBaseMapper crawlerBaseMapper;
|
|
|
+ @Autowired
|
|
|
+ LongArticleBaseMapper longArticleBaseMapper;
|
|
|
+ @Autowired
|
|
|
+ ArticlePoolPromotionSourceRepository articlePoolPromotionSourceRepository;
|
|
|
+
|
|
|
+ private final static ExecutorService pool = new CommonThreadPoolExecutor(
|
|
|
+ 32,
|
|
|
+ 128,
|
|
|
+ 0L, TimeUnit.SECONDS,
|
|
|
+ new LinkedBlockingQueue<>(1000),
|
|
|
+ new ThreadFactoryBuilder().setNameFormat("DEFAULT-%d").build(),
|
|
|
+ new ThreadPoolExecutor.AbortPolicy());
|
|
|
+
|
|
|
+ public void findSource(ArticleFindSourceParam param) {
|
|
|
+ if (StringUtils.hasText(param.getWxSn())) {
|
|
|
+ syncAigcIdByWxSn(param.getWxSn());
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ long minUpdateTimestamp;
|
|
|
+ if (StringUtils.hasText(param.getDateStr())) {
|
|
|
+ minUpdateTimestamp = DateUtils.dateStrToTimestamp(param.getDateStr(), "yyyyMMdd") - 86400 * 7;
|
|
|
+ } else {
|
|
|
+ minUpdateTimestamp = DateUtils.getTodayStart() - 86400 * 7;
|
|
|
+ }
|
|
|
+ while (true) {
|
|
|
+ List<Article> articleList = crawlerBaseMapper.getWaitingFindArticle(minUpdateTimestamp);
|
|
|
+ if (CollectionUtils.isEmpty(articleList)) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ CountDownLatch cdl = new CountDownLatch(articleList.size());
|
|
|
+ for (Article article : articleList) {
|
|
|
+ pool.execute(() -> {
|
|
|
+ try {
|
|
|
+ syncAigcIdByWxSn(article.getWxSn());
|
|
|
+ } finally {
|
|
|
+ cdl.countDown();
|
|
|
+ }
|
|
|
+ });
|
|
|
+ minUpdateTimestamp = minUpdateTimestamp > article.getUpdateTime() ? minUpdateTimestamp : article.getUpdateTime();
|
|
|
+ }
|
|
|
+ try {
|
|
|
+ cdl.await();
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("cdl error", e);
|
|
|
+ }
|
|
|
+ log.info("findSource timestamp:{}", minUpdateTimestamp);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private void syncAigcIdByWxSn(String wxSn) {
|
|
|
+ Article article = articleRepository.getByWxSn(wxSn);
|
|
|
+ String ghId = article.getGhId();
|
|
|
+ String title = article.getTitle();
|
|
|
+ Long publishTimestamp = article.getUpdateTime() * 1000;
|
|
|
+ PublishAccount publishAccount = publishAccountRepository.getByGhId(ghId);
|
|
|
+ List<PublishContent> publishContentList = aigcBaseMapper.getNearestPublishContent(publishAccount.getId(), publishTimestamp, 100);
|
|
|
+ if (CollectionUtils.isEmpty(publishContentList)) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ String publishContentId = null;
|
|
|
+ String channelContentId = null;
|
|
|
+
|
|
|
+ PublishContent publishContent = findPublishContent(publishContentList, title, publishTimestamp);
|
|
|
+ if (Objects.nonNull(publishContent)) {
|
|
|
+ publishContentId = publishContent.getId();
|
|
|
+ channelContentId = publishContent.getCrawlerChannelContentId();
|
|
|
+ }
|
|
|
+ log.info("syncAigcIdByWxSn titleMatch finish");
|
|
|
+ if (Objects.isNull(channelContentId)) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ // 更新 official_article_v2
|
|
|
+ crawlerBaseMapper.updateArticleAigcId(wxSn, publishContentId, channelContentId);
|
|
|
+ // 查找记录根记录
|
|
|
+ long start = System.currentTimeMillis();
|
|
|
+ RootPublishContentVO result = getRootPublishContent(channelContentId, null, publishContentId, null, 0);
|
|
|
+ log.info("syncAigcIdByWxSn getRootPublishContent finish cost:{}", System.currentTimeMillis() - start);
|
|
|
+ // 更新source root publish_content_id
|
|
|
+ crawlerBaseMapper.updateArticleSourceRootId(wxSn, result.getSourcePublishContentId(), result.getRootPublishContentId(),
|
|
|
+ result.getRootProduceContentId());
|
|
|
+ }
|
|
|
+
|
|
|
+ private PublishContent findPublishContent(List<PublishContent> publishContentList,
|
|
|
+ String title,
|
|
|
+ Long publishTimestamp) {
|
|
|
+ Map<String, PublishContent> publishContentMap = publishContentList.stream().collect(
|
|
|
+ Collectors.toMap(PublishContent::getId, publishContent -> publishContent));
|
|
|
+ List<String> publishContentIds = publishContentList.stream().map(PublishContent::getId).collect(Collectors.toList());
|
|
|
+ List<PublishContentOutput> publishContentOutputList = publishContentOutputRepository.
|
|
|
+ getByPublishContentIdInAndContentTypeAndSelectStatus(publishContentIds, PublishContentTypeEnum.title.getVal(), 1);
|
|
|
+ Map<String, List<PublishContentOutput>> publishContentOutputMap = publishContentOutputList.stream().collect(
|
|
|
+ Collectors.groupingBy(PublishContentOutput::getOutput));
|
|
|
+ List<String> titles = publishContentOutputList.stream().map(PublishContentOutput::getOutput).collect(Collectors.toList());
|
|
|
+ PublishContent publishContent = null;
|
|
|
+ if (titles.contains(title)) {
|
|
|
+ publishContent = getPublishContentByTitle(publishContentOutputMap, publishContentMap, title,
|
|
|
+ publishTimestamp);
|
|
|
+ } else {
|
|
|
+ for (String aTitle : titles) {
|
|
|
+ if (TitleSimilarCheckUtil.isSimilar(title, aTitle, TitleSimilarCheckUtil.SIMILARITY_THRESHOLD)) {
|
|
|
+ publishContent = getPublishContentByTitle(publishContentOutputMap, publishContentMap, aTitle,
|
|
|
+ publishTimestamp);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return publishContent;
|
|
|
+ }
|
|
|
+
|
|
|
+ private PublishContent getPublishContentByTitle(Map<String, List<PublishContentOutput>> publishContentOutputMap,
|
|
|
+ Map<String, PublishContent> publishContentMap,
|
|
|
+ String title,
|
|
|
+ Long publishTimestamp) {
|
|
|
+ List<PublishContentOutput> outputList = publishContentOutputMap.get(title);
|
|
|
+ List<PublishContent> publishContents = outputList.stream().map(o -> publishContentMap.get(o.getPublishContentId()))
|
|
|
+ .collect(Collectors.toList());
|
|
|
+ return getNearestContent(publishContents, publishTimestamp);
|
|
|
+ }
|
|
|
+
|
|
|
+ private PublishContent getNearestContent(List<PublishContent> publishContents, Long publishTimestamp) {
|
|
|
+ if (publishContents.size() == 1) {
|
|
|
+ return publishContents.get(0);
|
|
|
+ }
|
|
|
+ PublishContent result = null;
|
|
|
+ Long nearest = 0L;
|
|
|
+ for (PublishContent publishContent : publishContents) {
|
|
|
+ Long timestamp = publishContent.getPublishTimestamp();
|
|
|
+ if (Objects.isNull(result)) {
|
|
|
+ result = publishContent;
|
|
|
+ nearest = timestamp;
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ if (Math.abs(timestamp - publishTimestamp) < Math.abs(nearest - publishTimestamp)) {
|
|
|
+ result = publishContent;
|
|
|
+ nearest = timestamp;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ public RootPublishContentVO getRootPublishContent(String channelContentId,
|
|
|
+ String sourcePublishContentId,
|
|
|
+ String rootPublishContentId,
|
|
|
+ String rootProduceContentId,
|
|
|
+ int times) {
|
|
|
+ RootPublishContentVO result = new RootPublishContentVO();
|
|
|
+ result.setChannelContentId(channelContentId);
|
|
|
+ result.setSourcePublishContentId(sourcePublishContentId);
|
|
|
+ result.setRootPublishContentId(rootPublishContentId);
|
|
|
+ result.setRootProduceContentId(rootProduceContentId);
|
|
|
+ if (times > 20) {
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+ CrawlerContent crawlerContent = aigcBaseMapper.getCrawlerContentByChannelContentId(channelContentId);
|
|
|
+ if (Objects.isNull(crawlerContent) || !StringUtils.hasText(crawlerContent.getGhId())) {
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+ PublishAccount publishAccount = publishAccountRepository.getByGhId(crawlerContent.getGhId());
|
|
|
+ if (Objects.isNull(publishAccount)) {
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+ String title = crawlerContent.getTitle();
|
|
|
+ Long publishTimestamp = crawlerContent.getPublishTimestamp();
|
|
|
+ List<PublishContent> publishContentList = aigcBaseMapper.getNearestPublishContent(publishAccount.getId(), publishTimestamp, 100);
|
|
|
+ if (CollectionUtils.isEmpty(publishContentList)) {
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+ PublishContent publishContent = findPublishContent(publishContentList, title, publishTimestamp);
|
|
|
+ if (Objects.nonNull(publishContent)) {
|
|
|
+ if (!StringUtils.hasText(sourcePublishContentId)) {
|
|
|
+ result.setSourcePublishContentId(publishContent.getId());
|
|
|
+ }
|
|
|
+ result.setRootPublishContentId(publishContent.getId());
|
|
|
+ result.setRootProduceContentId(publishContent.getSourceId());
|
|
|
+ channelContentId = publishContent.getCrawlerChannelContentId();
|
|
|
+ }
|
|
|
+ // channelContentId未被修改,说明未找到
|
|
|
+ if (channelContentId.equals(crawlerContent.getChannelContentId())) {
|
|
|
+ return result;
|
|
|
+ } else {
|
|
|
+ result = getRootPublishContent(channelContentId, result.getSourcePublishContentId(),
|
|
|
+ result.getRootPublishContentId(), result.getRootProduceContentId(), ++times);
|
|
|
+ if (!channelContentId.equals(result.getChannelContentId())) {
|
|
|
+ result.getMidChannelContentIds().add(channelContentId);
|
|
|
+ }
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public void articlePromotionTraceability(String channelContentId) {
|
|
|
+ // 查找待处理任务
|
|
|
+ List<ArticlePoolPromotionSource> tasks = new ArrayList<>();
|
|
|
+ if (StringUtils.hasText(channelContentId)) {
|
|
|
+ ArticlePoolPromotionSource item = articlePoolPromotionSourceRepository.getByChannelContentId(channelContentId);
|
|
|
+ tasks.add(item);
|
|
|
+ } else {
|
|
|
+ tasks = articlePoolPromotionSourceRepository.getByStatus(0);
|
|
|
+ }
|
|
|
+ long now = System.currentTimeMillis();
|
|
|
+ for (ArticlePoolPromotionSource task : tasks) {
|
|
|
+ // 溯源
|
|
|
+ Article article = articleRepository.getByWxSn(task.getWxSn());
|
|
|
+ PublishAccount publishAccount = publishAccountRepository.getByGhId(article.getGhId());
|
|
|
+ if (Objects.isNull(publishAccount)) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ long publishTimestamp = article.getPublishTimestamp() > 0 ? article.getPublishTimestamp() * 1000 : article.getUpdateTime() * 1000;
|
|
|
+ List<PublishContent> publishContentList = aigcBaseMapper.getNearestPublishContent(publishAccount.getId(), publishTimestamp, 100);
|
|
|
+ PublishContent publishContent = findPublishContent(publishContentList, task.getTitle(), publishTimestamp);
|
|
|
+ if (Objects.isNull(publishContent)) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ RootPublishContentVO source = getRootPublishContent(publishContent.getCrawlerChannelContentId(), null, publishContent.getId(), null, 0);
|
|
|
+ // 更新
|
|
|
+ if (StringUtils.hasText(source.getRootProduceContentId())) {
|
|
|
+ task.setStatus(ArticlePoolPromotionSourceStatusEnum.FINISH.getCode());
|
|
|
+ task.setSourcePublishContentId(source.getSourcePublishContentId());
|
|
|
+ task.setRootProduceContentId(source.getRootProduceContentId());
|
|
|
+ task.setRootPublishContentId(source.getRootPublishContentId());
|
|
|
+ task.setUpdateTimestamp(now);
|
|
|
+ articlePoolPromotionSourceRepository.save(task);
|
|
|
+ // 保存中间环节 晋级溯源
|
|
|
+ for (String midChannelContentId : source.getMidChannelContentIds()) {
|
|
|
+ ArticlePoolPromotionSource item = new ArticlePoolPromotionSource();
|
|
|
+ BeanUtils.copyProperties(task, item);
|
|
|
+ ArticlePoolPromotionSource dto = articlePoolPromotionSourceRepository.getByChannelContentId(midChannelContentId);
|
|
|
+ if (Objects.nonNull(dto)) {
|
|
|
+ // 以dto为基础
|
|
|
+ dto.setRootProduceContentId(task.getRootProduceContentId());
|
|
|
+ dto.setRootPublishContentId(task.getRootPublishContentId());
|
|
|
+ dto.setStatus(task.getStatus());
|
|
|
+ dto.setLevel(task.getLevel());
|
|
|
+ dto.setUpdateTimestamp(task.getUpdateTimestamp());
|
|
|
+ item = dto;
|
|
|
+ } else {
|
|
|
+ // 以新item为基础
|
|
|
+ item.setChannelContentId(midChannelContentId);
|
|
|
+ item.setCreateTimestamp(item.getUpdateTimestamp());
|
|
|
+ }
|
|
|
+ articlePoolPromotionSourceRepository.save(item);
|
|
|
+ }
|
|
|
+ longArticleBaseMapper.updateRootProduceContentLevel(task.getRootProduceContentId(), task.getLevel());
|
|
|
+ } else {
|
|
|
+ task.setDeleted(StatusEnum.SUCCESS.getCode());
|
|
|
+ articlePoolPromotionSourceRepository.save(task);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|