Browse Source

发布内容查找根内容id

wangyunpeng 8 months ago
parent
commit
da6018364c
18 changed files with 509 additions and 8 deletions
  1. 43 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/common/enums/aigc/PublishContentTypeEnum.java
  2. 1 1
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/common/enums/aigc/PushTypeEnum.java
  3. 5 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/aigc/AigcBaseMapper.java
  4. 6 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/crawler/CrawlerBaseMapper.java
  5. 11 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/CrawlerContent.java
  6. 82 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/aigc/PublishContentOutput.java
  7. 8 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/crawler/Article.java
  8. 10 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/param/ArticleFindSourceParam.java
  9. 2 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/aigc/PublishAccountRepository.java
  10. 13 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/aigc/PublishContentOutputRepository.java
  11. 4 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/crawler/ArticleRepository.java
  12. 244 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/ArticleService.java
  13. 15 5
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/DataDashboardService.java
  14. 1 1
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/RecommendService.java
  15. 1 1
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/util/TitleSimilarCheckUtil.java
  16. 24 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/web/recommend/ArticleController.java
  17. 21 0
      long-article-recommend-service/src/main/resources/mapper/aigc/AigcBaseMapper.xml
  18. 18 0
      long-article-recommend-service/src/main/resources/mapper/crawler/CrawlerBaseMapper.xml

+ 43 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/common/enums/aigc/PublishContentTypeEnum.java

@@ -0,0 +1,43 @@
+package com.tzld.longarticle.recommend.server.common.enums.aigc;
+
+import lombok.Getter;
+
+import java.util.Objects;
+
+@Getter
+public enum PublishContentTypeEnum {
+    cover(1, "封面"),
+    image(2, "图片"),
+    title(3, "标题"),
+    bodyText(4, "正文"),
+    animation(5, "动效"),
+    video(6, "视频"),
+    textLetter(7, "文本私信"),
+    imageLetter(8, "图片私信"),
+    noteLetter(9, "笔记私信"),
+    textComment(10, "文本评论"),
+    imageComment(11, "图片评论"),
+    commentId(12, "动作对象为评论,被评论/被删除的/被点赞的评论ID"),
+    contentStageId(13, "动作对象为内容,被评论的/被隐藏的/被删除的/被修改的内容的三方平台ID"),
+    receiverUserPlatformId(14, "动作对象为用户,动作接收人三方平台用户ID"),
+    actionAccountId(15, "动作账号ID"),
+    other(999, "其他"),
+    ;
+
+    private final Integer val;
+    private final String description;
+
+    PublishContentTypeEnum(Integer val, String description) {
+        this.val = val;
+        this.description = description;
+    }
+
+    public static PublishContentTypeEnum from(Integer val) {
+        for (PublishContentTypeEnum statusEnum : PublishContentTypeEnum.values()) {
+            if (Objects.equals(statusEnum.val, val)) {
+                return statusEnum;
+            }
+        }
+        return other;
+    }
+}

+ 1 - 1
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/common/enums/recommend/PushTypeEnum.java → long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/common/enums/aigc/PushTypeEnum.java

@@ -1,4 +1,4 @@
-package com.tzld.longarticle.recommend.server.common.enums.recommend;
+package com.tzld.longarticle.recommend.server.common.enums.aigc;
 
 import lombok.Getter;
 

+ 5 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/aigc/AigcBaseMapper.java

@@ -1,6 +1,7 @@
 package com.tzld.longarticle.recommend.server.mapper.aigc;
 
 import com.tzld.longarticle.recommend.server.model.dto.AccountTypeFansDTO;
+import com.tzld.longarticle.recommend.server.model.dto.CrawlerContent;
 import com.tzld.longarticle.recommend.server.model.dto.NotPublishPlan;
 import com.tzld.longarticle.recommend.server.model.dto.ProduceContentDTO;
 import com.tzld.longarticle.recommend.server.model.entity.aigc.*;
@@ -32,4 +33,8 @@ public interface AigcBaseMapper {
     List<AccountTypeFansDTO> getAccountTypeFans();
 
     List<PublishContent> getHisPublishByTitles(List<String> titleList);
+
+    List<PublishContent> getNearestPublishContent(String publishAccountId, Integer size);
+
+    CrawlerContent getCrawlerContentByChannelContentId(String channelContentId);
 }

+ 6 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/crawler/CrawlerBaseMapper.java

@@ -2,6 +2,7 @@ package com.tzld.longarticle.recommend.server.mapper.crawler;
 
 import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountAvgInfo;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountCorrelation;
+import com.tzld.longarticle.recommend.server.model.entity.crawler.Article;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.LongArticlesRootSourceId;
 
 import java.util.List;
@@ -18,4 +19,9 @@ public interface CrawlerBaseMapper {
 
     void updateAccountAvgInfoStatus(String ghId, String date);
 
+    void updateArticleAigcId(String wxsn, String publishContentId, String channelContentId);
+
+    void updateArticleSourceRootId(String wxsn, String sourcePublishContentId, String rootPublishContentId);
+
+    List<Article> getWaitingFindArticle(Long timestamp);
 }

+ 11 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/CrawlerContent.java

@@ -0,0 +1,11 @@
+package com.tzld.longarticle.recommend.server.model.dto;
+
+import lombok.Data;
+
+@Data
+public class CrawlerContent { // result row of AigcBaseMapper.getCrawlerContentByChannelContentId
+    private String channelContentId; // selected from crawler_content.channel_content_id
+    private String ghId; // selected from crawler_account.wx_gh (aliased as ghId in the mapper)
+    private String title; // selected from crawler_content.title
+    private Long publishTimestamp; // selected from crawler_content.publish_timestamp; unit not visible here - TODO confirm
+}

+ 82 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/aigc/PublishContentOutput.java

@@ -0,0 +1,82 @@
+package com.tzld.longarticle.recommend.server.model.entity.aigc;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+import javax.persistence.*;
+import java.math.BigDecimal;
+
+@Data
+@AllArgsConstructor
+@NoArgsConstructor
+@Entity
+@Table(name = "publish_content_output")
+public class PublishContentOutput { // JPA mapping of one row of publish_content_output
+
+    @Id
+    @GeneratedValue(strategy = GenerationType.IDENTITY)
+    @Column(name = "id", nullable = false)
+    private Long id; // database-generated primary key
+
+    @Column(name = "publish_content_id", length = 64)
+    private String publishContentId; // id of the publish content this output belongs to
+
+    @Column(name = "content_type", nullable = false)
+    private Integer contentType; // ArticleService filters on PublishContentTypeEnum.title.getVal(); presumably always a PublishContentTypeEnum code - TODO confirm
+
+    @Column(name = "source_output_id", length = 64)
+    private String sourceOutputId;
+
+    @Column(name = "group_id", length = 64)
+    private String groupId;
+
+    @Column(name = "output_from", columnDefinition = "int default 1")
+    private Integer outputFrom = 1; // Java-side default mirrors the column default
+
+    @Column(name = "output", columnDefinition = "mediumtext", nullable = false)
+    private String output; // the produced content; ArticleService treats it as the title text when contentType is title
+
+    @Column(name = "output_ref_data", columnDefinition = "mediumtext")
+    private String outputRefData;
+
+    @Column(name = "`index`")
+    private Integer index; // column name is back-quoted because index is a reserved SQL word
+
+    @Column(name = "select_status")
+    private Integer selectStatus; // ArticleService queries with value 1; NOTE(review): presumably 1 means selected - confirm
+
+    @Column(name = "require_word_num")
+    private Integer requireWordNum;
+
+    @Column(name = "require_word_operator", length = 20)
+    private String requireWordOperator;
+
+    @Column(name = "word_num")
+    private Integer wordNum;
+
+    @Column(name = "require_similarity_ratio", precision = 5, scale = 2)
+    private BigDecimal requireSimilarityRatio;
+
+    @Column(name = "require_similarity_operator", length = 20)
+    private String requireSimilarityOperator;
+
+    @Column(name = "similarity_ratio", precision = 6, scale = 2)
+    private BigDecimal similarityRatio;
+
+    @Column(name = "edit_account", length = 255)
+    private String editAccount;
+
+    @Column(name = "edit_timestamp")
+    private Long editTimestamp;
+
+    @Column(name = "edit_param", length = 2048)
+    private String editParam;
+
+    @Column(name = "create_timestamp", nullable = false)
+    private Long createTimestamp;
+
+    @Column(name = "update_timestamp")
+    private Long updateTimestamp;
+
+}

+ 8 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/crawler/Article.java

@@ -72,5 +72,13 @@ public class Article implements Serializable {
     private String articleGroup;
     @Column(name = "status")
     private Integer status;
+    @Column(name = "publish_content_id")
+    private String publishContentId;
+    @Column(name = "channel_content_id")
+    private String channelContentId;
+    @Column(name = "source_publish_content_id")
+    private String sourcePublishContentId;
+    @Column(name = "root_publish_content_id")
+    private String rootPublishContentId;
 }
 

+ 10 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/param/ArticleFindSourceParam.java

@@ -0,0 +1,10 @@
+package com.tzld.longarticle.recommend.server.model.param;
+
+import lombok.Data;
+
+@Data
+public class ArticleFindSourceParam { // request body of POST /article/findSource
+
+    private String dateStr; // optional; parsed with pattern yyyyMMdd by ArticleService.findSource
+    private String wxSn; // NOTE(review): not read by ArticleService.findSource as shown in this change - confirm intended use
+}

+ 2 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/aigc/PublishAccountRepository.java

@@ -10,4 +10,6 @@ import java.util.List;
 public interface PublishAccountRepository extends JpaRepository<PublishAccount, String> {
 
     List<PublishAccount> getAllByGhIdIn(List<String> ghIds);
+
+    PublishAccount getByGhId(String ghId);
 }

+ 13 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/aigc/PublishContentOutputRepository.java

@@ -0,0 +1,13 @@
+package com.tzld.longarticle.recommend.server.repository.aigc;
+
+import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishContentOutput;
+import org.springframework.data.jpa.repository.JpaRepository;
+import org.springframework.stereotype.Repository;
+
+import java.util.List;
+
+/**
+ * Spring Data repository for {@link PublishContentOutput}.
+ * <p>
+ * The ID type parameter is {@code Long} to match the entity's {@code @Id} field, so the inherited
+ * id-based methods ({@code findById}, {@code deleteById}, ...) accept the correct key type.
+ */
+@Repository
+public interface PublishContentOutputRepository extends JpaRepository<PublishContentOutput, Long> {
+
+    /**
+     * Derived query: outputs of the given publish contents with the given content type and select status.
+     *
+     * @param publishContentIds publish content ids to match (SQL {@code IN})
+     * @param contentType       content type code to match
+     * @param selectStatus      select status to match
+     * @return matching rows
+     */
+    List<PublishContentOutput> getByPublishContentIdInAndContentTypeAndSelectStatus(List<String> publishContentIds, Integer contentType, Integer selectStatus);
+}

+ 4 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/crawler/ArticleRepository.java

@@ -29,4 +29,8 @@ public interface ArticleRepository extends JpaRepository<Article, String> {
 
     List<Article> getByGhIdAndItemIndexAndTypeEqualsAndStatusEquals(String ghId, Integer itemIndex, String type, Integer status);
 
+    List<Article> getByChannelContentIdAndRootPublishContentIdIsNotNull(String channelContentId);
+
+    Article getByWxSn(String wxSn);
+
 }

+ 244 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/ArticleService.java

@@ -0,0 +1,244 @@
+package com.tzld.longarticle.recommend.server.service.recommend;
+
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import com.tzld.longarticle.recommend.server.common.CommonThreadPoolExecutor;
+import com.tzld.longarticle.recommend.server.common.enums.aigc.PublishContentTypeEnum;
+import com.tzld.longarticle.recommend.server.mapper.aigc.AigcBaseMapper;
+import com.tzld.longarticle.recommend.server.mapper.crawler.CrawlerBaseMapper;
+import com.tzld.longarticle.recommend.server.model.dto.CrawlerContent;
+import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishAccount;
+import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishContent;
+import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishContentOutput;
+import com.tzld.longarticle.recommend.server.model.entity.crawler.Article;
+import com.tzld.longarticle.recommend.server.model.param.ArticleFindSourceParam;
+import com.tzld.longarticle.recommend.server.repository.aigc.PublishAccountRepository;
+import com.tzld.longarticle.recommend.server.repository.aigc.PublishContentOutputRepository;
+import com.tzld.longarticle.recommend.server.repository.crawler.AccountAvgInfoRepository;
+import com.tzld.longarticle.recommend.server.repository.crawler.ArticleRepository;
+import com.tzld.longarticle.recommend.server.service.recommend.config.AccountIndexAvgViewCountService;
+import com.tzld.longarticle.recommend.server.util.DateUtils;
+import com.tzld.longarticle.recommend.server.util.TitleSimilarCheckUtil;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.collections4.CollectionUtils;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Service;
+import org.springframework.util.StringUtils;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.concurrent.*;
+import java.util.stream.Collectors;
+
+/**
+ * @author dyp
+ */
+@Service
+@Slf4j
+public class ArticleService {
+
+    @Autowired
+    AccountIndexAvgViewCountService accountIndexAvgViewCountService; // not referenced by the methods shown in this class
+    @Autowired
+    AccountAvgInfoRepository accountAvgInfoRepository; // not referenced by the methods shown in this class
+    @Autowired
+    ArticleRepository articleRepository; // crawler-side article lookups (by wxSn, by channel content id)
+    @Autowired
+    PublishContentOutputRepository publishContentOutputRepository; // selected title outputs of publish contents
+    @Autowired
+    PublishAccountRepository publishAccountRepository; // resolves a publish account from a ghId
+    @Autowired
+    AigcBaseMapper aigcBaseMapper; // publish content and crawler content queries
+    @Autowired
+    CrawlerBaseMapper crawlerBaseMapper; // batch scan and id write-back on official_articles_v2
+
+    private final static ExecutorService pool = new CommonThreadPoolExecutor( // shared by every findSource call to process article batches
+            32, // presumably core pool size, as in ThreadPoolExecutor - TODO confirm against CommonThreadPoolExecutor
+            128, // presumably maximum pool size - TODO confirm
+            0L, TimeUnit.SECONDS,
+            new LinkedBlockingQueue<>(1000), // bounded work queue
+            new ThreadFactoryBuilder().setNameFormat("DEFAULT-%d").build(),
+            new ThreadPoolExecutor.AbortPolicy()); // JDK AbortPolicy throws RejectedExecutionException on rejection
+
+    /**
+     * Scans crawler articles in ascending update-time order and, for each one, resolves the publish content
+     * it matches plus its source/root publish content ids (see {@code syncAigcIdByWxSn}).
+     * <p>
+     * The scan cursor starts at {@code dateStr} (pattern {@code yyyyMMdd}) minus {@code 86400 * 7}, or at the
+     * start of today minus the same offset when no date is supplied (seven days, assuming the timestamps are
+     * epoch seconds - TODO confirm against {@code DateUtils}). Articles are fetched in batches through
+     * {@code crawlerBaseMapper.getWaitingFindArticle}; each batch runs on the shared pool and this method
+     * blocks until the batch completes before fetching the next one. It returns when a fetch yields no rows
+     * or when the calling thread is interrupted.
+     *
+     * @param param request carrying the optional start date; {@code null} is treated as "no date"
+     */
+    public void findSource(ArticleFindSourceParam param) {
+        // Use the requested window as the cursor start; a fixed timestamp here would ignore the request.
+        long minUpdateTimestamp;
+        if (Objects.nonNull(param) && StringUtils.hasText(param.getDateStr())) {
+            minUpdateTimestamp = DateUtils.dateStrToTimestamp(param.getDateStr(), "yyyyMMdd") - 86400 * 7;
+        } else {
+            minUpdateTimestamp = DateUtils.getTodayStart() - 86400 * 7;
+        }
+        while (true) {
+            List<Article> articleList = crawlerBaseMapper.getWaitingFindArticle(minUpdateTimestamp);
+            if (CollectionUtils.isEmpty(articleList)) {
+                return;
+            }
+            CountDownLatch cdl = new CountDownLatch(articleList.size());
+            for (Article article : articleList) {
+                String wxSn = article.getWxSn();
+                pool.execute(() -> {
+                    try {
+                        syncAigcIdByWxSn(wxSn);
+                    } catch (Exception e) {
+                        // One bad article must not go unnoticed nor affect the rest of the batch.
+                        log.error("syncAigcIdByWxSn error wxSn:{}", wxSn, e);
+                    } finally {
+                        cdl.countDown();
+                    }
+                });
+                // Advance the cursor to the largest update time seen so the next fetch moves forward.
+                minUpdateTimestamp = Math.max(minUpdateTimestamp, article.getUpdateTime());
+            }
+            try {
+                cdl.await();
+            } catch (InterruptedException e) {
+                // Restore the interrupt flag and stop scanning instead of looping on an interrupted thread.
+                Thread.currentThread().interrupt();
+                log.error("cdl error", e);
+                return;
+            }
+            log.info("findSource timestamp:{}", minUpdateTimestamp);
+        }
+    }
+
+    /**
+     * Links one crawler article to the publish content it was published from and records the ids.
+     * <p>
+     * Steps: load the article by {@code wxSn}; load the published contents of its account; match the article
+     * title against the selected title outputs (exact match first, otherwise the first title that
+     * {@link TitleSimilarCheckUtil#isSimilar} accepts); write publish/channel content ids back; then resolve
+     * and write the source/root publish content ids. Returns silently when any step finds nothing to match.
+     *
+     * @param wxSn identifier of the article to process
+     */
+    private void syncAigcIdByWxSn(String wxSn) {
+        Article article = articleRepository.getByWxSn(wxSn);
+        if (Objects.isNull(article)) {
+            return;
+        }
+        String title = article.getTitle();
+        PublishAccount publishAccount = publishAccountRepository.getByGhId(article.getGhId());
+        // Same guard getRootPublishContent applies: without a publish account there is nothing to match.
+        if (Objects.isNull(publishAccount) || !StringUtils.hasText(title)) {
+            return;
+        }
+        List<PublishContent> publishContentList = aigcBaseMapper.getNearestPublishContent(publishAccount.getId(), null);
+        if (CollectionUtils.isEmpty(publishContentList)) {
+            return;
+        }
+        log.info("syncAigcIdByWxSn publishContentList finish");
+        Map<String, PublishContent> publishContentMap = publishContentList.stream().collect(
+                Collectors.toMap(PublishContent::getId, publishContent -> publishContent, (first, second) -> first));
+        List<String> publishContentIds = publishContentList.stream().map(PublishContent::getId).collect(Collectors.toList());
+        List<PublishContentOutput> publishContentOutputList = publishContentOutputRepository.
+                getByPublishContentIdInAndContentTypeAndSelectStatus(publishContentIds, PublishContentTypeEnum.title.getVal(), 1);
+        Map<String, List<PublishContentOutput>> publishContentOutputMap = publishContentOutputList.stream().collect(
+                Collectors.groupingBy(PublishContentOutput::getOutput));
+        log.info("syncAigcIdByWxSn publishContentOutputList finish");
+        List<String> titles = publishContentOutputList.stream().map(PublishContentOutput::getOutput).collect(Collectors.toList());
+        // Exact title wins; otherwise take the first candidate judged similar, in query order.
+        String matchedTitle = null;
+        if (titles.contains(title)) {
+            matchedTitle = title;
+        } else {
+            for (String aTitle : titles) {
+                if (TitleSimilarCheckUtil.isSimilar(title, aTitle, TitleSimilarCheckUtil.SIMILARITY_THRESHOLD)) {
+                    matchedTitle = aTitle;
+                    break;
+                }
+            }
+        }
+        PublishContent publishContent = null;
+        if (Objects.nonNull(matchedTitle)) {
+            // The article update time is scaled by 1000 before comparison with publish timestamps.
+            publishContent = getPublishContentByTitle(publishContentOutputMap,
+                    publishContentMap, matchedTitle, article.getUpdateTime() * 1000);
+        }
+        log.info("syncAigcIdByWxSn titleMatch finish");
+        if (Objects.isNull(publishContent) || Objects.isNull(publishContent.getCrawlerChannelContentId())) {
+            return;
+        }
+        String publishContentId = publishContent.getId();
+        String channelContentId = publishContent.getCrawlerChannelContentId();
+        // Update official_articles_v2 with the matched ids
+        crawlerBaseMapper.updateArticleAigcId(wxSn, publishContentId, channelContentId);
+        // Walk back to the root record
+        long start = System.currentTimeMillis();
+        Article result = getRootPublishContent(channelContentId, null, publishContentId, 0);
+        log.info("syncAigcIdByWxSn getRootPublishContent finish cost:{}", System.currentTimeMillis() - start);
+        // Update source / root publish_content_id
+        crawlerBaseMapper.updateArticleSourceRootId(wxSn, result.getSourcePublishContentId(), result.getRootPublishContentId());
+    }
+
+    /**
+     * Picks, among the publish contents whose selected title output equals {@code title}, the one whose
+     * publish timestamp is closest to {@code publishTimestamp}.
+     *
+     * @param publishContentOutputMap title outputs grouped by their output text; must contain {@code title}
+     * @param publishContentMap       publish contents keyed by id
+     * @param title                   title text to look up
+     * @param publishTimestamp        reference timestamp for the proximity comparison
+     * @return the closest publish content as chosen by {@code getNearestContent}
+     */
+    private PublishContent getPublishContentByTitle(Map<String, List<PublishContentOutput>> publishContentOutputMap,
+                                                    Map<String, PublishContent> publishContentMap,
+                                                    String title,
+                                                    Long publishTimestamp) {
+        List<PublishContentOutput> matchedOutputs = publishContentOutputMap.get(title);
+        List<PublishContent> candidates = matchedOutputs.stream()
+                .map(PublishContentOutput::getPublishContentId)
+                .map(publishContentMap::get)
+                .collect(Collectors.toList());
+        return getNearestContent(candidates, publishTimestamp);
+    }
+
+    /**
+     * Returns the publish content whose publish timestamp has the smallest absolute distance to
+     * {@code publishTimestamp}. A single-element list is returned as is without inspecting timestamps;
+     * on ties the earlier element in the list wins; an empty list yields {@code null}.
+     *
+     * @param publishContents  candidates to choose from
+     * @param publishTimestamp reference timestamp
+     * @return the closest candidate, or {@code null} when the list is empty
+     */
+    private PublishContent getNearestContent(List<PublishContent> publishContents, Long publishTimestamp) {
+        if (publishContents.size() == 1) {
+            return publishContents.get(0);
+        }
+        PublishContent closest = null;
+        long closestGap = 0L;
+        for (PublishContent candidate : publishContents) {
+            long gap = Math.abs(candidate.getPublishTimestamp() - publishTimestamp);
+            // First candidate seeds the search; later ones replace it only when strictly closer.
+            if (closest == null || gap < closestGap) {
+                closest = candidate;
+                closestGap = gap;
+            }
+        }
+        return closest;
+    }
+
+    /**
+     * Follows the chain "crawler content -> publish content it was produced into -> that publish content's
+     * own crawler content -> ..." to find the root publish content of a channel content id.
+     * <p>
+     * Resolution order at each step:
+     * <ol>
+     *   <li>stop after more than 20 steps;</li>
+     *   <li>reuse the source/root ids of an article already resolved for this channel content id;</li>
+     *   <li>otherwise load the crawler content, find its account's published contents and match the title
+     *       (exact first, then the first similar one); the match becomes the new root candidate and its
+     *       crawler channel content id is followed recursively.</li>
+     * </ol>
+     * The walk ends when nothing matches, or when the next channel content id is blank or equal to the
+     * current one.
+     *
+     * @param channelContentId       channel content id to resolve
+     * @param sourcePublishContentId source id found so far; filled from the first hit when blank
+     * @param rootPublishContentId   root id found so far
+     * @param times                  number of steps already taken
+     * @return an {@link Article} carrying only the resolved source and root publish content ids
+     */
+    public Article getRootPublishContent(String channelContentId, String sourcePublishContentId, String rootPublishContentId,
+                                         int times) {
+        Article result = new Article();
+        result.setSourcePublishContentId(sourcePublishContentId);
+        result.setRootPublishContentId(rootPublishContentId);
+        if (times > 20) {
+            return result;
+        }
+        // Shortcut: an article with this channel content id already has its root resolved.
+        List<Article> articleList = articleRepository.getByChannelContentIdAndRootPublishContentIdIsNotNull(channelContentId);
+        if (CollectionUtils.isNotEmpty(articleList)) {
+            Article resolved = articleList.get(0);
+            if (!StringUtils.hasText(sourcePublishContentId)) {
+                result.setSourcePublishContentId(resolved.getSourcePublishContentId());
+            }
+            result.setRootPublishContentId(resolved.getRootPublishContentId());
+            return result;
+        }
+        CrawlerContent crawlerContent = aigcBaseMapper.getCrawlerContentByChannelContentId(channelContentId);
+        if (Objects.isNull(crawlerContent) || !StringUtils.hasText(crawlerContent.getGhId())) {
+            return result;
+        }
+        PublishAccount publishAccount = publishAccountRepository.getByGhId(crawlerContent.getGhId());
+        if (Objects.isNull(publishAccount)) {
+            return result;
+        }
+        String title = crawlerContent.getTitle();
+        if (!StringUtils.hasText(title)) {
+            return result;
+        }
+        List<PublishContent> publishContentList = aigcBaseMapper.getNearestPublishContent(publishAccount.getId(), null);
+        if (CollectionUtils.isEmpty(publishContentList)) {
+            return result;
+        }
+        Map<String, PublishContent> publishContentMap = publishContentList.stream().collect(
+                Collectors.toMap(PublishContent::getId, publishContent -> publishContent, (first, second) -> first));
+        List<String> publishContentIds = publishContentList.stream().map(PublishContent::getId).collect(Collectors.toList());
+        List<PublishContentOutput> publishContentOutputList = publishContentOutputRepository.
+                getByPublishContentIdInAndContentTypeAndSelectStatus(publishContentIds, PublishContentTypeEnum.title.getVal(), 1);
+        Map<String, List<PublishContentOutput>> publishContentOutputMap = publishContentOutputList.stream().collect(
+                Collectors.groupingBy(PublishContentOutput::getOutput));
+        List<String> titles = publishContentOutputList.stream().map(PublishContentOutput::getOutput).collect(Collectors.toList());
+        // Exact title wins; otherwise take the first candidate judged similar, in query order.
+        String matchedTitle = null;
+        if (titles.contains(title)) {
+            matchedTitle = title;
+        } else {
+            for (String aTitle : titles) {
+                if (TitleSimilarCheckUtil.isSimilar(title, aTitle, TitleSimilarCheckUtil.SIMILARITY_THRESHOLD)) {
+                    matchedTitle = aTitle;
+                    break;
+                }
+            }
+        }
+        if (Objects.isNull(matchedTitle)) {
+            return result;
+        }
+        PublishContent publishContent = getPublishContentByTitle(publishContentOutputMap,
+                publishContentMap, matchedTitle, crawlerContent.getPublishTimestamp());
+        if (Objects.isNull(publishContent)) {
+            return result;
+        }
+        if (!StringUtils.hasText(sourcePublishContentId)) {
+            result.setSourcePublishContentId(publishContent.getId());
+        }
+        result.setRootPublishContentId(publishContent.getId());
+        String nextChannelContentId = publishContent.getCrawlerChannelContentId();
+        // A blank or unchanged id means the chain ends here; this also avoids a null dereference.
+        if (!StringUtils.hasText(nextChannelContentId)
+                || nextChannelContentId.equals(channelContentId)
+                || nextChannelContentId.equals(crawlerContent.getChannelContentId())) {
+            return result;
+        }
+        return getRootPublishContent(nextChannelContentId, result.getSourcePublishContentId(),
+                result.getRootPublishContentId(), times + 1);
+    }
+
+}

+ 15 - 5
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/DataDashboardService.java

@@ -744,7 +744,7 @@ public class DataDashboardService {
     public void intermediateIndicatorsExport(String beginDate, String endDate) {
         List<String> dateStrList = DateUtils.getBeforeDays(beginDate, endDate, 3);
         for (String date : dateStrList) {
-            exportFeishuIntermediateIndicators(date, dateStrList, sheetToken, "OuaLWV");
+            exportFeishuIntermediateIndicators(date, dateStrList, sheetToken, "pn0ABU");
         }
     }
 
@@ -875,13 +875,25 @@ public class DataDashboardService {
             list = list.stream().filter(publish -> publish.getUpdateTime() < (article.getUpdateTime() - 3600 * 8))
                     .collect(Collectors.toList());
             Integer poolLevel = getArticlePoolLevel(data.getGhId(), list, small);
+            // L1层仅统计历史发布在3-8位置文章
+            if (poolLevel > 1 && "L1".equals(item.getType())) {
+                continue;
+            }
             List<String> titles = titleTypeMap.computeIfAbsent(type, k -> new ArrayList<>());
             Map<Integer, List<String>> titlePoolMap = titleTypePoolMap.computeIfAbsent(type, k -> new HashMap<>());
             List<String> poolTitles = titlePoolMap.computeIfAbsent(poolLevel, k -> new ArrayList<>());
 
+            AccountAvgInfo accountAvgInfo = getAccountAvgInfo(accountAvgInfoIndexMap, article.getGhId(),
+                    article.getUpdateTime(), article.getItemIndex());
+//            // L4层仅统计 稳定-大和稳定
+//            if ("L4".equals(item.getType()) && (Objects.isNull(accountAvgInfo)
+//                    || !StringUtils.hasText(accountAvgInfo.getAccountStatus())
+//                    || !accountAvgInfo.getAccountStatus().contains("稳定"))) {
+//                continue;
+//            }
             // 发布情况
             setPublishSituation(item, type, titleTypeMap, titleTypePoolMap, poolLevel, article, fansAccountTypeMap,
-                    accountAvgInfoIndexMap);
+                    accountAvgInfo);
             // 发布表现
             setPublishPerformance(item, data, publishSortLogMap);
             // 发布依赖表现
@@ -1217,7 +1229,7 @@ public class DataDashboardService {
     private void setPublishSituation(IntermediateIndicatorsExport item, String type, Map<String, List<String>> titleTypeMap,
                                      Map<String, Map<Integer, List<String>>> titleTypePoolMap, Integer poolLevel,
                                      Article article, Map<String, List<String>> fansAccountTypeMap,
-                                     Map<String, Map<String, Map<String, AccountAvgInfo>>> accountAvgInfoIndexMap) {
+                                     AccountAvgInfo accountAvgInfo) {
         item.setArticleReleaseSlot(item.getArticleReleaseSlot() + 1);
         item.setActualArticleReleaseCount(item.getActualArticleReleaseCount() + 1);
         if (titleTypeMap.containsKey(type) && !titleTypeMap.get(type).contains(article.getTitle())) {
@@ -1227,8 +1239,6 @@ public class DataDashboardService {
         List<String> ghIds = fansAccountTypeMap.computeIfAbsent(type, k -> new ArrayList<>());
         if (!ghIds.contains(article.getGhId())) {
             ghIds.add(article.getGhId());
-            AccountAvgInfo accountAvgInfo = getAccountAvgInfo(accountAvgInfoIndexMap, article.getGhId(),
-                    article.getUpdateTime(), article.getItemIndex());
             if (Objects.nonNull(accountAvgInfo)) {
                 item.setFansCount(item.getFansCount() + accountAvgInfo.getFans());
             }

+ 1 - 1
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/RecommendService.java

@@ -3,7 +3,7 @@ package com.tzld.longarticle.recommend.server.service.recommend;
 import com.alibaba.fastjson.JSONObject;
 import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
 import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticleTypeEnum;
-import com.tzld.longarticle.recommend.server.common.enums.recommend.PushTypeEnum;
+import com.tzld.longarticle.recommend.server.common.enums.aigc.PushTypeEnum;
 import com.tzld.longarticle.recommend.server.common.enums.recommend.RankStrategyEnum;
 import com.tzld.longarticle.recommend.server.mapper.aigc.AigcBaseMapper;
 import com.tzld.longarticle.recommend.server.mapper.crawler.ArticleUserGroupMapper;

+ 1 - 1
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/util/TitleSimilarCheckUtil.java

@@ -9,7 +9,7 @@ import java.util.Set;
 
 public class TitleSimilarCheckUtil {
 
-    private static final double SIMILARITY_THRESHOLD = 0.8; // 相似度阈值
+    public static final double SIMILARITY_THRESHOLD = 0.8; // 相似度阈值
 
     public static boolean isDuplicateContent(String title, List<String> existsContentTitle) {
         boolean result = false;

+ 24 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/web/recommend/ArticleController.java

@@ -0,0 +1,24 @@
+package com.tzld.longarticle.recommend.server.web.recommend;
+
+import com.tzld.longarticle.recommend.server.common.response.CommonResponse;
+import com.tzld.longarticle.recommend.server.model.param.ArticleFindSourceParam;
+import com.tzld.longarticle.recommend.server.service.recommend.ArticleService;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.web.bind.annotation.*;
+
+/**
+ * REST endpoints under {@code /article}.
+ */
+@RestController
+@RequestMapping("/article")
+@Slf4j
+public class ArticleController {
+
+    @Autowired
+    private ArticleService articleService;
+
+    /**
+     * Triggers {@link ArticleService#findSource(ArticleFindSourceParam)}. The call is synchronous: the
+     * response is produced only after the service method returns.
+     *
+     * @param param request body forwarded unchanged to the service
+     * @return an empty success response
+     */
+    @PostMapping("/findSource")
+    public CommonResponse<Void> findSource(@RequestBody ArticleFindSourceParam param) {
+        articleService.findSource(param);
+        return CommonResponse.success();
+    }
+
+}

+ 21 - 0
long-article-recommend-service/src/main/resources/mapper/aigc/AigcBaseMapper.xml

@@ -144,5 +144,26 @@
         </foreach>
     </select>
 
+    <select id="getNearestPublishContent"
+            resultType="com.tzld.longarticle.recommend.server.model.entity.aigc.PublishContent"> <!-- Publish contents of one account, newest publish_timestamp first. NOTE(review): meaning of channel = 5 and status = 2 is not visible here; confirm. -->
+        select *
+        from publish_content
+        where publish_account_id = #{publishAccountId}
+          and channel = 5
+          and status = 2
+        order by publish_timestamp desc
+        <if test="size != null"> <!-- Row cap is applied only when a size is passed; ArticleService passes null, so all rows are returned. -->
+            limit #{size}
+        </if>
+    </select>
+
+    <select id="getCrawlerContentByChannelContentId"
+            resultType="com.tzld.longarticle.recommend.server.model.dto.CrawlerContent"> <!-- Crawler content joined with its crawler account to expose the account's wx_gh as ghId. NOTE(review): channel_content_id and publish_timestamp are not aliased, so mapping them onto the DTO presumably relies on underscore-to-camel-case mapping being enabled; confirm. -->
+        select cc.channel_content_id, ca.wx_gh as ghId, cc.title, cc.publish_timestamp
+        from crawler_content cc
+        join crawler_account ca on cc.channel_account_id = ca.channel_account_id
+        where cc.channel_content_id = #{channelContentId}
+    </select>
+
 
 </mapper>

+ 18 - 0
long-article-recommend-service/src/main/resources/mapper/crawler/CrawlerBaseMapper.xml

@@ -40,4 +40,22 @@
         update account_avg_info_v3 set status = 0 where gh_id = #{ghId} and update_time != #{date}
     </update>
 
+    <update id="updateArticleAigcId"> <!-- Writes the matched publish content id and channel content id onto the article row identified by wx_sn. -->
+        update official_articles_v2
+        set publish_content_id = #{publishContentId},
+            channel_content_id = #{channelContentId}
+        where wx_sn = #{wxsn}
+    </update>
+    <update id="updateArticleSourceRootId"> <!-- Writes the resolved source and root publish content ids onto the article row identified by wx_sn. -->
+        update official_articles_v2
+        set source_publish_content_id = #{sourcePublishContentId},
+            root_publish_content_id = #{rootPublishContentId}
+        where wx_sn = #{wxsn}
+    </update>
+
+    <select id="getWaitingFindArticle"
+            resultType="com.tzld.longarticle.recommend.server.model.entity.crawler.Article"> <!-- Next batch of at most 100 articles with updateTime strictly greater than the cursor, ascending. NOTE(review): the caller advances the cursor to the largest updateTime returned, so rows sharing that value but cut off by the limit would be skipped; confirm this is acceptable. -->
+        select * from official_articles_v2 where updateTime > #{timestamp} order by updateTime limit 100
+    </select>
+
 </mapper>