Explorar el Código

Merge branch 'master' into wyp/1202-bugFix

# Conflicts:
#	long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/crawler/CrawlerBaseMapper.java
wangyunpeng hace 7 meses
padre
commit
346056d00e
Se han modificado 22 ficheros con 1117 adiciones y 115 borrados
  1. 25 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/crawler/CrawlerBaseMapper.java
  2. 29 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/longArticle/LongArticleBaseMapper.java
  3. 22 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/ArticleMatchVideos.java
  4. 13 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/GetOffVideos.java
  5. 28 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/LongArticlesCrawlerVideos.java
  6. 21 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/LongArticlesMatchVideos.java
  7. 14 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/LongArticlesText.java
  8. 30 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/LongArticlesVideoDTO.java
  9. 2 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/crawler/ArticleRepository.java
  10. 380 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/DataFlushService.java
  11. 5 1
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/filter/FilterService.java
  12. 0 1
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/filter/strategy/BadStrategy.java
  13. 0 1
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/filter/strategy/ColdStartBackupFilterStrategy.java
  14. 0 1
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/filter/strategy/HistoryTitleForFwhColdStartStrategy.java
  15. 17 45
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/filter/strategy/HistoryTitleStrategy.java
  16. 0 1
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/filter/strategy/InfiniteHisTitleStrategy.java
  17. 19 53
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/filter/strategy/SensitiveStrategy.java
  18. 44 12
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/util/TitleSimilarCheckUtil.java
  19. 56 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/web/DataFlushController.java
  20. 37 0
      long-article-recommend-service/src/main/resources/mapper/crawler/CrawlerBaseMapper.xml
  21. 113 0
      long-article-recommend-service/src/main/resources/mapper/longArticle/LongArticleBaseMapper.xml
  22. 262 0
      long-article-recommend-service/src/test/java/com/tzld/longarticle/recommend/server/RecommendTest.java

+ 25 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/crawler/CrawlerBaseMapper.java

@@ -1,9 +1,14 @@
 package com.tzld.longarticle.recommend.server.mapper.crawler;
 
+import com.tzld.longarticle.recommend.server.model.dto.ArticleMatchVideos;
+import com.tzld.longarticle.recommend.server.model.dto.GetOffVideos;
+import com.tzld.longarticle.recommend.server.model.dto.LongArticlesText;
+import com.tzld.longarticle.recommend.server.model.dto.LongArticlesVideoDTO;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountAvgInfo;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountCorrelation;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.Article;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.ArticleDetailInfo;
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.LongArticlesRootSourceId;
 
 import java.util.List;
 import java.util.Set;
@@ -24,6 +29,26 @@ public interface CrawlerBaseMapper {
 
     List<Article> getWaitingFindArticle(Long timestamp);
 
+    Integer countGetOffVideos();
+
+    List<GetOffVideos> pageGetOffVideos(int offset, int pageSize);
+
+    Integer countLongArticlesRootSourceId();
+
+    List<LongArticlesRootSourceId> pageLongArticlesRootSourceId(int offset, int pageSize);
+
+    Integer countArticleMatchVideos();
+
+    List<ArticleMatchVideos> pageArticleMatchVideos();
+
+    Integer countLongArticlesVideos();
+
+    List<LongArticlesVideoDTO> pageLongArticlesVideos(long id, int pageSize);
+
+    List<LongArticlesText> getLongArticlesText();
+
+    List<LongArticlesVideoDTO> getLongArticlesVideo(List<String> traceIds);
+
     List<Article> getByTitleMd5InAndTypeEqualsAndStatusEquals(List<String> titleMd5s, String type, Integer status);
 
     List<ArticleDetailInfo> getAllByWxSnIn(List<String> wxSnList);

+ 29 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/longArticle/LongArticleBaseMapper.java

@@ -4,6 +4,7 @@ import com.tzld.longarticle.recommend.server.model.entity.longArticle.LongArticl
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleCategory;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.DatastatScore;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticlePoolPromotionSource;
+import com.tzld.longarticle.recommend.server.model.dto.*;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.DatastatSortStrategy;
 import org.apache.ibatis.annotations.Mapper;
 
@@ -32,4 +33,32 @@ public interface LongArticleBaseMapper {
                                                    Integer fans, String dateStr, List<Integer> positions);
 
     void batchInsertLongArticlesRootSourceId(List<LongArticlesRootSourceId> list);
+
+    void batchInsertGetOffVideos(List<GetOffVideos> list);
+
+    int batchInsertLongArticlesCrawlerVideos(List<LongArticlesCrawlerVideos> list);
+
+    int batchInsertLongArticlesText(List<LongArticlesText> list);
+
+    List<String> getLongArticlesTextByContentIds(List<String> contentIds);
+
+    int batchInsertLongArticlesMatchVideos(List<LongArticlesMatchVideos> list);
+
+    List<LongArticlesText> getNeedUpdateRecords();
+
+    int updateLongArticlesText(LongArticlesText item);
+
+    List<LongArticlesRootSourceId> getLongArticlesRootSourceId(List<String> rootSourceIdList);
+
+    List<GetOffVideos> getGetOffVideos(List<Long> videoIds);
+
+    List<LongArticlesMatchVideos> getLongArticlesMatchVideos(List<String> traceIds);
+
+    List<LongArticlesCrawlerVideos> getLongArticlesCrawlerVideos(List<String> contentIds);
+
+    List<LongArticlesMatchVideos> getNeedMatchVideos(Long id, Integer pageSize);
+
+    void updateLongArticleMatchVideosResponse(LongArticlesMatchVideos longArticlesMatchVideos);
+
+    int countNeedMatchVideos(Long id);
 }

+ 22 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/ArticleMatchVideos.java

@@ -0,0 +1,22 @@
+package com.tzld.longarticle.recommend.server.model.dto;
+
+import lombok.Data;
+
+import java.util.Date;
+
+/**
+ * Row type returned by {@code CrawlerBaseMapper.pageArticleMatchVideos()}.
+ * <p>
+ * {@code DataFlushService.flushLongArticlesCrawlerVideos} reads {@code videoPath} (rows without
+ * text in it are dropped), {@code contentId} (grouping key), {@code updateTime} (ranking and
+ * crawler time), {@code platform}, {@code videoTitle}, {@code coverPath}, {@code uid} and
+ * {@code traceId}. The remaining fields are not used in that flow; their meaning is presumably
+ * given by the backing table -- confirm against the mapper XML.
+ */
+@Data
+public class ArticleMatchVideos {
+    private String videoId;
+    private String traceId;
+    private String contentId;
+    private String rootSourceId;
+    // Copied to LongArticlesCrawlerVideos.videoOssPath by the flush job.
+    private String videoPath;
+    private Long requestTime;
+    // Used both for ranking videos of the same content and as the crawler time of the copy.
+    private Date updateTime;
+    private Integer videoStatus;
+    private Integer ossStatus;
+    // Copied to LongArticlesCrawlerVideos.coverOssPath by the flush job.
+    private String coverPath;
+    private String platform;
+    // Copied to LongArticlesCrawlerVideos.userId by the flush job.
+    private String uid;
+    private String videoTitle;
+}

+ 13 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/GetOffVideos.java

@@ -0,0 +1,13 @@
+package com.tzld.longarticle.recommend.server.model.dto;
+
+import lombok.Data;
+
+/**
+ * Row type shared by {@code CrawlerBaseMapper.pageGetOffVideos} (read side) and
+ * {@code LongArticleBaseMapper.batchInsertGetOffVideos} / {@code getGetOffVideos} (write side).
+ * {@code DataFlushService.flushGetOffVideos} de-duplicates rows by {@code videoId}.
+ */
+@Data
+public class GetOffVideos {
+    private Long videoId; // video id
+    private Long publishTime; // video publish time
+    private Integer videoStatus; // video status
+    private String traceId; // original comment reads "final id"; presumably the trace id -- confirm
+    private Long getOffTime; // time the video was taken down
+    private Integer checkStatus; // check status; defaults to 0 per the original comment (no Java-side initializer)
+}

+ 28 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/LongArticlesCrawlerVideos.java

@@ -0,0 +1,28 @@
+package com.tzld.longarticle.recommend.server.model.dto;
+
+import lombok.Data;
+
+import java.util.Date;
+
+/**
+ * Row type for {@code LongArticleBaseMapper.batchInsertLongArticlesCrawlerVideos} and
+ * {@code getLongArticlesCrawlerVideos}.
+ * <p>
+ * {@code DataFlushService.flushLongArticlesCrawlerVideos} builds instances from
+ * {@code ArticleMatchVideos} rows and sets only: contentId, platform, videoTitle, crawlerTime,
+ * videoOssPath, coverOssPath, userId, traceId, downloadStatus and score. All other fields are
+ * left null by that job.
+ */
+@Data
+public class LongArticlesCrawlerVideos {
+    private Integer id;
+    private String contentId;
+    private String outVideoId;
+    private String platform;
+    private String videoTitle;
+    private Integer playCount;
+    private Integer likeCount;
+    private Integer shareCount;
+    private Date publishTime;
+    // Filled from ArticleMatchVideos.updateTime by the flush job.
+    private Date crawlerTime;
+    private Integer duration;
+    private String videoUrl;
+    private String coverUrl;
+    // The flush job always writes 2 here; the meaning of the code is not visible in this file.
+    private Integer downloadStatus;
+    // Filled from ArticleMatchVideos.videoPath by the flush job.
+    private String videoOssPath;
+    // Filled from ArticleMatchVideos.coverPath by the flush job.
+    private String coverOssPath;
+    // Filled from ArticleMatchVideos.uid by the flush job.
+    private String userId;
+    private String traceId;
+    // The flush job writes a rank-based value (1, 0.5 or 0.2) divided by 1000.
+    private Double score;
+}

+ 21 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/LongArticlesMatchVideos.java

@@ -0,0 +1,21 @@
+package com.tzld.longarticle.recommend.server.model.dto;
+
+import lombok.Data;
+
+import java.util.Date;
+
+/**
+ * Row type for the match-video queries of {@code LongArticleBaseMapper}
+ * ({@code batchInsertLongArticlesMatchVideos}, {@code getLongArticlesMatchVideos},
+ * {@code getNeedMatchVideos}, {@code updateLongArticleMatchVideosResponse}).
+ * <p>
+ * {@code DataFlushService} de-duplicates rows by {@code traceId} and pages through them by
+ * {@code id}.
+ */
+@Data
+public class LongArticlesMatchVideos {
+    private Integer id;
+    private String traceId;
+    private String contentId;
+    // Not populated by DataFlushService.flushLongArticlesVideos.
+    private String flowPoolLevel;
+    private String ghId;
+    private String accountName;
+    // DataFlushService.flushLongArticlesVideos always writes 4; the meaning of the code is not visible here.
+    private Integer contentStatus;
+    // Copied from LongArticlesVideoDTO.success by the flush job.
+    private Integer successStatus;
+    // Copied from LongArticlesVideoDTO.requestTimeStamp; rows without it are not migrated.
+    private Integer requestTimestamp;
+    private Date updateTime;
+    // JSON array string produced by DataFlushService.getLongArticleVideoResponse.
+    private String response;
+    private Integer processTimes;
+}

+ 14 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/LongArticlesText.java

@@ -0,0 +1,14 @@
+package com.tzld.longarticle.recommend.server.model.dto;
+
+import lombok.Data;
+
+/**
+ * Article text plus its "kimi" derived fields, keyed by {@code contentId}.
+ * <p>
+ * Used as the row type of {@code CrawlerBaseMapper.getLongArticlesText()} and of the
+ * {@code LongArticleBaseMapper} text queries ({@code batchInsertLongArticlesText},
+ * {@code getNeedUpdateRecords}, {@code updateLongArticlesText}).
+ */
+@Data
+public class LongArticlesText {
+    private String contentId;
+    private String articleTitle;
+    private String articleText;
+    // DataFlushService.flushLongArticlesVideos strips double quotes before storing this value.
+    private String kimiTitle;
+    private String kimiSummary;
+    private String kimiKeys;
+    // DataFlushService.flushLongArticlesVideos always writes 1; the meaning of the code is not visible here.
+    private Integer kimiStatus;
+}

+ 30 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/LongArticlesVideoDTO.java

@@ -0,0 +1,30 @@
+package com.tzld.longarticle.recommend.server.model.dto;
+
+import lombok.Data;
+
+import java.util.Date;
+
+/**
+ * Row type returned by {@code CrawlerBaseMapper.pageLongArticlesVideos} and
+ * {@code getLongArticlesVideo}.
+ * <p>
+ * {@code DataFlushService} splits each row into a {@code LongArticlesText} (article and kimi
+ * fields) and a {@code LongArticlesMatchVideos} (trace, account, status fields and a response
+ * built from {@code result1..3}).
+ */
+@Data
+public class LongArticlesVideoDTO {
+    // Used as the keyset-pagination cursor by DataFlushService.flushLongArticlesVideos.
+    private Long id;
+    private String traceId;
+    // Rows whose contentId ends with "lehuo" are skipped by the flush job.
+    private String contentId;
+    private String ghId;
+    private String accountName;
+    private String articleTitle;
+    private String articleText;
+    private Integer contentStatus;
+    private String kimiTitle;
+    private String kimiSummary;
+    private String kimiKeys;
+    private Integer recallVideoId1;
+    private Integer recallVideoId2;
+    private Integer recallVideoId3;
+    // result1..3 are JSON object strings; DataFlushService.resultToResponse reads the keys
+    // productionName, productionCover, videoUrl, source and productionPath from them.
+    private String result1;
+    private String result2;
+    private String result3;
+    private Integer success;
+    private Date updateTime;
+    // Rows with a null value here produce no LongArticlesMatchVideos record.
+    private Integer requestTimeStamp;
+    private Integer processTimes;
+}

+ 2 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/crawler/ArticleRepository.java

@@ -34,4 +34,6 @@ public interface ArticleRepository extends JpaRepository<Article, String> {
     Article getByWxSn(String wxSn);
 
     int countByGhIdAndTypeAndItemIndex(String ghId, String val, Integer itemIndex);
+
+    List<Article> getByGhIdInAndUpdateTimeGreaterThanAndTypeEquals(List<String> ghIds, long l, String number);
 }

+ 380 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/DataFlushService.java

@@ -0,0 +1,380 @@
+package com.tzld.longarticle.recommend.server.service;
+
+import cn.hutool.core.collection.CollectionUtil;
+import com.alibaba.fastjson.JSONArray;
+import com.alibaba.fastjson.JSONObject;
+import com.google.common.collect.Lists;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import com.tzld.longarticle.recommend.server.common.CommonThreadPoolExecutor;
+import com.tzld.longarticle.recommend.server.common.ThreadPoolFactory;
+import com.tzld.longarticle.recommend.server.mapper.crawler.CrawlerBaseMapper;
+import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
+import com.tzld.longarticle.recommend.server.model.dto.*;
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.LongArticlesRootSourceId;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Service;
+import org.springframework.util.CollectionUtils;
+import org.springframework.util.StringUtils;
+
+import java.net.URLDecoder;
+import java.util.*;
+import java.util.concurrent.*;
+import java.util.stream.Collectors;
+
+@Service
+@Slf4j
+public class DataFlushService {
+
+    @Autowired
+    private LongArticleBaseMapper longArticleBaseMapper;
+    @Autowired
+    private CrawlerBaseMapper crawlerBaseMapper;
+
+    private final ExecutorService pool = ThreadPoolFactory.deDuplicatePool();
+
+
+    /**
+     * Copies "get off videos" rows read through {@link CrawlerBaseMapper} into the store behind
+     * {@link LongArticleBaseMapper}, skipping video ids that already exist there.
+     * <p>
+     * Rows are read with offset pagination, 1000 per page.
+     *
+     * @param pageNum 1-based page to start from; {@code null} or values below 1 start at page 1
+     */
+    public void flushGetOffVideos(Integer pageNum) {
+        int pageSize = 1000;
+        if (pageNum == null || pageNum < 1) {
+            // A page below 1 would produce a negative offset.
+            pageNum = 1;
+        }
+        int count = crawlerBaseMapper.countGetOffVideos();
+        // Ceiling division: "count / pageSize + 1" produced a trailing empty page whenever the
+        // count was a multiple of the page size (including 0).
+        int totalPage = (count + pageSize - 1) / pageSize;
+        int flushNum = 0;
+        while (pageNum <= totalPage) {
+            int offset = (pageNum - 1) * pageSize;
+            List<GetOffVideos> list = crawlerBaseMapper.pageGetOffVideos(offset, pageSize);
+            if (CollectionUtil.isEmpty(list)) {
+                // Nothing left to copy; also avoids calling the existence lookup with an empty id list.
+                break;
+            }
+            List<Long> videoIds = list.stream().map(GetOffVideos::getVideoId)
+                    .distinct().collect(Collectors.toList());
+            List<GetOffVideos> existsList = longArticleBaseMapper.getGetOffVideos(videoIds);
+            Set<Long> existsIds = existsList.stream().map(GetOffVideos::getVideoId).collect(Collectors.toSet());
+            list = list.stream().filter(o -> !existsIds.contains(o.getVideoId())).collect(Collectors.toList());
+            if (CollectionUtil.isNotEmpty(list)) {
+                longArticleBaseMapper.batchInsertGetOffVideos(list);
+            }
+            log.info("flushGetOffVideos pageNum:{} totalPage:{} flushNum:{}", pageNum, totalPage, list.size());
+            flushNum += list.size();
+            pageNum++;
+        }
+        log.info("flushGetOffVideos flushNum:{}", flushNum);
+    }
+
+    /**
+     * Copies root-source-id rows read through {@link CrawlerBaseMapper} into the store behind
+     * {@link LongArticleBaseMapper}, skipping root source ids that already exist there.
+     * <p>
+     * Rows are read with offset pagination, 1000 per page.
+     *
+     * @param pageNum 1-based page to start from; {@code null} or values below 1 start at page 1
+     */
+    public void flushLongArticlesRootSourceId(Integer pageNum) {
+        int pageSize = 1000;
+        if (pageNum == null || pageNum < 1) {
+            // A page below 1 would produce a negative offset.
+            pageNum = 1;
+        }
+        int count = crawlerBaseMapper.countLongArticlesRootSourceId();
+        // Ceiling division: "count / pageSize + 1" produced a trailing empty page whenever the
+        // count was a multiple of the page size (including 0).
+        int totalPage = (count + pageSize - 1) / pageSize;
+        while (pageNum <= totalPage) {
+            int offset = (pageNum - 1) * pageSize;
+            List<LongArticlesRootSourceId> list = crawlerBaseMapper.pageLongArticlesRootSourceId(offset, pageSize);
+            if (CollectionUtil.isEmpty(list)) {
+                // Nothing left to copy; also avoids calling the existence lookup with an empty id list.
+                break;
+            }
+            List<String> rootSourceIdList = list.stream().map(LongArticlesRootSourceId::getRootSourceId)
+                    .distinct().collect(Collectors.toList());
+            List<LongArticlesRootSourceId> existsList = longArticleBaseMapper.getLongArticlesRootSourceId(rootSourceIdList);
+            Set<String> existsIds = existsList.stream().map(LongArticlesRootSourceId::getRootSourceId).collect(Collectors.toSet());
+            list = list.stream().filter(o -> !existsIds.contains(o.getRootSourceId())).collect(Collectors.toList());
+            if (CollectionUtil.isNotEmpty(list)) {
+                longArticleBaseMapper.batchInsertLongArticlesRootSourceId(list);
+            }
+            log.info("flushLongArticlesRootSourceId pageNum:{} totalPage:{} existsSize: {}", pageNum, totalPage, existsList.size());
+            pageNum++;
+        }
+    }
+
+    /**
+     * Converts the rows of {@code CrawlerBaseMapper.pageArticleMatchVideos()} that have a video
+     * path into {@link LongArticlesCrawlerVideos} records and inserts them in batches of 1000.
+     * <p>
+     * Each video gets a rank-based score within its content id (ordered by update time):
+     * earliest = 1, second = 0.5, all others = 0.2; the stored value is that score divided by 1000.
+     * A record is inserted only when fewer than 3 rows already exist for its content id.
+     * NOTE(review): re-running the job can therefore insert duplicates for content ids that have
+     * fewer than 3 stored videos -- confirm this is intended.
+     *
+     * @param pageNum currently unused; kept for interface compatibility
+     */
+    public void flushLongArticlesCrawlerVideos(Integer pageNum) {
+        List<ArticleMatchVideos> list = crawlerBaseMapper.pageArticleMatchVideos();
+        list = list.stream().filter(o -> StringUtils.hasText(o.getVideoPath())).collect(Collectors.toList());
+        // Sort each content's update times once instead of re-sorting the group for every video.
+        Map<String, List<Date>> orderedDatesByContentId = new HashMap<>();
+        list.stream().collect(Collectors.groupingBy(ArticleMatchVideos::getContentId))
+                .forEach((contentId, videos) -> orderedDatesByContentId.put(contentId,
+                        videos.stream().map(ArticleMatchVideos::getUpdateTime).sorted().collect(Collectors.toList())));
+        List<LongArticlesCrawlerVideos> batchSaveList = new ArrayList<>();
+        for (ArticleMatchVideos articleMatchVideos : list) {
+            // Never empty: the group always contains the current video itself.
+            List<Date> orderDate = orderedDatesByContentId.get(articleMatchVideos.getContentId());
+            Date updateTime = articleMatchVideos.getUpdateTime();
+            double score = 0.2;
+            if (orderDate.get(0).equals(updateTime)) {
+                score = 1;
+            }
+            // Checked second on purpose: when the two earliest times are equal, 0.5 wins,
+            // which matches the result of the original index loop.
+            if (orderDate.size() > 1 && orderDate.get(1).equals(updateTime)) {
+                score = 0.5;
+            }
+            LongArticlesCrawlerVideos saveItem = new LongArticlesCrawlerVideos();
+            saveItem.setContentId(articleMatchVideos.getContentId());
+            saveItem.setPlatform(articleMatchVideos.getPlatform());
+            saveItem.setVideoTitle(articleMatchVideos.getVideoTitle());
+            saveItem.setCrawlerTime(updateTime);
+            saveItem.setVideoOssPath(articleMatchVideos.getVideoPath());
+            saveItem.setCoverOssPath(articleMatchVideos.getCoverPath());
+            saveItem.setUserId(articleMatchVideos.getUid());
+            saveItem.setTraceId(articleMatchVideos.getTraceId());
+            saveItem.setDownloadStatus(2);
+            saveItem.setScore(score / 1000);
+            batchSaveList.add(saveItem);
+        }
+        int flushNum = 0;
+        if (!CollectionUtils.isEmpty(batchSaveList)) {
+            for (List<LongArticlesCrawlerVideos> partition : Lists.partition(batchSaveList, 1000)) {
+                // distinct() keeps the IN list small; several videos share one content id.
+                List<String> contentIds = partition.stream().map(LongArticlesCrawlerVideos::getContentId)
+                        .distinct().collect(Collectors.toList());
+                List<LongArticlesCrawlerVideos> existsList = longArticleBaseMapper.getLongArticlesCrawlerVideos(contentIds);
+                Map<String, List<LongArticlesCrawlerVideos>> existsMap = existsList.stream()
+                        .collect(Collectors.groupingBy(LongArticlesCrawlerVideos::getContentId));
+                partition = partition.stream().filter(o -> {
+                    List<LongArticlesCrawlerVideos> itemList = existsMap.get(o.getContentId());
+                    return CollectionUtil.isEmpty(itemList) || itemList.size() < 3;
+                }).collect(Collectors.toList());
+                if (CollectionUtil.isNotEmpty(partition)) {
+                    longArticleBaseMapper.batchInsertLongArticlesCrawlerVideos(partition);
+                    flushNum += partition.size();
+                }
+            }
+        }
+        log.info("flushLongArticlesCrawlerVideos flushNum:{}", flushNum);
+    }
+
+    /**
+     * Migrates rows of {@code CrawlerBaseMapper.pageLongArticlesVideos} into two targets:
+     * one {@link LongArticlesText} per distinct content id and one {@link LongArticlesMatchVideos}
+     * per row that has a request timestamp. Records already present in the target (by content id
+     * and trace id respectively) are not inserted again.
+     * <p>
+     * Rows are read 1000 at a time using the row id as a cursor; the loop ends when a page comes
+     * back empty. Rows whose content id ends with "lehuo" are skipped.
+     *
+     * @param pageNum page counter used only for progress logging; {@code null} starts at 1
+     * @param id      cursor passed to the page query; {@code null} starts at 0
+     *                (presumably "rows with id greater than this" -- confirm in the mapper XML)
+     */
+    public void flushLongArticlesVideos(Integer pageNum, Long id) {
+        int pageSize = 1000;
+        // Both arguments are boxed and were unboxed unconditionally before; default them
+        // instead of failing with a NullPointerException.
+        if (pageNum == null) {
+            pageNum = 1;
+        }
+        if (id == null) {
+            id = 0L;
+        }
+        List<LongArticlesText> kimiTitleList = crawlerBaseMapper.getLongArticlesText();
+        Map<String, LongArticlesText> kimiTitleMap = kimiTitleList.stream().collect(
+                Collectors.toMap(LongArticlesText::getContentId, o -> o, (existing, replacement) -> replacement));
+        int count = crawlerBaseMapper.countLongArticlesVideos();
+        // Only used for progress logging; the loop itself is cursor driven.
+        int totalPage = count / pageSize + 1;
+        int longArticlesTextFlushNum = 0;
+        int longArticlesMatchVideosFlushNum = 0;
+        while (true) {
+            List<LongArticlesVideoDTO> list = crawlerBaseMapper.pageLongArticlesVideos(id, pageSize);
+            if (CollectionUtils.isEmpty(list)) {
+                break;
+            }
+            // Advance the cursor to the largest id seen in this page.
+            id = list.stream().mapToLong(LongArticlesVideoDTO::getId).max().getAsLong();
+            List<LongArticlesText> batchSaveLongArticlesTextList = new ArrayList<>();
+            List<LongArticlesMatchVideos> batchSaveLongArticlesMatchVideosList = new ArrayList<>();
+            Set<String> existsIdSet = new HashSet<>();
+            for (LongArticlesVideoDTO longArticlesVideoDTO : list) {
+                if (longArticlesVideoDTO.getContentId().endsWith("lehuo")) {
+                    continue;
+                }
+                // Set.add returns false for a content id already handled in this page.
+                if (existsIdSet.add(longArticlesVideoDTO.getContentId())) {
+                    LongArticlesText longArticlesText = new LongArticlesText();
+                    longArticlesText.setContentId(longArticlesVideoDTO.getContentId());
+                    longArticlesText.setArticleTitle(longArticlesVideoDTO.getArticleTitle());
+                    longArticlesText.setArticleText(longArticlesVideoDTO.getArticleText());
+                    // Guard the value that is dereferenced: the previous check looked at
+                    // articleText and threw when kimiTitle was null.
+                    if (StringUtils.hasText(longArticlesVideoDTO.getKimiTitle())) {
+                        longArticlesText.setKimiTitle(longArticlesVideoDTO.getKimiTitle().replace("\"", ""));
+                    }
+                    if (StringUtils.hasText(longArticlesVideoDTO.getKimiSummary())) {
+                        longArticlesText.setKimiSummary(longArticlesVideoDTO.getKimiSummary());
+                        longArticlesText.setKimiKeys(longArticlesVideoDTO.getKimiKeys());
+                    } else {
+                        // Fall back to the crawler-side text record, which may not exist.
+                        LongArticlesText text = kimiTitleMap.get(longArticlesVideoDTO.getContentId());
+                        if (Objects.nonNull(text)) {
+                            longArticlesText.setKimiSummary(text.getKimiSummary());
+                            longArticlesText.setKimiKeys(text.getKimiKeys());
+                        }
+                    }
+                    longArticlesText.setKimiStatus(1);
+                    batchSaveLongArticlesTextList.add(longArticlesText);
+                }
+                if (Objects.isNull(longArticlesVideoDTO.getRequestTimeStamp())) {
+                    continue;
+                }
+                LongArticlesMatchVideos longArticlesMatchVideos = new LongArticlesMatchVideos();
+                longArticlesMatchVideos.setContentId(longArticlesVideoDTO.getContentId());
+                longArticlesMatchVideos.setTraceId(longArticlesVideoDTO.getTraceId());
+                longArticlesMatchVideos.setGhId(longArticlesVideoDTO.getGhId());
+                longArticlesMatchVideos.setAccountName(longArticlesVideoDTO.getAccountName());
+                longArticlesMatchVideos.setContentStatus(4);
+                longArticlesMatchVideos.setSuccessStatus(longArticlesVideoDTO.getSuccess());
+                longArticlesMatchVideos.setRequestTimestamp(longArticlesVideoDTO.getRequestTimeStamp());
+                longArticlesMatchVideos.setUpdateTime(longArticlesVideoDTO.getUpdateTime());
+                longArticlesMatchVideos.setProcessTimes(longArticlesVideoDTO.getProcessTimes());
+                longArticlesMatchVideos.setResponse(getLongArticleVideoResponse(longArticlesVideoDTO));
+                batchSaveLongArticlesMatchVideosList.add(longArticlesMatchVideos);
+            }
+            if (CollectionUtil.isNotEmpty(batchSaveLongArticlesTextList)) {
+                List<String> contentIds = batchSaveLongArticlesTextList.stream()
+                        .map(LongArticlesText::getContentId).distinct().collect(Collectors.toList());
+                List<String> existsContentIdList = longArticleBaseMapper.getLongArticlesTextByContentIds(contentIds);
+                if (CollectionUtil.isNotEmpty(existsContentIdList)) {
+                    // Hash lookup instead of List.contains inside the filter.
+                    Set<String> existsContentIds = new HashSet<>(existsContentIdList);
+                    batchSaveLongArticlesTextList = batchSaveLongArticlesTextList.stream()
+                            .filter(o -> !existsContentIds.contains(o.getContentId())).collect(Collectors.toList());
+                }
+                if (CollectionUtil.isNotEmpty(batchSaveLongArticlesTextList)) {
+                    longArticleBaseMapper.batchInsertLongArticlesText(batchSaveLongArticlesTextList);
+                    longArticlesTextFlushNum += batchSaveLongArticlesTextList.size();
+                }
+            }
+            if (CollectionUtil.isNotEmpty(batchSaveLongArticlesMatchVideosList)) {
+                List<String> traceIds = batchSaveLongArticlesMatchVideosList.stream().map(LongArticlesMatchVideos::getTraceId)
+                        .distinct().collect(Collectors.toList());
+                List<LongArticlesMatchVideos> existsList = longArticleBaseMapper.getLongArticlesMatchVideos(traceIds);
+                Set<String> existsIds = existsList.stream().map(LongArticlesMatchVideos::getTraceId).collect(Collectors.toSet());
+                batchSaveLongArticlesMatchVideosList = batchSaveLongArticlesMatchVideosList.stream()
+                        .filter(o -> !existsIds.contains(o.getTraceId())).collect(Collectors.toList());
+                if (CollectionUtil.isNotEmpty(batchSaveLongArticlesMatchVideosList)) {
+                    longArticleBaseMapper.batchInsertLongArticlesMatchVideos(batchSaveLongArticlesMatchVideosList);
+                    longArticlesMatchVideosFlushNum += batchSaveLongArticlesMatchVideosList.size();
+                }
+            }
+            log.info("flushLongArticlesVideos pageNum:{} totalPage:{} id:{} longArticlesMatchVideosFlushNum:{}",
+                    pageNum, totalPage, id, longArticlesMatchVideosFlushNum);
+            pageNum++;
+        }
+        log.info("flushLongArticlesVideos longArticlesTextFlushNum:{} longArticlesMatchVideosFlushNum:{}",
+                longArticlesTextFlushNum, longArticlesMatchVideosFlushNum);
+    }
+
+    /**
+     * Builds the JSON array string stored as the match-video response from the DTO's
+     * {@code result1..3} fields. Results without text are skipped.
+     * <p>
+     * When the kimi title contains double quotes, each occurrence of the title inside a result
+     * is replaced by its quote-free form before parsing, and the cleaned result is written back
+     * to the DTO.
+     *
+     * @param longArticlesVideoDTO source row; its result fields may be modified as described
+     * @return JSON array string with one entry per non-blank result
+     */
+    private String getLongArticleVideoResponse(LongArticlesVideoDTO longArticlesVideoDTO) {
+        String kimiTitle = longArticlesVideoDTO.getKimiTitle();
+        boolean titleHasQuotes = StringUtils.hasText(kimiTitle) && kimiTitle.contains("\"");
+        JSONArray responses = new JSONArray();
+
+        String first = longArticlesVideoDTO.getResult1();
+        if (StringUtils.hasText(first)) {
+            if (titleHasQuotes) {
+                first = first.replace(kimiTitle, kimiTitle.replace("\"", ""));
+                longArticlesVideoDTO.setResult1(first);
+            }
+            responses.add(resultToResponse(first));
+        }
+
+        String second = longArticlesVideoDTO.getResult2();
+        if (StringUtils.hasText(second)) {
+            if (titleHasQuotes) {
+                second = second.replace(kimiTitle, kimiTitle.replace("\"", ""));
+                longArticlesVideoDTO.setResult2(second);
+            }
+            responses.add(resultToResponse(second));
+        }
+
+        String third = longArticlesVideoDTO.getResult3();
+        if (StringUtils.hasText(third)) {
+            if (titleHasQuotes) {
+                third = third.replace(kimiTitle, kimiTitle.replace("\"", ""));
+                longArticlesVideoDTO.setResult3(third);
+            }
+            responses.add(resultToResponse(third));
+        }
+
+        return JSONObject.toJSONString(responses);
+    }
+
+    /**
+     * Maps one stored result JSON object to the response entry format.
+     * <p>
+     * Copies productionName, productionCover, videoUrl and source under the keys kimiTitle,
+     * videoCover, videoPath and source, and extracts su, id and rootSourceId from the
+     * productionPath query string. videoId and rootSourceId are added only when present.
+     *
+     * @param result JSON object string
+     * @return the response entry
+     */
+    private JSONObject resultToResponse(String result) {
+        JSONObject parsed = JSONObject.parseObject(result);
+        String path = parsed.getString("productionPath");
+
+        JSONObject response = new JSONObject();
+        response.put("kimiTitle", parsed.getString("productionName"));
+        response.put("videoCover", parsed.getString("productionCover"));
+        response.put("videoPath", parsed.getString("videoUrl"));
+        response.put("source", parsed.getString("source"));
+
+        // Resolve all three query parameters before writing any of them, as the original did.
+        String userParam = getParamFromPath(path, "su");
+        String videoParam = getParamFromPath(path, "id");
+        String rootSourceParam = getParamFromPath(path, "rootSourceId");
+
+        response.put("uid", userParam);
+        if (StringUtils.hasText(videoParam)) {
+            response.put("videoId", Long.valueOf(videoParam));
+        }
+        if (StringUtils.hasText(rootSourceParam)) {
+            response.put("rootSourceId", rootSourceParam);
+        }
+        return response;
+    }
+
+    /**
+     * Extracts the value of a query parameter from a (possibly URL-encoded, possibly nested) path.
+     * <p>
+     * The whole path is URL-decoded first and then split on {@code ?} and {@code &}, so parameters
+     * of an encoded inner path are found as well. The first {@code param=value} pair wins.
+     *
+     * @param productionPath path to inspect; may be {@code null} or blank
+     * @param param          exact parameter name to look for
+     * @return the parameter value, or {@code null} when the path is blank, the parameter is
+     *         missing, or its value is empty
+     */
+    private String getParamFromPath(String productionPath, String param) {
+        if (!StringUtils.hasText(productionPath) || !StringUtils.hasText(param)) {
+            return null;
+        }
+        String decode;
+        try {
+            // Explicit charset: the one-argument overload is deprecated and platform dependent.
+            decode = URLDecoder.decode(productionPath, "UTF-8");
+        } catch (java.io.UnsupportedEncodingException e) {
+            // Cannot happen: every Java platform is required to support UTF-8.
+            throw new IllegalStateException("UTF-8 is not supported", e);
+        }
+        // Match the full key including '=' so that e.g. "id" does not match "idx=..." and
+        // "su" does not match "subject=...".
+        String prefix = param + "=";
+        for (String segment : decode.split("\\?")) {
+            for (String pair : segment.split("&")) {
+                if (pair.startsWith(prefix)) {
+                    // substring instead of split("=")[1]: no ArrayIndexOutOfBoundsException on
+                    // "key=" and values containing '=' are kept intact.
+                    String value = pair.substring(prefix.length());
+                    return value.isEmpty() ? null : value;
+                }
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Back-fills kimi title, summary and keys on the records returned by
+     * {@code LongArticleBaseMapper.getNeedUpdateRecords()} from the crawler-side text records
+     * with the same content id. Records without a crawler-side counterpart are left untouched.
+     * <p>
+     * Updates are submitted to the shared pool and not awaited, so the logged number is the
+     * count of submitted updates, not of completed ones.
+     */
+    public void flushLongArticlesText() {
+        List<LongArticlesText> kimiTitleList = crawlerBaseMapper.getLongArticlesText();
+        Map<String, LongArticlesText> kimiTitleMap = kimiTitleList.stream().collect(
+                Collectors.toMap(LongArticlesText::getContentId, o -> o, (existing, replacement) -> replacement));
+        List<LongArticlesText> updateList = longArticleBaseMapper.getNeedUpdateRecords();
+        int updateNum = 0;
+        for (LongArticlesText update : updateList) {
+            LongArticlesText kimi = kimiTitleMap.get(update.getContentId());
+            if (Objects.nonNull(kimi)) {
+                pool.submit(() -> {
+                    try {
+                        update.setKimiTitle(kimi.getKimiTitle());
+                        update.setKimiSummary(kimi.getKimiSummary());
+                        update.setKimiKeys(kimi.getKimiKeys());
+                        longArticleBaseMapper.updateLongArticlesText(update);
+                    } catch (Exception e) {
+                        // The Future is never inspected, so a failure would otherwise vanish silently.
+                        log.error("flushLongArticlesText update error contentId:{}", update.getContentId(), e);
+                    }
+                });
+                updateNum++;
+            }
+        }
+        log.info("flushLongArticlesText updateNum:{}", updateNum);
+    }
+
+    // Executor for the per-page batch tasks submitted by updateLongArticleMatchVideosResponse.
+    // Constructed with 5 / 5 / 0 seconds, a LinkedBlockingQueue bounded at 10000 entries,
+    // threads named "batch-%d" and ThreadPoolExecutor.AbortPolicy.
+    // NOTE(review): the argument order presumably mirrors ThreadPoolExecutor (core size, max size,
+    // keep-alive, unit, queue, thread factory, rejection handler) -- confirm in CommonThreadPoolExecutor.
+    private final static ExecutorService batchPool = new CommonThreadPoolExecutor(
+            5,
+            5,
+            0L, TimeUnit.SECONDS,
+            new LinkedBlockingQueue<>(10000),
+            new ThreadFactoryBuilder().setNameFormat("batch-%d").build(),
+            new ThreadPoolExecutor.AbortPolicy());
+
+    /**
+     * Rebuilds the response column of match-video records from the crawler-side rows with the
+     * same trace id.
+     * <p>
+     * Records are read 1000 at a time using the record id as a cursor. Each page is handed to
+     * {@code batchPool}; inside a page every record is updated on the shared pool. The method
+     * submits at most one page every two seconds and returns after all submitted pages finished.
+     *
+     * @param id cursor to start from; {@code null} starts at 0
+     */
+    public void updateLongArticleMatchVideosResponse(Long id) {
+        int pageSize = 1000;
+        if (Objects.isNull(id)) {
+            id = 0L;
+        }
+        int count = longArticleBaseMapper.countNeedMatchVideos(id);
+        log.info("updateLongArticleMatchVideosResponse start id:{}, count:{}", id, count);
+        // Track the batches that were actually submitted. The previous latch was pre-sized with
+        // "count / 1000 + 1", which is one more than the number of pages whenever count is 0 or a
+        // multiple of 1000, so the final await() never returned.
+        List<Future<?>> batchFutures = new ArrayList<>();
+        while (true) {
+            List<LongArticlesMatchVideos> matchVideosList = longArticleBaseMapper.getNeedMatchVideos(id, pageSize);
+            if (CollectionUtil.isEmpty(matchVideosList)) {
+                break;
+            }
+            id = matchVideosList.stream().mapToLong(LongArticlesMatchVideos::getId).max().getAsLong();
+            Long finalId = id;
+            batchFutures.add(batchPool.submit(() -> updateMatchVideosResponseBatch(matchVideosList, finalId)));
+            try {
+                // Throttle page submission.
+                Thread.sleep(2000);
+            } catch (InterruptedException e) {
+                log.error("updateLongArticleMatchVideosResponse InterruptedException", e);
+                // Restore the flag and stop submitting further pages.
+                Thread.currentThread().interrupt();
+                break;
+            }
+        }
+        for (Future<?> batchFuture : batchFutures) {
+            try {
+                batchFuture.get();
+            } catch (InterruptedException e) {
+                log.error("updateLongArticleMatchVideosResponse InterruptedException", e);
+                Thread.currentThread().interrupt();
+                break;
+            } catch (ExecutionException e) {
+                log.error("updateLongArticleMatchVideosResponse batch error", e);
+            }
+        }
+    }
+
+    /**
+     * Updates the response of every record in one page and waits until all updates finished.
+     *
+     * @param matchVideosList records of the page
+     * @param finalId         largest record id of the page, used for logging only
+     */
+    private void updateMatchVideosResponseBatch(List<LongArticlesMatchVideos> matchVideosList, Long finalId) {
+        long start = System.currentTimeMillis();
+        List<String> traceIds = matchVideosList.stream().map(LongArticlesMatchVideos::getTraceId)
+                .distinct().collect(Collectors.toList());
+        List<LongArticlesVideoDTO> longArticlesVideoDTOList = crawlerBaseMapper.getLongArticlesVideo(traceIds);
+        Map<String, LongArticlesVideoDTO> longArticlesVideoMap = longArticlesVideoDTOList.stream().collect(
+                Collectors.toMap(LongArticlesVideoDTO::getTraceId, o -> o, (existing, replacement) -> replacement));
+        CountDownLatch countDownLatch = new CountDownLatch(matchVideosList.size());
+        for (LongArticlesMatchVideos longArticlesMatchVideos : matchVideosList) {
+            try {
+                pool.submit(() -> {
+                    try {
+                        LongArticlesVideoDTO longArticlesVideoDTO = longArticlesVideoMap.get(longArticlesMatchVideos.getTraceId());
+                        if (Objects.nonNull(longArticlesVideoDTO)) {
+                            longArticlesMatchVideos.setResponse(getLongArticleVideoResponse(longArticlesVideoDTO));
+                            longArticleBaseMapper.updateLongArticleMatchVideosResponse(longArticlesMatchVideos);
+                        }
+                    } catch (Exception e) {
+                        // The Future is never inspected, so a failure would otherwise vanish silently.
+                        log.error("updateLongArticleMatchVideosResponse update error traceId:{}",
+                                longArticlesMatchVideos.getTraceId(), e);
+                    } finally {
+                        countDownLatch.countDown();
+                    }
+                });
+            } catch (RejectedExecutionException e) {
+                // The task never ran, so release its latch slot here to keep await() from hanging.
+                countDownLatch.countDown();
+                log.error("updateLongArticleMatchVideosResponse submit rejected traceId:{}",
+                        longArticlesMatchVideos.getTraceId(), e);
+            }
+        }
+        try {
+            countDownLatch.await();
+        } catch (InterruptedException e) {
+            log.error("updateLongArticleMatchVideosResponse InterruptedException", e);
+            Thread.currentThread().interrupt();
+        }
+        log.info("updateLongArticleMatchVideosResponse end id:{}, cost:{}", finalId, System.currentTimeMillis() - start);
+    }
+}

+ 5 - 1
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/filter/FilterService.java

@@ -41,7 +41,11 @@ public class FilterService {
         for (final FilterStrategy strategy : strategies) {
             Future<FilterResult> future = pool.submit(() -> {
                 try {
-                    return strategy.filter(param);
+                    long t1 = System.currentTimeMillis();
+                    FilterResult ret = strategy.filter(param);
+                    long t2 = System.currentTimeMillis();
+                    log.info("Filter {} cost: {}", strategy.getClass().getSimpleName(), t2 - t1);
+                    return ret;
                 } finally {
                     cdl.countDown();
                 }

+ 0 - 1
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/filter/strategy/BadStrategy.java

@@ -41,7 +41,6 @@ public class BadStrategy implements FilterStrategy {
         }
         filterResult.setContentIds(result);
         filterResult.setFilterContent(filterContents);
-        log.info("BadStrategy cost:{}", System.currentTimeMillis() - start);
         return filterResult;
     }
 

+ 0 - 1
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/filter/strategy/ColdStartBackupFilterStrategy.java

@@ -35,7 +35,6 @@ public class ColdStartBackupFilterStrategy implements FilterStrategy {
             }
         }
         filterResult.setContentIds(result);
-        log.info("ColdStartBackupFilterStrategy cost:{}", System.currentTimeMillis() - start);
         return filterResult;
     }
 

+ 0 - 1
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/filter/strategy/HistoryTitleForFwhColdStartStrategy.java

@@ -50,7 +50,6 @@ public class HistoryTitleForFwhColdStartStrategy implements FilterStrategy {
         }
         filterResult.setContentIds(result);
         filterResult.setFilterContent(filterContents);
-        log.info("HistoryTitleForFwhColdStartStrategy cost:{}", System.currentTimeMillis() - start);
         return filterResult;
     }
 

+ 17 - 45
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/filter/strategy/HistoryTitleStrategy.java

@@ -15,10 +15,7 @@ import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Component;
 import org.springframework.util.StringUtils;
 
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Objects;
+import java.util.*;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Future;
@@ -43,9 +40,8 @@ public class HistoryTitleStrategy implements FilterStrategy {
 
     @Override
     public FilterResult filter(FilterParam param) {
-        long start = System.currentTimeMillis();
         FilterResult filterResult = new FilterResult();
-        List<String> result = new ArrayList<>();
+        List<String> result = new ArrayList<>(param.getContents().size());
         List<Content> filterContents = new ArrayList<>();
         List<Article> allArticleList = articleListRemoteService.articleList(param.getGhId(), allIndex, param.getType());
         List<String> allTitleList = allArticleList.stream().map(Article::getTitle).distinct().collect(Collectors.toList());
@@ -58,50 +54,26 @@ public class HistoryTitleStrategy implements FilterStrategy {
         if (Objects.nonNull(contentPoolConfig)) {
             firstSecondContentPool.addAll(Arrays.asList(contentPoolConfig[0], contentPoolConfig[1]));
         }
-        List<Future<Content>> futures = new ArrayList<>();
-        CountDownLatch cdl = new CountDownLatch(param.getContents().size());
+        List<Set<Character>> firstSecondTitleCache = TitleSimilarCheckUtil.makeCache(firstSecondTitleList);
+        List<Set<Character>> allTitleCache = TitleSimilarCheckUtil.makeCache(allTitleList);
+        // TODO: batching for parallelism
         for (Content content : param.getContents()) {
-            Future<Content> future = pool.submit(() -> {
-                try {
-                    boolean isDuplicate;
-                    if (CollectionUtils.isNotEmpty(firstSecondContentPool) && firstSecondContentPool.contains(content.getContentPoolType())) {
-                        // 四个内容池 配置 判断头条,次头条
-                        isDuplicate = TitleSimilarCheckUtil.isDuplicateContent(content.getTitle(), firstSecondTitleList, TitleSimilarCheckUtil.SIMILARITY_THRESHOLD);
-                    } else {
-                        isDuplicate = TitleSimilarCheckUtil.isDuplicateContent(content.getTitle(), allTitleList, TitleSimilarCheckUtil.SIMILARITY_THRESHOLD);
-                    }
-                    if (isDuplicate) {
-                        content.setFilterReason("历史已发布文章");
-                    }
-                    return content;
-                } finally {
-                    cdl.countDown();
-                }
-            });
-            futures.add(future);
-        }
-        try {
-            cdl.await();
-        } catch (InterruptedException e) {
-            log.error("filter error", e);
-            return null;
-        }
-
-        for (Future<Content> f : futures) {
-            try {
-                Content content = f.get();
-                if (StringUtils.hasText(content.getFilterReason())) {
-                    filterContents.add(content);
-                } else {
-                    result.add(content.getId());
-                }
-            } catch (Exception e) {
-                log.error("future get error ", e);
+            boolean isDuplicate;
+            if (CollectionUtils.isNotEmpty(firstSecondContentPool) && firstSecondContentPool.contains(content.getContentPoolType())) {
+                // 四个内容池 配置 判断头条,次头条
+                isDuplicate = TitleSimilarCheckUtil.isDuplicateContentByCache(content.getTitle(), firstSecondTitleCache, TitleSimilarCheckUtil.SIMILARITY_THRESHOLD);
+            } else {
+                isDuplicate = TitleSimilarCheckUtil.isDuplicateContentByCache(content.getTitle(), allTitleCache, TitleSimilarCheckUtil.SIMILARITY_THRESHOLD);
+            }
+            if (isDuplicate) {
+                content.setFilterReason("历史已发布文章");
+                filterContents.add(content);
+            } else {
+                result.add(content.getId());
             }
         }
         filterResult.setContentIds(result);
         filterResult.setFilterContent(filterContents);
-        log.info("HistoryTitleStrategy cost:{}", System.currentTimeMillis() - start);
         return filterResult;
     }
 

+ 0 - 1
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/filter/strategy/InfiniteHisTitleStrategy.java

@@ -84,7 +84,6 @@ public class InfiniteHisTitleStrategy implements FilterStrategy {
         }
         filterResult.setContentIds(result);
         filterResult.setFilterContent(filterContents);
-        log.info("InfiniteHisTitleStrategy cost:{}", System.currentTimeMillis() - start);
         return filterResult;
     }
 

+ 19 - 53
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/filter/strategy/SensitiveStrategy.java

@@ -15,16 +15,11 @@ import com.tzld.longarticle.recommend.server.util.TitleSimilarCheckUtil;
 import lombok.extern.slf4j.Slf4j;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Component;
-import org.springframework.util.StringUtils;
 
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Future;
-import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
 
 @Component
@@ -43,68 +38,39 @@ public class SensitiveStrategy implements FilterStrategy {
 
     @Override
     public FilterResult filter(FilterParam param) {
-        long start = System.currentTimeMillis();
         FilterResult filterResult = new FilterResult();
-        List<String> result = new ArrayList<>();
+        List<String> result = new ArrayList<>(param.getContents().size()); // presized: at most one id per input content
         List<Content> filterContents = new ArrayList<>();
 
-        CountDownLatch cdl = new CountDownLatch(param.getContents().size());
-        List<Future<Content>> futures = new ArrayList<>();
-        Map<String, String> titleMd5Map = new HashMap<>();
-        Map<String, ArticleSensitive> articleSensitiveMap = new HashMap<>();
-        List<String> md5List = new ArrayList<>();
-        for (Content content : param.getContents()) {
-            String md5 = Md5Util.encoderByMd5(content.getTitle());
-            md5List.add(md5);
-            titleMd5Map.put(content.getTitle(), md5);
-        }
-        List<ArticleSensitive> articleSensitiveList = getArticleSensitive(md5List);
-        if (CollectionUtil.isNotEmpty(articleSensitiveList)) {
-            articleSensitiveMap = articleSensitiveList.stream().collect(Collectors.toMap(ArticleSensitive::getMd5, o -> o));
-        }
+//        Map<String, String> titleMd5Map = new HashMap<>();
+//        Map<String, ArticleSensitive> articleSensitiveMap = new HashMap<>();
+//        List<String> md5List = new ArrayList<>();
+//        for (Content content : param.getContents()) {
+//            String md5 = Md5Util.encoderByMd5(content.getTitle());
+//            md5List.add(md5);
+//            titleMd5Map.put(content.getTitle(), md5);
+//        }
+//        List<ArticleSensitive> articleSensitiveList = getArticleSensitive(md5List);
+//        if (CollectionUtil.isNotEmpty(articleSensitiveList)) {
+//            articleSensitiveMap = articleSensitiveList.stream().collect(Collectors.toMap(ArticleSensitive::getMd5, o -> o));
+//        }
 
+        List<Set<Character>> unsafeTitleCache = TitleSimilarCheckUtil.makeCache(UnSafeTitles); // character sets of UnSafeTitles, rebuilt on every call; titles similar to any of them are filtered below
         for (Content content : param.getContents()) {
-            Map<String, ArticleSensitive> finalArticleSensitiveMap = articleSensitiveMap;
-            Future<Content> future = pool.submit(() -> {
-                try {
-//                    boolean isSensitive = articleSensitiveRemoteService.articleSensitive(content.getTitle(),
-//                            titleMd5Map,
-//                            finalArticleSensitiveMap);
-//                    if (isSensitive) {
-//                        content.setFilterReason("安全违规");
-//                    } else
-                    if (TitleSimilarCheckUtil.isDuplicateContent(content.getTitle(), UnSafeTitles, TitleSimilarCheckUtil.SIMILARITY_THRESHOLD)) {
-                        content.setFilterReason("安全违规");
-                    }
-                    return content;
-                } finally {
-                    cdl.countDown();
-                }
-            });
-            futures.add(future);
-        }
-        try {
-            cdl.await(5000, TimeUnit.MILLISECONDS);
-        } catch (InterruptedException e) {
-            log.error("filter error", e);
-            return null;
-        }
-
-        for (Future<Content> f : futures) {
             try {
-                Content content = f.get();
-                if (StringUtils.hasText(content.getFilterReason())) {
+                if (TitleSimilarCheckUtil.isDuplicateContentByCache(content.getTitle(), unsafeTitleCache,
+                        TitleSimilarCheckUtil.SIMILARITY_THRESHOLD)) {
+                    content.setFilterReason("安全违规");
                     filterContents.add(content);
                 } else {
                     result.add(content.getId());
                 }
             } catch (Exception e) {
-                log.error("future get error ", e);
+                log.error("similar check error ", e); // a content whose check throws is only logged: it ends up in neither result nor filterContents
             }
         }
         filterResult.setContentIds(result);
         filterResult.setFilterContent(filterContents);
-        log.info("SensitiveStrategy cost:{}", System.currentTimeMillis() - start);
         return filterResult;
     }
 

+ 44 - 12
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/util/TitleSimilarCheckUtil.java

@@ -2,16 +2,38 @@ package com.tzld.longarticle.recommend.server.util;
 
 import org.springframework.util.CollectionUtils;
 
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
+import java.util.*;
 
 public class TitleSimilarCheckUtil {
 
     public static final double SIMILARITY_THRESHOLD = 0.8;
     public static final double ARTICLE_PROMOTION_THRESHOLD = 0.75;
 
+    /**
+     * Pre-computes the distinct-character set of every title so that repeated
+     * similarity checks do not rebuild the same sets for each candidate.
+     *
+     * <p>The returned list has exactly one entry per input title, in input order.
+     * A {@code null} title is stored as an empty set, which the set-based
+     * {@code isSimilar} overload never reports as similar.
+     *
+     * @param titles titles to index; {@code null} is treated like an empty list
+     * @return one mutable character set per title, never {@code null}
+     */
+    public static List<Set<Character>> makeCache(List<String> titles) {
+        if (titles == null) {
+            return new ArrayList<>();
+        }
+        List<Set<Character>> cache = new ArrayList<>(titles.size());
+        for (String title : titles) {
+            if (title == null) {
+                // Keep the slot so cache indexes still line up with the input list.
+                cache.add(new HashSet<>());
+                continue;
+            }
+            Set<Character> currentSet = new HashSet<>(title.length());
+            for (char c : title.toCharArray()) {
+                currentSet.add(c);
+            }
+            cache.add(currentSet);
+        }
+        return cache;
+    }
+
+    public static boolean isDuplicateContentByCache(String title, List<Set<Character>> existsContentCache, double threshold) {
+        if (CollectionUtils.isEmpty(existsContentCache)) {
+            return false;
+        }
+        title = title.trim().replace("\u200b", "");
+        for (Set<Character> existTitleCache : existsContentCache) {
+            if (isSimilar(title, existTitleCache, threshold)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
     public static boolean isDuplicateContent(String title, List<String> existsContentTitle, double threshold) {
         boolean result = false;
         if (CollectionUtils.isEmpty(existsContentTitle)) {
@@ -27,23 +49,29 @@ public class TitleSimilarCheckUtil {
         return result;
     }
 
-    public static boolean isSimilar(String titleA, String titleB, double threshold) {
+    public static boolean isSimilar(String titleA, Set<Character> titleB, double threshold) { // titleB: pre-computed distinct characters of the second title
         if (titleA.isEmpty() || titleB.isEmpty()) {
             return false;
         }
-        Set<Character> setA = new HashSet<>();
+        Set<Character> setA = new HashSet<>(titleA.length());
         for (char c : titleA.toCharArray()) {
             setA.add(c);
         }
-        Set<Character> setB = new HashSet<>();
+        int minLen = Math.max(Math.min(setA.size(), titleB.size()), 1); // taken before retainAll below shrinks setA to the intersection
+        setA.retainAll(titleB);
+        double rate = setA.size() / (double) minLen; // shared distinct characters / smaller distinct-character count
+        return rate >= threshold;
+    }
+
+    public static boolean isSimilar(String titleA, String titleB, double threshold) { // overload: builds titleB's character set, then delegates to the set-based version
+        if (titleA.isEmpty() || titleB.isEmpty()) {
+            return false;
+        }
+        Set<Character> setB = new HashSet<>(titleB.length());
         for (char c : titleB.toCharArray()) {
             setB.add(c);
         }
-        Set<Character> setCross = new HashSet<>(setA);
-        setCross.retainAll(setB);
-        int minLen = Math.max(Math.min(setA.size(), setB.size()), 1);
-        double rate = (double) setCross.size() / minLen;
-        return rate >= threshold;
+        return isSimilar(titleA, setB, threshold);
     }
 
     public static void main(String[] args) {
@@ -54,5 +82,9 @@ public class TitleSimilarCheckUtil {
                 "陕西女孩去医院体检后,发现左肾不见了,意外牵出8年前手术疑云");
         boolean result = isDuplicateContent(title, existsContentTitle, SIMILARITY_THRESHOLD);
         System.out.println(result);
+
+        List<Set<Character>> titlesCache = makeCache(existsContentTitle);
+        result = isDuplicateContentByCache(title, titlesCache, SIMILARITY_THRESHOLD);
+        System.out.println(result);
     }
 }

+ 56 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/web/DataFlushController.java

@@ -0,0 +1,56 @@
+package com.tzld.longarticle.recommend.server.web;
+
+import com.tzld.longarticle.recommend.server.service.DataFlushService;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RestController;
+
+@RestController
+@RequestMapping("/data")
+@Slf4j
+public class DataFlushController {
+
+    @Autowired
+    private DataFlushService service;
+
+    @GetMapping("/flush/get_off_videos")
+    public void flushGetOffVideos(Integer pageNum) {
+        new Thread(() -> {
+            service.flushGetOffVideos(pageNum);
+        }).start();
+    }
+    @GetMapping("/flush/long_articles_root_source_id")
+    public void flushLongArticlesRootSourceId(Integer pageNum) {
+        new Thread(() -> {
+            service.flushLongArticlesRootSourceId(pageNum);
+        }).start();
+    }
+    @GetMapping("/flush/long_articles_crawler_videos")
+    public void flushLongArticlesCrawlerVideos(Integer pageNum) {
+        new Thread(() -> {
+            service.flushLongArticlesCrawlerVideos(pageNum);
+        }).start();
+    }
+    @GetMapping("/flush/long_articles_video")
+    public void flushLongArticlesVideos(Integer pageNum, Long id) {
+        new Thread(() -> {
+            service.flushLongArticlesVideos(pageNum, id);
+        }).start();
+    }
+    @GetMapping("/flush/long_articles_text")
+    public void flushLongArticlesText() {
+        new Thread(() -> {
+            service.flushLongArticlesText();
+        }).start();
+    }
+    @GetMapping("/update/long_article_match_videos/response")
+    public void updateLongArticleMatchVideosResponse(Long id) {
+        new Thread(() -> {
+            service.updateLongArticleMatchVideosResponse(id);
+        }).start();
+    }
+
+
+}

+ 37 - 0
long-article-recommend-service/src/main/resources/mapper/crawler/CrawlerBaseMapper.xml

@@ -9,6 +9,43 @@
             (#{item.dateStr}, #{item.ghId}, #{item.accountName}, #{item.relGhId}, #{item.relAccountName}, #{item.status}, #{item.correlation})
         </foreach>
     </insert>
+    <!-- Counts get_off_videos rows whose publish_time is above the hard-coded cutoff 1727239202
+         (looks like epoch seconds; confirm). -->
+    <select id="countGetOffVideos" resultType="java.lang.Integer">
+        select count(1) from get_off_videos where publish_time > 1727239202
+    </select>
+    <!-- Returns one page of get_off_videos ordered by video_id (offset / pageSize paging).
+         NOTE(review): unlike countGetOffVideos in this mapper, no publish_time filter is applied,
+         so pages derived from that count do not cover the same row set; confirm which is intended. -->
+    <select id="pageGetOffVideos" resultType="com.tzld.longarticle.recommend.server.model.dto.GetOffVideos">
+        select * from get_off_videos order by video_id limit #{offset}, #{pageSize}
+    </select>
+    <!-- Counts long_articles_root_source_id rows with requestTime above the hard-coded cutoff 1727192229. -->
+    <select id="countLongArticlesRootSourceId" resultType="java.lang.Integer">
+        select count(1) from long_articles_root_source_id where requestTime > 1727192229
+    </select>
+    <!-- Returns one page of the rows counted by countLongArticlesRootSourceId (same requestTime cutoff),
+         ordered by rootSourceId, using offset / pageSize paging. -->
+    <select id="pageLongArticlesRootSourceId" resultType="com.tzld.longarticle.recommend.server.model.entity.longArticle.LongArticlesRootSourceId">
+        select * from long_articles_root_source_id where requestTime > 1727192229 order by rootSourceId limit #{offset}, #{pageSize}
+    </select>
+    <!-- Counts article_match_videos rows whose update_time falls on or after the hard-coded date 2024-09-20. -->
+    <select id="countArticleMatchVideos" resultType="java.lang.Integer">
+        select count(1) from article_match_videos where DATE(update_time) &gt;= '2024-09-20'
+    </select>
+    <!-- Returns every article_match_videos row ordered by video_id.
+         NOTE(review): despite the "page" name there is no LIMIT and no update_time filter, so this
+         loads the whole table and does not match countArticleMatchVideos; confirm this is intended. -->
+    <select id="pageArticleMatchVideos"
+            resultType="com.tzld.longarticle.recommend.server.model.dto.ArticleMatchVideos">
+        select * from article_match_videos order by video_id
+    </select>
+    <!-- Counts long_articles_video rows with content_status = 2 and success = 0. -->
+    <select id="countLongArticlesVideos" resultType="java.lang.Integer">
+        select count(1) from long_articles_video where content_status = 2 and success = 0
+    </select>
+    <!-- Keyset paging over long_articles_video rows with content_status = 2 and success = 0:
+         returns up to pageSize rows whose id is greater than the given id, ordered by id. -->
+    <select id="pageLongArticlesVideos"
+            resultType="com.tzld.longarticle.recommend.server.model.dto.LongArticlesVideoDTO">
+        select * from long_articles_video where content_status = 2 and success = 0 and id > #{id} order by id limit #{pageSize}
+    </select>
+    <!-- Returns one row per content_id from long_articles_video rows that have a kimi_summary.
+         NOTE(review): kimi_title, kimi_summary and kimi_keys are neither aggregated nor part of the
+         GROUP BY, so which row supplies them is not defined by this query, and a server running with
+         ONLY_FULL_GROUP_BY would presumably reject it; confirm against the target database. -->
+    <select id="getLongArticlesText" resultType="com.tzld.longarticle.recommend.server.model.dto.LongArticlesText">
+        select content_id, kimi_title, kimi_summary, kimi_keys from long_articles_video where kimi_summary is not null group by content_id
+    </select>
+
+    <!-- Loads long_articles_video rows whose trace_id is in traceIds.
+         An empty traceIds collection leaves the IN clause without a value list, which is not
+         valid SQL, so callers need to pass at least one id. -->
+    <select id="getLongArticlesVideo"
+            resultType="com.tzld.longarticle.recommend.server.model.dto.LongArticlesVideoDTO">
+        select * from long_articles_video where trace_id in
+        <foreach collection="traceIds" item="item" open="(" close=")" separator=",">
+            #{item}
+        </foreach>
+    </select>
 
     <insert id="batchInsertAccountAvgInfo">
         INSERT INTO account_avg_info_v3

+ 113 - 0
long-article-recommend-service/src/main/resources/mapper/longArticle/LongArticleBaseMapper.xml

@@ -10,6 +10,10 @@
         delete from datastat_sort_strategy where date_str between #{dateStrBegin} and #{dateStrEnd} and type = #{type}
     </delete>
 
+    <!-- Total number of rows in get_off_videos. No LIMIT is applied: count(1) yields a single row,
+         so a LIMIT with any non-zero offset would skip that row and return no result at all. -->
+    <select id="countGetOffVideos" resultType="java.lang.Long">
+        select count(1) from get_off_videos
+    </select>
+
     <insert id="batchInsertDatastatSortStrategy">
         INSERT INTO datastat_sort_strategy
         (date_str, publish_time, account_mode, account_source, account_type, account_status, bussiness_type,
@@ -114,4 +118,113 @@
         </foreach>
     </insert>
 
+    <!-- Loads get_off_videos rows whose video_id is in videoIds.
+         An empty videoIds collection leaves the IN clause without a value list, which is not
+         valid SQL, so callers need to pass at least one id. -->
+    <select id="getGetOffVideos" resultType="com.tzld.longarticle.recommend.server.model.dto.GetOffVideos">
+        select * from get_off_videos where video_id in
+        <foreach collection="videoIds" item="item" open="(" close=")" separator=",">
+            #{item}
+        </foreach>
+    </select>
+
+    <!-- Multi-row insert into get_off_videos: one VALUES tuple per element of the list parameter.
+         The list must not be empty, otherwise the statement has no VALUES tuples. -->
+    <insert id="batchInsertGetOffVideos">
+        INSERT INTO get_off_videos (video_id, publish_time, video_status, trace_id, get_off_time, check_status)
+        VALUES
+        <foreach collection="list" item="item" separator=",">
+            (#{item.videoId}, #{item.publishTime}, #{item.videoStatus}, #{item.traceId}, #{item.getOffTime}, #{item.checkStatus})
+        </foreach>
+    </insert>
+
+    <!-- Multi-row insert into long_articles_crawler_videos: one VALUES tuple per element of the
+         list parameter. The list must not be empty, otherwise the statement has no VALUES tuples. -->
+    <insert id="batchInsertLongArticlesCrawlerVideos" parameterType="list">
+        INSERT INTO long_articles_crawler_videos (content_id, out_video_id, platform, video_title, play_count,
+                                                  like_count, share_count, publish_time, crawler_time, duration,
+                                                  video_url, cover_url, download_status, video_oss_path, cover_oss_path,
+                                                  user_id, trace_id, score)
+        VALUES
+        <foreach collection="list" item="item" separator=",">
+            (#{item.contentId}, #{item.outVideoId}, #{item.platform}, #{item.videoTitle}, #{item.playCount},
+             #{item.likeCount}, #{item.shareCount}, #{item.publishTime}, #{item.crawlerTime}, #{item.duration},
+             #{item.videoUrl}, #{item.coverUrl}, #{item.downloadStatus}, #{item.videoOssPath}, #{item.coverOssPath},
+             #{item.userId}, #{item.traceId}, #{item.score})
+        </foreach>
+    </insert>
+
+    <!-- Multi-row insert into long_articles_text: one VALUES tuple per element of the list
+         parameter. The list must not be empty, otherwise the statement has no VALUES tuples. -->
+    <insert id="batchInsertLongArticlesText" parameterType="list">
+        INSERT INTO long_articles_text (content_id, article_title, article_text, kimi_title, kimi_summary,
+                                              kimi_keys, kimi_status)
+        VALUES
+        <foreach collection="list" item="item" separator=",">
+            (#{item.contentId}, #{item.articleTitle}, #{item.articleText}, #{item.kimiTitle}, #{item.kimiSummary},
+             #{item.kimiKeys}, #{item.kimiStatus})
+        </foreach>
+    </insert>
+
+    <!-- Returns the content_id values from contentIds that already have a long_articles_text row.
+         An empty contentIds collection leaves the IN clause without a value list, which is not
+         valid SQL, so callers need to pass at least one id. -->
+    <select id="getLongArticlesTextByContentIds" resultType="java.lang.String">
+        select content_id from long_articles_text where content_id in
+        <foreach collection="contentIds" item="item" open="(" close=")" separator=",">
+            #{item}
+        </foreach>
+    </select>
+
+    <!-- Loads every long_articles_text row whose kimi_title is still null (no paging). -->
+    <select id="getNeedUpdateRecords"
+            resultType="com.tzld.longarticle.recommend.server.model.dto.LongArticlesText">
+        select * from long_articles_text where kimi_title is null
+    </select>
+
+    <!-- Loads long_articles_root_source_id rows whose root_source_id is in rootSourceIdList.
+         An empty rootSourceIdList leaves the IN clause without a value list, which is not
+         valid SQL, so callers need to pass at least one id. -->
+    <select id="getLongArticlesRootSourceId"
+            resultType="com.tzld.longarticle.recommend.server.model.entity.longArticle.LongArticlesRootSourceId">
+        select * from long_articles_root_source_id where root_source_id in
+        <foreach collection="rootSourceIdList" item="item" open="(" close=")" separator=",">
+            #{item}
+        </foreach>
+    </select>
+
+    <!-- Overwrites kimi_title, kimi_summary and kimi_keys on the long_articles_text rows
+         matching the given contentId. -->
+    <update id="updateLongArticlesText">
+        update long_articles_text
+        set kimi_title = #{kimiTitle},
+            kimi_summary = #{kimiSummary},
+            kimi_keys = #{kimiKeys}
+        where content_id = #{contentId}
+    </update>
+
+    <!-- Overwrites the response column on the long_articles_match_videos rows matching the given traceId. -->
+    <update id="updateLongArticleMatchVideosResponse">
+        update long_articles_match_videos
+        set response = #{response}
+        where trace_id = #{traceId}
+    </update>
+
+    <!-- Loads long_articles_match_videos rows whose trace_id is in traceIds.
+         An empty traceIds collection leaves the IN clause without a value list, which is not
+         valid SQL, so callers need to pass at least one id. -->
+    <select id="getLongArticlesMatchVideos"
+            resultType="com.tzld.longarticle.recommend.server.model.dto.LongArticlesMatchVideos">
+        select * from long_articles_match_videos where trace_id in
+        <foreach collection="traceIds" item="item" open="(" close=")" separator=",">
+            #{item}
+        </foreach>
+    </select>
+    <!-- Loads long_articles_crawler_videos rows with download_status = 2 whose content_id is in
+         contentIds. An empty contentIds collection leaves the IN clause without a value list,
+         which is not valid SQL, so callers need to pass at least one id. -->
+    <select id="getLongArticlesCrawlerVideos"
+            resultType="com.tzld.longarticle.recommend.server.model.dto.LongArticlesCrawlerVideos">
+        select * from long_articles_crawler_videos where content_id in
+        <foreach collection="contentIds" item="item" open="(" close=")" separator=",">
+            #{item}
+        </foreach>
+        and download_status = 2
+    </select>
+
+    <!-- Keyset paging over long_articles_match_videos rows whose flow_pool_level is null:
+         returns up to pageSize rows with id greater than the given id, ordered by id. -->
+    <select id="getNeedMatchVideos"
+            resultType="com.tzld.longarticle.recommend.server.model.dto.LongArticlesMatchVideos">
+        select * from long_articles_match_videos where flow_pool_level is null and id > #{id} order by id limit #{pageSize}
+    </select>
+    <!-- Counts long_articles_match_videos rows with a null flow_pool_level and id above the
+         hard-coded value 3962393.
+         NOTE(review): getNeedMatchVideos takes its id floor as a parameter, so this count only
+         matches that query when it is started from id 3962393; confirm. -->
+    <select id="countNeedMatchVideos" resultType="java.lang.Integer">
+        select count(1) from long_articles_match_videos where id > 3962393 and flow_pool_level is null
+    </select>
+
+    <!-- Multi-row insert into long_articles_match_videos: one VALUES tuple per element of the
+         list parameter. The list must not be empty, otherwise the statement has no VALUES tuples. -->
+    <insert id="batchInsertLongArticlesMatchVideos" parameterType="list">
+        INSERT INTO long_articles_match_videos (trace_id, content_id, flow_pool_level, gh_id, account_name,
+                                                      content_status, success_status, request_timestamp, response,
+                                                      process_times)
+        VALUES
+        <foreach collection="list" item="item" separator=",">
+            (#{item.traceId}, #{item.contentId}, #{item.flowPoolLevel}, #{item.ghId}, #{item.accountName},
+             #{item.contentStatus}, #{item.successStatus}, #{item.requestTimestamp}, #{item.response},
+             #{item.processTimes})
+        </foreach>
+    </insert>
+
 </mapper>

+ 262 - 0
long-article-recommend-service/src/test/java/com/tzld/longarticle/recommend/server/RecommendTest.java

@@ -18,6 +18,7 @@ import com.tzld.longarticle.recommend.server.service.recommend.RecommendService;
 import com.tzld.longarticle.recommend.server.service.recommend.recall.RecallService;
 import com.tzld.longarticle.recommend.server.util.DateUtils;
 import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.math3.stat.correlation.PearsonsCorrelation;
 import org.apache.poi.ss.usermodel.Cell;
 import org.apache.poi.ss.usermodel.Row;
 import org.apache.poi.ss.usermodel.Sheet;
@@ -549,4 +550,265 @@ public class RecommendTest {
         }
     }
 
+
+    @Test
+    public void correlation() {
+        List<String> ghIds = Lists.newArrayList("gh_e24da99dc899",
+                "gh_183d80deffb8",
+                "gh_be8c29139989",
+                "gh_c69776baf2cd",
+                "gh_b15de7c99912",
+                "gh_1d887d61088c",
+                "gh_3ed305b5817f",
+                "gh_3e91f0624545",
+                "gh_30816d8adb52",
+                "gh_970460d9ccec",
+                "gh_749271f1ccd5",
+                "gh_ac43e43b253b"
+        );
+        List<PublishSortLog> sortLogList = publishSortLogRepository.findByGhIdInAndDateStrGreaterThanEqual(ghIds, "20240907");
+        sortLogList = sortLogList.stream().filter(o -> o.getIndex() == 1).collect(Collectors.toList());
+        sortLogList.sort(Comparator.comparing(PublishSortLog::getGhId).thenComparing(PublishSortLog::getDateStr));
+        List<Article> articleList = articleRepository.getByGhIdInAndUpdateTimeGreaterThanAndTypeEquals(ghIds, 1725638400L, "9");
+        articleList = articleList.stream().filter(o -> o.getItemIndex() == 1).collect(Collectors.toList());
+        Map<String, Map<String, Article>> articleMap = articleList.stream().collect(Collectors.groupingBy(Article::getGhId, Collectors.toMap(
+                o -> DateUtils.timestampToYMDStr(o.getUpdateTime(),"yyyyMMdd"), o -> o,
+                (existing, replacement) -> replacement)));
+        List<AccountAvgInfo> accountAvgInfoList = accountAvgInfoRepository.getAllByGhIdIn(new HashSet<>(ghIds));
+        Map<String, Map<String, AccountAvgInfo>> accountAvgInfoMap = accountAvgInfoList.stream()
+                .filter(o -> Objects.equals(o.getPosition(), "1")).collect(Collectors.groupingBy(AccountAvgInfo::getGhId,
+                        Collectors.toMap(AccountAvgInfo::getUpdateTime, o -> o)));
+        int rowNum = 0;
+        Map<String, List<PublishSortLog>> sortLogMap = sortLogList.stream().collect(Collectors.groupingBy(PublishSortLog::getGhId));
+        PearsonsCorrelation correlation = new PearsonsCorrelation();
+        Workbook workbook = new XSSFWorkbook();
+        Sheet sheet = workbook.createSheet("ExampleSheet");
+        // 创建标题行
+        Row titleRow = sheet.createRow(rowNum);
+        for (Map.Entry<String, List<PublishSortLog>> entry : sortLogMap.entrySet()) {
+            String ghId = entry.getKey();
+            String name = entry.getValue().get(0).getAccountName();
+            List<PublishSortLog> itemList = entry.getValue();
+            String title = "";
+            double[] scoreArr = new double[itemList.size()];
+            double[] HisFissionFansRateRateStrategyArr = new double[itemList.size()];
+            double[] HisFissionAvgReadRateRateStrategyArr = new double[itemList.size()];
+            double[] PublishTimesStrategyArr = new double[itemList.size()];
+            double[] ViewCountRateCorrelationStrategyArr = new double[itemList.size()];
+            double[] HisFissionAvgReadSumRateStrategyArr = new double[itemList.size()];
+            double[] HisFissionAvgReadRateCorrelationRateStrategyArr = new double[itemList.size()];
+            double[] HisFissionFansSumRateStrategyArr = new double[itemList.size()];
+            double[] SimilarityStrategyArr = new double[itemList.size()];
+            double[] ViewCountStrategyArr = new double[itemList.size()];
+            double[] ViewCountRateStrategyArr = new double[itemList.size()];
+            double[] HisFissionDeWeightAvgReadSumRateStrategyArr = new double[itemList.size()];
+            double[] scoreRateArr = new double[itemList.size()];
+            for (int i = 0; i < itemList.size(); i++) {
+                PublishSortLog publishSortLog = itemList.get(i);
+                Map<String, Article> dateArticleMap = articleMap.get(publishSortLog.getGhId());
+                Article article = dateArticleMap.get(publishSortLog.getDateStr());
+                if (Objects.isNull(article) || !publishSortLog.getTitle().equals(article.getTitle())) {
+                    continue;
+                }
+                if (publishSortLog.getTitle().equals(title)) {
+                    continue;
+                }
+                title = publishSortLog.getTitle();
+                scoreArr[i] = Double.parseDouble(publishSortLog.getScore());
+                JSONObject scoreMap = JSONObject.parseObject(publishSortLog.getScoreMap());
+                HisFissionFansRateRateStrategyArr[i] =  Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansRateRateStrategy")).orElse(0.0)));
+                HisFissionAvgReadRateRateStrategyArr[i] =  Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateRateStrategy")).orElse(0.0)));
+                PublishTimesStrategyArr[i] =  Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("PublishTimesStrategy")).orElse(0.0)));
+                ViewCountRateCorrelationStrategyArr[i] =  Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountRateCorrelationStrategy")).orElse(0.0)));
+                HisFissionAvgReadSumRateStrategyArr[i] =  Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadSumRateStrategy")).orElse(0.0)));
+                HisFissionAvgReadRateCorrelationRateStrategyArr[i] =  Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateCorrelationRateStrategy")).orElse(0.0)));
+                HisFissionFansSumRateStrategyArr[i] =  Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansSumRateStrategy")).orElse(0.0)));
+                SimilarityStrategyArr[i] =  Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("SimilarityStrategy")).orElse(0.0)));
+                ViewCountStrategyArr[i] =  Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountStrategy")).orElse(0.0)));
+                ViewCountRateStrategyArr[i] =  Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountRateStrategy")).orElse(0.0)));
+                HisFissionDeWeightAvgReadSumRateStrategyArr[i] =  Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionDeWeightAvgReadSumRateStrategy")).orElse(0.0)));
+                Map<String, AccountAvgInfo> map = accountAvgInfoMap.get(article.getGhId());
+                if (Objects.nonNull(map)) {
+                    List<String> avgMapDateList = new ArrayList<>(map.keySet());
+                    String publishDate = DateUtils.findNearestDate(avgMapDateList,
+                            DateUtils.timestampToYMDStr(article.getUpdateTime(), "yyyy-MM-dd"), "yyyy-MM-dd");
+                    AccountAvgInfo accountAvgInfo = map.get(publishDate);
+                    if (Objects.nonNull(accountAvgInfo)) {
+                        scoreRateArr[i] = Double.parseDouble(String.format("%.3f", article.getShowViewCount() / (double) accountAvgInfo.getReadAvg()));
+                    }
+                }
+            }
+
+            rowNum++;
+            Row row = sheet.createRow(rowNum);
+            Cell cell = row.createCell(0);
+            cell = row.createCell(1);
+            cell.setCellValue(ghId);
+            cell = row.createCell(2);
+            cell.setCellValue(name);
+            cell = row.createCell(3);
+            cell = row.createCell(4);
+            cell = row.createCell(5);
+            cell.setCellValue(correlation.correlation(scoreArr, scoreRateArr));
+            cell = row.createCell(6);
+            cell.setCellValue(correlation.correlation(HisFissionFansRateRateStrategyArr, scoreRateArr));
+            cell = row.createCell(7);
+            cell.setCellValue(correlation.correlation(HisFissionAvgReadRateRateStrategyArr, scoreRateArr));
+            cell = row.createCell(8);
+            cell.setCellValue(correlation.correlation(PublishTimesStrategyArr, scoreRateArr));
+            cell = row.createCell(9);
+            cell.setCellValue(correlation.correlation(ViewCountRateCorrelationStrategyArr, scoreRateArr));
+            cell = row.createCell(10);
+            cell.setCellValue(correlation.correlation(HisFissionAvgReadSumRateStrategyArr, scoreRateArr));
+            cell = row.createCell(11);
+            cell.setCellValue(correlation.correlation(HisFissionAvgReadRateCorrelationRateStrategyArr, scoreRateArr));
+            cell = row.createCell(12);
+            cell.setCellValue(correlation.correlation(HisFissionFansSumRateStrategyArr, scoreRateArr));
+            cell = row.createCell(13);
+            cell.setCellValue(correlation.correlation(SimilarityStrategyArr, scoreRateArr));
+            cell = row.createCell(14);
+            cell.setCellValue(correlation.correlation(ViewCountStrategyArr, scoreRateArr));
+            cell = row.createCell(15);
+            cell.setCellValue(correlation.correlation(ViewCountRateStrategyArr, scoreRateArr));
+            cell = row.createCell(16);
+            cell.setCellValue(correlation.correlation(HisFissionDeWeightAvgReadSumRateStrategyArr, scoreRateArr));
+        }
+
+        try (FileOutputStream outputStream = new FileOutputStream("/Users/wangyunpeng/Downloads/example.xlsx")) {
+            workbook.write(outputStream);
+        } catch (IOException e) {
+            e.printStackTrace();
+        } finally {
+            try {
+                workbook.close();
+            } catch (IOException e) {
+                e.printStackTrace();
+            }
+        }
+    }
+
+
+
+    @Test
+    /**
+     * Manual export job: reads a JSON array of log records from a local file and flattens the
+     * score lists they carry into an xlsx sheet, one row per scored article.
+     *
+     * <p>Each record provides {@code __time__} and {@code message}. The message is expected to be
+     * a text prefix followed by a JSON array starting at the first {@code '['}; the prefix yields
+     * the strategy (text before the first space) and the account name (the rest, with the
+     * surrounding fixed labels removed). Records whose message does not have that shape are
+     * logged and skipped so that one bad line does not discard the whole export.
+     *
+     * <p>NOTE(review): both the input and the output path are developer-local absolute paths.
+     */
+    @Test
+    void getScoreFromLogFile() {
+
+        String logFilePath = "/Users/wangyunpeng/Downloads/b78020b8-d9df-466f-bd01-cd982bb986d0.json";
+
+        File file = new File(logFilePath);
+
+        // Column titles; entries from index 6 onwards are also the keys looked up in each scoreMap.
+        String[] headers = {
+                "日期",
+                "账号名称",
+                "id",
+                "标题",
+                "策略",
+                "得分",
+                "HisFissionFansRateRateStrategy",
+                "HisFissionAvgReadRateRateStrategy",
+                "PublishTimesStrategy",
+                "ViewCountRateCorrelationStrategy",
+                "HisFissionAvgReadSumRateStrategy",
+                "HisFissionAvgReadRateCorrelationRateStrategy",
+                "HisFissionFansSumRateStrategy",
+                "SimilarityStrategy",
+                "ViewCountStrategy",
+                "ViewCountRateStrategy",
+                "HisFissionDeWeightAvgReadSumRateStrategy"
+        };
+        int firstStrategyColumn = 6;
+
+        // try-with-resources closes the workbook even when reading or parsing fails.
+        try (Workbook workbook = new XSSFWorkbook()) {
+            Sheet sheet = workbook.createSheet("ExampleSheet");
+            int rowNum = 0;
+            // Title row.
+            Row titleRow = sheet.createRow(rowNum);
+            for (int col = 0; col < headers.length; col++) {
+                titleRow.createCell(col).setCellValue(headers[col]);
+            }
+
+            String content = new String(Files.readAllBytes(file.toPath()), StandardCharsets.UTF_8);
+            JSONArray jsonArray = JSONArray.parseArray(content);
+            for (Object o : jsonArray) {
+                JSONObject jsonObject = (JSONObject) o;
+                Long time = jsonObject.getLong("__time__");
+                String message = jsonObject.getString("message");
+                // The score list starts at the first '['; everything before it is the text prefix.
+                int index = Objects.isNull(message) ? -1 : message.indexOf("[");
+                if (index < 0) {
+                    log.warn("getScoreFromLogFile skip record without score array message:{}", message);
+                    continue;
+                }
+                String info = message.substring(0, index);
+                int firstSpace = info.indexOf(" ");
+                if (firstSpace < 0) {
+                    log.warn("getScoreFromLogFile skip record with unexpected prefix message:{}", message);
+                    continue;
+                }
+                String strategy = info.substring(0, firstSpace);
+                String accountName = info.substring(firstSpace).replace("账号名称 ", "")
+                        .replace(" 头条评分结果", "");
+                String dateStr = DateUtils.timestampToYMDStr(time, "yyyyMMdd");
+                JSONArray scoreArray = JSONArray.parseArray(message.substring(index));
+                for (Object scoreJSON : scoreArray) {
+                    JSONObject scoreObject = (JSONObject) scoreJSON;
+                    JSONObject scoreMap = JSONObject.parseObject(scoreObject.getString("scoreMap"));
+                    rowNum++;
+                    Row row = sheet.createRow(rowNum);
+                    row.createCell(0).setCellValue(dateStr);
+                    row.createCell(1).setCellValue(accountName);
+                    row.createCell(2).setCellValue(scoreObject.getString("id"));
+                    row.createCell(3).setCellValue(scoreObject.getString("title"));
+                    row.createCell(4).setCellValue(strategy);
+                    row.createCell(5).setCellValue(scoreObject.getString("score"));
+                    for (int col = firstStrategyColumn; col < headers.length; col++) {
+                        // getDoubleValue returns a primitive, so no Optional wrapper is needed.
+                        row.createCell(col).setCellValue(String.format("%.3f", scoreMap.getDoubleValue(headers[col])));
+                    }
+                }
+            }
+
+            try (FileOutputStream outputStream = new FileOutputStream("/Users/wangyunpeng/Downloads/example.xlsx")) {
+                workbook.write(outputStream);
+            } catch (IOException e) {
+                log.error("writeFileError fileName:{}", "example.xlsx", e);
+            }
+        } catch (Exception e) {
+            log.error("readFileError fileName:{}", file.getName(), e);
+        }
+    }
+
 }