Browse Source

Merge branch 'wyp/0925-data-migration' of Server/long-article-recommend into master

wangyunpeng 7 tháng trước
mục cha
commit
b2ebb7f781
14 tập tin đã thay đổi với 1027 bổ sung và 0 xóa
  1. 25 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/crawler/CrawlerBaseMapper.java
  2. 29 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/longArticle/LongArticleBaseMapper.java
  3. 22 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/ArticleMatchVideos.java
  4. 13 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/GetOffVideos.java
  5. 28 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/LongArticlesCrawlerVideos.java
  6. 21 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/LongArticlesMatchVideos.java
  7. 14 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/LongArticlesText.java
  8. 30 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/LongArticlesVideoDTO.java
  9. 2 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/crawler/ArticleRepository.java
  10. 375 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/DataFlushService.java
  11. 56 0
      long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/web/DataFlushController.java
  12. 37 0
      long-article-recommend-service/src/main/resources/mapper/crawler/CrawlerBaseMapper.xml
  13. 113 0
      long-article-recommend-service/src/main/resources/mapper/longArticle/LongArticleBaseMapper.xml
  14. 262 0
      long-article-recommend-service/src/test/java/com/tzld/longarticle/recommend/server/RecommendTest.java

+ 25 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/crawler/CrawlerBaseMapper.java

@@ -1,8 +1,13 @@
 package com.tzld.longarticle.recommend.server.mapper.crawler;
 
+import com.tzld.longarticle.recommend.server.model.dto.ArticleMatchVideos;
+import com.tzld.longarticle.recommend.server.model.dto.GetOffVideos;
+import com.tzld.longarticle.recommend.server.model.dto.LongArticlesText;
+import com.tzld.longarticle.recommend.server.model.dto.LongArticlesVideoDTO;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountAvgInfo;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountCorrelation;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.Article;
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.LongArticlesRootSourceId;
 
 import java.util.List;
 
@@ -21,4 +26,24 @@ public interface CrawlerBaseMapper {
     void updateArticleSourceRootId(String wxsn, String sourcePublishContentId, String rootPublishContentId, String rootProduceContentId);
 
     List<Article> getWaitingFindArticle(Long timestamp);
+
+    Integer countGetOffVideos();
+
+    List<GetOffVideos> pageGetOffVideos(int offset, int pageSize);
+
+    Integer countLongArticlesRootSourceId();
+
+    List<LongArticlesRootSourceId> pageLongArticlesRootSourceId(int offset, int pageSize);
+
+    Integer countArticleMatchVideos();
+
+    List<ArticleMatchVideos> pageArticleMatchVideos();
+
+    Integer countLongArticlesVideos();
+
+    List<LongArticlesVideoDTO> pageLongArticlesVideos(long id, int pageSize);
+
+    List<LongArticlesText> getLongArticlesText();
+
+    List<LongArticlesVideoDTO> getLongArticlesVideo(List<String> traceIds);
 }

+ 29 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/longArticle/LongArticleBaseMapper.java

@@ -4,6 +4,7 @@ import com.tzld.longarticle.recommend.server.model.entity.longArticle.LongArticl
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleCategory;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.DatastatScore;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticlePoolPromotionSource;
+import com.tzld.longarticle.recommend.server.model.dto.*;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.DatastatSortStrategy;
 import org.apache.ibatis.annotations.Mapper;
 
@@ -32,4 +33,32 @@ public interface LongArticleBaseMapper {
                                                    Integer fans, String dateStr, List<Integer> positions);
 
     void batchInsertLongArticlesRootSourceId(List<LongArticlesRootSourceId> list);
+
+    void batchInsertGetOffVideos(List<GetOffVideos> list);
+
+    int batchInsertLongArticlesCrawlerVideos(List<LongArticlesCrawlerVideos> list);
+
+    int batchInsertLongArticlesText(List<LongArticlesText> list);
+
+    List<String> getLongArticlesTextByContentIds(List<String> contentIds);
+
+    int batchInsertLongArticlesMatchVideos(List<LongArticlesMatchVideos> list);
+
+    List<LongArticlesText> getNeedUpdateRecords();
+
+    int updateLongArticlesText(LongArticlesText item);
+
+    List<LongArticlesRootSourceId> getLongArticlesRootSourceId(List<String> rootSourceIdList);
+
+    List<GetOffVideos> getGetOffVideos(List<Long> videoIds);
+
+    List<LongArticlesMatchVideos> getLongArticlesMatchVideos(List<String> traceIds);
+
+    List<LongArticlesCrawlerVideos> getLongArticlesCrawlerVideos(List<String> contentIds);
+
+    List<LongArticlesMatchVideos> getNeedMatchVideos(Long id, Integer pageSize);
+
+    void updateLongArticleMatchVideosResponse(LongArticlesMatchVideos longArticlesMatchVideos);
+
+    int countNeedMatchVideos(Long id);
 }

+ 22 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/ArticleMatchVideos.java

@@ -0,0 +1,22 @@
+package com.tzld.longarticle.recommend.server.model.dto;
+
+import lombok.Data;
+
+import java.util.Date;
+
+/**
+ * Row DTO returned by {@code CrawlerBaseMapper.pageArticleMatchVideos()}.
+ *
+ * <p>{@code DataFlushService.flushLongArticlesCrawlerVideos} reads these rows and copies them
+ * into {@link LongArticlesCrawlerVideos} records. Lombok's {@code @Data} generates the accessors.
+ */
+@Data
+public class ArticleMatchVideos {
+    private String videoId;
+    // Copied unchanged to LongArticlesCrawlerVideos.traceId by the flush job.
+    private String traceId;
+    // Grouping key: the flush job ranks rows that share the same contentId.
+    private String contentId;
+    private String rootSourceId;
+    // Rows without text here are dropped by the flush job; otherwise copied to videoOssPath.
+    private String videoPath;
+    private Long requestTime;
+    // Used by the flush job both for ranking within a contentId and as the target crawlerTime.
+    private Date updateTime;
+    private Integer videoStatus;
+    private Integer ossStatus;
+    // Copied to LongArticlesCrawlerVideos.coverOssPath.
+    private String coverPath;
+    private String platform;
+    // Copied to LongArticlesCrawlerVideos.userId.
+    private String uid;
+    private String videoTitle;
+}

+ 13 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/GetOffVideos.java

@@ -0,0 +1,13 @@
+package com.tzld.longarticle.recommend.server.model.dto;
+
+import lombok.Data;
+
+/**
+ * DTO for a "get off" (taken-down) video row.
+ *
+ * <p>Read page by page through {@code CrawlerBaseMapper.pageGetOffVideos} and written through
+ * {@code LongArticleBaseMapper.batchInsertGetOffVideos}; {@code videoId} is the key the flush job
+ * uses to skip rows that already exist in the target.
+ */
+@Data
+public class GetOffVideos {
+    private Long videoId; // video id
+    private Long publishTime; // video publish time
+    private Integer videoStatus; // video status
+    private String traceId; // "final id" in the original note; presumably the trace id - TODO confirm
+    private Long getOffTime; // time the video was taken down
+    private Integer checkStatus; // check status, defaults to 0
+}

+ 28 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/LongArticlesCrawlerVideos.java

@@ -0,0 +1,28 @@
+package com.tzld.longarticle.recommend.server.model.dto;
+
+import lombok.Data;
+
+import java.util.Date;
+
+/**
+ * DTO for rows handled by {@code LongArticleBaseMapper.batchInsertLongArticlesCrawlerVideos} and
+ * {@code LongArticleBaseMapper.getLongArticlesCrawlerVideos}.
+ *
+ * <p>{@code DataFlushService.flushLongArticlesCrawlerVideos} builds instances from
+ * {@link ArticleMatchVideos} rows. It only populates {@code contentId}, {@code platform},
+ * {@code videoTitle}, {@code crawlerTime}, {@code videoOssPath}, {@code coverOssPath},
+ * {@code userId}, {@code traceId}, {@code downloadStatus} and {@code score}; the remaining fields
+ * are left {@code null} by that job.
+ */
+@Data
+public class LongArticlesCrawlerVideos {
+    private Integer id;
+    // The flush job skips new rows for a contentId that already has three or more rows stored.
+    private String contentId;
+    private String outVideoId;
+    private String platform;
+    private String videoTitle;
+    private Integer playCount;
+    private Integer likeCount;
+    private Integer shareCount;
+    private Date publishTime;
+    // Filled from ArticleMatchVideos.updateTime by the flush job.
+    private Date crawlerTime;
+    private Integer duration;
+    private String videoUrl;
+    private String coverUrl;
+    // The flush job always writes 2 here; the meaning of the code is not visible in this change.
+    private Integer downloadStatus;
+    private String videoOssPath;
+    private String coverOssPath;
+    private String userId;
+    private String traceId;
+    // The flush job stores a rank weight (1, 0.5 or 0.2) divided by 1000.
+    private Double score;
+}

+ 21 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/LongArticlesMatchVideos.java

@@ -0,0 +1,21 @@
+package com.tzld.longarticle.recommend.server.model.dto;
+
+import lombok.Data;
+
+import java.util.Date;
+
+/**
+ * DTO for rows handled by the {@code LongArticleBaseMapper} match-video statements
+ * ({@code batchInsertLongArticlesMatchVideos}, {@code getLongArticlesMatchVideos},
+ * {@code getNeedMatchVideos}, {@code updateLongArticleMatchVideosResponse}).
+ *
+ * <p>{@code DataFlushService.flushLongArticlesVideos} builds instances from
+ * {@link LongArticlesVideoDTO} rows.
+ */
+@Data
+public class LongArticlesMatchVideos {
+    // Used as the paging cursor by DataFlushService.updateLongArticleMatchVideosResponse.
+    private Integer id;
+    // Key used by the flush job to skip rows that already exist in the target.
+    private String traceId;
+    private String contentId;
+    private String flowPoolLevel;
+    private String ghId;
+    private String accountName;
+    // The flush job always writes 4 here; the meaning of the code is not visible in this change.
+    private Integer contentStatus;
+    // Filled from LongArticlesVideoDTO.success.
+    private Integer successStatus;
+    private Integer requestTimestamp;
+    private Date updateTime;
+    // JSON array string assembled by DataFlushService.getLongArticleVideoResponse.
+    private String response;
+    private Integer processTimes;
+}

+ 14 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/LongArticlesText.java

@@ -0,0 +1,14 @@
+package com.tzld.longarticle.recommend.server.model.dto;
+
+import lombok.Data;
+
+/**
+ * DTO holding an article's text together with its "kimi" title, summary and keys.
+ *
+ * <p>Returned by {@code CrawlerBaseMapper.getLongArticlesText()} and
+ * {@code LongArticleBaseMapper.getNeedUpdateRecords()}, and written through
+ * {@code batchInsertLongArticlesText} / {@code updateLongArticlesText}.
+ */
+@Data
+public class LongArticlesText {
+    // Lookup key used by DataFlushService when matching source and target rows.
+    private String contentId;
+    private String articleTitle;
+    private String articleText;
+    private String kimiTitle;
+    private String kimiSummary;
+    private String kimiKeys;
+    // DataFlushService.flushLongArticlesVideos always writes 1; meaning of the code is not visible here.
+    private Integer kimiStatus;
+}

+ 30 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/LongArticlesVideoDTO.java

@@ -0,0 +1,30 @@
+package com.tzld.longarticle.recommend.server.model.dto;
+
+import lombok.Data;
+
+import java.util.Date;
+
+/**
+ * Row DTO returned by {@code CrawlerBaseMapper.pageLongArticlesVideos} and
+ * {@code CrawlerBaseMapper.getLongArticlesVideo}.
+ *
+ * <p>{@code DataFlushService.flushLongArticlesVideos} splits each row into a
+ * {@link LongArticlesText} record and a {@link LongArticlesMatchVideos} record.
+ */
+@Data
+public class LongArticlesVideoDTO {
+    // Paging cursor: the flush job continues from the largest id of the previous page.
+    private Long id;
+    private String traceId;
+    // Rows whose contentId ends with "lehuo" are skipped by the flush job.
+    private String contentId;
+    private String ghId;
+    private String accountName;
+    private String articleTitle;
+    private String articleText;
+    private Integer contentStatus;
+    // Double quotes are stripped from this value before it is stored or embedded in a response.
+    private String kimiTitle;
+    private String kimiSummary;
+    private String kimiKeys;
+    private Integer recallVideoId1;
+    private Integer recallVideoId2;
+    private Integer recallVideoId3;
+    // result1..result3 are JSON object strings parsed by DataFlushService.resultToResponse.
+    private String result1;
+    private String result2;
+    private String result3;
+    private Integer success;
+    private Date updateTime;
+    // When null, the flush job does not create a LongArticlesMatchVideos record for the row.
+    private Integer requestTimeStamp;
+    private Integer processTimes;
+}

+ 2 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/crawler/ArticleRepository.java

@@ -34,4 +34,6 @@ public interface ArticleRepository extends JpaRepository<Article, String> {
     Article getByWxSn(String wxSn);
 
     int countByGhIdAndTypeAndItemIndex(String ghId, String val, Integer itemIndex);
+
+    List<Article> getByGhIdInAndUpdateTimeGreaterThanAndTypeEquals(List<String> ghIds, long l, String number);
 }

+ 375 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/DataFlushService.java

@@ -0,0 +1,375 @@
+package com.tzld.longarticle.recommend.server.service;
+
+import cn.hutool.core.collection.CollectionUtil;
+import com.alibaba.fastjson.JSONArray;
+import com.alibaba.fastjson.JSONObject;
+import com.google.common.collect.Lists;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import com.tzld.longarticle.recommend.server.common.CommonThreadPoolExecutor;
+import com.tzld.longarticle.recommend.server.common.ThreadPoolFactory;
+import com.tzld.longarticle.recommend.server.mapper.crawler.CrawlerBaseMapper;
+import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
+import com.tzld.longarticle.recommend.server.model.dto.*;
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.LongArticlesRootSourceId;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Service;
+import org.springframework.util.CollectionUtils;
+import org.springframework.util.StringUtils;
+
+import java.net.URLDecoder;
+import java.util.*;
+import java.util.concurrent.*;
+import java.util.stream.Collectors;
+
+@Service
+@Slf4j
+public class DataFlushService {
+
+    @Autowired
+    private LongArticleBaseMapper longArticleBaseMapper;
+    @Autowired
+    private CrawlerBaseMapper crawlerBaseMapper;
+
+    private final ExecutorService pool = ThreadPoolFactory.deDuplicatePool();
+
+
+    /**
+     * Copies get-off-video rows from the crawler data source into the long-article data source,
+     * one page of 1000 rows at a time, skipping video ids that already exist in the target.
+     *
+     * @param pageNum 1-based page to start from; {@code null} or a value below 1 starts at page 1
+     */
+    public void flushGetOffVideos(Integer pageNum) {
+        int pageSize = 1000;
+        // A page below 1 would produce a negative offset, so treat it like "not supplied".
+        if (pageNum == null || pageNum < 1) {
+            pageNum = 1;
+        }
+        int count = crawlerBaseMapper.countGetOffVideos();
+        // Deliberately one page of slack: rows may be appended while the job is running.
+        int totalPage = count / pageSize + 1;
+        int flushNum = 0;
+        while (pageNum <= totalPage) {
+            int offset = (pageNum - 1) * pageSize;
+            List<GetOffVideos> list = crawlerBaseMapper.pageGetOffVideos(offset, pageSize);
+            if (CollectionUtil.isEmpty(list)) {
+                // Past the last row (always the case for the slack page when count is 0 or a
+                // multiple of pageSize): stop instead of sending an empty id list to the lookup.
+                log.info("flushGetOffVideos pageNum:{} totalPage:{} flushNum:{}", pageNum, totalPage, 0);
+                break;
+            }
+            List<Long> videoIds = list.stream().map(GetOffVideos::getVideoId)
+                    .distinct().collect(Collectors.toList());
+            List<GetOffVideos> existsList = longArticleBaseMapper.getGetOffVideos(videoIds);
+            Set<Long> existsIds = existsList.stream().map(GetOffVideos::getVideoId).collect(Collectors.toSet());
+            // Keep only the rows whose video id is not in the target yet.
+            list = list.stream().filter(o -> !existsIds.contains(o.getVideoId())).collect(Collectors.toList());
+            if (CollectionUtil.isNotEmpty(list)) {
+                longArticleBaseMapper.batchInsertGetOffVideos(list);
+            }
+            log.info("flushGetOffVideos pageNum:{} totalPage:{} flushNum:{}", pageNum, totalPage, list.size());
+            flushNum += list.size();
+            pageNum++;
+        }
+        log.info("flushGetOffVideos flushNum:{}", flushNum);
+    }
+
+    /**
+     * Copies root-source-id rows from the crawler data source into the long-article data source,
+     * one page of 1000 rows at a time, skipping root source ids that already exist in the target.
+     *
+     * @param pageNum 1-based page to start from; {@code null} or a value below 1 starts at page 1
+     */
+    public void flushLongArticlesRootSourceId(Integer pageNum) {
+        int pageSize = 1000;
+        // A page below 1 would produce a negative offset, so treat it like "not supplied".
+        if (pageNum == null || pageNum < 1) {
+            pageNum = 1;
+        }
+        int count = crawlerBaseMapper.countLongArticlesRootSourceId();
+        // Deliberately one page of slack: rows may be appended while the job is running.
+        int totalPage = count / pageSize + 1;
+        while (pageNum <= totalPage) {
+            int offset = (pageNum - 1) * pageSize;
+            List<LongArticlesRootSourceId> list = crawlerBaseMapper.pageLongArticlesRootSourceId(offset, pageSize);
+            if (CollectionUtil.isEmpty(list)) {
+                // Past the last row (always the case for the slack page when count is 0 or a
+                // multiple of pageSize): stop instead of sending an empty id list to the lookup.
+                log.info("flushLongArticlesRootSourceId pageNum:{} totalPage:{} existsSize: {}", pageNum, totalPage, 0);
+                break;
+            }
+            List<String> rootSourceIdList = list.stream().map(LongArticlesRootSourceId::getRootSourceId)
+                    .distinct().collect(Collectors.toList());
+            List<LongArticlesRootSourceId> existsList = longArticleBaseMapper.getLongArticlesRootSourceId(rootSourceIdList);
+            Set<String> existsIds = existsList.stream().map(LongArticlesRootSourceId::getRootSourceId).collect(Collectors.toSet());
+            // Keep only the rows whose root source id is not in the target yet.
+            list = list.stream().filter(o -> !existsIds.contains(o.getRootSourceId())).collect(Collectors.toList());
+            if (CollectionUtil.isNotEmpty(list)) {
+                longArticleBaseMapper.batchInsertLongArticlesRootSourceId(list);
+            }
+            log.info("flushLongArticlesRootSourceId pageNum:{} totalPage:{} existsSize: {}", pageNum, totalPage, existsList.size());
+            pageNum++;
+        }
+    }
+
+    /**
+     * Converts crawler article-match-video rows into {@link LongArticlesCrawlerVideos} records and
+     * inserts them into the long-article data source in batches of 1000.
+     *
+     * <p>All source rows are loaded with a single mapper call; rows without a video path are
+     * ignored. Each record gets a score derived from the position of its update time among the
+     * rows sharing the same content id. Records whose content id already has three or more rows
+     * in the target are not inserted.
+     *
+     * @param pageNum not used by this method
+     */
+    public void flushLongArticlesCrawlerVideos(Integer pageNum) {
+        List<ArticleMatchVideos> list = crawlerBaseMapper.pageArticleMatchVideos();
+        // Only rows that actually have a video path are migrated.
+        list = list.stream().filter(o -> StringUtils.hasText(o.getVideoPath())).collect(Collectors.toList());
+        Map<String, List<ArticleMatchVideos>> map = list.stream().collect(Collectors.groupingBy(ArticleMatchVideos::getContentId));
+        List<LongArticlesCrawlerVideos> batchSaveList = new ArrayList<>();
+        for (ArticleMatchVideos articleMatchVideos : list) {
+            List<ArticleMatchVideos> mapList = map.get(articleMatchVideos.getContentId());
+            // Update times of all rows for this content id, ascending.
+            List<Date> orderDate = mapList.stream().map(ArticleMatchVideos::getUpdateTime)
+                    .sorted().collect(Collectors.toList());
+            // Default weight for rows that are neither first nor second by update time.
+            double score = 0.2;
+            for (int i = 0; i < orderDate.size(); i++) {
+                if (orderDate.get(i).equals(articleMatchVideos.getUpdateTime())) {
+                    if (i == 0) {
+                        // Earliest update time. The scan continues, so an identical timestamp at
+                        // index 1 overrides this with 0.5.
+                        score = 1;
+                    } else if (i == 1) {
+                        score = 0.5;
+                    } else {
+                        // First match at index 2 or later: keep the current score and stop.
+                        break;
+                    }
+                }
+            }
+            LongArticlesCrawlerVideos saveItem = new LongArticlesCrawlerVideos();
+            saveItem.setContentId(articleMatchVideos.getContentId());
+            saveItem.setPlatform(articleMatchVideos.getPlatform());
+            saveItem.setVideoTitle(articleMatchVideos.getVideoTitle());
+            saveItem.setCrawlerTime(articleMatchVideos.getUpdateTime());
+            saveItem.setVideoOssPath(articleMatchVideos.getVideoPath());
+            saveItem.setCoverOssPath(articleMatchVideos.getCoverPath());
+            saveItem.setUserId(articleMatchVideos.getUid());
+            saveItem.setTraceId(articleMatchVideos.getTraceId());
+            // NOTE(review): the meaning of status 2 is not visible here - confirm against the schema.
+            saveItem.setDownloadStatus(2);
+            // The weight is stored scaled down by 1000.
+            saveItem.setScore(score / 1000);
+            batchSaveList.add(saveItem);
+        }
+        int flushNum = 0;
+        if (!CollectionUtils.isEmpty(batchSaveList)) {
+            for (List<LongArticlesCrawlerVideos> partition : Lists.partition(batchSaveList, 1000)) {
+                List<String> contentIds = partition.stream().map(LongArticlesCrawlerVideos::getContentId).collect(Collectors.toList());
+                List<LongArticlesCrawlerVideos> existsList = longArticleBaseMapper.getLongArticlesCrawlerVideos(contentIds);
+                Map<String, List<LongArticlesCrawlerVideos>> existsMap = existsList.stream()
+                        .collect(Collectors.groupingBy(LongArticlesCrawlerVideos::getContentId));
+                // Insert only for content ids that have fewer than three rows stored already.
+                partition = partition.stream().filter(o -> {
+                    List<LongArticlesCrawlerVideos> itemList = existsMap.get(o.getContentId());
+                    return CollectionUtil.isEmpty(itemList) || itemList.size() < 3;
+                }).collect(Collectors.toList());
+                if (CollectionUtil.isNotEmpty(partition)) {
+                    longArticleBaseMapper.batchInsertLongArticlesCrawlerVideos(partition);
+                    flushNum += partition.size();
+                }
+            }
+        }
+        log.info("flushLongArticlesCrawlerVideos flushNum:{}", flushNum);
+    }
+
+    /**
+     * Migrates crawler long-article video rows into the long-article data source.
+     *
+     * <p>Rows are read in id order, 1000 at a time, starting after {@code id}. Each row yields at
+     * most one {@link LongArticlesText} record (first occurrence of its content id within the page)
+     * and, when it has a request timestamp, one {@link LongArticlesMatchVideos} record. Records
+     * that already exist in the target (by content id / trace id) are not inserted again.
+     *
+     * @param pageNum page counter used only for progress logging; {@code null} starts at 1
+     * @param id      cursor: only rows after this id are read; {@code null} starts at 0
+     */
+    public void flushLongArticlesVideos(Integer pageNum, Long id) {
+        int pageSize = 1000;
+        // Both values are optional request parameters; unboxing a null would abort the job.
+        if (pageNum == null) {
+            pageNum = 1;
+        }
+        if (id == null) {
+            id = 0L;
+        }
+        List<LongArticlesText> kimiTitleList = crawlerBaseMapper.getLongArticlesText();
+        Map<String, LongArticlesText> kimiTitleMap = kimiTitleList.stream().collect(
+                Collectors.toMap(LongArticlesText::getContentId, o -> o, (existing, replacement) -> replacement));
+        int count = crawlerBaseMapper.countLongArticlesVideos();
+        int totalPage = count / pageSize + 1;
+        int longArticlesTextFlushNum = 0;
+        int longArticlesMatchVideosFlushNum = 0;
+        while (true) {
+            List<LongArticlesVideoDTO> list = crawlerBaseMapper.pageLongArticlesVideos(id, pageSize);
+            if (CollectionUtils.isEmpty(list)) {
+                break;
+            }
+            // Advance the cursor to the largest id seen on this page.
+            id = list.stream().mapToLong(LongArticlesVideoDTO::getId).max().getAsLong();
+            List<LongArticlesText> batchSaveLongArticlesTextList = new ArrayList<>();
+            List<LongArticlesMatchVideos> batchSaveLongArticlesMatchVideosList = new ArrayList<>();
+            Set<String> existsIdSet = new HashSet<>();
+            for (LongArticlesVideoDTO longArticlesVideoDTO : list) {
+                if (longArticlesVideoDTO.getContentId().endsWith("lehuo")) {
+                    continue;
+                }
+                if (!existsIdSet.contains(longArticlesVideoDTO.getContentId())) {
+                    LongArticlesText longArticlesText = new LongArticlesText();
+                    longArticlesText.setContentId(longArticlesVideoDTO.getContentId());
+                    longArticlesText.setArticleTitle(longArticlesVideoDTO.getArticleTitle());
+                    longArticlesText.setArticleText(longArticlesVideoDTO.getArticleText());
+                    // Guard on the value that is dereferenced; the original tested articleText here
+                    // and failed with a NullPointerException when kimiTitle was missing.
+                    if (StringUtils.hasText(longArticlesVideoDTO.getKimiTitle())) {
+                        longArticlesText.setKimiTitle(longArticlesVideoDTO.getKimiTitle().replace("\"", ""));
+                    }
+                    if (StringUtils.hasText(longArticlesVideoDTO.getKimiSummary())) {
+                        longArticlesText.setKimiSummary(longArticlesVideoDTO.getKimiSummary());
+                        longArticlesText.setKimiKeys(longArticlesVideoDTO.getKimiKeys());
+                    } else {
+                        // Fall back to the separately loaded text row, which may not exist.
+                        LongArticlesText text = kimiTitleMap.get(longArticlesVideoDTO.getContentId());
+                        if (Objects.nonNull(text)) {
+                            longArticlesText.setKimiSummary(text.getKimiSummary());
+                            longArticlesText.setKimiKeys(text.getKimiKeys());
+                        }
+                    }
+                    longArticlesText.setKimiStatus(1);
+                    batchSaveLongArticlesTextList.add(longArticlesText);
+                    existsIdSet.add(longArticlesVideoDTO.getContentId());
+                }
+                // Rows without a request timestamp contribute a text record only.
+                if (Objects.isNull(longArticlesVideoDTO.getRequestTimeStamp())) {
+                    continue;
+                }
+                LongArticlesMatchVideos longArticlesMatchVideos = new LongArticlesMatchVideos();
+                longArticlesMatchVideos.setContentId(longArticlesVideoDTO.getContentId());
+                longArticlesMatchVideos.setTraceId(longArticlesVideoDTO.getTraceId());
+                longArticlesMatchVideos.setGhId(longArticlesVideoDTO.getGhId());
+                longArticlesMatchVideos.setAccountName(longArticlesVideoDTO.getAccountName());
+                longArticlesMatchVideos.setContentStatus(4);
+                longArticlesMatchVideos.setSuccessStatus(longArticlesVideoDTO.getSuccess());
+                longArticlesMatchVideos.setRequestTimestamp(longArticlesVideoDTO.getRequestTimeStamp());
+                longArticlesMatchVideos.setUpdateTime(longArticlesVideoDTO.getUpdateTime());
+                longArticlesMatchVideos.setProcessTimes(longArticlesVideoDTO.getProcessTimes());
+                longArticlesMatchVideos.setResponse(getLongArticleVideoResponse(longArticlesVideoDTO));
+                batchSaveLongArticlesMatchVideosList.add(longArticlesMatchVideos);
+            }
+            if (CollectionUtil.isNotEmpty(batchSaveLongArticlesTextList)) {
+                List<String> contentIds = batchSaveLongArticlesTextList.stream()
+                        .map(LongArticlesText::getContentId).distinct().collect(Collectors.toList());
+                List<String> existsContentIds = longArticleBaseMapper.getLongArticlesTextByContentIds(contentIds);
+                if (CollectionUtil.isNotEmpty(existsContentIds)) {
+                    // Hash lookup instead of a linear List.contains per candidate.
+                    Set<String> existsContentIdSet = new HashSet<>(existsContentIds);
+                    batchSaveLongArticlesTextList = batchSaveLongArticlesTextList.stream()
+                            .filter(o -> !existsContentIdSet.contains(o.getContentId())).collect(Collectors.toList());
+                }
+                if (CollectionUtil.isNotEmpty(batchSaveLongArticlesTextList)) {
+                    longArticleBaseMapper.batchInsertLongArticlesText(batchSaveLongArticlesTextList);
+                    longArticlesTextFlushNum += batchSaveLongArticlesTextList.size();
+                }
+            }
+            if (CollectionUtil.isNotEmpty(batchSaveLongArticlesMatchVideosList)) {
+                List<String> traceIds = batchSaveLongArticlesMatchVideosList.stream().map(LongArticlesMatchVideos::getTraceId)
+                        .distinct().collect(Collectors.toList());
+                List<LongArticlesMatchVideos> existsList = longArticleBaseMapper.getLongArticlesMatchVideos(traceIds);
+                Set<String> existsIds = existsList.stream().map(LongArticlesMatchVideos::getTraceId).collect(Collectors.toSet());
+                batchSaveLongArticlesMatchVideosList = batchSaveLongArticlesMatchVideosList.stream()
+                        .filter(o -> !existsIds.contains(o.getTraceId())).collect(Collectors.toList());
+                if (CollectionUtil.isNotEmpty(batchSaveLongArticlesMatchVideosList)) {
+                    longArticleBaseMapper.batchInsertLongArticlesMatchVideos(batchSaveLongArticlesMatchVideosList);
+                    longArticlesMatchVideosFlushNum += batchSaveLongArticlesMatchVideosList.size();
+                }
+            }
+            log.info("flushLongArticlesVideos pageNum:{} totalPage:{} id:{} longArticlesMatchVideosFlushNum:{}",
+                    pageNum, totalPage, id, longArticlesMatchVideosFlushNum);
+            pageNum++;
+        }
+        log.info("flushLongArticlesVideos longArticlesTextFlushNum:{} longArticlesMatchVideosFlushNum:{}",
+                longArticlesTextFlushNum, longArticlesMatchVideosFlushNum);
+    }
+
+    /**
+     * Builds the JSON array string stored as a match-video response from the row's
+     * {@code result1}..{@code result3} values.
+     *
+     * <p>When the kimi title contains double quotes, every occurrence of the title inside a result
+     * is replaced by its quote-free form, and the cleaned result is written back to the DTO before
+     * it is converted. Results without text are skipped.
+     *
+     * @param longArticlesVideoDTO source row; its result fields may be rewritten as described
+     * @return JSON array string with one entry per non-blank result, in result order
+     */
+    private String getLongArticleVideoResponse(LongArticlesVideoDTO longArticlesVideoDTO) {
+        JSONArray responses = new JSONArray();
+
+        String first = longArticlesVideoDTO.getResult1();
+        if (StringUtils.hasText(first)) {
+            String cleaned = stripQuotedKimiTitle(first, longArticlesVideoDTO.getKimiTitle());
+            if (cleaned != null) {
+                longArticlesVideoDTO.setResult1(cleaned);
+            }
+            responses.add(resultToResponse(longArticlesVideoDTO.getResult1()));
+        }
+
+        String second = longArticlesVideoDTO.getResult2();
+        if (StringUtils.hasText(second)) {
+            String cleaned = stripQuotedKimiTitle(second, longArticlesVideoDTO.getKimiTitle());
+            if (cleaned != null) {
+                longArticlesVideoDTO.setResult2(cleaned);
+            }
+            responses.add(resultToResponse(longArticlesVideoDTO.getResult2()));
+        }
+
+        String third = longArticlesVideoDTO.getResult3();
+        if (StringUtils.hasText(third)) {
+            String cleaned = stripQuotedKimiTitle(third, longArticlesVideoDTO.getKimiTitle());
+            if (cleaned != null) {
+                longArticlesVideoDTO.setResult3(cleaned);
+            }
+            responses.add(resultToResponse(longArticlesVideoDTO.getResult3()));
+        }
+
+        return JSONObject.toJSONString(responses);
+    }
+
+    /**
+     * Returns {@code result} with the quoted kimi title swapped for its quote-free form, or
+     * {@code null} when the title has no text or contains no double quote (nothing to rewrite).
+     */
+    private String stripQuotedKimiTitle(String result, String kimiTitle) {
+        if (!StringUtils.hasText(kimiTitle) || !kimiTitle.contains("\"")) {
+            return null;
+        }
+        return result.replace(kimiTitle, kimiTitle.replace("\"", ""));
+    }
+
+    /**
+     * Converts one stored result JSON string into the response entry format.
+     *
+     * <p>Copies {@code productionName}, {@code productionCover}, {@code videoUrl} and
+     * {@code source}, and extracts {@code su}, {@code id} and {@code rootSourceId} from the
+     * {@code productionPath} value. {@code videoId} and {@code rootSourceId} are only added when
+     * they have text; {@code uid} is always added, possibly as {@code null}.
+     *
+     * @param result JSON object string
+     * @return the response entry
+     */
+    private JSONObject resultToResponse(String result) {
+        JSONObject parsed = JSONObject.parseObject(result);
+        String path = parsed.getString("productionPath");
+        // Resolve all path parameters first, in the same order as before, then fill the entry.
+        String userId = getParamFromPath(path, "su");
+        String rawVideoId = getParamFromPath(path, "id");
+        String rootId = getParamFromPath(path, "rootSourceId");
+
+        JSONObject entry = new JSONObject();
+        entry.put("kimiTitle", parsed.getString("productionName"));
+        entry.put("videoCover", parsed.getString("productionCover"));
+        entry.put("videoPath", parsed.getString("videoUrl"));
+        entry.put("source", parsed.getString("source"));
+        entry.put("uid", userId);
+        if (StringUtils.hasText(rawVideoId)) {
+            entry.put("videoId", Long.valueOf(rawVideoId));
+        }
+        if (StringUtils.hasText(rootId)) {
+            entry.put("rootSourceId", rootId);
+        }
+        return entry;
+    }
+
+    /**
+     * Extracts the value of a query parameter from a (possibly nested, URL-encoded) path.
+     *
+     * <p>The whole path is URL-decoded first and then split on {@code ?} and {@code &}, so
+     * parameters of an encoded inner path are found as well. The first pair whose key equals
+     * {@code param} exactly wins.
+     *
+     * @param productionPath path to inspect; may be {@code null} or blank
+     * @param param          exact parameter name to look for
+     * @return the text after the first {@code =} of the matching pair (possibly empty), or
+     *         {@code null} when the path is blank or the parameter is absent
+     */
+    private String getParamFromPath(String productionPath, String param) {
+        if (!StringUtils.hasText(productionPath) || !StringUtils.hasText(param)) {
+            return null;
+        }
+        String decode;
+        try {
+            // Explicit charset: the one-argument overload is deprecated and platform dependent.
+            decode = URLDecoder.decode(productionPath, "UTF-8");
+        } catch (java.io.UnsupportedEncodingException e) {
+            // UTF-8 is a charset every Java platform must support.
+            throw new IllegalStateException("UTF-8 is not supported", e);
+        }
+        // Match "name=" rather than the bare name so that "id" does not match "idx=...".
+        String prefix = param + "=";
+        for (String segment : decode.split("\\?")) {
+            for (String pair : segment.split("&")) {
+                if (pair.startsWith(prefix)) {
+                    // substring keeps values containing '=' intact and returns "" for "name=".
+                    return pair.substring(prefix.length());
+                }
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Fills in the kimi title, summary and keys of target text records that need an update.
+     *
+     * <p>Crawler text rows are indexed by content id (the last row wins on duplicates). For every
+     * record returned by {@code getNeedUpdateRecords()} that has a matching crawler row, an update
+     * task is submitted to the shared pool. The logged number counts submitted tasks, not
+     * completed updates.
+     */
+    public void flushLongArticlesText() {
+        Map<String, LongArticlesText> kimiByContentId = crawlerBaseMapper.getLongArticlesText().stream()
+                .collect(Collectors.toMap(LongArticlesText::getContentId, o -> o, (existing, replacement) -> replacement));
+        int updateNum = 0;
+        for (LongArticlesText update : longArticleBaseMapper.getNeedUpdateRecords()) {
+            LongArticlesText kimi = kimiByContentId.get(update.getContentId());
+            if (Objects.isNull(kimi)) {
+                // No crawler row for this content id: nothing to copy.
+                continue;
+            }
+            pool.submit(() -> {
+                update.setKimiTitle(kimi.getKimiTitle());
+                update.setKimiSummary(kimi.getKimiSummary());
+                update.setKimiKeys(kimi.getKimiKeys());
+                longArticleBaseMapper.updateLongArticlesText(update);
+            });
+            updateNum++;
+        }
+        log.info("flushLongArticlesText updateNum:{}", updateNum);
+    }
+
+    /**
+     * Executor for the per-page batch tasks of {@code updateLongArticleMatchVideosResponse}.
+     *
+     * <p>Configured with core and maximum size 5, a queue bounded at 10000 entries, threads named
+     * {@code batch-%d} and an abort rejection handler.
+     * NOTE(review): assumes {@code CommonThreadPoolExecutor} keeps the argument semantics of
+     * {@code ThreadPoolExecutor} - confirm.
+     */
+    private final static ExecutorService batchPool = new CommonThreadPoolExecutor(
+            5,
+            5,
+            0L, TimeUnit.SECONDS,
+            new LinkedBlockingQueue<>(10000),
+            new ThreadFactoryBuilder().setNameFormat("batch-%d").build(),
+            new ThreadPoolExecutor.AbortPolicy());
+
+    public void updateLongArticleMatchVideosResponse(Long id) {
+        int pageSize = 1000;
+        if (Objects.isNull(id)) {
+            id = 0L;
+        }
+        int count = longArticleBaseMapper.countNeedMatchVideos(id);
+        CountDownLatch cdl = new CountDownLatch((count / 1000) + 1);
+        while (true) {
+            List<LongArticlesMatchVideos> matchVideosList = longArticleBaseMapper.getNeedMatchVideos(id, pageSize);
+            if (CollectionUtil.isEmpty(matchVideosList)) {
+                break;
+            }
+            id = matchVideosList.stream().mapToLong(LongArticlesMatchVideos::getId).max().getAsLong();
+            Long finalId = id;
+            batchPool.submit(() -> {
+                try {
+                    long start = System.currentTimeMillis();
+                    List<String> traceIds = matchVideosList.stream().map(LongArticlesMatchVideos::getTraceId)
+                            .distinct().collect(Collectors.toList());
+                    List<LongArticlesVideoDTO> longArticlesVideoDTOList = crawlerBaseMapper.getLongArticlesVideo(traceIds);
+                    Map<String, LongArticlesVideoDTO> longArticlesVideoMap = longArticlesVideoDTOList.stream().collect(
+                            Collectors.toMap(LongArticlesVideoDTO::getTraceId, o -> o, (existing, replacement) -> replacement));
+                    CountDownLatch countDownLatch = new CountDownLatch(matchVideosList.size());
+                    for (LongArticlesMatchVideos longArticlesMatchVideos : matchVideosList) {
+                        pool.submit(() -> {
+                            try {
+                                LongArticlesVideoDTO longArticlesVideoDTO = longArticlesVideoMap.get(longArticlesMatchVideos.getTraceId());
+                                if (Objects.nonNull(longArticlesVideoDTO)) {
+                                    longArticlesMatchVideos.setResponse(getLongArticleVideoResponse(longArticlesVideoDTO));
+                                    longArticleBaseMapper.updateLongArticleMatchVideosResponse(longArticlesMatchVideos);
+                                }
+                            } finally {
+                                countDownLatch.countDown();
+                            }
+                        });
+                    }
+                    try {
+                        countDownLatch.await();
+                    } catch (InterruptedException e) {
+                        log.error("updateLongArticleMatchVideosResponse InterruptedException", e);
+                    }
+                    log.info("updateLongArticleMatchVideosResponse end id:{}, cost:{}", finalId, System.currentTimeMillis() - start);
+                } finally {
+                    cdl.countDown();
+                }
+            });
+        }
+        try {
+            cdl.await();
+        } catch (InterruptedException e) {
+            log.error("updateLongArticleMatchVideosResponse InterruptedException", e);
+        }
+    }
+}

+ 56 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/web/DataFlushController.java

@@ -0,0 +1,56 @@
+package com.tzld.longarticle.recommend.server.web;
+
+import com.tzld.longarticle.recommend.server.service.DataFlushService;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RestController;
+
+@RestController
+@RequestMapping("/data")
+@Slf4j
+public class DataFlushController {
+
+    @Autowired
+    private DataFlushService service;
+
+    @GetMapping("/flush/get_off_videos")
+    public void flushGetOffVideos(Integer pageNum) {
+        new Thread(() -> {
+            service.flushGetOffVideos(pageNum);
+        }).start();
+    }
+    @GetMapping("/flush/long_articles_root_source_id")
+    public void flushLongArticlesRootSourceId(Integer pageNum) {
+        new Thread(() -> {
+            service.flushLongArticlesRootSourceId(pageNum);
+        }).start();
+    }
+    @GetMapping("/flush/long_articles_crawler_videos")
+    public void flushLongArticlesCrawlerVideos(Integer pageNum) {
+        new Thread(() -> {
+            service.flushLongArticlesCrawlerVideos(pageNum);
+        }).start();
+    }
+    @GetMapping("/flush/long_articles_video")
+    public void flushLongArticlesVideos(Integer pageNum, Long id) {
+        new Thread(() -> {
+            service.flushLongArticlesVideos(pageNum, id);
+        }).start();
+    }
+    @GetMapping("/flush/long_articles_text")
+    public void flushLongArticlesText() {
+        new Thread(() -> {
+            service.flushLongArticlesText();
+        }).start();
+    }
+    @GetMapping("/update/long_article_match_videos/response")
+    public void updateLongArticleMatchVideosResponse(Long id) {
+        new Thread(() -> {
+            service.updateLongArticleMatchVideosResponse(id);
+        }).start();
+    }
+
+
+}

+ 37 - 0
long-article-recommend-service/src/main/resources/mapper/crawler/CrawlerBaseMapper.xml

@@ -9,6 +9,43 @@
             (#{item.dateStr}, #{item.ghId}, #{item.accountName}, #{item.relGhId}, #{item.relAccountName}, #{item.status}, #{item.correlation})
         </foreach>
     </insert>
+    <!-- Counts the get_off_videos rows whose publish_time is greater than the hard-coded value 1727239202. -->
+    <select id="countGetOffVideos"
+            resultType="java.lang.Integer">
+        SELECT COUNT(1)
+        FROM get_off_videos
+        WHERE publish_time > 1727239202
+    </select>
+    <!--
+        Returns one page of get_off_videos ordered by video_id, starting at #{offset} and
+        holding at most #{pageSize} rows.
+        NOTE(review): unlike countGetOffVideos in this mapper, this query has no publish_time
+        filter, so the count and the pages do not describe the same row set; confirm intended.
+    -->
+    <select id="pageGetOffVideos"
+            resultType="com.tzld.longarticle.recommend.server.model.dto.GetOffVideos">
+        SELECT *
+        FROM get_off_videos
+        ORDER BY video_id
+        LIMIT #{offset}, #{pageSize}
+    </select>
+    <!-- Counts the long_articles_root_source_id rows whose requestTime is greater than the hard-coded value 1727192229. -->
+    <select id="countLongArticlesRootSourceId"
+            resultType="java.lang.Integer">
+        SELECT COUNT(1)
+        FROM long_articles_root_source_id
+        WHERE requestTime > 1727192229
+    </select>
+    <!--
+        Returns one page of the rows counted by countLongArticlesRootSourceId (same requestTime
+        filter), ordered by rootSourceId, starting at #{offset} with at most #{pageSize} rows.
+    -->
+    <select id="pageLongArticlesRootSourceId"
+            resultType="com.tzld.longarticle.recommend.server.model.entity.longArticle.LongArticlesRootSourceId">
+        SELECT *
+        FROM long_articles_root_source_id
+        WHERE requestTime > 1727192229
+        ORDER BY rootSourceId
+        LIMIT #{offset}, #{pageSize}
+    </select>
+    <!-- Counts the article_match_videos rows whose update_time falls on or after 2024-09-20. -->
+    <select id="countArticleMatchVideos"
+            resultType="java.lang.Integer">
+        SELECT COUNT(1)
+        FROM article_match_videos
+        WHERE DATE(update_time) &gt;= '2024-09-20'
+    </select>
+    <!--
+        Returns every article_match_videos row ordered by video_id.
+        NOTE(review): despite the page* name there is no LIMIT, and the update_time filter
+        used by countArticleMatchVideos is absent here; confirm this is intended.
+    -->
+    <select id="pageArticleMatchVideos" resultType="com.tzld.longarticle.recommend.server.model.dto.ArticleMatchVideos">
+        SELECT *
+        FROM article_match_videos
+        ORDER BY video_id
+    </select>
+    <!-- Counts the long_articles_video rows with content_status = 2 and success = 0. -->
+    <select id="countLongArticlesVideos"
+            resultType="java.lang.Integer">
+        SELECT COUNT(1)
+        FROM long_articles_video
+        WHERE content_status = 2
+          AND success = 0
+    </select>
+    <!--
+        Returns up to #{pageSize} long_articles_video rows with content_status = 2 and
+        success = 0 whose id is greater than #{id}, in ascending id order.
+    -->
+    <select id="pageLongArticlesVideos" resultType="com.tzld.longarticle.recommend.server.model.dto.LongArticlesVideoDTO">
+        SELECT *
+        FROM long_articles_video
+        WHERE content_status = 2
+          AND success = 0
+          AND id > #{id}
+        ORDER BY id
+        LIMIT #{pageSize}
+    </select>
+    <!--
+        Returns content_id plus the kimi_* columns from long_articles_video rows that have a
+        kimi_summary, grouped by content_id.
+        NOTE(review): kimi_title, kimi_summary and kimi_keys are neither aggregated nor part of
+        the GROUP BY; presumably the database runs without ONLY_FULL_GROUP_BY, in which case the
+        row chosen per content_id is arbitrary. Confirm against the server's sql_mode.
+    -->
+    <select id="getLongArticlesText"
+            resultType="com.tzld.longarticle.recommend.server.model.dto.LongArticlesText">
+        SELECT content_id, kimi_title, kimi_summary, kimi_keys
+        FROM long_articles_video
+        WHERE kimi_summary IS NOT NULL
+        GROUP BY content_id
+    </select>
+
+    <!--
+        Loads the long_articles_video rows whose trace_id is contained in traceIds.
+        A null or empty traceIds matches no rows: without the guard the foreach would render
+        "in ()", which is not valid SQL.
+        NOTE(review): the guard calls size(), so it assumes traceIds is a java.util.Collection.
+    -->
+    <select id="getLongArticlesVideo"
+            resultType="com.tzld.longarticle.recommend.server.model.dto.LongArticlesVideoDTO">
+        select * from long_articles_video where
+        <choose>
+            <when test="traceIds != null and traceIds.size() > 0">
+                trace_id in
+                <foreach collection="traceIds" item="item" open="(" close=")" separator=",">
+                    #{item}
+                </foreach>
+            </when>
+            <otherwise>
+                1 = 0
+            </otherwise>
+        </choose>
+    </select>
 
     <insert id="batchInsertAccountAvgInfo">
         INSERT INTO account_avg_info_v3

+ 113 - 0
long-article-recommend-service/src/main/resources/mapper/longArticle/LongArticleBaseMapper.xml

@@ -10,6 +10,10 @@
         delete from datastat_sort_strategy where date_str between #{dateStrBegin} and #{dateStrEnd} and type = #{type}
     </delete>
 
+    <!--
+        Counts all rows of get_off_videos.
+        The former "limit #{offset}, #{pageSize}" was dropped: COUNT produces exactly one row,
+        so any offset greater than 0 skipped that row and the statement returned nothing.
+        Callers may still pass offset/pageSize; they are simply not referenced.
+        NOTE(review): if the intent was to count the rows inside one page window, that needs a
+        sub-select instead; confirm against the caller.
+    -->
+    <select id="countGetOffVideos" resultType="java.lang.Long">
+        select count(1) from get_off_videos
+    </select>
+
     <insert id="batchInsertDatastatSortStrategy">
         INSERT INTO datastat_sort_strategy
         (date_str, publish_time, account_mode, account_source, account_type, account_status, bussiness_type,
@@ -114,4 +118,113 @@
         </foreach>
     </insert>
 
+    <!--
+        Loads the get_off_videos rows whose video_id is contained in videoIds.
+        A null or empty videoIds matches no rows: without the guard the foreach would render
+        "in ()", which is not valid SQL.
+        NOTE(review): the guard calls size(), so it assumes videoIds is a java.util.Collection.
+    -->
+    <select id="getGetOffVideos" resultType="com.tzld.longarticle.recommend.server.model.dto.GetOffVideos">
+        select * from get_off_videos where
+        <choose>
+            <when test="videoIds != null and videoIds.size() > 0">
+                video_id in
+                <foreach collection="videoIds" item="item" open="(" close=")" separator=",">
+                    #{item}
+                </foreach>
+            </when>
+            <otherwise>
+                1 = 0
+            </otherwise>
+        </choose>
+    </select>
+
+    <!--
+        Inserts every element of the "list" parameter into get_off_videos with one multi-row
+        INSERT. The column list and the value tuple are paired by position.
+        NOTE(review): an empty list would render "VALUES" with no tuples; presumably callers
+        never pass one. Confirm.
+    -->
+    <insert id="batchInsertGetOffVideos">
+        INSERT INTO get_off_videos (video_id, publish_time, video_status, trace_id, get_off_time, check_status)
+        VALUES
+        <foreach collection="list" item="item" separator=",">
+            (#{item.videoId}, #{item.publishTime}, #{item.videoStatus}, #{item.traceId}, #{item.getOffTime}, #{item.checkStatus})
+        </foreach>
+    </insert>
+
+    <!--
+        Inserts every element of the "list" parameter into long_articles_crawler_videos with one
+        multi-row INSERT. The 18 columns and the 18 item properties are paired by position.
+        NOTE(review): an empty list would render "VALUES" with no tuples; presumably callers
+        never pass one. Confirm.
+    -->
+    <insert id="batchInsertLongArticlesCrawlerVideos" parameterType="list">
+        INSERT INTO long_articles_crawler_videos (content_id, out_video_id, platform, video_title, play_count,
+                                                  like_count, share_count, publish_time, crawler_time, duration,
+                                                  video_url, cover_url, download_status, video_oss_path, cover_oss_path,
+                                                  user_id, trace_id, score)
+        VALUES
+        <foreach collection="list" item="item" separator=",">
+            (#{item.contentId}, #{item.outVideoId}, #{item.platform}, #{item.videoTitle}, #{item.playCount},
+             #{item.likeCount}, #{item.shareCount}, #{item.publishTime}, #{item.crawlerTime}, #{item.duration},
+             #{item.videoUrl}, #{item.coverUrl}, #{item.downloadStatus}, #{item.videoOssPath}, #{item.coverOssPath},
+             #{item.userId}, #{item.traceId}, #{item.score})
+        </foreach>
+    </insert>
+
+    <!--
+        Inserts every element of the "list" parameter into long_articles_text with one multi-row
+        INSERT. The column list and the value tuple are paired by position.
+        NOTE(review): an empty list would render "VALUES" with no tuples; presumably callers
+        never pass one. Confirm.
+    -->
+    <insert id="batchInsertLongArticlesText" parameterType="list">
+        INSERT INTO long_articles_text (content_id, article_title, article_text, kimi_title, kimi_summary,
+                                              kimi_keys, kimi_status)
+        VALUES
+        <foreach collection="list" item="item" separator=",">
+            (#{item.contentId}, #{item.articleTitle}, #{item.articleText}, #{item.kimiTitle}, #{item.kimiSummary},
+             #{item.kimiKeys}, #{item.kimiStatus})
+        </foreach>
+    </insert>
+
+    <!--
+        Returns the content_id values from long_articles_text that are contained in contentIds.
+        A null or empty contentIds matches no rows: without the guard the foreach would render
+        "in ()", which is not valid SQL.
+        NOTE(review): the guard calls size(), so it assumes contentIds is a java.util.Collection.
+    -->
+    <select id="getLongArticlesTextByContentIds" resultType="java.lang.String">
+        select content_id from long_articles_text where
+        <choose>
+            <when test="contentIds != null and contentIds.size() > 0">
+                content_id in
+                <foreach collection="contentIds" item="item" open="(" close=")" separator=",">
+                    #{item}
+                </foreach>
+            </when>
+            <otherwise>
+                1 = 0
+            </otherwise>
+        </choose>
+    </select>
+
+    <!-- Returns every long_articles_text row whose kimi_title is still NULL. -->
+    <select id="getNeedUpdateRecords" resultType="com.tzld.longarticle.recommend.server.model.dto.LongArticlesText">
+        SELECT *
+        FROM long_articles_text
+        WHERE kimi_title IS NULL
+    </select>
+
+    <!--
+        Loads the long_articles_root_source_id rows whose root_source_id is contained in
+        rootSourceIdList.
+        A null or empty rootSourceIdList matches no rows: without the guard the foreach would
+        render "in ()", which is not valid SQL.
+        NOTE(review): the guard calls size(), so it assumes rootSourceIdList is a
+        java.util.Collection.
+    -->
+    <select id="getLongArticlesRootSourceId"
+            resultType="com.tzld.longarticle.recommend.server.model.entity.longArticle.LongArticlesRootSourceId">
+        select * from long_articles_root_source_id where
+        <choose>
+            <when test="rootSourceIdList != null and rootSourceIdList.size() > 0">
+                root_source_id in
+                <foreach collection="rootSourceIdList" item="item" open="(" close=")" separator=",">
+                    #{item}
+                </foreach>
+            </when>
+            <otherwise>
+                1 = 0
+            </otherwise>
+        </choose>
+    </select>
+
+    <!-- Overwrites kimi_title, kimi_summary and kimi_keys on the long_articles_text rows matching #{contentId}. -->
+    <update id="updateLongArticlesText">
+        UPDATE long_articles_text
+        SET kimi_title = #{kimiTitle}, kimi_summary = #{kimiSummary}, kimi_keys = #{kimiKeys}
+        WHERE content_id = #{contentId}
+    </update>
+
+    <!-- Sets the response column on the long_articles_match_videos rows matching #{traceId}. -->
+    <update id="updateLongArticleMatchVideosResponse">
+        UPDATE long_articles_match_videos
+        SET response = #{response}
+        WHERE trace_id = #{traceId}
+    </update>
+
+    <!--
+        Loads the long_articles_match_videos rows whose trace_id is contained in traceIds.
+        A null or empty traceIds matches no rows: without the guard the foreach would render
+        "in ()", which is not valid SQL.
+        NOTE(review): the guard calls size(), so it assumes traceIds is a java.util.Collection.
+    -->
+    <select id="getLongArticlesMatchVideos"
+            resultType="com.tzld.longarticle.recommend.server.model.dto.LongArticlesMatchVideos">
+        select * from long_articles_match_videos where
+        <choose>
+            <when test="traceIds != null and traceIds.size() > 0">
+                trace_id in
+                <foreach collection="traceIds" item="item" open="(" close=")" separator=",">
+                    #{item}
+                </foreach>
+            </when>
+            <otherwise>
+                1 = 0
+            </otherwise>
+        </choose>
+    </select>
+    <!--
+        Loads the long_articles_crawler_videos rows with download_status = 2 whose content_id is
+        contained in contentIds.
+        A null or empty contentIds matches no rows: without the guard the foreach would render
+        "in ()", which is not valid SQL.
+        NOTE(review): the guard calls size(), so it assumes contentIds is a java.util.Collection.
+    -->
+    <select id="getLongArticlesCrawlerVideos"
+            resultType="com.tzld.longarticle.recommend.server.model.dto.LongArticlesCrawlerVideos">
+        select * from long_articles_crawler_videos where download_status = 2 and
+        <choose>
+            <when test="contentIds != null and contentIds.size() > 0">
+                content_id in
+                <foreach collection="contentIds" item="item" open="(" close=")" separator=",">
+                    #{item}
+                </foreach>
+            </when>
+            <otherwise>
+                1 = 0
+            </otherwise>
+        </choose>
+    </select>
+
+    <!--
+        Returns up to #{pageSize} long_articles_match_videos rows that have no flow_pool_level
+        and whose id is greater than #{id}, in ascending id order.
+    -->
+    <select id="getNeedMatchVideos" resultType="com.tzld.longarticle.recommend.server.model.dto.LongArticlesMatchVideos">
+        SELECT *
+        FROM long_articles_match_videos
+        WHERE flow_pool_level IS NULL
+          AND id > #{id}
+        ORDER BY id
+        LIMIT #{pageSize}
+    </select>
+    <!--
+        Counts the long_articles_match_videos rows that have no flow_pool_level and an id greater
+        than the hard-coded value 3962393.
+    -->
+    <select id="countNeedMatchVideos"
+            resultType="java.lang.Integer">
+        SELECT COUNT(1)
+        FROM long_articles_match_videos
+        WHERE id > 3962393
+          AND flow_pool_level IS NULL
+    </select>
+
+    <!--
+        Inserts every element of the "list" parameter into long_articles_match_videos with one
+        multi-row INSERT. The 10 columns and the 10 item properties are paired by position.
+        NOTE(review): an empty list would render "VALUES" with no tuples; presumably callers
+        never pass one. Confirm.
+    -->
+    <insert id="batchInsertLongArticlesMatchVideos" parameterType="list">
+        INSERT INTO long_articles_match_videos (trace_id, content_id, flow_pool_level, gh_id, account_name,
+                                                      content_status, success_status, request_timestamp, response,
+                                                      process_times)
+        VALUES
+        <foreach collection="list" item="item" separator=",">
+            (#{item.traceId}, #{item.contentId}, #{item.flowPoolLevel}, #{item.ghId}, #{item.accountName},
+             #{item.contentStatus}, #{item.successStatus}, #{item.requestTimestamp}, #{item.response},
+             #{item.processTimes})
+        </foreach>
+    </insert>
+
 </mapper>

+ 262 - 0
long-article-recommend-service/src/test/java/com/tzld/longarticle/recommend/server/RecommendTest.java

@@ -18,6 +18,7 @@ import com.tzld.longarticle.recommend.server.service.recommend.RecommendService;
 import com.tzld.longarticle.recommend.server.service.recommend.recall.RecallService;
 import com.tzld.longarticle.recommend.server.util.DateUtils;
 import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.math3.stat.correlation.PearsonsCorrelation;
 import org.apache.poi.ss.usermodel.Cell;
 import org.apache.poi.ss.usermodel.Row;
 import org.apache.poi.ss.usermodel.Sheet;
@@ -549,4 +550,265 @@ public class RecommendTest {
         }
     }
 
+
+    @Test
+    public void correlation() {
+        List<String> ghIds = Lists.newArrayList("gh_e24da99dc899",
+                "gh_183d80deffb8",
+                "gh_be8c29139989",
+                "gh_c69776baf2cd",
+                "gh_b15de7c99912",
+                "gh_1d887d61088c",
+                "gh_3ed305b5817f",
+                "gh_3e91f0624545",
+                "gh_30816d8adb52",
+                "gh_970460d9ccec",
+                "gh_749271f1ccd5",
+                "gh_ac43e43b253b"
+        );
+        List<PublishSortLog> sortLogList = publishSortLogRepository.findByGhIdInAndDateStrGreaterThanEqual(ghIds, "20240907");
+        sortLogList = sortLogList.stream().filter(o -> o.getIndex() == 1).collect(Collectors.toList());
+        sortLogList.sort(Comparator.comparing(PublishSortLog::getGhId).thenComparing(PublishSortLog::getDateStr));
+        List<Article> articleList = articleRepository.getByGhIdInAndUpdateTimeGreaterThanAndTypeEquals(ghIds, 1725638400L, "9");
+        articleList = articleList.stream().filter(o -> o.getItemIndex() == 1).collect(Collectors.toList());
+        Map<String, Map<String, Article>> articleMap = articleList.stream().collect(Collectors.groupingBy(Article::getGhId, Collectors.toMap(
+                o -> DateUtils.timestampToYMDStr(o.getUpdateTime(),"yyyyMMdd"), o -> o,
+                (existing, replacement) -> replacement)));
+        List<AccountAvgInfo> accountAvgInfoList = accountAvgInfoRepository.getAllByGhIdIn(new HashSet<>(ghIds));
+        Map<String, Map<String, AccountAvgInfo>> accountAvgInfoMap = accountAvgInfoList.stream()
+                .filter(o -> Objects.equals(o.getPosition(), "1")).collect(Collectors.groupingBy(AccountAvgInfo::getGhId,
+                        Collectors.toMap(AccountAvgInfo::getUpdateTime, o -> o)));
+        int rowNum = 0;
+        Map<String, List<PublishSortLog>> sortLogMap = sortLogList.stream().collect(Collectors.groupingBy(PublishSortLog::getGhId));
+        PearsonsCorrelation correlation = new PearsonsCorrelation();
+        Workbook workbook = new XSSFWorkbook();
+        Sheet sheet = workbook.createSheet("ExampleSheet");
+        // 创建标题行
+        Row titleRow = sheet.createRow(rowNum);
+        for (Map.Entry<String, List<PublishSortLog>> entry : sortLogMap.entrySet()) {
+            String ghId = entry.getKey();
+            String name = entry.getValue().get(0).getAccountName();
+            List<PublishSortLog> itemList = entry.getValue();
+            String title = "";
+            double[] scoreArr = new double[itemList.size()];
+            double[] HisFissionFansRateRateStrategyArr = new double[itemList.size()];
+            double[] HisFissionAvgReadRateRateStrategyArr = new double[itemList.size()];
+            double[] PublishTimesStrategyArr = new double[itemList.size()];
+            double[] ViewCountRateCorrelationStrategyArr = new double[itemList.size()];
+            double[] HisFissionAvgReadSumRateStrategyArr = new double[itemList.size()];
+            double[] HisFissionAvgReadRateCorrelationRateStrategyArr = new double[itemList.size()];
+            double[] HisFissionFansSumRateStrategyArr = new double[itemList.size()];
+            double[] SimilarityStrategyArr = new double[itemList.size()];
+            double[] ViewCountStrategyArr = new double[itemList.size()];
+            double[] ViewCountRateStrategyArr = new double[itemList.size()];
+            double[] HisFissionDeWeightAvgReadSumRateStrategyArr = new double[itemList.size()];
+            double[] scoreRateArr = new double[itemList.size()];
+            for (int i = 0; i < itemList.size(); i++) {
+                PublishSortLog publishSortLog = itemList.get(i);
+                Map<String, Article> dateArticleMap = articleMap.get(publishSortLog.getGhId());
+                Article article = dateArticleMap.get(publishSortLog.getDateStr());
+                if (Objects.isNull(article) || !publishSortLog.getTitle().equals(article.getTitle())) {
+                    continue;
+                }
+                if (publishSortLog.getTitle().equals(title)) {
+                    continue;
+                }
+                title = publishSortLog.getTitle();
+                scoreArr[i] = Double.parseDouble(publishSortLog.getScore());
+                JSONObject scoreMap = JSONObject.parseObject(publishSortLog.getScoreMap());
+                HisFissionFansRateRateStrategyArr[i] =  Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansRateRateStrategy")).orElse(0.0)));
+                HisFissionAvgReadRateRateStrategyArr[i] =  Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateRateStrategy")).orElse(0.0)));
+                PublishTimesStrategyArr[i] =  Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("PublishTimesStrategy")).orElse(0.0)));
+                ViewCountRateCorrelationStrategyArr[i] =  Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountRateCorrelationStrategy")).orElse(0.0)));
+                HisFissionAvgReadSumRateStrategyArr[i] =  Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadSumRateStrategy")).orElse(0.0)));
+                HisFissionAvgReadRateCorrelationRateStrategyArr[i] =  Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateCorrelationRateStrategy")).orElse(0.0)));
+                HisFissionFansSumRateStrategyArr[i] =  Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansSumRateStrategy")).orElse(0.0)));
+                SimilarityStrategyArr[i] =  Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("SimilarityStrategy")).orElse(0.0)));
+                ViewCountStrategyArr[i] =  Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountStrategy")).orElse(0.0)));
+                ViewCountRateStrategyArr[i] =  Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountRateStrategy")).orElse(0.0)));
+                HisFissionDeWeightAvgReadSumRateStrategyArr[i] =  Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionDeWeightAvgReadSumRateStrategy")).orElse(0.0)));
+                Map<String, AccountAvgInfo> map = accountAvgInfoMap.get(article.getGhId());
+                if (Objects.nonNull(map)) {
+                    List<String> avgMapDateList = new ArrayList<>(map.keySet());
+                    String publishDate = DateUtils.findNearestDate(avgMapDateList,
+                            DateUtils.timestampToYMDStr(article.getUpdateTime(), "yyyy-MM-dd"), "yyyy-MM-dd");
+                    AccountAvgInfo accountAvgInfo = map.get(publishDate);
+                    if (Objects.nonNull(accountAvgInfo)) {
+                        scoreRateArr[i] = Double.parseDouble(String.format("%.3f", article.getShowViewCount() / (double) accountAvgInfo.getReadAvg()));
+                    }
+                }
+            }
+
+            rowNum++;
+            Row row = sheet.createRow(rowNum);
+            Cell cell = row.createCell(0);
+            cell = row.createCell(1);
+            cell.setCellValue(ghId);
+            cell = row.createCell(2);
+            cell.setCellValue(name);
+            cell = row.createCell(3);
+            cell = row.createCell(4);
+            cell = row.createCell(5);
+            cell.setCellValue(correlation.correlation(scoreArr, scoreRateArr));
+            cell = row.createCell(6);
+            cell.setCellValue(correlation.correlation(HisFissionFansRateRateStrategyArr, scoreRateArr));
+            cell = row.createCell(7);
+            cell.setCellValue(correlation.correlation(HisFissionAvgReadRateRateStrategyArr, scoreRateArr));
+            cell = row.createCell(8);
+            cell.setCellValue(correlation.correlation(PublishTimesStrategyArr, scoreRateArr));
+            cell = row.createCell(9);
+            cell.setCellValue(correlation.correlation(ViewCountRateCorrelationStrategyArr, scoreRateArr));
+            cell = row.createCell(10);
+            cell.setCellValue(correlation.correlation(HisFissionAvgReadSumRateStrategyArr, scoreRateArr));
+            cell = row.createCell(11);
+            cell.setCellValue(correlation.correlation(HisFissionAvgReadRateCorrelationRateStrategyArr, scoreRateArr));
+            cell = row.createCell(12);
+            cell.setCellValue(correlation.correlation(HisFissionFansSumRateStrategyArr, scoreRateArr));
+            cell = row.createCell(13);
+            cell.setCellValue(correlation.correlation(SimilarityStrategyArr, scoreRateArr));
+            cell = row.createCell(14);
+            cell.setCellValue(correlation.correlation(ViewCountStrategyArr, scoreRateArr));
+            cell = row.createCell(15);
+            cell.setCellValue(correlation.correlation(ViewCountRateStrategyArr, scoreRateArr));
+            cell = row.createCell(16);
+            cell.setCellValue(correlation.correlation(HisFissionDeWeightAvgReadSumRateStrategyArr, scoreRateArr));
+        }
+
+        try (FileOutputStream outputStream = new FileOutputStream("/Users/wangyunpeng/Downloads/example.xlsx")) {
+            workbook.write(outputStream);
+        } catch (IOException e) {
+            e.printStackTrace();
+        } finally {
+            try {
+                workbook.close();
+            } catch (IOException e) {
+                e.printStackTrace();
+            }
+        }
+    }
+
+
+
+    /**
+     * Converts an exported JSON log file into an xlsx sheet with one row per scored article.
+     *
+     * <p>The input is a JSON array; each element supplies {@code __time__} and a
+     * {@code message} of the form {@code "<strategy> <account text>[<json array of scores>]"}.
+     * Every element of the embedded array yields one row: date, account name, id, title,
+     * strategy, score, followed by the per-strategy scores from its {@code scoreMap}
+     * formatted with three decimals (a key missing from the map is written as 0.000).
+     *
+     * <p>Any failure while reading or parsing is logged and aborts the export without
+     * writing the file; the workbook is closed in every case.
+     */
+    @Test
+    void getScoreFromLogFile() {
+
+        String logFilePath = "/Users/wangyunpeng/Downloads/b78020b8-d9df-466f-bd01-cd982bb986d0.json";
+        File file = new File(logFilePath);
+
+        // Header texts for columns 0..5; columns 6..16 are headed by the strategy names.
+        String[] fixedHeaders = {"日期", "账号名称", "id", "标题", "策略", "得分"};
+        String[] strategyNames = {
+                "HisFissionFansRateRateStrategy",
+                "HisFissionAvgReadRateRateStrategy",
+                "PublishTimesStrategy",
+                "ViewCountRateCorrelationStrategy",
+                "HisFissionAvgReadSumRateStrategy",
+                "HisFissionAvgReadRateCorrelationRateStrategy",
+                "HisFissionFansSumRateStrategy",
+                "SimilarityStrategy",
+                "ViewCountStrategy",
+                "ViewCountRateStrategy",
+                "HisFissionDeWeightAvgReadSumRateStrategy"
+        };
+
+        // try-with-resources closes the workbook even when reading or parsing the log fails.
+        try (Workbook workbook = new XSSFWorkbook()) {
+            Sheet sheet = workbook.createSheet("ExampleSheet");
+            int rowNum = 0;
+            Row titleRow = sheet.createRow(rowNum);
+            int headerColumn = 0;
+            for (String header : fixedHeaders) {
+                titleRow.createCell(headerColumn++).setCellValue(header);
+            }
+            for (String strategyName : strategyNames) {
+                titleRow.createCell(headerColumn++).setCellValue(strategyName);
+            }
+
+            String content = new String(Files.readAllBytes(file.toPath()), StandardCharsets.UTF_8);
+            JSONArray jsonArray = JSONArray.parseArray(content);
+            for (Object o : jsonArray) {
+                JSONObject jsonObject = (JSONObject) o;
+                Long time = jsonObject.getLong("__time__");
+                String message = jsonObject.getString("message");
+                // Everything before the first '[' is the text prefix, the rest is the score array.
+                int index = message.indexOf("[");
+                String info = message.substring(0, index);
+                String strategy = info.substring(0, info.indexOf(" "));
+                String accountName = info.substring(info.indexOf(" ")).replace("账号名称 ", "")
+                        .replace(" 头条评分结果", "");
+                JSONArray scoreArray = JSONArray.parseArray(message.substring(index));
+                for (Object scoreJSON : scoreArray) {
+                    JSONObject scoreObject = (JSONObject) scoreJSON;
+                    rowNum++;
+                    Row row = sheet.createRow(rowNum);
+                    row.createCell(0).setCellValue(DateUtils.timestampToYMDStr(time, "yyyyMMdd"));
+                    row.createCell(1).setCellValue(accountName);
+                    row.createCell(2).setCellValue(scoreObject.getString("id"));
+                    row.createCell(3).setCellValue(scoreObject.getString("title"));
+                    row.createCell(4).setCellValue(strategy);
+                    row.createCell(5).setCellValue(scoreObject.getString("score"));
+                    JSONObject scoreMap = JSONObject.parseObject(scoreObject.getString("scoreMap"));
+                    for (int k = 0; k < strategyNames.length; k++) {
+                        row.createCell(6 + k).setCellValue(
+                                String.format("%.3f", scoreMap.getDoubleValue(strategyNames[k])));
+                    }
+                }
+            }
+
+            try (FileOutputStream outputStream = new FileOutputStream("/Users/wangyunpeng/Downloads/example.xlsx")) {
+                workbook.write(outputStream);
+            } catch (IOException e) {
+                log.error("writeFileError fileName:{}", file.getName(), e);
+            }
+        } catch (Exception e) {
+            log.error("readFileError fileName:{}", file.getName(), e);
+        }
+    }
+
 }