Prechádzať zdrojové kódy

Merge branch 'wyp/1014-data-export' of Server/long-article-recommend into master

wangyunpeng 9 mesiacov pred
rodič
commit
fe8cfda402

+ 5 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/vo/NewSortStrategyExport.java

@@ -40,6 +40,11 @@ public class NewSortStrategyExport {
     private Double fission0FirstRate = 0.0;
     private Double fission1Fission0Rate = 0.0;
     private Double fission0ReadAvgRate = 0.0;
+    //历史信息
+    private Double hisReadRate = 0.0;
+    private Double hisFirstFansRate = 0.0;
+    private Double hisFission0FirstRate = 0.0;
+
     private Integer position;
     private String ghId;
     private String title;

+ 1 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/crawler/PublishSortLogRepository.java

@@ -19,5 +19,6 @@ public interface PublishSortLogRepository extends JpaRepository<PublishSortLog,
 
     List<PublishSortLog> findByGhIdInAndDateStrGreaterThanEqual(List<String> ghIds, String dateStr);
 
+    List<PublishSortLog> findByStrategyInAndDateStrGreaterThanEqual(List<String> strategies, String dateStr);
 
 }

+ 66 - 4
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/DataDashboardService.java

@@ -26,6 +26,7 @@ import com.tzld.longarticle.recommend.server.repository.crawler.AccountAvgInfoRe
 import com.tzld.longarticle.recommend.server.repository.crawler.ArticleDetailInfoRepository;
 import com.tzld.longarticle.recommend.server.repository.crawler.ArticleRepository;
 import com.tzld.longarticle.recommend.server.repository.crawler.PublishSortLogRepository;
+import com.tzld.longarticle.recommend.server.service.score.ScoreStrategy;
 import com.tzld.longarticle.recommend.server.util.DateUtils;
 import com.tzld.longarticle.recommend.server.util.MapBuilder;
 import com.tzld.longarticle.recommend.server.util.feishu.FeiShu;
@@ -124,9 +125,12 @@ public class DataDashboardService {
                         Pair.of("AD", "0.00%"),
                         Pair.of("AE", "0.00%"),
                         Pair.of("AF", "0.00%"),
-                        Pair.of("AL", "0.00%"),
-                        Pair.of("AM", "0.00%"),
-                        Pair.of("AN", "0.00%")
+                        Pair.of("AG", "0.00%"),
+                        Pair.of("AH", "0.00%"),
+                        Pair.of("AI", "0.00%"),
+                        Pair.of("AO", "0.00%"),
+                        Pair.of("AP", "0.00%"),
+                        Pair.of("AQ", "0.00%")
                 );
 
         doSendFeishuSheet(dateStrList, sheetToken, sheetId, rowNum, rows, 2, styles);
@@ -135,7 +139,7 @@ public class DataDashboardService {
     private List<NewSortStrategyExport> newSortStrategyData(String dateStr) {
         long timestamp = DateUtils.dateStrToTimestamp(dateStr, "yyyyMMdd");
         String dateStrS = DateUtils.timestampToYMDStr(timestamp, "yyyy-MM-dd");
-        List<AccountAvgInfo> accountAvgInfoList = accountAvgInfoRepository.getAllByUpdateTimeGreaterThanEqual(dateStrS);
+        List<AccountAvgInfo> accountAvgInfoList = accountAvgInfoRepository.findAll();
         List<String> ghIds = accountAvgInfoList.stream().map(AccountAvgInfo::getGhId).distinct().collect(Collectors.toList());
 
         List<Article> articleList = articleRepository.getByGhIdInAndUpdateTimeGreaterThanAndTypeEquals(ghIds, timestamp, "9");
@@ -251,6 +255,23 @@ public class DataDashboardService {
         // 源生成计划
         List<String> titleList = articleList.stream().map(Article::getTitle).distinct().collect(Collectors.toList());
         Map<String, ProducePlan> sourceTitlePlanMap = getTitleSourceProducePlanMap(titleList);
+        // 历史发布情况
+        List<String> titleMd5List = articleList.stream().map(Article::getTitleMd5).distinct().collect(Collectors.toList());
+        List<Article> hisArticleList = new ArrayList<>();
+        List<List<String>> titleMd5Partition = Lists.partition(new ArrayList<>(titleMd5List), 1000);
+        for (List<String> titleMd5s : titleMd5Partition) {
+            hisArticleList.addAll(articleRepository.getByTitleMd5InAndTypeEqualsAndStatusEquals(titleMd5s, "9", 1));
+        }
+        Map<String, List<Article>> hisArticleMap = hisArticleList.stream().collect(Collectors.groupingBy(Article::getTitle));
+        Set<String> hisWxSnList = hisArticleList.stream().map(Article::getWxSn).collect(Collectors.toSet());
+        List<ArticleDetailInfo> hisArticleDetailInfoList = new ArrayList<>();
+        List<List<String>> hisSnPartition = Lists.partition(new ArrayList<>(hisWxSnList), 1000);
+        for (List<String> sns : hisSnPartition) {
+            hisArticleDetailInfoList.addAll(articleDetailInfoRepository.getAllByWxSnIn(sns));
+        }
+        Map<String, List<ArticleDetailInfo>> hisArticleDetailInfoMap = hisArticleDetailInfoList.stream()
+                .collect(Collectors.groupingBy(ArticleDetailInfo::getWxSn));
+
         // result
         List<NewSortStrategyExport> result = new ArrayList<>();
         for (Article article : articleList) {
@@ -435,6 +456,47 @@ public class DataDashboardService {
             if (Objects.nonNull(sourceProducePlan)) {
                 obj.setSourceProducePlanName(sourceProducePlan.getName());
             }
+            List<Article> hisArticles = hisArticleMap.get(article.getTitle());
+            hisArticles = hisArticles.stream().filter(o -> o.getUpdateTime() < (article.getUpdateTime() - 3600 * 8)
+                            && (1 == o.getItemIndex() || 2 == o.getItemIndex())
+                            && !ScoreStrategy.hisContentLateFilter(o.getUpdateTime())).collect(Collectors.toList());
+            Integer readCount = 0;
+            Double readAvgCount = 0.0;
+            Integer fansCount = 0;
+            int firstCount = 0;
+            int fission0Count = 0;
+            for (Article hisArticle : hisArticles) {
+                readCount += hisArticle.getShowViewCount();
+                AccountAvgInfo accountAvgInfo = getAccountAvgInfo(accountAvgInfoIndexMap, hisArticle.getGhId(),
+                        hisArticle.getUpdateTime(), hisArticle.getItemIndex());
+                if (Objects.nonNull(accountAvgInfo)) {
+                    readAvgCount += accountAvgInfo.getReadAvg();
+                    fansCount += accountAvgInfo.getFans();
+                }
+                List<ArticleDetailInfo> hisADIList = hisArticleDetailInfoMap.get(hisArticle.getWxSn());
+                if (CollectionUtil.isNotEmpty(hisADIList)) {
+                    Date hisMinDate = hisADIList.stream().map(ArticleDetailInfo::getRecallDt).min(Date::compareTo).orElse(new Date());
+                    for (ArticleDetailInfo articleDetailInfo : hisADIList) {
+                        if (articleDetailInfo.getRecallDt().equals(hisMinDate)) {
+                            if (Objects.nonNull(articleDetailInfo.getFirstLevel())) {
+                                firstCount += articleDetailInfo.getFirstLevel();
+                            }
+                            if (Objects.nonNull(articleDetailInfo.getFission0())) {
+                                fission0Count += articleDetailInfo.getFission0();
+                            }
+                        }
+                    }
+                }
+            }
+            if (readAvgCount > 0) {
+                obj.setHisReadRate(readCount / readAvgCount);
+            }
+            if (fansCount > 0) {
+                obj.setHisFirstFansRate(firstCount / (double) fansCount);
+            }
+            if (firstCount > 0) {
+                obj.setHisFission0FirstRate(fission0Count / (double) firstCount);
+            }
         }
         result.sort(Comparator.comparing(NewSortStrategyExport::getDateStr).reversed()
                 .thenComparing(NewSortStrategyExport::getGhId).thenComparing(NewSortStrategyExport::getPosition));

+ 131 - 14
long-article-recommend-service/src/test/java/com/tzld/longarticle/recommend/server/RecommendTest.java

@@ -16,6 +16,7 @@ import com.tzld.longarticle.recommend.server.repository.crawler.PublishSortLogRe
 import com.tzld.longarticle.recommend.server.service.RecommendService;
 import com.tzld.longarticle.recommend.server.service.recall.RecallService;
 import com.tzld.longarticle.recommend.server.util.DateUtils;
+import lombok.extern.slf4j.Slf4j;
 import org.apache.poi.ss.usermodel.Cell;
 import org.apache.poi.ss.usermodel.Row;
 import org.apache.poi.ss.usermodel.Sheet;
@@ -25,12 +26,16 @@ import org.junit.jupiter.api.Test;
 import org.springframework.boot.test.context.SpringBootTest;
 
 import javax.annotation.Resource;
+import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
 import java.util.*;
 import java.util.stream.Collectors;
 
 @SpringBootTest(classes = Application.class)
+@Slf4j
 public class RecommendTest {
 
     @Resource
@@ -257,22 +262,12 @@ public class RecommendTest {
 
     @Test
     public void exportScoreData() {
-        List<String> ghIds = Lists.newArrayList("gh_e24da99dc899",
-                "gh_183d80deffb8",
-                "gh_be8c29139989",
-                "gh_c69776baf2cd",
-                "gh_b15de7c99912",
-                "gh_1d887d61088c",
-                "gh_3ed305b5817f",
-                "gh_3e91f0624545",
-                "gh_30816d8adb52",
-                "gh_970460d9ccec",
-                "gh_749271f1ccd5"
-        );
-        List<PublishSortLog> sortLogList = publishSortLogRepository.findByGhIdInAndDateStrGreaterThanEqual(ghIds, "20240907");
+        List<String> strategies = Arrays.asList("ArticleRankV11", "ArticleRankV12");
+        List<PublishSortLog> sortLogList = publishSortLogRepository.findByStrategyInAndDateStrGreaterThanEqual(strategies, "20240928");
         sortLogList = sortLogList.stream().filter(o -> o.getIndex() == 1).collect(Collectors.toList());
         sortLogList.sort(Comparator.comparing(PublishSortLog::getGhId).thenComparing(PublishSortLog::getDateStr));
-        List<Article> articleList = articleRepository.getByGhIdInAndUpdateTimeGreaterThanAndTypeEquals(ghIds, 1725638400L, "9");
+        List<String> ghIds = sortLogList.stream().map(PublishSortLog::getGhId).distinct().collect(Collectors.toList());
+        List<Article> articleList = articleRepository.getByGhIdInAndUpdateTimeGreaterThanAndTypeEquals(ghIds, 1727452800L, "9");
         articleList = articleList.stream().filter(o -> o.getItemIndex() == 1).collect(Collectors.toList());
         Map<String, Map<String, Article>> articleMap = articleList.stream().collect(Collectors.groupingBy(Article::getGhId, Collectors.toMap(
                 o -> DateUtils.timestampToYMDStr(o.getUpdateTime(),"yyyyMMdd"), o -> o,
@@ -406,4 +401,126 @@ public class RecommendTest {
         }
     }
 
+    @Test
+    public void exportFromAliyunLog() {
+
+        String folderPath = "/Users/wangyunpeng/Downloads/longarticle-recommend-server-test_info-log_20241014_150245.json";
+
+        File file = new File(folderPath);
+
+        Workbook workbook = new XSSFWorkbook();
+        Sheet sheet = workbook.createSheet("ExampleSheet");
+        int rowNum = 0;
+        // 创建标题行
+        Row titleRow = sheet.createRow(rowNum);
+        Cell titleCell = titleRow.createCell(0);
+        titleCell.setCellValue("日期");
+        titleCell = titleRow.createCell(1);
+        titleCell.setCellValue("账号名称");
+        titleCell = titleRow.createCell(2);
+        titleCell.setCellValue("id");
+        titleCell = titleRow.createCell(3);
+        titleCell.setCellValue("标题");
+        titleCell = titleRow.createCell(4);
+        titleCell.setCellValue("策略");
+        titleCell = titleRow.createCell(5);
+        titleCell.setCellValue("得分");
+        titleCell = titleRow.createCell(6);
+        titleCell.setCellValue("HisFissionFansRateRateStrategy");
+        titleCell = titleRow.createCell(7);
+        titleCell.setCellValue("HisFissionAvgReadRateRateStrategy");
+        titleCell = titleRow.createCell(8);
+        titleCell.setCellValue("PublishTimesStrategy");
+        titleCell = titleRow.createCell(9);
+        titleCell.setCellValue("ViewCountRateCorrelationStrategy");
+        titleCell = titleRow.createCell(10);
+        titleCell.setCellValue("HisFissionAvgReadSumRateStrategy");
+        titleCell = titleRow.createCell(11);
+        titleCell.setCellValue("HisFissionAvgReadRateCorrelationRateStrategy");
+        titleCell = titleRow.createCell(12);
+        titleCell.setCellValue("HisFissionFansSumRateStrategy");
+        titleCell = titleRow.createCell(13);
+        titleCell.setCellValue("SimilarityStrategy");
+        titleCell = titleRow.createCell(14);
+        titleCell.setCellValue("ViewCountStrategy");
+        titleCell = titleRow.createCell(15);
+        titleCell.setCellValue("ViewCountRateStrategy");
+        titleCell = titleRow.createCell(16);
+        titleCell.setCellValue("HisFissionDeWeightAvgReadSumRateStrategy");
+        try {
+            String content = new String(Files.readAllBytes(file.toPath()), StandardCharsets.UTF_8);
+            JSONArray jsonArray = JSONArray.parseArray(content);
+            for (Object o : jsonArray) {
+                JSONObject jsonObject = (JSONObject) o;
+                Long time = jsonObject.getLong("__time__");
+                String message = jsonObject.getString("message");
+                int index = message.indexOf("[");
+                String info = message.substring(0, index);
+                String strategy = info.substring(0, info.indexOf(" "));
+                String accountName = info.substring(info.indexOf(" ")).replace("账号名称 ", "")
+                        .replace(" 头条评分结果", "");
+                String json = message.substring(index);
+                JSONArray scoreArray = JSONArray.parseArray(json);
+                for (Object scoreJSON : scoreArray) {
+                    JSONObject scoreObject = (JSONObject) scoreJSON;
+                    String id = scoreObject.getString("id");
+                    String title = scoreObject.getString("title");
+                    String score = scoreObject.getString("score");
+                    String scoreMapStr = scoreObject.getString("scoreMap");
+                    rowNum++;
+                    Row row = sheet.createRow(rowNum);
+                    Cell cell = row.createCell(0);
+                    cell.setCellValue(DateUtils.timestampToYMDStr(time, "yyyyMMdd"));
+                    cell = row.createCell(1);
+                    cell.setCellValue(accountName);
+                    cell = row.createCell(2);
+                    cell.setCellValue(id);
+                    cell = row.createCell(3);
+                    cell.setCellValue(title);
+                    cell = row.createCell(4);
+                    cell.setCellValue(strategy);
+                    cell = row.createCell(5);
+                    cell.setCellValue(score);
+                    cell = row.createCell(6);
+                    JSONObject scoreMap = JSONObject.parseObject(scoreMapStr);
+                    cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansRateRateStrategy")).orElse(0.0)));
+                    cell = row.createCell(7);
+                    cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateRateStrategy")).orElse(0.0)));
+                    cell = row.createCell(8);
+                    cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("PublishTimesStrategy")).orElse(0.0)));
+                    cell = row.createCell(9);
+                    cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountRateCorrelationStrategy")).orElse(0.0)));
+                    cell = row.createCell(10);
+                    cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadSumRateStrategy")).orElse(0.0)));
+                    cell = row.createCell(11);
+                    cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateCorrelationRateStrategy")).orElse(0.0)));
+                    cell = row.createCell(12);
+                    cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansSumRateStrategy")).orElse(0.0)));
+                    cell = row.createCell(13);
+                    cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("SimilarityStrategy")).orElse(0.0)));
+                    cell = row.createCell(14);
+                    cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountStrategy")).orElse(0.0)));
+                    cell = row.createCell(15);
+                    cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountRateStrategy")).orElse(0.0)));
+                    cell = row.createCell(16);
+                    cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionDeWeightAvgReadSumRateStrategy")).orElse(0.0)));
+                }
+            }
+
+            try (FileOutputStream outputStream = new FileOutputStream("/Users/wangyunpeng/Downloads/example.xlsx")) {
+                workbook.write(outputStream);
+            } catch (IOException e) {
+                e.printStackTrace();
+            } finally {
+                try {
+                    workbook.close();
+                } catch (IOException e) {
+                    e.printStackTrace();
+                }
+            }
+        } catch (Exception e) {
+            log.error("readFileError fileName:{}", file.getName(), e);
+        }
+    }
+
 }