package com.tzld.longarticle.recommend.server; import cn.hutool.core.collection.CollectionUtil; import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONObject; import com.google.common.collect.Lists; import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticleTypeEnum; import com.tzld.longarticle.recommend.server.mapper.aigc.AigcBaseMapper; import com.tzld.longarticle.recommend.server.mapper.aigc.PublishContentMapper; import com.tzld.longarticle.recommend.server.mapper.crawler.CrawlerBaseMapper; import com.tzld.longarticle.recommend.server.model.dto.PublishContentDTO; import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishAccount; import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountAvgInfo; import com.tzld.longarticle.recommend.server.model.entity.crawler.Article; import com.tzld.longarticle.recommend.server.model.entity.crawler.ArticleDetailInfo; import com.tzld.longarticle.recommend.server.model.entity.crawler.PublishSortLog; import com.tzld.longarticle.recommend.server.model.param.PublishContentParam; import com.tzld.longarticle.recommend.server.repository.aigc.PublishAccountRepository; import com.tzld.longarticle.recommend.server.repository.crawler.AccountAvgInfoRepository; import com.tzld.longarticle.recommend.server.repository.crawler.ArticleDetailInfoRepository; import com.tzld.longarticle.recommend.server.repository.crawler.ArticleRepository; import com.tzld.longarticle.recommend.server.repository.crawler.PublishSortLogRepository; import com.tzld.longarticle.recommend.server.service.recommend.RecommendService; import com.tzld.longarticle.recommend.server.service.recommend.recall.RecallService; import com.tzld.longarticle.recommend.server.util.DateUtils; import lombok.extern.slf4j.Slf4j; import org.apache.commons.math3.stat.correlation.PearsonsCorrelation; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import 
org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.junit.jupiter.api.Test; import org.springframework.beans.BeanUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; import javax.annotation.Resource; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.util.*; import java.util.stream.Collectors; @SpringBootTest(classes = Application.class) @Slf4j public class RecommendTest { @Resource private RecommendService recommendService; @Resource private RecallService recallService; @Resource private ArticleRepository articleRepository; @Resource private ArticleDetailInfoRepository articleDetailInfoRepository; @Resource private AccountAvgInfoRepository accountAvgInfoRepository; @Resource private CrawlerBaseMapper crawlerBaseMapper; @Resource private PublishSortLogRepository publishSortLogRepository; @Autowired private PublishAccountRepository publishAccountRepository; @Autowired private AigcBaseMapper aigcBaseMapper; @Autowired private PublishContentMapper publishContentMapper; // @Test // void recall() { // RecallParam param = new RecallParam(); // param.setAccountId("20231213123536190184852"); // param.setPlanId("20240718181730864154902"); // RecallResult recallResult = recallService.recall(param); // System.out.println(JSONObject.toJSONString(recallResult)); // } // // @Test // void exportData() { // Set ghIds = new HashSet<>(Arrays.asList("gh_adca24a8f429", "gh_e0eb490115f5", "gh_51e4ad40466d", "gh_95ed5ecf9363")); // List
articleList = articleRepository.getByGhIdInAndPublishTimestampGreaterThanAndTypeEquals(ghIds, 1722441600L, ArticleTypeEnum.qunfa.getVal()); // // Map>> map = articleList.stream() // .collect(Collectors.groupingBy(Article::getTitle, Collectors.groupingBy(Article::getItemIndex))); // Set snList = articleList.stream().map(Article::getWxSn).collect(Collectors.toSet()); // List articleDetailInfoList = articleDetailInfoRepository.getAllByWxSnIn(new ArrayList<>(snList)); // Map> articleDetailInfoMap = articleDetailInfoList.stream() // .collect(Collectors.groupingBy(ArticleDetailInfo::getWxSn)); // // List accountAvgInfoList = accountAvgInfoRepository.getAllByGhIdInAndStatusEquals(ghIds, 1); // Map> accountAvgInfoIndexMap = accountAvgInfoList.stream().collect( // Collectors.groupingBy(AccountAvgInfo::getGhId, Collectors.toMap(AccountAvgInfo::getPosition, o -> o))); // JSONArray jsonArray = new JSONArray(); // for (Article article : articleList) { // List articleDetailInfos = articleDetailInfoMap.get(article.getWxSn()); // if (CollectionUtils.isEmpty(articleDetailInfos)) { // continue; // } // Date minDate = articleDetailInfos.stream().map(ArticleDetailInfo::getRecallDt).min(Date::compareTo).orElse(new Date()); // int sumfirstLevel = 0; // int sumFission0 = 0; // int sumFission1 = 0; // int sumFission2 = 0; // for (ArticleDetailInfo articleDetailInfo : articleDetailInfos) { // if (articleDetailInfo.getRecallDt().equals(minDate)) { // sumfirstLevel += Optional.ofNullable(articleDetailInfo.getFirstLevel()).orElse(0); // sumFission0 += Optional.ofNullable(articleDetailInfo.getFission0()).orElse(0); // sumFission1 += Optional.ofNullable(articleDetailInfo.getFission1()).orElse(0); // sumFission2 += Optional.ofNullable(articleDetailInfo.getFission2()).orElse(0); // } // } // Map accountAvgInfoMap = accountAvgInfoIndexMap.get(article.getGhId()); // AccountAvgInfo avgInfo = accountAvgInfoMap.get(article.getItemIndex().toString()); // SimpleDateFormat sdf = new 
SimpleDateFormat("yyyyMMdd"); // String date = sdf.format(new Date(article.getPublishTimestamp() * 1000)); // JSONObject obj = new JSONObject(); // obj.put("ghId", article.getGhId()); // obj.put("accountName", article.getAccountName()); // obj.put("title", article.getTitle()); // obj.put("index", article.getItemIndex()); // obj.put("viewCount", article.getShowViewCount()); // obj.put("time", date); // if (Objects.nonNull(avgInfo)) { // obj.put("fans", avgInfo.getFans()); // obj.put("avgViewCount", avgInfo.getReadAvg()); // obj.put("viewCountRate", (article.getShowViewCount() * 1.0) / avgInfo.getReadAvg()); // } // obj.put("firstLevel", sumfirstLevel); // obj.put("fission0", sumFission0); // obj.put("fission1", sumFission1); // obj.put("fission2", sumFission2); // jsonArray.add(obj); // } // System.out.println(jsonArray.toJSONString()); // } // // @Test // void ii() throws IOException { // String dateStr = "20240911"; // List accountAvgInfoList = accountAvgInfoRepository.getAllByStatusEquals(1); // Map accountMap = accountAvgInfoList.stream().collect( // Collectors.toMap(AccountAvgInfo::getAccountName, AccountAvgInfo::getGhId, (existing, replacement) -> replacement)); // BufferedReader reader = new BufferedReader(new FileReader("/Users/wangyunpeng/Downloads/账号相关性.json")); // StringBuilder sb = new StringBuilder(); // String line; // while ((line = reader.readLine()) != null) { // sb.append(line); // } // String jsonStr = sb.toString(); // // 使用 ObjectMapper 解析 JSON // ObjectMapper objectMapper = new ObjectMapper(); // // try { // // 将 JSON 转换为 Map> // Map> result = objectMapper.readValue(jsonStr, // new TypeReference>>() { // }); // // // 输出转换结果 // System.out.println(result); // List saveList = new ArrayList<>(); // result.forEach((k, v) -> { // String ghId = accountMap.get(k); // v.forEach((k1, v1) -> { // String relGhId = accountMap.get(k1); // AccountCorrelation save = new AccountCorrelation(); // save.setDateStr(dateStr); // save.setGhId(ghId); // 
save.setAccountName(k); // save.setRelGhId(relGhId); // save.setRelAccountName(k1); // save.setStatus(1); // save.setCorrelation(v1); // saveList.add(save); // }); // }); // List all = new ArrayList<>(saveList); // for (AccountCorrelation item : all) { // if (!item.getGhId().equals(item.getRelGhId())) { // AccountCorrelation save = new AccountCorrelation(); // BeanUtils.copyProperties(item, save); // save.setGhId(item.getRelGhId()); // save.setAccountName(item.getRelAccountName()); // save.setRelGhId(item.getGhId()); // save.setRelAccountName(item.getAccountName()); // saveList.add(save); // } // } // crawlerBaseMapper.batchInsertAccountCorrelation(saveList); // // // } catch (IOException e) { // e.printStackTrace(); // } // } @Test public void test() { List morning = Lists.newArrayList("gh_084a485e859a", "gh_183d80deffb8", "gh_5ff48e9fb9ef", "gh_6d9f36e3a7be", "gh_9f8dc5b0c74e", "gh_e0eb490115f5", "gh_e24da99dc899"); List noon = Lists.newArrayList("gh_080bb43aa0dc", "gh_0c89e11f8bf3", "gh_192c9cf58b13", "gh_1b27dd1beeca", "gh_1d887d61088c", "gh_29074b51f2b7", "gh_3ed305b5817f", "gh_5ae65db96cb7", "gh_6b7c2a257263", "gh_6cfd1132df94", "gh_6d205db62f04", "gh_72bace6b3059", "gh_7e5818b2dd83", "gh_7f5075624a50", "gh_89ef4798d3ea", "gh_9877c8541764", "gh_9eef14ad6c16", "gh_a2901d34f75b", "gh_b15de7c99912", "gh_b676b7ad9b74", "gh_b6f2c5332c72", "gh_bfe5b705324a", "gh_bff0bcb0694a", "gh_c5cdf60d9ab4", "gh_c69776baf2cd", "gh_d49df5e974ca", "gh_d4dffc34ac39", "gh_dd4c857bbb36", "gh_ee78360d06f5", "gh_f25b5fb01977", "gh_f902cea89e48", "gh_ff487cb5dab3"); String dateStr = "2024-09-12"; List
articleList = articleRepository.getByPublishTimestampGreaterThanAndTypeEquals(1725120000L, ArticleTypeEnum.QUNFA.getVal()); articleList = articleList.stream().filter(o -> o.getItemIndex() == 1 && o.getPublishTimestamp() < 1726675200).collect(Collectors.toList()); Map> map = articleList.stream().collect(Collectors.groupingBy(Article::getTitle)); List accountAvgInfoList = accountAvgInfoRepository.getAllByUpdateTime(dateStr); accountAvgInfoList = accountAvgInfoList.stream().filter(o -> o.getPosition().equals("1")).collect(Collectors.toList()); Map accountAvgInfoMap = accountAvgInfoList.stream().collect(Collectors.toMap(AccountAvgInfo::getGhId, o -> o)); List wxSnList = articleList.stream().map(Article::getWxSn).collect(Collectors.toList()); List articleDetailInfoList = articleDetailInfoRepository.getAllByWxSnIn(wxSnList); Map> articleDetailInfoMap = articleDetailInfoList.stream() .collect(Collectors.groupingBy(ArticleDetailInfo::getWxSn)); JSONArray result = new JSONArray(); int sumFissionMorning = 0; int sumFissionMoon = 0; int sumReadMorning = 0; int sumReadNoon = 0; long sumFansMorning = 0; long sumFansMoon = 0; double readAvgMorning = 0.0; double readAvgMoon = 0.0; for (Article article : articleList) { List articleDetailInfos = articleDetailInfoMap.get(article.getWxSn()); if (CollectionUtil.isEmpty(articleDetailInfos)) { continue; } Date minDate = articleDetailInfos.stream().map(ArticleDetailInfo::getRecallDt).min(Date::compareTo).orElse(new Date()); AccountAvgInfo accountAvgInfo = accountAvgInfoMap.get(article.getGhId()); for (ArticleDetailInfo articleDetailInfo : articleDetailInfos) { if (articleDetailInfo.getRecallDt().equals(minDate)) { if (morning.contains(article.getGhId())) { sumFissionMorning += Optional.ofNullable(articleDetailInfo.getFission0()).orElse(0); sumFissionMorning += Optional.ofNullable(articleDetailInfo.getFission1()).orElse(0); sumFissionMorning += Optional.ofNullable(articleDetailInfo.getFission2()).orElse(0); } if 
(noon.contains(article.getGhId())) { sumFissionMoon += Optional.ofNullable(articleDetailInfo.getFission0()).orElse(0); sumFissionMoon += Optional.ofNullable(articleDetailInfo.getFission1()).orElse(0); sumFissionMoon += Optional.ofNullable(articleDetailInfo.getFission2()).orElse(0); } } } if (Objects.nonNull(accountAvgInfo)) { if (morning.contains(article.getGhId())) { readAvgMorning += accountAvgInfo.getReadAvg(); sumFansMorning += accountAvgInfo.getFans(); sumReadMorning += article.getShowViewCount(); } if (noon.contains(article.getGhId())) { readAvgMoon += accountAvgInfo.getReadAvg(); sumFansMoon += accountAvgInfo.getFans(); sumReadNoon += article.getShowViewCount(); } } } JSONObject jsonObjectMorning = new JSONObject(); jsonObjectMorning.put("时间", "早上"); jsonObjectMorning.put("sumFission", sumFissionMorning); jsonObjectMorning.put("readAvg", readAvgMorning); jsonObjectMorning.put("rate", sumFissionMorning / readAvgMorning); jsonObjectMorning.put("sumRead", sumReadMorning); jsonObjectMorning.put("sumFans", sumFansMorning); jsonObjectMorning.put("阅读率", sumReadMorning / (double) sumFansMorning); result.add(jsonObjectMorning); JSONObject jsonObjectMoon = new JSONObject(); jsonObjectMoon.put("时间", "中午"); jsonObjectMoon.put("sumFission", sumFissionMoon); jsonObjectMoon.put("readAvg", readAvgMoon); jsonObjectMoon.put("rate", sumFissionMoon / readAvgMoon); jsonObjectMoon.put("sumRead", sumReadNoon); jsonObjectMoon.put("sumFans", sumFansMoon); jsonObjectMoon.put("阅读率", sumReadNoon / (double) sumFansMoon); result.add(jsonObjectMoon); System.out.println(JSONObject.toJSONString(result)); } @Test public void exportScoreData() { List strategies = Arrays.asList("ArticleRankV11", "ArticleRankV12"); List sortLogList = publishSortLogRepository.findByStrategyInAndDateStrGreaterThanEqual(strategies, "20240928"); sortLogList = sortLogList.stream().filter(o -> o.getIndex() == 1).collect(Collectors.toList()); 
sortLogList.sort(Comparator.comparing(PublishSortLog::getGhId).thenComparing(PublishSortLog::getDateStr)); List ghIds = sortLogList.stream().map(PublishSortLog::getGhId).distinct().collect(Collectors.toList()); List
articleList = articleRepository.getByGhIdInAndPublishTimestampGreaterThanAndTypeEquals(ghIds, 1727452800L, ArticleTypeEnum.QUNFA.getVal()); articleList = articleList.stream().filter(o -> o.getItemIndex() == 1).collect(Collectors.toList()); Map> articleMap = articleList.stream().collect(Collectors.groupingBy(Article::getGhId, Collectors.toMap( o -> DateUtils.timestampToYMDStr(o.getPublishTimestamp(),"yyyyMMdd"), o -> o, (existing, replacement) -> replacement))); List accountAvgInfoList = accountAvgInfoRepository.getAllByGhIdIn(new HashSet<>(ghIds)); Map> accountAvgInfoMap = accountAvgInfoList.stream() .filter(o -> Objects.equals(o.getPosition(), "1")).collect(Collectors.groupingBy(AccountAvgInfo::getGhId, Collectors.toMap(AccountAvgInfo::getUpdateTime, o -> o))); Workbook workbook = new XSSFWorkbook(); Sheet sheet = workbook.createSheet("ExampleSheet"); int rowNum = 0; // 创建标题行 Row titleRow = sheet.createRow(rowNum); Cell titleCell = titleRow.createCell(0); titleCell.setCellValue("日期"); titleCell = titleRow.createCell(1); titleCell.setCellValue("ghID"); titleCell = titleRow.createCell(2); titleCell.setCellValue("账号名称"); titleCell = titleRow.createCell(3); titleCell.setCellValue("标题"); titleCell = titleRow.createCell(4); titleCell.setCellValue("策略"); titleCell = titleRow.createCell(5); titleCell.setCellValue("得分"); titleCell = titleRow.createCell(6); titleCell.setCellValue("HisFissionFansRateRateStrategy"); titleCell = titleRow.createCell(7); titleCell.setCellValue("HisFissionAvgReadRateRateStrategy"); titleCell = titleRow.createCell(8); titleCell.setCellValue("PublishTimesStrategy"); titleCell = titleRow.createCell(9); titleCell.setCellValue("ViewCountRateCorrelationStrategy"); titleCell = titleRow.createCell(10); titleCell.setCellValue("HisFissionAvgReadSumRateStrategy"); titleCell = titleRow.createCell(11); titleCell.setCellValue("HisFissionAvgReadRateCorrelationRateStrategy"); titleCell = titleRow.createCell(12); 
titleCell.setCellValue("HisFissionFansSumRateStrategy"); titleCell = titleRow.createCell(13); titleCell.setCellValue("SimilarityStrategy"); titleCell = titleRow.createCell(14); titleCell.setCellValue("ViewCountStrategy"); titleCell = titleRow.createCell(15); titleCell.setCellValue("ViewCountRateStrategy"); titleCell = titleRow.createCell(16); titleCell.setCellValue("HisFissionDeWeightAvgReadSumRateStrategy"); titleCell = titleRow.createCell(17); titleCell.setCellValue("阅读量"); titleCell = titleRow.createCell(18); titleCell.setCellValue("阅读均值"); titleCell = titleRow.createCell(19); titleCell.setCellValue("阅读均值倍数"); // 填充数据 String title = ""; for (PublishSortLog publishSortLog : sortLogList) { Map dateArticleMap = articleMap.get(publishSortLog.getGhId()); Article article = dateArticleMap.get(publishSortLog.getDateStr()); if (Objects.isNull(article) || !publishSortLog.getTitle().equals(article.getTitle())) { continue; } if (publishSortLog.getTitle().equals(title)) { continue; } title = publishSortLog.getTitle(); rowNum++; Row row = sheet.createRow(rowNum); Cell cell = row.createCell(0); cell.setCellValue(publishSortLog.getDateStr()); cell = row.createCell(1); cell.setCellValue(publishSortLog.getGhId()); cell = row.createCell(2); cell.setCellValue(publishSortLog.getAccountName()); cell = row.createCell(3); cell.setCellValue(publishSortLog.getTitle()); cell = row.createCell(4); cell.setCellValue(publishSortLog.getStrategy()); cell = row.createCell(5); cell.setCellValue(publishSortLog.getScore()); cell = row.createCell(6); JSONObject scoreMap = JSONObject.parseObject(publishSortLog.getScoreMap()); cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansRateRateStrategy")).orElse(0.0))); cell = row.createCell(7); cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateRateStrategy")).orElse(0.0))); cell = row.createCell(8); cell.setCellValue(String.format("%.3f", 
Optional.of(scoreMap.getDoubleValue("PublishTimesStrategy")).orElse(0.0))); cell = row.createCell(9); cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountRateCorrelationStrategy")).orElse(0.0))); cell = row.createCell(10); cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadSumRateStrategy")).orElse(0.0))); cell = row.createCell(11); cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateCorrelationRateStrategy")).orElse(0.0))); cell = row.createCell(12); cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansSumRateStrategy")).orElse(0.0))); cell = row.createCell(13); cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("SimilarityStrategy")).orElse(0.0))); cell = row.createCell(14); cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountStrategy")).orElse(0.0))); cell = row.createCell(15); cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountRateStrategy")).orElse(0.0))); cell = row.createCell(16); cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionDeWeightAvgReadSumRateStrategy")).orElse(0.0))); cell = row.createCell(17); cell.setCellValue(article.getShowViewCount()); cell = row.createCell(18); Map map = accountAvgInfoMap.get(article.getGhId()); if (Objects.nonNull(map)) { List avgMapDateList = new ArrayList<>(map.keySet()); String publishDate = DateUtils.findNearestDate(avgMapDateList, DateUtils.timestampToYMDStr(article.getPublishTimestamp(), "yyyy-MM-dd"), "yyyy-MM-dd"); AccountAvgInfo accountAvgInfo = map.get(publishDate); if (Objects.nonNull(accountAvgInfo)) { cell.setCellValue(accountAvgInfo.getReadAvg()); cell = row.createCell(19); cell.setCellValue(String.format("%.3f", article.getShowViewCount() / (double) accountAvgInfo.getReadAvg())); } } } try (FileOutputStream outputStream = 
new FileOutputStream("/Users/wangyunpeng/Downloads/example.xlsx")) { workbook.write(outputStream); } catch (IOException e) { e.printStackTrace(); } finally { try { workbook.close(); } catch (IOException e) { e.printStackTrace(); } } } @Test public void exportFromAliyunLog() { String folderPath = "/Users/wangyunpeng/Downloads/longarticle-recommend-server-test_info-log_20241014_150245.json"; File file = new File(folderPath); Workbook workbook = new XSSFWorkbook(); Sheet sheet = workbook.createSheet("ExampleSheet"); int rowNum = 0; // 创建标题行 Row titleRow = sheet.createRow(rowNum); Cell titleCell = titleRow.createCell(0); titleCell.setCellValue("日期"); titleCell = titleRow.createCell(1); titleCell.setCellValue("账号名称"); titleCell = titleRow.createCell(2); titleCell.setCellValue("id"); titleCell = titleRow.createCell(3); titleCell.setCellValue("标题"); titleCell = titleRow.createCell(4); titleCell.setCellValue("策略"); titleCell = titleRow.createCell(5); titleCell.setCellValue("得分"); titleCell = titleRow.createCell(6); titleCell.setCellValue("HisFissionFansRateRateStrategy"); titleCell = titleRow.createCell(7); titleCell.setCellValue("HisFissionAvgReadRateRateStrategy"); titleCell = titleRow.createCell(8); titleCell.setCellValue("PublishTimesStrategy"); titleCell = titleRow.createCell(9); titleCell.setCellValue("ViewCountRateCorrelationStrategy"); titleCell = titleRow.createCell(10); titleCell.setCellValue("HisFissionAvgReadSumRateStrategy"); titleCell = titleRow.createCell(11); titleCell.setCellValue("HisFissionAvgReadRateCorrelationRateStrategy"); titleCell = titleRow.createCell(12); titleCell.setCellValue("HisFissionFansSumRateStrategy"); titleCell = titleRow.createCell(13); titleCell.setCellValue("SimilarityStrategy"); titleCell = titleRow.createCell(14); titleCell.setCellValue("ViewCountStrategy"); titleCell = titleRow.createCell(15); titleCell.setCellValue("ViewCountRateStrategy"); titleCell = titleRow.createCell(16); 
titleCell.setCellValue("HisFissionDeWeightAvgReadSumRateStrategy"); try { String content = new String(Files.readAllBytes(file.toPath()), StandardCharsets.UTF_8); JSONArray jsonArray = JSONArray.parseArray(content); for (Object o : jsonArray) { JSONObject jsonObject = (JSONObject) o; Long time = jsonObject.getLong("__time__"); String message = jsonObject.getString("message"); int index = message.indexOf("["); String info = message.substring(0, index); String strategy = info.substring(0, info.indexOf(" ")); String accountName = info.substring(info.indexOf(" ")).replace("账号名称 ", "") .replace(" 头条评分结果", ""); String json = message.substring(index); JSONArray scoreArray = JSONArray.parseArray(json); for (Object scoreJSON : scoreArray) { JSONObject scoreObject = (JSONObject) scoreJSON; String id = scoreObject.getString("id"); String title = scoreObject.getString("title"); String score = scoreObject.getString("score"); String scoreMapStr = scoreObject.getString("scoreMap"); rowNum++; Row row = sheet.createRow(rowNum); Cell cell = row.createCell(0); cell.setCellValue(DateUtils.timestampToYMDStr(time, "yyyyMMdd")); cell = row.createCell(1); cell.setCellValue(accountName); cell = row.createCell(2); cell.setCellValue(id); cell = row.createCell(3); cell.setCellValue(title); cell = row.createCell(4); cell.setCellValue(strategy); cell = row.createCell(5); cell.setCellValue(score); cell = row.createCell(6); JSONObject scoreMap = JSONObject.parseObject(scoreMapStr); cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansRateRateStrategy")).orElse(0.0))); cell = row.createCell(7); cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateRateStrategy")).orElse(0.0))); cell = row.createCell(8); cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("PublishTimesStrategy")).orElse(0.0))); cell = row.createCell(9); cell.setCellValue(String.format("%.3f", 
Optional.of(scoreMap.getDoubleValue("ViewCountRateCorrelationStrategy")).orElse(0.0))); cell = row.createCell(10); cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadSumRateStrategy")).orElse(0.0))); cell = row.createCell(11); cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateCorrelationRateStrategy")).orElse(0.0))); cell = row.createCell(12); cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansSumRateStrategy")).orElse(0.0))); cell = row.createCell(13); cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("SimilarityStrategy")).orElse(0.0))); cell = row.createCell(14); cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountStrategy")).orElse(0.0))); cell = row.createCell(15); cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountRateStrategy")).orElse(0.0))); cell = row.createCell(16); cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionDeWeightAvgReadSumRateStrategy")).orElse(0.0))); } } try (FileOutputStream outputStream = new FileOutputStream("/Users/wangyunpeng/Downloads/example.xlsx")) { workbook.write(outputStream); } catch (IOException e) { e.printStackTrace(); } finally { try { workbook.close(); } catch (IOException e) { e.printStackTrace(); } } } catch (Exception e) { log.error("readFileError fileName:{}", file.getName(), e); } } @Test public void account() { List ghIds = Arrays.asList("gh_d7fa1998b4e1", "gh_52100b6803fb", "gh_8d7fc54d5026"); List accountNames = Arrays.asList("生活超读", "灵读生活", "生活情感读"); List fans = Arrays.asList(85759, 103083, 79214); List
articleList = articleRepository.getByGhIdInAndPublishTimestampLessThanAndTypeEquals( Arrays.asList("gh_02f5bca5b5d9"), 1729353600L, ArticleTypeEnum.QUNFA.getVal()); for (int i = 0; i < ghIds.size(); i++) { String ghId = ghIds.get(i); String accountName = accountNames.get(i); Integer fanCount = fans.get(i); Double rate = fanCount / 233474.0; for (Article article : articleList) { Article saveItem = new Article(); BeanUtils.copyProperties(article, saveItem); saveItem.setGhId(ghId); saveItem.setAccountName(accountName); saveItem.setShowViewCount((int) (article.getShowViewCount() * rate)); saveItem.setWxSn(UUID.randomUUID().toString().replace("-", "")); articleRepository.save(saveItem); } } } @Test public void correlation() { List ghIds = Lists.newArrayList("gh_e24da99dc899", "gh_183d80deffb8", "gh_be8c29139989", "gh_c69776baf2cd", "gh_b15de7c99912", "gh_1d887d61088c", "gh_3ed305b5817f", "gh_3e91f0624545", "gh_30816d8adb52", "gh_970460d9ccec", "gh_749271f1ccd5", "gh_ac43e43b253b" ); List sortLogList = publishSortLogRepository.findByGhIdInAndDateStrGreaterThanEqual(ghIds, "20240907"); sortLogList = sortLogList.stream().filter(o -> o.getIndex() == 1).collect(Collectors.toList()); sortLogList.sort(Comparator.comparing(PublishSortLog::getGhId).thenComparing(PublishSortLog::getDateStr)); List
articleList = articleRepository.getByGhIdInAndUpdateTimeGreaterThanAndTypeEquals(ghIds, 1725638400L, "9"); articleList = articleList.stream().filter(o -> o.getItemIndex() == 1).collect(Collectors.toList()); Map> articleMap = articleList.stream().collect(Collectors.groupingBy(Article::getGhId, Collectors.toMap( o -> DateUtils.timestampToYMDStr(o.getUpdateTime(), "yyyyMMdd"), o -> o, (existing, replacement) -> replacement))); List accountAvgInfoList = accountAvgInfoRepository.getAllByGhIdIn(new HashSet<>(ghIds)); Map> accountAvgInfoMap = accountAvgInfoList.stream() .filter(o -> Objects.equals(o.getPosition(), "1")).collect(Collectors.groupingBy(AccountAvgInfo::getGhId, Collectors.toMap(AccountAvgInfo::getUpdateTime, o -> o))); int rowNum = 0; Map> sortLogMap = sortLogList.stream().collect(Collectors.groupingBy(PublishSortLog::getGhId)); PearsonsCorrelation correlation = new PearsonsCorrelation(); Workbook workbook = new XSSFWorkbook(); Sheet sheet = workbook.createSheet("ExampleSheet"); // 创建标题行 Row titleRow = sheet.createRow(rowNum); for (Map.Entry> entry : sortLogMap.entrySet()) { String ghId = entry.getKey(); String name = entry.getValue().get(0).getAccountName(); List itemList = entry.getValue(); String title = ""; double[] scoreArr = new double[itemList.size()]; double[] HisFissionFansRateRateStrategyArr = new double[itemList.size()]; double[] HisFissionAvgReadRateRateStrategyArr = new double[itemList.size()]; double[] PublishTimesStrategyArr = new double[itemList.size()]; double[] ViewCountRateCorrelationStrategyArr = new double[itemList.size()]; double[] HisFissionAvgReadSumRateStrategyArr = new double[itemList.size()]; double[] HisFissionAvgReadRateCorrelationRateStrategyArr = new double[itemList.size()]; double[] HisFissionFansSumRateStrategyArr = new double[itemList.size()]; double[] SimilarityStrategyArr = new double[itemList.size()]; double[] ViewCountStrategyArr = new double[itemList.size()]; double[] ViewCountRateStrategyArr = new 
double[itemList.size()]; double[] HisFissionDeWeightAvgReadSumRateStrategyArr = new double[itemList.size()]; double[] scoreRateArr = new double[itemList.size()]; for (int i = 0; i < itemList.size(); i++) { PublishSortLog publishSortLog = itemList.get(i); Map dateArticleMap = articleMap.get(publishSortLog.getGhId()); Article article = dateArticleMap.get(publishSortLog.getDateStr()); if (Objects.isNull(article) || !publishSortLog.getTitle().equals(article.getTitle())) { continue; } if (publishSortLog.getTitle().equals(title)) { continue; } title = publishSortLog.getTitle(); scoreArr[i] = Double.parseDouble(publishSortLog.getScore()); JSONObject scoreMap = JSONObject.parseObject(publishSortLog.getScoreMap()); HisFissionFansRateRateStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansRateRateStrategy")).orElse(0.0))); HisFissionAvgReadRateRateStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateRateStrategy")).orElse(0.0))); PublishTimesStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("PublishTimesStrategy")).orElse(0.0))); ViewCountRateCorrelationStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountRateCorrelationStrategy")).orElse(0.0))); HisFissionAvgReadSumRateStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadSumRateStrategy")).orElse(0.0))); HisFissionAvgReadRateCorrelationRateStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateCorrelationRateStrategy")).orElse(0.0))); HisFissionFansSumRateStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansSumRateStrategy")).orElse(0.0))); SimilarityStrategyArr[i] = Double.parseDouble(String.format("%.3f", 
Optional.of(scoreMap.getDoubleValue("SimilarityStrategy")).orElse(0.0))); ViewCountStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountStrategy")).orElse(0.0))); ViewCountRateStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountRateStrategy")).orElse(0.0))); HisFissionDeWeightAvgReadSumRateStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionDeWeightAvgReadSumRateStrategy")).orElse(0.0))); Map map = accountAvgInfoMap.get(article.getGhId()); if (Objects.nonNull(map)) { List avgMapDateList = new ArrayList<>(map.keySet()); String publishDate = DateUtils.findNearestDate(avgMapDateList, DateUtils.timestampToYMDStr(article.getUpdateTime(), "yyyy-MM-dd"), "yyyy-MM-dd"); AccountAvgInfo accountAvgInfo = map.get(publishDate); if (Objects.nonNull(accountAvgInfo)) { scoreRateArr[i] = Double.parseDouble(String.format("%.3f", article.getShowViewCount() / (double) accountAvgInfo.getReadAvg())); } } } rowNum++; Row row = sheet.createRow(rowNum); Cell cell = row.createCell(0); cell = row.createCell(1); cell.setCellValue(ghId); cell = row.createCell(2); cell.setCellValue(name); cell = row.createCell(3); cell = row.createCell(4); cell = row.createCell(5); cell.setCellValue(correlation.correlation(scoreArr, scoreRateArr)); cell = row.createCell(6); cell.setCellValue(correlation.correlation(HisFissionFansRateRateStrategyArr, scoreRateArr)); cell = row.createCell(7); cell.setCellValue(correlation.correlation(HisFissionAvgReadRateRateStrategyArr, scoreRateArr)); cell = row.createCell(8); cell.setCellValue(correlation.correlation(PublishTimesStrategyArr, scoreRateArr)); cell = row.createCell(9); cell.setCellValue(correlation.correlation(ViewCountRateCorrelationStrategyArr, scoreRateArr)); cell = row.createCell(10); cell.setCellValue(correlation.correlation(HisFissionAvgReadSumRateStrategyArr, scoreRateArr)); cell = row.createCell(11); 
cell.setCellValue(correlation.correlation(HisFissionAvgReadRateCorrelationRateStrategyArr, scoreRateArr)); cell = row.createCell(12); cell.setCellValue(correlation.correlation(HisFissionFansSumRateStrategyArr, scoreRateArr)); cell = row.createCell(13); cell.setCellValue(correlation.correlation(SimilarityStrategyArr, scoreRateArr)); cell = row.createCell(14); cell.setCellValue(correlation.correlation(ViewCountStrategyArr, scoreRateArr)); cell = row.createCell(15); cell.setCellValue(correlation.correlation(ViewCountRateStrategyArr, scoreRateArr)); cell = row.createCell(16); cell.setCellValue(correlation.correlation(HisFissionDeWeightAvgReadSumRateStrategyArr, scoreRateArr)); } try (FileOutputStream outputStream = new FileOutputStream("/Users/wangyunpeng/Downloads/example.xlsx")) { workbook.write(outputStream); } catch (IOException e) { e.printStackTrace(); } finally { try { workbook.close(); } catch (IOException e) { e.printStackTrace(); } } }

    /**
     * Converts a downloaded scoring log (a JSON array of log entries) into an Excel sheet.
     *
     * <p>For every entry, the {@code message} text before the first {@code "["} is split at its
     * first space into the strategy name and the account description; the text from {@code "["}
     * onwards is parsed as a JSON array of scored items ({@code id}, {@code title},
     * {@code score}, {@code scoreMap}). One row is written per scored item, with every
     * per-strategy score formatted to three decimals.
     *
     * <p>Entries whose message does not have that shape are skipped with a warning. If the log
     * file cannot be read or parsed, nothing is written. The workbook is always closed.
     */
    @Test
    void getScoreFromLogFile() {
        // Columns 0-5 are fixed; the per-strategy columns follow in exactly this order. The
        // same array drives both the header row and the data rows so they cannot drift apart.
        final String[] fixedHeaders = {"日期", "账号名称", "id", "标题", "策略", "得分"};
        final String[] strategyNames = {
                "HisFissionFansRateRateStrategy",
                "HisFissionAvgReadRateRateStrategy",
                "PublishTimesStrategy",
                "ViewCountRateCorrelationStrategy",
                "HisFissionAvgReadSumRateStrategy",
                "HisFissionAvgReadRateCorrelationRateStrategy",
                "HisFissionFansSumRateStrategy",
                "SimilarityStrategy",
                "ViewCountStrategy",
                "ViewCountRateStrategy",
                "HisFissionDeWeightAvgReadSumRateStrategy"
        };
        final String outputPath = "/Users/wangyunpeng/Downloads/example.xlsx";
        File file = new File("/Users/wangyunpeng/Downloads/b78020b8-d9df-466f-bd01-cd982bb986d0.json");

        // try-with-resources: the workbook is released even when reading or parsing fails.
        try (Workbook workbook = new XSSFWorkbook()) {
            Sheet sheet = workbook.createSheet("ExampleSheet");
            int rowNum = 0;

            // Create the header row.
            Row titleRow = sheet.createRow(rowNum);
            int titleCol = 0;
            for (String header : fixedHeaders) {
                titleRow.createCell(titleCol++).setCellValue(header);
            }
            for (String strategyName : strategyNames) {
                titleRow.createCell(titleCol++).setCellValue(strategyName);
            }

            try {
                String content = new String(Files.readAllBytes(file.toPath()), StandardCharsets.UTF_8);
                JSONArray jsonArray = JSONArray.parseArray(content);
                for (Object o : jsonArray) {
                    JSONObject jsonObject = (JSONObject) o;
                    Long time = jsonObject.getLong("__time__");
                    String message = jsonObject.getString("message");

                    // Skip entries that do not carry a score array instead of aborting the export.
                    int index = Objects.isNull(message) ? -1 : message.indexOf("[");
                    if (index < 0) {
                        log.warn("getScoreFromLogFile skip entry without score array, message:{}", message);
                        continue;
                    }
                    String info = message.substring(0, index);
                    int blankIndex = info.indexOf(" ");
                    if (blankIndex < 0) {
                        log.warn("getScoreFromLogFile skip entry without strategy prefix, message:{}", message);
                        continue;
                    }
                    String strategy = info.substring(0, blankIndex);
                    String accountName = info.substring(blankIndex).replace("账号名称 ", "")
                            .replace(" 头条评分结果", "");

                    JSONArray scoreArray = JSONArray.parseArray(message.substring(index));
                    for (Object scoreJSON : scoreArray) {
                        JSONObject scoreObject = (JSONObject) scoreJSON;
                        JSONObject parsedScoreMap = JSONObject.parseObject(scoreObject.getString("scoreMap"));
                        // A missing scoreMap is reported as 0.000 for every strategy.
                        JSONObject scoreMap = Objects.isNull(parsedScoreMap) ? new JSONObject() : parsedScoreMap;

                        rowNum++;
                        Row row = sheet.createRow(rowNum);
                        int col = 0;
                        row.createCell(col++).setCellValue(DateUtils.timestampToYMDStr(time, "yyyyMMdd"));
                        row.createCell(col++).setCellValue(accountName);
                        row.createCell(col++).setCellValue(scoreObject.getString("id"));
                        row.createCell(col++).setCellValue(scoreObject.getString("title"));
                        row.createCell(col++).setCellValue(strategy);
                        row.createCell(col++).setCellValue(scoreObject.getString("score"));
                        for (String strategyName : strategyNames) {
                            // getDoubleValue returns a primitive, so no Optional wrapper is needed.
                            row.createCell(col++)
                                    .setCellValue(String.format("%.3f", scoreMap.getDoubleValue(strategyName)));
                        }
                    }
                }
            } catch (Exception e) {
                log.error("readFileError fileName:{}", file.getName(), e);
                // Nothing is written when the log could not be read.
                return;
            }

            try (FileOutputStream outputStream = new FileOutputStream(outputPath)) {
                workbook.write(outputStream);
            } catch (IOException e) {
                log.error("writeFileError path:{}", outputPath, e);
            }
        } catch (IOException e) {
            log.error("closeWorkbookError", e);
        }
    }

    /**
     * Selects the articles published after the cut-off whose {@code rootSourceIdList} JSON array
     * holds exactly one element, resolves the matching publish contents by account and title,
     * and passes their ids to {@code updatePublishContentSingleMiniProgram} in batches.
     *
     * <p>NOTE(review): this test calls an update method on the mapper, so it is presumably not
     * side-effect free; confirm the target data source before running it.
     */
    @Test
    public void checkTest() {
        // 1732982400 = 2024-11-30T16:00:00Z, i.e. 2024-12-01 00:00 at UTC+8.
        List<Article> articleList = articleRepository.getByPublishTimestampGreaterThan(1732982400L);

        List<Article> singleArticleList = new ArrayList<>();
        for (Article article : articleList) {
            if (Objects.isNull(article.getRootSourceIdList())) {
                continue;
            }
            try {
                List<String> rootSourceIdList = JSONArray.parseArray(article.getRootSourceIdList(), String.class);
                if (Objects.nonNull(rootSourceIdList) && rootSourceIdList.size() == 1) {
                    singleArticleList.add(article);
                }
            } catch (Exception e) {
                // Best effort: an unparsable list only excludes this article, but leave a trace.
                log.warn("checkTest skip article with unparsable rootSourceIdList, title:{}",
                        article.getTitle(), e);
            }
        }
        if (singleArticleList.isEmpty()) {
            // Avoid querying the repository with an empty IN list.
            log.info("checkTest no article with a single rootSourceId, nothing to update");
            return;
        }

        List<String> ghIds = singleArticleList.stream().map(Article::getGhId).distinct().collect(Collectors.toList());
        List<PublishAccount> publishAccountList = publishAccountRepository.getAllByGhIdIn(ghIds);
        log.info("newSortStrategyData publishAccountList finish");
        Map<String, PublishAccount> publishAccountMap = publishAccountList.stream()
                .collect(Collectors.toMap(PublishAccount::getGhId, o -> o));

        // Build the lookup parameters for the published contents; articles whose account is
        // unknown are left out.
        List<PublishContentParam> publishContentParamList = new ArrayList<>();
        for (Article article : singleArticleList) {
            PublishAccount account = publishAccountMap.get(article.getGhId());
            if (Objects.isNull(account)) {
                continue;
            }
            PublishContentParam item = new PublishContentParam();
            item.setTitle(article.getTitle());
            item.setPublishAccountId(account.getId());
            publishContentParamList.add(item);
        }

        List<PublishContentDTO> publishContents = new ArrayList<>();
        for (List<PublishContentParam> partitions : Lists.partition(publishContentParamList, 100)) {
            publishContents.addAll(publishContentMapper.getPublishContentByTitle(partitions));
        }

        // Update in batches of 500 ids.
        Lists.partition(
                        publishContents.stream().map(PublishContentDTO::getId).collect(Collectors.toList()), 500)
                .forEach(publishContentMapper::updatePublishContentSingleMiniProgram);
    }
}