package com.tzld.longarticle.recommend.server;

import cn.hutool.core.collection.CollectionUtil;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.google.common.collect.Lists;
import com.tzld.longarticle.recommend.server.mapper.crawler.CrawlerBaseMapper;
import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountAvgInfo;
import com.tzld.longarticle.recommend.server.model.entity.crawler.Article;
import com.tzld.longarticle.recommend.server.model.entity.crawler.ArticleDetailInfo;
import com.tzld.longarticle.recommend.server.model.entity.crawler.PublishSortLog;
import com.tzld.longarticle.recommend.server.repository.crawler.AccountAvgInfoRepository;
import com.tzld.longarticle.recommend.server.repository.crawler.ArticleDetailInfoRepository;
import com.tzld.longarticle.recommend.server.repository.crawler.ArticleRepository;
import com.tzld.longarticle.recommend.server.repository.crawler.PublishSortLogRepository;
import com.tzld.longarticle.recommend.server.service.RecommendService;
import com.tzld.longarticle.recommend.server.service.recall.RecallService;
import com.tzld.longarticle.recommend.server.util.DateUtils;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;

import javax.annotation.Resource;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.*;
import java.util.stream.Collectors;

/**
 * Ad-hoc data-analysis tests that run inside the Spring Boot application context.
 *
 * <p>The active tests read articles, per-account averages and publish sort logs through
 * the injected repositories and either print an aggregated JSON summary ({@link #test()})
 * or write an Excel sheet to a fixed local path ({@link #exportScoreData()}).
 */
@SpringBootTest(classes = Application.class)
public class RecommendTest {

    /** Destination of the workbook written by {@link #exportScoreData()}. */
    private static final String EXPORT_PATH = "/Users/wangyunpeng/Downloads/example.xlsx";

    /** Header labels for columns 0-5 of the exported sheet. */
    private static final String[] LEADING_HEADERS = {"日期", "ghID", "账号名称", "标题", "策略", "得分"};

    /**
     * Score-map keys exported in columns 6-16. Each key is used both as the column
     * header and as the lookup key into the parsed score map.
     */
    private static final String[] STRATEGY_COLUMNS = {
            "HisFissionFansRateRateStrategy",
            "HisFissionAvgReadRateRateStrategy",
            "PublishTimesStrategy",
            "ViewCountRateCorrelationStrategy",
            "HisFissionAvgReadSumRateStrategy",
            "HisFissionAvgReadRateCorrelationRateStrategy",
            "HisFissionFansSumRateStrategy",
            "SimilarityStrategy",
            "ViewCountStrategy",
            "ViewCountRateStrategy",
            "HisFissionDeWeightAvgReadSumRateStrategy"
    };

    /** Header labels for columns 17-19 of the exported sheet. */
    private static final String[] TRAILING_HEADERS = {"阅读量", "阅读均值", "阅读均值倍数"};

    @Resource
    private RecommendService recommendService;
    @Resource
    private RecallService recallService;
    @Resource
    private ArticleRepository articleRepository;
    @Resource
    private ArticleDetailInfoRepository articleDetailInfoRepository;
    @Resource
    private AccountAvgInfoRepository accountAvgInfoRepository;
    @Resource
    private CrawlerBaseMapper crawlerBaseMapper;
    @Resource
    private PublishSortLogRepository publishSortLogRepository;

    // ---------------------------------------------------------------------------------
    // Disabled ad-hoc scripts, kept for reference only. They use types that this file
    // no longer imports (RecallParam, SimpleDateFormat, BufferedReader, ObjectMapper,
    // BeanUtils, ...), so they will not compile if simply uncommented.
    // ---------------------------------------------------------------------------------
//    @Test
//    void recall() {
//        RecallParam param = new RecallParam();
//        param.setAccountId("20231213123536190184852");
//        param.setPlanId("20240718181730864154902");
//        RecallResult recallResult = recallService.recall(param);
//        System.out.println(JSONObject.toJSONString(recallResult));
//    }
//
//    @Test
//    void exportData() {
//        Set<String> ghIds = new HashSet<>(Arrays.asList("gh_adca24a8f429", "gh_e0eb490115f5", "gh_51e4ad40466d", "gh_95ed5ecf9363"));
//        List<Article> articleList = articleRepository.getByGhIdInAndUpdateTimeGreaterThanAndTypeEquals(ghIds, 1722441600L, "9");
//
//        Map<String, Map<Integer, List<Article>>> map = articleList.stream()
//                .collect(Collectors.groupingBy(Article::getTitle, Collectors.groupingBy(Article::getItemIndex)));
//        Set<String> snList = articleList.stream().map(Article::getWxSn).collect(Collectors.toSet());
//        List<ArticleDetailInfo> articleDetailInfoList = articleDetailInfoRepository.getAllByWxSnIn(new ArrayList<>(snList));
//        Map<String, List<ArticleDetailInfo>> articleDetailInfoMap = articleDetailInfoList.stream()
//                .collect(Collectors.groupingBy(ArticleDetailInfo::getWxSn));
//
//        List<AccountAvgInfo> accountAvgInfoList = accountAvgInfoRepository.getAllByGhIdInAndStatusEquals(ghIds, 1);
//        Map<String, Map<String, AccountAvgInfo>> accountAvgInfoIndexMap = accountAvgInfoList.stream().collect(
//                Collectors.groupingBy(AccountAvgInfo::getGhId, Collectors.toMap(AccountAvgInfo::getPosition, o -> o)));
//        JSONArray jsonArray = new JSONArray();
//        for (Article article : articleList) {
//            List<ArticleDetailInfo> articleDetailInfos = articleDetailInfoMap.get(article.getWxSn());
//            if (CollectionUtils.isEmpty(articleDetailInfos)) {
//                continue;
//            }
//            Date minDate = articleDetailInfos.stream().map(ArticleDetailInfo::getRecallDt).min(Date::compareTo).orElse(new Date());
//            int sumfirstLevel = 0;
//            int sumFission0 = 0;
//            int sumFission1 = 0;
//            int sumFission2 = 0;
//            for (ArticleDetailInfo articleDetailInfo : articleDetailInfos) {
//                if (articleDetailInfo.getRecallDt().equals(minDate)) {
//                    sumfirstLevel += Optional.ofNullable(articleDetailInfo.getFirstLevel()).orElse(0);
//                    sumFission0 += Optional.ofNullable(articleDetailInfo.getFission0()).orElse(0);
//                    sumFission1 += Optional.ofNullable(articleDetailInfo.getFission1()).orElse(0);
//                    sumFission2 += Optional.ofNullable(articleDetailInfo.getFission2()).orElse(0);
//                }
//            }
//            Map<String, AccountAvgInfo> accountAvgInfoMap = accountAvgInfoIndexMap.get(article.getGhId());
//            AccountAvgInfo avgInfo = accountAvgInfoMap.get(article.getItemIndex().toString());
//            SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
//            String date = sdf.format(new Date(article.getUpdateTime() * 1000));
//            JSONObject obj = new JSONObject();
//            obj.put("ghId", article.getGhId());
//            obj.put("accountName", article.getAccountName());
//            obj.put("title", article.getTitle());
//            obj.put("index", article.getItemIndex());
//            obj.put("viewCount", article.getShowViewCount());
//            obj.put("time", date);
//            if (Objects.nonNull(avgInfo)) {
//                obj.put("fans", avgInfo.getFans());
//                obj.put("avgViewCount", avgInfo.getReadAvg());
//                obj.put("viewCountRate", (article.getShowViewCount() * 1.0) / avgInfo.getReadAvg());
//            }
//            obj.put("firstLevel", sumfirstLevel);
//            obj.put("fission0", sumFission0);
//            obj.put("fission1", sumFission1);
//            obj.put("fission2", sumFission2);
//            jsonArray.add(obj);
//        }
//        System.out.println(jsonArray.toJSONString());
//    }
//
//    @Test
//    void ii() throws IOException {
//        String dateStr = "20240911";
//        List<AccountAvgInfo> accountAvgInfoList = accountAvgInfoRepository.getAllByStatusEquals(1);
//        Map<String, String> accountMap = accountAvgInfoList.stream().collect(
//                Collectors.toMap(AccountAvgInfo::getAccountName, AccountAvgInfo::getGhId, (existing, replacement) -> replacement));
//        BufferedReader reader = new BufferedReader(new FileReader("/Users/wangyunpeng/Downloads/账号相关性.json"));
//        StringBuilder sb = new StringBuilder();
//        String line;
//        while ((line = reader.readLine()) != null) {
//            sb.append(line);
//        }
//        String jsonStr = sb.toString();
//        // Parse the JSON with ObjectMapper
//        ObjectMapper objectMapper = new ObjectMapper();
//
//        try {
//            // Convert the JSON into a nested Map<String, Map<String, Double>>
//            Map<String, Map<String, Double>> result = objectMapper.readValue(jsonStr,
//                    new TypeReference<Map<String, Map<String, Double>>>() {
//                    });
//
//            // Print the converted result
//            System.out.println(result);
//            List<AccountCorrelation> saveList = new ArrayList<>();
//            result.forEach((k, v) -> {
//                String ghId = accountMap.get(k);
//                v.forEach((k1, v1) -> {
//                    String relGhId = accountMap.get(k1);
//                    AccountCorrelation save = new AccountCorrelation();
//                    save.setDateStr(dateStr);
//                    save.setGhId(ghId);
//                    save.setAccountName(k);
//                    save.setRelGhId(relGhId);
//                    save.setRelAccountName(k1);
//                    save.setStatus(1);
//                    save.setCorrelation(v1);
//                    saveList.add(save);
//                });
//            });
//            List<AccountCorrelation> all = new ArrayList<>(saveList);
//            for (AccountCorrelation item : all) {
//                if (!item.getGhId().equals(item.getRelGhId())) {
//                    AccountCorrelation save = new AccountCorrelation();
//                    BeanUtils.copyProperties(item, save);
//                    save.setGhId(item.getRelGhId());
//                    save.setAccountName(item.getRelAccountName());
//                    save.setRelGhId(item.getGhId());
//                    save.setRelAccountName(item.getAccountName());
//                    saveList.add(save);
//                }
//            }
//            crawlerBaseMapper.batchInsertAccountCorrelation(saveList);
//
//
//        } catch (IOException e) {
//            e.printStackTrace();
//        }
//    }

    /**
     * Compares two fixed groups of accounts ("morning" and "noon") and prints one JSON
     * summary object per group.
     *
     * <p>For every first-position article in the queried time window, the fission
     * counters of the detail rows carrying the earliest {@code recallDt} are summed per
     * group. When an average row exists for the article's account, the account's
     * {@code readAvg} and {@code fans} and the article's view count are summed as well.
     */
    @Test
    public void test() {
        // Only membership is tested below, so sets are used instead of lists.
        Set<String> morning = new HashSet<>(Arrays.asList("gh_084a485e859a", "gh_183d80deffb8", "gh_5ff48e9fb9ef",
                "gh_6d9f36e3a7be", "gh_9f8dc5b0c74e", "gh_e0eb490115f5", "gh_e24da99dc899"));
        Set<String> noon = new HashSet<>(Arrays.asList("gh_080bb43aa0dc", "gh_0c89e11f8bf3", "gh_192c9cf58b13",
                "gh_1b27dd1beeca", "gh_1d887d61088c", "gh_29074b51f2b7", "gh_3ed305b5817f", "gh_5ae65db96cb7",
                "gh_6b7c2a257263", "gh_6cfd1132df94", "gh_6d205db62f04", "gh_72bace6b3059", "gh_7e5818b2dd83",
                "gh_7f5075624a50", "gh_89ef4798d3ea", "gh_9877c8541764", "gh_9eef14ad6c16", "gh_a2901d34f75b",
                "gh_b15de7c99912", "gh_b676b7ad9b74", "gh_b6f2c5332c72", "gh_bfe5b705324a", "gh_bff0bcb0694a",
                "gh_c5cdf60d9ab4", "gh_c69776baf2cd", "gh_d49df5e974ca", "gh_d4dffc34ac39", "gh_dd4c857bbb36",
                "gh_ee78360d06f5", "gh_f25b5fb01977", "gh_f902cea89e48", "gh_ff487cb5dab3"));
        String dateStr = "2024-09-12";

        // NOTE(review): the bounds look like epoch seconds for 2024-09-01 00:00 and
        // 2024-09-19 00:00 at UTC+8 - confirm the unit of Article.updateTime.
        long windowStartExclusive = 1725120000L;
        long windowEndExclusive = 1726675200L;

        List<Article> articleList = articleRepository
                .getByUpdateTimeGreaterThanAndTypeEquals(windowStartExclusive, "9").stream()
                .filter(o -> o.getItemIndex() == 1 && o.getUpdateTime() < windowEndExclusive)
                .collect(Collectors.toList());

        // Null-safe position check (consistent with exportScoreData). Duplicate ghIds
        // keep the last row instead of aborting with IllegalStateException.
        Map<String, AccountAvgInfo> accountAvgInfoMap = accountAvgInfoRepository.getAllByUpdateTime(dateStr).stream()
                .filter(o -> "1".equals(o.getPosition()))
                .collect(Collectors.toMap(AccountAvgInfo::getGhId, o -> o, (existing, replacement) -> replacement));

        List<String> wxSnList = articleList.stream().map(Article::getWxSn).collect(Collectors.toList());
        Map<String, List<ArticleDetailInfo>> articleDetailInfoMap =
                articleDetailInfoRepository.getAllByWxSnIn(wxSnList).stream()
                        .collect(Collectors.groupingBy(ArticleDetailInfo::getWxSn));

        SlotTotals morningTotals = new SlotTotals();
        SlotTotals noonTotals = new SlotTotals();
        for (Article article : articleList) {
            List<ArticleDetailInfo> articleDetailInfos = articleDetailInfoMap.get(article.getWxSn());
            if (CollectionUtil.isEmpty(articleDetailInfos)) {
                continue;
            }
            boolean inMorning = morning.contains(article.getGhId());
            boolean inNoon = noon.contains(article.getGhId());

            // Earliest non-null recall date; stays null when every row lacks one, in
            // which case no row matches below.
            Date minDate = articleDetailInfos.stream()
                    .map(ArticleDetailInfo::getRecallDt)
                    .filter(Objects::nonNull)
                    .min(Date::compareTo)
                    .orElse(null);
            for (ArticleDetailInfo articleDetailInfo : articleDetailInfos) {
                Date recallDt = articleDetailInfo.getRecallDt();
                if (recallDt == null || !recallDt.equals(minDate)) {
                    continue;
                }
                if (inMorning) {
                    morningTotals.addFission(articleDetailInfo);
                }
                if (inNoon) {
                    noonTotals.addFission(articleDetailInfo);
                }
            }

            AccountAvgInfo accountAvgInfo = accountAvgInfoMap.get(article.getGhId());
            if (Objects.nonNull(accountAvgInfo)) {
                if (inMorning) {
                    morningTotals.addRead(article, accountAvgInfo);
                }
                if (inNoon) {
                    noonTotals.addRead(article, accountAvgInfo);
                }
            }
        }

        JSONArray result = new JSONArray();
        result.add(morningTotals.toJson("早上"));
        result.add(noonTotals.toJson("中午"));
        System.out.println(JSONObject.toJSONString(result));
    }

    /**
     * Exports the score breakdown of first-position publish sort logs to an Excel file
     * at {@link #EXPORT_PATH}.
     *
     * <p>A log entry is exported only when an article exists for the same account and
     * date string with an identical title. Consecutive entries with the same title
     * (after sorting by account, then date) are written once. Each row also carries
     * the article's view count and, when an average row can be located through
     * {@code DateUtils.findNearestDate}, the account's read average and the ratio
     * between the two.
     *
     * @throws IOException if the workbook cannot be written or closed; the failure is
     *                     propagated so the test does not pass silently
     */
    @Test
    public void exportScoreData() throws IOException {
        List<String> ghIds = Lists.newArrayList("gh_e24da99dc899",
                "gh_183d80deffb8",
                "gh_be8c29139989",
                "gh_c69776baf2cd",
                "gh_b15de7c99912",
                "gh_1d887d61088c",
                "gh_3ed305b5817f",
                "gh_3e91f0624545",
                "gh_30816d8adb52",
                "gh_970460d9ccec",
                "gh_749271f1ccd5"
        );

        List<PublishSortLog> sortLogList = publishSortLogRepository
                .findByGhIdInAndDateStrGreaterThanEqual(ghIds, "20240907").stream()
                .filter(o -> o.getIndex() == 1)
                .sorted(Comparator.comparing(PublishSortLog::getGhId).thenComparing(PublishSortLog::getDateStr))
                .collect(Collectors.toList());

        // ghId -> (yyyyMMdd publish date -> article); a later article on the same day wins.
        Map<String, Map<String, Article>> articleMap = articleRepository
                .getByGhIdInAndUpdateTimeGreaterThanAndTypeEquals(ghIds, 1725638400L, "9").stream()
                .filter(o -> o.getItemIndex() == 1)
                .collect(Collectors.groupingBy(Article::getGhId, Collectors.toMap(
                        o -> DateUtils.timestampToYMDStr(o.getUpdateTime(), "yyyyMMdd"),
                        o -> o,
                        (existing, replacement) -> replacement)));

        // ghId -> (average row's updateTime -> average row). Duplicate updateTime keys
        // keep the last row instead of aborting with IllegalStateException.
        Map<String, Map<String, AccountAvgInfo>> accountAvgInfoMap = accountAvgInfoRepository
                .getAllByGhIdIn(new HashSet<>(ghIds)).stream()
                .filter(o -> Objects.equals(o.getPosition(), "1"))
                .collect(Collectors.groupingBy(AccountAvgInfo::getGhId, Collectors.toMap(
                        AccountAvgInfo::getUpdateTime,
                        o -> o,
                        (existing, replacement) -> replacement)));

        // try-with-resources closes the workbook even when row building or writing fails.
        try (Workbook workbook = new XSSFWorkbook()) {
            Sheet sheet = workbook.createSheet("ExampleSheet");
            int rowNum = 0;
            // Create the header row
            writeHeaderRow(sheet.createRow(rowNum));

            // Fill in the data rows
            String lastTitle = "";
            for (PublishSortLog publishSortLog : sortLogList) {
                Map<String, Article> dateArticleMap = articleMap.get(publishSortLog.getGhId());
                if (Objects.isNull(dateArticleMap)) {
                    // No first-position article at all for this account.
                    continue;
                }
                Article article = dateArticleMap.get(publishSortLog.getDateStr());
                if (Objects.isNull(article) || !Objects.equals(publishSortLog.getTitle(), article.getTitle())) {
                    continue;
                }
                if (Objects.equals(publishSortLog.getTitle(), lastTitle)) {
                    continue;
                }
                lastTitle = publishSortLog.getTitle();
                rowNum++;
                writeScoreRow(sheet.createRow(rowNum), publishSortLog, article,
                        accountAvgInfoMap.get(article.getGhId()));
            }

            try (FileOutputStream outputStream = new FileOutputStream(EXPORT_PATH)) {
                workbook.write(outputStream);
            }
        }
    }

    /** Writes the 20 column headers (leading, strategy and trailing labels) into {@code headerRow}. */
    private static void writeHeaderRow(Row headerRow) {
        int column = 0;
        for (String header : LEADING_HEADERS) {
            headerRow.createCell(column++).setCellValue(header);
        }
        for (String header : STRATEGY_COLUMNS) {
            headerRow.createCell(column++).setCellValue(header);
        }
        for (String header : TRAILING_HEADERS) {
            headerRow.createCell(column++).setCellValue(header);
        }
    }

    /**
     * Fills one data row of the score export.
     *
     * @param row            the freshly created sheet row to populate
     * @param publishSortLog the sort log supplying date, account, title, strategy and scores
     * @param article        the article matched to the log entry
     * @param avgByDate      the account's average rows keyed by their update time, or
     *                       {@code null} when the account has none
     */
    private static void writeScoreRow(Row row, PublishSortLog publishSortLog, Article article,
                                      Map<String, AccountAvgInfo> avgByDate) {
        row.createCell(0).setCellValue(publishSortLog.getDateStr());
        row.createCell(1).setCellValue(publishSortLog.getGhId());
        row.createCell(2).setCellValue(publishSortLog.getAccountName());
        row.createCell(3).setCellValue(publishSortLog.getTitle());
        row.createCell(4).setCellValue(publishSortLog.getStrategy());
        row.createCell(5).setCellValue(publishSortLog.getScore());

        // parseObject may return null for a null/blank payload; fall back to an empty
        // object so every strategy column is written as 0.000.
        JSONObject parsed = JSONObject.parseObject(publishSortLog.getScoreMap());
        JSONObject scoreMap = parsed != null ? parsed : new JSONObject();
        int column = LEADING_HEADERS.length;
        for (String strategy : STRATEGY_COLUMNS) {
            // getDoubleValue returns a primitive, so no Optional wrapper is needed.
            row.createCell(column++).setCellValue(String.format("%.3f", scoreMap.getDoubleValue(strategy)));
        }

        row.createCell(17).setCellValue(article.getShowViewCount());

        if (Objects.isNull(avgByDate)) {
            return;
        }
        List<String> avgMapDateList = new ArrayList<>(avgByDate.keySet());
        String publishDate = DateUtils.findNearestDate(avgMapDateList,
                DateUtils.timestampToYMDStr(article.getUpdateTime(), "yyyy-MM-dd"), "yyyy-MM-dd");
        AccountAvgInfo accountAvgInfo = avgByDate.get(publishDate);
        if (Objects.nonNull(accountAvgInfo)) {
            Cell avgCell = row.createCell(18);
            avgCell.setCellValue(accountAvgInfo.getReadAvg());
            Cell ratioCell = row.createCell(19);
            ratioCell.setCellValue(String.format("%.3f",
                    article.getShowViewCount() / (double) accountAvgInfo.getReadAvg()));
        }
    }

    /** Running totals for one account group (morning or noon) in {@link #test()}. */
    private static final class SlotTotals {
        private int sumFission;
        private int sumRead;
        private long sumFans;
        private double readAvg;

        /** Adds the three fission counters of {@code detail}, treating null counters as 0. */
        void addFission(ArticleDetailInfo detail) {
            sumFission += Optional.ofNullable(detail.getFission0()).orElse(0);
            sumFission += Optional.ofNullable(detail.getFission1()).orElse(0);
            sumFission += Optional.ofNullable(detail.getFission2()).orElse(0);
        }

        /** Adds the account's read average and fans, and the article's view count. */
        void addRead(Article article, AccountAvgInfo accountAvgInfo) {
            readAvg += accountAvgInfo.getReadAvg();
            sumFans += accountAvgInfo.getFans();
            sumRead += article.getShowViewCount();
        }

        /**
         * Builds the summary object for this group.
         *
         * @param label value stored under the "时间" key
         * @return the totals plus two ratios; both are floating-point divisions, so a
         *         zero denominator yields NaN or Infinity rather than an exception
         */
        JSONObject toJson(String label) {
            JSONObject json = new JSONObject();
            json.put("时间", label);
            json.put("sumFission", sumFission);
            json.put("readAvg", readAvg);
            json.put("rate", sumFission / readAvg);
            json.put("sumRead", sumRead);
            json.put("sumFans", sumFans);
            json.put("阅读率", sumRead / (double) sumFans);
            return json;
        }
    }
}