RecommendTest.java 39 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675
  1. package com.tzld.longarticle.recommend.server;
  2. import cn.hutool.core.collection.CollectionUtil;
  3. import com.alibaba.fastjson.JSONArray;
  4. import com.alibaba.fastjson.JSONObject;
  5. import com.google.common.collect.Lists;
  6. import com.tzld.longarticle.recommend.server.mapper.crawler.CrawlerBaseMapper;
  7. import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountAvgInfo;
  8. import com.tzld.longarticle.recommend.server.model.entity.crawler.Article;
  9. import com.tzld.longarticle.recommend.server.model.entity.crawler.ArticleDetailInfo;
  10. import com.tzld.longarticle.recommend.server.model.entity.crawler.PublishSortLog;
  11. import com.tzld.longarticle.recommend.server.repository.crawler.AccountAvgInfoRepository;
  12. import com.tzld.longarticle.recommend.server.repository.crawler.ArticleDetailInfoRepository;
  13. import com.tzld.longarticle.recommend.server.repository.crawler.ArticleRepository;
  14. import com.tzld.longarticle.recommend.server.repository.crawler.PublishSortLogRepository;
  15. import com.tzld.longarticle.recommend.server.service.RecommendService;
  16. import com.tzld.longarticle.recommend.server.service.recall.RecallService;
  17. import com.tzld.longarticle.recommend.server.util.DateUtils;
  18. import lombok.extern.slf4j.Slf4j;
  19. import org.apache.commons.math3.stat.correlation.PearsonsCorrelation;
  20. import org.apache.poi.ss.usermodel.Cell;
  21. import org.apache.poi.ss.usermodel.Row;
  22. import org.apache.poi.ss.usermodel.Sheet;
  23. import org.apache.poi.ss.usermodel.Workbook;
  24. import org.apache.poi.xssf.usermodel.XSSFWorkbook;
  25. import org.junit.jupiter.api.Test;
  26. import org.springframework.boot.test.context.SpringBootTest;
  27. import javax.annotation.Resource;
  28. import java.io.File;
  29. import java.io.FileOutputStream;
  30. import java.io.IOException;
  31. import java.nio.charset.StandardCharsets;
  32. import java.nio.file.Files;
  33. import java.util.*;
  34. import java.util.stream.Collectors;
  35. @SpringBootTest(classes = Application.class)
  36. @Slf4j
  37. public class RecommendTest {
  38. @Resource
  39. private RecommendService recommendService;
  40. @Resource
  41. private RecallService recallService;
  42. @Resource
  43. private ArticleRepository articleRepository;
  44. @Resource
  45. private ArticleDetailInfoRepository articleDetailInfoRepository;
  46. @Resource
  47. private AccountAvgInfoRepository accountAvgInfoRepository;
  48. @Resource
  49. private CrawlerBaseMapper crawlerBaseMapper;
  50. @Resource
  51. private PublishSortLogRepository publishSortLogRepository;
  52. // @Test
  53. // void recall() {
  54. // RecallParam param = new RecallParam();
  55. // param.setAccountId("20231213123536190184852");
  56. // param.setPlanId("20240718181730864154902");
  57. // RecallResult recallResult = recallService.recall(param);
  58. // System.out.println(JSONObject.toJSONString(recallResult));
  59. // }
  60. //
  61. // @Test
  62. // void exportData() {
  63. // Set<String> ghIds = new HashSet<>(Arrays.asList("gh_adca24a8f429", "gh_e0eb490115f5", "gh_51e4ad40466d", "gh_95ed5ecf9363"));
  64. // List<Article> articleList = articleRepository.getByGhIdInAndUpdateTimeGreaterThanAndTypeEquals(ghIds, 1722441600L, "9");
  65. //
  66. // Map<String, Map<Integer, List<Article>>> map = articleList.stream()
  67. // .collect(Collectors.groupingBy(Article::getTitle, Collectors.groupingBy(Article::getItemIndex)));
  68. // Set<String> snList = articleList.stream().map(Article::getWxSn).collect(Collectors.toSet());
  69. // List<ArticleDetailInfo> articleDetailInfoList = articleDetailInfoRepository.getAllByWxSnIn(new ArrayList<>(snList));
  70. // Map<String, List<ArticleDetailInfo>> articleDetailInfoMap = articleDetailInfoList.stream()
  71. // .collect(Collectors.groupingBy(ArticleDetailInfo::getWxSn));
  72. //
  73. // List<AccountAvgInfo> accountAvgInfoList = accountAvgInfoRepository.getAllByGhIdInAndStatusEquals(ghIds, 1);
  74. // Map<String, Map<String, AccountAvgInfo>> accountAvgInfoIndexMap = accountAvgInfoList.stream().collect(
  75. // Collectors.groupingBy(AccountAvgInfo::getGhId, Collectors.toMap(AccountAvgInfo::getPosition, o -> o)));
  76. // JSONArray jsonArray = new JSONArray();
  77. // for (Article article : articleList) {
  78. // List<ArticleDetailInfo> articleDetailInfos = articleDetailInfoMap.get(article.getWxSn());
  79. // if (CollectionUtils.isEmpty(articleDetailInfos)) {
  80. // continue;
  81. // }
  82. // Date minDate = articleDetailInfos.stream().map(ArticleDetailInfo::getRecallDt).min(Date::compareTo).orElse(new Date());
  83. // int sumfirstLevel = 0;
  84. // int sumFission0 = 0;
  85. // int sumFission1 = 0;
  86. // int sumFission2 = 0;
  87. // for (ArticleDetailInfo articleDetailInfo : articleDetailInfos) {
  88. // if (articleDetailInfo.getRecallDt().equals(minDate)) {
  89. // sumfirstLevel += Optional.ofNullable(articleDetailInfo.getFirstLevel()).orElse(0);
  90. // sumFission0 += Optional.ofNullable(articleDetailInfo.getFission0()).orElse(0);
  91. // sumFission1 += Optional.ofNullable(articleDetailInfo.getFission1()).orElse(0);
  92. // sumFission2 += Optional.ofNullable(articleDetailInfo.getFission2()).orElse(0);
  93. // }
  94. // }
  95. // Map<String, AccountAvgInfo> accountAvgInfoMap = accountAvgInfoIndexMap.get(article.getGhId());
  96. // AccountAvgInfo avgInfo = accountAvgInfoMap.get(article.getItemIndex().toString());
  97. // SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
  98. // String date = sdf.format(new Date(article.getUpdateTime() * 1000));
  99. // JSONObject obj = new JSONObject();
  100. // obj.put("ghId", article.getGhId());
  101. // obj.put("accountName", article.getAccountName());
  102. // obj.put("title", article.getTitle());
  103. // obj.put("index", article.getItemIndex());
  104. // obj.put("viewCount", article.getShowViewCount());
  105. // obj.put("time", date);
  106. // if (Objects.nonNull(avgInfo)) {
  107. // obj.put("fans", avgInfo.getFans());
  108. // obj.put("avgViewCount", avgInfo.getReadAvg());
  109. // obj.put("viewCountRate", (article.getShowViewCount() * 1.0) / avgInfo.getReadAvg());
  110. // }
  111. // obj.put("firstLevel", sumfirstLevel);
  112. // obj.put("fission0", sumFission0);
  113. // obj.put("fission1", sumFission1);
  114. // obj.put("fission2", sumFission2);
  115. // jsonArray.add(obj);
  116. // }
  117. // System.out.println(jsonArray.toJSONString());
  118. // }
  119. //
  120. // @Test
  121. // void ii() throws IOException {
  122. // String dateStr = "20240911";
  123. // List<AccountAvgInfo> accountAvgInfoList = accountAvgInfoRepository.getAllByStatusEquals(1);
  124. // Map<String, String> accountMap = accountAvgInfoList.stream().collect(
  125. // Collectors.toMap(AccountAvgInfo::getAccountName, AccountAvgInfo::getGhId, (existing, replacement) -> replacement));
  126. // BufferedReader reader = new BufferedReader(new FileReader("/Users/wangyunpeng/Downloads/账号相关性.json"));
  127. // StringBuilder sb = new StringBuilder();
  128. // String line;
  129. // while ((line = reader.readLine()) != null) {
  130. // sb.append(line);
  131. // }
  132. // String jsonStr = sb.toString();
  133. // // 使用 ObjectMapper 解析 JSON
  134. // ObjectMapper objectMapper = new ObjectMapper();
  135. //
  136. // try {
  137. // // 将 JSON 转换为 Map<String, Map<String, Double>>
  138. // Map<String, Map<String, Double>> result = objectMapper.readValue(jsonStr,
  139. // new TypeReference<Map<String, Map<String, Double>>>() {
  140. // });
  141. //
  142. // // 输出转换结果
  143. // System.out.println(result);
  144. // List<AccountCorrelation> saveList = new ArrayList<>();
  145. // result.forEach((k, v) -> {
  146. // String ghId = accountMap.get(k);
  147. // v.forEach((k1, v1) -> {
  148. // String relGhId = accountMap.get(k1);
  149. // AccountCorrelation save = new AccountCorrelation();
  150. // save.setDateStr(dateStr);
  151. // save.setGhId(ghId);
  152. // save.setAccountName(k);
  153. // save.setRelGhId(relGhId);
  154. // save.setRelAccountName(k1);
  155. // save.setStatus(1);
  156. // save.setCorrelation(v1);
  157. // saveList.add(save);
  158. // });
  159. // });
  160. // List<AccountCorrelation> all = new ArrayList<>(saveList);
  161. // for (AccountCorrelation item : all) {
  162. // if (!item.getGhId().equals(item.getRelGhId())) {
  163. // AccountCorrelation save = new AccountCorrelation();
  164. // BeanUtils.copyProperties(item, save);
  165. // save.setGhId(item.getRelGhId());
  166. // save.setAccountName(item.getRelAccountName());
  167. // save.setRelGhId(item.getGhId());
  168. // save.setRelAccountName(item.getAccountName());
  169. // saveList.add(save);
  170. // }
  171. // }
  172. // crawlerBaseMapper.batchInsertAccountCorrelation(saveList);
  173. //
  174. //
  175. // } catch (IOException e) {
  176. // e.printStackTrace();
  177. // }
  178. // }
  179. @Test
  180. public void test() {
  181. List<String> morning = Lists.newArrayList("gh_084a485e859a", "gh_183d80deffb8", "gh_5ff48e9fb9ef", "gh_6d9f36e3a7be", "gh_9f8dc5b0c74e", "gh_e0eb490115f5", "gh_e24da99dc899");
  182. List<String> noon = Lists.newArrayList("gh_080bb43aa0dc", "gh_0c89e11f8bf3", "gh_192c9cf58b13", "gh_1b27dd1beeca", "gh_1d887d61088c", "gh_29074b51f2b7", "gh_3ed305b5817f", "gh_5ae65db96cb7", "gh_6b7c2a257263", "gh_6cfd1132df94", "gh_6d205db62f04", "gh_72bace6b3059", "gh_7e5818b2dd83", "gh_7f5075624a50", "gh_89ef4798d3ea", "gh_9877c8541764", "gh_9eef14ad6c16", "gh_a2901d34f75b", "gh_b15de7c99912", "gh_b676b7ad9b74", "gh_b6f2c5332c72", "gh_bfe5b705324a", "gh_bff0bcb0694a", "gh_c5cdf60d9ab4", "gh_c69776baf2cd", "gh_d49df5e974ca", "gh_d4dffc34ac39", "gh_dd4c857bbb36", "gh_ee78360d06f5", "gh_f25b5fb01977", "gh_f902cea89e48", "gh_ff487cb5dab3");
  183. String dateStr = "2024-09-12";
  184. List<Article> articleList = articleRepository.getByUpdateTimeGreaterThanAndTypeEquals(1725120000L, "9");
  185. articleList = articleList.stream().filter(o -> o.getItemIndex() == 1 && o.getUpdateTime() < 1726675200).collect(Collectors.toList());
  186. Map<String, List<Article>> map = articleList.stream().collect(Collectors.groupingBy(Article::getTitle));
  187. List<AccountAvgInfo> accountAvgInfoList = accountAvgInfoRepository.getAllByUpdateTime(dateStr);
  188. accountAvgInfoList = accountAvgInfoList.stream().filter(o -> o.getPosition().equals("1")).collect(Collectors.toList());
  189. Map<String, AccountAvgInfo> accountAvgInfoMap = accountAvgInfoList.stream().collect(Collectors.toMap(AccountAvgInfo::getGhId, o -> o));
  190. List<String> wxSnList = articleList.stream().map(Article::getWxSn).collect(Collectors.toList());
  191. List<ArticleDetailInfo> articleDetailInfoList = articleDetailInfoRepository.getAllByWxSnIn(wxSnList);
  192. Map<String, List<ArticleDetailInfo>> articleDetailInfoMap = articleDetailInfoList.stream()
  193. .collect(Collectors.groupingBy(ArticleDetailInfo::getWxSn));
  194. JSONArray result = new JSONArray();
  195. int sumFissionMorning = 0;
  196. int sumFissionMoon = 0;
  197. int sumReadMorning = 0;
  198. int sumReadNoon = 0;
  199. long sumFansMorning = 0;
  200. long sumFansMoon = 0;
  201. double readAvgMorning = 0.0;
  202. double readAvgMoon = 0.0;
  203. for (Article article : articleList) {
  204. List<ArticleDetailInfo> articleDetailInfos = articleDetailInfoMap.get(article.getWxSn());
  205. if (CollectionUtil.isEmpty(articleDetailInfos)) {
  206. continue;
  207. }
  208. Date minDate = articleDetailInfos.stream().map(ArticleDetailInfo::getRecallDt).min(Date::compareTo).orElse(new Date());
  209. AccountAvgInfo accountAvgInfo = accountAvgInfoMap.get(article.getGhId());
  210. for (ArticleDetailInfo articleDetailInfo : articleDetailInfos) {
  211. if (articleDetailInfo.getRecallDt().equals(minDate)) {
  212. if (morning.contains(article.getGhId())) {
  213. sumFissionMorning += Optional.ofNullable(articleDetailInfo.getFission0()).orElse(0);
  214. sumFissionMorning += Optional.ofNullable(articleDetailInfo.getFission1()).orElse(0);
  215. sumFissionMorning += Optional.ofNullable(articleDetailInfo.getFission2()).orElse(0);
  216. }
  217. if (noon.contains(article.getGhId())) {
  218. sumFissionMoon += Optional.ofNullable(articleDetailInfo.getFission0()).orElse(0);
  219. sumFissionMoon += Optional.ofNullable(articleDetailInfo.getFission1()).orElse(0);
  220. sumFissionMoon += Optional.ofNullable(articleDetailInfo.getFission2()).orElse(0);
  221. }
  222. }
  223. }
  224. if (Objects.nonNull(accountAvgInfo)) {
  225. if (morning.contains(article.getGhId())) {
  226. readAvgMorning += accountAvgInfo.getReadAvg();
  227. sumFansMorning += accountAvgInfo.getFans();
  228. sumReadMorning += article.getShowViewCount();
  229. }
  230. if (noon.contains(article.getGhId())) {
  231. readAvgMoon += accountAvgInfo.getReadAvg();
  232. sumFansMoon += accountAvgInfo.getFans();
  233. sumReadNoon += article.getShowViewCount();
  234. }
  235. }
  236. }
  237. JSONObject jsonObjectMorning = new JSONObject();
  238. jsonObjectMorning.put("时间", "早上");
  239. jsonObjectMorning.put("sumFission", sumFissionMorning);
  240. jsonObjectMorning.put("readAvg", readAvgMorning);
  241. jsonObjectMorning.put("rate", sumFissionMorning / readAvgMorning);
  242. jsonObjectMorning.put("sumRead", sumReadMorning);
  243. jsonObjectMorning.put("sumFans", sumFansMorning);
  244. jsonObjectMorning.put("阅读率", sumReadMorning / (double) sumFansMorning);
  245. result.add(jsonObjectMorning);
  246. JSONObject jsonObjectMoon = new JSONObject();
  247. jsonObjectMoon.put("时间", "中午");
  248. jsonObjectMoon.put("sumFission", sumFissionMoon);
  249. jsonObjectMoon.put("readAvg", readAvgMoon);
  250. jsonObjectMoon.put("rate", sumFissionMoon / readAvgMoon);
  251. jsonObjectMoon.put("sumRead", sumReadNoon);
  252. jsonObjectMoon.put("sumFans", sumFansMoon);
  253. jsonObjectMoon.put("阅读率", sumReadNoon / (double) sumFansMoon);
  254. result.add(jsonObjectMoon);
  255. System.out.println(JSONObject.toJSONString(result));
  256. }
  257. @Test
  258. public void exportScoreData() {
  259. List<String> ghIds = Lists.newArrayList("gh_e24da99dc899",
  260. "gh_183d80deffb8",
  261. "gh_be8c29139989",
  262. "gh_c69776baf2cd",
  263. "gh_b15de7c99912",
  264. "gh_1d887d61088c",
  265. "gh_3ed305b5817f",
  266. "gh_3e91f0624545",
  267. "gh_30816d8adb52",
  268. "gh_970460d9ccec",
  269. "gh_749271f1ccd5"
  270. );
  271. List<PublishSortLog> sortLogList = publishSortLogRepository.findByGhIdInAndDateStrGreaterThanEqual(ghIds, "20240907");
  272. sortLogList = sortLogList.stream().filter(o -> o.getIndex() == 1).collect(Collectors.toList());
  273. sortLogList.sort(Comparator.comparing(PublishSortLog::getGhId).thenComparing(PublishSortLog::getDateStr));
  274. List<Article> articleList = articleRepository.getByGhIdInAndUpdateTimeGreaterThanAndTypeEquals(ghIds, 1725638400L, "9");
  275. articleList = articleList.stream().filter(o -> o.getItemIndex() == 1).collect(Collectors.toList());
  276. Map<String, Map<String, Article>> articleMap = articleList.stream().collect(Collectors.groupingBy(Article::getGhId, Collectors.toMap(
  277. o -> DateUtils.timestampToYMDStr(o.getUpdateTime(),"yyyyMMdd"), o -> o,
  278. (existing, replacement) -> replacement)));
  279. List<AccountAvgInfo> accountAvgInfoList = accountAvgInfoRepository.getAllByGhIdIn(new HashSet<>(ghIds));
  280. Map<String, Map<String, AccountAvgInfo>> accountAvgInfoMap = accountAvgInfoList.stream()
  281. .filter(o -> Objects.equals(o.getPosition(), "1")).collect(Collectors.groupingBy(AccountAvgInfo::getGhId,
  282. Collectors.toMap(AccountAvgInfo::getUpdateTime, o -> o)));
  283. Workbook workbook = new XSSFWorkbook();
  284. Sheet sheet = workbook.createSheet("ExampleSheet");
  285. int rowNum = 0;
  286. // 创建标题行
  287. Row titleRow = sheet.createRow(rowNum);
  288. Cell titleCell = titleRow.createCell(0);
  289. titleCell.setCellValue("日期");
  290. titleCell = titleRow.createCell(1);
  291. titleCell.setCellValue("ghID");
  292. titleCell = titleRow.createCell(2);
  293. titleCell.setCellValue("账号名称");
  294. titleCell = titleRow.createCell(3);
  295. titleCell.setCellValue("标题");
  296. titleCell = titleRow.createCell(4);
  297. titleCell.setCellValue("策略");
  298. titleCell = titleRow.createCell(5);
  299. titleCell.setCellValue("得分");
  300. titleCell = titleRow.createCell(6);
  301. titleCell.setCellValue("HisFissionFansRateRateStrategy");
  302. titleCell = titleRow.createCell(7);
  303. titleCell.setCellValue("HisFissionAvgReadRateRateStrategy");
  304. titleCell = titleRow.createCell(8);
  305. titleCell.setCellValue("PublishTimesStrategy");
  306. titleCell = titleRow.createCell(9);
  307. titleCell.setCellValue("ViewCountRateCorrelationStrategy");
  308. titleCell = titleRow.createCell(10);
  309. titleCell.setCellValue("HisFissionAvgReadSumRateStrategy");
  310. titleCell = titleRow.createCell(11);
  311. titleCell.setCellValue("HisFissionAvgReadRateCorrelationRateStrategy");
  312. titleCell = titleRow.createCell(12);
  313. titleCell.setCellValue("HisFissionFansSumRateStrategy");
  314. titleCell = titleRow.createCell(13);
  315. titleCell.setCellValue("SimilarityStrategy");
  316. titleCell = titleRow.createCell(14);
  317. titleCell.setCellValue("ViewCountStrategy");
  318. titleCell = titleRow.createCell(15);
  319. titleCell.setCellValue("ViewCountRateStrategy");
  320. titleCell = titleRow.createCell(16);
  321. titleCell.setCellValue("HisFissionDeWeightAvgReadSumRateStrategy");
  322. titleCell = titleRow.createCell(17);
  323. titleCell.setCellValue("阅读量");
  324. titleCell = titleRow.createCell(18);
  325. titleCell.setCellValue("阅读均值");
  326. titleCell = titleRow.createCell(19);
  327. titleCell.setCellValue("阅读均值倍数");
  328. // 填充数据
  329. String title = "";
  330. for (PublishSortLog publishSortLog : sortLogList) {
  331. Map<String, Article> dateArticleMap = articleMap.get(publishSortLog.getGhId());
  332. Article article = dateArticleMap.get(publishSortLog.getDateStr());
  333. if (Objects.isNull(article) || !publishSortLog.getTitle().equals(article.getTitle())) {
  334. continue;
  335. }
  336. if (publishSortLog.getTitle().equals(title)) {
  337. continue;
  338. }
  339. title = publishSortLog.getTitle();
  340. rowNum++;
  341. Row row = sheet.createRow(rowNum);
  342. Cell cell = row.createCell(0);
  343. cell.setCellValue(publishSortLog.getDateStr());
  344. cell = row.createCell(1);
  345. cell.setCellValue(publishSortLog.getGhId());
  346. cell = row.createCell(2);
  347. cell.setCellValue(publishSortLog.getAccountName());
  348. cell = row.createCell(3);
  349. cell.setCellValue(publishSortLog.getTitle());
  350. cell = row.createCell(4);
  351. cell.setCellValue(publishSortLog.getStrategy());
  352. cell = row.createCell(5);
  353. cell.setCellValue(publishSortLog.getScore());
  354. cell = row.createCell(6);
  355. JSONObject scoreMap = JSONObject.parseObject(publishSortLog.getScoreMap());
  356. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansRateRateStrategy")).orElse(0.0)));
  357. cell = row.createCell(7);
  358. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateRateStrategy")).orElse(0.0)));
  359. cell = row.createCell(8);
  360. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("PublishTimesStrategy")).orElse(0.0)));
  361. cell = row.createCell(9);
  362. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountRateCorrelationStrategy")).orElse(0.0)));
  363. cell = row.createCell(10);
  364. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadSumRateStrategy")).orElse(0.0)));
  365. cell = row.createCell(11);
  366. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateCorrelationRateStrategy")).orElse(0.0)));
  367. cell = row.createCell(12);
  368. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansSumRateStrategy")).orElse(0.0)));
  369. cell = row.createCell(13);
  370. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("SimilarityStrategy")).orElse(0.0)));
  371. cell = row.createCell(14);
  372. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountStrategy")).orElse(0.0)));
  373. cell = row.createCell(15);
  374. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountRateStrategy")).orElse(0.0)));
  375. cell = row.createCell(16);
  376. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionDeWeightAvgReadSumRateStrategy")).orElse(0.0)));
  377. cell = row.createCell(17);
  378. cell.setCellValue(article.getShowViewCount());
  379. cell = row.createCell(18);
  380. Map<String, AccountAvgInfo> map = accountAvgInfoMap.get(article.getGhId());
  381. if (Objects.nonNull(map)) {
  382. List<String> avgMapDateList = new ArrayList<>(map.keySet());
  383. String publishDate = DateUtils.findNearestDate(avgMapDateList,
  384. DateUtils.timestampToYMDStr(article.getUpdateTime(), "yyyy-MM-dd"), "yyyy-MM-dd");
  385. AccountAvgInfo accountAvgInfo = map.get(publishDate);
  386. if (Objects.nonNull(accountAvgInfo)) {
  387. cell.setCellValue(accountAvgInfo.getReadAvg());
  388. cell = row.createCell(19);
  389. cell.setCellValue(String.format("%.3f", article.getShowViewCount() / (double) accountAvgInfo.getReadAvg()));
  390. }
  391. }
  392. }
  393. try (FileOutputStream outputStream = new FileOutputStream("/Users/wangyunpeng/Downloads/example.xlsx")) {
  394. workbook.write(outputStream);
  395. } catch (IOException e) {
  396. e.printStackTrace();
  397. } finally {
  398. try {
  399. workbook.close();
  400. } catch (IOException e) {
  401. e.printStackTrace();
  402. }
  403. }
  404. }
  405. @Test
  406. public void correlation() {
  407. List<String> ghIds = Lists.newArrayList("gh_e24da99dc899",
  408. "gh_183d80deffb8",
  409. "gh_be8c29139989",
  410. "gh_c69776baf2cd",
  411. "gh_b15de7c99912",
  412. "gh_1d887d61088c",
  413. "gh_3ed305b5817f",
  414. "gh_3e91f0624545",
  415. "gh_30816d8adb52",
  416. "gh_970460d9ccec",
  417. "gh_749271f1ccd5",
  418. "gh_ac43e43b253b"
  419. );
  420. List<PublishSortLog> sortLogList = publishSortLogRepository.findByGhIdInAndDateStrGreaterThanEqual(ghIds, "20240907");
  421. sortLogList = sortLogList.stream().filter(o -> o.getIndex() == 1).collect(Collectors.toList());
  422. sortLogList.sort(Comparator.comparing(PublishSortLog::getGhId).thenComparing(PublishSortLog::getDateStr));
  423. List<Article> articleList = articleRepository.getByGhIdInAndUpdateTimeGreaterThanAndTypeEquals(ghIds, 1725638400L, "9");
  424. articleList = articleList.stream().filter(o -> o.getItemIndex() == 1).collect(Collectors.toList());
  425. Map<String, Map<String, Article>> articleMap = articleList.stream().collect(Collectors.groupingBy(Article::getGhId, Collectors.toMap(
  426. o -> DateUtils.timestampToYMDStr(o.getUpdateTime(),"yyyyMMdd"), o -> o,
  427. (existing, replacement) -> replacement)));
  428. List<AccountAvgInfo> accountAvgInfoList = accountAvgInfoRepository.getAllByGhIdIn(new HashSet<>(ghIds));
  429. Map<String, Map<String, AccountAvgInfo>> accountAvgInfoMap = accountAvgInfoList.stream()
  430. .filter(o -> Objects.equals(o.getPosition(), "1")).collect(Collectors.groupingBy(AccountAvgInfo::getGhId,
  431. Collectors.toMap(AccountAvgInfo::getUpdateTime, o -> o)));
  432. int rowNum = 0;
  433. Map<String, List<PublishSortLog>> sortLogMap = sortLogList.stream().collect(Collectors.groupingBy(PublishSortLog::getGhId));
  434. PearsonsCorrelation correlation = new PearsonsCorrelation();
  435. Workbook workbook = new XSSFWorkbook();
  436. Sheet sheet = workbook.createSheet("ExampleSheet");
  437. // 创建标题行
  438. Row titleRow = sheet.createRow(rowNum);
  439. for (Map.Entry<String, List<PublishSortLog>> entry : sortLogMap.entrySet()) {
  440. String ghId = entry.getKey();
  441. String name = entry.getValue().get(0).getAccountName();
  442. List<PublishSortLog> itemList = entry.getValue();
  443. String title = "";
  444. double[] scoreArr = new double[itemList.size()];
  445. double[] HisFissionFansRateRateStrategyArr = new double[itemList.size()];
  446. double[] HisFissionAvgReadRateRateStrategyArr = new double[itemList.size()];
  447. double[] PublishTimesStrategyArr = new double[itemList.size()];
  448. double[] ViewCountRateCorrelationStrategyArr = new double[itemList.size()];
  449. double[] HisFissionAvgReadSumRateStrategyArr = new double[itemList.size()];
  450. double[] HisFissionAvgReadRateCorrelationRateStrategyArr = new double[itemList.size()];
  451. double[] HisFissionFansSumRateStrategyArr = new double[itemList.size()];
  452. double[] SimilarityStrategyArr = new double[itemList.size()];
  453. double[] ViewCountStrategyArr = new double[itemList.size()];
  454. double[] ViewCountRateStrategyArr = new double[itemList.size()];
  455. double[] HisFissionDeWeightAvgReadSumRateStrategyArr = new double[itemList.size()];
  456. double[] scoreRateArr = new double[itemList.size()];
  457. for (int i = 0; i < itemList.size(); i++) {
  458. PublishSortLog publishSortLog = itemList.get(i);
  459. Map<String, Article> dateArticleMap = articleMap.get(publishSortLog.getGhId());
  460. Article article = dateArticleMap.get(publishSortLog.getDateStr());
  461. if (Objects.isNull(article) || !publishSortLog.getTitle().equals(article.getTitle())) {
  462. continue;
  463. }
  464. if (publishSortLog.getTitle().equals(title)) {
  465. continue;
  466. }
  467. title = publishSortLog.getTitle();
  468. scoreArr[i] = Double.parseDouble(publishSortLog.getScore());
  469. JSONObject scoreMap = JSONObject.parseObject(publishSortLog.getScoreMap());
  470. HisFissionFansRateRateStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansRateRateStrategy")).orElse(0.0)));
  471. HisFissionAvgReadRateRateStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateRateStrategy")).orElse(0.0)));
  472. PublishTimesStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("PublishTimesStrategy")).orElse(0.0)));
  473. ViewCountRateCorrelationStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountRateCorrelationStrategy")).orElse(0.0)));
  474. HisFissionAvgReadSumRateStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadSumRateStrategy")).orElse(0.0)));
  475. HisFissionAvgReadRateCorrelationRateStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateCorrelationRateStrategy")).orElse(0.0)));
  476. HisFissionFansSumRateStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansSumRateStrategy")).orElse(0.0)));
  477. SimilarityStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("SimilarityStrategy")).orElse(0.0)));
  478. ViewCountStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountStrategy")).orElse(0.0)));
  479. ViewCountRateStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountRateStrategy")).orElse(0.0)));
  480. HisFissionDeWeightAvgReadSumRateStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionDeWeightAvgReadSumRateStrategy")).orElse(0.0)));
  481. Map<String, AccountAvgInfo> map = accountAvgInfoMap.get(article.getGhId());
  482. if (Objects.nonNull(map)) {
  483. List<String> avgMapDateList = new ArrayList<>(map.keySet());
  484. String publishDate = DateUtils.findNearestDate(avgMapDateList,
  485. DateUtils.timestampToYMDStr(article.getUpdateTime(), "yyyy-MM-dd"), "yyyy-MM-dd");
  486. AccountAvgInfo accountAvgInfo = map.get(publishDate);
  487. if (Objects.nonNull(accountAvgInfo)) {
  488. scoreRateArr[i] = Double.parseDouble(String.format("%.3f", article.getShowViewCount() / (double) accountAvgInfo.getReadAvg()));
  489. }
  490. }
  491. }
  492. rowNum++;
  493. Row row = sheet.createRow(rowNum);
  494. Cell cell = row.createCell(0);
  495. cell = row.createCell(1);
  496. cell.setCellValue(ghId);
  497. cell = row.createCell(2);
  498. cell.setCellValue(name);
  499. cell = row.createCell(3);
  500. cell = row.createCell(4);
  501. cell = row.createCell(5);
  502. cell.setCellValue(correlation.correlation(scoreArr, scoreRateArr));
  503. cell = row.createCell(6);
  504. cell.setCellValue(correlation.correlation(HisFissionFansRateRateStrategyArr, scoreRateArr));
  505. cell = row.createCell(7);
  506. cell.setCellValue(correlation.correlation(HisFissionAvgReadRateRateStrategyArr, scoreRateArr));
  507. cell = row.createCell(8);
  508. cell.setCellValue(correlation.correlation(PublishTimesStrategyArr, scoreRateArr));
  509. cell = row.createCell(9);
  510. cell.setCellValue(correlation.correlation(ViewCountRateCorrelationStrategyArr, scoreRateArr));
  511. cell = row.createCell(10);
  512. cell.setCellValue(correlation.correlation(HisFissionAvgReadSumRateStrategyArr, scoreRateArr));
  513. cell = row.createCell(11);
  514. cell.setCellValue(correlation.correlation(HisFissionAvgReadRateCorrelationRateStrategyArr, scoreRateArr));
  515. cell = row.createCell(12);
  516. cell.setCellValue(correlation.correlation(HisFissionFansSumRateStrategyArr, scoreRateArr));
  517. cell = row.createCell(13);
  518. cell.setCellValue(correlation.correlation(SimilarityStrategyArr, scoreRateArr));
  519. cell = row.createCell(14);
  520. cell.setCellValue(correlation.correlation(ViewCountStrategyArr, scoreRateArr));
  521. cell = row.createCell(15);
  522. cell.setCellValue(correlation.correlation(ViewCountRateStrategyArr, scoreRateArr));
  523. cell = row.createCell(16);
  524. cell.setCellValue(correlation.correlation(HisFissionDeWeightAvgReadSumRateStrategyArr, scoreRateArr));
  525. }
  526. try (FileOutputStream outputStream = new FileOutputStream("/Users/wangyunpeng/Downloads/example.xlsx")) {
  527. workbook.write(outputStream);
  528. } catch (IOException e) {
  529. e.printStackTrace();
  530. } finally {
  531. try {
  532. workbook.close();
  533. } catch (IOException e) {
  534. e.printStackTrace();
  535. }
  536. }
  537. }
  538. @Test
  539. void getScoreFromLogFile() {
  540. String folderPath = "/Users/wangyunpeng/Downloads/b78020b8-d9df-466f-bd01-cd982bb986d0.json";
  541. File file = new File(folderPath);
  542. Workbook workbook = new XSSFWorkbook();
  543. Sheet sheet = workbook.createSheet("ExampleSheet");
  544. int rowNum = 0;
  545. // 创建标题行
  546. Row titleRow = sheet.createRow(rowNum);
  547. Cell titleCell = titleRow.createCell(0);
  548. titleCell.setCellValue("日期");
  549. titleCell = titleRow.createCell(1);
  550. titleCell.setCellValue("账号名称");
  551. titleCell = titleRow.createCell(2);
  552. titleCell.setCellValue("id");
  553. titleCell = titleRow.createCell(3);
  554. titleCell.setCellValue("标题");
  555. titleCell = titleRow.createCell(4);
  556. titleCell.setCellValue("策略");
  557. titleCell = titleRow.createCell(5);
  558. titleCell.setCellValue("得分");
  559. titleCell = titleRow.createCell(6);
  560. titleCell.setCellValue("HisFissionFansRateRateStrategy");
  561. titleCell = titleRow.createCell(7);
  562. titleCell.setCellValue("HisFissionAvgReadRateRateStrategy");
  563. titleCell = titleRow.createCell(8);
  564. titleCell.setCellValue("PublishTimesStrategy");
  565. titleCell = titleRow.createCell(9);
  566. titleCell.setCellValue("ViewCountRateCorrelationStrategy");
  567. titleCell = titleRow.createCell(10);
  568. titleCell.setCellValue("HisFissionAvgReadSumRateStrategy");
  569. titleCell = titleRow.createCell(11);
  570. titleCell.setCellValue("HisFissionAvgReadRateCorrelationRateStrategy");
  571. titleCell = titleRow.createCell(12);
  572. titleCell.setCellValue("HisFissionFansSumRateStrategy");
  573. titleCell = titleRow.createCell(13);
  574. titleCell.setCellValue("SimilarityStrategy");
  575. titleCell = titleRow.createCell(14);
  576. titleCell.setCellValue("ViewCountStrategy");
  577. titleCell = titleRow.createCell(15);
  578. titleCell.setCellValue("ViewCountRateStrategy");
  579. titleCell = titleRow.createCell(16);
  580. titleCell.setCellValue("HisFissionDeWeightAvgReadSumRateStrategy");
  581. try {
  582. String content = new String(Files.readAllBytes(file.toPath()), StandardCharsets.UTF_8);
  583. JSONArray jsonArray = JSONArray.parseArray(content);
  584. for (Object o : jsonArray) {
  585. JSONObject jsonObject = (JSONObject) o;
  586. Long time = jsonObject.getLong("__time__");
  587. String message = jsonObject.getString("message");
  588. int index = message.indexOf("[");
  589. String info = message.substring(0, index);
  590. String strategy = info.substring(0, info.indexOf(" "));
  591. String accountName = info.substring(info.indexOf(" ")).replace("账号名称 ", "")
  592. .replace(" 头条评分结果", "");
  593. String json = message.substring(index);
  594. JSONArray scoreArray = JSONArray.parseArray(json);
  595. for (Object scoreJSON : scoreArray) {
  596. JSONObject scoreObject = (JSONObject) scoreJSON;
  597. String id = scoreObject.getString("id");
  598. String title = scoreObject.getString("title");
  599. String score = scoreObject.getString("score");
  600. String scoreMapStr = scoreObject.getString("scoreMap");
  601. rowNum++;
  602. Row row = sheet.createRow(rowNum);
  603. Cell cell = row.createCell(0);
  604. cell.setCellValue(DateUtils.timestampToYMDStr(time, "yyyyMMdd"));
  605. cell = row.createCell(1);
  606. cell.setCellValue(accountName);
  607. cell = row.createCell(2);
  608. cell.setCellValue(id);
  609. cell = row.createCell(3);
  610. cell.setCellValue(title);
  611. cell = row.createCell(4);
  612. cell.setCellValue(strategy);
  613. cell = row.createCell(5);
  614. cell.setCellValue(score);
  615. cell = row.createCell(6);
  616. JSONObject scoreMap = JSONObject.parseObject(scoreMapStr);
  617. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansRateRateStrategy")).orElse(0.0)));
  618. cell = row.createCell(7);
  619. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateRateStrategy")).orElse(0.0)));
  620. cell = row.createCell(8);
  621. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("PublishTimesStrategy")).orElse(0.0)));
  622. cell = row.createCell(9);
  623. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountRateCorrelationStrategy")).orElse(0.0)));
  624. cell = row.createCell(10);
  625. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadSumRateStrategy")).orElse(0.0)));
  626. cell = row.createCell(11);
  627. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateCorrelationRateStrategy")).orElse(0.0)));
  628. cell = row.createCell(12);
  629. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansSumRateStrategy")).orElse(0.0)));
  630. cell = row.createCell(13);
  631. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("SimilarityStrategy")).orElse(0.0)));
  632. cell = row.createCell(14);
  633. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountStrategy")).orElse(0.0)));
  634. cell = row.createCell(15);
  635. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountRateStrategy")).orElse(0.0)));
  636. cell = row.createCell(16);
  637. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionDeWeightAvgReadSumRateStrategy")).orElse(0.0)));
  638. }
  639. }
  640. try (FileOutputStream outputStream = new FileOutputStream("/Users/wangyunpeng/Downloads/example.xlsx")) {
  641. workbook.write(outputStream);
  642. } catch (IOException e) {
  643. e.printStackTrace();
  644. } finally {
  645. try {
  646. workbook.close();
  647. } catch (IOException e) {
  648. e.printStackTrace();
  649. }
  650. }
  651. } catch (Exception e) {
  652. log.error("readFileError fileName:{}", file.getName(), e);
  653. }
  654. }
  655. }