RecommendTest.java 50 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862
  1. package com.tzld.longarticle.recommend.server;
  2. import cn.hutool.core.collection.CollectionUtil;
  3. import com.alibaba.fastjson.JSONArray;
  4. import com.alibaba.fastjson.JSONObject;
  5. import com.google.common.collect.Lists;
  6. import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticleTypeEnum;
  7. import com.tzld.longarticle.recommend.server.mapper.aigc.AigcBaseMapper;
  8. import com.tzld.longarticle.recommend.server.mapper.crawler.CrawlerBaseMapper;
  9. import com.tzld.longarticle.recommend.server.model.dto.PublishContentDTO;
  10. import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishAccount;
  11. import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountAvgInfo;
  12. import com.tzld.longarticle.recommend.server.model.entity.crawler.Article;
  13. import com.tzld.longarticle.recommend.server.model.entity.crawler.ArticleDetailInfo;
  14. import com.tzld.longarticle.recommend.server.model.entity.crawler.PublishSortLog;
  15. import com.tzld.longarticle.recommend.server.model.param.PublishContentParam;
  16. import com.tzld.longarticle.recommend.server.repository.aigc.PublishAccountRepository;
  17. import com.tzld.longarticle.recommend.server.repository.crawler.AccountAvgInfoRepository;
  18. import com.tzld.longarticle.recommend.server.repository.crawler.ArticleDetailInfoRepository;
  19. import com.tzld.longarticle.recommend.server.repository.crawler.ArticleRepository;
  20. import com.tzld.longarticle.recommend.server.repository.crawler.PublishSortLogRepository;
  21. import com.tzld.longarticle.recommend.server.service.recommend.RecommendService;
  22. import com.tzld.longarticle.recommend.server.service.recommend.recall.RecallService;
  23. import com.tzld.longarticle.recommend.server.util.DateUtils;
  24. import lombok.extern.slf4j.Slf4j;
  25. import org.apache.commons.math3.stat.correlation.PearsonsCorrelation;
  26. import org.apache.poi.ss.usermodel.Cell;
  27. import org.apache.poi.ss.usermodel.Row;
  28. import org.apache.poi.ss.usermodel.Sheet;
  29. import org.apache.poi.ss.usermodel.Workbook;
  30. import org.apache.poi.xssf.usermodel.XSSFWorkbook;
  31. import org.junit.jupiter.api.Test;
  32. import org.springframework.beans.BeanUtils;
  33. import org.springframework.beans.factory.annotation.Autowired;
  34. import org.springframework.boot.test.context.SpringBootTest;
  35. import javax.annotation.Resource;
  36. import java.io.File;
  37. import java.io.FileOutputStream;
  38. import java.io.IOException;
  39. import java.nio.charset.StandardCharsets;
  40. import java.nio.file.Files;
  41. import java.util.*;
  42. import java.util.stream.Collectors;
  43. @SpringBootTest(classes = Application.class)
  44. @Slf4j
  45. public class RecommendTest {
  // Collaborators injected into this @SpringBootTest class via @Resource / @Autowired.
  // Not referenced by any active (non-commented) code visible in this part of the file.
  46. @Resource
  47. private RecommendService recommendService;
  // Only referenced by the commented-out recall() test below.
  48. @Resource
  49. private RecallService recallService;
  // Article queries and saves; used by test(), exportScoreData(), account() and correlation().
  50. @Resource
  51. private ArticleRepository articleRepository;
  // Per-article detail rows looked up by wxSn; used by test().
  52. @Resource
  53. private ArticleDetailInfoRepository articleDetailInfoRepository;
  // Per-account average info rows; used by test(), exportScoreData() and correlation().
  54. @Resource
  55. private AccountAvgInfoRepository accountAvgInfoRepository;
  // Only referenced by the commented-out ii() test below.
  56. @Resource
  57. private CrawlerBaseMapper crawlerBaseMapper;
  // Publish sort logs; used by exportScoreData() and correlation().
  58. @Resource
  59. private PublishSortLogRepository publishSortLogRepository;
  // Not referenced in the visible part of this file.
  60. @Autowired
  61. private PublishAccountRepository publishAccountRepository;
  // Not referenced in the visible part of this file.
  62. @Autowired
  63. private AigcBaseMapper aigcBaseMapper;
  64. // @Test
  65. // void recall() {
  66. // RecallParam param = new RecallParam();
  67. // param.setAccountId("20231213123536190184852");
  68. // param.setPlanId("20240718181730864154902");
  69. // RecallResult recallResult = recallService.recall(param);
  70. // System.out.println(JSONObject.toJSONString(recallResult));
  71. // }
  72. //
  73. // @Test
  74. // void exportData() {
  75. // Set<String> ghIds = new HashSet<>(Arrays.asList("gh_adca24a8f429", "gh_e0eb490115f5", "gh_51e4ad40466d", "gh_95ed5ecf9363"));
  76. // List<Article> articleList = articleRepository.getByGhIdInAndPublishTimestampGreaterThanAndTypeEquals(ghIds, 1722441600L, ArticleTypeEnum.qunfa.getVal());
  77. //
  78. // Map<String, Map<Integer, List<Article>>> map = articleList.stream()
  79. // .collect(Collectors.groupingBy(Article::getTitle, Collectors.groupingBy(Article::getItemIndex)));
  80. // Set<String> snList = articleList.stream().map(Article::getWxSn).collect(Collectors.toSet());
  81. // List<ArticleDetailInfo> articleDetailInfoList = articleDetailInfoRepository.getAllByWxSnIn(new ArrayList<>(snList));
  82. // Map<String, List<ArticleDetailInfo>> articleDetailInfoMap = articleDetailInfoList.stream()
  83. // .collect(Collectors.groupingBy(ArticleDetailInfo::getWxSn));
  84. //
  85. // List<AccountAvgInfo> accountAvgInfoList = accountAvgInfoRepository.getAllByGhIdInAndStatusEquals(ghIds, 1);
  86. // Map<String, Map<String, AccountAvgInfo>> accountAvgInfoIndexMap = accountAvgInfoList.stream().collect(
  87. // Collectors.groupingBy(AccountAvgInfo::getGhId, Collectors.toMap(AccountAvgInfo::getPosition, o -> o)));
  88. // JSONArray jsonArray = new JSONArray();
  89. // for (Article article : articleList) {
  90. // List<ArticleDetailInfo> articleDetailInfos = articleDetailInfoMap.get(article.getWxSn());
  91. // if (CollectionUtils.isEmpty(articleDetailInfos)) {
  92. // continue;
  93. // }
  94. // Date minDate = articleDetailInfos.stream().map(ArticleDetailInfo::getRecallDt).min(Date::compareTo).orElse(new Date());
  95. // int sumfirstLevel = 0;
  96. // int sumFission0 = 0;
  97. // int sumFission1 = 0;
  98. // int sumFission2 = 0;
  99. // for (ArticleDetailInfo articleDetailInfo : articleDetailInfos) {
  100. // if (articleDetailInfo.getRecallDt().equals(minDate)) {
  101. // sumfirstLevel += Optional.ofNullable(articleDetailInfo.getFirstLevel()).orElse(0);
  102. // sumFission0 += Optional.ofNullable(articleDetailInfo.getFission0()).orElse(0);
  103. // sumFission1 += Optional.ofNullable(articleDetailInfo.getFission1()).orElse(0);
  104. // sumFission2 += Optional.ofNullable(articleDetailInfo.getFission2()).orElse(0);
  105. // }
  106. // }
  107. // Map<String, AccountAvgInfo> accountAvgInfoMap = accountAvgInfoIndexMap.get(article.getGhId());
  108. // AccountAvgInfo avgInfo = accountAvgInfoMap.get(article.getItemIndex().toString());
  109. // SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
  110. // String date = sdf.format(new Date(article.getPublishTimestamp() * 1000));
  111. // JSONObject obj = new JSONObject();
  112. // obj.put("ghId", article.getGhId());
  113. // obj.put("accountName", article.getAccountName());
  114. // obj.put("title", article.getTitle());
  115. // obj.put("index", article.getItemIndex());
  116. // obj.put("viewCount", article.getShowViewCount());
  117. // obj.put("time", date);
  118. // if (Objects.nonNull(avgInfo)) {
  119. // obj.put("fans", avgInfo.getFans());
  120. // obj.put("avgViewCount", avgInfo.getReadAvg());
  121. // obj.put("viewCountRate", (article.getShowViewCount() * 1.0) / avgInfo.getReadAvg());
  122. // }
  123. // obj.put("firstLevel", sumfirstLevel);
  124. // obj.put("fission0", sumFission0);
  125. // obj.put("fission1", sumFission1);
  126. // obj.put("fission2", sumFission2);
  127. // jsonArray.add(obj);
  128. // }
  129. // System.out.println(jsonArray.toJSONString());
  130. // }
  131. //
  132. // @Test
  133. // void ii() throws IOException {
  134. // String dateStr = "20240911";
  135. // List<AccountAvgInfo> accountAvgInfoList = accountAvgInfoRepository.getAllByStatusEquals(1);
  136. // Map<String, String> accountMap = accountAvgInfoList.stream().collect(
  137. // Collectors.toMap(AccountAvgInfo::getAccountName, AccountAvgInfo::getGhId, (existing, replacement) -> replacement));
  138. // BufferedReader reader = new BufferedReader(new FileReader("/Users/wangyunpeng/Downloads/账号相关性.json"));
  139. // StringBuilder sb = new StringBuilder();
  140. // String line;
  141. // while ((line = reader.readLine()) != null) {
  142. // sb.append(line);
  143. // }
  144. // String jsonStr = sb.toString();
  145. // // 使用 ObjectMapper 解析 JSON
  146. // ObjectMapper objectMapper = new ObjectMapper();
  147. //
  148. // try {
  149. // // 将 JSON 转换为 Map<String, Map<String, Double>>
  150. // Map<String, Map<String, Double>> result = objectMapper.readValue(jsonStr,
  151. // new TypeReference<Map<String, Map<String, Double>>>() {
  152. // });
  153. //
  154. // // 输出转换结果
  155. // System.out.println(result);
  156. // List<AccountCorrelation> saveList = new ArrayList<>();
  157. // result.forEach((k, v) -> {
  158. // String ghId = accountMap.get(k);
  159. // v.forEach((k1, v1) -> {
  160. // String relGhId = accountMap.get(k1);
  161. // AccountCorrelation save = new AccountCorrelation();
  162. // save.setDateStr(dateStr);
  163. // save.setGhId(ghId);
  164. // save.setAccountName(k);
  165. // save.setRelGhId(relGhId);
  166. // save.setRelAccountName(k1);
  167. // save.setStatus(1);
  168. // save.setCorrelation(v1);
  169. // saveList.add(save);
  170. // });
  171. // });
  172. // List<AccountCorrelation> all = new ArrayList<>(saveList);
  173. // for (AccountCorrelation item : all) {
  174. // if (!item.getGhId().equals(item.getRelGhId())) {
  175. // AccountCorrelation save = new AccountCorrelation();
  176. // BeanUtils.copyProperties(item, save);
  177. // save.setGhId(item.getRelGhId());
  178. // save.setAccountName(item.getRelAccountName());
  179. // save.setRelGhId(item.getGhId());
  180. // save.setRelAccountName(item.getAccountName());
  181. // saveList.add(save);
  182. // }
  183. // }
  184. // crawlerBaseMapper.batchInsertAccountCorrelation(saveList);
  185. //
  186. //
  187. // } catch (IOException e) {
  188. // e.printStackTrace();
  189. // }
  190. // }
  191. @Test
  192. public void test() {
  193. List<String> morning = Lists.newArrayList("gh_084a485e859a", "gh_183d80deffb8", "gh_5ff48e9fb9ef", "gh_6d9f36e3a7be", "gh_9f8dc5b0c74e", "gh_e0eb490115f5", "gh_e24da99dc899");
  194. List<String> noon = Lists.newArrayList("gh_080bb43aa0dc", "gh_0c89e11f8bf3", "gh_192c9cf58b13", "gh_1b27dd1beeca", "gh_1d887d61088c", "gh_29074b51f2b7", "gh_3ed305b5817f", "gh_5ae65db96cb7", "gh_6b7c2a257263", "gh_6cfd1132df94", "gh_6d205db62f04", "gh_72bace6b3059", "gh_7e5818b2dd83", "gh_7f5075624a50", "gh_89ef4798d3ea", "gh_9877c8541764", "gh_9eef14ad6c16", "gh_a2901d34f75b", "gh_b15de7c99912", "gh_b676b7ad9b74", "gh_b6f2c5332c72", "gh_bfe5b705324a", "gh_bff0bcb0694a", "gh_c5cdf60d9ab4", "gh_c69776baf2cd", "gh_d49df5e974ca", "gh_d4dffc34ac39", "gh_dd4c857bbb36", "gh_ee78360d06f5", "gh_f25b5fb01977", "gh_f902cea89e48", "gh_ff487cb5dab3");
  195. String dateStr = "2024-09-12";
  196. List<Article> articleList = articleRepository.getByPublishTimestampGreaterThanAndTypeEquals(1725120000L, ArticleTypeEnum.QUNFA.getVal());
  197. articleList = articleList.stream().filter(o -> o.getItemIndex() == 1 && o.getPublishTimestamp() < 1726675200).collect(Collectors.toList());
  198. Map<String, List<Article>> map = articleList.stream().collect(Collectors.groupingBy(Article::getTitle));
  199. List<AccountAvgInfo> accountAvgInfoList = accountAvgInfoRepository.getAllByUpdateTime(dateStr);
  200. accountAvgInfoList = accountAvgInfoList.stream().filter(o -> o.getPosition().equals("1")).collect(Collectors.toList());
  201. Map<String, AccountAvgInfo> accountAvgInfoMap = accountAvgInfoList.stream().collect(Collectors.toMap(AccountAvgInfo::getGhId, o -> o));
  202. List<String> wxSnList = articleList.stream().map(Article::getWxSn).collect(Collectors.toList());
  203. List<ArticleDetailInfo> articleDetailInfoList = articleDetailInfoRepository.getAllByWxSnIn(wxSnList);
  204. Map<String, List<ArticleDetailInfo>> articleDetailInfoMap = articleDetailInfoList.stream()
  205. .collect(Collectors.groupingBy(ArticleDetailInfo::getWxSn));
  206. JSONArray result = new JSONArray();
  207. int sumFissionMorning = 0;
  208. int sumFissionMoon = 0;
  209. int sumReadMorning = 0;
  210. int sumReadNoon = 0;
  211. long sumFansMorning = 0;
  212. long sumFansMoon = 0;
  213. double readAvgMorning = 0.0;
  214. double readAvgMoon = 0.0;
  215. for (Article article : articleList) {
  216. List<ArticleDetailInfo> articleDetailInfos = articleDetailInfoMap.get(article.getWxSn());
  217. if (CollectionUtil.isEmpty(articleDetailInfos)) {
  218. continue;
  219. }
  220. Date minDate = articleDetailInfos.stream().map(ArticleDetailInfo::getRecallDt).min(Date::compareTo).orElse(new Date());
  221. AccountAvgInfo accountAvgInfo = accountAvgInfoMap.get(article.getGhId());
  222. for (ArticleDetailInfo articleDetailInfo : articleDetailInfos) {
  223. if (articleDetailInfo.getRecallDt().equals(minDate)) {
  224. if (morning.contains(article.getGhId())) {
  225. sumFissionMorning += Optional.ofNullable(articleDetailInfo.getFission0()).orElse(0);
  226. sumFissionMorning += Optional.ofNullable(articleDetailInfo.getFission1()).orElse(0);
  227. sumFissionMorning += Optional.ofNullable(articleDetailInfo.getFission2()).orElse(0);
  228. }
  229. if (noon.contains(article.getGhId())) {
  230. sumFissionMoon += Optional.ofNullable(articleDetailInfo.getFission0()).orElse(0);
  231. sumFissionMoon += Optional.ofNullable(articleDetailInfo.getFission1()).orElse(0);
  232. sumFissionMoon += Optional.ofNullable(articleDetailInfo.getFission2()).orElse(0);
  233. }
  234. }
  235. }
  236. if (Objects.nonNull(accountAvgInfo)) {
  237. if (morning.contains(article.getGhId())) {
  238. readAvgMorning += accountAvgInfo.getReadAvg();
  239. sumFansMorning += accountAvgInfo.getFans();
  240. sumReadMorning += article.getShowViewCount();
  241. }
  242. if (noon.contains(article.getGhId())) {
  243. readAvgMoon += accountAvgInfo.getReadAvg();
  244. sumFansMoon += accountAvgInfo.getFans();
  245. sumReadNoon += article.getShowViewCount();
  246. }
  247. }
  248. }
  249. JSONObject jsonObjectMorning = new JSONObject();
  250. jsonObjectMorning.put("时间", "早上");
  251. jsonObjectMorning.put("sumFission", sumFissionMorning);
  252. jsonObjectMorning.put("readAvg", readAvgMorning);
  253. jsonObjectMorning.put("rate", sumFissionMorning / readAvgMorning);
  254. jsonObjectMorning.put("sumRead", sumReadMorning);
  255. jsonObjectMorning.put("sumFans", sumFansMorning);
  256. jsonObjectMorning.put("阅读率", sumReadMorning / (double) sumFansMorning);
  257. result.add(jsonObjectMorning);
  258. JSONObject jsonObjectMoon = new JSONObject();
  259. jsonObjectMoon.put("时间", "中午");
  260. jsonObjectMoon.put("sumFission", sumFissionMoon);
  261. jsonObjectMoon.put("readAvg", readAvgMoon);
  262. jsonObjectMoon.put("rate", sumFissionMoon / readAvgMoon);
  263. jsonObjectMoon.put("sumRead", sumReadNoon);
  264. jsonObjectMoon.put("sumFans", sumFansMoon);
  265. jsonObjectMoon.put("阅读率", sumReadNoon / (double) sumFansMoon);
  266. result.add(jsonObjectMoon);
  267. System.out.println(JSONObject.toJSONString(result));
  268. }
  269. @Test
  270. public void exportScoreData() {
  271. List<String> strategies = Arrays.asList("ArticleRankV11", "ArticleRankV12");
  272. List<PublishSortLog> sortLogList = publishSortLogRepository.findByStrategyInAndDateStrGreaterThanEqual(strategies, "20240928");
  273. sortLogList = sortLogList.stream().filter(o -> o.getIndex() == 1).collect(Collectors.toList());
  274. sortLogList.sort(Comparator.comparing(PublishSortLog::getGhId).thenComparing(PublishSortLog::getDateStr));
  275. List<String> ghIds = sortLogList.stream().map(PublishSortLog::getGhId).distinct().collect(Collectors.toList());
  276. List<Article> articleList = articleRepository.getByGhIdInAndPublishTimestampGreaterThanAndTypeEquals(ghIds, 1727452800L, ArticleTypeEnum.QUNFA.getVal());
  277. articleList = articleList.stream().filter(o -> o.getItemIndex() == 1).collect(Collectors.toList());
  278. Map<String, Map<String, Article>> articleMap = articleList.stream().collect(Collectors.groupingBy(Article::getGhId, Collectors.toMap(
  279. o -> DateUtils.timestampToYMDStr(o.getPublishTimestamp(),"yyyyMMdd"), o -> o,
  280. (existing, replacement) -> replacement)));
  281. List<AccountAvgInfo> accountAvgInfoList = accountAvgInfoRepository.getAllByGhIdIn(new HashSet<>(ghIds));
  282. Map<String, Map<String, AccountAvgInfo>> accountAvgInfoMap = accountAvgInfoList.stream()
  283. .filter(o -> Objects.equals(o.getPosition(), "1")).collect(Collectors.groupingBy(AccountAvgInfo::getGhId,
  284. Collectors.toMap(AccountAvgInfo::getUpdateTime, o -> o)));
  285. Workbook workbook = new XSSFWorkbook();
  286. Sheet sheet = workbook.createSheet("ExampleSheet");
  287. int rowNum = 0;
  288. // 创建标题行
  289. Row titleRow = sheet.createRow(rowNum);
  290. Cell titleCell = titleRow.createCell(0);
  291. titleCell.setCellValue("日期");
  292. titleCell = titleRow.createCell(1);
  293. titleCell.setCellValue("ghID");
  294. titleCell = titleRow.createCell(2);
  295. titleCell.setCellValue("账号名称");
  296. titleCell = titleRow.createCell(3);
  297. titleCell.setCellValue("标题");
  298. titleCell = titleRow.createCell(4);
  299. titleCell.setCellValue("策略");
  300. titleCell = titleRow.createCell(5);
  301. titleCell.setCellValue("得分");
  302. titleCell = titleRow.createCell(6);
  303. titleCell.setCellValue("HisFissionFansRateRateStrategy");
  304. titleCell = titleRow.createCell(7);
  305. titleCell.setCellValue("HisFissionAvgReadRateRateStrategy");
  306. titleCell = titleRow.createCell(8);
  307. titleCell.setCellValue("PublishTimesStrategy");
  308. titleCell = titleRow.createCell(9);
  309. titleCell.setCellValue("ViewCountRateCorrelationStrategy");
  310. titleCell = titleRow.createCell(10);
  311. titleCell.setCellValue("HisFissionAvgReadSumRateStrategy");
  312. titleCell = titleRow.createCell(11);
  313. titleCell.setCellValue("HisFissionAvgReadRateCorrelationRateStrategy");
  314. titleCell = titleRow.createCell(12);
  315. titleCell.setCellValue("HisFissionFansSumRateStrategy");
  316. titleCell = titleRow.createCell(13);
  317. titleCell.setCellValue("SimilarityStrategy");
  318. titleCell = titleRow.createCell(14);
  319. titleCell.setCellValue("ViewCountStrategy");
  320. titleCell = titleRow.createCell(15);
  321. titleCell.setCellValue("ViewCountRateStrategy");
  322. titleCell = titleRow.createCell(16);
  323. titleCell.setCellValue("HisFissionDeWeightAvgReadSumRateStrategy");
  324. titleCell = titleRow.createCell(17);
  325. titleCell.setCellValue("阅读量");
  326. titleCell = titleRow.createCell(18);
  327. titleCell.setCellValue("阅读均值");
  328. titleCell = titleRow.createCell(19);
  329. titleCell.setCellValue("阅读均值倍数");
  330. // 填充数据
  331. String title = "";
  332. for (PublishSortLog publishSortLog : sortLogList) {
  333. Map<String, Article> dateArticleMap = articleMap.get(publishSortLog.getGhId());
  334. Article article = dateArticleMap.get(publishSortLog.getDateStr());
  335. if (Objects.isNull(article) || !publishSortLog.getTitle().equals(article.getTitle())) {
  336. continue;
  337. }
  338. if (publishSortLog.getTitle().equals(title)) {
  339. continue;
  340. }
  341. title = publishSortLog.getTitle();
  342. rowNum++;
  343. Row row = sheet.createRow(rowNum);
  344. Cell cell = row.createCell(0);
  345. cell.setCellValue(publishSortLog.getDateStr());
  346. cell = row.createCell(1);
  347. cell.setCellValue(publishSortLog.getGhId());
  348. cell = row.createCell(2);
  349. cell.setCellValue(publishSortLog.getAccountName());
  350. cell = row.createCell(3);
  351. cell.setCellValue(publishSortLog.getTitle());
  352. cell = row.createCell(4);
  353. cell.setCellValue(publishSortLog.getStrategy());
  354. cell = row.createCell(5);
  355. cell.setCellValue(publishSortLog.getScore());
  356. cell = row.createCell(6);
  357. JSONObject scoreMap = JSONObject.parseObject(publishSortLog.getScoreMap());
  358. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansRateRateStrategy")).orElse(0.0)));
  359. cell = row.createCell(7);
  360. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateRateStrategy")).orElse(0.0)));
  361. cell = row.createCell(8);
  362. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("PublishTimesStrategy")).orElse(0.0)));
  363. cell = row.createCell(9);
  364. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountRateCorrelationStrategy")).orElse(0.0)));
  365. cell = row.createCell(10);
  366. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadSumRateStrategy")).orElse(0.0)));
  367. cell = row.createCell(11);
  368. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateCorrelationRateStrategy")).orElse(0.0)));
  369. cell = row.createCell(12);
  370. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansSumRateStrategy")).orElse(0.0)));
  371. cell = row.createCell(13);
  372. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("SimilarityStrategy")).orElse(0.0)));
  373. cell = row.createCell(14);
  374. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountStrategy")).orElse(0.0)));
  375. cell = row.createCell(15);
  376. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountRateStrategy")).orElse(0.0)));
  377. cell = row.createCell(16);
  378. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionDeWeightAvgReadSumRateStrategy")).orElse(0.0)));
  379. cell = row.createCell(17);
  380. cell.setCellValue(article.getShowViewCount());
  381. cell = row.createCell(18);
  382. Map<String, AccountAvgInfo> map = accountAvgInfoMap.get(article.getGhId());
  383. if (Objects.nonNull(map)) {
  384. List<String> avgMapDateList = new ArrayList<>(map.keySet());
  385. String publishDate = DateUtils.findNearestDate(avgMapDateList,
  386. DateUtils.timestampToYMDStr(article.getPublishTimestamp(), "yyyy-MM-dd"), "yyyy-MM-dd");
  387. AccountAvgInfo accountAvgInfo = map.get(publishDate);
  388. if (Objects.nonNull(accountAvgInfo)) {
  389. cell.setCellValue(accountAvgInfo.getReadAvg());
  390. cell = row.createCell(19);
  391. cell.setCellValue(String.format("%.3f", article.getShowViewCount() / (double) accountAvgInfo.getReadAvg()));
  392. }
  393. }
  394. }
  395. try (FileOutputStream outputStream = new FileOutputStream("/Users/wangyunpeng/Downloads/example.xlsx")) {
  396. workbook.write(outputStream);
  397. } catch (IOException e) {
  398. e.printStackTrace();
  399. } finally {
  400. try {
  401. workbook.close();
  402. } catch (IOException e) {
  403. e.printStackTrace();
  404. }
  405. }
  406. }
  407. @Test
  408. public void exportFromAliyunLog() {
  409. String folderPath = "/Users/wangyunpeng/Downloads/longarticle-recommend-server-test_info-log_20241014_150245.json";
  410. File file = new File(folderPath);
  411. Workbook workbook = new XSSFWorkbook();
  412. Sheet sheet = workbook.createSheet("ExampleSheet");
  413. int rowNum = 0;
  414. // 创建标题行
  415. Row titleRow = sheet.createRow(rowNum);
  416. Cell titleCell = titleRow.createCell(0);
  417. titleCell.setCellValue("日期");
  418. titleCell = titleRow.createCell(1);
  419. titleCell.setCellValue("账号名称");
  420. titleCell = titleRow.createCell(2);
  421. titleCell.setCellValue("id");
  422. titleCell = titleRow.createCell(3);
  423. titleCell.setCellValue("标题");
  424. titleCell = titleRow.createCell(4);
  425. titleCell.setCellValue("策略");
  426. titleCell = titleRow.createCell(5);
  427. titleCell.setCellValue("得分");
  428. titleCell = titleRow.createCell(6);
  429. titleCell.setCellValue("HisFissionFansRateRateStrategy");
  430. titleCell = titleRow.createCell(7);
  431. titleCell.setCellValue("HisFissionAvgReadRateRateStrategy");
  432. titleCell = titleRow.createCell(8);
  433. titleCell.setCellValue("PublishTimesStrategy");
  434. titleCell = titleRow.createCell(9);
  435. titleCell.setCellValue("ViewCountRateCorrelationStrategy");
  436. titleCell = titleRow.createCell(10);
  437. titleCell.setCellValue("HisFissionAvgReadSumRateStrategy");
  438. titleCell = titleRow.createCell(11);
  439. titleCell.setCellValue("HisFissionAvgReadRateCorrelationRateStrategy");
  440. titleCell = titleRow.createCell(12);
  441. titleCell.setCellValue("HisFissionFansSumRateStrategy");
  442. titleCell = titleRow.createCell(13);
  443. titleCell.setCellValue("SimilarityStrategy");
  444. titleCell = titleRow.createCell(14);
  445. titleCell.setCellValue("ViewCountStrategy");
  446. titleCell = titleRow.createCell(15);
  447. titleCell.setCellValue("ViewCountRateStrategy");
  448. titleCell = titleRow.createCell(16);
  449. titleCell.setCellValue("HisFissionDeWeightAvgReadSumRateStrategy");
  450. try {
  451. String content = new String(Files.readAllBytes(file.toPath()), StandardCharsets.UTF_8);
  452. JSONArray jsonArray = JSONArray.parseArray(content);
  453. for (Object o : jsonArray) {
  454. JSONObject jsonObject = (JSONObject) o;
  455. Long time = jsonObject.getLong("__time__");
  456. String message = jsonObject.getString("message");
  457. int index = message.indexOf("[");
  458. String info = message.substring(0, index);
  459. String strategy = info.substring(0, info.indexOf(" "));
  460. String accountName = info.substring(info.indexOf(" ")).replace("账号名称 ", "")
  461. .replace(" 头条评分结果", "");
  462. String json = message.substring(index);
  463. JSONArray scoreArray = JSONArray.parseArray(json);
  464. for (Object scoreJSON : scoreArray) {
  465. JSONObject scoreObject = (JSONObject) scoreJSON;
  466. String id = scoreObject.getString("id");
  467. String title = scoreObject.getString("title");
  468. String score = scoreObject.getString("score");
  469. String scoreMapStr = scoreObject.getString("scoreMap");
  470. rowNum++;
  471. Row row = sheet.createRow(rowNum);
  472. Cell cell = row.createCell(0);
  473. cell.setCellValue(DateUtils.timestampToYMDStr(time, "yyyyMMdd"));
  474. cell = row.createCell(1);
  475. cell.setCellValue(accountName);
  476. cell = row.createCell(2);
  477. cell.setCellValue(id);
  478. cell = row.createCell(3);
  479. cell.setCellValue(title);
  480. cell = row.createCell(4);
  481. cell.setCellValue(strategy);
  482. cell = row.createCell(5);
  483. cell.setCellValue(score);
  484. cell = row.createCell(6);
  485. JSONObject scoreMap = JSONObject.parseObject(scoreMapStr);
  486. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansRateRateStrategy")).orElse(0.0)));
  487. cell = row.createCell(7);
  488. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateRateStrategy")).orElse(0.0)));
  489. cell = row.createCell(8);
  490. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("PublishTimesStrategy")).orElse(0.0)));
  491. cell = row.createCell(9);
  492. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountRateCorrelationStrategy")).orElse(0.0)));
  493. cell = row.createCell(10);
  494. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadSumRateStrategy")).orElse(0.0)));
  495. cell = row.createCell(11);
  496. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateCorrelationRateStrategy")).orElse(0.0)));
  497. cell = row.createCell(12);
  498. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansSumRateStrategy")).orElse(0.0)));
  499. cell = row.createCell(13);
  500. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("SimilarityStrategy")).orElse(0.0)));
  501. cell = row.createCell(14);
  502. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountStrategy")).orElse(0.0)));
  503. cell = row.createCell(15);
  504. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountRateStrategy")).orElse(0.0)));
  505. cell = row.createCell(16);
  506. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionDeWeightAvgReadSumRateStrategy")).orElse(0.0)));
  507. }
  508. }
  509. try (FileOutputStream outputStream = new FileOutputStream("/Users/wangyunpeng/Downloads/example.xlsx")) {
  510. workbook.write(outputStream);
  511. } catch (IOException e) {
  512. e.printStackTrace();
  513. } finally {
  514. try {
  515. workbook.close();
  516. } catch (IOException e) {
  517. e.printStackTrace();
  518. }
  519. }
  520. } catch (Exception e) {
  521. log.error("readFileError fileName:{}", file.getName(), e);
  522. }
  523. }
  524. @Test
  525. public void account() {
  526. List<String> ghIds = Arrays.asList("gh_d7fa1998b4e1", "gh_52100b6803fb", "gh_8d7fc54d5026");
  527. List<String> accountNames = Arrays.asList("生活超读", "灵读生活", "生活情感读");
  528. List<Integer> fans = Arrays.asList(85759, 103083, 79214);
  529. List<Article> articleList = articleRepository.getByGhIdInAndPublishTimestampLessThanAndTypeEquals(
  530. Arrays.asList("gh_02f5bca5b5d9"), 1729353600L, ArticleTypeEnum.QUNFA.getVal());
  531. for (int i = 0; i < ghIds.size(); i++) {
  532. String ghId = ghIds.get(i);
  533. String accountName = accountNames.get(i);
  534. Integer fanCount = fans.get(i);
  535. Double rate = fanCount / 233474.0;
  536. for (Article article : articleList) {
  537. Article saveItem = new Article();
  538. BeanUtils.copyProperties(article, saveItem);
  539. saveItem.setGhId(ghId);
  540. saveItem.setAccountName(accountName);
  541. saveItem.setShowViewCount((int) (article.getShowViewCount() * rate));
  542. saveItem.setWxSn(UUID.randomUUID().toString().replace("-", ""));
  543. articleRepository.save(saveItem);
  544. }
  545. }
  546. }
  547. @Test
  548. public void correlation() {
  549. List<String> ghIds = Lists.newArrayList("gh_e24da99dc899",
  550. "gh_183d80deffb8",
  551. "gh_be8c29139989",
  552. "gh_c69776baf2cd",
  553. "gh_b15de7c99912",
  554. "gh_1d887d61088c",
  555. "gh_3ed305b5817f",
  556. "gh_3e91f0624545",
  557. "gh_30816d8adb52",
  558. "gh_970460d9ccec",
  559. "gh_749271f1ccd5",
  560. "gh_ac43e43b253b"
  561. );
  562. List<PublishSortLog> sortLogList = publishSortLogRepository.findByGhIdInAndDateStrGreaterThanEqual(ghIds, "20240907");
  563. sortLogList = sortLogList.stream().filter(o -> o.getIndex() == 1).collect(Collectors.toList());
  564. sortLogList.sort(Comparator.comparing(PublishSortLog::getGhId).thenComparing(PublishSortLog::getDateStr));
  565. List<Article> articleList = articleRepository.getByGhIdInAndUpdateTimeGreaterThanAndTypeEquals(ghIds, 1725638400L, "9");
  566. articleList = articleList.stream().filter(o -> o.getItemIndex() == 1).collect(Collectors.toList());
  567. Map<String, Map<String, Article>> articleMap = articleList.stream().collect(Collectors.groupingBy(Article::getGhId, Collectors.toMap(
  568. o -> DateUtils.timestampToYMDStr(o.getUpdateTime(), "yyyyMMdd"), o -> o,
  569. (existing, replacement) -> replacement)));
  570. List<AccountAvgInfo> accountAvgInfoList = accountAvgInfoRepository.getAllByGhIdIn(new HashSet<>(ghIds));
  571. Map<String, Map<String, AccountAvgInfo>> accountAvgInfoMap = accountAvgInfoList.stream()
  572. .filter(o -> Objects.equals(o.getPosition(), "1")).collect(Collectors.groupingBy(AccountAvgInfo::getGhId,
  573. Collectors.toMap(AccountAvgInfo::getUpdateTime, o -> o)));
  574. int rowNum = 0;
  575. Map<String, List<PublishSortLog>> sortLogMap = sortLogList.stream().collect(Collectors.groupingBy(PublishSortLog::getGhId));
  576. PearsonsCorrelation correlation = new PearsonsCorrelation();
  577. Workbook workbook = new XSSFWorkbook();
  578. Sheet sheet = workbook.createSheet("ExampleSheet");
  579. // 创建标题行
  580. Row titleRow = sheet.createRow(rowNum);
  581. for (Map.Entry<String, List<PublishSortLog>> entry : sortLogMap.entrySet()) {
  582. String ghId = entry.getKey();
  583. String name = entry.getValue().get(0).getAccountName();
  584. List<PublishSortLog> itemList = entry.getValue();
  585. String title = "";
  586. double[] scoreArr = new double[itemList.size()];
  587. double[] HisFissionFansRateRateStrategyArr = new double[itemList.size()];
  588. double[] HisFissionAvgReadRateRateStrategyArr = new double[itemList.size()];
  589. double[] PublishTimesStrategyArr = new double[itemList.size()];
  590. double[] ViewCountRateCorrelationStrategyArr = new double[itemList.size()];
  591. double[] HisFissionAvgReadSumRateStrategyArr = new double[itemList.size()];
  592. double[] HisFissionAvgReadRateCorrelationRateStrategyArr = new double[itemList.size()];
  593. double[] HisFissionFansSumRateStrategyArr = new double[itemList.size()];
  594. double[] SimilarityStrategyArr = new double[itemList.size()];
  595. double[] ViewCountStrategyArr = new double[itemList.size()];
  596. double[] ViewCountRateStrategyArr = new double[itemList.size()];
  597. double[] HisFissionDeWeightAvgReadSumRateStrategyArr = new double[itemList.size()];
  598. double[] scoreRateArr = new double[itemList.size()];
  599. for (int i = 0; i < itemList.size(); i++) {
  600. PublishSortLog publishSortLog = itemList.get(i);
  601. Map<String, Article> dateArticleMap = articleMap.get(publishSortLog.getGhId());
  602. Article article = dateArticleMap.get(publishSortLog.getDateStr());
  603. if (Objects.isNull(article) || !publishSortLog.getTitle().equals(article.getTitle())) {
  604. continue;
  605. }
  606. if (publishSortLog.getTitle().equals(title)) {
  607. continue;
  608. }
  609. title = publishSortLog.getTitle();
  610. scoreArr[i] = Double.parseDouble(publishSortLog.getScore());
  611. JSONObject scoreMap = JSONObject.parseObject(publishSortLog.getScoreMap());
  612. HisFissionFansRateRateStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansRateRateStrategy")).orElse(0.0)));
  613. HisFissionAvgReadRateRateStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateRateStrategy")).orElse(0.0)));
  614. PublishTimesStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("PublishTimesStrategy")).orElse(0.0)));
  615. ViewCountRateCorrelationStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountRateCorrelationStrategy")).orElse(0.0)));
  616. HisFissionAvgReadSumRateStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadSumRateStrategy")).orElse(0.0)));
  617. HisFissionAvgReadRateCorrelationRateStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateCorrelationRateStrategy")).orElse(0.0)));
  618. HisFissionFansSumRateStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansSumRateStrategy")).orElse(0.0)));
  619. SimilarityStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("SimilarityStrategy")).orElse(0.0)));
  620. ViewCountStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountStrategy")).orElse(0.0)));
  621. ViewCountRateStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountRateStrategy")).orElse(0.0)));
  622. HisFissionDeWeightAvgReadSumRateStrategyArr[i] = Double.parseDouble(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionDeWeightAvgReadSumRateStrategy")).orElse(0.0)));
  623. Map<String, AccountAvgInfo> map = accountAvgInfoMap.get(article.getGhId());
  624. if (Objects.nonNull(map)) {
  625. List<String> avgMapDateList = new ArrayList<>(map.keySet());
  626. String publishDate = DateUtils.findNearestDate(avgMapDateList,
  627. DateUtils.timestampToYMDStr(article.getUpdateTime(), "yyyy-MM-dd"), "yyyy-MM-dd");
  628. AccountAvgInfo accountAvgInfo = map.get(publishDate);
  629. if (Objects.nonNull(accountAvgInfo)) {
  630. scoreRateArr[i] = Double.parseDouble(String.format("%.3f", article.getShowViewCount() / (double) accountAvgInfo.getReadAvg()));
  631. }
  632. }
  633. }
  634. rowNum++;
  635. Row row = sheet.createRow(rowNum);
  636. Cell cell = row.createCell(0);
  637. cell = row.createCell(1);
  638. cell.setCellValue(ghId);
  639. cell = row.createCell(2);
  640. cell.setCellValue(name);
  641. cell = row.createCell(3);
  642. cell = row.createCell(4);
  643. cell = row.createCell(5);
  644. cell.setCellValue(correlation.correlation(scoreArr, scoreRateArr));
  645. cell = row.createCell(6);
  646. cell.setCellValue(correlation.correlation(HisFissionFansRateRateStrategyArr, scoreRateArr));
  647. cell = row.createCell(7);
  648. cell.setCellValue(correlation.correlation(HisFissionAvgReadRateRateStrategyArr, scoreRateArr));
  649. cell = row.createCell(8);
  650. cell.setCellValue(correlation.correlation(PublishTimesStrategyArr, scoreRateArr));
  651. cell = row.createCell(9);
  652. cell.setCellValue(correlation.correlation(ViewCountRateCorrelationStrategyArr, scoreRateArr));
  653. cell = row.createCell(10);
  654. cell.setCellValue(correlation.correlation(HisFissionAvgReadSumRateStrategyArr, scoreRateArr));
  655. cell = row.createCell(11);
  656. cell.setCellValue(correlation.correlation(HisFissionAvgReadRateCorrelationRateStrategyArr, scoreRateArr));
  657. cell = row.createCell(12);
  658. cell.setCellValue(correlation.correlation(HisFissionFansSumRateStrategyArr, scoreRateArr));
  659. cell = row.createCell(13);
  660. cell.setCellValue(correlation.correlation(SimilarityStrategyArr, scoreRateArr));
  661. cell = row.createCell(14);
  662. cell.setCellValue(correlation.correlation(ViewCountStrategyArr, scoreRateArr));
  663. cell = row.createCell(15);
  664. cell.setCellValue(correlation.correlation(ViewCountRateStrategyArr, scoreRateArr));
  665. cell = row.createCell(16);
  666. cell.setCellValue(correlation.correlation(HisFissionDeWeightAvgReadSumRateStrategyArr, scoreRateArr));
  667. }
  668. try (FileOutputStream outputStream = new FileOutputStream("/Users/wangyunpeng/Downloads/example.xlsx")) {
  669. workbook.write(outputStream);
  670. } catch (IOException e) {
  671. e.printStackTrace();
  672. } finally {
  673. try {
  674. workbook.close();
  675. } catch (IOException e) {
  676. e.printStackTrace();
  677. }
  678. }
  679. }
  680. @Test
  681. void getScoreFromLogFile() {
  682. String folderPath = "/Users/wangyunpeng/Downloads/b78020b8-d9df-466f-bd01-cd982bb986d0.json";
  683. File file = new File(folderPath);
  684. Workbook workbook = new XSSFWorkbook();
  685. Sheet sheet = workbook.createSheet("ExampleSheet");
  686. int rowNum = 0;
  687. // 创建标题行
  688. Row titleRow = sheet.createRow(rowNum);
  689. Cell titleCell = titleRow.createCell(0);
  690. titleCell.setCellValue("日期");
  691. titleCell = titleRow.createCell(1);
  692. titleCell.setCellValue("账号名称");
  693. titleCell = titleRow.createCell(2);
  694. titleCell.setCellValue("id");
  695. titleCell = titleRow.createCell(3);
  696. titleCell.setCellValue("标题");
  697. titleCell = titleRow.createCell(4);
  698. titleCell.setCellValue("策略");
  699. titleCell = titleRow.createCell(5);
  700. titleCell.setCellValue("得分");
  701. titleCell = titleRow.createCell(6);
  702. titleCell.setCellValue("HisFissionFansRateRateStrategy");
  703. titleCell = titleRow.createCell(7);
  704. titleCell.setCellValue("HisFissionAvgReadRateRateStrategy");
  705. titleCell = titleRow.createCell(8);
  706. titleCell.setCellValue("PublishTimesStrategy");
  707. titleCell = titleRow.createCell(9);
  708. titleCell.setCellValue("ViewCountRateCorrelationStrategy");
  709. titleCell = titleRow.createCell(10);
  710. titleCell.setCellValue("HisFissionAvgReadSumRateStrategy");
  711. titleCell = titleRow.createCell(11);
  712. titleCell.setCellValue("HisFissionAvgReadRateCorrelationRateStrategy");
  713. titleCell = titleRow.createCell(12);
  714. titleCell.setCellValue("HisFissionFansSumRateStrategy");
  715. titleCell = titleRow.createCell(13);
  716. titleCell.setCellValue("SimilarityStrategy");
  717. titleCell = titleRow.createCell(14);
  718. titleCell.setCellValue("ViewCountStrategy");
  719. titleCell = titleRow.createCell(15);
  720. titleCell.setCellValue("ViewCountRateStrategy");
  721. titleCell = titleRow.createCell(16);
  722. titleCell.setCellValue("HisFissionDeWeightAvgReadSumRateStrategy");
  723. try {
  724. String content = new String(Files.readAllBytes(file.toPath()), StandardCharsets.UTF_8);
  725. JSONArray jsonArray = JSONArray.parseArray(content);
  726. for (Object o : jsonArray) {
  727. JSONObject jsonObject = (JSONObject) o;
  728. Long time = jsonObject.getLong("__time__");
  729. String message = jsonObject.getString("message");
  730. int index = message.indexOf("[");
  731. String info = message.substring(0, index);
  732. String strategy = info.substring(0, info.indexOf(" "));
  733. String accountName = info.substring(info.indexOf(" ")).replace("账号名称 ", "")
  734. .replace(" 头条评分结果", "");
  735. String json = message.substring(index);
  736. JSONArray scoreArray = JSONArray.parseArray(json);
  737. for (Object scoreJSON : scoreArray) {
  738. JSONObject scoreObject = (JSONObject) scoreJSON;
  739. String id = scoreObject.getString("id");
  740. String title = scoreObject.getString("title");
  741. String score = scoreObject.getString("score");
  742. String scoreMapStr = scoreObject.getString("scoreMap");
  743. rowNum++;
  744. Row row = sheet.createRow(rowNum);
  745. Cell cell = row.createCell(0);
  746. cell.setCellValue(DateUtils.timestampToYMDStr(time, "yyyyMMdd"));
  747. cell = row.createCell(1);
  748. cell.setCellValue(accountName);
  749. cell = row.createCell(2);
  750. cell.setCellValue(id);
  751. cell = row.createCell(3);
  752. cell.setCellValue(title);
  753. cell = row.createCell(4);
  754. cell.setCellValue(strategy);
  755. cell = row.createCell(5);
  756. cell.setCellValue(score);
  757. cell = row.createCell(6);
  758. JSONObject scoreMap = JSONObject.parseObject(scoreMapStr);
  759. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansRateRateStrategy")).orElse(0.0)));
  760. cell = row.createCell(7);
  761. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateRateStrategy")).orElse(0.0)));
  762. cell = row.createCell(8);
  763. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("PublishTimesStrategy")).orElse(0.0)));
  764. cell = row.createCell(9);
  765. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountRateCorrelationStrategy")).orElse(0.0)));
  766. cell = row.createCell(10);
  767. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadSumRateStrategy")).orElse(0.0)));
  768. cell = row.createCell(11);
  769. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionAvgReadRateCorrelationRateStrategy")).orElse(0.0)));
  770. cell = row.createCell(12);
  771. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionFansSumRateStrategy")).orElse(0.0)));
  772. cell = row.createCell(13);
  773. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("SimilarityStrategy")).orElse(0.0)));
  774. cell = row.createCell(14);
  775. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountStrategy")).orElse(0.0)));
  776. cell = row.createCell(15);
  777. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("ViewCountRateStrategy")).orElse(0.0)));
  778. cell = row.createCell(16);
  779. cell.setCellValue(String.format("%.3f", Optional.of(scoreMap.getDoubleValue("HisFissionDeWeightAvgReadSumRateStrategy")).orElse(0.0)));
  780. }
  781. }
  782. try (FileOutputStream outputStream = new FileOutputStream("/Users/wangyunpeng/Downloads/example.xlsx")) {
  783. workbook.write(outputStream);
  784. } catch (IOException e) {
  785. e.printStackTrace();
  786. } finally {
  787. try {
  788. workbook.close();
  789. } catch (IOException e) {
  790. e.printStackTrace();
  791. }
  792. }
  793. } catch (Exception e) {
  794. log.error("readFileError fileName:{}", file.getName(), e);
  795. }
  796. }
  797. @Test
  798. public void checkTest() {
  799. List<Article> articleList = articleRepository.getByPublishTimestampGreaterThan(1732982400L);
  800. List<Article> singleArticleList = new ArrayList<>();
  801. for (Article article : articleList) {
  802. if (Objects.nonNull(article.getRootSourceIdList())) {
  803. try {
  804. List<String> rootSourceIdList = JSONArray.parseArray(article.getRootSourceIdList(), String.class);
  805. if (rootSourceIdList.size() == 1) {
  806. singleArticleList.add(article);
  807. }
  808. } catch (Exception ignore) {
  809. }
  810. }
  811. }
  812. List<String> ghIds = singleArticleList.stream().map(Article::getGhId).distinct().collect(Collectors.toList());
  813. List<PublishAccount> publishAccountList = publishAccountRepository.getAllByGhIdIn(ghIds);
  814. log.info("newSortStrategyData publishAccountList finish");
  815. Map<String, PublishAccount> publishAccountMap = publishAccountList.stream().collect(Collectors.toMap(PublishAccount::getGhId, o -> o));
  816. // 获取发布内容
  817. List<PublishContentParam> publishContentParamList = singleArticleList.stream().map(article -> {
  818. PublishContentParam item = new PublishContentParam();
  819. item.setTitle(article.getTitle());
  820. PublishAccount account = publishAccountMap.get(article.getGhId());
  821. if (Objects.nonNull(account)) {
  822. item.setPublishAccountId(account.getId());
  823. return item;
  824. }
  825. return null;
  826. }).filter(Objects::nonNull).collect(Collectors.toList());
  827. List<PublishContentDTO> publishContents = new ArrayList<>();
  828. for (List<PublishContentParam> partitions : Lists.partition(publishContentParamList, 100)) {
  829. publishContents.addAll(aigcBaseMapper.getPublishContentByTitle(partitions));
  830. }
  831. List<String> publishContentIds = publishContents.stream().map(PublishContentDTO::getId).collect(Collectors.toList());
  832. for (List<String> partition : Lists.partition(publishContentIds, 500)) {
  833. aigcBaseMapper.updatePublishContentSingleMiniProgram(partition);
  834. }
  835. }
  836. }