|
@@ -3,6 +3,7 @@ package com.tzld.longarticle.recommend.server.service;
|
|
|
import cn.hutool.core.collection.CollectionUtil;
|
|
|
import com.alibaba.fastjson.JSONArray;
|
|
|
import com.alibaba.fastjson.JSONObject;
|
|
|
+import com.google.common.collect.Lists;
|
|
|
import com.tzld.longarticle.recommend.server.mapper.crawler.CrawlerBaseMapper;
|
|
|
import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
|
|
|
import com.tzld.longarticle.recommend.server.model.dto.*;
|
|
@@ -59,37 +60,43 @@ public class DataFlushService {
|
|
|
}
|
|
|
|
|
|
public void flushLongArticlesCrawlerVideos(Integer pageNum) {
|
|
|
- int pageSize = 1000;
|
|
|
- if (pageNum == null) {
|
|
|
- pageNum = 1;
|
|
|
- }
|
|
|
- int count = crawlerBaseMapper.countArticleMatchVideos();
|
|
|
- int totalPage = count / pageSize + 1;
|
|
|
- while (pageNum <= totalPage) {
|
|
|
- int offset = (pageNum - 1) * pageSize;
|
|
|
- List<ArticleMatchVideos> list = crawlerBaseMapper.pageArticleMatchVideos(offset, pageSize);
|
|
|
- List<LongArticlesCrawlerVideos> batchSaveList = new ArrayList<>();
|
|
|
- for (ArticleMatchVideos video : list) {
|
|
|
- if (!StringUtils.hasText(video.getVideoPath())) {
|
|
|
- continue;
|
|
|
+ List<ArticleMatchVideos> list = crawlerBaseMapper.pageArticleMatchVideos();
|
|
|
+ list = list.stream().filter(o ->StringUtils.hasText(o.getVideoPath())).collect(Collectors.toList());
|
|
|
+ Map<String, List<ArticleMatchVideos>> map = list.stream().collect(Collectors.groupingBy(ArticleMatchVideos::getContentId));
|
|
|
+ List<LongArticlesCrawlerVideos> batchSaveList = new ArrayList<>();
|
|
|
+ for (ArticleMatchVideos video : list) {
|
|
|
+ List<ArticleMatchVideos> mapList = map.get(video.getContentId());
|
|
|
+ List<Date> orderDate = mapList.stream().map(ArticleMatchVideos::getUpdateTime)
|
|
|
+ .sorted().collect(Collectors.toList());
|
|
|
+ double score = 0.2;
|
|
|
+ for (int i = 0; i < orderDate.size(); i++) {
|
|
|
+ if (orderDate.get(i).equals(video.getUpdateTime())) {
|
|
|
+ if (i == 0) {
|
|
|
+ score = 1;
|
|
|
+ } else if (i == 1) {
|
|
|
+ score = 0.5;
|
|
|
+ } else {
|
|
|
+ break;
|
|
|
+ }
|
|
|
}
|
|
|
- LongArticlesCrawlerVideos saveItem = new LongArticlesCrawlerVideos();
|
|
|
- saveItem.setContentId(video.getContentId());
|
|
|
- saveItem.setPlatform(video.getPlatform());
|
|
|
- saveItem.setVideoTitle(video.getVideoTitle());
|
|
|
- saveItem.setCrawlerTime(video.getUpdateTime());
|
|
|
- saveItem.setVideoOssPath(video.getVideoPath());
|
|
|
- saveItem.setCoverOssPath(video.getCoverPath());
|
|
|
- saveItem.setUserId(video.getUid());
|
|
|
- saveItem.setTraceId(video.getTraceId());
|
|
|
- saveItem.setDownloadStatus(2);
|
|
|
- batchSaveList.add(saveItem);
|
|
|
}
|
|
|
- if (!CollectionUtils.isEmpty(batchSaveList)) {
|
|
|
- longArticleBaseMapper.batchInsertLongArticlesCrawlerVideos(batchSaveList);
|
|
|
+ LongArticlesCrawlerVideos saveItem = new LongArticlesCrawlerVideos();
|
|
|
+ saveItem.setContentId(video.getContentId());
|
|
|
+ saveItem.setPlatform(video.getPlatform());
|
|
|
+ saveItem.setVideoTitle(video.getVideoTitle());
|
|
|
+ saveItem.setCrawlerTime(video.getUpdateTime());
|
|
|
+ saveItem.setVideoOssPath(video.getVideoPath());
|
|
|
+ saveItem.setCoverOssPath(video.getCoverPath());
|
|
|
+ saveItem.setUserId(video.getUid());
|
|
|
+ saveItem.setTraceId(video.getTraceId());
|
|
|
+ saveItem.setDownloadStatus(2);
|
|
|
+ saveItem.setScore(score);
|
|
|
+ batchSaveList.add(saveItem);
|
|
|
+ }
|
|
|
+ if (!CollectionUtils.isEmpty(batchSaveList)) {
|
|
|
+ for (List<LongArticlesCrawlerVideos> partition : Lists.partition(batchSaveList, 1000)) {
|
|
|
+ longArticleBaseMapper.batchInsertLongArticlesCrawlerVideos(partition);
|
|
|
}
|
|
|
- log.info("flushLongArticlesCrawlerVideos pageNum:{} totalPage:{}", pageNum, totalPage);
|
|
|
- pageNum++;
|
|
|
}
|
|
|
}
|
|
|
|