|
@@ -38,6 +38,7 @@ public class DataFlushService {
|
|
|
}
|
|
|
int count = crawlerBaseMapper.countGetOffVideos();
|
|
|
int totalPage = count / pageSize + 1;
|
|
|
+ int flushNum = 0;
|
|
|
while (pageNum <= totalPage) {
|
|
|
int offset = (pageNum - 1) * pageSize;
|
|
|
List<GetOffVideos> list = crawlerBaseMapper.pageGetOffVideos(offset, pageSize);
|
|
@@ -49,9 +50,11 @@ public class DataFlushService {
|
|
|
if (CollectionUtil.isNotEmpty(list)) {
|
|
|
longArticleBaseMapper.batchInsertGetOffVideos(list);
|
|
|
}
|
|
|
- log.info("flushGetOffVideos pageNum:{} totalPage:{}", pageNum, totalPage);
|
|
|
+ log.info("flushGetOffVideos pageNum:{} totalPage:{} flushNum:{}", pageNum, totalPage, list.size());
|
|
|
+ flushNum += list.size();
|
|
|
pageNum++;
|
|
|
}
|
|
|
+ log.info("flushGetOffVideos flushNum:{}", flushNum);
|
|
|
}
|
|
|
|
|
|
public void flushLongArticlesRootSourceId(Integer pageNum) {
|
|
@@ -111,6 +114,7 @@ public class DataFlushService {
|
|
|
saveItem.setScore(score / 1000);
|
|
|
batchSaveList.add(saveItem);
|
|
|
}
|
|
|
+ int flushNum = 0;
|
|
|
if (!CollectionUtils.isEmpty(batchSaveList)) {
|
|
|
for (List<LongArticlesCrawlerVideos> partition : Lists.partition(batchSaveList, 1000)) {
|
|
|
List<String> contentIds = partition.stream().map(LongArticlesCrawlerVideos::getContentId).collect(Collectors.toList());
|
|
@@ -122,8 +126,10 @@ public class DataFlushService {
|
|
|
return CollectionUtil.isEmpty(itemList) || itemList.size() < 3;
|
|
|
}).collect(Collectors.toList());
|
|
|
longArticleBaseMapper.batchInsertLongArticlesCrawlerVideos(partition);
|
|
|
+ flushNum += partition.size();
|
|
|
}
|
|
|
}
|
|
|
+ log.info("flushLongArticlesCrawlerVideos flushNum:{}", flushNum);
|
|
|
}
|
|
|
|
|
|
public void flushLongArticlesVideos(Integer pageNum) {
|
|
@@ -136,6 +142,8 @@ public class DataFlushService {
|
|
|
Collectors.toMap(LongArticlesText::getContentId, o -> o, (existing, replacement) -> replacement));
|
|
|
int count = crawlerBaseMapper.countLongArticlesVideos();
|
|
|
int totalPage = count / pageSize + 1;
|
|
|
+ int longArticlesTextFlushNum = 0;
|
|
|
+ int longArticlesMatchVideosFlushNum = 0;
|
|
|
while (pageNum <= totalPage) {
|
|
|
int offset = (pageNum - 1) * pageSize;
|
|
|
List<LongArticlesVideo> list = crawlerBaseMapper.pageLongArticlesVideos(offset, pageSize);
|
|
@@ -192,6 +200,7 @@ public class DataFlushService {
|
|
|
}
|
|
|
if (CollectionUtil.isNotEmpty(batchSaveLongArticlesTextList)) {
|
|
|
longArticleBaseMapper.batchInsertLongArticlesText(batchSaveLongArticlesTextList);
|
|
|
+ longArticlesTextFlushNum += batchSaveLongArticlesTextList.size();
|
|
|
}
|
|
|
}
|
|
|
if (CollectionUtil.isNotEmpty(batchSaveLongArticlesMatchVideosList)) {
|
|
@@ -203,11 +212,14 @@ public class DataFlushService {
|
|
|
.filter(o -> !existsIds.contains(o.getTraceId())).collect(Collectors.toList());
|
|
|
if (CollectionUtil.isNotEmpty(batchSaveLongArticlesMatchVideosList)) {
|
|
|
longArticleBaseMapper.batchInsertLongArticlesMatchVideos(batchSaveLongArticlesMatchVideosList);
|
|
|
+ longArticlesMatchVideosFlushNum += batchSaveLongArticlesMatchVideosList.size();
|
|
|
}
|
|
|
}
|
|
|
log.info("flushLongArticlesVideos pageNum:{} totalPage:{}", pageNum, totalPage);
|
|
|
pageNum++;
|
|
|
}
|
|
|
+ log.info("flushLongArticlesVideos longArticlesTextFlushNum:{} longArticlesMatchVideosFlushNum:{}",
|
|
|
+ longArticlesTextFlushNum, longArticlesMatchVideosFlushNum);
|
|
|
}
|
|
|
|
|
|
private String getLongArticleVideoResponse(LongArticlesVideo longArticlesVideo) {
|