|
@@ -0,0 +1,217 @@
|
|
|
+package com.tzld.longarticle.recommend.server.service;
|
|
|
+
|
|
|
+import cn.hutool.core.collection.CollectionUtil;
|
|
|
+import com.alibaba.fastjson.JSONArray;
|
|
|
+import com.alibaba.fastjson.JSONObject;
|
|
|
+import com.tzld.longarticle.recommend.server.mapper.crawler.CrawlerBaseMapper;
|
|
|
+import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
|
|
|
+import com.tzld.longarticle.recommend.server.model.dto.*;
|
|
|
+import lombok.extern.slf4j.Slf4j;
|
|
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
+import org.springframework.stereotype.Service;
|
|
|
+import org.springframework.util.CollectionUtils;
|
|
|
+import org.springframework.util.StringUtils;
|
|
|
+
|
|
|
+import java.net.URLDecoder;
|
|
|
+import java.util.*;
|
|
|
+import java.util.stream.Collectors;
|
|
|
+
|
|
|
+@Service
|
|
|
+@Slf4j
|
|
|
+public class DataFlushService {
|
|
|
+
|
|
|
+ @Autowired
|
|
|
+ private LongArticleBaseMapper longArticleBaseMapper;
|
|
|
+ @Autowired
|
|
|
+ private CrawlerBaseMapper crawlerBaseMapper;
|
|
|
+
|
|
|
+
|
|
|
+ public void flushGetOffVideos(Integer pageNum) {
|
|
|
+ int pageSize = 1000;
|
|
|
+ if (pageNum == null) {
|
|
|
+ pageNum = 1;
|
|
|
+ }
|
|
|
+ int count = crawlerBaseMapper.countGetOffVideos();
|
|
|
+ int totalPage = count / pageSize + 1;
|
|
|
+ while (pageNum <= totalPage) {
|
|
|
+ int offset = (pageNum - 1) * pageSize;
|
|
|
+ List<GetOffVideos> list = crawlerBaseMapper.pageGetOffVideos(offset, pageSize);
|
|
|
+ longArticleBaseMapper.batchInsertGetOffVideos(list);
|
|
|
+ log.info("flushGetOffVideos pageNum:{} totalPage:{}", pageNum, totalPage);
|
|
|
+ pageNum++;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public void flushLongArticlesRootSourceId(Integer pageNum) {
|
|
|
+ int pageSize = 1000;
|
|
|
+ if (pageNum == null) {
|
|
|
+ pageNum = 1;
|
|
|
+ }
|
|
|
+ int count = crawlerBaseMapper.countLongArticlesRootSourceId();
|
|
|
+ int totalPage = count / pageSize + 1;
|
|
|
+ while (pageNum <= totalPage) {
|
|
|
+ int offset = (pageNum - 1) * pageSize;
|
|
|
+ List<LongArticlesRootSourceId> list = crawlerBaseMapper.pageLongArticlesRootSourceId(offset, pageSize);
|
|
|
+ longArticleBaseMapper.batchInsertLongArticlesRootSourceId(list);
|
|
|
+ log.info("flushLongArticlesRootSourceId pageNum:{} totalPage:{}", pageNum, totalPage);
|
|
|
+ pageNum++;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public void flushLongArticlesCrawlerVideos(Integer pageNum) {
|
|
|
+ int pageSize = 1000;
|
|
|
+ if (pageNum == null) {
|
|
|
+ pageNum = 1;
|
|
|
+ }
|
|
|
+ int count = crawlerBaseMapper.countArticleMatchVideos();
|
|
|
+ int totalPage = count / pageSize + 1;
|
|
|
+ while (pageNum <= totalPage) {
|
|
|
+ int offset = (pageNum - 1) * pageSize;
|
|
|
+ List<ArticleMatchVideos> list = crawlerBaseMapper.pageArticleMatchVideos(offset, pageSize);
|
|
|
+ List<LongArticlesCrawlerVideos> batchSaveList = new ArrayList<>();
|
|
|
+ for (ArticleMatchVideos video : list) {
|
|
|
+ if (!StringUtils.hasText(video.getVideoPath())) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ LongArticlesCrawlerVideos saveItem = new LongArticlesCrawlerVideos();
|
|
|
+ saveItem.setContentId(video.getContentId());
|
|
|
+ saveItem.setPlatform(video.getPlatform());
|
|
|
+ saveItem.setVideoTitle(video.getVideoTitle());
|
|
|
+ saveItem.setCrawlerTime(video.getUpdateTime());
|
|
|
+ saveItem.setVideoOssPath(video.getVideoPath());
|
|
|
+ saveItem.setCoverOssPath(video.getCoverPath());
|
|
|
+ saveItem.setUserId(video.getUid());
|
|
|
+ saveItem.setTraceId(video.getTraceId());
|
|
|
+ saveItem.setDownloadStatus(2);
|
|
|
+ batchSaveList.add(saveItem);
|
|
|
+ }
|
|
|
+ if (!CollectionUtils.isEmpty(batchSaveList)) {
|
|
|
+ longArticleBaseMapper.batchInsertLongArticlesCrawlerVideos(batchSaveList);
|
|
|
+ }
|
|
|
+ log.info("flushLongArticlesCrawlerVideos pageNum:{} totalPage:{}", pageNum, totalPage);
|
|
|
+ pageNum++;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public void flushLongArticlesVideos(Integer pageNum) {
|
|
|
+ int pageSize = 1000;
|
|
|
+ if (pageNum == null) {
|
|
|
+ pageNum = 1;
|
|
|
+ }
|
|
|
+ int count = crawlerBaseMapper.countLongArticlesVideos();
|
|
|
+ int totalPage = count / pageSize + 1;
|
|
|
+ while (pageNum <= totalPage) {
|
|
|
+ int offset = (pageNum - 1) * pageSize;
|
|
|
+ List<LongArticlesVideo> list = crawlerBaseMapper.pageLongArticlesVideos(offset, pageSize);
|
|
|
+ List<LongArticlesText> batchSaveLongArticlesTextList = new ArrayList<>();
|
|
|
+ List<LongArticlesMatchVideos> batchSaveLongArticlesMatchVideosList = new ArrayList<>();
|
|
|
+ Set<String> existsIdSet = new HashSet<>();
|
|
|
+ for (LongArticlesVideo video : list) {
|
|
|
+ if (video.getContentId().endsWith("lehuo")) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ if (!existsIdSet.contains(video.getContentId())) {
|
|
|
+ LongArticlesText longArticlesText = new LongArticlesText();
|
|
|
+ longArticlesText.setContentId(video.getContentId());
|
|
|
+ longArticlesText.setArticleTitle(video.getArticleTitle());
|
|
|
+ longArticlesText.setArticleText(video.getArticleText());
|
|
|
+ if (StringUtils.hasText(video.getArticleText())) {
|
|
|
+ longArticlesText.setKimiTitle(video.getKimiTitle().replace("\"", ""));
|
|
|
+ }
|
|
|
+ longArticlesText.setKimiSummary(video.getKimiSummary());
|
|
|
+ longArticlesText.setKimiKeys(video.getKimiKeys());
|
|
|
+ longArticlesText.setKimiStatus(1);
|
|
|
+ batchSaveLongArticlesTextList.add(longArticlesText);
|
|
|
+ existsIdSet.add(video.getContentId());
|
|
|
+ }
|
|
|
+ if (Objects.isNull(video.getRequestTimeStamp())) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ LongArticlesMatchVideos longArticlesMatchVideos = new LongArticlesMatchVideos();
|
|
|
+ longArticlesMatchVideos.setContentId(video.getContentId());
|
|
|
+ longArticlesMatchVideos.setTraceId(video.getTraceId());
|
|
|
+ longArticlesMatchVideos.setGhId(video.getGhId());
|
|
|
+ longArticlesMatchVideos.setAccountName(video.getAccountName());
|
|
|
+ longArticlesMatchVideos.setContentStatus(video.getContentStatus());
|
|
|
+ longArticlesMatchVideos.setSuccessStatus(video.getSuccess());
|
|
|
+ longArticlesMatchVideos.setRequestTimestamp(video.getRequestTimeStamp());
|
|
|
+ longArticlesMatchVideos.setUpdateTime(video.getUpdateTime());
|
|
|
+ longArticlesMatchVideos.setProcessTimes(video.getProcessTimes());
|
|
|
+ longArticlesMatchVideos.setResponse(getLongArticleVideoResponse(video));
|
|
|
+ batchSaveLongArticlesMatchVideosList.add(longArticlesMatchVideos);
|
|
|
+ }
|
|
|
+ if (CollectionUtil.isNotEmpty(batchSaveLongArticlesTextList)) {
|
|
|
+ List<String> contentIds = batchSaveLongArticlesTextList.stream()
|
|
|
+ .map(LongArticlesText::getContentId).distinct().collect(Collectors.toList());
|
|
|
+ List<String> existsContentIds = longArticleBaseMapper.getLongArticlesTextByContentIds(contentIds);
|
|
|
+ if (CollectionUtil.isNotEmpty(existsContentIds)) {
|
|
|
+ batchSaveLongArticlesTextList = batchSaveLongArticlesTextList.stream()
|
|
|
+ .filter(o -> !existsContentIds.contains(o.getContentId())).collect(Collectors.toList());
|
|
|
+ }
|
|
|
+ if (CollectionUtil.isNotEmpty(batchSaveLongArticlesTextList)) {
|
|
|
+ longArticleBaseMapper.batchInsertLongArticlesText(batchSaveLongArticlesTextList);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (CollectionUtil.isNotEmpty(batchSaveLongArticlesMatchVideosList)) {
|
|
|
+ longArticleBaseMapper.batchInsertLongArticlesMatchVideos(batchSaveLongArticlesMatchVideosList);
|
|
|
+ }
|
|
|
+ log.info("flushLongArticlesVideos pageNum:{} totalPage:{}", pageNum, totalPage);
|
|
|
+ pageNum++;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private String getLongArticleVideoResponse(LongArticlesVideo video) {
|
|
|
+ JSONArray jsonArray = new JSONArray();
|
|
|
+ if (StringUtils.hasText(video.getResult1())) {
|
|
|
+ if (StringUtils.hasText(video.getKimiTitle()) && video.getKimiTitle().contains("\"")) {
|
|
|
+ video.setResult1(video.getResult1().replace(video.getKimiTitle(), video.getKimiTitle().replace("\"", "")));
|
|
|
+ }
|
|
|
+ jsonArray.add(resultToResponse(video.getResult1()));
|
|
|
+ }
|
|
|
+ if (StringUtils.hasText(video.getResult2())) {
|
|
|
+ if (StringUtils.hasText(video.getKimiTitle()) && video.getKimiTitle().contains("\"")) {
|
|
|
+ video.setResult2(video.getResult2().replace(video.getKimiTitle(), video.getKimiTitle().replace("\"", "")));
|
|
|
+ }
|
|
|
+ jsonArray.add(resultToResponse(video.getResult2()));
|
|
|
+ }
|
|
|
+ if (StringUtils.hasText(video.getResult3())) {
|
|
|
+ if (StringUtils.hasText(video.getKimiTitle()) && video.getKimiTitle().contains("\"")) {
|
|
|
+ video.setResult3(video.getResult3().replace(video.getKimiTitle(), video.getKimiTitle().replace("\"", "")));
|
|
|
+ }
|
|
|
+ jsonArray.add(resultToResponse(video.getResult3()));
|
|
|
+ }
|
|
|
+ return JSONObject.toJSONString(jsonArray);
|
|
|
+ }
|
|
|
+
|
|
|
+ private JSONObject resultToResponse(String result) {
|
|
|
+ JSONObject jsonObject = new JSONObject();
|
|
|
+ JSONObject fromJSON = JSONObject.parseObject(result);
|
|
|
+ jsonObject.put("kimiTitle", fromJSON.getString("productionName"));
|
|
|
+ jsonObject.put("videoCover", fromJSON.getString("productionCover"));
|
|
|
+ jsonObject.put("videoPath", fromJSON.getString("videoUrl"));
|
|
|
+ jsonObject.put("source", fromJSON.getString("source"));
|
|
|
+ String productionPath = fromJSON.getString("productionPath");
|
|
|
+ String uid = getParamFromPath(productionPath, "su");
|
|
|
+ String videoId = getParamFromPath(productionPath, "id");
|
|
|
+ jsonObject.put("uid", uid);
|
|
|
+ if (StringUtils.hasText(videoId)) {
|
|
|
+ jsonObject.put("videoId", Long.valueOf(videoId));
|
|
|
+ }
|
|
|
+ return jsonObject;
|
|
|
+ }
|
|
|
+
|
|
|
+ private String getParamFromPath(String productionPath, String param) {
|
|
|
+ String decode = URLDecoder.decode(productionPath);
|
|
|
+ String[] sss = decode.split("\\?");
|
|
|
+ for (String ss : sss) {
|
|
|
+ String[] split = ss.split("&");
|
|
|
+ for (String s : split) {
|
|
|
+ if (s.startsWith(param)) {
|
|
|
+ String[] uid = s.split("=");
|
|
|
+ return uid[1];
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+}
|