|
@@ -3,7 +3,9 @@ package com.tzld.piaoquan.longarticle.service.local.impl;
|
|
|
import cn.hutool.json.JSONObject;
|
|
|
import com.alibaba.fastjson.JSONArray;
|
|
|
import com.tzld.piaoquan.longarticle.dao.mapper.CrawlerVideoMapper;
|
|
|
+import com.tzld.piaoquan.longarticle.dao.mapper.LongArticlesTextMapper;
|
|
|
import com.tzld.piaoquan.longarticle.dao.mapper.MatchVideoMapper;
|
|
|
+import com.tzld.piaoquan.longarticle.model.bo.MatchContent;
|
|
|
import com.tzld.piaoquan.longarticle.model.po.*;
|
|
|
import com.tzld.piaoquan.longarticle.model.vo.MatchVideoVo;
|
|
|
import com.tzld.piaoquan.longarticle.service.local.KimiService;
|
|
@@ -12,6 +14,7 @@ import com.tzld.piaoquan.longarticle.utils.other.*;
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
|
import org.springframework.stereotype.Service;
|
|
|
+import org.springframework.transaction.annotation.Transactional;
|
|
|
import org.springframework.util.CollectionUtils;
|
|
|
|
|
|
import java.util.*;
|
|
@@ -30,9 +33,16 @@ public class MatchVideoServiceImpl {
|
|
|
@Autowired
|
|
|
private CrawlerVideoMapper crawlerVideoMapper;
|
|
|
|
|
|
- public void addMatchVideo(MatchVideoVo matchVideoVo) {
|
|
|
+ @Autowired
|
|
|
+ private LongArticlesTextMapper longArticlesTextMapper;
|
|
|
+
|
|
|
+ @Autowired
|
|
|
+ private CrawlerVideoServiceImpl crawlerVideoService;
|
|
|
+
|
|
|
/**
 * Registers a content item for video matching.
 *
 * Counts existing MatchVideo rows with the same content id (taken from
 * matchContent.getSourceId()) and gh id and returns early when one exists; otherwise
 * builds a MatchVideo with a fresh "search-" trace id and inserts it. Afterwards inserts a
 * LongArticlesText row with kimiStatus 0 when no text row exists yet for that content id.
 *
 * NOTE(review): part of this method is hidden behind the diff hunk boundary below, so this
 * description covers only the visible lines.
 *
 * @param matchContent supplies the source id, gh id, account name, title and content
 */
+ @Transactional
|
|
|
+ public void addMatchVideo(MatchContent matchContent) {
|
|
|
MatchVideoExample example = new MatchVideoExample();
|
|
|
- example.createCriteria().andContentIdEqualTo(matchVideoVo.getArticleId()).andGhIdEqualTo(matchVideoVo.getGhId());
|
|
|
+ example.createCriteria().andContentIdEqualTo(matchContent.getSourceId()).andGhIdEqualTo(matchContent.getGhId());
|
|
|
long l = matchVideoMapper.countByExample(example); // NOTE(review): count-then-insert is not atomic by itself; presumably a unique index or caller-side serialization prevents duplicates — confirm
|
|
|
if (l > 0) {
|
|
|
return;
|
|
@@ -40,14 +50,27 @@ public class MatchVideoServiceImpl {
|
|
|
String traceId = "search-" + UUID.randomUUID();
|
|
|
MatchVideo matchVideo = new MatchVideo();
|
|
|
matchVideo.setTraceId(traceId);
|
|
|
- matchVideo.setContentId(matchVideoVo.getArticleId());
|
|
|
- matchVideo.setAccountName(matchVideoVo.getAccountName());
|
|
|
+ matchVideo.setContentId(matchContent.getSourceId());
|
|
|
+ matchVideo.setAccountName(matchContent.getAccountName());
|
|
|
+ matchVideo.setGhId(matchContent.getGhId());
|
|
|
matchVideo.setFlowPoolLevel(matchVideo.getFlowPoolLevel()); // NOTE(review): self-assignment — reads the value from the freshly constructed matchVideo and writes it back; presumably the level should come from matchContent — confirm
|
|
|
matchVideo.setProcessTimes(1);
|
|
|
matchVideo.setContentStatus(0);
|
|
|
long timestamp = System.currentTimeMillis() / 1000; // current time in epoch seconds
|
|
|
matchVideo.setContentStatusUpdateTime(Long.valueOf(timestamp).intValue()); // narrowed from long to int
|
|
|
+ matchVideo.setRequestTimestamp(Long.valueOf(timestamp).intValue());
|
|
|
matchVideoMapper.insertSelective(matchVideo);
|
|
|
+ LongArticlesTextExample longArticlesTextExample = new LongArticlesTextExample();
|
|
|
+ longArticlesTextExample.createCriteria().andContentIdEqualTo(matchContent.getSourceId());
|
|
|
+ long l1 = longArticlesTextMapper.countByExample(longArticlesTextExample);
|
|
|
+ if (l1 == 0) { // only create the text row when none exists for this content id
|
|
|
+ LongArticlesText longArticlesText = new LongArticlesText();
|
|
|
+ longArticlesText.setArticleTitle(matchContent.getTitle());
|
|
|
+ longArticlesText.setArticleText(matchContent.getContent());
|
|
|
+ longArticlesText.setContentId(matchContent.getSourceId());
|
|
|
+ longArticlesText.setKimiStatus(0); // 0 is the value processMatchContent tests before calling getAndUpdateContent
|
|
|
+ longArticlesTextMapper.insertSelective(longArticlesText);
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
public void matchContent() {
|
|
@@ -56,201 +79,42 @@ public class MatchVideoServiceImpl {
|
|
|
crawlerVideoMapper.selectByExample(example);
|
|
|
}
|
|
|
|
|
|
- public Boolean existHistoryContent(String contentId) {
|
|
|
- CrawlerVideoExample example = new CrawlerVideoExample();
|
|
|
- example.createCriteria().andContentIdEqualTo(contentId).andDownloadStatusEqualTo(2);
|
|
|
- long l = crawlerVideoMapper.countByExample(example);
|
|
|
- return l >= MAX_NUM;
|
|
|
- }
|
|
|
|
|
|
- public void pushOss(String traceId) {
|
|
|
- CrawlerVideoExample example = new CrawlerVideoExample();
|
|
|
- example.createCriteria().andTraceIdEqualTo(traceId).andDownloadStatusEqualTo(0);
|
|
|
- List<CrawlerVideo> crawlerVideoList = crawlerVideoMapper.selectByExampleWithBLOBs(example);
|
|
|
- if (CollectionUtils.isEmpty(crawlerVideoList)) {
|
|
|
/**
 * Drives one MatchVideo through the Kimi step and then the crawler step.
 *
 * Visible flow of the added lines: load the Kimi text for the content id and return when it
 * is missing; when its kimiStatus is 0, call kimiService.getAndUpdateContent and return when
 * that yields null; evaluate existCrawlerVideo for the content id; finally, when
 * crawlerVideoService.countCrawlerVideo reports fewer than 3 rows, call addCrawlerVideo.
 *
 * The lines prefixed with "-" inside this span are the removed bodies of the former
 * pushOss, addVideo and getCrawlerVideoList methods and are not part of this method.
 *
 * @param matchVideo the record whose content id is processed
 */
+ public void processMatchContent(MatchVideo matchVideo) {
|
|
|
+ //1. Run the Kimi task
|
|
|
+ LongArticlesText kimiText = kimiService.getKimiText(matchVideo.getContentId());
|
|
|
+ if (kimiText == null) {
|
|
|
+ //TODO look up the content and regenerate the Kimi info
|
|
|
return;
|
|
|
}
|
|
|
- for (CrawlerVideo crawlerVideo : crawlerVideoList) {
|
|
|
- String platform = crawlerVideo.getPlatform();
|
|
|
- String outVideoId = crawlerVideo.getOutVideoId();
|
|
|
- String videoPath = VideoDownloader.generateVideoPath(platform, outVideoId);
|
|
|
- String coverPath = VideoDownloader.generateCoverPath(platform, outVideoId);
|
|
|
- videoPath = VideoDownloader.downloadVideo(videoPath, platform, crawlerVideo.getVideoUrl(), "video");
|
|
|
- coverPath = VideoDownloader.downloadCover(coverPath, platform, crawlerVideo.getCoverUrl());
|
|
|
- if (StringUtils.isNotEmpty(videoPath) && StringUtils.isNotEmpty(coverPath)) {
|
|
|
- String videoOssPath = OSSUploader.uploadToOSS(videoPath, "video");
|
|
|
- String coverOssPath = OSSUploader.uploadToOSS(coverPath, "image");
|
|
|
- if (StringUtils.isNotEmpty(videoOssPath) && StringUtils.isNotEmpty(coverOssPath)) {
|
|
|
- CrawlerVideo udpateCrawlerVideo = new CrawlerVideo();
|
|
|
- udpateCrawlerVideo.setVideoOssPath(videoOssPath);
|
|
|
- udpateCrawlerVideo.setCoverOssPath(coverOssPath);
|
|
|
- udpateCrawlerVideo.setId(crawlerVideo.getId());
|
|
|
- udpateCrawlerVideo.setDownloadStatus(2);
|
|
|
- crawlerVideoMapper.updateByPrimaryKeySelective(udpateCrawlerVideo);
|
|
|
- }
|
|
|
+ if (kimiText.getKimiStatus() == 0) { // NOTE(review): if getKimiStatus() returns a boxed type, a null status would throw on unboxing here — confirm
|
|
|
+ //TODO acquire a lock
|
|
|
+ kimiText = kimiService.getAndUpdateContent(matchVideo.getContentId());
|
|
|
+ if (kimiText == null) {
|
|
|
+ //TODO handle failure to obtain the Kimi result
|
|
|
+ return;
|
|
|
}
|
|
|
}
|
|
|
- }
|
|
|
+ boolean existCrawlerVideo = existCrawlerVideo(matchVideo.getContentId());
|
|
|
+ if(!existCrawlerVideo){ // NOTE(review): this branch has no statements in the visible code, so the check currently has no effect
|
|
|
|
|
|
|
|
- public boolean addVideo(String contentId, String traceId) {
|
|
|
- LongArticlesText kimiText = kimiService.getKimiText(contentId);
|
|
|
- if (kimiText == null) {
|
|
|
- //TODO alert: Kimi record not found
|
|
|
- return false;
|
|
|
- }
|
|
|
- List<JSONObject> video = getVideo(contentId, kimiText);
|
|
|
- List<JSONObject> jsonObjects = SortUtil.titleSimilarityRank(kimiText.getKimiTitle(), video);
|
|
|
- List<CrawlerVideo> crawlerVideoList = getCrawlerVideoList(jsonObjects, traceId, contentId);
|
|
|
- if (!CollectionUtils.isEmpty(crawlerVideoList)) {
|
|
|
- for (CrawlerVideo crawlerVideo : crawlerVideoList) {
|
|
|
- crawlerVideoMapper.insertSelective(crawlerVideo);
|
|
|
- }
|
|
|
}
|
|
|
- System.out.println();
|
|
|
- System.out.println();
|
|
|
- System.out.println(crawlerVideoList);
|
|
|
- return false;
|
|
|
- }
|
|
|
-
|
|
|
- private List<CrawlerVideo> getCrawlerVideoList(List<JSONObject> jsonObjects, String traceId, String contentId) {
|
|
|
- List<CrawlerVideo> crawlerVideoList = new ArrayList<>();
|
|
|
- for (JSONObject jsonObject : jsonObjects) {
|
|
|
- String platform = jsonObject.getStr("platform");
|
|
|
- CrawlerVideo crawlerVideo = new CrawlerVideo();
|
|
|
- crawlerVideo.setContentId(contentId);
|
|
|
- crawlerVideo.setTraceId(traceId);
|
|
|
- crawlerVideo.setPlatform(platform);
|
|
|
- if (Objects.equals(platform, "dy_search")) {
|
|
|
- crawlerVideo.setOutVideoId(jsonObject.getStr("channel_content_id"));
|
|
|
- crawlerVideo.setVideoTitle(jsonObject.getStr("title"));
|
|
|
- crawlerVideo.setPublishTime(jsonObject.getDate("publish_timestamp"));
|
|
|
- List<JSONObject> videoUrlList = jsonObject.get("video_url_list", List.class);
|
|
|
- if (!CollectionUtils.isEmpty(videoUrlList)) {
|
|
|
- crawlerVideo.setVideoUrl(videoUrlList.get(0).getStr("video_url"));
|
|
|
- crawlerVideo.setDuration(videoUrlList.get(0).getInt("video_duration"));
|
|
|
- }
|
|
|
- List<JSONObject> imageUrlList = jsonObject.get("image_url_list", List.class);
|
|
|
- if (!CollectionUtils.isEmpty(imageUrlList)) {
|
|
|
- crawlerVideo.setCoverUrl(imageUrlList.get(0).getStr("image_url"));
|
|
|
- }
|
|
|
- crawlerVideo.setPlayCount(jsonObject.getInt("play_count"));
|
|
|
- crawlerVideo.setLikeCount(jsonObject.getInt("like_count"));
|
|
|
- crawlerVideo.setScore(jsonObject.getFloat("score"));
|
|
|
- if (cheakCrawlerVideo(crawlerVideo)) {
|
|
|
- crawlerVideoList.add(crawlerVideo);
|
|
|
- }
|
|
|
|
|
|
|
|
- continue;
|
|
|
- }
|
|
|
- if (Objects.equals(platform, "baidu_search")) {
|
|
|
- crawlerVideo.setOutVideoId(jsonObject.getStr("id"));
|
|
|
- crawlerVideo.setVideoTitle(jsonObject.getStr("title"));
|
|
|
- crawlerVideo.setPublishTime(jsonObject.getDate("publish_timestamp"));
|
|
|
- crawlerVideo.setVideoUrl(jsonObject.getStr("playurl"));
|
|
|
- crawlerVideo.setCoverUrl(jsonObject.getStr("poster"));
|
|
|
- crawlerVideo.setPlayCount(jsonObject.getInt("play_cnt"));
|
|
|
- crawlerVideo.setLikeCount(jsonObject.getInt("like_count") == null ? 0 : jsonObject.getInt("like_count"));
|
|
|
- crawlerVideo.setDuration(jsonObject.getInt("duration"));
|
|
|
- crawlerVideo.setScore(jsonObject.getFloat("score"));
|
|
|
- if (cheakCrawlerVideo(crawlerVideo)) {
|
|
|
- crawlerVideoList.add(crawlerVideo);
|
|
|
- }
|
|
|
- continue;
|
|
|
- }
|
|
|
+ //2. Run the crawler task
|
|
|
+ int retry = 0; // NOTE(review): never read in the visible code — presumably a placeholder for a retry loop; confirm or remove
|
|
|
+ long count = crawlerVideoService.countCrawlerVideo(matchVideo.getContentId());
|
|
|
+ if (count < 3) { // NOTE(review): literal 3, while existCrawlerVideo compares against MAX_NUM — confirm whether these thresholds are meant to match
|
|
|
+ crawlerVideoService.addCrawlerVideo(matchVideo.getContentId(), kimiText);
|
|
|
}
|
|
|
- return crawlerVideoList;
|
|
|
}
|
|
|
|
|
|
- private boolean cheakCrawlerVideo(CrawlerVideo crawlerVideo) {
|
|
|
- if (StringUtils.isEmpty(crawlerVideo.getOutVideoId())) {
|
|
|
- return false;
|
|
|
- }
|
|
|
- if (StringUtils.isEmpty(crawlerVideo.getVideoUrl())) {
|
|
|
- return false;
|
|
|
- }
|
|
|
- if (StringUtils.isEmpty(crawlerVideo.getCoverUrl())) {
|
|
|
- return false;
|
|
|
- }
|
|
|
- if (StringUtils.isEmpty(crawlerVideo.getPlatform())) {
|
|
|
- return false;
|
|
|
- }
|
|
|
- if (Objects.isNull(crawlerVideo.getPublishTime())) {
|
|
|
- crawlerVideo.setPublishTime(new Date());
|
|
|
- }
|
|
|
- if (Objects.isNull(crawlerVideo.getDuration())) {
|
|
|
- crawlerVideo.setDuration(0);
|
|
|
- }
|
|
|
- if (Objects.isNull(crawlerVideo.getPlayCount())) {
|
|
|
- crawlerVideo.setPlayCount(0);
|
|
|
- }
|
|
|
- if (Objects.isNull(crawlerVideo.getLikeCount())) {
|
|
|
- crawlerVideo.setLikeCount(0);
|
|
|
- }
|
|
|
- if (Objects.isNull(crawlerVideo.getShareCount())) {
|
|
|
- crawlerVideo.setShareCount(0);
|
|
|
- }
|
|
|
- return true;
|
|
|
- }
|
|
|
-
|
|
|
- public List<JSONObject> getVideo(String contentId, LongArticlesText kimiText) {
|
|
|
-
|
|
|
- List<JSONObject> res = new ArrayList<>();
|
|
|
- List<JSONObject> kimiSummarys = searchVideo(kimiText.getKimiSummary().substring(0, 15), new ArrayList<>(), "");
|
|
|
- if (!CollectionUtils.isEmpty(kimiSummarys)) {
|
|
|
- res.addAll(kimiSummarys);
|
|
|
- }
|
|
|
- if (res.size() > 3) {
|
|
|
- return res;
|
|
|
- }
|
|
|
- List<JSONObject> kimiTitles = searchVideo(kimiText.getKimiTitle().substring(0, 15), new ArrayList<>(), "");
|
|
|
- if (!CollectionUtils.isEmpty(kimiTitles)) {
|
|
|
- res.addAll(kimiTitles);
|
|
|
- }
|
|
|
- if (res.size() > 3) {
|
|
|
- return res;
|
|
|
- }
|
|
|
- String kimiKeys = kimiText.getKimiKeys();
|
|
|
- JSONArray jsonArray = JSONArray.parseArray(kimiKeys);
|
|
|
- if (jsonArray == null || jsonArray.isEmpty()) {
|
|
|
- return res;
|
|
|
- }
|
|
|
- for (int i = 0; i < jsonArray.size(); i++) {
|
|
|
- String key = jsonArray.getString(i);
|
|
|
- List<JSONObject> keys = searchVideo(key, new ArrayList<>(), "");
|
|
|
- if (!CollectionUtils.isEmpty(keys)) {
|
|
|
- res.addAll(keys);
|
|
|
- }
|
|
|
- if (res.size() > 3) {
|
|
|
- return res;
|
|
|
- }
|
|
|
- }
|
|
|
- return res;
|
|
|
/**
 * Reports whether enough crawler videos already exist for a content id.
 *
 * Counts CrawlerVideo rows whose content id equals the argument and whose download status
 * is 2, and compares that count with MAX_NUM (declared outside this view).
 *
 * @param contentId content id to look up
 * @return true when the count is at least MAX_NUM
 */
+ public boolean existCrawlerVideo(String contentId) {
|
|
|
+ CrawlerVideoExample example = new CrawlerVideoExample();
|
|
|
+ example.createCriteria().andContentIdEqualTo(contentId).andDownloadStatusEqualTo(2); // NOTE(review): 2 is the status the removed pushOss wrote after both OSS uploads succeeded — presumably "uploaded"; confirm
|
|
|
+ long l = crawlerVideoMapper.countByExample(example);
|
|
|
+ return l >= MAX_NUM;
|
|
|
}
|
|
|
+}
|
|
|
|
|
|
|
|
|
- public List<JSONObject> searchVideo(String keyword, List<String> words, String traceId) {
|
|
|
-// List<JSONObject> jsonObjects = DouyinSearch.douyinSearch(keyword, words, traceId);
|
|
|
-// if (!CollectionUtils.isEmpty(jsonObjects)) {
|
|
|
-// for (JSONObject jsonObject : jsonObjects) {
|
|
|
-// jsonObject.put("platform", "dy_search");
|
|
|
-// }
|
|
|
-// }
|
|
|
-// if (jsonObjects.size() >= 3) {
|
|
|
-// return jsonObjects;
|
|
|
-// }
|
|
|
-// List<JSONObject> jsonObjects1 = HkspSearch.hkspSearch(keyword, words, traceId);
|
|
|
-// if (CollectionUtils.isEmpty(jsonObjects1)) {
|
|
|
-// return jsonObjects;
|
|
|
-// }
|
|
|
-// for (JSONObject jsonObject : jsonObjects1) {
|
|
|
-// jsonObject.put("platform", "baidu_search");
|
|
|
-// }
|
|
|
-// if (CollectionUtils.isEmpty(jsonObjects)) {
|
|
|
-// return jsonObjects1;
|
|
|
-// }
|
|
|
-// jsonObjects.addAll(jsonObjects1);
|
|
|
-// return jsonObjects;
|
|
|
- return null;
|
|
|
- }
|
|
|
|
|
|
-}
|