|
@@ -0,0 +1,256 @@
|
|
|
+package com.tzld.piaoquan.longarticle.service.local.impl;
|
|
|
+
|
|
|
+import cn.hutool.json.JSONObject;
|
|
|
+import com.alibaba.fastjson.JSONArray;
|
|
|
+import com.tzld.piaoquan.longarticle.dao.mapper.CrawlerVideoMapper;
|
|
|
+import com.tzld.piaoquan.longarticle.dao.mapper.MatchVideoMapper;
|
|
|
+import com.tzld.piaoquan.longarticle.model.po.*;
|
|
|
+import com.tzld.piaoquan.longarticle.model.vo.MatchVideoVo;
|
|
|
+import com.tzld.piaoquan.longarticle.service.local.KimiService;
|
|
|
+import com.tzld.piaoquan.longarticle.utils.*;
|
|
|
+import com.tzld.piaoquan.longarticle.utils.other.*;
|
|
|
+import org.apache.commons.lang3.StringUtils;
|
|
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
+import org.springframework.stereotype.Service;
|
|
|
+import org.springframework.util.CollectionUtils;
|
|
|
+
|
|
|
+import java.util.*;
|
|
|
+
|
|
|
+@Service
|
|
|
+public class MatchVideoServiceImpl {
|
|
|
+
|
|
|
+ private static final int MAX_NUM = 3;
|
|
|
+
|
|
|
+ @Autowired
|
|
|
+ KimiService kimiService;
|
|
|
+
|
|
|
+ @Autowired
|
|
|
+ private MatchVideoMapper matchVideoMapper;
|
|
|
+
|
|
|
+ @Autowired
|
|
|
+ private CrawlerVideoMapper crawlerVideoMapper;
|
|
|
+
|
|
|
+ public void addMatchVideo(MatchVideoVo matchVideoVo) {
|
|
|
+ MatchVideoExample example = new MatchVideoExample();
|
|
|
+ example.createCriteria().andContentIdEqualTo(matchVideoVo.getArticleId()).andGhIdEqualTo(matchVideoVo.getGhId());
|
|
|
+ long l = matchVideoMapper.countByExample(example);
|
|
|
+ if (l > 0) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ String traceId = "search-" + UUID.randomUUID();
|
|
|
+ MatchVideo matchVideo = new MatchVideo();
|
|
|
+ matchVideo.setTraceId(traceId);
|
|
|
+ matchVideo.setContentId(matchVideoVo.getArticleId());
|
|
|
+ matchVideo.setAccountName(matchVideoVo.getAccountName());
|
|
|
+ matchVideo.setFlowPoolLevel(matchVideo.getFlowPoolLevel());
|
|
|
+ matchVideo.setProcessTimes(1);
|
|
|
+ matchVideo.setContentStatus(0);
|
|
|
+ long timestamp = System.currentTimeMillis() / 1000;
|
|
|
+ matchVideo.setContentStatusUpdateTime(Long.valueOf(timestamp).intValue());
|
|
|
+ matchVideoMapper.insertSelective(matchVideo);
|
|
|
+ }
|
|
|
+
|
|
|
+ public void matchContent() {
|
|
|
+ CrawlerVideoExample example = new CrawlerVideoExample();
|
|
|
+ example.createCriteria().andDownloadStatusEqualTo(0);
|
|
|
+ crawlerVideoMapper.selectByExample(example);
|
|
|
+ }
|
|
|
+
|
|
|
+ public Boolean existHistoryContent(String contentId) {
|
|
|
+ CrawlerVideoExample example = new CrawlerVideoExample();
|
|
|
+ example.createCriteria().andContentIdEqualTo(contentId).andDownloadStatusEqualTo(2);
|
|
|
+ long l = crawlerVideoMapper.countByExample(example);
|
|
|
+ return l >= MAX_NUM;
|
|
|
+ }
|
|
|
+
|
|
|
+ public void pushOss(String traceId) {
|
|
|
+ CrawlerVideoExample example = new CrawlerVideoExample();
|
|
|
+ example.createCriteria().andTraceIdEqualTo(traceId).andDownloadStatusEqualTo(0);
|
|
|
+ List<CrawlerVideo> crawlerVideoList = crawlerVideoMapper.selectByExampleWithBLOBs(example);
|
|
|
+ if (CollectionUtils.isEmpty(crawlerVideoList)) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ for (CrawlerVideo crawlerVideo : crawlerVideoList) {
|
|
|
+ String platform = crawlerVideo.getPlatform();
|
|
|
+ String outVideoId = crawlerVideo.getOutVideoId();
|
|
|
+ String videoPath = VideoDownloader.generateVideoPath(platform, outVideoId);
|
|
|
+ String coverPath = VideoDownloader.generateCoverPath(platform, outVideoId);
|
|
|
+ videoPath = VideoDownloader.downloadVideo(videoPath, platform, crawlerVideo.getVideoUrl(), "video");
|
|
|
+ coverPath = VideoDownloader.downloadCover(coverPath, platform, crawlerVideo.getCoverUrl());
|
|
|
+ if (StringUtils.isNotEmpty(videoPath) && StringUtils.isNotEmpty(coverPath)) {
|
|
|
+ String videoOssPath = OSSUploader.uploadToOSS(videoPath, "video");
|
|
|
+ String coverOssPath = OSSUploader.uploadToOSS(coverPath, "image");
|
|
|
+ if (StringUtils.isNotEmpty(videoOssPath) && StringUtils.isNotEmpty(coverOssPath)) {
|
|
|
+ CrawlerVideo udpateCrawlerVideo = new CrawlerVideo();
|
|
|
+ udpateCrawlerVideo.setVideoOssPath(videoOssPath);
|
|
|
+ udpateCrawlerVideo.setCoverOssPath(coverOssPath);
|
|
|
+ udpateCrawlerVideo.setId(crawlerVideo.getId());
|
|
|
+ udpateCrawlerVideo.setDownloadStatus(2);
|
|
|
+ crawlerVideoMapper.updateByPrimaryKeySelective(udpateCrawlerVideo);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public boolean addVideo(String contentId, String traceId) {
|
|
|
+ LongArticlesText kimiText = kimiService.getKimiText(contentId);
|
|
|
+ if (kimiText == null) {
|
|
|
+ //TODO 报警 KIMI查询不到
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ List<JSONObject> video = getVideo(contentId, kimiText);
|
|
|
+ List<JSONObject> jsonObjects = SortUtil.titleSimilarityRank(kimiText.getKimiTitle(), video);
|
|
|
+ List<CrawlerVideo> crawlerVideoList = getCrawlerVideoList(jsonObjects, traceId, contentId);
|
|
|
+ if (!CollectionUtils.isEmpty(crawlerVideoList)) {
|
|
|
+ for (CrawlerVideo crawlerVideo : crawlerVideoList) {
|
|
|
+ crawlerVideoMapper.insertSelective(crawlerVideo);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ System.out.println();
|
|
|
+ System.out.println();
|
|
|
+ System.out.println(crawlerVideoList);
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+
|
|
|
+ private List<CrawlerVideo> getCrawlerVideoList(List<JSONObject> jsonObjects, String traceId, String contentId) {
|
|
|
+ List<CrawlerVideo> crawlerVideoList = new ArrayList<>();
|
|
|
+ for (JSONObject jsonObject : jsonObjects) {
|
|
|
+ String platform = jsonObject.getStr("platform");
|
|
|
+ CrawlerVideo crawlerVideo = new CrawlerVideo();
|
|
|
+ crawlerVideo.setContentId(contentId);
|
|
|
+ crawlerVideo.setTraceId(traceId);
|
|
|
+ crawlerVideo.setPlatform(platform);
|
|
|
+ if (Objects.equals(platform, "dy_search")) {
|
|
|
+ crawlerVideo.setOutVideoId(jsonObject.getStr("channel_content_id"));
|
|
|
+ crawlerVideo.setVideoTitle(jsonObject.getStr("title"));
|
|
|
+ crawlerVideo.setPublishTime(jsonObject.getDate("publish_timestamp"));
|
|
|
+ List<JSONObject> videoUrlList = jsonObject.get("video_url_list", List.class);
|
|
|
+ if (!CollectionUtils.isEmpty(videoUrlList)) {
|
|
|
+ crawlerVideo.setVideoUrl(videoUrlList.get(0).getStr("video_url"));
|
|
|
+ crawlerVideo.setDuration(videoUrlList.get(0).getInt("video_duration"));
|
|
|
+ }
|
|
|
+ List<JSONObject> imageUrlList = jsonObject.get("image_url_list", List.class);
|
|
|
+ if (!CollectionUtils.isEmpty(imageUrlList)) {
|
|
|
+ crawlerVideo.setCoverUrl(imageUrlList.get(0).getStr("image_url"));
|
|
|
+ }
|
|
|
+ crawlerVideo.setPlayCount(jsonObject.getInt("play_count"));
|
|
|
+ crawlerVideo.setLikeCount(jsonObject.getInt("like_count"));
|
|
|
+ crawlerVideo.setScore(jsonObject.getFloat("score"));
|
|
|
+ if (cheakCrawlerVideo(crawlerVideo)) {
|
|
|
+ crawlerVideoList.add(crawlerVideo);
|
|
|
+ }
|
|
|
+
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ if (Objects.equals(platform, "baidu_search")) {
|
|
|
+ crawlerVideo.setOutVideoId(jsonObject.getStr("id"));
|
|
|
+ crawlerVideo.setVideoTitle(jsonObject.getStr("title"));
|
|
|
+ crawlerVideo.setPublishTime(jsonObject.getDate("publish_timestamp"));
|
|
|
+ crawlerVideo.setVideoUrl(jsonObject.getStr("playurl"));
|
|
|
+ crawlerVideo.setCoverUrl(jsonObject.getStr("poster"));
|
|
|
+ crawlerVideo.setPlayCount(jsonObject.getInt("play_cnt"));
|
|
|
+ crawlerVideo.setLikeCount(jsonObject.getInt("like_count") == null ? 0 : jsonObject.getInt("like_count"));
|
|
|
+ crawlerVideo.setDuration(jsonObject.getInt("duration"));
|
|
|
+ crawlerVideo.setScore(jsonObject.getFloat("score"));
|
|
|
+ if (cheakCrawlerVideo(crawlerVideo)) {
|
|
|
+ crawlerVideoList.add(crawlerVideo);
|
|
|
+ }
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return crawlerVideoList;
|
|
|
+ }
|
|
|
+
|
|
|
+ private boolean cheakCrawlerVideo(CrawlerVideo crawlerVideo) {
|
|
|
+ if (StringUtils.isEmpty(crawlerVideo.getOutVideoId())) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ if (StringUtils.isEmpty(crawlerVideo.getVideoUrl())) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ if (StringUtils.isEmpty(crawlerVideo.getCoverUrl())) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ if (StringUtils.isEmpty(crawlerVideo.getPlatform())) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ if (Objects.isNull(crawlerVideo.getPublishTime())) {
|
|
|
+ crawlerVideo.setPublishTime(new Date());
|
|
|
+ }
|
|
|
+ if (Objects.isNull(crawlerVideo.getDuration())) {
|
|
|
+ crawlerVideo.setDuration(0);
|
|
|
+ }
|
|
|
+ if (Objects.isNull(crawlerVideo.getPlayCount())) {
|
|
|
+ crawlerVideo.setPlayCount(0);
|
|
|
+ }
|
|
|
+ if (Objects.isNull(crawlerVideo.getLikeCount())) {
|
|
|
+ crawlerVideo.setLikeCount(0);
|
|
|
+ }
|
|
|
+ if (Objects.isNull(crawlerVideo.getShareCount())) {
|
|
|
+ crawlerVideo.setShareCount(0);
|
|
|
+ }
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+
|
|
|
+ public List<JSONObject> getVideo(String contentId, LongArticlesText kimiText) {
|
|
|
+
|
|
|
+ List<JSONObject> res = new ArrayList<>();
|
|
|
+ List<JSONObject> kimiSummarys = searchVideo(kimiText.getKimiSummary().substring(0, 15), new ArrayList<>(), "");
|
|
|
+ if (!CollectionUtils.isEmpty(kimiSummarys)) {
|
|
|
+ res.addAll(kimiSummarys);
|
|
|
+ }
|
|
|
+ if (res.size() > 3) {
|
|
|
+ return res;
|
|
|
+ }
|
|
|
+ List<JSONObject> kimiTitles = searchVideo(kimiText.getKimiTitle().substring(0, 15), new ArrayList<>(), "");
|
|
|
+ if (!CollectionUtils.isEmpty(kimiTitles)) {
|
|
|
+ res.addAll(kimiTitles);
|
|
|
+ }
|
|
|
+ if (res.size() > 3) {
|
|
|
+ return res;
|
|
|
+ }
|
|
|
+ String kimiKeys = kimiText.getKimiKeys();
|
|
|
+ JSONArray jsonArray = JSONArray.parseArray(kimiKeys);
|
|
|
+ if (jsonArray == null || jsonArray.isEmpty()) {
|
|
|
+ return res;
|
|
|
+ }
|
|
|
+ for (int i = 0; i < jsonArray.size(); i++) {
|
|
|
+ String key = jsonArray.getString(i);
|
|
|
+ List<JSONObject> keys = searchVideo(key, new ArrayList<>(), "");
|
|
|
+ if (!CollectionUtils.isEmpty(keys)) {
|
|
|
+ res.addAll(keys);
|
|
|
+ }
|
|
|
+ if (res.size() > 3) {
|
|
|
+ return res;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return res;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ public List<JSONObject> searchVideo(String keyword, List<String> words, String traceId) {
|
|
|
+// List<JSONObject> jsonObjects = DouyinSearch.douyinSearch(keyword, words, traceId);
|
|
|
+// if (!CollectionUtils.isEmpty(jsonObjects)) {
|
|
|
+// for (JSONObject jsonObject : jsonObjects) {
|
|
|
+// jsonObject.put("platform", "dy_search");
|
|
|
+// }
|
|
|
+// }
|
|
|
+// if (jsonObjects.size() >= 3) {
|
|
|
+// return jsonObjects;
|
|
|
+// }
|
|
|
+// List<JSONObject> jsonObjects1 = HkspSearch.hkspSearch(keyword, words, traceId);
|
|
|
+// if (CollectionUtils.isEmpty(jsonObjects1)) {
|
|
|
+// return jsonObjects;
|
|
|
+// }
|
|
|
+// for (JSONObject jsonObject : jsonObjects1) {
|
|
|
+// jsonObject.put("platform", "baidu_search");
|
|
|
+// }
|
|
|
+// if (CollectionUtils.isEmpty(jsonObjects)) {
|
|
|
+// return jsonObjects1;
|
|
|
+// }
|
|
|
+// jsonObjects.addAll(jsonObjects1);
|
|
|
+// return jsonObjects;
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+}
|