Browse Source

增加视频下载

xueyiming 6 months ago
parent
commit
31ad68c3a7

+ 24 - 0
long-article-server/src/main/java/com/tzld/piaoquan/longarticle/service/local/impl/CrawlerVideoServiceImpl.java

@@ -3,12 +3,15 @@ package com.tzld.piaoquan.longarticle.service.local.impl;
 import com.alibaba.fastjson.JSON;
 import com.alibaba.fastjson.JSONArray;
 import com.alibaba.fastjson.JSONObject;
+import com.tzld.piaoquan.longarticle.dao.mapper.CrawlerVideoMapper;
 import com.tzld.piaoquan.longarticle.model.po.CrawlerVideo;
 import com.tzld.piaoquan.longarticle.model.po.LongArticlesText;
+import com.tzld.piaoquan.longarticle.service.local.KimiService;
 import com.tzld.piaoquan.longarticle.utils.other.DouyinSearch;
 import com.tzld.piaoquan.longarticle.utils.other.HkspSearch;
 import com.tzld.piaoquan.longarticle.utils.other.NlpUtils;
 import org.apache.commons.lang3.StringUtils;
+import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Service;
 import org.springframework.util.CollectionUtils;
 
@@ -20,6 +23,12 @@ import java.util.stream.Collectors;
 @Service
 public class CrawlerVideoServiceImpl {
 
+    @Autowired
+    private CrawlerVideoMapper crawlerVideoMapper;
+
+    @Autowired
+    private KimiService kimiService;
+
     private static final String default_user_id = "69637498";
 
     private static final Double NLP_SIMILARITY_THRESHOLD = 0.45;
@@ -37,6 +46,16 @@ public class CrawlerVideoServiceImpl {
         add("习");
     }};
 
+    /**
+     * Searches the web for videos matching the Kimi text of the given content and stores every hit.
+     *
+     * <p>The text is fetched through {@code kimiService.getKimiText}, candidate videos come from
+     * {@code searchVideosFromWeb}, and each candidate is written with
+     * {@code crawlerVideoMapper.insertSelective}. Nothing is inserted when the text or its summary
+     * is missing, or when the search yields no videos.
+     *
+     * @param contentId identifier handed to {@code kimiService.getKimiText}
+     */
+    public void addCrawlerVideo(String contentId) {
+        LongArticlesText kimiText = kimiService.getKimiText(contentId);
+        // searchVideosFromWeb dereferences the summary on its first line, so stop here instead of
+        // failing with a NullPointerException when there is no text or no summary to search with.
+        if (kimiText == null || kimiText.getKimiSummary() == null) {
+            return;
+        }
+        List<CrawlerVideo> crawlerVideoList = searchVideosFromWeb(kimiText);
+        if (CollectionUtils.isEmpty(crawlerVideoList)) {
+            return;
+        }
+        for (CrawlerVideo crawlerVideo : crawlerVideoList) {
+            crawlerVideoMapper.insertSelective(crawlerVideo);
+        }
+    }
+
 
     public List<CrawlerVideo> searchVideosFromWeb(LongArticlesText longArticlesText) {
         String articleSummary = longArticlesText.getKimiSummary().substring(0, 15);
@@ -96,6 +115,11 @@ public class CrawlerVideoServiceImpl {
         return crawlerVideoList;
     }
 
+
+    /**
+     * Placeholder for downloading a crawled video; the body is currently empty.
+     *
+     * @param crawlerVideo the crawled video record (unused for now)
+     */
+    public void downloadVideos(CrawlerVideo crawlerVideo) {
+        // No-op: download logic has not been implemented yet.
+    }
+
     public List<Float> getTitleSimilarityWithNlp(String oriTitle, List<String> titleList) {
         List<Float> baseScores = NlpUtils.baseNlpTitleSimilarity(oriTitle, titleList);
         if (!CollectionUtils.isEmpty(baseScores)) {

+ 1 - 1
long-article-server/src/main/java/com/tzld/piaoquan/longarticle/service/local/impl/KimiServiceImpl.java

@@ -19,7 +19,7 @@ public class KimiServiceImpl implements KimiService {
     private static final Integer SAFE_SCORE = 7;
 
     @Autowired
-    LongArticlesTextMapper longArticlesTextMapper;
+    private LongArticlesTextMapper longArticlesTextMapper;
 
     @Override
     public LongArticlesText getKimiText(String contentId) {

+ 110 - 0
long-article-server/src/main/java/com/tzld/piaoquan/longarticle/utils/other/VideoDownloader1.java

@@ -0,0 +1,110 @@
+package com.tzld.piaoquan.longarticle.utils.other;
+
+import cn.hutool.http.HttpRequest;
+import cn.hutool.http.HttpUtil;
+import cn.hutool.http.HttpResponse;
+import com.alibaba.fastjson.JSONObject;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Small command-line helper that downloads a video over HTTP using platform-specific request headers.
+ *
+ * <p>{@link #requestHeader(String, String, String)} builds the header set for a search platform;
+ * {@link #main(String[])} uses the {@code dy_search} headers to fetch one video and save it to disk.
+ */
+public class VideoDownloader1 {
+
+    /** Video fetched when no URL is given on the command line. */
+    private static final String DEFAULT_VIDEO_URL =
+            "https://www.douyin.com/aweme/v1/play/?video_id=v0200fg10000ctj1failjht1sbj70020&ratio=1080p&line=0";
+
+    /** File written when no target path is given on the command line. */
+    private static final String DEFAULT_TARGET_PATH = "/Users/shimeng/Desktop/videos/downloaded_video.mp4";
+
+    private static final String ACCEPT_ANY = "*/*";
+    private static final String ACCEPT_LANGUAGE_ZH = "zh-CN,zh;q=0.9";
+    private static final String ACCEPT_LANGUAGE_EN_ZH = "en,zh;q=0.9,zh-CN;q=0.8";
+    private static final String WINDOWS_CHROME_UA =
+            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36";
+    private static final String MAC_CHROME_UA =
+            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36";
+    private static final String XIGUA_V9_HOST = "v9-xg-web-pc.ixigua.com";
+    private static final String XIGUA_V3_HOST = "v3-xg-web-pc.ixigua.com";
+    private static final String XIGUA_SITE = "https://www.ixigua.com/";
+    private static final String WEIXIN_SITE = "https://mp.weixin.qq.com";
+
+    /**
+     * Downloads a single video with the {@code dy_search} headers and reports the outcome on stdout.
+     *
+     * @param args optional: {@code args[0]} is the video URL and {@code args[1]} the target file path;
+     *             the built-in defaults are used for whichever is absent
+     */
+    public static void main(String[] args) {
+        String videoUrl = args != null && args.length > 0 ? args[0] : DEFAULT_VIDEO_URL;
+        File videoFile = new File(args != null && args.length > 1 ? args[1] : DEFAULT_TARGET_PATH);
+
+        Map<String, String> headers = requestHeader("dy_search", videoUrl, "video");
+        // executeAsync defers reading the body, so the video is streamed to disk by writeBody
+        // rather than being buffered in memory first; try-with-resources releases the connection.
+        try (HttpResponse response = HttpUtil.createGet(videoUrl)
+                .addHeaders(headers)
+                .executeAsync()) {
+            // The dy_search headers carry a Range header, so a successful answer may be
+            // 206 Partial Content; accept any 2xx status instead of only 200.
+            if (response.isOk()) {
+                // Save the body of the request that actually carried the headers.
+                response.writeBody(videoFile);
+                System.out.println("视频下载成功,保存为:" + videoFile.getAbsolutePath());
+            } else {
+                System.out.println("下载失败,状态码:" + response.getStatus());
+            }
+        }
+    }
+
+    /**
+     * Builds the HTTP request headers to use when downloading from the given search platform.
+     *
+     * <p>Recognised platforms are {@code xg_search}, {@code baidu_search}, {@code wx_search} and
+     * {@code dy_search}. For {@code xg_search} the header set depends on which Xigua host appears in
+     * {@code url}, and otherwise on whether {@code downloadType} is {@code "cover"}.
+     *
+     * @param platform     platform key; {@code null} or an unrecognised value yields an empty map
+     * @param url          download URL, only inspected for {@code xg_search}; {@code null} is treated
+     *                     as matching no host
+     * @param downloadType kind of resource being fetched; only {@code "cover"} is distinguished
+     * @return a new mutable map of header names to values, never {@code null}
+     */
+    public static Map<String, String> requestHeader(String platform, String url, String downloadType) {
+        Map<String, String> headers = new HashMap<>();
+        if (platform == null) {
+            // switch on a null String would throw; an unknown platform simply gets no headers.
+            return headers;
+        }
+
+        switch (platform) {
+            case "xg_search":
+                String target = url == null ? "" : url;
+                if (target.contains(XIGUA_V9_HOST)) {
+                    putXiguaVideoHeaders(headers, XIGUA_V9_HOST);
+                } else if (target.contains(XIGUA_V3_HOST)) {
+                    putXiguaVideoHeaders(headers, XIGUA_V3_HOST);
+                } else if ("cover".equals(downloadType)) {
+                    headers.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7");
+                    headers.put("Accept-Language", ACCEPT_LANGUAGE_EN_ZH);
+                    headers.put("Cache-Control", "max-age=0");
+                    headers.put("Proxy-Connection", "keep-alive");
+                    headers.put("Upgrade-Insecure-Requests", "1");
+                    headers.put("User-Agent", MAC_CHROME_UA);
+                } else {
+                    // Any other Xigua URL falls back to the v3 host headers.
+                    putXiguaVideoHeaders(headers, XIGUA_V3_HOST);
+                }
+                break;
+
+            case "baidu_search":
+                putBrowserHeaders(headers);
+                break;
+
+            case "wx_search":
+                putBrowserHeaders(headers);
+                headers.put("Origin", WEIXIN_SITE);
+                headers.put("Referer", WEIXIN_SITE);
+                break;
+
+            case "dy_search":
+                headers.put("accept", ACCEPT_ANY);
+                headers.put("accept-language", ACCEPT_LANGUAGE_EN_ZH);
+                headers.put("priority", "i");
+                headers.put("range", "bytes=0-");
+                headers.put("referer", "https://v11-coldf.douyinvod.com/");
+                headers.put("user-agent", WINDOWS_CHROME_UA);
+                break;
+
+            default:
+                // Other platforms get no headers.
+                break;
+        }
+        return headers;
+    }
+
+    /** Adds the Accept, Accept-Language and User-Agent headers shared by several platforms. */
+    private static void putBrowserHeaders(Map<String, String> headers) {
+        headers.put("Accept", ACCEPT_ANY);
+        headers.put("Accept-Language", ACCEPT_LANGUAGE_ZH);
+        headers.put("User-Agent", WINDOWS_CHROME_UA);
+    }
+
+    /** Adds the browser headers plus the Host, Origin and Referer used for a Xigua video host. */
+    private static void putXiguaVideoHeaders(Map<String, String> headers, String host) {
+        putBrowserHeaders(headers);
+        headers.put("Host", host);
+        headers.put("Origin", XIGUA_SITE);
+        headers.put("Referer", XIGUA_SITE);
+    }
+
+}