Procházet zdrojové kódy

更新——http2.0下载方法,针对福气旺系列视频

罗俊辉 před 1 rokem
rodič
revize
923096aaa4

+ 6 - 0
etl-core/src/main/java/com/tzld/crawler/etl/service/impl/EtlServiceImpl.java

@@ -429,6 +429,12 @@ public class EtlServiceImpl implements EtlService {
                                         , !CollectionUtils.isEmpty(proxyPlatform) && proxyPlatform.contains(platfrm),
                                         proxyInfo);
                             }
+                            else if (fileUrl.contains("znl-video-bos.cdn.bcebos.com")) {
+                                FileUtils.downloadForFQW(fileUrl, fpath,
+                                        !CollectionUtils.isEmpty(randomUaPlatform) && randomUaPlatform.contains(platfrm)
+                                        , !CollectionUtils.isEmpty(proxyPlatform) && proxyPlatform.contains(platfrm),
+                                        proxyInfo);
+                            }
                             else {
                                 String tempUrl = fileUrl;
                                 if (platform.equals("zhufuzhonglaonianrenruyijixiang")) {

+ 82 - 7
etl-core/src/main/java/com/tzld/crawler/etl/util/FileUtils.java

@@ -25,17 +25,28 @@
 package com.tzld.crawler.etl.util;
 
 
+import com.aliyun.apache.hc.client5.http.classic.HttpClient;
+import com.aliyun.apache.hc.core5.http.HttpRequest;
 import com.tzld.crawler.etl.common.FakeUserAgent;
 import com.tzld.crawler.etl.common.enums.ExceptionEnum;
 import com.tzld.crawler.etl.common.exception.CommonException;
+import org.apache.http.HttpHost;
+import org.apache.http.HttpResponse;
+import org.apache.http.auth.AuthScope;
+import org.apache.http.auth.UsernamePasswordCredentials;
+import org.apache.http.client.CredentialsProvider;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.BasicCredentialsProvider;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClients;
+import org.apache.http.util.EntityUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.BufferedReader;
-import java.io.FileOutputStream;
-import java.io.InputStream;
-import java.io.InputStreamReader;
+import java.io.*;
 import java.net.*;
+import java.nio.file.Files;
+import java.nio.file.Paths;
 import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -196,6 +207,70 @@ public class FileUtils {
         log.info("downloaded successfully [{}] to [{}]", fileUrl, filePath);
     }
 
+    /**
+     * Downloads a video served from the {@code znl-video-bos.cdn.bcebos.com} CDN (FQW videos)
+     * and writes it to {@code filePath}.
+     *
+     * <p>The request is sent through an authenticated HTTP proxy with a fixed set of
+     * mini-program style headers and a {@code Range: bytes=0-} header, so only a
+     * {@code 206 Partial Content} response is treated as a successful download. Up to six
+     * attempts are made, pausing three seconds between failed attempts.
+     *
+     * @param fileUrl   absolute URL of the video to fetch
+     * @param filePath  destination path; an existing file is overwritten
+     * @param useUa     currently only logged; the User-Agent header is fixed
+     * @param useProxy  currently only logged; the proxy is always used
+     * @param proxyInfo currently only logged; may be {@code null}
+     * @throws IOException          if the URL is malformed or every attempt fails (the last
+     *                              failure is attached as the cause)
+     * @throws InterruptedException if the thread is interrupted while waiting to retry
+     */
+    public static void downloadForFQW(String fileUrl, String filePath, boolean useUa, boolean useProxy, Map<String, String> proxyInfo) throws Exception {
+        log.info("begin download [{}] to [{}] useUa [{}] useProxy [{}] proxyInfo[{}]", fileUrl, filePath, useUa, useProxy, proxyInfo);
+        SSLUtils.ignoreSsl();
+        // Parsing through URL rejects a malformed address before any connection is opened.
+        URL url = new URL(fileUrl);
+
+        // NOTE(review): the proxy endpoint and its credentials are hard-coded here, and the
+        // useUa / useProxy / proxyInfo arguments are ignored. These secrets should move to
+        // configuration (or be read from proxyInfo) once its key layout is confirmed.
+        HttpHost proxy = new HttpHost("q796.kdltps.com", 15818);
+        CredentialsProvider credsProvider = new BasicCredentialsProvider();
+        credsProvider.setCredentials(
+                new AuthScope(proxy),
+                new UsernamePasswordCredentials("t17772369458618", "5zqcjkmy")
+        );
+
+        final int maxAttempts = 6;
+        final long retryDelayMillis = 3000L;
+        IOException lastFailure = null;
+
+        try (CloseableHttpClient client = HttpClients.custom()
+                .setDefaultCredentialsProvider(credsProvider)
+                .setProxy(proxy)
+                .build()) {
+            HttpGet request = new HttpGet(String.valueOf(url));
+            request.setHeader("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.6(0x13080610) XWEB/1156");
+            request.setHeader("Accept-Encoding", "identity;q=1, *;q=0");
+            request.setHeader("Accept", "*/*");
+            request.setHeader("Sec-Fetch-Site", "cross-site");
+            request.setHeader("Sec-Fetch-Mode", "no-cors");
+            request.setHeader("Sec-Fetch-Dest", "video");
+            request.setHeader("Referer", "https://servicewechat.com/wxa1431c6e7acdd32d/2/page-frame.html");
+            request.setHeader("Accept-Language", "en-US,en;q=0.9");
+            request.setHeader("Range", "bytes=0-");
+
+            // Every iteration counts as an attempt, whatever the outcome, so the loop is
+            // guaranteed to terminate even on non-206 responses or connection errors.
+            for (int attempt = 1; attempt <= maxAttempts; attempt++) {
+                // The response is closed on every path so the pooled connection is always
+                // released. Fully qualified to avoid touching the file's import list.
+                try (org.apache.http.client.methods.CloseableHttpResponse response = client.execute(request)) {
+                    int status = response.getStatusLine().getStatusCode();
+                    if (status == 206 && response.getEntity() != null) {
+                        try (InputStream is = response.getEntity().getContent();
+                             OutputStream os = Files.newOutputStream(Paths.get(filePath))) {
+                            byte[] buffer = new byte[8192];
+                            int bytesRead;
+                            // Stream the response body straight into the target file.
+                            while ((bytesRead = is.read(buffer)) != -1) {
+                                os.write(buffer, 0, bytesRead);
+                            }
+                        }
+                        log.info("downloaded successfully [{}] to [{}]", fileUrl, filePath);
+                        return;
+                    }
+                    lastFailure = new IOException("unexpected response status " + status + " for [" + fileUrl + "]");
+                    log.warn("download attempt {}/{} for [{}] returned status [{}]", attempt, maxAttempts, fileUrl, status);
+                } catch (IOException e) {
+                    lastFailure = e;
+                    log.warn("download attempt {}/{} for [{}] failed", attempt, maxAttempts, fileUrl, e);
+                }
+
+                if (attempt < maxAttempts) {
+                    // InterruptedException is deliberately propagated rather than swallowed.
+                    Thread.sleep(retryDelayMillis);
+                }
+            }
+        }
+
+        // Surface the failure instead of returning as if the file had been written.
+        throw new IOException("failed to download [" + fileUrl + "] to [" + filePath + "] after " + maxAttempts + " attempts", lastFailure);
+    }
+
     public static void downloadForGZH(String fileUrl, String filePath, boolean useUa, boolean useProxy, Map<String, String> proxyInfo) throws Exception {
         log.info("begin download [{}] to [{}] useUa [{}] useProxy [{}] proxyInfo[{}]", fileUrl, filePath, useUa, useProxy, proxyInfo);
         SSLUtils.ignoreSsl();
@@ -253,9 +328,9 @@ public class FileUtils {
     public static void main(String[] args) throws Exception {
         // try {
 //        System.out.println("https://api-hl.huoshan.com/hotsoon/item/video/_source/?video_id=v0300fg10000ckmbrbbc77uc3nq19840&line=0&app_id=0&vquality=normal&watermark=0&long_video=0&sf=4&ts=1697528496&item_id=7290410334844718376".replace("https://api-hl.huoshan.com", "http://api-hl.huoshan.com"));
-        downloadForXG(
-                "https://v9-xg-web-pc.ixigua.com/bf37bd4847ea0ef73c1419c3b7def5cd/654a30d9/video/tos/cn/tos-cn-ve-0026/oIBPpenxRaObASeRJ7XGCi5DAM78gBlGARgAUd/?a=1768&ch=0&cr=7&dr=0&er=0&cd=0%7C0%7C0%7C1&cv=1&br=196&bt=196&cs=4&ds=3&eid=1025&ft=_z7ehvvBQ3AUq8yq8Z.wNnOYZlcg6EbK2bLyA0wpuZmka&mime_type=video_mp4&qs=0&rc=ZGU5PDozZzRlOzwzOzg5Z0BpajprZmg6ZmhobzMzNGQzM0AyNmFiM2IxXmExLjQ1Yy5jYSM2MGBzcjRfLS9gLS1kLi9zcw%3D%3D&btag=e00028000&dy_q=1699357188&feature_id=e2624171dedb27d48fb2006f0f0a26ec&l=20231107193948643DF7B13B8779C9525F&__vid=v0201ag10000cl2e0ebc77uevcch379g",
-                "/Users/luojunhui/Downloads/" + System.currentTimeMillis(),
+        downloadForFQW(
+                "https://znl-video-bos.cdn.bcebos.com/e368801a814c548e443835086d37caaf/65e93632/video/20240306/820ee1498e3ed2a59d37aed54d39ae95_1.mp4",
+                "/Users/luojunhui/Downloads/" + System.currentTimeMillis() + ".mp4",
                 true,
                 false,
                 null