Browse Source

西瓜下载优化

罗俊辉 1 năm trước cách đây
mục cha
commit
0eb3b28552

+ 2 - 2
etl-core/src/main/java/com/tzld/crawler/etl/service/impl/EtlServiceImpl.java

@@ -425,14 +425,14 @@ public class EtlServiceImpl implements EtlService {
                             }
                             // 西瓜视频系列 1
                             else if (fileUrl.contains("v9-xg-web-pc.ixigua.com")) {
-                                FileUtils.downloadForXG(fileUrl, fpath,
+                                FileUtils.downloadForXGV9(fileUrl, fpath,
                                         !CollectionUtils.isEmpty(randomUaPlatform) && randomUaPlatform.contains(platfrm)
                                         , !CollectionUtils.isEmpty(proxyPlatform) && proxyPlatform.contains(platfrm),
                                         proxyInfo);
                             }
                             // 西瓜视频系列 2
                             else if (fileUrl.contains("v3-xg-web-pc.ixigua.com")) {
-                                FileUtils.downloadForXG(fileUrl, fpath,
+                                FileUtils.downloadForXGV3(fileUrl, fpath,
                                         !CollectionUtils.isEmpty(randomUaPlatform) && randomUaPlatform.contains(platfrm)
                                         , !CollectionUtils.isEmpty(proxyPlatform) && proxyPlatform.contains(platfrm),
                                         proxyInfo);

+ 53 - 2
etl-core/src/main/java/com/tzld/crawler/etl/util/FileUtils.java

@@ -156,7 +156,7 @@ public class FileUtils {
         log.info("downloaded successfully [{}] to [{}]", fileUrl, filePath);
     }
 
-    public static void downloadForXG(String fileUrl, String filePath, boolean useUa, boolean useProxy, Map<String, String> proxyInfo) throws Exception {
+    public static void downloadForXGV9(String fileUrl, String filePath, boolean useUa, boolean useProxy, Map<String, String> proxyInfo) throws Exception {
         log.info("begin download [{}] to [{}] useUa [{}] useProxy [{}] proxyInfo[{}]", fileUrl, filePath, useUa, useProxy, proxyInfo);
         SSLUtils.ignoreSsl();
         URL url = new URL(fileUrl);
@@ -207,6 +207,57 @@ public class FileUtils {
         log.info("downloaded successfully [{}] to [{}]", fileUrl, filePath);
     }
 
+    public static void downloadForXGV3(String fileUrl, String filePath, boolean useUa, boolean useProxy, Map<String, String> proxyInfo) throws Exception {
+        log.info("begin download [{}] to [{}] useUa [{}] useProxy [{}] proxyInfo[{}]", fileUrl, filePath, useUa, useProxy, proxyInfo);
+        SSLUtils.ignoreSsl();
+        URL url = new URL(fileUrl);
+        HttpURLConnection conn;
+        if (useProxy) {
+            String proxyUrl = proxyInfo.getOrDefault("url", "");
+            int port = Integer.parseInt(proxyInfo.getOrDefault("port", "0"));
+            String username = proxyInfo.getOrDefault("username", "");
+            String password = proxyInfo.getOrDefault("password", "");
+            // 创建代理服务的地址和端口
+            Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(proxyUrl, port));
+            Authenticator authenticator = new Authenticator() {
+                @Override
+                public PasswordAuthentication getPasswordAuthentication() {
+                    return (new PasswordAuthentication(username, password.toCharArray()));
+                }
+            };
+            Authenticator.setDefault(authenticator);
+            conn = (HttpURLConnection) url.openConnection(proxy);
+        } else {
+            conn = (HttpURLConnection) url.openConnection();
+        }
+        if (useUa) {
+            conn.setRequestProperty("User-Agent", FakeUserAgent.getRandomUserAgent());
+        }
+        conn.setRequestProperty("Accept", "*/*");
+        conn.setRequestProperty("Accept-Language", "zh-CN,zh;q=0.9");
+        conn.setRequestProperty("Host", "v3-xg-web-pc.ixigua.com");
+        conn.setRequestProperty("Origin", "https://www.ixigua.com/");
+        conn.setRequestProperty("Referer", "https://www.ixigua.com/");
+
+        if (conn.getResponseCode() == HttpURLConnection.HTTP_FORBIDDEN) {
+            throw new CommonException(ExceptionEnum.URL_FORBIDDEN);
+        }
+        conn.setConnectTimeout(5000);
+        conn.setReadTimeout(5000);
+        log.info("download file size is {} of url [{}]", formatFileSize(conn.getContentLength()), fileUrl);
+
+        InputStream inputStream = conn.getInputStream();
+        FileOutputStream outputStream = new FileOutputStream(filePath);
+        byte[] buffer = new byte[4096];
+        int len;
+        while ((len = inputStream.read(buffer)) != -1) {
+            outputStream.write(buffer, 0, len);
+        }
+        inputStream.close();
+        outputStream.close();
+        log.info("downloaded successfully [{}] to [{}]", fileUrl, filePath);
+    }
+
     public static void downloadForFQW(String fileUrl, String filePath, boolean useUa, boolean useProxy, Map<String, String> proxyInfo) throws Exception {
         log.info("begin download [{}] to [{}] useUa [{}] useProxy [{}] proxyInfo[{}]", fileUrl, filePath, useUa, useProxy, proxyInfo);
         SSLUtils.ignoreSsl();
@@ -345,7 +396,7 @@ public class FileUtils {
         // try {
 //        System.out.println("https://api-hl.huoshan.com/hotsoon/item/video/_source/?video_id=v0300fg10000ckmbrbbc77uc3nq19840&line=0&app_id=0&vquality=normal&watermark=0&long_video=0&sf=4&ts=1697528496&item_id=7290410334844718376".replace("https://api-hl.huoshan.com", "http://api-hl.huoshan.com"));
         downloadForGZH(
-                "https://mpvideo.qpic.cn/0bc3siadqaaa4uamj42j4nsvbewdhcjaaoaa.f10002.mp4?dis_k=119b904f4b94954dd215d29d963d323d&dis_t=1710471237&play_scene=10120&auth_info=D7qugVRFbFm0+6STCiFrOW5pTjZkRTFMSnhkWFNIJGA2W2pVKhgDO2QzR0wuNlEz&auth_key=ccc54f3579c8abf94fd7ebd601f697d1&vid=wxv_3183565730357952514&format_id=10002&support_redirect=0&mmversion=false",
+                "http://apd-vlive.apdcdn.tc.qq.com/vhot2.qqvideo.tc.qq.com/AnS2yvseXsk6EzDFjAukGDVret6HO8htGN6D3R--6Z_0/B_JxNyiJmktHRgresXhfyMehK2KqMhB6_cZg_nN9wLevBm9wYhzoUZmFJm48dD8xAW/svp_50069/q33463ixnac.mp4?vkey=CF0A6609E8410CF2E8FD4629FA349F4B92CD682DD396496480644842EFE9754BB13B747CB3B39A45E80CAB3819EB88D6FE13A4674940BFBD0F81B53C6BC0FA1AFF8B922BE1A63AB8B3B6608239131B45452F92CB928C5248D0BA834FB38FC3DA63DABF6C3DF3E6AC9EA8146341424DC19EE2D2E4D40458D6",
                 "/Users/luojunhui/Downloads/" + System.currentTimeMillis() + ".mp4",
                 true,
                 false,