Explorar o código

公众号增加header

丁云鹏 hai 1 ano
pai
achega
22fc86f8c5

+ 11 - 4
etl-core/src/main/java/com/tzld/crawler/etl/service/impl/EtlServiceImpl.java

@@ -417,10 +417,17 @@ public class EtlServiceImpl implements EtlService {
                         String platfrm = param.getOrDefault("platform", "");
                         try {
                             // 下载文件
-                            FileUtils.download(fileUrl, fpath,
-                                    !CollectionUtils.isEmpty(randomUaPlatform) && randomUaPlatform.contains(platfrm)
-                                    , !CollectionUtils.isEmpty(proxyPlatform) && proxyPlatform.contains(platfrm),
-                                    proxyInfo);
+                            if (platform.equals("gongzhonghao")) {
+                                FileUtils.downloadForGZH(fileUrl, fpath,
+                                        !CollectionUtils.isEmpty(randomUaPlatform) && randomUaPlatform.contains(platfrm)
+                                        , !CollectionUtils.isEmpty(proxyPlatform) && proxyPlatform.contains(platfrm),
+                                        proxyInfo);
+                            } else {
+                                FileUtils.download(fileUrl, fpath,
+                                        !CollectionUtils.isEmpty(randomUaPlatform) && randomUaPlatform.contains(platfrm)
+                                        , !CollectionUtils.isEmpty(proxyPlatform) && proxyPlatform.contains(platfrm),
+                                        proxyInfo);
+                            }
                             return false;
                         } catch (CommonException e) {
                             if (e.getCode() == ExceptionEnum.URL_FORBIDDEN.getCode()) {

+ 55 - 2
etl-core/src/main/java/com/tzld/crawler/etl/util/FileUtils.java

@@ -90,7 +90,7 @@ public class FileUtils {
     }
 
     /**
      * Convenience overload that downloads {@code fileUrl} to {@code filePath} by delegating to
      * the five-argument variant.
      *
      * NOTE(review): {@code useUa} and {@code useProxy} are accepted but not forwarded; the
      * delegate is always called with {@code false, false, null}. Confirm this is intended.
      *
      * @param fileUrl  URL to download
      * @param filePath local path the content is written to
      * @param useUa    currently unused by this overload
      * @param useProxy currently unused by this overload
      * @throws Exception whatever the delegate throws
      */
     public static void download(String fileUrl, String filePath, boolean useUa, boolean useProxy) throws Exception {
-        download(fileUrl, filePath, false, false, null);
+        downloadForGZH(fileUrl, filePath, false, false, null);
     }
 
     public static void download(String fileUrl, String filePath, boolean useUa, boolean useProxy, Map<String, String> proxyInfo) throws Exception {
@@ -138,9 +138,62 @@ public class FileUtils {
         log.info("downloaded successfully [{}] to [{}]", fileUrl, filePath);
     }
 
+    public static void downloadForGZH(String fileUrl, String filePath, boolean useUa, boolean useProxy, Map<String, String> proxyInfo) throws Exception {
+        log.info("begin download [{}] to [{}] useUa [{}] useProxy [{}] proxyInfo[{}]", fileUrl, filePath, useUa, useProxy, proxyInfo);
+        URL url = new URL(fileUrl);
+        HttpURLConnection conn;
+        if (useProxy) {
+            String proxyUrl = proxyInfo.getOrDefault("url", "");
+            int port = Integer.parseInt(proxyInfo.getOrDefault("port", "0"));
+            String username = proxyInfo.getOrDefault("username", "");
+            String password = proxyInfo.getOrDefault("password", "");
+            // 创建代理服务的地址和端口
+            Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(proxyUrl, port));
+            Authenticator authenticator = new Authenticator() {
+                @Override
+                public PasswordAuthentication getPasswordAuthentication() {
+                    return (new PasswordAuthentication(username, password.toCharArray()));
+                }
+            };
+            Authenticator.setDefault(authenticator);
+            conn = (HttpURLConnection) url.openConnection(proxy);
+        } else {
+            conn = (HttpURLConnection) url.openConnection();
+        }
+
+        if (useUa) {
+            conn.setRequestProperty("User-Agent", FakeUserAgent.getRandomUserAgent());
+        }
+
+        conn.setRequestProperty("Accept", "*/*");
+        conn.setRequestProperty("Accept-Encoding", "identity;q=1, *;q=0");
+        conn.setRequestProperty("Accept-Language", "zh-CN,zh;q=0.9");
+        conn.setRequestProperty("Host", "mpvideo.qpic.cn");
+        conn.setRequestProperty("Origin", "https://mp.weixin.qq.com");
+        conn.setRequestProperty("Referer", "https://mp.weixin.qq.com");
+
+        if (conn.getResponseCode() == HttpURLConnection.HTTP_FORBIDDEN) {
+            throw new CommonException(ExceptionEnum.URL_FORBIDDEN);
+        }
+        conn.setConnectTimeout(5000);
+        conn.setReadTimeout(5000);
+        log.info("download file size is {} of url [{}]", formatFileSize(conn.getContentLength()), fileUrl);
+
+        InputStream inputStream = conn.getInputStream();
+        FileOutputStream outputStream = new FileOutputStream(filePath);
+        byte[] buffer = new byte[4096];
+        int len;
+        while ((len = inputStream.read(buffer)) != -1) {
+            outputStream.write(buffer, 0, len);
+        }
+        inputStream.close();
+        outputStream.close();
+        log.info("downloaded successfully [{}] to [{}]", fileUrl, filePath);
+    }
+
     public static void main(String[] args) throws Exception {
         // try {
-        download("http://mpvideo.qpic.cn/0bc3zyaagaaaieajowybojsvbtwdaphaaaya.f10002.mp4?dis_k=afa8996b6f4aac67ff2d6b3b7abaa4b4&dis_t=1694751571&play_scene=10120&auth_info=WsS8pdtVOTQL3MuqxRszQlg3FBNoCCQ4PQATPSseNWV8Sz4/BF1kW2kwH14QOSR4Ug==&auth_key=d33c33aa66ca8bd8a05204709a5f92b1&vid=wxv_3103954619094892547&format_id=10002&support_redirect=0&mmversion=false", "/Users/ehlxr/Downloads/" + System.currentTimeMillis(), true, true);
+        download("https://mpvideo.qpic.cn/0b2eamabmaaaiaakmkq3jfsvaa6dcybqafqa.f10002.mp4?dis_k=af0809430b6148733c161cb94368bd30&dis_t=1696833857&play_scene=10120&auth_info=LdSvh8hZbHlJiPWF6A9dSW1HMR9TdjtFUl17LX1YNxkLblZiChITAzhoFzJxNFFq&auth_key=5fca6f1024c9f40e9f6ad50aad0e437e&vid=wxv_3132660909191184387&format_id=10002&support_redirect=0&mmversion=false", "/Users/dingyunpeng/Downloads/" + System.currentTimeMillis(), true, true);
         //
         // } catch (Exception e) {
         //     e.printStackTrace();