浏览代码

File download Add User-Agent header

ehlxr 1 年之前
父节点
当前提交
6b89d2e2be

+ 6 - 2
etl-core/src/main/java/com/tzld/crawler/etl/service/impl/EtlServiceImpl.java

@@ -124,7 +124,10 @@ public class EtlServiceImpl implements EtlService {
     // @Value("${youget.download.platform:}")
     // @Value("${youget.download.platform:}")
     // private List<String> useYougetPlatfrom;
     // private List<String> useYougetPlatfrom;
     @Value("${random.ua.platform:}")
     @Value("${random.ua.platform:}")
-    private List<String> randomUaPlatfom;
+    private List<String> randomUaPlatform;
+
+    @Value("${proxy.platform:}")
+    private List<String> proxyPlatform;
 
 
     private Executor pool;
     private Executor pool;
 
 
@@ -411,7 +414,8 @@ public class EtlServiceImpl implements EtlService {
                         try {
                         try {
                             // 下载文件
                             // 下载文件
                             FileUtils.download(fileUrl, fpath,
                             FileUtils.download(fileUrl, fpath,
-                                    !CollectionUtils.isEmpty(randomUaPlatfom) && randomUaPlatfom.contains(platfrm));
+                                    !CollectionUtils.isEmpty(randomUaPlatform) && randomUaPlatform.contains(platfrm)
+                                    , !CollectionUtils.isEmpty(proxyPlatform) && proxyPlatform.contains(platfrm));
                             return false;
                             return false;
                         } catch (CommonException e) {
                         } catch (CommonException e) {
                             if (e.getCode() == ExceptionEnum.URL_FORBIDDEN.getCode()) {
                             if (e.getCode() == ExceptionEnum.URL_FORBIDDEN.getCode()) {

+ 26 - 8
etl-core/src/main/java/com/tzld/crawler/etl/util/FileUtils.java

@@ -34,8 +34,7 @@ import java.io.BufferedReader;
 import java.io.FileOutputStream;
 import java.io.FileOutputStream;
 import java.io.InputStream;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.InputStreamReader;
-import java.net.HttpURLConnection;
-import java.net.URL;
+import java.net.*;
 import java.util.Random;
 import java.util.Random;
 import java.util.regex.Matcher;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.regex.Pattern;
@@ -45,7 +44,8 @@ import java.util.regex.Pattern;
  * @since 2023-06-09 15:54.
  * @since 2023-06-09 15:54.
  */
  */
 public class FileUtils {
 public class FileUtils {
-    private FileUtils() {}
+    private FileUtils() {
+    }
 
 
     private static final Logger log = LoggerFactory.getLogger(FileUtils.class);
     private static final Logger log = LoggerFactory.getLogger(FileUtils.class);
 
 
@@ -161,13 +161,31 @@ public class FileUtils {
     }
     }
 
 
     public static void download(String fileUrl, String filePath) throws Exception {
     public static void download(String fileUrl, String filePath) throws Exception {
-        download(fileUrl, filePath, false);
+        download(fileUrl, filePath, false, false);
     }
     }
 
 
-    public static void download(String fileUrl, String filePath, boolean useUa) throws Exception {
-        log.info("begin download [{}] to [{}]", fileUrl, filePath);
+    public static void download(String fileUrl, String filePath, boolean useUa, boolean useProxy) throws Exception {
+        log.info("begin download [{}] to [{}] useUa [{}] useProxy [{}]", fileUrl, filePath, useUa, useProxy);
         URL url = new URL(fileUrl);
         URL url = new URL(fileUrl);
-        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+        HttpURLConnection conn;
+        if (useProxy) {
+            String tunnel = "q796.kdltps.com";
+            int port = 15818;
+            String username = "t17772369458618";
+            String password = "5zqcjkmy";
+            // 创建代理服务的地址和端口
+            Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(tunnel, port));
+            Authenticator authenticator = new Authenticator() {
+                public PasswordAuthentication getPasswordAuthentication() {
+                    return (new PasswordAuthentication(username, password.toCharArray()));
+                }
+            };
+            Authenticator.setDefault(authenticator);
+            conn = (HttpURLConnection) url.openConnection(proxy);
+        } else {
+            conn = (HttpURLConnection) url.openConnection();
+        }
+
         if (useUa) {
         if (useUa) {
             conn.setRequestProperty("User-Agent", getRandomUserAgent());
             conn.setRequestProperty("User-Agent", getRandomUserAgent());
         }
         }
@@ -192,7 +210,7 @@ public class FileUtils {
 
 
     public static void main(String[] args) throws Exception {
     public static void main(String[] args) throws Exception {
         // try {
         // try {
-        download("http://mpvideo.qpic.cn/0bc3zyaagaaaieajowybojsvbtwdaphaaaya.f10002.mp4?dis_k=afa8996b6f4aac67ff2d6b3b7abaa4b4&dis_t=1694751571&play_scene=10120&auth_info=WsS8pdtVOTQL3MuqxRszQlg3FBNoCCQ4PQATPSseNWV8Sz4/BF1kW2kwH14QOSR4Ug==&auth_key=d33c33aa66ca8bd8a05204709a5f92b1&vid=wxv_3103954619094892547&format_id=10002&support_redirect=0&mmversion=false", "/Users/ehlxr/Downloads/" + System.currentTimeMillis(), true);
+        download("http://mpvideo.qpic.cn/0bc3zyaagaaaieajowybojsvbtwdaphaaaya.f10002.mp4?dis_k=afa8996b6f4aac67ff2d6b3b7abaa4b4&dis_t=1694751571&play_scene=10120&auth_info=WsS8pdtVOTQL3MuqxRszQlg3FBNoCCQ4PQATPSseNWV8Sz4/BF1kW2kwH14QOSR4Ug==&auth_key=d33c33aa66ca8bd8a05204709a5f92b1&vid=wxv_3103954619094892547&format_id=10002&support_redirect=0&mmversion=false", "/Users/ehlxr/Downloads/" + System.currentTimeMillis(), true, true);
         //
         //
         // } catch (Exception e) {
         // } catch (Exception e) {
         //     e.printStackTrace();
         //     e.printStackTrace();