|
@@ -25,17 +25,28 @@
|
|
|
package com.tzld.crawler.etl.util;
|
|
|
|
|
|
|
|
|
+import com.aliyun.apache.hc.client5.http.classic.HttpClient;
|
|
|
+import com.aliyun.apache.hc.core5.http.HttpRequest;
|
|
|
import com.tzld.crawler.etl.common.FakeUserAgent;
|
|
|
import com.tzld.crawler.etl.common.enums.ExceptionEnum;
|
|
|
import com.tzld.crawler.etl.common.exception.CommonException;
|
|
|
+import org.apache.http.HttpHost;
|
|
|
+import org.apache.http.HttpResponse;
|
|
|
+import org.apache.http.auth.AuthScope;
|
|
|
+import org.apache.http.auth.UsernamePasswordCredentials;
|
|
|
+import org.apache.http.client.CredentialsProvider;
|
|
|
+import org.apache.http.client.methods.HttpGet;
|
|
|
+import org.apache.http.impl.client.BasicCredentialsProvider;
|
|
|
+import org.apache.http.impl.client.CloseableHttpClient;
|
|
|
+import org.apache.http.impl.client.HttpClients;
|
|
|
+import org.apache.http.util.EntityUtils;
|
|
|
import org.slf4j.Logger;
|
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
|
|
-import java.io.BufferedReader;
|
|
|
-import java.io.FileOutputStream;
|
|
|
-import java.io.InputStream;
|
|
|
-import java.io.InputStreamReader;
|
|
|
+import java.io.*;
|
|
|
import java.net.*;
|
|
|
+import java.nio.file.Files;
|
|
|
+import java.nio.file.Paths;
|
|
|
import java.util.Map;
|
|
|
import java.util.regex.Matcher;
|
|
|
import java.util.regex.Pattern;
|
|
@@ -196,6 +207,70 @@ public class FileUtils {
|
|
|
log.info("downloaded successfully [{}] to [{}]", fileUrl, filePath);
|
|
|
}
|
|
|
|
|
|
+ public static void downloadForFQW(String fileUrl, String filePath, boolean useUa, boolean useProxy, Map<String, String> proxyInfo) throws Exception {
|
|
|
+ log.info("begin download [{}] to [{}] useUa [{}] useProxy [{}] proxyInfo[{}]", fileUrl, filePath, useUa, useProxy, proxyInfo);
|
|
|
+ SSLUtils.ignoreSsl();
|
|
|
+ URL url = new URL(fileUrl);
|
|
|
+ HttpHost proxy = new HttpHost("q796.kdltps.com", 15818);
|
|
|
+ CredentialsProvider credsProvider = new BasicCredentialsProvider();
|
|
|
+ credsProvider.setCredentials(
|
|
|
+ new AuthScope(proxy),
|
|
|
+ new UsernamePasswordCredentials("t17772369458618", "5zqcjkmy")
|
|
|
+ );
|
|
|
+
|
|
|
+ try ( CloseableHttpClient client = HttpClients.custom()
|
|
|
+ .setDefaultCredentialsProvider(credsProvider)
|
|
|
+ .setProxy(proxy)
|
|
|
+ .build()) {
|
|
|
+ HttpGet request = new HttpGet(String.valueOf(url));
|
|
|
+ request.setHeader("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.6(0x13080610) XWEB/1156");
|
|
|
+ request.setHeader("Accept-Encoding", "identity;q=1, *;q=0");
|
|
|
+ request.setHeader("Accept", "*/*");
|
|
|
+ request.setHeader("Sec-Fetch-Site", "cross-site");
|
|
|
+ request.setHeader("Sec-Fetch-Mode", "no-cors");
|
|
|
+ request.setHeader("Sec-Fetch-Dest", "video");
|
|
|
+ request.setHeader("Referer", "https://servicewechat.com/wxa1431c6e7acdd32d/2/page-frame.html");
|
|
|
+ request.setHeader("Accept-Language", "en-US,en;q=0.9");
|
|
|
+ request.setHeader("Range", "bytes=0-");
|
|
|
+
|
|
|
+ int attempt = 0;
|
|
|
+ boolean success = false;
|
|
|
+
|
|
|
+ while (attempt < 6 && !success) {
|
|
|
+ try {
|
|
|
+ // 执行请求
|
|
|
+ HttpResponse response = client.execute(request);
|
|
|
+ if (response.getStatusLine().getStatusCode() == 206) {
|
|
|
+ try (InputStream is = response.getEntity().getContent();
|
|
|
+ OutputStream os = Files.newOutputStream(Paths.get(filePath))) { // 指定视频输出文件名
|
|
|
+ byte[] buffer = new byte[1024];
|
|
|
+ int bytesRead;
|
|
|
+ // 读取响应内容并写入文件
|
|
|
+ while ((bytesRead = is.read(buffer)) != -1) {
|
|
|
+ os.write(buffer, 0, bytesRead);
|
|
|
+ }
|
|
|
+ System.out.println("Video download complete.");
|
|
|
+ success = true;
|
|
|
+ }
|
|
|
+ catch (Exception e){
|
|
|
+ System.out.println("Attempt #" + (attempt + 1) + " failed with exception: " + e.getMessage());
|
|
|
+ }
|
|
|
+ finally {
|
|
|
+ attempt++;
|
|
|
+ }
|
|
|
+ if (!success && attempt < 6)
|
|
|
+ {
|
|
|
+ Thread.sleep(3000);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ catch (Exception e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
public static void downloadForGZH(String fileUrl, String filePath, boolean useUa, boolean useProxy, Map<String, String> proxyInfo) throws Exception {
|
|
|
log.info("begin download [{}] to [{}] useUa [{}] useProxy [{}] proxyInfo[{}]", fileUrl, filePath, useUa, useProxy, proxyInfo);
|
|
|
SSLUtils.ignoreSsl();
|
|
@@ -253,9 +328,9 @@ public class FileUtils {
|
|
|
public static void main(String[] args) throws Exception {
|
|
|
// try {
|
|
|
// System.out.println("https://api-hl.huoshan.com/hotsoon/item/video/_source/?video_id=v0300fg10000ckmbrbbc77uc3nq19840&line=0&app_id=0&vquality=normal&watermark=0&long_video=0&sf=4&ts=1697528496&item_id=7290410334844718376".replace("https://api-hl.huoshan.com", "http://api-hl.huoshan.com"));
|
|
|
- downloadForXG(
|
|
|
- "https://v9-xg-web-pc.ixigua.com/bf37bd4847ea0ef73c1419c3b7def5cd/654a30d9/video/tos/cn/tos-cn-ve-0026/oIBPpenxRaObASeRJ7XGCi5DAM78gBlGARgAUd/?a=1768&ch=0&cr=7&dr=0&er=0&cd=0%7C0%7C0%7C1&cv=1&br=196&bt=196&cs=4&ds=3&eid=1025&ft=_z7ehvvBQ3AUq8yq8Z.wNnOYZlcg6EbK2bLyA0wpuZmka&mime_type=video_mp4&qs=0&rc=ZGU5PDozZzRlOzwzOzg5Z0BpajprZmg6ZmhobzMzNGQzM0AyNmFiM2IxXmExLjQ1Yy5jYSM2MGBzcjRfLS9gLS1kLi9zcw%3D%3D&btag=e00028000&dy_q=1699357188&feature_id=e2624171dedb27d48fb2006f0f0a26ec&l=20231107193948643DF7B13B8779C9525F&__vid=v0201ag10000cl2e0ebc77uevcch379g",
|
|
|
- "/Users/luojunhui/Downloads/" + System.currentTimeMillis(),
|
|
|
+ downloadForFQW(
|
|
|
+ "https://znl-video-bos.cdn.bcebos.com/e368801a814c548e443835086d37caaf/65e93632/video/20240306/820ee1498e3ed2a59d37aed54d39ae95_1.mp4",
|
|
|
+ "/Users/luojunhui/Downloads/" + System.currentTimeMillis() + ".mp4",
|
|
|
true,
|
|
|
false,
|
|
|
null
|