|
|
@@ -1,16 +1,18 @@
|
|
|
package com.tzld.piaoquan.longarticle.utils.other;
|
|
|
|
|
|
+import cn.hutool.crypto.digest.DigestUtil;
|
|
|
import cn.hutool.http.HttpRequest;
|
|
|
import cn.hutool.http.HttpResponse;
|
|
|
import com.alibaba.fastjson.JSONArray;
|
|
|
import com.alibaba.fastjson.JSONObject;
|
|
|
+import okhttp3.*;
|
|
|
import com.tzld.piaoquan.longarticle.common.constants.CrawlerConstant;
|
|
|
import com.tzld.piaoquan.longarticle.model.po.longarticle.CrawlerVideo;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
|
|
|
|
|
|
-import java.io.IOException;
|
|
|
import java.net.*;
|
|
|
+import okhttp3.Authenticator;
|
|
|
import java.security.MessageDigest;
|
|
|
import java.security.NoSuchAlgorithmException;
|
|
|
import java.util.*;
|
|
|
@@ -41,14 +43,7 @@ public class HkspSearch {
|
|
|
// 这行代码是身份验证的关键配置,不然身份验证不起作用
|
|
|
System.setProperty("jdk.http.auth.tunneling.disabledSchemes", "");
|
|
|
// 身份验证
|
|
|
- Authenticator.setDefault(
|
|
|
- new Authenticator() {
|
|
|
- public PasswordAuthentication getPasswordAuthentication() {
|
|
|
- return new PasswordAuthentication(
|
|
|
- USERNAME, PASSWORD.toCharArray());
|
|
|
- }
|
|
|
- }
|
|
|
- );
|
|
|
+
|
|
|
List<String> sensitiveWords = new ArrayList<>();
|
|
|
List<JSONObject> list = hkspSearch("\uD83D\uDD25中国海军震撼世界!一天服役7艘巨舰,总吨位近半个法国海军!", sensitiveWords, "");
|
|
|
for (JSONObject jsonObject : list) {
|
|
|
@@ -60,37 +55,54 @@ public class HkspSearch {
|
|
|
|
|
|
public static List<JSONObject> hkspSearch(String key, List<String> sensitiveWords, String traceId) {
|
|
|
try {
|
|
|
- long timestampMilliseconds = System.currentTimeMillis();
|
|
|
- String url = "https://haokan.baidu.com/haokan/ui-search/pc/search/video";
|
|
|
+ Proxy proxy = new Proxy(
|
|
|
+ Proxy.Type.HTTP,
|
|
|
+ new InetSocketAddress(PROXY_HOST, PROXY_PORT)
|
|
|
+ );
|
|
|
+ okhttp3.Authenticator authenticator = (route, response) -> {
|
|
|
+ String credential = Credentials.basic(USERNAME, PASSWORD);
|
|
|
+ return response.request().newBuilder()
|
|
|
+ .header("Proxy-Authorization", credential)
|
|
|
+ .build();
|
|
|
+ };
|
|
|
+ OkHttpClient client = new OkHttpClient.Builder()
|
|
|
+ .proxy(proxy)
|
|
|
+ .proxyAuthenticator(authenticator)
|
|
|
+ .build();
|
|
|
+
|
|
|
String encodedKey = URLEncoder.encode(key, "UTF-8");
|
|
|
+ long timestampMilliseconds = System.currentTimeMillis();
|
|
|
String strings = String.format("%d_%s_%d_%d_%d", 1, encodedKey, 10, timestampMilliseconds, 1);
|
|
|
- String sign = md5(strings);
|
|
|
-
|
|
|
- JSONObject params = new JSONObject();
|
|
|
- params.put("pn", 1);
|
|
|
- params.put("rn", 10);
|
|
|
- params.put("type", "video");
|
|
|
- params.put("query", key);
|
|
|
- params.put("sign", sign);
|
|
|
- params.put("version", 1);
|
|
|
- params.put("timestamp", timestampMilliseconds);
|
|
|
-
|
|
|
- String base64String = Base64.getEncoder().encodeToString(UUID.randomUUID().toString().getBytes());
|
|
|
- HttpResponse response = HttpRequest.get(url)
|
|
|
- .header("authority", "haokan.baidu.com")
|
|
|
- .header("accept", "*/*")
|
|
|
- .header("accept-language", "zh,en;q=0.9,zh-CN;q=0.8")
|
|
|
- .header("cookie", "BIDUPSID=" + base64String)
|
|
|
- .header("user-agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36")
|
|
|
- .header("x-requested-with", "xmlhttprequest")
|
|
|
- .timeout(120000) // 设置超时时间
|
|
|
- .setProxy(new Proxy(Proxy.Type.HTTP, new InetSocketAddress(PROXY_HOST, PROXY_PORT)))
|
|
|
- .form(params)
|
|
|
- .execute();
|
|
|
+ String sign = DigestUtil.md5Hex(strings);
|
|
|
+
|
|
|
+ HttpUrl url = HttpUrl.parse("https://haokan.baidu.com/haokan/ui-search/pc/search/video")
|
|
|
+ .newBuilder()
|
|
|
+ .addQueryParameter("pn", "1")
|
|
|
+ .addQueryParameter("rn", "10")
|
|
|
+ .addQueryParameter("type", "video")
|
|
|
+ .addQueryParameter("query", key)
|
|
|
+ .addQueryParameter("sign", sign)
|
|
|
+ .addQueryParameter("version", "1")
|
|
|
+ .addQueryParameter("timestamp", String.valueOf(timestampMilliseconds))
|
|
|
+ .build();
|
|
|
+ Headers headers = Headers.of(
|
|
|
+ "accept", "*/*",
|
|
|
+ "accept-language", "zh,en;q=0.9,zh-CN;q=0.8",
|
|
|
+ "cookie", String.format("BIDUPSID=%s", Base64.getEncoder().encodeToString(UUID.randomUUID().toString().getBytes())),
|
|
|
+ "user-agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36"
|
|
|
+ );
|
|
|
+ Request request = new Request.Builder()
|
|
|
+ .url(url)
|
|
|
+ .headers(headers)
|
|
|
+ .get()
|
|
|
+ .build();
|
|
|
+
|
|
|
+ Response response = client.newCall(request).execute();
|
|
|
+ String result = response.body().string();
|
|
|
|
|
|
List<JSONObject> resultList = new ArrayList<>();
|
|
|
- log.info("hkspSearch response:{}", response.body());
|
|
|
- JSONObject jsonResponse = JSONObject.parseObject(response.body());
|
|
|
+ log.info("hkspSearch response:{}", result);
|
|
|
+ JSONObject jsonResponse = JSONObject.parseObject(result);
|
|
|
JSONArray dataList = jsonResponse.getJSONObject("data").getJSONArray("list");
|
|
|
|
|
|
if (Objects.nonNull(dataList)) {
|