// HkspSearch.java (6.1 KB)
package com.tzld.piaoquan.longarticle.utils;

import cn.hutool.http.HttpRequest;
import cn.hutool.http.HttpResponse;
import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;

import java.io.IOException;
import java.net.*;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
import java.util.UUID;
import java.util.logging.Level;
import java.util.logging.Logger;
  14. public class HkspSearch {
  15. public static List<JSONObject> hkspSearch(String key, List<String> sensitiveWords, String traceId) {
  16. try {
  17. long timestampMilliseconds = System.currentTimeMillis();
  18. String url = "https://haokan.baidu.com/haokan/ui-search/pc/search/video";
  19. String encodedKey = URLEncoder.encode(key, "UTF-8");
  20. String strings = String.format("%d_%s_%d_%d_%d", 1, encodedKey, 10, timestampMilliseconds, 1);
  21. String sign = md5(strings);
  22. JSONObject params = new JSONObject();
  23. params.put("pn", 1);
  24. params.put("rn", 10);
  25. params.put("type", "video");
  26. params.put("query", key);
  27. params.put("sign", sign);
  28. params.put("version", 1);
  29. params.put("timestamp", timestampMilliseconds);
  30. String base64String = Base64.getEncoder().encodeToString(UUID.randomUUID().toString().getBytes());
  31. // 这行代码是身份验证的关键配置,不然身份验证不起作用
  32. System.setProperty("jdk.http.auth.tunneling.disabledSchemes", "");
  33. // 身份验证
  34. Authenticator.setDefault(
  35. new Authenticator() {
  36. public PasswordAuthentication getPasswordAuthentication() {
  37. return new PasswordAuthentication(
  38. "t11983523373311", "mtuhdr2z".toCharArray());
  39. }
  40. }
  41. );
  42. HttpResponse response = HttpRequest.get(url)
  43. .header("authority", "haokan.baidu.com")
  44. .header("accept", "*/*")
  45. .header("accept-language", "zh,en;q=0.9,zh-CN;q=0.8")
  46. .header("cookie", "BIDUPSID=" + base64String)
  47. .header("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0") // 假用户代理
  48. .header("x-requested-with", "xmlhttprequest")
  49. .timeout(120000) // 设置超时时间
  50. .setProxy(new Proxy(Proxy.Type.HTTP, new InetSocketAddress("l901.kdltps.com", 15818)))
  51. .form(params)
  52. .execute();
  53. List<JSONObject> resultList = new ArrayList<>();
  54. System.out.println(response.body());
  55. JSONObject jsonResponse = JSONUtil.parseObj(response.body());
  56. List<JSONObject> dataList = jsonResponse.getByPath("data.list", List.class);
  57. for (JSONObject data : dataList) {
  58. try {
  59. String videoId = data.getStr("vid");
  60. String title = data.getStr("title");
  61. int duration = parseDuration(data.getStr("duration"));
  62. if (sensitiveFlag(sensitiveWords, title) && duration <= 300) {
  63. JSONObject res = getVideoDetail(videoId);
  64. if (res != null) {
  65. resultList.add(res);
  66. }
  67. }
  68. } catch (Exception e) {
  69. // 处理异常
  70. }
  71. }
  72. return resultList;
  73. } catch (Exception e) {
  74. return new ArrayList<>();
  75. }
  76. }
  77. private static int parseDuration(String duration) {
  78. String[] parts = duration.split(":");
  79. return Integer.parseInt(parts[0]) * 60 + Integer.parseInt(parts[1]);
  80. }
  81. private static String md5(String input) {
  82. try {
  83. MessageDigest md = MessageDigest.getInstance("MD5");
  84. byte[] messageDigest = md.digest(input.getBytes());
  85. StringBuilder sb = new StringBuilder();
  86. for (byte b : messageDigest) {
  87. sb.append(String.format("%02x", b));
  88. }
  89. return sb.toString();
  90. } catch (NoSuchAlgorithmException e) {
  91. throw new RuntimeException(e);
  92. }
  93. }
  94. private static boolean sensitiveFlag(List<String> sensitiveWords, String title) {
  95. // 实现敏感词检查逻辑
  96. return true; // 示例
  97. }
  98. public static JSONObject getVideoDetail(String videoId) {
  99. String url = "https://haokan.baidu.com/v";
  100. JSONObject params = new JSONObject();
  101. params.put("vid", videoId);
  102. params.put("_format", "json");
  103. String base64String = Base64.getEncoder().encodeToString(UUID.randomUUID().toString().getBytes());
  104. // 这行代码是身份验证的关键配置,不然身份验证不起作用
  105. System.setProperty("jdk.http.auth.tunneling.disabledSchemes", "");
  106. // 身份验证
  107. Authenticator.setDefault(
  108. new Authenticator() {
  109. public PasswordAuthentication getPasswordAuthentication() {
  110. return new PasswordAuthentication(
  111. "t11983523373311", "mtuhdr2z".toCharArray());
  112. }
  113. }
  114. );
  115. HttpResponse response = HttpRequest.get(url)
  116. .header("Accept", "*/*")
  117. .header("cookie", "BIDUPSID=" + base64String)
  118. .header("Accept-Language", "en,zh;q=0.9,zh-CN;q=0.8")
  119. .header("Cache-Control", "no-cache")
  120. .header("Connection", "keep-alive")
  121. .header("Content-Type", "application/x-www-form-urlencoded")
  122. .header("Referer", "https://haokan.baidu.com")
  123. .form(params)
  124. .setProxy(new Proxy(Proxy.Type.HTTP, new InetSocketAddress("l901.kdltps.com", 15818)))
  125. .execute();
  126. return JSONUtil.parseObj(response.body()).getByPath("data.apiData.curVideoMeta", JSONObject.class);
  127. }
  128. }