XiGuaUtils.java 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. package com.tzld.piaoquan.longarticle.utils;
  2. import cn.hutool.http.HttpRequest;
  3. import cn.hutool.http.HttpResponse;
  4. import com.alibaba.fastjson.JSONArray;
  5. import com.alibaba.fastjson.JSONObject;
  6. import com.fasterxml.jackson.databind.JsonNode;
  7. import com.fasterxml.jackson.databind.ObjectMapper;
  8. import org.jsoup.Jsoup;
  9. import org.jsoup.nodes.Document;
  10. import org.jsoup.select.Elements;
  11. import javax.crypto.Cipher;
  12. import javax.crypto.spec.IvParameterSpec;
  13. import javax.crypto.spec.SecretKeySpec;
  14. import java.net.URLEncoder;
  15. import java.nio.charset.StandardCharsets;
  16. import java.util.*;
  17. import java.util.regex.Matcher;
  18. import java.util.regex.Pattern;
  19. public class XiGuaUtils {
  20. public static List<JSONObject> xiguaSearchV2(String keyword, List<String> sensitiveWords) {
  21. String url = "https://www.ixigua.com/api/searchv2/complex/" + URLEncoder.encode(keyword, StandardCharsets.UTF_8) + "/10";
  22. Map<String, String> headers = new HashMap<>();
  23. headers.put("accept", "application/json, text/plain, */*");
  24. headers.put("accept-language", "en,zh;q=0.9,zh-CN;q=0.8");
  25. headers.put("cookie", "_tea_utm_cache_2285=undefined;");
  26. headers.put("priority", "u=1, i");
  27. headers.put("referer", "https://www.ixigua.com/search/" + URLEncoder.encode(keyword, StandardCharsets.UTF_8));
  28. headers.put("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3");
  29. HttpResponse response = HttpRequest.get(url)
  30. .addHeaders(headers)
  31. .execute();
  32. try {
  33. JSONObject jsonResponse = JSONObject.parseObject(response.body());
  34. JSONArray recallList = jsonResponse.getJSONObject("data").getJSONArray("data");
  35. if (recallList != null) {
  36. for (int i = 0; i < recallList.size(); i++) {
  37. JSONObject videoObj = recallList.getJSONObject(i);
  38. if ("video".equals(videoObj.getString("type"))) {
  39. String title = videoObj.getJSONObject("data").getString("title");
  40. String groupId = videoObj.getJSONObject("data").getString("group_id");
  41. int duration = videoObj.getJSONObject("data").getInteger("video_time");
  42. // 假设 sensitiveFlag 是一个方法用于检查敏感词
  43. if (sensitiveFlag(sensitiveWords, title) && duration <= 300) {
  44. System.out.println(groupId);
  45. JSONObject videoInfo = getVideoInfo(groupId);
  46. if (videoInfo != null) {
  47. return List.of(videoInfo);
  48. }
  49. }
  50. }
  51. }
  52. }
  53. return List.of();
  54. } catch (Exception e) {
  55. e.printStackTrace();
  56. return List.of();
  57. }
  58. }
  59. public static JSONObject getVideoInfo(String itemId) {
  60. String url = "https://www.ixigua.com/" + itemId;
  61. // 设置请求头
  62. HttpRequest request = HttpRequest.get(url)
  63. .header("accept-language", "zh-CN,zh-Hans;q=0.9")
  64. .header("cookie", "UIFID=73355a799e41c2edb6d004baa6cda0116425031dff9117e11075ec8bf266082874fe897f43e66be83a0501afe4a08cfc7e1066ab88423af122641493c7af9f0a745eb85c50fddb096de5cc77cd5ff05503312d84d36ab2681c6e6d930bbe68edaebf8fae03b04eb669359965e01c266b;"
  65. + "__ac_nonce=0666fd1a00053bf535b9f;"
  66. + "__ac_signature=_02B4Z6wo00f01u8PTiQAAIDBvfBuP-YjUQbvL0qAAN25bWfWXQrzRNCBKvFYKS5wAOYPXg5XV1Ck9JEroeWeWKijH2v3i4lxXM37JogiJJfEtYD.8sbXul2-4v.VRRta4xa07ignRnGj5Voh83;"
  67. + "ttwid=1%7C9b5sTIuwZxZKt0wFsvE-2t5OoFxH_Q5VIpVNWEREbAo%7C1718605316%7C9dfc9322350e713e6109ed46a7047ed31c0ab5a724e84de0bb766c195043207c")
  68. .header("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3")
  69. .header("referer", "https://www.ixigua.com/" + itemId);
  70. // 发送请求
  71. HttpResponse response = request.execute();
  72. // 解析响应
  73. String responseBody = response.body();
  74. System.out.println(responseBody);
  75. return extractInfoByRegex(responseBody);
  76. }
  77. public static JSONObject extractInfoByRegex(String text) {
  78. JSONObject result = extractVideoUrl(text);
  79. // 标题
  80. String titleContent = extractTitle(text);
  81. result.put("video_title", titleContent);
  82. return result;
  83. }
  84. private static String extractTitle(String text) {
  85. Pattern pattern = Pattern.compile("<title[^>]*>(.*?)</title>");
  86. Matcher matcher = pattern.matcher(text);
  87. if (matcher.find()) {
  88. String title = matcher.group(1);
  89. // 处理标题内容
  90. String[] parts = title.split(" - ");
  91. String titleContent = parts[0];
  92. // 尝试将标题内容转换为 UTF-8
  93. try {
  94. return new String(titleContent.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);
  95. } catch (Exception e) {
  96. e.printStackTrace();
  97. }
  98. return titleContent;
  99. }
  100. return "";
  101. }
  102. public static JSONObject extractVideoUrl(String text) {
  103. Document document = Jsoup.parse(text);
  104. Elements scriptElements = document.select("script#SSR_HYDRATED_DATA");
  105. String str2 = Objects.requireNonNull(scriptElements.first()).html();
  106. String json2 = str2.substring(str2.indexOf('{'), str2.lastIndexOf('}') + 1);
  107. String[] irregulars = {"null", "undefined", "=false", "=true", "false", "true"};
  108. // 处理不规则定义
  109. for (String i : irregulars) {
  110. if ("=false".equals(i) || "=true".equals(i)) {
  111. json2 = json2.replace(i, "=" + Character.toUpperCase(i.charAt(1)) + i.substring(2));
  112. } else {
  113. json2 = json2.replace(i, "12");
  114. }
  115. }
  116. try {
  117. ObjectMapper objectMapper = new ObjectMapper();
  118. JsonNode dict2 = objectMapper.readTree(json2).path("anyVideo").path("gidInformation").path("packerData").path("video");
  119. long duration = dict2.path("video_duration").asLong();
  120. long playCnt = dict2.path("video_watch_count").asLong();
  121. long publishTime = dict2.path("video_publish_time").asLong();
  122. long likeCnt = dict2.path("video_like_count").asLong();
  123. String videoTitle = dict2.path("title").asText();
  124. String videoId = dict2.path("vid").asText();
  125. JsonNode videoRes = dict2.path("videoResource");
  126. String coverUrl = dict2.path("poster_url").asText().replace("\\u002F", "/");
  127. String realVideoUrl;
  128. if (videoRes.path("dash").asInt() == 12) {
  129. JsonNode obj = videoRes.path("normal");
  130. String ptk = obj.path("ptk").asText();
  131. JsonNode videoList = obj.path("video_list");
  132. String mainUrl = videoList.fields().next().getValue().path("main_url").asText();
  133. realVideoUrl = aesDecrypt(mainUrl, ptk);
  134. } else {
  135. JsonNode obj = videoRes.path("dash");
  136. String ptk = obj.path("ptk").asText();
  137. String videoUrl = obj.path("dynamic_video").path("main_url").asText();
  138. realVideoUrl = aesDecrypt(videoUrl, ptk);
  139. }
  140. JSONObject result = new JSONObject();
  141. result.put("video_url", realVideoUrl);
  142. result.put("cover_url", coverUrl);
  143. result.put("video_id", videoId);
  144. result.put("video_title", videoTitle);
  145. result.put("like_cnt", likeCnt);
  146. result.put("play_cnt", playCnt);
  147. result.put("publish_time", publishTime);
  148. result.put("duration", duration);
  149. return result;
  150. } catch (Exception e) {
  151. e.printStackTrace();
  152. return new JSONObject();
  153. }
  154. }
  155. private static String aesDecrypt(String data, String key) {
  156. try {
  157. byte[] keyBytes = key.getBytes(StandardCharsets.UTF_8);
  158. SecretKeySpec secretKey = new SecretKeySpec(keyBytes, "AES");
  159. Cipher cipher = Cipher.getInstance("AES/CBC/PKCS5Padding");
  160. IvParameterSpec ivParams = new IvParameterSpec(keyBytes, 0, 16);
  161. cipher.init(Cipher.DECRYPT_MODE, secretKey, ivParams);
  162. byte[] decryptedBytes = cipher.doFinal(Base64.getDecoder().decode(data));
  163. return new String(decryptedBytes, StandardCharsets.UTF_8);
  164. } catch (Exception e) {
  165. e.printStackTrace();
  166. return null;
  167. }
  168. }
  169. // 假设 sensitiveFlag 方法的实现
  170. private static boolean sensitiveFlag(List<String> sensitiveWords, String title) {
  171. return true;
  172. }
  173. public static void main(String[] args) {
  174. String keyword = "测试"; // 替换为实际关键词
  175. List<String> sensitiveWords = List.of("敏感词1", "敏感词2"); // 替换为实际敏感词
  176. List<JSONObject> results = xiguaSearchV2(keyword, sensitiveWords);
  177. results.forEach(System.out::println);
  178. }
  179. }