package com.tzld.piaoquan.longarticle.utils; import cn.hutool.http.HttpRequest; import cn.hutool.http.HttpResponse; import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONObject; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.select.Elements; import javax.crypto.Cipher; import javax.crypto.spec.IvParameterSpec; import javax.crypto.spec.SecretKeySpec; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; public class XiGuaUtils { public static List xiguaSearchV2(String keyword, List sensitiveWords) { String url = "https://www.ixigua.com/api/searchv2/complex/" + URLEncoder.encode(keyword, StandardCharsets.UTF_8) + "/10"; Map headers = new HashMap<>(); headers.put("accept", "application/json, text/plain, */*"); headers.put("accept-language", "en,zh;q=0.9,zh-CN;q=0.8"); headers.put("cookie", "_tea_utm_cache_2285=undefined;"); headers.put("priority", "u=1, i"); headers.put("referer", "https://www.ixigua.com/search/" + URLEncoder.encode(keyword, StandardCharsets.UTF_8)); headers.put("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"); HttpResponse response = HttpRequest.get(url) .addHeaders(headers) .execute(); try { JSONObject jsonResponse = JSONObject.parseObject(response.body()); JSONArray recallList = jsonResponse.getJSONObject("data").getJSONArray("data"); if (recallList != null) { for (int i = 0; i < recallList.size(); i++) { JSONObject videoObj = recallList.getJSONObject(i); if ("video".equals(videoObj.getString("type"))) { String title = videoObj.getJSONObject("data").getString("title"); String groupId = videoObj.getJSONObject("data").getString("group_id"); int duration = videoObj.getJSONObject("data").getInteger("video_time"); // 假设 sensitiveFlag 是一个方法用于检查敏感词 if (sensitiveFlag(sensitiveWords, title) && duration <= 300) { System.out.println(groupId); JSONObject videoInfo = getVideoInfo(groupId); if (videoInfo != null) { return List.of(videoInfo); } } } } } return List.of(); } catch (Exception e) { e.printStackTrace(); return List.of(); } } public static JSONObject getVideoInfo(String itemId) { String url = "https://www.ixigua.com/" + itemId; // 设置请求头 HttpRequest request = HttpRequest.get(url) .header("accept-language", "zh-CN,zh-Hans;q=0.9") .header("cookie", "UIFID=73355a799e41c2edb6d004baa6cda0116425031dff9117e11075ec8bf266082874fe897f43e66be83a0501afe4a08cfc7e1066ab88423af122641493c7af9f0a745eb85c50fddb096de5cc77cd5ff05503312d84d36ab2681c6e6d930bbe68edaebf8fae03b04eb669359965e01c266b;" + "__ac_nonce=0666fd1a00053bf535b9f;" + "__ac_signature=_02B4Z6wo00f01u8PTiQAAIDBvfBuP-YjUQbvL0qAAN25bWfWXQrzRNCBKvFYKS5wAOYPXg5XV1Ck9JEroeWeWKijH2v3i4lxXM37JogiJJfEtYD.8sbXul2-4v.VRRta4xa07ignRnGj5Voh83;" + "ttwid=1%7C9b5sTIuwZxZKt0wFsvE-2t5OoFxH_Q5VIpVNWEREbAo%7C1718605316%7C9dfc9322350e713e6109ed46a7047ed31c0ab5a724e84de0bb766c195043207c") .header("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3") .header("referer", "https://www.ixigua.com/" + itemId); // 发送请求 HttpResponse response = request.execute(); // 解析响应 String responseBody = response.body(); System.out.println(responseBody); return extractInfoByRegex(responseBody); } public static JSONObject extractInfoByRegex(String text) { JSONObject result = extractVideoUrl(text); // 标题 String titleContent = extractTitle(text); result.put("video_title", titleContent); return result; } private static String extractTitle(String text) { Pattern pattern = Pattern.compile("]*>(.*?)"); Matcher matcher = pattern.matcher(text); if (matcher.find()) { String title = matcher.group(1); // 处理标题内容 String[] parts = title.split(" - "); String titleContent = parts[0]; // 尝试将标题内容转换为 UTF-8 try { return new String(titleContent.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8); } catch (Exception e) { e.printStackTrace(); } return titleContent; } return ""; } public static JSONObject extractVideoUrl(String text) { Document document = Jsoup.parse(text); Elements scriptElements = document.select("script#SSR_HYDRATED_DATA"); String str2 = Objects.requireNonNull(scriptElements.first()).html(); String json2 = str2.substring(str2.indexOf('{'), str2.lastIndexOf('}') + 1); String[] irregulars = {"null", "undefined", "=false", "=true", "false", "true"}; // 处理不规则定义 for (String i : irregulars) { if ("=false".equals(i) || "=true".equals(i)) { json2 = json2.replace(i, "=" + Character.toUpperCase(i.charAt(1)) + i.substring(2)); } else { json2 = json2.replace(i, "12"); } } try { ObjectMapper objectMapper = new ObjectMapper(); JsonNode dict2 = objectMapper.readTree(json2).path("anyVideo").path("gidInformation").path("packerData").path("video"); long duration = dict2.path("video_duration").asLong(); long playCnt = dict2.path("video_watch_count").asLong(); long publishTime = dict2.path("video_publish_time").asLong(); long likeCnt = dict2.path("video_like_count").asLong(); String videoTitle = dict2.path("title").asText(); String videoId = dict2.path("vid").asText(); JsonNode videoRes = dict2.path("videoResource"); String coverUrl = dict2.path("poster_url").asText().replace("\\u002F", "/"); String realVideoUrl; if (videoRes.path("dash").asInt() == 12) { JsonNode obj = videoRes.path("normal"); String ptk = obj.path("ptk").asText(); JsonNode videoList = obj.path("video_list"); String mainUrl = videoList.fields().next().getValue().path("main_url").asText(); realVideoUrl = aesDecrypt(mainUrl, ptk); } else { JsonNode obj = videoRes.path("dash"); String ptk = obj.path("ptk").asText(); String videoUrl = obj.path("dynamic_video").path("main_url").asText(); realVideoUrl = aesDecrypt(videoUrl, ptk); } JSONObject result = new JSONObject(); result.put("video_url", realVideoUrl); result.put("cover_url", coverUrl); result.put("video_id", videoId); result.put("video_title", videoTitle); result.put("like_cnt", likeCnt); result.put("play_cnt", playCnt); result.put("publish_time", publishTime); result.put("duration", duration); return result; } catch (Exception e) { e.printStackTrace(); return new JSONObject(); } } private static String aesDecrypt(String data, String key) { try { byte[] keyBytes = key.getBytes(StandardCharsets.UTF_8); SecretKeySpec secretKey = new SecretKeySpec(keyBytes, "AES"); Cipher cipher = Cipher.getInstance("AES/CBC/PKCS5Padding"); IvParameterSpec ivParams = new IvParameterSpec(keyBytes, 0, 16); cipher.init(Cipher.DECRYPT_MODE, secretKey, ivParams); byte[] decryptedBytes = cipher.doFinal(Base64.getDecoder().decode(data)); return new String(decryptedBytes, StandardCharsets.UTF_8); } catch (Exception e) { e.printStackTrace(); return null; } } // 假设 sensitiveFlag 方法的实现 private static boolean sensitiveFlag(List sensitiveWords, String title) { return true; } public static void main(String[] args) { String keyword = "测试"; // 替换为实际关键词 List sensitiveWords = List.of("敏感词1", "敏感词2"); // 替换为实际敏感词 List results = xiguaSearchV2(keyword, sensitiveWords); results.forEach(System.out::println); } }