123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207 |
- package com.tzld.piaoquan.longarticle.utils;
- import cn.hutool.http.HttpRequest;
- import cn.hutool.http.HttpResponse;
- import com.alibaba.fastjson.JSONArray;
- import com.alibaba.fastjson.JSONObject;
- import com.fasterxml.jackson.databind.JsonNode;
- import com.fasterxml.jackson.databind.ObjectMapper;
- import org.jsoup.Jsoup;
- import org.jsoup.nodes.Document;
- import org.jsoup.select.Elements;
- import javax.crypto.Cipher;
- import javax.crypto.spec.IvParameterSpec;
- import javax.crypto.spec.SecretKeySpec;
- import java.net.URLEncoder;
- import java.nio.charset.StandardCharsets;
- import java.util.*;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- public class XiGuaUtils {
- public static List<JSONObject> xiguaSearchV2(String keyword, List<String> sensitiveWords) {
- String url = "https://www.ixigua.com/api/searchv2/complex/" + URLEncoder.encode(keyword, StandardCharsets.UTF_8) + "/10";
- Map<String, String> headers = new HashMap<>();
- headers.put("accept", "application/json, text/plain, */*");
- headers.put("accept-language", "en,zh;q=0.9,zh-CN;q=0.8");
- headers.put("cookie", "_tea_utm_cache_2285=undefined;");
- headers.put("priority", "u=1, i");
- headers.put("referer", "https://www.ixigua.com/search/" + URLEncoder.encode(keyword, StandardCharsets.UTF_8));
- headers.put("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3");
- HttpResponse response = HttpRequest.get(url)
- .addHeaders(headers)
- .execute();
- try {
- JSONObject jsonResponse = JSONObject.parseObject(response.body());
- JSONArray recallList = jsonResponse.getJSONObject("data").getJSONArray("data");
- if (recallList != null) {
- for (int i = 0; i < recallList.size(); i++) {
- JSONObject videoObj = recallList.getJSONObject(i);
- if ("video".equals(videoObj.getString("type"))) {
- String title = videoObj.getJSONObject("data").getString("title");
- String groupId = videoObj.getJSONObject("data").getString("group_id");
- int duration = videoObj.getJSONObject("data").getInteger("video_time");
- // 假设 sensitiveFlag 是一个方法用于检查敏感词
- if (sensitiveFlag(sensitiveWords, title) && duration <= 300) {
- System.out.println(groupId);
- JSONObject videoInfo = getVideoInfo(groupId);
- if (videoInfo != null) {
- return List.of(videoInfo);
- }
- }
- }
- }
- }
- return List.of();
- } catch (Exception e) {
- e.printStackTrace();
- return List.of();
- }
- }
- public static JSONObject getVideoInfo(String itemId) {
- String url = "https://www.ixigua.com/" + itemId;
- // 设置请求头
- HttpRequest request = HttpRequest.get(url)
- .header("accept-language", "zh-CN,zh-Hans;q=0.9")
- .header("cookie", "UIFID=73355a799e41c2edb6d004baa6cda0116425031dff9117e11075ec8bf266082874fe897f43e66be83a0501afe4a08cfc7e1066ab88423af122641493c7af9f0a745eb85c50fddb096de5cc77cd5ff05503312d84d36ab2681c6e6d930bbe68edaebf8fae03b04eb669359965e01c266b;"
- + "__ac_nonce=0666fd1a00053bf535b9f;"
- + "__ac_signature=_02B4Z6wo00f01u8PTiQAAIDBvfBuP-YjUQbvL0qAAN25bWfWXQrzRNCBKvFYKS5wAOYPXg5XV1Ck9JEroeWeWKijH2v3i4lxXM37JogiJJfEtYD.8sbXul2-4v.VRRta4xa07ignRnGj5Voh83;"
- + "ttwid=1%7C9b5sTIuwZxZKt0wFsvE-2t5OoFxH_Q5VIpVNWEREbAo%7C1718605316%7C9dfc9322350e713e6109ed46a7047ed31c0ab5a724e84de0bb766c195043207c")
- .header("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3")
- .header("referer", "https://www.ixigua.com/" + itemId);
- // 发送请求
- HttpResponse response = request.execute();
- // 解析响应
- String responseBody = response.body();
- System.out.println(responseBody);
- return extractInfoByRegex(responseBody);
- }
- public static JSONObject extractInfoByRegex(String text) {
- JSONObject result = extractVideoUrl(text);
- // 标题
- String titleContent = extractTitle(text);
- result.put("video_title", titleContent);
- return result;
- }
- private static String extractTitle(String text) {
- Pattern pattern = Pattern.compile("<title[^>]*>(.*?)</title>");
- Matcher matcher = pattern.matcher(text);
- if (matcher.find()) {
- String title = matcher.group(1);
- // 处理标题内容
- String[] parts = title.split(" - ");
- String titleContent = parts[0];
- // 尝试将标题内容转换为 UTF-8
- try {
- return new String(titleContent.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);
- } catch (Exception e) {
- e.printStackTrace();
- }
- return titleContent;
- }
- return "";
- }
- public static JSONObject extractVideoUrl(String text) {
- Document document = Jsoup.parse(text);
- Elements scriptElements = document.select("script#SSR_HYDRATED_DATA");
- String str2 = Objects.requireNonNull(scriptElements.first()).html();
- String json2 = str2.substring(str2.indexOf('{'), str2.lastIndexOf('}') + 1);
- String[] irregulars = {"null", "undefined", "=false", "=true", "false", "true"};
- // 处理不规则定义
- for (String i : irregulars) {
- if ("=false".equals(i) || "=true".equals(i)) {
- json2 = json2.replace(i, "=" + Character.toUpperCase(i.charAt(1)) + i.substring(2));
- } else {
- json2 = json2.replace(i, "12");
- }
- }
- try {
- ObjectMapper objectMapper = new ObjectMapper();
- JsonNode dict2 = objectMapper.readTree(json2).path("anyVideo").path("gidInformation").path("packerData").path("video");
- long duration = dict2.path("video_duration").asLong();
- long playCnt = dict2.path("video_watch_count").asLong();
- long publishTime = dict2.path("video_publish_time").asLong();
- long likeCnt = dict2.path("video_like_count").asLong();
- String videoTitle = dict2.path("title").asText();
- String videoId = dict2.path("vid").asText();
- JsonNode videoRes = dict2.path("videoResource");
- String coverUrl = dict2.path("poster_url").asText().replace("\\u002F", "/");
- String realVideoUrl;
- if (videoRes.path("dash").asInt() == 12) {
- JsonNode obj = videoRes.path("normal");
- String ptk = obj.path("ptk").asText();
- JsonNode videoList = obj.path("video_list");
- String mainUrl = videoList.fields().next().getValue().path("main_url").asText();
- realVideoUrl = aesDecrypt(mainUrl, ptk);
- } else {
- JsonNode obj = videoRes.path("dash");
- String ptk = obj.path("ptk").asText();
- String videoUrl = obj.path("dynamic_video").path("main_url").asText();
- realVideoUrl = aesDecrypt(videoUrl, ptk);
- }
- JSONObject result = new JSONObject();
- result.put("video_url", realVideoUrl);
- result.put("cover_url", coverUrl);
- result.put("video_id", videoId);
- result.put("video_title", videoTitle);
- result.put("like_cnt", likeCnt);
- result.put("play_cnt", playCnt);
- result.put("publish_time", publishTime);
- result.put("duration", duration);
- return result;
- } catch (Exception e) {
- e.printStackTrace();
- return new JSONObject();
- }
- }
- private static String aesDecrypt(String data, String key) {
- try {
- byte[] keyBytes = key.getBytes(StandardCharsets.UTF_8);
- SecretKeySpec secretKey = new SecretKeySpec(keyBytes, "AES");
- Cipher cipher = Cipher.getInstance("AES/CBC/PKCS5Padding");
- IvParameterSpec ivParams = new IvParameterSpec(keyBytes, 0, 16);
- cipher.init(Cipher.DECRYPT_MODE, secretKey, ivParams);
- byte[] decryptedBytes = cipher.doFinal(Base64.getDecoder().decode(data));
- return new String(decryptedBytes, StandardCharsets.UTF_8);
- } catch (Exception e) {
- e.printStackTrace();
- return null;
- }
- }
- // 假设 sensitiveFlag 方法的实现
- private static boolean sensitiveFlag(List<String> sensitiveWords, String title) {
- return true;
- }
- public static void main(String[] args) {
- String keyword = "测试"; // 替换为实际关键词
- List<String> sensitiveWords = List.of("敏感词1", "敏感词2"); // 替换为实际敏感词
- List<JSONObject> results = xiguaSearchV2(keyword, sensitiveWords);
- results.forEach(System.out::println);
- }
- }
|