# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/6/25
import json
import os
import sys
import time
import urllib.parse

import requests
import urllib3
from requests.adapters import HTTPAdapter
from selenium.webdriver import DesiredCapabilities
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from seleniumwire import webdriver

# Make the project root importable when the script is launched from it.
sys.path.append(os.getcwd())
from common.common import Common


class SearchDev:
    """Development helpers for experimenting with the ixigua.com search page/API."""

    @classmethod
    def get_videoList_requests(cls, word):
        """Query the ixigua search API for ``word`` and print the returned items.

        For every item in the response the title, type, publish time and
        group id are printed to stdout.  The method returns ``None`` as soon
        as the response is not HTTP 200, does not contain ``"data"``, cannot
        be parsed, or holds an empty result list.

        Args:
            word: Search keyword; it is converted with ``str()`` before being
                placed in the request URL and the ``referer`` header.

        Raises:
            requests.RequestException: Network errors (after the adapter's
                retries) are not caught here and propagate to the caller.
        """
        keyword = str(word)

        # NOTE(review): the URL, cookie and CSRF token below embed hard-coded
        # session credentials/signatures; they presumably expire and should
        # not live in source control - confirm and move to configuration.
        url = (
            f"https://www.ixigua.com/api/searchv2/complex/{keyword}/0?"
            "fss=default_search&"
            "order_type=publish_time&"
            "click_position=new&"
            "aid=1768&"
            "msToken=EV6DlzmvSZH6yBIIm7tCdxb6EY7xuV7p0EZw4nZUyznGvXk9Wkyx0GiT39zCO2HRROdUYZc0XYpAztUSzg14q3a1Fkoj01Avy_BGjKFFn5wRQDP8nVWECA==&"
            "X-Bogus=DFSzswVuSIsANrq4tnr0UFm4pID1&"
            "_signature=_02B4Z6wo00001jeNZ4AAAIDCr-bw8w.DSLY3jWMAAOmJTnwirif4XNCUKjt3Ms0gS9-upb8jMBZJL5RSZ5dHBQm6GRMtSyn8h6D5rc1Y7tmwZL7a2nP390R3ARXFwF6tVQi97vqO5viH53M0c3"
        )
        headers = {
            'authority': 'www.ixigua.com',
            'accept': 'application/json, text/plain, */*',
            'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
            'cache-control': 'no-cache',
            'cookie': 'MONITOR_WEB_ID=67cb5099-a022-4ec3-bb8e-c4de6ba51dd0; sid_guard=c7472b508ea631823ba765a60cf8757f%7C1680867422%7C3024002%7CFri%2C+12-May-2023+11%3A37%3A04+GMT; odin_tt=b893608d4dde2e1e8df8cd5d97a0e2fbeafc4ca762ac72ebef6e6c97e2ed19859bb01d46b4190ddd6dd17d7f9678e1de; s_v_web_id=verify_lhoket5d_0qlKZtzS_YZkf_4Uaj_82mX_j6lRT4PcYJ7A; __ac_signature=_02B4Z6wo00f01yB6eXwAAIDCWLSSerYAxYsgWn3AAKx5S2D2PsJJ92YblwdDE-9rnwnzZ87S0CUowZ3Xi8XmxMU3JHd0xfP-9VucrE9D.l9E7Vgn6y95sGbL2H6mgsddoCZX0cCgfcfKAzWgcd; ixigua-a-s=1; support_webp=true; support_avif=false; csrf_session_id=a5355d954d3c63ed1ba35faada452b4d; SEARCH_CARD_MODE=7168304743566296612_1; msToken=EV6DlzmvSZH6yBIIm7tCdxb6EY7xuV7p0EZw4nZUyznGvXk9Wkyx0GiT39zCO2HRROdUYZc0XYpAztUSzg14q3a1Fkoj01Avy_BGjKFFn5wRQDP8nVWECA==; tt_scid=rP8nVwFTm4wPZyREet0crbp-ZRgJsK.x5TE0lqU2uibGbUDAhlM.oA14pKRcGzXW0955; ttwid=1%7CHHtv2QqpSGuSu8r-zXF1QoWsvjmNi1SJrqOrZzg-UCY%7C1687685218%7Ca985a413a36bb156ba577dac11fbc14593e5a2a4000001f9cfc7fd72781c4cc5; ixigua-a-s=1',
            'pragma': 'no-cache',
            'referer': f'https://www.ixigua.com/search/{urllib.parse.quote(keyword)}/?logTag=e0b95015015c05e60b1b&tab_name=home&fss=default_search',
            'sec-ch-ua': '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"macOS"',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'same-origin',
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.57',
            'x-secsdk-csrf-token': '0001000000011fd0adbaee655439e86800862b81e3e34974cab6a8656af77695b76ff5c76c96176bdcbf2631eeb7'
        }

        # Requests are sent with verify=False, so silence the resulting
        # InsecureRequestWarning noise once up front.
        urllib3.disable_warnings()

        # One session for the whole call; the context manager closes its
        # connection pool on every exit path.
        with requests.session() as session:
            # Retry failed connections up to 3 times.
            session.mount('http://', HTTPAdapter(max_retries=3))
            session.mount('https://', HTTPAdapter(max_retries=3))

            # NOTE(review): the offset segment of the URL ("/0?") never
            # changes, so each iteration re-requests the same page and the
            # loop only ends on a non-200 / empty response. Pagination looks
            # intended but the API's offset semantics are not visible here -
            # confirm before changing.
            while True:
                # The request must go through the session: the module-level
                # requests.get() would bypass the retry adapters mounted above.
                response = session.get(url=url, headers=headers, verify=False,
                                       proxies=Common.tunnel_proxies(), timeout=5)
                if response.status_code != 200 or "data" not in response.text:
                    print(f"response:{response.text}\n")
                    return

                # Parse the body once; an unparsable or unexpectedly shaped
                # payload is reported the same way as any other bad response.
                try:
                    feeds = response.json()["data"]["data"]
                except (ValueError, KeyError, TypeError):
                    print(f"response:{response.text}\n")
                    return

                if not feeds:
                    print("没有更多数据啦~")
                    return

                for feed in feeds:
                    # Tolerate a missing or null "data" payload on an item.
                    data = feed.get("data") or {}
                    video_type = feed.get("type", "")
                    title = data.get("title", "")
                    publish_time = data.get("publish_time", "")
                    item_id = data.get("group_id", "")
                    print(f"title:{title}")
                    print(f"video_type:{video_type}")
                    print(f"publish_time:{publish_time}")
                    print(f"item_id:{item_id}")
                    print("\n")

    @classmethod
    def get_videoList_selenium(cls):
        """Drive Chrome through the ixigua search page and sort by newest.

        Opens the search page for the fixed keyword "健康", closes the login
        notification if one is present, opens the filter panel, clicks the
        first sort option and saves two screenshots into the current working
        directory.  The browser is always shut down, even when a step fails.
        """
        # Enable performance logging. Work on a copy so the class-level
        # DesiredCapabilities.CHROME dict is not mutated for other users.
        ca = DesiredCapabilities.CHROME.copy()
        ca["goog:loggingPrefs"] = {"performance": "ALL"}

        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument(
            'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
        # Add "--headless" / "--no-sandbox" here to run without a visible window.
        chrome_options.add_argument("--window-size=1920,1080")

        # NOTE(review): machine-specific chromedriver path.
        chromedriver = "/Users/wangkun/Downloads/chromedriver/chromedriver_v114/chromedriver"

        # Initialise the driver.
        driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service(chromedriver))
        try:
            driver.implicitly_wait(10)

            print("打开搜索页:健康")
            driver.get("https://www.ixigua.com/search/健康/")
            time.sleep(3)

            print("关闭登录弹框")
            # find_elements returns a (possibly empty) list; click only when
            # the close button actually exists instead of raising
            # NoSuchElementException when the popup is absent.
            close_buttons = driver.find_elements(By.XPATH, '//*[@class="xg-notification-close"]')
            if close_buttons:
                close_buttons[0].click()
            driver.get_screenshot_as_file("./关闭弹框.png")

            print("点击筛选按钮")
            driver.find_element(By.XPATH, '//*[@class="searchPageV2__header-icons-categories"]').click()

            print("点击最新排序")
            driver.find_element(
                By.XPATH,
                '//*[@class="searchPageV2__header-categories-wrapper"]/*[1]/*[2]/*[1]').click()
            time.sleep(3)
            driver.get_screenshot_as_file("./最新排序.png")
        finally:
            # Always release the browser and chromedriver process.
            driver.quit()


if __name__ == "__main__":
    SearchDev.get_videoList_requests("猪八戒")
    # SearchDev.get_videoList_selenium()