xigua_search_dev.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2023/6/25
  4. import json
  5. import time
  6. import requests
  7. import urllib.parse
  8. from selenium.webdriver import DesiredCapabilities
  9. from selenium.webdriver.chrome.service import Service
  10. from selenium.webdriver.common.by import By
  11. from seleniumwire import webdriver
  12. class SearchDev:
  13. @classmethod
  14. def get_videoList_requests(cls):
  15. url = "https://www.ixigua.com/api/searchv2/complex/猪八戒/0?" \
  16. "fss=default_search&" \
  17. "order_type=publish_time&" \
  18. "click_position=new&" \
  19. "aid=1768&" \
  20. "msToken=EV6DlzmvSZH6yBIIm7tCdxb6EY7xuV7p0EZw4nZUyznGvXk9Wkyx0GiT39zCO2HRROdUYZc0XYpAztUSzg14q3a1Fkoj01Avy_BGjKFFn5wRQDP8nVWECA==&" \
  21. "X-Bogus=DFSzswVuSIsANrq4tnr0UFm4pID1&" \
  22. "_signature=_02B4Z6wo00001jeNZ4AAAIDCr-bw8w.DSLY3jWMAAOmJTnwirif4XNCUKjt3Ms0gS9-upb8jMBZJL5RSZ5dHBQm6GRMtSyn8h6D5rc1Y7tmwZL7a2nP390R3ARXFwF6tVQi97vqO5viH53M0c3"
  23. payload = {}
  24. headers = {
  25. # 'authority': 'www.ixigua.com',
  26. 'accept': 'application/json, text/plain, */*',
  27. 'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
  28. 'cache-control': 'no-cache',
  29. # 'cookie': 'MONITOR_WEB_ID=67cb5099-a022-4ec3-bb8e-c4de6ba51dd0; sid_guard=c7472b508ea631823ba765a60cf8757f%7C1680867422%7C3024002%7CFri%2C+12-May-2023+11%3A37%3A04+GMT; odin_tt=b893608d4dde2e1e8df8cd5d97a0e2fbeafc4ca762ac72ebef6e6c97e2ed19859bb01d46b4190ddd6dd17d7f9678e1de; s_v_web_id=verify_lhoket5d_0qlKZtzS_YZkf_4Uaj_82mX_j6lRT4PcYJ7A; __ac_signature=_02B4Z6wo00f01yB6eXwAAIDCWLSSerYAxYsgWn3AAKx5S2D2PsJJ92YblwdDE-9rnwnzZ87S0CUowZ3Xi8XmxMU3JHd0xfP-9VucrE9D.l9E7Vgn6y95sGbL2H6mgsddoCZX0cCgfcfKAzWgcd; ixigua-a-s=1; support_webp=true; support_avif=false; csrf_session_id=a5355d954d3c63ed1ba35faada452b4d; SEARCH_CARD_MODE=7168304743566296612_1; msToken=EV6DlzmvSZH6yBIIm7tCdxb6EY7xuV7p0EZw4nZUyznGvXk9Wkyx0GiT39zCO2HRROdUYZc0XYpAztUSzg14q3a1Fkoj01Avy_BGjKFFn5wRQDP8nVWECA==; tt_scid=rP8nVwFTm4wPZyREet0crbp-ZRgJsK.x5TE0lqU2uibGbUDAhlM.oA14pKRcGzXW0955; ttwid=1%7CHHtv2QqpSGuSu8r-zXF1QoWsvjmNi1SJrqOrZzg-UCY%7C1687685218%7Ca985a413a36bb156ba577dac11fbc14593e5a2a4000001f9cfc7fd72781c4cc5; ixigua-a-s=1',
  30. 'pragma': 'no-cache',
  31. 'referer': f'https://www.ixigua.com/search/{urllib.parse.quote("猪八戒")}/?logTag=e0b95015015c05e60b1b&tab_name=home&fss=default_search',
  32. 'sec-ch-ua': '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
  33. 'sec-ch-ua-mobile': '?0',
  34. 'sec-ch-ua-platform': '"macOS"',
  35. 'sec-fetch-dest': 'empty',
  36. 'sec-fetch-mode': 'cors',
  37. 'sec-fetch-site': 'same-origin',
  38. 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.57',
  39. # 'x-secsdk-csrf-token': '0001000000011fd0adbaee655439e86800862b81e3e34974cab6a8656af77695b76ff5c76c96176bdcbf2631eeb7'
  40. }
  41. response = requests.request("GET", url, headers=headers, data=payload)
  42. print(response.text)
  43. @classmethod
  44. def get_videoList_selenium(cls):
  45. # 打印请求配置
  46. ca = DesiredCapabilities.CHROME
  47. ca["goog:loggingPrefs"] = {"performance": "ALL"}
  48. # # 不打开浏览器运行
  49. chrome_options = webdriver.ChromeOptions()
  50. chrome_options.add_argument(
  51. f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
  52. # chrome_options.add_argument("--headless")
  53. chrome_options.add_argument("--window-size=1920,1080")
  54. # chrome_options.add_argument("--no-sandbox")
  55. chromedriver = "/Users/wangkun/Downloads/chromedriver/chromedriver_v114/chromedriver"
  56. # driver初始化
  57. driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service(chromedriver))
  58. driver.implicitly_wait(10)
  59. print("打开搜索页:健康")
  60. driver.get(f"https://www.ixigua.com/search/健康/")
  61. time.sleep(3)
  62. # logs = driver.get_log("performance")
  63. print("关闭登录弹框")
  64. if driver.find_elements(By.XPATH, '//*[@class="xg-notification-close"]') != 0:
  65. driver.find_element(By.XPATH, '//*[@class="xg-notification-close"]').click()
  66. driver.get_screenshot_as_file("./关闭弹框.png")
  67. print("点击筛选按钮")
  68. driver.find_element(By.XPATH, '//*[@class="searchPageV2__header-icons-categories"]').click()
  69. print("点击最新排序")
  70. driver.find_element(By.XPATH, '//*[@class="searchPageV2__header-categories-wrapper"]/*[1]/*[2]/*[1]').click()
  71. time.sleep(3)
  72. driver.get_screenshot_as_file("./最新排序.png")
  73. driver.quit()
  74. if __name__ == "__main__":
  75. # SearchDev.get_videoList_requests()
  76. SearchDev.get_videoList_selenium()