baidu_search.py 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
  1. """
  2. 好看视频搜索爬虫
  3. """
  4. import json
  5. import requests
  6. import urllib.parse
  7. import time
  8. import hashlib
  9. def hksp_search(key):
  10. """
  11. 好看视频搜索爬虫
  12. """
  13. timestamp_seconds = time.time()
  14. timestamp_milliseconds = int(timestamp_seconds * 1000)
  15. url = 'https://haokan.baidu.com/haokan/ui-search/pc/search/video'
  16. # 定义请求的参数
  17. strings = "{}_{}_{}_{}_{}".format(1, urllib.parse.quote(key), 10, timestamp_milliseconds, 1)
  18. sign = hashlib.md5(strings.encode()).hexdigest()
  19. params = {
  20. 'pn': 1,
  21. 'rn': 10,
  22. 'type': 'video',
  23. 'query': key,
  24. 'sign': sign,
  25. 'version': 1,
  26. 'timestamp': timestamp_milliseconds
  27. }
  28. # 定义请求头
  29. headers = {
  30. 'authority': 'haokan.baidu.com',
  31. 'accept': '*/*',
  32. 'accept-language': 'zh,en;q=0.9,zh-CN;q=0.8',
  33. 'cookie': "BIDUPSID='",
  34. # 'referer': 'https://haokan.baidu.com/web/search/page?query=%E8%80%81%E4%BA%BA',
  35. 'sec-ch-ua': '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
  36. 'sec-ch-ua-mobile': '?0',
  37. 'sec-ch-ua-platform': '"macOS"',
  38. 'sec-fetch-dest': 'empty',
  39. 'sec-fetch-mode': 'cors',
  40. 'sec-fetch-site': 'same-origin',
  41. 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
  42. 'x-requested-with': 'xmlhttprequest',
  43. }
  44. # 发送GET请求
  45. response = requests.get(url, headers=headers, params=params).json()
  46. print(json.dumps(response, ensure_ascii=False, indent=4))
  47. if __name__ == '__main__':
  48. hksp_search("人类首次从恐龙蛋化石中获得恐龙的遗传物质")