hksp_search.py 3.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. """
  2. @author: luojunhui
  3. 好看视频搜索爬虫
  4. """
  5. import requests
  6. import urllib.parse
  7. import time
  8. import hashlib
  9. def get_video_detail(video_id):
  10. """
  11. 获取好看视频的视频链接
  12. :param video_id:
  13. :return:
  14. """
  15. url = "https://haokan.baidu.com/v"
  16. params = {
  17. 'vid': video_id,
  18. '_format': 'json',
  19. # 'hk_nonce': 'f47386e95fe657182aa3c1826d9a6b85',
  20. # 'hk_timestamp': '1715225386',
  21. # 'hk_sign': '4b219f5e3971e42b3e23dc2a209fc9d9',
  22. # 'hk_token': 'Dg8DdAVwdwNzDHcFcXF+D3gHBQA'
  23. }
  24. headers = {
  25. 'Accept': '*/*',
  26. 'cookie': "BIDUPSID='",
  27. 'Accept-Language': 'en,zh;q=0.9,zh-CN;q=0.8',
  28. 'Cache-Control': 'no-cache',
  29. 'Connection': 'keep-alive',
  30. 'Content-Type': 'application/x-www-form-urlencoded',
  31. 'Referer': 'https://haokan.baidu.com',
  32. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
  33. }
  34. response = requests.request("GET", url, headers=headers, params=params).json()
  35. # print(json.dumps(response['data']['apiData']['curVideoMeta'], ensure_ascii=False, indent=4))
  36. return response['data']['apiData']['curVideoMeta']
  37. def hksp_search(key):
  38. """
  39. 好看视频搜索爬虫
  40. """
  41. timestamp_seconds = time.time()
  42. timestamp_milliseconds = int(timestamp_seconds * 1000)
  43. url = 'https://haokan.baidu.com/haokan/ui-search/pc/search/video'
  44. # 定义请求的参数
  45. strings = "{}_{}_{}_{}_{}".format(1, urllib.parse.quote(key), 10, timestamp_milliseconds, 1)
  46. sign = hashlib.md5(strings.encode()).hexdigest()
  47. params = {
  48. 'pn': 1,
  49. 'rn': 10,
  50. 'type': 'video',
  51. 'query': key,
  52. 'sign': sign,
  53. 'version': 1,
  54. 'timestamp': timestamp_milliseconds
  55. }
  56. # 定义请求头
  57. headers = {
  58. 'authority': 'haokan.baidu.com',
  59. 'accept': '*/*',
  60. 'accept-language': 'zh,en;q=0.9,zh-CN;q=0.8',
  61. 'cookie': "BIDUPSID='",
  62. # 'referer': 'https://haokan.baidu.com/web/search/page?query=%E8%80%81%E4%BA%BA',
  63. 'sec-ch-ua': '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
  64. 'sec-ch-ua-mobile': '?0',
  65. 'sec-ch-ua-platform': '"macOS"',
  66. 'sec-fetch-dest': 'empty',
  67. 'sec-fetch-mode': 'cors',
  68. 'sec-fetch-site': 'same-origin',
  69. 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
  70. 'x-requested-with': 'xmlhttprequest',
  71. }
  72. # 发送GET请求
  73. response = requests.get(url, headers=headers, params=params).json()
  74. # print(json.dumps(response, ensure_ascii=False, indent=4))
  75. data_list = response['data']['list']
  76. L = []
  77. for data in data_list[:5]:
  78. try:
  79. video_id = data['vid']
  80. res = get_video_detail(video_id)
  81. temp = ["haokanshipin", res['title'], res['playurl'], "https://haokan.baidu.com/v?vid={}".format(video_id)]
  82. L.append(temp)
  83. except:
  84. pass
  85. return L
  86. if __name__ == '__main__':
  87. hksp_search("美国竟对中国提出4个荒唐的条件,真是好大的口气")