searchfunctions.py 4.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. import json
  2. import time
  3. import requests
  4. import urllib3
  5. from requests.adapters import HTTPAdapter
# Search Kuaishou videos by title and look up each video's download URL.
  7. class SearchTitle:
  8. def __init__(self, title, did):
  9. self.title = title
  10. self.did = did
  11. self.pcursor = ""
  12. self.search_result = []
  13. # 搜索代码
  14. def search_title(self):
  15. payload = json.dumps(
  16. {
  17. "operationName": "visionSearchPhoto",
  18. "variables": {
  19. "keyword": self.title,
  20. "pcursor": self.pcursor,
  21. "page": "search"
  22. },
  23. "query": "fragment photoContent on PhotoEntity {\n __typename\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n commentCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n}\n\nfragment recoPhotoFragment on recoPhotoEntity {\n __typename\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n commentCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n}\n\nfragment feedContent on Feed {\n type\n author {\n id\n name\n headerUrl\n following\n headerUrls {\n url\n __typename\n }\n __typename\n }\n photo {\n ...photoContent\n ...recoPhotoFragment\n __typename\n }\n canAddComment\n llsid\n status\n currentPcursor\n tags {\n type\n name\n __typename\n }\n __typename\n}\n\nquery visionSearchPhoto($keyword: String, $pcursor: String, $searchSessionId: String, $page: String, $webPageArea: String) {\n visionSearchPhoto(keyword: $keyword, pcursor: $pcursor, searchSessionId: $searchSessionId, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n ...feedContent\n __typename\n }\n searchSessionId\n pcursor\n aladdinBanner {\n imgUrl\n link\n __typename\n }\n __typename\n }\n}\n"
  24. }
  25. )
  26. headers = {
  27. "Accept": "*/*",
  28. "Accept-Encoding": "gzip, deflate, br",
  29. "Accept-Language": "zh-CN,zh;q=0.9",
  30. "Connection": "keep-alive",
  31. "Content-Type": "application/json",
  32. "Cookie": "did={}".format(did),
  33. "Host": "www.kuaishou.com",
  34. "Origin": "https://www.kuaishou.com",
  35. # "Referer": "https://www.kuaishou.com/search/video?searchKey=%23%E5%8C%BB%E9%99%A2%E8%B6%A3%E4%BA%8B%23%E4%B8%AD%E5%8C%BB%E8%B0%83%E7%90%86%23%E5%8C%BB%E5%AD%A6%E7%9F%A5%E8%AF%86%E7%A7%91%E6%99%AE",
  36. "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
  37. }
  38. urllib3.disable_warnings()
  39. s = requests.session()
  40. # max_retries=3 重试3次
  41. s.mount("http://", HTTPAdapter(max_retries=3))
  42. s.mount("https://", HTTPAdapter(max_retries=3))
  43. response = s.post(
  44. url="https://www.kuaishou.com/graphql",
  45. headers=headers,
  46. data=payload,
  47. # proxies=Common.tunnel_proxies(),
  48. verify=False,
  49. timeout=10,
  50. )
  51. response.close()
  52. return response.json()
  53. # 翻页搜索代码
  54. def search_pages(self):
  55. result = []
  56. self.search_result = self.search_title()
  57. if self.search_result:
  58. result.append(self.search_result)
  59. for i in range(4):
  60. self.pcursor = self.search_result['data']['visionSearchPhoto']['pcursor']
  61. self.search_result = self.search_title()
  62. result.append(self.search_result)
  63. time.sleep(2)
  64. return result
  65. else:
  66. return []
  67. def process_item_list(user_name, data_list):
  68. result = []
  69. for line in data_list:
  70. for obj in line['data']['visionSearchPhoto']['feeds']:
  71. name = obj['author']['name']
  72. if name in user_name:
  73. result.append(obj)
  74. result = sorted(result, reverse=True, key=lambda x: x['photo']['likeCount'])
  75. print(result)
  76. return result[0]
  77. did = "web_7cf1f7a1dd0e8936fcaeebcd4e0a7061"
  78. title = "历史会记住! 日本核污水搜 画面,旅游团遭退团,对我们有何影响… 展开"
  79. result = SearchTitle(title, did).search_pages()
  80. new_r = process_item_list("@主持人颢鑫", result)
  81. print(len(new_r))
  82. print(json.dumps(new_r, ensure_ascii=False, indent=4))