1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495 |
- import json
- import time
- import requests
- import urllib3
- from requests.adapters import HTTPAdapter
# Search Kuaishou videos by title and extract the video/download URLs
# via the site's GraphQL endpoint (www.kuaishou.com/graphql).


class SearchTitle:
    """Searches Kuaishou for videos whose caption matches a title.

    Parameters
    ----------
    title : str
        Search keyword (the video caption/title to look for).
    did : str
        Kuaishou device-id cookie value; the endpoint rejects requests
        without it.
    """

    def __init__(self, title, did):
        self.title = title
        self.did = did
        # Pagination cursor; the empty string requests the first page.
        self.pcursor = ""
        # Raw JSON of the most recent response (empty list before first call).
        self.search_result = []

    # Search request
    def search_title(self):
        """POST one ``visionSearchPhoto`` GraphQL query; return parsed JSON.

        Uses the current ``self.pcursor``, so repeated calls (with the
        cursor advanced in between) walk through result pages.

        Returns
        -------
        dict
            The decoded JSON response body.
        """
        payload = json.dumps(
            {
                "operationName": "visionSearchPhoto",
                "variables": {
                    "keyword": self.title,
                    "pcursor": self.pcursor,
                    "page": "search"
                },
                "query": "fragment photoContent on PhotoEntity {\n __typename\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n commentCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n}\n\nfragment recoPhotoFragment on recoPhotoEntity {\n __typename\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n commentCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n}\n\nfragment feedContent on Feed {\n type\n author {\n id\n name\n headerUrl\n following\n headerUrls {\n url\n __typename\n }\n __typename\n }\n photo {\n ...photoContent\n ...recoPhotoFragment\n __typename\n }\n canAddComment\n llsid\n status\n currentPcursor\n tags {\n type\n name\n __typename\n }\n __typename\n}\n\nquery visionSearchPhoto($keyword: String, $pcursor: String, $searchSessionId: String, $page: String, $webPageArea: String) {\n visionSearchPhoto(keyword: $keyword, pcursor: $pcursor, searchSessionId: $searchSessionId, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n ...feedContent\n __typename\n }\n searchSessionId\n pcursor\n aladdinBanner {\n imgUrl\n link\n __typename\n }\n __typename\n }\n}\n"
            }
        )
        headers = {
            "Accept": "*/*",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Connection": "keep-alive",
            "Content-Type": "application/json",
            # BUG FIX: previously formatted the module-level global `did`
            # instead of the instance attribute, so the constructor's `did`
            # argument was silently ignored.
            "Cookie": "did={}".format(self.did),
            "Host": "www.kuaishou.com",
            "Origin": "https://www.kuaishou.com",
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
        }
        # verify=False below would emit InsecureRequestWarning on every
        # request; silence it.
        urllib3.disable_warnings()
        s = requests.session()
        # max_retries=3: retry transient connection failures up to 3 times.
        s.mount("http://", HTTPAdapter(max_retries=3))
        s.mount("https://", HTTPAdapter(max_retries=3))
        response = s.post(
            url="https://www.kuaishou.com/graphql",
            headers=headers,
            data=payload,
            verify=False,
            timeout=10,
        )
        try:
            # Parse BEFORE closing (originally closed first, which only
            # worked because the body was already fully buffered).
            return response.json()
        finally:
            response.close()

    # Paginated search
    def search_pages(self):
        """Fetch the first result page plus up to 4 follow-up pages.

        Returns
        -------
        list[dict]
            Raw response dicts, one per page; empty list when the first
            request returned a falsy body.
        """
        result = []
        self.search_result = self.search_title()
        if not self.search_result:
            return []
        result.append(self.search_result)
        for _ in range(4):
            # Advance the cursor to the next page before re-querying.
            self.pcursor = self.search_result['data']['visionSearchPhoto']['pcursor']
            self.search_result = self.search_title()
            result.append(self.search_result)
            time.sleep(2)  # throttle between page requests
        return result
def process_item_list(user_name, data_list):
    """Pick the most-liked feed entry whose author name occurs in *user_name*.

    Parameters
    ----------
    user_name : str
        Target author string; a feed matches when its author's name is a
        substring of this value.
    data_list : list[dict]
        Raw search-response pages, as returned by ``SearchTitle.search_pages``.

    Returns
    -------
    dict
        The matching feed with the highest ``likeCount``.

    Raises
    ------
    IndexError
        If no feed matches.
    """
    matches = [
        feed
        for page in data_list
        for feed in page['data']['visionSearchPhoto']['feeds']
        if feed['author']['name'] in user_name
    ]
    # Most-liked first; printing the full sorted list is part of the
    # original (debug) behavior and is kept.
    matches.sort(key=lambda feed: feed['photo']['likeCount'], reverse=True)
    print(matches)
    return matches[0]
# --- Demo / manual run ---------------------------------------------------
# NOTE(review): this runs at import time and performs live network requests;
# consider guarding it with `if __name__ == "__main__":`.
# Device-id cookie value. NOTE(review): `search_title` also reads this name
# as a module-level global (instead of `self.did`), so it must stay `did`.
did = "web_7cf1f7a1dd0e8936fcaeebcd4e0a7061"
title = "历史会记住! 日本核污水搜 画面,旅游团遭退团,对我们有何影响… 展开"
# Fetch the first page plus up to 4 follow-up pages of search results.
result = SearchTitle(title, did).search_pages()
# Keep only feeds by the target author, pick the most-liked one.
new_r = process_item_list("@主持人颢鑫", result)
print(len(new_r))
print(json.dumps(new_r, ensure_ascii=False, indent=4))
|