"""Query Kuaishou's web GraphQL endpoint for user search and profile feeds."""
import json
import time

import requests
import urllib3
from requests.adapters import HTTPAdapter

from common.common import Common

_GRAPHQL_URL = "https://www.kuaishou.com/graphql"
_PROFILE_URL_PREFIX = "https://www.kuaishou.com/profile/"

# GraphQL document sent with the "visionProfilePhotoList" operation.
_PROFILE_FEED_QUERY = "fragment photoContent on PhotoEntity {\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n commentCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n __typename\n}\n\nfragment feedContent on Feed {\n type\n author {\n id\n name\n headerUrl\n following\n headerUrls {\n url\n __typename\n }\n __typename\n }\n photo {\n ...photoContent\n __typename\n }\n canAddComment\n llsid\n status\n currentPcursor\n tags {\n type\n name\n __typename\n }\n __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n ...feedContent\n __typename\n }\n hostName\n pcursor\n __typename\n }\n}\n"

# GraphQL document sent with the "graphqlSearchUser" operation.
_SEARCH_USER_QUERY = "query graphqlSearchUser($keyword: String, $pcursor: String, $searchSessionId: String) {\n visionSearchUser(keyword: $keyword, pcursor: $pcursor, searchSessionId: $searchSessionId) {\n result\n users {\n fansCount\n photoCount\n isFollowing\n user_id\n headurl\n user_text\n user_name\n verified\n verifiedDetail {\n description\n iconType\n newVerified\n musicCompany\n type\n __typename\n }\n __typename\n }\n searchSessionId\n pcursor\n __typename\n }\n}\n"


class KuaiShouSearch:
    """Fetch the photo/video feed of one Kuaishou profile via GraphQL.

    Attributes:
        url: GraphQL endpoint every request is posted to.
        pcursor: Cursor sent with the next request; "" asks for the first page.
        author_name: Profile URL or bare user id given by the caller.
        search_result: Most recent decoded response (an empty list until the
            first call to ``search_pages``).
    """

    def __init__(self, author_name):
        self.url = _GRAPHQL_URL
        self.pcursor = ""
        self.author_name = author_name
        self.search_result = []

    def _user_id(self):
        """Return ``author_name`` with the profile URL prefix stripped out."""
        return self.author_name.replace(_PROFILE_URL_PREFIX, "")

    def init_payload(self):
        """Return the JSON-encoded request body for the current ``pcursor``."""
        return json.dumps(
            {
                "operationName": "visionProfilePhotoList",
                "variables": {
                    "userId": self._user_id(),
                    "pcursor": self.pcursor,
                    "page": "profile",
                },
                "query": _PROFILE_FEED_QUERY,
            }
        )

    def init_headers(self, did):
        """Return the request headers, sending ``did`` as the ``did`` cookie."""
        return {
            "Accept": "*/*",
            "Content-Type": "application/json",
            "Origin": "https://www.kuaishou.com",
            "Cookie": "did={}".format(did),
            # NOTE(review): hard-coded length that cannot match every payload;
            # requests is expected to overwrite it from the real body --
            # confirm and drop this entry.
            "Content-Length": "1260",
            "Accept-Language": "zh-CN,zh-Hans;q=0.9",
            "Host": "www.kuaishou.com",
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15",
            "Referer": f"{_PROFILE_URL_PREFIX}{self._user_id()}",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

    def search(self, did):
        """Post one feed request for the current ``pcursor``.

        Args:
            did: Value sent as the ``did`` cookie.

        Returns:
            The decoded JSON response body.

        Raises:
            requests.exceptions.RequestException: On connection failure or
                when the 10 second timeout expires.
            ValueError: If the response body is not valid JSON.
        """
        # Certificate verification is off below, so silence the warning
        # urllib3 would otherwise emit on every request.
        urllib3.disable_warnings()
        # The context manager closes the session's connection pool even when
        # the request or the JSON decoding raises.
        with requests.session() as session:
            # Retry failed connections up to 3 times.
            session.mount("http://", HTTPAdapter(max_retries=3))
            session.mount("https://", HTTPAdapter(max_retries=3))
            response = session.post(
                url=self.url,
                headers=self.init_headers(did),
                data=self.init_payload(),
                # proxies=Common.tunnel_proxies(),
                # NOTE(review): TLS verification is disabled -- confirm this
                # is intentional (e.g. required by a proxy) before relying on it.
                verify=False,
                timeout=10,
            )
            # print(json.dumps(response.json(), ensure_ascii=False, indent=4))
            return response.json()

    def search_pages(self, did, extra_pages=5, delay=5):
        """Fetch the first feed page plus up to ``extra_pages`` follow-ups.

        After each response the returned ``pcursor`` is stored on the instance
        and used for the next request. Paging stops early once a response
        carries no ``data.visionProfilePhotoList`` entry.

        Args:
            did: Value sent as the ``did`` cookie.
            extra_pages: Maximum number of requests made after the first one.
            delay: Seconds to sleep after each follow-up request.

        Returns:
            A list with one decoded response per request made, in order.
        """
        self.search_result = self.search(did)
        page_result = [self.search_result]
        for _ in range(extra_pages):
            # A response without "data" (or with a null one) ends paging
            # instead of raising AttributeError.
            data = self.search_result.get("data") or {}
            photo_list = data.get("visionProfilePhotoList")
            if not photo_list:
                break
            # Pagination cursor for the next page.
            self.pcursor = photo_list["pcursor"]
            self.search_result = self.search(did)
            page_result.append(self.search_result)
            time.sleep(delay)
        return page_result


def search_url_from_name(keyword, did):
    """Search Kuaishou users by keyword and return the first match.

    The decoded response, the user list and the chosen account are printed
    to stdout along the way.

    Args:
        keyword: Search text sent as the ``keyword`` GraphQL variable.
        did: Value sent as the ``did`` cookie.

    Returns:
        The first entry of ``data.visionSearchUser.users`` in the response.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the 10 second timeout expires.
        KeyError: If the response lacks the expected keys.
        IndexError: If the search returned no users.
    """
    payload = {
        "operationName": "graphqlSearchUser",
        "query": _SEARCH_USER_QUERY,
        "variables": {
            "keyword": keyword
        },
    }
    headers = {
        "Content-Type": "application/json",
        "Cookie": "did={}".format(did),
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
    }
    response = requests.post(
        url=_GRAPHQL_URL,
        headers=headers,
        # proxies=Common.tunnel_proxies(),
        data=json.dumps(payload),
        # Without a timeout requests waits indefinitely; use the same limit
        # as KuaiShouSearch.search.
        timeout=10,
    )
    # Decode once and reuse the result for both the log line and the lookup.
    body = response.json()
    print(body)
    user_list = body['data']['visionSearchUser']['users']
    print(user_list)
    account = user_list[0]
    # print("user basic info")
    print(json.dumps(account, ensure_ascii=False, indent=4))
    return account


if __name__ == "__main__":
    # Two device-id cookie values; only the first is used below.
    my_did = "web_57fe06bfa96f8fdae46d286e125a5c18"
    my_did2 = "web_727b05862ce2afa4028018cc79a50257"
    account = search_url_from_name(keyword="知后品牌优选", did=my_did)
    author_name = account['user_id']
    # print(account)
    # ksc = KuaiShouSearch(author_name=author_name)
    # result = ksc.search_pages(did=my_did)
    # print(result)
    # c = 0
    # f = open("url.txt", "a+", encoding="utf-8")
    # for obj in result:
    #     for feed in obj['data']['visionProfilePhotoList']['feeds']:
    #         c += 1
    #         print(json.dumps(feed, ensure_ascii=False))
    #         f.write(json.dumps(feed, ensure_ascii=False) + "\n")
    # print(c)
    # f.close()
    # print(json.dumps(result, ensure_ascii=False, indent=4))
    # for feed in result['data']['visionProfilePhotoList']['feeds']:
    #     print(json.dumps(feed, ensure_ascii=False))
    # Read the existing JSON file to get the user ids.
    # with open("result_2.json", "r", encoding="utf-8") as f:
    #     my_test_id_dict = json.loads(f.read())
    # Iterate over id_dict.
    # for my_id in my_test_id_dict:
    #     my_keyword = my_id.replace("@", "")
    #     account = search_url_from_name(keyword=my_keyword, did=my_did)
    #     author_name = account['user_id']
    #     ksc = KuaiShouSearch(author_name=author_name)
    #     result = ksc.search(did=my_did)
    #     # print(json.dumps(result, ensure_ascii=False, indent=4))
    #     for feed in result['data']['visionProfilePhotoList']['feeds']:
    #         print(json.dumps(feed, ensure_ascii=False))
    #         video_url = feed['photo']['photoUrl']
    #         print("success get kuaishou video_url", video_url)