123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151 |
- import json
- import time
- import requests
- import urllib3
- from requests.adapters import HTTPAdapter
- from common.common import Common
class KuaiShouSearch:
    """Fetch the video feed of one Kuaishou profile via the site's GraphQL endpoint.

    ``author_name`` may be a bare user id or a full
    ``https://www.kuaishou.com/profile/<id>`` URL; the URL prefix is stripped
    before the id is sent to the server.

    Attributes:
        url: GraphQL endpoint every request is posted to.
        pcursor: Paging cursor sent with the next request ("" for the first page).
        author_name: The user id / profile URL given to the constructor.
        search_result: Decoded JSON of the most recent response ([] before any call).
    """

    # Prefix removed from ``author_name`` to obtain the bare user id.
    _PROFILE_PREFIX = "https://www.kuaishou.com/profile/"

    # GraphQL document for the ``visionProfilePhotoList`` operation.
    _PROFILE_QUERY = "fragment photoContent on PhotoEntity {\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n commentCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n __typename\n}\n\nfragment feedContent on Feed {\n type\n author {\n id\n name\n headerUrl\n following\n headerUrls {\n url\n __typename\n }\n __typename\n }\n photo {\n ...photoContent\n __typename\n }\n canAddComment\n llsid\n status\n currentPcursor\n tags {\n type\n name\n __typename\n }\n __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n ...feedContent\n __typename\n }\n hostName\n pcursor\n __typename\n }\n}\n"

    def __init__(self, author_name):
        self.url = "https://www.kuaishou.com/graphql"
        self.pcursor = ""
        self.author_name = author_name
        self.search_result = []

    def _user_id(self):
        """Return ``author_name`` with the profile URL prefix removed."""
        return self.author_name.replace(self._PROFILE_PREFIX, "")

    def init_payload(self):
        """Return the JSON-encoded request body for the current ``pcursor``."""
        return json.dumps(
            {
                "operationName": "visionProfilePhotoList",
                "variables": {
                    "userId": self._user_id(),
                    "pcursor": self.pcursor,
                    "page": "profile",
                },
                "query": self._PROFILE_QUERY,
            }
        )

    def init_headers(self, did):
        """Return the request headers, with ``did`` placed in the ``did`` cookie."""
        return {
            "Accept": "*/*",
            "Content-Type": "application/json",
            "Origin": "https://www.kuaishou.com",
            "Cookie": f"did={did}",
            # NOTE(review): this fixed length does not track the real payload
            # size (it varies with user id and cursor); the HTTP client is
            # expected to recompute it from the body -- confirm.
            "Content-Length": "1260",
            "Accept-Language": "zh-CN,zh-Hans;q=0.9",
            "Host": "www.kuaishou.com",
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15",
            "Referer": self._PROFILE_PREFIX + self._user_id(),
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

    def search(self, did):
        """POST one feed request for the current ``pcursor`` and return the decoded JSON.

        Args:
            did: Value for the ``did`` cookie.

        Returns:
            The parsed JSON response body.
        """
        # Certificate verification is switched off below, so silence the
        # warnings urllib3 would otherwise emit.
        urllib3.disable_warnings()
        # The context manager closes the session (and its adapters) even when
        # the request or JSON decoding raises.
        with requests.session() as session:
            # Retry each request up to 3 times.
            session.mount("http://", HTTPAdapter(max_retries=3))
            session.mount("https://", HTTPAdapter(max_retries=3))
            response = session.post(
                url=self.url,
                headers=self.init_headers(did),
                data=self.init_payload(),
                # proxies=Common.tunnel_proxies(),
                verify=False,
                timeout=10,
            )
            return response.json()

    def search_pages(self, did, extra_pages=5, page_delay=5):
        """Fetch the first feed page plus up to ``extra_pages`` follow-up pages.

        Paging stops early as soon as a response carries no
        ``data.visionProfilePhotoList`` section or no next cursor, instead of
        failing or re-requesting the same page.

        Args:
            did: Value for the ``did`` cookie.
            extra_pages: Maximum number of pages requested after the first one.
            page_delay: Seconds to wait before each follow-up request.

        Returns:
            A list with the decoded JSON of every response, in request order.
        """
        self.search_result = self.search(did)
        pages = [self.search_result]
        for _ in range(extra_pages):
            data = self.search_result.get("data") or {}
            profile = data.get("visionProfilePhotoList")
            if not profile:
                break
            next_cursor = profile.get("pcursor")
            if not next_cursor:
                # An empty cursor would simply request the first page again.
                break
            # Paging indicator for the next request.
            self.pcursor = next_cursor
            time.sleep(page_delay)
            self.search_result = self.search(did)
            pages.append(self.search_result)
        return pages
def search_url_from_name(keyword, did):
    """Search Kuaishou users by keyword and return the first match.

    Posts the ``graphqlSearchUser`` operation to the Kuaishou GraphQL endpoint
    and prints the raw response, the user list and the chosen account; the
    script entry point prints nothing itself, so these prints are its output.

    Args:
        keyword: Text to search users for.
        did: Value for the ``did`` cookie.

    Returns:
        The first entry of ``data.visionSearchUser.users`` from the response.

    Raises:
        IndexError: If the response contains no users for ``keyword``.
    """
    payload = {
        "operationName": "graphqlSearchUser",
        "query": "query graphqlSearchUser($keyword: String, $pcursor: String, $searchSessionId: String) {\n visionSearchUser(keyword: $keyword, pcursor: $pcursor, searchSessionId: $searchSessionId) {\n result\n users {\n fansCount\n photoCount\n isFollowing\n user_id\n headurl\n user_text\n user_name\n verified\n verifiedDetail {\n description\n iconType\n newVerified\n musicCompany\n type\n __typename\n }\n __typename\n }\n searchSessionId\n pcursor\n __typename\n }\n}\n",
        "variables": {
            "keyword": keyword
        }
    }
    headers = {
        "Content-Type": "application/json",
        "Cookie": f"did={did}",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
    }
    response = requests.post(
        url="https://www.kuaishou.com/graphql",
        headers=headers,
        # proxies=Common.tunnel_proxies(),
        data=json.dumps(payload),
        # Without a timeout a stalled connection would block forever.
        timeout=10,
    )
    # Decode the body once and reuse it.
    body = response.json()
    print(body)
    search_section = (body.get("data") or {}).get("visionSearchUser") or {}
    user_list = search_section.get("users")
    print(user_list)
    if not user_list:
        raise IndexError(f"no Kuaishou user found for keyword {keyword!r}")
    account = user_list[0]
    # print("user basic info")
    print(json.dumps(account, ensure_ascii=False, indent=4))
    return account
if __name__ == "__main__":
    # Values for the "did" cookie used in manual runs.
    my_did = "web_57fe06bfa96f8fdae46d286e125a5c18"
    my_did2 = "web_727b05862ce2afa4028018cc79a50257"

    # Resolve an account by keyword, then keep its user id.
    account = search_url_from_name("知后品牌优选", my_did)
    author_name = account["user_id"]
    # print(account)

    # Example: crawl several feed pages and append every feed to url.txt.
    # ksc = KuaiShouSearch(author_name=author_name)
    # result = ksc.search_pages(did=my_did)
    # print(result)
    # c = 0
    # f = open("url.txt", "a+", encoding="utf-8")
    # for obj in result:
    #     for feed in obj['data']['visionProfilePhotoList']['feeds']:
    #         c += 1
    #         print(json.dumps(feed, ensure_ascii=False))
    #         f.write(json.dumps(feed, ensure_ascii=False) + "\n")
    # print(c)
    # f.close()
    # print(json.dumps(result, ensure_ascii=False, indent=4))
    # for feed in result['data']['visionProfilePhotoList']['feeds']:
    #     print(json.dumps(feed, ensure_ascii=False))

    # Example: read an existing JSON file to obtain the user ids.
    # with open("result_2.json", "r", encoding="utf-8") as f:
    #     my_test_id_dict = json.loads(f.read())
    # Iterate over the id dict.
    # for my_id in my_test_id_dict:
    #     my_keyword = my_id.replace("@", "")
    #     account = search_url_from_name(keyword=my_keyword, did=my_did)
    #     author_name = account['user_id']
    #     ksc = KuaiShouSearch(author_name=author_name)
    #     result = ksc.search(did=my_did)
    #     # print(json.dumps(result, ensure_ascii=False, indent=4))
    #     for feed in result['data']['visionProfilePhotoList']['feeds']:
    #         print(json.dumps(feed, ensure_ascii=False))
    #         video_url = feed['photo']['photoUrl']
    #         print("success get kuaishou video_url", video_url)
|