functions.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
  1. import json
  2. import time
  3. import requests
  4. import urllib3
  5. from requests.adapters import HTTPAdapter
  6. from common.common import Common
  class KuaiShouSearch:
      """Fetch the video feed of one Kuaishou profile through the web GraphQL API.

      ``author_name`` may be either a bare user id or a full
      ``https://www.kuaishou.com/profile/<id>`` URL; the URL prefix is stripped
      before the id is sent to the server.

      Attributes:
          url: GraphQL endpoint every request is posted to.
          pcursor: Pagination cursor sent with the next request ("" = first page).
          author_name: User id or profile URL given at construction time.
          search_result: Decoded JSON of the most recent response
              (an empty list until the first request is made).
      """

      _PROFILE_URL_PREFIX = "https://www.kuaishou.com/profile/"

      def __init__(self, author_name):
          self.url = "https://www.kuaishou.com/graphql"
          self.pcursor = ""
          self.author_name = author_name
          self.search_result = []

      def _user_id(self):
          """Return ``author_name`` with the profile URL prefix removed."""
          return self.author_name.replace(self._PROFILE_URL_PREFIX, "")

      def init_payload(self):
          """Build the JSON request body for the ``visionProfilePhotoList`` query.

          Returns:
              A JSON string carrying the user id and the current ``pcursor``.
          """
          return json.dumps(
              {
                  "operationName": "visionProfilePhotoList",
                  "variables": {
                      "userId": self._user_id(),
                      "pcursor": self.pcursor,
                      "page": "profile",
                  },
                  "query": "fragment photoContent on PhotoEntity {\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n commentCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n __typename\n}\n\nfragment feedContent on Feed {\n type\n author {\n id\n name\n headerUrl\n following\n headerUrls {\n url\n __typename\n }\n __typename\n }\n photo {\n ...photoContent\n __typename\n }\n canAddComment\n llsid\n status\n currentPcursor\n tags {\n type\n name\n __typename\n }\n __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n ...feedContent\n __typename\n }\n hostName\n pcursor\n __typename\n }\n}\n",
              }
          )

      def init_headers(self, did):
          """Build the HTTP headers for a profile request.

          Args:
              did: Value placed in the ``did`` cookie.

          Returns:
              A dict of request headers.
          """
          # No hard-coded Content-Length: the payload size varies with the user
          # id and cursor, and the HTTP client derives the header from the body.
          return {
              "Accept": "*/*",
              "Content-Type": "application/json",
              "Origin": "https://www.kuaishou.com",
              "Cookie": "did={}".format(did),
              "Accept-Language": "zh-CN,zh-Hans;q=0.9",
              "Host": "www.kuaishou.com",
              "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15",
              "Referer": f"{self._PROFILE_URL_PREFIX}{self._user_id()}",
              "Accept-Encoding": "gzip, deflate, br",
              "Connection": "keep-alive",
          }

      def search(self, did):
          """POST one profile-feed request and return the decoded JSON response.

          Args:
              did: Value placed in the ``did`` cookie.

          Returns:
              The response body parsed as JSON.
          """
          # NOTE(review): TLS certificate verification is switched off
          # (verify=False) and the resulting urllib3 warnings are silenced
          # process-wide. Confirm this is still required.
          urllib3.disable_warnings()
          # The context manager closes the session (and its pooled connections)
          # even when the request raises.
          with requests.Session() as session:
              # max_retries=3 on both schemes
              session.mount("http://", HTTPAdapter(max_retries=3))
              session.mount("https://", HTTPAdapter(max_retries=3))
              response = session.post(
                  url=self.url,
                  headers=self.init_headers(did),
                  data=self.init_payload(),
                  # proxies=Common.tunnel_proxies(),
                  verify=False,
                  timeout=10,
              )
              return response.json()

      def search_pages(self, did, max_pages=5, interval=5):
          """Fetch the first page plus up to ``max_pages`` follow-up pages.

          Paging stops early when a response has no ``visionProfilePhotoList``
          data or carries no ``pcursor`` to continue from.

          Args:
              did: Value placed in the ``did`` cookie.
              max_pages: Maximum number of follow-up pages requested after the
                  first one.
              interval: Seconds to wait before each follow-up request.

          Returns:
              A list with the decoded JSON of every response, in request order.
          """
          self.search_result = self.search(did)
          page_result = [self.search_result]
          for _ in range(max_pages):
              # "data" may be missing or null (e.g. on an error response).
              data = self.search_result.get("data") or {}
              profile = data.get("visionProfilePhotoList")
              if not profile:
                  break
              # Pagination cursor; without one the next request would just
              # return the first page again.
              next_cursor = profile.get("pcursor")
              if not next_cursor:
                  break
              self.pcursor = next_cursor
              # Throttle between consecutive requests.
              time.sleep(interval)
              self.search_result = self.search(did)
              page_result.append(self.search_result)
          return page_result
  def search_url_from_name(keyword, did):
      """Search Kuaishou users by keyword and return the first match.

      Sends the ``graphqlSearchUser`` query to the Kuaishou web GraphQL
      endpoint and prints the raw response, the user list and the chosen
      account to stdout.

      Args:
          keyword: Search text sent as the ``keyword`` query variable.
          did: Value placed in the ``did`` cookie.

      Returns:
          The first entry of ``data.visionSearchUser.users`` in the response.

      Raises:
          IndexError: If the search returned no users.
      """
      payload = {
          "operationName": "graphqlSearchUser",
          "query": "query graphqlSearchUser($keyword: String, $pcursor: String, $searchSessionId: String) {\n visionSearchUser(keyword: $keyword, pcursor: $pcursor, searchSessionId: $searchSessionId) {\n result\n users {\n fansCount\n photoCount\n isFollowing\n user_id\n headurl\n user_text\n user_name\n verified\n verifiedDetail {\n description\n iconType\n newVerified\n musicCompany\n type\n __typename\n }\n __typename\n }\n searchSessionId\n pcursor\n __typename\n }\n}\n",
          "variables": {
              "keyword": keyword
          }
      }
      headers = {
          "Content-Type": "application/json",
          "Cookie": "did={}".format(did),
          "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
      }
      response = requests.post(
          url="https://www.kuaishou.com/graphql",
          headers=headers,
          # proxies=Common.tunnel_proxies(),
          data=json.dumps(payload),
          # Without a timeout the call can block forever on a stalled
          # connection; 10 s matches KuaiShouSearch.search.
          timeout=10,
      )
      # Decode the body once and reuse it.
      result = response.json()
      print(result)
      user_list = result['data']['visionSearchUser']['users']
      print(user_list)
      if not user_list:
          raise IndexError(f"no Kuaishou user found for keyword {keyword!r}")
      account = user_list[0]
      # print("user basic info")
      print(json.dumps(account, ensure_ascii=False, indent=4))
      return account
  if __name__ == "__main__":
      # Manual smoke test: resolve an account by its display name.
      my_did, my_did2 = (
          "web_57fe06bfa96f8fdae46d286e125a5c18",
          "web_727b05862ce2afa4028018cc79a50257",
      )
      account = search_url_from_name(did=my_did, keyword="知后品牌优选")
      author_name = account["user_id"]
      # print(account)

      # --- Example: dump several pages of the profile feed to url.txt ---
      # ksc = KuaiShouSearch(author_name=author_name)
      # result = ksc.search_pages(did=my_did)
      # print(result)
      # c = 0
      # f = open("url.txt", "a+", encoding="utf-8")
      # for obj in result:
      #     for feed in obj['data']['visionProfilePhotoList']['feeds']:
      #         c += 1
      #         print(json.dumps(feed, ensure_ascii=False))
      #         f.write(json.dumps(feed, ensure_ascii=False) + "\n")
      # print(c)
      # f.close()
      # print(json.dumps(result, ensure_ascii=False, indent=4))
      # for feed in result['data']['visionProfilePhotoList']['feeds']:
      #     print(json.dumps(feed, ensure_ascii=False))

      # --- Example: read an existing JSON file to get the user ids ---
      # with open("result_2.json", "r", encoding="utf-8") as f:
      #     my_test_id_dict = json.loads(f.read())
      # Iterate over id_dict
      # for my_id in my_test_id_dict:
      #     my_keyword = my_id.replace("@", "")
      #     account = search_url_from_name(keyword=my_keyword, did=my_did)
      #     author_name = account['user_id']
      #     ksc = KuaiShouSearch(author_name=author_name)
      #     result = ksc.search(did=my_did)
      #     # print(json.dumps(result, ensure_ascii=False, indent=4))
      #     for feed in result['data']['visionProfilePhotoList']['feeds']:
      #         print(json.dumps(feed, ensure_ascii=False))
      #         video_url = feed['photo']['photoUrl']
      #         print("success get kuaishou video_url", video_url)
  136. # print("success get kuaishou video_url", video_url)