|
@@ -122,6 +122,7 @@ class KuaiShouFollow:
|
|
|
def get_out_user_info(cls, log_type, crawler, out_uid):
|
|
|
try:
|
|
|
url = "https://www.kuaishou.com/graphql"
|
|
|
+
|
|
|
payload = json.dumps({
|
|
|
"operationName": "visionProfile",
|
|
|
"variables": {
|
|
@@ -130,20 +131,17 @@ class KuaiShouFollow:
|
|
|
"query": "query visionProfile($userId: String) {\n visionProfile(userId: $userId) {\n result\n hostName\n userProfile {\n ownerCount {\n fan\n photo\n follow\n photo_public\n __typename\n }\n profile {\n gender\n user_name\n user_id\n headurl\n user_text\n user_profile_bg_url\n __typename\n }\n isFollowing\n __typename\n }\n __typename\n }\n}\n"
|
|
|
})
|
|
|
headers = {
|
|
|
- 'Accept-Language': 'zh-CN,zh;q=0.9',
|
|
|
- 'Connection': 'keep-alive',
|
|
|
- 'Cookie': 'kpf=PC_WEB; clientid=3; did=web_3f264336f6a6c191cd36fb15e87ab708; userId=1321650328; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqAB7bjkVapctEeh2Vh6_w9YmSXFnxBUvy3dAIZZmpyd9sAJciYB48W_Ch7rN3r1mhKtJCtBGFqMX-cTy1RoGLLmaKpwQTvxjew1nsH6JacRsJf6qB0N273lzzmGeXQPxb-MVwqtoyvxL8bLJ0DcldtHb1Q36U4efpRFse9WYLL9PtlsEprI6xORB6a009HLlRKiKMzma5s_nhdwr5xt1QwnphoStVKEb-xUGkLo9u0A7O3lj4AGIiDTIZw_4BbSmp0oOBtTtItbuywLAU3zSIErl1q6F5AW8SgFMAE; kuaishou.server.web_ph=0efe610176ceb53a303c1256cd0f6ff0325e; kpn=KUAISHOU_VISION',
|
|
|
+ 'Accept': '*/*',
|
|
|
+ 'Content-Type': 'application/json',
|
|
|
'Origin': 'https://www.kuaishou.com',
|
|
|
+ 'Cookie': 'did=web_b16e6ef862b22f8276ebec7ac73b5a72; clientid=3; kpf=PC_WEB; kpn=KUAISHOU_VISION',
|
|
|
+ 'Content-Length': '552',
|
|
|
+ 'Accept-Language': 'zh-CN,zh-Hans;q=0.9',
|
|
|
+ 'Host': 'www.kuaishou.com',
|
|
|
+ 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15',
|
|
|
'Referer': 'https://www.kuaishou.com/profile/{}'.format(out_uid),
|
|
|
- 'Sec-Fetch-Dest': 'empty',
|
|
|
- 'Sec-Fetch-Mode': 'cors',
|
|
|
- 'Sec-Fetch-Site': 'same-origin',
|
|
|
- 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
|
|
|
- 'accept': '*/*',
|
|
|
- 'content-type': 'application/json',
|
|
|
- 'sec-ch-ua': '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"',
|
|
|
- 'sec-ch-ua-mobile': '?0',
|
|
|
- 'sec-ch-ua-platform': '"macOS"'
|
|
|
+ 'Accept-Encoding': 'gzip, deflate, br',
|
|
|
+ 'Connection': 'keep-alive'
|
|
|
}
|
|
|
urllib3.disable_warnings()
|
|
|
s = requests.session()
|
|
@@ -359,14 +357,13 @@ class KuaiShouFollow:
|
|
|
def get_videoList(cls, log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine, pcursor=""):
|
|
|
download_cnt_1, download_cnt_2 = 0, 0
|
|
|
|
|
|
- while True:
|
|
|
- rule_dict_1 = cls.get_rule(log_type, crawler, 1)
|
|
|
- rule_dict_2 = cls.get_rule(log_type, crawler, 2)
|
|
|
- if rule_dict_1 is None or rule_dict_2 is None:
|
|
|
- Common.logger(log_type, crawler).warning(f"rule_dict is None, 10秒后重试")
|
|
|
- time.sleep(10)
|
|
|
- else:
|
|
|
- break
|
|
|
+ rule_dict_1 = cls.get_rule(log_type, crawler, 1)
|
|
|
+ rule_dict_2 = cls.get_rule(log_type, crawler, 2)
|
|
|
+ if rule_dict_1 is None or rule_dict_2 is None:
|
|
|
+ Common.logger(log_type, crawler).warning(f"rule_dict is None, 10秒后重试")
|
|
|
+ time.sleep(10)
|
|
|
+ else:
|
|
|
+ return
|
|
|
|
|
|
try:
|
|
|
if download_cnt_1 >= int(
|
|
@@ -378,29 +375,28 @@ class KuaiShouFollow:
|
|
|
return
|
|
|
|
|
|
url = "https://www.kuaishou.com/graphql"
|
|
|
+
|
|
|
payload = json.dumps({
|
|
|
"operationName": "visionProfilePhotoList",
|
|
|
"variables": {
|
|
|
"userId": out_uid,
|
|
|
- "pcursor": pcursor,
|
|
|
+ "pcursor": "",
|
|
|
"page": "profile"
|
|
|
},
|
|
|
"query": "fragment photoContent on PhotoEntity {\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n __typename\n}\n\nfragment feedContent on Feed {\n type\n author {\n id\n name\n headerUrl\n following\n headerUrls {\n url\n __typename\n }\n __typename\n }\n photo {\n ...photoContent\n __typename\n }\n canAddComment\n llsid\n status\n currentPcursor\n tags {\n type\n name\n __typename\n }\n __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n ...feedContent\n __typename\n }\n hostName\n pcursor\n __typename\n }\n}\n"
|
|
|
})
|
|
|
-
|
|
|
headers = {
|
|
|
- 'User-Agent': get_random_user_agent('pc'),
|
|
|
'Accept': '*/*',
|
|
|
- 'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
|
|
|
- 'Accept-Encoding': 'gzip, deflate, br',
|
|
|
- 'Referer': f'https://www.kuaishou.com/profile/{out_uid}',
|
|
|
- 'content-type': 'application/json',
|
|
|
+ 'Content-Type': 'application/json',
|
|
|
'Origin': 'https://www.kuaishou.com',
|
|
|
- 'Connection': 'keep-alive',
|
|
|
- 'Cookie': 'kpf=PC_WEB; clientid=3; did=web_3f264336f6a6c191cd36fb15e87ab708; kpn=KUAISHOU_VISION',
|
|
|
- 'Sec-Fetch-Dest': 'empty',
|
|
|
- 'Sec-Fetch-Mode': 'cors',
|
|
|
- 'Sec-Fetch-Site': 'same-origin'
|
|
|
+ 'Cookie': 'did=web_b16e6ef862b22f8276ebec7ac73b5a72; clientid=3; kpf=PC_WEB; kpn=KUAISHOU_VISION',
|
|
|
+ 'Content-Length': '1244',
|
|
|
+ 'Accept-Language': 'zh-CN,zh-Hans;q=0.9',
|
|
|
+ 'Host': 'www.kuaishou.com',
|
|
|
+ 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15',
|
|
|
+ 'Referer': 'https://www.kuaishou.com/profile/{}'.format(out_uid),
|
|
|
+ 'Accept-Encoding': 'gzip, deflate, br',
|
|
|
+ 'Connection': 'keep-alive'
|
|
|
}
|
|
|
urllib3.disable_warnings()
|
|
|
s = requests.session()
|
|
@@ -609,11 +605,11 @@ class KuaiShouFollow:
|
|
|
Common.logger(log_type, crawler).info("不满足下载规则\n")
|
|
|
# Common.logger(log_type, crawler).info(f"feeds: {feeds}\n")
|
|
|
|
|
|
- if pcursor == "no_more":
|
|
|
- Common.logger(log_type, crawler).info(f"作者,{out_uid},已经到底了,没有更多内容了\n")
|
|
|
- return
|
|
|
- cls.get_videoList(log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine,
|
|
|
- pcursor=pcursor)
|
|
|
+ # if pcursor == "no_more":
|
|
|
+ # Common.logger(log_type, crawler).info(f"作者,{out_uid},已经到底了,没有更多内容了\n")
|
|
|
+ # return
|
|
|
+ # cls.get_videoList(log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine,
|
|
|
+ # pcursor=pcursor)
|
|
|
except Exception as e:
|
|
|
Common.logger(log_type, crawler).error(f"get_videoList:{e}\n")
|
|
|
|