|
@@ -119,29 +119,31 @@ class Follow:
|
|
|
payload = json.dumps({
|
|
|
"operationName": "visionProfile",
|
|
|
"variables": {
|
|
|
- "userId": out_uid
|
|
|
+ "userId": str(out_uid)
|
|
|
},
|
|
|
"query": "query visionProfile($userId: String) {\n visionProfile(userId: $userId) {\n result\n hostName\n userProfile {\n ownerCount {\n fan\n photo\n follow\n photo_public\n __typename\n }\n profile {\n gender\n user_name\n user_id\n headurl\n user_text\n user_profile_bg_url\n __typename\n }\n isFollowing\n __typename\n }\n __typename\n }\n}\n"
|
|
|
})
|
|
|
headers = {
|
|
|
- 'Cookie': f'kpf=PC_WEB; clientid=3; did=web_e2901e1c5a13c60af81ba88bc7a3ee24; userId={"".join(str(random.choice(range(1, 10))) for _ in range(10))}; kpn=KUAISHOU_VISION; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABE4wGjnJauApJelOpl9Xqo8TVDAyra7Pvo0rZtVgMSZxgVuw4Z6P2UtHv_CHOk2Ne2el1hdE_McCptWs8tRdtYlhXFlVOu8rQX7CwexzOBudJAfB3lDN8LPc4o4qHNwqFxy5J5j_WzdllbqMmaDUK9yUxX6XA-JFezzq9jvBwtGv7_hzB7pFrUcH39z0EYOQaZo5lDl-pE09Gw7wr8NvlZRoSdWlbobCW6oJxuQLJTUr9oj_uIiBhkeb1psaIIc3VwfYQ1UfvobrXAP_WpnRabE_3UZUBOygFMAE; kuaishou.server.web_ph=2b981e2051d7130c977fd31df97fe6f5ad54',
|
|
|
+ # 'Cookie': f'kpf=PC_WEB; clientid=3; did=web_e2901e1c5a13c60af81ba88bc7a3ee24; userId={"".join(str(random.choice(range(1, 10))) for _ in range(10))}; kpn=KUAISHOU_VISION; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABE4wGjnJauApJelOpl9Xqo8TVDAyra7Pvo0rZtVgMSZxgVuw4Z6P2UtHv_CHOk2Ne2el1hdE_McCptWs8tRdtYlhXFlVOu8rQX7CwexzOBudJAfB3lDN8LPc4o4qHNwqFxy5J5j_WzdllbqMmaDUK9yUxX6XA-JFezzq9jvBwtGv7_hzB7pFrUcH39z0EYOQaZo5lDl-pE09Gw7wr8NvlZRoSdWlbobCW6oJxuQLJTUr9oj_uIiBhkeb1psaIIc3VwfYQ1UfvobrXAP_WpnRabE_3UZUBOygFMAE; kuaishou.server.web_ph=2b981e2051d7130c977fd31df97fe6f5ad54',
|
|
|
+ 'Cookie': f'kpf=PC_WEB; clientid=3; did=web_e2901e1c5a13c60af81ba88bc7a3ee24; userId=3352428474; kpn=KUAISHOU_VISION; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABaRXtfRHlzKlQVj0Nm_M1G2wrIN1p6g3UTwfqfez6rkLVj6mPNt3RBAsLkyemMpvTLerPw0h41Q0lowqcImvIv5dlSGDEpQoj-VTAmOR2Suzm8vCRakG7XziAWyI0PXJKhvdXms-9Giy_4TnoniB49Oo3m7qXjXVBCzybcWS5BO90OLkhD30GYmGEnBBvkBI2oErJy3mNbafQdBQ6SxSUHhoS-1Rj5-IBBNoxoIePYcxZFs4oIiCvaT7sRn-zrF7X2ClPhfNh6lgClmH8MUjXszUfY_TPLCgFMAE; kuaishou.server.web_ph=1b62b98fc28bc23a42cd85240e1fd6025983',
|
|
|
'Referer': f'https://www.kuaishou.com/profile/{out_uid}',
|
|
|
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41',
|
|
|
'content-type': 'application/json',
|
|
|
- # 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
|
|
|
- # 'Cache-Control': 'no-cache',
|
|
|
- # 'Connection': 'keep-alive',
|
|
|
- # 'Origin': 'https://www.kuaishou.com',
|
|
|
- # 'Pragma': 'no-cache',
|
|
|
- # 'Sec-Fetch-Dest': 'empty',
|
|
|
- # 'Sec-Fetch-Mode': 'cors',
|
|
|
- # 'Sec-Fetch-Site': 'same-origin',
|
|
|
- # 'accept': '*/*',
|
|
|
- # 'sec-ch-ua': '"Chromium";v="110", "Not A(Brand";v="24", "Microsoft Edge";v="110"',
|
|
|
- # 'sec-ch-ua-mobile': '?0',
|
|
|
- # 'sec-ch-ua-platform': '"macOS"'
|
|
|
+ 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
|
|
|
+ 'Cache-Control': 'no-cache',
|
|
|
+ 'Connection': 'keep-alive',
|
|
|
+ 'Origin': 'https://www.kuaishou.com',
|
|
|
+ 'Pragma': 'no-cache',
|
|
|
+ 'Sec-Fetch-Dest': 'empty',
|
|
|
+ 'Sec-Fetch-Mode': 'cors',
|
|
|
+ 'Sec-Fetch-Site': 'same-origin',
|
|
|
+ 'accept': '*/*',
|
|
|
+ 'sec-ch-ua': '"Chromium";v="110", "Not A(Brand";v="24", "Microsoft Edge";v="110"',
|
|
|
+ 'sec-ch-ua-mobile': '?0',
|
|
|
+ 'sec-ch-ua-platform': '"macOS"'
|
|
|
}
|
|
|
response = requests.post(url=url, headers=headers, data=payload)
|
|
|
+ Common.logger(log_type, crawler).info(f"get_out_user_info_response:{response.text}")
|
|
|
if response.status_code != 200:
|
|
|
Common.logger(log_type, crawler).warning(f"get_out_user_info_response:{response.text}\n")
|
|
|
return
|
|
@@ -156,33 +158,42 @@ class Follow:
|
|
|
return
|
|
|
else:
|
|
|
userProfile = response.json()['data']['visionProfile']['userProfile']
|
|
|
- out_user_dict = {}
|
|
|
- if 'ownerCount' not in userProfile:
|
|
|
- out_user_dict['out_fans'] = 0
|
|
|
- out_user_dict['out_fans'] = 0
|
|
|
- elif 'fan' not in userProfile['ownerCount']:
|
|
|
- out_user_dict['out_fans'] = 0
|
|
|
- elif 'follow' not in userProfile['ownerCount']:
|
|
|
- out_user_dict['out_fans'] = 0
|
|
|
- else:
|
|
|
+ Common.logger(log_type, crawler).info(f"userProfile:{userProfile}")
|
|
|
+
|
|
|
+ try:
|
|
|
out_fans_str = str(userProfile['ownerCount']['fan'])
|
|
|
+ except Exception:
|
|
|
+ out_fans_str = "0"
|
|
|
+
|
|
|
+ try:
|
|
|
out_follow_str = str(userProfile['ownerCount']['follow'])
|
|
|
- if "万" in out_fans_str:
|
|
|
- out_user_dict['out_fans'] = int(float(out_fans_str.split("万")[0]) * 10000)
|
|
|
- else:
|
|
|
- out_user_dict['out_fans'] = int(out_fans_str.replace(",", ""))
|
|
|
- if "万" in out_follow_str:
|
|
|
- out_user_dict['out_follow'] = int(float(out_follow_str.split("万")[0]) * 10000)
|
|
|
- else:
|
|
|
- out_user_dict['out_follow'] = int(out_follow_str.replace(",", ""))
|
|
|
+ except Exception:
|
|
|
+ out_follow_str = "0"
|
|
|
|
|
|
- if 'profile' not in userProfile:
|
|
|
- out_user_dict['out_avatar_url'] = ''
|
|
|
- elif 'headurl' not in userProfile['profile']:
|
|
|
- out_user_dict['out_avatar_url'] = ''
|
|
|
- else:
|
|
|
- out_user_dict['out_avatar_url'] = userProfile['profile']['headurl']
|
|
|
+ try:
|
|
|
+ out_avatar_url = userProfile['profile']['headurl']
|
|
|
+ except Exception:
|
|
|
+ out_avatar_url = ""
|
|
|
+
|
|
|
+ Common.logger(log_type, crawler).info(f"out_fans_str:{out_fans_str}")
|
|
|
+ Common.logger(log_type, crawler).info(f"out_follow_str:{out_follow_str}")
|
|
|
+ Common.logger(log_type, crawler).info(f"out_avatar_url:{out_avatar_url}")
|
|
|
|
|
|
+ if "万" in out_fans_str:
|
|
|
+ out_fans = int(float(out_fans_str.split("万")[0]) * 10000)
|
|
|
+ else:
|
|
|
+ out_fans = int(out_fans_str.replace(",", ""))
|
|
|
+ if "万" in out_follow_str:
|
|
|
+ out_follow = int(float(out_follow_str.split("万")[0]) * 10000)
|
|
|
+ else:
|
|
|
+ out_follow = int(out_follow_str.replace(",", ""))
|
|
|
+
|
|
|
+ out_user_dict = {
|
|
|
+ "out_fans": out_fans,
|
|
|
+ "out_follow": out_follow,
|
|
|
+ "out_avatar_url": out_avatar_url
|
|
|
+ }
|
|
|
+ Common.logger(log_type, crawler).info(f"out_user_dict:{out_user_dict}")
|
|
|
return out_user_dict
|
|
|
except Exception as e:
|
|
|
Common.logger(log_type, crawler).error(f"get_out_user_info:{e}\n")
|
|
@@ -197,8 +208,8 @@ class Follow:
|
|
|
Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet} 10秒钟后重试")
|
|
|
continue
|
|
|
our_user_list = []
|
|
|
- for i in range(1, len(user_sheet)):
|
|
|
- # for i in range(1, 2):
|
|
|
+ # for i in range(1, len(user_sheet)):
|
|
|
+ for i in range(1, 2):
|
|
|
out_uid = user_sheet[i][2]
|
|
|
user_name = user_sheet[i][3]
|
|
|
our_uid = user_sheet[i][6]
|
|
@@ -633,15 +644,17 @@ if __name__ == "__main__":
|
|
|
# print(Follow.filter_words("follow", "kuaishou"))
|
|
|
# print(Follow.random_title("follow", "kuaishou"))
|
|
|
# Follow.get_user_list("follow", "kuaishou", "2OLxLr", "dev", "local")
|
|
|
- Follow.get_videoList(log_type="follow",
|
|
|
- crawler="kuaishou",
|
|
|
- strategy="定向爬虫策略",
|
|
|
- our_uid="6282431",
|
|
|
- out_uid="3xws7ydsnmp5mgq",
|
|
|
- oss_endpoint="out",
|
|
|
- env="dev",
|
|
|
- machine="local")
|
|
|
+ # Follow.get_videoList(log_type="follow",
|
|
|
+ # crawler="kuaishou",
|
|
|
+ # strategy="定向爬虫策略",
|
|
|
+ # our_uid="6282431",
|
|
|
+ # out_uid="3xws7ydsnmp5mgq",
|
|
|
+ # oss_endpoint="out",
|
|
|
+ # env="dev",
|
|
|
+ # machine="local")
|
|
|
# Follow.get_rule("follow", "kuaishou", 1)
|
|
|
# Follow.get_rule("follow", "kuaishou", 2)
|
|
|
+ print(Follow.get_out_user_info("follow", "kuaishou", "3xgh4ja9be3wcaw"))
|
|
|
+ print(Follow.get_out_user_info("follow", "kuaishou", "3x5wgjhfc7tx8ue"))
|
|
|
|
|
|
pass
|