@@ -14,7 +14,7 @@ sys.path.append(os.getcwd())
 from common.common import Common
 from common.feishu import Feishu
 from common.getuser import getUser
-from common.db import MysqlHelper
+from common.scheduling_db import MysqlHelper
 from common.publish import Publish
 from common.public import random_title, get_config_from_mysql
 from common.public import get_user_from_mysql
@@ -23,154 +23,6 @@ from common.public import get_user_from_mysql
 class KuaishouauthorScheduling:
     platform = "快手"

-    # 获取站外用户信息
-    @classmethod
-    def get_out_user_info(cls, log_type, crawler, out_uid):
-        try:
-            url = "https://www.kuaishou.com/graphql"
-
-            payload = json.dumps({
-                "operationName": "visionProfile",
-                "variables": {
-                    "userId": out_uid
-                },
-                "query": "query visionProfile($userId: String) {\n visionProfile(userId: $userId) {\n result\n hostName\n userProfile {\n ownerCount {\n fan\n photo\n follow\n photo_public\n __typename\n }\n profile {\n gender\n user_name\n user_id\n headurl\n user_text\n user_profile_bg_url\n __typename\n }\n isFollowing\n __typename\n }\n __typename\n }\n}\n"
-            })
-            # s = string.ascii_lowercase
-            # r = random.choice(s)
-            headers = {
-                'Accept': '*/*',
-                'Content-Type': 'application/json',
-                'Origin': 'https://www.kuaishou.com',
-                'Cookie': f'kpf=PC_WEB; clientid=3; did={cls.get_did(log_type, crawler)}; kpn=KUAISHOU_VISION',
-                'Content-Length': '552',
-                'Accept-Language': 'zh-CN,zh-Hans;q=0.9',
-                'Host': 'www.kuaishou.com',
-                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15',
-                'Referer': 'https://www.kuaishou.com/profile/{}'.format(out_uid),
-                'Accept-Encoding': 'gzip, deflate, br',
-                'Connection': 'keep-alive'
-            }
-            urllib3.disable_warnings()
-            s = requests.session()
-            # max_retries=3 重试3次
-            s.mount('http://', HTTPAdapter(max_retries=3))
-            s.mount('https://', HTTPAdapter(max_retries=3))
-            response = s.post(url=url, headers=headers, data=payload, proxies=Common.tunnel_proxies(), verify=False,
-                              timeout=5)
-            response.close()
-            # Common.logger(log_type, crawler).info(f"get_out_user_info_response:{response.text}")
-            if response.status_code != 200:
-                Common.logger(log_type, crawler).warning(f"get_out_user_info_response:{response.text}\n")
-                return
-            elif 'data' not in response.json():
-                Common.logger(log_type, crawler).warning(f"get_out_user_info_response:{response.json()}\n")
-                return
-            elif 'visionProfile' not in response.json()['data']:
-                Common.logger(log_type, crawler).warning(f"get_out_user_info_response:{response.json()['data']}\n")
-                return
-            elif 'userProfile' not in response.json()['data']['visionProfile']:
-                Common.logger(log_type, crawler).warning(
-                    f"get_out_user_info_response:{response.json()['data']['visionProfile']['userProfile']}\n")
-                return
-            else:
-                userProfile = response.json()['data']['visionProfile']['userProfile']
-                # Common.logger(log_type, crawler).info(f"userProfile:{userProfile}")
-
-                try:
-                    out_fans_str = str(userProfile['ownerCount']['fan'])
-                except Exception:
-                    out_fans_str = "0"
-
-                try:
-                    out_follow_str = str(userProfile['ownerCount']['follow'])
-                except Exception:
-                    out_follow_str = "0"
-
-                try:
-                    out_avatar_url = userProfile['profile']['headurl']
-                except Exception:
-                    out_avatar_url = ""
-
-                Common.logger(log_type, crawler).info(f"out_fans_str:{out_fans_str}")
-                Common.logger(log_type, crawler).info(f"out_follow_str:{out_follow_str}")
-                Common.logger(log_type, crawler).info(f"out_avatar_url:{out_avatar_url}")
-
-                if "万" in out_fans_str:
-                    out_fans = int(float(out_fans_str.split("万")[0]) * 10000)
-                else:
-                    out_fans = int(out_fans_str.replace(",", ""))
-                if "万" in out_follow_str:
-                    out_follow = int(float(out_follow_str.split("万")[0]) * 10000)
-                else:
-                    out_follow = int(out_follow_str.replace(",", ""))
-
-                out_user_dict = {
-                    "out_fans": out_fans,
-                    "out_follow": out_follow,
-                    "out_avatar_url": out_avatar_url
-                }
-                Common.logger(log_type, crawler).info(f"out_user_dict:{out_user_dict}")
-                return out_user_dict
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"get_out_user_info:{e}\n")
-
-    # 获取用户信息列表
-    @classmethod
-    def get_user_list(cls, log_type, crawler, sheetid, env, machine):
-        try:
-            while True:
-                user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
-                if user_sheet is None:
-                    Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet} 10秒钟后重试")
-                    continue
-                our_user_list = []
-                for i in range(1, len(user_sheet)):
-                # for i in range(1, 2):
-                    out_uid = user_sheet[i][2]
-                    user_name = user_sheet[i][3]
-                    our_uid = user_sheet[i][6]
-                    our_user_link = user_sheet[i][7]
-                    if out_uid is None or user_name is None:
-                        Common.logger(log_type, crawler).info("空行\n")
-                    else:
-                        Common.logger(log_type, crawler).info(f"正在更新 {user_name} 用户信息\n")
-                        if our_uid is None:
-                            out_user_info = cls.get_out_user_info(log_type, crawler, out_uid)
-                            out_user_dict = {
-                                "out_uid": out_uid,
-                                "user_name": user_name,
-                                "out_avatar_url": out_user_info["out_avatar_url"],
-                                "out_create_time": '',
-                                "out_tag": '',
-                                "out_play_cnt": 0,
-                                "out_fans": out_user_info["out_fans"],
-                                "out_follow": out_user_info["out_follow"],
-                                "out_friend": 0,
-                                "out_like": 0,
-                                "platform": cls.platform,
-                                "tag": cls.tag,
-                            }
-                            our_user_dict = getUser.create_user(log_type=log_type, crawler=crawler,
-                                                                out_user_dict=out_user_dict, env=env, machine=machine)
-                            our_uid = our_user_dict['our_uid']
-                            our_user_link = our_user_dict['our_user_link']
-                            Feishu.update_values(log_type, crawler, sheetid, f'G{i + 1}:H{i + 1}',
-                                                 [[our_uid, our_user_link]])
-                            Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功!\n')
-                            our_user_list.append(our_user_dict)
-                        else:
-                            our_user_dict = {
-                                'out_uid': out_uid,
-                                'user_name': user_name,
-                                'our_uid': our_uid,
-                                'our_user_link': our_user_link,
-                            }
-                            our_user_list.append(our_user_dict)
-                return our_user_list
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'get_user_list:{e}\n')
-
     # 处理视频标题
     @classmethod
     def video_title(cls, log_type, crawler, env, title):
@@ -205,17 +57,24 @@
         return video_title

     @classmethod
-    def get_did(cls, log_type, crawler):
-        while True:
-            did_sheet = Feishu.get_values_batch(log_type, crawler, "G7acT6")
-            if did_sheet is None:
-                Common.logger(log_type, crawler).warning(f"did_sheet:{did_sheet}")
-                time.sleep(2)
-                continue
-            return did_sheet[0][1]
+    def get_cookie(cls, log_type, crawler, env):
+        select_sql = f""" select * from crawler_config where source="{crawler}" """
+        configs = MysqlHelper.get_values(log_type, crawler, select_sql, env, action="")
+        for config in configs:
+            if "cookie" in config["config"]:
+                cookie_dict = {
+                    "cookie_id": config["id"],
+                    "title": config["title"].strip(),
+                    "cookie": dict(eval(config["config"]))["cookie"].strip(),
+                    "update_time": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(config["update_time"] / 1000))),
+                    "operator": config["operator"].strip()
+                }
+                for k, v in cookie_dict.items():
+                    print(f"{k}:{type(v)}, {v}")
+                return cookie_dict

     @classmethod
-    def get_videoList(cls, log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine, pcursor=""):
+    def get_videoList(cls, log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine):
         download_cnt_1, download_cnt_2 = 0, 0
         rule_dict_1 = cls.get_rule(log_type, crawler, 1)
         rule_dict_2 = cls.get_rule(log_type, crawler, 2)
@@ -237,7 +96,7 @@
             'Accept': '*/*',
             'Content-Type': 'application/json',
             'Origin': 'https://www.kuaishou.com',
-            'Cookie': f'kpf=PC_WEB; clientid=3; did={cls.get_did(log_type, crawler)}; kpn=KUAISHOU_VISION',
+            'Cookie': f'kpf=PC_WEB; clientid=3; did={cls.get_(log_type, crawler)}; kpn=KUAISHOU_VISION',
             'Content-Length': '1260',
             'Accept-Language': 'zh-CN,zh-Hans;q=0.9',
             'Host': 'www.kuaishou.com',
@@ -256,7 +115,7 @@
         if not feeds:
             Common.logger(log_type, crawler).info("没有更多视频啦 ~\n")
             return
-        pcursor = response.json()['data']['visionProfilePhotoList']['pcursor']
+        # pcursor = response.json()['data']['visionProfilePhotoList']['pcursor']
         # Common.logger(log_type, crawler).info(f"feeds0: {feeds}\n")
         for i in range(len(feeds)):
             try:
@@ -584,5 +443,5 @@


 if __name__ == "__main__":
-    print(KuaishouauthorScheduling.get_did("follow", "kuaishou"))
+    KuaishouauthorScheduling.get_cookie("author", "kuaishou", "dev")
     pass