@@ -48,76 +48,28 @@ class GongzhonghaoAuthor3:
         return token_dict

     @classmethod
-    def get_users(cls, log_type, crawler, sheetid, env):
-        while True:
-            user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
-            if user_sheet is None:
-                Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet}, retrying in 2 seconds")
-                time.sleep(2)
-                continue
-            user_list = []
-            len_sheet = len(user_sheet)
-            if len_sheet <= 201:
-                Common.logger(log_type, crawler).info("User count to crawl <= 200; no need to start the third crawler script\n")
-                return
-            if len_sheet >= 301:
-                len_sheet = 301
-            for i in range(201, len_sheet):
-                # for i in range(1, 3):
-                user_name = user_sheet[i][0]
-                wechat_name = user_sheet[i][2]
-                if wechat_name is None or wechat_name.strip() == "" or wechat_name.replace(" ", "") == "":
-                    wechat_name = user_name
-                # our_uid = user_sheet[i][5]
-                # our_user_link = user_sheet[i][6]
-                out_uid = user_sheet[i][3]
-                avatar_url = user_sheet[i][4]
-                if out_uid is None or out_uid.strip() == "" or out_uid.replace(" ", "") == "":
-                    user_info_dict = cls.get_user_info(log_type=log_type, crawler=crawler, wechat_name=wechat_name, env=env)
-                    out_uid = user_info_dict["user_id"]
-                    avatar_url = user_info_dict["avatar_url"]
-                # tag1 = user_sheet[i][7]
-                # tag2 = user_sheet[i][8]
-                # tag3 = user_sheet[i][9]
-                # tag4 = user_sheet[i][10]
-                # tag5 = user_sheet[i][11]
-                # tag6 = user_sheet[i][12]
-                # Common.logger(log_type, crawler).info(f"Updating user info for {user_name}")
-                # if out_uid is None or our_uid is None:
-                #     # Info used to create our_id
-                #     user_dict = {
-                #         'recommendStatus': -6,
-                #         'appRecommendStatus': -6,
-                #         'nickName': user_info_dict["user_name"],
-                #         'avatarUrl': user_info_dict['avatar_url'],
-                #         'tagName': f'{tag1},{tag2},{tag3},{tag4},{tag5},{tag6}',
-                #     }
-                #     our_uid = getUser.create_uid(log_type, crawler, user_dict, env)
-                #     Common.logger(log_type, crawler).info(f'Newly created on-platform UID: {our_uid}')
-                #     if env == 'prod':
-                #         our_user_link = f'https://admin.piaoquantv.com/ums/user/{our_uid}/post'
-                #     else:
-                #         our_user_link = f'https://testadmin.piaoquantv.com/ums/user/{our_uid}/post'
-                #     Feishu.update_values(log_type, crawler, sheetid, f'D{i + 1}:G{i + 1}', [
-                #         [user_info_dict["user_id"], user_info_dict["avatar_url"], our_uid, our_user_link]])
-                #     Common.logger(log_type, crawler).info(f'User info created successfully!\n')
-                # else:
-                #     Common.logger(log_type, crawler).info("User info already exists\n")
-                Feishu.update_values(log_type, crawler, "Bzv72P", f'D{i + 1}:E{i + 1}', [[out_uid, avatar_url]])
-                our_user_dict = {
-                    'user_name': user_name,
-                    'user_id': out_uid,
-                    'wechat_name': wechat_name,
-                    # 'our_uid': our_uid,
-                    # 'our_user_link': our_user_link,
-                    'avatar_url': avatar_url,
-                }
-                for k, v in our_user_dict.items():
-                    Common.logger(log_type, crawler).info(f"{k}:{v}")
-                user_list.append(our_user_dict)
-                time.sleep(1)
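+    # Refactored: get_users() now handles a single sheet row (index i) and returns
+    # one user dict; the retry loop and row-range slicing moved into get_all_videos().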
+    def get_users(cls, log_type, crawler, user_sheet, sheetid, i, env):
+        user_name = user_sheet[i][0]
+        wechat_name = user_sheet[i][2]
+        if wechat_name is None or wechat_name.strip() == "" or wechat_name.replace(" ", "") == "":
+            wechat_name = user_name
+        out_uid = user_sheet[i][3]
+        avatar_url = user_sheet[i][4]
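+        # Backfill a missing out_uid/avatar_url from the official-account lookup and
+        # write the values back to columns D:E of the sheet.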
+        if out_uid is None or out_uid.strip() == "" or out_uid.replace(" ", "") == "":
+            user_info_dict = cls.get_user_info(log_type=log_type, crawler=crawler, wechat_name=wechat_name, env=env)
+            out_uid = user_info_dict["user_id"]
+            avatar_url = user_info_dict["avatar_url"]
+            Feishu.update_values(log_type, crawler, sheetid, f'D{i + 1}:E{i + 1}', [[out_uid, avatar_url]])

-            return user_list
+        our_user_dict = {
+            'user_name': user_name,
+            'user_id': out_uid,
+            'wechat_name': wechat_name,
+            'avatar_url': avatar_url,
+        }
+        for k, v in our_user_dict.items():
+            Common.logger(log_type, crawler).info(f"{k}:{v}")
+        return our_user_dict

     # Get the user's fakeid
     @classmethod
@@ -490,23 +442,38 @@ class GongzhonghaoAuthor3:

     @classmethod
     def get_all_videos(cls, log_type, crawler, rule_dict, env):
-        user_list = cls.get_users(log_type, crawler, "Bzv72P", env)
-        # Common.logger(log_type, crawler).info(f"user_list:{user_list}")
-        if user_list is None or len(user_list) == 0:
-            Common.logger(log_type, crawler).warning("User list to crawl is empty\n")
-            return
-        for user_dict in user_list:
-            try:
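+        # Re-read the Feishu user sheet on each pass, retrying until it loads.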
+        while True:
+            sheetid = "Bzv72P"
+            user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
+            if user_sheet is None:
+                Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet}, retrying in 2 seconds")
+                time.sleep(2)
+                continue
+            len_sheet = len(user_sheet)
+            if len_sheet <= 201:
+                Common.logger(log_type, crawler).info("User count to crawl <= 200; no need to start the third crawler script\n")
+                return
+            if len_sheet >= 301:
+                len_sheet = 301
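+            # This third script covers sheet rows 201-300; earlier rows are
+            # presumably handled by the first two crawler scripts.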
+            for i in range(201, len_sheet):
+                user_dict = cls.get_users(log_type=log_type,
+                                          crawler=crawler,
+                                          user_sheet=user_sheet,
+                                          sheetid=sheetid,
+                                          i=i,
+                                          env=env)
-                Common.logger(log_type, crawler).info(f'Fetching videos for official account {user_dict["user_name"]}\n')
-                cls.get_videoList(log_type=log_type,
-                                  crawler=crawler,
-                                  rule_dict=rule_dict,
-                                  user_dict=user_dict,
-                                  env=env)
-                Common.logger(log_type, crawler).info('Sleeping for 60 seconds\n')
-                time.sleep(60)
-            except Exception as e:
-                Common.logger(log_type, crawler).info(f'Exception while crawling official account {user_dict["user_name"]}: {e}\n')
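+                # Crawl this account's videos, then sleep 60s before the next account
+                # (presumably to stay under the platform's rate limits).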
+                try:
+                    Common.logger(log_type, crawler).info(f'Fetching videos for official account {user_dict["user_name"]}\n')
+                    cls.get_videoList(log_type=log_type,
+                                      crawler=crawler,
+                                      rule_dict=rule_dict,
+                                      user_dict=user_dict,
+                                      env=env)
+                    Common.logger(log_type, crawler).info('Sleeping for 60 seconds\n')
+                    time.sleep(60)
+                except Exception as e:
+                    Common.logger(log_type, crawler).info(f'Exception while crawling official account {user_dict["user_name"]}: {e}\n')


 if __name__ == "__main__":