|
@@ -24,7 +24,7 @@ sys.path.append(os.getcwd())
|
|
|
from common.common import Common
|
|
|
from common.db import MysqlHelper
|
|
|
from common.feishu import Feishu
|
|
|
-from common.users import Users
|
|
|
+from common.getuser import getUser
|
|
|
from common.publish import Publish
|
|
|
from common.translate import Translate
|
|
|
|
|
@@ -135,7 +135,11 @@ class YoutubeFollow:
|
|
|
data = json.loads(info)
|
|
|
header = data['header']['c4TabbedHeaderRenderer']
|
|
|
tabs = data['contents']['twoColumnBrowseResultsRenderer']['tabs']
|
|
|
- subsimpleText = header['subscriberCountText']['simpleText'].replace('位订阅者', '')
|
|
|
+ try:
|
|
|
+ subsimpleText = header['subscriberCountText']['simpleText'].replace('位订阅者', '')
|
|
|
+ out_fans = format_nums(subsimpleText)
|
|
|
+ except Exception as e:
|
|
|
+ out_fans = 0
|
|
|
for tab in tabs:
|
|
|
if 'tabRenderer' not in tab or 'content' not in tab['tabRenderer']:
|
|
|
continue
|
|
@@ -149,7 +153,7 @@ class YoutubeFollow:
|
|
|
out_user_dict = {
|
|
|
'out_user_name': header['title'],
|
|
|
'out_avatar_url': header['avatar']['thumbnails'][-1]['url'],
|
|
|
- 'out_fans': format_nums(subsimpleText),
|
|
|
+ 'out_fans': out_fans,
|
|
|
'out_play_cnt': int(
|
|
|
viewCountText.replace('收看次數:', '').replace('次', '').replace(',', '')) if viewCountText else 0,
|
|
|
'out_create_time': out_create_time.replace('年', '-').replace('月', '-').replace('日', ''),
|
|
@@ -173,8 +177,7 @@ class YoutubeFollow:
|
|
|
try:
|
|
|
user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
|
|
|
user_list = []
|
|
|
- for i in range(1, len(user_sheet)):
|
|
|
- # for i in range(181, len(user_sheet)):
|
|
|
+ for i in range(271, len(user_sheet)):
|
|
|
out_uid = user_sheet[i][2]
|
|
|
user_name = user_sheet[i][3]
|
|
|
browse_id = user_sheet[i][5]
|
|
@@ -182,22 +185,17 @@ class YoutubeFollow:
|
|
|
uer_url = user_sheet[i][4]
|
|
|
if out_uid is not None and user_name is not None:
|
|
|
Common.logger(log_type, crawler).info(f"正在更新 {user_name} 用户信息\n")
|
|
|
- # 获取站外browse_id,并写入飞书
|
|
|
- # if browse_id is None:
|
|
|
- # browse_id = cls.get_browse_id(log_type, crawler, out_uid, machine)
|
|
|
- # if browse_id is None:
|
|
|
- # Common.logger(log_type, crawler).warning('browse_id is None !')
|
|
|
- # else:
|
|
|
- # Feishu.update_values(log_type, crawler, sheetid, f'F{i + 1}:F{i + 1}', [[browse_id]])
|
|
|
- # Common.logger(log_type, crawler).info(f'browse_id写入成功:{browse_id}')
|
|
|
- # 站内 UID 为空,且数据库中(youtube+out_user_id)返回数量 == 0,则创建新的站内账号
|
|
|
+
|
|
|
if our_uid is None:
|
|
|
sql = f""" select * from crawler_user where platform="{cls.platform}" and out_user_id="{out_uid}" """
|
|
|
our_user_info = MysqlHelper.get_values(log_type, crawler, sql, env, machine)
|
|
|
# 数据库中(youtube + out_user_id)返回数量 == 0,则创建站内账号UID,并写入定向账号飞书表。并结合站外用户信息,一并写入爬虫账号数据库
|
|
|
- if our_user_info is None or len(our_user_info) == 0:
|
|
|
+ if not our_user_info:
|
|
|
# 获取站外账号信息,写入数据库
|
|
|
- out_user_dict = cls.get_out_user_info(log_type, crawler, browse_id, out_uid)
|
|
|
+ try:
|
|
|
+ out_user_dict = cls.get_out_user_info(log_type, crawler, browse_id, out_uid)
|
|
|
+ except Exception as e:
|
|
|
+ continue
|
|
|
out_avatar_url = out_user_dict['out_avatar_url']
|
|
|
out_create_time = out_user_dict['out_create_time']
|
|
|
out_play_cnt = out_user_dict['out_play_cnt']
|
|
@@ -210,7 +208,7 @@ class YoutubeFollow:
|
|
|
'avatarUrl': out_avatar_url,
|
|
|
'tagName': tag,
|
|
|
}
|
|
|
- our_uid = Users.create_uid(log_type, crawler, create_user_dict, env)
|
|
|
+ our_uid = getUser.create_uid(log_type, crawler, create_user_dict, env)
|
|
|
Common.logger(log_type, crawler).info(f'新创建的站内UID:{our_uid}')
|
|
|
if env == 'prod':
|
|
|
our_user_link = f'https://admin.piaoquantv.com/ums/user/{our_uid}/post'
|
|
@@ -221,22 +219,22 @@ class YoutubeFollow:
|
|
|
[[our_uid, our_user_link]])
|
|
|
Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功!')
|
|
|
|
|
|
- sql = f""" insert into crawler_user(user_id,
|
|
|
- out_user_id,
|
|
|
- out_user_name,
|
|
|
- out_avatar_url,
|
|
|
- out_create_time,
|
|
|
- out_play_cnt,
|
|
|
- out_fans,
|
|
|
- platform,
|
|
|
+ sql = f""" insert into crawler_user(user_id,
|
|
|
+ out_user_id,
|
|
|
+ out_user_name,
|
|
|
+ out_avatar_url,
|
|
|
+ out_create_time,
|
|
|
+ out_play_cnt,
|
|
|
+ out_fans,
|
|
|
+ platform,
|
|
|
tag)
|
|
|
- values({our_uid},
|
|
|
- "{out_uid}",
|
|
|
- "{user_name}",
|
|
|
- "{out_avatar_url}",
|
|
|
- "{out_create_time}",
|
|
|
- {out_play_cnt},
|
|
|
- {out_fans},
|
|
|
+ values({our_uid},
|
|
|
+ "{out_uid}",
|
|
|
+ "{user_name}",
|
|
|
+ "{out_avatar_url}",
|
|
|
+ "{out_create_time}",
|
|
|
+ {out_play_cnt},
|
|
|
+ {out_fans},
|
|
|
"{cls.platform}",
|
|
|
"{tag}") """
|
|
|
Common.logger(log_type, crawler).info(f'sql:{sql}')
|
|
@@ -1179,10 +1177,10 @@ if __name__ == "__main__":
|
|
|
# print(YoutubeFollow.get_user_from_feishu('follow', 'youtube', 'c467d7', 'prod', 'prod'))
|
|
|
# YoutubeFollow.get_out_user_info('follow', 'youtube', 'UC08jgxf119fzynp2uHCvZIg', '@weitravel')
|
|
|
# YoutubeFollow.get_video_info('follow', 'youtube', 'OGVK0IXBIhI')
|
|
|
- # YoutubeFollow.get_follow_videos('follow', 'youtube', 'youtube_follow', 'out', 'dev', 'local')
|
|
|
+ YoutubeFollow.get_follow_videos('follow', 'youtube', 'youtube_follow', 'hk', 'dev', 'local')
|
|
|
# print(YoutubeFollow.filter_emoji("姐妹倆一唱一和,完美配合,終於把大慶降服了😅😅#萌娃搞笑日常"))
|
|
|
# YoutubeFollow.repeat_video('follow', 'youtube', 4, "dev", "local")
|
|
|
# title = "'西部巡游220丨两人一车环游中国半年,需要花费多少钱? 2万公里吃住行费用总结'"
|
|
|
# title = "'Insanely Crowded Shanghai Yu Garden Lantern Festival Walk Tour 2023 人气爆棚的上海豫园元宵节漫步之行 4K'"
|
|
|
# print(title.strip().replace("\\", "").replace(" ", "").replace("\n", "").replace("/", "").replace("\r", "").replace("&NBSP", "").replace("&", ""))
|
|
|
- pass
|
|
|
+ pass
|