|
@@ -399,7 +399,7 @@ class Follow:
|
|
|
# 站内 UID 为空,且数据库中(youtube+out_user_id)返回数量 == 0,则创建新的站内账号
|
|
|
if our_uid is None:
|
|
|
sql = f""" select * from crawler_user where platform="{cls.platform}" and out_user_id="{out_uid}" """
|
|
|
- our_user_info = MysqlHelper.get_values(log_type, crawler, sql, env)
|
|
|
+ our_user_info = MysqlHelper.get_values(log_type, crawler, sql, env, machine)
|
|
|
# 数据库中(youtube + out_user_id)返回数量 == 0,则创建站内账号UID,并写入定向账号飞书表。并结合站外用户信息,一并写入爬虫账号数据库
|
|
|
if our_user_info is None or len(our_user_info) == 0:
|
|
|
# 获取站外账号信息,写入数据库
|
|
@@ -417,7 +417,8 @@ class Follow:
|
|
|
'tagName': tag,
|
|
|
}
|
|
|
our_uid = Users.create_user(log_type, crawler, create_user_dict, env)
|
|
|
- if 'env' == 'prod':
|
|
|
+ Common.logger(log_type, crawler).info(f'新创建的站内UID:{our_uid}')
|
|
|
+ if env == 'prod':
|
|
|
our_user_link = f'https://admin.piaoquantv.com/ums/user/{our_uid}/post'
|
|
|
else:
|
|
|
our_user_link = f'https://testadmin.piaoquantv.com/ums/user/{our_uid}/post'
|
|
@@ -443,7 +444,7 @@ class Follow:
|
|
|
{out_fans},
|
|
|
"{cls.platform}",
|
|
|
"{tag}") """
|
|
|
- MysqlHelper.update_values(log_type, crawler, sql, env)
|
|
|
+ MysqlHelper.update_values(log_type, crawler, sql, env, machine)
|
|
|
Common.logger(log_type, crawler).info('用户信息插入数据库成功!\n')
|
|
|
# 数据库中(youtube + out_user_id)返回数量 != 0,则直接把数据库中的站内 UID 写入飞书
|
|
|
else:
|
|
@@ -690,7 +691,7 @@ class Follow:
|
|
|
# 发布时间<=30天
|
|
|
publish_time = int(time.mktime(time.strptime(video_dict['publish_time'], "%Y-%m-%d")))
|
|
|
if int(time.time()) - publish_time <= 3600*24*30:
|
|
|
- cls.download_publish(log_type, crawler, video_dict, strategy, our_uid, env, oss_endpoint)
|
|
|
+ cls.download_publish(log_type, crawler, video_dict, strategy, our_uid, env, oss_endpoint, machine)
|
|
|
else:
|
|
|
Common.logger(log_type, crawler).info('发布时间超过30天\n')
|
|
|
return
|
|
@@ -895,6 +896,11 @@ class Follow:
|
|
|
if Translate.is_contains_chinese(video_title) is False:
|
|
|
video_title = Translate.google_translate(video_title, machine) # 自动翻译标题为中文
|
|
|
|
|
|
+ if 'lengthSeconds' not in videoDetails:
|
|
|
+ duration = 0
|
|
|
+ else:
|
|
|
+ duration = int(videoDetails['lengthSeconds'])
|
|
|
+
|
|
|
# play_cnt
|
|
|
if 'viewCount' not in videoDetails:
|
|
|
play_cnt = 0
|
|
@@ -946,6 +952,7 @@ class Follow:
|
|
|
video_dict = {
|
|
|
'video_title': video_title,
|
|
|
'video_id': video_id,
|
|
|
+ 'duration': duration,
|
|
|
'play_cnt': play_cnt,
|
|
|
'publish_time': publish_time,
|
|
|
'user_name': user_name,
|
|
@@ -956,11 +963,13 @@ class Follow:
|
|
|
return video_dict
|
|
|
|
|
|
@classmethod
|
|
|
- def download_publish(cls, log_type, crawler, video_dict, strategy, our_uid, env, oss_endpoint):
|
|
|
+ def download_publish(cls, log_type, crawler, video_dict, strategy, our_uid, env, oss_endpoint, machine):
|
|
|
sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{video_dict['video_id']}" """
|
|
|
- repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
|
|
|
+ repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env, machine)
|
|
|
if video_dict['video_title'] == '' or video_dict['video_url'] == '':
|
|
|
Common.logger(log_type, crawler).info('无效视频\n')
|
|
|
+ elif video_dict['duration'] > 600 or video_dict['duration'] < 60:
|
|
|
+ Common.logger(log_type, crawler).info(f"时长:{video_dict['duration']}不满足规则\n")
|
|
|
elif repeat_video is not None and len(repeat_video) != 0:
|
|
|
Common.logger(log_type, crawler).info('视频已下载\n')
|
|
|
else:
|
|
@@ -1065,7 +1074,7 @@ class Follow:
|
|
|
"{rule}",
|
|
|
{int(video_width)},
|
|
|
{int(video_height)}) """
|
|
|
- MysqlHelper.update_values(log_type, crawler, sql, env)
|
|
|
+ MysqlHelper.update_values(log_type, crawler, sql, env, machine)
|
|
|
Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
|
|
|
|
|
|
@classmethod
|