|
@@ -20,6 +20,7 @@ from common.feishu import Feishu
|
|
|
from common.getuser import getUser
|
|
|
from common.db import MysqlHelper
|
|
|
from common.publish import Publish
|
|
|
+from common.public import random_title, get_config_from_mysql
|
|
|
from common.public import get_user_from_mysql
|
|
|
from common.userAgent import get_random_user_agent
|
|
|
|
|
@@ -93,26 +94,6 @@ class KuaiShouFollow:
|
|
|
except Exception as e:
|
|
|
Common.logger(log_type, crawler).error(f'filter_words异常:{e}\n')
|
|
|
|
|
|
- # 万能标题
|
|
|
- @classmethod
|
|
|
- def random_title(cls, log_type, crawler):
|
|
|
- try:
|
|
|
- while True:
|
|
|
- random_title_sheet = Feishu.get_values_batch(log_type, crawler, '0DiyXe')
|
|
|
- if random_title_sheet is None:
|
|
|
- Common.logger(log_type, crawler).warning(f"filter_words_sheet:{random_title_sheet} 10秒钟后重试")
|
|
|
- continue
|
|
|
- random_title_list = []
|
|
|
- for x in random_title_sheet:
|
|
|
- for y in x:
|
|
|
- if y is None:
|
|
|
- pass
|
|
|
- else:
|
|
|
- random_title_list.append(y)
|
|
|
- return random.choice(random_title_list)
|
|
|
- except Exception as e:
|
|
|
- Common.logger(log_type, crawler).error(f'random_title:{e}\n')
|
|
|
-
|
|
|
# 获取站外用户信息
|
|
|
@classmethod
|
|
|
def get_out_user_info(cls, log_type, crawler, out_uid):
|
|
@@ -261,7 +242,7 @@ class KuaiShouFollow:
|
|
|
|
|
|
# 处理视频标题
|
|
|
@classmethod
|
|
|
- def video_title(cls, log_type, crawler, title):
|
|
|
+ def video_title(cls, log_type, crawler, env, title):
|
|
|
title_split1 = title.split(" #")
|
|
|
if title_split1[0] != "":
|
|
|
title1 = title_split1[0]
|
|
@@ -288,7 +269,7 @@ class KuaiShouFollow:
|
|
|
.replace("?", "").replace('"', "").replace("<", "") \
|
|
|
.replace(">", "").replace("|", "").replace("@", "").replace('"', '').replace("'", '')[:40]
|
|
|
if video_title.replace(" ", "") == "" or video_title == "。。。" or video_title == "...":
|
|
|
- return cls.random_title(log_type, crawler)
|
|
|
+ return random_title(log_type, crawler, env, text='title')
|
|
|
else:
|
|
|
return video_title
|
|
|
|
|
@@ -359,11 +340,11 @@ class KuaiShouFollow:
|
|
|
|
|
|
# video_title
|
|
|
if 'caption' not in feeds[i]['photo']:
|
|
|
- video_title = cls.random_title(log_type, crawler)
|
|
|
+ video_title = random_title(log_type, crawler, env, text='title')
|
|
|
elif feeds[i]['photo']['caption'].strip() == "":
|
|
|
- video_title = cls.random_title(log_type, crawler)
|
|
|
+ video_title = random_title(log_type, crawler, env, text='title')
|
|
|
else:
|
|
|
- video_title = cls.video_title(log_type, crawler, feeds[i]['photo']['caption'])
|
|
|
+ video_title = cls.video_title(log_type, crawler, env, feeds[i]['photo']['caption'])
|
|
|
|
|
|
if 'videoResource' not in feeds[i]['photo'] \
|
|
|
and 'manifest' not in feeds[i]['photo'] \
|
|
@@ -546,15 +527,15 @@ class KuaiShouFollow:
|
|
|
@classmethod
|
|
|
def download_publish(cls, log_type, crawler, strategy, video_dict, rule_dict, our_uid, oss_endpoint, env, machine):
|
|
|
try:
|
|
|
+ filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')  # fetched via get_config_from_mysql with text='filter'
|
|
|
+ for filter_word in filter_words:
|
|
|
+ if filter_word in video_dict['video_title']:
|
|
|
+ Common.logger(log_type, crawler).info(f"标题已中过滤词:{video_dict['video_title']}\n")  # f-string, like the file's other log calls; does not rely on the logger interpolating '{}'
|
|
|
+ return  # title contains a filter word: leave before the repeat check and the download
|
|
|
download_finished = False
|
|
|
if cls.repeat_video(log_type, crawler, video_dict['video_id'], video_dict['video_title'],
|
|
|
video_dict['publish_time_str'], env, machine) != 0:
|
|
|
Common.logger(log_type, crawler).info('视频已下载\n')
|
|
|
- # elif video_dict['video_id'] in [x for y in Feishu.get_values_batch(log_type, crawler, "3cd128") for x in y]:
|
|
|
- # Common.logger(log_type, crawler).info('视频已下载\n')
|
|
|
- elif any(word if word in video_dict['video_title'] else False for word in
|
|
|
- cls.filter_words(log_type, crawler)) is True:
|
|
|
- Common.logger(log_type, crawler).info('标题已中过滤词\n')
|
|
|
else:
|
|
|
# 下载视频
|
|
|
Common.download_method(log_type=log_type, crawler=crawler, text='video',
|
|
@@ -674,13 +655,14 @@ class KuaiShouFollow:
|
|
|
our_uid = user["media_id"]
|
|
|
Common.logger(log_type, crawler).info(f"开始抓取 {user_name} 用户主页视频\n")
|
|
|
cls.get_videoList(log_type=log_type,
|
|
|
- crawler=crawler,
|
|
|
- strategy=strategy,
|
|
|
- our_uid=our_uid,
|
|
|
- out_uid=out_uid,
|
|
|
- oss_endpoint=oss_endpoint,
|
|
|
- env=env,
|
|
|
- machine=machine)
|
|
|
+ crawler=crawler,
|
|
|
+ strategy=strategy,
|
|
|
+ our_uid=our_uid,
|
|
|
+ out_uid=out_uid,
|
|
|
+ oss_endpoint=oss_endpoint,
|
|
|
+ env=env,
|
|
|
+ machine=machine)
|
|
|
+
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
KuaiShouFollow.get_videoList(log_type="follow",
|