|
@@ -692,11 +692,7 @@ class XiguaSearch:
|
|
|
Common.logger(log_type, crawler).error(
|
|
|
f'关键词:{search_word},视频:{item_id},获取详情失败,原因:{e}')
|
|
|
continue
|
|
|
- filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
|
|
|
- for filter_word in filter_words:
|
|
|
- if filter_word in video_dict['video_title']:
|
|
|
- Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
|
|
|
- continue
|
|
|
+
|
|
|
if cls.repeat_video(log_type, crawler, video_dict['video_id'], env, machine) != 0:
|
|
|
Common.logger(log_type, crawler).info(
|
|
|
f'关键词:{search_word},gid:{video_dict["gid"]},视频已下载,无需重复下载\n')
|
|
@@ -800,6 +796,11 @@ class XiguaSearch:
|
|
|
@classmethod
|
|
|
def download_publish(cls, log_type, crawler, search_word, strategy, video_dict, rule_dict, our_uid, oss_endpoint,
|
|
|
env, machine):
|
|
|
+ filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
|
|
|
+ for filter_word in filter_words:
|
|
|
+ if filter_word in video_dict['video_title']:
|
|
|
+ Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
|
|
|
+ return
|
|
|
Common.download_method(log_type=log_type, crawler=crawler, text='xigua_video',
|
|
|
title=video_dict['video_title'], url=video_dict['video_url'])
|
|
|
# 下载音频
|