2 年之前 · 2994735a6c
--- a/xigua/xigua_search/xigua_search.py
+++ b/xigua/xigua_search/xigua_search.py
@@ -668,6 +668,15 @@ class XiguaSearch:
 
															                 if publish_time <= old_time:
														
 
															                     Common.logger(log_type, crawler).error(f'关键词:{search_word},抓取完毕，退出抓取\n')
														
 
															                     return
														
 
															+                filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
														
 
															+                is_filter = False
														
 
															+                for filter_word in filter_words:
														
 
															+                    if filter_word in video_dict['video_title']:
														
 
															+                        is_filter = True
														
 
															+                        break
														
 
															+                if is_filter:
														
 
															+                    Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
														
 
															+                    continue
														
 
															                 if v_type == 'video':
														
 
															                     item_id = video_info['data']['group_id']
														
 
															                     if video_info['data']['publish_time'] <= old_time:
														
@@ -680,6 +689,8 @@ class XiguaSearch:
 
															                         'video_time'] > rule_dict['max_duration']:
														
 
															                         Common.logger(log_type, crawler).error(f'关键词:{search_word},视频:{item_id},不符合抓取规则\n')
														
 
															                         continue
														
 
															+
														
 
															+
														
 
															                     try:
														
 
															                         video_dict = cls.get_video_info(log_type, crawler, item_id)
														
 
															                         video_url_dict = cls.get_video_url(log_type, crawler, video_dict['gid'])
														
@@ -796,11 +807,7 @@ class XiguaSearch:
 
															     @classmethod
														
 
															     def download_publish(cls, log_type, crawler, search_word, strategy, video_dict, rule_dict, our_uid, oss_endpoint,
														
 
															                          env, machine):
														
 
															-        filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
														
 
															-        for filter_word in filter_words:
														
 
															-            if filter_word in video_dict['video_title']:
														
 
															-                Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
														
 
															-                return
														
 
															+
														
 
															         Common.download_method(log_type=log_type, crawler=crawler, text='xigua_video',
														
 
															                                title=video_dict['video_title'], url=video_dict['video_url'])
														
 
															         # 下载音频