2 年之前 · 2994735a6c
--- a/xigua/xigua_search/xigua_search.py
+++ b/xigua/xigua_search/xigua_search.py
@@ -668,6 +668,15 @@ class XiguaSearch:
 
				                 if publish_time <= old_time:
			
 
				                     Common.logger(log_type, crawler).error(f'关键词:{search_word},抓取完毕，退出抓取\n')
			
 
				                     return
			
 
				+                filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
			
 
				+                is_filter = False
			
 
				+                for filter_word in filter_words:
			
 
				+                    if filter_word in video_dict['video_title']:
			
 
				+                        is_filter = True
			
 
				+                        break
			
 
				+                if is_filter:
			
 
				+                    Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
			
 
				+                    continue
			
 
				                 if v_type == 'video':
			
 
				                     item_id = video_info['data']['group_id']
			
 
				                     if video_info['data']['publish_time'] <= old_time:
			
@@ -680,6 +689,8 @@ class XiguaSearch:
 
				                         'video_time'] > rule_dict['max_duration']:
			
 
				                         Common.logger(log_type, crawler).error(f'关键词:{search_word},视频:{item_id},不符合抓取规则\n')
			
 
				                         continue
			
 
				+
			
 
				+
			
 
				                     try:
			
 
				                         video_dict = cls.get_video_info(log_type, crawler, item_id)
			
 
				                         video_url_dict = cls.get_video_url(log_type, crawler, video_dict['gid'])
			
@@ -796,11 +807,7 @@ class XiguaSearch:
 
				     @classmethod
			
 
				     def download_publish(cls, log_type, crawler, search_word, strategy, video_dict, rule_dict, our_uid, oss_endpoint,
			
 
				                          env, machine):
			
 
				-        filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
			
 
				-        for filter_word in filter_words:
			
 
				-            if filter_word in video_dict['video_title']:
			
 
				-                Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
			
 
				-                return
			
 
				+
			
 
				         Common.download_method(log_type=log_type, crawler=crawler, text='xigua_video',
			
 
				                                title=video_dict['video_title'], url=video_dict['video_url'])
			
 
				         # 下载音频