lierqiang 2 年之前
父节点
当前提交
2994735a6c
共有 1 个文件被更改,包括 12 次插入和 5 次删除
  1. 12 5
      xigua/xigua_search/xigua_search.py

+ 12 - 5
xigua/xigua_search/xigua_search.py

@@ -668,6 +668,15 @@ class XiguaSearch:
                 if publish_time <= old_time:
                     Common.logger(log_type, crawler).error(f'关键词:{search_word},抓取完毕,退出抓取\n')
                     return
+                filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
+                is_filter = False
+                for filter_word in filter_words:
+                    if filter_word in video_dict['video_title']:
+                        is_filter = True
+                        break
+                if is_filter:
+                    Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
+                    continue
                 if v_type == 'video':
                     item_id = video_info['data']['group_id']
                     if video_info['data']['publish_time'] <= old_time:
@@ -680,6 +689,8 @@ class XiguaSearch:
                         'video_time'] > rule_dict['max_duration']:
                         Common.logger(log_type, crawler).error(f'关键词:{search_word},视频:{item_id},不符合抓取规则\n')
                         continue
+
+
                     try:
                         video_dict = cls.get_video_info(log_type, crawler, item_id)
                         video_url_dict = cls.get_video_url(log_type, crawler, video_dict['gid'])
@@ -796,11 +807,7 @@ class XiguaSearch:
     @classmethod
     def download_publish(cls, log_type, crawler, search_word, strategy, video_dict, rule_dict, our_uid, oss_endpoint,
                          env, machine):
-        filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
-        for filter_word in filter_words:
-            if filter_word in video_dict['video_title']:
-                Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
-                return
+
         Common.download_method(log_type=log_type, crawler=crawler, text='xigua_video',
                                title=video_dict['video_title'], url=video_dict['video_url'])
         # 下载音频