lierqiang pred 2 rokmi
rodič
commit
a99154e2b5
1 zmenený súbor, 12 pridaní a 26 odstránení
  1. 12 26
      xigua/xigua_search/xigua_search.py

+ 12 - 26
xigua/xigua_search/xigua_search.py

@@ -668,15 +668,7 @@ class XiguaSearch:
                 if publish_time <= old_time:
                     Common.logger(log_type, crawler).error(f'关键词:{search_word},抓取完毕,退出抓取\n')
                     return
-                filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
-                is_filter = False
-                for filter_word in filter_words:
-                    if filter_word in video_dict['video_title']:
-                        is_filter = True
-                        break
-                if is_filter:
-                    Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
-                    continue
+
                 if v_type == 'video':
                     item_id = video_info['data']['group_id']
                     if video_info['data']['publish_time'] <= old_time:
@@ -693,6 +685,15 @@ class XiguaSearch:
 
                     try:
                         video_dict = cls.get_video_info(log_type, crawler, item_id)
+                        filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
+                        is_filter = False
+                        for filter_word in filter_words:
+                            if filter_word in video_dict['video_title']:
+                                is_filter = True
+                                break
+                        if is_filter:
+                            Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
+                            continue
                         video_url_dict = cls.get_video_url(log_type, crawler, video_dict['gid'])
                         video_dict['video_width'] = video_url_dict["video_width"]
                         video_dict['video_height'] = video_url_dict["video_height"]
@@ -939,20 +940,5 @@ class XiguaSearch:
 
 
 if __name__ == '__main__':
-    # print(Follow.get_signature("follow", "xigua", "95420624045", "local"))
-    # XiguaSearch.get_search_videos('search', 'xigua', 'xigua_search', 'inner', 'prod', 'aliyun')
-
-    # Follow.get_videolist(log_type="follow",
-    #                      crawler="xigua",
-    #                      strategy="定向爬虫策略",
-    #                      our_uid="6267141",
-    #                      out_uid="95420624045",
-    #                      oss_endpoint="out",
-    #                      env="dev",
-    #                      machine="local")
-    # print(Follow.random_signature())
-    # rule = Follow.get_rule("follow", "xigua")
-    # print(type(rule))
-    # print(type(json.dumps(rule)))
-    # print(json.dumps(rule))
-    pass
+    XiguaSearch.get_search_videos('search', 'xigua', 'xigua_search', 'inner', 'prod', 'aliyun')
+