Parcourir la source

update 西瓜搜索改为时间排序搜索

lierqiang il y a 2 ans
Parent
commit
94f2f05a84
1 fichier modifié avec 8 ajouts et 4 suppressions
  1. 8 4
      xigua/xigua_search/xigua_search.py

+ 8 - 4
xigua/xigua_search/xigua_search.py

@@ -634,13 +634,13 @@ class XiguaSearch:
         while True:
 
             signature = cls.random_signature()
-            url = "https://www.ixigua.com/api/searchv2/complex/{}/{}".format(
+            url = "https://www.ixigua.com/api/searchv2/complex/{}/{}?order_type=publish_time&click_position=new".format(
                 quote(search_word), offset, signature)
 
             headers = {
                 'referer': 'https://www.ixigua.com/search/{}/?logTag=594535e3690f17a88cdb&tab_name=search'.format(
                     quote(search_word)),
-                'cookie': 'ttwid=1|x_4RDmVTqp6BQ5Xy5AnuCZCQdDyDxv-fnMVWzj19VU0|1678693556|883092b75667cbcc48cbcc4b3b44d060aa205ef26c7640dc672d0cc50ddf0be9',
+                'cookie': '1%7Cx_4RDmVTqp6BQ5Xy5AnuCZCQdDyDxv-fnMVWzj19VU0%7C1679381806%7C8606abc1cd7ca28e965c685e31e622e1a897475a1dfb4ed56bb2fc2ba08f2f62',
                 'user-agent': get_random_user_agent('pc'),
             }
             try:
@@ -654,9 +654,13 @@ class XiguaSearch:
             for video_info in search_list:
                 v_type = video_info['type']
                 rule_dict = cls.get_rule(log_type, crawler)
+                publish_time = video_info['publish_time']
+                old_time = int(time.time()) - (3600 * 24 * rule_dict['publish_time'])
+                if publish_time <= old_time:
+                    Common.logger(log_type, crawler).error(f'关键词:{search_word},抓取完毕,退出抓取\n')
+                    return
                 if v_type == 'video':
                     item_id = video_info['data']['group_id']
-                    old_time = int(time.time()) - (3600 * 24 * rule_dict['publish_time'])
                     if video_info['data']['publish_time'] <= old_time:
                         Common.logger(log_type, crawler).error(f'视频:{item_id},不符合抓取规则\n')
                         continue
@@ -905,7 +909,7 @@ class XiguaSearch:
 
 if __name__ == '__main__':
     # print(Follow.get_signature("follow", "xigua", "95420624045", "local"))
-    Search.get_search_videos('search', 'xigua', 'xigua_search', 'inner', 'prod', 'aliyun')
+    # XiguaSearch.get_search_videos('search', 'xigua', 'xigua_search', 'inner', 'prod', 'aliyun')
 
     # Follow.get_videolist(log_type="follow",
     #                      crawler="xigua",