|
@@ -634,13 +634,13 @@ class XiguaSearch:
|
|
|
while True:
|
|
|
|
|
|
signature = cls.random_signature()
|
|
|
- url = "https://www.ixigua.com/api/searchv2/complex/{}/{}".format(
|
|
|
+ url = "https://www.ixigua.com/api/searchv2/complex/{}/{}?order_type=publish_time&click_position=new".format(
|
|
|
quote(search_word), offset, signature)
|
|
|
|
|
|
headers = {
|
|
|
'referer': 'https://www.ixigua.com/search/{}/?logTag=594535e3690f17a88cdb&tab_name=search'.format(
|
|
|
quote(search_word)),
|
|
|
- 'cookie': 'ttwid=1|x_4RDmVTqp6BQ5Xy5AnuCZCQdDyDxv-fnMVWzj19VU0|1678693556|883092b75667cbcc48cbcc4b3b44d060aa205ef26c7640dc672d0cc50ddf0be9',
|
|
|
+ 'cookie': '1%7Cx_4RDmVTqp6BQ5Xy5AnuCZCQdDyDxv-fnMVWzj19VU0%7C1679381806%7C8606abc1cd7ca28e965c685e31e622e1a897475a1dfb4ed56bb2fc2ba08f2f62',
|
|
|
'user-agent': get_random_user_agent('pc'),
|
|
|
}
|
|
|
try:
|
|
@@ -654,9 +654,13 @@ class XiguaSearch:
|
|
|
for video_info in search_list:
|
|
|
v_type = video_info['type']
|
|
|
rule_dict = cls.get_rule(log_type, crawler)
|
|
|
+ publish_time = video_info['publish_time']
|
|
|
+ old_time = int(time.time()) - (3600 * 24 * rule_dict['publish_time'])
|
|
|
+ if publish_time <= old_time:
|
|
|
+ Common.logger(log_type, crawler).error(f'关键词:{search_word},抓取完毕,退出抓取\n')
|
|
|
+ return
|
|
|
if v_type == 'video':
|
|
|
item_id = video_info['data']['group_id']
|
|
|
- old_time = int(time.time()) - (3600 * 24 * rule_dict['publish_time'])
|
|
|
if video_info['data']['publish_time'] <= old_time:
|
|
|
Common.logger(log_type, crawler).error(f'视频:{item_id},不符合抓取规则\n')
|
|
|
continue
|
|
@@ -905,7 +909,7 @@ class XiguaSearch:
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
# print(Follow.get_signature("follow", "xigua", "95420624045", "local"))
|
|
|
- Search.get_search_videos('search', 'xigua', 'xigua_search', 'inner', 'prod', 'aliyun')
|
|
|
+ # XiguaSearch.get_search_videos('search', 'xigua', 'xigua_search', 'inner', 'prod', 'aliyun')
|
|
|
|
|
|
# Follow.get_videolist(log_type="follow",
|
|
|
# crawler="xigua",
|