|
@@ -130,48 +130,49 @@ class CrawlerChannelAccountVideos:
|
|
|
|
|
|
def crawler_each_account(self, channel_account: dict, last_buffer: str = "") -> None:
    """Crawl the videos of one channel account, page by page.

    Pages are fetched iteratively (not recursively) through
    ``get_channel_account_videos`` and every video of a page is handed to
    ``self.crawler_each_video``.

    Args:
        channel_account: Account record. ``account_id`` is required;
            ``max_cursor`` is optional and falls back to
            ``const.DEFAULT_CURSOR`` when missing or falsy.
        last_buffer: Pagination cursor to start from; the default empty
            string is sent for the first request.

    Crawling stops when any of the following happens:
        * the request returns ``ret != 200`` (the failure is logged);
        * the page contains no videos;
        * the first video of the page is older than ``max_cursor``;
        * the response's ``continueFlag`` is falsy;
        * the pagination cursor does not advance (guards against an
          endless loop on a misbehaving response).
    """
    channel_account_id = channel_account["account_id"]
    max_cursor = channel_account.get("max_cursor") or const.DEFAULT_CURSOR
    current_last_buffer = last_buffer

    while True:
        response = get_channel_account_videos(
            channel_account_id, last_buffer=current_last_buffer
        )
        if response["ret"] != 200:
            log(
                task="crawler_channel_account_videos",
                function="crawler_each_account",
                message="get_channel_account_videos failed",
                data={
                    "response": response,
                    "channel_account_id": channel_account_id,
                    "max_cursor": max_cursor,
                },
            )
            return

        response_data = response["data"]
        next_last_buffer = response_data["lastBuffer"]
        has_more = response_data["continueFlag"]
        video_list = response_data["object"]

        if not video_list:
            return

        # NOTE(review): only the first video's timestamp is compared, which
        # presumably relies on the page being ordered newest-first -- confirm.
        if video_list[0]["createtime"] < max_cursor:
            return

        crawl_video_list_bar = tqdm(video_list, desc="crawl videos")
        for video in crawl_video_list_bar:
            crawl_video_list_bar.set_postfix({"video_id": video["id"]})
            self.crawler_each_video(video)

        if not has_more:
            return

        # A loop has no recursion-depth limit, so an unchanged cursor would
        # re-request the same page forever; stop explicitly instead.
        if next_last_buffer == current_last_buffer:
            log(
                task="crawler_channel_account_videos",
                function="crawler_each_account",
                message="pagination cursor did not advance, stop crawling",
                data={
                    "channel_account_id": channel_account_id,
                    "last_buffer": current_last_buffer,
                },
            )
            return

        current_last_buffer = next_last_buffer
        # Throttle between consecutive page requests.
        time.sleep(const.SLEEP_SECOND)
|
|
|
|
|
|
def update_account_max_cursor(self, account_id: str) -> None:
|
|
|
"""
|