|
@@ -1,4 +1,5 @@
|
|
|
import asyncio
|
|
|
+import random
|
|
|
import uuid
|
|
|
from typing import List, Dict, Optional, Any
|
|
|
|
|
@@ -59,7 +60,7 @@ class BaseSpider:
|
|
|
|
|
|
# 爬取行为相关的配置
|
|
|
self.loop_times = self.platform_config.loop_times or 100
|
|
|
- self.loop_interval = self.platform_config.loop_interval or 5
|
|
|
+ self.loop_interval = self.platform_config.loop_interval
|
|
|
self.timeout = self.platform_config.request_timeout or 30
|
|
|
self.max_retries = self.platform_config.max_retries or 3
|
|
|
self.feishu_sheetid = self.platform_config.feishu_sheetid
|
|
@@ -69,7 +70,7 @@ class BaseSpider:
|
|
|
self.logger = LoggerManager.get_logger(platform=self.platform, mode=self.mode)
|
|
|
self.aliyun_log = LoggerManager.get_aliyun_logger(platform=self.platform, mode=self.mode)
|
|
|
self.logger.info(f"爬虫 '{self.platform}/{self.mode}' 初始化...")
|
|
|
- self.logger.info(f"最大循环次数: {self.loop_times}, 循环间隔: {self.loop_interval}s")
|
|
|
+ self.logger.info(f"最大循环次数: {self.loop_times}, 循环间隔时间: {self.loop_interval}")
|
|
|
|
|
|
def _setup_services(self):
|
|
|
"""初始化外部服务客户端。"""
|
|
@@ -282,9 +283,10 @@ class BaseSpider:
|
|
|
|
|
|
async def _wait_for_next_loop(self, current_loop: int) -> None:
|
|
|
"""等待下次循环"""
|
|
|
- if current_loop < self.loop_times and self.loop_interval > 0:
|
|
|
- self.logger.info(f"等待 {self.loop_interval} 秒后进行下一次请求")
|
|
|
- await asyncio.sleep(self.loop_interval)
|
|
|
+ if current_loop < self.loop_times:
|
|
|
+ wait_time = random.randint(self.loop_interval["min"], self.loop_interval["max"])
|
|
|
+ self.logger.info(f"等待 {wait_time} 秒后进行下一次请求")
|
|
|
+ await asyncio.sleep(wait_time)
|
|
|
|
|
|
async def before_run(self):
|
|
|
"""运行前预处理钩子,子类可覆盖"""
|