12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455 |
- # spider_config.py
- import os
- from urllib.parse import urljoin
- import yaml
- from core.utils.path_utils import spiders_config_path
- from core.models.spiders_config_models import PlatformConfig
class SpiderConfig:
    """Class-level accessor for the spiders YAML configuration.

    The parsed document is cached on the class after the first load, so the
    file is read at most once per process. All access goes through
    classmethods; no instance is required.
    """

    # Parsed YAML document; stays ``None`` until the first successful load.
    _config = None
    # Location of the YAML file (imported from ``core.utils.path_utils``).
    _config_path = spiders_config_path

    @classmethod
    def _load_yaml(cls):
        """Read and parse the YAML file into ``cls._config``.

        Raises:
            FileNotFoundError: If the configured path does not exist.
            ValueError: If the document's top level is not a mapping.
        """
        if not os.path.exists(cls._config_path):
            raise FileNotFoundError(f"[配置错误] 找不到配置文件: {cls._config_path}")

        with open(cls._config_path, "r", encoding="utf-8") as f:
            loaded = yaml.safe_load(f)

        # ``safe_load`` returns None for an empty document. Normalise that to
        # an empty mapping so later lookups fail with the regular
        # "platform not found" error instead of a TypeError on ``in None``.
        if loaded is None:
            loaded = {}
        if not isinstance(loaded, dict):
            raise ValueError(
                f"[配置错误] 配置文件顶层必须是映射类型: {cls._config_path}"
            )
        cls._config = loaded

    @classmethod
    def _section(cls, name: str) -> dict:
        """Return the mapping stored under ``name``, or ``{}`` if absent/null.

        A key that is present but has no value (YAML null) is treated as an
        empty section rather than being unpacked as ``None``.

        Raises:
            ValueError: If the section exists but is not a mapping.
        """
        section = cls._config.get(name)
        if section is None:
            return {}
        if not isinstance(section, dict):
            raise ValueError(f"[配置错误] 配置节 {name} 必须是映射类型")
        return section

    @classmethod
    def get_platform_config(cls, classname: str) -> PlatformConfig:
        """Return the validated configuration for one platform.

        The platform's section is layered over the ``default`` section
        (platform values win). If the merged result has no ``url`` but has
        both ``base_url`` and ``path``, ``url`` is derived with ``urljoin``.
        The YAML file is loaded on first use and cached afterwards.

        Args:
            classname: Top-level key of the platform section in the YAML file.

        Returns:
            A ``PlatformConfig`` built from the merged settings.

        Raises:
            FileNotFoundError: If the configuration file does not exist.
            ValueError: If the platform is not configured, a section has the
                wrong shape, or ``PlatformConfig`` rejects the merged values.
        """
        if cls._config is None:
            cls._load_yaml()

        if classname not in cls._config:
            raise ValueError(f"[配置错误] 未找到平台配置: {classname}")

        platform_config = cls._section(classname)
        base_config = cls._section("default")

        # Merge: platform-specific values override the defaults.
        merged = {**base_config, **platform_config}

        # An explicitly configured full url takes priority; otherwise build
        # it from base_url + path when both are available.
        if "url" not in merged and "base_url" in merged and "path" in merged:
            merged["url"] = urljoin(merged["base_url"], merged["path"])

        # Validation is delegated to the PlatformConfig model; its ValueError
        # is re-raised with the platform name and chained to keep the cause.
        try:
            return PlatformConfig(**merged)
        except ValueError as e:
            raise ValueError(
                f"[配置错误] 平台 {classname} 的配置验证失败: {e}"
            ) from e
# Example usage: load one platform's configuration and print it.
if __name__ == "__main__":
    print(SpiderConfig.get_platform_config("yuannifuqimanmanrecommend"))
|