spider_config.py 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. # spider_config.py
  2. import os
  3. from urllib.parse import urljoin
  4. import yaml
  5. from core.utils.path_utils import spiders_config_path
  6. from core.models.spiders_config_models import BaseConfig, PlatformConfig
  7. class SpiderConfig:
  8. _config = None
  9. _config_path = spiders_config_path
  10. @classmethod
  11. def _load_yaml(cls):
  12. if not os.path.exists(cls._config_path):
  13. raise FileNotFoundError(f"[配置错误] 找不到配置文件: {cls._config_path}")
  14. with open(cls._config_path, "r", encoding="utf-8") as f:
  15. cls._config = yaml.safe_load(f)
  16. @classmethod
  17. def get_platform_config(cls, classname: str) -> PlatformConfig:
  18. """
  19. 获取平台配置,并拼接完整 URL
  20. 支持类方法调用 + 单次加载配置
  21. """
  22. if cls._config is None:
  23. cls._load_yaml()
  24. if classname not in cls._config:
  25. raise ValueError(f"[配置错误] 未找到平台配置: {classname}")
  26. platform_config = cls._config.get(classname, {})
  27. base_config = cls._config.get("default", {})
  28. # 合并配置:平台配置覆盖默认配置
  29. merged = {**base_config, **platform_config}
  30. # 自动拼接完整 url(优先用完整 url)
  31. if "url" not in merged and "base_url" in merged and "path" in merged:
  32. merged["url"] = urljoin(merged["base_url"], merged["path"])
  33. # 使用 pydantic 进行验证
  34. try:
  35. return PlatformConfig(**merged)
  36. except ValueError as e:
  37. raise ValueError(f"[配置错误] 平台 {classname} 的配置验证失败: {e}")
  38. # 示例使用
  39. if __name__ == '__main__':
  40. config = SpiderConfig.get_platform_config("benshanzhufurecommend")
  41. print(config)