spider_config.py 1.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. # spider_config.py
  2. import os
  3. from urllib.parse import urljoin
  4. import yaml
  5. from core.utils.path_utils import spiders_config_path
  6. from core.models.spiders_config_models import PlatformConfig
  7. class SpiderConfig:
  8. _config = None
  9. _config_path = spiders_config_path
  10. @classmethod
  11. def _load_yaml(cls):
  12. """
  13. 加载spiders_config.yaml
  14. :return:
  15. """
  16. if not os.path.exists(cls._config_path):
  17. raise FileNotFoundError(f"[配置错误] 找不到配置文件: {cls._config_path}")
  18. with open(cls._config_path, "r", encoding="utf-8") as f:
  19. cls._config = yaml.safe_load(f)
  20. @classmethod
  21. def get_platform_config(cls, classname: str) -> PlatformConfig:
  22. """
  23. 获取平台配置,并拼接完整 URL
  24. 支持类方法调用 + 单次加载配置
  25. """
  26. if cls._config is None:
  27. cls._load_yaml()
  28. if classname not in cls._config:
  29. raise ValueError(f"[配置错误] 未找到平台配置: {classname}")
  30. platform_config = cls._config.get(classname, {})
  31. base_config = cls._config.get("default", {})
  32. # 合并配置:平台配置覆盖默认配置
  33. merged = {**base_config, **platform_config}
  34. # 自动拼接完整 url(优先用完整 url)
  35. if "url" not in merged and "base_url" in merged and "path" in merged:
  36. merged["url"] = urljoin(merged["base_url"], merged["path"])
  37. # 使用 pydantic 进行验证
  38. try:
  39. return PlatformConfig(**merged)
  40. except ValueError as e:
  41. raise ValueError(f"[配置错误] 平台 {classname} 的配置验证失败: {e}")
  42. # 示例使用
  43. if __name__ == '__main__':
  44. config = SpiderConfig.get_platform_config("yuannifuqimanmanrecommend")
  45. print(config)