spider_config.py 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364
  1. # spider_config.py 修改后
  2. import os
  3. from urllib.parse import urljoin
  4. import yaml
  5. from core.utils.path_utils import spiders_config_path
  6. from core.models.spiders_config_models import PlatformConfig
  7. class SpiderConfig:
  8. _config = None
  9. _config_path = spiders_config_path
  10. @classmethod
  11. def _load_yaml(cls):
  12. if not os.path.exists(cls._config_path):
  13. raise FileNotFoundError(f"[配置错误] 找不到配置文件: {cls._config_path}")
  14. with open(cls._config_path, "r", encoding="utf-8") as f:
  15. cls._config = yaml.safe_load(f)
  16. @classmethod
  17. def get_platform_config(cls, classname: str) -> PlatformConfig:
  18. if cls._config is None:
  19. cls._load_yaml()
  20. if classname not in cls._config:
  21. raise ValueError(f"[配置错误] 未找到平台配置: {classname}")
  22. platform_config = cls._config.get(classname, {})
  23. base_config = cls._config.get("default", {})
  24. # 合并配置
  25. merged = {**base_config, **platform_config}
  26. # 自动拼接完整 url
  27. if "url" not in merged and "base_url" in merged and "path" in merged:
  28. merged["url"] = urljoin(merged["base_url"], merged["path"])
  29. try:
  30. # Pydantic 2.x 直接实例化
  31. return PlatformConfig(**merged)
  32. except Exception as e:
  33. raise ValueError(f"[配置错误] 平台 {classname} 的配置验证失败: {e}")
  34. @classmethod
  35. def reload_config(cls):
  36. cls._config = None
  37. cls._load_yaml()
  38. @classmethod
  39. def list_all_platforms(cls):
  40. if cls._config is None:
  41. cls._load_yaml()
  42. return [key for key in cls._config.keys() if key != "default"]
  43. @classmethod
  44. def get_config_stats(cls):
  45. if cls._config is None:
  46. cls._load_yaml()
  47. return {
  48. "total_platforms": len(cls.list_all_platforms()),
  49. "last_modified": os.path.getmtime(cls._config_path) if os.path.exists(cls._config_path) else 0,
  50. "config_file": cls._config_path
  51. }