12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364 |
- # spider_config.py 修改后
- import os
- from urllib.parse import urljoin
- import yaml
- from core.utils.path_utils import spiders_config_path
- from core.models.spiders_config_models import PlatformConfig
class SpiderConfig:
    """Lazy, class-level accessor for the spiders YAML configuration.

    The file at ``_config_path`` is parsed on first use and cached in
    ``_config``. Its top level is expected to be a mapping of section name
    to settings; the ``"default"`` section supplies fallback values that
    are merged underneath every platform section.
    """

    # Parsed top-level YAML mapping; None until the first successful load.
    _config = None
    # Location of the YAML file (imported from core.utils.path_utils).
    _config_path = spiders_config_path

    @classmethod
    def _load_yaml(cls) -> None:
        """Parse the YAML file and cache the result in ``cls._config``.

        Raises:
            FileNotFoundError: The configuration file does not exist.
            ValueError: The top-level YAML value is not a mapping.
        """
        if not os.path.exists(cls._config_path):
            raise FileNotFoundError(f"[配置错误] 找不到配置文件: {cls._config_path}")
        with open(cls._config_path, "r", encoding="utf-8") as f:
            loaded = yaml.safe_load(f)
        # safe_load yields None for an empty document. Cache an empty mapping
        # instead so the "is None" laziness check is not re-triggered on
        # every call and membership tests below do not hit None.
        if loaded is None:
            loaded = {}
        if not isinstance(loaded, dict):
            raise ValueError(
                f"[配置错误] 配置文件顶层必须是映射: {cls._config_path}"
            )
        cls._config = loaded

    @classmethod
    def get_platform_config(cls, classname: str) -> PlatformConfig:
        """Build the validated configuration for one platform.

        The platform's section is layered over the ``"default"`` section
        (platform values win). When the merged result has no ``url`` but has
        both ``base_url`` and ``path``, ``url`` is filled in via ``urljoin``.

        Args:
            classname: Name of the section to look up in the YAML file.

        Returns:
            A ``PlatformConfig`` built from the merged settings.

        Raises:
            FileNotFoundError: The configuration file does not exist.
            ValueError: The section is missing, or ``PlatformConfig``
                rejected the merged settings.
        """
        if cls._config is None:
            cls._load_yaml()
        if classname not in cls._config:
            raise ValueError(f"[配置错误] 未找到平台配置: {classname}")

        # A section declared without a body (e.g. ``foo:``) parses as None;
        # treat it as an empty mapping so the merge below does not fail.
        platform_config = cls._config[classname] or {}
        base_config = cls._config.get("default") or {}

        # Merge: platform-specific values override the defaults.
        merged = {**base_config, **platform_config}

        # Derive the full URL only when it was not given explicitly.
        if "url" not in merged and "base_url" in merged and "path" in merged:
            merged["url"] = urljoin(merged["base_url"], merged["path"])

        try:
            return PlatformConfig(**merged)
        except Exception as e:
            # Re-raise as ValueError for callers, keeping the original
            # validation error attached as the explicit cause.
            raise ValueError(f"[配置错误] 平台 {classname} 的配置验证失败: {e}") from e

    @classmethod
    def reload_config(cls) -> None:
        """Drop the cached configuration and read the file again.

        Raises:
            FileNotFoundError: The configuration file does not exist.
            ValueError: The top-level YAML value is not a mapping.
        """
        cls._config = None
        cls._load_yaml()

    @classmethod
    def list_all_platforms(cls) -> list:
        """Return every section name except ``"default"``, in file order."""
        if cls._config is None:
            cls._load_yaml()
        return [key for key in cls._config if key != "default"]

    @classmethod
    def get_config_stats(cls) -> dict:
        """Return summary information about the configuration file.

        Returns:
            A dict with ``total_platforms`` (number of non-default sections),
            ``last_modified`` (file mtime, or 0 when it cannot be read) and
            ``config_file`` (the configured path).
        """
        if cls._config is None:
            cls._load_yaml()
        # Ask for the mtime directly rather than checking existence first:
        # the file may disappear between the two calls.
        try:
            last_modified = os.path.getmtime(cls._config_path)
        except OSError:
            last_modified = 0
        return {
            "total_platforms": len(cls.list_all_platforms()),
            "last_modified": last_modified,
            "config_file": cls._config_path,
        }
|