spider_config.py 2.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. # spider_config.py
  2. import os
  3. from urllib.parse import urljoin
  4. import yaml
  5. from core.utils.path_utils import spiders_config_path
  6. from core.models.spiders_config_models import PlatformConfig
  7. class SpiderConfig:
  8. _config = None
  9. _config_path = spiders_config_path
  10. @classmethod
  11. def _load_yaml(cls):
  12. """
  13. 加载spiders_config.yaml
  14. :return:
  15. """
  16. if not os.path.exists(cls._config_path):
  17. raise FileNotFoundError(f"[配置错误] 找不到配置文件: {cls._config_path}")
  18. # 检查文件是否修改过
  19. with open(cls._config_path, "r", encoding="utf-8") as f:
  20. cls._config = yaml.safe_load(f)
  21. @classmethod
  22. def get_platform_config(cls, classname: str) -> PlatformConfig:
  23. """
  24. 获取平台配置,并拼接完整 URL
  25. 支持类方法调用 + 单次加载配置
  26. """
  27. if cls._config is None:
  28. cls._load_yaml()
  29. if classname not in cls._config:
  30. raise ValueError(f"[配置错误] 未找到平台配置: {classname}")
  31. platform_config = cls._config.get(classname, {})
  32. base_config = cls._config.get("default", {})
  33. # 合并配置:平台配置覆盖默认配置
  34. merged = {**base_config, **platform_config}
  35. # 自动拼接完整 url(优先用完整 url)
  36. if "url" not in merged and "base_url" in merged and "path" in merged:
  37. merged["url"] = urljoin(merged["base_url"], merged["path"])
  38. # 使用 pydantic 进行验证
  39. try:
  40. return PlatformConfig(**merged)
  41. except Exception as e:
  42. raise ValueError(f"[配置错误] 平台 {classname} 的配置验证失败: {e}")
  43. @classmethod
  44. def reload_config(cls):
  45. """
  46. 强制重新加载配置文件
  47. """
  48. cls._config = None
  49. cls._load_yaml()
  50. @classmethod
  51. def list_all_platforms(cls):
  52. """
  53. 获取所有平台配置名称列表
  54. """
  55. if cls._config is None:
  56. cls._load_yaml()
  57. platforms = [key for key in cls._config.keys() if key != "default"]
  58. return platforms
  59. @classmethod
  60. def get_config_stats(cls):
  61. """
  62. 获取配置统计信息
  63. """
  64. if cls._config is None:
  65. cls._load_yaml()
  66. return {
  67. "total_platforms": len(cls.list_all_platforms()),
  68. "last_modified": os.path.getmtime(cls._config_path) if os.path.exists(cls._config_path) else 0,
  69. "config_file": cls._config_path
  70. }
  71. # 示例使用
  72. if __name__ == '__main__':
  73. config = SpiderConfig.get_platform_config("yuannifuqimanmanrecommend")
  74. print(config)