123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184 |
- """
- 配置健康检查工具
- 用于验证配置文件的完整性和正确性
- """
- import sys
- from typing import List, Dict, Any
- from core.utils.config_manager import get_config_manager
- from core.utils.spider_config import SpiderConfig
- from config import settings
class ConfigHealthCheck:
    """Configuration health-check tool.

    Runs three independent checks -- environment settings, per-platform
    spider configuration, and accessibility of the spider config file --
    and accumulates human-readable messages in ``self.errors`` and
    ``self.warnings``.
    """

    # Settings that must exist on the ``settings`` module with a truthy value.
    _REQUIRED_SETTINGS = (
        'DB_HOST', 'DB_USER', 'DB_PASSWORD', 'DB_NAME',
        'ROCKETMQ_ENDPOINT', 'ROCKETMQ_ACCESS_KEY_ID', 'ROCKETMQ_ACCESS_KEY_SECRET',
        'FEISHU_APPID', 'FEISHU_APPSECRET',
        'ALIYUN_ACCESS_KEY_ID', 'ALIYUN_ACCESS_KEY_SECRET',
        'REDIS_HOST', 'REDIS_PASSWORD',
    )

    # Settings that are expected to hold a URL (only the type is verified).
    _URL_SETTINGS = ('ROCKETMQ_ENDPOINT',)

    # Attributes every platform config object must carry with a truthy value.
    _SPIDER_FIELDS = ('platform', 'mode', 'url')

    def __init__(self):
        """Fetch the config manager and start with empty message lists."""
        self.config_manager = get_config_manager()
        self.errors: List[str] = []
        self.warnings: List[str] = []

    def check_env_config(self) -> bool:
        """Check that the required environment settings are present.

        Returns:
            True when this check added no errors, False otherwise. Errors
            already recorded by other checks do not affect the result.
        """
        errors_before = len(self.errors)
        try:
            # Required settings must exist and be truthy.
            for name in self._REQUIRED_SETTINGS:
                if not getattr(settings, name, None):
                    self.errors.append(f"环境配置缺失: {name}")

            # URL settings: a set value must at least be a string.
            # NOTE(review): this does not validate the URL syntax itself.
            for name in self._URL_SETTINGS:
                url = getattr(settings, name, None)
                if url and not isinstance(url, str):
                    self.errors.append(f"URL配置格式错误: {name}")

            return len(self.errors) == errors_before

        except Exception as e:
            # A health check reports problems instead of crashing.
            self.errors.append(f"环境配置检查异常: {str(e)}")
            return False

    def check_spider_configs(self) -> bool:
        """Check the configuration of every platform known to the manager.

        An empty platform list is recorded as a warning, not an error.

        Returns:
            True when this check added no errors (including the
            no-platforms case), False otherwise.
        """
        errors_before = len(self.errors)
        try:
            platforms = self.config_manager.list_platforms()
            if not platforms:
                self.warnings.append("未找到任何平台配置")
                return True

            for platform in platforms:
                try:
                    config = self.config_manager.get_platform_config(platform)
                    # Each required attribute must be present and truthy; a
                    # missing attribute raises and is reported below.
                    for field in self._SPIDER_FIELDS:
                        if not getattr(config, field):
                            self.errors.append(f"平台 {platform} 缺少 {field} 字段")
                except Exception as e:
                    # Keep going so one broken platform does not hide others.
                    self.errors.append(f"平台 {platform} 配置验证失败: {str(e)}")

            return len(self.errors) == errors_before

        except Exception as e:
            self.errors.append(f"爬虫配置检查异常: {str(e)}")
            return False

    def check_file_permissions(self) -> bool:
        """Check that the spider config file exists and is readable.

        Returns:
            True when this check added no errors, False otherwise.
        """
        import os
        from core.utils.path_utils import spiders_config_path

        errors_before = len(self.errors)
        try:
            # A missing file makes the readability test meaningless.
            if not os.path.exists(spiders_config_path):
                self.errors.append(f"爬虫配置文件不存在: {spiders_config_path}")
                return False

            if not os.access(spiders_config_path, os.R_OK):
                self.errors.append(f"爬虫配置文件不可读: {spiders_config_path}")

            return len(self.errors) == errors_before

        except Exception as e:
            self.errors.append(f"文件权限检查异常: {str(e)}")
            return False

    def run_all_checks(self) -> Dict[str, Any]:
        """Run every check from a clean state and summarize the outcome.

        Returns:
            A dict with ``success`` (all checks passed), copies of the
            ``errors`` and ``warnings`` lists, and ``details`` holding the
            individual result of each check.
        """
        self.errors.clear()
        self.warnings.clear()

        env_ok = self.check_env_config()
        spider_ok = self.check_spider_configs()
        file_ok = self.check_file_permissions()

        return {
            "success": env_ok and spider_ok and file_ok,
            "errors": self.errors.copy(),
            "warnings": self.warnings.copy(),
            "details": {
                "env_config": env_ok,
                "spider_configs": spider_ok,
                "file_permissions": file_ok,
            },
        }

    @staticmethod
    def _mark(ok: bool) -> str:
        """Return the pass/fail symbol used in the printed report."""
        return '✓' if ok else '✗'

    def print_report(self) -> Dict[str, Any]:
        """Run all checks, print a report to stdout and return the result.

        Returns:
            The dict produced by ``run_all_checks``.
        """
        result = self.run_all_checks()
        details = result["details"]
        separator = "=" * 50

        print(separator)
        print("配置健康检查报告")
        print(separator)

        if result["success"]:
            print("✓ 所有配置检查通过")
        else:
            print("✗ 配置存在问题")

        print("\n详细信息:")
        print(f" 环境配置: {self._mark(details['env_config'])}")
        print(f" 爬虫配置: {self._mark(details['spider_configs'])}")
        print(f" 文件权限: {self._mark(details['file_permissions'])}")

        if result["warnings"]:
            print("\n警告:")
            for warning in result["warnings"]:
                print(f" - {warning}")

        if result["errors"]:
            print("\n错误:")
            for error in result["errors"]:
                print(f" - {error}")

        print("\n统计信息:")
        try:
            stats = self.config_manager.get_config_stats()
            print(f" 平台数量: {stats['total_platforms']}")
            print(f" 运行环境: {stats['env']}")
        except Exception as e:
            # Statistics are informational; failing to get them is not fatal.
            print(f" 统计信息获取失败: {e}")

        print(separator)

        return result
def run_health_check():
    """Create a ConfigHealthCheck, print its report and return the result."""
    return ConfigHealthCheck().print_report()
if __name__ == "__main__":
    # Exit status 0 when every check passed, 1 otherwise.
    report = run_health_check()
    exit_code = 0 if report["success"] else 1
    sys.exit(exit_code)
|