"""
Configuration documentation generator.

Automatically generates the reference documentation for the project's
configuration files.
"""
- import yaml
- from core.utils.config_manager import get_config_manager
- from core.utils.path_utils import spiders_config_path
class ConfigDocumentation:
    """
    Markdown documentation generator for the project's configuration.

    Produces the environment-settings table, the spider configuration
    reference, and a combined document that additionally reports the
    statistics returned by the configuration manager. The combined document
    can be written to disk with ``save_docs``.
    """

    # Metadata for every documented environment setting. Insertion order is
    # the row order of the generated table. Supported keys per entry:
    # "description", "required", "default" and "options".
    _ENV_SETTINGS = {
        "ENV": {
            "description": "运行环境",
            "default": "prod",
            "options": ["prod", "dev"],
        },
        "DB_HOST": {"description": "数据库主机地址", "required": True},
        "DB_PORT": {"description": "数据库端口", "default": 3306},
        "DB_USER": {"description": "数据库用户名", "required": True},
        "DB_PASSWORD": {"description": "数据库密码", "required": True},
        "DB_NAME": {"description": "数据库名称", "required": True},
        "DB_CHARSET": {"description": "数据库字符集", "required": True},
        "ROCKETMQ_ENDPOINT": {"description": "RocketMQ接入点", "required": True},
        "ROCKETMQ_ACCESS_KEY_ID": {
            "description": "RocketMQ访问密钥ID",
            "required": True,
        },
        "ROCKETMQ_ACCESS_KEY_SECRET": {
            "description": "RocketMQ访问密钥",
            "required": True,
        },
        "FEISHU_APPID": {"description": "飞书应用ID", "required": True},
        "FEISHU_APPSECRET": {"description": "飞书应用密钥", "required": True},
        "ALIYUN_ACCESS_KEY_ID": {
            "description": "阿里云访问密钥ID",
            "required": True,
        },
        "ALIYUN_ACCESS_KEY_SECRET": {
            "description": "阿里云访问密钥",
            "required": True,
        },
        "REDIS_HOST": {"description": "Redis主机地址", "required": True},
        "REDIS_PORT": {"description": "Redis端口", "default": 6379},
        "REDIS_PASSWORD": {"description": "Redis密码", "required": True},
    }

    # Field name -> description tables rendered in the spider reference.
    _GLOBAL_CONFIG_FIELDS = {
        "base_url": "基础URL,用于拼接完整请求URL",
        "request_timeout": "请求超时时间(秒)",
        "max_retries": "最大重试次数",
        "headers": "请求头信息",
    }

    _PLATFORM_CONFIG_FIELDS = {
        "platform": "平台名称",
        "mode": "爬取模式(如 recommend, author)",
        "path": "API路径",
        "url": "完整请求URL",
        "method": "HTTP请求方法",
        "request_body": "请求体参数",
        "loop_times": "循环次数",
        "loop_interval": "循环间隔(min/max)",
        "response_parse": "响应解析配置",
        "feishu_sheetid": "飞书表格ID",
    }

    _RESPONSE_PARSE_FIELDS = {
        "data_path": "数据列表路径",
        "next_cursor": "下一页游标路径",
        "has_more": "是否还有更多数据路径",
        "fields": "字段映射配置",
    }

    def __init__(self):
        """
        Obtain the configuration manager used to report the current state.
        """
        self.config_manager = get_config_manager()

    @staticmethod
    def _render_field_table(title: str, fields: dict) -> str:
        """
        Render one "### title" section holding a two-column field table.

        Args:
            title: Heading text placed after the ``###`` marker.
            fields: Mapping of field name to description, one row per item.

        Returns:
            The Markdown for the heading, table header and all rows. Every
            row, including the last, ends with a newline.
        """
        lines = [
            f"### {title}\n\n",
            "| 字段 | 描述 |\n",
            "|------|------|\n",
        ]
        lines.extend(
            f"| {field} | {description} |\n"
            for field, description in fields.items()
        )
        return "".join(lines)

    def generate_env_config_docs(self) -> str:
        """
        Build the Markdown table describing the environment settings.

        Returns:
            A Markdown string: heading, intro sentence, and one table row
            per setting with its description, whether it is required, and
            its default value (blank when there is none).
        """
        parts = [
            "# 环境配置说明\n\n",
            "环境配置通过 `.env` 文件进行配置,以下为所有可配置项:\n\n",
            "| 配置项 | 描述 | 是否必填 | 默认值 |\n",
            "|--------|------|----------|--------|\n",
        ]

        for key, info in self._ENV_SETTINGS.items():
            description = info.get("description", "")
            options = ", ".join(info.get("options", []))
            if options:
                description += f" (可选值: {options})"

            required = "是" if info.get("required", False) else "否"
            default = info.get("default")
            # Compare against None so falsy defaults such as 0 still render.
            default_text = "" if default is None else str(default)

            parts.append(
                f"| {key} | {description} | {required} | {default_text} |\n"
            )

        return "".join(parts)

    def generate_spider_config_docs(self) -> str:
        """
        Build the Markdown reference for the spider configuration file.

        The file at ``spiders_config_path`` is embedded verbatim as a YAML
        example, followed by tables describing the global, platform and
        response-parse fields.

        Returns:
            The Markdown string for the spider configuration section.

        Raises:
            OSError: If the spider configuration file cannot be opened or
                read (for example ``FileNotFoundError``).
        """
        with open(spiders_config_path, "r", encoding="utf-8") as f:
            yaml_example = f.read()

        # The closing fence must start on its own line. A config file that
        # lacks a trailing newline would otherwise glue the fence onto the
        # last YAML line and break the rendered code block.
        if yaml_example and not yaml_example.endswith("\n"):
            yaml_example += "\n"

        field_tables = "\n".join(
            (
                self._render_field_table("全局配置字段", self._GLOBAL_CONFIG_FIELDS),
                self._render_field_table("平台配置字段", self._PLATFORM_CONFIG_FIELDS),
                self._render_field_table("响应解析字段", self._RESPONSE_PARSE_FIELDS),
            )
        )

        return "".join(
            (
                "# 爬虫配置说明\n\n",
                "爬虫配置通过 `config/spiders_config.yaml` 文件进行配置。\n\n",
                # Configuration example
                "## 配置示例\n\n```yaml\n",
                yaml_example,
                "```\n\n",
                # Field reference
                "## 字段说明\n\n",
                field_tables,
            )
        )

    def generate_complete_docs(self) -> str:
        """
        Build the full configuration document.

        Combines the environment section, the spider section and a
        "current configuration state" section filled from
        ``self.config_manager.get_config_stats()``.

        Returns:
            The complete Markdown document as a string.

        Raises:
            OSError: Propagated from ``generate_spider_config_docs`` when
                the spider configuration file cannot be read.
        """
        parts = [
            "# AutoScraperX 配置说明\n\n",
            "本文档详细说明了AutoScraperX项目的配置项。\n\n",
            "---\n\n",
            self.generate_env_config_docs(),
            "\n---\n\n",
            self.generate_spider_config_docs(),
            "\n---\n\n",
            "## 当前配置状态\n\n",
        ]

        # Best effort: a failure while collecting statistics is reported
        # inside the document instead of aborting generation.
        try:
            stats = self.config_manager.get_config_stats()
            parts.append(f"- 平台配置数量: {stats['total_platforms']}\n")
            parts.append(f"- 运行环境: {stats['env']}\n")
            parts.append(f"- 配置文件路径: {stats['config_file']}\n")
        except Exception as e:
            parts.append(f"配置状态获取失败: {e}\n")

        return "".join(parts)

    def save_docs(self, filepath: str = "CONFIGURATION.md"):
        """
        Generate the complete document and write it to ``filepath``.

        Args:
            filepath: Destination path; an existing file is overwritten.

        Returns:
            The ``filepath`` that was written.
        """
        docs = self.generate_complete_docs()
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(docs)
        return filepath
def generate_config_docs():
    """
    Generate the configuration document and save it to the default path.

    Creates a ``ConfigDocumentation`` instance, writes the complete document
    using ``save_docs`` with its default file path, prints where it was
    saved, and returns that path.
    """
    saved_path = ConfigDocumentation().save_docs()
    print(f"配置文档已保存到: {saved_path}")
    return saved_path
# Script entry point: generate and save the configuration document when this
# file is executed directly rather than imported.
if __name__ == "__main__":
    generate_config_docs()
|