config_documentation.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219
"""
Configuration documentation generator.

Automatically generates the documentation that describes the configuration files.
"""
  5. import yaml
  6. from core.utils.config_manager import get_config_manager
  7. from core.utils.path_utils import spiders_config_path
  8. class ConfigDocumentation:
  9. """
  10. 配置文档生成工具
  11. """
  12. def __init__(self):
  13. self.config_manager = get_config_manager()
  14. def generate_env_config_docs(self) -> str:
  15. """
  16. 生成环境配置文档
  17. """
  18. docs = "# 环境配置说明\n\n"
  19. docs += "环境配置通过 `.env` 文件进行配置,以下为所有可配置项:\n\n"
  20. env_settings_info = {
  21. "ENV": {
  22. "description": "运行环境",
  23. "default": "prod",
  24. "options": ["prod", "dev"]
  25. },
  26. "DB_HOST": {
  27. "description": "数据库主机地址",
  28. "required": True
  29. },
  30. "DB_PORT": {
  31. "description": "数据库端口",
  32. "default": 3306
  33. },
  34. "DB_USER": {
  35. "description": "数据库用户名",
  36. "required": True
  37. },
  38. "DB_PASSWORD": {
  39. "description": "数据库密码",
  40. "required": True
  41. },
  42. "DB_NAME": {
  43. "description": "数据库名称",
  44. "required": True
  45. },
  46. "DB_CHARSET": {
  47. "description": "数据库字符集",
  48. "required": True
  49. },
  50. "ROCKETMQ_ENDPOINT": {
  51. "description": "RocketMQ接入点",
  52. "required": True
  53. },
  54. "ROCKETMQ_ACCESS_KEY_ID": {
  55. "description": "RocketMQ访问密钥ID",
  56. "required": True
  57. },
  58. "ROCKETMQ_ACCESS_KEY_SECRET": {
  59. "description": "RocketMQ访问密钥",
  60. "required": True
  61. },
  62. "FEISHU_APPID": {
  63. "description": "飞书应用ID",
  64. "required": True
  65. },
  66. "FEISHU_APPSECRET": {
  67. "description": "飞书应用密钥",
  68. "required": True
  69. },
  70. "ALIYUN_ACCESS_KEY_ID": {
  71. "description": "阿里云访问密钥ID",
  72. "required": True
  73. },
  74. "ALIYUN_ACCESS_KEY_SECRET": {
  75. "description": "阿里云访问密钥",
  76. "required": True
  77. },
  78. "REDIS_HOST": {
  79. "description": "Redis主机地址",
  80. "required": True
  81. },
  82. "REDIS_PORT": {
  83. "description": "Redis端口",
  84. "default": 6379
  85. },
  86. "REDIS_PASSWORD": {
  87. "description": "Redis密码",
  88. "required": True
  89. }
  90. }
  91. docs += "| 配置项 | 描述 | 是否必填 | 默认值 |\n"
  92. docs += "|--------|------|----------|--------|\n"
  93. for key, info in env_settings_info.items():
  94. description = info.get("description", "")
  95. required = "是" if info.get("required", False) else "否"
  96. default = str(info.get("default", "")) if info.get("default") is not None else ""
  97. options = ", ".join(info.get("options", []))
  98. if options:
  99. description += f" (可选值: {options})"
  100. docs += f"| {key} | {description} | {required} | {default} |\n"
  101. return docs
  102. def generate_spider_config_docs(self) -> str:
  103. """
  104. 生成爬虫配置文档
  105. """
  106. docs = "# 爬虫配置说明\n\n"
  107. docs += "爬虫配置通过 `config/spiders_config.yaml` 文件进行配置。\n\n"
  108. # 添加配置示例
  109. docs += "## 配置示例\n\n```yaml\n"
  110. with open(spiders_config_path, 'r', encoding='utf-8') as f:
  111. docs += f.read()
  112. docs += "```\n\n"
  113. # 添加字段说明
  114. docs += "## 字段说明\n\n"
  115. global_config_fields = {
  116. "base_url": "基础URL,用于拼接完整请求URL",
  117. "request_timeout": "请求超时时间(秒)",
  118. "max_retries": "最大重试次数",
  119. "headers": "请求头信息"
  120. }
  121. platform_config_fields = {
  122. "platform": "平台名称",
  123. "mode": "爬取模式(如 recommend, author)",
  124. "path": "API路径",
  125. "url": "完整请求URL",
  126. "method": "HTTP请求方法",
  127. "request_body": "请求体参数",
  128. "loop_times": "循环次数",
  129. "loop_interval": "循环间隔(min/max)",
  130. "response_parse": "响应解析配置",
  131. "feishu_sheetid": "飞书表格ID"
  132. }
  133. response_parse_fields = {
  134. "data_path": "数据列表路径",
  135. "next_cursor": "下一页游标路径",
  136. "has_more": "是否还有更多数据路径",
  137. "fields": "字段映射配置"
  138. }
  139. docs += "### 全局配置字段\n\n"
  140. docs += "| 字段 | 描述 |\n"
  141. docs += "|------|------|\n"
  142. for field, description in global_config_fields.items():
  143. docs += f"| {field} | {description} |\n"
  144. docs += "\n### 平台配置字段\n\n"
  145. docs += "| 字段 | 描述 |\n"
  146. docs += "|------|------|\n"
  147. for field, description in platform_config_fields.items():
  148. docs += f"| {field} | {description} |\n"
  149. docs += "\n### 响应解析字段\n\n"
  150. docs += "| 字段 | 描述 |\n"
  151. docs += "|------|------|\n"
  152. for field, description in response_parse_fields.items():
  153. docs += f"| {field} | {description} |\n"
  154. return docs
  155. def generate_complete_docs(self) -> str:
  156. """
  157. 生成完整配置文档
  158. """
  159. docs = "# AutoScraperX 配置说明\n\n"
  160. docs += "本文档详细说明了AutoScraperX项目的配置项。\n\n"
  161. docs += "---\n\n"
  162. docs += self.generate_env_config_docs()
  163. docs += "\n---\n\n"
  164. docs += self.generate_spider_config_docs()
  165. docs += "\n---\n\n"
  166. docs += "## 当前配置状态\n\n"
  167. try:
  168. stats = self.config_manager.get_config_stats()
  169. docs += f"- 平台配置数量: {stats['total_platforms']}\n"
  170. docs += f"- 运行环境: {stats['env']}\n"
  171. docs += f"- 配置文件路径: {stats['config_file']}\n"
  172. except Exception as e:
  173. docs += f"配置状态获取失败: {e}\n"
  174. return docs
  175. def save_docs(self, filepath: str = "CONFIGURATION.md"):
  176. """
  177. 保存文档到文件
  178. """
  179. docs = self.generate_complete_docs()
  180. with open(filepath, 'w', encoding='utf-8') as f:
  181. f.write(docs)
  182. return filepath
  183. def generate_config_docs():
  184. """
  185. 生成配置文档
  186. """
  187. doc_generator = ConfigDocumentation()
  188. filepath = doc_generator.save_docs()
  189. print(f"配置文档已保存到: {filepath}")
  190. return filepath
# Script entry point: generate and save the configuration document when this
# file is executed directly rather than imported.
if __name__ == "__main__":
    generate_config_docs()