path_config.py 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. 路径配置管理工具
  5. 提供统一的路径管理,支持多账号批量处理
  6. """
  7. import json
  8. from pathlib import Path
  9. from typing import Dict, Optional, List
  10. import os
  11. class PathConfig:
  12. """路径配置管理类"""
  13. def __init__(self, account_name: Optional[str] = None):
  14. """
  15. 初始化路径配置
  16. Args:
  17. account_name: 账号名称,如果不指定则使用默认账号或环境变量
  18. """
  19. # 获取项目根目录
  20. self.project_root = Path(__file__).parent.parent.parent
  21. self.config_file = self.project_root / "config" / "accounts.json"
  22. # 加载配置
  23. self._load_config()
  24. # 确定账号名称
  25. self.account_name = self._determine_account_name(account_name)
  26. # 构建路径
  27. account_base = self.config["paths"]["account_base"]
  28. self.account_dir = self.project_root / account_base / self.account_name
  29. def _load_config(self):
  30. """加载配置文件"""
  31. if not self.config_file.exists():
  32. raise FileNotFoundError(f"配置文件不存在: {self.config_file}")
  33. with open(self.config_file, "r", encoding="utf-8") as f:
  34. self.config = json.load(f)
  35. def _determine_account_name(self, account_name: Optional[str]) -> str:
  36. """
  37. 确定要使用的账号名称
  38. 优先级:
  39. 1. 函数参数指定的账号名
  40. 2. 环境变量 ACCOUNT_NAME
  41. 3. 配置文件中的默认账号
  42. Args:
  43. account_name: 参数指定的账号名
  44. Returns:
  45. 最终确定的账号名称
  46. """
  47. # 1. 参数指定
  48. if account_name:
  49. return account_name
  50. # 2. 环境变量
  51. env_account = os.environ.get("ACCOUNT_NAME")
  52. if env_account:
  53. return env_account
  54. # 3. 配置文件默认值
  55. default_account = self.config.get("default_account")
  56. if default_account:
  57. return default_account
  58. # 4. 如果都没有,抛出错误
  59. raise ValueError(
  60. "未指定账号名称!请通过以下方式之一指定:\n"
  61. "1. 参数: PathConfig(account_name='账号名')\n"
  62. "2. 环境变量: export ACCOUNT_NAME='账号名'\n"
  63. "3. 配置文件: 在 config/accounts.json 中设置 default_account"
  64. )
  65. def get_enabled_accounts(self) -> List[str]:
  66. """获取所有启用的账号列表"""
  67. accounts = self.config.get("accounts", [])
  68. return [acc["name"] for acc in accounts if acc.get("enabled", True)]
  69. def get_all_accounts(self) -> List[str]:
  70. """获取所有账号列表(包括未启用的)"""
  71. accounts = self.config.get("accounts", [])
  72. return [acc["name"] for acc in accounts]
  73. @property
  74. def filter_mode(self) -> str:
  75. """
  76. 获取过滤模式
  77. Returns:
  78. 过滤模式名称:
  79. - "exclude_current_posts": 过滤当前帖子ID(默认,推荐)
  80. - "time_based": 基于时间过滤
  81. - "none": 不过滤
  82. """
  83. return self.config.get("filter_mode", "exclude_current_posts")
  84. # ===== 输入路径 =====
  85. @property
  86. def current_posts_dir(self) -> Path:
  87. """当前帖子what解构结果目录"""
  88. rel_path = self.config["paths"]["input"]["current_posts"]
  89. return self.account_dir / rel_path
  90. @property
  91. def historical_posts_dir(self) -> Path:
  92. """过去帖子what解构结果目录"""
  93. rel_path = self.config["paths"]["input"]["historical_posts"]
  94. return self.account_dir / rel_path
  95. @property
  96. def pattern_cluster_file(self) -> Path:
  97. """pattern聚合结果文件"""
  98. rel_path = self.config["paths"]["input"]["pattern_cluster"]
  99. return self.account_dir / rel_path
  100. # ===== 输出路径 =====
  101. @property
  102. def intermediate_dir(self) -> Path:
  103. """中间结果目录"""
  104. rel_path = self.config["paths"]["output"]["intermediate"]
  105. return self.account_dir / rel_path
  106. @property
  107. def feature_category_mapping_file(self) -> Path:
  108. """特征名称_分类映射.json"""
  109. return self.intermediate_dir / "特征名称_分类映射.json"
  110. @property
  111. def category_hierarchy_file(self) -> Path:
  112. """分类层级映射.json"""
  113. return self.intermediate_dir / "分类层级映射.json"
  114. @property
  115. def feature_source_mapping_file(self) -> Path:
  116. """特征名称_帖子来源.json"""
  117. return self.intermediate_dir / "特征名称_帖子来源.json"
  118. @property
  119. def task_list_file(self) -> Path:
  120. """当前帖子_解构任务列表.json"""
  121. return self.intermediate_dir / "当前帖子_解构任务列表.json"
  122. @property
  123. def how_results_dir(self) -> Path:
  124. """how解构结果目录"""
  125. rel_path = self.config["paths"]["output"]["how_results"]
  126. return self.account_dir / rel_path
  127. @property
  128. def visualization_dir(self) -> Path:
  129. """可视化结果目录"""
  130. rel_path = self.config["paths"]["output"]["visualization"]
  131. return self.account_dir / rel_path
  132. @property
  133. def visualization_file(self) -> Path:
  134. """可视化HTML文件"""
  135. return self.visualization_dir / "how解构结果_可视化.html"
  136. # ===== 工具方法 =====
  137. def ensure_dirs(self):
  138. """确保所有输出目录存在"""
  139. self.intermediate_dir.mkdir(parents=True, exist_ok=True)
  140. self.how_results_dir.mkdir(parents=True, exist_ok=True)
  141. self.visualization_dir.mkdir(parents=True, exist_ok=True)
  142. def validate_input_paths(self) -> Dict[str, bool]:
  143. """
  144. 验证输入路径是否存在
  145. Returns:
  146. 验证结果字典
  147. """
  148. results = {
  149. "当前帖子目录": self.current_posts_dir.exists(),
  150. "过去帖子目录": self.historical_posts_dir.exists(),
  151. "pattern聚合文件": self.pattern_cluster_file.exists(),
  152. }
  153. return results
  154. def print_paths(self):
  155. """打印所有路径信息(用于调试)"""
  156. print("="*60)
  157. print(f"账号: {self.account_name}")
  158. print(f"过滤模式: {self.filter_mode}")
  159. print(f"账号根目录: {self.account_dir}")
  160. print("\n输入路径:")
  161. print(f" 当前帖子目录: {self.current_posts_dir}")
  162. print(f" 过去帖子目录: {self.historical_posts_dir}")
  163. print(f" pattern聚合文件: {self.pattern_cluster_file}")
  164. print("\n输出路径:")
  165. print(f" 中间结果目录: {self.intermediate_dir}")
  166. print(f" how解构结果目录: {self.how_results_dir}")
  167. print(f" 可视化结果目录: {self.visualization_dir}")
  168. print("="*60)
  169. def check_and_print_status(self):
  170. """检查并打印路径状态"""
  171. self.print_paths()
  172. print("\n输入路径验证:")
  173. validation = self.validate_input_paths()
  174. for name, exists in validation.items():
  175. status = "✓ 存在" if exists else "✗ 不存在"
  176. print(f" {name}: {status}")
  177. if not all(validation.values()):
  178. print("\n⚠️ 警告: 部分输入路径不存在!")
  179. return False
  180. else:
  181. print("\n✓ 所有输入路径验证通过")
  182. return True
  183. def get_path_config(account_name: Optional[str] = None) -> PathConfig:
  184. """
  185. 获取路径配置对象(便捷函数)
  186. Args:
  187. account_name: 账号名称,可选
  188. Returns:
  189. PathConfig对象
  190. """
  191. return PathConfig(account_name)
  192. if __name__ == "__main__":
  193. # 测试代码
  194. import sys
  195. account = sys.argv[1] if len(sys.argv) > 1 else None
  196. try:
  197. config = PathConfig(account)
  198. config.check_and_print_status()
  199. print("\n所有启用的账号:")
  200. for acc in config.get_enabled_accounts():
  201. print(f" - {acc}")
  202. except Exception as e:
  203. print(f"错误: {e}")
  204. sys.exit(1)