1 周之前 · d512b4720b
--- a/config/accounts.json
+++ b/config/accounts.json
@@ -0,0 +1,44 @@
 
				+{
			
 
				+  "accounts": [
			
 
				+    {
			
 
				+      "name": "阿里多多酱",
			
 
				+      "enabled": true,
			
 
				+      "description": "第一个测试账号"
			
 
				+    },
			
 
				+    {
			
 
				+      "name": "示例账号2",
			
 
				+      "enabled": false,
			
 
				+      "description": "未启用的示例账号"
			
 
				+    }
			
 
				+  ],
			
 
				+  "default_account": "阿里多多酱",
			
 
				+  "comment": "原始数据目录不动，所有输出结果统一放到 how解构/ 目录下",
			
 
				+  "filter_mode": "exclude_current_posts",
			
 
				+  "filter_modes": {
			
 
				+    "exclude_current_posts": {
			
 
				+      "description": "过滤在当前帖子目录中出现的帖子ID（推荐，更宽松）",
			
 
				+      "enabled": true
			
 
				+    },
			
 
				+    "time_based": {
			
 
				+      "description": "基于发布时间过滤，只保留早于当前帖子最早发布时间的历史帖子",
			
 
				+      "enabled": false
			
 
				+    },
			
 
				+    "none": {
			
 
				+      "description": "不过滤任何帖子",
			
 
				+      "enabled": false
			
 
				+    }
			
 
				+  },
			
 
				+  "paths": {
			
 
				+    "account_base": "data/账号",
			
 
				+    "input": {
			
 
				+      "current_posts": "what解构",
			
 
				+      "historical_posts": "what解构_所有帖子",
			
 
				+      "pattern_cluster": "pattern/optimization/optimized_clustered_data_gemini-3-pro-preview.json"
			
 
				+    },
			
 
				+    "output": {
			
 
				+      "intermediate": "how解构/intermediate",
			
 
				+      "how_results": "how解构/results",
			
 
				+      "visualization": "how解构/visualization"
			
 
				+    }
			
 
				+  }
			
 
				+}
			
--- a/doc/20251121.md
+++ b/doc/20251121.md
@@ -0,0 +1,24 @@
 
				+
			
 
				+# 数据结构改动
			
 
				+
			
 
				+请你阅读：script/data_processing/run_all.sh 及相关的脚本，现在数据目录发生以下变化，结构没变，需要哪些改动？
			
 
				+
			
 
				+整体数据目录：
			
 
				+旧路径：data/data_1118
			
 
				+新路径：data/账号/阿里多多酱
			
 
				+其中，阿里多多酱是账号名称，后续解构其他账号有可能会变
			
 
				+
			
 
				+当前帖子what解构结果：
			
 
				+旧路径：data/data_1118/当前帖子_what解构结果
			
 
				+新路径：data/账号/阿里多多酱/what解构
			
 
				+
			
 
				+过去帖子_pattern聚合结果：
			
 
				+旧路径：data/data_1118/过去帖子_pattern聚合结果.json
			
 
				+新路径：data/账号/阿里多多酱/pattern/cluster/clustered_data.json
			
 
				+
			
 
				+过去帖子_what解构结果
			
 
				+旧路径：data/data_1118/过去帖子_what解构结果
			
 
				+新路径：data/账号/阿里多多酱/what解构_所有帖子
			
 
				+
			
 
				+
			
 
				+config/accounts.json 中 `paths.input.pattern_cluster` 配置的 pattern 聚合文件：data/账号/阿里多多酱/pattern/optimization/optimized_clustered_data_gemini-3-pro-preview.json
			
--- a/doc/多账号目录管理方案.md
+++ b/doc/多账号目录管理方案.md
@@ -0,0 +1,147 @@
 
				+# 多账号目录管理方案
			
 
				+
			
 
				+## 目录结构
			
 
				+
			
 
				+```
			
 
				+data/
			
 
				+├── 账号/
			
 
				+│   ├── 阿里多多酱/                        # 账号目录
			
 
				+│   │   ├── what解构/                      # 当前帖子what解构（原始数据，不动）
			
 
				+│   │   ├── what解构_所有帖子/             # 过去帖子what解构（原始数据，不动）
			
 
				+│   │   ├── pattern/                       # pattern聚合结果（原始数据，不动）
			
 
				+│   │   │   └── cluster/
			
 
				+│   │   │       └── clustered_data.json
			
 
				+│   │   └── how解构/                       # 新增：所有输出结果
			
 
				+│   │       ├── intermediate/              # 中间结果
			
 
				+│   │       │   ├── 特征名称_分类映射.json
			
 
				+│   │       │   ├── 分类层级映射.json
			
 
				+│   │       │   ├── 特征名称_帖子来源.json
			
 
				+│   │       │   └── 当前帖子_解构任务列表.json
			
 
				+│   │       ├── results/                   # how解构最终结果
			
 
				+│   │       │   └── *_how.json
			
 
				+│   │       └── visualization/             # 可视化结果
			
 
				+│   │           └── how解构结果_可视化.html
			
 
				+│   │
			
 
				+│   └── 其他账号名/                        # 其他账号，结构相同
			
 
				+│       └── ...
			
 
				+│
			
 
				+└── config/
			
 
				+    └── accounts.json                      # 账号配置文件（注意：config/ 实际位于项目根目录，与 data/ 同级）
			
 
				+```
			
 
				+
			
 
				+## 核心设计原则
			
 
				+
			
 
				+1. **原始数据目录不动**：`what解构/`、`what解构_所有帖子/`、`pattern/` 保持原样
			
 
				+2. **输出统一管理**：所有输出结果放到新增的 `how解构/` 目录下
			
 
				+3. **账号隔离**：每个账号独立目录，互不影响
			
 
				+4. **配置驱动**：通过配置文件管理路径，支持批量处理
			
 
				+
			
 
				+## 使用方式
			
 
				+
			
 
				+### 1. 配置账号
			
 
				+
			
 
				+编辑 `config/accounts.json`：
			
 
				+
			
 
				+```json
			
 
				+{
			
 
				+  "accounts": [
			
 
				+    {
			
 
				+      "name": "阿里多多酱",
			
 
				+      "enabled": true,
			
 
				+      "description": "第一个测试账号"
			
 
				+    },
			
 
				+    {
			
 
				+      "name": "新账号名",
			
 
				+      "enabled": true,
			
 
				+      "description": "新增账号"
			
 
				+    }
			
 
				+  ],
			
 
				+  "default_account": "阿里多多酱"
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+### 2. 准备数据
			
 
				+
			
 
				+确保账号目录下有以下原始数据：
			
 
				+- `what解构/` - 当前帖子解构结果
			
 
				+- `what解构_所有帖子/` - 过去帖子解构结果
			
 
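				+- `pattern/cluster/clustered_data.json` - pattern聚合结果（实际读取路径以 `config/accounts.json` 的 `paths.input.pattern_cluster` 为准，当前配置为 `pattern/optimization/optimized_clustered_data_gemini-3-pro-preview.json`）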
			
 
				+
			
 
				+### 3. 运行处理脚本
			
 
				+
			
 
				+#### 方式一：处理单个账号
			
 
				+
			
 
				+```bash
			
 
				+# 指定账号名
			
 
				+./script/data_processing/run_all_with_config.sh 阿里多多酱
			
 
				+
			
 
				+# 或使用环境变量
			
 
				+ACCOUNT_NAME=阿里多多酱 ./script/data_processing/run_all_with_config.sh
			
 
				+```
			
 
				+
			
 
				+#### 方式二：批量处理所有启用的账号
			
 
				+
			
 
				+```bash
			
 
				+./script/data_processing/run_all_with_config.sh --all
			
 
				+```
			
 
				+
			
 
				+### 4. 查看结果
			
 
				+
			
 
				+处理完成后，结果在 `data/账号/<账号名>/how解构/` 目录下：
			
 
				+- `intermediate/` - 中间结果文件
			
 
				+- `results/` - how解构结果
			
 
				+- `visualization/` - 可视化HTML文件
			
 
				+
			
 
				+## Python脚本中使用
			
 
				+
			
 
				+所有Python脚本都应该使用 `PathConfig` 类来管理路径：
			
 
				+
			
 
				+```python
			
 
				+from script.data_processing.path_config import PathConfig
			
 
				+
			
 
				+# 获取路径配置
			
 
				+config = PathConfig()  # 使用默认账号或环境变量
			
 
				+# 或指定账号
			
 
				+config = PathConfig(account_name="阿里多多酱")
			
 
				+
			
 
				+# 使用路径
			
 
				+input_file = config.pattern_cluster_file
			
 
				+output_file = config.feature_category_mapping_file
			
 
				+
			
 
				+# 确保输出目录存在
			
 
				+config.ensure_dirs()
			
 
				+```
			
 
				+
			
 
				+## 路径配置说明
			
 
				+
			
 
				+`PathConfig` 类提供的主要属性：
			
 
				+
			
 
				+### 输入路径
			
 
				+- `current_posts_dir` - 当前帖子目录
			
 
				+- `historical_posts_dir` - 过去帖子目录
			
 
				+- `pattern_cluster_file` - pattern聚合文件
			
 
				+
			
 
				+### 输出路径
			
 
				+- `intermediate_dir` - 中间结果目录
			
 
				+- `feature_category_mapping_file` - 特征分类映射文件
			
 
				+- `category_hierarchy_file` - 分类层级映射文件
			
 
				+- `feature_source_mapping_file` - 特征来源映射文件
			
 
				+- `task_list_file` - 任务列表文件
			
 
				+- `how_results_dir` - how解构结果目录
			
 
				+- `visualization_dir` - 可视化目录
			
 
				+- `visualization_file` - 可视化HTML文件
			
 
				+
			
 
				+## 优势
			
 
				+
			
 
				+1. **原始数据安全**：不修改原始数据目录，只新增输出目录
			
 
				+2. **易于扩展**：添加新账号只需创建目录和添加配置
			
 
				+3. **批量处理**：支持一键处理所有账号
			
 
				+4. **配置灵活**：通过配置文件轻松调整路径结构
			
 
				+5. **代码复用**：所有脚本使用统一的路径管理类
			
 
				+
			
 
				+## 注意事项
			
 
				+
			
 
				+1. 新增账号时，确保账号目录下有完整的原始数据
			
 
				+2. 运行脚本前会自动验证输入路径是否存在
			
 
				+3. 输出目录会自动创建，无需手动创建
			
 
				+4. 可以通过环境变量 `ACCOUNT_NAME` 指定账号，方便在CI/CD中使用
			
--- a/doc/路径改动总结.md
+++ b/doc/路径改动总结.md
@@ -0,0 +1,186 @@
 
				+# 路径改动总结
			
 
				+
			
 
				+## 改动概述
			
 
				+
			
 
				+将数据处理脚本从硬编码路径改为配置驱动的路径管理，支持多账号批量处理。
			
 
				+
			
 
				+## 核心变化
			
 
				+
			
 
				+### 1. 目录结构
			
 
				+
			
 
				+**原始数据目录保持不变**，只新增 `how解构/` 输出目录：
			
 
				+
			
 
				+```
			
 
				+data/账号/阿里多多酱/
			
 
				+├── what解构/                    # 原始数据（不动）
			
 
				+├── what解构_所有帖子/           # 原始数据（不动）
			
 
				+├── pattern/                     # 原始数据（不动）
			
 
				+│   └── cluster/
			
 
				+│       └── clustered_data.json
			
 
				+└── how解构/                     # 新增：所有输出结果
			
 
				+    ├── intermediate/            # 中间结果
			
 
				+    │   ├── 特征名称_分类映射.json
			
 
				+    │   ├── 分类层级映射.json
			
 
				+    │   ├── 特征名称_帖子来源.json
			
 
				+    │   └── 当前帖子_解构任务列表.json
			
 
				+    ├── results/                 # how解构最终结果
			
 
				+    │   └── *_how.json
			
 
				+    └── visualization/           # 可视化结果
			
 
				+        └── how解构结果_可视化.html
			
 
				+```
			
 
				+
			
 
				+### 2. 路径对照表
			
 
				+
			
 
				+| 文件类型 | 旧路径 | 新路径 |
			
 
				+|---------|--------|--------|
			
 
				+| pattern聚合结果 | `data/data_1118/过去帖子_pattern聚合结果.json` | `data/账号/阿里多多酱/pattern/cluster/clustered_data.json` |
			
 
				+| 当前帖子what解构 | `data/data_1118/当前帖子_what解构结果/` | `data/账号/阿里多多酱/what解构/` |
			
 
				+| 过去帖子what解构 | `data/data_1118/过去帖子_what解构结果/` | `data/账号/阿里多多酱/what解构_所有帖子/` |
			
 
				+| 特征分类映射 | `data/data_1118/特征名称_分类映射.json` | `data/账号/阿里多多酱/how解构/intermediate/特征名称_分类映射.json` |
			
 
				+| 分类层级映射 | `data/data_1118/分类层级映射.json` | `data/账号/阿里多多酱/how解构/intermediate/分类层级映射.json` |
			
 
				+| 特征帖子来源 | `data/data_1118/特征名称_帖子来源.json` | `data/账号/阿里多多酱/how解构/intermediate/特征名称_帖子来源.json` |
			
 
				+| 解构任务列表 | `data/data_1118/当前帖子_解构任务列表.json` | `data/账号/阿里多多酱/how解构/intermediate/当前帖子_解构任务列表.json` |
			
 
				+| how解构结果 | `data/data_1118/当前帖子_how解构结果/` | `data/账号/阿里多多酱/how解构/results/` |
			
 
				+| 可视化结果 | `data/data_1118/当前帖子_how解构结果_可视化.html` | `data/账号/阿里多多酱/how解构/visualization/how解构结果_可视化.html` |
			
 
				+
			
 
				+## 修改的文件
			
 
				+
			
 
				+### 新增文件
			
 
				+
			
 
				+1. **`config/accounts.json`** - 账号配置文件
			
 
				+2. **`script/data_processing/path_config.py`** - 路径配置管理类
			
 
				+3. **`script/data_processing/run_all_with_config.sh`** - 支持多账号的运行脚本
			
 
				+4. **`script/data_processing/migrate_data_structure.sh`** - 数据迁移脚本（已不需要）
			
 
				+
			
 
				+### 修改的文件
			
 
				+
			
 
				+1. **`script/data_processing/extract_feature_categories.py`**
			
 
				+   - 添加 `from script.data_processing.path_config import PathConfig`
			
 
				+   - 使用 `PathConfig` 替代硬编码路径
			
 
				+
			
 
				+2. **`script/data_processing/extract_features_from_posts.py`**
			
 
				+   - 添加 `from script.data_processing.path_config import PathConfig`
			
 
				+   - 使用 `PathConfig` 替代硬编码路径
			
 
				+
			
 
				+3. **`script/data_processing/extract_current_posts.py`**
			
 
				+   - 添加 `from script.data_processing.path_config import PathConfig`
			
 
				+   - 使用 `PathConfig` 替代硬编码路径
			
 
				+
			
 
				+4. **`script/data_processing/match_inspiration_features.py`**
			
 
				+   - 添加 `from script.data_processing.path_config import PathConfig`
			
 
				+   - 使用 `PathConfig` 替代硬编码路径
			
 
				+
			
 
				+5. **`script/data_processing/visualize_how_results.py`**
			
 
				+   - 添加 `from script.data_processing.path_config import PathConfig`
			
 
				+   - 修改 `load_feature_category_mapping()` 和 `load_feature_source_mapping()` 函数接受 `PathConfig` 参数
			
 
				+   - 使用 `PathConfig` 替代硬编码路径
			
 
				+
			
 
				+6. **`script/data_processing/run_all.sh`** (旧版本，已更新路径但建议使用新版本)
			
 
				+   - 更新所有 echo 输出中的路径描述
			
 
				+
			
 
				+## 使用方式
			
 
				+
			
 
				+### 单账号处理
			
 
				+
			
 
				+```bash
			
 
				+# 方式1：命令行参数
			
 
				+./script/data_processing/run_all_with_config.sh 阿里多多酱
			
 
				+
			
 
				+# 方式2：环境变量
			
 
				+ACCOUNT_NAME=阿里多多酱 ./script/data_processing/run_all_with_config.sh
			
 
				+
			
 
				+# 方式3：使用默认账号（配置文件中的 default_account）
			
 
				+./script/data_processing/run_all_with_config.sh
			
 
				+```
			
 
				+
			
 
				+### 批量处理多个账号
			
 
				+
			
 
				+```bash
			
 
				+# 处理所有在配置文件中启用的账号
			
 
				+./script/data_processing/run_all_with_config.sh --all
			
 
				+```
			
 
				+
			
 
				+### Python脚本中使用
			
 
				+
			
 
				+```python
			
 
				+from script.data_processing.path_config import PathConfig
			
 
				+
			
 
				+# 获取路径配置
			
 
				+config = PathConfig()  # 使用默认账号
			
 
				+# 或
			
 
				+config = PathConfig(account_name="阿里多多酱")
			
 
				+
			
 
				+# 使用路径
			
 
				+input_file = config.pattern_cluster_file
			
 
				+output_file = config.feature_category_mapping_file
			
 
				+
			
 
				+# 确保输出目录存在
			
 
				+config.ensure_dirs()
			
 
				+
			
 
				+# 读写文件
			
 
				+with open(input_file, 'r', encoding='utf-8') as f:
			
 
				+    data = json.load(f)
			
 
				+
			
 
				+with open(output_file, 'w', encoding='utf-8') as f:
			
 
				+    json.dump(result, f, ensure_ascii=False, indent=4)
			
 
				+```
			
 
				+
			
 
				+## 添加新账号
			
 
				+
			
 
				+1. 创建账号目录和原始数据：
			
 
				+```bash
			
 
				+mkdir -p "data/账号/新账号名/what解构"
			
 
				+mkdir -p "data/账号/新账号名/what解构_所有帖子"
			
 
				+mkdir -p "data/账号/新账号名/pattern/cluster"
			
 
				+# 放入原始数据...
			
 
				+```
			
 
				+
			
 
				+2. 在 `config/accounts.json` 的 `accounts` 数组中添加配置：
			
 
				+```json
			
 
				+{
			
 
				+  "name": "新账号名",
			
 
				+  "enabled": true,
			
 
				+  "description": "新账号描述"
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+3. 运行处理脚本：
			
 
				+```bash
			
 
				+./script/data_processing/run_all_with_config.sh 新账号名
			
 
				+```
			
 
				+
			
 
				+## 迁移说明
			
 
				+
			
 
				+从旧版本迁移到新版本：
			
 
				+
			
 
				+1. **数据无需迁移**：原始数据路径保持不变
			
 
				+2. **删除旧输出**：可以删除 `data/data_1118/` 下的中间结果和输出文件
			
 
				+3. **使用新脚本**：用 `run_all_with_config.sh` 替代 `run_all.sh`
			
 
				+
			
 
				+## 优势
			
 
				+
			
 
				+1. ✅ **原始数据安全**：不修改原始数据目录结构
			
 
				+2. ✅ **输出结构清晰**：所有输出集中在 `how解构/` 目录
			
 
				+3. ✅ **支持多账号**：轻松管理和批量处理多个账号
			
 
				+4. ✅ **配置驱动**：通过配置文件灵活调整路径
			
 
				+5. ✅ **易于维护**：统一的路径管理，避免硬编码
			
 
				+6. ✅ **向后兼容**：旧的 `run_all.sh` 仍可使用（路径已更新）
			
 
				+
			
 
				+## 验证
			
 
				+
			
 
				+验证配置是否正确：
			
 
				+
			
 
				+```bash
			
 
				+# 查看路径配置
			
 
				+python script/data_processing/path_config.py 阿里多多酱
			
 
				+
			
 
				+# 测试路径并创建输出目录
			
 
				+python -c "from script.data_processing.path_config import PathConfig; config = PathConfig('阿里多多酱'); config.check_and_print_status(); config.ensure_dirs()"
			
 
				+```
			
 
				+
			
 
				+## 注意事项
			
 
				+
			
 
				+1. 运行脚本前确保账号目录下有完整的原始数据
			
 
				+2. 首次运行时会自动创建 `how解构/` 目录及子目录
			
 
				+3. 可通过环境变量 `ACCOUNT_NAME` 指定账号，方便CI/CD集成
			
 
				+4. 批量处理时，某个账号失败不会影响其他账号的处理
			
--- a/script/data_processing/extract_current_posts.py
+++ b/script/data_processing/extract_current_posts.py
@@ -15,6 +15,7 @@ project_root = Path(__file__).parent.parent.parent
 
				 sys.path.insert(0, str(project_root))
			
 
				 
			
 
				 from script.detail import get_xiaohongshu_detail
			
 
				+from script.data_processing.path_config import PathConfig
			
 
				 
			
 
				 
			
 
				 def extract_post_id_from_filename(filename: str) -> str:
			
@@ -195,13 +196,20 @@ def process_single_file(file_path: Path) -> Optional[Dict]:
 
				 
			
 
				 
			
 
				 def main():
			
 
				-    # 输入输出路径（默认使用项目根目录下的 data/data_1117 目录）
			
 
				-    script_dir = Path(__file__).parent
			
 
				-    project_root = script_dir.parent.parent
			
 
				-    data_dir = project_root / "data" / "data_1118"
			
 
				+    # 使用路径配置
			
 
				+    config = PathConfig()
			
 
				 
			
 
				-    input_dir = data_dir / "当前帖子_what解构结果"
			
 
				-    output_file = data_dir / "当前帖子_解构任务列表.json"
			
 
				+    # 确保输出目录存在
			
 
				+    config.ensure_dirs()
			
 
				+
			
 
				+    # 获取路径
			
 
				+    input_dir = config.current_posts_dir
			
 
				+    output_file = config.task_list_file
			
 
				+
			
 
				+    print(f"账号: {config.account_name}")
			
 
				+    print(f"当前帖子目录: {input_dir}")
			
 
				+    print(f"输出文件: {output_file}")
			
 
				+    print()
			
 
				 
			
 
				     print(f"正在扫描目录: {input_dir}")
			
 
				 
			
--- a/script/data_processing/extract_feature_categories.py
+++ b/script/data_processing/extract_feature_categories.py
@@ -15,6 +15,7 @@ project_root = Path(__file__).parent.parent.parent
 
				 sys.path.insert(0, str(project_root))
			
 
				 
			
 
				 from script.detail import get_xiaohongshu_detail
			
 
				+from script.data_processing.path_config import PathConfig
			
 
				 
			
 
				 
			
 
				 def extract_post_id_from_filename(filename: str) -> str:
			
@@ -35,6 +36,38 @@ def get_post_detail(post_id: str) -> Optional[Dict]:
 
				         return None
			
 
				 
			
 
				 
			
 
				+def get_current_post_ids(current_posts_dir: Path) -> Set[str]:
			
 
				+    """
			
 
				+    获取当前帖子目录中的所有帖子ID
			
 
				+
			
 
				+    Args:
			
 
				+        current_posts_dir: 当前帖子目录路径
			
 
				+
			
 
				+    Returns:
			
 
				+        当前帖子ID集合
			
 
				+    """
			
 
				+    if not current_posts_dir.exists():
			
 
				+        print(f"警告: 当前帖子目录不存在: {current_posts_dir}")
			
 
				+        return set()
			
 
				+
			
 
				+    json_files = list(current_posts_dir.glob("*.json"))
			
 
				+    if not json_files:
			
 
				+        print(f"警告: 当前帖子目录为空: {current_posts_dir}")
			
 
				+        return set()
			
 
				+
			
 
				+    print(f"\n正在获取当前帖子ID...")
			
 
				+    print(f"找到 {len(json_files)} 个当前帖子")
			
 
				+
			
 
				+    post_ids = set()
			
 
				+    for file_path in json_files:
			
 
				+        post_id = extract_post_id_from_filename(file_path.name)
			
 
				+        if post_id:
			
 
				+            post_ids.add(post_id)
			
 
				+
			
 
				+    print(f"提取到 {len(post_ids)} 个帖子ID")
			
 
				+    return post_ids
			
 
				+
			
 
				+
			
 
				 def get_earliest_publish_time(current_posts_dir: Path) -> Optional[str]:
			
 
				     """
			
 
				     获取当前帖子目录中最早的发布时间
			
@@ -120,9 +153,44 @@ def collect_all_post_ids(data: Dict) -> Set[str]:
 
				     return post_ids
			
 
				 
			
 
				 
			
 
				+def filter_data_by_post_ids(data: Dict, exclude_post_ids: Set[str]) -> tuple[Dict, Set[str]]:
			
 
				+    """
			
 
				+    根据帖子ID过滤数据（新规则：排除当前帖子ID）
			
 
				+
			
 
				+    Args:
			
 
				+        data: 原始聚合结果数据
			
 
				+        exclude_post_ids: 要排除的帖子ID集合
			
 
				+
			
 
				+    Returns:
			
 
				+        (过滤后的数据, 被过滤掉的帖子ID集合)
			
 
				+    """
			
 
				+    # 收集所有帖子ID
			
 
				+    all_post_ids = collect_all_post_ids(data)
			
 
				+    print(f"\n数据中包含 {len(all_post_ids)} 个不同的帖子")
			
 
				+
			
 
				+    # 过滤帖子
			
 
				+    print(f"\n正在应用帖子ID过滤，排除当前帖子目录中的 {len(exclude_post_ids)} 个帖子...")
			
 
				+    filtered_post_ids = all_post_ids & exclude_post_ids  # 交集：需要过滤的
			
 
				+    valid_post_ids = all_post_ids - exclude_post_ids     # 差集：保留的
			
 
				+
			
 
				+    if filtered_post_ids:
			
 
				+        print(f"  ⚠️  过滤掉 {len(filtered_post_ids)} 个当前帖子:")
			
 
				+        for post_id in sorted(list(filtered_post_ids)[:10]):  # 最多显示10个
			
 
				+            print(f"    - {post_id}")
			
 
				+        if len(filtered_post_ids) > 10:
			
 
				+            print(f"    ... 还有 {len(filtered_post_ids) - 10} 个")
			
 
				+
			
 
				+    print(f"\n过滤统计: 过滤掉 {len(filtered_post_ids)} 个帖子，保留 {len(valid_post_ids)} 个帖子")
			
 
				+
			
 
				+    # 过滤数据
			
 
				+    filtered_data = filter_node_by_post_ids(data, valid_post_ids)
			
 
				+
			
 
				+    return filtered_data, filtered_post_ids
			
 
				+
			
 
				+
			
 
				 def filter_data_by_time(data: Dict, time_filter: str) -> tuple[Dict, Set[str]]:
			
 
				     """
			
 
				-    根据发布时间过滤数据
			
 
				+    根据发布时间过滤数据（旧规则：基于时间）
			
 
				 
			
 
				     Args:
			
 
				         data: 原始聚合结果数据
			
@@ -359,37 +427,60 @@ def build_category_hierarchy(category_data: Dict) -> Dict[str, Dict]:
 
				 
			
 
				 
			
 
				 def main():
			
 
				-    # 输入输出文件路径（默认使用项目根目录下的 data/data_1117 目录）
			
 
				-    script_dir = Path(__file__).parent
			
 
				-    project_root = script_dir.parent.parent
			
 
				-    data_dir = project_root / "data" / "data_1118"
			
 
				-
			
 
				-    input_file = data_dir / "过去帖子_pattern聚合结果.json"
			
 
				-    current_posts_dir = data_dir / "当前帖子_what解构结果"
			
 
				-    output_file_1 = data_dir / "特征名称_分类映射.json"
			
 
				-    output_file_2 = data_dir / "分类层级映射.json"
			
 
				-
			
 
				-    # 获取当前帖子的最早发布时间
			
 
				-    earliest_time = get_earliest_publish_time(current_posts_dir)
			
 
				+    # 使用路径配置
			
 
				+    config = PathConfig()
			
 
				+
			
 
				+    # 确保输出目录存在
			
 
				+    config.ensure_dirs()
			
 
				+
			
 
				+    # 获取路径
			
 
				+    input_file = config.pattern_cluster_file
			
 
				+    current_posts_dir = config.current_posts_dir
			
 
				+    output_file_1 = config.feature_category_mapping_file
			
 
				+    output_file_2 = config.category_hierarchy_file
			
 
				+
			
 
				+    print(f"账号: {config.account_name}")
			
 
				+    print(f"过滤模式: {config.filter_mode}")
			
 
				+    print(f"输入文件: {input_file}")
			
 
				+    print(f"当前帖子目录: {current_posts_dir}")
			
 
				+    print(f"输出文件1: {output_file_1}")
			
 
				+    print(f"输出文件2: {output_file_2}")
			
 
				+    print()
			
 
				 
			
 
				     # 读取输入文件
			
 
				     print(f"\n正在读取文件: {input_file}")
			
 
				     with open(input_file, "r", encoding="utf-8") as f:
			
 
				         data = json.load(f)
			
 
				 
			
 
				-    # 如果有时间过滤，应用过滤
			
 
				+    # 根据配置的过滤模式应用过滤
			
 
				     filtered_post_ids = set()
			
 
				-    if earliest_time:
			
 
				+    filter_mode = config.filter_mode
			
 
				+
			
 
				+    if filter_mode == "exclude_current_posts":
			
 
				+        # 新规则：排除当前帖子目录中的帖子ID
			
 
				         print("\n" + "="*60)
			
 
				-        print("开始应用时间过滤...")
			
 
				-        data, filtered_post_ids = filter_data_by_time(data, earliest_time)
			
 
				+        print("应用过滤规则: 排除当前帖子ID")
			
 
				+        current_post_ids = get_current_post_ids(current_posts_dir)
			
 
				+        if current_post_ids:
			
 
				+            data, filtered_post_ids = filter_data_by_post_ids(data, current_post_ids)
			
 
				+        else:
			
 
				+            print("\n未找到当前帖子ID，跳过过滤")
			
 
				+
			
 
				+    elif filter_mode == "time_based":
			
 
				+        # 旧规则：基于发布时间过滤
			
 
				+        print("\n" + "="*60)
			
 
				+        print("应用过滤规则: 基于发布时间")
			
 
				+        earliest_time = get_earliest_publish_time(current_posts_dir)
			
 
				+        if earliest_time:
			
 
				+            data, filtered_post_ids = filter_data_by_time(data, earliest_time)
			
 
				+        else:
			
 
				+            print("\n未能获取时间信息，跳过过滤")
			
 
				+
			
 
				+    elif filter_mode == "none":
			
 
				+        print("\n过滤模式: none，不应用任何过滤")
			
 
				 
			
 
				-        if filtered_post_ids:
			
 
				-            print(f"\n⚠️  警告: 以下 {len(filtered_post_ids)} 个帖子因发布时间晚于阈值被过滤:")
			
 
				-            for post_id in sorted(filtered_post_ids):
			
 
				-                print(f"  - {post_id}")
			
 
				     else:
			
 
				-        print("\n未启用时间过滤")
			
 
				+        print(f"\n警告: 未知的过滤模式 '{filter_mode}'，不应用过滤")
			
 
				 
			
 
				     # 处理结果1: 特征名称到分类的映射
			
 
				     output_1 = {}
			
@@ -418,8 +509,8 @@ def main():
 
				         json.dump(output_1, f, ensure_ascii=False, indent=4)
			
 
				 
			
 
				     print("完成!")
			
 
				-    if earliest_time:
			
 
				-        print(f"\n总计 (特征名称映射，已过滤掉发布时间 >= {earliest_time} 的帖子):")
			
 
				+    if filtered_post_ids:
			
 
				+        print(f"\n总计 (特征名称映射，已过滤掉 {len(filtered_post_ids)} 个帖子):")
			
 
				     else:
			
 
				         print(f"\n总计 (特征名称映射):")
			
 
				     for category, features in output_1.items():
			
@@ -454,8 +545,8 @@ def main():
 
				         json.dump(output_2, f, ensure_ascii=False, indent=4)
			
 
				 
			
 
				     print("完成!")
			
 
				-    if earliest_time:
			
 
				-        print(f"\n总计 (分类层级映射，已过滤掉发布时间 >= {earliest_time} 的帖子):")
			
 
				+    if filtered_post_ids:
			
 
				+        print(f"\n总计 (分类层级映射，已过滤掉 {len(filtered_post_ids)} 个帖子):")
			
 
				     else:
			
 
				         print(f"\n总计 (分类层级映射):")
			
 
				     for category, hierarchies in output_2.items():
			
--- a/script/data_processing/extract_features_from_posts.py
+++ b/script/data_processing/extract_features_from_posts.py
@@ -6,7 +6,7 @@
 
				 
			
 
				 import json
			
 
				 from pathlib import Path
			
 
				-from typing import Dict, List, Optional
			
 
				+from typing import Dict, List, Optional, Set
			
 
				 import re
			
 
				 import sys
			
 
				 
			
@@ -15,6 +15,7 @@ project_root = Path(__file__).parent.parent.parent
 
				 sys.path.insert(0, str(project_root))
			
 
				 
			
 
				 from script.detail import get_xiaohongshu_detail
			
 
				+from script.data_processing.path_config import PathConfig
			
 
				 
			
 
				 
			
 
				 def extract_post_id_from_filename(filename: str) -> str:
			
@@ -228,14 +229,20 @@ def merge_results(all_results: List[Dict]) -> Dict:
 
				     return merged
			
 
				 
			
 
				 
			
 
				-def convert_to_array_format(merged_dict: Dict, fetch_details: bool = True, time_filter: Optional[str] = None) -> Dict:
			
 
				+def convert_to_array_format(
			
 
				+    merged_dict: Dict,
			
 
				+    fetch_details: bool = True,
			
 
				+    time_filter: Optional[str] = None,
			
 
				+    exclude_post_ids: Optional[Set[str]] = None
			
 
				+) -> Dict:
			
 
				     """
			
 
				     将字典格式转换为数组格式，并添加帖子详情
			
 
				 
			
 
				     Args:
			
 
				         merged_dict: 字典格式的结果
			
 
				         fetch_details: 是否获取帖子详情，默认为True
			
 
				-        time_filter: 时间过滤阈值，只保留发布时间>=该时间的帖子，格式为 "YYYY-MM-DD HH:MM:SS"
			
 
				+        time_filter: 时间过滤阈值，只保留发布时间<该时间的帖子，格式为 "YYYY-MM-DD HH:MM:SS"
			
 
				+        exclude_post_ids: 要排除的帖子ID集合
			
 
				 
			
 
				     Returns:
			
 
				         数组格式的结果
			
@@ -265,11 +272,23 @@ def convert_to_array_format(merged_dict: Dict, fetch_details: bool = True, time_
 
				 
			
 
				         print(f"成功获取 {len(post_details)} 个帖子详情")
			
 
				 
			
 
				-        # 如果启用时间过滤，过滤帖子（过滤掉发布时间晚于等于阈值的帖子，避免穿越）
			
 
				-        if time_filter:
			
 
				+        # 应用过滤规则
			
 
				+        filtered_count = 0
			
 
				+
			
 
				+        # 1. 如果启用帖子ID过滤
			
 
				+        if exclude_post_ids:
			
 
				+            print(f"\n正在应用帖子ID过滤，排除 {len(exclude_post_ids)} 个当前帖子...")
			
 
				+            before_count = len(post_details)
			
 
				+            post_details = {pid: detail for pid, detail in post_details.items() if pid not in exclude_post_ids}
			
 
				+            filtered_count = before_count - len(post_details)
			
 
				+            if filtered_count > 0:
			
 
				+                print(f"  ⚠️  过滤掉 {filtered_count} 个当前帖子")
			
 
				+            print(f"保留 {len(post_details)} 个帖子")
			
 
				+
			
 
				+        # 2. 如果启用时间过滤（过滤掉发布时间晚于等于阈值的帖子，避免穿越）
			
 
				+        elif time_filter:
			
 
				             print(f"\n正在应用时间过滤 (< {time_filter})，避免使用晚于当前帖子的数据...")
			
 
				             filtered_post_ids = set()
			
 
				-            filtered_count = 0
			
 
				             for post_id, detail in post_details.items():
			
 
				                 publish_time = detail.get('publish_time', '')
			
 
				                 if publish_time < time_filter:
			
@@ -288,8 +307,8 @@ def convert_to_array_format(merged_dict: Dict, fetch_details: bool = True, time_
 
				             # 为每个来源添加帖子详情
			
 
				             enhanced_sources = []
			
 
				             for source in data["来源"]:
			
 
				-                # 如果启用时间过滤，跳过不符合时间条件的帖子
			
 
				-                if fetch_details and time_filter and source["帖子id"] not in post_details:
			
 
				+                # 如果启用过滤，跳过不符合条件的帖子
			
 
				+                if fetch_details and (time_filter or exclude_post_ids) and source["帖子id"] not in post_details:
			
 
				                     continue
			
 
				 
			
 
				                 enhanced_source = source.copy()
			
@@ -307,6 +326,38 @@ def convert_to_array_format(merged_dict: Dict, fetch_details: bool = True, time_
 
				     return result
			
 
				 
			
 
				 
			
 
				+def get_current_post_ids(current_posts_dir: Path) -> Set[str]:
			
 
				+    """
			
 
				+    获取当前帖子目录中的所有帖子ID
			
 
				+
			
 
				+    Args:
			
 
				+        current_posts_dir: 当前帖子目录路径
			
 
				+
			
 
				+    Returns:
			
 
				+        当前帖子ID集合
			
 
				+    """
			
 
				+    if not current_posts_dir.exists():
			
 
				+        print(f"警告: 当前帖子目录不存在: {current_posts_dir}")
			
 
				+        return set()
			
 
				+
			
 
				+    json_files = list(current_posts_dir.glob("*.json"))
			
 
				+    if not json_files:
			
 
				+        print(f"警告: 当前帖子目录为空: {current_posts_dir}")
			
 
				+        return set()
			
 
				+
			
 
				+    print(f"\n正在获取当前帖子ID...")
			
 
				+    print(f"找到 {len(json_files)} 个当前帖子")
			
 
				+
			
 
				+    post_ids = set()
			
 
				+    for file_path in json_files:
			
 
				+        post_id = extract_post_id_from_filename(file_path.name)
			
 
				+        if post_id:
			
 
				+            post_ids.add(post_id)
			
 
				+
			
 
				+    print(f"提取到 {len(post_ids)} 个帖子ID")
			
 
				+    return post_ids
			
 
				+
			
 
				+
			
 
				 def get_earliest_publish_time(current_posts_dir: Path) -> Optional[str]:
			
 
				     """
			
 
				     获取当前帖子目录中最早的发布时间
			
@@ -354,17 +405,23 @@ def get_earliest_publish_time(current_posts_dir: Path) -> Optional[str]:
 
				 
			
 
				 
			
 
				 def main():
			
 
				-    # 输入输出路径（默认使用项目根目录下的 data/data_1117 目录）
			
 
				-    script_dir = Path(__file__).parent
			
 
				-    project_root = script_dir.parent.parent
			
 
				-    data_dir = project_root / "data" / "data_1118"
			
 
				+    # 使用路径配置
			
 
				+    config = PathConfig()
			
 
				+
			
 
				+    # 确保输出目录存在
			
 
				+    config.ensure_dirs()
			
 
				 
			
 
				-    input_dir = data_dir / "过去帖子_what解构结果"
			
 
				-    current_posts_dir = data_dir / "当前帖子_what解构结果"
			
 
				-    output_file = data_dir / "特征名称_帖子来源.json"
			
 
				+    # 获取路径
			
 
				+    input_dir = config.historical_posts_dir
			
 
				+    current_posts_dir = config.current_posts_dir
			
 
				+    output_file = config.feature_source_mapping_file
			
 
				 
			
 
				-    # 获取当前帖子的最早发布时间
			
 
				-    earliest_time = get_earliest_publish_time(current_posts_dir)
			
 
				+    print(f"账号: {config.account_name}")
			
 
				+    print(f"过滤模式: {config.filter_mode}")
			
 
				+    print(f"过去帖子目录: {input_dir}")
			
 
				+    print(f"当前帖子目录: {current_posts_dir}")
			
 
				+    print(f"输出文件: {output_file}")
			
 
				+    print()
			
 
				 
			
 
				     print(f"\n正在扫描目录: {input_dir}")
			
 
				 
			
@@ -383,15 +440,35 @@ def main():
 
				     print("\n正在合并结果...")
			
 
				     merged_result = merge_results(all_results)
			
 
				 
			
 
				-    # 转换为数组格式（带时间过滤）
			
 
				+    # 根据配置的过滤模式应用过滤
			
 
				+    filter_mode = config.filter_mode
			
 
				+    time_filter = None
			
 
				+    exclude_post_ids = None
			
 
				+
			
 
				+    if filter_mode == "exclude_current_posts":
			
 
				+        # 新规则：排除当前帖子ID
			
 
				+        print("\n应用过滤规则: 排除当前帖子ID")
			
 
				+        exclude_post_ids = get_current_post_ids(current_posts_dir)
			
 
				+    elif filter_mode == "time_based":
			
 
				+        # 旧规则：基于发布时间
			
 
				+        print("\n应用过滤规则: 基于发布时间")
			
 
				+        time_filter = get_earliest_publish_time(current_posts_dir)
			
 
				+    elif filter_mode == "none":
			
 
				+        print("\n过滤模式: none，不应用任何过滤")
			
 
				+    else:
			
 
				+        print(f"\n警告: 未知的过滤模式 '{filter_mode}'，不应用过滤")
			
 
				+
			
 
				+    # 转换为数组格式（带过滤）
			
 
				     print("正在转换为数组格式...")
			
 
				-    final_result = convert_to_array_format(merged_result, fetch_details=True, time_filter=earliest_time)
			
 
				+    final_result = convert_to_array_format(
			
 
				+        merged_result,
			
 
				+        fetch_details=True,
			
 
				+        time_filter=time_filter,
			
 
				+        exclude_post_ids=exclude_post_ids
			
 
				+    )
			
 
				 
			
 
				     # 统计信息
			
 
				-    if earliest_time:
			
 
				-        print(f"\n提取统计 (已过滤掉发布时间 >= {earliest_time} 的帖子):")
			
 
				-    else:
			
 
				-        print(f"\n提取统计:")
			
 
				+    print(f"\n提取统计:")
			
 
				     for category in ["灵感点", "目的点", "关键点"]:
			
 
				         feature_count = len(final_result[category])
			
 
				         source_count = sum(len(item["特征来源"]) for item in final_result[category])
			
--- a/script/data_processing/match_inspiration_features.py
+++ b/script/data_processing/match_inspiration_features.py
@@ -19,6 +19,7 @@ project_root = Path(__file__).parent.parent.parent
 
				 sys.path.insert(0, str(project_root))
			
 
				 
			
 
				 from lib.hybrid_similarity import compare_phrases
			
 
				+from script.data_processing.path_config import PathConfig
			
 
				 
			
 
				 # 全局并发限制
			
 
				 MAX_CONCURRENT_REQUESTS = 100
			
@@ -444,18 +445,24 @@ async def process_task_list(
 
				 
			
 
				 async def main():
			
 
				     """主函数"""
			
 
				-    # 输入输出路径
			
 
				-    script_dir = Path(__file__).parent
			
 
				-    project_root = script_dir.parent.parent
			
 
				-    data_dir = project_root / "data" / "data_1118"
			
 
				-
			
 
				-    task_list_file = data_dir / "当前帖子_解构任务列表.json"
			
 
				-    persona_features_file = data_dir / "特征名称_帖子来源.json"
			
 
				-    category_mapping_file = data_dir / "特征名称_分类映射.json"
			
 
				-    output_dir = data_dir / "当前帖子_how解构结果"
			
 
				-
			
 
				-    # 创建输出目录
			
 
				-    output_dir.mkdir(parents=True, exist_ok=True)
			
 
				+    # 使用路径配置
			
 
				+    config = PathConfig()
			
 
				+
			
 
				+    # 确保输出目录存在
			
 
				+    config.ensure_dirs()
			
 
				+
			
 
				+    # 获取路径
			
 
				+    task_list_file = config.task_list_file
			
 
				+    persona_features_file = config.feature_source_mapping_file
			
 
				+    category_mapping_file = config.feature_category_mapping_file
			
 
				+    output_dir = config.how_results_dir
			
 
				+
			
 
				+    print(f"账号: {config.account_name}")
			
 
				+    print(f"任务列表文件: {task_list_file}")
			
 
				+    print(f"人设特征文件: {persona_features_file}")
			
 
				+    print(f"分类映射文件: {category_mapping_file}")
			
 
				+    print(f"输出目录: {output_dir}")
			
 
				+    print()
			
 
				 
			
 
				     print(f"读取解构任务列表: {task_list_file}")
			
 
				     with open(task_list_file, "r", encoding="utf-8") as f:
			
--- a/script/data_processing/migrate_data_structure.sh
+++ b/script/data_processing/migrate_data_structure.sh
@@ -0,0 +1,90 @@
 
				+#!/bin/bash
			
 
				+# Data directory structure migration script.
			
 
				+# Moves an account's existing data into an input/ + output/ directory layout.
			
 
				+#
			
 
				+# Usage: migrate_data_structure.sh [account_name]
			
 
				+#
			
 
				+# NOTE(review): config/accounts.json resolves the input directories as
			
 
				+# "<account>/what解构" and "<account>/what解构_所有帖子" and the outputs under
			
 
				+# "<account>/how解构/...", whereas this script moves the inputs below input/
			
 
				+# and creates output/ directories. Confirm which layout is intended before
			
 
				+# running it, otherwise PathConfig may no longer find the input data.
			
 
				+
			
 
				+set -e
			
 
				+
			
 
				+# Resolve the directory that contains this script
			
 
				+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
			
 
				+PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
			
 
				+
			
 
				+# Color definitions (YELLOW is not used anywhere in this script)
			
 
				+GREEN='\033[0;32m'
			
 
				+YELLOW='\033[1;33m'
			
 
				+NC='\033[0m'
			
 
				+
			
 
				+cd "$PROJECT_ROOT"
			
 
				+
			
 
				+echo "=========================================="
			
 
				+echo "数据目录结构迁移脚本"
			
 
				+echo "=========================================="
			
 
				+echo ""
			
 
				+
			
 
				+# Account name: first argument, with a built-in default
			
 
				+ACCOUNT_NAME=${1:-"阿里多多酱"}
			
 
				+
			
 
				+ACCOUNT_DIR="data/账号/$ACCOUNT_NAME"
			
 
				+
			
 
				+if [ ! -d "$ACCOUNT_DIR" ]; then
			
 
				+    echo "错误: 账号目录不存在: $ACCOUNT_DIR"
			
 
				+    exit 1
			
 
				+fi
			
 
				+
			
 
				+echo "正在迁移账号: $ACCOUNT_NAME"
			
 
				+echo "账号目录: $ACCOUNT_DIR"
			
 
				+echo ""
			
 
				+
			
 
				+# Create the new directory structure
			
 
				+echo "创建新目录结构..."
			
 
				+mkdir -p "$ACCOUNT_DIR/input"
			
 
				+mkdir -p "$ACCOUNT_DIR/output/intermediate"
			
 
				+mkdir -p "$ACCOUNT_DIR/output/how解构结果"
			
 
				+mkdir -p "$ACCOUNT_DIR/output/visualization"
			
 
				+
			
 
				+echo -e "${GREEN}✓${NC} 目录结构创建完成"
			
 
				+echo ""
			
 
				+
			
 
				+# Migrate the data
			
 
				+echo "迁移数据..."
			
 
				+
			
 
				+# 1. Move what解构 (current posts); skipped when it is missing or a symlink
			
 
				+if [ -d "$ACCOUNT_DIR/what解构" ] && [ ! -L "$ACCOUNT_DIR/what解构" ]; then
			
 
				+    echo "  移动: what解构 -> input/what解构"
			
 
				+    mv "$ACCOUNT_DIR/what解构" "$ACCOUNT_DIR/input/"
			
 
				+fi
			
 
				+
			
 
				+# 2. Move what解构_所有帖子 (past posts); same missing/symlink guard as above
			
 
				+if [ -d "$ACCOUNT_DIR/what解构_所有帖子" ] && [ ! -L "$ACCOUNT_DIR/what解构_所有帖子" ]; then
			
 
				+    echo "  移动: what解构_所有帖子 -> input/what解构_所有帖子"
			
 
				+    mv "$ACCOUNT_DIR/what解构_所有帖子" "$ACCOUNT_DIR/input/"
			
 
				+fi
			
 
				+
			
 
				+# 3. The pattern directory is already in the right place; nothing to move
			
 
				+
			
 
				+echo -e "${GREEN}✓${NC} 数据迁移完成"
			
 
				+echo ""
			
 
				+
			
 
				+# Show the new directory structure. When `tree` fails (e.g. it is not
			
 
				+# installed) a hard-coded sketch is printed instead of the real listing.
			
 
				+echo "新的目录结构:"
			
 
				+echo "=========================================="
			
 
				+tree -L 3 -d "$ACCOUNT_DIR" 2>/dev/null || {
			
 
				+    echo "$ACCOUNT_DIR/"
			
 
				+    echo "├── input/"
			
 
				+    echo "│   ├── what解构/"
			
 
				+    echo "│   └── what解构_所有帖子/"
			
 
				+    echo "├── pattern/"
			
 
				+    echo "│   └── cluster/"
			
 
				+    echo "└── output/"
			
 
				+    echo "    ├── intermediate/"
			
 
				+    echo "    ├── how解构结果/"
			
 
				+    echo "    └── visualization/"
			
 
				+}
			
 
				+echo "=========================================="
			
 
				+echo ""
			
 
				+
			
 
				+echo -e "${GREEN}✓${NC} 迁移完成！"
			
 
				+echo ""
			
 
				+echo "提示："
			
 
				+echo "  - 原始数据现在在: $ACCOUNT_DIR/input/"
			
 
				+echo "  - 输出结果将保存到: $ACCOUNT_DIR/output/"
			
 
				+echo "  - pattern数据保持在: $ACCOUNT_DIR/pattern/"
			
--- a/script/data_processing/path_config.py
+++ b/script/data_processing/path_config.py
@@ -0,0 +1,256 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+路径配置管理工具
			
 
				+
			
 
				+提供统一的路径管理，支持多账号批量处理
			
 
				+"""
			
 
				+
			
 
				+import json
			
 
				+from pathlib import Path
			
 
				+from typing import Dict, Optional, List
			
 
				+import os
			
 
				+
			
 
				+
			
 
				+class PathConfig:
			
 
				+    """Resolve the per-account input and output paths from config/accounts.json."""
			
 
				+
			
 
				+    def __init__(self, account_name: Optional[str] = None):
			
 
				+        """
			
 
				+        Load the configuration and derive the account directory.
			
 
				+
			
 
				+        Args:
			
 
				+            account_name: Account name. When omitted, the ACCOUNT_NAME
			
 
				+                environment variable or the configured default is used.
			
 
				+        """
			
 
				+        # Project root: three directory levels above this file
			
 
				+        self.project_root = Path(__file__).parent.parent.parent
			
 
				+        self.config_file = self.project_root / "config" / "accounts.json"
			
 
				+
			
 
				+        # Load the configuration (sets self.config)
			
 
				+        self._load_config()
			
 
				+
			
 
				+        # Decide which account to use
			
 
				+        self.account_name = self._determine_account_name(account_name)
			
 
				+
			
 
				+        # Build the account directory: <project_root>/<account_base>/<account_name>
			
 
				+        account_base = self.config["paths"]["account_base"]
			
 
				+        self.account_dir = self.project_root / account_base / self.account_name
			
 
				+
			
 
				+    def _load_config(self):
			
 
				+        """
			
 
				+        Read the JSON configuration file into self.config.
			
 
				+
			
 
				+        Raises:
			
 
				+            FileNotFoundError: If the configuration file does not exist.
			
 
				+        """
			
 
				+        if not self.config_file.exists():
			
 
				+            raise FileNotFoundError(f"配置文件不存在: {self.config_file}")
			
 
				+
			
 
				+        with open(self.config_file, "r", encoding="utf-8") as f:
			
 
				+            self.config = json.load(f)
			
 
				+
			
 
				+    def _determine_account_name(self, account_name: Optional[str]) -> str:
			
 
				+        """
			
 
				+        Decide which account name to use.
			
 
				+
			
 
				+        Priority:
			
 
				+        1. The account name passed as an argument
			
 
				+        2. The ACCOUNT_NAME environment variable
			
 
				+        3. The default account from the configuration file
			
 
				+
			
 
				+        Empty values are skipped at every level (truthiness checks).
			
 
				+
			
 
				+        Args:
			
 
				+            account_name: Account name given by the caller, if any.
			
 
				+
			
 
				+        Returns:
			
 
				+            The account name that was selected.
			
 
				+
			
 
				+        Raises:
			
 
				+            ValueError: If none of the three sources provides a name.
			
 
				+        """
			
 
				+        # 1. Explicit argument
			
 
				+        if account_name:
			
 
				+            return account_name
			
 
				+
			
 
				+        # 2. Environment variable
			
 
				+        env_account = os.environ.get("ACCOUNT_NAME")
			
 
				+        if env_account:
			
 
				+            return env_account
			
 
				+
			
 
				+        # 3. Default from the configuration file
			
 
				+        default_account = self.config.get("default_account")
			
 
				+        if default_account:
			
 
				+            return default_account
			
 
				+
			
 
				+        # 4. Nothing was provided: raise an error
			
 
				+        raise ValueError(
			
 
				+            "未指定账号名称！请通过以下方式之一指定：\n"
			
 
				+            "1. 参数: PathConfig(account_name='账号名')\n"
			
 
				+            "2. 环境变量: export ACCOUNT_NAME='账号名'\n"
			
 
				+            "3. 配置文件: 在 config/accounts.json 中设置 default_account"
			
 
				+        )
			
 
				+
			
 
				+    def get_enabled_accounts(self) -> List[str]:
			
 
				+        """Return the names of all enabled accounts (a missing "enabled" key counts as enabled)."""
			
 
				+        accounts = self.config.get("accounts", [])
			
 
				+        return [acc["name"] for acc in accounts if acc.get("enabled", True)]
			
 
				+
			
 
				+    def get_all_accounts(self) -> List[str]:
			
 
				+        """Return the names of all configured accounts, including disabled ones."""
			
 
				+        accounts = self.config.get("accounts", [])
			
 
				+        return [acc["name"] for acc in accounts]
			
 
				+
			
 
				+    @property
			
 
				+    def filter_mode(self) -> str:
			
 
				+        """
			
 
				+        Return the configured filter mode.
			
 
				+
			
 
				+        Returns:
			
 
				+            The value of the "filter_mode" config key, or
			
 
				+            "exclude_current_posts" when the key is absent. Documented values:
			
 
				+            - "exclude_current_posts": filter out current post IDs (default, recommended)
			
 
				+            - "time_based": filter by publish time
			
 
				+            - "none": no filtering
			
 
				+            The value is returned as-is; it is not validated here.
			
 
				+        """
			
 
				+        return self.config.get("filter_mode", "exclude_current_posts")
			
 
				+
			
 
				+    # ===== Input paths =====
			
 
				+
			
 
				+    @property
			
 
				+    def current_posts_dir(self) -> Path:
			
 
				+        """Directory of the "what" deconstruction results for current posts."""
			
 
				+        rel_path = self.config["paths"]["input"]["current_posts"]
			
 
				+        return self.account_dir / rel_path
			
 
				+
			
 
				+    @property
			
 
				+    def historical_posts_dir(self) -> Path:
			
 
				+        """Directory of the "what" deconstruction results for past posts."""
			
 
				+        rel_path = self.config["paths"]["input"]["historical_posts"]
			
 
				+        return self.account_dir / rel_path
			
 
				+
			
 
				+    @property
			
 
				+    def pattern_cluster_file(self) -> Path:
			
 
				+        """Pattern clustering result file."""
			
 
				+        rel_path = self.config["paths"]["input"]["pattern_cluster"]
			
 
				+        return self.account_dir / rel_path
			
 
				+
			
 
				+    # ===== Output paths =====
			
 
				+
			
 
				+    @property
			
 
				+    def intermediate_dir(self) -> Path:
			
 
				+        """Directory for intermediate results."""
			
 
				+        rel_path = self.config["paths"]["output"]["intermediate"]
			
 
				+        return self.account_dir / rel_path
			
 
				+
			
 
				+    @property
			
 
				+    def feature_category_mapping_file(self) -> Path:
			
 
				+        """Feature-name-to-category mapping file inside the intermediate directory."""
			
 
				+        return self.intermediate_dir / "特征名称_分类映射.json"
			
 
				+
			
 
				+    @property
			
 
				+    def category_hierarchy_file(self) -> Path:
			
 
				+        """Category hierarchy mapping file inside the intermediate directory."""
			
 
				+        return self.intermediate_dir / "分类层级映射.json"
			
 
				+
			
 
				+    @property
			
 
				+    def feature_source_mapping_file(self) -> Path:
			
 
				+        """Feature-name-to-source-posts mapping file inside the intermediate directory."""
			
 
				+        return self.intermediate_dir / "特征名称_帖子来源.json"
			
 
				+
			
 
				+    @property
			
 
				+    def task_list_file(self) -> Path:
			
 
				+        """Deconstruction task list for current posts, inside the intermediate directory."""
			
 
				+        return self.intermediate_dir / "当前帖子_解构任务列表.json"
			
 
				+
			
 
				+    @property
			
 
				+    def how_results_dir(self) -> Path:
			
 
				+        """Directory for the "how" deconstruction results."""
			
 
				+        rel_path = self.config["paths"]["output"]["how_results"]
			
 
				+        return self.account_dir / rel_path
			
 
				+
			
 
				+    @property
			
 
				+    def visualization_dir(self) -> Path:
			
 
				+        """Directory for visualization output."""
			
 
				+        rel_path = self.config["paths"]["output"]["visualization"]
			
 
				+        return self.account_dir / rel_path
			
 
				+
			
 
				+    @property
			
 
				+    def visualization_file(self) -> Path:
			
 
				+        """Visualization HTML file inside the visualization directory."""
			
 
				+        return self.visualization_dir / "how解构结果_可视化.html"
			
 
				+
			
 
				+    # ===== Utility methods =====
			
 
				+
			
 
				+    def ensure_dirs(self):
			
 
				+        """Create the three output directories (and missing parents) if they do not exist."""
			
 
				+        self.intermediate_dir.mkdir(parents=True, exist_ok=True)
			
 
				+        self.how_results_dir.mkdir(parents=True, exist_ok=True)
			
 
				+        self.visualization_dir.mkdir(parents=True, exist_ok=True)
			
 
				+
			
 
				+    def validate_input_paths(self) -> Dict[str, bool]:
			
 
				+        """
			
 
				+        Check whether each input path exists.
			
 
				+
			
 
				+        Returns:
			
 
				+            Dict mapping a display label for each input path to whether
			
 
				+            that path exists.
			
 
				+        """
			
 
				+        results = {
			
 
				+            "当前帖子目录": self.current_posts_dir.exists(),
			
 
				+            "过去帖子目录": self.historical_posts_dir.exists(),
			
 
				+            "pattern聚合文件": self.pattern_cluster_file.exists(),
			
 
				+        }
			
 
				+        return results
			
 
				+
			
 
				+    def print_paths(self):
			
 
				+        """Print the account, filter mode and all resolved paths (for debugging)."""
			
 
				+        print("="*60)
			
 
				+        print(f"账号: {self.account_name}")
			
 
				+        print(f"过滤模式: {self.filter_mode}")
			
 
				+        print(f"账号根目录: {self.account_dir}")
			
 
				+        print("\n输入路径:")
			
 
				+        print(f"  当前帖子目录: {self.current_posts_dir}")
			
 
				+        print(f"  过去帖子目录: {self.historical_posts_dir}")
			
 
				+        print(f"  pattern聚合文件: {self.pattern_cluster_file}")
			
 
				+        print("\n输出路径:")
			
 
				+        print(f"  中间结果目录: {self.intermediate_dir}")
			
 
				+        print(f"  how解构结果目录: {self.how_results_dir}")
			
 
				+        print(f"  可视化结果目录: {self.visualization_dir}")
			
 
				+        print("="*60)
			
 
				+
			
 
				+    def check_and_print_status(self):
			
 
				+        """
			
 
				+        Print all paths and the existence status of each input path.
			
 
				+
			
 
				+        Returns:
			
 
				+            True when every input path exists, False otherwise.
			
 
				+        """
			
 
				+        self.print_paths()
			
 
				+        print("\n输入路径验证:")
			
 
				+        validation = self.validate_input_paths()
			
 
				+        for name, exists in validation.items():
			
 
				+            status = "✓ 存在" if exists else "✗ 不存在"
			
 
				+            print(f"  {name}: {status}")
			
 
				+
			
 
				+        if not all(validation.values()):
			
 
				+            print("\n⚠️  警告: 部分输入路径不存在！")
			
 
				+            return False
			
 
				+        else:
			
 
				+            print("\n✓ 所有输入路径验证通过")
			
 
				+            return True
			
 
				+
			
 
				+
			
 
				+def get_path_config(account_name: Optional[str] = None) -> PathConfig:
			
 
				+    """
			
 
				+    Build a PathConfig object (convenience wrapper around the constructor).
			
 
				+
			
 
				+    Args:
			
 
				+        account_name: Account name, optional; passed straight to PathConfig.
			
 
				+
			
 
				+    Returns:
			
 
				+        A new PathConfig instance.
			
 
				+    """
			
 
				+    return PathConfig(account_name)
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    # Manual check: print the resolved paths for the account given as the
			
 
				+    # first command-line argument (or the default account when omitted).
			
 
				+    import sys
			
 
				+
			
 
				+    account = sys.argv[1] if len(sys.argv) > 1 else None
			
 
				+
			
 
				+    try:
			
 
				+        config = PathConfig(account)
			
 
				+        # NOTE(review): the boolean returned here is ignored, so the exit
			
 
				+        # status stays 0 even when input paths are missing; only an
			
 
				+        # exception leads to exit status 1. Confirm this is intended for
			
 
				+        # callers that test the exit status.
			
 
				+        config.check_and_print_status()
			
 
				+
			
 
				+        print("\n所有启用的账号:")
			
 
				+        for acc in config.get_enabled_accounts():
			
 
				+            print(f"  - {acc}")
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        # Top-level boundary: report the error and signal failure to the shell
			
 
				+        print(f"错误: {e}")
			
 
				+        sys.exit(1)
			
--- a/script/data_processing/run_all.sh
+++ b/script/data_processing/run_all.sh
@@ -21,10 +21,10 @@ cd "$PROJECT_ROOT"
 
				 
			
 
				 # 步骤1: 提取特征分类映射
			
 
				 echo "[1/5] 正在提取特征分类映射..."
			
 
				-echo "  输入: data/data_1118/过去帖子_pattern聚合结果.json"
			
 
				-echo "  输入: data/data_1118/当前帖子_what解构结果/*.json (时间阈值)"
			
 
				-echo "  输出: data/data_1118/特征名称_分类映射.json"
			
 
				-echo "  输出: data/data_1118/分类层级映射.json"
			
 
				+echo "  输入: data/账号/阿里多多酱/pattern/cluster/clustered_data.json"
			
 
				+echo "  输入: data/账号/阿里多多酱/what解构/*.json (时间阈值)"
			
 
				+echo "  输出: data/账号/阿里多多酱/特征名称_分类映射.json"
			
 
				+echo "  输出: data/账号/阿里多多酱/分类层级映射.json"
			
 
				 python script/data_processing/extract_feature_categories.py
			
 
				 if [ $? -eq 0 ]; then
			
 
				     echo "✓ 特征分类映射提取完成"
			
@@ -36,9 +36,9 @@ echo ""
 
				 
			
 
				 # 步骤2: 提取特征帖子来源
			
 
				 echo "[2/5] 正在提取特征帖子来源..."
			
 
				-echo "  输入: data/data_1118/过去帖子_what解构结果/*.json"
			
 
				-echo "  输入: data/data_1118/当前帖子_what解构结果/*.json (时间阈值)"
			
 
				-echo "  输出: data/data_1118/特征名称_帖子来源.json"
			
 
				+echo "  输入: data/账号/阿里多多酱/what解构_所有帖子/*.json"
			
 
				+echo "  输入: data/账号/阿里多多酱/what解构/*.json (时间阈值)"
			
 
				+echo "  输出: data/账号/阿里多多酱/特征名称_帖子来源.json"
			
 
				 python script/data_processing/extract_features_from_posts.py
			
 
				 if [ $? -eq 0 ]; then
			
 
				     echo "✓ 特征帖子来源提取完成"
			
@@ -50,8 +50,8 @@ echo ""
 
				 
			
 
				 # 步骤3: 提取当前帖子解构任务列表
			
 
				 echo "[3/5] 正在提取当前帖子解构任务列表..."
			
 
				-echo "  输入: data/data_1118/当前帖子_what解构结果/*.json"
			
 
				-echo "  输出: data/data_1118/当前帖子_解构任务列表.json"
			
 
				+echo "  输入: data/账号/阿里多多酱/what解构/*.json"
			
 
				+echo "  输出: data/账号/阿里多多酱/当前帖子_解构任务列表.json"
			
 
				 python script/data_processing/extract_current_posts.py
			
 
				 if [ $? -eq 0 ]; then
			
 
				     echo "✓ 当前帖子解构任务列表提取完成"
			
@@ -63,10 +63,10 @@ echo ""
 
				 
			
 
				 # 步骤4: 匹配灵感特征
			
 
				 echo "[4/5] 正在匹配灵感特征..."
			
 
				-echo "  输入: data/data_1118/当前帖子_解构任务列表.json"
			
 
				-echo "  输入: data/data_1118/特征名称_帖子来源.json"
			
 
				-echo "  输入: data/data_1118/特征名称_分类映射.json"
			
 
				-echo "  输出: data/data_1118/当前帖子_how解构结果/*_how.json"
			
 
				+echo "  输入: data/账号/阿里多多酱/当前帖子_解构任务列表.json"
			
 
				+echo "  输入: data/账号/阿里多多酱/特征名称_帖子来源.json"
			
 
				+echo "  输入: data/账号/阿里多多酱/特征名称_分类映射.json"
			
 
				+echo "  输出: data/账号/阿里多多酱/当前帖子_how解构结果/*_how.json"
			
 
				 python script/data_processing/match_inspiration_features.py
			
 
				 if [ $? -eq 0 ]; then
			
 
				     echo "✓ 灵感特征匹配完成"
			
@@ -78,10 +78,10 @@ echo ""
 
				 
			
 
				 # 步骤5: 可视化结果
			
 
				 echo "[5/5] 正在生成可视化结果..."
			
 
				-echo "  输入: data/data_1118/当前帖子_how解构结果/*.json"
			
 
				-echo "  输入: data/data_1118/特征名称_分类映射.json"
			
 
				-echo "  输入: data/data_1118/分类层级映射.json"
			
 
				-echo "  输出: data/data_1118/当前帖子_how解构结果_可视化.html"
			
 
				+echo "  输入: data/账号/阿里多多酱/当前帖子_how解构结果/*.json"
			
 
				+echo "  输入: data/账号/阿里多多酱/特征名称_分类映射.json"
			
 
				+echo "  输入: data/账号/阿里多多酱/分类层级映射.json"
			
 
				+echo "  输出: data/账号/阿里多多酱/当前帖子_how解构结果_可视化.html"
			
 
				 python script/data_processing/visualize_how_results.py
			
 
				 if [ $? -eq 0 ]; then
			
 
				     echo "✓ 可视化结果生成完成"
			
--- a/script/data_processing/run_all_with_config.sh
+++ b/script/data_processing/run_all_with_config.sh
@@ -0,0 +1,240 @@
 
				+#!/bin/bash
			
 
				+# 数据处理脚本执行脚本（支持多账号配置）
			
 
				+# 使用方式：
			
 
				+#   1. 处理单个账号: ./run_all_with_config.sh 阿里多多酱
			
 
				+#   2. 处理所有启用的账号: ./run_all_with_config.sh --all
			
 
				+#   3. 使用环境变量: ACCOUNT_NAME=阿里多多酱 ./run_all_with_config.sh
			
 
				+
			
 
				+set -e  # 遇到错误立即退出
			
 
				+
			
 
				+# 获取脚本所在目录的绝对路径
			
 
				+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
			
 
				+# 项目根目录
			
 
				+PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
			
 
				+
			
 
				+# 切换到项目根目录
			
 
				+cd "$PROJECT_ROOT"
			
 
				+
			
 
				+# 颜色定义
			
 
				+GREEN='\033[0;32m'
			
 
				+RED='\033[0;31m'
			
 
				+YELLOW='\033[1;33m'
			
 
				+BLUE='\033[0;34m'
			
 
				+NC='\033[0m' # No Color
			
 
				+
			
 
				+# 打印带颜色的消息
			
 
				+print_info() {
			
 
				+    # Print message $1 after a blue [INFO] tag; -e makes echo interpret the color escapes.
			
 
				+    echo -e "${BLUE}[INFO]${NC} $1"
			
 
				+}
			
 
				+
			
 
				+print_success() {
			
 
				+    # Print message $1 after a green [SUCCESS] tag; -e makes echo interpret the color escapes.
			
 
				+    echo -e "${GREEN}[SUCCESS]${NC} $1"
			
 
				+}
			
 
				+
			
 
				+print_error() {
			
 
				+    # Print message $1 after a red [ERROR] tag (written to stdout, not stderr).
			
 
				+    echo -e "${RED}[ERROR]${NC} $1"
			
 
				+}
			
 
				+
			
 
				+print_warning() {
			
 
				+    # Print message $1 after a yellow [WARNING] tag; -e makes echo interpret the color escapes.
			
 
				+    echo -e "${YELLOW}[WARNING]${NC} $1"
			
 
				+}
			
 
				+
			
 
				+# 处理单个账号的函数
			
 
				+process_account() {
			
 
				+    # Run the five processing steps for one account.
			
 
				+    #
			
 
				+    # Args:
			
 
				+    #   $1 - account name; also exported as ACCOUNT_NAME because the step
			
 
				+    #        scripts below are started without arguments.
			
 
				+    # Returns:
			
 
				+    #   0 when every step succeeds, 1 as soon as any step fails.
			
 
				+    #
			
 
				+    # Every command is tested directly with `if` instead of inspecting `$?`
			
 
				+    # on the next line: this script runs under `set -e`, so when the
			
 
				+    # function is called outside a conditional a failing command would
			
 
				+    # abort the script before a following `[ $? ... ]` check could print
			
 
				+    # its error message.
			
 
				+    local account_name=$1
			
 
				+
			
 
				+    echo ""
			
 
				+    echo "=========================================="
			
 
				+    echo "处理账号: $account_name"
			
 
				+    echo "=========================================="
			
 
				+
			
 
				+    # Set the environment variable read by the step scripts
			
 
				+    export ACCOUNT_NAME="$account_name"
			
 
				+
			
 
				+    # Check the path configuration
			
 
				+    print_info "检查路径配置..."
			
 
				+    if ! python script/data_processing/path_config.py "$account_name"; then
			
 
				+        print_error "路径配置检查失败，跳过账号: $account_name"
			
 
				+        return 1
			
 
				+    fi
			
 
				+
			
 
				+    echo ""
			
 
				+    print_info "开始处理数据..."
			
 
				+    echo ""
			
 
				+
			
 
				+    # Step 1: extract the feature category mapping
			
 
				+    print_info "[1/5] 正在提取特征分类映射..."
			
 
				+    if python script/data_processing/extract_feature_categories.py; then
			
 
				+        print_success "特征分类映射提取完成"
			
 
				+    else
			
 
				+        print_error "特征分类映射提取失败"
			
 
				+        return 1
			
 
				+    fi
			
 
				+    echo ""
			
 
				+
			
 
				+    # Step 2: extract the source posts of each feature
			
 
				+    print_info "[2/5] 正在提取特征帖子来源..."
			
 
				+    if python script/data_processing/extract_features_from_posts.py; then
			
 
				+        print_success "特征帖子来源提取完成"
			
 
				+    else
			
 
				+        print_error "特征帖子来源提取失败"
			
 
				+        return 1
			
 
				+    fi
			
 
				+    echo ""
			
 
				+
			
 
				+    # Step 3: extract the deconstruction task list for current posts
			
 
				+    print_info "[3/5] 正在提取当前帖子解构任务列表..."
			
 
				+    if python script/data_processing/extract_current_posts.py; then
			
 
				+        print_success "当前帖子解构任务列表提取完成"
			
 
				+    else
			
 
				+        print_error "当前帖子解构任务列表提取失败"
			
 
				+        return 1
			
 
				+    fi
			
 
				+    echo ""
			
 
				+
			
 
				+    # Step 4: match inspiration features
			
 
				+    print_info "[4/5] 正在匹配灵感特征..."
			
 
				+    if python script/data_processing/match_inspiration_features.py; then
			
 
				+        print_success "灵感特征匹配完成"
			
 
				+    else
			
 
				+        print_error "灵感特征匹配失败"
			
 
				+        return 1
			
 
				+    fi
			
 
				+    echo ""
			
 
				+
			
 
				+    # Step 5: generate the visualization
			
 
				+    print_info "[5/5] 正在生成可视化结果..."
			
 
				+    if python script/data_processing/visualize_how_results.py; then
			
 
				+        print_success "可视化结果生成完成"
			
 
				+    else
			
 
				+        print_error "可视化结果生成失败"
			
 
				+        return 1
			
 
				+    fi
			
 
				+    echo ""
			
 
				+
			
 
				+    print_success "账号 $account_name 处理完成！"
			
 
				+    return 0
			
 
				+}
			
 
				+
			
 
				+# 获取所有启用的账号
			
 
				+get_enabled_accounts() {
			
 
				+    # Print the names of all enabled accounts from config/accounts.json on
			
 
				+    # one line, separated by spaces (an account without an "enabled" key
			
 
				+    # counts as enabled). The path is relative to the current directory.
			
 
				+    # The file is opened explicitly as UTF-8, as path_config.py does,
			
 
				+    # because it contains non-ASCII account names and the locale's
			
 
				+    # default encoding is not guaranteed to be UTF-8.
			
 
				+    python -c "
			
 
				+import json
			
 
				+from pathlib import Path
			
 
				+config_file = Path('config/accounts.json')
			
 
				+with open(config_file, encoding='utf-8') as f:
			
 
				+    config = json.load(f)
			
 
				+accounts = [acc['name'] for acc in config.get('accounts', []) if acc.get('enabled', True)]
			
 
				+print(' '.join(accounts))
			
 
				+"
			
 
				+}
			
 
				+
			
 
				+# 获取默认账号
			
 
				+get_default_account() {
			
 
				+    # Print the "default_account" value from config/accounts.json, or an
			
 
				+    # empty line when the key is absent. The path is relative to the
			
 
				+    # current directory. The file is opened explicitly as UTF-8, as
			
 
				+    # path_config.py does, because it contains non-ASCII account names and
			
 
				+    # the locale's default encoding is not guaranteed to be UTF-8.
			
 
				+    python -c "
			
 
				+import json
			
 
				+from pathlib import Path
			
 
				+config_file = Path('config/accounts.json')
			
 
				+with open(config_file, encoding='utf-8') as f:
			
 
				+    config = json.load(f)
			
 
				+default = config.get('default_account', '')
			
 
				+print(default)
			
 
				+"
			
 
				+}
			
 
				+
			
 
				+# 主逻辑
			
 
				+main() {
			
 
				+    # Entry point: decide which account(s) to process.
			
 
				+    #
			
 
				+    # Selection order: "--all" (every enabled account), then an account
			
 
				+    # name given as $1, then the ACCOUNT_NAME environment variable, then
			
 
				+    # the default account from the config. When none applies, the usage
			
 
				+    # text is printed and the script exits with status 1.
			
 
				+    echo "=========================================="
			
 
				+    echo "数据处理流程"
			
 
				+    echo "项目根目录: $PROJECT_ROOT"
			
 
				+    echo "=========================================="
			
 
				+
			
 
				+    # Parse arguments
			
 
				+    if [ "$1" == "--all" ]; then
			
 
				+        # Process every enabled account
			
 
				+        print_info "处理所有启用的账号..."
			
 
				+        accounts=$(get_enabled_accounts)
			
 
				+
			
 
				+        if [ -z "$accounts" ]; then
			
 
				+            print_error "没有找到启用的账号"
			
 
				+            exit 1
			
 
				+        fi
			
 
				+
			
 
				+        print_info "找到启用的账号: $accounts"
			
 
				+
			
 
				+        success_count=0
			
 
				+        fail_count=0
			
 
				+
			
 
				+        # $accounts is left unquoted on purpose: the names arrive
			
 
				+        # separated by spaces and are split into one word per account.
			
 
				+        for account in $accounts; do
			
 
				+            if process_account "$account"; then
			
 
				+                # Plain assignments instead of ((counter++)): that form
			
 
				+                # returns status 1 when the old value is 0, which under
			
 
				+                # `set -e` aborted the script after the first account.
			
 
				+                success_count=$((success_count + 1))
			
 
				+            else
			
 
				+                fail_count=$((fail_count + 1))
			
 
				+            fi
			
 
				+        done
			
 
				+
			
 
				+        echo ""
			
 
				+        echo "=========================================="
			
 
				+        print_info "批量处理完成"
			
 
				+        print_success "成功: $success_count 个账号"
			
 
				+        if [ $fail_count -gt 0 ]; then
			
 
				+            print_error "失败: $fail_count 个账号"
			
 
				+        fi
			
 
				+        echo "=========================================="
			
 
				+
			
 
				+        # A batch with at least one failed account exits non-zero
			
 
				+        if [ $fail_count -gt 0 ]; then
			
 
				+            exit 1
			
 
				+        fi
			
 
				+
			
 
				+    elif [ -n "$1" ]; then
			
 
				+        # Process the account named on the command line
			
 
				+        process_account "$1"
			
 
				+
			
 
				+    elif [ -n "$ACCOUNT_NAME" ]; then
			
 
				+        # Use the account given by the environment variable
			
 
				+        print_info "使用环境变量 ACCOUNT_NAME=$ACCOUNT_NAME"
			
 
				+        process_account "$ACCOUNT_NAME"
			
 
				+
			
 
				+    else
			
 
				+        # Try the default account from the config
			
 
				+        default_account=$(get_default_account)
			
 
				+        if [ -n "$default_account" ]; then
			
 
				+            print_info "使用默认账号: $default_account"
			
 
				+            process_account "$default_account"
			
 
				+            exit $?
			
 
				+        fi
			
 
				+
			
 
				+        # Show usage
			
 
				+        echo "用法:"
			
 
				+        echo "  1. 处理单个账号:"
			
 
				+        echo "     $0 <账号名>"
			
 
				+        echo "     例如: $0 阿里多多酱"
			
 
				+        echo ""
			
 
				+        echo "  2. 处理所有启用的账号:"
			
 
				+        echo "     $0 --all"
			
 
				+        echo ""
			
 
				+        echo "  3. 使用环境变量:"
			
 
				+        echo "     ACCOUNT_NAME=阿里多多酱 $0"
			
 
				+        echo ""
			
 
				+
			
 
				+        # Show the accounts that are available
			
 
				+        accounts=$(get_enabled_accounts)
			
 
				+        if [ -n "$accounts" ]; then
			
 
				+            echo "当前启用的账号:"
			
 
				+            for account in $accounts; do
			
 
				+                echo "  - $account"
			
 
				+            done
			
 
				+        fi
			
 
				+
			
 
				+        exit 1
			
 
				+    fi
			
 
				+}
			
 
				+
			
 
				+# 运行主函数
			
 
				+main "$@"
			
--- a/script/data_processing/visualize_how_results.py
+++ b/script/data_processing/visualize_how_results.py
@@ -19,6 +19,8 @@ import html as html_module
 
				 project_root = Path(__file__).parent.parent.parent
			
 
				 sys.path.insert(0, str(project_root))
			
 
				 
			
 
				+from script.data_processing.path_config import PathConfig
			
 
				+
			
 
				 
			
 
				 # 注意：已改用基于相似度的显示方式，不再使用关系类型
			
 
				 # def get_relation_color(relation: str) -> str:
			
@@ -274,11 +276,9 @@ def generate_inspiration_detail_html(inspiration_point: Dict, feature_status_map
 
				     return html
			
 
				 
			
 
				 
			
 
				-def load_feature_category_mapping() -> Dict:
			
 
				+def load_feature_category_mapping(config: PathConfig) -> Dict:
			
 
				     """加载特征名称到分类的映射"""
			
 
				-    script_dir = Path(__file__).parent
			
 
				-    project_root = script_dir.parent.parent
			
 
				-    mapping_file = project_root / "data" / "data_1118" / "特征名称_分类映射.json"
			
 
				+    mapping_file = config.feature_category_mapping_file
			
 
				 
			
 
				     try:
			
 
				         with open(mapping_file, "r", encoding="utf-8") as f:
			
@@ -288,11 +288,9 @@ def load_feature_category_mapping() -> Dict:
 
				         return {}
			
 
				 
			
 
				 
			
 
				-def load_feature_source_mapping() -> Dict:
			
 
				+def load_feature_source_mapping(config: PathConfig) -> Dict:
			
 
				     """加载特征名称到帖子来源的映射"""
			
 
				-    script_dir = Path(__file__).parent
			
 
				-    project_root = script_dir.parent.parent
			
 
				-    mapping_file = project_root / "data" / "data_1118" / "特征名称_帖子来源.json"
			
 
				+    mapping_file = config.feature_source_mapping_file
			
 
				 
			
 
				     try:
			
 
				         with open(mapping_file, "r", encoding="utf-8") as f:
			
@@ -3696,23 +3694,31 @@ def minify_html(html: str) -> str:
 
				 
			
 
				 def main():
			
 
				     """主函数"""
			
 
				-    script_dir = Path(__file__).parent
			
 
				-    project_root = script_dir.parent.parent
			
 
				-    data_dir = project_root / "data" / "data_1118"
			
 
				+    # 使用路径配置
			
 
				+    config = PathConfig()
			
 
				+
			
 
				+    # 确保输出目录存在
			
 
				+    config.ensure_dirs()
			
 
				+
			
 
				+    # 获取路径
			
 
				+    input_dir = config.how_results_dir
			
 
				+    output_file = config.visualization_file
			
 
				 
			
 
				-    input_dir = data_dir / "当前帖子_how解构结果"
			
 
				-    output_file = data_dir / "当前帖子_how解构结果_可视化.html"
			
 
				+    print(f"账号: {config.account_name}")
			
 
				+    print(f"输入目录: {input_dir}")
			
 
				+    print(f"输出文件: {output_file}")
			
 
				+    print()
			
 
				 
			
 
				     print(f"读取 how 解构结果: {input_dir}")
			
 
				 
			
 
				     # 加载特征分类映射
			
 
				     print(f"加载特征分类映射...")
			
 
				-    category_mapping = load_feature_category_mapping()
			
 
				+    category_mapping = load_feature_category_mapping(config)
			
 
				     print(f"已加载 {sum(len(v) for v in category_mapping.values())} 个特征分类")
			
 
				 
			
 
				     # 加载特征来源映射
			
 
				     print(f"加载特征来源映射...")
			
 
				-    source_mapping = load_feature_source_mapping()
			
 
				+    source_mapping = load_feature_source_mapping(config)
			
 
				     print(f"已加载 {len(source_mapping)} 个特征的来源信息")
			
 
				 
			
 
				     json_files = list(input_dir.glob("*_how.json"))