2 주 전 · 745748c4f2
--- a/README_索引构建.md
+++ b/README_索引构建.md
@@ -0,0 +1,435 @@
 
				+# 人设数据索引构建工具
			
 
				+
			
 
				+本目录包含三个索引构建脚本，用于从人设数据和 what 解构结果中构建不同维度的索引，方便快速查询和分析。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 快速开始
			
 
				+
			
 
				+### 一键构建所有索引
			
 
				+
			
 
				+```bash
			
 
				+# 构建所有三个索引文件
			
 
				+python build_category_index.py --persona-dir data/阿里多多酱/out/人设_1110
			
 
				+python build_point_to_note_index.py --what-dir data/阿里多多酱/out/人设_1110/what解构结果
			
 
				+python build_note_to_all_index.py --persona-dir data/阿里多多酱/out/人设_1110
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 索引文件说明
			
 
				+
			
 
				+### 1. 分类索引_完整.json
			
 
				+
			
 
				+**脚本**: `build_category_index.py`
			
 
				+
			
 
				+**用途**: 按**分类维度**组织帖子（灵感分类、目的分类、关键点分类）
			
 
				+
			
 
				+**数据来源**:
			
 
				+- 灵感分类：来自 `人设.json`
			
 
				+- 目的分类：来自 `what解构结果/`
			
 
				+- 关键点分类：来自 `what解构结果/`
			
 
				+
			
 
				+**结构**:
			
 
				+```json
			
 
				+{
			
 
				+  "灵感分类": {
			
 
				+    "职场疲惫共鸣": {
			
 
				+      "分类层级": "二级分类",
			
 
				+      "分类名称": "职场疲惫共鸣",
			
 
				+      "分类定义": "关于工作压力、倦怠及身份认同的普遍情绪宣泄。",
			
 
				+      "分类路径": [
			
 
				+        {"视角名称": "灵感触发源", "视角描述": "..."},
			
 
				+        {"分类名称": "个人感受与思考", "分类定义": "..."},
			
 
				+        {"分类名称": "职场疲惫共鸣", "分类定义": "..."}
			
 
				+      ],
			
 
				+      "帖子ID列表": ["68d1ebb8...", "68d00894..."],
			
 
				+      "帖子详情列表": [{...}, {...}]
			
 
				+    }
			
 
				+  },
			
 
				+  "目的分类": {...},
			
 
				+  "关键点分类": {...}
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+**使用场景**:
			
 
				+- 查看某个分类下的所有帖子
			
 
				+- 了解分类的层级结构和定义
			
 
				+- 按分类维度分析内容
			
 
				+
			
 
				+**统计**:
			
 
				+- 灵感分类：15 个
			
 
				+- 目的分类：45 个
			
 
				+- 关键点分类：60 个
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### 2. 点到帖子映射.json
			
 
				+
			
 
				+**脚本**: `build_point_to_note_index.py`
			
 
				+
			
 
				+**用途**: 按**具体点**组织帖子，提供双向映射（点→帖子，帖子→点）
			
 
				+
			
 
				+**数据来源**: 全部来自 `what解构结果/`
			
 
				+
			
 
				+**结构**:
			
 
				+```json
			
 
				+{
			
 
				+  "点到帖子映射": {
			
 
				+    "灵感点": {
			
 
				+      "职场投入产出不平衡": {
			
 
				+        "灵感点": "职场投入产出不平衡",
			
 
				+        "维度": "职场观察",
			
 
				+        "描述": "观察到职场中普遍存在的现象...",
			
 
				+        "帖子ID列表": ["68d00894..."],
			
 
				+        "帖子详情列表": [{...}]
			
 
				+      }
			
 
				+    },
			
 
				+    "目的点": {...},
			
 
				+    "关键点": {...}
			
 
				+  },
			
 
				+  "帖子到点映射": {
			
 
				+    "68d00894...": {
			
 
				+      "灵感点列表": [
			
 
				+        {
			
 
				+          "灵感点": "职场投入产出不平衡",
			
 
				+          "来源字段": "共性差异",
			
 
				+          "维度": "职场观察",
			
 
				+          "描述": "..."
			
 
				+        }
			
 
				+      ],
			
 
				+      "目的点列表": [...],
			
 
				+      "关键点列表": [...],
			
 
				+      "帖子详情": {...}
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+**使用场景**:
			
 
				+- 查看某个具体灵感点/目的点/关键点的所有帖子
			
 
				+- 反查：某个帖子有哪些点
			
 
				+- 分析点与帖子的关联关系
			
 
				+
			
 
				+**统计**:
			
 
				+- 灵感点：46 个（关联 20 个帖子）
			
 
				+- 目的点：45 个（关联 19 个帖子）
			
 
				+- 关键点：60 个（关联 19 个帖子）
			
 
				+- 平均每个帖子：2.3 个灵感点、2.2 个目的点、3.2 个关键点
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### 3. 帖子到分类和点映射.json
			
 
				+
			
 
				+**脚本**: `build_note_to_all_index.py`
			
 
				+
			
 
				+**用途**: 按**帖子ID**组织，包含每个帖子的完整信息（分类 + 点 + 详情）
			
 
				+
			
 
				+**数据来源**:
			
 
				+- 分类信息：来自 `人设.json`
			
 
				+- 点信息：来自 `what解构结果/`
			
 
				+- 帖子详情：通过详情接口获取
			
 
				+
			
 
				+**结构**:
			
 
				+```json
			
 
				+{
			
 
				+  "68d00894...": {
			
 
				+    "帖子ID": "68d00894...",
			
 
				+    "所属分类": [
			
 
				+      {
			
 
				+        "分类类型": "灵感分类",
			
 
				+        "视角名称": "灵感触发源",
			
 
				+        "一级分类": "个人感受与思考",
			
 
				+        "二级分类": "职场疲惫共鸣",
			
 
				+        "分类定义": "..."
			
 
				+      }
			
 
				+    ],
			
 
				+    "灵感点列表": [
			
 
				+      {
			
 
				+        "灵感点": "职场投入产出不平衡",
			
 
				+        "来源字段": "共性差异",
			
 
				+        "维度": "职场观察",
			
 
				+        "描述": "..."
			
 
				+      }
			
 
				+    ],
			
 
				+    "目的点列表": [...],
			
 
				+    "关键点列表": [...],
			
 
				+    "帖子详情": {
			
 
				+      "channel_content_id": "68d00894...",
			
 
				+      "title": "上班要显贵 就不能太基础",
			
 
				+      "body_text": "...",
			
 
				+      "like_count": 768,
			
 
				+      "images": [...],
			
 
				+      "content_type": "normal",
			
 
				+      "publish_time": "2025-09-22 10:01:05"
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+**使用场景**:
			
 
				+- 通过帖子ID快速获取完整信息
			
 
				+- 查看某个帖子属于哪些分类
			
 
				+- 查看某个帖子有哪些点
			
 
				+- 一站式获取帖子的所有元数据
			
 
				+
			
 
				+**统计**:
			
 
				+- 帖子总数：20 个
			
 
				+- 有分类信息的帖子：17 个
			
 
				+- 有灵感点信息的帖子：20 个
			
 
				+- 有目的点信息的帖子：19 个
			
 
				+- 有关键点信息的帖子：19 个
			
 
				+- 平均每个帖子：2.0 个分类、2.3 个灵感点、2.2 个目的点、3.2 个关键点
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 使用指南
			
 
				+
			
 
				+### 脚本 1: build_category_index.py
			
 
				+
			
 
				+构建按分类维度组织的索引。
			
 
				+
			
 
				+**基本使用**:
			
 
				+```bash
			
 
				+python build_category_index.py --persona-dir data/阿里多多酱/out/人设_1110
			
 
				+```
			
 
				+
			
 
				+**参数说明**:
			
 
				+- `--persona-dir`: 人设目录路径（必填）
			
 
				+- `--output`: 输出文件路径（可选，默认: `{persona_dir}/分类索引_完整.json`）
			
 
				+- `--no-details`: 不获取帖子详情（可选，只构建索引结构）
			
 
				+
			
 
				+**示例**:
			
 
				+```bash
			
 
				+# 基本使用（包含帖子详情）
			
 
				+python build_category_index.py --persona-dir data/阿里多多酱/out/人设_1110
			
 
				+
			
 
				+# 只构建索引结构，不获取详情（速度快）
			
 
				+python build_category_index.py --persona-dir data/阿里多多酱/out/人设_1110 --no-details
			
 
				+
			
 
				+# 自定义输出文件
			
 
				+python build_category_index.py --persona-dir data/阿里多多酱/out/人设_1110 --output my_index.json
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### 脚本 2: build_point_to_note_index.py
			
 
				+
			
 
				+构建点到帖子的双向映射索引。
			
 
				+
			
 
				+**基本使用**:
			
 
				+```bash
			
 
				+python build_point_to_note_index.py --what-dir data/阿里多多酱/out/人设_1110/what解构结果
			
 
				+```
			
 
				+
			
 
				+**参数说明**:
			
 
				+- `--what-dir`: what解构结果目录路径（必填）
			
 
				+- `--output`: 输出文件路径（可选，默认: `{what_dir}/../点到帖子映射.json`）
			
 
				+- `--no-details`: 不获取帖子详情（可选）
			
 
				+
			
 
				+**示例**:
			
 
				+```bash
			
 
				+# 基本使用
			
 
				+python build_point_to_note_index.py --what-dir data/阿里多多酱/out/人设_1110/what解构结果
			
 
				+
			
 
				+# 只构建索引结构
			
 
				+python build_point_to_note_index.py --what-dir data/阿里多多酱/out/人设_1110/what解构结果 --no-details
			
 
				+
			
 
				+# 自定义输出文件
			
 
				+python build_point_to_note_index.py --what-dir data/阿里多多酱/out/人设_1110/what解构结果 --output custom.json
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### 脚本 3: build_note_to_all_index.py
			
 
				+
			
 
				+构建帖子ID到分类和点的完整映射。
			
 
				+
			
 
				+**基本使用**:
			
 
				+```bash
			
 
				+python build_note_to_all_index.py --persona-dir data/阿里多多酱/out/人设_1110
			
 
				+```
			
 
				+
			
 
				+**参数说明**:
			
 
				+- `--persona-dir`: 人设目录路径（必填）
			
 
				+- `--output`: 输出文件路径（可选，默认: `{persona_dir}/帖子到分类和点映射.json`）
			
 
				+- `--no-details`: 不获取帖子详情（可选）
			
 
				+
			
 
				+**示例**:
			
 
				+```bash
			
 
				+# 基本使用
			
 
				+python build_note_to_all_index.py --persona-dir data/阿里多多酱/out/人设_1110
			
 
				+
			
 
				+# 只构建索引结构
			
 
				+python build_note_to_all_index.py --persona-dir data/阿里多多酱/out/人设_1110 --no-details
			
 
				+
			
 
				+# 自定义输出文件
			
 
				+python build_note_to_all_index.py --persona-dir data/阿里多多酱/out/人设_1110 --output custom.json
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 查询场景对比
			
 
				+
			
 
				+| 查询需求 | 使用哪个索引 | 示例查询 |
			
 
				+|---------|-------------|---------|
			
 
				+| 查看某个**分类**下的所有帖子 | `分类索引_完整.json` | "职场疲惫共鸣"分类有哪些帖子？ |
			
 
				+| 查看某个**具体点**的所有帖子 | `点到帖子映射.json` → 点到帖子映射 | "职场投入产出不平衡"这个灵感点有哪些帖子？ |
			
 
				+| 查看某个**帖子**的所有分类 | `帖子到分类和点映射.json` | 帖子 68d00894 属于哪些分类？ |
			
 
				+| 查看某个**帖子**的所有点 | `帖子到分类和点映射.json` | 帖子 68d00894 有哪些灵感点/目的点/关键点？ |
			
 
				+| 反查：帖子有哪些点 | `点到帖子映射.json` → 帖子到点映射 | 帖子 68d00894 有哪些点？ |
			
 
				+| 获取帖子的完整信息 | `帖子到分类和点映射.json` | 帖子 68d00894 的详情、分类、点是什么？ |
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 数据流程图
			
 
				+
			
 
				+```
			
 
				+人设.json
			
 
				+  ↓
			
 
				+build_category_index.py  →  分类索引_完整.json
			
 
				+  (灵感分类)
			
 
				+
			
 
				+what解构结果/
			
 
				+  ↓
			
 
				+build_category_index.py  →  分类索引_完整.json
			
 
				+  (目的分类 + 关键点分类)
			
 
				+  ↓ （同样直接读取 what解构结果/，不依赖上一步的输出）
			
 
				+build_point_to_note_index.py  →  点到帖子映射.json
			
 
				+  (灵感点 + 目的点 + 关键点)
			
 
				+
			
 
				+人设.json + what解构结果/
			
 
				+  ↓
			
 
				+build_note_to_all_index.py  →  帖子到分类和点映射.json
			
 
				+  (完整映射)
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 核心特性
			
 
				+
			
 
				+### 1. 自动缓存
			
 
				+
			
 
				+所有脚本都会利用帖子详情的缓存机制：
			
 
				+- 已获取过的帖子详情会自动使用缓存
			
 
				+- 只有新帖子才会调用详情接口
			
 
				+- 大大提升重复运行的速度
			
 
				+
			
 
				+### 2. 数据去重
			
 
				+
			
 
				+- 帖子ID自动去重
			
 
				+- 图片URL自动去重（保持顺序）
			
 
				+- 确保数据一致性
			
 
				+
			
 
				+### 3. Null处理
			
 
				+
			
 
				+- 不存在的字段统一使用 `null`（而非空字符串或 0）
			
 
				+- 保持数据结构的一致性和可靠性
			
 
				+
			
 
				+### 4. 时间格式化
			
 
				+
			
 
				+- 自动将时间戳转换为可读格式
			
 
				+- `publish_timestamp`: 毫秒时间戳
			
 
				+- `publish_time`: "YYYY-MM-DD HH:MM:SS"
			
 
				+
			
 
				+### 5. 内容类型判断
			
 
				+
			
 
				+- 根据 `video` 字段自动判断
			
 
				+- 有视频: `content_type = "video"`
			
 
				+- 无视频: `content_type = "normal"`
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 常见问题
			
 
				+
			
 
				+### Q1: 三个索引文件有什么区别？
			
 
				+
			
 
				+**A**: 它们从不同维度组织相同的数据：
			
 
				+- **分类索引**: 按分类维度（灵感/目的/关键点分类）
			
 
				+- **点到帖子映射**: 按具体点（具体的灵感点/目的点/关键点）
			
 
				+- **帖子到分类和点映射**: 按帖子ID（一站式获取所有信息）
			
 
				+
			
 
				+### Q2: 为什么需要三个不同的索引？
			
 
				+
			
 
				+**A**: 不同的查询场景需要不同的数据组织方式：
			
 
				+- 分析分类 → 用分类索引
			
 
				+- 分析具体点 → 用点到帖子映射
			
 
				+- 查看帖子完整信息 → 用帖子到分类和点映射
			
 
				+
			
 
				+### Q3: 如何更新索引？
			
 
				+
			
 
				+**A**: 重新运行对应的脚本即可：
			
 
				+```bash
			
 
				+# 更新所有索引
			
 
				+python build_category_index.py --persona-dir data/阿里多多酱/out/人设_1110
			
 
				+python build_point_to_note_index.py --what-dir data/阿里多多酱/out/人设_1110/what解构结果
			
 
				+python build_note_to_all_index.py --persona-dir data/阿里多多酱/out/人设_1110
			
 
				+```
			
 
				+
			
 
				+### Q4: `--no-details` 什么时候使用？
			
 
				+
			
 
				+**A**: 适用于以下场景：
			
 
				+- 只需要索引结构，不需要帖子详情
			
 
				+- 快速验证索引构建是否正确
			
 
				+- 减少API调用（开发测试时）
			
 
				+
			
 
				+### Q5: 帖子详情从哪里来？
			
 
				+
			
 
				+**A**: 通过 `script.detail.get_xiaohongshu_detail()` 接口获取：
			
 
				+- 自动使用缓存（如果已获取过）
			
 
				+- 自动保存到 `data/detail/xiaohongshu_detail/{note_id}/`
			
 
				+- 包含 raw 和 clean 两种格式
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 技术细节
			
 
				+
			
 
				+### 数据来源
			
 
				+
			
 
				+| 数据类型 | 来源文件 | 提取字段 |
			
 
				+|---------|---------|---------|
			
 
				+| 灵感分类 | `人设.json` | `灵感点列表` → `模式列表` → `二级细分` |
			
 
				+| 目的分类 | `what解构结果/*.json` | `三点解构.目的点.main_purpose` / `secondary_purposes` |
			
 
				+| 关键点分类 | `what解构结果/*.json` | `三点解构.关键点.key_points` |
			
 
				+| 具体灵感点 | `what解构结果/*.json` | `三点解构.灵感点.{全新内容,共性差异,共性内容}` |
			
 
				+| 具体目的点 | `what解构结果/*.json` | `三点解构.目的点.main_purpose` / `secondary_purposes` |
			
 
				+| 具体关键点 | `what解构结果/*.json` | `三点解构.关键点.key_points` |
			
 
				+| 帖子详情 | 详情接口 | `get_xiaohongshu_detail(note_id)` |
			
 
				+
			
 
				+### 依赖
			
 
				+
			
 
				+```python
			
 
				+from script.detail import get_xiaohongshu_detail
			
 
				+```
			
 
				+
			
 
				+确保 `script/detail/` 模块可用。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 输出文件位置
			
 
				+
			
 
				+默认输出位置：
			
 
				+```
			
 
				+data/阿里多多酱/out/人设_1110/
			
 
				+├── 人设.json                    # 输入
			
 
				+├── what解构结果/                 # 输入
			
 
				+├── 分类索引_完整.json            # 输出1
			
 
				+├── 点到帖子映射.json             # 输出2
			
 
				+└── 帖子到分类和点映射.json       # 输出3
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 版本历史
			
 
				+
			
 
				+- **v1.0** (2025-11-13): 初始版本
			
 
				+  - 支持三种索引构建
			
 
				+  - 自动获取帖子详情
			
 
				+  - 支持缓存机制
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 相关文档
			
 
				+
			
 
				+- [搜索模块 README](script/search/README.md)
			
 
				+- [详情模块 README](script/detail/README.md)
			
--- a/build_category_index.py
+++ b/build_category_index.py
@@ -0,0 +1,415 @@
 
				+"""
			
 
				+构建人设分类的反向索引
			
 
				+
			
 
				+将人设数据和what解构数据转换为以分类名称为键的反向索引结构，包含：
			
 
				+- 灵感分类（来自人设.json）
			
 
				+- 目的分类（来自what解构）
			
 
				+- 关键点分类（来自what解构）
			
 
				+
			
 
				+使用方式:
			
 
				+    python build_category_index.py --persona-dir data/阿里多多酱/out/人设_1110
			
 
				+"""
			
 
				+import os
			
 
				+import json
			
 
				+import argparse
			
 
				+from typing import Dict, List, Any
			
 
				+from glob import glob
			
 
				+from script.detail import get_xiaohongshu_detail
			
 
				+
			
 
				+
			
 
				def build_inspiration_index(persona_data: Dict[str, Any]) -> Dict[str, Any]:
				    """Build the inspiration-category reverse index, keyed by category name.

				    Walks ``persona_data["灵感点列表"]`` (perspectives) -> ``"模式列表"``
				    (level-1 categories) -> ``"二级细分"`` (level-2 categories) and emits
				    one entry per category name. Level-2 entries are inserted before the
				    level-1 entry that contains them, and a level-1 entry aggregates the
				    note IDs of all of its level-2 children.

				    When a category name occurs more than once, the first occurrence keeps
				    its level, definition and path; later occurrences only contribute
				    their note IDs.

				    Note IDs are de-duplicated in first-seen order. No ``set`` is involved,
				    so the resulting lists are identical from run to run regardless of
				    string hash randomisation.

				    Args:
				        persona_data: Parsed persona JSON. Missing or ``null`` list fields
				            are treated as empty.

				    Returns:
				        Mapping of category name to a dict with the keys ``"分类层级"``,
				        ``"分类名称"``, ``"分类定义"``, ``"分类路径"`` and ``"帖子ID列表"``.
				    """
				    index: Dict[str, Any] = {}

				    def _upsert(name, level, definition, path, note_ids):
				        # Create the entry on first sight; afterwards merge note IDs only.
				        entry = index.get(name)
				        if entry is None:
				            index[name] = {
				                "分类层级": level,
				                "分类名称": name,
				                "分类定义": definition,
				                "分类路径": path,
				                "帖子ID列表": list(dict.fromkeys(note_ids)),
				            }
				            return
				        # dict.fromkeys keeps first-seen order while dropping duplicates.
				        entry["帖子ID列表"] = list(
				            dict.fromkeys([*entry["帖子ID列表"], *note_ids])
				        )

				    for perspective in persona_data.get("灵感点列表") or []:
				        perspective_node = {
				            "视角名称": perspective.get("视角名称", ""),
				            "视角描述": perspective.get("视角描述", ""),
				        }

				        for category_l1 in perspective.get("模式列表") or []:
				            l1_name = category_l1.get("分类名称", "")
				            # Level-1 definitions are read from "核心定义" but exposed under
				            # "分类定义" so both levels share one output schema.
				            l1_def = category_l1.get("核心定义", "")
				            l1_node = {"分类名称": l1_name, "分类定义": l1_def}
				            l1_note_ids: List[Any] = []

				            for category_l2 in category_l1.get("二级细分") or []:
				                l2_name = category_l2.get("分类名称", "")
				                l2_def = category_l2.get("分类定义", "")
				                l2_note_ids = category_l2.get("帖子ID列表") or []
				                l1_note_ids.extend(l2_note_ids)

				                # Fresh dict copies so no two paths share mutable nodes.
				                l2_path = [
				                    dict(perspective_node),
				                    dict(l1_node),
				                    {"分类名称": l2_name, "分类定义": l2_def},
				                ]
				                _upsert(l2_name, "二级分类", l2_def, l2_path, l2_note_ids)

				            l1_path = [dict(perspective_node), dict(l1_node)]
				            _upsert(l1_name, "一级分类", l1_def, l1_path, l1_note_ids)

				    return index
			
 
				+
			
 
				+
			
 
				def build_purpose_index(what_dir: str) -> Dict[str, Any]:
				    """Build the purpose index from the "what" deconstruction files.

				    Reads every ``*_with_history_*.json`` file in ``what_dir``. The note ID
				    is the part of the file name before ``_with_history_``. From each file
				    the main purpose (``三点解构.目的点.main_purpose``) and the secondary
				    purposes (``三点解构.目的点.secondary_purposes``) are registered under
				    their ``"目的点"`` name; entries with an empty name are ignored.

				    Files are processed in sorted path order. The first file that mentions
				    a purpose decides its ``"分类类型"``, ``"维度"`` and ``"描述"``, so a
				    stable order is needed for a reproducible index (``glob`` itself
				    returns paths in arbitrary order).

				    Args:
				        what_dir: Directory containing the deconstruction JSON files.

				    Returns:
				        Mapping of purpose name to a dict with the keys ``"分类类型"``,
				        ``"目的点"``, ``"维度"``, ``"描述"`` and ``"帖子ID列表"``.
				    """
				    index: Dict[str, Any] = {}

				    def _register(point, category_type, note_id):
				        # Shared by main and secondary purposes; only the type label differs.
				        name = point.get("目的点", "")
				        if not name:
				            return
				        if name not in index:
				            index[name] = {
				                "分类类型": category_type,
				                "目的点": name,
				                "维度": point.get("维度", ""),
				                "描述": point.get("描述", ""),
				                "帖子ID列表": [],
				            }
				        note_ids = index[name]["帖子ID列表"]
				        if note_id not in note_ids:
				            note_ids.append(note_id)

				    what_files = sorted(glob(os.path.join(what_dir, "*_with_history_*.json")))

				    for what_file in what_files:
				        note_id = os.path.basename(what_file).split("_with_history_")[0]

				        with open(what_file, 'r', encoding='utf-8') as f:
				            data = json.load(f)

				        # "or" fallbacks tolerate keys that are present but null.
				        purpose_data = (data.get("三点解构") or {}).get("目的点") or {}

				        _register(purpose_data.get("main_purpose") or {}, "主目的", note_id)
				        for sec_purpose in purpose_data.get("secondary_purposes") or []:
				            _register(sec_purpose, "次要目的", note_id)

				    return index
			
 
				+
			
 
				+
			
 
				def build_keypoint_index(what_dir: str) -> Dict[str, Any]:
				    """Build the key-point index from the "what" deconstruction files.

				    Reads every ``*_with_history_*.json`` file in ``what_dir``. The note ID
				    is the part of the file name before ``_with_history_``. Each item of
				    ``三点解构.关键点.key_points`` is registered under its ``"关键点"`` name;
				    items with an empty name are ignored.

				    Files are processed in sorted path order. The first file that mentions
				    a key point decides its ``"维度大类"``, ``"维度细分"`` and ``"描述"``,
				    so a stable order is needed for a reproducible index (``glob`` itself
				    returns paths in arbitrary order).

				    Args:
				        what_dir: Directory containing the deconstruction JSON files.

				    Returns:
				        Mapping of key-point name to a dict with the keys ``"关键点"``,
				        ``"维度大类"``, ``"维度细分"``, ``"描述"`` and ``"帖子ID列表"``.
				    """
				    index: Dict[str, Any] = {}

				    what_files = sorted(glob(os.path.join(what_dir, "*_with_history_*.json")))

				    for what_file in what_files:
				        note_id = os.path.basename(what_file).split("_with_history_")[0]

				        with open(what_file, 'r', encoding='utf-8') as f:
				            data = json.load(f)

				        # "or" fallbacks tolerate keys that are present but null.
				        keypoint_data = (data.get("三点解构") or {}).get("关键点") or {}

				        for kp in keypoint_data.get("key_points") or []:
				            kp_name = kp.get("关键点", "")
				            if not kp_name:
				                continue

				            if kp_name not in index:
				                index[kp_name] = {
				                    "关键点": kp_name,
				                    "维度大类": kp.get("维度大类", ""),
				                    "维度细分": kp.get("维度细分", ""),
				                    "描述": kp.get("描述", ""),
				                    "帖子ID列表": [],
				                }

				            note_ids = index[kp_name]["帖子ID列表"]
				            if note_id not in note_ids:
				                note_ids.append(note_id)

				    return index
			
 
				+
			
 
				+
			
 
				def fetch_note_details(category_data: Dict[str, Any]) -> Dict[str, Any]:
				    """Attach note details to every category of one index.

				    Collects the distinct note IDs referenced by the ``"帖子ID列表"`` of all
				    categories, fetches each one once through ``get_xiaohongshu_detail``
				    and then adds a ``"帖子详情列表"`` (aligned with ``"帖子ID列表"``) to
				    every category. Progress is printed to stdout.

				    IDs are fetched in first-seen order rather than in ``set`` order, so
				    the request sequence and the progress log are the same on every run.

				    A failed fetch does not abort the run: the note gets a placeholder
				    ``{"channel_content_id": ..., "error": ...}`` instead.

				    Args:
				        category_data: Mapping of category name to category info. It is
				            modified in place.

				    Returns:
				        The same ``category_data`` object, with details filled in.
				    """
				    # dict.fromkeys de-duplicates while keeping first-seen order.
				    all_note_ids = list(dict.fromkeys(
				        note_id
				        for category_info in category_data.values()
				        for note_id in category_info.get("帖子ID列表", [])
				    ))
				    total = len(all_note_ids)

				    print(f"\n{'=' * 80}")
				    print("开始获取帖子详情...")
				    print(f"{'=' * 80}\n")
				    print(f"共有 {total} 个唯一帖子\n")

				    # In-memory lookup so a note shared by several categories is fetched once.
				    note_details_cache = {}
				    for i, note_id in enumerate(all_note_ids, 1):
				        try:
				            print(f"[{i}/{total}] 获取详情: {note_id}")
				            note_details_cache[note_id] = get_xiaohongshu_detail(note_id)
				        except Exception as e:
				            # Deliberately best-effort: record the failure and keep going.
				            print(f"  ⚠️  获取失败: {e}")
				            note_details_cache[note_id] = {
				                "channel_content_id": note_id,
				                "error": str(e)
				            }

				    print("\n✓ 帖子详情获取完成\n")

				    for category_info in category_data.values():
				        category_info["帖子详情列表"] = [
				            note_details_cache.get(note_id, {"channel_content_id": note_id})
				            for note_id in category_info.get("帖子ID列表", [])
				        ]

				    return category_data
			
 
				+
			
 
				+
			
 
				def save_index(index_data: Dict[str, Any], output_file: str):
				    """Write the index to ``output_file`` as indented UTF-8 JSON.

				    The parent directory is created first when the path has one.
				    Non-ASCII characters are written as-is (not escaped), and a
				    confirmation line is printed once the file is written.

				    Args:
				        index_data: Index data to serialise.
				        output_file: Destination file path.
				    """
				    parent_dir, _ = os.path.split(output_file)
				    if parent_dir != "":
				        # A bare file name has no parent directory to create.
				        os.makedirs(parent_dir, exist_ok=True)

				    with open(output_file, 'w', encoding='utf-8') as out:
				        json.dump(index_data, out, ensure_ascii=False, indent=2)

				    print(f"✓ 索引已保存: {output_file}")
			
 
				+
			
 
				+
			
 
				def print_statistics(index_data: Dict[str, Any]):
				    """Print per-index summary statistics to stdout.

				    For every index type in ``index_data`` this prints the number of
				    categories, the number of distinct notes referenced by any category,
				    and the average number of notes per category.

				    The average is the mean of the per-category note counts. Dividing the
				    distinct-note total by the category count would under-report whenever
				    one note belongs to several categories.

				    Args:
				        index_data: Mapping of index type to a mapping of category name to
				            category info; note IDs are read from ``"帖子ID列表"``.
				    """
				    print(f"\n{'=' * 80}")
				    print("索引统计信息")
				    print(f"{'=' * 80}\n")

				    for index_type, categories in index_data.items():
				        total_categories = len(categories)

				        all_note_ids = set()
				        membership_count = 0  # sum of per-category distinct note counts
				        for cat_info in categories.values():
				            cat_note_ids = set(cat_info.get("帖子ID列表", []))
				            all_note_ids |= cat_note_ids
				            membership_count += len(cat_note_ids)

				        total_notes = len(all_note_ids)
				        # Guard against an index type that has no categories at all.
				        avg_notes = membership_count / total_categories if total_categories > 0 else 0

				        print(f"{index_type}:")
				        print(f"  分类数量: {total_categories}")
				        print(f"  帖子总数: {total_notes}")
				        print(f"  平均每分类帖子数: {avg_notes:.1f}\n")
			
 
				+
			
 
				+
			
 
				def main():
				    """Command-line entry point: build, optionally enrich, save and summarise.

				    Parses ``--persona-dir`` (required), ``--output`` and ``--no-details``,
				    checks that ``人设.json`` and ``what解构结果`` exist under the persona
				    directory (printing an error and returning otherwise), builds the
				    inspiration, purpose and key-point indexes, fetches note details unless
				    ``--no-details`` was given, writes the combined index and prints the
				    statistics.
				    """
				    # Assembled line by line so the help text does not depend on source indentation.
				    usage_epilog = (
				        "\n"
				        "使用示例:\n"
				        "  # 基本使用\n"
				        "  python build_category_index.py --persona-dir data/阿里多多酱/out/人设_1110\n"
				        "\n"
				        "  # 只构建索引，不获取帖子详情\n"
				        "  python build_category_index.py --persona-dir data/阿里多多酱/out/人设_1110 --no-details\n"
				        "\n"
				        "  # 自定义输出文件\n"
				        "  python build_category_index.py --persona-dir data/阿里多多酱/out/人设_1110 --output custom_index.json\n"
				        "        "
				    )
				    parser = argparse.ArgumentParser(
				        description="构建人设分类的反向索引（灵感+目的+关键点）",
				        formatter_class=argparse.RawDescriptionHelpFormatter,
				        epilog=usage_epilog,
				    )
				    parser.add_argument(
				        "--persona-dir",
				        required=True,
				        help="人设目录路径（包含人设.json和what解构结果/的目录）",
				    )
				    parser.add_argument(
				        "--output",
				        default=None,
				        help="输出文件路径（默认: {persona_dir}/分类索引_完整.json）",
				    )
				    parser.add_argument(
				        "--no-details",
				        action="store_true",
				        help="不获取帖子详情（只构建索引结构）",
				    )
				    args = parser.parse_args()

				    persona_dir = args.persona_dir
				    fetch_details = not args.no_details

				    # Both inputs live directly under the persona directory.
				    persona_file = os.path.join(persona_dir, "人设.json")
				    what_dir = os.path.join(persona_dir, "what解构结果")

				    if not os.path.exists(persona_file):
				        print(f"❌ 错误: 找不到人设文件: {persona_file}")
				        return
				    if not os.path.exists(what_dir):
				        print(f"❌ 错误: 找不到what解构目录: {what_dir}")
				        return

				    heavy_rule = '=' * 80
				    light_rule = '─' * 80

				    print(heavy_rule)
				    print("构建人设分类反向索引（灵感+目的+关键点）")
				    print(heavy_rule)
				    print(f"人设文件: {persona_file}")
				    print(f"解构目录: {what_dir}")
				    print(f"获取详情: {'是' if fetch_details else '否'}\n")

				    with open(persona_file, 'r', encoding='utf-8') as f:
				        persona_data = json.load(f)

				    # Builders run in this order; each label is also the key in the output file.
				    build_steps = [
				        ("灵感分类", lambda: build_inspiration_index(persona_data)),
				        ("目的分类", lambda: build_purpose_index(what_dir)),
				        ("关键点分类", lambda: build_keypoint_index(what_dir)),
				    ]

				    full_index = {}
				    for step_no, (label, build) in enumerate(build_steps, 1):
				        print(light_rule)
				        print(f"{step_no}. 构建{label}索引...")
				        print(f"{light_rule}\n")
				        full_index[label] = build()
				        print(f"✓ {label}: {len(full_index[label])} 个分类\n")

				    # Details are fetched only after all three indexes have been built.
				    if fetch_details:
				        for label, categories in list(full_index.items()):
				            full_index[label] = fetch_note_details(categories)

				    output_file = args.output or os.path.join(persona_dir, "分类索引_完整.json")

				    save_index(full_index, output_file)
				    print_statistics(full_index)
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    main()
			
--- a/build_note_to_all_index.py
+++ b/build_note_to_all_index.py
@@ -0,0 +1,362 @@
 
				+"""
			
 
				+构建帖子ID到点和分类的完整映射
			
 
				+
			
 
				+从人设.json和what解构结果中提取每个帖子的：
			
 
				+1. 所属的灵感分类、目的分类、关键点分类（来自人设.json）
			
 
				+2. 具体的灵感点、目的点、关键点（来自what解构结果）
			
 
				+3. 帖子详情
			
 
				+
			
 
				+使用方式:
			
 
				+    python build_note_to_all_index.py --persona-dir data/阿里多多酱/out/人设_1110
			
 
				+"""
			
 
				+import os
			
 
				+import json
			
 
				+import argparse
			
 
				+from typing import Dict, List, Any
			
 
				+from glob import glob
			
 
				+from script.detail import get_xiaohongshu_detail
			
 
				+
			
 
				+
			
 
				+def build_category_mapping(persona_data: Dict[str, Any]) -> Dict[str, List[str]]:
			
 
				+    """构建帖子ID到分类的映射
			
 
				+
			
 
				+    Args:
			
 
				+        persona_data: 人设数据
			
 
				+
			
 
				+    Returns:
			
 
				+        {note_id: [分类名称列表]}
			
 
				+    """
			
 
				+    note_to_categories = {}
			
 
				+
			
 
				+    # 遍历所有视角
			
 
				+    for perspective in persona_data.get("灵感点列表", []):
			
 
				+        perspective_name = perspective.get("视角名称", "")
			
 
				+
			
 
				+        # 遍历一级分类
			
 
				+        for category_l1 in perspective.get("模式列表", []):
			
 
				+            category_l1_name = category_l1.get("分类名称", "")
			
 
				+
			
 
				+            # 遍历二级分类
			
 
				+            for category_l2 in category_l1.get("二级细分", []):
			
 
				+                category_l2_name = category_l2.get("分类名称", "")
			
 
				+                category_l2_def = category_l2.get("分类定义", "")
			
 
				+                note_ids = category_l2.get("帖子ID列表", [])
			
 
				+
			
 
				+                # 去重帖子ID
			
 
				+                unique_note_ids = list(dict.fromkeys(note_ids))
			
 
				+
			
 
				+                # 为每个帖子添加分类信息
			
 
				+                for note_id in unique_note_ids:
			
 
				+                    if note_id not in note_to_categories:
			
 
				+                        note_to_categories[note_id] = []
			
 
				+
			
 
				+                    note_to_categories[note_id].append({
			
 
				+                        "分类类型": "灵感分类",
			
 
				+                        "视角名称": perspective_name,
			
 
				+                        "一级分类": category_l1_name,
			
 
				+                        "二级分类": category_l2_name,
			
 
				+                        "分类定义": category_l2_def
			
 
				+                    })
			
 
				+
			
 
				+    return note_to_categories
			
 
				+
			
 
				+
			
 
				+def extract_points_from_what(what_dir: str) -> Dict[str, Dict[str, Any]]:
			
 
				+    """从what解构结果提取所有点
			
 
				+
			
 
				+    Args:
			
 
				+        what_dir: what解构结果目录
			
 
				+
			
 
				+    Returns:
			
 
				+        {note_id: {灵感点列表, 目的点列表, 关键点列表}}
			
 
				+    """
			
 
				+    note_to_points = {}
			
 
				+
			
 
				+    # 读取所有what解构文件
			
 
				+    what_files = glob(os.path.join(what_dir, "*_with_history_*.json"))
			
 
				+
			
 
				+    for what_file in what_files:
			
 
				+        # 从文件名提取note_id
			
 
				+        filename = os.path.basename(what_file)
			
 
				+        note_id = filename.split("_with_history_")[0]
			
 
				+
			
 
				+        with open(what_file, 'r', encoding='utf-8') as f:
			
 
				+            data = json.load(f)
			
 
				+
			
 
				+        three_points = data.get("三点解构", {})
			
 
				+
			
 
				+        # 提取灵感点
			
 
				+        inspiration_points = []
			
 
				+        inspiration_data = three_points.get("灵感点", {})
			
 
				+
			
 
				+        for field in ["全新内容", "共性差异", "共性内容"]:
			
 
				+            items = inspiration_data.get(field, [])
			
 
				+            for item in items:
			
 
				+                point = item.get("灵感点", "")
			
 
				+                if point:
			
 
				+                    inspiration_points.append({
			
 
				+                        "灵感点": point,
			
 
				+                        "来源字段": field,
			
 
				+                        "维度": item.get("维度", ""),
			
 
				+                        "描述": item.get("描述", "")
			
 
				+                    })
			
 
				+
			
 
				+        # 提取目的点
			
 
				+        purpose_points = []
			
 
				+        purpose_data = three_points.get("目的点", {})
			
 
				+
			
 
				+        # 主目的
			
 
				+        main_purpose = purpose_data.get("main_purpose", {})
			
 
				+        if main_purpose:
			
 
				+            point = main_purpose.get("目的点", "")
			
 
				+            if point:
			
 
				+                purpose_points.append({
			
 
				+                    "目的点": point,
			
 
				+                    "类型": "主目的",
			
 
				+                    "维度": main_purpose.get("维度", ""),
			
 
				+                    "描述": main_purpose.get("描述", "")
			
 
				+                })
			
 
				+
			
 
				+        # 次要目的
			
 
				+        secondary_purposes = purpose_data.get("secondary_purposes", [])
			
 
				+        for sec_purpose in secondary_purposes:
			
 
				+            point = sec_purpose.get("目的点", "")
			
 
				+            if point:
			
 
				+                purpose_points.append({
			
 
				+                    "目的点": point,
			
 
				+                    "类型": "次要目的",
			
 
				+                    "维度": sec_purpose.get("维度", ""),
			
 
				+                    "描述": sec_purpose.get("描述", "")
			
 
				+                })
			
 
				+
			
 
				+        # 提取关键点
			
 
				+        key_points = []
			
 
				+        keypoint_data = three_points.get("关键点", {})
			
 
				+        kp_list = keypoint_data.get("key_points", [])
			
 
				+
			
 
				+        for kp in kp_list:
			
 
				+            point = kp.get("关键点", "")
			
 
				+            if point:
			
 
				+                key_points.append({
			
 
				+                    "关键点": point,
			
 
				+                    "维度大类": kp.get("维度大类", ""),
			
 
				+                    "维度细分": kp.get("维度细分", ""),
			
 
				+                    "描述": kp.get("描述", "")
			
 
				+                })
			
 
				+
			
 
				+        note_to_points[note_id] = {
			
 
				+            "灵感点列表": inspiration_points,
			
 
				+            "目的点列表": purpose_points,
			
 
				+            "关键点列表": key_points
			
 
				+        }
			
 
				+
			
 
				+    return note_to_points
			
 
				+
			
 
				+
			
 
				+def build_note_to_all_index(
			
 
				+    persona_dir: str,
			
 
				+    fetch_details: bool = True
			
 
				+) -> Dict[str, Any]:
			
 
				+    """构建帖子ID到点和分类的完整映射
			
 
				+
			
 
				+    Args:
			
 
				+        persona_dir: 人设目录路径
			
 
				+        fetch_details: 是否获取帖子详情
			
 
				+
			
 
				+    Returns:
			
 
				+        完整的映射索引
			
 
				+    """
			
 
				+    persona_file = os.path.join(persona_dir, "人设.json")
			
 
				+    what_dir = os.path.join(persona_dir, "what解构结果")
			
 
				+
			
 
				+    print(f"{'=' * 80}")
			
 
				+    print(f"构建帖子ID到点和分类的完整映射")
			
 
				+    print(f"{'=' * 80}")
			
 
				+    print(f"人设文件: {persona_file}")
			
 
				+    print(f"解构目录: {what_dir}\n")
			
 
				+
			
 
				+    # 读取人设数据
			
 
				+    with open(persona_file, 'r', encoding='utf-8') as f:
			
 
				+        persona_data = json.load(f)
			
 
				+
			
 
				+    # 构建分类映射
			
 
				+    print(f"{'─' * 80}")
			
 
				+    print(f"1. 提取分类信息（来自人设.json）")
			
 
				+    print(f"{'─' * 80}\n")
			
 
				+    note_to_categories = build_category_mapping(persona_data)
			
 
				+    print(f"✓ 从人设.json中提取了 {len(note_to_categories)} 个帖子的分类信息\n")
			
 
				+
			
 
				+    # 提取点信息
			
 
				+    print(f"{'─' * 80}")
			
 
				+    print(f"2. 提取点信息（来自what解构结果）")
			
 
				+    print(f"{'─' * 80}\n")
			
 
				+    note_to_points = extract_points_from_what(what_dir)
			
 
				+    print(f"✓ 从what解构结果中提取了 {len(note_to_points)} 个帖子的点信息\n")
			
 
				+
			
 
				+    # 合并所有帖子ID
			
 
				+    all_note_ids = set(note_to_categories.keys()) | set(note_to_points.keys())
			
 
				+    print(f"✓ 共有 {len(all_note_ids)} 个唯一帖子\n")
			
 
				+
			
 
				+    # 构建完整映射
			
 
				+    note_index = {}
			
 
				+
			
 
				+    for note_id in all_note_ids:
			
 
				+        note_index[note_id] = {
			
 
				+            "帖子ID": note_id,
			
 
				+            "所属分类": note_to_categories.get(note_id, []),
			
 
				+            "灵感点列表": note_to_points.get(note_id, {}).get("灵感点列表", []),
			
 
				+            "目的点列表": note_to_points.get(note_id, {}).get("目的点列表", []),
			
 
				+            "关键点列表": note_to_points.get(note_id, {}).get("关键点列表", [])
			
 
				+        }
			
 
				+
			
 
				+    # 获取帖子详情
			
 
				+    if fetch_details:
			
 
				+        print(f"{'=' * 80}")
			
 
				+        print(f"开始获取帖子详情...")
			
 
				+        print(f"{'=' * 80}\n")
			
 
				+
			
 
				+        for i, note_id in enumerate(sorted(all_note_ids), 1):
			
 
				+            try:
			
 
				+                print(f"[{i}/{len(all_note_ids)}] 获取详情: {note_id}")
			
 
				+                detail = get_xiaohongshu_detail(note_id)
			
 
				+                note_index[note_id]["帖子详情"] = detail
			
 
				+            except Exception as e:
			
 
				+                print(f"  ⚠️  获取失败: {e}")
			
 
				+                note_index[note_id]["帖子详情"] = {
			
 
				+                    "channel_content_id": note_id,
			
 
				+                    "error": str(e)
			
 
				+                }
			
 
				+
			
 
				+        print(f"\n✓ 帖子详情获取完成\n")
			
 
				+
			
 
				+    return note_index
			
 
				+
			
 
				+
			
 
				+def save_index(index_data: Dict[str, Any], output_file: str):
			
 
				+    """保存索引到文件
			
 
				+
			
 
				+    Args:
			
 
				+        index_data: 索引数据
			
 
				+        output_file: 输出文件路径
			
 
				+    """
			
 
				+    output_dir = os.path.dirname(output_file)
			
 
				+    if output_dir:
			
 
				+        os.makedirs(output_dir, exist_ok=True)
			
 
				+
			
 
				+    with open(output_file, 'w', encoding='utf-8') as f:
			
 
				+        json.dump(index_data, f, ensure_ascii=False, indent=2)
			
 
				+
			
 
				+    print(f"✓ 索引已保存: {output_file}")
			
 
				+
			
 
				+
			
 
				+def print_statistics(index_data: Dict[str, Any]):
			
 
				+    """打印统计信息
			
 
				+
			
 
				+    Args:
			
 
				+        index_data: 完整的索引数据
			
 
				+    """
			
 
				+    print(f"\n{'=' * 80}")
			
 
				+    print(f"索引统计信息")
			
 
				+    print(f"{'=' * 80}\n")
			
 
				+
			
 
				+    total_notes = len(index_data)
			
 
				+    print(f"帖子总数: {total_notes}")
			
 
				+
			
 
				+    # 统计有分类的帖子
			
 
				+    notes_with_categories = sum(1 for v in index_data.values() if v.get("所属分类"))
			
 
				+    print(f"有分类信息的帖子: {notes_with_categories}")
			
 
				+
			
 
				+    # 统计有点信息的帖子
			
 
				+    notes_with_inspiration = sum(1 for v in index_data.values() if v.get("灵感点列表"))
			
 
				+    notes_with_purpose = sum(1 for v in index_data.values() if v.get("目的点列表"))
			
 
				+    notes_with_keypoint = sum(1 for v in index_data.values() if v.get("关键点列表"))
			
 
				+
			
 
				+    print(f"有灵感点信息的帖子: {notes_with_inspiration}")
			
 
				+    print(f"有目的点信息的帖子: {notes_with_purpose}")
			
 
				+    print(f"有关键点信息的帖子: {notes_with_keypoint}")
			
 
				+
			
 
				+    # 统计平均数量
			
 
				+    total_categories = sum(len(v.get("所属分类", [])) for v in index_data.values())
			
 
				+    total_inspiration = sum(len(v.get("灵感点列表", [])) for v in index_data.values())
			
 
				+    total_purpose = sum(len(v.get("目的点列表", [])) for v in index_data.values())
			
 
				+    total_keypoint = sum(len(v.get("关键点列表", [])) for v in index_data.values())
			
 
				+
			
 
				+    if total_notes > 0:
			
 
				+        print(f"\n平均每个帖子:")
			
 
				+        print(f"  分类数: {total_categories / total_notes:.1f}")
			
 
				+        print(f"  灵感点数: {total_inspiration / total_notes:.1f}")
			
 
				+        print(f"  目的点数: {total_purpose / total_notes:.1f}")
			
 
				+        print(f"  关键点数: {total_keypoint / total_notes:.1f}")
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    """主函数"""
			
 
				+    parser = argparse.ArgumentParser(
			
 
				+        description="构建帖子ID到点和分类的完整映射",
			
 
				+        formatter_class=argparse.RawDescriptionHelpFormatter,
			
 
				+        epilog="""
			
 
				+使用示例:
			
 
				+  # 基本使用
			
 
				+  python build_note_to_all_index.py --persona-dir data/阿里多多酱/out/人设_1110
			
 
				+
			
 
				+  # 只构建索引，不获取帖子详情
			
 
				+  python build_note_to_all_index.py --persona-dir data/阿里多多酱/out/人设_1110 --no-details
			
 
				+
			
 
				+  # 自定义输出文件
			
 
				+  python build_note_to_all_index.py --persona-dir data/阿里多多酱/out/人设_1110 --output custom.json
			
 
				+        """
			
 
				+    )
			
 
				+
			
 
				+    parser.add_argument(
			
 
				+        "--persona-dir",
			
 
				+        required=True,
			
 
				+        help="人设目录路径（包含人设.json和what解构结果/的目录）"
			
 
				+    )
			
 
				+
			
 
				+    parser.add_argument(
			
 
				+        "--output",
			
 
				+        default=None,
			
 
				+        help="输出文件路径（默认: {persona_dir}/帖子到分类和点映射.json）"
			
 
				+    )
			
 
				+
			
 
				+    parser.add_argument(
			
 
				+        "--no-details",
			
 
				+        action="store_true",
			
 
				+        help="不获取帖子详情（只构建索引结构）"
			
 
				+    )
			
 
				+
			
 
				+    args = parser.parse_args()
			
 
				+
			
 
				+    persona_dir = args.persona_dir
			
 
				+    fetch_details = not args.no_details
			
 
				+
			
 
				+    # 检查必要文件
			
 
				+    persona_file = os.path.join(persona_dir, "人设.json")
			
 
				+    what_dir = os.path.join(persona_dir, "what解构结果")
			
 
				+
			
 
				+    if not os.path.exists(persona_file):
			
 
				+        print(f"❌ 错误: 找不到人设文件: {persona_file}")
			
 
				+        return
			
 
				+
			
 
				+    if not os.path.exists(what_dir):
			
 
				+        print(f"❌ 错误: 找不到what解构目录: {what_dir}")
			
 
				+        return
			
 
				+
			
 
				+    # 构建索引
			
 
				+    index_data = build_note_to_all_index(persona_dir, fetch_details=fetch_details)
			
 
				+
			
 
				+    # 确定输出文件路径
			
 
				+    if args.output:
			
 
				+        output_file = args.output
			
 
				+    else:
			
 
				+        output_file = os.path.join(persona_dir, "帖子到分类和点映射.json")
			
 
				+
			
 
				+    # 保存索引
			
 
				+    save_index(index_data, output_file)
			
 
				+
			
 
				+    # 打印统计信息
			
 
				+    print_statistics(index_data)
			
 
				+
			
 
				+
			
 
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
			
--- a/build_point_to_note_index.py
+++ b/build_point_to_note_index.py
@@ -0,0 +1,373 @@
 
				+"""
			
 
				+构建点到帖子的映射索引
			
 
				+
			
 
				+从 what解构结果 中提取每个帖子的灵感点、目的点、关键点，
			
 
				+构建从具体点到帖子详情的映射关系。
			
 
				+
			
 
				+使用方式:
			
 
				+    python build_point_to_note_index.py --what-dir data/阿里多多酱/out/人设_1110/what解构结果
			
 
				+"""
			
 
				+import os
			
 
				+import json
			
 
				+import argparse
			
 
				+from typing import Dict, List, Any
			
 
				+from glob import glob
			
 
				+from script.detail import get_xiaohongshu_detail
			
 
				+
			
 
				+
			
 
				+def extract_points_from_what_file(what_file: str) -> Dict[str, Any]:
			
 
				+    """从单个 what 解构文件中提取所有点
			
 
				+
			
 
				+    Args:
			
 
				+        what_file: what解构文件路径
			
 
				+
			
 
				+    Returns:
			
 
				+        包含灵感点、目的点、关键点的字典
			
 
				+    """
			
 
				+    # 从文件名提取note_id
			
 
				+    filename = os.path.basename(what_file)
			
 
				+    note_id = filename.split("_with_history_")[0]
			
 
				+
			
 
				+    with open(what_file, 'r', encoding='utf-8') as f:
			
 
				+        data = json.load(f)
			
 
				+
			
 
				+    three_points = data.get("三点解构", {})
			
 
				+
			
 
				+    # 提取灵感点
			
 
				+    inspiration_points = []
			
 
				+    inspiration_data = three_points.get("灵感点", {})
			
 
				+
			
 
				+    for field in ["全新内容", "共性差异", "共性内容"]:
			
 
				+        items = inspiration_data.get(field, [])
			
 
				+        for item in items:
			
 
				+            point = item.get("灵感点", "")
			
 
				+            if point:
			
 
				+                inspiration_points.append({
			
 
				+                    "灵感点": point,
			
 
				+                    "来源字段": field,
			
 
				+                    "维度": item.get("维度", ""),
			
 
				+                    "描述": item.get("描述", "")
			
 
				+                })
			
 
				+
			
 
				+    # 提取目的点
			
 
				+    purpose_points = []
			
 
				+    purpose_data = three_points.get("目的点", {})
			
 
				+
			
 
				+    # 主目的
			
 
				+    main_purpose = purpose_data.get("main_purpose", {})
			
 
				+    if main_purpose:
			
 
				+        point = main_purpose.get("目的点", "")
			
 
				+        if point:
			
 
				+            purpose_points.append({
			
 
				+                "目的点": point,
			
 
				+                "类型": "主目的",
			
 
				+                "维度": main_purpose.get("维度", ""),
			
 
				+                "描述": main_purpose.get("描述", "")
			
 
				+            })
			
 
				+
			
 
				+    # 次要目的
			
 
				+    secondary_purposes = purpose_data.get("secondary_purposes", [])
			
 
				+    for sec_purpose in secondary_purposes:
			
 
				+        point = sec_purpose.get("目的点", "")
			
 
				+        if point:
			
 
				+            purpose_points.append({
			
 
				+                "目的点": point,
			
 
				+                "类型": "次要目的",
			
 
				+                "维度": sec_purpose.get("维度", ""),
			
 
				+                "描述": sec_purpose.get("描述", "")
			
 
				+            })
			
 
				+
			
 
				+    # 提取关键点
			
 
				+    key_points = []
			
 
				+    keypoint_data = three_points.get("关键点", {})
			
 
				+    kp_list = keypoint_data.get("key_points", [])
			
 
				+
			
 
				+    for kp in kp_list:
			
 
				+        point = kp.get("关键点", "")
			
 
				+        if point:
			
 
				+            key_points.append({
			
 
				+                "关键点": point,
			
 
				+                "维度大类": kp.get("维度大类", ""),
			
 
				+                "维度细分": kp.get("维度细分", ""),
			
 
				+                "描述": kp.get("描述", "")
			
 
				+            })
			
 
				+
			
 
				+    return {
			
 
				+        "note_id": note_id,
			
 
				+        "灵感点列表": inspiration_points,
			
 
				+        "目的点列表": purpose_points,
			
 
				+        "关键点列表": key_points
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def build_point_to_note_index(what_dir: str, fetch_details: bool = True) -> Dict[str, Any]:
			
 
				+    """构建点到帖子的映射索引
			
 
				+
			
 
				+    Args:
			
 
				+        what_dir: what解构结果目录路径
			
 
				+        fetch_details: 是否获取帖子详情
			
 
				+
			
 
				+    Returns:
			
 
				+        完整的映射索引
			
 
				+    """
			
 
				+    # 读取所有what解构文件
			
 
				+    what_files = glob(os.path.join(what_dir, "*_with_history_*.json"))
			
 
				+
			
 
				+    print(f"{'=' * 80}")
			
 
				+    print(f"开始构建点到帖子的映射索引")
			
 
				+    print(f"{'=' * 80}")
			
 
				+    print(f"解构文件数量: {len(what_files)}\n")
			
 
				+
			
 
				+    # 初始化索引结构
			
 
				+    inspiration_index = {}  # {灵感点: [note_id1, note_id2, ...]}
			
 
				+    purpose_index = {}      # {目的点: [note_id1, note_id2, ...]}
			
 
				+    keypoint_index = {}     # {关键点: [note_id1, note_id2, ...]}
			
 
				+
			
 
				+    # 帖子到点的映射
			
 
				+    note_to_points = {}     # {note_id: {灵感点: [], 目的点: [], 关键点: []}}
			
 
				+
			
 
				+    # 遍历所有文件
			
 
				+    for what_file in what_files:
			
 
				+        points_data = extract_points_from_what_file(what_file)
			
 
				+        note_id = points_data["note_id"]
			
 
				+
			
 
				+        # 初始化帖子的点列表
			
 
				+        note_to_points[note_id] = {
			
 
				+            "灵感点列表": points_data["灵感点列表"],
			
 
				+            "目的点列表": points_data["目的点列表"],
			
 
				+            "关键点列表": points_data["关键点列表"]
			
 
				+        }
			
 
				+
			
 
				+        # 构建灵感点到帖子的映射
			
 
				+        for insp in points_data["灵感点列表"]:
			
 
				+            point_name = insp["灵感点"]
			
 
				+            if point_name not in inspiration_index:
			
 
				+                inspiration_index[point_name] = {
			
 
				+                    "灵感点": point_name,
			
 
				+                    "维度": insp["维度"],
			
 
				+                    "描述": insp["描述"],
			
 
				+                    "帖子ID列表": []
			
 
				+                }
			
 
				+            if note_id not in inspiration_index[point_name]["帖子ID列表"]:
			
 
				+                inspiration_index[point_name]["帖子ID列表"].append(note_id)
			
 
				+
			
 
				+        # 构建目的点到帖子的映射
			
 
				+        for purp in points_data["目的点列表"]:
			
 
				+            point_name = purp["目的点"]
			
 
				+            if point_name not in purpose_index:
			
 
				+                purpose_index[point_name] = {
			
 
				+                    "目的点": point_name,
			
 
				+                    "类型": purp["类型"],
			
 
				+                    "维度": purp["维度"],
			
 
				+                    "描述": purp["描述"],
			
 
				+                    "帖子ID列表": []
			
 
				+                }
			
 
				+            if note_id not in purpose_index[point_name]["帖子ID列表"]:
			
 
				+                purpose_index[point_name]["帖子ID列表"].append(note_id)
			
 
				+
			
 
				+        # 构建关键点到帖子的映射
			
 
				+        for kp in points_data["关键点列表"]:
			
 
				+            point_name = kp["关键点"]
			
 
				+            if point_name not in keypoint_index:
			
 
				+                keypoint_index[point_name] = {
			
 
				+                    "关键点": point_name,
			
 
				+                    "维度大类": kp["维度大类"],
			
 
				+                    "维度细分": kp["维度细分"],
			
 
				+                    "描述": kp["描述"],
			
 
				+                    "帖子ID列表": []
			
 
				+                }
			
 
				+            if note_id not in keypoint_index[point_name]["帖子ID列表"]:
			
 
				+                keypoint_index[point_name]["帖子ID列表"].append(note_id)
			
 
				+
			
 
				+    print(f"✓ 灵感点: {len(inspiration_index)} 个")
			
 
				+    print(f"✓ 目的点: {len(purpose_index)} 个")
			
 
				+    print(f"✓ 关键点: {len(keypoint_index)} 个")
			
 
				+    print(f"✓ 帖子: {len(note_to_points)} 个\n")
			
 
				+
			
 
				+    # 获取帖子详情
			
 
				+    if fetch_details:
			
 
				+        # 收集所有唯一的note_ids
			
 
				+        all_note_ids = list(note_to_points.keys())
			
 
				+
			
 
				+        print(f"{'=' * 80}")
			
 
				+        print(f"开始获取帖子详情...")
			
 
				+        print(f"{'=' * 80}\n")
			
 
				+
			
 
				+        # 获取所有帖子详情（缓存到内存）
			
 
				+        note_details_cache = {}
			
 
				+        for i, note_id in enumerate(all_note_ids, 1):
			
 
				+            try:
			
 
				+                print(f"[{i}/{len(all_note_ids)}] 获取详情: {note_id}")
			
 
				+                detail = get_xiaohongshu_detail(note_id)
			
 
				+                note_details_cache[note_id] = detail
			
 
				+            except Exception as e:
			
 
				+                print(f"  ⚠️  获取失败: {e}")
			
 
				+                note_details_cache[note_id] = {
			
 
				+                    "channel_content_id": note_id,
			
 
				+                    "error": str(e)
			
 
				+                }
			
 
				+
			
 
				+        print(f"\n✓ 帖子详情获取完成\n")
			
 
				+
			
 
				+        # 填充详情到每个索引
			
 
				+        for point_info in inspiration_index.values():
			
 
				+            note_ids = point_info.get("帖子ID列表", [])
			
 
				+            point_info["帖子详情列表"] = [
			
 
				+                note_details_cache.get(note_id, {"channel_content_id": note_id})
			
 
				+                for note_id in note_ids
			
 
				+            ]
			
 
				+
			
 
				+        for point_info in purpose_index.values():
			
 
				+            note_ids = point_info.get("帖子ID列表", [])
			
 
				+            point_info["帖子详情列表"] = [
			
 
				+                note_details_cache.get(note_id, {"channel_content_id": note_id})
			
 
				+                for note_id in note_ids
			
 
				+            ]
			
 
				+
			
 
				+        for point_info in keypoint_index.values():
			
 
				+            note_ids = point_info.get("帖子ID列表", [])
			
 
				+            point_info["帖子详情列表"] = [
			
 
				+                note_details_cache.get(note_id, {"channel_content_id": note_id})
			
 
				+                for note_id in note_ids
			
 
				+            ]
			
 
				+
			
 
				+        # 填充帖子到点的映射中的详情
			
 
				+        for note_id in note_to_points.keys():
			
 
				+            note_to_points[note_id]["帖子详情"] = note_details_cache.get(
			
 
				+                note_id,
			
 
				+                {"channel_content_id": note_id}
			
 
				+            )
			
 
				+
			
 
				+    return {
			
 
				+        "点到帖子映射": {
			
 
				+            "灵感点": inspiration_index,
			
 
				+            "目的点": purpose_index,
			
 
				+            "关键点": keypoint_index
			
 
				+        },
			
 
				+        "帖子到点映射": note_to_points
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def save_index(index_data: Dict[str, Any], output_file: str):
			
 
				+    """保存索引到文件
			
 
				+
			
 
				+    Args:
			
 
				+        index_data: 索引数据
			
 
				+        output_file: 输出文件路径
			
 
				+    """
			
 
				+    output_dir = os.path.dirname(output_file)
			
 
				+    if output_dir:
			
 
				+        os.makedirs(output_dir, exist_ok=True)
			
 
				+
			
 
				+    with open(output_file, 'w', encoding='utf-8') as f:
			
 
				+        json.dump(index_data, f, ensure_ascii=False, indent=2)
			
 
				+
			
 
				+    print(f"✓ 索引已保存: {output_file}")
			
 
				+
			
 
				+
			
 
				+def print_statistics(index_data: Dict[str, Any]):
			
 
				+    """打印统计信息
			
 
				+
			
 
				+    Args:
			
 
				+        index_data: 完整的索引数据
			
 
				+    """
			
 
				+    print(f"\n{'=' * 80}")
			
 
				+    print(f"索引统计信息")
			
 
				+    print(f"{'=' * 80}\n")
			
 
				+
			
 
				+    point_to_note = index_data.get("点到帖子映射", {})
			
 
				+    note_to_point = index_data.get("帖子到点映射", {})
			
 
				+
			
 
				+    print(f"点到帖子映射:")
			
 
				+    for point_type, points in point_to_note.items():
			
 
				+        total_points = len(points)
			
 
				+        all_note_ids = set()
			
 
				+        for point_info in points.values():
			
 
				+            all_note_ids.update(point_info.get("帖子ID列表", []))
			
 
				+        total_notes = len(all_note_ids)
			
 
				+        avg_notes = total_notes / total_points if total_points > 0 else 0
			
 
				+
			
 
				+        print(f"  {point_type}:")
			
 
				+        print(f"    点的数量: {total_points}")
			
 
				+        print(f"    关联帖子数: {total_notes}")
			
 
				+        print(f"    平均每个点关联帖子数: {avg_notes:.1f}")
			
 
				+
			
 
				+    print(f"\n帖子到点映射:")
			
 
				+    print(f"  帖子数量: {len(note_to_point)}")
			
 
				+
			
 
				+    # 统计每个帖子平均有多少个点
			
 
				+    total_insp = sum(len(v.get("灵感点列表", [])) for v in note_to_point.values())
			
 
				+    total_purp = sum(len(v.get("目的点列表", [])) for v in note_to_point.values())
			
 
				+    total_kp = sum(len(v.get("关键点列表", [])) for v in note_to_point.values())
			
 
				+
			
 
				+    note_count = len(note_to_point)
			
 
				+    if note_count > 0:
			
 
				+        print(f"  平均每个帖子的灵感点数: {total_insp / note_count:.1f}")
			
 
				+        print(f"  平均每个帖子的目的点数: {total_purp / note_count:.1f}")
			
 
				+        print(f"  平均每个帖子的关键点数: {total_kp / note_count:.1f}")
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    """主函数"""
			
 
				+    parser = argparse.ArgumentParser(
			
 
				+        description="构建点到帖子的映射索引",
			
 
				+        formatter_class=argparse.RawDescriptionHelpFormatter,
			
 
				+        epilog="""
			
 
				+使用示例:
			
 
				+  # 基本使用
			
 
				+  python build_point_to_note_index.py --what-dir data/阿里多多酱/out/人设_1110/what解构结果
			
 
				+
			
 
				+  # 只构建索引，不获取帖子详情
			
 
				+  python build_point_to_note_index.py --what-dir data/阿里多多酱/out/人设_1110/what解构结果 --no-details
			
 
				+
			
 
				+  # 自定义输出文件
			
 
				+  python build_point_to_note_index.py --what-dir data/阿里多多酱/out/人设_1110/what解构结果 --output custom.json
			
 
				+        """
			
 
				+    )
			
 
				+
			
 
				+    parser.add_argument(
			
 
				+        "--what-dir",
			
 
				+        required=True,
			
 
				+        help="what解构结果目录路径"
			
 
				+    )
			
 
				+
			
 
				+    parser.add_argument(
			
 
				+        "--output",
			
 
				+        default=None,
			
 
				+        help="输出文件路径（默认: {what_dir}/../点到帖子映射.json）"
			
 
				+    )
			
 
				+
			
 
				+    parser.add_argument(
			
 
				+        "--no-details",
			
 
				+        action="store_true",
			
 
				+        help="不获取帖子详情（只构建索引结构）"
			
 
				+    )
			
 
				+
			
 
				+    args = parser.parse_args()
			
 
				+
			
 
				+    what_dir = args.what_dir
			
 
				+    fetch_details = not args.no_details
			
 
				+
			
 
				+    if not os.path.exists(what_dir):
			
 
				+        print(f"❌ 错误: 找不到what解构目录: {what_dir}")
			
 
				+        return
			
 
				+
			
 
				+    # 构建索引
			
 
				+    index_data = build_point_to_note_index(what_dir, fetch_details=fetch_details)
			
 
				+
			
 
				+    # 确定输出文件路径
			
 
				+    if args.output:
			
 
				+        output_file = args.output
			
 
				+    else:
			
 
				+        parent_dir = os.path.dirname(what_dir.rstrip('/'))
			
 
				+        output_file = os.path.join(parent_dir, "点到帖子映射.json")
			
 
				+
			
 
				+    # 保存索引
			
 
				+    save_index(index_data, output_file)
			
 
				+
			
 
				+    # 打印统计信息
			
 
				+    print_statistics(index_data)
			
 
				+
			
 
				+
			
 
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
			
--- a/run_inspiration_analysis.py
+++ b/run_inspiration_analysis.py
@@ -1,10 +1,11 @@
 
				 """
			
 
				-主流程脚本：串联 Step1 和 Step2
			
 
				+主流程脚本：串联 Step1、搜索和 Step2
			
 
				 
			
 
				 执行完整的灵感分析流程：
			
 
				 1. Step1: 灵感与人设匹配（调用 step1 main，自动保存结果）
			
 
				-2. Step2: 增量词在人设中的匹配（调用 step2 main，自动保存结果）
			
 
				-3. 生成流程汇总文件
			
 
				+2. Step1.5: 基于 Top1 匹配要素进行小红书搜索（使用 search_xiaohongshu）
			
 
				+3. Step2: 增量词在人设中的匹配（调用 step2 main，自动保存结果）
			
 
				+4. 生成流程汇总文件
			
 
				 """
			
 
				 import os
			
 
				 import sys
			
@@ -22,6 +23,9 @@ from lib.utils import read_json
 
				 import step1_inspiration_match
			
 
				 import step2_incremental_match
			
 
				 
			
 
				+# 导入搜索功能
			
 
				+from script.search import search_xiaohongshu
			
 
				+
			
 
				 
			
 
				 def find_step1_output(persona_dir: str, inspiration: str, max_tasks: int = None) -> str:
			
 
				     """查找 step1 输出文件
			
@@ -71,15 +75,89 @@ def find_step2_output(persona_dir: str, inspiration: str, max_tasks: int = None)
 
				     return str(step2_files[0])
			
 
				 
			
 
				 
			
 
				+def get_inspiration_score(persona_dir: str, inspiration: str, max_tasks: int = None) -> float:
			
 
				+    """获取灵感的 Step1 Top1 分数
			
 
				+
			
 
				+    Args:
			
 
				+        persona_dir: 人设目录
			
 
				+        inspiration: 灵感点名称
			
 
				+        max_tasks: 任务数限制
			
 
				+
			
 
				+    Returns:
			
 
				+        Step1 Top1 的 score，如果文件不存在返回 -1
			
 
				+    """
			
 
				+    try:
			
 
				+        step1_file = find_step1_output(persona_dir, inspiration, max_tasks)
			
 
				+        step1_data = read_json(step1_file)
			
 
				+        results = step1_data.get("匹配结果列表", [])
			
 
				+        if results:
			
 
				+            return results[0].get('匹配结果', {}).get('score', 0)
			
 
				+        return 0
			
 
				+    except (FileNotFoundError, Exception):
			
 
				+        return -1
			
 
				+
			
 
				+
			
 
				+def sort_inspirations_by_score(
			
 
				+    persona_dir: str,
			
 
				+    inspiration_list: list,
			
 
				+    max_tasks: int = None
			
 
				+) -> list:
			
 
				+    """根据 Step1 结果分数对灵感列表排序
			
 
				+
			
 
				+    Args:
			
 
				+        persona_dir: 人设目录
			
 
				+        inspiration_list: 灵感列表
			
 
				+        max_tasks: 任务数限制
			
 
				+
			
 
				+    Returns:
			
 
				+        排序后的灵感列表（按分数降序）
			
 
				+    """
			
 
				+    print(f"\n{'─' * 80}")
			
 
				+    print(f"正在读取现有 Step1 结果文件...")
			
 
				+    print(f"{'─' * 80}")
			
 
				+
			
 
				+    inspiration_scores = []
			
 
				+    for inspiration in inspiration_list:
			
 
				+        score = get_inspiration_score(persona_dir, inspiration, max_tasks)
			
 
				+        inspiration_scores.append({
			
 
				+            "inspiration": inspiration,
			
 
				+            "score": score,
			
 
				+            "has_result": score >= 0
			
 
				+        })
			
 
				+
			
 
				+    # 统计
			
 
				+    has_result_count = sum(1 for item in inspiration_scores if item["has_result"])
			
 
				+    print(f"找到 {has_result_count}/{len(inspiration_list)} 个灵感的 Step1 结果")
			
 
				+
			
 
				+    # 排序：有结果的按分数降序，无结果的放最后（保持原顺序）
			
 
				+    sorted_items = sorted(
			
 
				+        inspiration_scores,
			
 
				+        key=lambda x: (x["has_result"], x["score"]),
			
 
				+        reverse=True
			
 
				+    )
			
 
				+
			
 
				+    # 显示排序结果（前10个）
			
 
				+    print(f"\n排序后的灵感列表（前10个）:")
			
 
				+    for i, item in enumerate(sorted_items[:10], 1):
			
 
				+        status = f"score={item['score']:.2f}" if item['has_result'] else "无结果"
			
 
				+        print(f"  {i}. [{status}] {item['inspiration']}")
			
 
				+
			
 
				+    if len(sorted_items) > 10:
			
 
				+        print(f"  ... 还有 {len(sorted_items) - 10} 个")
			
 
				+
			
 
				+    return [item["inspiration"] for item in sorted_items]
			
 
				+
			
 
				+
			
 
				 async def run_full_analysis(
			
 
				     persona_dir: str,
			
 
				     inspiration: str,
			
 
				     max_tasks: int = None,
			
 
				     force: bool = False,
			
 
				     current_time: str = None,
			
 
				-    log_url: str = None
			
 
				+    log_url: str = None,
			
 
				+    enable_step2: bool = False
			
 
				 ) -> dict:
			
 
				-    """执行完整的灵感分析流程（Step1 + Step2）
			
 
				+    """执行完整的灵感分析流程（Step1 + 搜索 + Step2）
			
 
				 
			
 
				     Args:
			
 
				         persona_dir: 人设目录路径
			
@@ -88,6 +166,7 @@ async def run_full_analysis(
 
				         force: 是否强制重新执行（跳过文件存在检查）
			
 
				         current_time: 当前时间戳
			
 
				         log_url: 日志链接
			
 
				+        enable_step2: 是否执行 Step2（默认 False）
			
 
				 
			
 
				     Returns:
			
 
				         包含文件路径和状态的字典
			
@@ -138,34 +217,75 @@ async def run_full_analysis(
 
				     step1_element = step1_top1.get("业务信息", {}).get("匹配要素", "")
			
 
				     print(f"Top1 匹配要素: {step1_element}, score: {step1_score:.2f}")
			
 
				 
			
 
				-    # ========== Step2: 增量词匹配 ==========
			
 
				+    # ========== Step1.5: 小红书搜索 ==========
			
 
				     print(f"\n{'─' * 80}")
			
 
				-    print(f"Step2: 增量词在人设中的匹配")
			
 
				+    print(f"Step1.5: 基于 Top1 匹配要素进行小红书搜索")
			
 
				     print(f"{'─' * 80}\n")
			
 
				 
			
 
				-    # 临时修改 sys.argv 来传递参数给 step2
			
 
				-    sys.argv = [
			
 
				-        "step2_incremental_match.py",
			
 
				-        persona_dir,
			
 
				-        inspiration
			
 
				-    ]
			
 
				+    search_keyword = step1_element
			
 
				+    print(f"搜索关键词: {search_keyword}")
			
 
				 
			
 
				+    # 执行搜索
			
 
				     try:
			
 
				-        # 调用 step2 的 main 函数（通过参数传递 force）
			
 
				-        await step2_incremental_match.main(current_time, log_url, force=force)
			
 
				-    finally:
			
 
				-        # 恢复原始参数
			
 
				-        sys.argv = original_argv
			
 
				+        search_result = search_xiaohongshu(search_keyword)
			
 
				+        search_notes_count = len(search_result.get('notes', []))
			
 
				+        print(f"✓ 搜索完成，找到 {search_notes_count} 条笔记")
			
 
				+
			
 
				+        # 保存搜索结果
			
 
				+        search_dir = os.path.join(persona_dir, "how", "灵感点", inspiration, "search")
			
 
				+        os.makedirs(search_dir, exist_ok=True)
			
 
				 
			
 
				-    # 查找 step2 输出文件
			
 
				-    step2_file = find_step2_output(persona_dir, inspiration, max_tasks)
			
 
				-    print(f"✓ Step2 完成，结果文件: {step2_file}\n")
			
 
				+        scope_prefix = f"top{max_tasks}" if max_tasks is not None else "all"
			
 
				+        search_filename = f"{scope_prefix}_search_{search_keyword[:20]}.json"  # 截取关键词前20字符避免文件名过长
			
 
				+        search_file = os.path.join(search_dir, search_filename)
			
 
				 
			
 
				-    # 读取 step2 结果
			
 
				-    step2_data = read_json(step2_file)
			
 
				-    step2_score = step2_data.get("匹配结果", {}).get("score", 0)
			
 
				-    step2_b_content = step2_data.get("输入信息", {}).get("B", "")
			
 
				-    step2_word_count = len(step2_b_content.split("\n")) if step2_b_content else 0
			
 
				+        with open(search_file, 'w', encoding='utf-8') as f:
			
 
				+            json.dump(search_result, f, ensure_ascii=False, indent=2)
			
 
				+
			
 
				+        print(f"✓ 搜索结果已保存: {search_file}\n")
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        print(f"⚠️  搜索失败: {e}")
			
 
				+        search_file = None
			
 
				+        search_notes_count = 0
			
 
				+
			
 
				+    # ========== Step2: 增量词匹配 ==========
			
 
				+    step2_file = None
			
 
				+    step2_score = None
			
 
				+    step2_word_count = None
			
 
				+
			
 
				+    if enable_step2:
			
 
				+        print(f"\n{'─' * 80}")
			
 
				+        print(f"Step2: 增量词在人设中的匹配")
			
 
				+        print(f"{'─' * 80}\n")
			
 
				+
			
 
				+        # 临时修改 sys.argv 来传递参数给 step2
			
 
				+        sys.argv = [
			
 
				+            "step2_incremental_match.py",
			
 
				+            persona_dir,
			
 
				+            inspiration
			
 
				+        ]
			
 
				+
			
 
				+        try:
			
 
				+            # 调用 step2 的 main 函数（通过参数传递 force）
			
 
				+            await step2_incremental_match.main(current_time, log_url, force=force)
			
 
				+        finally:
			
 
				+            # 恢复原始参数
			
 
				+            sys.argv = original_argv
			
 
				+
			
 
				+        # 查找 step2 输出文件
			
 
				+        step2_file = find_step2_output(persona_dir, inspiration, max_tasks)
			
 
				+        print(f"✓ Step2 完成，结果文件: {step2_file}\n")
			
 
				+
			
 
				+        # 读取 step2 结果
			
 
				+        step2_data = read_json(step2_file)
			
 
				+        step2_score = step2_data.get("匹配结果", {}).get("score", 0)
			
 
				+        step2_b_content = step2_data.get("输入信息", {}).get("B", "")
			
 
				+        step2_word_count = len(step2_b_content.split("\n")) if step2_b_content else 0
			
 
				+    else:
			
 
				+        print(f"\n{'─' * 80}")
			
 
				+        print(f"Step2: 已跳过（使用 --enable-step2 启用）")
			
 
				+        print(f"{'─' * 80}\n")
			
 
				 
			
 
				     # ========== 保存流程汇总 ==========
			
 
				     output_dir = os.path.join(persona_dir, "how", "灵感点", inspiration)
			
@@ -178,22 +298,30 @@ async def run_full_analysis(
 
				     summary_filename = f"{scope_prefix}_summary_完整流程_{model_short}.json"
			
 
				     summary_file = os.path.join(output_dir, summary_filename)
			
 
				 
			
 
				+    # 构建流程描述
			
 
				+    workflow = "Step1 + 搜索"
			
 
				+    if enable_step2:
			
 
				+        workflow += " + Step2"
			
 
				+
			
 
				     summary = {
			
 
				         "元数据": {
			
 
				             "current_time": current_time,
			
 
				             "log_url": log_url,
			
 
				-            "流程": "Step1 + Step2 完整分析",
			
 
				+            "流程": workflow,
			
 
				             "step1_model": step1_data.get("元数据", {}).get("model", ""),
			
 
				-            "step2_model": step2_data.get("元数据", {}).get("model", "")
			
 
				+            "step2_model": step2_data.get("元数据", {}).get("model", "") if enable_step2 and 'step2_data' in locals() else None
			
 
				         },
			
 
				         "灵感": inspiration,
			
 
				         "文件路径": {
			
 
				             "step1": step1_file,
			
 
				+            "search": search_file if 'search_file' in locals() else None,
			
 
				             "step2": step2_file
			
 
				         },
			
 
				         "关键指标": {
			
 
				             "step1_top1_score": step1_score,
			
 
				             "step1_top1_匹配要素": step1_element,
			
 
				+            "search_keyword": search_keyword if 'search_keyword' in locals() else None,
			
 
				+            "search_notes_count": search_notes_count if 'search_notes_count' in locals() else 0,
			
 
				             "step2_增量词数量": step2_word_count,
			
 
				             "step2_score": step2_score
			
 
				         }
			
@@ -206,12 +334,16 @@ async def run_full_analysis(
 
				     print(f"完整流程执行完成")
			
 
				     print(f"{'=' * 80}")
			
 
				     print(f"\n结果文件:")
			
 
				-    print(f"  Step1: {step1_file}")
			
 
				-    print(f"  Step2: {step2_file}")
			
 
				-    print(f"  汇总:  {summary_file}\n")
			
 
				+    print(f"  Step1:  {step1_file}")
			
 
				+    if 'search_file' in locals() and search_file:
			
 
				+        print(f"  搜索:   {search_file}")
			
 
				+    if enable_step2 and step2_file:
			
 
				+        print(f"  Step2:  {step2_file}")
			
 
				+    print(f"  汇总:   {summary_file}\n")
			
 
				 
			
 
				     return {
			
 
				         "step1_file": step1_file,
			
 
				+        "search_file": search_file if 'search_file' in locals() else None,
			
 
				         "step2_file": step2_file,
			
 
				         "summary_file": summary_file,
			
 
				         "status": "success"
			
@@ -222,16 +354,22 @@ async def main():
 
				     """主函数"""
			
 
				     # 解析命令行参数
			
 
				     parser = argparse.ArgumentParser(
			
 
				-        description="灵感分析主流程 (Step1 + Step2)",
			
 
				+        description="灵感分析主流程 (Step1 + 搜索 + Step2)",
			
 
				         formatter_class=argparse.RawDescriptionHelpFormatter,
			
 
				         epilog="""
			
 
				 使用示例:
			
 
				-  # 处理第1个灵感
			
 
				+  # 处理第1个灵感（Step1 + 搜索，默认不执行 Step2）
			
 
				   python run_inspiration_analysis.py --dir data/阿里多多酱/out/人设_1110 --count 1
			
 
				 
			
 
				+  # 启用 Step2 完整流程（Step1 + 搜索 + Step2）
			
 
				+  python run_inspiration_analysis.py --count 1 --enable-step2
			
 
				+
			
 
				   # 随机处理5个灵感
			
 
				   python run_inspiration_analysis.py --count 5 --shuffle
			
 
				 
			
 
				+  # 按 Step1 分数排序，处理前10个高分灵感
			
 
				+  python run_inspiration_analysis.py --count 10 --sort-by-score
			
 
				+
			
 
				   # 处理所有灵感，强制重新执行
			
 
				   python run_inspiration_analysis.py --count all --force
			
 
				 
			
@@ -271,17 +409,36 @@ async def main():
 
				         help="随机选择灵感，而不是按顺序"
			
 
				     )
			
 
				 
			
 
				+    parser.add_argument(
			
 
				+        "--sort-by-score",
			
 
				+        action="store_true",
			
 
				+        help="根据 Step1 结果分数排序（降序），优先处理高分灵感"
			
 
				+    )
			
 
				+
			
 
				+    parser.add_argument(
			
 
				+        "--enable-step2",
			
 
				+        action="store_true",
			
 
				+        help="启用 Step2 增量词匹配（默认关闭）"
			
 
				+    )
			
 
				+
			
 
				     args = parser.parse_args()
			
 
				 
			
 
				     persona_dir = args.dir
			
 
				     force = args.force
			
 
				     shuffle = args.shuffle
			
 
				+    sort_by_score = args.sort_by_score
			
 
				+    enable_step2 = args.enable_step2
			
 
				 
			
 
				     # 处理 max_tasks
			
 
				     max_tasks = None if args.max_tasks == "all" else int(args.max_tasks)
			
 
				 
			
 
				+    # 动态流程名称
			
 
				+    workflow_name = "Step1 + 搜索"
			
 
				+    if enable_step2:
			
 
				+        workflow_name += " + Step2"
			
 
				+
			
 
				     print(f"{'=' * 80}")
			
 
				-    print(f"灵感分析主流程 (Step1 + Step2)")
			
 
				+    print(f"灵感分析主流程 ({workflow_name})")
			
 
				     print(f"{'=' * 80}")
			
 
				     print(f"人设目录: {persona_dir}")
			
 
				 
			
@@ -305,8 +462,20 @@ async def main():
 
				     if shuffle:
			
 
				         print(f"随机模式: 随机选择灵感")
			
 
				 
			
 
				+    if sort_by_score:
			
 
				+        print(f"分数排序: 根据 Step1 结果按分数降序处理")
			
 
				+
			
 
				+    if enable_step2:
			
 
				+        print(f"Step2: 启用增量词匹配")
			
 
				+    else:
			
 
				+        print(f"Step2: 已关闭（使用 --enable-step2 启用）")
			
 
				+
			
 
				     # 选择要处理的灵感列表
			
 
				-    if shuffle:
			
 
				+    if sort_by_score:
			
 
				+        # 根据 Step1 结果分数排序
			
 
				+        sorted_list = sort_inspirations_by_score(persona_dir, inspiration_list, max_tasks)
			
 
				+        inspirations_to_process = sorted_list[:inspiration_count]
			
 
				+    elif shuffle:
			
 
				         # 随机打乱灵感列表后选择
			
 
				         shuffled_list = inspiration_list.copy()
			
 
				         random.shuffle(shuffled_list)
			
@@ -336,7 +505,8 @@ async def main():
 
				                 max_tasks=max_tasks,
			
 
				                 force=force,
			
 
				                 current_time=insp_time,
			
 
				-                log_url=insp_log_url
			
 
				+                log_url=insp_log_url,
			
 
				+                enable_step2=enable_step2
			
 
				             )
			
 
				 
			
 
				         results.append(result)
			
--- a/visualize_inspiration_points.py
+++ b/visualize_inspiration_points.py