刘立冬 3 weeks ago
parent
commit
6876717549

+ 115 - 6
enhanced_search_v2.py

@@ -20,6 +20,7 @@ from itertools import combinations
 from openrouter_client import OpenRouterClient
 from llm_evaluator import LLMEvaluator
 from xiaohongshu_search import XiaohongshuSearch
+from stage7_analyzer import Stage7DeconstructionAnalyzer
 
 # 配置日志
 logging.basicConfig(
@@ -51,7 +52,15 @@ class EnhancedSearchV2:
         max_searches_per_base_word: Optional[int] = None,
         enable_stage6: bool = False,
         stage6_max_workers: int = 10,
-        stage6_max_notes: int = 20
+        stage6_max_notes: int = 20,
+        enable_stage7: bool = False,
+        stage7_only: bool = False,
+        stage7_max_workers: int = 5,
+        stage7_max_notes: Optional[int] = None,
+        stage7_skip: int = 0,
+        stage7_sort_by: str = 'score',
+        stage7_api_url: str = "http://192.168.245.150:7000/what/analysis/single",
+        stage7_min_score: float = 8.0
     ):
         """
         初始化系统
@@ -70,6 +79,14 @@ class EnhancedSearchV2:
             enable_stage6: 是否启用Stage 6评估(默认False)
             stage6_max_workers: Stage 6并发评估数(默认10)
             stage6_max_notes: 每个搜索结果评估的最大帖子数(默认20)
+            enable_stage7: 是否启用Stage 7深度解构(默认False)
+            stage7_only: 只运行Stage 7(从Stage 6结果开始,默认False)
+            stage7_max_workers: Stage 7并发数(默认5)
+            stage7_max_notes: Stage 7最多处理多少个帖子(默认None不限制)
+            stage7_skip: Stage 7跳过前N个帖子(默认0)
+            stage7_sort_by: Stage 7排序方式:score/time/engagement(默认score)
+            stage7_api_url: Stage 7解构API地址
+            stage7_min_score: Stage 7处理的最低分数阈值(默认8.0)
         """
         self.how_json_path = how_json_path
         self.dimension_associations_path = dimension_associations_path
@@ -83,6 +100,8 @@ class EnhancedSearchV2:
         self.enable_stage6 = enable_stage6
         self.stage6_max_workers = stage6_max_workers
         self.stage6_max_notes = stage6_max_notes
+        self.enable_stage7 = enable_stage7
+        self.stage7_only = stage7_only
 
         # 创建输出目录
         os.makedirs(output_dir, exist_ok=True)
@@ -103,6 +122,20 @@ class EnhancedSearchV2:
         self.llm_evaluator = LLMEvaluator(self.openrouter_client)
         self.search_client = XiaohongshuSearch()
 
+        # 初始化 Stage 7 分析器
+        self.stage7_analyzer = Stage7DeconstructionAnalyzer(
+            api_url=stage7_api_url,
+            max_workers=stage7_max_workers,
+            max_notes=stage7_max_notes,
+            min_score=stage7_min_score,
+            skip_count=stage7_skip,
+            sort_by=stage7_sort_by,
+            output_dir=output_dir,
+            enable_image_download=False,  # 直接使用原始图片URL,不做代理
+            image_server_url="http://localhost:8765",  # 图片服务器URL(已弃用)
+            image_download_dir="downloaded_images"  # 图片下载目录(已弃用)
+        )
+
         logger.info("系统初始化完成")
 
     def _load_json(self, file_path: str) -> Any:
@@ -1547,6 +1580,21 @@ class EnhancedSearchV2:
         logger.info("=" * 60)
 
         try:
+            # Stage 7 Only 模式:只运行 Stage 7
+            if self.stage7_only:
+                logger.info("运行模式: Stage 7 Only (从 Stage 6 结果开始)")
+                stage6_path = os.path.join(self.output_dir, "stage6_with_evaluations.json")
+
+                if not os.path.exists(stage6_path):
+                    raise FileNotFoundError(f"Stage 6 结果不存在: {stage6_path}")
+
+                with open(stage6_path, 'r', encoding='utf-8') as f:
+                    stage6_results = json.load(f)
+
+                stage7_results = self.stage7_analyzer.run(stage6_results)
+                return stage7_results
+
+            # 正常流程:从 Stage 1 开始
             # 阶段1
             stage1_results = self.stage1_filter_features()
 
@@ -1575,11 +1623,17 @@ class EnhancedSearchV2:
                 logger.info("阶段6:跳过(未启用)")
                 logger.info("=" * 60)
 
-            # 阶段7 - 暂时切断执行(代码保留)
-            # final_results = self.stage7_extended_searches(stage6_results, search_delay=2.0)
+            # 阶段7 - 深度解构分析(条件执行)
+            if self.enable_stage7:
+                stage7_results = self.stage7_analyzer.run(stage6_results)
+                final_results = stage7_results
+            else:
+                final_results = stage6_results
 
             logger.info("\n" + "=" * 60)
-            if self.enable_stage6:
+            if self.enable_stage7:
+                logger.info("✓ 完整流程执行完成(Stage1-7)")
+            elif self.enable_stage6:
                 logger.info("✓ 完整流程执行完成(Stage1-6)")
             else:
                 logger.info("✓ 完整流程执行完成(Stage1-5)")
@@ -1612,7 +1666,7 @@ class EnhancedSearchV2:
             except Exception as e:
                 logger.error(f"可视化生成异常: {e}")
 
-            return stage5_results
+            return final_results
 
         except Exception as e:
             logger.error(f"流程执行失败: {e}")
@@ -1694,6 +1748,53 @@ def main():
         default=20,
         help='每个搜索结果评估的最大帖子数(默认20)'
     )
+    parser.add_argument(
+        '--enable-stage7',
+        action='store_true',
+        help='启用 Stage 7 深度解构分析'
+    )
+    parser.add_argument(
+        '--stage7-only',
+        action='store_true',
+        help='只运行 Stage 7(从 Stage 6 结果开始)'
+    )
+    parser.add_argument(
+        '--stage7-max-workers',
+        type=int,
+        default=5,
+        help='Stage 7 并发数(默认5)'
+    )
+    parser.add_argument(
+        '--stage7-max-notes',
+        type=int,
+        default=None,
+        help='Stage 7 最多处理多少个完全匹配的帖子(默认None不限制)'
+    )
+    parser.add_argument(
+        '--stage7-skip',
+        type=int,
+        default=0,
+        help='Stage 7 跳过前 N 个完全匹配的帖子(默认0)'
+    )
+    parser.add_argument(
+        '--stage7-sort-by',
+        type=str,
+        choices=['score', 'time', 'engagement'],
+        default='score',
+        help='Stage 7 排序方式: score(评分), time(时间), engagement(互动量)'
+    )
+    parser.add_argument(
+        '--stage7-api-url',
+        type=str,
+        default='http://192.168.245.150:7000/what/analysis/single',
+        help='Stage 7 解构 API 地址'
+    )
+    parser.add_argument(
+        '--stage7-min-score',
+        type=float,
+        default=8.0,
+        help='Stage 7 处理的最低分数阈值(默认8.0)'
+    )
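+
+    # 用法示意(假设直接运行本脚本,以下参数组合仅作演示):
+    #   完整流程并启用 Stage 6/7:
+    #     python enhanced_search_v2.py --enable-stage6 --enable-stage7 --stage7-max-workers 5
+    #   仅重跑 Stage 7(要求输出目录下已存在 stage6_with_evaluations.json):
+    #     python enhanced_search_v2.py --stage7-only --stage7-sort-by engagement --stage7-min-score 8.0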
 
     args = parser.parse_args()
 
@@ -1711,7 +1812,15 @@ def main():
         max_searches_per_base_word=args.max_searches_per_base_word,
         enable_stage6=args.enable_stage6,
         stage6_max_workers=args.stage6_max_workers,
-        stage6_max_notes=args.stage6_max_notes
+        stage6_max_notes=args.stage6_max_notes,
+        enable_stage7=args.enable_stage7,
+        stage7_only=args.stage7_only,
+        stage7_max_workers=args.stage7_max_workers,
+        stage7_max_notes=args.stage7_max_notes,
+        stage7_skip=args.stage7_skip,
+        stage7_sort_by=args.stage7_sort_by,
+        stage7_api_url=args.stage7_api_url,
+        stage7_min_score=args.stage7_min_score
     )
 
     # 执行完整流程

+ 114 - 0
lib/README_async_utils.md

@@ -0,0 +1,114 @@
+# 异步并发处理工具
+
+## 文件说明
+
+`lib/async_utils.py` - 提供通用的异步任务并发执行功能
+
+## 功能列表
+
+### 1. `process_tasks_with_semaphore`
+
+基本的并发处理函数,使用信号量控制并发数量。
+
+#### 参数
+
+- `tasks`: 任务列表
+- `process_func`: 处理单个任务的异步函数,签名为 `async def func(task, index) -> result`
+- `max_concurrent`: 最大并发数(默认: 3)
+- `show_progress`: 是否显示进度信息(默认: True)
+
+#### 使用示例
+
+```python
+from lib.async_utils import process_tasks_with_semaphore
+
+# 定义处理单个任务的函数
+async def process_one_task(task: dict, index: int) -> dict:
+    # 你的处理逻辑
+    result = await some_async_operation(task)
+    return result
+
+# 准备任务列表
+tasks = [task1, task2, task3, ...]
+
+# 并发处理所有任务
+results = await process_tasks_with_semaphore(
+    tasks,
+    process_one_task,
+    max_concurrent=3,
+    show_progress=True
+)
+```
+
+### 2. `process_tasks_with_semaphore_retry`
+
+支持重试的并发处理函数,适用于不稳定的网络请求。重试次数耗尽后不会中断整体执行,该任务在结果列表中的对应位置为其异常对象。
+
+#### 参数
+
+- `tasks`: 任务列表
+- `process_func`: 处理单个任务的异步函数
+- `max_concurrent`: 最大并发数(默认: 3)
+- `max_retries`: 最大重试次数(默认: 3)
+- `show_progress`: 是否显示进度信息(默认: True)
+
+#### 使用示例
+
+```python
+from lib.async_utils import process_tasks_with_semaphore_retry
+
+# 定义可能失败的异步任务
+async def unstable_task(task: dict, index: int) -> dict:
+    # 可能会抛出异常的操作
+    result = await api_call(task)
+    return result
+
+# 并发处理,失败时自动重试
+results = await process_tasks_with_semaphore_retry(
+    tasks,
+    unstable_task,
+    max_concurrent=3,
+    max_retries=3,
+    show_progress=True
+)
+```
+
+## 在 match_inspiration_to_persona.py 中的使用
+
+```python
+# 1. 导入工具
+from lib.async_utils import process_tasks_with_semaphore
+
+# 2. 定义处理函数
+async def process_match_task_with_error_handling(task: dict, index: int) -> dict:
+    try:
+        result = await match_single_task(task)
+        return result
+    except Exception as e:
+        # 错误处理逻辑
+        return error_result
+
+# 3. 并发处理任务
+results = await process_tasks_with_semaphore(
+    test_tasks,
+    process_match_task_with_error_handling,
+    max_concurrent=3,
+    show_progress=True
+)
+```
+
+## 特点
+
+1. **通用性**: 可用于任何需要并发处理的异步任务
+2. **并发控制**: 使用信号量控制并发数量,避免资源耗尽
+3. **顺序保证**: 返回结果与输入任务的顺序一致
+4. **进度显示**: 可选的进度显示功能
+5. **重试支持**: `process_tasks_with_semaphore_retry` 支持自动重试机制
+
+## 适用场景
+
+- API 批量请求
+- 文件批量处理
+- 数据库批量操作
+- LLM 批量推理
+- 任何需要并发控制的异步操作
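+
+## 最小可运行示例
+
+下面是一个可以直接运行的最小示例(任务仅用 `asyncio.sleep` 模拟耗时操作,用于演示并发控制与顺序保证):
+
+```python
+import asyncio
+from lib.async_utils import process_tasks_with_semaphore
+
+async def fake_task(task: int, index: int) -> int:
+    await asyncio.sleep(0.1)  # 模拟耗时操作
+    return task * 2
+
+async def main():
+    tasks = list(range(10))
+    results = await process_tasks_with_semaphore(tasks, fake_task, max_concurrent=3)
+    print(results)  # [0, 2, 4, ..., 18],与输入任务顺序一致
+
+asyncio.run(main())
+```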

+ 293 - 0
lib/README_semantic_similarity.md

@@ -0,0 +1,293 @@
+# 语义相似度分析模块
+
+## 功能概述
+
+提供基于 AI Agent 的语义相似度分析功能,支持缓存机制以提高性能和降低 API 调用成本。
+
+## 主要功能
+
+### 1. 核心函数
+
+- `difference_between_phrases()` - 返回原始 AI 响应
+- `difference_between_phrases_parsed()` - 返回解析后的 JSON 字典
+- `compare_phrases()` - `difference_between_phrases_parsed()` 的别名
+
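+下面的用法草图展示三者的区别(仅为示意,具体函数签名以 lib/semantic_similarity.py 实际实现为准):
+
+```python
+from lib.semantic_similarity import (
+    difference_between_phrases,
+    difference_between_phrases_parsed,
+    compare_phrases,
+)
+
+# 原始响应:模型返回的字符串(通常包含 JSON 代码块)
+raw = await difference_between_phrases("宿命感", "余华的小说")
+
+# 解析后的结果:字典,包含 "说明" 与 "相似度" 字段
+parsed = await difference_between_phrases_parsed("宿命感", "余华的小说")
+
+# compare_phrases 是解析版本的别名,两者等价
+result = await compare_phrases("宿命感", "余华的小说")
+print(result["相似度"], result["说明"])
+```
+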
+### 2. 缓存系统设计
+
+#### 缓存文件名设计
+
+**方案:可读文件名 + 哈希后缀**
+
+```
+cache/semantic_similarity/
+├── 宿命感_vs_余华的小说_gpt-4.1-mini_t0.0_a7f3e2d9.json
+├── 人工智能_vs_机器学习_claude-sonnet-4.5_t0.0_b8e4f3e0.json
+├── 深度学习_vs_神经网络_gemini-2.5-pro_t0.2_c9f5g4h1.json
+└── 创意写作_vs_AI生成_gpt-4.1-mini_t0.7_d0a6h5i2.json
+```
+
+**文件名格式:**
+```
+{phrase_a}_vs_{phrase_b}_{model}_t{temp}_{hash[:8]}.json
+```
+
+- `phrase_a`: 第一个短语(最长20字符,特殊字符转为下划线)
+- `phrase_b`: 第二个短语(最长20字符,特殊字符转为下划线)
+- `model`: 模型简称(提取 `/` 后部分,最长20字符)
+- `t{temp}`: 温度参数(格式化为1位小数,如 t0.0, t0.2, t0.7)
+- `hash[:8]`: 完整哈希的前8位
+
+**哈希生成逻辑:**
+- 基于所有影响结果的参数生成唯一 MD5 哈希:
+  - `phrase_a` - 第一个短语
+  - `phrase_b` - 第二个短语
+  - `model_name` - 模型名称
+  - `temperature` - 温度参数
+  - `max_tokens` - 最大 token 数
+  - `prompt_template` - 提示词模板
+
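+按上述规则,缓存键与文件名的生成过程大致如下(假设性草图,细节以 lib/semantic_similarity.py 中 `_generate_cache_key` 的实际实现为准):
+
+```python
+import hashlib
+import re
+
+def make_cache_key(phrase_a, phrase_b, model_name, temperature, max_tokens, prompt_template):
+    """将所有影响结果的参数拼接后做 MD5,得到完整缓存键"""
+    raw = "|".join([phrase_a, phrase_b, model_name, str(temperature), str(max_tokens), prompt_template])
+    return hashlib.md5(raw.encode("utf-8")).hexdigest()
+
+def make_cache_filename(phrase_a, phrase_b, model_name, temperature, cache_key):
+    """可读文件名 + 哈希后缀"""
+    def clean(s):
+        return re.sub(r"\W+", "_", s)[:20]  # 特殊字符转下划线,最长20字符
+    model_short = model_name.split("/")[-1][:20]  # 取 "/" 后部分作为模型简称
+    return f"{clean(phrase_a)}_vs_{clean(phrase_b)}_{model_short}_t{temperature:.1f}_{cache_key[:8]}.json"
+
+# make_cache_filename("宿命感", "余华的小说", "openai/gpt-4.1-mini", 0.0, "a7f3e2d9c1b4a5f8e6d7c9b2a1f3e5d7")
+# -> "宿命感_vs_余华的小说_gpt-4.1-mini_t0.0_a7f3e2d9.json"
+```
+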
+**缓存文件格式(结构化 JSON):**
+
+```json
+{
+  "input": {
+    "phrase_a": "宿命感",
+    "phrase_b": "余华的小说",
+    "model_name": "openai/gpt-4.1-mini",
+    "temperature": 0.0,
+    "max_tokens": 65536,
+    "prompt_template": "从语意角度,判断【{phrase_a}】和【{phrase_b}】..."
+  },
+  "output": {
+    "result": "{\n  \"说明\": \"...\",\n  \"相似度\": 0.75\n}"
+  },
+  "metadata": {
+    "timestamp": "2025-11-19 14:30:45",
+    "cache_key": "a7f3e2d9c1b4a5f8e6d7c9b2a1f3e5d7",
+    "cache_file": "宿命感_vs_余华的小说_gpt-4.1-mini_t0.0_a7f3e2d9.json"
+  }
+}
+```
+
+#### 缓存特性
+
+1. **自动缓存**:默认启用,首次调用保存结果
+2. **智能匹配**:相同参数自动从缓存读取
+3. **可控性**:支持 `use_cache=False` 强制重新请求
+4. **可追溯**:缓存文件包含完整元数据和时间戳
+5. **自定义目录**:支持通过 `cache_dir` 参数自定义缓存位置
+
+## 使用示例
+
+### 基本使用(自动缓存)
+
+```python
+from lib.semantic_similarity import compare_phrases
+
+# 第一次调用 - 请求 API 并缓存
+result = await compare_phrases("宿命感", "余华的小说")
+# 输出: ✓ 已缓存: 宿命感_vs_余华的小说_gpt-4.1-mini_t0.0_a7f3e2d9.json
+
+# 第二次调用相同参数 - 从缓存读取
+result = await compare_phrases("宿命感", "余华的小说")
+# 输出: ✓ 使用缓存: 宿命感_vs_余华的小说_gpt-4.1-mini_t0.0_a7f3e2d9.json
+
+print(result['相似度'])  # 0.75
+print(result['说明'])    # "两个概念..."
+```
+
+### 禁用缓存
+
+```python
+# 强制重新请求 API
+result = await compare_phrases(
+    "人工智能",
+    "机器学习",
+    use_cache=False
+)
+```
+
+### 自定义缓存目录
+
+```python
+# 使用自定义缓存目录
+result = await compare_phrases(
+    "深度学习",
+    "神经网络",
+    cache_dir="my_cache/similarity"
+)
+```
+
+### 自定义提示词模板
+
+```python
+custom_template = """
+请详细分析【{phrase_a}】和【{phrase_b}】的语义关系
+输出格式:
+```json
+{{
+  "说明": "详细分析",
+  "相似度": 0.5,
+  "关系类型": "相关/包含/对立/无关"
+}}
+```
+"""
+
+result = await compare_phrases(
+    "机器学习",
+    "深度学习",
+    prompt_template=custom_template
+)
+```
+
+### 配置不同模型
+
+```python
+# 使用 Claude 模型
+result = await compare_phrases(
+    "人工智能",
+    "深度学习",
+    model_name='anthropic/claude-sonnet-4.5',
+    temperature=0.2
+)
+```
+
+## 缓存管理
+
+### 查看缓存
+
+```bash
+# 查看缓存目录
+ls cache/semantic_similarity/
+
+# 查看特定缓存文件(文件名为可读短语 + 哈希后缀)
+cat cache/semantic_similarity/宿命感_vs_余华的小说_gpt-4.1-mini_t0.0_a7f3e2d9.json
+```
+
+### 清理缓存
+
+```bash
+# 清理所有缓存
+rm -rf cache/semantic_similarity/
+
+# 清理特定短语对的缓存文件(按可读前缀匹配)
+rm cache/semantic_similarity/宿命感_vs_余华的小说_*.json
+```
+
+### 缓存统计
+
+```python
+from pathlib import Path
+import json
+
+cache_dir = Path("cache/semantic_similarity")
+cache_files = list(cache_dir.glob("*.json"))
+
+print(f"缓存文件总数: {len(cache_files)}")
+
+# 统计各模型使用情况
+model_stats = {}
+for file in cache_files:
+    with open(file, 'r', encoding='utf-8') as f:
+        data = json.load(f)
+        # model_name 位于缓存文件的 input 字段下
+        model = data.get('input', {}).get('model_name', 'unknown')
+        model_stats[model] = model_stats.get(model, 0) + 1
+
+print("各模型缓存数量:")
+for model, count in model_stats.items():
+    print(f"  {model}: {count}")
+```
+
+## 参数说明
+
+### 所有函数共享参数
+
+| 参数 | 类型 | 默认值 | 说明 |
+|------|------|--------|------|
+| `phrase_a` | str | 必填 | 第一个短语 |
+| `phrase_b` | str | 必填 | 第二个短语 |
+| `model_name` | str | `'openai/gpt-4.1-mini'` | 使用的 AI 模型 |
+| `temperature` | float | `0.0` | 温度参数(0.0-1.0) |
+| `max_tokens` | int | `65536` | 最大生成 token 数 |
+| `prompt_template` | str | `None` | 自定义提示词模板 |
+| `use_cache` | bool | `True` | 是否启用缓存 |
+| `cache_dir` | str | `'cache/semantic_similarity'` | 缓存目录路径 |
+
+### 支持的模型
+
+- `'google/gemini-2.5-pro'`
+- `'anthropic/claude-sonnet-4.5'`
+- `'google/gemini-2.0-flash-001'`
+- `'openai/gpt-5-mini'`
+- `'anthropic/claude-haiku-4.5'`
+- `'openai/gpt-4.1-mini'` (默认)
+
+## 性能优化
+
+### 缓存命中率优化
+
+1. **参数标准化**:确保相同语义使用相同参数
+2. **批量处理**:对相同短语对只调用一次
+3. **预热缓存**:提前为常用短语对生成缓存
+
+### 示例:批量处理
+
+```python
+phrase_pairs = [
+    ("宿命感", "余华的小说"),
+    ("人工智能", "机器学习"),
+    ("深度学习", "神经网络"),
+]
+
+for phrase_a, phrase_b in phrase_pairs:
+    result = await compare_phrases(phrase_a, phrase_b)
+    print(f"{phrase_a} vs {phrase_b}: {result['相似度']}")
+```
+
+## 注意事项
+
+1. **参数敏感性**:任何参数变化都会导致新的缓存键
+2. **存储空间**:长期使用可能积累大量缓存文件
+3. **缓存一致性**:模型更新后建议清理旧缓存
+4. **并发安全**:当前实现不支持并发写入同一缓存文件
+
+## 故障排查
+
+### 缓存未命中
+
+**问题**:相同参数调用但未使用缓存
+
+**可能原因**:
+- 参数细微差异(如空格、换行)
+- `prompt_template` 不一致
+- 缓存文件损坏或被删除
+
+**解决方案**:
+```python
+# 检查缓存键
+from lib.semantic_similarity import _generate_cache_key, DEFAULT_PROMPT_TEMPLATE
+
+key = _generate_cache_key(
+    "宿命感", "余华的小说",
+    "openai/gpt-4.1-mini", 0.0, 65536,
+    DEFAULT_PROMPT_TEMPLATE
+)
+print(f"缓存键: {key}")
+```
+
+### 缓存损坏
+
+**问题**:缓存文件存在但无法加载
+
+**解决方案**:
+```bash
+# 删除损坏的缓存文件(文件名以哈希前8位结尾,可按后缀匹配删除)
+rm cache/semantic_similarity/*_{哈希前8位}.json
+```
+
+## 版本历史
+
+- **v1.0** - 初始版本,支持基本语义相似度分析
+- **v1.1** - 添加缓存系统
+- **v1.2** - 支持自定义提示词模板
+- **v1.3** - 优化缓存文件格式,添加元数据

+ 99 - 0
lib/async_utils.py

@@ -0,0 +1,99 @@
+"""
+异步并发处理工具模块
+
+提供通用的异步任务并发执行功能
+"""
+import asyncio
+from typing import List, Callable, Any, Awaitable
+
+
+async def process_tasks_with_semaphore(
+        tasks: List[Any],
+        process_func: Callable[[Any, int], Awaitable[Any]],
+        max_concurrent: int = 3,
+        show_progress: bool = True
+) -> List[Any]:
+    """使用信号量控制并发数量处理任务
+
+    Args:
+        tasks: 任务列表
+        process_func: 处理单个任务的异步函数,签名为 async def func(task, index) -> result
+        max_concurrent: 最大并发数
+        show_progress: 是否显示进度信息
+
+    Returns:
+        结果列表(保持原始顺序)
+
+    Example:
+        async def process_one(task, index):
+            result = await some_async_operation(task)
+            return result
+
+        tasks = [task1, task2, task3]
+        results = await process_tasks_with_semaphore(tasks, process_one, max_concurrent=3)
+    """
+    semaphore = asyncio.Semaphore(max_concurrent)
+
+    async def process_with_semaphore(task: Any, index: int):
+        """包装处理函数,添加信号量控制"""
+        async with semaphore:
+            result = await process_func(task, index)
+            if show_progress:
+                print(f"[{index + 1}/{len(tasks)}] 任务完成")
+            return result
+
+    # 并发处理所有任务
+    results = await asyncio.gather(
+        *[process_with_semaphore(task, i) for i, task in enumerate(tasks)]
+    )
+
+    return results
+
+
+async def process_tasks_with_semaphore_retry(
+        tasks: List[Any],
+        process_func: Callable[[Any, int], Awaitable[Any]],
+        max_concurrent: int = 3,
+        max_retries: int = 3,
+        show_progress: bool = True
+) -> List[Any]:
+    """使用信号量控制并发数量处理任务(支持重试)
+
+    Args:
+        tasks: 任务列表
+        process_func: 处理单个任务的异步函数,签名为 async def func(task, index) -> result
+        max_concurrent: 最大并发数
+        max_retries: 最大重试次数
+        show_progress: 是否显示进度信息
+
+    Returns:
+        结果列表(保持原始顺序;重试耗尽仍失败的任务,对应位置为其异常对象)
+    """
+    semaphore = asyncio.Semaphore(max_concurrent)
+
+    async def process_with_semaphore_and_retry(task: Any, index: int):
+        """包装处理函数,添加信号量控制和重试逻辑"""
+        async with semaphore:
+            for attempt in range(max_retries):
+                try:
+                    result = await process_func(task, index)
+                    if show_progress:
+                        print(f"[{index + 1}/{len(tasks)}] 任务完成")
+                    return result
+                except Exception as e:
+                    if attempt < max_retries - 1:
+                        if show_progress:
+                            print(f"[{index + 1}/{len(tasks)}] 重试 {attempt + 1}/{max_retries - 1}: {e}")
+                        await asyncio.sleep(1)  # 重试前等待1秒
+                    else:
+                        if show_progress:
+                            print(f"[{index + 1}/{len(tasks)}] 失败(已尝试 {max_retries} 次): {e}")
+                        raise
+
+    # 并发处理所有任务
+    results = await asyncio.gather(
+        *[process_with_semaphore_and_retry(task, i) for i, task in enumerate(tasks)],
+        return_exceptions=True  # 返回异常而不是抛出
+    )
+
+    return results

+ 261 - 0
lib/batch_match_analyzer.py

@@ -0,0 +1,261 @@
+"""
+批量匹配分析模块
+
+分析单个特征与多个特征之间的语义匹配度(批量版本)
+
+提供接口:
+analyze_batch_match(phrase_a, phrase_b_list, model_name) - 批量分析匹配度
+
+返回格式:
+[
+    {
+        "特征": "...",
+        "分数": 0.85,
+        "说明": "..."
+    },
+    ...
+]
+"""
+from typing import List
+from agents import Agent, Runner, ModelSettings
+from agents.tracing.create import custom_span
+from lib.client import get_model
+from lib.utils import parse_json_from_text
+
+
+# ========== System Prompt ==========
+BATCH_MATCH_SYSTEM_PROMPT = """
+# 任务
+分析单个特征 <A> 与多个特征 <B_List> 之间的语义匹配度。
+
+## 输入说明
+- **<A></A>**: 待分析的特征(必选)
+- **<B_List></B_List>**: 多个特征列表(必选)
+
+**重要**:
+1. 必须在同一个评分标准下对所有 B 进行评分,确保分数可比
+2. **优先识别并给出高分**给与 <A> 相似度最高的特征
+3. 严格区分高相似度和低相似度,避免分数过于集中
+
+---
+
+## 评分标准(0-1分)
+
+**核心原则**:从 <B_List> 中找出与 <A> 最相似的特征,给予最高分,其他按相似度递减。
+
+- **0.9-1.0**:几乎完全相同(同义词、可互换)
+- **0.7-0.9**:非常接近、高度相关(强关联、核心相关)
+- **0.5-0.7**:有一定关联(中等相关、间接关联)
+- **0.3-0.5**:关系较弱(弱相关、边缘关联)
+- **0.0-0.3**:几乎无关或完全无关
+
+**评分策略**:
+- 优先识别与 <A> 最相似的特征,给 0.7+ 高分
+- 对明显无关的特征,果断给 0.0-0.3 低分
+- 合理使用中间分数段,避免过度集中
+- 确保分数有梯度,体现明确的相似度差异
+
+---
+
+## 输出格式(严格JSON数组)
+
+```json
+[
+  {
+    "特征": "第一个B的特征",
+    "分数": 0.85,
+    "说明": "简要说明评分依据"
+  },
+  {
+    "特征": "第二个B的特征",
+    "分数": 0.45,
+    "说明": "简要说明评分依据"
+  }
+]
+```
+
+**输出要求**:
+1. 数组长度必须等于 <B_List> 的长度,顺序一一对应
+2. 分数必须是0-1之间的浮点数,保留2位小数
+3. 所有评分必须使用相同的标准,分数之间可比
+4. **必须有明显的分数梯度**,最相似的给高分,不相关的给低分
+""".strip()
+
+
+def create_batch_match_agent(model_name: str) -> Agent:
+    """创建批量匹配分析的 Agent
+
+    Args:
+        model_name: 模型名称
+
+    Returns:
+        Agent 实例
+    """
+    agent = Agent(
+        name="Batch Match Expert",
+        instructions=BATCH_MATCH_SYSTEM_PROMPT,
+        model=get_model(model_name),
+        model_settings=ModelSettings(
+            temperature=0.0,
+            max_tokens=65536,
+        ),
+        tools=[],
+    )
+
+    return agent
+
+
+def clean_json_text(text: str) -> str:
+    """清理JSON文本中的常见错误
+
+    Args:
+        text: 原始JSON文本
+
+    Returns:
+        清理后的JSON文本
+    """
+    import re
+
+    # 1. 移除数组元素之间的异常字符(如 trib{)
+    # 匹配模式:逗号后面跟着任意非空白字符,直到遇到正常的对象开始 {
+    text = re.sub(r',\s*[a-zA-Z]+\s*\{', r',\n  {', text)
+
+    # 2. 移除对象之间的异常字符
+    text = re.sub(r'\}\s*[a-zA-Z]+\s*\{', r'},\n  {', text)
+
+    return text
+
+
+def parse_batch_match_response(response_content: str) -> List[dict]:
+    """解析批量匹配响应
+
+    Args:
+        response_content: Agent 返回的响应内容
+
+    Returns:
+        解析后的字典列表
+    """
+    try:
+        # 使用 parse_json_from_text 函数进行健壮的 JSON 解析
+        result = parse_json_from_text(response_content)
+
+        # 如果解析失败(返回空字典),尝试清理后再解析
+        if not result:
+            print(f"首次解析失败,尝试清理JSON文本后重新解析...")
+            cleaned_text = clean_json_text(response_content)
+            result = parse_json_from_text(cleaned_text)
+
+            # 如果清理后仍然失败
+            if not result:
+                print(f"清理后仍解析失败: 无法从响应中提取有效JSON")
+                return [{
+                    "特征": "",
+                    "分数": 0.0,
+                    "说明": "解析失败: 无法从响应中提取有效JSON"
+                }]
+
+        # 确保返回的是列表
+        if not isinstance(result, list):
+            return [result]
+
+        return result
+    except Exception as e:
+        print(f"解析响应失败: {e}")
+        return [{
+            "特征": "",
+            "分数": 0.0,
+            "说明": f"解析失败: {str(e)}"
+        }]
+
+
+async def analyze_batch_match(
+    phrase_a: str,
+    phrase_b_list: List[str],
+    model_name: str = None
+) -> List[dict]:
+    """批量分析匹配度
+
+    Args:
+        phrase_a: 待分析的特征
+        phrase_b_list: 多个特征列表
+        model_name: 使用的模型名称(可选,默认使用 client.py 中的 MODEL_NAME)
+
+    Returns:
+        匹配结果列表:[{"特征": "...", "分数": 0.85, "说明": "..."}, ...]
+    """
+    try:
+        # 如果未指定模型,使用默认模型
+        if model_name is None:
+            from lib.client import MODEL_NAME
+            model_name = MODEL_NAME
+
+        # 创建 Agent
+        agent = create_batch_match_agent(model_name)
+
+        # 构建 B 列表字符串
+        b_list_str = "\n".join([f"- {b}" for b in phrase_b_list])
+
+        # 构建任务描述
+        task_description = f"""## 本次分析任务
+
+<A>
+{phrase_a}
+</A>
+
+<B_List>
+{b_list_str}
+</B_List>
+
+请分析 <A> 与 <B_List> 中每个特征的匹配度,输出 JSON 数组格式的结果。
+重要:必须使用一致的评分标准!"""
+
+        # 构造消息
+        messages = [{
+            "role": "user",
+            "content": [
+                {
+                    "type": "input_text",
+                    "text": task_description
+                }
+            ]
+        }]
+
+        # 使用 custom_span 追踪分析过程
+        # 截断显示内容,避免 span name 过长
+        a_short = (phrase_a[:30] + "...") if len(phrase_a) > 30 else phrase_a
+
+        with custom_span(
+            name=f"批量匹配分析: {a_short} vs {len(phrase_b_list)}个特征",
+            data={
+                "phrase_a": phrase_a,
+                "phrase_b_list": phrase_b_list,
+                "b_count": len(phrase_b_list)
+            }
+        ):
+            # 运行 Agent
+            result = await Runner.run(agent, input=messages)
+
+        # 解析响应
+        parsed_result = parse_batch_match_response(result.final_output)
+
+        # 验证返回的结果数量
+        if len(parsed_result) != len(phrase_b_list):
+            print(f"警告: 返回结果数量 ({len(parsed_result)}) 与输入数量 ({len(phrase_b_list)}) 不匹配")
+            # 补齐或截断
+            while len(parsed_result) < len(phrase_b_list):
+                parsed_result.append({
+                    "特征": phrase_b_list[len(parsed_result)],
+                    "分数": 0.0,
+                    "说明": "结果数量不匹配,自动补齐"
+                })
+            parsed_result = parsed_result[:len(phrase_b_list)]
+
+        return parsed_result
+
+    except Exception as e:
+        # 返回错误信息(为每个 B 创建一个错误条目)
+        return [{
+            "特征": b,
+            "分数": 0.0,
+            "说明": f"分析过程出错: {str(e)}"
+        } for b in phrase_b_list]
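+
+
+# 用法示意(需在异步上下文中调用;返回值中的分数与说明仅为示意):
+#   results = await analyze_batch_match("宿命感", ["余华的小说", "Python 教程"])
+#   results[0] -> {"特征": "余华的小说", "分数": 0.78, "说明": "..."}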

+ 17 - 0
lib/client.py

@@ -0,0 +1,17 @@
+
+from agents import Agent, Runner, OpenAIChatCompletionsModel
+from openai import AsyncOpenAI
+import os
+BASE_URL = os.getenv("EXAMPLE_BASE_URL") or "https://openrouter.ai/api/v1"
+API_KEY = os.getenv("OPENROUTER_API_KEY")
+MODEL_NAME = "google/gemini-2.5-flash"
+client = AsyncOpenAI(
+    base_url=BASE_URL,
+    api_key=API_KEY,
+    max_retries=5,
+)
+def get_model(model_name=MODEL_NAME):
+    return OpenAIChatCompletionsModel(
+        openai_client=client,
+        model=model_name,
+    )
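+
+
+# 用法示意(配合 agents SDK,需在异步上下文中运行,且已设置 OPENROUTER_API_KEY):
+#   agent = Agent(name="Demo", instructions="你是一个中文助手", model=get_model())
+#   result = await Runner.run(agent, input="你好")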

+ 189 - 0
lib/config.py

@@ -0,0 +1,189 @@
+#!/usr/bin/env python3
+"""
+配置模块 - 统一管理项目配置
+"""
+import os
+from pathlib import Path
+from typing import Optional
+
+
+class Config:
+    """项目配置类"""
+
+    # 默认缓存根目录(用户主目录下的 cache)
+    _DEFAULT_CACHE_ROOT = os.path.expanduser("~/cache")
+
+    # 缓存根目录
+    _cache_root: Optional[str] = None
+
+    @classmethod
+    def get_cache_root(cls) -> str:
+        """
+        获取缓存根目录
+
+        Returns:
+            缓存根目录路径
+        """
+        if cls._cache_root is None:
+            # 1. 优先从环境变量读取
+            cache_root = os.environ.get("CACHE_ROOT")
+            if cache_root:
+                cls._cache_root = cache_root
+            else:
+                # 2. 使用默认路径
+                cls._cache_root = cls._DEFAULT_CACHE_ROOT
+
+        return cls._cache_root
+
+    @classmethod
+    def set_cache_root(cls, path: str) -> None:
+        """
+        设置缓存根目录
+
+        Args:
+            path: 缓存根目录路径(可以是绝对路径或相对路径)
+        """
+        cls._cache_root = path
+
+    @classmethod
+    def get_cache_dir(cls, subdir: str) -> str:
+        """
+        获取特定子模块的缓存目录
+
+        Args:
+            subdir: 子目录名称,如:
+                - "text_embedding", "semantic_similarity" - 计算缓存
+                - "data/search", "data/detail" - 爬虫数据缓存
+                - "data/analysis" - 分析结果缓存
+
+        Returns:
+            完整的缓存目录路径
+        """
+        cache_root = cls.get_cache_root()
+        return str(Path(cache_root) / subdir)
+
+    @classmethod
+    def get_data_dir(cls, subdir: str = "") -> str:
+        """
+        获取数据缓存目录(data 目录现在在缓存根目录下)
+
+        Args:
+            subdir: 子目录名称,如 "search", "detail", "tools_list" 等
+                   如果为空字符串,返回 data 根目录
+
+        Returns:
+            完整的数据目录路径
+
+        Note:
+            data 目录现在统一放在缓存根目录下:
+            - 默认:~/cache/data/
+            - 如果设置了 CACHE_ROOT=/custom: /custom/data/
+        """
+        cache_root = cls.get_cache_root()
+        if subdir:
+            return str(Path(cache_root) / "data" / subdir)
+        return str(Path(cache_root) / "data")
+
+    @classmethod
+    def reset(cls) -> None:
+        """
+        重置配置为默认值(主要用于测试)
+        """
+        cls._cache_root = None
+
+
+# 便捷函数
+def get_cache_root() -> str:
+    """获取缓存根目录"""
+    return Config.get_cache_root()
+
+
+def set_cache_root(path: str) -> None:
+    """设置缓存根目录"""
+    Config.set_cache_root(path)
+
+
+def get_cache_dir(subdir: str) -> str:
+    """获取特定子模块的缓存目录"""
+    return Config.get_cache_dir(subdir)
+
+
+def get_data_dir(subdir: str = "") -> str:
+    """
+    获取数据缓存目录
+
+    Note: data 目录现在在缓存根目录下,例如 cache/data/
+    """
+    return Config.get_data_dir(subdir)
+
+
+if __name__ == "__main__":
+    print("=" * 60)
+    print("配置模块示例")
+    print("=" * 60)
+    print()
+
+    # 示例 1: 使用默认配置
+    print("示例 1: 默认配置")
+    print(f"缓存根目录: {get_cache_root()}")
+    print(f"text_embedding 缓存: {get_cache_dir('text_embedding')}")
+    print(f"semantic_similarity 缓存: {get_cache_dir('semantic_similarity')}")
+    print()
+
+    # 示例 2: 自定义缓存根目录
+    print("示例 2: 自定义缓存根目录")
+    set_cache_root("/tmp/my_cache")
+    print(f"缓存根目录: {get_cache_root()}")
+    print(f"text_embedding 缓存: {get_cache_dir('text_embedding')}")
+    print(f"semantic_similarity 缓存: {get_cache_dir('semantic_similarity')}")
+    print()
+
+    # 示例 3: 使用相对路径
+    print("示例 3: 使用相对路径")
+    set_cache_root("data/cache")
+    print(f"缓存根目录: {get_cache_root()}")
+    print(f"text_embedding 缓存: {get_cache_dir('text_embedding')}")
+    print()
+
+    # 示例 4: 通过环境变量设置
+    print("示例 4: 通过环境变量设置")
+    Config.reset()  # 重置配置
+    os.environ["CACHE_ROOT"] = "/Users/semsevens/Desktop/workspace/daily/1113/how_1120_v3/cache"
+    print(f"缓存根目录: {get_cache_root()}")
+    print(f"text_embedding 缓存: {get_cache_dir('text_embedding')}")
+    print()
+
+    # 示例 5: 数据目录配置(在缓存根目录下)
+    print("示例 5: 数据目录配置(在缓存根目录下)")
+    Config.reset()  # 重置配置
+    print(f"缓存根目录: {get_cache_root()}")
+    print(f"data 目录: {get_data_dir()}")
+    print(f"search 数据: {get_data_dir('search')}")
+    print(f"detail 数据: {get_data_dir('detail')}")
+    print()
+
+    # 示例 6: 设置缓存根目录后,data 也会跟着变
+    print("示例 6: 设置缓存根目录后,data 也会跟着变")
+    set_cache_root("/custom/cache")
+    print(f"缓存根目录: {get_cache_root()}")
+    print(f"data 目录: {get_data_dir()}")
+    print(f"search 数据: {get_data_dir('search')}")
+    print()
+
+    print("=" * 60)
+    print("使用方法:")
+    print("-" * 60)
+    print("缓存根目录:")
+    print("  1. 默认使用 'cache' 目录")
+    print("  2. 通过代码设置: set_cache_root('/path/to/cache')")
+    print("  3. 通过环境变量: export CACHE_ROOT=/path/to/cache")
+    print()
+    print("目录结构:")
+    print("  cache/")
+    print("    ├── text_embedding/          # 向量相似度缓存")
+    print("    ├── semantic_similarity/     # 语义相似度缓存")
+    print("    └── data/                    # 数据缓存(原 data 目录)")
+    print("        ├── search/              # 搜索数据")
+    print("        ├── detail/              # 详情数据")
+    print("        └── analysis/            # 分析结果")
+    print("=" * 60)

+ 155 - 0
lib/data_loader.py

@@ -0,0 +1,155 @@
+"""
+通用数据加载模块
+
+提供项目中常用的数据加载函数
+"""
+import os
+import sys
+from typing import List
+from lib.utils import read_json
+
+
+def load_persona_data(persona_dir: str) -> dict:
+    """加载人设数据
+
+    Args:
+        persona_dir: 人设目录路径
+
+    Returns:
+        人设数据字典
+
+    Raises:
+        SystemExit: 文件不存在时退出
+    """
+    persona_data_path = os.path.join(persona_dir, "人设.json")
+    try:
+        return read_json(persona_data_path)
+    except FileNotFoundError:
+        print(f"❌ 找不到人设数据文件: {persona_data_path}")
+        print(f"请检查路径是否正确: {persona_dir}")
+        sys.exit(1)
+
+
+def load_inspiration_list(persona_dir: str) -> List[str]:
+    """加载灵感点列表(简化版本,仅包含名称)
+
+    Args:
+        persona_dir: 人设目录路径
+
+    Returns:
+        灵感点文本列表
+
+    Raises:
+        SystemExit: 文件不存在或格式错误时退出
+    """
+    inspiration_list_path = os.path.join(persona_dir, "灵感点.json")
+    try:
+        inspiration_list = read_json(inspiration_list_path)
+        if not isinstance(inspiration_list, list) or len(inspiration_list) == 0:
+            print(f"❌ 灵感文件格式错误或为空: {inspiration_list_path}")
+            sys.exit(1)
+        # 直接返回字符串列表(简化版本)
+        return inspiration_list
+    except FileNotFoundError:
+        print(f"❌ 找不到灵感文件: {inspiration_list_path}")
+        print("请先运行 extract_inspirations.py 生成灵感点文件")
+        sys.exit(1)
+
+
+def load_inspiration_data(persona_dir: str) -> List[dict]:
+    """加载完整的灵感点数据(包含 meta 信息)
+
+    Args:
+        persona_dir: 人设目录路径
+
+    Returns:
+        灵感点数据列表,每项包含 {"灵感点": str, "meta": dict}
+
+    Raises:
+        SystemExit: 文件不存在或格式错误时退出
+    """
+    inspiration_detail_path = os.path.join(persona_dir, "灵感点_详细.json")
+    try:
+        inspiration_data = read_json(inspiration_detail_path)
+        if not isinstance(inspiration_data, list) or len(inspiration_data) == 0:
+            print(f"❌ 灵感详细文件格式错误或为空: {inspiration_detail_path}")
+            sys.exit(1)
+        return inspiration_data
+    except FileNotFoundError:
+        print(f"❌ 找不到灵感详细文件: {inspiration_detail_path}")
+        print("请先运行 extract_inspirations.py 生成灵感点文件")
+        sys.exit(1)
+
+
+def select_inspiration(inspiration_arg: str, inspiration_list: List[str]) -> str:
+    """根据参数选择灵感
+
+    Args:
+        inspiration_arg: 灵感参数(数字索引或灵感名称)
+        inspiration_list: 灵感点文本列表
+
+    Returns:
+        选中的灵感点文本
+
+    Raises:
+        SystemExit: 选择失败时退出
+    """
+    try:
+        # 尝试作为索引解析
+        inspiration_index = int(inspiration_arg)
+        if 0 <= inspiration_index < len(inspiration_list):
+            inspiration = inspiration_list[inspiration_index]
+            print(f"使用灵感[{inspiration_index}]: {inspiration}")
+            return inspiration
+        else:
+            print(f"❌ 灵感索引超出范围: {inspiration_index} (有效范围: 0-{len(inspiration_list)-1})")
+    except ValueError:
+        # 不是数字,当作灵感名称
+        if inspiration_arg in inspiration_list:
+            print(f"使用灵感: {inspiration_arg}")
+            return inspiration_arg
+        else:
+            print(f"❌ 找不到灵感: {inspiration_arg}")
+
+    # 显示可用灵感列表后退出
+    print(f"可用灵感列表:")
+    for i, insp in enumerate(inspiration_list[:10]):
+        print(f"  {i}: {insp}")
+    if len(inspiration_list) > 10:
+        print(f"  ... 还有 {len(inspiration_list) - 10} 个")
+    sys.exit(1)
+
+
+def load_step1_result(persona_dir: str, inspiration: str, model_name: str, scope: str = "all") -> dict:
+    """加载 step1 匹配结果
+
+    Args:
+        persona_dir: 人设目录路径
+        inspiration: 灵感点名称
+        model_name: 模型名称(如 "google/gemini-2.5-pro")
+        scope: 范围标识("all" 或 "top10" 等)
+
+    Returns:
+        step1 结果字典
+
+    Raises:
+        SystemExit: 文件不存在时退出
+    """
+    # 提取模型简称
+    model_name_short = model_name.replace("google/", "").replace("/", "_")
+
+    # 构建文件路径
+    step1_file = os.path.join(
+        persona_dir,
+        "how",
+        "灵感点",
+        inspiration,
+        f"{scope}_step1_灵感人设匹配_{model_name_short}.json"
+    )
+
+    try:
+        return read_json(step1_file)
+    except FileNotFoundError:
+        print(f"❌ 找不到 step1 结果文件: {step1_file}")
+        print(f"请先运行 step1_inspiration_match.py 生成结果")
+        sys.exit(1)
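+
+
+# 用法示意(假设 persona_dir 目录下已有 人设.json、灵感点.json 等文件):
+#   persona = load_persona_data(persona_dir)
+#   inspirations = load_inspiration_list(persona_dir)
+#   inspiration = select_inspiration("0", inspirations)  # 也可直接传入灵感名称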

+ 341 - 0
lib/hybrid_similarity.py

@@ -0,0 +1,341 @@
+#!/usr/bin/env python3
+"""
+混合相似度计算模块
+结合向量模型(text_embedding)和LLM模型(semantic_similarity)的结果
+
+提供2种接口:
+1. compare_phrases() - 单对计算
+2. compare_phrases_cartesian() - 笛卡尔积批量计算 (M×N)
+"""
+
+from typing import Dict, Any, Optional, List
+import asyncio
+from lib.text_embedding import compare_phrases as compare_phrases_embedding
+from lib.text_embedding_api import compare_phrases_cartesian as compare_phrases_cartesian_api
+from lib.semantic_similarity import compare_phrases as compare_phrases_semantic
+from lib.semantic_similarity import compare_phrases_cartesian as compare_phrases_cartesian_semantic
+from lib.config import get_cache_dir
+
+
+async def compare_phrases(
+    phrase_a: str,
+    phrase_b: str,
+    weight_embedding: float = 0.5,
+    weight_semantic: float = 0.5,
+    embedding_model: str = "chinese",
+    semantic_model: str = 'openai/gpt-4.1-mini',
+    use_cache: bool = True,
+    cache_dir_embedding: Optional[str] = None,
+    cache_dir_semantic: Optional[str] = None,
+    **semantic_kwargs
+) -> Dict[str, Any]:
+    """
+    混合相似度计算:同时使用向量模型和LLM模型,按权重组合结果
+
+    Args:
+        phrase_a: 第一个短语
+        phrase_b: 第二个短语
+        weight_embedding: 向量模型权重,默认 0.5
+        weight_semantic: LLM模型权重,默认 0.5
+        embedding_model: 向量模型名称,默认 "chinese"
+        semantic_model: LLM模型名称,默认 'openai/gpt-4.1-mini'
+        use_cache: 是否使用缓存,默认 True
+        cache_dir_embedding: 向量模型缓存目录,默认从配置读取
+        cache_dir_semantic: LLM模型缓存目录,默认从配置读取
+        **semantic_kwargs: 其他传递给semantic_similarity的参数
+            - temperature: 温度参数,默认 0.0
+            - max_tokens: 最大token数,默认 65536
+            - prompt_template: 自定义提示词模板
+            - instructions: Agent系统指令
+            - tools: Agent工具列表
+            - name: Agent名称
+
+    Returns:
+        {
+            "相似度": float,           # 加权平均后的相似度 (0-1)
+            "说明": str               # 综合说明(包含各模型的分数和说明)
+        }
+
+    Examples:
+        >>> # 使用默认权重 (0.5:0.5)
+        >>> result = await compare_phrases("深度学习", "神经网络")
+        >>> print(result['相似度'])  # 加权平均后的相似度
+        0.82
+
+        >>> # 自定义权重,更倾向向量模型
+        >>> result = await compare_phrases(
+        ...     "深度学习", "神经网络",
+        ...     weight_embedding=0.7,
+        ...     weight_semantic=0.3
+        ... )
+
+        >>> # 使用不同的模型
+        >>> result = await compare_phrases(
+        ...     "深度学习", "神经网络",
+        ...     embedding_model="multilingual",
+        ...     semantic_model="anthropic/claude-sonnet-4.5"
+        ... )
+    """
+    # 验证权重
+    total_weight = weight_embedding + weight_semantic
+    if abs(total_weight - 1.0) > 0.001:
+        raise ValueError(f"权重之和必须为1.0,当前为: {total_weight}")
+
+    # 使用配置的缓存目录(如果未指定)
+    if cache_dir_embedding is None:
+        cache_dir_embedding = get_cache_dir("text_embedding")
+    if cache_dir_semantic is None:
+        cache_dir_semantic = get_cache_dir("semantic_similarity")
+
+    # 并发调用两个模型
+    embedding_task = asyncio.to_thread(
+        compare_phrases_embedding,
+        phrase_a=phrase_a,
+        phrase_b=phrase_b,
+        model_name=embedding_model,
+        use_cache=use_cache,
+        cache_dir=cache_dir_embedding
+    )
+
+    semantic_task = compare_phrases_semantic(
+        phrase_a=phrase_a,
+        phrase_b=phrase_b,
+        model_name=semantic_model,
+        use_cache=use_cache,
+        cache_dir=cache_dir_semantic,
+        **semantic_kwargs
+    )
+
+    # 等待两个任务完成
+    embedding_result, semantic_result = await asyncio.gather(
+        embedding_task,
+        semantic_task
+    )
+
+    # 提取相似度分数
+    score_embedding = embedding_result.get("相似度", 0.0)
+    score_semantic = semantic_result.get("相似度", 0.0)
+
+    # 计算加权平均
+    final_score = (
+        score_embedding * weight_embedding +
+        score_semantic * weight_semantic
+    )
+
+    # 生成综合说明(格式化为清晰的结构)
+    explanation = (
+        f"【混合相似度】{final_score:.3f}(向量模型权重{weight_embedding},LLM模型权重{weight_semantic})\n\n"
+        f"【向量模型】相似度={score_embedding:.3f}\n"
+        f"{embedding_result.get('说明', 'N/A')}\n\n"
+        f"【LLM模型】相似度={score_semantic:.3f}\n"
+        f"{semantic_result.get('说明', 'N/A')}"
+    )
+
+    # 构建返回结果(与原接口完全一致)
+    return {
+        "相似度": final_score,
+        "说明": explanation
+    }
+
+
+async def compare_phrases_cartesian(
+    phrases_a: List[str],
+    phrases_b: List[str],
+    max_concurrent: int = 50,
+    progress_callback: Optional[callable] = None
+) -> List[List[Dict[str, Any]]]:
+    """
+    混合相似度笛卡尔积批量计算:M×N矩阵
+
+    结合向量模型API笛卡尔积(快速)和LLM并发调用(已优化)
+    使用默认权重:向量0.5,LLM 0.5
+
+    Args:
+        phrases_a: 第一组短语列表(M个)
+        phrases_b: 第二组短语列表(N个)
+        max_concurrent: 最大并发数,默认50(控制LLM调用并发)
+        progress_callback: 进度回调函数,每完成一个LLM任务时调用
+
+    Returns:
+        嵌套列表 List[List[Dict]],每个Dict包含完整结果
+        results[i][j] = {
+            "相似度": float,  # 混合相似度
+            "说明": str       # 包含向量和LLM的详细说明
+        }
+
+    Examples:
+        >>> results = await compare_phrases_cartesian(
+        ...     ["深度学习"],
+        ...     ["神经网络", "Python"]
+        ... )
+        >>> print(results[0][0]['相似度'])  # 混合相似度
+        >>> print(results[0][1]['说明'])    # 完整说明
+
+        >>> # 使用进度回调
+        >>> def on_progress(count):
+        ...     print(f"完成 {count} 个任务")
+        >>> results = await compare_phrases_cartesian(
+        ...     ["深度学习"],
+        ...     ["神经网络", "Python"],
+        ...     max_concurrent=100,
+        ...     progress_callback=on_progress
+        ... )
+    """
+    # 参数验证
+    if not phrases_a or not phrases_b:
+        return [[]]
+
+    M, N = len(phrases_a), len(phrases_b)
+
+    # 默认权重
+    weight_embedding = 0.5
+    weight_semantic = 0.5
+
+    # 串行执行两个任务(向量模型快,先执行;避免并发死锁)
+    # 1. 向量模型:使用API笛卡尔积(一次调用获取M×N完整结果,通常1-2秒)
+    import time
+    start_time = time.time()
+    embedding_results = await asyncio.to_thread(
+        compare_phrases_cartesian_api,
+        phrases_a,
+        phrases_b
+    )
+    elapsed = time.time() - start_time
+    # print(f"✓ 向量模型完成,耗时: {elapsed:.1f}秒")  # 调试用
+
+    # 2. LLM模型:使用并发调用(M×N个任务,受max_concurrent控制)
+    semantic_results = await compare_phrases_cartesian_semantic(
+        phrases_a,
+        phrases_b,
+        max_concurrent,
+        progress_callback  # 传递进度回调
+    )
+    # embedding_results[i][j] = {"相似度": float, "说明": str}
+    # semantic_results[i][j] = {"相似度": float, "说明": str}
+
+    # 构建嵌套列表,包含完整信息(带子模型详细说明)
+    nested_results = []
+    for i in range(M):
+        row_results = []
+        for j in range(N):
+            # 获取子模型的完整结果
+            embedding_result = embedding_results[i][j]
+            semantic_result = semantic_results[i][j]
+
+            score_embedding = embedding_result.get("相似度", 0.0)
+            score_semantic = semantic_result.get("相似度", 0.0)
+
+            # 计算加权平均
+            final_score = (
+                score_embedding * weight_embedding +
+                score_semantic * weight_semantic
+            )
+
+            # 生成完整说明(包含子模型的详细说明)
+            explanation = (
+                f"【混合相似度】{final_score:.3f}(向量模型权重{weight_embedding},LLM模型权重{weight_semantic})\n\n"
+                f"【向量模型】相似度={score_embedding:.3f}\n"
+                f"{embedding_result.get('说明', 'N/A')}\n\n"
+                f"【LLM模型】相似度={score_semantic:.3f}\n"
+                f"{semantic_result.get('说明', 'N/A')}"
+            )
+
+            row_results.append({
+                "相似度": final_score,
+                "说明": explanation
+            })
+        nested_results.append(row_results)
+
+    return nested_results
+
+
+def compare_phrases_sync(
+    phrase_a: str,
+    phrase_b: str,
+    weight_embedding: float = 0.5,
+    weight_semantic: float = 0.5,
+    **kwargs
+) -> Dict[str, Any]:
+    """
+    混合相似度计算的同步版本(内部创建事件循环)
+
+    Args:
+        phrase_a: 第一个短语
+        phrase_b: 第二个短语
+        weight_embedding: 向量模型权重,默认 0.5
+        weight_semantic: LLM模型权重,默认 0.5
+        **kwargs: 其他参数(同 compare_phrases)
+
+    Returns:
+        同 compare_phrases
+
+    Examples:
+        >>> result = compare_phrases_sync("深度学习", "神经网络")
+        >>> print(result['相似度'])
+    """
+    return asyncio.run(
+        compare_phrases(
+            phrase_a=phrase_a,
+            phrase_b=phrase_b,
+            weight_embedding=weight_embedding,
+            weight_semantic=weight_semantic,
+            **kwargs
+        )
+    )
+
+
+if __name__ == "__main__":
+    async def main():
+        print("=" * 80)
+        print("混合相似度计算示例")
+        print("=" * 80)
+        print()
+
+        # 示例 1: 默认权重 (0.5:0.5)
+        print("示例 1: 默认权重 (0.5:0.5)")
+        print("-" * 80)
+        result = await compare_phrases("深度学习", "神经网络")
+        print(f"相似度: {result['相似度']:.3f}")
+        print(f"说明:\n{result['说明']}")
+        print()
+
+        # 示例 2: 不相关的短语
+        print("示例 2: 不相关的短语")
+        print("-" * 80)
+        result = await compare_phrases("编程", "吃饭")
+        print(f"相似度: {result['相似度']:.3f}")
+        print(f"说明:\n{result['说明']}")
+        print()
+
+        # 示例 3: 自定义权重,更倾向向量模型
+        print("示例 3: 自定义权重 (向量:0.7, LLM:0.3)")
+        print("-" * 80)
+        result = await compare_phrases(
+            "人工智能", "机器学习",
+            weight_embedding=0.7,
+            weight_semantic=0.3
+        )
+        print(f"相似度: {result['相似度']:.3f}")
+        print(f"说明:\n{result['说明']}")
+        print()
+
+        # 示例 4: 完整输出示例
+        print("示例 4: 完整输出示例")
+        print("-" * 80)
+        result = await compare_phrases("宿命感", "余华的小说")
+        print(f"相似度: {result['相似度']:.3f}")
+        print(f"说明:\n{result['说明']}")
+        print()
+
+        # 示例 5: 同步版本
+        print("示例 5: 同步版本调用")
+        print("-" * 80)
+        result = compare_phrases_sync("Python", "编程语言")
+        print(f"相似度: {result['相似度']:.3f}")
+        print(f"说明:\n{result['说明']}")
+        print()
+
+        print("=" * 80)
+
+    asyncio.run(main())

+ 353 - 0
lib/match_analyzer.py

@@ -0,0 +1,353 @@
+"""
+通用的信息匹配分析模块
+
+分析 <B> 在 <A> 中的字面语义匹配关系
+适用于任何信息匹配场景
+
+提供两个接口:
+1. match_single(b_content, a_content, model_name, b_context="", a_context="") - 单个匹配
+2. match_batch(b_items, a_content, model_name, b_context="", a_context="") - 批量匹配
+
+支持可选的 Context 参数:
+- b_context: B 的补充上下文(帮助理解 B)
+- a_context: A 的补充上下文(帮助理解 A)
+- Context 默认为空,不提供时不会出现在 prompt 中
+"""
+import json
+from typing import List
+from agents import Agent, Runner, ModelSettings
+from agents.tracing.create import custom_span
+from lib.client import get_model
+
+
+
+# ========== System Prompt ==========
+MATCH_SYSTEM_PROMPT = """
+# 任务
+分析 <B> 在 <A> 中的字面语义匹配关系。
+
+## 输入说明
+
+- **<B></B>**: 待匹配的内容(必选)
+- **<A></A>**: 上下文内容(必选)
+- **<B_Context></B_Context>**: B 的补充上下文(可选,帮助理解 B)
+- **<A_Context></A_Context>**: A 的补充上下文(可选,帮助理解 A)
+
+**重要**:匹配分析发生在 <B> 和 <A> 之间,Context 仅作为补充理解的辅助信息。
+
+## 分析方法
+
+### 核心原则:字面语义匹配
+只关注 <B> 和 <A> 在**字面词语和概念**上的重叠度,不考虑抽象关系。
+
+### 分析步骤
+
+1. **提取关键词/概念**
+   - 从 <B> 中提取:关键词语和核心概念
+   - 从 <A> 中提取:关键词语和核心概念
+
+2. **识别相同部分**
+   - 完全相同的词语(字面一致)
+   - 同义词或近义词
+
+3. **识别增量部分**
+   - <B> 中有,但 <A> 中没有的词语/概念
+   - 这些是 <B> 相对于 <A> 的额外信息
+
+4. **计算匹配分数**
+   - 基于相同部分的覆盖度
+   - 考虑词语/概念的重要性
+
+---
+
+## 评分标准(0-1分)
+
+**字面匹配度评分:**
+- **0.9-1.0**:<B> 和 <A> 几乎完全一致,词语高度重叠
+- **0.7-0.8**:大部分核心词语/概念匹配,少量增量
+- **0.5-0.6**:部分核心词语/概念匹配,有一定增量
+- **0.3-0.4**:少量词语/概念匹配,大部分不同
+- **0.1-0.2**:几乎无字面匹配,仅有概念联系
+- **0.0**:完全无关
+
+**重要原则:**
+- 如果 <A> 是抽象/元级别的描述,而 <B> 是具体内容,字面上无词语重叠,应给低分(0.1-0.3)
+- 优先考虑具体词语的匹配,而非抽象概念的包含关系
+
+---
+
+## 输出格式(严格JSON)
+```json
+{
+  "score": 0.75,
+  "score说明": "简要说明分数是如何计算的,基于哪些词语/概念的匹配",
+  "相同部分": {
+    "B中的词1": "与A中的'某词'完全相同",
+    "B中的词2": "与A中的'某词'同义"
+  },
+  "增量部分": {
+    "B中的词3": "A中无此概念"
+  }
+}
+```
+
+**输出要求**:
+1. 必须严格按照上述JSON格式输出(score 和 score说明在最前面)
+2. 所有字段都必须填写
+3. **score字段**:必须是0-1之间的浮点数,保留2位小数
+4. **score说明**:必须简洁说明评分依据(基于相同部分的覆盖度)
+5. **相同部分**:字典格式,key是<B>中的词语,value说明它与<A>中哪个词的关系(完全相同/同义);如果没有则填写空字典 {}
+6. **增量部分**:字典格式,key是<B>中的词语,value说明为什么是增量(如"A中无此概念");如果没有增量部分,填写空字典 {}
+7. **关键约束**:相同部分和增量部分的key必须只能是<B>中的词语,不能是<A>中的词语
+""".strip()
+
+
+def create_match_agent(model_name: str) -> Agent:
+    """创建信息匹配分析的 Agent
+
+    Args:
+        model_name: 模型名称
+
+    Returns:
+        Agent 实例
+    """
+    agent = Agent(
+        name="Information Match Expert",
+        instructions=MATCH_SYSTEM_PROMPT,
+        model=get_model(model_name),
+        model_settings=ModelSettings(
+            temperature=0.0,
+            max_tokens=65536,
+        ),
+        tools=[],
+    )
+
+    return agent
+
+
+def parse_match_response(response_content: str) -> dict:
+    """解析匹配响应
+
+    Args:
+        response_content: Agent 返回的响应内容
+
+    Returns:
+        解析后的字典
+    """
+    try:
+        # 如果响应包含在 markdown 代码块中,提取 JSON 部分
+        if "```json" in response_content:
+            json_start = response_content.index("```json") + 7
+            json_end = response_content.index("```", json_start)
+            json_text = response_content[json_start:json_end].strip()
+        elif "```" in response_content:
+            json_start = response_content.index("```") + 3
+            json_end = response_content.index("```", json_start)
+            json_text = response_content[json_start:json_end].strip()
+        else:
+            json_text = response_content.strip()
+
+        return json.loads(json_text)
+    except Exception as e:
+        print(f"解析响应失败: {e}")
+        return {
+            "相同部分": {},
+            "增量部分": {},
+            "score": 0.0,
+            "score说明": f"解析失败: {str(e)}"
+        }
+
+
+def _create_batch_agent(model_name: str) -> Agent:
+    """创建批量匹配的 Agent
+
+    Args:
+        model_name: 模型名称
+
+    Returns:
+        Agent 实例
+    """
+    # 批量匹配的 System Prompt(在单个匹配基础上修改输出格式)
+    batch_prompt = MATCH_SYSTEM_PROMPT.replace(
+        "## 输出格式(严格JSON)",
+        "## 输出格式(JSON数组)\n对每个 <B> 输出一个匹配结果:"
+    ).replace(
+        "```json\n{",
+        "```json\n[{"
+    ).replace(
+        "}\n```",
+        "}]\n```"
+    ) + "\n\n**额外要求**:数组长度必须等于 <B> 的数量,顺序对应"
+
+    agent = Agent(
+        name="Batch Information Match Expert",
+        instructions=batch_prompt,
+        model=get_model(model_name),
+        tools=[],
+    )
+
+    return agent
+
+
+async def _run_match_agent(
+    agent: Agent,
+    b_content: str,
+    a_content: str,
+    request_desc: str,
+    b_context: str = "",
+    a_context: str = ""
+) -> str:
+    """运行匹配 Agent 的公共逻辑
+
+    Args:
+        agent: Agent 实例
+        b_content: B 的内容
+        a_content: A 的内容
+        request_desc: 请求描述(如"并输出 JSON 格式"或"并输出 JSON 数组格式")
+        b_context: B 的上下文(可选)
+        a_context: A 的上下文(可选)
+
+    Returns:
+        Agent 的原始输出
+    """
+    # 构建任务描述
+    b_section = f"<B>\n{b_content}\n</B>"
+    if b_context:
+        b_section += f"\n\n<B_Context>\n{b_context}\n</B_Context>"
+
+    a_section = f"<A>\n{a_content}\n</A>"
+    if a_context:
+        a_section += f"\n\n<A_Context>\n{a_context}\n</A_Context>"
+
+    task_description = f"""## 本次分析任务
+
+{b_section}
+
+{a_section}
+
+请严格按照系统提示中的要求分析 <B> 在 <A> 中的字面语义匹配关系,{request_desc}的结果。"""
+
+    # 构造消息
+    messages = [{
+        "role": "user",
+        "content": [
+            {
+                "type": "input_text",
+                "text": task_description
+            }
+        ]
+    }]
+
+    # 使用 custom_span 追踪匹配过程
+    # 截断显示内容,避免 span name 过长
+    b_short = (b_content[:40] + "...") if len(b_content) > 40 else b_content
+    a_short = (a_content[:40] + "...") if len(a_content) > 40 else a_content
+
+    with custom_span(
+        name=f"匹配分析: {b_short} in {a_short}",
+        data={
+            "B": b_content,
+            "A": a_content,
+            "B_Context": b_context if b_context else None,
+            "A_Context": a_context if a_context else None,
+            "模式": request_desc
+        }
+    ):
+        # 运行 Agent
+        result = await Runner.run(agent, input=messages)
+
+    return result.final_output
+
+
+async def match_single(
+    b_content: str,
+    a_content: str,
+    model_name: str,
+    b_context: str = "",
+    a_context: str = ""
+) -> dict:
+    """单个匹配:分析一个 B 在 A 中的匹配
+
+    Args:
+        b_content: B(待匹配)的内容
+        a_content: A(上下文)的内容
+        model_name: 使用的模型名称
+        b_context: B 的补充上下文(可选,默认为空)
+        a_context: A 的补充上下文(可选,默认为空)
+
+    Returns:
+        匹配结果字典:{"相同部分": {}, "增量部分": {}, "score": 0.0, "score说明": ""}
+    """
+    try:
+        # 创建 Agent
+        agent = create_match_agent(model_name)
+
+        # 运行匹配
+        output = await _run_match_agent(
+            agent, b_content, a_content, "并输出 JSON 格式",
+            b_context=b_context, a_context=a_context
+        )
+
+        # 解析响应
+        parsed_result = parse_match_response(output)
+
+        return parsed_result
+
+    except Exception as e:
+        return {
+            "相同部分": {},
+            "增量部分": {},
+            "score": 0.0,
+            "score说明": f"匹配过程出错: {str(e)}"
+        }
+
+
+async def match_batch(
+    b_items: List[str],
+    a_content: str,
+    model_name: str,
+    b_context: str = "",
+    a_context: str = ""
+) -> List[dict]:
+    """批量匹配:分析多个 B 在 A 中的匹配(一次调用)
+
+    Args:
+        b_items: B列表(多个待匹配项)
+        a_content: A(上下文)的内容
+        model_name: 使用的模型名称
+        b_context: B 的补充上下文(可选,默认为空)
+        a_context: A 的补充上下文(可选,默认为空)
+
+    Returns:
+        匹配结果列表:[{"相同部分": {}, "增量部分": {}, "score": 0.0, "score说明": ""}, ...]
+    """
+    try:
+        # 创建批量匹配 Agent
+        agent = _create_batch_agent(model_name)
+
+        # 构建 B 列表字符串
+        b_list_str = "\n".join([f"- {item}" for item in b_items])
+
+        # 运行匹配
+        output = await _run_match_agent(
+            agent, b_list_str, a_content, "并输出 JSON 数组格式",
+            b_context=b_context, a_context=a_context
+        )
+
+        # 解析响应(期望是数组)
+        parsed_result = parse_match_response(output)
+
+        # 如果返回的是数组,直接返回;如果是单个对象,包装成数组
+        if isinstance(parsed_result, list):
+            return parsed_result
+        else:
+            return [parsed_result]
+
+    except Exception as e:
+        # 返回错误信息(为每个 B 创建一个错误条目)
+        return [{
+            "相同部分": {},
+            "增量部分": {},
+            "score": 0.0,
+            "score说明": f"匹配过程出错: {str(e)}"
+        } for _ in b_items]
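+
+
+# 用法示意(需在异步上下文中调用;短语内容仅为演示):
+#   single = await match_single("增肌餐", "健身与饮食记录", model_name="google/gemini-2.5-flash")
+#   batch = await match_batch(["增肌餐", "公路骑行"], "健身与饮食记录", model_name="google/gemini-2.5-flash")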

+ 80 - 0
lib/my_trace.py

@@ -0,0 +1,80 @@
+from datetime import datetime
+import logging
+
+def get_current_time():
+    import uuid
+    random_uuid = str(uuid.uuid4())
+    return datetime.now().strftime("%Y%m%d-%H%M%S") + "_" + random_uuid[:2]
+
+def set_trace_logfire():
+    from agents.tracing.setup import GLOBAL_TRACE_PROVIDER
+    GLOBAL_TRACE_PROVIDER.shutdown()
+    import logfire
+    current_time = get_current_time()
+    logfire.configure(service_name=f'{current_time}')
+    logfire.instrument_openai_agents()
+    import urllib.parse
+    current_time_encoded = urllib.parse.quote(current_time)
+    import logging
+    LOG_LEVEL = "WARNING"
+    # 设置日志
+    logging.basicConfig(
+        level=getattr(logging, LOG_LEVEL),
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+    )
+    logger = logging.getLogger(__name__)
+    log_url = f'https://logfire-us.pydantic.dev/semsevens/test?q=service_name+%3D+%27{current_time_encoded}%27&last=30d'
+    logger.warning(f"任务日志链接: {log_url}")
+    return current_time, log_url
+
+def set_trace():
+    # 设置全局logging级别,覆盖所有子模块
+    logging.basicConfig(level=logging.WARNING, force=True)
+    # 确保根logger级别生效
+    logging.getLogger().setLevel(logging.WARNING)
+    return set_trace_smith()
+    # return set_trace_logfire()
+
+
+def set_trace_smith():
+    from agents.tracing.setup import GLOBAL_TRACE_PROVIDER
+    GLOBAL_TRACE_PROVIDER.shutdown()
+    from agents import set_trace_processors
+    from langsmith.wrappers import OpenAIAgentsTracingProcessor
+    import logging
+    current_time = get_current_time()
+    set_trace_processors([OpenAIAgentsTracingProcessor(name=f'{current_time}')])
+    import urllib.parse
+    LOG_LEVEL = "WARNING"
+    # 设置日志
+    logging.basicConfig(
+        level=getattr(logging, LOG_LEVEL),
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+    )
+    logger = logging.getLogger(__name__)
+    current_time_encoded = urllib.parse.quote(current_time)
+    log_url = f'https://smith.langchain.com/o/3ebe0715-9709-4594-a0aa-40a77a4e10bd/projects/p/611fa0d6-5510-4f60-b693-87e2ccc2ea5f?timeModel=%7B%22duration%22%3A%227d%22%7D&searchModel=%7B%22filter%22%3A%22and%28eq%28is_root%2C+true%29%2C+eq%28name%2C+%5C%22{current_time_encoded}%5C%22%29%29%22%2C%22searchFilter%22%3A%22eq%28is_root%2C+true%29%22%7D'
+    logger.warning(f"任务日志链接: {log_url}")
+    return current_time, log_url
+        
+def set_debug():
+    import logging
+    # 设置全局日志级别为DEBUG,确保所有模块生效
+    logging.basicConfig(
+        level=logging.DEBUG,
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+    )
+    # 确保根日志记录器也设置为DEBUG级别
+    logging.getLogger().setLevel(logging.DEBUG)
+
+def set_info():
+    import logging
+    # 设置全局日志级别为INFO,确保所有模块生效
+    logging.basicConfig(
+        level=logging.INFO,
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+    )
+    # 确保根日志记录器也设置为INFO级别
+    logging.getLogger().setLevel(logging.INFO)
+    
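+
+# 用法示意:在脚本入口处调用一次
+#   current_time, log_url = set_trace()  # 启用 LangSmith 追踪并打印任务日志链接
+#   set_info()                           # 或仅将全局日志级别调到 INFO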

+ 288 - 0
lib/relation_analyzer.py

@@ -0,0 +1,288 @@
+"""
+短语关系分析模块
+
+分析两个短语之间的语义关系
+
+提供接口:
+analyze_relation(phrase_a, phrase_b, model_name, context_a="", context_b="") - 分析两个短语的关系
+
+支持可选的 Context 参数:
+- context_a: phrase_a 的补充上下文(帮助理解 phrase_a)
+- context_b: phrase_b 的补充上下文(帮助理解 phrase_b)
+- Context 默认为空,不提供时不会出现在 prompt 中
+
+返回格式:
+{
+    "relation": "same",           # 7种关系之一
+    "score": 0.95,                # 0-1,语义接近程度
+    "explanation": "说明"          # 关系判断的依据
+}
+"""
+import json
+from agents import Agent, Runner, ModelSettings
+from agents.tracing.create import custom_span, trace
+from lib.client import get_model
+
+
+# ========== System Prompt ==========
+RELATION_SYSTEM_PROMPT = """
+# 任务
+分析两个短语 <A> 和 <B> 之间的语义关系。
+
+## 输入说明
+
+- **<A></A>**: 第一个短语(必选)
+- **<B></B>**: 第二个短语(必选)
+- **<A_Context></A_Context>**: A 的补充上下文(可选,帮助理解 A)
+- **<B_Context></B_Context>**: B 的补充上下文(可选,帮助理解 B)
+
+**重要**:关系分析发生在 <A> 和 <B> 之间,Context 仅作为补充理解的辅助信息。
+
+---
+
+## 关系类型(7种)
+
+### 1. same(同义)
+- **定义**:意思完全相同或非常接近,可以互相替换
+- **例子**:
+  - "医生" 和 "大夫" → same
+  - "计算机" 和 "电脑" → same
+  - "快乐" 和 "高兴" → same
+
+### 2. coordinate(同级)
+- **定义**:有共同的上位概念,属于并列关系,通常无交集
+- **例子**:
+  - "轿车" 和 "SUV" → coordinate(都是汽车)
+  - "苹果" 和 "香蕉" → coordinate(都是水果)
+  - "数学" 和 "物理" → coordinate(都是学科)
+
+### 3. contains(包含)
+- **定义**:A 的概念范围包含 B,B 是 A 的子类或特例
+- **例子**:
+  - "水果" contains "苹果"
+  - "汽车" contains "轿车"
+  - "动物" contains "狗"
+
+### 4. contained_by(被包含)
+- **定义**:A 被 B 包含,A 是 B 的子类或特例
+- **例子**:
+  - "苹果" contained_by "水果"
+  - "轿车" contained_by "汽车"
+  - "狗" contained_by "动物"
+
+### 5. overlap(部分重叠)
+- **定义**:两个概念有交集,但互不包含
+- **例子**:
+  - "红苹果" 和 "大苹果" → overlap(有又红又大的苹果)
+  - "亚洲国家" 和 "发展中国家" → overlap(如中国、印度等)
+  - "学生" 和 "运动员" → overlap(有学生运动员)
+
+### 6. related(相关)
+- **定义**:有语义联系,但不属于上述任何层级关系
+- **例子**:
+  - "医生" 和 "医院" → related(工作场所关系)
+  - "阅读" 和 "书籍" → related(动作-对象关系)
+  - "钥匙" 和 "锁" → related(工具-用途关系)
+  - "老师" 和 "学生" → related(角色关系)
+
+### 7. unrelated(无关)
+- **定义**:无明显语义关系
+- **例子**:
+  - "医生" 和 "石头" → unrelated
+  - "苹果" 和 "数学" → unrelated
+
+---
+
+## 评分标准(score: 0-1)
+
+**score 表示两个短语的语义接近程度:**
+
+- **0.9-1.0**:几乎完全相同(完全同义)
+- **0.8-0.9**:非常接近(高度同义、直接包含关系)
+- **0.7-0.8**:比较接近(近义、明确的同级或包含)
+- **0.6-0.7**:有一定接近度(同级但层级稍远、间接包含)
+- **0.5-0.6**:中等程度的关系(中等交集、中度相关)
+- **0.4-0.5**:关系较弱(小交集、弱相关)
+- **0.3-0.4**:关系很弱(勉强算同级、很弱的相关)
+- **0.0-0.3**:几乎无关或完全无关
+
+**不同关系类型的 score 范围参考:**
+- same: 通常 0.7-1.0(完全同义接近1.0,近义0.7-0.8)
+- contains/contained_by: 通常 0.5-0.9(直接包含0.8+,跨层级0.5-0.7)
+- coordinate: 通常 0.3-0.8(同级且上位概念近0.7+,同级但距离远0.3-0.5)
+- overlap: 通常 0.2-0.8(交集大0.6+,交集小0.2-0.4)
+- related: 通常 0.1-0.7(强相关0.5+,弱相关0.1-0.3)
+- unrelated: 通常 0.0-0.2
+
+---
+
+## 判断逻辑(按优先级)
+
+1. **A 和 B 意思相同或非常接近?** → same
+2. **A 包含 B 或 B 包含 A?** → contains 或 contained_by
+3. **A 和 B 有共同上位概念且无交集?** → coordinate
+4. **A 和 B 有交集但互不包含?** → overlap
+5. **A 和 B 有语义联系但不属于上述?** → related
+6. **A 和 B 完全无关?** → unrelated
+
+---
+
+## 输出格式(严格JSON)
+
+```json
+{
+  "relation": "same",
+  "score": 0.95,
+  "explanation": "简要说明为什么是这个关系,以及 score 的依据"
+}
+```
+
+**输出要求**:
+1. 必须严格按照上述JSON格式输出
+2. 所有字段都必须填写
+3. **relation字段**:必须是以下7个值之一:same, coordinate, contains, contained_by, overlap, related, unrelated
+4. **score字段**:必须是0-1之间的浮点数,保留2位小数
+5. **explanation字段**:必须简洁说明关系类型和评分依据(1-2句话)
+""".strip()
+
+
+def create_relation_agent(model_name: str) -> Agent:
+    """创建关系分析的 Agent
+
+    Args:
+        model_name: 模型名称
+
+    Returns:
+        Agent 实例
+    """
+    agent = Agent(
+        name="Phrase Relation Expert",
+        instructions=RELATION_SYSTEM_PROMPT,
+        model=get_model(model_name),
+        model_settings=ModelSettings(
+            temperature=0.0,
+            max_tokens=65536,
+        ),
+        tools=[],
+    )
+
+    return agent
+
+
+def parse_relation_response(response_content: str) -> dict:
+    """解析关系分析响应
+
+    Args:
+        response_content: Agent 返回的响应内容
+
+    Returns:
+        解析后的字典
+    """
+    try:
+        # 如果响应包含在 markdown 代码块中,提取 JSON 部分
+        if "```json" in response_content:
+            json_start = response_content.index("```json") + 7
+            json_end = response_content.index("```", json_start)
+            json_text = response_content[json_start:json_end].strip()
+        elif "```" in response_content:
+            json_start = response_content.index("```") + 3
+            json_end = response_content.index("```", json_start)
+            json_text = response_content[json_start:json_end].strip()
+        else:
+            json_text = response_content.strip()
+
+        return json.loads(json_text)
+    except Exception as e:
+        print(f"解析响应失败: {e}")
+        return {
+            "relation": "unrelated",
+            "score": 0.0,
+            "explanation": f"解析失败: {str(e)}"
+        }
+
+
+async def analyze_relation(
+    phrase_a: str,
+    phrase_b: str,
+    model_name: str = None,
+    context_a: str = "",
+    context_b: str = ""
+) -> dict:
+    """分析两个短语之间的关系
+
+    Args:
+        phrase_a: 第一个短语
+        phrase_b: 第二个短语
+        model_name: 使用的模型名称(可选,默认使用 client.py 中的 MODEL_NAME)
+        context_a: phrase_a 的补充上下文(可选,默认为空)
+        context_b: phrase_b 的补充上下文(可选,默认为空)
+
+    Returns:
+        关系分析结果字典:{"relation": "same", "score": 0.95, "explanation": "..."}
+    """
+    try:
+        # 如果未指定模型,使用默认模型
+        if model_name is None:
+            from lib.client import MODEL_NAME
+            model_name = MODEL_NAME
+
+        # 创建 Agent
+        agent = create_relation_agent(model_name)
+
+        # 构建任务描述
+        a_section = f"<A>\n{phrase_a}\n</A>"
+        if context_a:
+            a_section += f"\n\n<A_Context>\n{context_a}\n</A_Context>"
+
+        b_section = f"<B>\n{phrase_b}\n</B>"
+        if context_b:
+            b_section += f"\n\n<B_Context>\n{context_b}\n</B_Context>"
+
+        task_description = f"""## 本次分析任务
+
+{a_section}
+
+{b_section}
+
+请严格按照系统提示中的要求分析 <A> 和 <B> 之间的语义关系,并输出 JSON 格式的结果。"""
+
+        # 构造消息
+        messages = [{
+            "role": "user",
+            "content": [
+                {
+                    "type": "input_text",
+                    "text": task_description
+                }
+            ]
+        }]
+
+        # 使用 custom_span 追踪分析过程
+        # 截断显示内容,避免 span name 过长
+        a_short = (phrase_a[:30] + "...") if len(phrase_a) > 30 else phrase_a
+        b_short = (phrase_b[:30] + "...") if len(phrase_b) > 30 else phrase_b
+
+        with trace(workflow_name="关系分析"):
+            with custom_span(
+                name=f"关系分析: {a_short} <-> {b_short}",
+                data={
+                    "phrase_a": phrase_a,
+                    "phrase_b": phrase_b,
+                    "context_a": context_a if context_a else None,
+                    "context_b": context_b if context_b else None,
+                }
+            ):
+                # 运行 Agent
+                result = await Runner.run(agent, input=messages)
+
+        # 解析响应
+        parsed_result = parse_relation_response(result.final_output)
+
+        return parsed_result
+
+    except Exception as e:
+        return {
+            "relation": "unrelated",
+            "score": 0.0,
+            "explanation": f"分析过程出错: {str(e)}"
+        }

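下面是 `analyze_relation` 的一个最小调用示意(假设性用法:假定从项目根目录运行、`lib/client.py` 的默认模型可用,示例短语仅作演示):

```python
# 示例(假设性):调用 lib.relation_analyzer.analyze_relation 分析两个短语的关系
import asyncio
from lib.relation_analyzer import analyze_relation


async def demo():
    # "医生" 与 "医院" 预期返回 related 关系及对应 score
    result = await analyze_relation("医生", "医院")
    print(result["relation"], result["score"], result["explanation"])


if __name__ == "__main__":
    asyncio.run(demo())
```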
+ 745 - 0
lib/semantic_similarity.py

@@ -0,0 +1,745 @@
+#!/usr/bin/env python3
+"""
+语义相似度分析模块
+使用 AI Agent 判断两个短语之间的语义相似度
+"""
+
+from agents import Agent, Runner, ModelSettings
+from lib.client import get_model
+from lib.utils import parse_json_from_text
+from lib.config import get_cache_dir
+from typing import Dict, Any, Optional, List
+import hashlib
+import json
+from datetime import datetime
+from pathlib import Path
+import asyncio
+
+
+# 默认提示词模板
+DEFAULT_PROMPT_TEMPLATE = """
+从语意角度,判断"{phrase_a}"和"{phrase_b}"这两个短语的相似度,从0-1打分,输出格式如下:
+```json
+{{
+  "说明": "简明扼要说明理由",
+  "相似度": 0.0,
+}}
+```
+""".strip()
+
+
+def _get_default_cache_dir() -> str:
+    """获取默认缓存目录(从配置中读取)"""
+    return get_cache_dir("semantic_similarity")
+
+
+def _generate_cache_key(
+    phrase_a: str,
+    phrase_b: str,
+    model_name: str,
+    temperature: float,
+    max_tokens: int,
+    prompt_template: str,
+    instructions: str = None,
+    tools: str = "[]"
+) -> str:
+    """
+    生成缓存键(哈希值)
+
+    Args:
+        phrase_a: 第一个短语
+        phrase_b: 第二个短语
+        model_name: 模型名称
+        temperature: 温度参数
+        max_tokens: 最大token数
+        prompt_template: 提示词模板
+        instructions: Agent 系统指令
+        tools: 工具列表的 JSON 字符串
+
+    Returns:
+        32位MD5哈希值
+    """
+    # 创建包含所有参数的字符串
+    cache_string = f"{phrase_a}||{phrase_b}||{model_name}||{temperature}||{max_tokens}||{prompt_template}||{instructions}||{tools}"
+
+    # 生成MD5哈希
+    return hashlib.md5(cache_string.encode('utf-8')).hexdigest()
+
+
+def _sanitize_for_filename(text: str, max_length: int = 30) -> str:
+    """
+    将文本转换为安全的文件名部分
+
+    Args:
+        text: 原始文本
+        max_length: 最大长度
+
+    Returns:
+        安全的文件名字符串
+    """
+    import re
+    # 移除特殊字符,只保留中文、英文、数字、下划线
+    sanitized = re.sub(r'[^\w\u4e00-\u9fff]', '_', text)
+    # 移除连续的下划线
+    sanitized = re.sub(r'_+', '_', sanitized)
+    # 截断到最大长度
+    if len(sanitized) > max_length:
+        sanitized = sanitized[:max_length]
+    return sanitized.strip('_')
+
+
+def _get_cache_filepath(
+    cache_key: str,
+    phrase_a: str,
+    phrase_b: str,
+    model_name: str,
+    temperature: float,
+    cache_dir: Optional[str] = None
+) -> Path:
+    """
+    获取缓存文件路径(可读文件名)
+
+    Args:
+        cache_key: 缓存键(哈希值)
+        phrase_a: 第一个短语
+        phrase_b: 第二个短语
+        model_name: 模型名称
+        temperature: 温度参数
+        cache_dir: 缓存目录
+
+    Returns:
+        缓存文件的完整路径
+
+    文件名格式: {phrase_a}_vs_{phrase_b}_{model}_t{temp}_{hash[:8]}.json
+    示例: 宿命感_vs_余华的小说_gpt-4.1-mini_t0.0_a7f3e2d9.json
+    """
+    if cache_dir is None:
+        cache_dir = _get_default_cache_dir()
+
+    # 清理短语和模型名
+    clean_a = _sanitize_for_filename(phrase_a, max_length=20)
+    clean_b = _sanitize_for_filename(phrase_b, max_length=20)
+
+    # 简化模型名(提取关键部分)
+    model_short = model_name.split('/')[-1]  # 例如: openai/gpt-4.1-mini -> gpt-4.1-mini
+    model_short = _sanitize_for_filename(model_short, max_length=20)
+
+    # 格式化温度参数
+    temp_str = f"t{temperature:.1f}"
+
+    # 使用哈希的前8位
+    hash_short = cache_key[:8]
+
+    # 组合文件名
+    filename = f"{clean_a}_vs_{clean_b}_{model_short}_{temp_str}_{hash_short}.json"
+
+    return Path(cache_dir) / filename
+
+
+def _load_from_cache(
+    cache_key: str,
+    phrase_a: str,
+    phrase_b: str,
+    model_name: str,
+    temperature: float,
+    cache_dir: Optional[str] = None
+) -> Optional[str]:
+    """
+    从缓存加载数据
+
+    Args:
+        cache_key: 缓存键
+        phrase_a: 第一个短语
+        phrase_b: 第二个短语
+        model_name: 模型名称
+        temperature: 温度参数
+        cache_dir: 缓存目录
+
+    Returns:
+        缓存的结果字符串,如果不存在则返回 None
+    """
+    if cache_dir is None:
+        cache_dir = _get_default_cache_dir()
+
+    cache_file = _get_cache_filepath(cache_key, phrase_a, phrase_b, model_name, temperature, cache_dir)
+
+    # 如果文件不存在,尝试通过哈希匹配查找
+    if not cache_file.exists():
+        # 查找所有以该哈希结尾的文件
+        cache_path = Path(cache_dir)
+        if cache_path.exists():
+            hash_short = cache_key[:8]
+            matching_files = list(cache_path.glob(f"*_{hash_short}.json"))
+            if matching_files:
+                cache_file = matching_files[0]
+            else:
+                return None
+        else:
+            return None
+
+    try:
+        with open(cache_file, 'r', encoding='utf-8') as f:
+            cached_data = json.load(f)
+            return cached_data['output']['raw']
+    except (json.JSONDecodeError, IOError, KeyError):
+        return None
+
+
+def _save_to_cache(
+    cache_key: str,
+    phrase_a: str,
+    phrase_b: str,
+    model_name: str,
+    temperature: float,
+    max_tokens: int,
+    prompt_template: str,
+    instructions: str,
+    tools: str,
+    result: str,
+    cache_dir: Optional[str] = None
+) -> None:
+    """
+    保存数据到缓存
+
+    Args:
+        cache_key: 缓存键
+        phrase_a: 第一个短语
+        phrase_b: 第二个短语
+        model_name: 模型名称
+        temperature: 温度参数
+        max_tokens: 最大token数
+        prompt_template: 提示词模板
+        instructions: Agent 系统指令
+        tools: 工具列表的 JSON 字符串
+        result: 结果数据(原始字符串)
+        cache_dir: 缓存目录
+    """
+    if cache_dir is None:
+        cache_dir = _get_default_cache_dir()
+
+    cache_file = _get_cache_filepath(cache_key, phrase_a, phrase_b, model_name, temperature, cache_dir)
+
+    # 确保缓存目录存在
+    cache_file.parent.mkdir(parents=True, exist_ok=True)
+
+    # 尝试解析 result 为 JSON
+    parsed_result = parse_json_from_text(result)
+
+    # 准备缓存数据(包含完整的输入输出信息)
+    cache_data = {
+        "input": {
+            "phrase_a": phrase_a,
+            "phrase_b": phrase_b,
+            "model_name": model_name,
+            "temperature": temperature,
+            "max_tokens": max_tokens,
+            "prompt_template": prompt_template,
+            "instructions": instructions,
+            "tools": tools
+        },
+        "output": {
+            "raw": result,              # 保留原始响应
+            "parsed": parsed_result     # 解析后的JSON对象
+        },
+        "metadata": {
+            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+            "cache_key": cache_key,
+            "cache_file": str(cache_file.name)
+        }
+    }
+
+    try:
+        with open(cache_file, 'w', encoding='utf-8') as f:
+            json.dump(cache_data, f, ensure_ascii=False, indent=2)
+    except IOError:
+        pass  # 静默失败,不影响主流程
+
+
+async def _difference_between_phrases(
+    phrase_a: str,
+    phrase_b: str,
+    model_name: str = 'openai/gpt-4.1-mini',
+    temperature: float = 0.0,
+    max_tokens: int = 65536,
+    prompt_template: str = None,
+    instructions: str = None,
+    tools: list = None,
+    name: str = "Semantic Similarity Analyzer",
+    use_cache: bool = True,
+    cache_dir: Optional[str] = None
+) -> str:
+    """
+    从语义角度判断两个短语的相似度
+
+    Args:
+        phrase_a: 第一个短语
+        phrase_b: 第二个短语
+        model_name: 使用的模型名称,可选值:
+            - 'google/gemini-2.5-pro'
+            - 'anthropic/claude-sonnet-4.5'
+            - 'google/gemini-2.0-flash-001'
+            - 'openai/gpt-5-mini'
+            - 'anthropic/claude-haiku-4.5'
+            - 'openai/gpt-4.1-mini' (默认)
+        temperature: 模型温度参数,控制输出随机性,默认 0.0(确定性输出)
+        max_tokens: 最大生成token数,默认 65536
+        prompt_template: 自定义提示词模板,使用 {phrase_a} 和 {phrase_b} 作为占位符
+                        如果为 None,使用默认模板
+        instructions: Agent 的系统指令,默认为 None
+        tools: Agent 可用的工具列表,默认为 []
+        name: Agent 的名称,默认为 "Semantic Similarity Analyzer"(不参与缓存key构建)
+        use_cache: 是否使用缓存,默认 True
+        cache_dir: 缓存目录,默认从配置读取(可通过 lib.config 设置)
+
+    Returns:
+        JSON 格式的相似度分析结果字符串
+
+    Examples:
+        >>> # 使用默认模板和缓存
+        >>> result = await difference_between_phrases("宿命感", "余华的小说")
+        >>> print(result)
+        {
+          "说明": "简明扼要说明理由",
+          "相似度": 0.0
+        }
+
+        >>> # 禁用缓存
+        >>> result = await _difference_between_phrases(
+        ...     "宿命感", "余华的小说",
+        ...     use_cache=False
+        ... )
+
+        >>> # 使用自定义模板
+        >>> custom_template = '''
+        ... 请分析【{phrase_a}】和【{phrase_b}】的语义关联度
+        ... 输出格式:{{"score": 0.0, "reason": "..."}}
+        ... '''
+        >>> result = await _difference_between_phrases(
+        ...     "宿命感", "余华的小说",
+        ...     prompt_template=custom_template
+        ... )
+    """
+    # 使用自定义模板或默认模板
+    if prompt_template is None:
+        prompt_template = DEFAULT_PROMPT_TEMPLATE
+
+    # 默认tools为空列表
+    if tools is None:
+        tools = []
+
+    # 生成缓存键(tools转为JSON字符串以便哈希)
+    tools_str = json.dumps(tools, sort_keys=True) if tools else "[]"
+    cache_key = _generate_cache_key(
+        phrase_a, phrase_b, model_name, temperature, max_tokens, prompt_template, instructions, tools_str
+    )
+
+    # 尝试从缓存加载
+    if use_cache:
+        cached_result = _load_from_cache(cache_key, phrase_a, phrase_b, model_name, temperature, cache_dir)
+        if cached_result is not None:
+            return cached_result
+
+    # 缓存未命中,调用 API
+    agent = Agent(
+        name=name,
+        model=get_model(model_name),
+        model_settings=ModelSettings(
+            temperature=temperature,
+            max_tokens=max_tokens,
+        ),
+        instructions=instructions,
+        tools=tools,
+    )
+
+    # 格式化提示词
+    prompt = prompt_template.format(phrase_a=phrase_a, phrase_b=phrase_b)
+
+    result = await Runner.run(agent, input=prompt)
+    final_output = result.final_output
+
+    # 注意:不在这里缓存,而是在解析成功后缓存
+    # 这样可以避免缓存解析失败的响应
+
+    return final_output
+
+
+async def _difference_between_phrases_parsed(
+    phrase_a: str,
+    phrase_b: str,
+    model_name: str = 'openai/gpt-4.1-mini',
+    temperature: float = 0.0,
+    max_tokens: int = 65536,
+    prompt_template: str = None,
+    instructions: str = None,
+    tools: list = None,
+    name: str = "Semantic Similarity Analyzer",
+    use_cache: bool = True,
+    cache_dir: Optional[str] = None
+) -> Dict[str, Any]:
+    """
+    从语义角度判断两个短语的相似度,并解析返回结果为字典
+
+    Args:
+        phrase_a: 第一个短语
+        phrase_b: 第二个短语
+        model_name: 使用的模型名称
+        temperature: 模型温度参数,控制输出随机性,默认 0.0(确定性输出)
+        max_tokens: 最大生成token数,默认 65536
+        prompt_template: 自定义提示词模板,使用 {phrase_a} 和 {phrase_b} 作为占位符
+        instructions: Agent 的系统指令,默认为 None
+        tools: Agent 可用的工具列表,默认为 []
+        name: Agent 的名称,默认为 "Semantic Similarity Analyzer"
+        use_cache: 是否使用缓存,默认 True
+        cache_dir: 缓存目录,默认从配置读取(可通过 lib.config 设置)
+
+    Returns:
+        解析后的字典,包含:
+        - 说明: 相似度判断的理由
+        - 相似度: 0-1之间的浮点数
+
+    Raises:
+        ValueError: 当无法解析AI响应为有效JSON时抛出
+
+    Examples:
+        >>> result = await _difference_between_phrases_parsed("宿命感", "余华的小说")
+        >>> print(result['相似度'])
+        0.3
+        >>> print(result['说明'])
+        "两个概念有一定关联..."
+    """
+    # 使用默认模板或自定义模板
+    if prompt_template is None:
+        prompt_template = DEFAULT_PROMPT_TEMPLATE
+
+    # 默认tools为空列表
+    if tools is None:
+        tools = []
+
+    # 生成缓存键
+    tools_str = json.dumps(tools, sort_keys=True) if tools else "[]"
+    cache_key = _generate_cache_key(
+        phrase_a, phrase_b, model_name, temperature, max_tokens, prompt_template, instructions, tools_str
+    )
+
+    # 尝试从缓存加载
+    if use_cache:
+        cached_result = _load_from_cache(cache_key, phrase_a, phrase_b, model_name, temperature, cache_dir)
+        if cached_result is not None:
+            # 缓存命中,直接解析并返回
+            parsed_result = parse_json_from_text(cached_result)
+            if parsed_result:
+                return parsed_result
+            # 如果缓存的内容也无法解析,继续执行API调用(可能之前缓存了错误响应)
+
+    # 重试机制:最多重试3次
+    max_retries = 3
+    last_error = None
+
+    for attempt in range(max_retries):
+        try:
+            # 调用AI获取原始响应(不传use_cache,因为我们在这里手动处理缓存)
+            raw_result = await _difference_between_phrases(
+                phrase_a, phrase_b, model_name, temperature, max_tokens,
+                prompt_template, instructions, tools, name, use_cache=False, cache_dir=cache_dir
+            )
+
+            # 使用 utils.parse_json_from_text 解析结果
+            parsed_result = parse_json_from_text(raw_result)
+
+            # 如果解析成功,缓存并返回
+            if parsed_result:
+                # 只有解析成功后才缓存
+                if use_cache:
+                    _save_to_cache(
+                        cache_key, phrase_a, phrase_b, model_name,
+                        temperature, max_tokens, prompt_template,
+                        instructions, tools_str, raw_result, cache_dir
+                    )
+                return parsed_result
+
+            # 解析失败,记录错误信息,准备重试
+            formatted_prompt = prompt_template.format(phrase_a=phrase_a, phrase_b=phrase_b)
+            error_msg = f"""
+JSON解析失败 (尝试 {attempt + 1}/{max_retries})
+================================================================================
+短语A: {phrase_a}
+短语B: {phrase_b}
+模型: {model_name}
+温度: {temperature}
+================================================================================
+Prompt:
+{formatted_prompt}
+================================================================================
+AI响应 (长度: {len(raw_result)}):
+{raw_result}
+================================================================================
+"""
+            last_error = error_msg
+            print(error_msg)
+
+            if attempt < max_retries - 1:
+                print(f"⚠️  将在 1 秒后重试... (剩余重试次数: {max_retries - attempt - 1})")
+                import asyncio
+                await asyncio.sleep(1)
+
+        except Exception as e:
+            # 捕获其他异常(如网络错误)
+            error_msg = f"API调用失败 (尝试 {attempt + 1}/{max_retries}): {str(e)}"
+            last_error = error_msg
+            print(error_msg)
+
+            if attempt < max_retries - 1:
+                print(f"⚠️  将在 1 秒后重试... (剩余重试次数: {max_retries - attempt - 1})")
+                import asyncio
+                await asyncio.sleep(1)
+
+    # 所有重试都失败了,抛出异常
+    final_error = f"""
+所有重试均失败!已尝试 {max_retries} 次
+================================================================================
+最后一次错误:
+{last_error}
+================================================================================
+"""
+    raise ValueError(final_error)
+
+
+# ========== V1 版本(默认版本) ==========
+
+# 对外接口 - V1
+async def compare_phrases(
+    phrase_a: str,
+    phrase_b: str,
+    model_name: str = 'openai/gpt-4.1-mini',
+    temperature: float = 0.0,
+    max_tokens: int = 65536,
+    prompt_template: str = None,
+    instructions: str = None,
+    tools: list = None,
+    name: str = "Semantic Similarity Analyzer",
+    use_cache: bool = True,
+    cache_dir: Optional[str] = None
+) -> Dict[str, Any]:
+    """
+    比较两个短语的语义相似度(对外唯一接口)
+
+    Args:
+        phrase_a: 第一个短语
+        phrase_b: 第二个短语
+        model_name: 使用的模型名称
+        temperature: 模型温度参数,控制输出随机性,默认 0.0(确定性输出)
+        max_tokens: 最大生成token数,默认 65536
+        prompt_template: 自定义提示词模板,使用 {phrase_a} 和 {phrase_b} 作为占位符
+        instructions: Agent 的系统指令,默认为 None
+        tools: Agent 可用的工具列表,默认为 []
+        name: Agent 的名称,默认为 "Semantic Similarity Analyzer"
+        use_cache: 是否使用缓存,默认 True
+        cache_dir: 缓存目录,默认从配置读取(可通过 lib.config 设置)
+
+    Returns:
+        解析后的字典
+    """
+    return await _difference_between_phrases_parsed(
+        phrase_a, phrase_b, model_name, temperature, max_tokens,
+        prompt_template, instructions, tools, name, use_cache, cache_dir
+    )
+
+
+async def compare_phrases_cartesian(
+    phrases_a: List[str],
+    phrases_b: List[str],
+    max_concurrent: int = 50,
+    progress_callback: Optional[callable] = None
+) -> List[List[Dict[str, Any]]]:
+    """
+    笛卡尔积批量计算:M×N并发LLM调用(带并发控制和进度回调)
+
+    用于架构统一性,内部通过并发实现(LLM无法真正批处理)
+
+    Args:
+        phrases_a: 第一组短语列表(M个)
+        phrases_b: 第二组短语列表(N个)
+        max_concurrent: 最大并发数,默认50
+        progress_callback: 进度回调函数,每完成一个任务时调用
+
+    Returns:
+        嵌套列表 List[List[Dict]],每个Dict包含完整的比较结果
+        results[i][j] = {
+            "相似度": float,
+            "说明": str
+        }
+
+    Examples:
+        >>> results = await compare_phrases_cartesian(
+        ...     ["深度学习"],
+        ...     ["神经网络", "Python"]
+        ... )
+        >>> print(results[0][0]['相似度'])  # 深度学习 vs 神经网络
+        >>> print(results[0][1]['说明'])    # 深度学习 vs Python
+    """
+    # 参数验证
+    if not phrases_a or not phrases_b:
+        return [[]]
+
+    M, N = len(phrases_a), len(phrases_b)
+
+    # 创建信号量控制并发
+    semaphore = asyncio.Semaphore(max_concurrent)
+
+    async def limited_compare(phrase_a: str, phrase_b: str):
+        async with semaphore:
+            result = await compare_phrases(phrase_a, phrase_b)
+            # 调用进度回调
+            if progress_callback:
+                progress_callback(1)
+            return result
+
+    # 创建M×N个受控的并发任务
+    tasks = []
+    for phrase_a in phrases_a:
+        for phrase_b in phrases_b:
+            tasks.append(limited_compare(phrase_a, phrase_b))
+
+    # 并发执行所有任务
+    results = await asyncio.gather(*tasks)
+
+    # 返回嵌套列表结构
+    nested_results = []
+    for i in range(M):
+        row_results = results[i * N : (i + 1) * N]
+        nested_results.append(row_results)
+    return nested_results
+
+
+if __name__ == "__main__":
+    import asyncio
+
+    async def main():
+        """示例使用"""
+        # 示例 1: 基本使用(使用缓存)
+        print("示例 1: 基本使用")
+        result = await compare_phrases("宿命感", "余华的小说")
+        print(f"相似度: {result.get('相似度')}")
+        print(f"说明: {result.get('说明')}")
+        print()
+
+        # 示例 2: 再次调用相同参数(应该从缓存读取)
+        print("示例 2: 测试缓存")
+        result = await compare_phrases("宿命感", "余华的小说")
+        print(f"相似度: {result.get('相似度')}")
+        print()
+
+        # 示例 3: 自定义温度
+        print("示例 3: 自定义温度(创意性输出)")
+        result = await compare_phrases(
+            "创意写作", "AI生成",
+            temperature=0.7
+        )
+        print(f"相似度: {result.get('相似度')}")
+        print(f"说明: {result.get('说明')}")
+        print()
+
+        # 示例 4: 自定义 Agent 名称
+        print("示例 4: 自定义 Agent 名称")
+        result = await compare_phrases(
+            "人工智能", "机器学习",
+            name="AI语义分析专家"
+        )
+        print(f"相似度: {result.get('相似度')}")
+        print(f"说明: {result.get('说明')}")
+        print()
+
+        # 示例 5: 使用不同的模型
+        print("示例 5: 使用 Claude 模型")
+        result = await compare_phrases(
+            "深度学习", "神经网络",
+            model_name='anthropic/claude-haiku-4.5'
+        )
+        print(f"相似度: {result.get('相似度')}")
+        print(f"说明: {result.get('说明')}")
+
+    asyncio.run(main())
+
+
+# ========== V2 版本(示例:详细分析版本) ==========
+
+# V2 默认提示词模板(更详细的分析)
+DEFAULT_PROMPT_TEMPLATE_V2 = """
+请深入分析【{phrase_a}】和【{phrase_b}】的语义关系,包括:
+1. 语义相似度(0-1)
+2. 关系类型(如:包含、相关、对立、无关等)
+3. 详细说明
+
+输出格式:
+```json
+{{
+  "相似度": 0.0,
+  "关系类型": "相关/包含/对立/无关",
+  "详细说明": "详细分析两者的语义关系...",
+  "应用场景": "该关系在实际应用中的意义..."
+}}
+```
+""".strip()
+
+
+# 对外接口 - V2
+async def compare_phrases_v2(
+    phrase_a: str,
+    phrase_b: str,
+    model_name: str = 'anthropic/claude-sonnet-4.5',  # V2 默认使用更强的模型
+    temperature: float = 0.0,
+    max_tokens: int = 65536,
+    prompt_template: str = None,
+    instructions: str = None,
+    tools: list = None,
+    name: str = "Advanced Semantic Analyzer",
+    use_cache: bool = True,
+    cache_dir: Optional[str] = None
+) -> Dict[str, Any]:
+    """
+    比较两个短语的语义相似度 - V2 版本(详细分析)
+
+    V2 特点:
+    - 默认使用更强的模型(Claude Sonnet 4.5)
+    - 更详细的分析输出(包含关系类型和应用场景)
+    - 适合需要深入分析的场景
+
+    Args:
+        phrase_a: 第一个短语
+        phrase_b: 第二个短语
+        model_name: 使用的模型名称,默认 'anthropic/claude-sonnet-4.5'
+        temperature: 模型温度参数,默认 0.0
+        max_tokens: 最大生成token数,默认 65536
+        prompt_template: 自定义提示词模板,默认使用 V2 详细模板
+        instructions: Agent 的系统指令,默认为 None
+        tools: Agent 可用的工具列表,默认为 []
+        name: Agent 的名称,默认 "Advanced Semantic Analyzer"
+        use_cache: 是否使用缓存,默认 True
+        cache_dir: 缓存目录,默认从配置读取(可通过 lib.config 设置)
+
+    Returns:
+        解析后的字典,包含:
+        - 相似度: 0-1之间的浮点数
+        - 关系类型: 关系分类
+        - 详细说明: 详细分析
+        - 应用场景: 应用建议
+
+    Examples:
+        >>> result = await compare_phrases_v2("深度学习", "神经网络")
+        >>> print(result['相似度'])
+        0.9
+        >>> print(result['关系类型'])
+        "包含"
+        >>> print(result['详细说明'])
+        "深度学习是基于人工神经网络的机器学习方法..."
+    """
+    # 使用 V2 默认模板(如果未指定)
+    if prompt_template is None:
+        prompt_template = DEFAULT_PROMPT_TEMPLATE_V2
+
+    return await _difference_between_phrases_parsed(
+        phrase_a, phrase_b, model_name, temperature, max_tokens,
+        prompt_template, instructions, tools, name, use_cache, cache_dir
+    )

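`compare_phrases_cartesian` 的 `progress_callback` 参数在上面的 `__main__` 示例中没有覆盖,下面是一个带进度回调的最小示意(假设性用法,短语与回调实现仅作演示):

```python
# 示例(假设性):带进度回调的笛卡尔积比较,每完成一对比较打印一次进度
import asyncio
from lib.semantic_similarity import compare_phrases_cartesian


async def demo():
    phrases_a = ["深度学习", "机器学习"]
    phrases_b = ["神经网络", "人工智能", "Python"]
    total = len(phrases_a) * len(phrases_b)
    done = 0

    def on_progress(n: int):
        # 每完成一个比较任务被调用一次,n 为本次新完成的任务数(固定为 1)
        nonlocal done
        done += n
        print(f"进度: {done}/{total}")

    results = await compare_phrases_cartesian(
        phrases_a, phrases_b, progress_callback=on_progress
    )
    print(results[0][0]["相似度"])  # 深度学习 vs 神经网络


asyncio.run(demo())
```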
+ 305 - 0
lib/structured_logger.py

@@ -0,0 +1,305 @@
+"""
+结构化日志记录器
+提供步骤化、可追溯、易于可视化的日志记录功能
+"""
+import json
+from datetime import datetime
+from typing import Any, Optional
+from pathlib import Path
+
+
+class StructuredLogger:
+    """
+    结构化日志记录器
+
+    特点:
+    1. 每个步骤独立保存文件
+    2. 记录完整的时间线
+    3. 支持嵌套步骤(树形结构)
+    4. 便于可视化和debug
+    """
+
+    def __init__(self, log_dir: str, run_id: str):
+        """
+        初始化日志记录器
+
+        Args:
+            log_dir: 日志根目录
+            run_id: 本次运行的唯一标识
+        """
+        self.log_dir = Path(log_dir)
+        self.run_id = run_id
+
+        # 创建目录结构
+        self.steps_dir = self.log_dir / "steps"
+        self.timeline_dir = self.log_dir / "timeline"
+        self.artifacts_dir = self.log_dir / "artifacts"
+
+        for dir_path in [self.steps_dir, self.timeline_dir, self.artifacts_dir]:
+            dir_path.mkdir(parents=True, exist_ok=True)
+
+        # 时间线记录
+        self.timeline = []
+        self.step_counter = 0
+        self.step_stack = []  # 用于嵌套步骤
+
+        # 初始化元数据
+        self.metadata = {
+            "run_id": run_id,
+            "start_time": datetime.now().isoformat(),
+            "status": "running",
+            "steps_count": 0,
+            "log_dir": str(self.log_dir),
+        }
+        self._save_metadata()
+
+    def start_step(
+        self,
+        step_name: str,
+        step_type: str,
+        description: str = "",
+        input_data: Any = None
+    ) -> int:
+        """
+        开始一个新步骤
+
+        Args:
+            step_name: 步骤名称(如:"extract_keywords", "explore_level_1")
+            step_type: 步骤类型(如:"extraction", "exploration", "analysis", "evaluation")
+            description: 步骤描述
+            input_data: 输入数据
+
+        Returns:
+            step_id: 步骤ID
+        """
+        self.step_counter += 1
+        step_id = self.step_counter
+
+        # 计算层级(基于栈深度)
+        level = len(self.step_stack)
+        parent_id = self.step_stack[-1] if self.step_stack else None
+
+        step_info = {
+            "step_id": step_id,
+            "step_name": step_name,
+            "step_type": step_type,
+            "description": description,
+            "level": level,
+            "parent_id": parent_id,
+            "status": "running",
+            "start_time": datetime.now().isoformat(),
+            "end_time": None,
+            "duration_seconds": None,
+            "input": self._serialize(input_data),
+            "output": None,
+            "error": None,
+        }
+
+        # 压入栈
+        self.step_stack.append(step_id)
+
+        # 保存步骤开始信息
+        self._save_step(step_id, step_info)
+
+        # 添加到时间线
+        self.timeline.append({
+            "timestamp": step_info["start_time"],
+            "event": "step_start",
+            "step_id": step_id,
+            "step_name": step_name,
+            "step_type": step_type,
+        })
+        self._save_timeline()
+
+        print(f"\n{'  ' * level}[Step {step_id}] {step_name} - {description}")
+
+        return step_id
+
+    def end_step(
+        self,
+        step_id: int,
+        output_data: Any = None,
+        status: str = "success",
+        error: Optional[str] = None
+    ):
+        """
+        结束一个步骤
+
+        Args:
+            step_id: 步骤ID
+            output_data: 输出数据
+            status: 步骤状态("success", "error", "skipped")
+            error: 错误信息(如果有)
+        """
+        # 从栈中弹出
+        if self.step_stack and self.step_stack[-1] == step_id:
+            self.step_stack.pop()
+
+        # 读取步骤信息
+        step_info = self._load_step(step_id)
+
+        # 更新步骤信息
+        end_time = datetime.now()
+        start_time = datetime.fromisoformat(step_info["start_time"])
+        duration = (end_time - start_time).total_seconds()
+
+        step_info.update({
+            "status": status,
+            "end_time": end_time.isoformat(),
+            "duration_seconds": duration,
+            "output": self._serialize(output_data),
+            "error": error,
+        })
+
+        # 保存步骤结束信息
+        self._save_step(step_id, step_info)
+
+        # 添加到时间线
+        self.timeline.append({
+            "timestamp": step_info["end_time"],
+            "event": "step_end",
+            "step_id": step_id,
+            "step_name": step_info["step_name"],
+            "status": status,
+            "duration_seconds": duration,
+        })
+        self._save_timeline()
+
+        level = len(self.step_stack)
+        status_emoji = "✅" if status == "success" else "❌" if status == "error" else "⏭️"
+        print(f"{'  ' * level}{status_emoji} [Step {step_id}] Completed in {duration:.2f}s")
+
+    def log_artifact(
+        self,
+        step_id: int,
+        artifact_name: str,
+        artifact_data: Any,
+        artifact_type: str = "json"
+    ) -> str:
+        """
+        保存步骤的关联产物(如:API响应、中间结果等)
+
+        Args:
+            step_id: 步骤ID
+            artifact_name: 产物名称
+            artifact_data: 产物数据
+            artifact_type: 产物类型("json", "text", "image"等)
+
+        Returns:
+            artifact_path: 产物文件路径
+        """
+        artifact_dir = self.artifacts_dir / f"step_{step_id:04d}"
+        artifact_dir.mkdir(exist_ok=True)
+
+        if artifact_type == "json":
+            artifact_path = artifact_dir / f"{artifact_name}.json"
+            with open(artifact_path, "w", encoding="utf-8") as f:
+                json.dump(artifact_data, f, ensure_ascii=False, indent=2)
+        elif artifact_type == "text":
+            artifact_path = artifact_dir / f"{artifact_name}.txt"
+            with open(artifact_path, "w", encoding="utf-8") as f:
+                f.write(str(artifact_data))
+        else:
+            artifact_path = artifact_dir / artifact_name
+            with open(artifact_path, "wb") as f:
+                f.write(artifact_data)
+
+        print(f"  📎 Artifact saved: {artifact_path.name}")
+        return str(artifact_path)
+
+    def finalize(self, final_status: str = "success", final_output: Any = None):
+        """
+        完成整个运行,生成最终摘要
+
+        Args:
+            final_status: 最终状态
+            final_output: 最终输出
+        """
+        self.metadata.update({
+            "end_time": datetime.now().isoformat(),
+            "status": final_status,
+            "steps_count": self.step_counter,
+            "final_output": self._serialize(final_output),
+        })
+        self._save_metadata()
+
+        # 生成摘要
+        self._generate_summary()
+
+        print(f"\n{'='*60}")
+        print(f"Run completed: {final_status}")
+        print(f"Total steps: {self.step_counter}")
+        print(f"Log directory: {self.log_dir}")
+        print(f"{'='*60}")
+
+    def _save_step(self, step_id: int, step_info: dict):
+        """保存步骤信息"""
+        step_file = self.steps_dir / f"step_{step_id:04d}.json"
+        with open(step_file, "w", encoding="utf-8") as f:
+            json.dump(step_info, f, ensure_ascii=False, indent=2)
+
+    def _load_step(self, step_id: int) -> dict:
+        """加载步骤信息"""
+        step_file = self.steps_dir / f"step_{step_id:04d}.json"
+        with open(step_file, "r", encoding="utf-8") as f:
+            return json.load(f)
+
+    def _save_timeline(self):
+        """保存时间线"""
+        timeline_file = self.timeline_dir / "timeline.json"
+        with open(timeline_file, "w", encoding="utf-8") as f:
+            json.dump(self.timeline, f, ensure_ascii=False, indent=2)
+
+    def _save_metadata(self):
+        """保存元数据"""
+        metadata_file = self.log_dir / "metadata.json"
+        with open(metadata_file, "w", encoding="utf-8") as f:
+            json.dump(self.metadata, f, ensure_ascii=False, indent=2)
+
+    def _serialize(self, data: Any) -> Any:
+        """序列化数据(处理Pydantic模型等)"""
+        if data is None:
+            return None
+
+        # 处理Pydantic模型
+        if hasattr(data, "model_dump"):
+            return data.model_dump()
+
+        # 处理字典
+        if isinstance(data, dict):
+            return {k: self._serialize(v) for k, v in data.items()}
+
+        # 处理列表
+        if isinstance(data, list):
+            return [self._serialize(item) for item in data]
+
+        # 其他类型直接返回
+        return data
+
+    def _generate_summary(self):
+        """生成运行摘要"""
+        summary = {
+            "run_id": self.run_id,
+            "status": self.metadata["status"],
+            "start_time": self.metadata["start_time"],
+            "end_time": self.metadata["end_time"],
+            "total_steps": self.step_counter,
+            "steps_overview": [],
+        }
+
+        # 汇总所有步骤
+        for step_id in range(1, self.step_counter + 1):
+            step_info = self._load_step(step_id)
+            summary["steps_overview"].append({
+                "step_id": step_id,
+                "step_name": step_info["step_name"],
+                "step_type": step_info["step_type"],
+                "status": step_info["status"],
+                "duration_seconds": step_info["duration_seconds"],
+            })
+
+        # 保存摘要
+        summary_file = self.log_dir / "summary.json"
+        with open(summary_file, "w", encoding="utf-8") as f:
+            json.dump(summary, f, ensure_ascii=False, indent=2)

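下面是 `StructuredLogger` 的一个最小使用示意(假设性用法:`log_dir`、`run_id` 及步骤名均为演示值),展示 start_step → log_artifact → end_step → finalize 的典型调用顺序:

```python
# 示例(假设性):用 StructuredLogger 记录一个步骤及其产物
from lib.structured_logger import StructuredLogger

logger = StructuredLogger(log_dir="logs/demo_run", run_id="demo_001")

step_id = logger.start_step(
    step_name="extract_keywords",
    step_type="extraction",
    description="从输入文本中提取关键词",
    input_data={"text": "示例输入"},
)
keywords = ["示例", "关键词"]
# 保存中间产物到 artifacts/step_0001/keywords.json
logger.log_artifact(step_id, "keywords", {"keywords": keywords}, artifact_type="json")
logger.end_step(step_id, output_data={"keywords": keywords}, status="success")

logger.finalize(final_status="success", final_output={"keywords": keywords})
```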
+ 408 - 0
lib/text_embedding.py

@@ -0,0 +1,408 @@
+#!/usr/bin/env python3
+"""
+文本相似度计算模块
+基于 similarities 库(真正的向量模型,不使用 LLM)
+"""
+
+from typing import Dict, Any, Optional
+import hashlib
+import json
+from pathlib import Path
+from datetime import datetime
+import threading
+
+from .config import get_cache_dir
+
+# 支持的模型列表
+SUPPORTED_MODELS = {
+    "chinese": "shibing624/text2vec-base-chinese",           # 默认,中文通用
+    "multilingual": "shibing624/text2vec-base-multilingual", # 多语言(中英韩日德意等)
+    "paraphrase": "shibing624/text2vec-base-chinese-paraphrase",  # 中文长文本
+    "sentence": "shibing624/text2vec-base-chinese-sentence",      # 中文短句子
+}
+
+# 延迟导入 similarities,避免初始化时就加载模型
+_similarity_models = {}  # 存储多个模型实例
+_model_lock = threading.Lock()  # 线程锁,保护模型加载
+
+
+def _get_default_cache_dir() -> str:
+    """获取默认缓存目录(从配置中读取)"""
+    return get_cache_dir("text_embedding")
+
+
+def _generate_cache_key(phrase_a: str, phrase_b: str, model_name: str) -> str:
+    """
+    生成缓存键(哈希值)
+
+    Args:
+        phrase_a: 第一个短语
+        phrase_b: 第二个短语
+        model_name: 模型名称
+
+    Returns:
+        32位MD5哈希值
+    """
+    cache_string = f"{phrase_a}||{phrase_b}||{model_name}"
+    return hashlib.md5(cache_string.encode('utf-8')).hexdigest()
+
+
+def _sanitize_for_filename(text: str, max_length: int = 30) -> str:
+    """
+    将文本转换为安全的文件名部分
+
+    Args:
+        text: 原始文本
+        max_length: 最大长度
+
+    Returns:
+        安全的文件名字符串
+    """
+    import re
+    # 移除特殊字符,只保留中文、英文、数字、下划线
+    sanitized = re.sub(r'[^\w\u4e00-\u9fff]', '_', text)
+    # 移除连续的下划线
+    sanitized = re.sub(r'_+', '_', sanitized)
+    # 截断到最大长度
+    if len(sanitized) > max_length:
+        sanitized = sanitized[:max_length]
+    return sanitized.strip('_')
+
+
+def _get_cache_filepath(
+    cache_key: str,
+    phrase_a: str,
+    phrase_b: str,
+    model_name: str,
+    cache_dir: Optional[str] = None
+) -> Path:
+    """
+    获取缓存文件路径(可读文件名)
+
+    Args:
+        cache_key: 缓存键(哈希值)
+        phrase_a: 第一个短语
+        phrase_b: 第二个短语
+        model_name: 模型名称
+        cache_dir: 缓存目录
+
+    Returns:
+        缓存文件的完整路径
+
+    文件名格式: {phrase_a}_vs_{phrase_b}_{model}_{hash[:8]}.json
+    """
+    if cache_dir is None:
+        cache_dir = _get_default_cache_dir()
+
+    # 清理短语和模型名
+    clean_a = _sanitize_for_filename(phrase_a, max_length=20)
+    clean_b = _sanitize_for_filename(phrase_b, max_length=20)
+
+    # 简化模型名(提取关键部分)
+    model_short = model_name.split('/')[-1]
+    model_short = _sanitize_for_filename(model_short, max_length=20)
+
+    # 使用哈希的前8位
+    hash_short = cache_key[:8]
+
+    # 组合文件名
+    filename = f"{clean_a}_vs_{clean_b}_{model_short}_{hash_short}.json"
+
+    return Path(cache_dir) / filename
+
+
+def _load_from_cache(
+    cache_key: str,
+    phrase_a: str,
+    phrase_b: str,
+    model_name: str,
+    cache_dir: Optional[str] = None
+) -> Optional[Dict[str, Any]]:
+    """
+    从缓存加载数据
+
+    Args:
+        cache_key: 缓存键
+        phrase_a: 第一个短语
+        phrase_b: 第二个短语
+        model_name: 模型名称
+        cache_dir: 缓存目录
+
+    Returns:
+        缓存的结果字典,如果不存在则返回 None
+    """
+    if cache_dir is None:
+        cache_dir = _get_default_cache_dir()
+
+    cache_file = _get_cache_filepath(cache_key, phrase_a, phrase_b, model_name, cache_dir)
+
+    # 如果文件不存在,尝试通过哈希匹配查找
+    if not cache_file.exists():
+        cache_path = Path(cache_dir)
+        if cache_path.exists():
+            hash_short = cache_key[:8]
+            matching_files = list(cache_path.glob(f"*_{hash_short}.json"))
+            if matching_files:
+                cache_file = matching_files[0]
+            else:
+                return None
+        else:
+            return None
+
+    try:
+        with open(cache_file, 'r', encoding='utf-8') as f:
+            cached_data = json.load(f)
+            return cached_data['output']
+    except (json.JSONDecodeError, IOError, KeyError):
+        return None
+
+
+def _save_to_cache(
+    cache_key: str,
+    phrase_a: str,
+    phrase_b: str,
+    model_name: str,
+    result: Dict[str, Any],
+    cache_dir: Optional[str] = None
+) -> None:
+    """
+    保存数据到缓存
+
+    Args:
+        cache_key: 缓存键
+        phrase_a: 第一个短语
+        phrase_b: 第二个短语
+        model_name: 模型名称
+        result: 结果数据(字典格式)
+        cache_dir: 缓存目录
+    """
+    if cache_dir is None:
+        cache_dir = _get_default_cache_dir()
+
+    cache_file = _get_cache_filepath(cache_key, phrase_a, phrase_b, model_name, cache_dir)
+
+    # 确保缓存目录存在
+    cache_file.parent.mkdir(parents=True, exist_ok=True)
+
+    # 准备缓存数据
+    cache_data = {
+        "input": {
+            "phrase_a": phrase_a,
+            "phrase_b": phrase_b,
+            "model_name": model_name,
+        },
+        "output": result,
+        "metadata": {
+            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+            "cache_key": cache_key,
+            "cache_file": str(cache_file.name)
+        }
+    }
+
+    try:
+        with open(cache_file, 'w', encoding='utf-8') as f:
+            json.dump(cache_data, f, ensure_ascii=False, indent=2)
+    except IOError:
+        pass  # 静默失败,不影响主流程
+
+
+def _get_similarity_model(model_name: str = "shibing624/text2vec-base-chinese"):
+    """
+    获取或初始化相似度模型(支持多个模型,线程安全)
+
+    Args:
+        model_name: 模型名称
+
+    Returns:
+        BertSimilarity 模型实例
+    """
+    global _similarity_models, _model_lock
+
+    # 如果是简称,转换为完整名称
+    if model_name in SUPPORTED_MODELS:
+        model_name = SUPPORTED_MODELS[model_name]
+
+    # 快速路径:如果模型已加载,直接返回(无锁检查)
+    if model_name in _similarity_models:
+        return _similarity_models[model_name]
+
+    # 慢速路径:需要加载模型(使用锁保护)
+    with _model_lock:
+        # 双重检查:可能在等待锁时其他线程已经加载了
+        if model_name in _similarity_models:
+            return _similarity_models[model_name]
+
+        # 加载新模型
+        try:
+            from similarities import BertSimilarity
+            print(f"正在加载模型: {model_name}...")
+            _similarity_models[model_name] = BertSimilarity(model_name_or_path=model_name)
+            print("模型加载完成!")
+            return _similarity_models[model_name]
+        except ImportError:
+            raise ImportError(
+                "请先安装 similarities 库: pip install -U similarities torch"
+            )
+
+
+def compare_phrases(
+    phrase_a: str,
+    phrase_b: str,
+    model_name: str = "chinese",
+    use_cache: bool = True,
+    cache_dir: Optional[str] = None
+) -> Dict[str, Any]:
+    """
+    比较两个短语的语义相似度(兼容 semantic_similarity.py 的接口)
+
+    返回格式与 semantic_similarity.compare_phrases() 一致:
+    {
+        "说明": "基于向量模型计算的语义相似度",
+        "相似度": 0.85
+    }
+
+    Args:
+        phrase_a: 第一个短语
+        phrase_b: 第二个短语
+        model_name: 模型名称,可选:
+            简称:
+            - "chinese" (默认) - 中文通用模型
+            - "multilingual" - 多语言模型(中英韩日德意等)
+            - "paraphrase" - 中文长文本模型
+            - "sentence" - 中文短句子模型
+
+            完整名称:
+            - "shibing624/text2vec-base-chinese"
+            - "shibing624/text2vec-base-multilingual"
+            - "shibing624/text2vec-base-chinese-paraphrase"
+            - "shibing624/text2vec-base-chinese-sentence"
+        use_cache: 是否使用缓存,默认 True
+        cache_dir: 缓存目录,默认从配置读取(可通过 lib.config 设置)
+
+    Returns:
+        {
+            "说明": str,      # 相似度说明
+            "相似度": float    # 0-1之间的相似度分数
+        }
+
+    Examples:
+        >>> # 使用默认模型
+        >>> result = compare_phrases("如何更换花呗绑定银行卡", "花呗更改绑定银行卡")
+        >>> print(result['相似度'])  # 0.855
+
+        >>> # 使用多语言模型
+        >>> result = compare_phrases("Hello", "Hi", model_name="multilingual")
+
+        >>> # 使用长文本模型
+        >>> result = compare_phrases("长文本1...", "长文本2...", model_name="paraphrase")
+
+        >>> # 禁用缓存
+        >>> result = compare_phrases("测试", "测试", use_cache=False)
+
+        >>> # 自定义缓存目录
+        >>> result = compare_phrases("测试1", "测试2", cache_dir="/tmp/my_cache")
+    """
+    if cache_dir is None:
+        cache_dir = _get_default_cache_dir()
+
+    # 转换简称为完整名称(用于缓存键)
+    full_model_name = SUPPORTED_MODELS.get(model_name, model_name)
+
+    # 生成缓存键
+    cache_key = _generate_cache_key(phrase_a, phrase_b, full_model_name)
+
+    # 尝试从缓存加载
+    if use_cache:
+        cached_result = _load_from_cache(cache_key, phrase_a, phrase_b, full_model_name, cache_dir)
+        if cached_result is not None:
+            return cached_result
+
+    # 缓存未命中,计算相似度
+    model = _get_similarity_model(model_name)
+    score = float(model.similarity(phrase_a, phrase_b))
+
+    # 生成说明
+    if score >= 0.9:
+        level = "极高"
+    elif score >= 0.7:
+        level = "高"
+    elif score >= 0.5:
+        level = "中等"
+    elif score >= 0.3:
+        level = "较低"
+    else:
+        level = "低"
+
+    explanation = f"基于向量模型计算的语义相似度为 {level} ({score:.2f})"
+
+    result = {
+        "说明": explanation,
+        "相似度": score
+    }
+
+    # 保存到缓存
+    if use_cache:
+        _save_to_cache(cache_key, phrase_a, phrase_b, full_model_name, result, cache_dir)
+
+    return result
+
+
+if __name__ == "__main__":
+    print("=" * 60)
+    print("text_embedding - 文本相似度计算(带缓存)")
+    print("=" * 60)
+    print()
+
+    # 示例 1: 默认模型(首次调用,会保存缓存)
+    print("示例 1: 默认模型(chinese)- 首次调用")
+    result = compare_phrases("如何更换花呗绑定银行卡", "花呗更改绑定银行卡")
+    print(f"相似度: {result['相似度']:.3f}")
+    print(f"说明: {result['说明']}")
+    print()
+
+    # 示例 2: 再次调用相同参数(从缓存读取)
+    print("示例 2: 测试缓存 - 再次调用相同参数")
+    result = compare_phrases("如何更换花呗绑定银行卡", "花呗更改绑定银行卡")
+    print(f"相似度: {result['相似度']:.3f}")
+    print(f"说明: {result['说明']}")
+    print("(应该从缓存读取,速度更快)")
+    print()
+
+    # 示例 3: 短句子
+    print("示例 3: 使用默认模型")
+    result = compare_phrases("深度学习", "神经网络")
+    print(f"相似度: {result['相似度']:.3f}")
+    print(f"说明: {result['说明']}")
+    print()
+
+    # 示例 4: 不相关
+    print("示例 4: 不相关的短语")
+    result = compare_phrases("编程", "吃饭")
+    print(f"相似度: {result['相似度']:.3f}")
+    print(f"说明: {result['说明']}")
+    print()
+
+    # 示例 5: 多语言模型
+    print("示例 5: 多语言模型(multilingual)")
+    result = compare_phrases("Hello", "Hi", model_name="multilingual")
+    print(f"相似度: {result['相似度']:.3f}")
+    print(f"说明: {result['说明']}")
+    print()
+
+    # 示例 6: 禁用缓存
+    print("示例 6: 禁用缓存")
+    result = compare_phrases("测试", "测试", use_cache=False)
+    print(f"相似度: {result['相似度']:.3f}")
+    print(f"说明: {result['说明']}")
+    print()
+
+    print("=" * 60)
+    print("支持的模型:")
+    print("-" * 60)
+    for key, value in SUPPORTED_MODELS.items():
+        print(f"  {key:15s} -> {value}")
+    print("=" * 60)
+    print()
+    print("缓存目录: cache/text_embedding/")
+    print("缓存文件格式: {phrase_a}_vs_{phrase_b}_{model}_{hash[:8]}.json")
+    print("=" * 60)

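`_get_similarity_model` 通过双重检查加线程锁保证同一模型只加载一次,下面是一个多线程并发调用的最小示意(假设性用法,线程数与短语仅作演示):

```python
# 示例(假设性):多线程并发调用 compare_phrases,模型只会被加载一次
from concurrent.futures import ThreadPoolExecutor
from lib.text_embedding import compare_phrases

pairs = [
    ("深度学习", "神经网络"),
    ("机器学习", "人工智能"),
    ("编程", "吃饭"),
]

with ThreadPoolExecutor(max_workers=3) as pool:
    results = list(pool.map(lambda p: compare_phrases(p[0], p[1]), pairs))

for (a, b), r in zip(pairs, results):
    print(f"{a} vs {b}: {r['相似度']:.3f}")
```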
+ 466 - 0
lib/text_embedding_api.py

@@ -0,0 +1,466 @@
+#!/usr/bin/env python3
+"""
+文本相似度计算模块 - 基于远程API
+使用远程GPU加速的相似度计算服务,接口与 text_embedding.py 兼容
+
+提供3种计算模式:
+1. compare_phrases() - 单对计算
+2. compare_phrases_batch() - 批量成对计算 (pair[i].text1 vs pair[i].text2)
+3. compare_phrases_cartesian() - 笛卡尔积计算 (M×N矩阵)
+"""
+
+from typing import Dict, Any, Optional, List, Tuple
+import requests
+import numpy as np
+
+# API配置
+DEFAULT_API_BASE_URL = "http://61.48.133.26:8187"
+DEFAULT_TIMEOUT = 60  # 秒
+
+# API客户端单例
+_api_client = None
+
+
+class SimilarityAPIClient:
+    """文本相似度API客户端"""
+
+    def __init__(self, base_url: str = DEFAULT_API_BASE_URL, timeout: int = DEFAULT_TIMEOUT):
+        self.base_url = base_url.rstrip('/')
+        self.timeout = timeout
+        self._session = requests.Session()  # 复用连接
+
+    def health_check(self) -> Dict:
+        """健康检查"""
+        response = self._session.get(f"{self.base_url}/health", timeout=10)
+        response.raise_for_status()
+        return response.json()
+
+    def list_models(self) -> Dict:
+        """列出支持的模型"""
+        response = self._session.get(f"{self.base_url}/models", timeout=10)
+        response.raise_for_status()
+        return response.json()
+
+    def similarity(
+        self,
+        text1: str,
+        text2: str,
+        model_name: Optional[str] = None
+    ) -> Dict:
+        """
+        计算单个文本对的相似度
+
+        Args:
+            text1: 第一个文本
+            text2: 第二个文本
+            model_name: 可选模型名称
+
+        Returns:
+            {"text1": str, "text2": str, "score": float}
+        """
+        payload = {"text1": text1, "text2": text2}
+        if model_name:
+            payload["model_name"] = model_name
+
+        response = self._session.post(
+            f"{self.base_url}/similarity",
+            json=payload,
+            timeout=self.timeout
+        )
+        response.raise_for_status()
+        return response.json()
+
+    def batch_similarity(
+        self,
+        pairs: List[Dict],
+        model_name: Optional[str] = None
+    ) -> Dict:
+        """
+        批量计算成对相似度
+
+        Args:
+            pairs: [{"text1": str, "text2": str}, ...]
+            model_name: 可选模型名称
+
+        Returns:
+            {"results": [{"text1": str, "text2": str, "score": float}, ...]}
+        """
+        payload = {"pairs": pairs}
+        if model_name:
+            payload["model_name"] = model_name
+
+        response = self._session.post(
+            f"{self.base_url}/batch_similarity",
+            json=payload,
+            timeout=self.timeout
+        )
+        response.raise_for_status()
+        return response.json()
+
+    def cartesian_similarity(
+        self,
+        texts1: List[str],
+        texts2: List[str],
+        model_name: Optional[str] = None
+    ) -> Dict:
+        """
+        计算笛卡尔积相似度(M×N)
+
+        Args:
+            texts1: 第一组文本列表 (M个)
+            texts2: 第二组文本列表 (N个)
+            model_name: 可选模型名称
+
+        Returns:
+            {
+                "results": [{"text1": str, "text2": str, "score": float}, ...],
+                "total": int  # M×N
+            }
+        """
+        payload = {
+            "texts1": texts1,
+            "texts2": texts2
+        }
+        if model_name:
+            payload["model_name"] = model_name
+
+        response = self._session.post(
+            f"{self.base_url}/cartesian_similarity",
+            json=payload,
+            timeout=self.timeout
+        )
+        response.raise_for_status()
+        return response.json()
+
+
+def _get_api_client() -> SimilarityAPIClient:
+    """获取API客户端单例"""
+    global _api_client
+    if _api_client is None:
+        _api_client = SimilarityAPIClient()
+    return _api_client
+
+
+def _format_result(score: float) -> Dict[str, Any]:
+    """
+    格式化相似度结果(兼容 text_embedding.py 格式)
+
+    Args:
+        score: 相似度分数 (0-1)
+
+    Returns:
+        {"说明": str, "相似度": float}
+    """
+    # 生成说明
+    if score >= 0.9:
+        level = "极高"
+    elif score >= 0.7:
+        level = "高"
+    elif score >= 0.5:
+        level = "中等"
+    elif score >= 0.3:
+        level = "较低"
+    else:
+        level = "低"
+
+    return {
+        "说明": f"基于向量模型计算的语义相似度为 {level} ({score:.2f})",
+        "相似度": score
+    }
+
+
+# ============================================================================
+# 公开接口 - 3种计算模式
+# ============================================================================
+
+def compare_phrases(
+    phrase_a: str,
+    phrase_b: str,
+    model_name: Optional[str] = None
+) -> Dict[str, Any]:
+    """
+    比较两个短语的语义相似度(单对计算)
+
+    Args:
+        phrase_a: 第一个短语
+        phrase_b: 第二个短语
+        model_name: 模型名称(可选,默认使用API服务端默认模型)
+
+    Returns:
+        {
+            "说明": str,      # 相似度说明
+            "相似度": float    # 0-1之间的相似度分数
+        }
+
+    Examples:
+        >>> result = compare_phrases("深度学习", "神经网络")
+        >>> print(result['相似度'])  # 0.855
+        >>> print(result['说明'])    # 基于向量模型计算的语义相似度为 高 (0.86)
+    """
+    try:
+        client = _get_api_client()
+        api_result = client.similarity(phrase_a, phrase_b, model_name)
+        score = float(api_result["score"])
+        return _format_result(score)
+    except Exception as e:
+        raise RuntimeError(f"API调用失败: {e}")
+
+
+def compare_phrases_batch(
+    phrase_pairs: List[Tuple[str, str]],
+    model_name: Optional[str] = None
+) -> List[Dict[str, Any]]:
+    """
+    批量比较多对短语的语义相似度(成对计算)
+
+    说明:pair[i].text1 vs pair[i].text2
+    适用场景:有N对独立的文本需要分别计算相似度
+
+    Args:
+        phrase_pairs: 短语对列表 [(phrase_a, phrase_b), ...]
+        model_name: 模型名称(可选)
+
+    Returns:
+        结果列表,每个元素格式:
+        {
+            "说明": str,
+            "相似度": float
+        }
+
+    Examples:
+        >>> pairs = [
+        ...     ("深度学习", "神经网络"),
+        ...     ("机器学习", "人工智能"),
+        ...     ("Python编程", "Python开发")
+        ... ]
+        >>> results = compare_phrases_batch(pairs)
+        >>> for (a, b), result in zip(pairs, results):
+        ...     print(f"{a} vs {b}: {result['相似度']:.4f}")
+
+    性能:
+        - 3对文本:~50ms(vs 逐对调用 ~150ms)
+        - 100对文本:~200ms(vs 逐对调用 ~5s)
+    """
+    if not phrase_pairs:
+        return []
+
+    try:
+        # 转换为API格式
+        api_pairs = [{"text1": a, "text2": b} for a, b in phrase_pairs]
+
+        # 调用API批量计算
+        client = _get_api_client()
+        api_response = client.batch_similarity(api_pairs, model_name)
+        api_results = api_response["results"]
+
+        # 格式化结果
+        results = []
+        for api_result in api_results:
+            score = float(api_result["score"])
+            results.append(_format_result(score))
+
+        return results
+
+    except Exception as e:
+        raise RuntimeError(f"API批量调用失败: {e}")
+
+
+def compare_phrases_cartesian(
+    phrases_a: List[str],
+    phrases_b: List[str],
+    return_matrix: bool = False
+) -> Any:
+    """
+    计算笛卡尔积相似度(M×N矩阵)
+
+    说明:计算 phrases_a 中每个短语与 phrases_b 中每个短语的相似度
+    适用场景:需要计算两组文本之间所有可能的组合
+
+    Args:
+        phrases_a: 第一组短语列表 (M个)
+        phrases_b: 第二组短语列表 (N个)
+        return_matrix: 为 True 时返回 numpy 分数矩阵(只有分数,更快);默认 False 返回嵌套列表
+
+    Returns:
+        return_matrix=False:M×N的结果矩阵(嵌套列表)
+        results[i][j] = {
+            "相似度": float,  # phrases_a[i] vs phrases_b[j]
+            "说明": str
+        }
+        return_matrix=True:shape 为 (M, N) 的 numpy.ndarray,元素为相似度分数
+
+    Examples:
+        >>> phrases_a = ["深度学习", "机器学习"]
+        >>> phrases_b = ["神经网络", "人工智能", "Python"]
+
+        >>> results = compare_phrases_cartesian(phrases_a, phrases_b)
+        >>> print(results[0][0]['相似度'])  # 深度学习 vs 神经网络
+        >>> print(results[1][2]['说明'])    # 机器学习 vs Python 的说明
+
+        >>> matrix = compare_phrases_cartesian(phrases_a, phrases_b, return_matrix=True)
+        >>> print(matrix.shape)  # (2, 3)
+
+    性能:
+        - 2×3=6个组合:~50ms
+        - 10×100=1000个组合:~500ms
+        - 比逐对调用快 50-200x
+    """
+    if not phrases_a or not phrases_b:
+        return np.zeros((0, 0)) if return_matrix else [[]]
+
+    try:
+        # 调用API计算笛卡尔积(一次性批量调用,不受max_concurrent限制)
+        client = _get_api_client()
+        api_response = client.cartesian_similarity(phrases_a, phrases_b, model_name=None)
+        api_results = api_response["results"]
+
+        M = len(phrases_a)
+        N = len(phrases_b)
+
+        # return_matrix=True:只返回分数矩阵
+        if return_matrix:
+            matrix = np.zeros((M, N), dtype=float)
+            for idx, api_result in enumerate(api_results):
+                matrix[idx // N, idx % N] = float(api_result["score"])
+            return matrix
+
+        # 返回嵌套列表(带完整说明)
+        results = [[None for _ in range(N)] for _ in range(M)]
+        for idx, api_result in enumerate(api_results):
+            i = idx // N
+            j = idx % N
+            score = float(api_result["score"])
+            results[i][j] = _format_result(score)
+        return results
+
+    except Exception as e:
+        raise RuntimeError(f"API笛卡尔积调用失败: {e}")
+
+
+# ============================================================================
+# 工具函数
+# ============================================================================
+
+def get_api_health() -> Dict:
+    """
+    获取API健康状态
+
+    Returns:
+        {
+            "status": "ok",
+            "gpu_available": bool,
+            "gpu_name": str,
+            "model_loaded": bool,
+            "max_batch_pairs": int,
+            "max_cartesian_texts": int,
+            ...
+        }
+    """
+    client = _get_api_client()
+    return client.health_check()
+
+
+def get_supported_models() -> Dict:
+    """
+    获取API支持的模型列表
+
+    Returns:
+        模型列表及详细信息
+    """
+    client = _get_api_client()
+    return client.list_models()
+
+
+# ============================================================================
+# 测试代码
+# ============================================================================
+
+if __name__ == "__main__":
+    print("=" * 80)
+    print(" text_embedding_api 模块测试")
+    print("=" * 80)
+
+    # 测试1: 健康检查
+    print("\n1. API健康检查")
+    print("-" * 80)
+    try:
+        health = get_api_health()
+        print(f"✅ API状态: {health['status']}")
+        print(f"   GPU可用: {health['gpu_available']}")
+        if health.get('gpu_name'):
+            print(f"   GPU名称: {health['gpu_name']}")
+        print(f"   模型已加载: {health['model_loaded']}")
+        print(f"   最大批量对数: {health['max_batch_pairs']}")
+        print(f"   最大笛卡尔积: {health['max_cartesian_texts']}")
+    except Exception as e:
+        print(f"❌ API连接失败: {e}")
+        print("   请确保API服务正常运行")
+        exit(1)
+
+    # 测试2: 单个相似度
+    print("\n2. 单个相似度计算")
+    print("-" * 80)
+    result = compare_phrases("深度学习", "神经网络")
+    print(f"深度学习 vs 神经网络")
+    print(f"  相似度: {result['相似度']:.4f}")
+    print(f"  说明: {result['说明']}")
+
+    # 测试3: 批量成对相似度
+    print("\n3. 批量成对相似度计算")
+    print("-" * 80)
+    pairs = [
+        ("深度学习", "神经网络"),
+        ("机器学习", "人工智能"),
+        ("Python编程", "Python开发")
+    ]
+    results = compare_phrases_batch(pairs)
+    for (a, b), result in zip(pairs, results):
+        print(f"{a} vs {b}: {result['相似度']:.4f}")
+
+    # 测试4: 笛卡尔积(嵌套列表)
+    print("\n4. 笛卡尔积计算(嵌套列表格式)")
+    print("-" * 80)
+    phrases_a = ["深度学习", "机器学习"]
+    phrases_b = ["神经网络", "人工智能", "Python"]
+
+    results = compare_phrases_cartesian(phrases_a, phrases_b)
+    print(f"计算 {len(phrases_a)} × {len(phrases_b)} = {len(phrases_a) * len(phrases_b)} 个相似度")
+
+    for i, phrase_a in enumerate(phrases_a):
+        print(f"\n{phrase_a}:")
+        for j, phrase_b in enumerate(phrases_b):
+            score = results[i][j]['相似度']
+            print(f"  vs {phrase_b:15}: {score:.4f}")
+
+    # 测试5: 笛卡尔积(numpy矩阵)
+    print("\n5. 笛卡尔积计算(numpy矩阵格式)")
+    print("-" * 80)
+    matrix = compare_phrases_cartesian(phrases_a, phrases_b, return_matrix=True)
+    print(f"矩阵 shape: {matrix.shape}")
+    print(f"\n相似度矩阵:")
+    print(f"{'':15}", end="")
+    for b in phrases_b:
+        print(f"{b:15}", end="")
+    print()
+
+    for i, a in enumerate(phrases_a):
+        print(f"{a:15}", end="")
+        for j in range(len(phrases_b)):
+            print(f"{matrix[i][j]:15.4f}", end="")
+        print()
+
+    # 测试6: 性能对比(可选)
+    print("\n6. 性能测试(可选)")
+    print("-" * 80)
+    print("测试大规模笛卡尔积性能...")
+
+    import time
+
+    test_a = ["测试文本A" + str(i) for i in range(10)]
+    test_b = ["测试文本B" + str(i) for i in range(50)]
+
+    print(f"计算 {len(test_a)} × {len(test_b)} = {len(test_a) * len(test_b)} 个相似度")
+
+    start = time.time()
+    matrix = compare_phrases_cartesian(test_a, test_b, return_matrix=True)
+    elapsed = time.time() - start
+
+    print(f"耗时: {elapsed*1000:.2f}ms")
+    print(f"QPS: {matrix.size / elapsed:.2f}")
+
+    print("\n" + "=" * 80)
+    print(" ✅ 所有测试通过!")
+    print("=" * 80)
+
+    print("\n📝 接口总结:")
+    print("  1. compare_phrases(a, b) - 单对计算")
+    print("  2. compare_phrases_batch([(a,b),...]) - 批量成对")
+    print("  3. compare_phrases_cartesian([a1,a2], [b1,b2,b3]) - 笛卡尔积")
+    print("\n💡 提示:所有接口都不使用缓存,因为API已经足够快")

+ 184 - 0
lib/text_embedding_api_README.md

@@ -0,0 +1,184 @@
+# text_embedding_api - 基于远程API的文本相似度计算
+
+## 概述
+
+简化版的文本相似度计算模块,使用远程GPU加速API,**去除了缓存机制**(API已经足够快)。
+
+## 3种计算模式
+
+```python
+from lib.text_embedding_api import (
+    compare_phrases,           # 1. 单对计算
+    compare_phrases_batch,     # 2. 批量成对
+    compare_phrases_cartesian  # 3. 笛卡尔积
+)
+```
+
+### 1. 单对计算
+
+```python
+result = compare_phrases("深度学习", "神经网络")
+print(result['相似度'])  # 0.8500
+print(result['说明'])    # 基于向量模型计算的语义相似度为 高 (0.85)
+```
+
+### 2. 批量成对计算
+
+适用场景:有N对独立的文本需要分别计算相似度
+
+```python
+pairs = [
+    ("深度学习", "神经网络"),
+    ("机器学习", "人工智能"),
+    ("Python编程", "Python开发")
+]
+
+results = compare_phrases_batch(pairs)
+for (a, b), result in zip(pairs, results):
+    print(f"{a} vs {b}: {result['相似度']:.4f}")
+```
+
+### 3. 笛卡尔积计算 ⭐
+
+适用场景:需要计算两组文本之间所有可能的组合(M×N)
+
+#### 方式A: 返回嵌套列表(带说明)
+
+```python
+phrases_a = ["深度学习", "机器学习"]
+phrases_b = ["神经网络", "人工智能", "Python"]
+
+results = compare_phrases_cartesian(phrases_a, phrases_b)
+
+# 访问结果
+print(results[0][0]['相似度'])  # 深度学习 vs 神经网络
+print(results[1][2]['说明'])    # 机器学习 vs Python
+```
+
+#### 方式B: 返回numpy矩阵(只有分数,更快)
+
+```python
+matrix = compare_phrases_cartesian(phrases_a, phrases_b, return_matrix=True)
+
+print(matrix.shape)  # (2, 3)
+print(matrix[0, 1])  # 深度学习 vs 人工智能
+print(matrix[1, 0])  # 机器学习 vs 神经网络
+```
+
+## 性能对比
+
+| 场景 | 数据量 | 耗时 |
+|------|--------|------|
+| **单对计算** | 1对 | ~30ms |
+| **批量成对** | 100对 | ~200ms |
+| **笛卡尔积** | 10×100=1000 | ~500ms |
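+
+下面是一个可用来复现上表量级的最小计时示例(示意代码,实际耗时取决于网络状况与 GPU 负载,数值仅供参考):
+
+```python
+import time
+from lib.text_embedding_api import compare_phrases_cartesian
+
+texts_a = [f"文本A{i}" for i in range(10)]
+texts_b = [f"文本B{i}" for i in range(100)]
+
+start = time.time()
+matrix = compare_phrases_cartesian(texts_a, texts_b, return_matrix=True)  # 10×100=1000 个组合
+print(f"耗时 {(time.time() - start) * 1000:.0f}ms, shape={matrix.shape}")
+```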
+
+## API健康检查
+
+```python
+from lib.text_embedding_api import get_api_health
+
+health = get_api_health()
+print(health['status'])              # "ok"
+print(health['gpu_available'])       # True
+print(health['max_cartesian_texts']) # 最大文本数限制
+```
+
+## 业务集成示例
+
+### 场景1: 一个特征匹配所有人设(1 vs N)
+
+```python
+from lib.text_embedding_api import compare_phrases_cartesian
+
+feature = "宿命感"
+persona_features = ["人设1", "人设2", ..., "人设100"]
+
+# 一次API调用获取所有100个相似度
+matrix = compare_phrases_cartesian([feature], persona_features, return_matrix=True)
+scores = matrix[0]  # 取第一行
+
+for i, score in enumerate(scores):
+    if score > 0.7:  # 只处理高相似度
+        print(f"{feature} → {persona_features[i]}: {score:.4f}")
+```
+
+**性能**: ~0.2秒(vs 逐对调用 ~10秒)
+
+### 场景2: 多个特征批量匹配(M vs N)
+
+```python
+features = ["特征1", "特征2", ..., "特征10"]
+persona_features = ["人设1", "人设2", ..., "人设100"]
+
+# 一次API调用获取10×100=1000个相似度
+matrix = compare_phrases_cartesian(features, persona_features, return_matrix=True)
+
+# 处理结果
+for i, feature in enumerate(features):
+    for j, persona in enumerate(persona_features):
+        score = matrix[i, j]
+        if score > 0.7:
+            print(f"{feature} → {persona}: {score:.4f}")
+```
+
+**性能**: ~0.5秒(vs 逐对调用 ~100秒)
+
+## 与 text_embedding.py 的兼容性
+
+`compare_phrases()` 接口完全兼容:
+
+```python
+# 原来的代码
+from lib.text_embedding import compare_phrases
+
+# 新代码(直接替换)
+from lib.text_embedding_api import compare_phrases
+
+# 使用方式完全相同
+result = compare_phrases("测试1", "测试2")
+```
+
+**区别**:
+- ✅ 更快(GPU加速)
+- ✅ 零内存占用(无需加载模型)
+- ✅ 新增笛卡尔积功能
+- ❌ 需要网络连接
+- ❌ 无缓存机制(API已经够快,不需要)
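+
+如果希望在 API 偶发不可用时平滑回退到本地模型版本,可以按下面的思路加一层薄封装(示意代码,`compare_phrases_safe` 为假设的辅助函数,两个模块的返回结构按上文的兼容约定视为一致):
+
+```python
+from lib.text_embedding_api import compare_phrases as compare_phrases_api   # 远程 GPU API 版本
+from lib.text_embedding import compare_phrases as compare_phrases_local     # 本地模型版本
+
+def compare_phrases_safe(a: str, b: str) -> dict:
+    """优先调用远程 API,出现网络等异常时回退到本地模型"""
+    try:
+        return compare_phrases_api(a, b)
+    except Exception:
+        return compare_phrases_local(a, b)
+
+print(compare_phrases_safe("深度学习", "神经网络")["相似度"])
+```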
+
+## 依赖
+
+```bash
+pip install requests numpy
+```
+
+## 测试
+
+```bash
+python3 lib/text_embedding_api.py
+```
+
+## API配置
+
+默认API地址: `http://61.48.133.26:8187`
+
+如需修改,可在代码中设置:
+
+```python
+from lib.text_embedding_api import SimilarityAPIClient
+
+client = SimilarityAPIClient(
+    base_url="http://your-api-server:8187",
+    timeout=120
+)
+```
+
+## 总结
+
+**3个接口,无缓存,专注性能:**
+
+1. `compare_phrases(a, b)` - 单对
+2. `compare_phrases_batch([(a,b),...])` - 批量成对
+3. `compare_phrases_cartesian([...], [...])` - 笛卡尔积 ⭐
+
+**推荐**: 优先使用笛卡尔积接口处理批量数据,性能最优。

+ 633 - 0
lib/utils.py

@@ -0,0 +1,633 @@
+from typing import List, Dict, Any
+import json
+from .my_trace import get_current_time
+import re
+import uuid
+import datetime
+
+def parse_json_from_text(text: str) -> dict:
+    """
+    从文本中解析JSON,支持多种格式的JSON代码块
+    
+    Args:
+        text (str): 包含JSON的文本
+    
+    Returns:
+        dict: 解析后的JSON数据,解析失败返回空字典
+    """
+    if not text or not isinstance(text, str):
+        return {}
+    
+    # 去除首尾空白字符
+    text = text.strip()
+    
+    # 定义可能的JSON代码块标记
+    json_markers = [
+        ("'''json", "'''"),
+        ('"""json', '"""'),
+        ("```json", "```"),
+        ("```", "```")
+    ]
+    
+    # 尝试提取JSON代码块
+    json_content = text
+    for start_marker, end_marker in json_markers:
+        if text.startswith(start_marker):
+            # 找到开始标记,查找结束标记
+            start_pos = len(start_marker)
+            end_pos = text.find(end_marker, start_pos)
+            if end_pos != -1:
+                json_content = text[start_pos:end_pos].strip()
+                break
+    
+    # 如果没有找到代码块标记,检查是否以结束标记结尾并移除
+    if json_content == text:
+        for _, end_marker in json_markers:
+            if text.endswith(end_marker):
+                json_content = text[:-len(end_marker)].strip()
+                break
+    
+    # 尝试解析JSON
+    try:
+        return json.loads(json_content)
+    except json.JSONDecodeError as e:
+        # 打印详细的解析失败信息
+        print(f"JSON解析失败: {e}")
+        print(f"原始文本长度: {len(text)}")
+        print(f"提取的JSON内容长度: {len(json_content)}")
+        print(f"原始文本内容预览 (前500字符):\n{text[:500]}")
+        print(f"提取的JSON内容预览 (前500字符):\n{json_content[:500]}")
+        print("-" * 80)
+
+        # 如果直接解析失败,尝试查找第一个{到最后一个}的内容
+        try:
+            first_brace = json_content.find('{')
+            last_brace = json_content.rfind('}')
+            if first_brace != -1 and last_brace != -1 and first_brace < last_brace:
+                json_part = json_content[first_brace:last_brace + 1]
+                return json.loads(json_part)
+        except json.JSONDecodeError as e2:
+            print(f"二次解析也失败: {e2}")
+            if first_brace != -1 and last_brace != -1:
+                print(f"尝试解析的内容:\n{json_part[:500]}")
+
+        return {}
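+# 用法示意(假设性示例,不属于模块接口):
+#   parse_json_from_text('```json\n{"a": 1}\n```')    # -> {"a": 1}
+#   parse_json_from_text("'''json\n{\"a\": 1}\n'''")  # -> {"a": 1}
+#   parse_json_from_text("完全不是JSON的文本")          # -> {}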
+
+
+def get_safe_filename(filename: str) -> str:
+    """
+    生成安全的文件名,移除不安全字符
+    
+    Args:
+        filename: 原始文件名
+        
+    Returns:
+        str: 安全的文件名
+    """
+    # 移除不安全的字符,只保留字母、数字、下划线、连字符、点和斜杠
+    return re.sub(r'[^\w\-\./]', '_', filename)
+
+
+def generate_image_filename(mime_type: str, prefix: str = "gemini_img") -> str:
+    """
+    生成合理的图片文件名
+
+    Args:
+        mime_type: 文件MIME类型
+        prefix: 文件名前缀
+
+    Returns:
+        str: 生成的文件名
+    """
+    # 获取当前时间戳
+    timestamp = datetime.datetime.now().strftime("%Y%m%d/%H%M%S")
+
+    # 获取文件扩展名
+    extension = mime_type.split('/')[-1]
+    if extension == "jpeg":
+        extension = "jpg"
+
+    # 生成唯一ID (短UUID)
+    unique_id = str(uuid.uuid4())[:4]
+
+    # 组合文件名
+    filename = f"{prefix}/{timestamp}_{unique_id}.{extension}"
+
+    # 确保文件名安全
+    return get_safe_filename(filename)
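+# 用法示意(时间戳与随机 ID 仅为示例值):
+#   generate_image_filename("image/jpeg")
+#   -> "gemini_img/20250101/120000_ab12.jpg"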
+
+def parse_multimodal_content(content: str) -> List[Dict[str, Any]]:
+    """解析多模态内容,保持上下文顺序,适用于AI参数传递 """
+    
+    result = []
+    lines = content.split('\n')
+    role = ''
+    
+    for line in lines:
+        line = line.strip()
+        if not line:
+            continue
+            
+        # 分割前缀和内容
+        if ':' in line:
+            prefix, content = line.split(':', 1)
+            prefix = prefix.strip().lower()
+            content = content.strip()
+            row = {}
+            if prefix == 'image':
+                row = {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": content
+                    }
+                }
+            elif prefix == 'text':
+                row = {
+                    "type": "text",
+                    "text": content
+                }
+            elif prefix == 'role':
+                role = content
+            if row:
+                if role:
+                    row['role'] = role
+                    role = ''
+                result.append(row)
+    
+    return result
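+# 用法示意(URL 为占位示例):
+#   parse_multimodal_content("role: user\ntext: 你好\nimage: http://example.com/a.jpg")
+#   -> [{"type": "text", "text": "你好", "role": "user"},
+#       {"type": "image_url", "image_url": {"url": "http://example.com/a.jpg"}}]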
+
+
+def read_json(file_path):
+    """
+    读取JSON文件并返回解析后的数据
+    
+    Args:
+        file_path: JSON文件路径
+        
+    Returns:
+        解析后的JSON数据
+    """
+    try:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            return json.load(f)
+    except Exception as e:
+        print(f"读取JSON文件时出错: {e}")
+        return None
+
+def save_json(data, file_path):
+    """
+    保存数据到JSON文件
+    
+    Args:
+        data: 要保存的数据
+        file_path: 保存路径
+    """
+    with open(file_path, 'w', encoding='utf-8') as f:
+        json.dump(data, f, ensure_ascii=False, indent=2)
+        
+
+def get_script_data(file_path):
+    """
+    读取JSON文件并返回解析后的数据
+    
+    Args:
+        file_path: JSON文件路径
+    """
+    return read_json(file_path)['脚本']
+
+import os
+import xml.etree.ElementTree as ET
+from typing import Dict, List, Any
+import re
+import unicodedata
+
+
+def get_model(model_name):
+    # return 'gemini/gemini-2.5-flash'
+    # return 'litellm/gemini/gemini-2.5-flash'
+    if model_name.startswith('litellm'):
+        return model_name
+    else:
+        from openai import AsyncOpenAI
+        from agents import OpenAIChatCompletionsModel
+        BASE_URL = os.getenv("EXAMPLE_BASE_URL") or "https://openrouter.ai/api/v1"
+        API_KEY = os.getenv("OPENROUTER_API_KEY") or ""
+        client = AsyncOpenAI(
+            base_url=BASE_URL,
+            api_key=API_KEY,
+        )
+        return OpenAIChatCompletionsModel(
+            # model='google/gemini-2.5-pro-preview',
+            # model='google/gemini-2.5-flash-preview-05-20',
+            # model='google/gemini-2.5-flash',
+            # model='google/gemini-2.5-flash-preview-05-20:thinking',
+            # model='google/gemini-2.0-flash-001',
+            model=model_name,
+            openai_client=client,
+        )
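+# 用法示意:
+#   get_model('litellm/gemini/gemini-2.5-flash')  # 以 litellm 开头时直接返回模型名字符串
+#   get_model('google/gemini-2.5-flash')          # 否则返回基于 OpenRouter 客户端的 OpenAIChatCompletionsModel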
+
+def read_file_as_string(file_path):
+    """读取文件内容并返回字符串"""
+    try:
+        with open(file_path, 'r', encoding='utf-8') as file:
+            content = file.read().strip()
+        return content
+    except Exception as e:
+        print(f"读取文件时出错: {e}")
+        return None
+def save_file_as_string(file_path, content):
+    """将字符串内容写入文件"""
+    with open(file_path, 'w', encoding='utf-8') as f:
+        f.write(content)
+
+def extract_html_from_markdown(text):
+    """
+    从可能包含markdown或其他代码块的文本中提取HTML内容
+    
+    参数:
+        text: 可能包含各种格式的文本
+        
+    返回:
+        提取出的纯HTML内容
+    """
+    # 处理```html```格式(反引号)
+    backtick_pattern = r"```(?:html)?\s*([\s\S]*?)```"
+    backtick_matches = re.findall(backtick_pattern, text)
+    
+    # 处理'''html'''格式(单引号)
+    single_quote_pattern = r"'''(?:html)?\s*([\s\S]*?)'''"
+    single_quote_matches = re.findall(single_quote_pattern, text)
+    
+    # 处理"""html"""格式(双引号)
+    double_quote_pattern = r'"""(?:html)?\s*([\s\S]*?)"""'
+    double_quote_matches = re.findall(double_quote_pattern, text)
+    
+    if backtick_matches:
+        # 优先使用反引号格式
+        return backtick_matches[0].strip()
+    elif single_quote_matches:
+        # 其次使用单引号格式
+        return single_quote_matches[0].strip()
+    elif double_quote_matches:
+        # 再次使用双引号格式
+        return double_quote_matches[0].strip()
+    else:
+        # 如果没有代码块格式,直接返回原始文本
+        return text
+    
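+# 用法示意:
+#   extract_html_from_markdown('```html\n<div>hi</div>\n```')  # -> '<div>hi</div>'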
+def create_workspace_dir(current_time=None, make_dir=True):
+    if not current_time:
+        current_time = get_current_time()
+    task_dir = f"result/{current_time}"
+    if make_dir:
+        os.makedirs(task_dir, exist_ok=True)
+    task_dir_absolute = os.path.abspath(task_dir)
+    # print(f"任务目录的绝对路径: {task_dir_absolute}")
+    return task_dir_absolute, str(current_time)
+
+
+def extract_tag_content(text, tag_name):
+    """
+    从文本中提取指定标签内的内容
+    
+    参数:
+        text (str): 要处理的文本
+        tag_name (str): 要提取的标签名称
+    
+    返回:
+        str: 标签内的内容,如果未找到则返回空字符串
+    """
+    import re
+    pattern = f"<{tag_name}>(.*?)</{tag_name}>"
+    match = re.search(pattern, text, re.DOTALL)
+    if match:
+        return match.group(1).strip()
+    return ""
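+# 用法示意:
+#   extract_tag_content("<answer>42</answer>", "answer")  # -> "42"
+#   extract_tag_content("没有标签的文本", "answer")          # -> ""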
+
+from typing import Dict, List, Optional
+def parse_tasks(tasks_xml: str) -> List[Dict]:
+    """Parse XML tasks into a list of task dictionaries."""
+    tasks = []
+    current_task = {}
+    
+    for line in tasks_xml.split('\n'):
+        line = line.strip()
+        if not line:
+            continue
+            
+        if line.startswith("<task>"):
+            current_task = {}
+        elif line.startswith("<name>"):
+            current_task["name"] = line[6:-7].strip()
+        elif line.startswith("<output>"):
+            current_task["output"] = line[12:-13].strip()
+        elif line.startswith("</task>"):
+            if "description" in current_task:
+                if "type" not in current_task:
+                    current_task["type"] = "default"
+                tasks.append(current_task)
+    
+    return tasks
+    
+    
+def parse_xml_content(xml_string: str) -> Dict[str, Any]:
+    """
+    将XML字符串解析成字典,提取main_task、thoughts、tasks和resources
+    
+    参数:
+        xml_string: 包含任务信息的XML字符串
+        
+    返回:
+        包含所有解析信息的字典
+    """
+    # 创建结果字典
+    result = {
+        "main_task": {},
+        "thoughts": "",
+        "tasks": [],
+        "resources": []
+    }
+    
+    try:
+        # 提取thoughts内容
+        thoughts_match = re.search(r'<thoughts>(.*?)</thoughts>', xml_string, re.DOTALL)
+        if thoughts_match:
+            result["thoughts"] = thoughts_match.group(1).strip()
+        
+        # 提取main_task内容
+        main_task_match = re.search(r'<main_task>(.*?)</main_task>', xml_string, re.DOTALL)
+        if main_task_match:
+            main_task_content = main_task_match.group(1)
+            main_task = {}
+            
+            # 获取主任务名称
+            name_match = re.search(r'<name>(.*?)</name>', main_task_content, re.DOTALL)
+            if name_match:
+                main_task['name'] = name_match.group(1).strip()
+            
+            # 获取主任务输出
+            output_match = re.search(r'<output>(.*?)</output>', main_task_content, re.DOTALL)
+            if output_match:
+                main_task['output'] = output_match.group(1).strip()
+            
+            # 获取主任务描述
+            description_match = re.search(r'<description>(.*?)</description>', main_task_content, re.DOTALL)
+            if description_match:
+                main_task['description'] = description_match.group(1).strip()
+            
+            result["main_task"] = main_task
+        
+        # 提取<tasks>...</tasks>部分
+        tasks_pattern = re.compile(r'<tasks>(.*?)</tasks>', re.DOTALL)
+        tasks_match = tasks_pattern.search(xml_string)
+        
+        if tasks_match:
+            tasks_content = tasks_match.group(1)
+            
+            # 提取每个task块
+            task_pattern = re.compile(r'<task>(.*?)</task>', re.DOTALL)
+            task_matches = task_pattern.finditer(tasks_content)
+            
+            for task_match in task_matches:
+                task_content = task_match.group(1)
+                task_dict = {}
+                
+                # 获取任务名称
+                name_match = re.search(r'<name>(.*?)</name>', task_content, re.DOTALL)
+                if not name_match:
+                    continue  # 跳过没有名称的任务
+                
+                name = name_match.group(1).strip()
+                task_dict['name'] = name
+                # 获取输出信息
+                output_match = re.search(r'<output>(.*?)</output>', task_content, re.DOTALL)
+                task_dict['output'] = output_match.group(1).strip() if output_match else ""
+                
+                # 获取描述信息
+                description_match = re.search(r'<description>(.*?)</description>', task_content, re.DOTALL)
+                task_dict['description'] = description_match.group(1).strip() if description_match else ""
+                
+                # 获取依赖任务列表
+                depend_tasks = []
+                depend_tasks_section = re.search(r'<depend_tasks>(.*?)</depend_tasks>', task_content, re.DOTALL)
+                if depend_tasks_section:
+                    depend_task_matches = re.finditer(r'<depend_task>(.*?)</depend_task>', 
+                                                   depend_tasks_section.group(1), re.DOTALL)
+                    for dt_match in depend_task_matches:
+                        if dt_match.group(1).strip():
+                            depend_tasks.append(dt_match.group(1).strip())
+                
+                task_dict['depend_tasks'] = depend_tasks
+                
+                # 获取依赖资源列表
+                depend_resources = []
+                resources_match = re.search(r'<depend_resources>(.*?)</depend_resources>', task_content, re.DOTALL)
+                if resources_match and resources_match.group(1).strip():
+                    resources_text = resources_match.group(1).strip()
+                    depend_resources = [res.strip() for res in resources_text.split(',') if res.strip()]
+                
+                task_dict['depend_resources'] = depend_resources
+                
+                # 将任务添加到结果字典
+                result["tasks"].append(task_dict)
+        
+        # 提取resources内容
+        resources_pattern = re.compile(r'<resources>(.*?)</resources>', re.DOTALL)
+        resources_match = resources_pattern.search(xml_string)
+        
+        if resources_match:
+            resources_content = resources_match.group(1).strip()
+            result["resources"] = resources_content
+        return result
+    
+    except Exception as e:
+        raise ValueError(f"处理XML数据时发生错误: {e}")
+
+
+def parse_planner_result(result):
+    """
+    解析规划结果,并为每个任务添加任务目录名
+    
+    参数:
+        result: 包含thoughts、main_task、tasks和resources的规划结果字符串
+        
+    返回:
+        解析后的完整规划信息字典
+    """
+    # 使用parse_xml_content解析完整内容
+    parsed_result = parse_xml_content(result)
+    task_name_to_index = {}
+    task_dict = {
+        'tasks': {},
+        'max_index': 1,
+    }
+    
+    # 为每个任务添加task_dir字段
+    for i, task_info in enumerate(parsed_result["tasks"]):
+        # 使用sanitize_filename生成目录名
+        task_name = task_info.get("name", "task")
+        depend_tasks_dir = []
+        task_info['task_dir'] = get_task_dir(task_name, task_dict)
+        for depend_task in task_info.get("depend_tasks", []):
+            depend_tasks_dir.append(get_task_dir(depend_task, task_dict))
+        task_info['depend_tasks_dir'] = depend_tasks_dir
+        task_info['status'] = 'todo' # 任务状态,todo: 未开始,doing: 进行中,success: 已完成,fail: 失败
+        task_name_to_index[task_name] = i
+    
+    # 为主任务也添加task_dir字段
+    if parsed_result["main_task"]:
+        main_task_name = parsed_result["main_task"].get("name", "main_task")
+        parsed_result["main_task"]["task_dir"] = sanitize_filename(main_task_name)
+    
+    return parsed_result, task_name_to_index
+def get_task_dir(task_name, task_dict, append_index=True):
+    max_index = task_dict.get('max_index', 1)
+    if task_name in task_dict['tasks']:
+        return task_dict['tasks'][task_name]
+    max_index_str = f"{max_index:02d}"
+    task_dir_raw = sanitize_filename(task_name)
+    if append_index:
+        task_dir = f"{max_index_str}_{task_dir_raw}"
+    else:
+        task_dir = task_dir_raw
+    task_dict['tasks'][task_name] = task_dir
+    task_dict['max_index'] = max_index + 1
+    return task_dir
+    
+def sanitize_filename(task_name: str, max_length: int = 20) -> str:
+    """
+    将任务名称转换为适合作为文件夹名称的字符串
+    
+    参数:
+        task_name: 需要转换的任务名称
+        max_length: 文件名最大长度限制,默认20个字符
+        
+    返回:
+        处理后适合作为文件名/文件夹名的字符串
+    """
+    # 替换Windows和Unix系统中不允许的文件名字符
+    # 替换 / \ : * ? " < > | 等字符为下划线
+    sanitized = re.sub(r'[\\/*?:"<>|]', '_', task_name)
+    
+    # 替换连续的空白字符为单个下划线
+    sanitized = re.sub(r'\s+', '_', sanitized)
+    
+    # 移除开头和结尾的点和空格
+    sanitized = sanitized.strip('. ')
+    
+    # 如果名称过长,截断它
+    if len(sanitized) > max_length:
+        # 保留前面的部分和后面的部分,中间用...连接
+        half_length = (max_length - 3) // 2
+        sanitized = sanitized[:half_length] + '...' + sanitized[-half_length:]
+    
+    # 确保名称不为空
+    if not sanitized:
+        sanitized = "unnamed_task"
+    
+    return sanitized
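+# 用法示意:
+#   sanitize_filename('生成报告: 第一步/初稿')  # -> '生成报告__第一步_初稿'
+#   sanitize_filename('...')                   # -> 'unnamed_task'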
+
+def write_json(data, file_path: str) -> None:
+    """
+    将数据写入JSON文件
+    
+    参数:
+        data: 要写入的数据对象
+        file_path: 目标文件路径
+        
+    返回:
+        无
+    """
+    import json
+    with open(file_path, 'w', encoding='utf-8') as f:
+        json.dump(data, f, ensure_ascii=False, indent=2)
+def write_string_to_file(content: str, file_path: str) -> None:
+    """
+    将字符串内容写入文件
+    
+    参数:
+        content: 要写入的字符串内容
+        file_path: 目标文件路径
+        
+    返回:
+        无
+    """
+    with open(file_path, 'w', encoding='utf-8') as f:
+        f.write(content)
+
+def pretty_process(result):
+    def format_output(in_str):
+        return in_str.replace('\n\n', '\n').replace('\\"', '"')
+    process_list = []
+    i = 0
+    call_dict = {}
+    
+    # 首先收集所有工具调用输出
+    for row in result:
+        if isinstance(row, list):
+            # 处理列表:递归处理列表中的每个项目
+            for item in row:
+                if isinstance(item, dict) and item.get('type', '') == 'function_call_output':
+                    call_id = item['call_id']
+                    call_dict[call_id] = item['output']
+        elif isinstance(row, dict) and row.get('type', '') == 'function_call_output':
+            call_id = row['call_id']
+            call_dict[call_id] = row['output']
+    
+    # 然后处理每一行
+    for row in result:
+        if isinstance(row, list):
+            # 递归处理列表中的每个项目
+            for item in row:
+                if isinstance(item, dict):
+                    process_row(item, process_list, call_dict, i)
+                    i += 1
+        else:
+            # 直接处理字典项
+            process_row(row, process_list, call_dict, i)
+            i += 1
+    
+    process_str = '\n'.join(process_list)
+    return process_str
+
+def process_row(row, process_list, call_dict, i):
+    """处理单个行项目,添加到处理列表中"""
+    def format_output(in_str):
+        return in_str.replace('\n\n', '\n').replace('\\"', '"')
+    
+    if not isinstance(row, dict):
+        return
+        
+    action = ''
+    out = ''
+    call_id = ''
+    role_ = row.get('role', '')
+    type_ = row.get('type', '')
+    
+    if type_ == 'function_call':
+        action = f'工具调用-{row.get("name")}'
+        out = row['arguments']
+        call_id = row['call_id']
+    elif type_ == 'function_call_output':
+        return  # 跳过函数调用输出,它们已经被收集到call_dict中
+    elif role_ in ('user', 'assistant'):
+        action = role_
+        if isinstance(row['content'], str):
+            out = row['content']
+        else:
+            content_text = ""
+            for this_c in row['content']:
+                if isinstance(this_c, dict) and 'text' in this_c:
+                    content_text += this_c['text']
+            out = content_text
+    
+    process_list.append('\n\n' + f'{i+1}. ' + '## ' + action + ' ' * 4 + '-' * 32 + '\n')
+    process_list.append(format_output(str(out)))
+    
+    # 如果存在对应的工具输出,添加它
+    if call_id and call_id in call_dict:
+        process_list.append('\n\n' + f'{i+2}. ' + '## ' + '工具输出' + ' ' * 4 + '-' * 32 + '\n')
+        process_list.append(format_output(call_dict[call_id]))
+

+ 20 - 0
pipeline_config.json

@@ -0,0 +1,20 @@
+{
+  "feature": ["墨镜"],
+  "max_notes": 10,
+  "min_score": 8.0,
+  "sort_by": "score",
+  "skip": 0,
+  "timeout": 600,
+  "max_workers": 5,
+  "max_retries": 3,
+  "run_stage8": true,
+  "visualize": true,
+  "open_browser": true,
+  "stage8_weight_embedding": 0.5,
+  "stage8_weight_semantic": 0.5,
+  "stage8_min_similarity": 0.0,
+  "stage8_max_workers": 5,
+  "input": "output_v2/stage6_with_evaluations.json",
+  "output": "output_v2/stage7_with_deconstruction.json",
+  "stage8_output": "output_v2/stage8_similarity_scores.json"
+}

+ 229 - 7
run_stage7.py

@@ -10,7 +10,11 @@ import os
 import json
 import logging
 import argparse
+import webbrowser
+from pathlib import Path
 from stage7_analyzer import Stage7DeconstructionAnalyzer
+from stage8_similarity_analyzer import Stage8SimilarityAnalyzer
+import visualize_stage78_with_deconstruction
 
 # 配置日志
 logging.basicConfig(
@@ -28,10 +32,10 @@ logger = logging.getLogger(__name__)
 def main():
     """主函数"""
     parser = argparse.ArgumentParser(
-        description='Stage 7 深度解构分析(独立运行)',
+        description='Stage 7 深度解构分析(独立运行,支持流水线执行)',
         formatter_class=argparse.RawDescriptionHelpFormatter,
         epilog='''
-示例用法:
+基础用法示例:
   # 只处理"墨镜"特征的前10个高分帖子
   python3 run_stage7.py --feature "墨镜" --max-notes 10
 
@@ -47,8 +51,41 @@ def main():
   # 降低分数阈值,处理更多帖子
   python3 run_stage7.py --feature "墨镜" --min-score 6.0 --max-notes 30
 
-  # 使用配置文件
-  python3 run_stage7.py --config stage7_config.json
+流水线执行示例(推荐):
+  # 完整流水线: Stage 7 → Stage 8 → 可视化 → 自动打开浏览器
+  python3 run_stage7.py --feature "墨镜" --max-notes 10 --run-stage8 --visualize
+
+  # Stage 7 → Stage 8(不生成可视化)
+  python3 run_stage7.py --feature "墨镜" --max-notes 10 --run-stage8
+
+  # Stage 7 → 可视化(跳过 Stage 8)
+  python3 run_stage7.py --feature "墨镜" --max-notes 10 --visualize
+
+  # 完整流水线,不自动打开浏览器
+  python3 run_stage7.py --feature "墨镜" --run-stage8 --visualize --no-open
+
+  # 自定义 Stage 8 相似度权重
+  python3 run_stage7.py --feature "墨镜" --run-stage8 --visualize \\
+    --stage8-weight-embedding 0.7 --stage8-weight-semantic 0.3
+
+  # 过滤低相似度特征
+  python3 run_stage7.py --feature "墨镜" --run-stage8 --visualize \\
+    --stage8-min-similarity 0.3
+
+配置文件示例:
+  # 使用配置文件(支持所有参数)
+  python3 run_stage7.py --config pipeline_config.json
+
+  # 配置文件示例内容(pipeline_config.json):
+  {
+    "feature": ["墨镜"],
+    "max_notes": 10,
+    "timeout": 600,
+    "run_stage8": true,
+    "visualize": true,
+    "stage8_weight_embedding": 0.5,
+    "stage8_weight_semantic": 0.5
+  }
         '''
     )
 
@@ -107,8 +144,8 @@ def main():
     parser.add_argument(
         '--timeout',
         type=int,
-        default=30,
-        help='API 超时时间(秒)(默认: 30)'
+        default=600,
+        help='API 超时时间(秒)(默认: 600,即10分钟)'
     )
     parser.add_argument(
         '--max-retries',
@@ -132,6 +169,69 @@ def main():
         help='从 JSON 配置文件加载参数'
     )
 
+    # 流水线控制参数
+    parser.add_argument(
+        '--run-stage8',
+        action='store_true',
+        help='Stage 7 完成后自动运行 Stage 8'
+    )
+    parser.add_argument(
+        '--visualize',
+        action='store_true',
+        help='生成可视化结果'
+    )
+    parser.add_argument(
+        '--open-browser',
+        action='store_true',
+        default=True,
+        help='自动在浏览器中打开可视化结果(默认: True)'
+    )
+    parser.add_argument(
+        '--no-open',
+        action='store_true',
+        help='禁用自动打开浏览器'
+    )
+
+    # Stage 8 输出配置
+    parser.add_argument(
+        '--stage8-output',
+        default='output_v2/stage8_similarity_scores.json',
+        help='Stage 8 输出文件路径(默认: output_v2/stage8_similarity_scores.json)'
+    )
+
+    # Stage 8 相似度配置
+    parser.add_argument(
+        '--stage8-weight-embedding',
+        type=float,
+        default=0.5,
+        help='Stage 8 向量模型权重(默认: 0.5)'
+    )
+    parser.add_argument(
+        '--stage8-weight-semantic',
+        type=float,
+        default=0.5,
+        help='Stage 8 LLM 模型权重(默认: 0.5)'
+    )
+    parser.add_argument(
+        '--stage8-min-similarity',
+        type=float,
+        default=0.0,
+        help='Stage 8 最小相似度阈值(默认: 0.0)'
+    )
+    parser.add_argument(
+        '--stage8-max-workers',
+        type=int,
+        default=5,
+        help='Stage 8 最大并发数(默认: 5)'
+    )
+
+    # 可视化输出配置
+    parser.add_argument(
+        '--viz-output',
+        default=None,
+        help='可视化输出目录(默认: visualization/)'
+    )
+
     args = parser.parse_args()
 
     # 如果提供了配置文件,加载配置
@@ -196,7 +296,7 @@ def main():
 
         # 打印结果摘要
         logger.info("\n" + "=" * 60)
-        logger.info("执行完成!")
+        logger.info("Stage 7 执行完成!")
         logger.info(f"  总匹配帖子数: {stage7_results['metadata']['total_matched_notes']}")
         logger.info(f"  实际处理数: {stage7_results['metadata']['processed_notes']}")
         logger.info(f"  成功: {stage7_results['metadata']['success_count']}")
@@ -205,6 +305,128 @@ def main():
         logger.info(f"  结果已保存: {args.output}")
         logger.info("=" * 60)
 
+        # Stage 8: 相似度分析
+        stage8_results = None
+        if args.run_stage8:
+            logger.info("\n" + "=" * 60)
+            logger.info("开始执行 Stage 8 相似度分析...")
+            logger.info("=" * 60)
+
+            try:
+                # 创建 Stage 8 分析器
+                stage8_analyzer = Stage8SimilarityAnalyzer(
+                    weight_embedding=args.stage8_weight_embedding,
+                    weight_semantic=args.stage8_weight_semantic,
+                    max_workers=args.stage8_max_workers,
+                    min_similarity=args.stage8_min_similarity,
+                    target_features=args.feature
+                )
+
+                # 运行 Stage 8 分析
+                stage8_results = stage8_analyzer.run(
+                    stage7_results=stage7_results,
+                    output_path=args.stage8_output
+                )
+
+                # 打印 Stage 8 结果摘要
+                logger.info("\n" + "=" * 60)
+                logger.info("Stage 8 执行完成!")
+                metadata = stage8_results['metadata']
+                overall_stats = metadata['overall_statistics']
+
+                logger.info(f"  处理帖子数: {overall_stats['total_notes']}")
+                logger.info(f"  提取特征总数: {overall_stats['total_features_extracted']}")
+                logger.info(f"  平均特征数/帖子: {overall_stats['avg_features_per_note']:.2f}")
+                logger.info(f"  平均最高相似度: {overall_stats['avg_max_similarity']:.3f}")
+                logger.info(f"  包含高相似度特征的帖子: {overall_stats['notes_with_high_similarity']}")
+                logger.info(f"  总耗时: {metadata['processing_time_seconds']:.2f}秒")
+                logger.info(f"  结果已保存: {args.stage8_output}")
+                logger.info("=" * 60)
+
+                # 打印 Top 5 高相似度特征示例
+                if stage8_results['results']:
+                    logger.info("\nTop 5 高相似度特征示例:")
+                    all_features = []
+                    for result in stage8_results['results']:
+                        for feat in result['deconstructed_features'][:5]:
+                            all_features.append({
+                                'note_id': result['note_id'],
+                                'feature_name': feat['feature_name'],
+                                'dimension': feat['dimension'],
+                                'similarity': feat['similarity_score']
+                            })
+
+                    # 按相似度排序,取 Top 5
+                    all_features.sort(key=lambda x: x['similarity'], reverse=True)
+                    for i, feat in enumerate(all_features[:5], 1):
+                        logger.info(f"  {i}. [{feat['note_id'][:12]}...] "
+                                   f"{feat['feature_name']} ({feat['dimension']}) "
+                                   f"- 相似度: {feat['similarity']:.3f}")
+
+            except Exception as e:
+                logger.error(f"Stage 8 执行失败: {e}", exc_info=True)
+                logger.warning("继续执行后续步骤...")
+
+        # 可视化生成
+        viz_path = None
+        if args.visualize:
+            logger.info("\n" + "=" * 60)
+            logger.info("开始生成可视化结果...")
+            logger.info("=" * 60)
+
+            try:
+                # 准备可视化所需的数据文件路径
+                viz_args = [
+                    '--stage6', args.input,
+                    '--stage7', args.output
+                ]
+
+                # 如果有 Stage 8 结果,添加到参数中
+                if stage8_results and args.stage8_output:
+                    viz_args.extend(['--stage8', args.stage8_output])
+
+                # 如果指定了可视化输出目录
+                if args.viz_output:
+                    viz_args.extend(['--output-dir', args.viz_output])
+
+                # 调用可视化模块
+                import sys
+                original_argv = sys.argv
+                try:
+                    sys.argv = ['visualize_stage78_with_deconstruction.py'] + viz_args
+                    viz_path = visualize_stage78_with_deconstruction.main()
+                finally:
+                    sys.argv = original_argv
+
+                if viz_path:
+                    logger.info("\n" + "=" * 60)
+                    logger.info("可视化生成完成!")
+                    logger.info(f"  可视化文件: {viz_path}")
+                    logger.info("=" * 60)
+
+                    # 自动打开浏览器
+                    if args.open_browser and not args.no_open:
+                        logger.info("\n正在打开浏览器...")
+                        try:
+                            # 使用 Path.as_uri() 来正确处理包含中文和特殊字符的路径
+                            file_url = Path(viz_path).resolve().as_uri()
+                            webbrowser.open(file_url)
+                            logger.info("浏览器已打开")
+                        except Exception as e:
+                            logger.warning(f"无法自动打开浏览器: {e}")
+                            logger.info(f"请手动打开: {os.path.abspath(viz_path)}")
+                else:
+                    logger.warning("可视化生成返回了空路径")
+
+            except Exception as e:
+                logger.error(f"可视化生成失败: {e}", exc_info=True)
+                logger.warning("跳过可视化步骤")
+
+        # 流水线执行完成
+        logger.info("\n" + "=" * 60)
+        logger.info("流水线执行完成!")
+        logger.info("=" * 60)
+
     except Exception as e:
         logger.error(f"执行失败: {e}", exc_info=True)
         raise

+ 221 - 0
run_stage8.py

@@ -0,0 +1,221 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""Stage 8 独立运行脚本"""
+
+import os
+import json
+import logging
+import argparse
+from stage8_similarity_analyzer import Stage8SimilarityAnalyzer
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description='Stage 8 解构特征相似度分析(独立运行)',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+使用示例:
+  # 基础用法 - 处理"墨镜"特征
+  python3 run_stage8.py --feature "墨镜"
+
+  # 处理多个特征
+  python3 run_stage8.py --feature "墨镜" "耳环"
+
+  # 自定义权重配置
+  python3 run_stage8.py --feature "墨镜" --weight-embedding 0.7 --weight-semantic 0.3
+
+  # 过滤低相似度特征
+  python3 run_stage8.py --feature "墨镜" --min-similarity 0.3
+
+  # 使用配置文件
+  python3 run_stage8.py --config stage8_config.json
+
+  # 自定义输入输出路径
+  python3 run_stage8.py --input output_v2/stage7_custom.json --output output_v2/stage8_custom.json
+        """
+    )
+
+    # 输入输出
+    parser.add_argument(
+        '--input',
+        default='output_v2/stage7_with_deconstruction.json',
+        help='Stage 7 结果文件路径(默认: output_v2/stage7_with_deconstruction.json)'
+    )
+    parser.add_argument(
+        '--output',
+        default='output_v2/stage8_similarity_scores.json',
+        help='输出文件路径(默认: output_v2/stage8_similarity_scores.json)'
+    )
+
+    # 特征过滤
+    parser.add_argument(
+        '--feature',
+        nargs='+',
+        default=None,
+        help='指定要处理的原始特征名称(可指定多个),如: --feature "墨镜" "耳环"'
+    )
+
+    # 相似度配置
+    parser.add_argument(
+        '--weight-embedding',
+        type=float,
+        default=0.5,
+        help='向量模型权重(默认: 0.5)'
+    )
+    parser.add_argument(
+        '--weight-semantic',
+        type=float,
+        default=0.5,
+        help='LLM 模型权重(默认: 0.5)'
+    )
+    parser.add_argument(
+        '--min-similarity',
+        type=float,
+        default=0.0,
+        help='最小相似度阈值,低于此值的特征会被过滤(默认: 0.0,保留所有)'
+    )
+
+    # 并发配置
+    parser.add_argument(
+        '--max-workers',
+        type=int,
+        default=5,
+        help='最大并发数(默认: 5)'
+    )
+
+    # 配置文件
+    parser.add_argument(
+        '--config',
+        help='从配置文件读取参数(JSON 格式)'
+    )
+
+    # 日志级别
+    parser.add_argument(
+        '--log-level',
+        default='INFO',
+        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'],
+        help='日志级别(默认: INFO)'
+    )
+
+    args = parser.parse_args()
+
+    # 配置日志
+    logging.basicConfig(
+        level=getattr(logging, args.log_level),
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+    )
+    logger = logging.getLogger(__name__)
+
+    # 如果提供了配置文件,从文件读取参数
+    if args.config:
+        logger.info(f"从配置文件读取参数: {args.config}")
+        try:
+            with open(args.config, 'r', encoding='utf-8') as f:
+                config = json.load(f)
+
+            # 配置文件中出现的键会覆盖对应的命令行参数(包括显式传入的值);未出现的键保留命令行取值
+            args.input = config.get('input', args.input)
+            args.output = config.get('output', args.output)
+            args.feature = config.get('feature', args.feature)
+            args.weight_embedding = config.get('weight_embedding', args.weight_embedding)
+            args.weight_semantic = config.get('weight_semantic', args.weight_semantic)
+            args.min_similarity = config.get('min_similarity', args.min_similarity)
+            args.max_workers = config.get('max_workers', args.max_workers)
+
+        except Exception as e:
+            logger.error(f"读取配置文件失败: {e}")
+            return 1
+
+    # 验证输入文件
+    if not os.path.exists(args.input):
+        logger.error(f"输入文件不存在: {args.input}")
+        return 1
+
+    # 读取 Stage 7 结果
+    logger.info(f"读取 Stage 7 结果: {args.input}")
+    try:
+        with open(args.input, 'r', encoding='utf-8') as f:
+            stage7_results = json.load(f)
+    except Exception as e:
+        logger.error(f"读取 Stage 7 结果失败: {e}")
+        return 1
+
+    # 打印配置信息
+    logger.info("\n" + "=" * 60)
+    logger.info("Stage 8 配置:")
+    logger.info("=" * 60)
+    logger.info(f"输入文件: {args.input}")
+    logger.info(f"输出文件: {args.output}")
+    if args.feature:
+        logger.info(f"目标特征: {', '.join(args.feature)}")
+    else:
+        logger.info(f"目标特征: 全部")
+    logger.info(f"向量模型权重: {args.weight_embedding}")
+    logger.info(f"LLM 模型权重: {args.weight_semantic}")
+    logger.info(f"最小相似度阈值: {args.min_similarity}")
+    logger.info(f"最大并发数: {args.max_workers}")
+    logger.info("=" * 60 + "\n")
+
+    # 创建分析器
+    try:
+        analyzer = Stage8SimilarityAnalyzer(
+            weight_embedding=args.weight_embedding,
+            weight_semantic=args.weight_semantic,
+            max_workers=args.max_workers,
+            min_similarity=args.min_similarity,
+            target_features=args.feature
+        )
+    except Exception as e:
+        logger.error(f"创建分析器失败: {e}")
+        return 1
+
+    # 运行分析
+    try:
+        stage8_results = analyzer.run(stage7_results, output_path=args.output)
+
+        # 打印摘要
+        logger.info("\n" + "=" * 60)
+        logger.info("Stage 8 执行完成")
+        logger.info("=" * 60)
+
+        metadata = stage8_results['metadata']
+        overall_stats = metadata['overall_statistics']
+
+        logger.info(f"处理帖子数: {overall_stats['total_notes']}")
+        logger.info(f"提取特征总数: {overall_stats['total_features_extracted']}")
+        logger.info(f"平均特征数/帖子: {overall_stats['avg_features_per_note']}")
+        logger.info(f"平均最高相似度: {overall_stats['avg_max_similarity']}")
+        logger.info(f"包含高相似度特征的帖子: {overall_stats['notes_with_high_similarity']}")
+        logger.info(f"总耗时: {metadata['processing_time_seconds']}秒")
+        logger.info(f"结果已保存: {args.output}")
+        logger.info("=" * 60 + "\n")
+
+        # 打印 Top 5 高相似度特征示例
+        if stage8_results['results']:
+            logger.info("Top 5 高相似度特征示例:")
+            all_features = []
+            for result in stage8_results['results']:
+                for feat in result['deconstructed_features'][:5]:  # 每个帖子取前5个
+                    all_features.append({
+                        'note_id': result['note_id'],
+                        'feature_name': feat['feature_name'],
+                        'dimension': feat['dimension'],
+                        'similarity': feat['similarity_score']
+                    })
+
+            # 按相似度排序,取 Top 5
+            all_features.sort(key=lambda x: x['similarity'], reverse=True)
+            for i, feat in enumerate(all_features[:5], 1):
+                logger.info(f"  {i}. [{feat['note_id'][:12]}...] "
+                           f"{feat['feature_name']} ({feat['dimension']}) "
+                           f"- 相似度: {feat['similarity']:.3f}")
+
+        return 0
+
+    except Exception as e:
+        logger.error(f"Stage 8 执行失败: {e}", exc_info=True)
+        return 1
+
+
+if __name__ == '__main__':
+    exit(main())

+ 15 - 8
stage7_analyzer.py

@@ -136,7 +136,8 @@ class Stage7DeconstructionAnalyzer:
                                     'evaluation': note_eval,
                                     'search_word': search_word,
                                     'source_word': source_word,
-                                    'original_feature': original_feature
+                                    'original_feature': original_feature,
+                                    'top3_persona_features': feature_group.get('top3匹配信息', [])
                                 })
 
         return matched_notes
@@ -272,16 +273,22 @@ class Stage7DeconstructionAnalyzer:
         logger.info(f"  搜索词: {search_word}")
         logger.info(f"  原始特征: {original_feature}")
 
-        # 构建 start_points(使用组合方案
+        # 获取关键匹配点(用于保存到结果中
         key_points = evaluation.get('关键匹配点', [])
-        start_points = [
-            original_feature,                    # 原始特征
-            search_word,                         # 搜索词
-            key_points[0] if key_points else ''  # 第一个关键匹配点
-        ]
-        start_points = [p for p in start_points if p]  # 过滤空值
+
+        # 获取 top3 人设特征
+        top3_features = matched_note_data.get('top3_persona_features', [])
+
+        # 构建 start_points - 只使用 top3 的第一个人设特征名称
+        start_points = []
+        if top3_features:
+            first_feature = top3_features[0].get('人设特征名称', '')
+            if first_feature:
+                start_points = [first_feature]
 
         logger.info(f"  start_points: {start_points}")
+        if top3_features:
+            logger.info(f"  top3人设特征: {[f.get('人设特征名称', '') for f in top3_features[:3]]}")
 
         # 直接使用原始图片URL,不做任何处理
         original_images = note_card.get('image_list', [])

+ 1 - 1
stage7_config.json

@@ -7,7 +7,7 @@
   "max_notes": 10,
   "sort_by": "score",
   "api_url": "http://192.168.245.150:7000/what/analysis/single",
-  "timeout": 30,
+  "timeout": 600,
   "max_retries": 3,
   "max_workers": 5
 }

+ 560 - 0
stage8_similarity_analyzer.py

@@ -0,0 +1,560 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Stage 8 相似度分析器
+计算 Stage 7 解构特征与原始特征的相似度评分
+"""
+
+import os
+import json
+import time
+import logging
+import asyncio
+from datetime import datetime
+from typing import Dict, List, Any, Optional
+from lib.hybrid_similarity import compare_phrases_cartesian
+from lib.config import get_cache_dir
+
+try:
+    from tqdm import tqdm
+    TQDM_AVAILABLE = True
+except ImportError:
+    TQDM_AVAILABLE = False
+
+logger = logging.getLogger(__name__)
+
+
+def extract_deconstructed_features(api_response: Dict) -> List[Dict]:
+    """
+    从三点解构中提取所有特征
+
+    Args:
+        api_response: Stage 7 的 api_response 对象
+
+    Returns:
+        特征列表,每个特征包含:
+        - feature_name: 特征名称
+        - dimension: 维度 (灵感点-全新内容/灵感点-共性差异/灵感点-共性内容/目的点/关键点)
+        - dimension_detail: 维度细分 (实质/形式/意图等)
+        - weight: 权重
+        - source_index: 在该维度中的索引
+        - source_*: 溯源信息 (候选编号、目的点描述、关键点描述等)
+    """
+    features = []
+
+    # 检查 API 响应状态
+    if api_response.get('status') != 'success':
+        logger.warning("  API 响应状态不是 success,无法提取特征")
+        return features
+
+    result = api_response.get('result', {})
+
+    # 检查是否有 data 字段
+    if 'data' not in result:
+        logger.warning("  API 响应中没有 data 字段")
+        return features
+
+    data = result['data']
+    three_point = data.get('三点解构', {})
+
+    if not three_point:
+        logger.warning("  三点解构数据为空")
+        return features
+
+    # 1. 提取灵感点 (3个子类别)
+    inspiration = three_point.get('灵感点', {})
+    for category in ['全新内容', '共性差异', '共性内容']:
+        items = inspiration.get(category, [])
+        for idx, item in enumerate(items):
+            extracted_features = item.get('提取的特征', [])
+            for feat in extracted_features:
+                feature_name = feat.get('特征名称', '')
+                if not feature_name:
+                    continue
+
+                features.append({
+                    'feature_name': feature_name,
+                    'dimension': f'灵感点-{category}',
+                    'dimension_detail': feat.get('维度分类', ''),  # 注意字段名
+                    'weight': feat.get('权重', 0),
+                    'source_index': idx,
+                    'source_candidate_number': item.get('候选编号', 0),
+                    'source_inspiration': item.get('灵感点', '')
+                })
+
+    # 2. 提取目的点
+    purpose = three_point.get('目的点', {})
+    purposes_list = purpose.get('purposes', [])
+    for idx, item in enumerate(purposes_list):
+        extracted_features = item.get('提取的特征', [])
+        for feat in extracted_features:
+            feature_name = feat.get('特征名称', '')
+            if not feature_name:
+                continue
+
+            features.append({
+                'feature_name': feature_name,
+                'dimension': '目的点',
+                'dimension_detail': feat.get('特征分类', ''),  # 注意字段名
+                'weight': feat.get('权重', 0),
+                'source_index': idx,
+                'source_purpose': item.get('目的点', ''),
+                'source_purpose_dimension': item.get('维度', {})
+            })
+
+    # 3. 提取关键点
+    key_points_data = three_point.get('关键点', {})
+    key_points_list = key_points_data.get('key_points', [])
+    for idx, item in enumerate(key_points_list):
+        extracted_features = item.get('提取的特征', [])
+        for feat in extracted_features:
+            feature_name = feat.get('特征名称', '')
+            if not feature_name:
+                continue
+
+            features.append({
+                'feature_name': feature_name,
+                'dimension': '关键点',
+                'dimension_detail': feat.get('维度', ''),  # 注意字段名
+                'weight': feat.get('权重', 0),
+                'source_index': idx,
+                'source_candidate_number': item.get('候选编号', 0),
+                'source_key_point': item.get('关键点', ''),
+                'source_key_point_dimension': item.get('维度', '')
+            })
+
+    logger.info(f"  提取特征数量: {len(features)}")
+    if features:
+        # 统计各维度数量
+        dimension_counts = {}
+        for feat in features:
+            dim = feat['dimension']
+            dimension_counts[dim] = dimension_counts.get(dim, 0) + 1
+        logger.info(f"  维度分布: {dimension_counts}")
+
+    return features
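+# 输入/输出示意(字段结构为假设性示例,以实际解构 API 返回为准):
+#   api_response = {"status": "success", "result": {"data": {"三点解构": {
+#       "灵感点": {"全新内容": [{"候选编号": 1, "灵感点": "复古穿搭",
+#                               "提取的特征": [{"特征名称": "复古感", "维度分类": "形式", "权重": 0.8}]}],
+#                  "共性差异": [], "共性内容": []},
+#       "目的点": {"purposes": []},
+#       "关键点": {"key_points": []}}}}}
+#   extract_deconstructed_features(api_response)
+#   -> [{"feature_name": "复古感", "dimension": "灵感点-全新内容", "dimension_detail": "形式",
+#        "weight": 0.8, "source_index": 0, "source_candidate_number": 1, "source_inspiration": "复古穿搭"}]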
+
+
+async def calculate_similarity_for_note(
+    note_result: Dict,
+    original_feature: str,
+    weight_embedding: float = 0.5,
+    weight_semantic: float = 0.5,
+    min_similarity: float = 0.0
+) -> Dict:
+    """
+    计算单个帖子的所有特征与原始特征的相似度
+
+    Args:
+        note_result: Stage 7 的单个 result 对象
+        original_feature: 原始特征名称
+        weight_embedding: 向量模型权重
+        weight_semantic: LLM 模型权重
+        min_similarity: 最小相似度阈值,低于此值的特征会被过滤
+
+    Returns:
+        包含相似度信息的结果对象
+    """
+    note_id = note_result.get('note_id', '')
+
+    logger.info(f"  [{note_id}] 开始计算相似度...")
+
+    # 1. 提取解构特征
+    deconstructed_features = extract_deconstructed_features(
+        note_result['api_response']
+    )
+
+    if not deconstructed_features:
+        logger.warning(f"  [{note_id}] 没有提取到特征")
+        return {
+            'note_id': note_id,
+            'original_feature': original_feature,
+            'evaluation_score': note_result.get('evaluation_score', 0),
+            'search_word': note_result.get('search_word', ''),
+            'note_data': note_result.get('note_data', {}),
+            'deconstructed_features': [],
+            'similarity_statistics': {
+                'total_features': 0,
+                'max_similarity': 0,
+                'min_similarity': 0,
+                'avg_similarity': 0,
+                'high_similarity_count': 0,
+                'medium_similarity_count': 0,
+                'low_similarity_count': 0
+            }
+        }
+
+    # 2. 构建特征名称列表
+    feature_names = [f['feature_name'] for f in deconstructed_features]
+
+    logger.info(f"  [{note_id}] 调用相似度计算 API (1×{len(feature_names)} 笛卡尔积)...")
+
+    # 3. 批量计算相似度 (1×N 笛卡尔积)
+    try:
+        start_time = time.time()
+        similarity_results = await compare_phrases_cartesian(
+            phrases_a=[original_feature],
+            phrases_b=feature_names,
+            max_concurrent=50
+        )
+        elapsed = time.time() - start_time
+        logger.info(f"  [{note_id}] 相似度计算完成 ({elapsed:.1f}秒)")
+
+        # 4. 映射结果回特征对象
+        for i, feat in enumerate(deconstructed_features):
+            feat['similarity_score'] = similarity_results[0][i]['相似度']
+            feat['similarity_explanation'] = similarity_results[0][i]['说明']
+
+        # 5. 过滤低相似度特征
+        if min_similarity > 0:
+            original_count = len(deconstructed_features)
+            deconstructed_features = [
+                f for f in deconstructed_features
+                if f['similarity_score'] >= min_similarity
+            ]
+            filtered_count = original_count - len(deconstructed_features)
+            if filtered_count > 0:
+                logger.info(f"  [{note_id}] 过滤掉 {filtered_count} 个低相似度特征 (< {min_similarity})")
+
+        # 6. 计算统计信息
+        if deconstructed_features:
+            scores = [f['similarity_score'] for f in deconstructed_features]
+            statistics = {
+                'total_features': len(scores),
+                'max_similarity': round(max(scores), 3),
+                'min_similarity': round(min(scores), 3),
+                'avg_similarity': round(sum(scores) / len(scores), 3),
+                'high_similarity_count': sum(1 for s in scores if s >= 0.7),
+                'medium_similarity_count': sum(1 for s in scores if 0.5 <= s < 0.7),
+                'low_similarity_count': sum(1 for s in scores if s < 0.5)
+            }
+
+            # 7. 按相似度降序排序
+            deconstructed_features.sort(key=lambda x: x['similarity_score'], reverse=True)
+
+            logger.info(f"  [{note_id}] 统计: 最高={statistics['max_similarity']}, "
+                       f"平均={statistics['avg_similarity']}, "
+                       f"高相似度={statistics['high_similarity_count']}个")
+        else:
+            statistics = {
+                'total_features': 0,
+                'max_similarity': 0,
+                'min_similarity': 0,
+                'avg_similarity': 0,
+                'high_similarity_count': 0,
+                'medium_similarity_count': 0,
+                'low_similarity_count': 0
+            }
+
+        return {
+            'note_id': note_id,
+            'original_feature': original_feature,
+            'evaluation_score': note_result.get('evaluation_score', 0),
+            'search_word': note_result.get('search_word', ''),
+            'note_data': note_result.get('note_data', {}),
+            'deconstructed_features': deconstructed_features,
+            'similarity_statistics': statistics,
+            'processing_time_seconds': round(elapsed, 2)
+        }
+
+    except Exception as e:
+        logger.error(f"  [{note_id}] 相似度计算失败: {e}")
+        return {
+            'note_id': note_id,
+            'original_feature': original_feature,
+            'evaluation_score': note_result.get('evaluation_score', 0),
+            'search_word': note_result.get('search_word', ''),
+            'note_data': note_result.get('note_data', {}),
+            'deconstructed_features': [],
+            'similarity_statistics': {
+                'total_features': 0,
+                'error': str(e)
+            }
+        }
+
+
+class Stage8SimilarityAnalyzer:
+    """Stage 8: 解构特征与原始特征的相似度分析"""
+
+    def __init__(
+        self,
+        weight_embedding: float = 0.5,
+        weight_semantic: float = 0.5,
+        max_workers: int = 5,
+        min_similarity: float = 0.0,
+        output_dir: str = "output_v2",
+        target_features: Optional[List[str]] = None
+    ):
+        """
+        初始化 Stage 8 分析器
+
+        Args:
+            weight_embedding: 向量模型权重(默认 0.5)
+            weight_semantic: LLM 模型权重(默认 0.5)
+            max_workers: 最大并发数(默认 5)
+            min_similarity: 最小相似度阈值(默认 0.0,保留所有特征)
+            output_dir: 输出目录
+            target_features: 指定要处理的原始特征列表(None = 处理所有特征)
+        """
+        self.weight_embedding = weight_embedding
+        self.weight_semantic = weight_semantic
+        self.max_workers = max_workers
+        self.min_similarity = min_similarity
+        self.output_dir = output_dir
+        self.target_features = target_features
+
+        # 验证权重
+        total_weight = weight_embedding + weight_semantic
+        if abs(total_weight - 1.0) > 0.001:
+            raise ValueError(f"权重之和必须为1.0,当前为: {total_weight}")
+
+    def _save_intermediate_results(
+        self,
+        results: List[Dict],
+        output_path: str,
+        processed_count: int,
+        total_count: int,
+        start_time: float
+    ):
+        """保存中间结果"""
+        base_dir = os.path.dirname(output_path) or self.output_dir
+        base_name = os.path.basename(output_path)
+        name_without_ext = os.path.splitext(base_name)[0]
+
+        intermediate_path = os.path.join(
+            base_dir,
+            f"{name_without_ext}_partial_{processed_count}of{total_count}.json"
+        )
+
+        # 统计
+        total_features = sum(r['similarity_statistics']['total_features'] for r in results)
+        # 失败的帖子统计信息里可能没有 max_similarity 字段,用 .get 兜底
+        avg_max_sim = sum(r['similarity_statistics'].get('max_similarity', 0) for r in results) / len(results)
+
+        intermediate_result = {
+            'metadata': {
+                'stage': 'stage8_partial',
+                'description': f'部分结果({processed_count}/{total_count})',
+                'processed_notes': len(results),
+                'total_features_extracted': total_features,
+                'avg_max_similarity': round(avg_max_sim, 3),
+                'saved_at': datetime.now().isoformat(),
+                'processing_time_seconds': round(time.time() - start_time, 2)
+            },
+            'results': results
+        }
+
+        os.makedirs(base_dir, exist_ok=True)
+        with open(intermediate_path, 'w', encoding='utf-8') as f:
+            json.dump(intermediate_result, f, ensure_ascii=False, indent=2)
+
+        logger.info(f"    已保存中间结果: {intermediate_path}")
+
+    async def run_async(
+        self,
+        stage7_results: Dict,
+        output_path: Optional[str] = None
+    ) -> Dict:
+        """
+        执行 Stage 8 相似度分析(异步版本)
+
+        Args:
+            stage7_results: Stage 7 结果
+            output_path: 输出路径(可选)
+
+        Returns:
+            Stage 8 结果
+        """
+        logger.info("\n" + "=" * 60)
+        logger.info("Stage 8: 解构特征与原始特征的相似度分析")
+        logger.info("=" * 60)
+
+        # 打印配置
+        logger.info("配置参数:")
+        logger.info(f"  向量模型权重: {self.weight_embedding}")
+        logger.info(f"  LLM 模型权重: {self.weight_semantic}")
+        logger.info(f"  最大并发数: {self.max_workers}")
+        logger.info(f"  最小相似度阈值: {self.min_similarity}")
+        if self.target_features:
+            logger.info(f"  目标特征: {', '.join(self.target_features)}")
+        else:
+            logger.info(f"  目标特征: 全部")
+
+        # 默认输出路径
+        if output_path is None:
+            output_path = os.path.join(self.output_dir, "stage8_similarity_scores.json")
+
+        # 提取 Stage 7 结果
+        results_list = stage7_results.get('results', [])
+
+        # 过滤目标特征
+        if self.target_features:
+            results_list = [
+                r for r in results_list
+                if r.get('original_feature') in self.target_features
+            ]
+
+        total_notes = len(results_list)
+        logger.info(f"  待处理帖子数: {total_notes}")
+
+        if total_notes == 0:
+            logger.warning("  没有需要处理的帖子")
+            return {
+                'metadata': {
+                    'stage': 'stage8',
+                    'processed_notes': 0,
+                    'overall_statistics': {
+                        'total_notes': 0,
+                        'total_features_extracted': 0,
+                        'avg_features_per_note': 0,
+                        'avg_max_similarity': 0,
+                        'notes_with_high_similarity': 0
+                    }
+                },
+                'results': []
+            }
+
+        # 创建任务列表
+        start_time = time.time()
+        results = []
+
+        # 使用 Semaphore 控制并发数
+        semaphore = asyncio.Semaphore(self.max_workers)
+
+        async def bounded_task(result):
+            async with semaphore:
+                return await calculate_similarity_for_note(
+                    result,
+                    result.get('original_feature', ''),
+                    self.weight_embedding,
+                    self.weight_semantic,
+                    self.min_similarity
+                )
+
+        tasks = [bounded_task(result) for result in results_list]
+
+        # 带进度条执行
+        if TQDM_AVAILABLE:
+            logger.info("  使用进度条显示...")
+            processed_count = 0
+            save_interval = 10
+
+            for coro in tqdm(
+                asyncio.as_completed(tasks),
+                total=len(tasks),
+                desc="  相似度计算进度",
+                unit="帖子",
+                ncols=100
+            ):
+                result = await coro
+                results.append(result)
+                processed_count += 1
+
+                # 增量保存
+                if processed_count % save_interval == 0:
+                    self._save_intermediate_results(
+                        results,
+                        output_path,
+                        processed_count,
+                        total_notes,
+                        start_time
+                    )
+        else:
+            # 简单执行
+            results = await asyncio.gather(*tasks)
+            logger.info(f"  完成: {len(results)}/{total_notes}")
+
+        processing_time = time.time() - start_time
+
+        # 计算总体统计
+        total_features = sum(r['similarity_statistics'].get('total_features', 0) for r in results)
+        all_max_similarities = [
+            r['similarity_statistics'].get('max_similarity', 0)
+            for r in results
+            if r['similarity_statistics'].get('total_features', 0) > 0
+        ]
+
+        overall_stats = {
+            'total_notes': total_notes,
+            'total_features_extracted': total_features,
+            'avg_features_per_note': round(total_features / total_notes, 1) if total_notes > 0 else 0,
+            'avg_max_similarity': round(sum(all_max_similarities) / len(all_max_similarities), 3) if all_max_similarities else 0,
+            'notes_with_high_similarity': sum(1 for r in results if r['similarity_statistics'].get('high_similarity_count', 0) > 0)
+        }
+
+        logger.info(f"\n  总耗时: {processing_time:.1f}秒")
+        logger.info(f"  总特征数: {total_features}")
+        logger.info(f"  平均特征数/帖子: {overall_stats['avg_features_per_note']}")
+        logger.info(f"  平均最高相似度: {overall_stats['avg_max_similarity']}")
+        logger.info(f"  包含高相似度特征的帖子: {overall_stats['notes_with_high_similarity']}")
+
+        # 构建最终结果
+        final_result = {
+            'metadata': {
+                'stage': 'stage8',
+                'description': '解构特征与原始特征的相似度评分',
+                'source_file': stage7_results.get('metadata', {}).get('created_at', ''),
+                'target_features': self.target_features if self.target_features else '全部',
+                'similarity_config': {
+                    'algorithm': 'hybrid_similarity',
+                    'weight_embedding': self.weight_embedding,
+                    'weight_semantic': self.weight_semantic,
+                    'min_similarity_threshold': self.min_similarity
+                },
+                'overall_statistics': overall_stats,
+                'created_at': datetime.now().isoformat(),
+                'processing_time_seconds': round(processing_time, 2)
+            },
+            'results': results
+        }
+
+        # 保存结果
+        os.makedirs(os.path.dirname(output_path) or self.output_dir, exist_ok=True)
+        with open(output_path, 'w', encoding='utf-8') as f:
+            json.dump(final_result, f, ensure_ascii=False, indent=2)
+
+        logger.info(f"  结果已保存: {output_path}")
+
+        return final_result
+
+    def run(
+        self,
+        stage7_results: Dict,
+        output_path: Optional[str] = None
+    ) -> Dict:
+        """
+        执行 Stage 8 相似度分析(同步版本)
+
+        Args:
+            stage7_results: Stage 7 结果
+            output_path: 输出路径(可选)
+
+        Returns:
+            Stage 8 结果
+        """
+        return asyncio.run(self.run_async(stage7_results, output_path))
+
+
+def test_stage8_analyzer():
+    """测试 Stage 8 分析器"""
+    # 读取 Stage 7 结果
+    stage7_path = "output_v2/stage7_with_deconstruction.json"
+
+    if not os.path.exists(stage7_path):
+        print(f"Stage 7 结果不存在: {stage7_path}")
+        return
+
+    with open(stage7_path, 'r', encoding='utf-8') as f:
+        stage7_results = json.load(f)
+
+    # 创建分析器
+    analyzer = Stage8SimilarityAnalyzer(
+        weight_embedding=0.5,
+        weight_semantic=0.5,
+        max_workers=3,
+        min_similarity=0.3,
+        target_features=["墨镜"]
+    )
+
+    # 运行分析
+    stage8_results = analyzer.run(stage7_results)
+
+    print(f"\n处理了 {stage8_results['metadata']['overall_statistics']['total_notes']} 个帖子")
+    print(f"提取了 {stage8_results['metadata']['overall_statistics']['total_features_extracted']} 个特征")
+    print(f"平均最高相似度: {stage8_results['metadata']['overall_statistics']['avg_max_similarity']}")
+
+
+if __name__ == '__main__':
+    logging.basicConfig(
+        level=logging.INFO,
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+    )
+    test_stage8_analyzer()

+ 2062 - 0
visualize_stage78_with_deconstruction.py

@@ -0,0 +1,2062 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Stage6/7/8整合可视化工具
+在Stage6评估结果基础上,为完全匹配帖子增加Stage7解构和Stage8相似度展示
+"""
+
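+# 用法示例(假设性说明,具体入口与路径以本文件后续 main 部分的实现为准):
+#   python visualize_stage78_with_deconstruction.py
+#   读取 Stage6/7/8 的 JSON 结果,生成单文件 HTML 可视化页面
+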
+import json
+import os
+from datetime import datetime
+from typing import List, Dict, Any
+
+
+def load_data(json_path: str) -> List[Dict[str, Any]]:
+    """加载JSON数据"""
+    with open(json_path, 'r', encoding='utf-8') as f:
+        return json.load(f)
+
+
+def load_stage7_data(json_path: str) -> Dict[str, Any]:
+    """加载Stage7解构数据"""
+    with open(json_path, 'r', encoding='utf-8') as f:
+        data = json.load(f)
+
+    # 创建note_id到解构数据的映射
+    mapping = {}
+    for result in data.get('results', []):
+        note_id = result.get('note_id')
+        if note_id:
+            mapping[note_id] = result
+
+    return mapping
+
+
+def load_stage8_data(json_path: str) -> Dict[str, Any]:
+    """加载Stage8相似度数据"""
+    with open(json_path, 'r', encoding='utf-8') as f:
+        data = json.load(f)
+
+    # 创建note_id到相似度数据的映射
+    mapping = {}
+    for result in data.get('results', []):
+        note_id = result.get('note_id')
+        if note_id:
+            mapping[note_id] = result
+
+    return mapping
+
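+# 说明:以上两个加载函数假定 Stage7/Stage8 结果文件的大致结构为(字段以实际输出为准):
+#   {"metadata": {...}, "results": [{"note_id": "...", ...}, ...]}
+# 两个映射均以 note_id 为键,供渲染时按帖子查找对应的解构 / 相似度数据。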
+
+def calculate_statistics(data: List[Dict[str, Any]]) -> Dict[str, Any]:
+    """计算统计数据(包括评估结果)"""
+    total_features = len(data)
+    total_search_words = 0
+    searched_count = 0
+    not_searched_count = 0
+    total_notes = 0
+    video_count = 0
+    normal_count = 0
+
+    # 评估统计
+    total_evaluated_notes = 0
+    total_filtered = 0
+    match_complete = 0
+    match_similar = 0
+    match_weak = 0
+    match_none = 0
+
+    for feature in data:
+        grouped_results = feature.get('组合评估结果_分组', [])
+
+        for group in grouped_results:
+            search_items = group.get('top10_searches', [])
+            total_search_words += len(search_items)
+
+            for search_item in search_items:
+                search_result = search_item.get('search_result', {})
+
+                if search_result:
+                    searched_count += 1
+                    notes = search_result.get('data', {}).get('data', [])
+                    total_notes += len(notes)
+
+                    for note in notes:
+                        note_type = note.get('note_card', {}).get('type', '')
+                        if note_type == 'video':
+                            video_count += 1
+                        else:
+                            normal_count += 1
+
+                    evaluation = search_item.get('evaluation_with_filter')
+                    if evaluation:
+                        total_evaluated_notes += evaluation.get('total_notes', 0)
+                        total_filtered += evaluation.get('filtered_count', 0)
+
+                        stats = evaluation.get('statistics', {})
+                        match_complete += stats.get('完全匹配(8-10)', 0)
+                        match_similar += stats.get('相似匹配(6-7)', 0)
+                        match_weak += stats.get('弱相似(5-6)', 0)
+                        match_none += stats.get('无匹配(≤4)', 0)
+                else:
+                    not_searched_count += 1
+
+    total_remaining = total_evaluated_notes - total_filtered if total_evaluated_notes > 0 else 0
+
+    return {
+        'total_features': total_features,
+        'total_search_words': total_search_words,
+        'searched_count': searched_count,
+        'not_searched_count': not_searched_count,
+        'searched_percentage': round(searched_count / total_search_words * 100, 1) if total_search_words > 0 else 0,
+        'total_notes': total_notes,
+        'video_count': video_count,
+        'normal_count': normal_count,
+        'video_percentage': round(video_count / total_notes * 100, 1) if total_notes > 0 else 0,
+        'normal_percentage': round(normal_count / total_notes * 100, 1) if total_notes > 0 else 0,
+        'total_evaluated': total_evaluated_notes,
+        'total_filtered': total_filtered,
+        'total_remaining': total_remaining,
+        'filter_rate': round(total_filtered / total_evaluated_notes * 100, 1) if total_evaluated_notes > 0 else 0,
+        'match_complete': match_complete,
+        'match_similar': match_similar,
+        'match_weak': match_weak,
+        'match_none': match_none,
+        'complete_rate': round(match_complete / total_remaining * 100, 1) if total_remaining > 0 else 0,
+        'similar_rate': round(match_similar / total_remaining * 100, 1) if total_remaining > 0 else 0,
+    }
+
+
+def generate_html(data: List[Dict[str, Any]], stats: Dict[str, Any],
+                  stage7_mapping: Dict[str, Any], stage8_mapping: Dict[str, Any],
+                  output_path: str):
+    """生成HTML可视化页面"""
+
+    # 准备数据JSON
+    data_json = json.dumps(data, ensure_ascii=False, indent=2)
+    stage7_json = json.dumps(stage7_mapping, ensure_ascii=False, indent=2)
+    stage8_json = json.dumps(stage8_mapping, ensure_ascii=False, indent=2)
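+    # 注意:以下将完整 JSON 直接内嵌到 <script> 标签中;若数据里出现 "</script>" 字符串会提前截断脚本。
+    # 如有需要,可在 json.dumps 之后把 "</" 替换为 "<\\/" 再嵌入(此处未做处理)。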
+
+    html_content = f'''<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Stage6/7/8 整合可视化</title>
+    <style>
+        * {{
+            margin: 0;
+            padding: 0;
+            box-sizing: border-box;
+        }}
+
+        body {{
+            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
+            background: #f5f7fa;
+            color: #333;
+            overflow-x: hidden;
+        }}
+
+        /* 顶部统计面板 */
+        .stats-panel {{
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            color: white;
+            padding: 20px;
+            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
+        }}
+
+        .stats-container {{
+            max-width: 1400px;
+            margin: 0 auto;
+        }}
+
+        .stats-row {{
+            display: flex;
+            justify-content: space-around;
+            align-items: center;
+            flex-wrap: wrap;
+            gap: 15px;
+            margin-bottom: 15px;
+        }}
+
+        .stats-row:last-child {{
+            margin-bottom: 0;
+            padding-top: 15px;
+            border-top: 1px solid rgba(255,255,255,0.2);
+        }}
+
+        .stat-item {{
+            text-align: center;
+        }}
+
+        .stat-value {{
+            font-size: 28px;
+            font-weight: bold;
+            margin-bottom: 5px;
+        }}
+
+        .stat-label {{
+            font-size: 12px;
+            opacity: 0.9;
+        }}
+
+        .stat-item.small .stat-value {{
+            font-size: 22px;
+        }}
+
+        /* 过滤控制面板 */
+        .filter-panel {{
+            background: white;
+            max-width: 1400px;
+            margin: 20px auto;
+            padding: 15px 20px;
+            border-radius: 8px;
+            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
+            display: flex;
+            align-items: center;
+            gap: 20px;
+            flex-wrap: wrap;
+        }}
+
+        .filter-label {{
+            font-weight: 600;
+            color: #374151;
+        }}
+
+        .filter-buttons {{
+            display: flex;
+            gap: 10px;
+            flex-wrap: wrap;
+        }}
+
+        .filter-btn {{
+            padding: 6px 12px;
+            border: 2px solid #e5e7eb;
+            background: white;
+            border-radius: 6px;
+            cursor: pointer;
+            font-size: 13px;
+            font-weight: 500;
+            transition: all 0.2s;
+        }}
+
+        .filter-btn:hover {{
+            border-color: #667eea;
+            background: #f9fafb;
+        }}
+
+        .filter-btn.active {{
+            border-color: #667eea;
+            background: #667eea;
+            color: white;
+        }}
+
+        .filter-btn.complete {{
+            border-color: #10b981;
+        }}
+        .filter-btn.complete.active {{
+            background: #10b981;
+            border-color: #10b981;
+        }}
+
+        .filter-btn.similar {{
+            border-color: #f59e0b;
+        }}
+        .filter-btn.similar.active {{
+            background: #f59e0b;
+            border-color: #f59e0b;
+        }}
+
+        .filter-btn.weak {{
+            border-color: #f97316;
+        }}
+        .filter-btn.weak.active {{
+            background: #f97316;
+            border-color: #f97316;
+        }}
+
+        .filter-btn.none {{
+            border-color: #ef4444;
+        }}
+        .filter-btn.none.active {{
+            background: #ef4444;
+            border-color: #ef4444;
+        }}
+
+        .filter-btn.filtered {{
+            border-color: #6b7280;
+        }}
+        .filter-btn.filtered.active {{
+            background: #6b7280;
+            border-color: #6b7280;
+        }}
+
+        /* 主容器 */
+        .main-container {{
+            display: flex;
+            max-width: 1400px;
+            margin: 0 auto 20px;
+            gap: 20px;
+            padding: 0 20px;
+            height: calc(100vh - 260px);
+        }}
+
+        /* 左侧导航 */
+        .left-sidebar {{
+            width: 30%;
+            background: white;
+            border-radius: 8px;
+            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
+            overflow-y: auto;
+            position: sticky;
+            top: 20px;
+            height: fit-content;
+            max-height: calc(100vh - 280px);
+        }}
+
+        .feature-group {{
+            border-bottom: 1px solid #e5e7eb;
+        }}
+
+        .feature-header {{
+            padding: 15px 20px;
+            background: #f9fafb;
+            cursor: pointer;
+            user-select: none;
+            transition: background 0.2s;
+        }}
+
+        .feature-header:hover {{
+            background: #f3f4f6;
+        }}
+
+        .feature-header.active {{
+            background: #667eea;
+            color: white;
+        }}
+
+        .feature-title {{
+            font-size: 16px;
+            font-weight: 600;
+            margin-bottom: 5px;
+        }}
+
+        .feature-meta {{
+            font-size: 12px;
+            color: #6b7280;
+        }}
+
+        .feature-header.active .feature-meta {{
+            color: rgba(255,255,255,0.8);
+        }}
+
+        .search-words-list {{
+            display: none;
+            padding: 0;
+        }}
+
+        .search-words-list.expanded {{
+            display: block;
+        }}
+
+        .base-word-group {{
+            border-bottom: 1px solid #f3f4f6;
+        }}
+
+        .base-word-header {{
+            padding: 12px 20px 12px 30px;
+            background: #fafbfc;
+            cursor: pointer;
+            user-select: none;
+            transition: all 0.2s;
+            border-left: 3px solid transparent;
+        }}
+
+        .base-word-header:hover {{
+            background: #f3f4f6;
+            border-left-color: #a78bfa;
+        }}
+
+        .base-word-header.active {{
+            background: #f3f4f6;
+            border-left-color: #7c3aed;
+        }}
+
+        .base-word-title {{
+            font-size: 15px;
+            font-weight: 600;
+            color: #7c3aed;
+            margin-bottom: 4px;
+        }}
+
+        .base-word-meta {{
+            font-size: 11px;
+            color: #6b7280;
+        }}
+
+        .base-word-desc {{
+            padding: 8px 20px 8px 30px;
+            background: #fefce8;
+            font-size: 12px;
+            color: #854d0e;
+            line-height: 1.5;
+            border-left: 3px solid #fbbf24;
+            display: none;
+        }}
+
+        .base-word-desc.expanded {{
+            display: block;
+        }}
+
+        .search-words-sublist {{
+            display: none;
+        }}
+
+        .search-words-sublist.expanded {{
+            display: block;
+        }}
+
+        .search-word-item {{
+            padding: 12px 20px 12px 50px;
+            cursor: pointer;
+            border-left: 3px solid transparent;
+            transition: all 0.2s;
+        }}
+
+        .search-word-item:hover {{
+            background: #f9fafb;
+            border-left-color: #667eea;
+        }}
+
+        .search-word-item.active {{
+            background: #ede9fe;
+            border-left-color: #7c3aed;
+        }}
+
+        .search-word-text {{
+            font-size: 14px;
+            font-weight: 500;
+            color: #374151;
+            margin-bottom: 4px;
+        }}
+
+        .search-word-score {{
+            display: inline-block;
+            padding: 2px 8px;
+            border-radius: 12px;
+            font-size: 11px;
+            font-weight: 600;
+            margin-left: 8px;
+        }}
+
+        .score-high {{
+            background: #d1fae5;
+            color: #065f46;
+        }}
+
+        .score-medium {{
+            background: #fef3c7;
+            color: #92400e;
+        }}
+
+        .score-low {{
+            background: #fee2e2;
+            color: #991b1b;
+        }}
+
+        .eval-badge {{
+            display: inline-block;
+            padding: 2px 6px;
+            border-radius: 10px;
+            font-size: 11px;
+            font-weight: 600;
+            margin-left: 6px;
+        }}
+
+        .eval-complete {{
+            background: #d1fae5;
+            color: #065f46;
+            border: 1px solid #10b981;
+        }}
+
+        .eval-similar {{
+            background: #fef3c7;
+            color: #92400e;
+            border: 1px solid #f59e0b;
+        }}
+
+        .eval-weak {{
+            background: #fed7aa;
+            color: #9a3412;
+            border: 1px solid #f97316;
+        }}
+
+        .eval-none {{
+            background: #fee2e2;
+            color: #991b1b;
+            border: 1px solid #ef4444;
+        }}
+
+        .eval-filtered {{
+            background: #e5e7eb;
+            color: #4b5563;
+            border: 1px solid #6b7280;
+        }}
+
+        .search-word-eval {{
+            font-size: 11px;
+            color: #6b7280;
+            margin-top: 4px;
+        }}
+
+        /* 右侧结果区 */
+        .right-content {{
+            flex: 1;
+            overflow-y: auto;
+            padding-bottom: 40px;
+        }}
+
+        .result-block {{
+            background: white;
+            border-radius: 8px;
+            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
+            margin-bottom: 30px;
+            padding: 20px;
+            scroll-margin-top: 20px;
+        }}
+
+        .result-header {{
+            margin-bottom: 20px;
+            padding-bottom: 15px;
+            border-bottom: 2px solid #e5e7eb;
+        }}
+
+        .result-title {{
+            font-size: 20px;
+            font-weight: 600;
+            color: #111827;
+            margin-bottom: 10px;
+        }}
+
+        .result-stats {{
+            display: flex;
+            gap: 10px;
+            font-size: 12px;
+            color: #6b7280;
+            flex-wrap: wrap;
+        }}
+
+        .stat-badge {{
+            background: #f3f4f6;
+            padding: 4px 10px;
+            border-radius: 4px;
+        }}
+
+        .stat-badge.eval {{
+            font-weight: 600;
+        }}
+
+        .stat-badge.eval.complete {{
+            background: #d1fae5;
+            color: #065f46;
+        }}
+
+        .stat-badge.eval.similar {{
+            background: #fef3c7;
+            color: #92400e;
+        }}
+
+        .stat-badge.eval.weak {{
+            background: #fed7aa;
+            color: #9a3412;
+        }}
+
+        .stat-badge.eval.none {{
+            background: #fee2e2;
+            color: #991b1b;
+        }}
+
+        .stat-badge.eval.filtered {{
+            background: #e5e7eb;
+            color: #4b5563;
+        }}
+
+        .notes-grid {{
+            display: grid;
+            grid-template-columns: repeat(auto-fill, minmax(280px, 1fr));
+            gap: 20px;
+        }}
+
+        .empty-state {{
+            text-align: center;
+            padding: 60px 40px;
+            color: #6b7280;
+        }}
+
+        .empty-icon {{
+            font-size: 48px;
+            margin-bottom: 16px;
+        }}
+
+        .empty-title {{
+            font-size: 16px;
+            font-weight: 600;
+            color: #374151;
+            margin-bottom: 8px;
+        }}
+
+        .empty-desc {{
+            font-size: 14px;
+            line-height: 1.6;
+            color: #9ca3af;
+            max-width: 400px;
+            margin: 0 auto;
+        }}
+
+        .note-card {{
+            border: 3px solid #e5e7eb;
+            border-radius: 8px;
+            overflow: hidden;
+            cursor: pointer;
+            transition: all 0.3s;
+            background: white;
+        }}
+
+        .note-card:hover {{
+            transform: translateY(-4px);
+            box-shadow: 0 10px 25px rgba(0,0,0,0.15);
+        }}
+
+        .note-card.eval-complete {{
+            border-color: #10b981;
+        }}
+
+        .note-card.eval-similar {{
+            border-color: #f59e0b;
+        }}
+
+        .note-card.eval-weak {{
+            border-color: #f97316;
+        }}
+
+        .note-card.eval-none {{
+            border-color: #ef4444;
+        }}
+
+        .note-card.eval-filtered {{
+            border-color: #6b7280;
+            opacity: 0.6;
+        }}
+
+        /* 图片轮播 */
+        .image-carousel {{
+            position: relative;
+            width: 100%;
+            height: 280px;
+            background: #f3f4f6;
+            overflow: hidden;
+        }}
+
+        .carousel-images {{
+            display: flex;
+            height: 100%;
+            transition: transform 0.3s ease;
+        }}
+
+        .carousel-image {{
+            min-width: 100%;
+            height: 100%;
+            object-fit: cover;
+        }}
+
+        .carousel-btn {{
+            position: absolute;
+            top: 50%;
+            transform: translateY(-50%);
+            background: rgba(0,0,0,0.5);
+            color: white;
+            border: none;
+            width: 32px;
+            height: 32px;
+            border-radius: 50%;
+            cursor: pointer;
+            font-size: 16px;
+            display: none;
+            align-items: center;
+            justify-content: center;
+            transition: background 0.2s;
+            z-index: 10;
+        }}
+
+        .carousel-btn:hover {{
+            background: rgba(0,0,0,0.7);
+        }}
+
+        .carousel-btn.prev {{
+            left: 8px;
+        }}
+
+        .carousel-btn.next {{
+            right: 8px;
+        }}
+
+        .note-card:hover .carousel-btn {{
+            display: flex;
+        }}
+
+        .carousel-indicators {{
+            position: absolute;
+            bottom: 10px;
+            left: 50%;
+            transform: translateX(-50%);
+            display: flex;
+            gap: 6px;
+            z-index: 10;
+        }}
+
+        .dot {{
+            width: 8px;
+            height: 8px;
+            border-radius: 50%;
+            background: rgba(255,255,255,0.5);
+            cursor: pointer;
+            transition: all 0.2s;
+        }}
+
+        .dot.active {{
+            background: white;
+            width: 24px;
+            border-radius: 4px;
+        }}
+
+        .image-counter {{
+            position: absolute;
+            top: 10px;
+            right: 10px;
+            background: rgba(0,0,0,0.6);
+            color: white;
+            padding: 4px 8px;
+            border-radius: 4px;
+            font-size: 12px;
+            z-index: 10;
+        }}
+
+        .note-info {{
+            padding: 12px;
+        }}
+
+        .note-title {{
+            font-size: 14px;
+            font-weight: 500;
+            color: #111827;
+            margin-bottom: 8px;
+            display: -webkit-box;
+            -webkit-line-clamp: 2;
+            -webkit-box-orient: vertical;
+            overflow: hidden;
+            line-height: 1.4;
+        }}
+
+        .note-meta {{
+            display: flex;
+            align-items: center;
+            justify-content: space-between;
+            font-size: 12px;
+            color: #6b7280;
+            margin-bottom: 8px;
+        }}
+
+        .note-type {{
+            padding: 3px 8px;
+            border-radius: 4px;
+            font-weight: 500;
+        }}
+
+        .type-video {{
+            background: #dbeafe;
+            color: #1e40af;
+        }}
+
+        .type-normal {{
+            background: #d1fae5;
+            color: #065f46;
+        }}
+
+        .note-author {{
+            display: flex;
+            align-items: center;
+            gap: 6px;
+        }}
+
+        .author-avatar {{
+            width: 24px;
+            height: 24px;
+            border-radius: 50%;
+        }}
+
+        .note-eval {{
+            padding: 8px 12px;
+            background: #f9fafb;
+            border-top: 1px solid #e5e7eb;
+            font-size: 12px;
+        }}
+
+        .note-eval-header {{
+            display: flex;
+            align-items: center;
+            justify-content: space-between;
+            cursor: pointer;
+            user-select: none;
+        }}
+
+        .note-eval-score {{
+            font-weight: 600;
+        }}
+
+        .note-eval-toggle {{
+            color: #6b7280;
+            font-size: 10px;
+        }}
+
+        .note-eval-details {{
+            margin-top: 8px;
+            padding-top: 8px;
+            border-top: 1px solid #e5e7eb;
+            display: none;
+            line-height: 1.5;
+        }}
+
+        .note-eval-details.expanded {{
+            display: block;
+        }}
+
+        .eval-detail-label {{
+            font-weight: 600;
+            color: #374151;
+            margin-top: 6px;
+            margin-bottom: 2px;
+        }}
+
+        .eval-detail-label:first-child {{
+            margin-top: 0;
+        }}
+
+        .eval-detail-text {{
+            color: #6b7280;
+        }}
+
+        /* ========== 新增: 解构面板样式 ========== */
+
+        .deconstruction-toggle-btn {{
+            width: 100%;
+            padding: 10px;
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            color: white;
+            border: none;
+            border-top: 1px solid #e5e7eb;
+            cursor: pointer;
+            font-size: 13px;
+            font-weight: 600;
+            transition: all 0.3s;
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            gap: 6px;
+            position: relative;
+            z-index: 1;
+        }}
+
+        .deconstruction-toggle-btn:hover {{
+            background: linear-gradient(135deg, #5568d3 0%, #6a3f8f 100%);
+            transform: scale(1.02);
+        }}
+
+        /* 浮层遮罩 */
+        .modal-overlay {{
+            display: none;
+            position: fixed;
+            top: 0;
+            left: 0;
+            right: 0;
+            bottom: 0;
+            background: rgba(0, 0, 0, 0.7);
+            z-index: 9998;
+            animation: fadeIn 0.3s ease;
+        }}
+
+        .modal-overlay.active {{
+            display: flex;
+            align-items: center;
+            justify-content: center;
+        }}
+
+        /* 浮层窗口 */
+        .modal-window {{
+            background: white;
+            border-radius: 12px;
+            width: 90%;
+            max-width: 1200px;
+            max-height: 90vh;
+            overflow: hidden;
+            box-shadow: 0 20px 60px rgba(0, 0, 0, 0.3);
+            animation: slideUp 0.3s ease;
+            display: flex;
+            flex-direction: column;
+        }}
+
+        @keyframes fadeIn {{
+            from {{ opacity: 0; }}
+            to {{ opacity: 1; }}
+        }}
+
+        @keyframes slideUp {{
+            from {{
+                opacity: 0;
+                transform: translateY(50px);
+            }}
+            to {{
+                opacity: 1;
+                transform: translateY(0);
+            }}
+        }}
+
+        /* 浮层头部 */
+        .modal-header {{
+            padding: 20px 25px;
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            color: white;
+            display: flex;
+            align-items: center;
+            justify-content: space-between;
+            flex-shrink: 0;
+        }}
+
+        .modal-title {{
+            font-size: 18px;
+            font-weight: 600;
+            display: flex;
+            align-items: center;
+            gap: 10px;
+        }}
+
+        .modal-note-title {{
+            font-size: 14px;
+            opacity: 0.9;
+            margin-top: 5px;
+        }}
+
+        .modal-close-btn {{
+            background: rgba(255, 255, 255, 0.2);
+            border: none;
+            color: white;
+            width: 36px;
+            height: 36px;
+            border-radius: 50%;
+            cursor: pointer;
+            font-size: 20px;
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            transition: all 0.2s;
+        }}
+
+        .modal-close-btn:hover {{
+            background: rgba(255, 255, 255, 0.3);
+            transform: scale(1.1);
+        }}
+
+        /* 浮层内容区 */
+        .modal-body {{
+            flex: 1;
+            overflow-y: auto;
+            padding: 25px;
+            background: #fafbfc;
+        }}
+
+        .deconstruction-content {{
+            max-width: 1000px;
+            margin: 0 auto;
+        }}
+
+        .deconstruction-header {{
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            color: white;
+            padding: 12px 15px;
+            border-radius: 6px;
+            margin-bottom: 15px;
+            font-size: 14px;
+            font-weight: 600;
+        }}
+
+        .original-feature {{
+            font-size: 16px;
+            margin-top: 4px;
+        }}
+
+        .dimension-card {{
+            background: white;
+            border: 2px solid #e5e7eb;
+            border-radius: 8px;
+            margin-bottom: 12px;
+            overflow: hidden;
+        }}
+
+        .dimension-header {{
+            padding: 10px 15px;
+            background: #667eea;
+            color: white;
+            cursor: pointer;
+            user-select: none;
+            display: flex;
+            align-items: center;
+            justify-content: space-between;
+            font-weight: 600;
+            font-size: 14px;
+        }}
+
+        .dimension-header:hover {{
+            background: #5568d3;
+        }}
+
+        .dimension-title {{
+            display: flex;
+            align-items: center;
+            gap: 8px;
+        }}
+
+        .dimension-count {{
+            font-size: 12px;
+            opacity: 0.9;
+        }}
+
+        .dimension-toggle {{
+            font-size: 12px;
+        }}
+
+        .dimension-body {{
+            max-height: 0;
+            overflow: hidden;
+            transition: max-height 0.3s ease-in-out;
+        }}
+
+        .dimension-body.expanded {{
+            max-height: 1000px;
+        }}
+
+        .feature-list {{
+            padding: 10px;
+        }}
+
+        .feature-item {{
+            padding: 10px;
+            margin-bottom: 8px;
+            background: #f9fafb;
+            border-left: 3px solid #e5e7eb;
+            border-radius: 4px;
+            transition: all 0.2s;
+        }}
+
+        .feature-item:hover {{
+            background: #f3f4f6;
+            border-left-color: #667eea;
+        }}
+
+        .feature-item.top-score {{
+            background: #fff9e6;
+            border-left: 3px solid #FFD700;
+            box-shadow: 0 2px 8px rgba(255, 215, 0, 0.2);
+        }}
+
+        .feature-item.top-score .feature-name {{
+            color: #b8860b;
+            font-weight: 700;
+        }}
+
+        .feature-name {{
+            font-size: 13px;
+            font-weight: 600;
+            color: #111827;
+            margin-bottom: 6px;
+            display: flex;
+            align-items: center;
+            gap: 6px;
+        }}
+
+        .top-badge {{
+            background: #FFD700;
+            color: #000;
+            padding: 2px 6px;
+            border-radius: 4px;
+            font-size: 11px;
+            font-weight: 700;
+        }}
+
+        .feature-meta-row {{
+            display: flex;
+            align-items: center;
+            gap: 8px;
+            margin-bottom: 6px;
+            font-size: 11px;
+            color: #6b7280;
+        }}
+
+        .feature-dimension-detail {{
+            background: #e5e7eb;
+            padding: 2px 6px;
+            border-radius: 3px;
+        }}
+
+        .feature-weight {{
+            background: #dbeafe;
+            padding: 2px 6px;
+            border-radius: 3px;
+        }}
+
+        .similarity-row {{
+            display: flex;
+            align-items: center;
+            gap: 10px;
+        }}
+
+        .similarity-score {{
+            font-size: 14px;
+            font-weight: 700;
+            min-width: 50px;
+        }}
+
+        .similarity-score.high {{
+            color: #10b981;
+        }}
+
+        .similarity-score.medium {{
+            color: #f59e0b;
+        }}
+
+        .similarity-score.low {{
+            color: #6b7280;
+        }}
+
+        .similarity-bar-container {{
+            flex: 1;
+            height: 8px;
+            background: #e5e7eb;
+            border-radius: 4px;
+            overflow: hidden;
+        }}
+
+        .similarity-bar {{
+            height: 100%;
+            border-radius: 4px;
+            transition: width 0.3s ease;
+        }}
+
+        .similarity-bar.high {{
+            background: linear-gradient(90deg, #10b981 0%, #059669 100%);
+        }}
+
+        .similarity-bar.medium {{
+            background: linear-gradient(90deg, #f59e0b 0%, #d97706 100%);
+        }}
+
+        .similarity-bar.low {{
+            background: linear-gradient(90deg, #9ca3af 0%, #6b7280 100%);
+        }}
+
+        .similarity-explanation {{
+            margin-top: 8px;
+            padding: 8px;
+            background: white;
+            border-radius: 4px;
+            font-size: 11px;
+            color: #6b7280;
+            line-height: 1.5;
+            display: none;
+        }}
+
+        .feature-item:hover .similarity-explanation {{
+            display: block;
+        }}
+
+        /* 滚动条样式 */
+        ::-webkit-scrollbar {{
+            width: 8px;
+            height: 8px;
+        }}
+
+        ::-webkit-scrollbar-track {{
+            background: #f1f1f1;
+        }}
+
+        ::-webkit-scrollbar-thumb {{
+            background: #888;
+            border-radius: 4px;
+        }}
+
+        ::-webkit-scrollbar-thumb:hover {{
+            background: #555;
+        }}
+
+        .hidden {{
+            display: none !important;
+        }}
+    </style>
+</head>
+<body>
+    <!-- 统计面板 -->
+    <div class="stats-panel">
+        <div class="stats-container">
+            <div class="stats-row">
+                <div class="stat-item">
+                    <div class="stat-value">📊 {stats['total_features']}</div>
+                    <div class="stat-label">原始特征数</div>
+                </div>
+                <div class="stat-item">
+                    <div class="stat-value">🔍 {stats['total_search_words']}</div>
+                    <div class="stat-label">搜索词总数</div>
+                </div>
+                <div class="stat-item">
+                    <div class="stat-value">✅ {stats['searched_count']}</div>
+                    <div class="stat-label">已搜索 ({stats['searched_percentage']}%)</div>
+                </div>
+                <div class="stat-item">
+                    <div class="stat-value">⏸️ {stats['not_searched_count']}</div>
+                    <div class="stat-label">未搜索</div>
+                </div>
+                <div class="stat-item">
+                    <div class="stat-value">📝 {stats['total_notes']}</div>
+                    <div class="stat-label">帖子总数</div>
+                </div>
+                <div class="stat-item">
+                    <div class="stat-value">🎬 {stats['video_count']}</div>
+                    <div class="stat-label">视频 ({stats['video_percentage']}%)</div>
+                </div>
+                <div class="stat-item">
+                    <div class="stat-value">📷 {stats['normal_count']}</div>
+                    <div class="stat-label">图文 ({stats['normal_percentage']}%)</div>
+                </div>
+            </div>
+            <div class="stats-row">
+                <div class="stat-item small">
+                    <div class="stat-value">⚡ {stats['total_evaluated']}</div>
+                    <div class="stat-label">已评估</div>
+                </div>
+                <div class="stat-item small">
+                    <div class="stat-value">⚫ {stats['total_filtered']}</div>
+                    <div class="stat-label">已过滤 ({stats['filter_rate']}%)</div>
+                </div>
+                <div class="stat-item small">
+                    <div class="stat-value">🟢 {stats['match_complete']}</div>
+                    <div class="stat-label">完全匹配 ({stats['complete_rate']}%)</div>
+                </div>
+                <div class="stat-item small">
+                    <div class="stat-value">🟡 {stats['match_similar']}</div>
+                    <div class="stat-label">相似匹配 ({stats['similar_rate']}%)</div>
+                </div>
+                <div class="stat-item small">
+                    <div class="stat-value">🟠 {stats['match_weak']}</div>
+                    <div class="stat-label">弱相似</div>
+                </div>
+                <div class="stat-item small">
+                    <div class="stat-value">🔴 {stats['match_none']}</div>
+                    <div class="stat-label">无匹配</div>
+                </div>
+            </div>
+        </div>
+    </div>
+
+    <!-- 过滤控制面板 -->
+    <div class="filter-panel">
+        <span class="filter-label">🔍 筛选显示:</span>
+        <div class="filter-buttons">
+            <button class="filter-btn active" onclick="filterNotes('all')">全部</button>
+            <button class="filter-btn complete" onclick="filterNotes('complete')">🟢 完全匹配</button>
+            <button class="filter-btn similar" onclick="filterNotes('similar')">🟡 相似匹配</button>
+            <button class="filter-btn weak" onclick="filterNotes('weak')">🟠 弱相似</button>
+            <button class="filter-btn none" onclick="filterNotes('none')">🔴 无匹配</button>
+            <button class="filter-btn filtered" onclick="filterNotes('filtered')">⚫ 已过滤</button>
+        </div>
+    </div>
+
+    <!-- 主容器 -->
+    <div class="main-container">
+        <!-- 左侧导航 -->
+        <div class="left-sidebar" id="leftSidebar"></div>
+
+        <!-- 右侧结果区 -->
+        <div class="right-content" id="rightContent"></div>
+    </div>
+
+    <!-- 解构结果模态窗口 -->
+    <div class="modal-overlay" id="deconstructionModal">
+        <div class="modal-window">
+            <div class="modal-header">
+                <div>
+                    <div class="modal-title">🎯 解构特征相似度分析</div>
+                    <div class="modal-note-title" id="modalNoteTitle"></div>
+                </div>
+                <button class="modal-close-btn" onclick="closeModal()">×</button>
+            </div>
+            <div class="modal-body">
+                <div class="deconstruction-content" id="modalContent"></div>
+            </div>
+        </div>
+    </div>
+
+    <script>
+        // 数据
+        const data = {data_json};
+        const stage7Data = {stage7_json};
+        const stage8Data = {stage8_json};
+        let currentFilter = 'all';
+
+        // 创建评估映射
+        const noteEvaluations = {{}};
+        data.forEach((feature, fIdx) => {{
+            const groups = feature['组合评估结果_分组'] || [];
+            groups.forEach((group, gIdx) => {{
+                const searches = group['top10_searches'] || [];
+                searches.forEach((search, sIdx) => {{
+                    const evaluation = search['evaluation_with_filter'];
+                    if (evaluation && evaluation.notes_evaluation) {{
+                        evaluation.notes_evaluation.forEach(noteEval => {{
+                            const key = `${{fIdx}}-${{gIdx}}-${{sIdx}}-${{noteEval.note_index}}`;
+                            noteEvaluations[key] = noteEval;
+                        }});
+                    }}
+                }});
+            }});
+        }});
+
+        // 获取评估类别
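+        // 说明:Query相关性不为"相关"的帖子一律归为 filtered;其余按综合得分 ≥8 / ≥6 / ≥5 分档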
+        function getEvalCategory(noteEval) {{
+            if (!noteEval || noteEval['Query相关性'] !== '相关') {{
+                return 'filtered';
+            }}
+            const score = noteEval['综合得分'];
+            if (score >= 8) return 'complete';
+            if (score >= 6) return 'similar';
+            if (score >= 5) return 'weak';
+            return 'none';
+        }}
+
+        // 渲染左侧导航
+        function renderLeftSidebar() {{
+            const sidebar = document.getElementById('leftSidebar');
+            let html = '';
+
+            data.forEach((feature, featureIdx) => {{
+                const groups = feature['组合评估结果_分组'] || [];
+                let totalSearches = 0;
+                groups.forEach(group => {{
+                    totalSearches += (group['top10_searches'] || []).length;
+                }});
+
+                html += `
+                    <div class="feature-group">
+                        <div class="feature-header" onclick="toggleFeature(${{featureIdx}})" id="feature-header-${{featureIdx}}">
+                            <div class="feature-title">${{feature['原始特征名称']}}</div>
+                            <div class="feature-meta">
+                                ${{feature['来源层级']}} · 权重: ${{feature['权重'].toFixed(2)}} · ${{totalSearches}}个搜索词
+                            </div>
+                        </div>
+                        <div class="search-words-list" id="search-words-${{featureIdx}}">
+                `;
+
+                groups.forEach((group, groupIdx) => {{
+                    const baseWord = group['base_word'] || '';
+                    const baseSimilarity = group['base_word_similarity'] || 0;
+                    const searches = group['top10_searches'] || [];
+
+                    const relatedWords = feature['高相似度候选_按base_word']?.[baseWord] || [];
+                    const relatedWordNames = relatedWords.map(w => w['人设特征名称']).slice(0, 10).join('、');
+
+                    html += `
+                        <div class="base-word-group">
+                            <div class="base-word-header" onclick="toggleBaseWord(${{featureIdx}}, ${{groupIdx}})"
+                                 id="base-word-header-${{featureIdx}}-${{groupIdx}}">
+                                <div class="base-word-title">🎯 ${{baseWord}}</div>
+                                <div class="base-word-meta">相似度: ${{baseSimilarity.toFixed(2)}} · ${{searches.length}}个搜索词</div>
+                            </div>
+                            <div class="base-word-desc" id="base-word-desc-${{featureIdx}}-${{groupIdx}}">
+                                ${{relatedWordNames || '无相关词汇'}}
+                            </div>
+                            <div class="search-words-sublist" id="search-words-sublist-${{featureIdx}}-${{groupIdx}}">
+                    `;
+
+                    searches.forEach((sw, swIdx) => {{
+                        const score = sw.score || 0;
+                        const blockId = `block-${{featureIdx}}-${{groupIdx}}-${{swIdx}}`;
+                        const sourceWord = sw.source_word || '';
+
+                        const evaluation = sw['evaluation_with_filter'];
+                        let evalBadges = '';
+                        if (evaluation) {{
+                            const stats = evaluation.statistics || {{}};
+                            const complete = stats['完全匹配(8-10)'] || 0;
+                            const similar = stats['相似匹配(6-7)'] || 0;
+                            const weak = stats['弱相似(5-6)'] || 0;
+                            const none = stats['无匹配(≤4)'] || 0;
+                            const filtered = evaluation.filtered_count || 0;
+
+                            if (complete > 0) evalBadges += `<span class="eval-badge eval-complete">🟢${{complete}}</span>`;
+                            if (similar > 0) evalBadges += `<span class="eval-badge eval-similar">🟡${{similar}}</span>`;
+                            if (weak > 0) evalBadges += `<span class="eval-badge eval-weak">🟠${{weak}}</span>`;
+                            if (none > 0) evalBadges += `<span class="eval-badge eval-none">🔴${{none}}</span>`;
+                            if (filtered > 0) evalBadges += `<span class="eval-badge eval-filtered">⚫${{filtered}}</span>`;
+                        }}
+
+                        html += `
+                            <div class="search-word-item" onclick="scrollToBlock('${{blockId}}')"
+                                 id="sw-${{featureIdx}}-${{groupIdx}}-${{swIdx}}"
+                                 data-block-id="${{blockId}}">
+                                <div class="search-word-text">
+                                    🔍 ${{sw.search_word}}
+                                </div>
+                                <div class="search-word-meta" style="font-size:11px;color:#9ca3af;margin-top:2px">
+                                    来源: ${{sourceWord}}
+                                </div>
+                                <div class="search-word-eval">${{evalBadges}}</div>
+                            </div>
+                        `;
+                    }});
+
+                    html += `
+                            </div>
+                        </div>
+                    `;
+                }});
+
+                html += `
+                        </div>
+                    </div>
+                `;
+            }});
+
+            sidebar.innerHTML = html;
+        }}
+
+        // 渲染右侧结果区
+        function renderRightContent() {{
+            const content = document.getElementById('rightContent');
+            let html = '';
+
+            data.forEach((feature, featureIdx) => {{
+                const groups = feature['组合评估结果_分组'] || [];
+
+                groups.forEach((group, groupIdx) => {{
+                    const searches = group['top10_searches'] || [];
+
+                    searches.forEach((sw, swIdx) => {{
+                        const blockId = `block-${{featureIdx}}-${{groupIdx}}-${{swIdx}}`;
+                        const hasSearchResult = sw.search_result != null;
+                        const searchResult = sw.search_result || {{}};
+                        const notes = searchResult.data?.data || [];
+
+                        const videoCount = notes.filter(n => n.note_card?.type === 'video').length;
+                        const normalCount = notes.length - videoCount;
+
+                        const evaluation = sw['evaluation_with_filter'];
+                        let evalStats = '';
+                        if (evaluation) {{
+                            const stats = evaluation.statistics || {{}};
+                            const complete = stats['完全匹配(8-10)'] || 0;
+                            const similar = stats['相似匹配(6-7)'] || 0;
+                            const weak = stats['弱相似(5-6)'] || 0;
+                            const none = stats['无匹配(≤4)'] || 0;
+                            const filtered = evaluation.filtered_count || 0;
+
+                            if (complete > 0) evalStats += `<span class="stat-badge eval complete">🟢 完全:${{complete}}</span>`;
+                            if (similar > 0) evalStats += `<span class="stat-badge eval similar">🟡 相似:${{similar}}</span>`;
+                            if (weak > 0) evalStats += `<span class="stat-badge eval weak">🟠 弱:${{weak}}</span>`;
+                            if (none > 0) evalStats += `<span class="stat-badge eval none">🔴 无:${{none}}</span>`;
+                            if (filtered > 0) evalStats += `<span class="stat-badge eval filtered">⚫ 过滤:${{filtered}}</span>`;
+                        }}
+
+                        html += `
+                            <div class="result-block" id="${{blockId}}">
+                                <div class="result-header">
+                                    <div class="result-title">${{sw.search_word}}</div>
+                                    <div class="result-stats">
+                        `;
+
+                        if (!hasSearchResult) {{
+                            html += `<span class="stat-badge" style="background:#fef3c7;color:#92400e;font-weight:600">⏸️ 未执行搜索</span>`;
+                        }} else if (notes.length === 0) {{
+                            html += `
+                                <span class="stat-badge">📝 0 条帖子</span>
+                                <span class="stat-badge" style="background:#fee2e2;color:#991b1b;font-weight:600">❌ 未找到匹配</span>
+                            `;
+                        }} else {{
+                            html += `
+                                <span class="stat-badge">📝 ${{notes.length}} 条帖子</span>
+                                <span class="stat-badge">🎬 ${{videoCount}} 视频</span>
+                                <span class="stat-badge">📷 ${{normalCount}} 图文</span>
+                                ${{evalStats}}
+                            `;
+                        }}
+
+                        html += `</div></div>`;
+
+                        if (!hasSearchResult) {{
+                            html += `
+                                <div class="empty-state">
+                                    <div class="empty-icon">⏸️</div>
+                                    <div class="empty-title">该搜索词未执行搜索</div>
+                                    <div class="empty-desc">由于搜索次数限制,该搜索词未被执行</div>
+                                </div>
+                            `;
+                        }} else if (notes.length === 0) {{
+                            html += `
+                                <div class="empty-state">
+                                    <div class="empty-icon">❌</div>
+                                    <div class="empty-title">搜索完成,但未找到匹配的帖子</div>
+                                    <div class="empty-desc">该搜索词已执行,但小红书返回了 0 条结果</div>
+                                </div>
+                            `;
+                        }} else {{
+                            html += `
+                                <div class="notes-grid">
+                                    ${{notes.map((note, noteIdx) => renderNoteCard(note, featureIdx, groupIdx, swIdx, noteIdx)).join('')}}
+                                </div>
+                            `;
+                        }}
+
+                        html += `</div>`;
+                    }});
+                }});
+            }});
+
+            content.innerHTML = html;
+        }}
+
+        // 渲染单个帖子卡片
+        function renderNoteCard(note, featureIdx, groupIdx, swIdx, noteIdx) {{
+            const card = note.note_card || {{}};
+            const images = card.image_list || [];
+            const title = card.display_title || '无标题';
+            const noteType = card.type || 'normal';
+            const noteId = note.id || '';
+            const user = card.user || {{}};
+            const userName = user.nick_name || '未知用户';
+            const userAvatar = user.avatar || '';
+
+            const carouselId = `carousel-${{featureIdx}}-${{groupIdx}}-${{swIdx}}-${{noteIdx}}`;
+
+            const evalKey = `${{featureIdx}}-${{groupIdx}}-${{swIdx}}-${{noteIdx}}`;
+            const noteEval = noteEvaluations[evalKey];
+            const evalCategory = getEvalCategory(noteEval);
+            const evalClass = `eval-${{evalCategory}}`;
+
+            let evalSection = '';
+            if (noteEval) {{
+                const score = noteEval['综合得分'];
+                const scoreEmoji = score >= 8 ? '🟢' : score >= 6 ? '🟡' : score >= 5 ? '🟠' : '🔴';
+                const scoreText = score >= 8 ? '完全匹配' : score >= 6 ? '相似匹配' : score >= 5 ? '弱相似' : '无匹配';
+                const reasoning = noteEval['评分说明'] || '无';
+                const matchingPoints = (noteEval['关键匹配点'] || []).join('、') || '无';
+
+                evalSection = `
+                    <div class="note-eval">
+                        <div class="note-eval-header" onclick="event.stopPropagation(); toggleEvalDetails('${{carouselId}}')">
+                            <span class="note-eval-score">${{scoreEmoji}} ${{scoreText}} (${{score}}分)</span>
+                            <span class="note-eval-toggle" id="${{carouselId}}-toggle">▼ 详情</span>
+                        </div>
+                        <div class="note-eval-details" id="${{carouselId}}-details">
+                            <div class="eval-detail-label">评估理由:</div>
+                            <div class="eval-detail-text">${{reasoning}}</div>
+                            <div class="eval-detail-label">匹配要点:</div>
+                            <div class="eval-detail-text">${{matchingPoints}}</div>
+                        </div>
+                    </div>
+                `;
+            }} else if (evalCategory === 'filtered') {{
+                evalSection = `
+                    <div class="note-eval">
+                        <div class="note-eval-score">⚫ 已过滤(与搜索无关)</div>
+                    </div>
+                `;
+            }}
+
+            // 检查是否有解构数据(仅完全匹配)
+            const hasDeconstruction = evalCategory === 'complete' && (stage7Data[noteId] || stage8Data[noteId]);
+            let deconstructionSection = '';
+
+            if (hasDeconstruction) {{
+                deconstructionSection = `
+                    <button class="deconstruction-toggle-btn" data-note-id="${{noteId}}" data-note-title="${{title.replace(/"/g, '&quot;')}}">
+                        <span>📊</span>
+                        <span>查看解构结果</span>
+                    </button>
+                `;
+            }}
+
+            return `
+                <div class="note-card ${{evalClass}}" data-eval-category="${{evalCategory}}" onclick="openNote('${{noteId}}')">
+                    <div class="image-carousel" id="${{carouselId}}">
+                        <div class="carousel-images">
+                            ${{images.map(img => `<img class="carousel-image" src="${{img}}" alt="帖子图片" loading="lazy">`).join('')}}
+                        </div>
+                        ${{images.length > 1 ? `
+                            <button class="carousel-btn prev" onclick="event.stopPropagation(); changeImage('${{carouselId}}', -1)">←</button>
+                            <button class="carousel-btn next" onclick="event.stopPropagation(); changeImage('${{carouselId}}', 1)">→</button>
+                            <div class="carousel-indicators">
+                                ${{images.map((_, i) => `<span class="dot ${{i === 0 ? 'active' : ''}}" onclick="event.stopPropagation(); goToImage('${{carouselId}}', ${{i}})"></span>`).join('')}}
+                            </div>
+                            <span class="image-counter">1/${{images.length}}</span>
+                        ` : ''}}
+                    </div>
+                    <div class="note-info">
+                        <div class="note-title">${{title}}</div>
+                        <div class="note-meta">
+                            <span class="note-type type-${{noteType}}">
+                                ${{noteType === 'video' ? '🎬 视频' : '📷 图文'}}
+                            </span>
+                            <div class="note-author">
+                                ${{userAvatar ? `<img class="author-avatar" src="${{userAvatar}}" alt="${{userName}}">` : ''}}
+                                <span>${{userName}}</span>
+                            </div>
+                        </div>
+                    </div>
+                    ${{evalSection}}
+                    ${{deconstructionSection}}
+                </div>
+            `;
+        }}
+
+        // Open the deconstruction modal
+        function openDeconstructionModal(noteId, noteTitle) {{
+            console.log('🔧 [调试] openDeconstructionModal被调用, noteId:', noteId);
+
+            const modal = document.getElementById('deconstructionModal');
+            const modalContent = document.getElementById('modalContent');
+            const modalNoteTitle = document.getElementById('modalNoteTitle');
+
+            if (!modal || !modalContent || !modalNoteTitle) {{
+                console.error('❌ [错误] 无法找到模态窗口元素');
+                return;
+            }}
+
+            // Set the modal title
+            modalNoteTitle.textContent = noteTitle || '解构分析';
+
+            // Check whether Stage 8 data exists for this note
+            const hasStage8Data = !!stage8Data[noteId];
+            console.log('📊 [调试] Stage8数据存在:', hasStage8Data);
+
+            if (!hasStage8Data) {{
+                console.warn('⚠️ [警告] 未找到Stage8数据, noteId:', noteId);
+                console.log('📋 [调试] 可用的noteId列表:', Object.keys(stage8Data));
+                modalContent.innerHTML = '<div style="padding: 30px; text-align: center; color: #6b7280;">暂无解构数据</div>';
+            }} else {{
+                try {{
+                    modalContent.innerHTML = renderDeconstructionContent(noteId);
+                    console.log('✅ [调试] 解构内容渲染成功');
+                }} catch (error) {{
+                    console.error('❌ [错误] 渲染解构内容失败:', error);
+                    modalContent.innerHTML = `<div style="padding: 30px; text-align: center; color: red;">渲染错误: ${{error.message}}</div>`;
+                }}
+            }}
+
+            // Show the modal
+            modal.classList.add('active');
+            document.body.style.overflow = 'hidden'; // Prevent background scrolling
+            console.log('✅ [调试] 模态窗口已显示');
+        }}
+
+        // Close the modal
+        function closeModal() {{
+            console.log('🔧 [调试] closeModal被调用');
+            const modal = document.getElementById('deconstructionModal');
+            if (modal) {{
+                modal.classList.remove('active');
+                document.body.style.overflow = ''; // Restore background scrolling
+                console.log('✅ [调试] 模态窗口已关闭');
+            }}
+        }}
+
+        // Close the modal on the Escape key
+        document.addEventListener('keydown', function(e) {{
+            if (e.key === 'Escape') {{
+                const modal = document.getElementById('deconstructionModal');
+                if (modal && modal.classList.contains('active')) {{
+                    closeModal();
+                }}
+            }}
+        }});
+
+        // Close the modal when the overlay is clicked
+        document.addEventListener('click', function(e) {{
+            const modal = document.getElementById('deconstructionModal');
+            if (e.target === modal) {{
+                closeModal();
+            }}
+        }});
+
+        // Render the deconstruction content (Stage 8 similarity scores grouped by dimension)
+        function renderDeconstructionContent(noteId) {{
+            const stage8Info = stage8Data[noteId];
+            if (!stage8Info) {{
+                return '<div style="padding: 15px; text-align: center; color: #6b7280;">暂无解构数据</div>';
+            }}
+
+            const originalFeature = stage8Info.original_feature || '未知特征';
+            const features = stage8Info.deconstructed_features || [];
+
+            // Group deconstructed features by dimension
+            const dimensionGroups = {{}};
+            features.forEach(feat => {{
+                const dim = feat.dimension || '未分类';
+                if (!dimensionGroups[dim]) {{
+                    dimensionGroups[dim] = [];
+                }}
+                dimensionGroups[dim].push(feat);
+            }});
+
+            // Flag the top-scoring feature(s) within each dimension
+            Object.keys(dimensionGroups).forEach(dim => {{
+                const feats = dimensionGroups[dim];
+                if (feats.length > 0) {{
+                    const maxScore = Math.max(...feats.map(f => f.similarity_score || 0));
+                    feats.forEach(f => {{
+                        f.isTopInDimension = (f.similarity_score === maxScore);
+                    }});
+                }}
+            }});
+
+            let html = `
+                <div class="deconstruction-header">
+                    <div>🎯 解构特征相似度分析</div>
+                    <div class="original-feature">目标特征: "${{originalFeature}}"</div>
+                </div>
+            `;
+
+            // Dimension display order: 灵感点 (inspiration) -> 目的点 (purpose) -> 关键点 (key points); unmatched dimensions sort last, alphabetically
+            const dimensionOrder = ['灵感点-全新内容', '灵感点-共性差异', '灵感点-共性内容', '目的点', '关键点'];
+            const sortedDimensions = Object.keys(dimensionGroups).sort((a, b) => {{
+                const aIndex = dimensionOrder.findIndex(d => a.startsWith(d));
+                const bIndex = dimensionOrder.findIndex(d => b.startsWith(d));
+                if (aIndex === -1 && bIndex === -1) return a.localeCompare(b);
+                if (aIndex === -1) return 1;
+                if (bIndex === -1) return -1;
+                return aIndex - bIndex;
+            }});
+
+            sortedDimensions.forEach((dimension, dimIdx) => {{
+                const feats = dimensionGroups[dimension];
+                const dimId = `dim-${{noteId}}-${{dimIdx}}`;
+
+                html += `
+                    <div class="dimension-card">
+                        <div class="dimension-header" onclick="event.stopPropagation(); toggleDimension('${{dimId}}')">
+                            <div class="dimension-title">
+                                <span>${{getDimensionIcon(dimension)}} ${{dimension}}</span>
+                                <span class="dimension-count">(${{feats.length}}个特征)</span>
+                            </div>
+                            <span class="dimension-toggle" id="${{dimId}}-toggle">▼</span>
+                        </div>
+                        <div class="dimension-body expanded" id="${{dimId}}">
+                            <div class="feature-list">
+                `;
+
+                // Sort features by similarity score, descending
+                feats.sort((a, b) => (b.similarity_score || 0) - (a.similarity_score || 0));
+
+                feats.forEach(feat => {{
+                    const score = feat.similarity_score || 0;
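+                    // Similarity color bands: >=0.7 high, >=0.5 medium, otherwise low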
+                    const scoreClass = score >= 0.7 ? 'high' : score >= 0.5 ? 'medium' : 'low';
+                    const barWidth = Math.min(score * 100, 100);
+                    const isTop = feat.isTopInDimension;
+
+                    html += `
+                        <div class="feature-item ${{isTop ? 'top-score' : ''}}">
+                            <div class="feature-name">
+                                ${{isTop ? '<span class="top-badge">🏆 最高分</span>' : ''}}
+                                ${{feat.feature_name || '未命名特征'}}
+                            </div>
+                            <div class="feature-meta-row">
+                                <span class="feature-dimension-detail">${{feat.dimension_detail || '无分类'}}</span>
+                                <span class="feature-weight">权重: ${{(feat.weight || 0).toFixed(1)}}</span>
+                            </div>
+                            <div class="similarity-row">
+                                <span class="similarity-score ${{scoreClass}}">${{score.toFixed(3)}}</span>
+                                <div class="similarity-bar-container">
+                                    <div class="similarity-bar ${{scoreClass}}" style="width: ${{barWidth}}%"></div>
+                                </div>
+                            </div>
+                            <div class="similarity-explanation">
+                                ${{feat.similarity_explanation || '无说明'}}
+                            </div>
+                        </div>
+                    `;
+                }});
+
+                html += `
+                            </div>
+                        </div>
+                    </div>
+                `;
+            }});
+
+            return html;
+        }}
+
+        // Pick an icon for a dimension
+        function getDimensionIcon(dimension) {{
+            if (dimension.includes('灵感点')) return '💡';
+            if (dimension.includes('目的点')) return '🎯';
+            if (dimension.includes('关键点')) return '🔑';
+            return '📋';
+        }}
+
+        // Expand/collapse a dimension card
+        function toggleDimension(dimId) {{
+            const body = document.getElementById(dimId);
+            const toggle = document.getElementById(`${{dimId}}-toggle`);
+
+            if (body.classList.contains('expanded')) {{
+                body.classList.remove('expanded');
+                toggle.textContent = '▶';
+            }} else {{
+                body.classList.add('expanded');
+                toggle.textContent = '▼';
+            }}
+        }}
+
+        // Image carousel logic
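+        // Current image index per carousel, keyed by the carousel element id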
+        const carouselStates = {{}};
+
+        function changeImage(carouselId, direction) {{
+            if (!carouselStates[carouselId]) {{
+                carouselStates[carouselId] = {{ currentIndex: 0 }};
+            }}
+
+            const carousel = document.getElementById(carouselId);
+            const imagesContainer = carousel.querySelector('.carousel-images');
+            const images = carousel.querySelectorAll('.carousel-image');
+            const dots = carousel.querySelectorAll('.dot');
+            const counter = carousel.querySelector('.image-counter');
+
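+            // Step forward/backward, wrapping around at both ends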
+            let newIndex = carouselStates[carouselId].currentIndex + direction;
+            if (newIndex < 0) newIndex = images.length - 1;
+            if (newIndex >= images.length) newIndex = 0;
+
+            carouselStates[carouselId].currentIndex = newIndex;
+            imagesContainer.style.transform = `translateX(-${{newIndex * 100}}%)`;
+
+            dots.forEach((dot, i) => {{
+                dot.classList.toggle('active', i === newIndex);
+            }});
+
+            if (counter) {{
+                counter.textContent = `${{newIndex + 1}}/${{images.length}}`;
+            }}
+        }}
+
+        function goToImage(carouselId, index) {{
+            if (!carouselStates[carouselId]) {{
+                carouselStates[carouselId] = {{ currentIndex: 0 }};
+            }}
+
+            const carousel = document.getElementById(carouselId);
+            const imagesContainer = carousel.querySelector('.carousel-images');
+            const dots = carousel.querySelectorAll('.dot');
+            const counter = carousel.querySelector('.image-counter');
+
+            carouselStates[carouselId].currentIndex = index;
+            imagesContainer.style.transform = `translateX(-${{index * 100}}%)`;
+
+            dots.forEach((dot, i) => {{
+                dot.classList.toggle('active', i === index);
+            }});
+
+            if (counter) {{
+                counter.textContent = `${{index + 1}}/${{dots.length}}`;
+            }}
+        }}
+
+        function toggleFeature(featureIdx) {{
+            const searchWordsList = document.getElementById(`search-words-${{featureIdx}}`);
+            const featureHeader = document.getElementById(`feature-header-${{featureIdx}}`);
+
+            searchWordsList.classList.toggle('expanded');
+            featureHeader.classList.toggle('active');
+        }}
+
+        function toggleBaseWord(featureIdx, groupIdx) {{
+            const baseWordHeader = document.getElementById(`base-word-header-${{featureIdx}}-${{groupIdx}}`);
+            const baseWordDesc = document.getElementById(`base-word-desc-${{featureIdx}}-${{groupIdx}}`);
+            const searchWordsSublist = document.getElementById(`search-words-sublist-${{featureIdx}}-${{groupIdx}}`);
+
+            baseWordHeader.classList.toggle('active');
+            baseWordDesc.classList.toggle('expanded');
+            searchWordsSublist.classList.toggle('expanded');
+        }}
+
+        function scrollToBlock(blockId) {{
+            const block = document.getElementById(blockId);
+            if (block) {{
+                block.scrollIntoView({{ behavior: 'smooth', block: 'start' }});
+
+                document.querySelectorAll('.search-word-item').forEach(item => {{
+                    item.classList.remove('active');
+                }});
+
+                document.querySelectorAll(`[data-block-id="${{blockId}}"]`).forEach(item => {{
+                    item.classList.add('active');
+                }});
+            }}
+        }}
+
+        function toggleEvalDetails(carouselId) {{
+            const details = document.getElementById(`${{carouselId}}-details`);
+            const toggle = document.getElementById(`${{carouselId}}-toggle`);
+
+            if (details && toggle) {{
+                details.classList.toggle('expanded');
+                toggle.textContent = details.classList.contains('expanded') ? '▲ 收起' : '▼ 详情';
+            }}
+        }}
+
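+        // Show only note cards in the selected evaluation category, and hide result blocks left with no visible cards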
+        function filterNotes(category) {{
+            currentFilter = category;
+
+            document.querySelectorAll('.filter-btn').forEach(btn => {{
+                btn.classList.remove('active');
+            }});
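+            // Note: 'event' here is the implicit window.event, since the function is not passed an event argument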
+            event.target.classList.add('active');
+
+            document.querySelectorAll('.note-card').forEach(card => {{
+                const evalCategory = card.getAttribute('data-eval-category');
+                if (category === 'all' || evalCategory === category) {{
+                    card.classList.remove('hidden');
+                }} else {{
+                    card.classList.add('hidden');
+                }}
+            }});
+
+            document.querySelectorAll('.result-block').forEach(block => {{
+                const visibleCards = block.querySelectorAll('.note-card:not(.hidden)');
+                if (visibleCards.length === 0) {{
+                    block.classList.add('hidden');
+                }} else {{
+                    block.classList.remove('hidden');
+                }}
+            }});
+        }}
+
+        function openNote(noteId) {{
+            if (noteId) {{
+                window.open(`https://www.xiaohongshu.com/explore/${{noteId}}`, '_blank');
+            }}
+        }}
+
+        // Log debug info when the page script loads
+        console.log('='.repeat(60));
+        console.log('🚀 [系统] 页面脚本加载完成');
+        console.log('📊 [数据] Stage6特征数:', data.length);
+        console.log('📊 [数据] Stage7解构数:', Object.keys(stage7Data).length);
+        console.log('📊 [数据] Stage8相似度数:', Object.keys(stage8Data).length);
+        console.log('📋 [数据] Stage8可用noteId:', Object.keys(stage8Data));
+        console.log('='.repeat(60));
+
+        // Initialization: render the sidebar and main content, then expand the first feature and base word
+        document.addEventListener('DOMContentLoaded', () => {{
+            console.log('✅ [系统] DOM加载完成,开始初始化...');
+
+            try {{
+                renderLeftSidebar();
+                console.log('✅ [系统] 左侧导航渲染完成');
+
+                renderRightContent();
+                console.log('✅ [系统] 右侧内容渲染完成');
+
+                if (data.length > 0) {{
+                    toggleFeature(0);
+
+                    const firstGroups = data[0]['组合评估结果_分组'];
+                    if (firstGroups && firstGroups.length > 0) {{
+                        toggleBaseWord(0, 0);
+                    }}
+                }}
+
+                console.log('✅ [系统] 页面初始化完成');
+
+                // Bind click handlers to the deconstruction buttons (the cards are injected via innerHTML, so binding happens after a short delay)
+                setTimeout(() => {{
+                    const buttons = document.querySelectorAll('.deconstruction-toggle-btn');
+                    console.log('🔍 [系统] 找到解构按钮数量:', buttons.length);
+
+                    buttons.forEach((btn, index) => {{
+                        const noteId = btn.getAttribute('data-note-id');
+                        const noteTitle = btn.getAttribute('data-note-title');
+                        console.log(`  按钮[${{index}}] noteId:`, noteId, ', title:', noteTitle);
+
+                        // Open the deconstruction modal on click
+                        btn.addEventListener('click', function(e) {{
+                            console.log('🖱️ [事件] 按钮点击, noteId:', noteId);
+                            e.stopPropagation();
+                            e.preventDefault();
+                            openDeconstructionModal(noteId, noteTitle);
+                        }});
+                    }});
+                }}, 500);
+
+            }} catch (error) {{
+                console.error('❌ [错误] 初始化失败:', error);
+            }}
+        }});
+    </script>
+</body>
+</html>
+'''
+
+    with open(output_path, 'w', encoding='utf-8') as f:
+        f.write(html_content)
+
+
+def main():
+    """主函数"""
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+
+    # Load data
+    stage6_path = os.path.join(script_dir, 'output_v2', 'stage6_with_evaluations.json')
+    stage7_path = os.path.join(script_dir, 'output_v2', 'stage7_with_deconstruction.json')
+    stage8_path = os.path.join(script_dir, 'output_v2', 'stage8_similarity_scores.json')
+
+    output_dir = os.path.join(script_dir, 'visualization')
+    os.makedirs(output_dir, exist_ok=True)
+
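+    # Timestamped output filename, so repeated runs do not overwrite earlier reports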
+    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+    output_path = os.path.join(output_dir, f'stage6_with_stage78_{timestamp}.html')
+
+    print(f"📖 加载Stage6数据: {stage6_path}")
+    data = load_data(stage6_path)
+    print(f"✓ 加载了 {len(data)} 个原始特征")
+
+    print(f"📖 加载Stage7数据: {stage7_path}")
+    stage7_mapping = load_stage7_data(stage7_path)
+    print(f"✓ 加载了 {len(stage7_mapping)} 个解构结果")
+
+    print(f"📖 加载Stage8数据: {stage8_path}")
+    stage8_mapping = load_stage8_data(stage8_path)
+    print(f"✓ 加载了 {len(stage8_mapping)} 个相似度评分")
+
+    print("📊 计算统计数据...")
+    stats = calculate_statistics(data)
+    print(f"✓ 统计完成:")
+    print(f"  - 原始特征: {stats['total_features']}")
+    print(f"  - 搜索词总数: {stats['total_search_words']}")
+    print(f"  - 帖子总数: {stats['total_notes']}")
+    print(f"  - 完全匹配: {stats['match_complete']} ({stats['complete_rate']}%)")
+
+    print(f"\n🎨 生成可视化页面...")
+    generate_html(data, stats, stage7_mapping, stage8_mapping, output_path)
+    print(f"✓ 生成完成: {output_path}")
+
+    print(f"\n🌐 在浏览器中打开查看:")
+    print(f"   file://{output_path}")
+
+    return output_path
+
+
+if __name__ == '__main__':
+    main()