@@ -0,0 +1,1291 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Enhanced search system V2
+Full pipeline supporting LLM evaluation and extended search
+"""
+
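+# Example invocation (flags defined in main() below; the input path is illustrative):
+#   python3 enhanced_search_v2.py --how-json input/posts/<post_id>_how.json \
+#       --output-dir output_v2 --enable-stage5 --max-total-searches 50
+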
+import json
+import logging
+import os
+import argparse
+import subprocess
+from typing import Dict, List, Any, Optional
+from datetime import datetime
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+from src.clients.openrouter_client import OpenRouterClient
+from src.evaluators.llm_evaluator import LLMEvaluator
+from src.clients.xiaohongshu_search import XiaohongshuSearch
+from src.analyzers.post_deconstruction_analyzer import PostDeconstructionAnalyzer
+from src.analyzers.similarity_analyzer import SimilarityAnalyzer
+
+# Logging configuration
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    datefmt='%Y-%m-%d %H:%M:%S',
+    handlers=[
+        logging.FileHandler('enhanced_search_v2.log', encoding='utf-8'),
+        logging.StreamHandler()
+    ]
+)
+logger = logging.getLogger(__name__)
+
+
+class EnhancedSearchV2:
+    """Enhanced search system V2"""
+
+    def __init__(
+        self,
+        how_json_path: str,
+        openrouter_api_key: Optional[str] = None,
+        output_dir: str = "output_v2",
+        top_n: int = 10,
+        max_total_searches: Optional[int] = None,
+        search_max_workers: int = 3,
+        max_searches_per_feature: Optional[int] = None,
+        max_searches_per_base_word: Optional[int] = None,
+        enable_evaluation: bool = True,
+        evaluation_max_workers: int = 10,
+        evaluation_max_notes_per_query: int = 20,
+        enable_deep_analysis: bool = False,
+        deep_analysis_only: bool = False,
+        deep_analysis_max_workers: int = 5,
+        deep_analysis_max_notes: Optional[int] = None,
+        deep_analysis_skip_count: int = 0,
+        deep_analysis_sort_by: str = 'score',
+        deep_analysis_api_url: str = "http://192.168.245.150:7000/what/analysis/single",
+        deep_analysis_min_score: float = 0.8,
+        enable_similarity_analysis: bool = False,
+        similarity_weight_embedding: float = 0.5,
+        similarity_weight_semantic: float = 0.5,
+        similarity_max_workers: int = 5,
+        similarity_min_similarity: float = 0.0
+    ):
+        """
+        Initialize the system
+
+        Args:
+            how_json_path: Path to the "how" deconstruction file
+            openrouter_api_key: OpenRouter API key
+            output_dir: Output directory
+            top_n: Keep the N highest-scoring search words per original feature (default 10)
+            max_total_searches: Global cap on total searches (default None, unlimited)
+            search_max_workers: Search concurrency (default 3)
+            max_searches_per_feature: Max searches per original feature (default None, unlimited)
+            max_searches_per_base_word: Max searches per base_word (default None, unlimited)
+            enable_evaluation: Whether to evaluate search results (default True)
+            evaluation_max_workers: Concurrency for result evaluation (default 10)
+            evaluation_max_notes_per_query: Max notes evaluated per search result (default 20)
+            enable_deep_analysis: Whether to run deep deconstruction (default False)
+            deep_analysis_only: Run deep deconstruction only, starting from evaluation results (default False)
+            deep_analysis_max_workers: Deep deconstruction concurrency (default 5)
+            deep_analysis_max_notes: Max posts to deep-deconstruct (default None, unlimited)
+            deep_analysis_skip_count: Skip the first N posts in deep deconstruction (default 0)
+            deep_analysis_sort_by: Deep deconstruction ordering: score/time/engagement (default score)
+            deep_analysis_api_url: Deep deconstruction API endpoint
+            deep_analysis_min_score: Minimum score for deep deconstruction (default 0.8, on a 0-1 scale)
+            enable_similarity_analysis: Whether to run similarity analysis (default False)
+            similarity_weight_embedding: Embedding-model weight for similarity analysis (default 0.5)
+            similarity_weight_semantic: LLM-model weight for similarity analysis (default 0.5)
+            similarity_max_workers: Similarity analysis concurrency (default 5)
+            similarity_min_similarity: Minimum similarity threshold (default 0.0)
+        """
+        self.how_json_path = how_json_path
+        self.output_dir = output_dir
+        self.top_n = top_n
+        self.max_total_searches = max_total_searches
+        self.search_max_workers = search_max_workers
+        self.max_searches_per_feature = max_searches_per_feature
+        self.max_searches_per_base_word = max_searches_per_base_word
+        self.enable_evaluation = enable_evaluation
+        self.evaluation_max_workers = evaluation_max_workers
+        self.evaluation_max_notes_per_query = evaluation_max_notes_per_query
+        self.enable_deep_analysis = enable_deep_analysis
+        self.deep_analysis_only = deep_analysis_only
+        self.enable_similarity_analysis = enable_similarity_analysis
+
+        # Create the output directory
+        os.makedirs(output_dir, exist_ok=True)
+
+        # Load data
+        logger.info("Loading data files...")
+        self.how_data = self._load_json(how_json_path)
+        logger.info("  ✓ Loaded how.json")
+
+        # Initialize components
+        logger.info("Initializing components...")
+        self.openrouter_client = OpenRouterClient(
+            api_key=openrouter_api_key,
+            model="google/gemini-2.5-flash",
+            retry_delay=5  # longer retry delay to avoid rate limiting
+        )
+        self.llm_evaluator = LLMEvaluator(self.openrouter_client)
+        self.search_client = XiaohongshuSearch()
+
+        # Initialize the deep deconstruction analyzer
+        self.deep_analyzer = PostDeconstructionAnalyzer(
+            api_url=deep_analysis_api_url,
+            max_workers=deep_analysis_max_workers,
+            max_notes=deep_analysis_max_notes,
+            min_score=deep_analysis_min_score,
+            skip_count=deep_analysis_skip_count,
+            sort_by=deep_analysis_sort_by,
+            output_dir=output_dir,
+            enable_image_download=False,  # use original image URLs directly, no proxying
+            image_server_url="http://localhost:8765",  # image server URL (deprecated)
+            image_download_dir="downloaded_images"  # image download directory (deprecated)
+        )
+
+        # Initialize the similarity analyzer
+        self.similarity_analyzer = SimilarityAnalyzer(
+            weight_embedding=similarity_weight_embedding,
+            weight_semantic=similarity_weight_semantic,
+            max_workers=similarity_max_workers,
+            min_similarity=similarity_min_similarity,
+            evaluation_results_path=os.path.join(output_dir, "evaluated_results.json"),
+            update_evaluation_scores=True  # automatically compute the combined score P
+        )
+
+        logger.info("System initialization complete")
+
+    def _load_json(self, file_path: str) -> Any:
+        """Load a JSON file"""
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                return json.load(f)
+        except Exception as e:
+            logger.error(f"Failed to load file {file_path}: {e}")
+            raise
+
+    def _save_json(self, data: Any, file_path: str):
+        """Save data to a JSON file"""
+        try:
+            with open(file_path, 'w', encoding='utf-8') as f:
+                json.dump(data, f, ensure_ascii=False, indent=2)
+            logger.info(f"Saved: {file_path}")
+        except Exception as e:
+            logger.error(f"Failed to save file {file_path}: {e}")
+            raise
+
+    # ========== Step 1: select features with 0.5 <= similarity < 0.8 ==========
+
+    def filter_medium_similarity_features(self) -> List[Dict[str, Any]]:
+        """
+        Step 1: select medium-match features
+
+        Selection condition: 0.5 <= max similarity < 0.8
+
+        Returns:
+            The list of selected features
+        """
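+        # Illustrative examples of the band: a max similarity of 0.45 is dropped
+        # (< 0.5), 0.65 is kept, 0.85 is dropped (>= 0.8).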
+        logger.info("=" * 60)
+        logger.info("Step 1: selecting medium-match features (0.5 <= similarity < 0.8)")
+        logger.info("=" * 60)
+
+        results = []
+        how_result = self.how_data.get('解构结果', {})
+
+        total_features = 0
+        filtered_out_low = 0   # < 0.5
+        filtered_out_high = 0  # >= 0.8
+        selected_count = 0
+
+        # Iterate over the three dimensions
+        for level_name, level_list in how_result.items():
+            if not isinstance(level_list, list):
+                continue
+
+            logger.info(f"\nProcessing {level_name}...")
+
+            for item_idx, item in enumerate(level_list):
+                item_name = item.get('名称', f'未命名-{item_idx}')
+
+                # New format: persona match results live directly on the point
+                match_results = item.get('匹配人设结果', [])
+
+                total_features += 1
+
+                if not match_results:
+                    logger.info(f"  ✗ {item_name}: no match results")
+                    continue
+
+                # Find the highest similarity (new format: similarity is a direct field)
+                max_similarity = max(
+                    (m.get('相似度', 0) for m in match_results),
+                    default=0
+                )
+
+                # Selection condition
+                if max_similarity < 0.5:
+                    filtered_out_low += 1
+                    logger.info(f"  ✗ {item_name}: max similarity {max_similarity:.3f} < 0.5 (filtered)")
+                    continue
+                elif max_similarity >= 0.8:
+                    filtered_out_high += 1
+                    logger.info(f"  ✗ {item_name}: max similarity {max_similarity:.3f} >= 0.8 (filtered)")
+                    continue
+
+                # 0.5 <= max_similarity < 0.8: keep it
+                # Sort by similarity in descending order and take the top 3
+                sorted_matches = sorted(
+                    match_results,
+                    key=lambda x: x.get('相似度', 0),
+                    reverse=True
+                )
+                top3_matches = sorted_matches[:3]  # take the top 3
+
+                # Build the top-3 match info list
+                top3_match_info = []
+                for match in top3_matches:
+                    feature_classification = match.get('特征分类', [])
+                    classification_path = self._build_classification_path(feature_classification)
+
+                    # Read the feature type directly from the match result
+                    is_classification = (match.get('特征类型') == '分类')
+
+                    top3_match_info.append({
+                        '人设特征名称': match.get('人设特征名称'),
+                        '人设特征层级': match.get('人设特征层级'),
+                        '特征类型': match.get('特征类型'),
+                        '特征分类': feature_classification,
+                        '相似度': match.get('相似度', 0),  # direct field
+                        '匹配说明': match.get('说明', ''),  # direct field
+                        '是分类': is_classification,
+                        '所属分类路径': classification_path
+                    })
+
+                result_item = {
+                    '原始特征名称': item_name,  # use the point name as the feature name
+                    '来源层级': level_name,
+                    '权重': 1.0,  # the new format has no weight field; default to 1.0
+                    '所属点名称': item_name,
+                    '最高匹配信息': top3_match_info[0],  # keep the first one for step 2
+                    'top3匹配信息': top3_match_info  # new field
+                }
+
+                results.append(result_item)
+                selected_count += 1
+
+                # Show the top-3 match info
+                top3_names = [m['人设特征名称'] for m in top3_match_info]
+                logger.info(f"  ✓ {item_name} → Top{len(top3_match_info)}: {', '.join(top3_names)}")
+
+        # Statistics
+        logger.info("\n" + "=" * 60)
+        logger.info("Step 1 complete")
+        logger.info(f"  Total features: {total_features}")
+        logger.info(f"  Filtered out (<0.5): {filtered_out_low}")
+        logger.info(f"  Filtered out (>=0.8): {filtered_out_high}")
+        logger.info(f"  Kept (0.5-0.8): {selected_count}")
+        logger.info("=" * 60)
+
+        # Save the results
+        output_path = os.path.join(self.output_dir, "filtered_features.json")
+        self._save_json(results, output_path)
+
+        return results
+
+    def _build_classification_path(self, feature_classification: List[str]) -> str:
+        """
+        Build the classification path
+
+        Args:
+            feature_classification: Feature classification array
+
+        Returns:
+            The classification path
+        """
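+        # Worked example (hypothetical values): ["美食探店", "兴趣实质", "生活方式"]
+        # -> the middle "兴趣实质" loses its "实质" suffix -> reversed and joined:
+        # "生活方式/兴趣/美食探店"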
+        if not feature_classification:
+            return ""
+
+        # Step 1: strip the "实质" suffix from middle elements
+        cleaned = []
+        for i, item in enumerate(feature_classification):
+            if i == len(feature_classification) - 1:  # keep the last element as-is
+                cleaned.append(item)
+            elif item.endswith("实质") and i != 0:  # strip "实质" from middle elements
+                cleaned.append(item[:-2])
+            else:
+                cleaned.append(item)
+
+        # Step 2: reverse the array
+        reversed_list = list(reversed(cleaned))
+
+        # Step 3: join into a path
+        path = "/".join(reversed_list)
+
+        return path
+    # ========== Step 2: extract high-similarity candidate words from the how file ==========
+
+    def extract_candidate_words(self, filtered_features: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """
+        Step 2: extract candidate words from the how file
+
+        Processing flow:
+        1. Extract persona candidates: persona feature names with similarity >= 0.8
+        2. Extract post candidates: point names (inspiration/purpose/key points) whose best persona similarity is >= 0.8
+        3. Merge and deduplicate the two candidate sets
+        4. Sort by similarity in descending order
+        5. Assign the candidate list to each base word
+        6. Build the '高相似度候选_按base_word' structure
+
+        Args:
+            filtered_features: Feature list selected in step 1
+
+        Returns:
+            Feature list with high-similarity candidates attached
+        """
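+        # Shape of the attached candidates (values illustrative):
+        # '高相似度候选_按base_word': {<base_word>: [{'候选词': ..., '候选词类型': 'persona' or 'post',
+        #                                            '相似度': 0.83, ...}, ...]}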
+        logger.info("=" * 60)
+        logger.info("Step 2: extracting candidate words from the how file (persona + post)")
+        logger.info("=" * 60)
+
+        how_result = self.how_data.get('解构结果', {})
+
+        # Step 1: extract persona candidates (similarity >= 0.8)
+        persona_candidates_dict = {}  # {persona feature name: {candidate info}}
+
+        for dimension in ['灵感点列表', '关键点列表', '目的点列表']:
+            items_list = how_result.get(dimension, [])
+
+            for item in items_list:
+                item_name = item.get('名称', '')
+                matches = item.get('匹配人设结果', [])
+
+                for match in matches:
+                    similarity = match.get('相似度', 0)
+                    persona_feature_name = match.get('人设特征名称', '')
+
+                    # Keep only similarity >= 0.8
+                    if similarity >= 0.8 and persona_feature_name:
+                        # Deduplication: keep the highest similarity
+                        if persona_feature_name not in persona_candidates_dict or \
+                           similarity > persona_candidates_dict[persona_feature_name]['相似度']:
+                            persona_candidates_dict[persona_feature_name] = {
+                                '候选词': persona_feature_name,
+                                '候选词类型': 'persona',  # mark as a persona candidate
+                                '相似度': similarity,
+                                '特征类型': match.get('特征类型', ''),
+                                '特征分类': match.get('特征分类', []),
+                                '人设特征层级': match.get('人设特征层级', ''),
+                                '来源层级': 'persona',
+                                '来源路径': self._build_classification_path(match.get('特征分类', [])),
+                                '匹配说明': match.get('说明', ''),
+                                '来源原始特征': item_name
+                            }
+
+        # Step 2: extract post candidates (point names whose best persona similarity is >= 0.8)
+        post_candidates_dict = {}  # {point name: {candidate info}}
+
+        for dimension in ['灵感点列表', '关键点列表', '目的点列表']:
+            items_list = how_result.get(dimension, [])
+
+            for item in items_list:
+                item_name = item.get('名称', '')
+                matches = item.get('匹配人设结果', [])
+
+                if not item_name or not matches:
+                    continue
+
+                # Compute the point's highest similarity to the persona
+                max_similarity = max(
+                    (m.get('相似度', 0) for m in matches),
+                    default=0
+                )
+
+                # Only points with max similarity >= 0.8 become post candidates
+                if max_similarity >= 0.8:
+                    # If the point name already exists as a persona candidate, skip it
+                    # (persona candidates take precedence)
+                    if item_name not in persona_candidates_dict and item_name not in post_candidates_dict:
+                        post_candidates_dict[item_name] = {
+                            '候选词': item_name,
+                            '候选词类型': 'post',  # mark as a post candidate
+                            '相似度': 1.0,  # the post's own point; similarity treated as 1.0
+                            '特征类型': item.get('类型', ''),
+                            '特征分类': [],
+                            '人设特征层级': '',
+                            '来源层级': dimension,
+                            '来源路径': f"帖子/{dimension}/{item_name}",
+                            '匹配说明': item.get('描述', ''),
+                            '来源原始特征': item_name,
+                            '点最高人设相似度': max_similarity  # the point's best persona similarity
+                        }
+
+        # Step 3: merge the two candidate sets
+        all_candidates_dict = {}
+        all_candidates_dict.update(persona_candidates_dict)  # persona candidates
+        all_candidates_dict.update(post_candidates_dict)  # post candidates
+
+        # Step 4: convert to a list sorted by similarity in descending order
+        global_candidates = sorted(
+            all_candidates_dict.values(),
+            key=lambda x: x['相似度'],
+            reverse=True
+        )
+
+        logger.info("Candidate statistics:")
+        logger.info(f"  - persona candidates: {len(persona_candidates_dict)}")
+        logger.info(f"  - post candidates: {len(post_candidates_dict)}")
+        logger.info(f"  - total candidates: {len(global_candidates)}")
+
+        # Show the top 10 candidates
+        if global_candidates:
+            logger.info("\nTop 10 candidates:")
+            for i, candidate in enumerate(global_candidates[:10], 1):
+                cand_type = "persona" if candidate['候选词类型'] == 'persona' else "post"
+                logger.info(f"  {i}. {candidate['候选词']} (similarity: {candidate['相似度']:.3f}, type: {cand_type})")
+
+        # Step 5: build the output structure for each feature
+        results = []
+        for idx, feature_data in enumerate(filtered_features, 1):
+            original_feature_name = feature_data.get('原始特征名称', '')
+            logger.info(f"\n[{idx}/{len(filtered_features)}] Processing: {original_feature_name}")
+
+            top3_matches = feature_data.get('top3匹配信息', [])
+
+            # Extract the 3 base words
+            base_words = [match.get('人设特征名称', '') for match in top3_matches[:3]]
+            logger.info(f"  Base words: {', '.join(base_words)}")
+
+            # All base words share the same candidate list
+            high_similarity_by_base = {}
+            for base_word in base_words:
+                if base_word:
+                    high_similarity_by_base[base_word] = global_candidates.copy()
+
+            logger.info(f"  Each base word gets {len(global_candidates)} candidates")
+
+            result = {
+                '原始特征名称': original_feature_name,
+                '来源层级': feature_data.get('来源层级', ''),  # keep metadata
+                '权重': feature_data.get('权重', 0),  # keep metadata
+                'top3匹配信息': top3_matches,
+                '找到的关联_按base_word': {},  # the new approach needs no association analysis
+                '高相似度候选_按base_word': high_similarity_by_base
+            }
+            results.append(result)
+
+        # Save the results
+        output_path = os.path.join(self.output_dir, 'candidate_words.json')
+        self._save_json(results, output_path)
+
+        logger.info("\n" + "=" * 60)
+        logger.info("Step 2 complete")
+        logger.info(f"  Candidates extracted: {len(global_candidates)}")
+        logger.info(f"  Features processed: {len(results)}")
+        logger.info("=" * 60)
+
+        return results
+
+    # ========== Step 3: multi-word combinations + LLM evaluation ==========
+
+    def generate_search_queries(
+        self,
+        features_data: List[Dict[str, Any]],
+        max_workers: int = 4,
+        max_candidates: int = 20,
+        max_combo_length: int = 4
+    ) -> List[Dict[str, Any]]:
+        """
+        Step 3: multi-word combinations + LLM evaluation
+
+        Based on the base words from step 1 and the high-similarity candidates
+        from step 2, let the LLM generate queries and keep the top 10 per base word.
+
+        Args:
+            features_data: Output of step 2 (includes high-similarity candidates)
+            max_workers: Number of original features evaluated concurrently (default 4)
+            max_candidates: Max candidate words used per combination (default 20)
+            max_combo_length: Max words per combination (default 4; currently unused
+                by the LLM-generation path, kept for compatibility)
+
+        Returns:
+            Data with LLM-evaluated queries attached
+        """
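+        # Each item in 'top10_searches' is later read as a dict with at least
+        # 'search_word' and 'score' (see execute_search_queries below).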
+        logger.info("=" * 60)
+        logger.info("Step 3: multi-word combinations + LLM evaluation")
+        logger.info(f"  Max candidates: {max_candidates}")
+        logger.info(f"  Max combination length: {max_combo_length} words")
+        logger.info(f"  Concurrency: {max_workers} original features")
+        logger.info("=" * 60)
+
+        total_features = len(features_data)
+
+        # Process different original features in parallel with a ThreadPoolExecutor
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            # Submit all tasks
+            futures = []
+            for idx, feature_result in enumerate(features_data, 1):
+                future = executor.submit(
+                    self._process_single_feature_combinations,
+                    idx,
+                    total_features,
+                    feature_result,
+                    max_candidates,
+                    max_combo_length
+                )
+                futures.append((future, feature_result))
+
+            # Wait for all tasks and collect the results
+            for future, feature_result in futures:
+                try:
+                    _ = future.result()  # wait; results were written back into feature_result
+                except Exception as e:
+                    logger.error(f"  Evaluation failed: {feature_result['原始特征名称']}, error: {e}")
+
+        # Save the results
+        output_path = os.path.join(self.output_dir, "search_queries_evaluated.json")
+        self._save_json(features_data, output_path)
+
+        logger.info("\n" + "=" * 60)
+        logger.info("Step 3 complete")
+        logger.info("=" * 60)
+
+        return features_data
+
+    def _process_single_feature_combinations(
+        self,
+        idx: int,
+        total: int,
+        feature_result: Dict[str, Any],
+        max_candidates: int,
+        max_combo_length: int
+    ) -> None:
+        """
+        Generate and evaluate query combinations for one original feature
+
+        Improvement: each base_word uses its own candidates (instead of sharing them)
+
+        Steps:
+        1. Get the top-3 base_words from step 1's top3匹配信息
+        2. For each base_word:
+           a. Get candidates from step 2's 高相似度候选_按base_word
+           b. Generate combinations
+           c. LLM evaluation
+           d. Select the top 10
+        3. Save the grouped results
+
+        Args:
+            idx: Feature index
+            total: Total number of features
+            feature_result: Feature result data
+            max_candidates: Max candidate words used per combination
+            max_combo_length: Max words per combination (unused in the LLM-generation path)
+        """
+        original_feature = feature_result['原始特征名称']
+        logger.info(f"\n[{idx}/{total}] Processing: {original_feature}")
+
+        # Step 1: get the top-3 base words
+        top3_info = feature_result.get('top3匹配信息', [])
+        if not top3_info:
+            logger.info("  No top-3 match info; skipping")
+            feature_result['组合评估结果_分组'] = []
+            return
+
+        logger.info(f"  Found {len(top3_info)} base_words")
+
+        # Step 2: get the candidates grouped by base_word
+        candidates_by_base_word = feature_result.get('高相似度候选_按base_word', {})
+
+        if not candidates_by_base_word:
+            logger.warning("  No candidates grouped by base_word; skipping")
+            feature_result['组合评估结果_分组'] = []
+            return
+
+        # Step 3: process each base_word independently
+        grouped_results = []
+
+        for base_idx, base_info in enumerate(top3_info, 1):
+            base_word = base_info.get('人设特征名称', '')
+            base_similarity = base_info.get('相似度', 0)
+
+            if not base_word:
+                continue
+
+            logger.info(f"  [{base_idx}/{len(top3_info)}] Base word: {base_word} (similarity: {base_similarity:.3f})")
+
+            # Get this base_word's candidates
+            base_candidates = candidates_by_base_word.get(base_word, [])
+            candidates = base_candidates[:max_candidates]
+            candidate_words = [c['候选词'] for c in candidates]
+
+            if not candidate_words:
+                logger.warning("    No candidates for this base_word; skipping")
+                grouped_results.append({
+                    'base_word': base_word,
+                    'base_word_similarity': base_similarity,
+                    'base_word_info': base_info,
+                    'top10_searches': [],
+                    'available_words': []
+                })
+                continue
+
+            logger.info(f"    Candidate count: {len(candidate_words)} (limit: {max_candidates})")
+
+            # Generate queries with the LLM (new approach: the LLM builds queries
+            # directly from the candidates)
+            logger.info(f"    Generating queries with the LLM (base word: {base_word})...")
+            evaluated = self.llm_evaluator.generate_queries_from_candidates(
+                original_feature=original_feature,
+                base_word=base_word,
+                candidate_words=candidate_words,
+                max_queries=10
+            )
+
+            # Keep the top 10 (the LLM generation method already caps the count)
+            top_10 = evaluated[:10]
+            logger.info(f"    Generation done: {len(top_10)} queries")
+
+            # Save the grouped result - each base_word has its own available_words
+            grouped_results.append({
+                'base_word': base_word,
+                'base_word_similarity': base_similarity,
+                'base_word_info': base_info,
+                'top10_searches': top_10,
+                'available_words': candidate_words  # this base_word's own candidates
+            })
+
+        # Write the results back
+        feature_result['组合评估结果_分组'] = grouped_results
+
+        total_searches = sum(len(g['top10_searches']) for g in grouped_results)
+        logger.info(f"  Done! {len(grouped_results)} base_words, {total_searches} search words")
+
+    # ========== Step 4: execute searches ==========
+
+    def _execute_single_search(
+        self,
+        idx: int,
+        total: int,
+        search_word: str,
+        feature_ref: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """
+        Execute a single search task (for concurrent execution)
+
+        Args:
+            idx: Search index
+            total: Total number of searches
+            search_word: Search word
+            feature_ref: Feature reference (the result is written into it)
+
+        Returns:
+            Search result info
+        """
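+        # Each task writes only into its own feature_ref dict, so results do not
+        # race across workers; this assumes self.search_client.search() is safe
+        # to call concurrently.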
+        logger.info(f"[{idx}/{total}] Searching: {search_word}")
+
+        try:
+            result = self.search_client.search(
+                keyword=search_word,
+                content_type='不限',
+                sort_type='综合',
+                max_retries=3,
+                use_cache=True  # enable the search cache
+            )
+
+            note_count = len(result.get('data', {}).get('data', []))
+            logger.info(f"  ✓ Success, got {note_count} posts")
+
+            # Write the result
+            feature_ref['search_result'] = result
+            feature_ref['search_metadata'] = {
+                'searched_at': datetime.now().isoformat(),
+                'status': 'success',
+                'note_count': note_count,
+                'search_params': {
+                    'keyword': search_word,
+                    'content_type': '不限',
+                    'sort_type': '综合'
+                }
+            }
+
+            return {'status': 'success', 'search_word': search_word, 'note_count': note_count}
+
+        except Exception as e:
+            logger.error(f"  ✗ Failed: {e}")
+            feature_ref['search_result'] = None
+            feature_ref['search_metadata'] = {
+                'searched_at': datetime.now().isoformat(),
+                'status': 'failed',
+                'note_count': 0,
+                'error': str(e)
+            }
+
+            return {'status': 'failed', 'search_word': search_word, 'error': str(e)}
+
+    def execute_search_queries(
+        self,
+        features_data: List[Dict[str, Any]],
+        search_delay: float = 2.0,
+        top_n: int = 10
+    ) -> List[Dict[str, Any]]:
+        """
+        Step 4: execute the Xiaohongshu searches
+
+        Args:
+            features_data: Output of step 3
+            search_delay: Delay between searches (currently unused)
+            top_n: Keep the N highest-scoring search words per original feature
+                (currently unused; the per-base_word/per-feature/global limits apply instead)
+
+        Returns:
+            Data with search results attached
+        """
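+        # How the limits cascade (illustrative): with 3 base_words x 10 queries,
+        # max_searches_per_base_word=5 leaves 15 tasks, max_searches_per_feature=8
+        # trims them to 8, and max_total_searches caps the final task list.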
+        logger.info("=" * 60)
+        logger.info("Step 4: executing Xiaohongshu searches")
+        logger.info("=" * 60)
+
+        # Collect search words grouped by original feature (read from the query-generation results)
+        feature_search_groups = {}
+
+        for feature_result in features_data:
+            original_feature = feature_result['原始特征名称']
+
+            if original_feature not in feature_search_groups:
+                feature_search_groups[original_feature] = []
+
+            # Read from the query-generation results (new structure)
+            grouped_results = feature_result.get('组合评估结果_分组', [])
+
+            if grouped_results:
+                # Grouped structure: run every base_word's top 10
+                for group in grouped_results:
+                    base_word = group.get('base_word', '')
+                    base_similarity = group.get('base_word_similarity', 0)
+
+                    base_word_searches = []
+                    for eval_item in group.get('top10_searches', []):
+                        sw = eval_item.get('search_word')
+                        if not sw:
+                            continue
+
+                        score = eval_item.get('score', 0.0)
+
+                        base_word_searches.append({
+                            'search_word': sw,
+                            'score': score,
+                            'base_word': base_word,
+                            'base_word_similarity': base_similarity,
+                            'feature_ref': eval_item  # reference to the item; search results are written into it
+                        })
+
+                    # Apply the per-base_word search limit
+                    if self.max_searches_per_base_word and len(base_word_searches) > self.max_searches_per_base_word:
+                        logger.info(f"  Applying base_word limit: {base_word} reduced from {len(base_word_searches)} to {self.max_searches_per_base_word}")
+                        base_word_searches = base_word_searches[:self.max_searches_per_base_word]
+
+                    feature_search_groups[original_feature].extend(base_word_searches)
+            else:
+                # Backward compatibility with the old structure (组合评估结果)
+                for eval_item in feature_result.get('组合评估结果', []):
+                    sw = eval_item.get('search_word')
+                    if not sw:
+                        continue
+
+                    score = eval_item.get('score', 0.0)
+
+                    feature_search_groups[original_feature].append({
+                        'search_word': sw,
+                        'score': score,
+                        'feature_ref': eval_item
+                    })
+
+            # Apply the per-feature search limit
+            if self.max_searches_per_feature and len(feature_search_groups[original_feature]) > self.max_searches_per_feature:
+                logger.info(f"  Applying feature limit: {original_feature} reduced from {len(feature_search_groups[original_feature])} to {self.max_searches_per_feature}")
+                feature_search_groups[original_feature] = feature_search_groups[original_feature][:self.max_searches_per_feature]
+
+        # Collect all search tasks (the grouped structure runs every base_word's top 10; no further filtering)
+        all_searches = []
+        total_count = 0
+
+        for original_feature, search_list in feature_search_groups.items():
+            total_count += len(search_list)
+            all_searches.extend(search_list)
+
+            logger.info(f"  {original_feature}: {len(search_list)} search words")
+
+        # Apply the global search limit
+        if self.max_total_searches and len(all_searches) > self.max_total_searches:
+            logger.info(f"  Applying global limit: reduced from {len(all_searches)} to {self.max_total_searches}")
+            all_searches = all_searches[:self.max_total_searches]
+
+        logger.info(f"\n{len(all_searches)} search tasks in total")
+        logger.info(f"  Running searches concurrently (workers: {self.search_max_workers})")
+
+        # Run the searches concurrently with a ThreadPoolExecutor
+        with ThreadPoolExecutor(max_workers=self.search_max_workers) as executor:
+            # Submit all search tasks
+            futures = []
+            for idx, item in enumerate(all_searches, 1):
+                future = executor.submit(
+                    self._execute_single_search,
+                    idx,
+                    len(all_searches),
+                    item['search_word'],
+                    item['feature_ref']
+                )
+                futures.append(future)
+
+            # Wait for all searches to finish
+            for future in as_completed(futures):
+                try:
+                    future.result()
+                    # The result was already written into feature_ref; nothing else to do
+                except Exception as e:
+                    logger.error(f"  Search task failed: {e}")
+
+        # Save the results
+        output_path = os.path.join(self.output_dir, "search_results.json")
+        self._save_json(features_data, output_path)
+
+        logger.info("\n" + "=" * 60)
+        logger.info("Step 4 complete")
+        logger.info("=" * 60)
+
+        return features_data
+
+    # ========== Step 5: LLM evaluation of search results (two-layer filtered evaluation) ==========
+    # Note: the old single-layer evaluation methods were moved to backup/unused_methods_from_enhanced_search_v2.py
+
+    def evaluate_search_results(
+        self,
+        features_data: List[Dict[str, Any]]
+    ) -> List[Dict[str, Any]]:
+        """
+        Step 5: evaluate search results with the LLM (two-layer filtered evaluation)
+
+        Walk all search results and apply the two-layer evaluation:
+        1. Layer 1: filter out results unrelated to the search query
+        2. Layer 2: score the match against the target feature (0.8-1.0 / 0.6-0.79 / 0.5-0.59 / <=0.4)
+
+        Args:
+            features_data: Output of step 4
+
+        Returns:
+            Data with evaluation results attached
+        """
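+        # Each evaluated search item gains an 'evaluation_with_filter' key; the
+        # whole structure is then saved to evaluated_results.json for later steps.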
+        logger.info("=" * 60)
+        logger.info("Step 5: LLM evaluation of search results (two-layer filtered evaluation)")
+        logger.info(f"  Concurrency: {self.evaluation_max_workers}")
+        logger.info(f"  Max notes evaluated per search: {self.evaluation_max_notes_per_query}")
+        logger.info("=" * 60)
+
+        # Collect all search items that need evaluation
+        search_items_to_evaluate = []
+
+        for feature_result in features_data:
+            original_feature = feature_result['原始特征名称']
+
+            # Read the search results from 组合评估结果_分组
+            grouped_results = feature_result.get('组合评估结果_分组', [])
+
+            if grouped_results:
+                for group in grouped_results:
+                    for eval_item in group.get('top10_searches', []):
+                        # Check whether there is a search result
+                        if eval_item.get('search_result') and eval_item.get('search_metadata', {}).get('status') == 'success':
+                            search_items_to_evaluate.append({
+                                'original_feature': original_feature,
+                                'search_item': eval_item,
+                                'base_word': group.get('base_word', '')
+                            })
+            else:
+                # Backward compatibility with the old structure
+                for eval_item in feature_result.get('组合评估结果', []):
+                    if eval_item.get('search_result') and eval_item.get('search_metadata', {}).get('status') == 'success':
+                        search_items_to_evaluate.append({
+                            'original_feature': original_feature,
+                            'search_item': eval_item,
+                            'base_word': ''
+                        })
+
+        logger.info(f"{len(search_items_to_evaluate)} search results to evaluate")
+
+        # Evaluate all search results in parallel
+        with ThreadPoolExecutor(max_workers=self.evaluation_max_workers) as executor:
+            futures = []
+            for idx, item in enumerate(search_items_to_evaluate, 1):
+                future = executor.submit(
+                    self._evaluate_single_search_with_filter,
+                    idx,
+                    len(search_items_to_evaluate),
+                    item['original_feature'],
+                    item['search_item'],
+                    item['base_word']
+                )
+                futures.append((future, item))
+
+            # Collect the results
+            success_count = 0
+            failed_count = 0
+
+            for future, item in futures:
+                try:
+                    evaluation = future.result()
+                    item['search_item']['evaluation_with_filter'] = evaluation
+                    success_count += 1
+                except Exception as e:
+                    logger.error(f"  Evaluation failed: {item['search_item'].get('search_word', 'unknown')}, error: {e}")
+                    item['search_item']['evaluation_with_filter'] = None
+                    failed_count += 1
+
+        logger.info(f"\nEvaluation done: {success_count} succeeded, {failed_count} failed")
+
+        # Save the results
+        output_path = os.path.join(self.output_dir, "evaluated_results.json")
+        self._save_json(features_data, output_path)
+
+        logger.info("\n" + "=" * 60)
+        logger.info("Step 5 complete")
+        logger.info("=" * 60)
+
+        return features_data
+
+    def _evaluate_single_search_with_filter(
+        self,
+        idx: int,
+        total: int,
+        original_feature: str,
+        search_item: Dict[str, Any],
+        base_word: str
+    ) -> Dict[str, Any]:
+        """
+        Evaluate a single search result (with the two-layer filter)
+
+        Args:
+            idx: Index
+            total: Total count
+            original_feature: Original feature
+            search_item: Search item (contains search_word and search_result)
+            base_word: Base word
+
+        Returns:
+            Evaluation result
+        """
+        search_word = search_item.get('search_word', '')
+        notes = search_item['search_result'].get('data', {}).get('data', [])
+
+        logger.info(f"[{idx}/{total}] Evaluating: {search_word} (posts: {len(notes)})")
+
+        # Call the LLM evaluator's batch evaluation
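+        # The evaluator is expected to return a dict containing 'filtered_count',
+        # 'evaluated_count' and 'match_distribution' (read below).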
+        evaluation = self.llm_evaluator.batch_evaluate_notes_with_filter(
+            search_query=search_word,
+            target_feature=original_feature,
+            notes=notes,
+            max_notes=self.evaluation_max_notes_per_query,
+            max_workers=self.evaluation_max_workers
+        )
+
+        # Statistics
+        filtered_count = evaluation.get('filtered_count', 0)
+        evaluated_count = evaluation.get('evaluated_count', 0)
+        match_dist = evaluation.get('match_distribution', {})
+
+        logger.info(f"  ✓ Done: filtered {filtered_count}, evaluated {evaluated_count}, "
+                    f"exact matches {match_dist.get('完全匹配(0.8-1.0)', 0)}, "
+                    f"similar matches {match_dist.get('相似匹配(0.6-0.79)', 0)}")
+
+        return evaluation
+
+    # ========== Main pipeline ==========
+    # Note: the old extended-search methods (extended_searches) were moved to backup/unused_methods_from_enhanced_search_v2.py
+
+    def run_full_pipeline(self):
+        """Run the full pipeline"""
+        logger.info("\n" + "=" * 60)
+        logger.info("Starting the full pipeline")
+        logger.info("=" * 60)
+
+        try:
+            # Deep-deconstruction-only mode: run only the deep analysis (starting from the evaluation results)
+            if self.deep_analysis_only:
+                logger.info("Run mode: deep deconstruction only (starting from the evaluation results)")
+                evaluation_path = os.path.join(self.output_dir, "evaluated_results.json")
+
+                if not os.path.exists(evaluation_path):
+                    raise FileNotFoundError(f"Evaluation results not found: {evaluation_path}")
+
+                with open(evaluation_path, 'r', encoding='utf-8') as f:
+                    evaluation_results = json.load(f)
+
+                deep_results = self.deep_analyzer.run(evaluation_results)
+                return deep_results
+
+            # Normal pipeline: start from feature selection
+            # Step 1
+            filtered_features = self.filter_medium_similarity_features()
+
+            # Step 2: extract candidate words from the how file
+            candidates = self.extract_candidate_words(filtered_features)
+
+            # Step 3: multi-word combinations + LLM evaluation
+            queries = self.generate_search_queries(
+                candidates,
+                max_workers=8,  # raise concurrency from 4 to 8
+                max_combo_length=3  # lower the combination length from 4 to 3
+            )
+
+            # Step 4: execute the searches
+            search_results = self.execute_search_queries(queries, search_delay=2.0, top_n=self.top_n)
+
+            # Step 5: LLM evaluation of search results - conditional
+            if self.enable_evaluation:
+                evaluation_results = self.evaluate_search_results(search_results)
+            else:
+                evaluation_results = search_results
+                logger.info("\n" + "=" * 60)
+                logger.info("Step 5: skipped (not enabled)")
+                logger.info("=" * 60)
+
+            # Deep deconstruction analysis - conditional
+            if self.enable_deep_analysis:
+                deep_results = self.deep_analyzer.run(evaluation_results)
+                final_results = deep_results
+            else:
+                final_results = evaluation_results
+
+            # Similarity analysis - conditional
+            if self.enable_similarity_analysis and self.enable_deep_analysis:
+                logger.info("\n" + "=" * 60)
+                logger.info("Step 7: similarity analysis (deconstructed features vs. original features)")
+                logger.info("=" * 60)
+
+                similarity_results = self.similarity_analyzer.run(
+                    deep_results,
+                    output_path=os.path.join(self.output_dir, "similarity_analysis_results.json")
+                )
+                final_results = similarity_results
+
+                logger.info("\n" + "=" * 60)
+                logger.info("Step 7 complete")
+                logger.info("=" * 60)
+
+            logger.info("\n" + "=" * 60)
+            if self.enable_similarity_analysis and self.enable_deep_analysis:
+                logger.info("✓ Pipeline finished (full pipeline + deep analysis + similarity analysis)")
+            elif self.enable_deep_analysis:
+                logger.info("✓ Pipeline finished (full pipeline + deep analysis)")
+            elif self.enable_evaluation:
+                logger.info("✓ Pipeline finished (full pipeline)")
+            else:
+                logger.info("✓ Pipeline finished (basic pipeline)")
+            logger.info("=" * 60)
+
+            # Run the visualization automatically
+            logger.info("\n" + "=" * 60)
+            logger.info("Generating visualizations...")
+            logger.info("=" * 60)
+
+            try:
+                # Use the unified visualization script
+                viz_script = 'src/visualizers/search_results_visualizer.py'
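+                # Relative path: this assumes the pipeline is launched from the project root.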
+                logger.info(f"  Using visualization script: {viz_script}")
+
+                result = subprocess.run(
+                    ['python3', viz_script],
+                    capture_output=True,
+                    text=True,
+                    timeout=60
+                )
+
+                if result.returncode == 0:
+                    logger.info("✓ Visualization generated")
+                    logger.info(result.stdout)
+                else:
+                    logger.error(f"Visualization failed: {result.stderr}")
+            except subprocess.TimeoutExpired:
+                logger.error("Visualization timed out")
+            except Exception as e:
+                logger.error(f"Visualization error: {e}")
+
+            return final_results
+
+        except Exception as e:
+            logger.error(f"Pipeline failed: {e}")
+            raise
+
+
+def main():
+    """Entry point"""
+    parser = argparse.ArgumentParser(description='Enhanced search system V2')
+    parser.add_argument(
+        '--how-json',
+        default='input/posts/690d977d0000000007036331_how.json',
+        help='Path to the "how" deconstruction file'
+    )
+    parser.add_argument(
+        '--api-key',
+        default=None,
+        help='OpenRouter API key (read from the environment by default)'
+    )
+    parser.add_argument(
+        '--output-dir',
+        default='output_v2',
+        help='Output directory'
+    )
+    parser.add_argument(
+        '--top-n',
+        type=int,
+        default=10,
+        help='Keep the N highest-scoring search words per original feature (default 10)'
+    )
+    parser.add_argument(
+        '--max-total-searches',
+        type=int,
+        default=None,
+        help='Global cap on total searches (default None, unlimited)'
+    )
+    parser.add_argument(
+        '--search-workers',
+        type=int,
+        default=3,
+        help='Search concurrency (default 3)'
+    )
+    parser.add_argument(
+        '--max-searches-per-feature',
+        type=int,
+        default=None,
+        help='Max searches per original feature (default None, unlimited)'
+    )
+    parser.add_argument(
+        '--max-searches-per-base-word',
+        type=int,
+        default=None,
+        help='Max searches per base_word (default None, unlimited)'
+    )
+    parser.add_argument(
+        '--enable-stage5',
+        action='store_true',
+        help='Enable result evaluation (default False)'
+    )
+    parser.add_argument(
+        '--stage5-max-workers',
+        type=int,
+        default=10,
+        help='Concurrency for result evaluation (default 10)'
+    )
+    parser.add_argument(
+        '--stage5-max-notes',
+        type=int,
+        default=20,
+        help='Max notes evaluated per search result (default 20)'
+    )
+    parser.add_argument(
+        '--enable-stage6',
+        action='store_true',
+        help='Enable deep deconstruction analysis'
+    )
+    parser.add_argument(
+        '--stage6-only',
+        action='store_true',
+        help='Run deep deconstruction only (starting from the evaluation results)'
+    )
+    parser.add_argument(
+        '--stage6-max-workers',
+        type=int,
+        default=5,
+        help='Deep deconstruction concurrency (default 5)'
+    )
+    parser.add_argument(
+        '--stage6-max-notes',
+        type=int,
+        default=None,
+        help='Max exact-match posts to deep-deconstruct (default None, unlimited)'
+    )
+    parser.add_argument(
+        '--stage6-skip',
+        type=int,
+        default=0,
+        help='Skip the first N exact-match posts in deep deconstruction (default 0)'
+    )
+    parser.add_argument(
+        '--stage6-sort-by',
+        type=str,
+        choices=['score', 'time', 'engagement'],
+        default='score',
+        help='Deep deconstruction ordering: score, time, or engagement'
+    )
+    parser.add_argument(
+        '--stage6-api-url',
+        type=str,
+        default='http://192.168.245.150:7000/what/analysis/single',
+        help='Deep deconstruction API endpoint'
+    )
+    parser.add_argument(
+        '--stage6-min-score',
+        type=float,
+        default=0.8,
+        help='Minimum score for deep deconstruction (default 0.8, on a 0-1 scale)'
+    )
+    parser.add_argument(
+        '--enable-stage8',
+        action='store_true',
+        help='Enable similarity analysis (default False; requires stage6)'
+    )
+    parser.add_argument(
+        '--stage8-weight-embedding',
+        type=float,
+        default=0.5,
+        help='Embedding-model weight for similarity analysis (default 0.5)'
+    )
+    parser.add_argument(
+        '--stage8-weight-semantic',
+        type=float,
+        default=0.5,
+        help='LLM-model weight for similarity analysis (default 0.5)'
+    )
+    parser.add_argument(
+        '--stage8-max-workers',
+        type=int,
+        default=5,
+        help='Similarity analysis concurrency (default 5)'
+    )
+    parser.add_argument(
+        '--stage8-min-similarity',
+        type=float,
+        default=0.0,
+        help='Minimum similarity threshold for similarity analysis (default 0.0)'
+    )
+
+    args = parser.parse_args()
+
+    # Create the system instance
+    system = EnhancedSearchV2(
+        how_json_path=args.how_json,
+        openrouter_api_key=args.api_key,
+        output_dir=args.output_dir,
+        top_n=args.top_n,
+        max_total_searches=args.max_total_searches,
+        search_max_workers=args.search_workers,
+        max_searches_per_feature=args.max_searches_per_feature,
+        max_searches_per_base_word=args.max_searches_per_base_word,
+        enable_evaluation=args.enable_stage5,
+        evaluation_max_workers=args.stage5_max_workers,
+        evaluation_max_notes_per_query=args.stage5_max_notes,
+        enable_deep_analysis=args.enable_stage6,
+        deep_analysis_only=args.stage6_only,
+        deep_analysis_max_workers=args.stage6_max_workers,
+        deep_analysis_max_notes=args.stage6_max_notes,
+        deep_analysis_skip_count=args.stage6_skip,
+        deep_analysis_sort_by=args.stage6_sort_by,
+        deep_analysis_api_url=args.stage6_api_url,
+        deep_analysis_min_score=args.stage6_min_score,
+        enable_similarity_analysis=args.enable_stage8,
+        similarity_weight_embedding=args.stage8_weight_embedding,
+        similarity_weight_semantic=args.stage8_weight_semantic,
+        similarity_max_workers=args.stage8_max_workers,
+        similarity_min_similarity=args.stage8_min_similarity
+    )
+
+    # Run the full pipeline
+    system.run_full_pipeline()
+
+
+if __name__ == '__main__':
+    # When run as a script, add the project root to the Python path
+    import sys
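+    # Three dirname() calls assume this file sits two directories below the
+    # project root (e.g. src/pipeline/enhanced_search_v2.py; path illustrative).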
+    project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+    if project_root not in sys.path:
+        sys.path.insert(0, project_root)
+
+    main()