"""
Visualization tool for classification-optimization results.

Features:
1. Read the optimized clustered data (optimized_clustered_data_*.json)
2. Generate an interactive HTML tree visualization
3. Support viewing post details and the information of all points
4. Visually distinguish original categories, LLM abstraction and LLM subdivision
"""

import json
import os
from typing import Dict, Any, List, Optional
from pathlib import Path
from datetime import datetime


class ClassificationTreeVisualizer:
    """Classification tree visualization tool."""

    def __init__(self):
        """Initialise every data container as an empty dict."""
        # Loaded post JSON keyed by post id; filled lazily by load_post_data.
        self.post_cache: Dict[str, Dict[str, Any]] = {}
        self.xuanti_point_map: Dict[str, Dict[str, Any]] = {}
        self.clustered_data: Dict[str, Any] = {}
        self.dimension_associations: Dict[str, Any] = {}
        self.intra_dimension_associations: Dict[str, Any] = {}

    def load_post_data(self, post_id: str, posts_dir: Path) -> Optional[Dict[str, Any]]:
        """Load the detailed data of a post.

        Args:
            post_id: Identifier of the post; the file read is
                ``posts_dir / f"{post_id}.json"``.
            posts_dir: Directory containing the per-post JSON files.

        Returns:
            The parsed JSON content of the post file, or ``None`` when the
            file does not exist or loading it raises an exception.
        """
        # Serve repeated requests from the in-memory cache.
        if post_id in self.post_cache:
            return self.post_cache[post_id]

        post_file = posts_dir / f"{post_id}.json"
        if not post_file.exists():
            return None

        try:
            with open(post_file, 'r', encoding='utf-8') as f:
                post_data = json.load(f)
            # Only successful loads are cached; failures are retried next call.
            self.post_cache[post_id] = post_data
            return post_data
        except Exception as e:
            # Best-effort load: report the failure and continue without the post.
            print(f"加载帖子 {post_id} 失败: {e}")
            return None

    def generate_tree_node_html(
        self,
        node_name: str,
        node_data: Dict[str, Any],
        level: int,
        point_type: str,
        path: List[str]
    ) -> str:
        """Recursively generate the HTML of a tree node - supports partially subdivided structures."""
        # Imported under an alias because the local variable `html` below
        # would otherwise shadow the module name.
        import html as html_module

        node_name_escaped = html_module.escape(node_name)
        current_path = path + [node_name]
        # Node id: point type plus the joined path, with '/' and spaces
        # replaced by underscores.
        node_id = f"{point_type}_{'_'.join(current_path)}".replace('/', '_').replace(' ', '_')

        meta = node_data.get('_meta', {})
        source = meta.get('分类来源', '')

        # Check whether the node has retained points
        has_kept_points = '点列表' in node_data and len(node_data.get('点列表', [])) > 0

        # Check whether the node has sub-categories: any dict-valued key
        # other than the reserved bookkeeping keys counts as a child.
        has_children = False
        for key in node_data.keys():
            if key not in ['_meta', '点列表', '帖子数', '点数', '帖子列表'] and isinstance(node_data[key], dict):
                has_children = True
                break

        # Determine the node style
        if source == 'LLM抽象' or source == 'LLM细分':
            node_class = f"tree-node tree-node-llm level-{level}"
        else:
            node_class = f"tree-node tree-node-original level-{level}"

        html = f'