#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Visualize match-graph data as an interactive HTML file.

Input:  JSON files under the ``match_graph`` directory
Output: a single HTML file containing the graphs of all posts,
        switchable via tabs
"""
import json
from pathlib import Path
from typing import Dict, List
import sys

# Add the project root to sys.path so `script.*` imports resolve when this
# file is executed directly as a script.
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))

from script.data_processing.path_config import PathConfig

# NOTE(review): this template appears to have been truncated by text
# extraction — only the text nodes of the original HTML survive (no tags,
# <style>, or <script> sections). generate_combined_html() formats it with
# {tabs_html}, {all_graph_data} and {persona_tree_data}, but only
# {tabs_html} is still visible below — verify against the original file.
HTML_TEMPLATE = ''' 匹配图谱可视化
{tabs_html}

匹配图谱

匹配关系 (0)

点击节点或边查看详情

点击图中的节点或边,这里会显示详细信息

节点

灵感点
目的点
关键点

边

相同(≥0.8)
相似(0.5-0.8)
属于
分类共现(跨)
分类共现(内)
标签共现
视图控制
人设树配置
'''


def generate_combined_html(all_graph_data: List[Dict], persona_tree_data: Dict, output_file: Path):
    """
    Generate one HTML file containing the graphs of every post.

    Args:
        all_graph_data: list of per-post graph data dicts
        persona_tree_data: full persona-tree data (nodes and edges)
        output_file: path of the HTML file to write
    """
    # Build the tab-bar HTML, one tab per post.
    tabs_html = ""
    for i, data in enumerate(all_graph_data):
        post_title = data.get("postTitle", "")
        # Use the post title as the tab label; truncate when too long.
        if post_title:
            tab_name = post_title[:15] + "..." if len(post_title) > 15 else post_title
        else:
            tab_name = f"帖子 {i+1}"
        active_class = "active" if i == 0 else ""
        # NOTE(review): the markup inside this f-string looks stripped by
        # extraction — active_class (and presumably an onclick handler using
        # i) are computed but unused here; confirm against the original.
        tabs_html += f'{tab_name}\n'

    # Render the final HTML from the template.
    html_content = HTML_TEMPLATE.format(
        tabs_html=tabs_html,
        all_graph_data=json.dumps(all_graph_data, ensure_ascii=False),
        persona_tree_data=json.dumps(persona_tree_data, ensure_ascii=False)
    )

    with open(output_file, "w", encoding="utf-8") as f:
        f.write(html_content)


def main():
    # Resolve account-specific input/output locations.
    config = PathConfig()
    print(f"账号: {config.account_name}")
    print(f"输出版本: {config.output_version}")
    print()

    # Input directory with one *_match_graph.json per post.
    match_graph_dir = config.intermediate_dir / "match_graph"
    # Single combined HTML output file.
    output_file = config.intermediate_dir / "match_graph.html"
    print(f"输入目录: {match_graph_dir}")
    print(f"输出文件: {output_file}")
    print()

    # Read the persona-tree intermediate data (optional: falls back to an
    # empty tree when the file does not exist).
    persona_tree_file = config.intermediate_dir / "persona_tree.json"
    persona_tree_data = {"nodes": [], "edges": []}
    if persona_tree_file.exists():
        print(f"读取人设树数据: {persona_tree_file.name}")
        with open(persona_tree_file, "r", encoding="utf-8") as f:
            tree_data = json.load(f)
        persona_tree_data["nodes"] = tree_data.get("nodes", [])
        persona_tree_data["edges"] = tree_data.get("edges", [])
        # Count nodes by their Chinese-keyed type field ("节点类型"):
        # "分类" = category, "标签" = tag.
        category_count = len([n for n in persona_tree_data["nodes"] if n.get("节点类型") == "分类"])
        tag_count = len([n for n in persona_tree_data["nodes"] if n.get("节点类型") == "标签"])
        print(f" 分类节点: {category_count}, 标签节点: {tag_count}")
        print(f" 边数: {len(persona_tree_data['edges'])}")
        print()

    # Read every per-post match-graph file, in sorted (stable) order.
    graph_files = sorted(match_graph_dir.glob("*_match_graph.json"))
    print(f"找到 {len(graph_files)} 个匹配图谱文件")

    all_graph_data = []
    for i, graph_file in enumerate(graph_files, 1):
        print(f" [{i}/{len(graph_files)}] 读取: {graph_file.name}")
        with open(graph_file, "r", encoding="utf-8") as f:
            match_graph_data = json.load(f)
        # Extract only the fields the HTML needs; the source JSON uses
        # Chinese keys ("说明" = meta, "节点列表" = nodes, "边列表" = edges).
        graph_data = {
            "postId": match_graph_data["说明"]["帖子ID"],
            "postTitle": match_graph_data["说明"].get("帖子标题", ""),
            "stats": match_graph_data["说明"]["统计"],
            "nodes": match_graph_data["节点列表"],
            "edges": match_graph_data["边列表"]
        }
        all_graph_data.append(graph_data)

    # Generate the combined HTML file.
    print("\n生成HTML文件...")
    generate_combined_html(all_graph_data, persona_tree_data, output_file)

    print("\n" + "="*60)
    print("处理完成!")
    print(f"输出文件: {output_file}")


if __name__ == "__main__":
    main()