How解构结果可视化

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Render "how" deconstruction results as a single tabbed HTML page.

Every ``*_how.json`` file found in the input directory becomes one tab of the
generated page.  All values taken from the JSON data are HTML-escaped before
they are placed into the markup.
"""

import html
import json
import math
import sys
from collections import Counter
from pathlib import Path
from typing import Dict, List

# Add the project root directory to the import path.
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))

_DEFAULT_RELATION = "unrelated"
_DEFAULT_COLOR = "#9ca3af"

# Relation type -> "#rrggbb" colour used for badges and tinted backgrounds.
_RELATION_COLORS = {
    "same": "#10b981",          # green  - synonymous
    "contains": "#3b82f6",      # blue   - contains
    "contained_by": "#8b5cf6",  # purple - contained by
    "coordinate": "#f59e0b",    # orange - same level
    "overlap": "#ec4899",       # pink   - partial overlap
    "related": "#6366f1",       # indigo - related
    "unrelated": _DEFAULT_COLOR,  # grey - unrelated
}

# Relation type -> label shown in the page (runtime text, kept in Chinese).
_RELATION_LABELS = {
    "same": "同义",
    "contains": "包含",
    "contained_by": "被包含",
    "coordinate": "同级",
    "overlap": "部分重叠",
    "related": "相关",
    "unrelated": "无关",
}

# Kept outside the page f-string so the CSS/JS braces need no doubling.
_PAGE_STYLE = """
body { font-family: -apple-system, "Segoe UI", "PingFang SC", sans-serif;
       margin: 0; padding: 20px; background: #f9fafb; color: #111827; }
.tabs { display: flex; flex-wrap: wrap; gap: 4px; margin-bottom: 16px;
        border-bottom: 2px solid #e5e7eb; }
.tab-button { padding: 8px 16px; border: none; background: #e5e7eb;
              cursor: pointer; border-radius: 6px 6px 0 0; }
.tab-button.active { background: #3b82f6; color: #ffffff; }
.tab-content { display: none; }
.tab-content.active { display: block; }
.post-meta, .inspiration-point, .step-section, .feature-section {
    background: #ffffff; border-radius: 8px; padding: 12px 16px;
    margin-bottom: 12px; }
.stat-badge, .relation-badge { display: inline-block; color: #ffffff;
    border-radius: 10px; padding: 2px 8px; margin-right: 6px;
    font-size: 12px; }
.match-item { padding: 8px 12px; margin: 8px 0; border-radius: 4px; }
.match-header > span { margin-right: 8px; }
.persona-name { font-weight: 600; }
"""

_PAGE_SCRIPT = """
function switchTab(index) {
    document.querySelectorAll('.tab-button').forEach(function (button, i) {
        button.classList.toggle('active', i === index);
    });
    document.querySelectorAll('.tab-content').forEach(function (panel, i) {
        panel.classList.toggle('active', i === index);
    });
}
"""


def _escape(value: object) -> str:
    """Return *value* as HTML-safe text; ``None`` becomes an empty string."""
    if value is None:
        return ""
    return html.escape(str(value), quote=True)


def _as_dict(value: object) -> Dict:
    """Return *value* when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _as_list(value: object) -> List:
    """Return *value* when it is a list, otherwise an empty list."""
    return value if isinstance(value, list) else []


def _as_float(value: object) -> float:
    """Coerce *value* to a finite float, falling back to 0.0."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        return 0.0
    return number if math.isfinite(number) else 0.0


def _match_result(match: Dict) -> Dict:
    """Return the nested ``匹配结果`` dict of a match entry (empty if absent)."""
    return _as_dict(match.get("匹配结果"))


def _match_score(match: Dict) -> float:
    """Return the numeric score of a match entry, 0.0 when missing/invalid."""
    return _as_float(_match_result(match).get("score", 0.0))


def _match_relation(match: Dict) -> str:
    """Return the relation type of a match entry, defaulting to unrelated."""
    return str(_match_result(match).get("relation") or _DEFAULT_RELATION)


def get_relation_color(relation: str) -> str:
    """Return the ``#rrggbb`` colour for a relation type.

    Unknown relation types get the grey colour used for ``unrelated``.
    """
    return _RELATION_COLORS.get(relation, _DEFAULT_COLOR)


def get_relation_label(relation: str) -> str:
    """Return the display label for a relation type.

    Unknown relation types are returned unchanged.
    """
    return _RELATION_LABELS.get(relation, relation)


def generate_match_item_html(match: Dict) -> str:
    """Build the HTML for one match entry.

    Args:
        match: Dict with ``人设特征名称`` and a nested ``匹配结果`` dict holding
            ``relation``, ``score`` and ``explanation``.

    Returns:
        An HTML fragment; the background tint grows with the score.
    """
    relation = _match_relation(match)
    score = _match_score(match)
    color = get_relation_color(relation)

    # Split "#rrggbb" into its three channels for the rgba() background.
    red, green, blue = (int(color[i:i + 2], 16) for i in (1, 3, 5))
    # Clamp to [0, 1] so out-of-range scores never yield an invalid alpha.
    opacity = max(0.0, min(score, 1.0)) * 0.15
    bg_color = f"rgba({red}, {green}, {blue}, {opacity:.3f})"

    persona_name = _escape(match.get("人设特征名称", ""))
    # The label falls back to the raw relation string, so it must be escaped.
    label = _escape(get_relation_label(relation))
    explanation = _escape(_match_result(match).get("explanation", ""))

    return f"""
<div class="match-item" style="border-left: 4px solid {color}; background-color: {bg_color};">
<div class="match-header">
<span class="persona-name">{persona_name}</span> <span class="relation-badge" style="background-color: {color};">{label}</span> <span class="score">分数: {score:.2f}</span>
</div>
<div class="match-explanation">{explanation}</div>
</div>
"""


def generate_feature_html(feature_data: Dict) -> str:
    """Build the HTML for one feature and all of its matches.

    Args:
        feature_data: Dict with ``特征名称`` and a ``匹配结果`` list of match
            entries.

    Returns:
        An HTML fragment with per-relation counts followed by the matches
        ordered from highest to lowest score.
    """
    feature_name = _escape(feature_data.get("特征名称", ""))
    matches = [m for m in _as_list(feature_data.get("匹配结果")) if isinstance(m, dict)]

    ranked_matches = sorted(matches, key=_match_score, reverse=True)
    relation_counts = Counter(_match_relation(m) for m in matches)

    # most_common() orders by count, highest first; ties keep first-seen order.
    badges = "".join(
        f'<span class="stat-badge" style="background-color: '
        f'{get_relation_color(relation)};">'
        f"{_escape(get_relation_label(relation))}: {count}</span>"
        for relation, count in relation_counts.most_common()
    )
    stats_html = f'<div class="relation-stats">{badges}</div>'
    matches_html = "".join(generate_match_item_html(m) for m in ranked_matches)

    return f"""
<div class="feature-section">
<h4>特征: {feature_name}</h4>
{stats_html}
<div class="match-list">
{matches_html}
</div>
</div>
"""


def generate_inspiration_point_html(point_data: Dict) -> str:
    """Build the HTML for one inspiration point.

    Args:
        point_data: Dict with ``名称``, ``描述`` and ``how步骤列表``; each step
            has ``步骤名称`` and ``特征列表``.

    Returns:
        An HTML fragment containing the point header and one section per step.
    """
    name = _escape(point_data.get("名称", ""))
    desc = _escape(point_data.get("描述", ""))

    step_sections = []
    for step in _as_list(point_data.get("how步骤列表")):
        step = _as_dict(step)
        step_name = _escape(step.get("步骤名称", ""))
        features_html = "".join(
            generate_feature_html(_as_dict(feature))
            for feature in _as_list(step.get("特征列表"))
        )
        step_sections.append(f"""
<div class="step-section">
<h3>{step_name}</h3>
{features_html}
</div>
""")
    steps_html = "".join(step_sections)

    return f"""
<div class="inspiration-point">
<h2>{name}</h2>
<p class="inspiration-desc">{desc}</p>
{steps_html}
</div>
"""


def generate_post_content_html(post_data: Dict) -> str:
    """Build the content HTML for one post (no surrounding page structure).

    Args:
        post_data: Dict with ``帖子id``, ``帖子详情`` (``publish_time``,
            ``like_count``, ``link``) and ``how解构结果`` (``灵感点列表``).

    Returns:
        An HTML fragment with the post metadata and its inspiration points.
    """
    post_id = _escape(post_data.get("帖子id", ""))
    post_detail = _as_dict(post_data.get("帖子详情"))
    publish_time = _escape(post_detail.get("publish_time", ""))
    like_count = _escape(post_detail.get("like_count", 0))
    link = post_detail.get("link", "")

    # Only plain web links become anchors; other schemes (e.g. javascript:)
    # would execute in the viewer's browser when clicked.
    link_html = ""
    if isinstance(link, str) and link.startswith(("http://", "https://")):
        link_html = (
            f'<a class="post-link" href="{_escape(link)}" target="_blank" '
            f'rel="noopener noreferrer">查看原帖 →</a>'
        )

    how_result = _as_dict(post_data.get("how解构结果"))
    inspiration_html = "".join(
        generate_inspiration_point_html(_as_dict(point))
        for point in _as_list(how_result.get("灵感点列表"))
    )

    return f"""
<div class="post-meta">
<p>帖子ID: {post_id}</p>
<p>发布时间: {publish_time}</p>
<p>点赞数: {like_count}</p>
{link_html}
</div>
{inspiration_html} """


def generate_combined_html(posts_data: List[Dict]) -> str:
    """Build one complete HTML page with a tab per post.

    Args:
        posts_data: Post dicts as loaded from the ``*_how.json`` files.

    Returns:
        A full HTML document; the first post's tab is active initially.
    """
    tab_buttons = []
    tab_panels = []
    for i, post in enumerate(posts_data):
        post = _as_dict(post)
        # "or" also covers an empty or null title, which would otherwise
        # produce a tab button without any text.
        title = _escape(_as_dict(post.get("帖子详情")).get("title") or "无标题")
        button_class = "tab-button active" if i == 0 else "tab-button"
        panel_class = "tab-content active" if i == 0 else "tab-content"

        tab_buttons.append(
            f'<button class="{button_class}" onclick="switchTab({i})">'
            f"{title}</button>\n"
        )
        content = generate_post_content_html(post)
        tab_panels.append(f"""
<div class="{panel_class}" id="tab-{i}">
{content}
</div>
""")

    tabs_html = "".join(tab_buttons)
    contents_html = "".join(tab_panels)

    return f"""<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>How解构结果可视化</title>
<style>{_PAGE_STYLE}</style>
</head>
<body>
<h1>How 解构结果可视化</h1>
<div class="tabs">
{tabs_html}
</div>
{contents_html}
<script>{_PAGE_SCRIPT}</script>
</body>
</html>
"""


def main():
    """Read all ``*_how.json`` files and write the combined HTML page."""
    # Input and output paths.
    script_dir = Path(__file__).parent
    project_root = script_dir.parent.parent
    data_dir = project_root / "data" / "data_1117"
    input_dir = data_dir / "当前帖子_how解构结果"
    output_file = data_dir / "当前帖子_how解构结果_可视化.html"

    print(f"读取 how 解构结果: {input_dir}")

    # Sorted so the tab order is the same on every run and platform.
    json_files = sorted(input_dir.glob("*_how.json"))
    print(f"找到 {len(json_files)} 个文件\n")
    if not json_files:
        # Nothing to render; do not write an empty page.
        return

    # Load every post.
    posts_data = []
    for i, file_path in enumerate(json_files, 1):
        print(f"读取文件 [{i}/{len(json_files)}]: {file_path.name}")
        with open(file_path, "r", encoding="utf-8") as f:
            posts_data.append(json.load(f))

    # Build the combined page.
    print("\n生成合并的 HTML...")
    html_content = generate_combined_html(posts_data)

    # Save it.
    print(f"保存到: {output_file}")
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(html_content)

    print("\n完成! 可视化文件已保存")
    print(f"请在浏览器中打开: {output_file}")


if __name__ == "__main__":
    main()

特征: {feature_name}

{step_name}

{name}

How 解构结果可视化