| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429 |
- #!/usr/bin/env python3
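"""
Script-result visualization tool.

Generates one standalone HTML page for every entry in the ``results`` array
of the input JSON file. Each page offers four switchable tab views: topic
points, paragraphs, script points, and the relationship graph.
"""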
- import json
- import argparse
- import sys
- from pathlib import Path
- from datetime import datetime
- from typing import List, Dict, Any, Optional
- import re
- import html as html_module
# Make the project root importable so the `static` package can be resolved
# regardless of the directory this script is launched from.
PROJECT_ROOT = Path(__file__).parent.parent
_project_root_entry = str(PROJECT_ROOT)
if _project_root_entry not in sys.path:
    sys.path.insert(0, _project_root_entry)
- # 导入拆分后的tab模块
- from static.visualize.tab1 import generate_tab1_content
- from static.visualize.tab2 import generate_tab2_content
- from static.visualize.tab3 import generate_tab3_content
- from static.visualize.tab5 import generate_tab5_content
- class ScriptResultVisualizer:
- """脚本结果可视化器"""
- def __init__(self, json_file: str = None):
- """
- 初始化可视化器
- Args:
- json_file: JSON文件路径
- """
- if json_file is None:
- self.json_file = None
- else:
- self.json_file = Path(json_file)
- if not self.json_file.is_absolute():
- self.json_file = Path.cwd() / json_file
- def load_json_data(self, file_path: Path) -> Optional[Dict[str, Any]]:
- """
- 加载JSON文件
- Args:
- file_path: JSON文件路径
- Returns:
- JSON数据字典,加载失败返回None
- """
- try:
- with open(file_path, 'r', encoding='utf-8') as f:
- return json.load(f)
- except Exception as e:
- print(f"加载文件失败 {file_path}: {e}")
- return None
- def generate_tab1_content(self, data: Dict[str, Any]) -> str:
- """生成Tab1内容:选题、灵感点、目的点、关键点"""
- return generate_tab1_content(data)
- def generate_tab2_content(self, data: Dict[str, Any]) -> str:
- """生成Tab2内容:段落"""
- return generate_tab2_content(data)
- def generate_tab3_content(self, data: Dict[str, Any]) -> str:
- """生成Tab3内容:按层次展示(实质/形式 → 具体元素/具体概念/抽象概念 → 树形展示)"""
- return generate_tab3_content(data)
- def generate_tab5_content(self, data: Dict[str, Any]) -> str:
- """生成tab5内容:实质点与灵感点、目的点、关键点的关系连线图"""
- return generate_tab5_content(data)
- def build_element_index(self, data: Dict[str, Any]) -> Dict[str, Any]:
- """
- 构建全局元素索引(包含实质列表和形式列表)
- Args:
- data: JSON数据
- Returns:
- 元素索引字典 {element_id: element_info}
- """
- element_index = {}
- if '脚本理解' not in data:
- return element_index
- script = data['脚本理解']
- # 处理实质列表
- substance_list = script.get('实质列表', [])
- for elem in substance_list:
- elem_id = str(elem.get('id', ''))
- if elem_id:
- element_index[elem_id] = {
- 'id': elem_id,
- 'name': elem.get('名称', ''),
- 'description': elem.get('描述', ''),
- 'type': '实质',
- 'dimension': elem.get('维度', {}),
- 'category': elem.get('分类', {}),
- 'full_data': elem
- }
- # 处理形式列表
- form_list = script.get('形式列表', [])
- for elem in form_list:
- elem_id = str(elem.get('id', ''))
- if elem_id:
- element_index[elem_id] = {
- 'id': elem_id,
- 'name': elem.get('名称', ''),
- 'description': elem.get('描述', ''),
- 'type': '形式',
- 'dimension': elem.get('维度', {}),
- 'category': elem.get('分类', {}),
- 'full_data': elem
- }
- return element_index
- def highlight_element_references(self, text: str, element_index: Dict[str, Any]) -> str:
- """
- 在文本中标记元素引用,使其可点击查看详情
- Args:
- text: 待处理的文本
- element_index: 全局元素索引
- Returns:
- 处理后的HTML文本
- """
- if not text or not element_index:
- return html_module.escape(str(text))
- result = html_module.escape(str(text))
- # 按元素ID长度降序排序,避免短ID覆盖长ID (如 "1" 和 "10")
- sorted_ids = sorted(element_index.keys(), key=lambda x: len(x), reverse=True)
- for elem_id in sorted_ids:
- elem = element_index[elem_id]
- elem_name = elem.get('name', '')
- # 匹配 #ID 格式 (如 "#24")
- pattern_id = f'#{elem_id}\\b'
- replacement_id = f'<span class="element-ref clickable" onclick="showElementDetail(\'{elem_id}\')" title="{html_module.escape(elem_name)}">#{elem_id}</span>'
- result = re.sub(pattern_id, replacement_id, result)
- # 匹配元素名称 (完整词匹配)
- if elem_name:
- pattern_name = f'\\b{re.escape(elem_name)}\\b'
- replacement_name = f'<span class="element-ref clickable" onclick="showElementDetail(\'{elem_id}\')" title="#{elem_id}">{elem_name}</span>'
- result = re.sub(pattern_name, replacement_name, result)
- return result
- def format_element_id_list(self, id_list, element_index: Dict[str, Any]) -> str:
- """
- 将元素ID列表格式化为可点击的HTML标签
- Args:
- id_list: 元素ID列表或单个ID
- element_index: 全局元素索引
- Returns:
- HTML字符串
- """
- if not id_list:
- return ''
- html = '<div class="detail-content">\n'
- # 处理单个ID或列表
- ids = [id_list] if not isinstance(id_list, list) else id_list
- for elem_id in ids:
- elem_id_str = str(elem_id)
- if elem_id_str in element_index:
- elem = element_index[elem_id_str]
- elem_name = elem.get('name', '')
- html += f'<span class="element-ref-tag clickable" onclick="showElementDetail(\'{elem_id_str}\')" title="{html_module.escape(elem_name)}">#{elem_id_str}</span>\n'
- else:
- html += f'<span class="detail-tag">#{elem_id_str}</span>\n'
- html += '</div>\n'
- return html
- def generate_html(self, data: Dict[str, Any], json_filename: str) -> str:
- """生成完整的HTML页面"""
- # 构建全局元素索引
- element_index = self.build_element_index(data)
- # 开始构建HTML
- html = '<!DOCTYPE html>\n'
- html += '<html lang="zh-CN">\n'
- html += '<head>\n'
- html += ' <meta charset="UTF-8">\n'
- html += ' <meta name="viewport" content="width=device-width, initial-scale=1.0">\n'
- html += f' <title>脚本结果可视化 - {json_filename}</title>\n'
- html += ' <link rel="stylesheet" href="visualize/style.css">\n'
- html += '</head>\n'
- html += '<body>\n'
- html += '<div class="container">\n'
- # 页眉
- html += '<div class="header">\n'
- html += ' <h1>脚本结果可视化</h1>\n'
- # 显示选题主题
- if '选题描述' in data and '主题' in data['选题描述']:
- html += f' <div class="subtitle">{html_module.escape(data["选题描述"]["主题"])}</div>\n'
- html += f' <div class="subtitle">{json_filename}</div>\n'
- html += f' <div class="subtitle">生成时间: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}</div>\n'
- html += '</div>\n'
- # Tab导航
- html += '<div class="tabs">\n'
- html += ' <button class="tab active" onclick="switchTab(\'tab1\')">选题点</button>\n'
- html += ' <button class="tab" onclick="switchTab(\'tab2\')">段落</button>\n'
- html += ' <button class="tab" onclick="switchTab(\'tab3\')">脚本点</button>\n'
- html += ' <button class="tab" onclick="switchTab(\'tab5\')">关系图</button>\n'
- html += '</div>\n'
- # 主内容
- html += '<div class="content">\n'
- # Tab1内容
- html += self.generate_tab1_content(data)
- # Tab2内容
- html += self.generate_tab2_content(data)
- # Tab3内容
- html += self.generate_tab3_content(data)
- # tab5内容
- html += self.generate_tab5_content(data)
- html += '</div>\n'
- # 页脚
- html += '<div class="footer">\n'
- html += f' <p>生成时间: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}</p>\n'
- html += '</div>\n'
- html += '</div>\n'
- # JavaScript (传递元素索引)
- html += '<script>\n'
- html += f'const elementIndex = {json.dumps(element_index, ensure_ascii=False)};\n'
- html += '</script>\n'
- html += '<script src="visualize/script.js"></script>\n'
- html += '</body>\n'
- html += '</html>\n'
- return html
- def save_all_html(self, output_dir: str | Path | None = None) -> List[str]:
- """
- 基于 output_demo_script.json,为其中每个视频生成一个独立的 HTML 页面。
- 仅支持这种结构:
- {
- "results": [
- {
- "video_data": {...},
- "script_result": {...}
- },
- ...
- ]
- }
- """
- if self.json_file is None:
- print("❌ 错误: 未指定JSON文件")
- return []
- # 加载JSON数据
- data = self.load_json_data(self.json_file)
- if data is None:
- return []
- results = data.get("results") or []
- if not isinstance(results, list) or not results:
- print("⚠️ JSON 中未找到有效的 results 数组,期望为 output_demo_script.json 结构")
- return []
- # 确定输出目录
- if output_dir is None:
- # 默认输出到examples/html目录
- output_dir = Path(__file__).parent / "html"
- else:
- output_dir = Path(output_dir)
- if not output_dir.is_absolute():
- output_dir = Path.cwd() / output_dir
- # 创建输出目录
- output_dir.mkdir(parents=True, exist_ok=True)
- # 确保样式和脚本文件可用:从 static/visualize 拷贝到 输出目录/visualize
- static_visualize_dir = PROJECT_ROOT / "static" / "visualize"
- target_visualize_dir = output_dir / "visualize"
- if static_visualize_dir.exists() and static_visualize_dir.is_dir():
- import shutil
- target_visualize_dir.mkdir(parents=True, exist_ok=True)
- for item in static_visualize_dir.iterdir():
- dst = target_visualize_dir / item.name
- if item.is_file():
- shutil.copy2(item, dst)
- generated_paths: List[str] = []
- print(f"📁 检测到 output_demo_script 格式,包含 {len(results)} 条结果")
- for idx, item in enumerate(results, start=1):
- script_data = item.get("script_result")
- if not isinstance(script_data, dict):
- print(f"⚠️ 跳过第 {idx} 条结果:缺少 script_result 字段或结构不正确")
- continue
- # 从 what_deconstruction_result 中获取三点解构数据并合并到 script_data
- what_result = item.get("what_deconstruction_result", {})
- if isinstance(what_result, dict) and "三点解构" in what_result:
- deconstruction = what_result["三点解构"]
- # 将三点解构数据合并到 script_data 顶层,供 tab1 使用
- if "灵感点" in deconstruction:
- script_data["灵感点"] = deconstruction["灵感点"]
- if "目的点" in deconstruction:
- script_data["目的点"] = deconstruction["目的点"]
- if "关键点" in deconstruction:
- script_data["关键点"] = deconstruction["关键点"]
- video_data = item.get("video_data") or {}
- video_id = video_data.get("video_id") or video_data.get("channel_content_id") # 兼容旧字段名
- # 用于 HTML 内部展示的"文件名"标签
- json_label = f"{self.json_file.name}#{idx}"
- # 生成输出文件名(优先使用 video_id,回退到序号)
- if video_id:
- output_filename = f"script_result_{video_id}.html"
- else:
- output_filename = f"{self.json_file.stem}_{idx}.html"
- output_path = output_dir / output_filename
- html_content = self.generate_html(script_data, json_label)
- with open(output_path, "w", encoding="utf-8") as f:
- f.write(html_content)
- generated_paths.append(str(output_path))
- print(f"✅ HTML文件已生成: {output_path}")
- if not generated_paths:
- print("⚠️ 未能从 JSON 中生成任何 HTML 文件")
- return generated_paths
def main():
    """Command-line entry point.

    Parses the optional JSON path and output directory, then generates one
    HTML page per result via ScriptResultVisualizer.

    Returns:
        0 when at least one HTML file was generated, 1 otherwise.
    """
    # The default input lives next to this script; the help text below is
    # derived from this name so it cannot drift from the actual behaviour.
    default_json_name = "output_decode_result.json"

    parser = argparse.ArgumentParser(
        description='脚本结果可视化工具 - 基于 output_demo_script.json 为每个视频生成独立的HTML页面',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=f"""
使用示例:
# 在当前 examples 目录下使用默认的 {default_json_name} 并输出到 examples/html
python visualize_script_results.py
# 指定 JSON 文件
python visualize_script_results.py examples/output_demo_script.json
# 指定 JSON 文件和输出目录
python visualize_script_results.py examples/output_demo_script.json --output-dir examples/html_script
"""
    )
    parser.add_argument(
        'json_file',
        type=str,
        nargs='?',
        help=f'JSON文件路径(默认为 examples/{default_json_name})'
    )
    parser.add_argument(
        '-o', '--output-dir',
        type=str,
        default=None,
        help='输出目录路径(默认: examples/html)'
    )
    args = parser.parse_args()

    # Resolve the input path: explicit argument first, bundled default otherwise.
    if args.json_file:
        json_path = Path(args.json_file)
        if not json_path.is_absolute():
            json_path = Path.cwd() / json_path
    else:
        json_path = Path(__file__).parent / default_json_name

    print("🚀 开始生成脚本结果可视化...")
    print(f"📁 JSON文件: {json_path}")
    print(f"📄 输出目录: {args.output_dir or (Path(__file__).parent / 'html')}")
    print()

    visualizer = ScriptResultVisualizer(json_file=str(json_path))
    generated_files = visualizer.save_all_html(output_dir=args.output_dir)
    if not generated_files:
        # save_all_html has already printed the reason; report failure to the shell.
        return 1

    print()
    print(f"🎉 完成! 共生成 {len(generated_files)} 个HTML文件")
    # Point the user at one of the generated pages.
    print(f"📄 示例: 请在浏览器中打开: {generated_files[0]}")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|