visualize_script_results_v2.py 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
#!/usr/bin/env python3
"""
脚本结果可视化工具 V2
功能:为 output_demo_script_v2.json 中的每个视频生成独立的HTML可视化页面
交互形式:卡片+点击详情
"""
import argparse
import html as html_module
import json
import shutil
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional

# Make the project root importable before loading the project-local tab modules.
PROJECT_ROOT = Path(__file__).parent.parent
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

# Tab content generators (project-local; require PROJECT_ROOT on sys.path).
from static.visualize_v2.tab1 import generate_tab1_content
from static.visualize_v2.tab2 import generate_tab2_content
from static.visualize_v2.tab3 import generate_tab3_content
from static.visualize_v2.tab4 import generate_tab4_content
  23. class ScriptResultVisualizerV2:
  24. """脚本结果可视化器 V2"""
  25. def __init__(self, json_file: str = None):
  26. """
  27. 初始化可视化器
  28. Args:
  29. json_file: JSON文件路径
  30. """
  31. if json_file is None:
  32. self.json_file = None
  33. else:
  34. self.json_file = Path(json_file)
  35. if not self.json_file.is_absolute():
  36. self.json_file = Path.cwd() / json_file
  37. def load_json_data(self, file_path: Path) -> Optional[Dict[str, Any]]:
  38. """
  39. 加载JSON文件
  40. Args:
  41. file_path: JSON文件路径
  42. Returns:
  43. JSON数据字典,加载失败返回None
  44. """
  45. try:
  46. with open(file_path, 'r', encoding='utf-8') as f:
  47. return json.load(f)
  48. except Exception as e:
  49. print(f"加载文件失败 {file_path}: {e}")
  50. return None
  51. def generate_html(self, data: Dict[str, Any], video_data: Dict[str, Any], json_filename: str) -> str:
  52. """生成完整的HTML页面"""
  53. # 开始构建HTML
  54. html = '<!DOCTYPE html>\n'
  55. html += '<html lang="zh-CN">\n'
  56. html += '<head>\n'
  57. html += ' <meta charset="UTF-8">\n'
  58. html += ' <meta name="viewport" content="width=device-width, initial-scale=1.0">\n'
  59. html += f' <title>脚本结果可视化 V2 - {json_filename}</title>\n'
  60. html += ' <link rel="stylesheet" href="visualize/style.css">\n'
  61. html += '</head>\n'
  62. html += '<body>\n'
  63. html += '<div class="container">\n'
  64. # 页眉
  65. html += '<div class="header">\n'
  66. html += ' <h1>脚本结果可视化 V2</h1>\n'
  67. # 显示视频信息
  68. video_title = video_data.get("title", "")
  69. video_id = video_data.get("video_id", "")
  70. if video_title:
  71. html += f' <div class="subtitle">{html_module.escape(video_title)}</div>\n'
  72. if video_id:
  73. html += f' <div class="subtitle">视频ID: {video_id}</div>\n'
  74. html += f' <div class="subtitle">{json_filename}</div>\n'
  75. html += f' <div class="subtitle">生成时间: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}</div>\n'
  76. html += '</div>\n'
  77. # Tab导航
  78. html += '<div class="tabs">\n'
  79. html += ' <button class="tab active" onclick="switchTab(\'tab1\')">结构化内容库</button>\n'
  80. html += ' <button class="tab" onclick="switchTab(\'tab2\')">L3单元解构</button>\n'
  81. html += ' <button class="tab" onclick="switchTab(\'tab3\')">整体结构理解</button>\n'
  82. html += ' <button class="tab" onclick="switchTab(\'tab4\')">金句提取</button>\n'
  83. html += '</div>\n'
  84. # 主内容
  85. html += '<div class="content">\n'
  86. # Tab1内容:结构化内容库
  87. html += generate_tab1_content(data)
  88. # Tab2内容:L3单元解构
  89. html += generate_tab2_content(data)
  90. # Tab3内容:整体结构理解
  91. html += generate_tab3_content(data)
  92. # Tab4内容:金句提取
  93. html += generate_tab4_content(data)
  94. html += '</div>\n'
  95. # 页脚
  96. html += '<div class="footer">\n'
  97. html += f' <p>生成时间: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}</p>\n'
  98. html += '</div>\n'
  99. html += '</div>\n'
  100. # JavaScript
  101. html += '<script src="visualize/script.js"></script>\n'
  102. html += '</body>\n'
  103. html += '</html>\n'
  104. return html
  105. def save_all_html(self, output_dir: str | Path | None = None) -> List[str]:
  106. """
  107. 基于 output_demo_script_v2.json,为其中每个视频生成一个独立的 HTML 页面。
  108. 支持结构:
  109. {
  110. "results": [
  111. {
  112. "video_data": {...},
  113. "script_result": {...}
  114. },
  115. ...
  116. ]
  117. }
  118. """
  119. if self.json_file is None:
  120. print("❌ 错误: 未指定JSON文件")
  121. return []
  122. # 加载JSON数据
  123. data = self.load_json_data(self.json_file)
  124. if data is None:
  125. return []
  126. results = data.get("results") or []
  127. if not isinstance(results, list) or not results:
  128. print("⚠️ JSON 中未找到有效的 results 数组")
  129. return []
  130. # 确定输出目录
  131. if output_dir is None:
  132. # 默认输出到examples/html目录
  133. output_dir = Path(__file__).parent / "html"
  134. else:
  135. output_dir = Path(output_dir)
  136. if not output_dir.is_absolute():
  137. output_dir = Path.cwd() / output_dir
  138. # 创建输出目录
  139. output_dir.mkdir(parents=True, exist_ok=True)
  140. # 确保样式和脚本文件可用:从 html/visualize 拷贝到 输出目录/visualize
  141. source_visualize_dir = Path(__file__).parent / "html" / "visualize"
  142. target_visualize_dir = output_dir / "visualize"
  143. if source_visualize_dir.exists() and source_visualize_dir.is_dir():
  144. import shutil
  145. target_visualize_dir.mkdir(parents=True, exist_ok=True)
  146. for item in source_visualize_dir.iterdir():
  147. dst = target_visualize_dir / item.name
  148. if item.is_file():
  149. # 如果源文件和目标文件是同一个,跳过
  150. if item.resolve() != dst.resolve():
  151. shutil.copy2(item, dst)
  152. generated_paths: List[str] = []
  153. print(f"📁 检测到 output_demo_script_v2 格式,包含 {len(results)} 条结果")
  154. for idx, item in enumerate(results, start=1):
  155. script_data = item.get("script_result")
  156. if not isinstance(script_data, dict):
  157. print(f"⚠️ 跳过第 {idx} 条结果:缺少 script_result 字段或结构不正确")
  158. continue
  159. video_data = item.get("video_data") or {}
  160. video_id = video_data.get("video_id") or video_data.get("channel_content_id")
  161. # 用于 HTML 内部展示的"文件名"标签
  162. json_label = f"{self.json_file.name}#{idx}"
  163. # 生成输出文件名:{video_id}_v2.html
  164. if video_id:
  165. output_filename = f"{video_id}_v2.html"
  166. else:
  167. output_filename = f"{self.json_file.stem}_{idx}_v2.html"
  168. output_path = output_dir / output_filename
  169. html_content = self.generate_html(script_data, video_data, json_label)
  170. with open(output_path, "w", encoding="utf-8") as f:
  171. f.write(html_content)
  172. generated_paths.append(str(output_path))
  173. print(f"✅ HTML文件已生成: {output_path}")
  174. if not generated_paths:
  175. print("⚠️ 未能从 JSON 中生成任何 HTML 文件")
  176. return generated_paths
  177. def main():
  178. """主函数"""
  179. # 解析命令行参数
  180. parser = argparse.ArgumentParser(
  181. description='脚本结果可视化工具 V2 - 基于 output_demo_script_v2.json 为每个视频生成独立的HTML页面',
  182. formatter_class=argparse.RawDescriptionHelpFormatter,
  183. epilog="""
  184. 使用示例:
  185. # 在当前 examples 目录下使用默认的 output_demo_script_v2.json 并输出到 examples/html
  186. python visualize_script_results_v2.py
  187. # 指定 JSON 文件
  188. python visualize_script_results_v2.py examples/output_demo_script_v2.json
  189. # 指定 JSON 文件和输出目录
  190. python visualize_script_results_v2.py examples/output_demo_script_v2.json --output-dir examples/html
  191. """
  192. )
  193. parser.add_argument(
  194. 'json_file',
  195. type=str,
  196. nargs='?',
  197. help='JSON文件路径(默认为 examples/output_demo_script_v2.json)'
  198. )
  199. parser.add_argument(
  200. '-o', '--output-dir',
  201. type=str,
  202. default=None,
  203. help='输出目录路径(默认: examples/html)'
  204. )
  205. args = parser.parse_args()
  206. # 确定 JSON 文件路径
  207. if args.json_file:
  208. json_path = Path(args.json_file)
  209. if not json_path.is_absolute():
  210. json_path = Path.cwd() / json_path
  211. else:
  212. # 默认使用 examples/output_demo_script_v2.json
  213. json_path = Path(__file__).parent / "output_demo_script_v2.json"
  214. print("🚀 开始生成脚本结果可视化 V2...")
  215. print(f"📁 JSON文件: {json_path}")
  216. print(f"📄 输出目录: {args.output_dir or (Path(__file__).parent / 'html')}")
  217. print()
  218. visualizer = ScriptResultVisualizerV2(json_file=str(json_path))
  219. generated_files = visualizer.save_all_html(output_dir=args.output_dir)
  220. if generated_files:
  221. print()
  222. print(f"🎉 完成! 共生成 {len(generated_files)} 个HTML文件")
  223. # 提示其中一个示例文件
  224. print(f"📄 示例: 请在浏览器中打开: {generated_files[0]}")
  225. if __name__ == "__main__":
  226. main()