visualize_script_results.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429
  1. #!/usr/bin/env python3
  2. """
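  3. Script result visualization tool.
  4. Purpose: generate one standalone HTML page for each entry of the "results" array in the input JSON file; each page provides four switchable tab views.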
  5. """
  6. import json
  7. import argparse
  8. import sys
  9. from pathlib import Path
  10. from datetime import datetime
  11. from typing import List, Dict, Any, Optional
  12. import re
  13. import html as html_module
  14. # 保证可以从项目根目录导入 static 包
  15. PROJECT_ROOT = Path(__file__).parent.parent
  16. if str(PROJECT_ROOT) not in sys.path:
  17. sys.path.insert(0, str(PROJECT_ROOT))
  18. # 导入拆分后的tab模块
  19. from static.visualize.tab1 import generate_tab1_content
  20. from static.visualize.tab2 import generate_tab2_content
  21. from static.visualize.tab3 import generate_tab3_content
  22. from static.visualize.tab5 import generate_tab5_content
  23. class ScriptResultVisualizer:
  24. """脚本结果可视化器"""
  25. def __init__(self, json_file: str = None):
  26. """
  27. 初始化可视化器
  28. Args:
  29. json_file: JSON文件路径
  30. """
  31. if json_file is None:
  32. self.json_file = None
  33. else:
  34. self.json_file = Path(json_file)
  35. if not self.json_file.is_absolute():
  36. self.json_file = Path.cwd() / json_file
  37. def load_json_data(self, file_path: Path) -> Optional[Dict[str, Any]]:
  38. """
  39. 加载JSON文件
  40. Args:
  41. file_path: JSON文件路径
  42. Returns:
  43. JSON数据字典,加载失败返回None
  44. """
  45. try:
  46. with open(file_path, 'r', encoding='utf-8') as f:
  47. return json.load(f)
  48. except Exception as e:
  49. print(f"加载文件失败 {file_path}: {e}")
  50. return None
  51. def generate_tab1_content(self, data: Dict[str, Any]) -> str:
  52. """生成Tab1内容:选题、灵感点、目的点、关键点"""
  53. return generate_tab1_content(data)
  54. def generate_tab2_content(self, data: Dict[str, Any]) -> str:
  55. """生成Tab2内容:段落"""
  56. return generate_tab2_content(data)
  57. def generate_tab3_content(self, data: Dict[str, Any]) -> str:
  58. """生成Tab3内容:按层次展示(实质/形式 → 具体元素/具体概念/抽象概念 → 树形展示)"""
  59. return generate_tab3_content(data)
  60. def generate_tab5_content(self, data: Dict[str, Any]) -> str:
  61. """生成tab5内容:实质点与灵感点、目的点、关键点的关系连线图"""
  62. return generate_tab5_content(data)
  63. def build_element_index(self, data: Dict[str, Any]) -> Dict[str, Any]:
  64. """
  65. 构建全局元素索引(包含实质列表和形式列表)
  66. Args:
  67. data: JSON数据
  68. Returns:
  69. 元素索引字典 {element_id: element_info}
  70. """
  71. element_index = {}
  72. if '脚本理解' not in data:
  73. return element_index
  74. script = data['脚本理解']
  75. # 处理实质列表
  76. substance_list = script.get('实质列表', [])
  77. for elem in substance_list:
  78. elem_id = str(elem.get('id', ''))
  79. if elem_id:
  80. element_index[elem_id] = {
  81. 'id': elem_id,
  82. 'name': elem.get('名称', ''),
  83. 'description': elem.get('描述', ''),
  84. 'type': '实质',
  85. 'dimension': elem.get('维度', {}),
  86. 'category': elem.get('分类', {}),
  87. 'full_data': elem
  88. }
  89. # 处理形式列表
  90. form_list = script.get('形式列表', [])
  91. for elem in form_list:
  92. elem_id = str(elem.get('id', ''))
  93. if elem_id:
  94. element_index[elem_id] = {
  95. 'id': elem_id,
  96. 'name': elem.get('名称', ''),
  97. 'description': elem.get('描述', ''),
  98. 'type': '形式',
  99. 'dimension': elem.get('维度', {}),
  100. 'category': elem.get('分类', {}),
  101. 'full_data': elem
  102. }
  103. return element_index
  104. def highlight_element_references(self, text: str, element_index: Dict[str, Any]) -> str:
  105. """
  106. 在文本中标记元素引用,使其可点击查看详情
  107. Args:
  108. text: 待处理的文本
  109. element_index: 全局元素索引
  110. Returns:
  111. 处理后的HTML文本
  112. """
  113. if not text or not element_index:
  114. return html_module.escape(str(text))
  115. result = html_module.escape(str(text))
  116. # 按元素ID长度降序排序,避免短ID覆盖长ID (如 "1" 和 "10")
  117. sorted_ids = sorted(element_index.keys(), key=lambda x: len(x), reverse=True)
  118. for elem_id in sorted_ids:
  119. elem = element_index[elem_id]
  120. elem_name = elem.get('name', '')
  121. # 匹配 #ID 格式 (如 "#24")
  122. pattern_id = f'#{elem_id}\\b'
  123. replacement_id = f'<span class="element-ref clickable" onclick="showElementDetail(\'{elem_id}\')" title="{html_module.escape(elem_name)}">#{elem_id}</span>'
  124. result = re.sub(pattern_id, replacement_id, result)
  125. # 匹配元素名称 (完整词匹配)
  126. if elem_name:
  127. pattern_name = f'\\b{re.escape(elem_name)}\\b'
  128. replacement_name = f'<span class="element-ref clickable" onclick="showElementDetail(\'{elem_id}\')" title="#{elem_id}">{elem_name}</span>'
  129. result = re.sub(pattern_name, replacement_name, result)
  130. return result
  131. def format_element_id_list(self, id_list, element_index: Dict[str, Any]) -> str:
  132. """
  133. 将元素ID列表格式化为可点击的HTML标签
  134. Args:
  135. id_list: 元素ID列表或单个ID
  136. element_index: 全局元素索引
  137. Returns:
  138. HTML字符串
  139. """
  140. if not id_list:
  141. return ''
  142. html = '<div class="detail-content">\n'
  143. # 处理单个ID或列表
  144. ids = [id_list] if not isinstance(id_list, list) else id_list
  145. for elem_id in ids:
  146. elem_id_str = str(elem_id)
  147. if elem_id_str in element_index:
  148. elem = element_index[elem_id_str]
  149. elem_name = elem.get('name', '')
  150. html += f'<span class="element-ref-tag clickable" onclick="showElementDetail(\'{elem_id_str}\')" title="{html_module.escape(elem_name)}">#{elem_id_str}</span>\n'
  151. else:
  152. html += f'<span class="detail-tag">#{elem_id_str}</span>\n'
  153. html += '</div>\n'
  154. return html
  155. def generate_html(self, data: Dict[str, Any], json_filename: str) -> str:
  156. """生成完整的HTML页面"""
  157. # 构建全局元素索引
  158. element_index = self.build_element_index(data)
  159. # 开始构建HTML
  160. html = '<!DOCTYPE html>\n'
  161. html += '<html lang="zh-CN">\n'
  162. html += '<head>\n'
  163. html += ' <meta charset="UTF-8">\n'
  164. html += ' <meta name="viewport" content="width=device-width, initial-scale=1.0">\n'
  165. html += f' <title>脚本结果可视化 - {json_filename}</title>\n'
  166. html += ' <link rel="stylesheet" href="visualize/style.css">\n'
  167. html += '</head>\n'
  168. html += '<body>\n'
  169. html += '<div class="container">\n'
  170. # 页眉
  171. html += '<div class="header">\n'
  172. html += ' <h1>脚本结果可视化</h1>\n'
  173. # 显示选题主题
  174. if '选题描述' in data and '主题' in data['选题描述']:
  175. html += f' <div class="subtitle">{html_module.escape(data["选题描述"]["主题"])}</div>\n'
  176. html += f' <div class="subtitle">{json_filename}</div>\n'
  177. html += f' <div class="subtitle">生成时间: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}</div>\n'
  178. html += '</div>\n'
  179. # Tab导航
  180. html += '<div class="tabs">\n'
  181. html += ' <button class="tab active" onclick="switchTab(\'tab1\')">选题点</button>\n'
  182. html += ' <button class="tab" onclick="switchTab(\'tab2\')">段落</button>\n'
  183. html += ' <button class="tab" onclick="switchTab(\'tab3\')">脚本点</button>\n'
  184. html += ' <button class="tab" onclick="switchTab(\'tab5\')">关系图</button>\n'
  185. html += '</div>\n'
  186. # 主内容
  187. html += '<div class="content">\n'
  188. # Tab1内容
  189. html += self.generate_tab1_content(data)
  190. # Tab2内容
  191. html += self.generate_tab2_content(data)
  192. # Tab3内容
  193. html += self.generate_tab3_content(data)
  194. # tab5内容
  195. html += self.generate_tab5_content(data)
  196. html += '</div>\n'
  197. # 页脚
  198. html += '<div class="footer">\n'
  199. html += f' <p>生成时间: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}</p>\n'
  200. html += '</div>\n'
  201. html += '</div>\n'
  202. # JavaScript (传递元素索引)
  203. html += '<script>\n'
  204. html += f'const elementIndex = {json.dumps(element_index, ensure_ascii=False)};\n'
  205. html += '</script>\n'
  206. html += '<script src="visualize/script.js"></script>\n'
  207. html += '</body>\n'
  208. html += '</html>\n'
  209. return html
  210. def save_all_html(self, output_dir: str | Path | None = None) -> List[str]:
  211. """
  212. 基于 output_demo_script.json,为其中每个视频生成一个独立的 HTML 页面。
  213. 仅支持这种结构:
  214. {
  215. "results": [
  216. {
  217. "video_data": {...},
  218. "script_result": {...}
  219. },
  220. ...
  221. ]
  222. }
  223. """
  224. if self.json_file is None:
  225. print("❌ 错误: 未指定JSON文件")
  226. return []
  227. # 加载JSON数据
  228. data = self.load_json_data(self.json_file)
  229. if data is None:
  230. return []
  231. results = data.get("results") or []
  232. if not isinstance(results, list) or not results:
  233. print("⚠️ JSON 中未找到有效的 results 数组,期望为 output_demo_script.json 结构")
  234. return []
  235. # 确定输出目录
  236. if output_dir is None:
  237. # 默认输出到examples/html目录
  238. output_dir = Path(__file__).parent / "html"
  239. else:
  240. output_dir = Path(output_dir)
  241. if not output_dir.is_absolute():
  242. output_dir = Path.cwd() / output_dir
  243. # 创建输出目录
  244. output_dir.mkdir(parents=True, exist_ok=True)
  245. # 确保样式和脚本文件可用:从 static/visualize 拷贝到 输出目录/visualize
  246. static_visualize_dir = PROJECT_ROOT / "static" / "visualize"
  247. target_visualize_dir = output_dir / "visualize"
  248. if static_visualize_dir.exists() and static_visualize_dir.is_dir():
  249. import shutil
  250. target_visualize_dir.mkdir(parents=True, exist_ok=True)
  251. for item in static_visualize_dir.iterdir():
  252. dst = target_visualize_dir / item.name
  253. if item.is_file():
  254. shutil.copy2(item, dst)
  255. generated_paths: List[str] = []
  256. print(f"📁 检测到 output_demo_script 格式,包含 {len(results)} 条结果")
  257. for idx, item in enumerate(results, start=1):
  258. script_data = item.get("script_result")
  259. if not isinstance(script_data, dict):
  260. print(f"⚠️ 跳过第 {idx} 条结果:缺少 script_result 字段或结构不正确")
  261. continue
  262. # 从 what_deconstruction_result 中获取三点解构数据并合并到 script_data
  263. what_result = item.get("what_deconstruction_result", {})
  264. if isinstance(what_result, dict) and "三点解构" in what_result:
  265. deconstruction = what_result["三点解构"]
  266. # 将三点解构数据合并到 script_data 顶层,供 tab1 使用
  267. if "灵感点" in deconstruction:
  268. script_data["灵感点"] = deconstruction["灵感点"]
  269. if "目的点" in deconstruction:
  270. script_data["目的点"] = deconstruction["目的点"]
  271. if "关键点" in deconstruction:
  272. script_data["关键点"] = deconstruction["关键点"]
  273. video_data = item.get("video_data") or {}
  274. video_id = video_data.get("video_id") or video_data.get("channel_content_id") # 兼容旧字段名
  275. # 用于 HTML 内部展示的"文件名"标签
  276. json_label = f"{self.json_file.name}#{idx}"
  277. # 生成输出文件名(优先使用 video_id,回退到序号)
  278. if video_id:
  279. output_filename = f"script_result_{video_id}.html"
  280. else:
  281. output_filename = f"{self.json_file.stem}_{idx}.html"
  282. output_path = output_dir / output_filename
  283. html_content = self.generate_html(script_data, json_label)
  284. with open(output_path, "w", encoding="utf-8") as f:
  285. f.write(html_content)
  286. generated_paths.append(str(output_path))
  287. print(f"✅ HTML文件已生成: {output_path}")
  288. if not generated_paths:
  289. print("⚠️ 未能从 JSON 中生成任何 HTML 文件")
  290. return generated_paths
  291. def main():
  292. """主函数"""
  293. # 解析命令行参数
  294. parser = argparse.ArgumentParser(
  295. description='脚本结果可视化工具 - 基于 output_demo_script.json 为每个视频生成独立的HTML页面',
  296. formatter_class=argparse.RawDescriptionHelpFormatter,
  297. epilog="""
  298. 使用示例:
  299. # 在当前 examples 目录下使用默认的 output_demo_script.json 并输出到 examples/html
  300. python visualize_script_results.py
  301. # 指定 JSON 文件
  302. python visualize_script_results.py examples/output_demo_script.json
  303. # 指定 JSON 文件和输出目录
  304. python visualize_script_results.py examples/output_demo_script.json --output-dir examples/html_script
  305. """
  306. )
  307. parser.add_argument(
  308. 'json_file',
  309. type=str,
  310. nargs='?',
  311. help='JSON文件路径(默认为 examples/output_demo_script.json)'
  312. )
  313. parser.add_argument(
  314. '-o', '--output-dir',
  315. type=str,
  316. default=None,
  317. help='输出目录路径(默认: examples/html)'
  318. )
  319. args = parser.parse_args()
  320. # 确定 JSON 文件路径
  321. if args.json_file:
  322. json_path = Path(args.json_file)
  323. if not json_path.is_absolute():
  324. json_path = Path.cwd() / json_path
  325. else:
  326. # 默认使用 examples/output_demo_script.json
  327. json_path = Path(__file__).parent / "output_decode_result.json"
  328. print("🚀 开始生成脚本结果可视化...")
  329. print(f"📁 JSON文件: {json_path}")
  330. print(f"📄 输出目录: {args.output_dir or (Path(__file__).parent / 'html')}")
  331. print()
  332. visualizer = ScriptResultVisualizer(json_file=str(json_path))
  333. generated_files = visualizer.save_all_html(output_dir=args.output_dir)
  334. if generated_files:
  335. print()
  336. print(f"🎉 完成! 共生成 {len(generated_files)} 个HTML文件")
  337. # 提示其中一个示例文件
  338. print(f"📄 示例: 请在浏览器中打开: {generated_files[0]}")
  339. if __name__ == "__main__":
  340. main()