#!/usr/bin/env python3
# weapp / video_decode
"""
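Script-result visualization tool.

Generates one standalone HTML page for every entry in the ``results`` array of
the input JSON file. Each page has four tab-switchable views: topic points,
paragraphs, script points, and the relationship graph.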
"""

import json
import argparse
import sys
from pathlib import Path
from datetime import datetime
from typing import List, Dict, Any, Optional
import re
import html as html_module

# Put the directory two levels above this file on sys.path so that the
# project-level `static` package can be imported from here.
PROJECT_ROOT = Path(__file__).parent.parent
if sys.path.count(str(PROJECT_ROOT)) == 0:
    sys.path.insert(0, str(PROJECT_ROOT))

# 导入拆分后的tab模块
from static.visualize.tab1 import generate_tab1_content
from static.visualize.tab2 import generate_tab2_content
from static.visualize.tab3 import generate_tab3_content
from static.visualize.tab5 import generate_tab5_content


class ScriptResultVisualizer:
    """脚本结果可视化器"""

    def __init__(self, json_file: str = None):
        """
        初始化可视化器

        Args:
            json_file: JSON文件路径
        """
        if json_file is None:
            self.json_file = None
        else:
            self.json_file = Path(json_file)
            if not self.json_file.is_absolute():
                self.json_file = Path.cwd() / json_file

    def load_json_data(self, file_path: Path) -> Optional[Dict[str, Any]]:
        """
        加载JSON文件

        Args:
            file_path: JSON文件路径

        Returns:
            JSON数据字典，加载失败返回None
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except Exception as e:
            print(f"加载文件失败 {file_path}: {e}")
            return None

    def generate_tab1_content(self, data: Dict[str, Any]) -> str:
        """Render the Tab1 markup (topic, inspiration points, purpose points, key points).

        Delegates to the module-level ``generate_tab1_content`` imported from
        ``static.visualize.tab1``.
        """
        tab_markup = generate_tab1_content(data)
        return tab_markup

    def generate_tab2_content(self, data: Dict[str, Any]) -> str:
        """Render the Tab2 markup (paragraphs).

        Delegates to the module-level ``generate_tab2_content`` imported from
        ``static.visualize.tab2``.
        """
        tab_markup = generate_tab2_content(data)
        return tab_markup

    def generate_tab3_content(self, data: Dict[str, Any]) -> str:
        """Render the Tab3 markup: a layered view (substance/form, then concrete
        elements / concrete concepts / abstract concepts, shown as a tree).

        Delegates to the module-level ``generate_tab3_content`` imported from
        ``static.visualize.tab3``.
        """
        tab_markup = generate_tab3_content(data)
        return tab_markup

    def generate_tab5_content(self, data: Dict[str, Any]) -> str:
        """Render the Tab5 markup: the link diagram relating substance points to
        inspiration points, purpose points and key points.

        Delegates to the module-level ``generate_tab5_content`` imported from
        ``static.visualize.tab5``.
        """
        tab_markup = generate_tab5_content(data)
        return tab_markup

    def build_element_index(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        构建全局元素索引（包含实质列表和形式列表）

        Args:
            data: JSON数据

        Returns:
            元素索引字典 {element_id: element_info}
        """
        element_index = {}

        if '脚本理解' not in data:
            return element_index

        script = data['脚本理解']

        # 处理实质列表
        substance_list = script.get('实质列表', [])
        for elem in substance_list:
            elem_id = str(elem.get('id', ''))
            if elem_id:
                element_index[elem_id] = {
                    'id': elem_id,
                    'name': elem.get('名称', ''),
                    'description': elem.get('描述', ''),
                    'type': '实质',
                    'dimension': elem.get('维度', {}),
                    'category': elem.get('分类', {}),
                    'full_data': elem
                }

        # 处理形式列表
        form_list = script.get('形式列表', [])
        for elem in form_list:
            elem_id = str(elem.get('id', ''))
            if elem_id:
                element_index[elem_id] = {
                    'id': elem_id,
                    'name': elem.get('名称', ''),
                    'description': elem.get('描述', ''),
                    'type': '形式',
                    'dimension': elem.get('维度', {}),
                    'category': elem.get('分类', {}),
                    'full_data': elem
                }

        return element_index

    def highlight_element_references(self, text: str, element_index: Dict[str, Any]) -> str:
        """
        在文本中标记元素引用,使其可点击查看详情

        Args:
            text: 待处理的文本
            element_index: 全局元素索引

        Returns:
            处理后的HTML文本
        """
        if not text or not element_index:
            return html_module.escape(str(text))

        result = html_module.escape(str(text))

        # 按元素ID长度降序排序,避免短ID覆盖长ID (如 "1" 和 "10")
        sorted_ids = sorted(element_index.keys(), key=lambda x: len(x), reverse=True)

        for elem_id in sorted_ids:
            elem = element_index[elem_id]
            elem_name = elem.get('name', '')

            # 匹配 #ID 格式 (如 "#24")
            pattern_id = f'#{elem_id}\\b'
            replacement_id = f'<span class="element-ref clickable" onclick="showElementDetail(\'{elem_id}\')" title="{html_module.escape(elem_name)}">#{elem_id}</span>'
            result = re.sub(pattern_id, replacement_id, result)

            # 匹配元素名称 (完整词匹配)
            if elem_name:
                pattern_name = f'\\b{re.escape(elem_name)}\\b'
                replacement_name = f'<span class="element-ref clickable" onclick="showElementDetail(\'{elem_id}\')" title="#{elem_id}">{elem_name}</span>'
                result = re.sub(pattern_name, replacement_name, result)

        return result

    def format_element_id_list(self, id_list, element_index: Dict[str, Any]) -> str:
        """
        将元素ID列表格式化为可点击的HTML标签

        Args:
            id_list: 元素ID列表或单个ID
            element_index: 全局元素索引

        Returns:
            HTML字符串
        """
        if not id_list:
            return ''

        html = '<div class="detail-content">\n'

        # 处理单个ID或列表
        ids = [id_list] if not isinstance(id_list, list) else id_list

        for elem_id in ids:
            elem_id_str = str(elem_id)
            if elem_id_str in element_index:
                elem = element_index[elem_id_str]
                elem_name = elem.get('name', '')
                html += f'<span class="element-ref-tag clickable" onclick="showElementDetail(\'{elem_id_str}\')" title="{html_module.escape(elem_name)}">#{elem_id_str}</span>\n'
            else:
                html += f'<span class="detail-tag">#{elem_id_str}</span>\n'

        html += '</div>\n'
        return html

    def generate_html(self, data: Dict[str, Any], json_filename: str) -> str:
        """
        Build the complete HTML page for one script result.

        The page consists of a header (title, optional topic, source label,
        generation time), four tab buttons, the four tab bodies produced by
        ``generate_tab1/2/3/5_content``, a footer, and a ``<script>`` block
        that exposes the element index as the JS constant ``elementIndex``.
        It references ``visualize/style.css`` and ``visualize/script.js``
        relative to the page.

        Args:
            data: Script-result dictionary for one video.
            json_filename: Label of the source shown in the title and header;
                it is HTML-escaped before being inserted.

        Returns:
            The HTML document as a string.
        """
        # Build the global element index (embedded as JSON further down).
        element_index = self.build_element_index(data)

        safe_label = html_module.escape(str(json_filename))
        # One timestamp for header and footer so both always agree.
        generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        parts = [
            '<!DOCTYPE html>\n',
            '<html lang="zh-CN">\n',
            '<head>\n',
            '    <meta charset="UTF-8">\n',
            '    <meta name="viewport" content="width=device-width, initial-scale=1.0">\n',
            f'    <title>脚本结果可视化 - {safe_label}</title>\n',
            '    <link rel="stylesheet" href="visualize/style.css">\n',
            '</head>\n',
            '<body>\n',
            '<div class="container">\n',
            # Page header
            '<div class="header">\n',
            '    <h1>脚本结果可视化</h1>\n',
        ]

        # Show the topic when the data provides one.
        topic_info = data.get('选题描述')
        if isinstance(topic_info, dict) and topic_info.get('主题') is not None:
            topic = html_module.escape(str(topic_info['主题']))
            parts.append(f'    <div class="subtitle">{topic}</div>\n')

        parts.append(f'    <div class="subtitle">{safe_label}</div>\n')
        parts.append(f'    <div class="subtitle">生成时间: {generated_at}</div>\n')
        parts.append('</div>\n')

        # Tab navigation
        parts.append('<div class="tabs">\n')
        parts.append('    <button class="tab active" onclick="switchTab(\'tab1\')">选题点</button>\n')
        parts.append('    <button class="tab" onclick="switchTab(\'tab2\')">段落</button>\n')
        parts.append('    <button class="tab" onclick="switchTab(\'tab3\')">脚本点</button>\n')
        parts.append('    <button class="tab" onclick="switchTab(\'tab5\')">关系图</button>\n')
        parts.append('</div>\n')

        # Main content: the four tab bodies, rendered in display order.
        parts.append('<div class="content">\n')
        parts.append(self.generate_tab1_content(data))
        parts.append(self.generate_tab2_content(data))
        parts.append(self.generate_tab3_content(data))
        parts.append(self.generate_tab5_content(data))
        parts.append('</div>\n')

        # Footer
        parts.append('<div class="footer">\n')
        parts.append(f'    <p>生成时间: {generated_at}</p>\n')
        parts.append('</div>\n')

        parts.append('</div>\n')

        # JavaScript: hand the element index to the page script.
        # "<" only ever occurs inside JSON strings, so rewriting it as the
        # equivalent \u003c escape keeps the value identical while making it
        # impossible for data such as "</script>" or "<!--" to terminate or
        # confuse the inline script element.
        index_json = json.dumps(element_index, ensure_ascii=False).replace('<', '\\u003c')
        parts.append('<script>\n')
        parts.append(f'const elementIndex = {index_json};\n')
        parts.append('</script>\n')
        parts.append('<script src="visualize/script.js"></script>\n')

        parts.append('</body>\n')
        parts.append('</html>\n')

        return ''.join(parts)

    def save_all_html(self, output_dir: str | Path | None = None) -> List[str]:
        """
        基于 output_demo_script.json，为其中每个视频生成一个独立的 HTML 页面。

        仅支持这种结构：
        {
          "results": [
            {
              "video_data": {...},
              "script_result": {...}
            },
            ...
          ]
        }
        """
        if self.json_file is None:
            print("❌ 错误: 未指定JSON文件")
            return []

        # 加载JSON数据
        data = self.load_json_data(self.json_file)
        if data is None:
            return []

        results = data.get("results") or []
        if not isinstance(results, list) or not results:
            print("⚠️  JSON 中未找到有效的 results 数组，期望为 output_demo_script.json 结构")
            return []

        # 确定输出目录
        if output_dir is None:
            # 默认输出到examples/html目录
            output_dir = Path(__file__).parent / "html"
        else:
            output_dir = Path(output_dir)
            if not output_dir.is_absolute():
                output_dir = Path.cwd() / output_dir

        # 创建输出目录
        output_dir.mkdir(parents=True, exist_ok=True)

        # 确保样式和脚本文件可用：从 static/visualize 拷贝到 输出目录/visualize
        static_visualize_dir = PROJECT_ROOT / "static" / "visualize"
        target_visualize_dir = output_dir / "visualize"
        if static_visualize_dir.exists() and static_visualize_dir.is_dir():
            import shutil
            target_visualize_dir.mkdir(parents=True, exist_ok=True)
            for item in static_visualize_dir.iterdir():
                dst = target_visualize_dir / item.name
                if item.is_file():
                    shutil.copy2(item, dst)

        generated_paths: List[str] = []

        print(f"📁 检测到 output_demo_script 格式，包含 {len(results)} 条结果")

        for idx, item in enumerate(results, start=1):
            script_data = item.get("script_result")
            if not isinstance(script_data, dict):
                print(f"⚠️  跳过第 {idx} 条结果：缺少 script_result 字段或结构不正确")
                continue

            # 从 what_deconstruction_result 中获取三点解构数据并合并到 script_data
            what_result = item.get("what_deconstruction_result", {})
            if isinstance(what_result, dict) and "三点解构" in what_result:
                deconstruction = what_result["三点解构"]
                # 将三点解构数据合并到 script_data 顶层，供 tab1 使用
                if "灵感点" in deconstruction:
                    script_data["灵感点"] = deconstruction["灵感点"]
                if "目的点" in deconstruction:
                    script_data["目的点"] = deconstruction["目的点"]
                if "关键点" in deconstruction:
                    script_data["关键点"] = deconstruction["关键点"]

            video_data = item.get("video_data") or {}
            video_id = video_data.get("video_id") or video_data.get("channel_content_id")  # 兼容旧字段名

            # 用于 HTML 内部展示的"文件名"标签
            json_label = f"{self.json_file.name}#{idx}"

            # 生成输出文件名（优先使用 video_id，回退到序号）
            if video_id:
                output_filename = f"script_result_{video_id}.html"
            else:
                output_filename = f"{self.json_file.stem}_{idx}.html"

            output_path = output_dir / output_filename

            html_content = self.generate_html(script_data, json_label)

            with open(output_path, "w", encoding="utf-8") as f:
                f.write(html_content)

            generated_paths.append(str(output_path))
            print(f"✅ HTML文件已生成: {output_path}")

        if not generated_paths:
            print("⚠️  未能从 JSON 中生成任何 HTML 文件")

        return generated_paths


def main(argv: Optional[List[str]] = None) -> int:
    """
    Command-line entry point.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the previous behaviour.

    Returns:
        ``0`` when at least one HTML file was generated, ``1`` otherwise.
    """
    # Parse the command-line arguments.
    parser = argparse.ArgumentParser(
        description='脚本结果可视化工具 - 基于 output_demo_script.json 为每个视频生成独立的HTML页面',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
使用示例:
  # 使用脚本所在目录下默认的 output_decode_result.json 并输出到脚本所在目录的 html 子目录
  python visualize_script_results.py

  # 指定 JSON 文件
  python visualize_script_results.py examples/output_demo_script.json

  # 指定 JSON 文件和输出目录
  python visualize_script_results.py examples/output_demo_script.json --output-dir examples/html_script
        """
    )

    parser.add_argument(
        'json_file',
        type=str,
        nargs='?',
        help='JSON文件路径（默认为脚本所在目录下的 output_decode_result.json）'
    )

    parser.add_argument(
        '-o', '--output-dir',
        type=str,
        default=None,
        help='输出目录路径（默认: examples/html）'
    )

    args = parser.parse_args(argv)

    script_dir = Path(__file__).parent

    # Resolve the JSON input path.
    if args.json_file:
        json_path = Path(args.json_file)
        if not json_path.is_absolute():
            json_path = Path.cwd() / json_path
    else:
        # Default input: output_decode_result.json next to this script.
        json_path = script_dir / "output_decode_result.json"

    print("🚀 开始生成脚本结果可视化...")
    print(f"📁 JSON文件: {json_path}")
    print(f"📄 输出目录: {args.output_dir or (script_dir / 'html')}")
    print()

    visualizer = ScriptResultVisualizer(json_file=str(json_path))
    generated_files = visualizer.save_all_html(output_dir=args.output_dir)

    if not generated_files:
        # save_all_html has already printed the reason.
        return 1

    print()
    print(f"🎉 完成! 共生成 {len(generated_files)} 个HTML文件")
    # Point the user at one of the generated files as an example.
    print(f"📄 示例: 请在浏览器中打开: {generated_files[0]}")
    return 0


if __name__ == "__main__":
    # Propagate the status so callers and shell scripts can detect failure.
    sys.exit(main())