| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193 |
- """
- 测试脚本:运行视频解构
- 功能:
- 1. 支持视频输入(视频详情.json)
- 2. 运行 WhatDeconstructionWorkflow(视频分析版本)
- """
- import json
- import sys
- import os
- import argparse
- from pathlib import Path
- from datetime import datetime
# Put the directory two levels above this file (used as the project root) at the
# front of sys.path, so it takes precedence in module lookups.
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))
- # 手动加载.env文件
def load_env_file(env_path):
    """Load ``KEY=VALUE`` pairs from a .env file into ``os.environ``.

    Blank lines, lines starting with ``#`` and lines without ``=`` (or with
    an empty key) are ignored. A leading ``export `` is dropped, and one pair
    of matching single or double quotes around the value is removed.
    Variables that already exist in the environment are overwritten.

    Args:
        env_path: Location of the .env file (``pathlib.Path`` or ``str``).

    Returns:
        True if the file exists and was processed, False if it does not exist.
    """
    env_path = Path(env_path)
    if not env_path.exists():
        return False

    # Decode explicitly instead of relying on the platform default encoding;
    # "utf-8-sig" also drops a BOM that would otherwise end up in the first key.
    with open(env_path, 'r', encoding='utf-8-sig') as f:
        for raw_line in f:
            line = raw_line.strip()
            # Skip blank lines and comments.
            if not line or line.startswith('#'):
                continue
            # Accept shell-style "export KEY=VALUE" lines.
            if line.startswith('export '):
                line = line[len('export '):].lstrip()
            # Split on the first "=" only, so values may contain "=" themselves.
            key, separator, value = line.partition('=')
            key = key.strip()
            if not separator or not key:
                continue
            value = value.strip()
            # KEY="value" / KEY='value': the quotes are syntax, not part of the value.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
                value = value[1:-1]
            os.environ[key] = value
    return True
# Load <project_root>/.env at import time and report the outcome on stdout.
env_path = project_root / ".env"
if not load_env_file(env_path):
    print(f"⚠️ 未找到.env文件: {env_path}")
else:
    print(f"✅ 已加载环境变量从: {env_path}")
    # Confirm the API key is present; only its first 10 characters are printed.
    api_key = os.environ.get("GEMINI_API_KEY", "")
    if api_key:
        key_prefix = api_key[:10]
        print(f" GEMINI_API_KEY: {key_prefix}...")
# Project-local imports. They come after the sys.path insertion and the .env
# loading earlier in this file; presumably `src` is only importable once the
# project root is on sys.path — TODO confirm.
from src.workflows.what_deconstruction_workflow import WhatDeconstructionWorkflow
from src.utils.logger import get_logger

# Module-level logger obtained from the project's logging helper.
logger = get_logger(__name__)
def load_test_data(directory):
    """Read the video detail JSON of one test directory.

    Args:
        directory: Name of the video directory next to this script
            (e.g. "56898272"); it must contain a file named "视频详情.json".

    Returns:
        The decoded JSON content of that file.

    Raises:
        FileNotFoundError: If the video detail file does not exist.
    """
    video_data_path = Path(__file__).parent / directory / "视频详情.json"

    # Happy path first: decode the UTF-8 file and hand back its content.
    if video_data_path.exists():
        return json.loads(video_data_path.read_text(encoding="utf-8"))

    raise FileNotFoundError(f"未找到视频详情文件:{video_data_path}\n请确保目录下存在'视频详情.json'文件")
def convert_to_workflow_input(raw_data):
    """Build the workflow input (video analysis version) from raw post data.

    Only the video URL and the text fields are forwarded; each key that is
    missing from ``raw_data`` is filled with an empty string.

    Args:
        raw_data: Raw post data in the video format (a mapping with ``get``).

    Returns:
        A dict with the keys "video", "channel_content_id", "title" and
        "body_text".
    """
    forwarded_keys = ("video", "channel_content_id", "title", "body_text")
    return {key: raw_data.get(key, "") for key in forwarded_keys}
def main():
    """Run the What deconstruction workflow (video analysis version) for one video.

    Reads the video directory name from the command line, then:
    loads "视频详情.json" from that directory, converts it to the workflow
    input, builds and invokes ``WhatDeconstructionWorkflow``, writes the result
    to ``<directory>/output/result_<timestamp>.json`` next to this script, and
    prints a short summary.

    Progress and errors are reported with ``print``. When a step fails, the
    error is printed and the function returns early; it always returns None.
    """
    import traceback

    # Parse command-line arguments.
    parser = argparse.ArgumentParser(description='运行单个视频的What解构工作流(视频分析版本)')
    parser.add_argument('directory', type=str, help='视频目录名(如"56898272"),目录下需要有"视频详情.json"文件')
    args = parser.parse_args()
    directory = args.directory

    print("=" * 80)
    print(f"开始测试 What 解构工作流(视频分析版本)- 目录: {directory}")
    print("=" * 80)

    # 1. Load the test data (target video).
    print("\n[1] 加载测试数据(目标视频)...")
    try:
        raw_data = load_test_data(directory)
        print("✅ 成功加载测试数据")
        print(f" - 标题: {raw_data.get('title')}")
        print(f" - 内容类型: {raw_data.get('content_type', 'unknown')}")
        video_url = raw_data.get('video', '')
        if video_url:
            print(f" - 视频URL: {video_url[:50]}...")
        else:
            print(" - 视频URL: 未提供")
    except Exception as e:
        print(f"❌ 加载测试数据失败: {e}")
        return

    # 2. Convert the raw data to the workflow input format.
    print("\n[2] 转换数据格式...")
    try:
        input_data = convert_to_workflow_input(raw_data)
        print("✅ 数据格式转换成功")
        # "video" may be present but null in the JSON; slicing None would raise
        # and be misreported as a conversion failure, so fall back to "".
        video_preview = (input_data.get('video') or '')[:50]
        print(f" - 视频URL: {video_preview}...")
        print(f" - 标题: {input_data.get('title', '')}")
    except Exception as e:
        print(f"❌ 数据格式转换失败: {e}")
        return

    # 3. Build the workflow.
    print("\n[3] 初始化工作流...")
    try:
        workflow = WhatDeconstructionWorkflow(
            model_provider="google_genai",
            max_depth=10
        )
        print("✅ 工作流初始化成功")
    except Exception as e:
        print(f"❌ 工作流初始化失败: {e}")
        traceback.print_exc()
        return

    # 4. Run the workflow.
    print("\n[4] 执行工作流...")
    print(" 注意:这可能需要几分钟时间...")
    try:
        result = workflow.invoke(input_data)
        print("✅ 工作流执行成功")
    except Exception as e:
        print(f"❌ 工作流执行失败: {e}")
        traceback.print_exc()
        return

    # 5. Save the result under <directory>/output/ with a timestamped file name.
    print("\n[5] 保存结果...")
    try:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_filename = f"result_{timestamp}.json"
        output_path = Path(__file__).parent / directory / "output" / output_filename
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(result, f, ensure_ascii=False, indent=2)
        print(f"✅ 结果已保存到: {output_path}")
        print(f" 文件名: {output_filename}")
    except Exception as e:
        print(f"❌ 保存结果失败: {e}")
        return

    # 6. Print the result summary.
    print("\n" + "=" * 80)
    print("结果摘要")
    print("=" * 80)

    # Topic selection structure (V2). An empty, None or non-dict result counts
    # as a failure too, so the run never ends without a verdict.
    topic_selection_v2 = result.get("topic_selection_v2") if isinstance(result, dict) else None
    if topic_selection_v2:
        print("✅ 选题结构分析 V2 完成")
        print(f" - 选题结构: {topic_selection_v2}")
    else:
        print("❌ 选题结构分析 V2 失败")
-
# Run the workflow only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|