| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194 |
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- """
- 使用真实数据测试脚本正交分析Agent
- """
- import sys
- import os
- import json
- from pathlib import Path
- from dotenv import load_dotenv
- # 添加项目根目录到 Python 路径
- project_root = Path(__file__).parent.parent.parent
- sys.path.insert(0, str(project_root))
- # 加载环境变量
- load_dotenv(project_root / ".env")
- from src.components.agents.script_orthogonal_analysis_agent import ScriptOrthogonalAnalysisAgent
def load_script_result(file_path: str) -> dict:
    """Load a script-parsing result file.

    Args:
        file_path: Path to the JSON file.

    Returns:
        The parsed JSON data.
    """
    # Read the whole document first, then decode it in one step.
    with open(file_path, mode="r", encoding="utf-8") as source:
        raw_text = source.read()
    return json.loads(raw_text)
def _build_agent_state(script_result: dict) -> dict:
    """Assemble the agent input state from a parsed script-result document."""
    script_understanding = script_result.get("脚本理解", {})
    return {
        "text": {
            "title": "当代年轻人对食物的双标日常",
            "body": "#讨好型水果[话题]#",
        },
        "images": script_understanding.get("图片列表", []),
        "topic_selection_understanding": script_result.get("选题描述", {}),
        "content_weight": script_result.get("图文权重", {}),
        "script_sections": {
            "内容品类": script_understanding.get("内容品类", ""),
            "段落列表": script_understanding.get("段落列表", []),
        },
        "script_elements": {
            "元素列表": script_understanding.get("元素列表", []),
        },
    }


def _markdown_cell(value) -> str:
    """Render a value as single-line text that is safe inside a Markdown table cell."""
    # A missing/None analysis must become an empty cell instead of crashing on
    # ``None.replace``; non-string values are stringified for the same reason.
    text = "" if value is None else str(value)
    # Newlines and unescaped pipes would both break the table row.
    return text.replace("\n", " ").replace("|", "\\|")


def _render_markdown_report(topic, category, element_type_list, orthogonal_matrix) -> str:
    """Build the full Markdown report (matrix table plus per-paragraph details)."""
    parts = [
        "# 脚本正交分析矩阵\n\n",
        f"**选题主题**: {topic}\n\n",
        f"**内容品类**: {category}\n\n",
        "## 正交矩阵表格\n\n",
    ]

    # Table header and separator: one column per element type.
    header_cells = "".join(f" {_markdown_cell(name)} |" for name in element_type_list)
    parts.append("| 段落 |" + header_cells + "\n")
    parts.append("|------|" + "------|" * len(element_type_list) + "\n")

    # Table body: one row per paragraph, cells looked up by element type.
    for row in orthogonal_matrix:
        analyses = row.get('元素类型分析') or {}
        cells = "".join(
            f" {_markdown_cell(analyses.get(name))} |" for name in element_type_list
        )
        parts.append(f"| {_markdown_cell(row['段落'])} |" + cells + "\n")

    # Detail section: the content range of every paragraph as a bullet list.
    parts.append("\n## 段落内容范围详情\n\n")
    for idx, row in enumerate(orthogonal_matrix, 1):
        parts.append(f"### {idx}. {row['段落']}\n\n")
        parts.append("**内容范围**:\n")
        parts.extend(f"- {content}\n" for content in row['内容范围'])
        parts.append("\n")

    return "".join(parts)


def test_with_real_data():
    """Run the script orthogonal analysis agent against a real parsed script.

    Loads a saved script-parsing result, builds the agent state from it, calls
    ``agent.process(state)``, prints a preview of the returned matrix, and
    writes the result next to the input as JSON and as a Markdown table.

    Returns:
        The object returned by ``agent.process``; it is read here through
        ``.get("正交矩阵")`` and ``.get("元素类型列表")``.
    """
    print("=" * 100)
    print("使用真实数据测试脚本正交分析Agent")
    print("=" * 100)

    # Load the real data
    script_result_path = project_root / "examples/阿里多多酱/output/script_result_20251118_152626.json"
    print(f"\n📁 加载脚本解析结果: {script_result_path}")
    script_result = load_script_result(str(script_result_path))

    # Extract the pieces used for reporting and build the agent state
    topic_description = script_result.get("选题描述", {})
    script_understanding = script_result.get("脚本理解", {})
    state = _build_agent_state(script_result)

    print("\n✓ 数据加载成功")
    print(f" - 选题主题: {topic_description.get('主题', 'N/A')}")
    print(f" - 内容品类: {script_understanding.get('内容品类', 'N/A')}")
    print(f" - 段落数量: {len(script_understanding.get('段落列表', []))}")
    print(f" - 元素数量: {len(script_understanding.get('元素列表', []))}")
    print(f" - 图片数量: {len(script_understanding.get('图片列表', []))}")

    # Initialise the agent
    print("\n🤖 初始化ScriptOrthogonalAnalysisAgent...")
    agent = ScriptOrthogonalAnalysisAgent()
    print(" ✓ Agent初始化成功")

    # Run the orthogonal analysis
    print("\n⚙️ 开始执行正交分析...")
    print("-" * 100)
    result = agent.process(state)
    print("-" * 100)

    # Report the result
    print("\n" + "=" * 100)
    print("📊 正交分析结果")
    print("=" * 100)
    orthogonal_matrix = result.get("正交矩阵", [])
    element_type_list = result.get("元素类型列表", [])
    print("\n✓ 正交矩阵生成成功")
    print(f" - 段落行数: {len(orthogonal_matrix)}")
    print(f" - 元素类型列数: {len(element_type_list)}")

    print(f"\n📋 元素类型列表 (共{len(element_type_list)}个):")
    for idx, element_type in enumerate(element_type_list, 1):
        print(f" {idx}. {element_type}")

    print("\n📋 正交矩阵预览 (前3个段落):")
    for idx, row in enumerate(orthogonal_matrix[:3], 1):
        print(f"\n 段落 {idx}: {row['段落']}")
        print(f" 内容范围: {row['内容范围']}")
        print(" 元素类型分析:")
        for element_type, analysis in (row.get('元素类型分析') or {}).items():
            if analysis:  # only show non-empty analyses
                text = str(analysis)
                print(f" - {element_type}: {text[:80]}{'...' if len(text) > 80 else ''}")

    # Save the raw result as JSON
    output_dir = project_root / "examples/阿里多多酱/output"
    output_path = output_dir / "orthogonal_analysis_result.json"
    print(f"\n💾 保存结果到: {output_path}")
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(result, f, ensure_ascii=False, indent=2)
    print(" ✓ 结果保存成功")

    # Write the table-form output (Markdown)
    markdown_output_path = output_dir / "orthogonal_analysis_table.md"
    print(f"\n📝 生成Markdown表格: {markdown_output_path}")
    report = _render_markdown_report(
        topic_description.get('主题', 'N/A'),
        script_understanding.get('内容品类', 'N/A'),
        element_type_list,
        orthogonal_matrix,
    )
    with open(markdown_output_path, 'w', encoding='utf-8') as f:
        f.write(report)
    print(" ✓ Markdown表格生成成功")

    # Test summary
    print("\n" + "=" * 100)
    print("✅ 测试完成总结")
    print("=" * 100)
    print(" ✓ 成功加载真实数据文件")
    print(f" ✓ 成功提取 {len(orthogonal_matrix)} 个段落")
    print(f" ✓ 成功提取 {len(element_type_list)} 个元素类型")
    print(" ✓ 成功生成正交分析矩阵")
    print(" ✓ 结果已保存为JSON和Markdown格式")
    print("\n 📂 输出文件:")
    print(f" - JSON: {output_path}")
    print(f" - Markdown: {markdown_output_path}")
    print("\n" + "🎉" * 50)
    print("测试成功完成!")
    print("🎉" * 50 + "\n")
    return result
if __name__ == "__main__":
    # Script entry point: run the real-data test and report any failure with a
    # traceback and a non-zero exit status.
    try:
        test_with_real_data()
    except Exception as e:
        print(f"\n❌ 测试失败: {e}\n")
        import traceback
        traceback.print_exc()
        # Use sys.exit rather than the bare exit() helper: exit() is injected by
        # the site module for interactive use and may be absent (e.g. python -S).
        sys.exit(1)
|