""" 集成测试 4 - 复杂文档生成任务 测试场景:复杂的技术文档生成,需要多步骤、信息收集和质量验证 目标:验证 Agent 在复杂任务中是否会使用 goal 和 subagent 工具 任务特点: - 需要先读取参考文档 - 需要生成 5 个不同的文档 - 需要理解技术规范并应用 - 需要创建图表(Mermaid) - 需要保证文档质量和一致性 """ import os import sys import asyncio from pathlib import Path # 添加项目根目录到 Python 路径 sys.path.insert(0, str(Path(__file__).parent.parent.parent)) from dotenv import load_dotenv load_dotenv() from agent.llm.prompts import SimplePrompt from agent.core.runner import AgentRunner from agent.execution import FileSystemTraceStore, Trace, Message from agent.llm import create_openrouter_llm_call async def main(): # 路径配置 base_dir = Path(__file__).parent project_root = base_dir.parent.parent prompt_path = base_dir / "task.prompt" output_dir = base_dir / "output" print("=" * 80) print("集成测试 4 - 复杂文档生成:项目管理工具技术文档") print("=" * 80) print() # 1. 加载 prompt print("1. 加载任务...") prompt = SimplePrompt(prompt_path) system_prompt = prompt._messages.get("system", "") user_prompt = prompt._messages.get("user", "") print(f" ✓ 任务类型: 复杂文档生成") print(f" ✓ 需要生成 5 个文档") print(f" ✓ 需要读取参考文档") print(f" ✓ 无工具提示,无步骤提示") print() # 2. 创建 Agent Runner print("2. 创建 Agent Runner...") print(f" - 模型: Claude Sonnet 4.5") print() runner = AgentRunner( trace_store=FileSystemTraceStore(base_path=".trace"), llm_call=create_openrouter_llm_call(model="anthropic/claude-sonnet-4.5"), skills_dir=str(project_root / "agent" / "skills"), debug=False ) # 3. 运行 Agent print("3. 启动 Agent...") print("=" * 80) print() current_trace_id = None goal_used = False subagent_used = False evaluate_used = False delegate_used = False iteration_count = 0 tool_calls_count = {} async for item in runner.run( task=user_prompt, system_prompt=system_prompt, model="anthropic/claude-sonnet-4.5", temperature=0.5, max_iterations=50, ): # 处理 Trace 对象 if isinstance(item, Trace): current_trace_id = item.trace_id if item.status == "running": print(f"[Trace] 开始: {item.trace_id[:8]}...") elif item.status == "completed": print() print("=" * 80) print(f"[Trace] 完成") print(f" - 总消息数: {item.total_messages}") print(f" - 总 Token 数: {item.total_tokens}") print(f" - 总成本: ${item.total_cost:.4f}") print("=" * 80) elif item.status == "failed": print() print(f"[Trace] 失败: {item.error}") # 处理 Message 对象 elif isinstance(item, Message): if item.role == "assistant": iteration_count += 1 content = item.content if isinstance(content, dict): text = content.get("text", "") tool_calls = content.get("tool_calls") # 显示 Agent 的思考 if text and not tool_calls: print(f"\n[{iteration_count}] Agent 回复:") print(f" {text[:200]}{'...' if len(text) > 200 else ''}") elif text: print(f"\n[{iteration_count}] Agent 思考:") print(f" {text[:150]}{'...' if len(text) > 150 else ''}") # 显示工具调用 if tool_calls: for tc in tool_calls: tool_name = tc.get("function", {}).get("name", "unknown") args = tc.get("function", {}).get("arguments", {}) # 如果 args 是字符串,尝试解析为 JSON if isinstance(args, str): import json try: args = json.loads(args) except: args = {} # 统计工具使用 tool_calls_count[tool_name] = tool_calls_count.get(tool_name, 0) + 1 # 检测关键工具使用 if tool_name == "goal": goal_used = True if isinstance(args, dict): if args.get("add"): print(f" → goal(add): {args['add'][:80]}...") elif args.get("done"): print(f" → goal(done): {args['done'][:80]}...") elif args.get("focus"): print(f" → goal(focus): {args['focus']}") else: print(f" → goal(...)") elif tool_name == "subagent": subagent_used = True if isinstance(args, dict): mode = args.get("mode", "unknown") if mode == "evaluate": evaluate_used = True target = args.get("target_goal_id", "?") print(f" → subagent(evaluate): 评估目标 {target}") elif mode == "delegate": delegate_used = True task = args.get("task", "") print(f" → subagent(delegate): {task[:60]}...") else: print(f" → subagent({mode})") else: print(f" → subagent(...)") else: # 其他工具简化显示 if tool_name in ["read_file", "write_file", "edit_file"]: if isinstance(args, dict): file_path = args.get("file_path", "") if file_path: file_name = Path(file_path).name print(f" → {tool_name}: {file_name}") else: print(f" → {tool_name}") else: print(f" → {tool_name}") elif tool_name == "bash_command": if isinstance(args, dict): cmd = args.get("command", "") print(f" → bash: {cmd[:60]}...") else: print(f" → bash") else: print(f" → {tool_name}") # 4. 测试结果总结 print() print("=" * 80) print("测试结果总结") print("=" * 80) print() print("功能使用情况:") print(f" {'✓' if goal_used else '✗'} Goal 工具: {'已使用' if goal_used else '未使用'}") print(f" {'✓' if subagent_used else '✗'} SubAgent 工具: {'已使用' if subagent_used else '未使用'}") if subagent_used: print(f" - Evaluate 模式: {'已使用' if evaluate_used else '未使用'}") print(f" - Delegate 模式: {'已使用' if delegate_used else '未使用'}") print() print("工具调用统计:") for tool_name, count in sorted(tool_calls_count.items()): print(f" - {tool_name}: {count} 次") print() print(f"总迭代次数: {iteration_count}") print() # 5. 验证结果 print("验证生成的文档:") expected_docs = [ "系统架构设计", "数据库设计", "API接口", "前端组件", "部署运维" ] if output_dir.exists(): files = list(output_dir.glob("*.md")) if files: for file in files: size = file.stat().st_size print(f" ✓ {file.name} ({size} bytes)") print(f"\n 总计: {len(files)} 个文档") else: print(f" ✗ 输出目录为空") else: print(f" ✗ 输出目录不存在") print() print("=" * 80) print("集成测试 4 完成") print("=" * 80) if __name__ == "__main__": asyncio.run(main())