""" 集成测试 3 - 内容生成任务 测试场景:真实的内容创作任务,完全不提示工具和步骤 目标:验证 Agent 在真实使用场景中的自主能力 任务类型:内容生成(咖啡店品牌文案) - 不提示使用任何工具 - 不提示任何步骤 - 只给任务目标和要求 - 模拟真实用户使用场景 """ import os import sys import asyncio from pathlib import Path # 添加项目根目录到 Python 路径 sys.path.insert(0, str(Path(__file__).parent.parent.parent)) from dotenv import load_dotenv load_dotenv() from agent.llm.prompts import SimplePrompt from agent.core.runner import AgentRunner from agent.execution import FileSystemTraceStore, Trace, Message from agent.llm import create_openrouter_llm_call async def main(): # 路径配置 base_dir = Path(__file__).parent project_root = base_dir.parent.parent prompt_path = base_dir / "task.prompt" output_dir = base_dir / "output" print("=" * 80) print("集成测试 3 - 内容生成任务:咖啡店品牌文案") print("=" * 80) print() # 1. 加载 prompt print("1. 加载任务...") prompt = SimplePrompt(prompt_path) system_prompt = prompt._messages.get("system", "") user_prompt = prompt._messages.get("user", "") print(f" ✓ 任务类型: 内容生成") print(f" ✓ 无工具提示,无步骤提示") print() # 2. 创建 Agent Runner print("2. 创建 Agent Runner...") print(f" - 模型: Claude Sonnet 4.5") print() runner = AgentRunner( trace_store=FileSystemTraceStore(base_path=".trace"), llm_call=create_openrouter_llm_call(model="anthropic/claude-sonnet-4.5"), skills_dir=str(project_root / "agent" / "skills"), debug=False ) # 3. 运行 Agent print("3. 启动 Agent...") print("=" * 80) print() current_trace_id = None goal_used = False subagent_used = False evaluate_used = False delegate_used = False iteration_count = 0 tool_calls_count = {} async for item in runner.run( task=user_prompt, system_prompt=system_prompt, model="anthropic/claude-sonnet-4.5", temperature=0.7, max_iterations=30, ): # 处理 Trace 对象 if isinstance(item, Trace): current_trace_id = item.trace_id if item.status == "running": print(f"[Trace] 开始: {item.trace_id[:8]}...") elif item.status == "completed": print() print("=" * 80) print(f"[Trace] 完成") print(f" - 总消息数: {item.total_messages}") print(f" - 总 Token 数: {item.total_tokens}") print(f" - 总成本: ${item.total_cost:.4f}") print("=" * 80) elif item.status == "failed": print() print(f"[Trace] 失败: {item.error}") # 处理 Message 对象 elif isinstance(item, Message): if item.role == "assistant": iteration_count += 1 content = item.content if isinstance(content, dict): text = content.get("text", "") tool_calls = content.get("tool_calls") # 显示 Agent 的思考 if text and not tool_calls: print(f"\n[{iteration_count}] Agent 回复:") print(f" {text[:200]}{'...' if len(text) > 200 else ''}") elif text: print(f"\n[{iteration_count}] Agent 思考:") print(f" {text[:150]}{'...' if len(text) > 150 else ''}") # 显示工具调用 if tool_calls: for tc in tool_calls: tool_name = tc.get("function", {}).get("name", "unknown") args = tc.get("function", {}).get("arguments", {}) # 如果 args 是字符串,尝试解析为 JSON if isinstance(args, str): import json try: args = json.loads(args) except: args = {} # 统计工具使用 tool_calls_count[tool_name] = tool_calls_count.get(tool_name, 0) + 1 # 检测关键工具使用 if tool_name == "goal": goal_used = True if isinstance(args, dict): if args.get("add"): print(f" → goal(add): {args['add'][:80]}...") elif args.get("done"): print(f" → goal(done): {args['done'][:80]}...") elif args.get("focus"): print(f" → goal(focus): {args['focus']}") else: print(f" → goal(...)") elif tool_name == "subagent": subagent_used = True if isinstance(args, dict): mode = args.get("mode", "unknown") if mode == "evaluate": evaluate_used = True target = args.get("target_goal_id", "?") print(f" → subagent(evaluate): 评估目标 {target}") elif mode == "delegate": delegate_used = True task = args.get("task", "") print(f" → subagent(delegate): {task[:60]}...") else: print(f" → subagent({mode})") else: print(f" → subagent(...)") else: # 其他工具简化显示 if tool_name in ["read_file", "write_file", "edit_file"]: if isinstance(args, dict): file_path = args.get("file_path", "") if file_path: file_name = Path(file_path).name print(f" → {tool_name}: {file_name}") else: print(f" → {tool_name}") else: print(f" → {tool_name}") elif tool_name == "bash_command": if isinstance(args, dict): cmd = args.get("command", "") print(f" → bash: {cmd[:60]}...") else: print(f" → bash") else: print(f" → {tool_name}") # 4. 测试结果总结 print() print("=" * 80) print("测试结果总结") print("=" * 80) print() print("功能使用情况:") print(f" {'✓' if goal_used else '✗'} Goal 工具: {'已使用' if goal_used else '未使用'}") print(f" {'✓' if subagent_used else '✗'} SubAgent 工具: {'已使用' if subagent_used else '未使用'}") if subagent_used: print(f" - Evaluate 模式: {'已使用' if evaluate_used else '未使用'}") print(f" - Delegate 模式: {'已使用' if delegate_used else '未使用'}") print() print("工具调用统计:") for tool_name, count in sorted(tool_calls_count.items()): print(f" - {tool_name}: {count} 次") print() print(f"总迭代次数: {iteration_count}") print() # 5. 验证结果 print("验证生成的文件:") # 检查输出目录 if output_dir.exists(): files = list(output_dir.glob("*.md")) + list(output_dir.glob("*.txt")) if files: for file in files: size = file.stat().st_size print(f" ✓ {file.name} ({size} bytes)") else: print(f" ✗ 输出目录为空") else: print(f" ✗ 输出目录不存在") print() print("=" * 80) print("集成测试 3 完成") print("=" * 80) if __name__ == "__main__": asyncio.run(main())