# run.py

  1. """
  2. 集成测试 - 真实场景测试
  3. 测试场景:代码重构与测试任务
  4. 目标:让 Agent 在真实场景中自然使用各种工具,验证重构后的功能
  5. 测试内容:
  6. 1. Goal 工具 - 创建和管理执行计划
  7. 2. SubAgent 工具 - delegate 模式(委托任务)
  8. 3. SubAgent 工具 - evaluate 模式(评估结果)
  9. 4. 文件操作工具 - 读写编辑文件
  10. 5. Bash 工具 - 运行测试
  11. 不刻意测试某个功能,而是让 Agent 自然地完成一个真实任务。
  12. """
  13. import os
  14. import sys
  15. import asyncio
  16. from pathlib import Path
  17. # 添加项目根目录到 Python 路径
  18. sys.path.insert(0, str(Path(__file__).parent.parent.parent))
  19. from dotenv import load_dotenv
  20. load_dotenv()
  21. from agent.llm.prompts import SimplePrompt
  22. from agent.core.runner import AgentRunner
  23. from agent.execution import FileSystemTraceStore, Trace, Message
  24. from agent.llm import create_openrouter_llm_call
  25. async def main():
  26. # 路径配置
  27. base_dir = Path(__file__).parent
  28. project_root = base_dir.parent.parent
  29. prompt_path = base_dir / "task.prompt"
  30. project_dir = base_dir / "project"
  31. print("=" * 80)
  32. print("集成测试 - 真实场景:代码重构与测试")
  33. print("=" * 80)
  34. print()
  35. # 1. 加载 prompt
  36. print("1. 加载任务 prompt...")
  37. prompt = SimplePrompt(prompt_path)
  38. system_prompt = prompt._messages.get("system", "")
  39. user_prompt = prompt._messages.get("user", "")
  40. print(f" ✓ System prompt 已加载")
  41. print(f" ✓ User prompt 已加载")
  42. print()
  43. # 2. 创建 Agent Runner
  44. print("2. 创建 Agent Runner...")
  45. print(f" - 模型: Claude Sonnet 4.5 (via OpenRouter)")
  46. print(f" - Trace 存储: .trace/")
  47. print()
  48. runner = AgentRunner(
  49. trace_store=FileSystemTraceStore(base_path=".trace"),
  50. llm_call=create_openrouter_llm_call(model="anthropic/claude-sonnet-4.5"),
  51. skills_dir=str(project_root / "agent" / "skills"),
  52. debug=False
  53. )
  54. # 3. 运行 Agent
  55. print("3. 启动 Agent 执行任务...")
  56. print("=" * 80)
  57. print()
  58. current_trace_id = None
  59. goal_used = False
  60. subagent_used = False
  61. evaluate_used = False
  62. delegate_used = False
  63. iteration_count = 0
  64. tool_calls_count = {}
  65. async for item in runner.run(
  66. task=user_prompt,
  67. system_prompt=system_prompt,
  68. model="anthropic/claude-sonnet-4.5",
  69. temperature=0.3,
  70. max_iterations=30,
  71. ):
  72. # 处理 Trace 对象
  73. if isinstance(item, Trace):
  74. current_trace_id = item.trace_id
  75. if item.status == "running":
  76. print(f"[Trace] 开始: {item.trace_id[:8]}...")
  77. elif item.status == "completed":
  78. print()
  79. print("=" * 80)
  80. print(f"[Trace] 完成")
  81. print(f" - 总消息数: {item.total_messages}")
  82. print(f" - 总 Token 数: {item.total_tokens}")
  83. print(f" - 总成本: ${item.total_cost:.4f}")
  84. print("=" * 80)
  85. elif item.status == "failed":
  86. print()
  87. print(f"[Trace] 失败: {item.error}")
  88. # 处理 Message 对象
  89. elif isinstance(item, Message):
  90. if item.role == "assistant":
  91. iteration_count += 1
  92. content = item.content
  93. if isinstance(content, dict):
  94. text = content.get("text", "")
  95. tool_calls = content.get("tool_calls")
  96. # 显示 Agent 的思考
  97. if text and not tool_calls:
  98. print(f"\n[{iteration_count}] Agent 回复:")
  99. print(f" {text[:200]}{'...' if len(text) > 200 else ''}")
  100. elif text:
  101. print(f"\n[{iteration_count}] Agent 思考:")
  102. print(f" {text[:150]}{'...' if len(text) > 150 else ''}")
  103. # 显示工具调用
  104. if tool_calls:
  105. for tc in tool_calls:
  106. tool_name = tc.get("function", {}).get("name", "unknown")
  107. args = tc.get("function", {}).get("arguments", {})
  108. # 如果 args 是字符串,尝试解析为 JSON
  109. if isinstance(args, str):
  110. import json
  111. try:
  112. args = json.loads(args)
  113. except:
  114. args = {}
  115. # 统计工具使用
  116. tool_calls_count[tool_name] = tool_calls_count.get(tool_name, 0) + 1
  117. # 检测关键工具使用
  118. if tool_name == "goal":
  119. goal_used = True
  120. # 显示 goal 操作
  121. if isinstance(args, dict):
  122. if args.get("add"):
  123. print(f" → goal(add): {args['add'][:80]}...")
  124. elif args.get("done"):
  125. print(f" → goal(done): {args['done'][:80]}...")
  126. elif args.get("focus"):
  127. print(f" → goal(focus): {args['focus']}")
  128. else:
  129. print(f" → goal(...)")
  130. elif tool_name == "subagent":
  131. subagent_used = True
  132. if isinstance(args, dict):
  133. mode = args.get("mode", "unknown")
  134. if mode == "evaluate":
  135. evaluate_used = True
  136. target = args.get("target_goal_id", "?")
  137. print(f" → subagent(evaluate): 评估目标 {target}")
  138. elif mode == "delegate":
  139. delegate_used = True
  140. task = args.get("task", "")
  141. print(f" → subagent(delegate): {task[:60]}...")
  142. else:
  143. print(f" → subagent({mode})")
  144. else:
  145. print(f" → subagent(...)")
  146. else:
  147. # 其他工具简化显示
  148. if tool_name in ["read_file", "write_file", "edit_file"]:
  149. if isinstance(args, dict):
  150. file_path = args.get("file_path", "")
  151. if file_path:
  152. file_name = Path(file_path).name
  153. print(f" → {tool_name}: {file_name}")
  154. else:
  155. print(f" → {tool_name}")
  156. else:
  157. print(f" → {tool_name}")
  158. elif tool_name == "bash_command":
  159. if isinstance(args, dict):
  160. cmd = args.get("command", "")
  161. print(f" → bash: {cmd[:60]}...")
  162. else:
  163. print(f" → bash")
  164. else:
  165. print(f" → {tool_name}")
  166. elif item.role == "tool":
  167. # 工具返回结果(简化显示)
  168. pass
  169. # 4. 测试结果总结
  170. print()
  171. print("=" * 80)
  172. print("测试结果总结")
  173. print("=" * 80)
  174. print()
  175. print("功能使用情况:")
  176. print(f" ✓ Goal 工具: {'已使用' if goal_used else '未使用'}")
  177. print(f" ✓ SubAgent 工具: {'已使用' if subagent_used else '未使用'}")
  178. print(f" - Evaluate 模式: {'已使用' if evaluate_used else '未使用'}")
  179. print(f" - Delegate 模式: {'已使用' if delegate_used else '未使用'}")
  180. print()
  181. print("工具调用统计:")
  182. for tool_name, count in sorted(tool_calls_count.items()):
  183. print(f" - {tool_name}: {count} 次")
  184. print()
  185. print(f"总迭代次数: {iteration_count}")
  186. print()
  187. # 5. 验证结果
  188. print("验证生成的文件:")
  189. # 检查是否生成了测试文件
  190. test_file = project_dir / "test_calculator.py"
  191. if test_file.exists():
  192. print(f" ✓ 测试文件已生成: {test_file.name}")
  193. else:
  194. print(f" ✗ 测试文件未生成")
  195. # 检查 calculator.py 是否被修改(添加了 average 函数)
  196. calc_file = project_dir / "calculator.py"
  197. if calc_file.exists():
  198. content = calc_file.read_text()
  199. if "average" in content or "mean" in content:
  200. print(f" ✓ Calculator 已添加新功能")
  201. else:
  202. print(f" ✗ Calculator 未添加新功能")
  203. print()
  204. print("=" * 80)
  205. print("集成测试完成")
  206. print("=" * 80)
  207. if __name__ == "__main__":
  208. asyncio.run(main())