# test_subagent_tool.py
"""
Tests for the refactored SubAgent tool.

Covered:
1. The three modes of the subagent tool (evaluate/delegate/explore)
2. Unified management through SubAgentManager
3. Parameter validation and error handling
"""
  8. import asyncio
  9. import sys
  10. from pathlib import Path
  11. # 添加项目根目录到 Python 路径
  12. sys.path.insert(0, str(Path(__file__).parent.parent))
  13. from agent.models.goal import GoalTree, Goal
  14. from agent.services.subagent.manager import SubAgentManager
  15. from agent.tools.builtin.subagent import subagent
  16. # Mock 函数用于测试
  17. class MockStore:
  18. """模拟 TraceStore"""
  19. async def get_goal_tree(self, trace_id):
  20. """返回模拟的 GoalTree"""
  21. tree = GoalTree(mission="测试任务")
  22. tree.add_goals(["实现登录功能", "实现注册功能", "实现密码重置"])
  23. return tree
  24. async def update_goal(self, trace_id, goal_id, **kwargs):
  25. """模拟更新 Goal"""
  26. print(f" [Mock] 更新 Goal {goal_id}: {kwargs}")
  27. async def add_goal(self, trace_id, goal):
  28. """模拟添加 Goal"""
  29. print(f" [Mock] 添加 Goal: {goal.description}")
  30. async def create_trace(self, trace):
  31. """模拟创建 Trace"""
  32. print(f" [Mock] 创建 Trace: {trace.trace_id}")
  33. async def get_trace(self, trace_id):
  34. """模拟获取 Trace"""
  35. from agent.execution.models import Trace
  36. return Trace(
  37. trace_id=trace_id,
  38. mode="agent",
  39. task="测试任务",
  40. status="completed",
  41. total_messages=5,
  42. total_tokens=1000,
  43. total_cost=0.01
  44. )
  45. async def append_message(self, trace_id, message):
  46. """模拟添加消息"""
  47. print(f" [Mock] 添加消息到 {trace_id}")
  48. async def append_event(self, trace_id, event_type, data):
  49. """模拟添加事件"""
  50. print(f" [Mock] 事件 {event_type}: {data}")
  51. async def mock_run_agent(trace):
  52. """模拟运行 Agent"""
  53. print(f" [Mock] 运行 Agent: {trace.trace_id}")
  54. # 根据 agent_type 返回不同的结果
  55. if trace.agent_type == "evaluator":
  56. return """## 评估结论
  57. 通过
  58. ## 评估理由
  59. 登录功能实现完整,包含了密码加密和会话管理,符合所有要求。
  60. ## 修改建议
  61. """
  62. elif trace.agent_type == "delegate":
  63. return {"summary": "任务已完成,实现了用户注册功能"}
  64. elif trace.agent_type == "explore":
  65. return "探索完成,JWT 方案更适合当前需求"
  66. return "任务完成"
  67. async def test_subagent_evaluate_mode():
  68. """测试 subagent 工具的 evaluate 模式"""
  69. print("=" * 80)
  70. print("测试 1: SubAgent 工具 - Evaluate 模式")
  71. print("=" * 80)
  72. print()
  73. store = MockStore()
  74. # 测试评估模式
  75. print("1. 评估目标 1 的执行结果")
  76. result = await subagent(
  77. mode="evaluate",
  78. target_goal_id="1",
  79. evaluation_input={
  80. "goal_description": "实现用户登录功能",
  81. "actual_result": "已实现登录接口,包含密码加密(bcrypt)和会话管理(JWT)",
  82. "context": {
  83. "files": ["auth/login.py", "auth/session.py"],
  84. "tests": "所有测试通过"
  85. }
  86. },
  87. requirements="需要包含密码加密和会话管理",
  88. context={
  89. "store": store,
  90. "trace_id": "test-trace-001",
  91. "goal_id": "eval-1",
  92. "run_agent": mock_run_agent
  93. }
  94. )
  95. print("\n评估结果:")
  96. print(f" 通过: {result.get('passed')}")
  97. print(f" 理由: {result.get('reason')}")
  98. print(f" 建议: {result.get('suggestions')}")
  99. print()
  100. print("=" * 80)
  101. print("✅ Evaluate 模式测试完成")
  102. print("=" * 80)
  103. async def test_subagent_delegate_mode():
  104. """测试 subagent 工具的 delegate 模式"""
  105. print("\n" + "=" * 80)
  106. print("测试 2: SubAgent 工具 - Delegate 模式")
  107. print("=" * 80)
  108. print()
  109. store = MockStore()
  110. # 测试委托模式
  111. print("1. 委托任务:实现用户注册功能")
  112. result = await subagent(
  113. mode="delegate",
  114. task="实现用户注册功能,包括邮箱验证和密码强度检查",
  115. context={
  116. "store": store,
  117. "trace_id": "test-trace-002",
  118. "goal_id": "delegate-1",
  119. "run_agent": mock_run_agent
  120. }
  121. )
  122. print("\n委托结果:")
  123. print(f" 摘要: {result.get('summary')}")
  124. print(f" 统计: {result.get('stats')}")
  125. print()
  126. print("=" * 80)
  127. print("✅ Delegate 模式测试完成")
  128. print("=" * 80)
  129. async def test_subagent_explore_mode():
  130. """测试 subagent 工具的 explore 模式"""
  131. print("\n" + "=" * 80)
  132. print("测试 3: SubAgent 工具 - Explore 模式")
  133. print("=" * 80)
  134. print()
  135. store = MockStore()
  136. # 测试探索模式
  137. print("1. 探索认证方案")
  138. result = await subagent(
  139. mode="explore",
  140. branches=[
  141. "JWT Token 方案",
  142. "Session Cookie 方案",
  143. "OAuth 2.0 方案"
  144. ],
  145. background="需要为 Web 应用选择合适的认证方案",
  146. context={
  147. "store": store,
  148. "trace_id": "test-trace-003",
  149. "goal_id": "explore-1",
  150. "run_agent": mock_run_agent
  151. }
  152. )
  153. print("\n探索结果:")
  154. print(f" 摘要: {result.get('summary')}")
  155. print()
  156. print("=" * 80)
  157. print("✅ Explore 模式测试完成")
  158. print("=" * 80)
  159. async def test_subagent_error_handling():
  160. """测试 subagent 工具的错误处理"""
  161. print("\n" + "=" * 80)
  162. print("测试 4: SubAgent 工具 - 错误处理")
  163. print("=" * 80)
  164. print()
  165. store = MockStore()
  166. # 1. 缺少 context
  167. print("1. 缺少 context 参数")
  168. result = await subagent(mode="evaluate", target_goal_id="1", evaluation_input={})
  169. print(f" 结果: {result}")
  170. print()
  171. # 2. 无效的 mode
  172. print("2. 无效的 mode 参数")
  173. result = await subagent(
  174. mode="invalid_mode",
  175. context={
  176. "store": store,
  177. "trace_id": "test",
  178. "run_agent": mock_run_agent
  179. }
  180. )
  181. print(f" 结果: {result}")
  182. print()
  183. # 3. evaluate 模式缺少必需参数
  184. print("3. evaluate 模式缺少 target_goal_id")
  185. result = await subagent(
  186. mode="evaluate",
  187. evaluation_input={"actual_result": "测试"},
  188. context={
  189. "store": store,
  190. "trace_id": "test",
  191. "goal_id": "1",
  192. "run_agent": mock_run_agent
  193. }
  194. )
  195. print(f" 结果: {result}")
  196. print()
  197. # 4. delegate 模式缺少 task
  198. print("4. delegate 模式缺少 task 参数")
  199. result = await subagent(
  200. mode="delegate",
  201. context={
  202. "store": store,
  203. "trace_id": "test",
  204. "goal_id": "1",
  205. "run_agent": mock_run_agent
  206. }
  207. )
  208. print(f" 结果: {result}")
  209. print()
  210. # 5. explore 模式缺少 branches
  211. print("5. explore 模式缺少 branches 参数")
  212. result = await subagent(
  213. mode="explore",
  214. context={
  215. "store": store,
  216. "trace_id": "test",
  217. "goal_id": "1",
  218. "run_agent": mock_run_agent
  219. }
  220. )
  221. print(f" 结果: {result}")
  222. print()
  223. print("=" * 80)
  224. print("✅ 错误处理测试完成")
  225. print("=" * 80)
  226. async def test_subagent_manager_directly():
  227. """直接测试 SubAgentManager"""
  228. print("\n" + "=" * 80)
  229. print("测试 5: 直接测试 SubAgentManager")
  230. print("=" * 80)
  231. print()
  232. store = MockStore()
  233. manager = SubAgentManager(store)
  234. # 测试 evaluate 模式
  235. print("1. 使用 SubAgentManager 执行 evaluate 模式")
  236. result = await manager.execute(
  237. mode="evaluate",
  238. current_trace_id="test-trace-004",
  239. current_goal_id="manager-test-1",
  240. options={
  241. "target_goal_id": "1",
  242. "evaluation_input": {
  243. "actual_result": "功能已实现"
  244. },
  245. "requirements": "需要完整实现"
  246. },
  247. run_agent=mock_run_agent
  248. )
  249. print(f"\n结果: {result}")
  250. print()
  251. # 测试权限配置
  252. print("2. 验证不同模式的权限配置")
  253. evaluate_tools = manager._get_allowed_tools("evaluate")
  254. delegate_tools = manager._get_allowed_tools("delegate")
  255. explore_tools = manager._get_allowed_tools("explore")
  256. print(f" Evaluate 允许的工具: {evaluate_tools}")
  257. print(f" Delegate 允许的工具: {delegate_tools}")
  258. print(f" Explore 允许的工具: {explore_tools}")
  259. print()
  260. # 测试最大轮次配置
  261. print("3. 验证不同模式的最大轮次")
  262. print(f" Evaluate 最大轮次: {manager._get_max_turns('evaluate')}")
  263. print(f" Delegate 最大轮次: {manager._get_max_turns('delegate')}")
  264. print(f" Explore 最大轮次: {manager._get_max_turns('explore')}")
  265. print()
  266. print("=" * 80)
  267. print("✅ SubAgentManager 直接测试完成")
  268. print("=" * 80)
  269. async def main():
  270. """运行所有测试"""
  271. print("\n" + "🧪" * 40)
  272. print("SubAgent 工具功能测试")
  273. print("🧪" * 40 + "\n")
  274. try:
  275. await test_subagent_evaluate_mode()
  276. await test_subagent_delegate_mode()
  277. await test_subagent_explore_mode()
  278. await test_subagent_error_handling()
  279. await test_subagent_manager_directly()
  280. print("\n" + "=" * 80)
  281. print("🎉 所有测试完成!")
  282. print("=" * 80)
  283. except Exception as e:
  284. print(f"\n❌ 测试失败: {e}")
  285. import traceback
  286. traceback.print_exc()
  287. if __name__ == "__main__":
  288. asyncio.run(main())