| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351 |
- """
- 测试重构后的 SubAgent 工具功能
- 测试内容:
- 1. subagent 工具的三种模式(evaluate/delegate/explore)
- 2. SubAgentManager 的统一管理
- 3. 参数验证和错误处理
- """
- import asyncio
- import sys
- from pathlib import Path
- # 添加项目根目录到 Python 路径
- sys.path.insert(0, str(Path(__file__).parent.parent))
- from agent.models.goal import GoalTree, Goal
- from agent.services.subagent.manager import SubAgentManager
- from agent.tools.builtin.subagent import subagent
- # Mock 函数用于测试
class MockStore:
    """Stand-in for TraceStore that prints what it is asked to do."""

    async def get_goal_tree(self, trace_id):
        """Return a newly built GoalTree pre-populated with three goals."""
        sample_goals = ["实现登录功能", "实现注册功能", "实现密码重置"]
        goal_tree = GoalTree(mission="测试任务")
        goal_tree.add_goals(sample_goals)
        return goal_tree

    async def update_goal(self, trace_id, goal_id, **kwargs):
        """Print the goal id and the keyword updates it received."""
        line = f" [Mock] 更新 Goal {goal_id}: {kwargs}"
        print(line)

    async def add_goal(self, trace_id, goal):
        """Print the description of the goal being added."""
        line = f" [Mock] 添加 Goal: {goal.description}"
        print(line)

    async def create_trace(self, trace):
        """Print the id of the trace being created."""
        line = f" [Mock] 创建 Trace: {trace.trace_id}"
        print(line)

    async def get_trace(self, trace_id):
        """Return a Trace with fixed, already-completed statistics."""
        # Imported lazily, as in the original, so the model module is only
        # loaded when a trace is actually requested.
        from agent.execution.models import Trace

        canned_fields = dict(
            trace_id=trace_id,
            mode="agent",
            task="测试任务",
            status="completed",
            total_messages=5,
            total_tokens=1000,
            total_cost=0.01,
        )
        return Trace(**canned_fields)

    async def append_message(self, trace_id, message):
        """Print the id of the trace a message is appended to."""
        line = f" [Mock] 添加消息到 {trace_id}"
        print(line)

    async def append_event(self, trace_id, event_type, data):
        """Print the event type together with its payload."""
        line = f" [Mock] 事件 {event_type}: {data}"
        print(line)
async def mock_run_agent(trace):
    """Pretend to run an agent and return a canned result for its type.

    The result depends on ``trace.agent_type``: a markdown evaluation
    report for ``"evaluator"``, a dict with a ``summary`` key for
    ``"delegate"``, a short sentence for ``"explore"``, and a generic
    completion string for anything else.
    """
    print(f" [Mock] 运行 Agent: {trace.trace_id}")

    # Choose the canned answer from the agent type.
    kind = trace.agent_type

    if kind == "evaluator":
        return """## 评估结论
通过
## 评估理由
登录功能实现完整,包含了密码加密和会话管理,符合所有要求。
## 修改建议
无
"""

    if kind == "delegate":
        return {"summary": "任务已完成,实现了用户注册功能"}

    if kind == "explore":
        return "探索完成,JWT 方案更适合当前需求"

    return "任务完成"
async def test_subagent_evaluate_mode():
    """Call the subagent tool in evaluate mode and print the outcome.

    Nothing is asserted: the ``passed``, ``reason`` and ``suggestions``
    entries of the result are printed for manual inspection.
    """
    divider = "=" * 80
    print(divider)
    print("测试 1: SubAgent 工具 - Evaluate 模式")
    print(divider)
    print()

    store = MockStore()

    # Evaluate mode
    print("1. 评估目标 1 的执行结果")
    evaluation_input = {
        "goal_description": "实现用户登录功能",
        "actual_result": "已实现登录接口,包含密码加密(bcrypt)和会话管理(JWT)",
        "context": {
            "files": ["auth/login.py", "auth/session.py"],
            "tests": "所有测试通过",
        },
    }
    tool_context = {
        "store": store,
        "trace_id": "test-trace-001",
        "goal_id": "eval-1",
        "run_agent": mock_run_agent,
    }
    result = await subagent(
        mode="evaluate",
        target_goal_id="1",
        evaluation_input=evaluation_input,
        requirements="需要包含密码加密和会话管理",
        context=tool_context,
    )

    print("\n评估结果:")
    print(f" 通过: {result.get('passed')}")
    print(f" 理由: {result.get('reason')}")
    print(f" 建议: {result.get('suggestions')}")
    print()

    print(divider)
    print("✅ Evaluate 模式测试完成")
    print(divider)
async def test_subagent_delegate_mode():
    """Call the subagent tool in delegate mode and print the outcome.

    Nothing is asserted: the ``summary`` and ``stats`` entries of the
    result are printed for manual inspection.
    """
    divider = "=" * 80
    print("\n" + divider)
    print("测试 2: SubAgent 工具 - Delegate 模式")
    print(divider)
    print()

    store = MockStore()

    # Delegate mode
    print("1. 委托任务:实现用户注册功能")
    tool_context = {
        "store": store,
        "trace_id": "test-trace-002",
        "goal_id": "delegate-1",
        "run_agent": mock_run_agent,
    }
    result = await subagent(
        mode="delegate",
        task="实现用户注册功能,包括邮箱验证和密码强度检查",
        context=tool_context,
    )

    print("\n委托结果:")
    print(f" 摘要: {result.get('summary')}")
    print(f" 统计: {result.get('stats')}")
    print()

    print(divider)
    print("✅ Delegate 模式测试完成")
    print(divider)
async def test_subagent_explore_mode():
    """Call the subagent tool in explore mode and print the outcome.

    Nothing is asserted: the ``summary`` entry of the result is printed
    for manual inspection.
    """
    divider = "=" * 80
    print("\n" + divider)
    print("测试 3: SubAgent 工具 - Explore 模式")
    print(divider)
    print()

    store = MockStore()

    # Explore mode
    print("1. 探索认证方案")
    candidate_branches = [
        "JWT Token 方案",
        "Session Cookie 方案",
        "OAuth 2.0 方案",
    ]
    tool_context = {
        "store": store,
        "trace_id": "test-trace-003",
        "goal_id": "explore-1",
        "run_agent": mock_run_agent,
    }
    result = await subagent(
        mode="explore",
        branches=candidate_branches,
        background="需要为 Web 应用选择合适的认证方案",
        context=tool_context,
    )

    print("\n探索结果:")
    print(f" 摘要: {result.get('summary')}")
    print()

    print(divider)
    print("✅ Explore 模式测试完成")
    print(divider)
async def test_subagent_error_handling():
    """Call the subagent tool with five invalid argument sets.

    Each case prints its title, awaits ``subagent`` with the listed
    keyword arguments, and prints whatever comes back. Nothing is
    asserted about the results.
    """
    divider = "=" * 80
    print("\n" + divider)
    print("测试 4: SubAgent 工具 - 错误处理")
    print(divider)
    print()

    store = MockStore()

    def context_with_goal():
        # A fresh dict per case so no two calls share a context object.
        return {
            "store": store,
            "trace_id": "test",
            "goal_id": "1",
            "run_agent": mock_run_agent,
        }

    # (title, keyword arguments) pairs, run in the listed order.
    cases = [
        # 1. No context at all
        (
            "1. 缺少 context 参数",
            {"mode": "evaluate", "target_goal_id": "1", "evaluation_input": {}},
        ),
        # 2. Unknown mode
        (
            "2. 无效的 mode 参数",
            {
                "mode": "invalid_mode",
                "context": {
                    "store": store,
                    "trace_id": "test",
                    "run_agent": mock_run_agent,
                },
            },
        ),
        # 3. Evaluate mode without target_goal_id
        (
            "3. evaluate 模式缺少 target_goal_id",
            {
                "mode": "evaluate",
                "evaluation_input": {"actual_result": "测试"},
                "context": context_with_goal(),
            },
        ),
        # 4. Delegate mode without task
        (
            "4. delegate 模式缺少 task 参数",
            {"mode": "delegate", "context": context_with_goal()},
        ),
        # 5. Explore mode without branches
        (
            "5. explore 模式缺少 branches 参数",
            {"mode": "explore", "context": context_with_goal()},
        ),
    ]

    for title, call_kwargs in cases:
        print(title)
        result = await subagent(**call_kwargs)
        print(f" 结果: {result}")
        print()

    print(divider)
    print("✅ 错误处理测试完成")
    print(divider)
async def test_subagent_manager_directly():
    """Drive SubAgentManager without going through the subagent tool.

    Runs one evaluate-mode ``execute`` call, then prints the allowed
    tools and the maximum turn count the manager reports for each of
    the three modes. Nothing is asserted.
    """
    divider = "=" * 80
    print("\n" + divider)
    print("测试 5: 直接测试 SubAgentManager")
    print(divider)
    print()

    store = MockStore()
    manager = SubAgentManager(store)

    # Evaluate mode through the manager
    print("1. 使用 SubAgentManager 执行 evaluate 模式")
    evaluate_options = {
        "target_goal_id": "1",
        "evaluation_input": {
            "actual_result": "功能已实现",
        },
        "requirements": "需要完整实现",
    }
    result = await manager.execute(
        mode="evaluate",
        current_trace_id="test-trace-004",
        current_goal_id="manager-test-1",
        options=evaluate_options,
        run_agent=mock_run_agent,
    )
    print(f"\n结果: {result}")
    print()

    # Per-mode tool permissions: fetch all three before printing any.
    print("2. 验证不同模式的权限配置")
    evaluate_tools, delegate_tools, explore_tools = (
        manager._get_allowed_tools("evaluate"),
        manager._get_allowed_tools("delegate"),
        manager._get_allowed_tools("explore"),
    )
    print(f" Evaluate 允许的工具: {evaluate_tools}")
    print(f" Delegate 允许的工具: {delegate_tools}")
    print(f" Explore 允许的工具: {explore_tools}")
    print()

    # Per-mode turn limits: each lookup happens right before its print.
    print("3. 验证不同模式的最大轮次")
    print(f" Evaluate 最大轮次: {manager._get_max_turns('evaluate')}")
    print(f" Delegate 最大轮次: {manager._get_max_turns('delegate')}")
    print(f" Explore 最大轮次: {manager._get_max_turns('explore')}")
    print()

    print(divider)
    print("✅ SubAgentManager 直接测试完成")
    print(divider)
async def main():
    """Run every SubAgent scenario in order and report the outcome.

    The scenarios run sequentially; the first one that raises stops the
    run. The failure message and traceback are printed and the process
    then exits with status 1, so shells and CI see the failure instead
    of a successful exit.

    Raises:
        SystemExit: with code 1 when any scenario raises an exception.
    """
    print("\n" + "🧪" * 40)
    print("SubAgent 工具功能测试")
    print("🧪" * 40 + "\n")

    try:
        await test_subagent_evaluate_mode()
        await test_subagent_delegate_mode()
        await test_subagent_explore_mode()
        await test_subagent_error_handling()
        await test_subagent_manager_directly()
    except Exception as e:
        print(f"\n❌ 测试失败: {e}")
        import traceback

        traceback.print_exc()
        # Returning normally here would make the script exit 0 despite
        # the failure; exit non-zero so the result is machine-visible.
        sys.exit(1)
    else:
        print("\n" + "=" * 80)
        print("🎉 所有测试完成!")
        print("=" * 80)


if __name__ == "__main__":
    asyncio.run(main())
|