run.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. """
  2. Subagent 工具真实测试
  3. 使用真实 LLM 测试 subagent 工具的三种模式:
  4. 1. delegate - 委托子任务
  5. 2. explore - 并行探索方案
  6. 3. evaluate - 评估结果
  7. """
  8. import os
  9. import sys
  10. import asyncio
  11. from pathlib import Path
  12. # 添加项目根目录到 Python 路径
  13. sys.path.insert(0, str(Path(__file__).parent.parent.parent))
  14. from dotenv import load_dotenv
  15. load_dotenv()
  16. from agent.llm.prompts import SimplePrompt
  17. from agent.core.runner import AgentRunner
  18. from agent.trace import (
  19. FileSystemTraceStore,
  20. Trace,
  21. Message,
  22. )
  23. from agent.llm import create_openrouter_llm_call
  24. async def main():
  25. # 路径配置
  26. base_dir = Path(__file__).parent
  27. project_root = base_dir.parent.parent
  28. prompt_path = base_dir / "test.prompt"
  29. output_dir = base_dir / "output"
  30. output_dir.mkdir(exist_ok=True)
  31. print("=" * 60)
  32. print("Subagent 工具测试 (真实 LLM)")
  33. print("=" * 60)
  34. print()
  35. # 1. 加载 prompt
  36. print("1. 加载 prompt...")
  37. prompt = SimplePrompt(prompt_path)
  38. # 提取配置
  39. system_prompt = prompt._messages.get("system", "")
  40. user_task = prompt._messages.get("user", "")
  41. model_name = prompt.config.get('model', 'gemini-2.5-flash')
  42. temperature = float(prompt.config.get('temperature', 0.3))
  43. print(f" - 任务: {user_task[:80]}...")
  44. print(f" - 模型: {model_name}")
  45. # 2. 构建消息
  46. print("2. 构建任务消息...")
  47. messages = prompt.build_messages()
  48. # 3. 创建 Agent Runner
  49. print("3. 创建 Agent Runner...")
  50. print(f" - 模型: {model_name} (via OpenRouter)")
  51. # Trace 输出到测试目录
  52. trace_dir = base_dir / ".trace"
  53. trace_dir.mkdir(exist_ok=True)
  54. print(f" - Trace 目录: {trace_dir}")
  55. runner = AgentRunner(
  56. trace_store=FileSystemTraceStore(base_path=str(trace_dir)),
  57. llm_call=create_openrouter_llm_call(model=f"google/{model_name}"),
  58. skills_dir=None,
  59. debug=True
  60. )
  61. # 4. Agent 模式执行
  62. print(f"4. 启动 Agent 模式...")
  63. print()
  64. final_response = ""
  65. current_trace_id = None
  66. subagent_calls = []
  67. async for item in runner.run(
  68. task=user_task,
  69. messages=messages,
  70. system_prompt=system_prompt,
  71. model=f"google/{model_name}",
  72. temperature=temperature,
  73. max_iterations=30, # 增加迭代次数以支持多个 subagent 调用
  74. ):
  75. # 处理 Trace 对象
  76. if isinstance(item, Trace):
  77. current_trace_id = item.trace_id
  78. if item.status == "running":
  79. print(f"[Trace] 开始: {item.trace_id[:8]}")
  80. elif item.status == "completed":
  81. print(f"[Trace] 完成")
  82. print(f" - Total messages: {item.total_messages}")
  83. print(f" - Total tokens: {item.total_tokens}")
  84. print(f" - Total cost: ${item.total_cost:.4f}")
  85. elif item.status == "failed":
  86. print(f"[Trace] 失败: {item.error_message}")
  87. # 处理 Message 对象
  88. elif isinstance(item, Message):
  89. if item.role == "assistant":
  90. content = item.content
  91. if isinstance(content, dict):
  92. text = content.get("text", "")
  93. tool_calls = content.get("tool_calls")
  94. if text and not tool_calls:
  95. final_response = text
  96. print(f"[Response] Agent 完成")
  97. elif text:
  98. print(f"[Assistant] {text[:100]}...")
  99. if tool_calls:
  100. for tc in tool_calls:
  101. tool_name = tc.get("function", {}).get("name", "unknown")
  102. print(f"[Tool Call] {tool_name}")
  103. # 记录 subagent 调用
  104. if tool_name == "subagent":
  105. import json
  106. args = tc.get("function", {}).get("arguments", {})
  107. # arguments 可能是字符串,需要解析
  108. if isinstance(args, str):
  109. try:
  110. args = json.loads(args)
  111. except:
  112. args = {}
  113. mode = args.get("mode", "unknown")
  114. subagent_calls.append({
  115. "mode": mode,
  116. "task": args.get("task", args.get("background", ""))[:50]
  117. })
  118. print(f" → mode: {mode}")
  119. elif item.role == "tool":
  120. content = item.content
  121. if isinstance(content, dict):
  122. tool_name = content.get("tool_name", "unknown")
  123. print(f"[Tool Result] {tool_name}")
  124. if item.description:
  125. desc = item.description[:80] if len(item.description) > 80 else item.description
  126. print(f" {desc}...")
  127. # 5. 输出结果
  128. print()
  129. print("=" * 60)
  130. print("Agent 响应:")
  131. print("=" * 60)
  132. print(final_response)
  133. print("=" * 60)
  134. print()
  135. # 6. 统计 subagent 调用
  136. print("=" * 60)
  137. print("Subagent 调用统计:")
  138. print("=" * 60)
  139. delegate_count = sum(1 for call in subagent_calls if call["mode"] == "delegate")
  140. explore_count = sum(1 for call in subagent_calls if call["mode"] == "explore")
  141. evaluate_count = sum(1 for call in subagent_calls if call["mode"] == "evaluate")
  142. print(f" - delegate 模式: {delegate_count} 次")
  143. print(f" - explore 模式: {explore_count} 次")
  144. print(f" - evaluate 模式: {evaluate_count} 次")
  145. print(f" - 总计: {len(subagent_calls)} 次")
  146. print()
  147. for i, call in enumerate(subagent_calls, 1):
  148. print(f" {i}. [{call['mode']}] {call['task']}...")
  149. print("=" * 60)
  150. print()
  151. # 7. 保存结果
  152. output_file = output_dir / "subagent_test_result.txt"
  153. with open(output_file, 'w', encoding='utf-8') as f:
  154. f.write("=" * 60 + "\n")
  155. f.write("Agent 响应\n")
  156. f.write("=" * 60 + "\n\n")
  157. f.write(final_response)
  158. f.write("\n\n" + "=" * 60 + "\n")
  159. f.write("Subagent 调用统计\n")
  160. f.write("=" * 60 + "\n\n")
  161. f.write(f"delegate 模式: {delegate_count} 次\n")
  162. f.write(f"explore 模式: {explore_count} 次\n")
  163. f.write(f"evaluate 模式: {evaluate_count} 次\n")
  164. f.write(f"总计: {len(subagent_calls)} 次\n\n")
  165. for i, call in enumerate(subagent_calls, 1):
  166. f.write(f"{i}. [{call['mode']}] {call['task']}...\n")
  167. print(f"✓ 结果已保存到: {output_file}")
  168. print()
  169. # 8. 可视化提示
  170. print("=" * 60)
  171. print("Trace 信息:")
  172. print("=" * 60)
  173. print(f"Trace ID: {current_trace_id}")
  174. print(f"Trace 目录: {trace_dir}")
  175. print()
  176. print("查看 trace 文件:")
  177. print(f" ls -la {trace_dir}")
  178. print()
  179. print("或启动 API Server 可视化:")
  180. print(" python3 api_server.py")
  181. print(" 访问: http://localhost:8000/api/traces")
  182. print("=" * 60)
  183. if __name__ == "__main__":
  184. asyncio.run(main())