run.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
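"""
Live test of the subagent tool.

Exercises the three modes of the subagent tool against a real LLM:
1. delegate - hand a sub-task over to a subagent
2. explore  - explore several candidate approaches in parallel
3. evaluate - evaluate a result
"""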
  8. import os
  9. import sys
  10. import asyncio
  11. from pathlib import Path
  12. # 添加项目根目录到 Python 路径
  13. sys.path.insert(0, str(Path(__file__).parent.parent.parent))
  14. from dotenv import load_dotenv
  15. load_dotenv()
  16. from agent.llm.prompts import SimplePrompt
  17. from agent.core.runner import AgentRunner, RunConfig
  18. from agent.trace import (
  19. FileSystemTraceStore,
  20. Trace,
  21. Message,
  22. )
  23. from agent.llm import create_openrouter_llm_call
  24. async def main():
  25. # 路径配置
  26. base_dir = Path(__file__).parent
  27. project_root = base_dir.parent.parent
  28. prompt_path = base_dir / "test.prompt"
  29. output_dir = base_dir / "output"
  30. output_dir.mkdir(exist_ok=True)
  31. print("=" * 60)
  32. print("Subagent 工具测试 (真实 LLM)")
  33. print("=" * 60)
  34. print()
  35. # 1. 加载 prompt
  36. print("1. 加载 prompt...")
  37. prompt = SimplePrompt(prompt_path)
  38. # 提取配置
  39. system_prompt = prompt._messages.get("system", "")
  40. user_task = prompt._messages.get("user", "")
  41. model_name = prompt.config.get('model', 'gemini-2.5-flash')
  42. temperature = float(prompt.config.get('temperature', 0.3))
  43. print(f" - 任务: {user_task[:80]}...")
  44. print(f" - 模型: {model_name}")
  45. # 2. 构建消息
  46. print("2. 构建任务消息...")
  47. messages = prompt.build_messages()
  48. # 3. 创建 Agent Runner
  49. print("3. 创建 Agent Runner...")
  50. print(f" - 模型: {model_name} (via OpenRouter)")
  51. # Trace 输出到测试目录
  52. trace_dir = base_dir / ".trace"
  53. trace_dir.mkdir(exist_ok=True)
  54. print(f" - Trace 目录: {trace_dir}")
  55. runner = AgentRunner(
  56. trace_store=FileSystemTraceStore(base_path=str(trace_dir)),
  57. llm_call=create_openrouter_llm_call(model=f"google/{model_name}"),
  58. skills_dir=None,
  59. debug=True
  60. )
  61. # 4. Agent 模式执行
  62. print(f"4. 启动 Agent 模式...")
  63. print()
  64. final_response = ""
  65. current_trace_id = None
  66. subagent_calls = []
  67. async for item in runner.run(
  68. messages=messages,
  69. config=RunConfig(
  70. system_prompt=system_prompt,
  71. model=f"google/{model_name}",
  72. temperature=temperature,
  73. max_iterations=30,
  74. name=user_task[:50],
  75. ),
  76. ):
  77. # 处理 Trace 对象
  78. if isinstance(item, Trace):
  79. current_trace_id = item.trace_id
  80. if item.status == "running":
  81. print(f"[Trace] 开始: {item.trace_id[:8]}")
  82. elif item.status == "completed":
  83. print(f"[Trace] 完成")
  84. print(f" - Total messages: {item.total_messages}")
  85. print(f" - Total tokens: {item.total_tokens}")
  86. print(f" - Total cost: ${item.total_cost:.4f}")
  87. elif item.status == "failed":
  88. print(f"[Trace] 失败: {item.error_message}")
  89. # 处理 Message 对象
  90. elif isinstance(item, Message):
  91. if item.role == "assistant":
  92. content = item.content
  93. if isinstance(content, dict):
  94. text = content.get("text", "")
  95. tool_calls = content.get("tool_calls")
  96. if text and not tool_calls:
  97. final_response = text
  98. print(f"[Response] Agent 完成")
  99. elif text:
  100. print(f"[Assistant] {text[:100]}...")
  101. if tool_calls:
  102. for tc in tool_calls:
  103. tool_name = tc.get("function", {}).get("name", "unknown")
  104. print(f"[Tool Call] {tool_name}")
  105. # 记录 subagent 调用
  106. if tool_name == "subagent":
  107. import json
  108. args = tc.get("function", {}).get("arguments", {})
  109. # arguments 可能是字符串,需要解析
  110. if isinstance(args, str):
  111. try:
  112. args = json.loads(args)
  113. except:
  114. args = {}
  115. mode = args.get("mode", "unknown")
  116. subagent_calls.append({
  117. "mode": mode,
  118. "task": args.get("task", args.get("background", ""))[:50]
  119. })
  120. print(f" → mode: {mode}")
  121. elif item.role == "tool":
  122. content = item.content
  123. if isinstance(content, dict):
  124. tool_name = content.get("tool_name", "unknown")
  125. print(f"[Tool Result] {tool_name}")
  126. if item.description:
  127. desc = item.description[:80] if len(item.description) > 80 else item.description
  128. print(f" {desc}...")
  129. # 5. 输出结果
  130. print()
  131. print("=" * 60)
  132. print("Agent 响应:")
  133. print("=" * 60)
  134. print(final_response)
  135. print("=" * 60)
  136. print()
  137. # 6. 统计 subagent 调用
  138. print("=" * 60)
  139. print("Subagent 调用统计:")
  140. print("=" * 60)
  141. delegate_count = sum(1 for call in subagent_calls if call["mode"] == "delegate")
  142. explore_count = sum(1 for call in subagent_calls if call["mode"] == "explore")
  143. evaluate_count = sum(1 for call in subagent_calls if call["mode"] == "evaluate")
  144. print(f" - delegate 模式: {delegate_count} 次")
  145. print(f" - explore 模式: {explore_count} 次")
  146. print(f" - evaluate 模式: {evaluate_count} 次")
  147. print(f" - 总计: {len(subagent_calls)} 次")
  148. print()
  149. for i, call in enumerate(subagent_calls, 1):
  150. print(f" {i}. [{call['mode']}] {call['task']}...")
  151. print("=" * 60)
  152. print()
  153. # 7. 保存结果
  154. output_file = output_dir / "subagent_test_result.txt"
  155. with open(output_file, 'w', encoding='utf-8') as f:
  156. f.write("=" * 60 + "\n")
  157. f.write("Agent 响应\n")
  158. f.write("=" * 60 + "\n\n")
  159. f.write(final_response)
  160. f.write("\n\n" + "=" * 60 + "\n")
  161. f.write("Subagent 调用统计\n")
  162. f.write("=" * 60 + "\n\n")
  163. f.write(f"delegate 模式: {delegate_count} 次\n")
  164. f.write(f"explore 模式: {explore_count} 次\n")
  165. f.write(f"evaluate 模式: {evaluate_count} 次\n")
  166. f.write(f"总计: {len(subagent_calls)} 次\n\n")
  167. for i, call in enumerate(subagent_calls, 1):
  168. f.write(f"{i}. [{call['mode']}] {call['task']}...\n")
  169. print(f"✓ 结果已保存到: {output_file}")
  170. print()
  171. # 8. 可视化提示
  172. print("=" * 60)
  173. print("Trace 信息:")
  174. print("=" * 60)
  175. print(f"Trace ID: {current_trace_id}")
  176. print(f"Trace 目录: {trace_dir}")
  177. print()
  178. print("查看 trace 文件:")
  179. print(f" ls -la {trace_dir}")
  180. print()
  181. print("或启动 API Server 可视化:")
  182. print(" python3 api_server.py")
  183. print(" 访问: http://localhost:8000/api/traces")
  184. print("=" * 60)
  185. if __name__ == "__main__":
  186. asyncio.run(main())