run.py 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
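"""
Integration test 3 - content generation task.

Scenario: a realistic content-creation task, with no hints at all about
tools or steps.
Goal: verify the Agent's autonomy in a real usage scenario.

Task type: content generation (brand copy for a coffee shop)
- No hint to use any tool
- No hint about any step
- Only the task goal and requirements are given
- Simulates how a real user would use the Agent
"""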
  11. import os
  12. import sys
  13. import asyncio
  14. from pathlib import Path
  15. # 添加项目根目录到 Python 路径
  16. sys.path.insert(0, str(Path(__file__).parent.parent.parent))
  17. from dotenv import load_dotenv
  18. load_dotenv()
  19. from agent.llm.prompts import SimplePrompt
  20. from agent.core.runner import AgentRunner
  21. from agent.execution import FileSystemTraceStore, Trace, Message
  22. from agent.llm import create_openrouter_llm_call
  23. async def main():
  24. # 路径配置
  25. base_dir = Path(__file__).parent
  26. project_root = base_dir.parent.parent
  27. prompt_path = base_dir / "task.prompt"
  28. output_dir = base_dir / "output"
  29. print("=" * 80)
  30. print("集成测试 3 - 内容生成任务:咖啡店品牌文案")
  31. print("=" * 80)
  32. print()
  33. # 1. 加载 prompt
  34. print("1. 加载任务...")
  35. prompt = SimplePrompt(prompt_path)
  36. system_prompt = prompt._messages.get("system", "")
  37. user_prompt = prompt._messages.get("user", "")
  38. print(f" ✓ 任务类型: 内容生成")
  39. print(f" ✓ 无工具提示,无步骤提示")
  40. print()
  41. # 2. 创建 Agent Runner
  42. print("2. 创建 Agent Runner...")
  43. print(f" - 模型: Claude Sonnet 4.5")
  44. print()
  45. runner = AgentRunner(
  46. trace_store=FileSystemTraceStore(base_path=".trace"),
  47. llm_call=create_openrouter_llm_call(model="anthropic/claude-sonnet-4.5"),
  48. skills_dir=str(project_root / "agent" / "skills"),
  49. debug=False
  50. )
  51. # 3. 运行 Agent
  52. print("3. 启动 Agent...")
  53. print("=" * 80)
  54. print()
  55. current_trace_id = None
  56. goal_used = False
  57. subagent_used = False
  58. evaluate_used = False
  59. delegate_used = False
  60. iteration_count = 0
  61. tool_calls_count = {}
  62. async for item in runner.run(
  63. task=user_prompt,
  64. system_prompt=system_prompt,
  65. model="anthropic/claude-sonnet-4.5",
  66. temperature=0.7,
  67. max_iterations=30,
  68. ):
  69. # 处理 Trace 对象
  70. if isinstance(item, Trace):
  71. current_trace_id = item.trace_id
  72. if item.status == "running":
  73. print(f"[Trace] 开始: {item.trace_id[:8]}...")
  74. elif item.status == "completed":
  75. print()
  76. print("=" * 80)
  77. print(f"[Trace] 完成")
  78. print(f" - 总消息数: {item.total_messages}")
  79. print(f" - 总 Token 数: {item.total_tokens}")
  80. print(f" - 总成本: ${item.total_cost:.4f}")
  81. print("=" * 80)
  82. elif item.status == "failed":
  83. print()
  84. print(f"[Trace] 失败: {item.error}")
  85. # 处理 Message 对象
  86. elif isinstance(item, Message):
  87. if item.role == "assistant":
  88. iteration_count += 1
  89. content = item.content
  90. if isinstance(content, dict):
  91. text = content.get("text", "")
  92. tool_calls = content.get("tool_calls")
  93. # 显示 Agent 的思考
  94. if text and not tool_calls:
  95. print(f"\n[{iteration_count}] Agent 回复:")
  96. print(f" {text[:200]}{'...' if len(text) > 200 else ''}")
  97. elif text:
  98. print(f"\n[{iteration_count}] Agent 思考:")
  99. print(f" {text[:150]}{'...' if len(text) > 150 else ''}")
  100. # 显示工具调用
  101. if tool_calls:
  102. for tc in tool_calls:
  103. tool_name = tc.get("function", {}).get("name", "unknown")
  104. args = tc.get("function", {}).get("arguments", {})
  105. # 如果 args 是字符串,尝试解析为 JSON
  106. if isinstance(args, str):
  107. import json
  108. try:
  109. args = json.loads(args)
  110. except:
  111. args = {}
  112. # 统计工具使用
  113. tool_calls_count[tool_name] = tool_calls_count.get(tool_name, 0) + 1
  114. # 检测关键工具使用
  115. if tool_name == "goal":
  116. goal_used = True
  117. if isinstance(args, dict):
  118. if args.get("add"):
  119. print(f" → goal(add): {args['add'][:80]}...")
  120. elif args.get("done"):
  121. print(f" → goal(done): {args['done'][:80]}...")
  122. elif args.get("focus"):
  123. print(f" → goal(focus): {args['focus']}")
  124. else:
  125. print(f" → goal(...)")
  126. elif tool_name == "subagent":
  127. subagent_used = True
  128. if isinstance(args, dict):
  129. mode = args.get("mode", "unknown")
  130. if mode == "evaluate":
  131. evaluate_used = True
  132. target = args.get("target_goal_id", "?")
  133. print(f" → subagent(evaluate): 评估目标 {target}")
  134. elif mode == "delegate":
  135. delegate_used = True
  136. task = args.get("task", "")
  137. print(f" → subagent(delegate): {task[:60]}...")
  138. else:
  139. print(f" → subagent({mode})")
  140. else:
  141. print(f" → subagent(...)")
  142. else:
  143. # 其他工具简化显示
  144. if tool_name in ["read_file", "write_file", "edit_file"]:
  145. if isinstance(args, dict):
  146. file_path = args.get("file_path", "")
  147. if file_path:
  148. file_name = Path(file_path).name
  149. print(f" → {tool_name}: {file_name}")
  150. else:
  151. print(f" → {tool_name}")
  152. else:
  153. print(f" → {tool_name}")
  154. elif tool_name == "bash_command":
  155. if isinstance(args, dict):
  156. cmd = args.get("command", "")
  157. print(f" → bash: {cmd[:60]}...")
  158. else:
  159. print(f" → bash")
  160. else:
  161. print(f" → {tool_name}")
  162. # 4. 测试结果总结
  163. print()
  164. print("=" * 80)
  165. print("测试结果总结")
  166. print("=" * 80)
  167. print()
  168. print("功能使用情况:")
  169. print(f" {'✓' if goal_used else '✗'} Goal 工具: {'已使用' if goal_used else '未使用'}")
  170. print(f" {'✓' if subagent_used else '✗'} SubAgent 工具: {'已使用' if subagent_used else '未使用'}")
  171. if subagent_used:
  172. print(f" - Evaluate 模式: {'已使用' if evaluate_used else '未使用'}")
  173. print(f" - Delegate 模式: {'已使用' if delegate_used else '未使用'}")
  174. print()
  175. print("工具调用统计:")
  176. for tool_name, count in sorted(tool_calls_count.items()):
  177. print(f" - {tool_name}: {count} 次")
  178. print()
  179. print(f"总迭代次数: {iteration_count}")
  180. print()
  181. # 5. 验证结果
  182. print("验证生成的文件:")
  183. # 检查输出目录
  184. if output_dir.exists():
  185. files = list(output_dir.glob("*.md")) + list(output_dir.glob("*.txt"))
  186. if files:
  187. for file in files:
  188. size = file.stat().st_size
  189. print(f" ✓ {file.name} ({size} bytes)")
  190. else:
  191. print(f" ✗ 输出目录为空")
  192. else:
  193. print(f" ✗ 输出目录不存在")
  194. print()
  195. print("=" * 80)
  196. print("集成测试 3 完成")
  197. print("=" * 80)
  198. if __name__ == "__main__":
  199. asyncio.run(main())