#!/usr/bin/env python3
"""
run.py - Integration test 6: signal-driven mechanism test.

Test goals:
- Verify that signals are sent and received
- Verify wait=True mode (synchronously waiting for a signal)
- Verify background task execution
- Verify the signal polling mechanism
- Verify error signal propagation
"""
import asyncio
import sys
import os
from pathlib import Path

# Add the project root (three directory levels above this file) to the Python
# path. This must run before the `agent.*` imports below.
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))

from dotenv import load_dotenv

# NOTE(review): called before the agent imports, presumably so that values from
# a local .env file are already in the environment when they load - confirm.
load_dotenv()

from agent.llm.prompts import SimplePrompt
from agent.core.runner import AgentRunner
from agent.execution import FileSystemTraceStore, Trace, Message
from agent.llm import create_openrouter_llm_call
  24. async def main():
  25. """运行测试"""
  26. # 路径配置
  27. base_dir = Path(__file__).parent
  28. prompt_path = base_dir / "task.prompt"
  29. output_dir = base_dir / "output"
  30. print("=" * 80)
  31. print("集成测试 6: 信号驱动机制测试")
  32. print("=" * 80)
  33. print()
  34. # 1. 加载 prompt
  35. print("1. 加载任务...")
  36. prompt = SimplePrompt(prompt_path)
  37. system_prompt = prompt._messages.get("system", "")
  38. user_prompt = prompt._messages.get("user", "")
  39. print(f" ✓ 任务类型: 数据验证模块实现")
  40. print(f" ✓ 测试重点: 信号机制")
  41. print(f" ✓ 监控内容: 信号发送、接收、轮询")
  42. print()
  43. # 2. 创建 Agent Runner
  44. print("2. 创建 Agent Runner...")
  45. print(f" - 模型: Claude Sonnet 4.5")
  46. print(f" - 信号机制: 已启用")
  47. print()
  48. runner = AgentRunner(
  49. trace_store=FileSystemTraceStore(base_path=".trace"),
  50. llm_call=create_openrouter_llm_call(model="anthropic/claude-sonnet-4.5"),
  51. skills_dir=str(project_root / "agent" / "skills"),
  52. debug=False
  53. )
  54. # 验证 SignalBus 已创建
  55. if hasattr(runner, 'signal_bus'):
  56. print(" ✅ SignalBus 已创建")
  57. else:
  58. print(" ❌ SignalBus 未创建")
  59. return
  60. # 3. 运行 Agent
  61. print()
  62. print("3. 启动 Agent...")
  63. print("=" * 80)
  64. print()
  65. # 创建输出目录
  66. output_dir.mkdir(exist_ok=True)
  67. # 监控变量
  68. current_trace_id = None
  69. goal_used = False
  70. subagent_used = False
  71. evaluate_used = False
  72. iteration_count = 0
  73. tool_calls_count = {}
  74. evaluation_count = 0
  75. evaluation_results = []
  76. # 信号监控
  77. signals_emitted = []
  78. signals_received = []
  79. signal_types = set()
  80. # 钩子:监控信号发送
  81. original_emit = runner.signal_bus.emit
  82. def monitored_emit(signal):
  83. signals_emitted.append({
  84. "type": signal.type,
  85. "trace_id": signal.trace_id,
  86. "data_keys": list(signal.data.keys())
  87. })
  88. signal_types.add(signal.type)
  89. print(f" [信号发送] {signal.type} (trace: {signal.trace_id[:8]}...)")
  90. return original_emit(signal)
  91. runner.signal_bus.emit = monitored_emit
  92. # 钩子:监控信号接收
  93. original_check_buffer = runner.signal_bus.check_buffer
  94. def monitored_check_buffer(trace_id):
  95. signals = original_check_buffer(trace_id)
  96. if signals:
  97. for signal in signals:
  98. signals_received.append({
  99. "type": signal.type,
  100. "trace_id": signal.trace_id
  101. })
  102. print(f" [信号接收] {signal.type} (trace: {signal.trace_id[:8]}...)")
  103. return signals
  104. runner.signal_bus.check_buffer = monitored_check_buffer
  105. async for item in runner.run(
  106. task=user_prompt,
  107. system_prompt=system_prompt,
  108. model="anthropic/claude-sonnet-4.5",
  109. temperature=0.5,
  110. max_iterations=30,
  111. ):
  112. # 处理 Trace 对象
  113. if isinstance(item, Trace):
  114. current_trace_id = item.trace_id
  115. if item.status == "running":
  116. print(f"[Trace] 开始: {item.trace_id[:8]}...")
  117. elif item.status == "completed":
  118. print()
  119. print("=" * 80)
  120. print(f"[Trace] 完成")
  121. print(f" - 总消息数: {item.total_messages}")
  122. print(f" - 总 Token 数: {item.total_tokens}")
  123. print(f" - 总成本: ${item.total_cost:.4f}")
  124. print("=" * 80)
  125. elif item.status == "failed":
  126. print()
  127. print(f"[Trace] 失败: {item.error_message}")
  128. # 处理 Message 对象
  129. elif isinstance(item, Message):
  130. if item.role == "assistant":
  131. iteration_count += 1
  132. content = item.content
  133. if isinstance(content, dict):
  134. text = content.get("text", "")
  135. tool_calls = content.get("tool_calls")
  136. # 显示 Agent 的思考
  137. if text and not tool_calls:
  138. print(f"\n[{iteration_count}] Agent 回复:")
  139. print(f" {text[:200]}{'...' if len(text) > 200 else ''}")
  140. elif text:
  141. print(f"\n[{iteration_count}] Agent 思考:")
  142. print(f" {text[:150]}{'...' if len(text) > 150 else ''}")
  143. # 显示工具调用
  144. if tool_calls:
  145. for tc in tool_calls:
  146. tool_name = tc.get("function", {}).get("name", "unknown")
  147. args = tc.get("function", {}).get("arguments", {})
  148. # 如果 args 是字符串,尝试解析为 JSON
  149. if isinstance(args, str):
  150. import json
  151. try:
  152. args = json.loads(args)
  153. except:
  154. args = {}
  155. # 统计工具使用
  156. tool_calls_count[tool_name] = tool_calls_count.get(tool_name, 0) + 1
  157. # 检测关键工具使用
  158. if tool_name == "goal":
  159. goal_used = True
  160. if isinstance(args, dict):
  161. if args.get("add"):
  162. print(f" → goal(add): {args['add'][:80]}...")
  163. elif args.get("done"):
  164. print(f" → goal(done): {args['done'][:80]}...")
  165. elif args.get("focus"):
  166. print(f" → goal(focus): {args['focus']}")
  167. elif tool_name == "subagent":
  168. subagent_used = True
  169. if isinstance(args, dict):
  170. mode = args.get("mode", "unknown")
  171. wait = args.get("wait", True)
  172. if mode == "evaluate":
  173. evaluate_used = True
  174. evaluation_count += 1
  175. target = args.get("target_goal_id", "?")
  176. wait_str = f"wait={wait}"
  177. print(f" → subagent(evaluate, {wait_str}): 评估目标 {target} [评估 #{evaluation_count}]")
  178. else:
  179. # 其他工具简化显示
  180. if tool_name in ["read_file", "write_file", "edit_file"]:
  181. if isinstance(args, dict):
  182. file_path = args.get("file_path", "")
  183. if file_path:
  184. file_name = Path(file_path).name
  185. print(f" → {tool_name}: {file_name}")
  186. elif item.role == "tool":
  187. # 检查是否是评估结果
  188. content = item.content
  189. if isinstance(content, str):
  190. import json
  191. try:
  192. result = json.loads(content)
  193. if isinstance(result, dict) and "passed" in result:
  194. passed = result.get("passed", False)
  195. reason = result.get("reason", "")[:100]
  196. evaluation_results.append({
  197. "passed": passed,
  198. "reason": reason
  199. })
  200. status = "✅ 通过" if passed else "❌ 不通过"
  201. print(f" [评估结果] {status}")
  202. except:
  203. pass
  204. # 4. 测试结果总结
  205. print()
  206. print("=" * 80)
  207. print("测试结果总结")
  208. print("=" * 80)
  209. print()
  210. print("功能使用情况:")
  211. print(f" - goal 工具: {'✅ 使用' if goal_used else '❌ 未使用'}")
  212. print(f" - subagent 工具: {'✅ 使用' if subagent_used else '❌ 未使用'}")
  213. print(f" - evaluate 模式: {'✅ 使用' if evaluate_used else '❌ 未使用'} ({evaluation_count} 次)")
  214. print()
  215. print("工具调用统计:")
  216. for tool_name, count in sorted(tool_calls_count.items(), key=lambda x: x[1], reverse=True):
  217. print(f" - {tool_name}: {count} 次")
  218. print()
  219. # 信号机制测试结果
  220. print("=" * 80)
  221. print("信号机制测试结果")
  222. print("=" * 80)
  223. print()
  224. print(f"信号统计:")
  225. print(f" - 发送信号数: {len(signals_emitted)}")
  226. print(f" - 接收信号数: {len(signals_received)}")
  227. print(f" - 信号类型: {', '.join(sorted(signal_types))}")
  228. print()
  229. if signals_emitted:
  230. print("发送的信号:")
  231. for i, sig in enumerate(signals_emitted, 1):
  232. print(f" {i}. {sig['type']} (trace: {sig['trace_id'][:8]}...)")
  233. print()
  234. if signals_received:
  235. print("接收的信号:")
  236. for i, sig in enumerate(signals_received, 1):
  237. print(f" {i}. {sig['type']} (trace: {sig['trace_id'][:8]}...)")
  238. print()
  239. # 检查输出文件
  240. print("输出文件:")
  241. validator_file = output_dir / "validator.py"
  242. report_file = output_dir / "REPORT.md"
  243. if validator_file.exists():
  244. size = validator_file.stat().st_size
  245. print(f" ✅ validator.py ({size} bytes)")
  246. else:
  247. print(f" ❌ validator.py (未生成)")
  248. if report_file.exists():
  249. size = report_file.stat().st_size
  250. print(f" ✅ REPORT.md ({size} bytes)")
  251. else:
  252. print(f" ❌ REPORT.md (未生成)")
  253. print()
  254. # 验证测试目标
  255. print("=" * 80)
  256. print("测试目标验证")
  257. print("=" * 80)
  258. print()
  259. success = True
  260. # 1. 验证 SignalBus 创建
  261. if hasattr(runner, 'signal_bus'):
  262. print(f" ✅ SignalBus 已创建")
  263. else:
  264. print(f" ❌ SignalBus 未创建")
  265. success = False
  266. # 2. 验证信号发送
  267. if len(signals_emitted) > 0:
  268. print(f" ✅ 信号已发送 ({len(signals_emitted)} 个)")
  269. else:
  270. print(f" ❌ 未发送信号")
  271. success = False
  272. # 3. 验证信号接收
  273. if len(signals_received) > 0:
  274. print(f" ✅ 信号已接收 ({len(signals_received)} 个)")
  275. else:
  276. print(f" ❌ 未接收信号")
  277. success = False
  278. # 4. 验证信号类型
  279. expected_types = {"subagent.start", "subagent.complete"}
  280. if expected_types.issubset(signal_types):
  281. print(f" ✅ 包含预期的信号类型")
  282. else:
  283. missing = expected_types - signal_types
  284. print(f" ⚠️ 缺少信号类型: {', '.join(missing)}")
  285. # 5. 验证 subagent 使用
  286. if evaluate_used:
  287. print(f" ✅ 使用了 subagent(evaluate) ({evaluation_count} 次)")
  288. else:
  289. print(f" ❌ 未使用 subagent(evaluate)")
  290. success = False
  291. # 6. 验证评估结果
  292. if evaluation_results:
  293. print(f" ✅ 获得了评估结果 ({len(evaluation_results)} 次)")
  294. else:
  295. print(f" ❌ 未获得评估结果")
  296. # 7. 验证文件生成
  297. if validator_file.exists():
  298. print(f" ✅ 生成了代码文件")
  299. else:
  300. print(f" ❌ 未生成代码文件")
  301. success = False
  302. print()
  303. if success:
  304. print("🎉 测试成功!信号驱动机制工作正常。")
  305. else:
  306. print("⚠️ 测试未完全通过,请检查实现。")
  307. print()
  308. if current_trace_id:
  309. print(f"详细日志: .trace/{current_trace_id}/")
  310. print("=" * 80)
  311. if __name__ == "__main__":
  312. asyncio.run(main())