subagent.py 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809
  1. """
  2. Sub-Agent 工具 - agent / evaluate
  3. agent: 创建 Agent 执行任务(单任务 delegate 或多任务并行 explore)
  4. evaluate: 评估目标执行结果是否满足要求
  5. """
  6. import asyncio
  7. from datetime import datetime
  8. from typing import Any, Dict, List, Optional, Union
  9. from agent.tools import tool
  10. from agent.trace.models import Trace, Messages
  11. from agent.trace.trace_id import generate_sub_trace_id
  12. from agent.trace.goal_models import GoalTree
  13. from agent.trace.websocket import broadcast_sub_trace_started, broadcast_sub_trace_completed
  14. # ===== prompts =====
  15. # ===== 评估任务 =====
  16. EVALUATE_PROMPT_TEMPLATE = """# 评估任务
  17. 请评估以下任务的执行结果是否满足要求。
  18. ## 目标描述
  19. {goal_description}
  20. ## 执行结果
  21. {result_text}
  22. ## 输出格式
  23. ## 评估结论
  24. [通过/不通过]
  25. ## 评估理由
  26. [详细说明通过或不通过原因]
  27. ## 修改建议(如果不通过)
  28. 1. [建议1]
  29. 2. [建议2]
  30. """
  31. # ===== 结果格式化 =====
  32. DELEGATE_RESULT_HEADER = "## 委托任务完成\n"
  33. DELEGATE_SAVED_KNOWLEDGE_HEADER = "**保存的知识** ({count} 条):"
  34. DELEGATE_STATS_HEADER = "**执行统计**:"
  35. EXPLORE_RESULT_HEADER = "## 探索结果\n"
  36. EXPLORE_BRANCH_TEMPLATE = "### 方案 {branch_name}: {task}"
  37. EXPLORE_STATUS_SUCCESS = "**状态**: ✓ 完成"
  38. EXPLORE_STATUS_FAILED = "**状态**: ✗ 失败"
  39. EXPLORE_STATUS_ERROR = "**状态**: ✗ 异常"
  40. EXPLORE_SUMMARY_HEADER = "## 总结"
  41. def build_evaluate_prompt(goal_description: str, result_text: str) -> str:
  42. return EVALUATE_PROMPT_TEMPLATE.format(
  43. goal_description=goal_description,
  44. result_text=result_text or "(无执行结果)",
  45. )
  46. def _make_run_config(**kwargs):
  47. """延迟导入 RunConfig 以避免循环导入"""
  48. from agent.core.runner import RunConfig
  49. return RunConfig(**kwargs)
  50. # ===== 辅助函数 =====
  51. async def _update_collaborator(
  52. store, trace_id: str,
  53. name: str, sub_trace_id: str,
  54. status: str, summary: str = "",
  55. ) -> None:
  56. """
  57. 更新 trace.context["collaborators"] 中的协作者信息。
  58. 如果同名协作者已存在则更新,否则追加。
  59. """
  60. trace = await store.get_trace(trace_id)
  61. if not trace:
  62. return
  63. collaborators = trace.context.get("collaborators", [])
  64. # 查找已有记录
  65. existing = None
  66. for c in collaborators:
  67. if c.get("trace_id") == sub_trace_id:
  68. existing = c
  69. break
  70. if existing:
  71. existing["status"] = status
  72. if summary:
  73. existing["summary"] = summary
  74. else:
  75. collaborators.append({
  76. "name": name,
  77. "type": "agent",
  78. "trace_id": sub_trace_id,
  79. "status": status,
  80. "summary": summary,
  81. })
  82. trace.context["collaborators"] = collaborators
  83. await store.update_trace(trace_id, context=trace.context)
  84. async def _update_goal_start(
  85. store, trace_id: str, goal_id: str, mode: str, sub_trace_ids: List[str]
  86. ) -> None:
  87. """标记 Goal 开始执行"""
  88. if not goal_id:
  89. return
  90. await store.update_goal(
  91. trace_id, goal_id,
  92. type="agent_call",
  93. agent_call_mode=mode,
  94. status="in_progress",
  95. sub_trace_ids=sub_trace_ids
  96. )
  97. async def _update_goal_complete(
  98. store, trace_id: str, goal_id: str,
  99. status: str, summary: str, sub_trace_ids: List[str]
  100. ) -> None:
  101. """标记 Goal 完成"""
  102. if not goal_id:
  103. return
  104. await store.update_goal(
  105. trace_id, goal_id,
  106. status=status,
  107. summary=summary,
  108. sub_trace_ids=sub_trace_ids
  109. )
  110. def _aggregate_stats(results: List[Dict[str, Any]]) -> Dict[str, Any]:
  111. """聚合多个结果的统计信息"""
  112. total_messages = 0
  113. total_tokens = 0
  114. total_cost = 0.0
  115. for result in results:
  116. if isinstance(result, dict) and "stats" in result:
  117. stats = result["stats"]
  118. total_messages += stats.get("total_messages", 0)
  119. total_tokens += stats.get("total_tokens", 0)
  120. total_cost += stats.get("total_cost", 0.0)
  121. return {
  122. "total_messages": total_messages,
  123. "total_tokens": total_tokens,
  124. "total_cost": total_cost
  125. }
  126. def _get_allowed_tools(single: bool, context: dict) -> Optional[List[str]]:
  127. """获取允许工具列表。single=True: 全部(去掉 agent/evaluate); single=False: 只读"""
  128. if not single:
  129. return ["read_file", "grep_content", "glob_files", "goal"]
  130. # single (delegate): 获取所有工具,排除 agent 和 evaluate
  131. runner = context.get("runner")
  132. if runner and hasattr(runner, "tools") and hasattr(runner.tools, "registry"):
  133. all_tools = list(runner.tools.registry.keys())
  134. return [t for t in all_tools if t not in ("agent", "evaluate")]
  135. return None
  136. def _format_single_result(result: Dict[str, Any], sub_trace_id: str, continued: bool) -> Dict[str, Any]:
  137. """格式化单任务(delegate)结果"""
  138. lines = [DELEGATE_RESULT_HEADER]
  139. summary = result.get("summary", "")
  140. if summary:
  141. lines.append(summary)
  142. lines.append("")
  143. # 添加保存的知识 ID
  144. saved_knowledge_ids = result.get("saved_knowledge_ids", [])
  145. if saved_knowledge_ids:
  146. lines.append("---\n")
  147. lines.append(DELEGATE_SAVED_KNOWLEDGE_HEADER.format(count=len(saved_knowledge_ids)))
  148. for kid in saved_knowledge_ids:
  149. lines.append(f"- {kid}")
  150. lines.append("")
  151. lines.append("---\n")
  152. lines.append(DELEGATE_STATS_HEADER)
  153. stats = result.get("stats", {})
  154. if stats:
  155. lines.append(f"- 消息数: {stats.get('total_messages', 0)}")
  156. lines.append(f"- Tokens: {stats.get('total_tokens', 0)}")
  157. lines.append(f"- 成本: ${stats.get('total_cost', 0.0):.4f}")
  158. formatted_summary = "\n".join(lines)
  159. return {
  160. "mode": "delegate",
  161. "sub_trace_id": sub_trace_id,
  162. "continue_from": continued,
  163. "saved_knowledge_ids": saved_knowledge_ids, # 传递给父 agent
  164. **result,
  165. "summary": formatted_summary,
  166. }
  167. def _format_multi_result(
  168. tasks: List[str], results: List[Dict[str, Any]], sub_trace_ids: List[Dict]
  169. ) -> Dict[str, Any]:
  170. """格式化多任务(explore)聚合结果"""
  171. lines = [EXPLORE_RESULT_HEADER]
  172. successful = 0
  173. failed = 0
  174. total_tokens = 0
  175. total_cost = 0.0
  176. for i, (task_item, result) in enumerate(zip(tasks, results)):
  177. branch_name = chr(ord('A') + i)
  178. lines.append(EXPLORE_BRANCH_TEMPLATE.format(branch_name=branch_name, task=task_item))
  179. if isinstance(result, dict):
  180. status = result.get("status", "unknown")
  181. if status == "completed":
  182. lines.append(EXPLORE_STATUS_SUCCESS)
  183. successful += 1
  184. else:
  185. lines.append(EXPLORE_STATUS_FAILED)
  186. failed += 1
  187. summary = result.get("summary", "")
  188. if summary:
  189. lines.append(f"**摘要**: {summary[:200]}...")
  190. stats = result.get("stats", {})
  191. if stats:
  192. messages = stats.get("total_messages", 0)
  193. tokens = stats.get("total_tokens", 0)
  194. cost = stats.get("total_cost", 0.0)
  195. lines.append(f"**统计**: {messages} messages, {tokens} tokens, ${cost:.4f}")
  196. total_tokens += tokens
  197. total_cost += cost
  198. else:
  199. lines.append(EXPLORE_STATUS_ERROR)
  200. failed += 1
  201. lines.append("")
  202. lines.append("---\n")
  203. lines.append(EXPLORE_SUMMARY_HEADER)
  204. lines.append(f"- 总分支数: {len(tasks)}")
  205. lines.append(f"- 成功: {successful}")
  206. lines.append(f"- 失败: {failed}")
  207. lines.append(f"- 总 tokens: {total_tokens}")
  208. lines.append(f"- 总成本: ${total_cost:.4f}")
  209. aggregated_summary = "\n".join(lines)
  210. overall_status = "completed" if successful > 0 else "failed"
  211. return {
  212. "mode": "explore",
  213. "status": overall_status,
  214. "summary": aggregated_summary,
  215. "sub_trace_ids": sub_trace_ids,
  216. "tasks": tasks,
  217. "stats": _aggregate_stats(results),
  218. }
  219. async def _get_goal_description(store, trace_id: str, goal_id: str) -> str:
  220. """从 GoalTree 获取目标描述"""
  221. if not goal_id:
  222. return ""
  223. goal_tree = await store.get_goal_tree(trace_id)
  224. if goal_tree:
  225. target_goal = goal_tree.find(goal_id)
  226. if target_goal:
  227. return target_goal.description
  228. return f"Goal {goal_id}"
  229. def _build_evaluate_prompt(goal_description: str, messages: Optional[Messages]) -> str:
  230. """
  231. 构建评估 prompt。
  232. Args:
  233. goal_description: 代码从 GoalTree 注入的目标描述
  234. messages: 模型提供的消息(执行结果+上下文)
  235. """
  236. # 从 messages 提取文本内容
  237. result_text = ""
  238. if messages:
  239. parts = []
  240. for msg in messages:
  241. content = msg.get("content", "")
  242. if isinstance(content, str):
  243. parts.append(content)
  244. elif isinstance(content, list):
  245. # 多模态内容,提取文本部分
  246. for item in content:
  247. if isinstance(item, dict) and item.get("type") == "text":
  248. parts.append(item.get("text", ""))
  249. result_text = "\n".join(parts)
  250. return build_evaluate_prompt(goal_description, result_text)
  251. def _make_event_printer(label: str):
  252. """
  253. 创建子 Agent 执行过程打印函数。
  254. 当父 runner.debug=True 时,传给 run_result(on_event=...),
  255. 实时输出子 Agent 的工具调用和助手消息。
  256. """
  257. prefix = f" [{label}]"
  258. def on_event(item):
  259. from agent.trace.models import Trace, Message
  260. if isinstance(item, Message):
  261. if item.role == "assistant":
  262. content = item.content
  263. if isinstance(content, dict):
  264. text = content.get("text", "")
  265. tool_calls = content.get("tool_calls")
  266. if text:
  267. preview = text[:120] + "..." if len(text) > 120 else text
  268. print(f"{prefix} {preview}")
  269. if tool_calls:
  270. for tc in tool_calls:
  271. name = tc.get("function", {}).get("name", "unknown")
  272. print(f"{prefix} 🛠️ {name}")
  273. elif item.role == "tool":
  274. content = item.content
  275. if isinstance(content, dict):
  276. name = content.get("tool_name", "unknown")
  277. desc = item.description or ""
  278. desc_short = (desc[:60] + "...") if len(desc) > 60 else desc
  279. suffix = f": {desc_short}" if desc_short else ""
  280. print(f"{prefix} ✅ {name}{suffix}")
  281. elif isinstance(item, Trace):
  282. if item.status == "completed":
  283. print(f"{prefix} ✓ 完成")
  284. elif item.status == "failed":
  285. err = (item.error_message or "")[:80]
  286. print(f"{prefix} ✗ 失败: {err}")
  287. return on_event
  288. # ===== 统一内部执行函数 =====
  289. async def _run_agents(
  290. tasks: List[str],
  291. per_agent_msgs: List[Messages],
  292. continue_from: Optional[str],
  293. store, trace_id: str, goal_id: str, runner, context: dict,
  294. agent_type: Optional[str] = None,
  295. skills: Optional[List[str]] = None,
  296. ) -> Dict[str, Any]:
  297. """
  298. 统一 agent 执行逻辑。
  299. single (len(tasks)==1): delegate 模式,全量工具(排除 agent/evaluate)
  300. multi (len(tasks)>1): explore 模式,只读工具,并行执行
  301. """
  302. single = len(tasks) == 1
  303. parent_trace = await store.get_trace(trace_id)
  304. # continue_from: 复用已有 trace(仅 single)
  305. sub_trace_id = None
  306. continued = False
  307. if single and continue_from:
  308. existing = await store.get_trace(continue_from)
  309. if not existing:
  310. return {"status": "failed", "error": f"Continue-from trace not found: {continue_from}"}
  311. sub_trace_id = continue_from
  312. continued = True
  313. goal_tree = await store.get_goal_tree(continue_from)
  314. mission = goal_tree.mission if goal_tree else tasks[0]
  315. sub_trace_ids = [{"trace_id": sub_trace_id, "mission": mission}]
  316. else:
  317. sub_trace_ids = []
  318. # 创建 sub-traces 和执行协程
  319. coros = []
  320. all_sub_trace_ids = list(sub_trace_ids) # copy for continue_from case
  321. for i, (task_item, msgs) in enumerate(zip(tasks, per_agent_msgs)):
  322. if single and continued:
  323. # continue_from 已经设置了 sub_trace_id
  324. pass
  325. else:
  326. resolved_agent_type = agent_type or ("delegate" if single else "explore")
  327. suffix = "delegate" if single else f"explore-{i+1:03d}"
  328. stid = generate_sub_trace_id(trace_id, suffix)
  329. sub_trace = Trace(
  330. trace_id=stid,
  331. mode="agent",
  332. task=task_item,
  333. parent_trace_id=trace_id,
  334. parent_goal_id=goal_id,
  335. agent_type=resolved_agent_type,
  336. uid=parent_trace.uid if parent_trace else None,
  337. model=parent_trace.model if parent_trace else None,
  338. status="running",
  339. context={"created_by_tool": "agent"},
  340. created_at=datetime.now(),
  341. )
  342. await store.create_trace(sub_trace)
  343. await store.update_goal_tree(stid, GoalTree(mission=task_item))
  344. all_sub_trace_ids.append({"trace_id": stid, "mission": task_item})
  345. # 广播 sub_trace_started
  346. await broadcast_sub_trace_started(
  347. trace_id, stid, goal_id or "",
  348. resolved_agent_type, task_item,
  349. )
  350. if single:
  351. sub_trace_id = stid
  352. # 注册为活跃协作者
  353. cur_stid = sub_trace_id if single else all_sub_trace_ids[-1]["trace_id"]
  354. collab_name = task_item[:30] if single and not continued else (
  355. f"delegate-{cur_stid[:8]}" if single else f"explore-{i+1}"
  356. )
  357. await _update_collaborator(
  358. store, trace_id,
  359. name=collab_name, sub_trace_id=cur_stid,
  360. status="running", summary=task_item[:80],
  361. )
  362. # 构建消息
  363. agent_msgs = list(msgs) + [{"role": "user", "content": task_item}]
  364. allowed_tools = _get_allowed_tools(single, context)
  365. debug = getattr(runner, 'debug', False)
  366. agent_label = (agent_type or ("delegate" if single else f"explore-{i+1}"))
  367. on_event = _make_event_printer(agent_label) if debug else None
  368. coro = runner.run_result(
  369. messages=agent_msgs,
  370. config=_make_run_config(
  371. trace_id=cur_stid,
  372. agent_type=agent_type or ("delegate" if single else "explore"),
  373. model=parent_trace.model if parent_trace else "gpt-4o",
  374. uid=parent_trace.uid if parent_trace else None,
  375. tools=allowed_tools,
  376. name=task_item[:50],
  377. skills=skills,
  378. ),
  379. on_event=on_event,
  380. )
  381. coros.append((i, cur_stid, collab_name, coro))
  382. # 更新主 Goal 为 in_progress
  383. await _update_goal_start(
  384. store, trace_id, goal_id,
  385. "delegate" if single else "explore",
  386. all_sub_trace_ids,
  387. )
  388. # 执行
  389. if single:
  390. # 单任务直接执行(带异常处理)
  391. _, stid, collab_name, coro = coros[0]
  392. try:
  393. result = await coro
  394. await broadcast_sub_trace_completed(
  395. trace_id, stid,
  396. result.get("status", "completed"),
  397. result.get("summary", ""),
  398. result.get("stats", {}),
  399. )
  400. await _update_collaborator(
  401. store, trace_id,
  402. name=collab_name, sub_trace_id=stid,
  403. status=result.get("status", "completed"),
  404. summary=result.get("summary", "")[:80],
  405. )
  406. formatted = _format_single_result(result, stid, continued)
  407. await _update_goal_complete(
  408. store, trace_id, goal_id,
  409. result.get("status", "completed"),
  410. formatted["summary"],
  411. all_sub_trace_ids,
  412. )
  413. return formatted
  414. except Exception as e:
  415. error_msg = str(e)
  416. await broadcast_sub_trace_completed(
  417. trace_id, stid, "failed", error_msg, {},
  418. )
  419. await _update_collaborator(
  420. store, trace_id,
  421. name=collab_name, sub_trace_id=stid,
  422. status="failed", summary=error_msg[:80],
  423. )
  424. await _update_goal_complete(
  425. store, trace_id, goal_id,
  426. "failed", f"委托任务失败: {error_msg}",
  427. all_sub_trace_ids,
  428. )
  429. return {
  430. "mode": "delegate",
  431. "status": "failed",
  432. "error": error_msg,
  433. "sub_trace_id": stid,
  434. }
  435. else:
  436. # 多任务并行执行
  437. raw_results = await asyncio.gather(
  438. *(coro for _, _, _, coro in coros),
  439. return_exceptions=True,
  440. )
  441. processed_results = []
  442. for idx, raw in enumerate(raw_results):
  443. _, stid, collab_name, _ = coros[idx]
  444. if isinstance(raw, Exception):
  445. error_result = {
  446. "status": "failed",
  447. "summary": f"执行出错: {str(raw)}",
  448. "stats": {"total_messages": 0, "total_tokens": 0, "total_cost": 0.0},
  449. }
  450. processed_results.append(error_result)
  451. await broadcast_sub_trace_completed(
  452. trace_id, stid, "failed", str(raw), {},
  453. )
  454. await _update_collaborator(
  455. store, trace_id,
  456. name=collab_name, sub_trace_id=stid,
  457. status="failed", summary=str(raw)[:80],
  458. )
  459. else:
  460. processed_results.append(raw)
  461. await broadcast_sub_trace_completed(
  462. trace_id, stid,
  463. raw.get("status", "completed"),
  464. raw.get("summary", ""),
  465. raw.get("stats", {}),
  466. )
  467. await _update_collaborator(
  468. store, trace_id,
  469. name=collab_name, sub_trace_id=stid,
  470. status=raw.get("status", "completed"),
  471. summary=raw.get("summary", "")[:80],
  472. )
  473. formatted = _format_multi_result(tasks, processed_results, all_sub_trace_ids)
  474. await _update_goal_complete(
  475. store, trace_id, goal_id,
  476. formatted["status"],
  477. formatted["summary"],
  478. all_sub_trace_ids,
  479. )
  480. return formatted
  481. # ===== 工具定义 =====
  482. @tool(description="创建 Agent 执行任务", hidden_params=["context"])
  483. async def agent(
  484. task: Union[str, List[str]],
  485. messages: Optional[Union[Messages, List[Messages]]] = None,
  486. continue_from: Optional[str] = None,
  487. agent_type: Optional[str] = None,
  488. skills: Optional[List[str]] = None,
  489. context: Optional[dict] = None,
  490. ) -> Dict[str, Any]:
  491. """
  492. 创建 Agent 执行任务。
  493. 单任务 (task: str): delegate 模式,全量工具
  494. 多任务 (task: List[str]): explore 模式,只读工具,并行执行
  495. Args:
  496. task: 任务描述。字符串=单任务,列表=多任务并行
  497. messages: 预置消息。1D 列表=所有 agent 共享;2D 列表=per-agent
  498. continue_from: 继续已有 trace(仅单任务)
  499. agent_type: 子 Agent 类型,决定 preset 和默认 skills(如 "deconstruct")
  500. skills: 附加到 system prompt 的 skill 名称列表,覆盖 preset 默认值
  501. context: 框架自动注入的上下文
  502. """
  503. if not context:
  504. return {"status": "failed", "error": "context is required"}
  505. store = context.get("store")
  506. trace_id = context.get("trace_id")
  507. goal_id = context.get("goal_id")
  508. runner = context.get("runner")
  509. missing = []
  510. if not store:
  511. missing.append("store")
  512. if not trace_id:
  513. missing.append("trace_id")
  514. if not runner:
  515. missing.append("runner")
  516. if missing:
  517. return {"status": "failed", "error": f"Missing required context: {', '.join(missing)}"}
  518. # 归一化 task → list
  519. single = isinstance(task, str)
  520. tasks = [task] if single else task
  521. if not tasks:
  522. return {"status": "failed", "error": "task is required"}
  523. # 归一化 messages → List[Messages](per-agent)
  524. if messages is None:
  525. per_agent_msgs: List[Messages] = [[] for _ in tasks]
  526. elif messages and isinstance(messages[0], list):
  527. per_agent_msgs = messages # 2D: per-agent
  528. else:
  529. per_agent_msgs = [messages] * len(tasks) # 1D: 共享
  530. if continue_from and not single:
  531. return {"status": "failed", "error": "continue_from requires single task"}
  532. return await _run_agents(
  533. tasks, per_agent_msgs, continue_from,
  534. store, trace_id, goal_id, runner, context,
  535. agent_type=agent_type,
  536. skills=skills,
  537. )
  538. @tool(description="评估目标执行结果是否满足要求", hidden_params=["context"])
  539. async def evaluate(
  540. messages: Optional[Messages] = None,
  541. target_goal_id: Optional[str] = None,
  542. continue_from: Optional[str] = None,
  543. context: Optional[dict] = None,
  544. ) -> Dict[str, Any]:
  545. """
  546. 评估目标执行结果是否满足要求。
  547. 代码自动从 GoalTree 注入目标描述。模型把执行结果和上下文放在 messages 中。
  548. Args:
  549. messages: 执行结果和上下文消息(OpenAI 格式)
  550. target_goal_id: 要评估的目标 ID(默认当前 goal_id)
  551. continue_from: 继续已有评估 trace
  552. context: 框架自动注入的上下文
  553. """
  554. if not context:
  555. return {"status": "failed", "error": "context is required"}
  556. store = context.get("store")
  557. trace_id = context.get("trace_id")
  558. current_goal_id = context.get("goal_id")
  559. runner = context.get("runner")
  560. missing = []
  561. if not store:
  562. missing.append("store")
  563. if not trace_id:
  564. missing.append("trace_id")
  565. if not runner:
  566. missing.append("runner")
  567. if missing:
  568. return {"status": "failed", "error": f"Missing required context: {', '.join(missing)}"}
  569. # target_goal_id 默认 context["goal_id"]
  570. goal_id = target_goal_id or current_goal_id
  571. # 从 GoalTree 获取目标描述
  572. goal_desc = await _get_goal_description(store, trace_id, goal_id)
  573. # 构建 evaluator prompt
  574. eval_prompt = _build_evaluate_prompt(goal_desc, messages)
  575. # 获取父 Trace 信息
  576. parent_trace = await store.get_trace(trace_id)
  577. # 处理 continue_from 或创建新 Sub-Trace
  578. if continue_from:
  579. existing_trace = await store.get_trace(continue_from)
  580. if not existing_trace:
  581. return {"status": "failed", "error": f"Continue-from trace not found: {continue_from}"}
  582. sub_trace_id = continue_from
  583. goal_tree = await store.get_goal_tree(continue_from)
  584. mission = goal_tree.mission if goal_tree else eval_prompt
  585. sub_trace_ids = [{"trace_id": sub_trace_id, "mission": mission}]
  586. else:
  587. sub_trace_id = generate_sub_trace_id(trace_id, "evaluate")
  588. sub_trace = Trace(
  589. trace_id=sub_trace_id,
  590. mode="agent",
  591. task=eval_prompt,
  592. parent_trace_id=trace_id,
  593. parent_goal_id=current_goal_id,
  594. agent_type="evaluate",
  595. uid=parent_trace.uid if parent_trace else None,
  596. model=parent_trace.model if parent_trace else None,
  597. status="running",
  598. context={"created_by_tool": "evaluate"},
  599. created_at=datetime.now(),
  600. )
  601. await store.create_trace(sub_trace)
  602. await store.update_goal_tree(sub_trace_id, GoalTree(mission=eval_prompt))
  603. sub_trace_ids = [{"trace_id": sub_trace_id, "mission": eval_prompt}]
  604. # 广播 sub_trace_started
  605. await broadcast_sub_trace_started(
  606. trace_id, sub_trace_id, current_goal_id or "",
  607. "evaluate", eval_prompt,
  608. )
  609. # 更新主 Goal 为 in_progress
  610. await _update_goal_start(store, trace_id, current_goal_id, "evaluate", sub_trace_ids)
  611. # 注册为活跃协作者
  612. eval_name = f"评估: {(goal_id or 'unknown')[:20]}"
  613. await _update_collaborator(
  614. store, trace_id,
  615. name=eval_name, sub_trace_id=sub_trace_id,
  616. status="running", summary=f"评估 Goal {goal_id}",
  617. )
  618. # 执行评估
  619. try:
  620. # evaluate 使用只读工具 + goal
  621. allowed_tools = ["read_file", "grep_content", "glob_files", "goal"]
  622. result = await runner.run_result(
  623. messages=[{"role": "user", "content": eval_prompt}],
  624. config=_make_run_config(
  625. trace_id=sub_trace_id,
  626. agent_type="evaluate",
  627. model=parent_trace.model if parent_trace else "gpt-4o",
  628. uid=parent_trace.uid if parent_trace else None,
  629. tools=allowed_tools,
  630. name=f"评估: {goal_id}",
  631. ),
  632. on_event=_make_event_printer("evaluate") if getattr(runner, 'debug', False) else None,
  633. )
  634. await broadcast_sub_trace_completed(
  635. trace_id, sub_trace_id,
  636. result.get("status", "completed"),
  637. result.get("summary", ""),
  638. result.get("stats", {}),
  639. )
  640. await _update_collaborator(
  641. store, trace_id,
  642. name=eval_name, sub_trace_id=sub_trace_id,
  643. status=result.get("status", "completed"),
  644. summary=result.get("summary", "")[:80],
  645. )
  646. formatted_summary = result.get("summary", "")
  647. await _update_goal_complete(
  648. store, trace_id, current_goal_id,
  649. result.get("status", "completed"),
  650. formatted_summary,
  651. sub_trace_ids,
  652. )
  653. return {
  654. "mode": "evaluate",
  655. "sub_trace_id": sub_trace_id,
  656. "continue_from": bool(continue_from),
  657. **result,
  658. "summary": formatted_summary,
  659. }
  660. except Exception as e:
  661. error_msg = str(e)
  662. await broadcast_sub_trace_completed(
  663. trace_id, sub_trace_id, "failed", error_msg, {},
  664. )
  665. await _update_collaborator(
  666. store, trace_id,
  667. name=eval_name, sub_trace_id=sub_trace_id,
  668. status="failed", summary=error_msg[:80],
  669. )
  670. await _update_goal_complete(
  671. store, trace_id, current_goal_id,
  672. "failed", f"评估任务失败: {error_msg}",
  673. sub_trace_ids,
  674. )
  675. return {
  676. "mode": "evaluate",
  677. "status": "failed",
  678. "error": error_msg,
  679. "sub_trace_id": sub_trace_id,
  680. }