Просмотр исходного кода

refactor: unify message type & redefine agent/evaluate tool

Talegorithm 3 недели назад
Родитель
Коммит
c08f65a89e

+ 4 - 1
agent/__init__.py

@@ -15,7 +15,7 @@ from agent.core.runner import AgentRunner, AgentConfig, CallResult, RunConfig
 from agent.core.presets import AgentPreset, AGENT_PRESETS, get_preset
 
 # 执行追踪
-from agent.trace.models import Trace, Message, Step, StepType, StepStatus
+from agent.trace.models import Trace, Message, Step, StepType, StepStatus, ChatMessage, Messages, MessageContent
 from agent.trace.goal_models import Goal, GoalTree, GoalStatus
 from agent.trace.protocols import TraceStore
 from agent.trace.store import FileSystemTraceStore
@@ -43,6 +43,9 @@ __all__ = [
     # Trace
     "Trace",
     "Message",
+    "ChatMessage",
+    "Messages",
+    "MessageContent",
     "Step",
     "StepType",
     "StepStatus",

+ 3 - 2
agent/core/runner.py

@@ -83,7 +83,8 @@ BUILTIN_TOOLS = [
     "skill",
     "list_skills",
     "goal",
-    "subagent",
+    "agent",
+    "evaluate",
 
     # 搜索工具
     "search_posts",
@@ -263,7 +264,7 @@ class AgentRunner:
         """
         结果模式 — 消费 run(),返回结构化结果。
 
-        主要用于 subagent 工具内部。
+        主要用于 agent/evaluate 工具内部。
         """
         last_assistant_text = ""
         final_trace: Optional[Trace] = None

+ 3 - 2
agent/tools/builtin/__init__.py

@@ -14,7 +14,7 @@ from agent.tools.builtin.glob_tool import glob_files
 from agent.tools.builtin.file.grep import grep_content
 from agent.tools.builtin.bash import bash_command
 from agent.tools.builtin.skill import skill, list_skills
-from agent.tools.builtin.subagent import subagent
+from agent.tools.builtin.subagent import agent, evaluate
 from agent.tools.builtin.search import search_posts, get_search_suggestions
 from agent.tools.builtin.sandbox import (sandbox_create_environment, sandbox_run_shell,
                                          sandbox_rebuild_with_ports,sandbox_destroy_environment)
@@ -35,7 +35,8 @@ __all__ = [
     "bash_command",
     "skill",
     "list_skills",
-    "subagent",
+    "agent",
+    "evaluate",
     "search_posts",
     "get_search_suggestions",
     "sandbox_create_environment",

+ 3 - 3
agent/tools/builtin/browser/baseClass.py

@@ -51,7 +51,7 @@ import aiohttp
 import re
 import base64
 from urllib.parse import urlparse, parse_qs, unquote
-from typing import Optional, List, Dict, Any, Tuple
+from typing import Optional, List, Dict, Any, Tuple, Union
 from pathlib import Path
 from langchain_core.runnables import RunnableLambda
 from argparse import Namespace # 使用 Namespace 快速构造带属性的对象
@@ -1347,9 +1347,9 @@ async def _detect_and_download_pdf_via_cdp(browser) -> Optional[str]:
 
 @tool()
 async def browser_read_long_content(
-    goal: Any,
+    goal: Union[str, dict],
     source: str = "page",
-    context: Any = "",
+    context: str = "",
     **kwargs
 ) -> ToolResult:
     """

+ 2 - 1
agent/tools/builtin/feishu/chat.py

@@ -6,6 +6,7 @@ import asyncio
 from typing import Optional, List, Dict, Any, Union
 from .feishu_client import FeishuClient, FeishuDomain
 from agent.tools import tool, ToolResult, ToolContext
+from agent.trace.models import MessageContent
 
 # 从环境变量获取飞书配置
 # 也可以在此设置硬编码的默认值,但推荐使用环境变量
@@ -176,7 +177,7 @@ async def feishu_get_contact_list(context: Optional[ToolContext] = None) -> Tool
 )
 async def feishu_send_message_to_contact(
     contact_name: str,
-    content: Any,
+    content: MessageContent,
     context: Optional[ToolContext] = None
 ) -> ToolResult:
     """

+ 437 - 470
agent/tools/builtin/subagent.py

@@ -1,15 +1,16 @@
 """
-Sub-Agent 工具 - 统一 explore/delegate/evaluate
+Sub-Agent 工具 - agent / evaluate
 
-作为普通工具运行:创建(或继承)子 Trace,执行并返回结构化结果。
+agent: 创建 Agent 执行任务(单任务 delegate 或多任务并行 explore)
+evaluate: 评估目标执行结果是否满足要求
 """
 
 import asyncio
 from datetime import datetime
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union
 
 from agent.tools import tool
-from agent.trace.models import Trace
+from agent.trace.models import Trace, Messages
 from agent.trace.trace_id import generate_sub_trace_id
 from agent.trace.goal_models import GoalTree
 from agent.trace.websocket import broadcast_sub_trace_started, broadcast_sub_trace_completed
@@ -21,69 +22,6 @@ def _make_run_config(**kwargs):
     return RunConfig(**kwargs)
 
 
-def _build_explore_prompt(branches: List[str], background: Optional[str]) -> str:
-    lines = ["# 探索任务", ""]
-    if background:
-        lines.extend([background, ""])
-    lines.append("请探索以下方案:")
-    for i, branch in enumerate(branches, 1):
-        lines.append(f"{i}. {branch}")
-    return "\n".join(lines)
-
-
-async def _build_evaluate_prompt(
-    store,
-    trace_id: str,
-    target_goal_id: str,
-    evaluation_input: Dict[str, Any],
-    requirements: Optional[str],
-) -> str:
-    goal_tree = await store.get_goal_tree(trace_id)
-    target_desc = ""
-    if goal_tree:
-        target_goal = goal_tree.find(target_goal_id)
-        if target_goal:
-            target_desc = target_goal.description
-
-    goal_description = evaluation_input.get("goal_description") or target_desc or f"Goal {target_goal_id}"
-    actual_result = evaluation_input.get("actual_result", "(无执行结果)")
-
-    lines = [
-        "# 评估任务",
-        "",
-        "请评估以下任务的执行结果是否满足要求。",
-        "",
-        "## 目标描述",
-        "",
-        str(goal_description),
-        "",
-        "## 执行结果",
-        "",
-        str(actual_result),
-        "",
-    ]
-
-    if requirements:
-        lines.extend(["## 评估要求", "", requirements, ""])
-
-    lines.extend(
-        [
-            "## 输出格式",
-            "",
-            "## 评估结论",
-            "[通过/不通过]",
-            "",
-            "## 评估理由",
-            "[详细说明通过或不通过原因]",
-            "",
-            "## 修改建议(如果不通过)",
-            "1. [建议1]",
-            "2. [建议2]",
-        ]
-    )
-    return "\n".join(lines)
-
-
 # ===== 辅助函数 =====
 
 async def _update_collaborator(
@@ -125,6 +63,7 @@ async def _update_collaborator(
     trace.context["collaborators"] = collaborators
     await store.update_trace(trace_id, context=trace.context)
 
+
 async def _update_goal_start(
     store, trace_id: str, goal_id: str, mode: str, sub_trace_ids: List[str]
 ) -> None:
@@ -155,20 +94,76 @@ async def _update_goal_complete(
     )
 
 
-def _format_explore_results(
-    branches: List[str], results: List[Dict[str, Any]]
-) -> str:
-    """格式化 explore 模式的汇总结果(Markdown)"""
-    lines = ["## 探索结果\n"]
+def _aggregate_stats(results: List[Dict[str, Any]]) -> Dict[str, Any]:
+    """聚合多个结果的统计信息"""
+    total_messages = 0
+    total_tokens = 0
+    total_cost = 0.0
+
+    for result in results:
+        if isinstance(result, dict) and "stats" in result:
+            stats = result["stats"]
+            total_messages += stats.get("total_messages", 0)
+            total_tokens += stats.get("total_tokens", 0)
+            total_cost += stats.get("total_cost", 0.0)
+
+    return {
+        "total_messages": total_messages,
+        "total_tokens": total_tokens,
+        "total_cost": total_cost
+    }
+
+
+def _get_allowed_tools(single: bool, context: dict) -> Optional[List[str]]:
+    """获取允许工具列表。single=True: 全部(去掉 agent/evaluate); single=False: 只读"""
+    if not single:
+        return ["read_file", "grep_content", "glob_files", "goal"]
+    # single (delegate): 获取所有工具,排除 agent 和 evaluate
+    runner = context.get("runner")
+    if runner and hasattr(runner, "tools") and hasattr(runner.tools, "registry"):
+        all_tools = list(runner.tools.registry.keys())
+        return [t for t in all_tools if t not in ("agent", "evaluate")]
+    return None
+
+
+def _format_single_result(result: Dict[str, Any], sub_trace_id: str, continued: bool) -> Dict[str, Any]:
+    """格式化单任务(delegate)结果"""
+    lines = ["## 委托任务完成\n"]
+    summary = result.get("summary", "")
+    if summary:
+        lines.append(summary)
+        lines.append("")
+    lines.append("---\n")
+    lines.append("**执行统计**:")
+    stats = result.get("stats", {})
+    if stats:
+        lines.append(f"- 消息数: {stats.get('total_messages', 0)}")
+        lines.append(f"- Tokens: {stats.get('total_tokens', 0)}")
+        lines.append(f"- 成本: ${stats.get('total_cost', 0.0):.4f}")
+    formatted_summary = "\n".join(lines)
 
+    return {
+        "mode": "delegate",
+        "sub_trace_id": sub_trace_id,
+        "continue_from": continued,
+        **result,
+        "summary": formatted_summary,
+    }
+
+
+def _format_multi_result(
+    tasks: List[str], results: List[Dict[str, Any]], sub_trace_ids: List[Dict]
+) -> Dict[str, Any]:
+    """格式化多任务(explore)聚合结果"""
+    lines = ["## 探索结果\n"]
     successful = 0
     failed = 0
     total_tokens = 0
     total_cost = 0.0
 
-    for i, (branch, result) in enumerate(zip(branches, results)):
-        branch_name = chr(ord('A') + i)  # A, B, C...
-        lines.append(f"### 方案 {branch_name}: {branch}")
+    for i, (task_item, result) in enumerate(zip(tasks, results)):
+        branch_name = chr(ord('A') + i)
+        lines.append(f"### 方案 {branch_name}: {task_item}")
 
         if isinstance(result, dict):
             status = result.get("status", "unknown")
@@ -181,7 +176,7 @@ def _format_explore_results(
 
             summary = result.get("summary", "")
             if summary:
-                lines.append(f"**摘要**: {summary[:200]}...")  # 限制长度
+                lines.append(f"**摘要**: {summary[:200]}...")
 
             stats = result.get("stats", {})
             if stats:
@@ -199,545 +194,517 @@ def _format_explore_results(
 
     lines.append("---\n")
     lines.append("## 总结")
-    lines.append(f"- 总分支数: {len(branches)}")
+    lines.append(f"- 总分支数: {len(tasks)}")
     lines.append(f"- 成功: {successful}")
     lines.append(f"- 失败: {failed}")
     lines.append(f"- 总 tokens: {total_tokens}")
     lines.append(f"- 总成本: ${total_cost:.4f}")
 
-    return "\n".join(lines)
-
-
-def _format_delegate_result(result: Dict[str, Any]) -> str:
-    """格式化 delegate 模式的详细结果"""
-    lines = ["## 委托任务完成\n"]
-
-    summary = result.get("summary", "")
-    if summary:
-        lines.append(summary)
-        lines.append("")
-
-    lines.append("---\n")
-    lines.append("**执行统计**:")
-
-    stats = result.get("stats", {})
-    if stats:
-        lines.append(f"- 消息数: {stats.get('total_messages', 0)}")
-        lines.append(f"- Tokens: {stats.get('total_tokens', 0)}")
-        lines.append(f"- 成本: ${stats.get('total_cost', 0.0):.4f}")
-
-    return "\n".join(lines)
-
+    aggregated_summary = "\n".join(lines)
+    overall_status = "completed" if successful > 0 else "failed"
 
-def _format_evaluate_result(result: Dict[str, Any]) -> str:
-    """格式化 evaluate 模式的评估结果"""
-    summary = result.get("summary", "")
-    return summary  # evaluate 的 summary 已经是格式化的评估结果
+    return {
+        "mode": "explore",
+        "status": overall_status,
+        "summary": aggregated_summary,
+        "sub_trace_ids": sub_trace_ids,
+        "tasks": tasks,
+        "stats": _aggregate_stats(results),
+    }
 
 
-def _get_allowed_tools_for_mode(mode: str, context: dict) -> Optional[List[str]]:
-    """获取模式对应的允许工具列表"""
-    if mode == "explore":
-        return ["read_file", "grep_content", "glob_files", "goal"]
-    elif mode in ["delegate", "evaluate"]:
-        # 获取所有工具,排除 subagent
-        runner = context.get("runner")
-        if runner and hasattr(runner, "tools") and hasattr(runner.tools, "registry"):
-            all_tools = list(runner.tools.registry.keys())
-            return [t for t in all_tools if t != "subagent"]
-    return None  # 使用默认(所有工具)
+async def _get_goal_description(store, trace_id: str, goal_id: str) -> str:
+    """从 GoalTree 获取目标描述"""
+    if not goal_id:
+        return ""
+    goal_tree = await store.get_goal_tree(trace_id)
+    if goal_tree:
+        target_goal = goal_tree.find(goal_id)
+        if target_goal:
+            return target_goal.description
+    return f"Goal {goal_id}"
 
 
-def _aggregate_stats(results: List[Dict[str, Any]]) -> Dict[str, Any]:
-    """聚合多个结果的统计信息"""
-    total_messages = 0
-    total_tokens = 0
-    total_cost = 0.0
+def _build_evaluate_prompt(goal_description: str, messages: Optional[Messages]) -> str:
+    """
+    构建评估 prompt。
 
-    for result in results:
-        if isinstance(result, dict) and "stats" in result:
-            stats = result["stats"]
-            total_messages += stats.get("total_messages", 0)
-            total_tokens += stats.get("total_tokens", 0)
-            total_cost += stats.get("total_cost", 0.0)
+    Args:
+        goal_description: 代码从 GoalTree 注入的目标描述
+        messages: 模型提供的消息(执行结果+上下文)
+    """
+    # 从 messages 提取文本内容
+    result_text = ""
+    if messages:
+        parts = []
+        for msg in messages:
+            content = msg.get("content", "")
+            if isinstance(content, str):
+                parts.append(content)
+            elif isinstance(content, list):
+                # 多模态内容,提取文本部分
+                for item in content:
+                    if isinstance(item, dict) and item.get("type") == "text":
+                        parts.append(item.get("text", ""))
+        result_text = "\n".join(parts)
 
-    return {
-        "total_messages": total_messages,
-        "total_tokens": total_tokens,
-        "total_cost": total_cost
-    }
+    lines = [
+        "# 评估任务",
+        "",
+        "请评估以下任务的执行结果是否满足要求。",
+        "",
+        "## 目标描述",
+        "",
+        goal_description,
+        "",
+        "## 执行结果",
+        "",
+        result_text or "(无执行结果)",
+        "",
+        "## 输出格式",
+        "",
+        "## 评估结论",
+        "[通过/不通过]",
+        "",
+        "## 评估理由",
+        "[详细说明通过或不通过原因]",
+        "",
+        "## 修改建议(如果不通过)",
+        "1. [建议1]",
+        "2. [建议2]",
+    ]
+    return "\n".join(lines)
 
 
-# ===== 模式处理函数 =====
+# ===== 统一内部执行函数 =====
 
-async def _handle_explore_mode(
-    branches: List[str],
-    background: Optional[str],
+async def _run_agents(
+    tasks: List[str],
+    per_agent_msgs: List[Messages],
     continue_from: Optional[str],
-    store, current_trace_id: str, current_goal_id: str, runner
+    store, trace_id: str, goal_id: str, runner, context: dict,
 ) -> Dict[str, Any]:
-    """Explore 模式:并行探索多个方案"""
+    """
+    统一 agent 执行逻辑。
 
-    # 1. 检查 continue_from(不支持)
-    if continue_from:
-        return {
-            "status": "failed",
-            "error": "explore mode does not support continue_from parameter"
-        }
+    single (len(tasks)==1): delegate 模式,全量工具(排除 agent/evaluate)
+    multi (len(tasks)>1): explore 模式,只读工具,并行执行
+    """
+    single = len(tasks) == 1
+    parent_trace = await store.get_trace(trace_id)
+
+    # continue_from: 复用已有 trace(仅 single)
+    sub_trace_id = None
+    continued = False
+    if single and continue_from:
+        existing = await store.get_trace(continue_from)
+        if not existing:
+            return {"status": "failed", "error": f"Continue-from trace not found: {continue_from}"}
+        sub_trace_id = continue_from
+        continued = True
+        goal_tree = await store.get_goal_tree(continue_from)
+        mission = goal_tree.mission if goal_tree else tasks[0]
+        sub_trace_ids = [{"trace_id": sub_trace_id, "mission": mission}]
+    else:
+        sub_trace_ids = []
 
-    # 2. 获取父 Trace 信息(用于继承 uid、model)
-    parent_trace = await store.get_trace(current_trace_id)
+    # 创建 sub-traces 和执行协程
+    coros = []
+    all_sub_trace_ids = list(sub_trace_ids)  # copy for continue_from case
 
-    # 3. 创建所有 Sub-Traces
-    sub_trace_ids = []
-    tasks = []
+    for i, (task_item, msgs) in enumerate(zip(tasks, per_agent_msgs)):
+        if single and continued:
+            # continue_from 已经设置了 sub_trace_id
+            pass
+        else:
+            agent_type = "delegate" if single else "explore"
+            suffix = "delegate" if single else f"explore-{i+1:03d}"
+            stid = generate_sub_trace_id(trace_id, suffix)
+
+            sub_trace = Trace(
+                trace_id=stid,
+                mode="agent",
+                task=task_item,
+                parent_trace_id=trace_id,
+                parent_goal_id=goal_id,
+                agent_type=agent_type,
+                uid=parent_trace.uid if parent_trace else None,
+                model=parent_trace.model if parent_trace else None,
+                status="running",
+                context={"created_by_tool": "agent"},
+                created_at=datetime.now(),
+            )
+            await store.create_trace(sub_trace)
+            await store.update_goal_tree(stid, GoalTree(mission=task_item))
 
-    for i, branch in enumerate(branches):
-        # 生成唯一的 sub_trace_id
-        sub_trace_id = generate_sub_trace_id(current_trace_id, f"explore-{i+1:03d}")
-        sub_trace_ids.append({
-            "trace_id": sub_trace_id,
-            "mission": branch
-        })
+            all_sub_trace_ids.append({"trace_id": stid, "mission": task_item})
 
-        # 创建 Sub-Trace
-        sub_trace = Trace(
-            trace_id=sub_trace_id,
-            mode="agent",
-            task=branch,
-            parent_trace_id=current_trace_id,
-            parent_goal_id=current_goal_id,
-            agent_type="explore",
-            uid=parent_trace.uid if parent_trace else None,
-            model=parent_trace.model if parent_trace else None,
-            status="running",
-            context={"subagent_mode": "explore", "created_by_tool": "subagent"},
-            created_at=datetime.now(),
-        )
-        await store.create_trace(sub_trace)
-        await store.update_goal_tree(sub_trace_id, GoalTree(mission=branch))
+            # 广播 sub_trace_started
+            await broadcast_sub_trace_started(
+                trace_id, stid, goal_id or "",
+                agent_type, task_item,
+            )
 
-        # 广播 sub_trace_started
-        await broadcast_sub_trace_started(
-            current_trace_id, sub_trace_id, current_goal_id or "",
-            "explore", branch
-        )
+            if single:
+                sub_trace_id = stid
 
         # 注册为活跃协作者
+        cur_stid = sub_trace_id if single else all_sub_trace_ids[-1]["trace_id"]
+        collab_name = task_item[:30] if single and not continued else (
+            f"delegate-{cur_stid[:8]}" if single else f"explore-{i+1}"
+        )
         await _update_collaborator(
-            store, current_trace_id,
-            name=f"explore-{i+1}", sub_trace_id=sub_trace_id,
-            status="running", summary=branch[:80],
+            store, trace_id,
+            name=collab_name, sub_trace_id=cur_stid,
+            status="running", summary=task_item[:80],
         )
 
-        # 创建执行任务
-        task_coro = runner.run_result(
-            messages=[{"role": "user", "content": branch}],
+        # 构建消息
+        agent_msgs = list(msgs) + [{"role": "user", "content": task_item}]
+        allowed_tools = _get_allowed_tools(single, context)
+
+        coro = runner.run_result(
+            messages=agent_msgs,
             config=_make_run_config(
-                trace_id=sub_trace_id,
-                agent_type="explore",
+                trace_id=cur_stid,
+                agent_type="delegate" if single else "explore",
                 model=parent_trace.model if parent_trace else "gpt-4o",
                 uid=parent_trace.uid if parent_trace else None,
-                tools=["read_file", "grep_content", "glob_files", "goal"],
-                name=branch,
+                tools=allowed_tools,
+                name=task_item[:50],
             ),
         )
-        tasks.append(task_coro)
-
-    # 4. 更新主 Goal 为 in_progress
-    await _update_goal_start(store, current_trace_id, current_goal_id, "explore", sub_trace_ids)
+        coros.append((i, cur_stid, collab_name, coro))
 
-    # 5. 并行执行所有分支
-    results = await asyncio.gather(*tasks, return_exceptions=True)
+    # 更新主 Goal 为 in_progress
+    await _update_goal_start(
+        store, trace_id, goal_id,
+        "delegate" if single else "explore",
+        all_sub_trace_ids,
+    )
 
-    # 6. 处理结果并广播完成事件
-    processed_results = []
+    # 执行
+    if single:
+        # 单任务直接执行(带异常处理)
+        _, stid, collab_name, coro = coros[0]
+        try:
+            result = await coro
 
-    for i, result in enumerate(results):
-        sub_tid = sub_trace_ids[i]["trace_id"]
-        if isinstance(result, Exception):
-            # 异常处理
-            error_result = {
-                "status": "failed",
-                "summary": f"执行出错: {str(result)}",
-                "stats": {"total_messages": 0, "total_tokens": 0, "total_cost": 0.0}
-            }
-            processed_results.append(error_result)
-            await broadcast_sub_trace_completed(
-                current_trace_id, sub_tid, "failed", str(result), {}
-            )
-            await _update_collaborator(
-                store, current_trace_id,
-                name=f"explore-{i+1}", sub_trace_id=sub_tid,
-                status="failed", summary=str(result)[:80],
-            )
-        else:
-            processed_results.append(result)
             await broadcast_sub_trace_completed(
-                current_trace_id, sub_tid,
+                trace_id, stid,
                 result.get("status", "completed"),
                 result.get("summary", ""),
-                result.get("stats", {})
+                result.get("stats", {}),
             )
             await _update_collaborator(
-                store, current_trace_id,
-                name=f"explore-{i+1}", sub_trace_id=sub_tid,
+                store, trace_id,
+                name=collab_name, sub_trace_id=stid,
                 status=result.get("status", "completed"),
                 summary=result.get("summary", "")[:80],
             )
 
-    # 7. 格式化汇总结果
-    aggregated_summary = _format_explore_results(branches, processed_results)
+            formatted = _format_single_result(result, stid, continued)
 
-    # 8. 更新主 Goal 为 completed
-    overall_status = "completed" if any(
-        r.get("status") == "completed" for r in processed_results if isinstance(r, dict)
-    ) else "failed"
+            await _update_goal_complete(
+                store, trace_id, goal_id,
+                result.get("status", "completed"),
+                formatted["summary"],
+                all_sub_trace_ids,
+            )
+            return formatted
 
-    await _update_goal_complete(
-        store, current_trace_id, current_goal_id,
-        overall_status, aggregated_summary, sub_trace_ids
-    )
+        except Exception as e:
+            error_msg = str(e)
+            await broadcast_sub_trace_completed(
+                trace_id, stid, "failed", error_msg, {},
+            )
+            await _update_collaborator(
+                store, trace_id,
+                name=collab_name, sub_trace_id=stid,
+                status="failed", summary=error_msg[:80],
+            )
+            await _update_goal_complete(
+                store, trace_id, goal_id,
+                "failed", f"委托任务失败: {error_msg}",
+                all_sub_trace_ids,
+            )
+            return {
+                "mode": "delegate",
+                "status": "failed",
+                "error": error_msg,
+                "sub_trace_id": stid,
+            }
+    else:
+        # 多任务并行执行
+        raw_results = await asyncio.gather(
+            *(coro for _, _, _, coro in coros),
+            return_exceptions=True,
+        )
 
-    # 9. 返回结果
-    return {
-        "mode": "explore",
-        "status": overall_status,
-        "summary": aggregated_summary,
-        "sub_trace_ids": sub_trace_ids,
-        "branches": branches,
-        "stats": _aggregate_stats(processed_results)
-    }
+        processed_results = []
+        for idx, raw in enumerate(raw_results):
+            _, stid, collab_name, _ = coros[idx]
+            if isinstance(raw, Exception):
+                error_result = {
+                    "status": "failed",
+                    "summary": f"执行出错: {str(raw)}",
+                    "stats": {"total_messages": 0, "total_tokens": 0, "total_cost": 0.0},
+                }
+                processed_results.append(error_result)
+                await broadcast_sub_trace_completed(
+                    trace_id, stid, "failed", str(raw), {},
+                )
+                await _update_collaborator(
+                    store, trace_id,
+                    name=collab_name, sub_trace_id=stid,
+                    status="failed", summary=str(raw)[:80],
+                )
+            else:
+                processed_results.append(raw)
+                await broadcast_sub_trace_completed(
+                    trace_id, stid,
+                    raw.get("status", "completed"),
+                    raw.get("summary", ""),
+                    raw.get("stats", {}),
+                )
+                await _update_collaborator(
+                    store, trace_id,
+                    name=collab_name, sub_trace_id=stid,
+                    status=raw.get("status", "completed"),
+                    summary=raw.get("summary", "")[:80],
+                )
+
+        formatted = _format_multi_result(tasks, processed_results, all_sub_trace_ids)
 
+        await _update_goal_complete(
+            store, trace_id, goal_id,
+            formatted["status"],
+            formatted["summary"],
+            all_sub_trace_ids,
+        )
+        return formatted
 
-async def _handle_delegate_mode(
-    task: str,
-    continue_from: Optional[str],
-    store, current_trace_id: str, current_goal_id: str, runner, context: dict
+
+# ===== 工具定义 =====
+
+@tool(description="创建 Agent 执行任务")
+async def agent(
+    task: Union[str, List[str]],
+    messages: Optional[Union[Messages, List[Messages]]] = None,
+    continue_from: Optional[str] = None,
+    context: Optional[dict] = None,
 ) -> Dict[str, Any]:
-    """Delegate 模式:委托单个任务"""
+    """
+    创建 Agent 执行任务。
 
-    # 1. 获取父 Trace 信息
-    parent_trace = await store.get_trace(current_trace_id)
+    单任务 (task: str): delegate 模式,全量工具
+    多任务 (task: List[str]): explore 模式,只读工具,并行执行
 
-    # 2. 处理 continue_from 或创建新 Sub-Trace
-    if continue_from:
-        existing_trace = await store.get_trace(continue_from)
-        if not existing_trace:
-            return {"status": "failed", "error": f"Continue-from trace not found: {continue_from}"}
-        sub_trace_id = continue_from
-        # 获取 mission
-        goal_tree = await store.get_goal_tree(continue_from)
-        mission = goal_tree.mission if goal_tree else task
-        sub_trace_ids = [{"trace_id": sub_trace_id, "mission": mission}]
-    else:
-        sub_trace_id = generate_sub_trace_id(current_trace_id, "delegate")
-        sub_trace = Trace(
-            trace_id=sub_trace_id,
-            mode="agent",
-            task=task,
-            parent_trace_id=current_trace_id,
-            parent_goal_id=current_goal_id,
-            agent_type="delegate",
-            uid=parent_trace.uid if parent_trace else None,
-            model=parent_trace.model if parent_trace else None,
-            status="running",
-            context={"subagent_mode": "delegate", "created_by_tool": "subagent"},
-            created_at=datetime.now(),
-        )
-        await store.create_trace(sub_trace)
-        await store.update_goal_tree(sub_trace_id, GoalTree(mission=task))
-        sub_trace_ids = [{"trace_id": sub_trace_id, "mission": task}]
+    Args:
+        task: 任务描述。字符串=单任务,列表=多任务并行
+        messages: 预置消息。1D 列表=所有 agent 共享;2D 列表=per-agent
+        continue_from: 继续已有 trace(仅单任务)
+        context: 框架自动注入的上下文
+    """
+    if not context:
+        return {"status": "failed", "error": "context is required"}
 
-        # 广播 sub_trace_started
-        await broadcast_sub_trace_started(
-            current_trace_id, sub_trace_id, current_goal_id or "",
-            "delegate", task
-        )
+    store = context.get("store")
+    trace_id = context.get("trace_id")
+    goal_id = context.get("goal_id")
+    runner = context.get("runner")
 
-    # 注册为活跃协作者
-    delegate_name = task[:30] if not continue_from else f"delegate-{sub_trace_id[:8]}"
-    await _update_collaborator(
-        store, current_trace_id,
-        name=delegate_name, sub_trace_id=sub_trace_id,
-        status="running", summary=task[:80],
-    )
+    missing = []
+    if not store:
+        missing.append("store")
+    if not trace_id:
+        missing.append("trace_id")
+    if not runner:
+        missing.append("runner")
+    if missing:
+        return {"status": "failed", "error": f"Missing required context: {', '.join(missing)}"}
 
-    # 3. 更新主 Goal 为 in_progress
-    await _update_goal_start(store, current_trace_id, current_goal_id, "delegate", sub_trace_ids)
+    # 归一化 task → list
+    single = isinstance(task, str)
+    tasks = [task] if single else task
 
-    # 4. 执行任务
-    try:
-        allowed_tools = _get_allowed_tools_for_mode("delegate", context)
-        result = await runner.run_result(
-            messages=[{"role": "user", "content": task}],
-            config=_make_run_config(
-                trace_id=sub_trace_id,
-                agent_type="delegate",
-                model=parent_trace.model if parent_trace else "gpt-4o",
-                uid=parent_trace.uid if parent_trace else None,
-                tools=allowed_tools,
-                name=task[:50],
-            ),
-        )
+    if not tasks:
+        return {"status": "failed", "error": "task is required"}
 
-        # 4. 广播 sub_trace_completed
-        await broadcast_sub_trace_completed(
-            current_trace_id, sub_trace_id,
-            result.get("status", "completed"),
-            result.get("summary", ""),
-            result.get("stats", {})
-        )
+    # 归一化 messages → List[Messages](per-agent)
+    if messages is None:
+        per_agent_msgs: List[Messages] = [[] for _ in tasks]
+    elif messages and isinstance(messages[0], list):
+        per_agent_msgs = messages  # 2D: per-agent
+    else:
+        per_agent_msgs = [messages] * len(tasks)  # 1D: 共享
 
-        # 更新协作者状态
-        await _update_collaborator(
-            store, current_trace_id,
-            name=delegate_name, sub_trace_id=sub_trace_id,
-            status=result.get("status", "completed"),
-            summary=result.get("summary", "")[:80],
-        )
+    if continue_from and not single:
+        return {"status": "failed", "error": "continue_from requires single task"}
 
-        # 5. 格式化结果
-        formatted_summary = _format_delegate_result(result)
+    return await _run_agents(
+        tasks, per_agent_msgs, continue_from,
+        store, trace_id, goal_id, runner, context,
+    )
 
-        # 6. 更新主 Goal 为 completed
-        await _update_goal_complete(
-            store, current_trace_id, current_goal_id,
-            result.get("status", "completed"), formatted_summary, sub_trace_ids
-        )
 
-        # 7. 返回结果
-        return {
-            "mode": "delegate",
-            "sub_trace_id": sub_trace_id,
-            "continue_from": bool(continue_from),
-            **result,
-            "summary": formatted_summary
-        }
+@tool(description="评估目标执行结果是否满足要求")
+async def evaluate(
+    messages: Optional[Messages] = None,
+    target_goal_id: Optional[str] = None,
+    continue_from: Optional[str] = None,
+    context: Optional[dict] = None,
+) -> Dict[str, Any]:
+    """
+    评估目标执行结果是否满足要求。
 
-    except Exception as e:
-        # 错误处理
-        error_msg = str(e)
-        await broadcast_sub_trace_completed(
-            current_trace_id, sub_trace_id,
-            "failed", error_msg, {}
-        )
+    代码自动从 GoalTree 注入目标描述。模型把执行结果和上下文放在 messages 中。
 
-        await _update_collaborator(
-            store, current_trace_id,
-            name=delegate_name, sub_trace_id=sub_trace_id,
-            status="failed", summary=error_msg[:80],
-        )
+    Args:
+        messages: 执行结果和上下文消息(OpenAI 格式)
+        target_goal_id: 要评估的目标 ID(默认当前 goal_id)
+        continue_from: 继续已有评估 trace
+        context: 框架自动注入的上下文
+    """
+    if not context:
+        return {"status": "failed", "error": "context is required"}
 
-        await _update_goal_complete(
-            store, current_trace_id, current_goal_id,
-            "failed", f"委托任务失败: {error_msg}", sub_trace_ids
-        )
+    store = context.get("store")
+    trace_id = context.get("trace_id")
+    current_goal_id = context.get("goal_id")
+    runner = context.get("runner")
 
-        return {
-            "mode": "delegate",
-            "status": "failed",
-            "error": error_msg,
-            "sub_trace_id": sub_trace_id
-        }
+    missing = []
+    if not store:
+        missing.append("store")
+    if not trace_id:
+        missing.append("trace_id")
+    if not runner:
+        missing.append("runner")
+    if missing:
+        return {"status": "failed", "error": f"Missing required context: {', '.join(missing)}"}
 
+    # target_goal_id 默认 context["goal_id"]
+    goal_id = target_goal_id or current_goal_id
 
-async def _handle_evaluate_mode(
-    target_goal_id: str,
-    evaluation_input: Dict[str, Any],
-    requirements: Optional[str],
-    continue_from: Optional[str],
-    store, current_trace_id: str, current_goal_id: str, runner, context: dict
-) -> Dict[str, Any]:
-    """Evaluate 模式:评估任务结果"""
+    # 从 GoalTree 获取目标描述
+    goal_desc = await _get_goal_description(store, trace_id, goal_id)
 
-    # 1. 构建评估 prompt
-    task_prompt = await _build_evaluate_prompt(
-        store, current_trace_id, target_goal_id,
-        evaluation_input, requirements
-    )
+    # 构建 evaluator prompt
+    eval_prompt = _build_evaluate_prompt(goal_desc, messages)
 
-    # 2. 获取父 Trace 信息
-    parent_trace = await store.get_trace(current_trace_id)
+    # 获取父 Trace 信息
+    parent_trace = await store.get_trace(trace_id)
 
-    # 3. 处理 continue_from 或创建新 Sub-Trace
+    # 处理 continue_from 或创建新 Sub-Trace
     if continue_from:
         existing_trace = await store.get_trace(continue_from)
         if not existing_trace:
             return {"status": "failed", "error": f"Continue-from trace not found: {continue_from}"}
         sub_trace_id = continue_from
-        # 获取 mission
         goal_tree = await store.get_goal_tree(continue_from)
-        mission = goal_tree.mission if goal_tree else task_prompt
+        mission = goal_tree.mission if goal_tree else eval_prompt
         sub_trace_ids = [{"trace_id": sub_trace_id, "mission": mission}]
     else:
-        sub_trace_id = generate_sub_trace_id(current_trace_id, "evaluate")
+        sub_trace_id = generate_sub_trace_id(trace_id, "evaluate")
         sub_trace = Trace(
             trace_id=sub_trace_id,
             mode="agent",
-            task=task_prompt,
-            parent_trace_id=current_trace_id,
+            task=eval_prompt,
+            parent_trace_id=trace_id,
             parent_goal_id=current_goal_id,
             agent_type="evaluate",
             uid=parent_trace.uid if parent_trace else None,
             model=parent_trace.model if parent_trace else None,
             status="running",
-            context={"subagent_mode": "evaluate", "created_by_tool": "subagent"},
+            context={"created_by_tool": "evaluate"},
             created_at=datetime.now(),
         )
         await store.create_trace(sub_trace)
-        await store.update_goal_tree(sub_trace_id, GoalTree(mission=task_prompt))
-        sub_trace_ids = [{"trace_id": sub_trace_id, "mission": task_prompt}]
+        await store.update_goal_tree(sub_trace_id, GoalTree(mission=eval_prompt))
+        sub_trace_ids = [{"trace_id": sub_trace_id, "mission": eval_prompt}]
 
         # 广播 sub_trace_started
         await broadcast_sub_trace_started(
-            current_trace_id, sub_trace_id, current_goal_id or "",
-            "evaluate", task_prompt
+            trace_id, sub_trace_id, current_goal_id or "",
+            "evaluate", eval_prompt,
         )
 
-    # 4. 更新主 Goal 为 in_progress
-    await _update_goal_start(store, current_trace_id, current_goal_id, "evaluate", sub_trace_ids)
+    # 更新主 Goal 为 in_progress
+    await _update_goal_start(store, trace_id, current_goal_id, "evaluate", sub_trace_ids)
 
     # 注册为活跃协作者
-    eval_name = f"评估: {target_goal_id[:20]}"
+    eval_name = f"评估: {(goal_id or 'unknown')[:20]}"
     await _update_collaborator(
-        store, current_trace_id,
+        store, trace_id,
         name=eval_name, sub_trace_id=sub_trace_id,
-        status="running", summary=f"评估 Goal {target_goal_id}",
+        status="running", summary=f"评估 Goal {goal_id}",
     )
 
-    # 5. 执行评估
+    # 执行评估
     try:
-        allowed_tools = _get_allowed_tools_for_mode("evaluate", context)
+        # evaluate 使用只读工具 + goal
+        allowed_tools = ["read_file", "grep_content", "glob_files", "goal"]
         result = await runner.run_result(
-            messages=[{"role": "user", "content": task_prompt}],
+            messages=[{"role": "user", "content": eval_prompt}],
             config=_make_run_config(
                 trace_id=sub_trace_id,
                 agent_type="evaluate",
                 model=parent_trace.model if parent_trace else "gpt-4o",
                 uid=parent_trace.uid if parent_trace else None,
                 tools=allowed_tools,
-                name=f"评估: {target_goal_id}",
+                name=f"评估: {goal_id}",
             ),
         )
 
-        # 5. 广播 sub_trace_completed
         await broadcast_sub_trace_completed(
-            current_trace_id, sub_trace_id,
+            trace_id, sub_trace_id,
             result.get("status", "completed"),
             result.get("summary", ""),
-            result.get("stats", {})
+            result.get("stats", {}),
         )
-
-        # 更新协作者状态
         await _update_collaborator(
-            store, current_trace_id,
+            store, trace_id,
             name=eval_name, sub_trace_id=sub_trace_id,
             status=result.get("status", "completed"),
             summary=result.get("summary", "")[:80],
         )
 
-        # 6. 格式化结果
-        formatted_summary = _format_evaluate_result(result)
+        formatted_summary = result.get("summary", "")
 
-        # 7. 更新主 Goal 为 completed
         await _update_goal_complete(
-            store, current_trace_id, current_goal_id,
-            result.get("status", "completed"), formatted_summary, sub_trace_ids
+            store, trace_id, current_goal_id,
+            result.get("status", "completed"),
+            formatted_summary,
+            sub_trace_ids,
         )
 
-        # 8. 返回结果
         return {
             "mode": "evaluate",
             "sub_trace_id": sub_trace_id,
             "continue_from": bool(continue_from),
             **result,
-            "summary": formatted_summary
+            "summary": formatted_summary,
         }
 
     except Exception as e:
-        # 错误处理
         error_msg = str(e)
         await broadcast_sub_trace_completed(
-            current_trace_id, sub_trace_id,
-            "failed", error_msg, {}
+            trace_id, sub_trace_id, "failed", error_msg, {},
         )
-
         await _update_collaborator(
-            store, current_trace_id,
+            store, trace_id,
             name=eval_name, sub_trace_id=sub_trace_id,
             status="failed", summary=error_msg[:80],
         )
-
         await _update_goal_complete(
-            store, current_trace_id, current_goal_id,
-            "failed", f"评估任务失败: {error_msg}", sub_trace_ids
+            store, trace_id, current_goal_id,
+            "failed", f"评估任务失败: {error_msg}",
+            sub_trace_ids,
         )
-
         return {
             "mode": "evaluate",
             "status": "failed",
             "error": error_msg,
-            "sub_trace_id": sub_trace_id
+            "sub_trace_id": sub_trace_id,
         }
-
-
-@tool(description="创建 Sub-Agent 执行任务(evaluate/delegate/explore)")
-async def subagent(
-    mode: str,
-    task: Optional[str] = None,
-    target_goal_id: Optional[str] = None,
-    evaluation_input: Optional[Dict[str, Any]] = None,
-    requirements: Optional[str] = None,
-    branches: Optional[List[str]] = None,
-    background: Optional[str] = None,
-    continue_from: Optional[str] = None,
-    context: Optional[dict] = None,
-) -> Dict[str, Any]:
-    # 1. 验证 context
-    if not context:
-        return {"status": "failed", "error": "context is required"}
-
-    store = context.get("store")
-    current_trace_id = context.get("trace_id")
-    current_goal_id = context.get("goal_id")
-    runner = context.get("runner")
-
-    missing = []
-    if not store:
-        missing.append("store")
-    if not current_trace_id:
-        missing.append("trace_id")
-    if not runner:
-        missing.append("runner")
-    if missing:
-        return {"status": "failed", "error": f"Missing required context: {', '.join(missing)}"}
-
-    # 2. 验证 mode
-    if mode not in {"evaluate", "delegate", "explore"}:
-        return {"status": "failed", "error": "Invalid mode: must be evaluate/delegate/explore"}
-
-    # 3. 验证模式特定参数
-    if mode == "delegate" and not task:
-        return {"status": "failed", "error": "delegate mode requires task"}
-    if mode == "explore" and not branches:
-        return {"status": "failed", "error": "explore mode requires branches"}
-    if mode == "evaluate" and (not target_goal_id or evaluation_input is None):
-        return {"status": "failed", "error": "evaluate mode requires target_goal_id and evaluation_input"}
-
-    # 4. 路由到模式处理函数
-    if mode == "explore":
-        return await _handle_explore_mode(
-            branches, background, continue_from,
-            store, current_trace_id, current_goal_id, runner
-        )
-    elif mode == "delegate":
-        return await _handle_delegate_mode(
-            task, continue_from,
-            store, current_trace_id, current_goal_id, runner, context
-        )
-    else:  # evaluate
-        return await _handle_evaluate_mode(
-            target_goal_id, evaluation_input, requirements, continue_from,
-            store, current_trace_id, current_goal_id, runner, context
-        )

+ 20 - 7
agent/tools/schema.py

@@ -11,7 +11,7 @@ Schema Generator - 从函数签名自动生成 OpenAI Tool Schema
 
 import inspect
 import logging
-from typing import Any, Dict, List, Optional, get_args, get_origin
+from typing import Any, Dict, List, Literal, Optional, Union, get_args, get_origin
 
 logger = logging.getLogger(__name__)
 
@@ -142,16 +142,29 @@ class SchemaGenerator:
     @classmethod
     def _type_to_schema(cls, python_type: Any) -> Dict[str, Any]:
         """将 Python 类型转换为 JSON Schema"""
-        # 处理 Optional[T]
+        if python_type is Any:
+            return {}
+
         origin = get_origin(python_type)
         args = get_args(python_type)
 
-        if origin is Optional.__class__ or (origin and str(origin) == "typing.Union"):
-            # Optional[T] = Union[T, None]
+        # 处理 Literal[...]
+        if origin is Literal:
+            values = list(args)
+            if all(isinstance(v, str) for v in values):
+                return {"type": "string", "enum": values}
+            elif all(isinstance(v, int) for v in values):
+                return {"type": "integer", "enum": values}
+            return {"enum": values}
+
+        # 处理 Union[T, ...] 和 Optional[T]
+        if origin is Union:
             if len(args) == 2 and type(None) in args:
-                inner_type = args[0] if args[1] is type(None) else args[1]
-                schema = cls._type_to_schema(inner_type)
-                return schema
+                # Optional[T] = Union[T, None]
+                inner = args[0] if args[1] is type(None) else args[1]
+                return cls._type_to_schema(inner)
+            non_none = [a for a in args if a is not type(None)]
+            return {"oneOf": [cls._type_to_schema(a) for a in non_none]}
 
         # 处理 List[T]
         if origin is list or origin is List:

+ 10 - 1
agent/trace/models.py

@@ -7,9 +7,18 @@ Message: Trace 中的 LLM 消息,对应 LLM API 格式
 
 from dataclasses import dataclass, field
 from datetime import datetime
-from typing import Dict, Any, List, Optional, Literal
+from typing import Dict, Any, List, Optional, Literal, Union
 import uuid
 
+# ===== 消息线格式类型别名 =====
+# 轻量 wire-format 类型,用于工具参数和 runner/LLM API 接口。
+# 内部存储使用下方的 Message dataclass。
+
+ChatMessage = Dict[str, Any]                          # 单条 OpenAI 格式消息
+Messages = List[ChatMessage]                          # 消息列表
+MessageContent = Union[str, List[Dict[str, str]]]     # content 字段(文本或多模态)
+
+
 # 导入 TokenUsage(延迟导入避免循环依赖)
 def _get_token_usage_class():
     from ..llm.usage import TokenUsage

+ 53 - 24
docs/README.md

@@ -15,7 +15,7 @@
 | 类型 | 创建方式 | 父子关系 | 状态 |
 |------|---------|---------|------|
 | 主 Agent | 直接调用 `runner.run()` | 无 parent | 正常执行 |
-| 子 Agent | 通过 `subagent` 工具 | `parent_trace_id` / `parent_goal_id` 指向父 | 正常执行 |
+| 子 Agent | 通过 `agent` 工具 | `parent_trace_id` / `parent_goal_id` 指向父 | 正常执行 |
 | 人类协助 | 通过 `ask_human` 工具 | `parent_trace_id` 指向父 | 阻塞等待 |
 
 ---
@@ -53,7 +53,7 @@ agent/
 │       ├── search.py      # 网络搜索
 │       ├── webfetch.py    # 网页抓取
 │       ├── skill.py       # 技能加载
-│       └── subagent.py    # 子 Agent 统一入口(evaluate/delegate/explore
+│       └── subagent.py    # agent / evaluate 工具(子 Agent 创建与评估
 ├── memory/                # 跨会话记忆
 │   ├── models.py          # Experience, Skill
@@ -243,7 +243,7 @@ async for item in runner.run(
 `insert_after` 的值是 message 的 `sequence` 号,可通过 `GET /api/traces/{trace_id}/messages` 查看。如果指定的 sequence 是一条带 `tool_calls` 的 assistant 消息,系统会自动将截断点扩展到其所有对应的 tool response 之后(安全截断)。
 
 - `run(messages, config)`:**核心方法**,流式返回 `AsyncIterator[Union[Trace, Message]]`
-- `run_result(messages, config)`:便利方法,内部消费 `run()`,返回结构化结果。主要用于 `subagent` 工具内部
+- `run_result(messages, config)`:便利方法,内部消费 `run()`,返回结构化结果。主要用于 `agent`/`evaluate` 工具内部
 
 ### REST API
 
@@ -365,10 +365,10 @@ class Goal:
 
 **Goal 类型**:
 - `normal` - 普通目标,由 Agent 直接执行
-- `agent_call` - 通过 subagent 工具创建的目标,会启动 Sub-Trace
+- `agent_call` - 通过 `agent`/`evaluate` 工具创建的目标,会启动 Sub-Trace
 
 **agent_call 类型的 Goal**:
-- 调用 subagent 工具时自动设置
+- 调用 `agent`/`evaluate` 工具时自动设置
 - `agent_call_mode` 记录使用的模式(explore/delegate/evaluate)
 - `sub_trace_ids` 记录创建的所有 Sub-Trace ID
 - 状态转换:pending → in_progress(Sub-Trace 启动)→ completed(Sub-Trace 完成)
@@ -468,38 +468,67 @@ AGENT_PRESETS = {
 
 ## 子 Trace 机制
 
-通过 `subagent` 工具创建子 Agent 执行任务,支持三种模式。子 Agent 通过 `name` 参数命名,便于后续引用和续跑
+通过 `agent` 工具创建子 Agent 执行任务。`task` 参数为字符串时为单任务(delegate),为列表时并行执行多任务(explore)。支持通过 `messages` 参数预置消息,通过 `continue_from` 参数续跑已有 Sub-Trace(仅限单任务)。
 
-`subagent` 工具负责创建 Sub-Trace 和初始化 GoalTree(因为需要设置自定义 context 元数据和命名规则),创建完成后将 `trace_id` 传给 `RunConfig`,由 Runner 接管后续执行。工具同时维护父 Trace 的 `context["collaborators"]` 列表。
+`agent` 工具负责创建 Sub-Trace 和初始化 GoalTree(因为需要设置自定义 context 元数据和命名规则),创建完成后将 `trace_id` 传给 `RunConfig`,由 Runner 接管后续执行。工具同时维护父 Trace 的 `context["collaborators"]` 列表。
 
-### explore 模式
+### agent 工具
 
-并行探索多个分支,适合技术选型、方案对比等场景。
+```python
+@tool(description="创建 Agent 执行任务")
+async def agent(
+    task: Union[str, List[str]],
+    messages: Optional[Union[Messages, List[Messages]]] = None,
+    continue_from: Optional[str] = None,
+    context: Optional[dict] = None,
+) -> Dict[str, Any]:
+```
 
-- 使用 `asyncio.gather()` 并行执行所有分支
-- 每个分支创建独立的 Sub-Trace
+**单任务(delegate)**:`task: str`
+- 创建单个 Sub-Trace
+- 完整工具权限(除 agent/evaluate 外,防止递归)
+- 支持 `continue_from` 续跑已有 Sub-Trace
+- 支持 `messages` 预置上下文消息
+
+**多任务(explore)**:`task: List[str]`
+- 使用 `asyncio.gather()` 并行执行所有任务
+- 每个任务创建独立的 Sub-Trace
 - 只读工具权限(read_file, grep_content, glob_files, goal)
+- `messages` 支持 1D(共享)或 2D(per-agent)
+- 不支持 `continue_from`
 - 汇总所有分支结果返回
 
-### delegate 模式
+### evaluate 工具
 
-委派单个任务给子 Agent 执行,适合代码分析、文档生成等场景。
+```python
+@tool(description="评估目标执行结果是否满足要求")
+async def evaluate(
+    messages: Optional[Messages] = None,
+    target_goal_id: Optional[str] = None,
+    continue_from: Optional[str] = None,
+    context: Optional[dict] = None,
+) -> Dict[str, Any]:
+```
 
-- 创建单个 Sub-Trace
-- 完整工具权限(除 subagent 外,防止递归)
-- 支持通过 `name` 续跑已有子 Agent:`subagent(name="coder", task="继续")`
+- 代码自动从 GoalTree 注入目标描述(无需 criteria 参数)
+- 模型把执行结果和上下文放在 `messages` 中
+- `target_goal_id` 默认为当前 goal_id
+- 只读工具权限(read_file, grep_content, glob_files, goal)
+- 返回评估结论和改进建议
 
-### evaluate 模式
+### 消息类型别名
 
-评估指定 Goal 的执行结果,提供质量评估和改进建议。
+定义在 `agent/trace/models.py`,用于工具参数和 runner/LLM API 接口:
 
-- 访问目标 Goal 的执行结果
-- 完整工具权限
-- 返回评估结论和建议
+```python
+ChatMessage = Dict[str, Any]                          # 单条 OpenAI 格式消息
+Messages = List[ChatMessage]                          # 消息列表
+MessageContent = Union[str, List[Dict[str, str]]]     # content 字段(文本或多模态)
+```
 
 **实现位置**:`agent/tools/builtin/subagent.py`
 
-**详细文档**:[工具系统 - Subagent 工具](./tools.md#subagent-工具)
+**详细文档**:[工具系统 - Agent/Evaluate 工具](./tools.md#agent-工具)
 
 ### ask_human 工具
 
@@ -548,7 +577,7 @@ AGENT_PRESETS = {
 ### 维护
 
 各工具负责更新 collaborators 列表(通过 `context["store"]` 写入 trace.context):
-- `subagent` 工具:创建/续跑子 Agent 时更新
+- `agent` / `evaluate` 工具:创建/续跑子 Agent 或评估 Sub-Trace 时更新
 - `feishu` 工具:发送消息/收到回复时更新
 - Runner 只负责读取和注入
 
@@ -583,7 +612,7 @@ async def my_tool(arg: str, ctx: ToolContext) -> ToolResult:
 | 目录 | 工具 | 说明 |
 |-----|------|------|
 | `trace/` | goal | Agent 内部计划管理 |
-| `builtin/` | subagent | 子 Trace 创建(explore/delegate/evaluate) |
+| `builtin/` | agent, evaluate | 子 Agent 创建与评估 |
 | `builtin/file/` | read, write, edit, glob, grep | 文件操作 |
 | `builtin/browser/` | browser actions | 浏览器自动化 |
 | `builtin/` | bash, sandbox, search, webfetch, skill, ask_human | 其他工具 |

+ 63 - 0
docs/decisions.md

@@ -773,4 +773,67 @@ Agent(含 sub-agent)有时不创建 goal 就直接执行工具调用,导
 
 **实现**:`agent/core/runner.py:AgentRunner._build_context_injection`
 
+---
+
+## 18. 统一 Message 类型 + 重构 Agent/Evaluate 工具
+
+**日期**: 2026-02-12
+
+### 问题
+
+原 `subagent` 工具存在几个问题:
+1. **概念冗余**:单一工具通过 `mode` 参数区分三种行为(explore/delegate/evaluate),参数组合复杂,模型容易用错
+2. **evaluate 的 `requirements` 参数多余**:模型既要在 `evaluation_input` 里放结果,又要在 `requirements` 里放评估标准(criteria),信息分散
+3. **缺少消息线格式类型**:工具参数和 runner 接口使用裸 `Dict`/`List[Dict]`/`Any`,无语义类型
+4. **SchemaGenerator 不支持 `Literal`/`Union`**:无法为新工具签名生成正确的 JSON Schema
+
+### 决策
+
+#### 18a. 拆分 `subagent` → `agent` + `evaluate` 两个独立工具
+
+- `agent(task, messages, continue_from)` — 创建 Agent 执行任务
+  - `task: str` → 单任务(delegate),全量工具(排除 agent/evaluate)
+  - `task: List[str]` → 多任务并行(explore),只读工具
+  - 通过 `isinstance(task, str)` 判断,无需 `mode` 参数
+- `evaluate(messages, target_goal_id, continue_from)` — 评估目标执行结果
+  - 代码自动从 GoalTree 注入目标描述,无 `criteria` 参数
+  - 模型把所有上下文放在 `messages` 中
+
+内部统一为 `_run_agents()` 函数,`single = len(tasks)==1` 区分 delegate/explore 行为。
+
+#### 18b. 增加消息线格式类型别名(`agent/trace/models.py`)
+
+```python
+ChatMessage = Dict[str, Any]                          # 单条 OpenAI 格式消息
+Messages = List[ChatMessage]                          # 消息列表
+MessageContent = Union[str, List[Dict[str, str]]]     # content 字段(文本或多模态)
+```
+
+放在 `models.py` 而非新文件——与存储层 `Message` dataclass 描述同一概念的不同层次。
+
+#### 18c. SchemaGenerator 支持 `Literal`/`Union`
+
+`_type_to_schema()` 新增:
+- `Literal["a", "b"]` → `{"type": "string", "enum": ["a", "b"]}`
+- `Union[str, List[str]]` → `{"oneOf": [...]}`
+- `Any` → `{}`(无约束)
+
+### 理由
+
+1. **最少概念**:两个单职责工具比一个多 mode 工具更易理解和使用
+2. **最少参数**:evaluate 无需 criteria(GoalTree 已有目标描述),agent 的 messages 支持 1D/2D 避免额外参数
+3. **模型/代码职责分离**:模型只管给 messages,代码自动注入 goal 上下文
+4. **类型安全**:`Union[str, List[str]]` 在 Schema 中生成 `oneOf`,LLM 能正确理解参数格式
+
+### 变更范围
+
+- `agent/trace/models.py` — 类型别名
+- `agent/tools/schema.py` — `Literal`/`Union` 支持
+- `agent/tools/builtin/subagent.py` — `agent` + `evaluate` 工具,`_run_agents()` 统一函数
+- `agent/tools/builtin/__init__.py`, `agent/core/runner.py` — 注册表更新
+- `agent/tools/builtin/feishu/chat.py`, `agent/tools/builtin/browser/baseClass.py` — 类型注解修正
+- `agent/__init__.py` — 导出新类型
+
+**实现**:`agent/tools/builtin/subagent.py`, `agent/trace/models.py`, `agent/tools/schema.py`
+
 ---

+ 53 - 20
docs/tools.md

@@ -712,42 +712,75 @@ print(f"Success rate: {stats['success_rate']:.1%}")
 | `bash_command` | 执行 shell 命令 | opencode bash.ts |
 | `glob_files` | 文件模式匹配 | opencode glob.ts |
 | `grep_content` | 内容搜索(正则表达式) | opencode grep.ts |
-| `subagent` | 统一子 Agent 调用(evaluate/delegate/explore) | main 自研 |
+| `agent` | 创建 Agent 执行任务(单任务 delegate / 多任务并行 explore) | 自研 |
+| `evaluate` | 评估目标执行结果是否满足要求 | 自研 |
 
-### Subagent 工具
+### Agent 工具
 
-创建子 Agent 执行任务,支持三种模式:
+创建子 Agent 执行任务。通过 `task` 参数的类型自动区分模式:
 
-| 模式 | 用途 | 并行执行 | 工具权限 |
-|------|------|---------|---------|
-| **explore** | 并行探索多个方案 | ✅ | 只读(read_file, grep_content, glob_files, goal) |
-| **delegate** | 委托单个任务 | ❌ | 完整(除 subagent 外) |
-| **evaluate** | 评估任务结果 | ❌ | 完整(除 subagent 外) |
+| task 类型 | 模式 | 并行执行 | 工具权限 |
+|-----------|------|---------|---------|
+| `str`(单任务) | delegate | ❌ | 完整(除 agent/evaluate 外) |
+| `List[str]`(多任务) | explore | ✅ | 只读(read_file, grep_content, glob_files, goal) |
 
-**Explore 模式**:
+```python
+@tool(description="创建 Agent 执行任务")
+async def agent(
+    task: Union[str, List[str]],
+    messages: Optional[Union[Messages, List[Messages]]] = None,
+    continue_from: Optional[str] = None,
+    context: Optional[dict] = None,
+) -> Dict[str, Any]:
+```
+
+**messages 参数**:
+- `None`:无预置消息
+- `Messages`(1D 列表):所有 agent 共享
+- `List[Messages]`(2D 列表):per-agent 独立消息
+
+运行时判断:`messages[0]` 是 dict → 1D 共享;是 list → 2D per-agent。
+
+**单任务(delegate)**:
+- 适合委托专门任务(如代码分析、文档生成)
+- 完整工具权限,可执行复杂操作
+- 支持 `continue_from` 参数续跑已有 Sub-Trace
+
+**多任务(explore)**:
 - 适合对比多个方案(如技术选型、架构设计)
 - 使用 `asyncio.gather()` 并行执行,显著提升效率
-- 每个分支创建独立的 Sub-Trace,互不干扰
+- 每个任务创建独立的 Sub-Trace,互不干扰
 - 只读权限(文件系统层面),可使用 goal 工具管理计划
+- 不支持 `continue_from`
 
-**Delegate 模式**:
-- 适合委托专门任务(如代码分析、文档生成)
-- 完整工具权限,可执行复杂操作
-- 支持 `continue_from` 参数继续执行
+### Evaluate 工具
+
+评估指定 Goal 的执行结果,提供质量评估和改进建议。
+
+```python
+@tool(description="评估目标执行结果是否满足要求")
+async def evaluate(
+    messages: Optional[Messages] = None,
+    target_goal_id: Optional[str] = None,
+    continue_from: Optional[str] = None,
+    context: Optional[dict] = None,
+) -> Dict[str, Any]:
+```
 
-**Evaluate 模式**:
-- 适合评估任务完成质量
-- 可访问目标 Goal 的执行结果
-- 提供评估结论和改进建议
+- 无 `criteria` 参数——代码自动从 GoalTree 注入目标描述
+- 模型把执行结果和上下文放在 `messages` 中
+- `target_goal_id` 默认为当前 `goal_id`
+- 只读工具权限(read_file, grep_content, glob_files, goal)
+- 返回评估结论和改进建议
 
 **Sub-Trace 结构**:
-- 每个 subagent 调用创建独立的 Sub-Trace
+- 每个 `agent`/`evaluate` 调用创建独立的 Sub-Trace
 - Sub-Trace ID 格式:`{parent_id}@{mode}-{序号}-{timestamp}-001`
 - 通过 `parent_trace_id` 和 `parent_goal_id` 建立父子关系
 - Sub-Trace 信息存储在独立的 trace 目录中
 
 **Goal 集成**:
-- Subagent 调用会将 Goal 标记为 `type: "agent_call"`
+- `agent`/`evaluate` 调用会将 Goal 标记为 `type: "agent_call"`
 - `agent_call_mode` 记录使用的模式
 - `sub_trace_ids` 记录所有创建的 Sub-Trace
 - Goal 完成后,`summary` 包含格式化的汇总结果