Pārlūkot izejas kodu

feat: refactor runner with run config, rewind, active collaborator and server api

Talegorithm 3 nedēļas atpakaļ
vecāks
revīzija
d4357a91f1

+ 2 - 1
agent/__init__.py

@@ -11,7 +11,7 @@ Reson Agent - 模块化、可扩展的 Agent 框架
 """
 """
 
 
 # 核心引擎
 # 核心引擎
-from agent.core.runner import AgentRunner, AgentConfig, CallResult
+from agent.core.runner import AgentRunner, AgentConfig, CallResult, RunConfig
 from agent.core.presets import AgentPreset, AGENT_PRESETS, get_preset
 from agent.core.presets import AgentPreset, AGENT_PRESETS, get_preset
 
 
 # 执行追踪
 # 执行追踪
@@ -36,6 +36,7 @@ __all__ = [
     "AgentRunner",
     "AgentRunner",
     "AgentConfig",
     "AgentConfig",
     "CallResult",
     "CallResult",
+    "RunConfig",
     "AgentPreset",
     "AgentPreset",
     "AGENT_PRESETS",
     "AGENT_PRESETS",
     "get_preset",
     "get_preset",

+ 2 - 1
agent/core/__init__.py

@@ -7,7 +7,7 @@ Agent Core - 核心引擎模块
 3. Agent 预设(AgentPreset)
 3. Agent 预设(AgentPreset)
 """
 """
 
 
-from agent.core.runner import AgentRunner, BUILTIN_TOOLS, AgentConfig, CallResult
+from agent.core.runner import AgentRunner, BUILTIN_TOOLS, AgentConfig, CallResult, RunConfig
 from agent.core.presets import AgentPreset, AGENT_PRESETS, get_preset, register_preset
 from agent.core.presets import AgentPreset, AGENT_PRESETS, get_preset, register_preset
 
 
 __all__ = [
 __all__ = [
@@ -15,6 +15,7 @@ __all__ = [
     "BUILTIN_TOOLS",
     "BUILTIN_TOOLS",
     "AgentConfig",
     "AgentConfig",
     "CallResult",
     "CallResult",
+    "RunConfig",
     "AgentPreset",
     "AgentPreset",
     "AGENT_PRESETS",
     "AGENT_PRESETS",
     "get_preset",
     "get_preset",

+ 651 - 459
agent/core/runner.py

@@ -6,14 +6,20 @@ Agent Runner - Agent 执行引擎
 2. 记录执行轨迹(Trace + Messages + GoalTree)
 2. 记录执行轨迹(Trace + Messages + GoalTree)
 3. 检索和注入记忆(Experience + Skill)
 3. 检索和注入记忆(Experience + Skill)
 4. 管理执行计划(GoalTree)
 4. 管理执行计划(GoalTree)
-5. 收集反馈,提取经验
+5. 支持续跑(continue)和回溯重跑(rewind)
+
+参数分层:
+- Infrastructure: AgentRunner 构造时设置(trace_store, llm_call 等)
+- RunConfig: 每次 run 时指定(model, trace_id, insert_after 等)
+- Messages: OpenAI SDK 格式的任务消息
 """
 """
 
 
-from agent.tools.builtin.browser import browser_read_long_content
+import json
 import logging
 import logging
-from dataclasses import dataclass
+import uuid
+from dataclasses import dataclass, field
 from datetime import datetime
 from datetime import datetime
-from typing import AsyncIterator, Optional, Dict, Any, List, Callable, Literal, Union
+from typing import AsyncIterator, Optional, Dict, Any, List, Callable, Literal, Tuple, Union
 
 
 from agent.trace.models import Trace, Message
 from agent.trace.models import Trace, Message
 from agent.trace.protocols import TraceStore
 from agent.trace.protocols import TraceStore
@@ -26,24 +32,39 @@ from agent.tools import ToolRegistry, get_tool_registry
 logger = logging.getLogger(__name__)
 logger = logging.getLogger(__name__)
 
 
 
 
+# ===== 运行配置 =====
+
 @dataclass
 @dataclass
-class AgentConfig:
-    """Agent 配置"""
-    agent_type: str = "default"
+class RunConfig:
+    """
+    运行参数 — 控制 Agent 如何执行
+
+    分为模型层参数(由上游 agent 或用户决定)和框架层参数(由系统注入)。
+    """
+    # --- 模型层参数 ---
+    model: str = "gpt-4o"
+    temperature: float = 0.3
     max_iterations: int = 200
     max_iterations: int = 200
+    tools: Optional[List[str]] = None          # None = 全部内置工具
+
+    # --- 框架层参数 ---
+    agent_type: str = "default"
+    uid: Optional[str] = None
+    system_prompt: Optional[str] = None        # None = 从 skills 自动构建
     enable_memory: bool = True
     enable_memory: bool = True
     auto_execute_tools: bool = True
     auto_execute_tools: bool = True
+    name: Optional[str] = None                 # 显示名称(空则由 utility_llm 自动生成)
 
 
+    # --- Trace 控制 ---
+    trace_id: Optional[str] = None             # None = 新建
+    parent_trace_id: Optional[str] = None      # 子 Agent 专用
+    parent_goal_id: Optional[str] = None
 
 
-@dataclass
-class CallResult:
-    """单次调用结果"""
-    reply: str
-    tool_calls: Optional[List[Dict]] = None
-    trace_id: Optional[str] = None
-    step_id: Optional[str] = None
-    tokens: Optional[Dict[str, int]] = None
-    cost: float = 0.0
+    # --- 续跑控制 ---
+    insert_after: Optional[int] = None         # 回溯插入点(message sequence)
+
+    # --- 额外 LLM 参数(传给 llm_call 的 **kwargs)---
+    extra_llm_params: Dict[str, Any] = field(default_factory=dict)
 
 
 
 
 # 内置工具列表(始终自动加载)
 # 内置工具列表(始终自动加载)
@@ -98,22 +119,44 @@ BUILTIN_TOOLS = [
     "browser_ensure_login_with_cookies",
     "browser_ensure_login_with_cookies",
     "browser_wait_for_user_action",
     "browser_wait_for_user_action",
     "browser_done",
     "browser_done",
-
-    # 飞书工具
-    "feishu_get_chat_history",
-    "feishu_get_contact_replies",
-    "feishu_send_message_to_contact",
-    "feishu_get_contact_list",
 ]
 ]
 
 
 
 
+# ===== 向后兼容 =====
+
+@dataclass
+class AgentConfig:
+    """[向后兼容] Agent 配置,新代码请使用 RunConfig"""
+    agent_type: str = "default"
+    max_iterations: int = 200
+    enable_memory: bool = True
+    auto_execute_tools: bool = True
+
+
+@dataclass
+class CallResult:
+    """单次调用结果"""
+    reply: str
+    tool_calls: Optional[List[Dict]] = None
+    trace_id: Optional[str] = None
+    step_id: Optional[str] = None
+    tokens: Optional[Dict[str, int]] = None
+    cost: float = 0.0
+
+
+# ===== 执行引擎 =====
+
+CONTEXT_INJECTION_INTERVAL = 10  # 每 N 轮注入一次 GoalTree + Collaborators
+
+
 class AgentRunner:
 class AgentRunner:
     """
     """
     Agent 执行引擎
     Agent 执行引擎
 
 
-    支持两种模式:
-    1. call(): 单次 LLM 调用(简洁 API)
-    2. run(): Agent 模式(循环 + 记忆 + 追踪)
+    支持三种运行模式(通过 RunConfig 区分):
+    1. 新建:trace_id=None
+    2. 续跑:trace_id=已有ID, insert_after=None
+    3. 回溯:trace_id=已有ID, insert_after=N
     """
     """
 
 
     def __init__(
     def __init__(
@@ -123,6 +166,7 @@ class AgentRunner:
         state_store: Optional[StateStore] = None,
         state_store: Optional[StateStore] = None,
         tool_registry: Optional[ToolRegistry] = None,
         tool_registry: Optional[ToolRegistry] = None,
         llm_call: Optional[Callable] = None,
         llm_call: Optional[Callable] = None,
+        utility_llm_call: Optional[Callable] = None,
         config: Optional[AgentConfig] = None,
         config: Optional[AgentConfig] = None,
         skills_dir: Optional[str] = None,
         skills_dir: Optional[str] = None,
         goal_tree: Optional[GoalTree] = None,
         goal_tree: Optional[GoalTree] = None,
@@ -132,162 +176,99 @@ class AgentRunner:
         初始化 AgentRunner
         初始化 AgentRunner
 
 
         Args:
         Args:
-            trace_store: Trace 存储(可选,不提供则不记录)
-            memory_store: Memory 存储(可选,不提供则不使用记忆)
-            state_store: State 存储(可选,用于任务状态)
-            tool_registry: 工具注册表(可选,默认使用全局注册表)
-            llm_call: LLM 调用函数(必须提供,用于实际调用 LLM)
-            config: Agent 配置
-            skills_dir: Skills 目录路径(可选,不提供则不加载 skills)
-            goal_tree: 执行计划(可选,不提供则在运行时按需创建)
-            debug: 保留参数(已废弃,请使用 API Server 可视化)
+            trace_store: Trace 存储
+            memory_store: Memory 存储(可选)
+            state_store: State 存储(可选)
+            tool_registry: 工具注册表(默认使用全局注册表)
+            llm_call: 主 LLM 调用函数
+            utility_llm_call: 轻量 LLM(用于生成任务标题等),可选
+            config: [向后兼容] AgentConfig
+            skills_dir: Skills 目录路径
+            goal_tree: 初始 GoalTree(可选)
+            debug: 保留参数(已废弃)
         """
         """
         self.trace_store = trace_store
         self.trace_store = trace_store
         self.memory_store = memory_store
         self.memory_store = memory_store
         self.state_store = state_store
         self.state_store = state_store
         self.tools = tool_registry or get_tool_registry()
         self.tools = tool_registry or get_tool_registry()
         self.llm_call = llm_call
         self.llm_call = llm_call
+        self.utility_llm_call = utility_llm_call
         self.config = config or AgentConfig()
         self.config = config or AgentConfig()
         self.skills_dir = skills_dir
         self.skills_dir = skills_dir
         self.goal_tree = goal_tree
         self.goal_tree = goal_tree
         self.debug = debug
         self.debug = debug
 
 
-    def _generate_id(self) -> str:
-        """生成唯一 ID"""
-        import uuid
-        return str(uuid.uuid4())
+    # ===== 核心公开方法 =====
 
 
-    # ===== 单次调用 =====
-
-    async def call(
+    async def run(
         self,
         self,
         messages: List[Dict],
         messages: List[Dict],
-        model: str = "gpt-4o",
-        tools: Optional[List[str]] = None,
-        uid: Optional[str] = None,
-        trace: bool = True,
-        **kwargs
-    ) -> CallResult:
+        config: Optional[RunConfig] = None,
+    ) -> AsyncIterator[Union[Trace, Message]]:
         """
         """
-        单次 LLM 调用
+        Agent 模式执行(核心方法)
 
 
         Args:
         Args:
-            messages: 消息列表
-            model: 模型名称
-            tools: 工具名称列表
-            uid: 用户 ID
-            trace: 是否记录 Trace
-            **kwargs: 其他参数传递给 LLM
+            messages: OpenAI SDK 格式的输入消息
+                新建: 初始任务消息 [{"role": "user", "content": "..."}]
+                续跑: 追加的新消息
+                回溯: 在插入点之后追加的消息
+            config: 运行配置
 
 
-        Returns:
-            CallResult
+        Yields:
+            Union[Trace, Message]: Trace 对象(状态变化)或 Message 对象(执行过程)
         """
         """
         if not self.llm_call:
         if not self.llm_call:
             raise ValueError("llm_call function not provided")
             raise ValueError("llm_call function not provided")
 
 
-        trace_id = None
-        message_id = None
-
-        # 准备工具 Schema
-        tool_names = BUILTIN_TOOLS.copy()
-        if tools:
-            for tool in tools:
-                if tool not in tool_names:
-                    tool_names.append(tool)
-        tool_schemas = self.tools.get_schemas(tool_names)
+        config = config or RunConfig()
+        trace = None
 
 
-        # 创建 Trace
-        if trace and self.trace_store:
-            trace_obj = Trace.create(
-                mode="call",
-                uid=uid,
-                model=model,
-                tools=tool_schemas,  # 保存工具定义
-                llm_params=kwargs,  # 保存 LLM 参数
-            )
-            trace_id = await self.trace_store.create_trace(trace_obj)
-
-        # 调用 LLM
-        result = await self.llm_call(
-            messages=messages,
-            model=model,
-            tools=tool_schemas,
-            **kwargs
-        )
-
-        # 记录 Message(单次调用模式不使用 GoalTree)
-        if trace and self.trace_store and trace_id:
-            msg = Message.create(
-                trace_id=trace_id,
-                role="assistant",
-                sequence=1,
-                goal_id=None,  # 单次调用没有 goal
-                content={"text": result.get("content", ""), "tool_calls": result.get("tool_calls")},
-                prompt_tokens=result.get("prompt_tokens", 0),
-                completion_tokens=result.get("completion_tokens", 0),
-                finish_reason=result.get("finish_reason"),
-                cost=result.get("cost", 0),
-            )
-            message_id = await self.trace_store.add_message(msg)
+        try:
+            # Phase 1: PREPARE TRACE
+            trace, goal_tree, sequence = await self._prepare_trace(messages, config)
+            yield trace
 
 
-            # 完成 Trace
-            await self.trace_store.update_trace(
-                trace_id,
-                status="completed",
-                completed_at=datetime.now(),
+            # Phase 2: BUILD HISTORY
+            history, sequence, created_messages = await self._build_history(
+                trace.trace_id, messages, goal_tree, config, sequence
             )
             )
+            for msg in created_messages:
+                yield msg
 
 
-        return CallResult(
-            reply=result.get("content", ""),
-            tool_calls=result.get("tool_calls"),
-            trace_id=trace_id,
-            step_id=message_id,  # 兼容字段名
-            tokens={
-                "prompt": result.get("prompt_tokens", 0),
-                "completion": result.get("completion_tokens", 0),
-            },
-            cost=result.get("cost", 0)
-        )
+            # Phase 3: AGENT LOOP
+            async for event in self._agent_loop(trace, history, goal_tree, config, sequence):
+                yield event
 
 
-    # ===== Agent 模式 =====
+        except Exception as e:
+            logger.error(f"Agent run failed: {e}")
+            tid = config.trace_id or (trace.trace_id if trace else None)
+            if self.trace_store and tid:
+                await self.trace_store.update_trace(
+                    tid,
+                    status="failed",
+                    error_message=str(e),
+                    completed_at=datetime.now()
+                )
+                trace_obj = await self.trace_store.get_trace(tid)
+                if trace_obj:
+                    yield trace_obj
+            raise
 
 
     async def run_result(
     async def run_result(
         self,
         self,
-        task: str,
-        messages: Optional[List[Dict]] = None,
-        system_prompt: Optional[str] = None,
-        model: str = "gpt-4o",
-        tools: Optional[List[str]] = None,
-        agent_type: Optional[str] = None,
-        uid: Optional[str] = None,
-        max_iterations: Optional[int] = None,
-        enable_memory: Optional[bool] = None,
-        auto_execute_tools: Optional[bool] = None,
-        trace_id: Optional[str] = None,
-        **kwargs
+        messages: List[Dict],
+        config: Optional[RunConfig] = None,
     ) -> Dict[str, Any]:
     ) -> Dict[str, Any]:
         """
         """
-        Agent 结果模式执行
+        结果模式 — 消费 run(),返回结构化结果。
 
 
-        消费 run() 的流式事件,返回结构化结果(最后一条有文本的 assistant + trace 统计)
+        主要用于 subagent 工具内部。
         """
         """
         last_assistant_text = ""
         last_assistant_text = ""
         final_trace: Optional[Trace] = None
         final_trace: Optional[Trace] = None
 
 
-        async for item in self.run(
-            task=task,
-            messages=messages,
-            system_prompt=system_prompt,
-            model=model,
-            tools=tools,
-            agent_type=agent_type,
-            uid=uid,
-            max_iterations=max_iterations,
-            enable_memory=enable_memory,
-            auto_execute_tools=auto_execute_tools,
-            trace_id=trace_id,
-            **kwargs
-        ):
+        async for item in self.run(messages=messages, config=config):
             if isinstance(item, Message) and item.role == "assistant":
             if isinstance(item, Message) and item.role == "assistant":
                 content = item.content
                 content = item.content
                 text = ""
                 text = ""
@@ -300,8 +281,9 @@ class AgentRunner:
             elif isinstance(item, Trace):
             elif isinstance(item, Trace):
                 final_trace = item
                 final_trace = item
 
 
-        if not final_trace and trace_id and self.trace_store:
-            final_trace = await self.trace_store.get_trace(trace_id)
+        config = config or RunConfig()
+        if not final_trace and config.trace_id and self.trace_store:
+            final_trace = await self.trace_store.get_trace(config.trace_id)
 
 
         status = final_trace.status if final_trace else "unknown"
         status = final_trace.status if final_trace else "unknown"
         error = final_trace.error_message if final_trace else None
         error = final_trace.error_message if final_trace else None
@@ -309,12 +291,12 @@ class AgentRunner:
 
 
         if not summary:
         if not summary:
             status = "failed"
             status = "failed"
-            error = error or "Sub-Agent 没有产生 assistant 文本结果"
+            error = error or "Agent 没有产生 assistant 文本结果"
 
 
         return {
         return {
             "status": status,
             "status": status,
             "summary": summary,
             "summary": summary,
-            "trace_id": final_trace.trace_id if final_trace else trace_id,
+            "trace_id": final_trace.trace_id if final_trace else config.trace_id,
             "error": error,
             "error": error,
             "stats": {
             "stats": {
                 "total_messages": final_trace.total_messages if final_trace else 0,
                 "total_messages": final_trace.total_messages if final_trace else 0,
@@ -323,51 +305,26 @@ class AgentRunner:
             },
             },
         }
         }
 
 
-    async def run(
+    # ===== 单次调用(保留)=====
+
+    async def call(
         self,
         self,
-        task: str,
-        messages: Optional[List[Dict]] = None,
-        system_prompt: Optional[str] = None,
+        messages: List[Dict],
         model: str = "gpt-4o",
         model: str = "gpt-4o",
         tools: Optional[List[str]] = None,
         tools: Optional[List[str]] = None,
-        agent_type: Optional[str] = None,
         uid: Optional[str] = None,
         uid: Optional[str] = None,
-        max_iterations: Optional[int] = None,
-        enable_memory: Optional[bool] = None,
-        auto_execute_tools: Optional[bool] = None,
-        trace_id: Optional[str] = None,
+        trace: bool = True,
         **kwargs
         **kwargs
-    ) -> AsyncIterator[Union[Trace, Message]]:
+    ) -> CallResult:
         """
         """
-        Agent 模式执行
-
-        Args:
-            task: 任务描述
-            messages: 初始消息(可选)
-            system_prompt: 系统提示(可选)
-            model: 模型名称
-            tools: 工具名称列表
-            agent_type: Agent 类型
-            uid: 用户 ID
-            max_iterations: 最大迭代次数
-            enable_memory: 是否启用记忆
-            auto_execute_tools: 是否自动执行工具
-            trace_id: Trace ID(可选,传入时复用已有 Trace)
-            **kwargs: 其他参数
-
-        Yields:
-            Union[Trace, Message]: Trace 对象(状态变化)或 Message 对象(执行过程)
+        单次 LLM 调用(无 Agent Loop)
         """
         """
         if not self.llm_call:
         if not self.llm_call:
             raise ValueError("llm_call function not provided")
             raise ValueError("llm_call function not provided")
 
 
-        # 使用配置默认值
-        agent_type = agent_type or self.config.agent_type
-        max_iterations = max_iterations or self.config.max_iterations
-        enable_memory = enable_memory if enable_memory is not None else self.config.enable_memory
-        auto_execute_tools = auto_execute_tools if auto_execute_tools is not None else self.config.auto_execute_tools
+        trace_id = None
+        message_id = None
 
 
-        # 准备工具 Schema(提前准备,用于 Trace)
         tool_names = BUILTIN_TOOLS.copy()
         tool_names = BUILTIN_TOOLS.copy()
         if tools:
         if tools:
             for tool in tools:
             for tool in tools:
@@ -375,319 +332,554 @@ class AgentRunner:
                     tool_names.append(tool)
                     tool_names.append(tool)
         tool_schemas = self.tools.get_schemas(tool_names)
         tool_schemas = self.tools.get_schemas(tool_names)
 
 
-        # 创建或复用 Trace
-        if trace_id:
-            if self.trace_store:
-                trace_obj = await self.trace_store.get_trace(trace_id)
-                if not trace_obj:
-                    raise ValueError(f"Trace not found: {trace_id}")
-            else:
-                trace_obj = Trace(
-                    trace_id=trace_id,
-                    mode="agent",
-                    task=task,
-                    agent_type=agent_type,
-                    uid=uid,
-                    model=model,
-                    tools=tool_schemas,
-                    llm_params=kwargs,
-                    status="running"
-                )
+        if trace and self.trace_store:
+            trace_obj = Trace.create(mode="call", uid=uid, model=model, tools=tool_schemas, llm_params=kwargs)
+            trace_id = await self.trace_store.create_trace(trace_obj)
+
+        result = await self.llm_call(messages=messages, model=model, tools=tool_schemas, **kwargs)
+
+        if trace and self.trace_store and trace_id:
+            msg = Message.create(
+                trace_id=trace_id, role="assistant", sequence=1, goal_id=None,
+                content={"text": result.get("content", ""), "tool_calls": result.get("tool_calls")},
+                prompt_tokens=result.get("prompt_tokens", 0),
+                completion_tokens=result.get("completion_tokens", 0),
+                finish_reason=result.get("finish_reason"),
+                cost=result.get("cost", 0),
+            )
+            message_id = await self.trace_store.add_message(msg)
+            await self.trace_store.update_trace(trace_id, status="completed", completed_at=datetime.now())
+
+        return CallResult(
+            reply=result.get("content", ""),
+            tool_calls=result.get("tool_calls"),
+            trace_id=trace_id,
+            step_id=message_id,
+            tokens={"prompt": result.get("prompt_tokens", 0), "completion": result.get("completion_tokens", 0)},
+            cost=result.get("cost", 0)
+        )
+
+    # ===== Phase 1: PREPARE TRACE =====
+
+    async def _prepare_trace(
+        self,
+        messages: List[Dict],
+        config: RunConfig,
+    ) -> Tuple[Trace, Optional[GoalTree], int]:
+        """
+        准备 Trace:创建新的或加载已有的
+
+        Returns:
+            (trace, goal_tree, next_sequence)
+        """
+        if config.trace_id:
+            return await self._prepare_existing_trace(config)
         else:
         else:
-            trace_id = self._generate_id()
-            trace_obj = Trace(
-                trace_id=trace_id,
-                mode="agent",
-                task=task,
-                agent_type=agent_type,
-                uid=uid,
-                model=model,
-                tools=tool_schemas,  # 保存工具定义
-                llm_params=kwargs,  # 保存 LLM 参数
-                status="running"
+            return await self._prepare_new_trace(messages, config)
+
+    async def _prepare_new_trace(
+        self,
+        messages: List[Dict],
+        config: RunConfig,
+    ) -> Tuple[Trace, Optional[GoalTree], int]:
+        """创建新 Trace"""
+        trace_id = str(uuid.uuid4())
+
+        # 生成任务名称
+        task_name = config.name or await self._generate_task_name(messages)
+
+        # 准备工具 Schema
+        tool_schemas = self._get_tool_schemas(config.tools)
+
+        trace_obj = Trace(
+            trace_id=trace_id,
+            mode="agent",
+            task=task_name,
+            agent_type=config.agent_type,
+            parent_trace_id=config.parent_trace_id,
+            parent_goal_id=config.parent_goal_id,
+            uid=config.uid,
+            model=config.model,
+            tools=tool_schemas,
+            llm_params={"temperature": config.temperature, **config.extra_llm_params},
+            status="running",
+        )
+
+        goal_tree = self.goal_tree or GoalTree(mission=task_name)
+
+        if self.trace_store:
+            await self.trace_store.create_trace(trace_obj)
+            await self.trace_store.update_goal_tree(trace_id, goal_tree)
+
+        return trace_obj, goal_tree, 1
+
+    async def _prepare_existing_trace(
+        self,
+        config: RunConfig,
+    ) -> Tuple[Trace, Optional[GoalTree], int]:
+        """加载已有 Trace(续跑或回溯)"""
+        if not self.trace_store:
+            raise ValueError("trace_store required for continue/rewind")
+
+        trace_obj = await self.trace_store.get_trace(config.trace_id)
+        if not trace_obj:
+            raise ValueError(f"Trace not found: {config.trace_id}")
+
+        goal_tree = await self.trace_store.get_goal_tree(config.trace_id)
+
+        if config.insert_after is not None:
+            # 回溯模式
+            sequence = await self._rewind(config.trace_id, config.insert_after, goal_tree)
+        else:
+            # 续跑模式:从最大 sequence + 1 开始
+            all_messages = await self.trace_store.get_trace_messages(
+                config.trace_id, include_abandoned=True
             )
             )
+            sequence = max((m.sequence for m in all_messages), default=0) + 1
 
 
-            if self.trace_store:
-                await self.trace_store.create_trace(trace_obj)
+        # 状态置为 running
+        await self.trace_store.update_trace(
+            config.trace_id,
+            status="running",
+            completed_at=None,
+        )
+        trace_obj.status = "running"
 
 
-                # 初始化 GoalTree
-                goal_tree = self.goal_tree or GoalTree(mission=task)
-                await self.trace_store.update_goal_tree(trace_id, goal_tree)
+        return trace_obj, goal_tree, sequence
 
 
-        # 返回 Trace(表示开始)
-        yield trace_obj
+    # ===== Phase 2: BUILD HISTORY =====
+
+    async def _build_history(
+        self,
+        trace_id: str,
+        new_messages: List[Dict],
+        goal_tree: Optional[GoalTree],
+        config: RunConfig,
+        sequence: int,
+    ) -> Tuple[List[Dict], int, List[Message]]:
+        """
+        构建完整的 LLM 消息历史
+
+        1. 加载已有 active messages(续跑/回溯场景)
+        2. 构建 system prompt(新建时注入 skills/experiences)
+        3. 追加 input messages
+
+        Returns:
+            (history, next_sequence, created_messages)
+            created_messages: 本次新创建并持久化的 Message 列表,供 run() yield 给调用方
+        """
+        history: List[Dict] = []
+        created_messages: List[Message] = []
+
+        # 1. 加载已有 messages
+        if config.trace_id and self.trace_store:
+            existing_messages = await self.trace_store.get_trace_messages(trace_id)
+            history = [msg.to_llm_dict() for msg in existing_messages]
+
+        # 2. 构建 system prompt(如果历史中没有 system message)
+        has_system = any(m.get("role") == "system" for m in history)
+        has_system_in_new = any(m.get("role") == "system" for m in new_messages)
+
+        if not has_system and not has_system_in_new:
+            system_prompt = await self._build_system_prompt(config)
+            if system_prompt:
+                history = [{"role": "system", "content": system_prompt}] + history
 
 
-        try:
-            # 加载记忆(Experience 和 Skill)
-            experiences_text = ""
-            skills_text = ""
-
-            if enable_memory and self.memory_store:
-                scope = f"agent:{agent_type}"
-                experiences = await self.memory_store.search_experiences(scope, task)
-                experiences_text = self._format_experiences(experiences)
-                logger.info(f"加载 {len(experiences)} 条经验")
-
-            # 加载 Skills(内置 + 用户自定义)
-            skills = load_skills_from_dir(self.skills_dir)
-            if skills:
-                skills_text = self._format_skills(skills)
-                if self.skills_dir:
-                    logger.info(f"加载 {len(skills)} 个 skills (内置 + 自定义: {self.skills_dir})")
-                else:
-                    logger.info(f"加载 {len(skills)} 个内置 skills")
-
-            # 构建初始消息
-            sequence = 1
-            if messages is None:
-                if trace_id and self.trace_store:
-                    existing_messages = await self.trace_store.get_trace_messages(trace_id)
-                    messages = []
-                    for msg in existing_messages:
-                        msg_dict = {"role": msg.role}
-                        if isinstance(msg.content, dict):
-                            if msg.content.get("text"):
-                                msg_dict["content"] = msg.content["text"]
-                            if msg.content.get("tool_calls"):
-                                msg_dict["tool_calls"] = msg.content["tool_calls"]
-                        else:
-                            msg_dict["content"] = msg.content
-
-                        if msg.role == "tool" and msg.tool_call_id:
-                            msg_dict["tool_call_id"] = msg.tool_call_id
-                            msg_dict["name"] = msg.description or "unknown"
-
-                        messages.append(msg_dict)
-
-                    if existing_messages:
-                        sequence = existing_messages[-1].sequence + 1
-                else:
-                    messages = []
-            # 记录初始 system 和 user 消息到 trace
-
-            if system_prompt and not any(m.get("role") == "system" for m in messages):
-                # 注入记忆和 skills 到 system prompt
-                full_system = system_prompt
-                if skills_text:
-                    full_system += f"\n\n## Skills\n{skills_text}"
-                if experiences_text:
-                    full_system += f"\n\n## 相关经验\n{experiences_text}"
-
-                messages = [{"role": "system", "content": full_system}] + messages
-
-                # 保存 system 消息
                 if self.trace_store:
                 if self.trace_store:
                     system_msg = Message.create(
                     system_msg = Message.create(
-                        trace_id=trace_id,
-                        role="system",
-                        sequence=sequence,
-                        goal_id=None,  # 初始消息没有 goal
-                        content=full_system,
+                        trace_id=trace_id, role="system", sequence=sequence,
+                        goal_id=None, content=system_prompt,
                     )
                     )
                     await self.trace_store.add_message(system_msg)
                     await self.trace_store.add_message(system_msg)
-                    yield system_msg
+                    created_messages.append(system_msg)
                     sequence += 1
                     sequence += 1
 
 
-            # 添加任务描述(支持 continue_from 场景再次追加)
-            if task:
-                messages.append({"role": "user", "content": task})
+        # 3. 追加新 messages
+        for msg_dict in new_messages:
+            history.append(msg_dict)
 
 
-                # 保存 user 消息(任务描述)
-                if self.trace_store:
-                    user_msg = Message.create(
-                        trace_id=trace_id,
-                        role="user",
-                        sequence=sequence,
-                        goal_id=None,  # 初始消息没有 goal
-                        content=task,
+            if self.trace_store:
+                stored_msg = Message.from_llm_dict(
+                    msg_dict, trace_id=trace_id, sequence=sequence, goal_id=None
+                )
+                await self.trace_store.add_message(stored_msg)
+                created_messages.append(stored_msg)
+                sequence += 1
+
+        return history, sequence, created_messages
+
+    # ===== Phase 3: AGENT LOOP =====
+
+    async def _agent_loop(
+        self,
+        trace: Trace,
+        history: List[Dict],
+        goal_tree: Optional[GoalTree],
+        config: RunConfig,
+        sequence: int,
+    ) -> AsyncIterator[Union[Trace, Message]]:
+        """ReAct 循环"""
+        trace_id = trace.trace_id
+        tool_schemas = self._get_tool_schemas(config.tools)
+
+        # 设置 goal_tree 到 goal 工具
+        if goal_tree and self.trace_store:
+            from agent.trace.goal_tool import set_goal_tree
+            set_goal_tree(goal_tree)
+
+        for iteration in range(config.max_iterations):
+            # 构建 LLM messages(注入上下文)
+            llm_messages = list(history)
+
+            # 周期性注入 GoalTree + Collaborators
+            if iteration % CONTEXT_INJECTION_INTERVAL == 0:
+                context_injection = self._build_context_injection(trace, goal_tree)
+                if context_injection:
+                    llm_messages.append({"role": "system", "content": context_injection})
+
+            # 调用 LLM
+            result = await self.llm_call(
+                messages=llm_messages,
+                model=config.model,
+                tools=tool_schemas,
+                temperature=config.temperature,
+                **config.extra_llm_params,
+            )
+
+            response_content = result.get("content", "")
+            tool_calls = result.get("tool_calls")
+            finish_reason = result.get("finish_reason")
+            prompt_tokens = result.get("prompt_tokens", 0)
+            completion_tokens = result.get("completion_tokens", 0)
+            step_cost = result.get("cost", 0)
+
+            # 按需自动创建 root goal
+            if goal_tree and not goal_tree.goals and tool_calls:
+                has_goal_call = any(
+                    tc.get("function", {}).get("name") == "goal"
+                    for tc in tool_calls
+                )
+                if not has_goal_call:
+                    mission = goal_tree.mission
+                    root_desc = mission[:200] if len(mission) > 200 else mission
+                    goal_tree.add_goals(
+                        descriptions=[root_desc],
+                        reasons=["系统自动创建:Agent 未显式创建目标"],
+                        parent_id=None
                     )
                     )
-                    await self.trace_store.add_message(user_msg)
-                    yield user_msg
-                    sequence += 1
+                    goal_tree.focus(goal_tree.goals[0].id)
+                    if self.trace_store:
+                        await self.trace_store.update_goal_tree(trace_id, goal_tree)
+                        await self.trace_store.add_goal(trace_id, goal_tree.goals[0])
+                    logger.info(f"自动创建 root goal: {goal_tree.goals[0].id}")
+
+            # 获取当前 goal_id
+            current_goal_id = goal_tree.current_id if (goal_tree and goal_tree.current_id) else None
+
+            # 记录 assistant Message
+            assistant_msg = Message.create(
+                trace_id=trace_id,
+                role="assistant",
+                sequence=sequence,
+                goal_id=current_goal_id,
+                content={"text": response_content, "tool_calls": tool_calls},
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                finish_reason=finish_reason,
+                cost=step_cost,
+            )
 
 
-            # 获取 GoalTree
-            goal_tree = None
             if self.trace_store:
             if self.trace_store:
-                goal_tree = await self.trace_store.get_goal_tree(trace_id)
-
-                # 设置 goal_tree 到 goal 工具(供 LLM 调用)
-                from agent.trace.goal_tool import set_goal_tree
-                set_goal_tree(goal_tree)
-
-            # 执行循环
-            for iteration in range(max_iterations):
-                # 注入当前计划到 messages(如果有 goals)
-                llm_messages = list(messages)
-                if goal_tree and goal_tree.goals:
-                    plan_text = f"\n## Current Plan\n\n{goal_tree.to_prompt()}"
-                    # 在最后一条 system 消息之后注入
-                    llm_messages.append({"role": "system", "content": plan_text})
-
-                # 调用 LLM
-                result = await self.llm_call(
-                    messages=llm_messages,
-                    model=model,
-                    tools=tool_schemas,
-                    **kwargs
-                )
+                await self.trace_store.add_message(assistant_msg)
+
+            yield assistant_msg
+            sequence += 1
+
+            # 处理工具调用
+            if tool_calls and config.auto_execute_tools:
+                history.append({
+                    "role": "assistant",
+                    "content": response_content,
+                    "tool_calls": tool_calls,
+                })
+
+                for tc in tool_calls:
+                    current_goal_id = goal_tree.current_id if (goal_tree and goal_tree.current_id) else None
+
+                    tool_name = tc["function"]["name"]
+                    tool_args = tc["function"]["arguments"]
+
+                    if isinstance(tool_args, str):
+                        tool_args = json.loads(tool_args) if tool_args.strip() else {}
+                    elif tool_args is None:
+                        tool_args = {}
+
+                    tool_result = await self.tools.execute(
+                        tool_name,
+                        tool_args,
+                        uid=config.uid or "",
+                        context={
+                            "store": self.trace_store,
+                            "trace_id": trace_id,
+                            "goal_id": current_goal_id,
+                            "runner": self,
+                        }
+                    )
 
 
-                response_content = result.get("content", "")
-                tool_calls = result.get("tool_calls")
-                finish_reason = result.get("finish_reason")
-                prompt_tokens = result.get("prompt_tokens", 0)
-                completion_tokens = result.get("completion_tokens", 0)
-                step_tokens = prompt_tokens + completion_tokens
-                step_cost = result.get("cost", 0)
-
-                # 按需自动创建 root goal:LLM 有 tool 调用但未主动创建目标时兜底
-                if goal_tree and not goal_tree.goals and tool_calls:
-                    has_goal_call = any(
-                        tc.get("function", {}).get("name") == "goal"
-                        for tc in tool_calls
+                    tool_msg = Message.create(
+                        trace_id=trace_id,
+                        role="tool",
+                        sequence=sequence,
+                        goal_id=current_goal_id,
+                        tool_call_id=tc["id"],
+                        content={"tool_name": tool_name, "result": tool_result},
                     )
                     )
-                    if not has_goal_call:
-                        root_desc = goal_tree.mission[:200] if len(goal_tree.mission) > 200 else goal_tree.mission
-                        goal_tree.add_goals(
-                            descriptions=[root_desc],
-                            reasons=["系统自动创建:Agent 未显式创建目标"],
-                            parent_id=None
-                        )
-                        goal_tree.focus(goal_tree.goals[0].id)
-                        if self.trace_store:
-                            await self.trace_store.update_goal_tree(trace_id, goal_tree)
-                            await self.trace_store.add_goal(trace_id, goal_tree.goals[0])
-                        logger.info(f"自动创建 root goal: {goal_tree.goals[0].id}")
-
-                # 获取当前 goal_id
-                current_goal_id = goal_tree.current_id if (goal_tree and goal_tree.current_id) else None
-
-                # 记录 assistant Message
-                assistant_msg = Message.create(
-                    trace_id=trace_id,
-                    role="assistant",
-                    sequence=sequence,
-                    goal_id=current_goal_id,
-                    content={"text": response_content, "tool_calls": tool_calls},
-                    prompt_tokens=prompt_tokens,
-                    completion_tokens=completion_tokens,
-                    finish_reason=finish_reason,
-                    cost=step_cost,
-                )
 
 
-                if self.trace_store:
-                    await self.trace_store.add_message(assistant_msg)
-                    # WebSocket 广播由 add_message 内部的 append_event 触发
+                    if self.trace_store:
+                        await self.trace_store.add_message(tool_msg)
 
 
-                yield assistant_msg
-                sequence += 1
+                    yield tool_msg
+                    sequence += 1
 
 
-                # 处理工具调用
-                if tool_calls and auto_execute_tools:
-                    # 添加 assistant 消息到对话历史
-                    messages.append({
-                        "role": "assistant",
-                        "content": response_content,
-                        "tool_calls": tool_calls,
+                    history.append({
+                        "role": "tool",
+                        "tool_call_id": tc["id"],
+                        "name": tool_name,
+                        "content": str(tool_result),
                     })
                     })
 
 
-                    for tc in tool_calls:
-                        # 每次工具执行前重新获取最新的 goal_id(处理并行 tool_calls 的情况)
-                        current_goal_id = goal_tree.current_id if (goal_tree and goal_tree.current_id) else None
-
-                        tool_name = tc["function"]["name"]
-                        tool_args = tc["function"]["arguments"]
-
-                        # 解析参数
-                        if isinstance(tool_args, str):
-                            if tool_args.strip():  # 非空字符串
-                                import json
-                                tool_args = json.loads(tool_args)
-                            else:
-                                tool_args = {}  # 空字符串转换为空字典
-                        elif tool_args is None:
-                            tool_args = {}  # None 转换为空字典
-
-                        # 执行工具(统一处理,传递 context)
-                        tool_result = await self.tools.execute(
-                            tool_name,
-                            tool_args,
-                            uid=uid or "",
-                            context={
-                                "store": self.trace_store,
-                                "trace_id": trace_id,
-                                "goal_id": current_goal_id,
-                                "runner": self,
-                            }
-                        )
-
-                        # 记录 tool Message
-                        tool_msg = Message.create(
-                            trace_id=trace_id,
-                            role="tool",
-                            sequence=sequence,
-                            goal_id=current_goal_id,
-                            tool_call_id=tc["id"],
-                            content={"tool_name": tool_name, "result": tool_result},
-                        )
-
-                        if self.trace_store:
-                            await self.trace_store.add_message(tool_msg)
-
-                        yield tool_msg
-                        sequence += 1
-
-                        # 添加到消息历史
-                        messages.append({
-                            "role": "tool",
-                            "tool_call_id": tc["id"],
-                            "name": tool_name,
-                            "content": str(tool_result),
-                        })
-
-                    continue  # 继续循环
-
-                # 无工具调用,任务完成
+                continue  # 继续循环
+
+            # 无工具调用,任务完成
+            break
+
+        # 完成 Trace
+        if self.trace_store:
+            await self.trace_store.update_trace(
+                trace_id,
+                status="completed",
+                completed_at=datetime.now(),
+            )
+            trace_obj = await self.trace_store.get_trace(trace_id)
+            if trace_obj:
+                yield trace_obj
+
+    # ===== 回溯(Rewind)=====
+
+    async def _rewind(
+        self,
+        trace_id: str,
+        insert_after: int,
+        goal_tree: Optional[GoalTree],
+    ) -> int:
+        """
+        执行回溯:标记 insert_after 之后的 messages 和 goals 为 abandoned
+
+        Returns:
+            下一个可用的 sequence 号
+        """
+        if not self.trace_store:
+            raise ValueError("trace_store required for rewind")
+
+        # 1. 加载所有 messages(含已 abandoned 的)
+        all_messages = await self.trace_store.get_trace_messages(
+            trace_id, include_abandoned=True
+        )
+
+        if not all_messages:
+            return 1
+
+        # 2. 找到安全截断点(确保不截断在 tool_call 和 tool response 之间)
+        cutoff = self._find_safe_cutoff(all_messages, insert_after)
+
+        # 3. 批量标记 messages 为 abandoned
+        abandoned_ids = await self.trace_store.abandon_messages_after(trace_id, cutoff)
+
+        # 4. 处理 Goals
+        if goal_tree:
+            active_messages = [m for m in all_messages if m.sequence <= cutoff]
+            active_goal_ids = {m.goal_id for m in active_messages if m.goal_id}
+
+            for goal in goal_tree.goals:
+                if goal.status == "abandoned":
+                    continue  # 已 abandoned,跳过
+                if goal.status == "completed" and goal.id in active_goal_ids:
+                    continue  # 已完成且有截断点之前的 messages → 保留
+                # 其余全部 abandon(含无 active messages 的 completed goal)
+                goal.status = "abandoned"
+                goal.summary = "回溯导致放弃"
+
+            # 重置 current_id
+            goal_tree._current_id = None
+
+            await self.trace_store.update_goal_tree(trace_id, goal_tree)
+
+        # 5. 记录 rewind 事件
+        abandoned_sequences = [
+            m.sequence for m in all_messages
+            if m.sequence > cutoff and m.status != "abandoned"  # 本次新 abandon 的
+        ]
+        await self.trace_store.append_event(trace_id, "rewind", {
+            "insert_after_sequence": cutoff,
+            "abandoned_message_count": len(abandoned_ids),
+            "abandoned_sequences": abandoned_sequences[:20],  # 只记前 20 条
+        })
+
+        # 6. 返回 next sequence
+        max_seq = max((m.sequence for m in all_messages), default=0)
+        return max_seq + 1
+
+    def _find_safe_cutoff(self, messages: List[Message], insert_after: int) -> int:
+        """
+        找到安全的截断点。
+
+        如果 insert_after 指向一条带 tool_calls 的 assistant message,
+        则自动扩展到其所有对应的 tool response 之后。
+        """
+        cutoff = insert_after
+
+        # 找到 insert_after 对应的 message
+        target_msg = None
+        for msg in messages:
+            if msg.sequence == insert_after:
+                target_msg = msg
                 break
                 break
 
 
-            # 完成 Trace
-            if self.trace_store:
-                trace_obj = await self.trace_store.get_trace(trace_id)
-                if trace_obj:
-                    await self.trace_store.update_trace(
-                        trace_id,
-                        status="completed",
-                        completed_at=datetime.now(),
-                    )
-                    # 重新获取更新后的 Trace 并返回
-                    trace_obj = await self.trace_store.get_trace(trace_id)
-                    if trace_obj:
-                        yield trace_obj
+        if not target_msg:
+            return cutoff
 
 
-        except Exception as e:
-            logger.error(f"Agent run failed: {e}")
+        # 如果是 assistant 且有 tool_calls,找到所有对应的 tool responses
+        if target_msg.role == "assistant":
+            content = target_msg.content
+            if isinstance(content, dict) and content.get("tool_calls"):
+                tool_call_ids = set()
+                for tc in content["tool_calls"]:
+                    if isinstance(tc, dict) and tc.get("id"):
+                        tool_call_ids.add(tc["id"])
 
 
-            if self.trace_store:
-                await self.trace_store.update_trace(
-                    trace_id,
-                    status="failed",
-                    error_message=str(e),
-                    completed_at=datetime.now()
-                )
-                trace_obj = await self.trace_store.get_trace(trace_id)
-                if trace_obj:
-                    yield trace_obj
-            raise
+                # 找到这些 tool_call 对应的 tool messages
+                for msg in messages:
+                    if (msg.role == "tool" and msg.tool_call_id
+                            and msg.tool_call_id in tool_call_ids):
+                        cutoff = max(cutoff, msg.sequence)
+
+        return cutoff
+
+    # ===== 上下文注入 =====
+
+    def _build_context_injection(
+        self,
+        trace: Trace,
+        goal_tree: Optional[GoalTree],
+    ) -> str:
+        """构建周期性注入的上下文(GoalTree + Active Collaborators)"""
+        parts = []
+
+        # GoalTree
+        if goal_tree and goal_tree.goals:
+            parts.append(f"## Current Plan\n\n{goal_tree.to_prompt()}")
+
+        # Active Collaborators
+        collaborators = trace.context.get("collaborators", [])
+        if collaborators:
+            lines = ["## Active Collaborators"]
+            for c in collaborators:
+                status_str = c.get("status", "unknown")
+                ctype = c.get("type", "agent")
+                summary = c.get("summary", "")
+                name = c.get("name", "unnamed")
+                lines.append(f"- {name} [{ctype}, {status_str}]: {summary}")
+            parts.append("\n".join(lines))
+
+        return "\n\n".join(parts)
 
 
     # ===== 辅助方法 =====
     # ===== 辅助方法 =====
 
 
+    def _get_tool_schemas(self, tools: Optional[List[str]]) -> List[Dict]:
+        """获取工具 Schema"""
+        tool_names = BUILTIN_TOOLS.copy()
+        if tools:
+            for tool in tools:
+                if tool not in tool_names:
+                    tool_names.append(tool)
+        return self.tools.get_schemas(tool_names)
+
+    async def _build_system_prompt(self, config: RunConfig) -> Optional[str]:
+        """构建 system prompt(注入 skills 和 experiences)"""
+        system_prompt = config.system_prompt
+
+        # 加载 Skills
+        skills_text = ""
+        skills = load_skills_from_dir(self.skills_dir)
+        if skills:
+            skills_text = self._format_skills(skills)
+
+        # 加载 Experiences
+        experiences_text = ""
+        if config.enable_memory and self.memory_store:
+            scope = f"agent:{config.agent_type}"
+            # 从 messages 提取文本作为查询
+            experiences = await self.memory_store.search_experiences(scope, system_prompt or "")
+            experiences_text = self._format_experiences(experiences)
+
+        # 拼装
+        if system_prompt:
+            if skills_text:
+                system_prompt += f"\n\n## Skills\n{skills_text}"
+            if experiences_text:
+                system_prompt += f"\n\n## 相关经验\n{experiences_text}"
+        elif skills_text or experiences_text:
+            parts = []
+            if skills_text:
+                parts.append(f"## Skills\n{skills_text}")
+            if experiences_text:
+                parts.append(f"## 相关经验\n{experiences_text}")
+            system_prompt = "\n\n".join(parts)
+
+        return system_prompt
+
+    async def _generate_task_name(self, messages: List[Dict]) -> str:
+        """生成任务名称:优先使用 utility_llm,fallback 到文本截取"""
+        # 提取 messages 中的文本内容
+        text_parts = []
+        for msg in messages:
+            content = msg.get("content", "")
+            if isinstance(content, str):
+                text_parts.append(content)
+            elif isinstance(content, list):
+                for part in content:
+                    if isinstance(part, dict) and part.get("type") == "text":
+                        text_parts.append(part.get("text", ""))
+        raw_text = " ".join(text_parts).strip()
+
+        if not raw_text:
+            return "未命名任务"
+
+        # 尝试使用 utility_llm 生成标题
+        if self.utility_llm_call:
+            try:
+                result = await self.utility_llm_call(
+                    messages=[
+                        {"role": "system", "content": "用中文为以下任务生成一个简短标题(10-30字),只输出标题本身:"},
+                        {"role": "user", "content": raw_text[:2000]},
+                    ],
+                    model="gpt-4o-mini",  # 使用便宜模型
+                )
+                title = result.get("content", "").strip()
+                if title and len(title) < 100:
+                    return title
+            except Exception:
+                pass
+
+        # Fallback: 截取前 50 字符
+        return raw_text[:50] + ("..." if len(raw_text) > 50 else "")
+
     def _format_skills(self, skills: List[Skill]) -> str:
     def _format_skills(self, skills: List[Skill]) -> str:
-        """格式化技能为 Prompt 文本"""
         if not skills:
         if not skills:
             return ""
             return ""
         return "\n\n".join(s.to_prompt_text() for s in skills)
         return "\n\n".join(s.to_prompt_text() for s in skills)
 
 
     def _format_experiences(self, experiences: List[Experience]) -> str:
     def _format_experiences(self, experiences: List[Experience]) -> str:
-        """格式化经验为 Prompt 文本"""
         if not experiences:
         if not experiences:
             return ""
             return ""
         return "\n".join(f"- {e.to_prompt_text()}" for e in experiences)
         return "\n".join(f"- {e.to_prompt_text()}" for e in experiences)

+ 159 - 31
agent/tools/builtin/subagent.py

@@ -15,6 +15,12 @@ from agent.trace.goal_models import GoalTree
 from agent.trace.websocket import broadcast_sub_trace_started, broadcast_sub_trace_completed
 from agent.trace.websocket import broadcast_sub_trace_started, broadcast_sub_trace_completed
 
 
 
 
+def _make_run_config(**kwargs):
+    """延迟导入 RunConfig 以避免循环导入"""
+    from agent.core.runner import RunConfig
+    return RunConfig(**kwargs)
+
+
 def _build_explore_prompt(branches: List[str], background: Optional[str]) -> str:
 def _build_explore_prompt(branches: List[str], background: Optional[str]) -> str:
     lines = ["# 探索任务", ""]
     lines = ["# 探索任务", ""]
     if background:
     if background:
@@ -80,6 +86,45 @@ async def _build_evaluate_prompt(
 
 
 # ===== 辅助函数 =====
 # ===== 辅助函数 =====
 
 
+async def _update_collaborator(
+    store, trace_id: str,
+    name: str, sub_trace_id: str,
+    status: str, summary: str = "",
+) -> None:
+    """
+    更新 trace.context["collaborators"] 中的协作者信息。
+
+    如果同名协作者已存在则更新,否则追加。
+    """
+    trace = await store.get_trace(trace_id)
+    if not trace:
+        return
+
+    collaborators = trace.context.get("collaborators", [])
+
+    # 查找已有记录
+    existing = None
+    for c in collaborators:
+        if c.get("trace_id") == sub_trace_id:
+            existing = c
+            break
+
+    if existing:
+        existing["status"] = status
+        if summary:
+            existing["summary"] = summary
+    else:
+        collaborators.append({
+            "name": name,
+            "type": "agent",
+            "trace_id": sub_trace_id,
+            "status": status,
+            "summary": summary,
+        })
+
+    trace.context["collaborators"] = collaborators
+    await store.update_trace(trace_id, context=trace.context)
+
 async def _update_goal_start(
 async def _update_goal_start(
     store, trace_id: str, goal_id: str, mode: str, sub_trace_ids: List[str]
     store, trace_id: str, goal_id: str, mode: str, sub_trace_ids: List[str]
 ) -> None:
 ) -> None:
@@ -240,7 +285,10 @@ async def _handle_explore_mode(
             "error": "explore mode does not support continue_from parameter"
             "error": "explore mode does not support continue_from parameter"
         }
         }
 
 
-    # 2. 创建所有 Sub-Traces
+    # 2. 获取父 Trace 信息(用于继承 uid、model)
+    parent_trace = await store.get_trace(current_trace_id)
+
+    # 3. 创建所有 Sub-Traces
     sub_trace_ids = []
     sub_trace_ids = []
     tasks = []
     tasks = []
 
 
@@ -253,7 +301,6 @@ async def _handle_explore_mode(
         })
         })
 
 
         # 创建 Sub-Trace
         # 创建 Sub-Trace
-        parent_trace = await store.get_trace(current_trace_id)
         sub_trace = Trace(
         sub_trace = Trace(
             trace_id=sub_trace_id,
             trace_id=sub_trace_id,
             mode="agent",
             mode="agent",
@@ -276,25 +323,38 @@ async def _handle_explore_mode(
             "explore", branch
             "explore", branch
         )
         )
 
 
+        # 注册为活跃协作者
+        await _update_collaborator(
+            store, current_trace_id,
+            name=f"explore-{i+1}", sub_trace_id=sub_trace_id,
+            status="running", summary=branch[:80],
+        )
+
         # 创建执行任务
         # 创建执行任务
         task_coro = runner.run_result(
         task_coro = runner.run_result(
-            task=branch,
-            trace_id=sub_trace_id,
-            agent_type="explore",
-            tools=["read_file", "grep_content", "glob_files", "goal"]
+            messages=[{"role": "user", "content": branch}],
+            config=_make_run_config(
+                trace_id=sub_trace_id,
+                agent_type="explore",
+                model=parent_trace.model if parent_trace else "gpt-4o",
+                uid=parent_trace.uid if parent_trace else None,
+                tools=["read_file", "grep_content", "glob_files", "goal"],
+                name=branch,
+            ),
         )
         )
         tasks.append(task_coro)
         tasks.append(task_coro)
 
 
-    # 3. 更新主 Goal 为 in_progress
+    # 4. 更新主 Goal 为 in_progress
     await _update_goal_start(store, current_trace_id, current_goal_id, "explore", sub_trace_ids)
     await _update_goal_start(store, current_trace_id, current_goal_id, "explore", sub_trace_ids)
 
 
-    # 4. 并行执行所有分支
+    # 5. 并行执行所有分支
     results = await asyncio.gather(*tasks, return_exceptions=True)
     results = await asyncio.gather(*tasks, return_exceptions=True)
 
 
-    # 5. 处理结果并广播完成事件
+    # 6. 处理结果并广播完成事件
     processed_results = []
     processed_results = []
 
 
     for i, result in enumerate(results):
     for i, result in enumerate(results):
+        sub_tid = sub_trace_ids[i]["trace_id"]
         if isinstance(result, Exception):
         if isinstance(result, Exception):
             # 异常处理
             # 异常处理
             error_result = {
             error_result = {
@@ -304,22 +364,32 @@ async def _handle_explore_mode(
             }
             }
             processed_results.append(error_result)
             processed_results.append(error_result)
             await broadcast_sub_trace_completed(
             await broadcast_sub_trace_completed(
-                current_trace_id, sub_trace_ids[i]["trace_id"],
-                "failed", str(result), {}
+                current_trace_id, sub_tid, "failed", str(result), {}
+            )
+            await _update_collaborator(
+                store, current_trace_id,
+                name=f"explore-{i+1}", sub_trace_id=sub_tid,
+                status="failed", summary=str(result)[:80],
             )
             )
         else:
         else:
             processed_results.append(result)
             processed_results.append(result)
             await broadcast_sub_trace_completed(
             await broadcast_sub_trace_completed(
-                current_trace_id, sub_trace_ids[i]["trace_id"],
+                current_trace_id, sub_tid,
                 result.get("status", "completed"),
                 result.get("status", "completed"),
                 result.get("summary", ""),
                 result.get("summary", ""),
                 result.get("stats", {})
                 result.get("stats", {})
             )
             )
+            await _update_collaborator(
+                store, current_trace_id,
+                name=f"explore-{i+1}", sub_trace_id=sub_tid,
+                status=result.get("status", "completed"),
+                summary=result.get("summary", "")[:80],
+            )
 
 
-    # 6. 格式化汇总结果
+    # 7. 格式化汇总结果
     aggregated_summary = _format_explore_results(branches, processed_results)
     aggregated_summary = _format_explore_results(branches, processed_results)
 
 
-    # 7. 更新主 Goal 为 completed
+    # 8. 更新主 Goal 为 completed
     overall_status = "completed" if any(
     overall_status = "completed" if any(
         r.get("status") == "completed" for r in processed_results if isinstance(r, dict)
         r.get("status") == "completed" for r in processed_results if isinstance(r, dict)
     ) else "failed"
     ) else "failed"
@@ -329,7 +399,7 @@ async def _handle_explore_mode(
         overall_status, aggregated_summary, sub_trace_ids
         overall_status, aggregated_summary, sub_trace_ids
     )
     )
 
 
-    # 8. 返回结果
+    # 9. 返回结果
     return {
     return {
         "mode": "explore",
         "mode": "explore",
         "status": overall_status,
         "status": overall_status,
@@ -347,7 +417,10 @@ async def _handle_delegate_mode(
 ) -> Dict[str, Any]:
 ) -> Dict[str, Any]:
     """Delegate 模式:委托单个任务"""
     """Delegate 模式:委托单个任务"""
 
 
-    # 1. 处理 continue_from 或创建新 Sub-Trace
+    # 1. 获取父 Trace 信息
+    parent_trace = await store.get_trace(current_trace_id)
+
+    # 2. 处理 continue_from 或创建新 Sub-Trace
     if continue_from:
     if continue_from:
         existing_trace = await store.get_trace(continue_from)
         existing_trace = await store.get_trace(continue_from)
         if not existing_trace:
         if not existing_trace:
@@ -358,7 +431,6 @@ async def _handle_delegate_mode(
         mission = goal_tree.mission if goal_tree else task
         mission = goal_tree.mission if goal_tree else task
         sub_trace_ids = [{"trace_id": sub_trace_id, "mission": mission}]
         sub_trace_ids = [{"trace_id": sub_trace_id, "mission": mission}]
     else:
     else:
-        parent_trace = await store.get_trace(current_trace_id)
         sub_trace_id = generate_sub_trace_id(current_trace_id, "delegate")
         sub_trace_id = generate_sub_trace_id(current_trace_id, "delegate")
         sub_trace = Trace(
         sub_trace = Trace(
             trace_id=sub_trace_id,
             trace_id=sub_trace_id,
@@ -383,17 +455,30 @@ async def _handle_delegate_mode(
             "delegate", task
             "delegate", task
         )
         )
 
 
-    # 2. 更新主 Goal 为 in_progress
+    # 注册为活跃协作者
+    delegate_name = task[:30] if not continue_from else f"delegate-{sub_trace_id[:8]}"
+    await _update_collaborator(
+        store, current_trace_id,
+        name=delegate_name, sub_trace_id=sub_trace_id,
+        status="running", summary=task[:80],
+    )
+
+    # 3. 更新主 Goal 为 in_progress
     await _update_goal_start(store, current_trace_id, current_goal_id, "delegate", sub_trace_ids)
     await _update_goal_start(store, current_trace_id, current_goal_id, "delegate", sub_trace_ids)
 
 
-    # 3. 执行任务
+    # 4. 执行任务
     try:
     try:
         allowed_tools = _get_allowed_tools_for_mode("delegate", context)
         allowed_tools = _get_allowed_tools_for_mode("delegate", context)
         result = await runner.run_result(
         result = await runner.run_result(
-            task=task,
-            trace_id=sub_trace_id,
-            agent_type="delegate",
-            tools=allowed_tools
+            messages=[{"role": "user", "content": task}],
+            config=_make_run_config(
+                trace_id=sub_trace_id,
+                agent_type="delegate",
+                model=parent_trace.model if parent_trace else "gpt-4o",
+                uid=parent_trace.uid if parent_trace else None,
+                tools=allowed_tools,
+                name=task[:50],
+            ),
         )
         )
 
 
         # 4. 广播 sub_trace_completed
         # 4. 广播 sub_trace_completed
@@ -404,6 +489,14 @@ async def _handle_delegate_mode(
             result.get("stats", {})
             result.get("stats", {})
         )
         )
 
 
+        # 更新协作者状态
+        await _update_collaborator(
+            store, current_trace_id,
+            name=delegate_name, sub_trace_id=sub_trace_id,
+            status=result.get("status", "completed"),
+            summary=result.get("summary", "")[:80],
+        )
+
         # 5. 格式化结果
         # 5. 格式化结果
         formatted_summary = _format_delegate_result(result)
         formatted_summary = _format_delegate_result(result)
 
 
@@ -430,6 +523,12 @@ async def _handle_delegate_mode(
             "failed", error_msg, {}
             "failed", error_msg, {}
         )
         )
 
 
+        await _update_collaborator(
+            store, current_trace_id,
+            name=delegate_name, sub_trace_id=sub_trace_id,
+            status="failed", summary=error_msg[:80],
+        )
+
         await _update_goal_complete(
         await _update_goal_complete(
             store, current_trace_id, current_goal_id,
             store, current_trace_id, current_goal_id,
             "failed", f"委托任务失败: {error_msg}", sub_trace_ids
             "failed", f"委托任务失败: {error_msg}", sub_trace_ids
@@ -458,7 +557,10 @@ async def _handle_evaluate_mode(
         evaluation_input, requirements
         evaluation_input, requirements
     )
     )
 
 
-    # 2. 处理 continue_from 或创建新 Sub-Trace
+    # 2. 获取父 Trace 信息
+    parent_trace = await store.get_trace(current_trace_id)
+
+    # 3. 处理 continue_from 或创建新 Sub-Trace
     if continue_from:
     if continue_from:
         existing_trace = await store.get_trace(continue_from)
         existing_trace = await store.get_trace(continue_from)
         if not existing_trace:
         if not existing_trace:
@@ -469,7 +571,6 @@ async def _handle_evaluate_mode(
         mission = goal_tree.mission if goal_tree else task_prompt
         mission = goal_tree.mission if goal_tree else task_prompt
         sub_trace_ids = [{"trace_id": sub_trace_id, "mission": mission}]
         sub_trace_ids = [{"trace_id": sub_trace_id, "mission": mission}]
     else:
     else:
-        parent_trace = await store.get_trace(current_trace_id)
         sub_trace_id = generate_sub_trace_id(current_trace_id, "evaluate")
         sub_trace_id = generate_sub_trace_id(current_trace_id, "evaluate")
         sub_trace = Trace(
         sub_trace = Trace(
             trace_id=sub_trace_id,
             trace_id=sub_trace_id,
@@ -494,17 +595,30 @@ async def _handle_evaluate_mode(
             "evaluate", task_prompt
             "evaluate", task_prompt
         )
         )
 
 
-    # 3. 更新主 Goal 为 in_progress
+    # 4. 更新主 Goal 为 in_progress
     await _update_goal_start(store, current_trace_id, current_goal_id, "evaluate", sub_trace_ids)
     await _update_goal_start(store, current_trace_id, current_goal_id, "evaluate", sub_trace_ids)
 
 
-    # 4. 执行评估
+    # 注册为活跃协作者
+    eval_name = f"评估: {target_goal_id[:20]}"
+    await _update_collaborator(
+        store, current_trace_id,
+        name=eval_name, sub_trace_id=sub_trace_id,
+        status="running", summary=f"评估 Goal {target_goal_id}",
+    )
+
+    # 5. 执行评估
     try:
     try:
         allowed_tools = _get_allowed_tools_for_mode("evaluate", context)
         allowed_tools = _get_allowed_tools_for_mode("evaluate", context)
         result = await runner.run_result(
         result = await runner.run_result(
-            task=task_prompt,
-            trace_id=sub_trace_id,
-            agent_type="evaluate",
-            tools=allowed_tools
+            messages=[{"role": "user", "content": task_prompt}],
+            config=_make_run_config(
+                trace_id=sub_trace_id,
+                agent_type="evaluate",
+                model=parent_trace.model if parent_trace else "gpt-4o",
+                uid=parent_trace.uid if parent_trace else None,
+                tools=allowed_tools,
+                name=f"评估: {target_goal_id}",
+            ),
         )
         )
 
 
         # 5. 广播 sub_trace_completed
         # 5. 广播 sub_trace_completed
@@ -515,6 +629,14 @@ async def _handle_evaluate_mode(
             result.get("stats", {})
             result.get("stats", {})
         )
         )
 
 
+        # 更新协作者状态
+        await _update_collaborator(
+            store, current_trace_id,
+            name=eval_name, sub_trace_id=sub_trace_id,
+            status=result.get("status", "completed"),
+            summary=result.get("summary", "")[:80],
+        )
+
         # 6. 格式化结果
         # 6. 格式化结果
         formatted_summary = _format_evaluate_result(result)
         formatted_summary = _format_evaluate_result(result)
 
 
@@ -541,6 +663,12 @@ async def _handle_evaluate_mode(
             "failed", error_msg, {}
             "failed", error_msg, {}
         )
         )
 
 
+        await _update_collaborator(
+            store, current_trace_id,
+            name=eval_name, sub_trace_id=sub_trace_id,
+            status="failed", summary=error_msg[:80],
+        )
+
         await _update_goal_complete(
         await _update_goal_complete(
             store, current_trace_id, current_goal_id,
             store, current_trace_id, current_goal_id,
             "failed", f"评估任务失败: {error_msg}", sub_trace_ids
             "failed", f"评估任务失败: {error_msg}", sub_trace_ids

+ 5 - 3
agent/trace/api.py

@@ -124,7 +124,8 @@ async def get_trace(trace_id: str):
 @router.get("/{trace_id}/messages", response_model=MessagesResponse)
 @router.get("/{trace_id}/messages", response_model=MessagesResponse)
 async def get_messages(
 async def get_messages(
     trace_id: str,
     trace_id: str,
-    goal_id: Optional[str] = Query(None, description="过滤指定 Goal 的消息。使用 '_init' 查询初始阶段(goal_id=None)的消息")
+    goal_id: Optional[str] = Query(None, description="过滤指定 Goal 的消息。使用 '_init' 查询初始阶段(goal_id=None)的消息"),
+    include_abandoned: bool = Query(False, description="是否包含已 abandoned 的消息(回溯后被标记的)"),
 ):
 ):
     """
     """
     获取 Messages
     获取 Messages
@@ -135,6 +136,7 @@ async def get_messages(
                 - 不指定: 返回所有消息
                 - 不指定: 返回所有消息
                 - "_init" 或 "null": 返回初始阶段(goal_id=None)的消息
                 - "_init" 或 "null": 返回初始阶段(goal_id=None)的消息
                 - 其他值: 返回指定 Goal 的消息
                 - 其他值: 返回指定 Goal 的消息
+        include_abandoned: 是否包含已 abandoned 的消息(默认 False)
     """
     """
     store = get_trace_store()
     store = get_trace_store()
 
 
@@ -146,10 +148,10 @@ async def get_messages(
     # 获取 Messages
     # 获取 Messages
     if goal_id is None:
     if goal_id is None:
         # 没有指定 goal_id,返回所有消息
         # 没有指定 goal_id,返回所有消息
-        messages = await store.get_trace_messages(trace_id)
+        messages = await store.get_trace_messages(trace_id, include_abandoned=include_abandoned)
     elif goal_id in ("_init", "null"):
     elif goal_id in ("_init", "null"):
         # 特殊值:查询初始阶段的消息(goal_id=None)
         # 特殊值:查询初始阶段的消息(goal_id=None)
-        all_messages = await store.get_trace_messages(trace_id)
+        all_messages = await store.get_trace_messages(trace_id, include_abandoned=include_abandoned)
         messages = [m for m in all_messages if m.goal_id is None]
         messages = [m for m in all_messages if m.goal_id is None]
     else:
     else:
         # 查询指定 Goal 的消息
         # 查询指定 Goal 的消息

+ 69 - 0
agent/trace/models.py

@@ -142,6 +142,7 @@ class Message:
     trace_id: str
     trace_id: str
     role: Literal["system", "user", "assistant", "tool"]   # 和 LLM API 一致
     role: Literal["system", "user", "assistant", "tool"]   # 和 LLM API 一致
     sequence: int                        # 全局顺序
     sequence: int                        # 全局顺序
+    status: Literal["active", "abandoned"] = "active"  # 回溯时后续消息标记为 abandoned
     goal_id: Optional[str] = None        # 关联的 Goal 内部 ID(None = 还没有创建 Goal)
     goal_id: Optional[str] = None        # 关联的 Goal 内部 ID(None = 还没有创建 Goal)
     description: str = ""                # 消息描述(系统自动生成)
     description: str = ""                # 消息描述(系统自动生成)
     tool_call_id: Optional[str] = None   # tool 消息关联对应的 tool_call
     tool_call_id: Optional[str] = None   # tool 消息关联对应的 tool_call
@@ -156,6 +157,7 @@ class Message:
     cost: Optional[float] = None
     cost: Optional[float] = None
     duration_ms: Optional[int] = None
     duration_ms: Optional[int] = None
     created_at: datetime = field(default_factory=datetime.now)
     created_at: datetime = field(default_factory=datetime.now)
+    abandoned_at: Optional[datetime] = None  # 回溯标记时间
 
 
     # LLM 响应信息(仅 role="assistant" 时使用)
     # LLM 响应信息(仅 role="assistant" 时使用)
     finish_reason: Optional[str] = None  # stop, length, tool_calls, content_filter 等
     finish_reason: Optional[str] = None  # stop, length, tool_calls, content_filter 等
@@ -181,6 +183,64 @@ class Message:
             cache_read_tokens=self.cache_read_tokens or 0,
             cache_read_tokens=self.cache_read_tokens or 0,
         )
         )
 
 
+    def to_llm_dict(self) -> Dict[str, Any]:
+        """转换为 OpenAI SDK 格式的消息字典(用于 LLM 调用)"""
+        msg: Dict[str, Any] = {"role": self.role}
+
+        if self.role == "tool":
+            # tool message: tool_call_id + name + content(string)
+            if self.tool_call_id:
+                msg["tool_call_id"] = self.tool_call_id
+                msg["name"] = self.description or "unknown"
+            if isinstance(self.content, dict):
+                msg["content"] = str(self.content.get("result", self.content))
+            else:
+                msg["content"] = str(self.content) if self.content is not None else ""
+
+        elif self.role == "assistant":
+            # assistant message: content(text) + tool_calls
+            if isinstance(self.content, dict):
+                msg["content"] = self.content.get("text", "") or ""
+                if self.content.get("tool_calls"):
+                    msg["tool_calls"] = self.content["tool_calls"]
+            elif isinstance(self.content, str):
+                msg["content"] = self.content
+            else:
+                msg["content"] = ""
+
+        else:
+            # system / user message: content 直接传
+            msg["content"] = self.content
+
+        return msg
+
+    @classmethod
+    def from_llm_dict(
+        cls,
+        d: Dict[str, Any],
+        trace_id: str,
+        sequence: int,
+        goal_id: Optional[str] = None,
+    ) -> "Message":
+        """从 OpenAI SDK 格式创建 Message"""
+        role = d["role"]
+
+        if role == "assistant":
+            content = {"text": d.get("content", ""), "tool_calls": d.get("tool_calls")}
+        elif role == "tool":
+            content = {"tool_name": d.get("name", "unknown"), "result": d.get("content", "")}
+        else:
+            content = d.get("content", "")
+
+        return cls.create(
+            trace_id=trace_id,
+            role=role,
+            sequence=sequence,
+            goal_id=goal_id,
+            content=content,
+            tool_call_id=d.get("tool_call_id"),
+        )
+
     @classmethod
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "Message":
     def from_dict(cls, data: Dict[str, Any]) -> "Message":
         """从字典创建 Message(处理向后兼容)"""
         """从字典创建 Message(处理向后兼容)"""
@@ -190,6 +250,12 @@ class Message:
         # 解析 datetime
         # 解析 datetime
         if filtered_data.get("created_at") and isinstance(filtered_data["created_at"], str):
         if filtered_data.get("created_at") and isinstance(filtered_data["created_at"], str):
             filtered_data["created_at"] = datetime.fromisoformat(filtered_data["created_at"])
             filtered_data["created_at"] = datetime.fromisoformat(filtered_data["created_at"])
+        if filtered_data.get("abandoned_at") and isinstance(filtered_data["abandoned_at"], str):
+            filtered_data["abandoned_at"] = datetime.fromisoformat(filtered_data["abandoned_at"])
+
+        # 向后兼容:旧消息没有 status 字段,默认 active
+        if "status" not in filtered_data:
+            filtered_data["status"] = "active"
 
 
         return cls(**filtered_data)
         return cls(**filtered_data)
 
 
@@ -302,6 +368,7 @@ class Message:
             "trace_id": self.trace_id,
             "trace_id": self.trace_id,
             "role": self.role,
             "role": self.role,
             "sequence": self.sequence,
             "sequence": self.sequence,
+            "status": self.status,
             "goal_id": self.goal_id,
             "goal_id": self.goal_id,
             "tool_call_id": self.tool_call_id,
             "tool_call_id": self.tool_call_id,
             "content": self.content,
             "content": self.content,
@@ -315,6 +382,8 @@ class Message:
             "created_at": self.created_at.isoformat() if self.created_at else None,
             "created_at": self.created_at.isoformat() if self.created_at else None,
         }
         }
         # 只添加非空的可选字段
         # 只添加非空的可选字段
+        if self.abandoned_at:
+            result["abandoned_at"] = self.abandoned_at.isoformat()
         if self.reasoning_tokens:
         if self.reasoning_tokens:
             result["reasoning_tokens"] = self.reasoning_tokens
             result["reasoning_tokens"] = self.reasoning_tokens
         if self.cache_creation_tokens:
         if self.cache_creation_tokens:

+ 16 - 1
agent/trace/protocols.py

@@ -120,13 +120,15 @@ class TraceStore(Protocol):
 
 
     async def get_trace_messages(
     async def get_trace_messages(
         self,
         self,
-        trace_id: str
+        trace_id: str,
+        include_abandoned: bool = False
     ) -> List[Message]:
     ) -> List[Message]:
         """
         """
         获取 Trace 的所有 Messages(按 sequence 排序)
         获取 Trace 的所有 Messages(按 sequence 排序)
 
 
         Args:
         Args:
             trace_id: Trace ID
             trace_id: Trace ID
+            include_abandoned: 是否包含已 abandoned 的消息(默认 False,仅返回 active)
 
 
         Returns:
         Returns:
             Message 列表
             Message 列表
@@ -160,6 +162,19 @@ class TraceStore(Protocol):
         """
         """
         ...
         ...
 
 
    async def abandon_messages_after(self, trace_id: str, cutoff_sequence: int) -> List[str]:
        """
        Mark all active messages after cutoff_sequence as abandoned (rewind only).

        Args:
            trace_id: Trace ID
            cutoff_sequence: Cut-off point (messages at or before this sequence are kept)

        Returns:
            List of message_ids that were marked as abandoned
        """
        ...
+
     # ===== 事件流操作(用于 WebSocket 断线续传)=====
     # ===== 事件流操作(用于 WebSocket 断线续传)=====
 
 
     async def get_events(
     async def get_events(

+ 238 - 0
agent/trace/run_api.py

@@ -0,0 +1,238 @@
+"""
+Trace 操作 API — 新建 / 续跑 / 回溯
+
+提供 POST 端点触发 Agent 执行。需要通过 set_runner() 注入 AgentRunner 实例。
+执行在后台异步进行,客户端通过 WebSocket (/api/traces/{trace_id}/watch) 监听实时更新。
+"""
+
+import asyncio
+import logging
+from typing import Any, Dict, List, Optional
+
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel, Field
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/api/traces", tags=["run"])
+
+
+# ===== 全局 Runner(由 api_server.py 注入)=====
+
+_runner = None
+
+
def set_runner(runner):
    """Inject the AgentRunner instance used by the execution endpoints."""
    global _runner
    _runner = runner
+
+
def _get_runner():
    """Return the injected AgentRunner, or fail with 503 when none is configured."""
    runner = _runner
    if runner is not None:
        return runner
    raise HTTPException(
        status_code=503,
        detail="AgentRunner not configured. Server is in read-only mode.",
    )
+
+
+# ===== Request / Response 模型 =====
+
+
class RunRequest(BaseModel):
    """Request body for creating and running a brand-new trace."""
    messages: List[Dict[str, Any]] = Field(..., description="OpenAI SDK 格式的输入消息")
    model: str = Field("gpt-4o", description="模型名称")
    temperature: float = Field(0.3)
    max_iterations: int = Field(200)
    system_prompt: Optional[str] = Field(None, description="自定义 system prompt(None = 从 skills 自动构建)")
    tools: Optional[List[str]] = Field(None, description="工具白名单(None = 全部)")
    name: Optional[str] = Field(None, description="任务名称(None = 自动生成)")
    uid: Optional[str] = Field(None)  # caller/user identifier, forwarded into RunConfig
+
+
class ContinueRequest(BaseModel):
    """Request body for resuming an existing trace (messages are appended at the end)."""
    messages: List[Dict[str, Any]] = Field(
        default=[{"role": "user", "content": "继续"}],
        description="追加到末尾的新消息",
    )
+
+
class RewindRequest(BaseModel):
    """Request body for rewind-and-replay from a given message sequence."""
    insert_after: int = Field(..., description="截断点的 message sequence(保留该 sequence 及之前的消息)")
    messages: List[Dict[str, Any]] = Field(
        default=[{"role": "user", "content": "继续"}],
        description="在截断点之后插入的新消息",
    )
+
+
class RunResponse(BaseModel):
    """Immediate acknowledgement; the actual run continues in the background."""
    trace_id: str
    mode: str  # "new" | "continue" | "rewind"
    status: str = "started"
    message: str = ""
+
+
+# ===== 后台执行 =====
+
+_running_tasks: Dict[str, asyncio.Task] = {}
+
+
async def _run_in_background(trace_id: str, messages: List[Dict], config) -> None:
    """Run the agent in the background, draining everything run() yields.

    Args:
        trace_id: Trace being executed (used for logging and task bookkeeping).
        messages: OpenAI SDK style input messages.
        config: RunConfig for this run.

    The yielded items are intentionally discarded: WebSocket broadcasting is
    driven by the runner's store events, not by this consumer.
    """
    runner = _get_runner()
    try:
        async for _item in runner.run(messages=messages, config=config):
            pass
    except Exception:
        # logger.exception records the full traceback, unlike the previous
        # logger.error(f"... {e}") which only kept the message text.
        logger.exception("Background run failed for %s", trace_id)
    finally:
        _running_tasks.pop(trace_id, None)
+
+
async def _run_with_trace_signal(
    messages: List[Dict], config, trace_id_future: asyncio.Future,
) -> None:
    """Run the agent in the background and hand the trace_id back via a Future.

    The caller awaits `trace_id_future` to learn the trace_id as soon as the
    runner yields its Trace (end of Phase 1).

    Fix: the future is now always resolved in `finally` — previously, if run()
    finished without ever yielding a Trace and without raising, the future was
    never completed and the awaiting endpoint hung forever.
    """
    from agent.trace.models import Trace

    runner = _get_runner()
    trace_id: Optional[str] = None
    try:
        async for item in runner.run(messages=messages, config=config):
            if isinstance(item, Trace) and not trace_id_future.done():
                trace_id = item.trace_id
                trace_id_future.set_result(trace_id)
    except Exception as e:
        if not trace_id_future.done():
            trace_id_future.set_exception(e)
        # Keep the full traceback in the log, not just str(e).
        logger.exception("Background run failed")
    finally:
        if not trace_id_future.done():
            # Defensive: run() ended without producing a Trace — unblock caller.
            trace_id_future.set_exception(
                RuntimeError("Agent run finished without yielding a Trace")
            )
        if trace_id:
            _running_tasks.pop(trace_id, None)
+
+
+# ===== 路由 =====
+
+
@router.post("", response_model=RunResponse)
async def create_and_run(req: RunRequest):
    """
    Create a new Trace and start executing it.

    Returns the trace_id as soon as Phase 1 (trace creation) completes; the
    run itself continues in the background. Watch live updates via
    WebSocket /api/traces/{trace_id}/watch.
    """
    from agent.core.runner import RunConfig

    _get_runner()  # fail fast with 503 when no Runner is configured

    run_config = RunConfig(
        model=req.model,
        temperature=req.temperature,
        max_iterations=req.max_iterations,
        system_prompt=req.system_prompt,
        tools=req.tools,
        name=req.name,
        uid=req.uid,
    )

    # Kick off the background run; the Future hands the trace_id back to us
    # once the runner has created the trace.
    loop = asyncio.get_running_loop()
    trace_id_future: asyncio.Future[str] = loop.create_future()
    bg_task = asyncio.create_task(
        _run_with_trace_signal(req.messages, run_config, trace_id_future)
    )

    new_trace_id = await trace_id_future
    _running_tasks[new_trace_id] = bg_task

    return RunResponse(
        trace_id=new_trace_id,
        mode="new",
        status="started",
        message=f"Execution started. Watch via WebSocket: /api/traces/{new_trace_id}/watch",
    )
+
+
@router.post("/{trace_id}/continue", response_model=RunResponse)
async def continue_trace(trace_id: str, req: ContinueRequest):
    """
    Resume an existing Trace.

    Appends the given messages at the end of the trace and continues execution
    in the background.
    """
    from agent.core.runner import RunConfig

    runner = _get_runner()

    # Make sure the trace actually exists before scheduling any work.
    store = runner.trace_store
    if store and not await store.get_trace(trace_id):
        raise HTTPException(status_code=404, detail=f"Trace not found: {trace_id}")

    # Reject if a run for this trace is still in flight.
    active_task = _running_tasks.get(trace_id)
    if active_task is not None and not active_task.done():
        raise HTTPException(status_code=409, detail="Trace is already running")

    task = asyncio.create_task(
        _run_in_background(trace_id, req.messages, RunConfig(trace_id=trace_id))
    )
    _running_tasks[trace_id] = task

    return RunResponse(
        trace_id=trace_id,
        mode="continue",
        status="started",
        message=f"Continue started. Watch via WebSocket: /api/traces/{trace_id}/watch",
    )
+
+
@router.post("/{trace_id}/rewind", response_model=RunResponse)
async def rewind_trace(trace_id: str, req: RewindRequest):
    """
    Rewind and replay a Trace.

    Truncates at the given sequence (later messages and goals are abandoned),
    inserts the new messages, and re-runs from there. `insert_after` is a
    message sequence number, visible via GET /api/traces/{trace_id}/messages.
    If it points at an assistant message with tool_calls, the cut point is
    automatically extended past all of its tool responses.
    """
    from agent.core.runner import RunConfig

    runner = _get_runner()

    # Make sure the trace actually exists before scheduling any work.
    store = runner.trace_store
    if store and not await store.get_trace(trace_id):
        raise HTTPException(status_code=404, detail=f"Trace not found: {trace_id}")

    # Reject if a run for this trace is still in flight.
    active_task = _running_tasks.get(trace_id)
    if active_task is not None and not active_task.done():
        raise HTTPException(status_code=409, detail="Trace is already running")

    rewind_config = RunConfig(trace_id=trace_id, insert_after=req.insert_after)
    task = asyncio.create_task(
        _run_in_background(trace_id, req.messages, rewind_config)
    )
    _running_tasks[trace_id] = task

    return RunResponse(
        trace_id=trace_id,
        mode="rewind",
        status="started",
        message=f"Rewind to sequence {req.insert_after} started. Watch via WebSocket: /api/traces/{trace_id}/watch",
    )
+
+
@router.get("/running", tags=["run"])
async def list_running():
    """List trace_ids that currently have a live background task.

    Finished tasks found during the scan are opportunistically evicted
    from the registry.
    """
    active = []
    for tid in list(_running_tasks):
        task = _running_tasks.get(tid)
        if task is None:
            continue
        if task.done():
            _running_tasks.pop(tid, None)
        else:
            active.append(tid)
    return {"running": active}

+ 29 - 3
agent/trace/store.py

@@ -468,9 +468,10 @@ class FileSystemTraceStore:
 
 
     async def get_trace_messages(
     async def get_trace_messages(
         self,
         self,
-        trace_id: str
+        trace_id: str,
+        include_abandoned: bool = False
     ) -> List[Message]:
     ) -> List[Message]:
-        """获取 Trace 的所有 Messages"""
+        """获取 Trace 的 Messages(默认只返回 active 的)"""
         messages_dir = self._get_messages_dir(trace_id)
         messages_dir = self._get_messages_dir(trace_id)
 
 
         if not messages_dir.exists():
         if not messages_dir.exists():
@@ -480,7 +481,9 @@ class FileSystemTraceStore:
         for message_file in messages_dir.glob("*.json"):
         for message_file in messages_dir.glob("*.json"):
             try:
             try:
                 data = json.loads(message_file.read_text())
                 data = json.loads(message_file.read_text())
-                messages.append(Message.from_dict(data))
+                msg = Message.from_dict(data)
+                if include_abandoned or msg.status == "active":
+                    messages.append(msg)
             except Exception:
             except Exception:
                 continue
                 continue
 
 
@@ -513,6 +516,29 @@ class FileSystemTraceStore:
         message_file = messages_dir / f"{message_id}.json"
         message_file = messages_dir / f"{message_id}.json"
         message_file.write_text(json.dumps(message.to_dict(), indent=2, ensure_ascii=False))
         message_file.write_text(json.dumps(message.to_dict(), indent=2, ensure_ascii=False))
 
 
+    async def abandon_messages_after(self, trace_id: str, cutoff_sequence: int) -> List[str]:
+        """
+        将 sequence > cutoff_sequence 的 active messages 标记为 abandoned。
+        返回被 abandon 的 message_id 列表。
+        """
+        all_messages = await self.get_trace_messages(trace_id, include_abandoned=True)
+        abandoned_ids = []
+        now = datetime.now()
+
+        for msg in all_messages:
+            if msg.sequence > cutoff_sequence and msg.status == "active":
+                msg.status = "abandoned"
+                msg.abandoned_at = now
+                # 直接写回文件
+                message_file = self._get_messages_dir(trace_id) / f"{msg.message_id}.json"
+                message_file.write_text(
+                    json.dumps(msg.to_dict(), indent=2, ensure_ascii=False),
+                    encoding="utf-8"
+                )
+                abandoned_ids.append(msg.message_id)
+
+        return abandoned_ids
+
     # ===== 事件流操作(用于 WebSocket 断线续传)=====
     # ===== 事件流操作(用于 WebSocket 断线续传)=====
 
 
     async def get_events(
     async def get_events(

+ 27 - 5
api_server.py

@@ -1,7 +1,10 @@
 """
 """
 API Server - FastAPI 应用入口
 API Server - FastAPI 应用入口
 
 
-聚合所有模块的 API 路由(step_tree、未来的 memory 等)
+聚合所有模块的 API 路由:
+- GET  /api/traces — 查询(trace/api.py)
+- POST /api/traces — 执行(trace/run_api.py,需配置 Runner)
+- WS   /api/traces/{id}/watch — 实时推送(trace/websocket.py)
 """
 """
 
 
 import logging
 import logging
@@ -13,6 +16,7 @@ import uvicorn
 
 
 from agent.trace import FileSystemTraceStore
 from agent.trace import FileSystemTraceStore
 from agent.trace.api import router as api_router, set_trace_store as set_api_trace_store
 from agent.trace.api import router as api_router, set_trace_store as set_api_trace_store
+from agent.trace.run_api import router as run_router, set_runner
 from agent.trace.websocket import router as ws_router, set_trace_store as set_ws_trace_store
 from agent.trace.websocket import router as ws_router, set_trace_store as set_ws_trace_store
 
 
 
 
@@ -28,8 +32,8 @@ logger = logging.getLogger(__name__)
 # ===== FastAPI 应用 =====
 # ===== FastAPI 应用 =====
 
 
 app = FastAPI(
 app = FastAPI(
-    title="Agent Step Tree API",
-    description="Step 树可视化 API",
+    title="Agent API",
+    description="Agent 查询 + 执行 API",
     version="1.0.0"
     version="1.0.0"
 )
 )
 
 
@@ -53,12 +57,30 @@ set_api_trace_store(trace_store)
 set_ws_trace_store(trace_store)
 set_ws_trace_store(trace_store)
 
 
 
 
+# ===== 可选:配置 Runner(启用执行 API)=====
+
+# 如需启用 POST /api/traces(新建/续跑/回溯),取消以下注释并配置 LLM:
+#
+# from agent.core.runner import AgentRunner
+# from agent.llm import create_openrouter_llm_call
+#
+# runner = AgentRunner(
+#     trace_store=trace_store,
+#     llm_call=create_openrouter_llm_call(model="google/gemini-2.5-flash"),
+# )
+# set_runner(runner)
+
+
 # ===== 注册路由 =====
 # ===== 注册路由 =====
 
 
-# Step 树 RESTful API
+# Trace 执行 API(POST + GET /running,需配置 Runner)
+# 注意:run_router 必须在 api_router 之前注册,否则 GET /running 会被 /{trace_id} 捕获
+app.include_router(run_router)
+
+# Trace 查询 API(GET)
 app.include_router(api_router)
 app.include_router(api_router)
 
 
-# Step 树 WebSocket
+# Trace WebSocket(实时推送)
 app.include_router(ws_router)
 app.include_router(ws_router)
 
 
 @app.websocket("/ws_ping")
 @app.websocket("/ws_ping")

+ 226 - 73
docs/README.md

@@ -108,83 +108,172 @@ agent/
 
 
 ## 核心流程:Agent Loop
 ## 核心流程:Agent Loop
 
 
+### 参数分层
+
+```
+Layer 1: Infrastructure(基础设施,AgentRunner 构造时设置)
+  trace_store, memory_store, tool_registry, llm_call, skills_dir, utility_llm_call
+
+Layer 2: RunConfig(运行参数,每次 run 时指定)
+  ├─ 模型层:model, temperature, max_iterations, tools
+  └─ 框架层:trace_id, agent_type, uid, system_prompt, parent_trace_id, ...
+
+Layer 3: Messages(任务消息,OpenAI SDK 格式 List[Dict])
+  [{"role": "user", "content": "分析这张图的构图"}]
+```
+
+### RunConfig
+
 ```python
 ```python
-async def run(task: str, agent_type: str = "default") -> AsyncIterator[Union[Trace, Message]]:
-    # 1. 创建 Trace
-    trace = Trace.create(
-        mode="agent",
-        task=task,
-        agent_type=agent_type,
-        model=config.model
-    )
-    await store.create_trace(trace)
+@dataclass
+class RunConfig:
+    # 模型层参数
+    model: str = "gpt-4o"
+    temperature: float = 0.3
+    max_iterations: int = 200
+    tools: Optional[List[str]] = None          # None = 全部内置工具
+
+    # 框架层参数
+    agent_type: str = "default"
+    uid: Optional[str] = None
+    system_prompt: Optional[str] = None        # None = 从 skills 自动构建
+    enable_memory: bool = True
+    auto_execute_tools: bool = True
+    name: Optional[str] = None                 # 显示名称(空则由 utility_llm 自动生成)
+
+    # Trace 控制
+    trace_id: Optional[str] = None             # None = 新建
+    parent_trace_id: Optional[str] = None      # 子 Agent 专用
+    parent_goal_id: Optional[str] = None
+
+    # 续跑控制
+    insert_after: Optional[int] = None         # 回溯插入点(message sequence)
+```
+
+**实现**:`agent/core/runner.py:RunConfig`
+
+### 三种运行模式
+
+通过 RunConfig 参数自然区分,统一入口 `run(messages, config)`:
+
+| 模式 | trace_id | insert_after | messages 含义 |
+|------|----------|-------------|--------------|
+| 新建 | None | - | 初始任务消息 |
+| 续跑 | 已有 ID | None | 追加到末尾的新消息 |
+| 回溯 | 已有 ID | 指定 sequence | 在插入点之后追加的新消息 |
+
+### 执行流程
+
+```python
+async def run(messages: List[Dict], config: RunConfig = None) -> AsyncIterator[Union[Trace, Message]]:
+    # Phase 1: PREPARE TRACE
+    #   无 trace_id → 创建新 Trace(生成 name,初始化 GoalTree)
+    #   有 trace_id + 无 insert_after → 加载已有 Trace,状态置为 running
+    #   有 trace_id + 有 insert_after → 加载 Trace,执行 rewind(标记后续 msgs/goals 为 abandoned)
+    trace = await _prepare_trace(config)
     yield trace
     yield trace
 
 
-    # 2. 加载 Skills,构建 system prompt
-    skills = load_skills_from_dir(skills_dir)
-    system_prompt = build_system_prompt(skills)
-
-    # 3. 初始化
-    messages = [{"role": "user", "content": task}]
-
-    # 4. ReAct 循环
-    for step in range(max_iterations):
-        # 注入当前计划(如果有 goals)
-        if goal_tree.goals:
-            inject_plan(goal_tree.to_prompt())
-
-        # 调用 LLM
-        response = await llm.chat(
-            messages=messages,
-            system=system_prompt,
-            tools=tool_registry.to_schema()
-        )
-
-        # 按需自动创建 root goal:LLM 有 tool 调用但未主动创建目标时兜底
-        if not goal_tree.goals and response.tool_calls:
-            if "goal" not in [tc.name for tc in response.tool_calls]:
-                goal_tree.add_goals([mission[:200]])
-                goal_tree.focus(goal_tree.goals[0].id)
-
-        # 记录 assistant Message(goal_id = goal_tree.current_id)
-        await store.add_message(Message.create(
-            trace_id=trace.trace_id,
-            role="assistant",
-            sequence=next_seq,
-            goal_id=goal_tree.current_id,
-            content=response
-        ))
-        yield assistant_msg
-
-        # 没有工具调用,完成
-        if not response.tool_calls:
-            break
-
-        # 执行工具,记录 tool Message
-        for tool_call in response.tool_calls:
-            result = await execute_tool(tool_call)
-            await store.add_message(Message.create(
-                trace_id=trace.trace_id,
-                role="tool",
-                sequence=next_seq,
-                goal_id=goal_tree.current_id,
-                content=result
-            ))
-            yield tool_msg
-
-    # 5. 完成
+    # Phase 2: BUILD HISTORY
+    #   加载已有 active messages(续跑/回溯场景)
+    #   构建 system prompt(新建时注入 skills/experiences;续跑时复用已有)
+    #   追加 input messages
+    history, sequence = await _build_history(trace, messages, config)
+
+    # Phase 3: AGENT LOOP
+    for iteration in range(config.max_iterations):
+        # 周期性注入 GoalTree + Active Collaborators(每 10 轮)
+        if iteration % 10 == 0:
+            inject_context(goal_tree, collaborators)
+
+        response = await llm_call(messages=history, model=config.model, tools=tool_schemas)
+
+        # 按需自动创建 root goal(兜底)
+        # 记录 assistant Message
+        # 执行工具,记录 tool Messages
+        # 无 tool_calls 则 break
+
+    # Phase 4: COMPLETE
+    #   更新 Trace 状态 (completed/failed)
     trace.status = "completed"
     trace.status = "completed"
     yield trace
     yield trace
 ```
 ```
 
 
 **实现**:`agent/core/runner.py:AgentRunner`
 **实现**:`agent/core/runner.py:AgentRunner`
 
 
-### Runner 两种调用形态
+### 回溯(Rewind)
+
+回溯通过 `RunConfig(trace_id=..., insert_after=N)` 触发,在 Phase 1 中执行:
+
+1. **验证插入点**:确保不截断在 assistant(tool_calls) 和 tool response 之间
+2. **标记 Messages**:sequence > cutoff 的 messages 标记 `status="abandoned"`
+3. **处理 Goals**:已完成且消息均在插入点之前的保留,其余 abandon
+4. **记录事件**:events.jsonl 追加 `rewind` 事件
+5. **更新 Trace**:status 改回 running
+
+新消息的 sequence 从 `max(all_sequences) + 1` 开始,不复用被 abandon 的序号。
+
+### 调用接口
+
+三种模式共享同一入口 `run(messages, config)`:
+
+```python
+# 新建
+async for item in runner.run(
+    messages=[{"role": "user", "content": "分析项目架构"}],
+    config=RunConfig(model="gpt-4o"),
+):
+    ...
+
+# 续跑:在已有 trace 末尾追加消息继续执行
+async for item in runner.run(
+    messages=[{"role": "user", "content": "继续"}],
+    config=RunConfig(trace_id="existing-trace-id"),
+):
+    ...
+
+# 回溯:从指定 sequence 处切断,插入新消息重新执行
+# insert_after=5 表示保留 sequence ≤ 5 的消息,abandon 之后的,从此处开始
+async for item in runner.run(
+    messages=[{"role": "user", "content": "换一个方案试试"}],
+    config=RunConfig(trace_id="existing-trace-id", insert_after=5),
+):
+    ...
+```
+
+`insert_after` 的值是 message 的 `sequence` 号,可通过 `GET /api/traces/{trace_id}/messages` 查看。如果指定的 sequence 是一条带 `tool_calls` 的 assistant 消息,系统会自动将截断点扩展到其所有对应的 tool response 之后(安全截断)。
+
+- `run(messages, config)`:**核心方法**,流式返回 `AsyncIterator[Union[Trace, Message]]`
+- `run_result(messages, config)`:便利方法,内部消费 `run()`,返回结构化结果。主要用于 `subagent` 工具内部
 
 
-- `run(...)`:流式事件模式,返回 `AsyncIterator[Union[Trace, Message]]`
-- `run_result(...)`:结果模式,内部消费 `run(...)`,返回结构化结果(`status/summary/trace_id/stats/error`)
+### REST API
 
 
-`subagent` 工具默认使用 `run_result(...)`,并通过 `trace_id` 复用已创建或继承的子 Trace。
+操作型端点(需在 `api_server.py` 中配置 Runner)。执行在后台异步进行,通过 WebSocket 监听进度。
+
+| 方法 | 路径 | 说明 |
+|------|------|------|
+| POST | `/api/traces` | 新建 Trace 并执行 |
+| POST | `/api/traces/{id}/continue` | 续跑 |
+| POST | `/api/traces/{id}/rewind` | 回溯重放 |
+| GET  | `/api/traces/running` | 列出正在运行的 Trace |
+
+```bash
+# 新建
+curl -X POST http://localhost:8000/api/traces \
+  -H "Content-Type: application/json" \
+  -d '{"messages": [{"role": "user", "content": "分析项目架构"}], "model": "gpt-4o"}'
+
+# 续跑
+curl -X POST http://localhost:8000/api/traces/{trace_id}/continue \
+  -d '{"messages": [{"role": "user", "content": "继续深入分析"}]}'
+
+# 回溯:从 sequence 5 处截断,插入新消息重新执行
+curl -X POST http://localhost:8000/api/traces/{trace_id}/rewind \
+  -d '{"insert_after": 5, "messages": [{"role": "user", "content": "换一个方案"}]}'
+```
+
+响应立即返回 `{"trace_id": "...", "mode": "rewind", "status": "started"}`,通过 `WS /api/traces/{trace_id}/watch` 监听实时事件。
+
+**实现**:`agent/trace/run_api.py`
 
 
 ---
 ---
 
 
@@ -231,7 +320,7 @@ class Trace:
     model: Optional[str] = None              # 默认模型
     model: Optional[str] = None              # 默认模型
     tools: Optional[List[Dict]] = None       # 工具定义(OpenAI 格式)
     tools: Optional[List[Dict]] = None       # 工具定义(OpenAI 格式)
     llm_params: Dict[str, Any] = {}          # LLM 参数(temperature 等)
     llm_params: Dict[str, Any] = {}          # LLM 参数(temperature 等)
-    context: Dict[str, Any] = {}             # 其他元数据
+    context: Dict[str, Any] = {}             # 元数据(含 collaborators 列表)
 
 
     # 当前焦点
     # 当前焦点
     current_goal_id: Optional[str] = None
     current_goal_id: Optional[str] = None
@@ -306,6 +395,7 @@ class Message:
     trace_id: str
     trace_id: str
     role: Literal["system", "user", "assistant", "tool"]
     role: Literal["system", "user", "assistant", "tool"]
     sequence: int                            # 全局顺序
     sequence: int                            # 全局顺序
+    status: Literal["active", "abandoned"] = "active"  # 回溯时后续消息标记为 abandoned
     goal_id: Optional[str] = None            # 关联的 Goal ID(初始消息为 None,系统会按需自动创建 root goal 兜底)
     goal_id: Optional[str] = None            # 关联的 Goal ID(初始消息为 None,系统会按需自动创建 root goal 兜底)
     description: str = ""                    # 系统自动生成的摘要
     description: str = ""                    # 系统自动生成的摘要
     tool_call_id: Optional[str] = None
     tool_call_id: Optional[str] = None
@@ -321,8 +411,15 @@ class Message:
     finish_reason: Optional[str] = None
     finish_reason: Optional[str] = None
 
 
     created_at: datetime
     created_at: datetime
+    abandoned_at: Optional[datetime] = None  # 回溯标记时间
 ```
 ```
 
 
+Message 提供格式转换方法:
+- `to_llm_dict()` → OpenAI 格式 Dict(用于 LLM 调用)
+- `from_llm_dict(d, trace_id, sequence, goal_id)` → 从 OpenAI 格式创建 Message
+
+加载 messages 时,默认只返回 `status="active"` 的消息。
+
 **实现**:`agent/trace/models.py`
 **实现**:`agent/trace/models.py`
 
 
 ---
 ---
@@ -371,7 +468,9 @@ AGENT_PRESETS = {
 
 
 ## 子 Trace 机制
 ## 子 Trace 机制
 
 
-通过 `subagent` 工具创建子 Agent 执行任务,支持三种模式。
+通过 `subagent` 工具创建子 Agent 执行任务,支持三种模式。子 Agent 通过 `name` 参数命名,便于后续引用和续跑。
+
+`subagent` 工具负责创建 Sub-Trace 和初始化 GoalTree(因为需要设置自定义 context 元数据和命名规则),创建完成后将 `trace_id` 传给 `RunConfig`,由 Runner 接管后续执行。工具同时维护父 Trace 的 `context["collaborators"]` 列表。
 
 
 ### explore 模式
 ### explore 模式
 
 
@@ -388,7 +487,7 @@ AGENT_PRESETS = {
 
 
 - 创建单个 Sub-Trace
 - 创建单个 Sub-Trace
 - 完整工具权限(除 subagent 外,防止递归)
 - 完整工具权限(除 subagent 外,防止递归)
-- 支持 `continue_from` 参数继续执行
+- 支持通过 `name` 续跑已有子 Agent:`subagent(name="coder", task="继续")`
 
 
 ### evaluate 模式
 ### evaluate 模式
 
 
@@ -407,7 +506,55 @@ AGENT_PRESETS = {
 创建阻塞式 Trace,等待人类通过 IM/邮件等渠道回复。
 创建阻塞式 Trace,等待人类通过 IM/邮件等渠道回复。
 
 
 **注意**:此功能规划中,暂未实现。
 **注意**:此功能规划中,暂未实现。
-**注意**:此功能规划中,暂未实现。
+
+---
+
+## Active Collaborators(活跃协作者)
+
+任务执行中与模型密切协作的实体(子 Agent 或人类),按 **与当前任务的关系** 分类,而非按 human/agent 分类:
+
+| | 持久存在(外部可查) | 任务内活跃(需要注入) |
+|---|---|---|
+| Agent | 专用 Agent(代码审查等) | 当前任务创建的子 Agent |
+| Human | 飞书通讯录 | 当前任务中正在对接的人 |
+
+### 数据模型
+
+活跃协作者存储在 `trace.context["collaborators"]`:
+
+```python
+{
+    "name": "researcher",            # 名称(模型可见)
+    "type": "agent",                 # agent | human
+    "trace_id": "abc-@delegate-001", # trace_id(agent 场景)
+    "status": "completed",           # running | waiting | completed | failed
+    "summary": "方案A最优",          # 最近状态摘要
+}
+```
+
+### 注入方式
+
+与 GoalTree 一同周期性注入(每 10 轮),渲染为 Markdown:
+
+```markdown
+## Active Collaborators
+- researcher [agent, completed]: 方案A最优
+- 谭景玉 [human, waiting]: 已发送方案确认,等待回复
+- coder [agent, running]: 正在实现特征提取模块
+```
+
+列表为空时不注入。
+
+### 维护
+
+各工具负责更新 collaborators 列表(通过 `context["store"]` 写入 trace.context):
+- `subagent` 工具:创建/续跑子 Agent 时更新
+- `feishu` 工具:发送消息/收到回复时更新
+- Runner 只负责读取和注入
+
+**持久联系人/Agent**:通过工具按需查询(如 `feishu_get_contact_list`),不随任务注入。
+
+**实现**:`agent/core/runner.py:AgentRunner._build_context_injection`, `agent/tools/builtin/subagent.py`
 
 
 ---
 ---
 
 
@@ -555,8 +702,10 @@ class TraceStore(Protocol):
     async def get_trace(self, trace_id: str) -> Trace: ...
     async def get_trace(self, trace_id: str) -> Trace: ...
     async def update_trace(self, trace_id: str, **updates) -> None: ...
     async def update_trace(self, trace_id: str, **updates) -> None: ...
     async def add_message(self, message: Message) -> None: ...
     async def add_message(self, message: Message) -> None: ...
-    async def get_messages(self, trace_id: str) -> List[Message]: ...
+    async def get_trace_messages(self, trace_id: str, include_abandoned: bool = False) -> List[Message]: ...
     async def get_messages_by_goal(self, trace_id: str, goal_id: str) -> List[Message]: ...
     async def get_messages_by_goal(self, trace_id: str, goal_id: str) -> List[Message]: ...
+    async def abandon_messages_after(self, trace_id: str, cutoff_sequence: int) -> List[str]: ...
+    async def append_event(self, trace_id: str, event_type: str, payload: Dict) -> int: ...
 ```
 ```
 
 
 **实现**:
 **实现**:
@@ -582,7 +731,7 @@ class TraceStore(Protocol):
 **events.jsonl 说明**:
 **events.jsonl 说明**:
 - 记录 Trace 执行过程中的关键事件
 - 记录 Trace 执行过程中的关键事件
 - 每行一个 JSON 对象,包含 event_id、event 类型、时间戳等
 - 每行一个 JSON 对象,包含 event_id、event 类型、时间戳等
-- 主要事件类型:goal_added, goal_updated, sub_trace_started, sub_trace_completed
+- 主要事件类型:goal_added, goal_updated, sub_trace_started, sub_trace_completed, rewind
 - 用于实时监控和历史回放
 - 用于实时监控和历史回放
 
 
 **Sub-Trace 目录命名**:
 **Sub-Trace 目录命名**:
@@ -601,7 +750,11 @@ class TraceStore(Protocol):
   "model": "google/gemini-2.5-flash",
   "model": "google/gemini-2.5-flash",
   "tools": [...],
   "tools": [...],
   "llm_params": {"temperature": 0.3},
   "llm_params": {"temperature": 0.3},
-  "context": {},
+  "context": {
+    "collaborators": [
+      {"name": "researcher", "type": "agent", "trace_id": "...", "status": "completed", "summary": "方案A最优"}
+    ]
+  },
   "current_goal_id": "3"
   "current_goal_id": "3"
 }
 }
 ```
 ```

+ 76 - 14
docs/decisions.md

@@ -463,18 +463,6 @@ Step 工具等核心功能如何让 Agent 知道?
 
 
 ---
 ---
 
 
-## 总结
-
-这些设计决策的核心原则:
-
-1. **灵活性优先**:大多数特性都是可选的,保持系统简洁
-2. **Token 效率**:通过动态加载、双层记忆等机制优化 context 使用
-3. **可扩展性**:通过 Protocol 定义接口,便于后期切换实现
-4. **安全性**:敏感数据占位符、域名匹配等机制保护隐私
-5. **可观测性**:内建统计、完整追踪,便于监控和调试
-
----
-
 ## 10. 删除未使用的结构化错误功能
 ## 10. 删除未使用的结构化错误功能
 
 
 **日期**: 2026-02-03
 **日期**: 2026-02-03
@@ -635,7 +623,7 @@ execution trace v2.0 引入了 Blob 存储系统用于处理大输出和图片
 
 
 ---
 ---
 
 
-## Subagent 工具设计
+## 14. Subagent 工具设计
 
 
 ### 决策
 ### 决策
 
 
@@ -667,7 +655,7 @@ execution trace v2.0 引入了 Blob 存储系统用于处理大输出和图片
 
 
 ---
 ---
 
 
-## Goal 按需自动创建
+## 15. Goal 按需自动创建
 
 
 **日期**: 2026-02-10
 **日期**: 2026-02-10
 
 
@@ -712,3 +700,77 @@ Agent(含 sub-agent)有时不创建 goal 就直接执行工具调用,导
 
 
 ---
 ---
 
 
+## 16. Runner 重新设计:参数分层与统一执行入口
+
+**日期**: 2026-02-11
+
+### 问题
+
+原 `AgentRunner.run()` 存在以下问题:
+1. 签名臃肿(13 个参数),运行参数与任务内容混在一起
+2. `task` 字符串同时充当 user message、GoalTree mission、trace.task
+3. 新建/续跑逻辑通过 `if trace_id:` 分支耦合在一起
+4. `subagent.py` 越权管理 Trace 生命周期(创建 Trace、GoalTree 等本该由 Runner 处理的事务)
+5. 不支持回溯重跑(rewind)
+6. Agent 间通信的消息格式不统一
+
+### 决策
+
+**选择:参数分层 + 统一 `run(messages, config)` 入口**
+
+**参数分三层**:
+- **Infrastructure**(AgentRunner 构造时):trace_store, llm_call 等基础设施
+- **RunConfig**(每次 run 时):model, temperature, trace_id, insert_after 等运行参数
+- **Messages**(OpenAI SDK 格式):`List[Dict]` 任务消息,支持多模态
+
+**三种模式通过 RunConfig 区分**:
+- 新建:`trace_id=None`
+- 续跑:`trace_id=已有ID, insert_after=None`
+- 回溯:`trace_id=已有ID, insert_after=N`
+
+**回溯机制**:Message 新增 `status` 字段(`active`/`abandoned`),插入点之后的消息标记为 abandoned,goals 按规则 abandon/保留。
+
+**任务命名**:RunConfig.name 可选指定,未指定时由 utility_llm(小模型)自动生成标题。
+
+**活跃协作者注入**:GoalTree 和 collaborators 信息每 10 轮注入一次(非每轮),减少 context 开销。
+
+**Subagent 简化**:subagent 工具仍负责创建 Sub-Trace 和 GoalTree(需要自定义元数据和命名规则),然后将 trace_id 传给 RunConfig,由 Runner 接管执行。工具同时维护 `trace.context["collaborators"]` 列表。
+
+### 理由
+
+1. **OpenAI 格式统一**:Agent 间传递消息用标准格式,兼容各种 LLM API
+2. **职责清晰**:Runner 管 Trace 生命周期,工具只管业务逻辑
+3. **可组合**:新建/续跑/回溯共享同一个执行流水线,差异仅在 Phase 1
+4. **回溯能力**:支持从任意断点插入消息重新运行,原始数据保留(标记而非删除)
+
+**实现**:`agent/core/runner.py`, `agent/trace/models.py`, `agent/tools/builtin/subagent.py`
+
+---
+
+## 17. Active Collaborators:活跃协作者机制
+
+**日期**: 2026-02-11
+
+### 问题
+
+模型需要知道当前任务中有哪些可以交互的实体(子 Agent、正在对接的人类),但不应该把所有持久联系人都注入到 context。
+
+### 决策
+
+**选择:按任务关系分类,活跃协作者随 GoalTree 注入**
+
+按"与当前任务的关系"(而非 human/agent)分两类:
+- **持久存在**:通过工具按需查询(如 `feishu_get_contact_list`)
+- **任务内活跃**:存 `trace.context["collaborators"]`,周期性注入到 LLM 上下文
+
+各工具(subagent、feishu 等)负责维护 collaborators 列表,Runner 只负责读取和注入。
+
+### 理由
+
+1. **维度正确**:人和 Agent 都可能是持久或任务内活跃的,不应按类型一刀切
+2. **开销可控**:只注入活跃协作者(通常 2-5 个),不浪费 context
+3. **可扩展**:未来新增通信渠道只需在对应工具中更新 collaborators 即可
+
+**实现**:`agent/core/runner.py:AgentRunner._build_context_injection`
+
+---

+ 9 - 8
examples/feature_extract/run.py

@@ -16,7 +16,7 @@ from dotenv import load_dotenv
 load_dotenv()
 load_dotenv()
 
 
 from agent.llm.prompts import SimplePrompt
 from agent.llm.prompts import SimplePrompt
-from agent.core.runner import AgentRunner
+from agent.core.runner import AgentRunner, RunConfig
 from agent.trace import (
 from agent.trace import (
     FileSystemTraceStore,
     FileSystemTraceStore,
     Trace,
     Trace,
@@ -100,13 +100,14 @@ async def main():
     current_trace_id = None  # 保存 trace_id 用于后续测试
     current_trace_id = None  # 保存 trace_id 用于后续测试
 
 
     async for item in runner.run(
     async for item in runner.run(
-        task="[图片和特征描述已包含在 messages 中]",  # 占位符
-        messages=[user_message_with_image],  # 传入包含图片的用户消息
-        system_prompt=system_prompt,
-        model="anthropic/claude-sonnet-4.5",  # OpenRouter 模型名称
-        temperature=float(prompt.config.get('temperature', 0.3)),
-        max_iterations=1000,
-        # tools 参数不传入,测试自动加载内置工具
+        messages=[user_message_with_image],
+        config=RunConfig(
+            system_prompt=system_prompt,
+            model="anthropic/claude-sonnet-4.5",
+            temperature=float(prompt.config.get('temperature', 0.3)),
+            max_iterations=1000,
+            name="特征提取任务",
+        ),
     ):
     ):
         # 处理 Trace 对象(整体状态变化)
         # 处理 Trace 对象(整体状态变化)
         if isinstance(item, Trace):
         if isinstance(item, Trace):

+ 1 - 1
examples/feature_extract/test.prompt

@@ -1,5 +1,5 @@
 ---
 ---
-model: gemini-2.5-flash
+model: sonnet-4.5
 temperature: 0.3
 temperature: 0.3
 ---
 ---
 
 

+ 8 - 6
examples/research/run.py

@@ -25,7 +25,7 @@ logging.getLogger("agent.core.message_manager").setLevel(logging.INFO)  # 开启
 logging.getLogger("tools").setLevel(logging.INFO)  # 开启工具日志
 logging.getLogger("tools").setLevel(logging.INFO)  # 开启工具日志
 
 
 from agent.llm.prompts import SimplePrompt
 from agent.llm.prompts import SimplePrompt
-from agent.core.runner import AgentRunner
+from agent.core.runner import AgentRunner, RunConfig
 from agent.trace import (
 from agent.trace import (
     FileSystemTraceStore,
     FileSystemTraceStore,
     Trace,
     Trace,
@@ -87,12 +87,14 @@ async def main():
         print()
         print()
 
 
         async for item in runner.run(
         async for item in runner.run(
-            task=user_task,
             messages=messages,
             messages=messages,
-            system_prompt=system_prompt,
-            model=f"google/{model_name}",
-            temperature=temperature,
-            max_iterations=20,
+            config=RunConfig(
+                system_prompt=system_prompt,
+                model=f"google/{model_name}",
+                temperature=temperature,
+                max_iterations=20,
+                name=user_task[:50],
+            ),
         ):
         ):
             # 处理 Trace 对象(整体状态变化)
             # 处理 Trace 对象(整体状态变化)
             if isinstance(item, Trace):
             if isinstance(item, Trace):

+ 37 - 196
examples/subagent_example.py

@@ -2,228 +2,69 @@
 Sub-Agent 使用示例
 Sub-Agent 使用示例
 
 
 演示如何使用 Sub-Agent 机制处理复杂任务。
 演示如何使用 Sub-Agent 机制处理复杂任务。
+
+注意:本示例中的 AgentDefinition 和 get_agent_registry 尚未实现,
+此处仅用于演示未来的设计方向。当前可用的 subagent 功能通过
+runner.run(messages, config=RunConfig(...)) 和工具层的 subagent 工具实现。
 """
 """
 
 
 import asyncio
 import asyncio
 import os
 import os
-from agent import AgentRunner, AgentConfig
+from agent import AgentRunner
+from agent.core.runner import RunConfig
+from agent.trace import Trace, Message
 from agent.llm import create_gemini_llm_call
 from agent.llm import create_gemini_llm_call
-from agent.agent_registry import get_agent_registry
-from agent.models.agent import AgentDefinition
 
 
 
 
 async def example_basic_subagent():
 async def example_basic_subagent():
-    """示例 1: 使用内置 Sub-Agent"""
-    print("=== 示例 1: 使用内置 Sub-Agent ===\n")
-
-    # 配置主 Agent
-    config = AgentConfig(
-        agent_type="primary",
-        max_iterations=20,
-        skills_dir="./skills",
-    )
+    """示例 1: 使用 Agent 执行任务(通过 subagent 工具自动委托子任务)"""
+    print("=== 示例 1: 基本 Agent 执行 ===\n")
 
 
     runner = AgentRunner(
     runner = AgentRunner(
         llm_call=create_gemini_llm_call(os.getenv("GEMINI_API_KEY")),
         llm_call=create_gemini_llm_call(os.getenv("GEMINI_API_KEY")),
-        config=config,
     )
     )
 
 
-    # 主 Agent 会自动知道如何使用 Task 工具启动 Sub-Agent
     task = """
     task = """
     分析这个 Python 项目的架构:
     分析这个 Python 项目的架构:
     1. 找出所有主要的模块和它们的职责
     1. 找出所有主要的模块和它们的职责
     2. 识别核心的数据流
     2. 识别核心的数据流
     3. 列出使用的外部依赖
     3. 列出使用的外部依赖
 
 
-    请使用 explore sub-agent 来探索代码库。
-    """
-
-    async for event in runner.run(task=task, model="gemini-2.0-flash-exp"):
-        if event.type == "tool_call" and event.data.get("tool") == "task":
-            print(f"🚀 启动 Sub-Agent: {event.data['args']['subagent_type']}")
-            print(f"   任务: {event.data['args']['description']}")
-
-        elif event.type == "tool_result" and event.data.get("tool") == "task":
-            metadata = event.data.get("metadata", {})
-            print(f"✅ Sub-Agent 完成")
-            print(f"   Sub-Trace ID: {metadata.get('sub_trace_id')}")
-            print(f"   工具调用: {metadata.get('tool_summary')}")
-
-        elif event.type == "conclusion":
-            print(f"\n📋 最终结果:\n{event.data['content']}")
-
-
-async def example_custom_subagent():
-    """示例 2: 定义和使用自定义 Sub-Agent"""
-    print("\n=== 示例 2: 自定义 Sub-Agent ===\n")
-
-    # 1. 定义自定义 Sub-Agent
-    security_scanner = AgentDefinition(
-        name="security-scanner",
-        description="安全扫描专家,查找常见漏洞和安全问题",
-        mode="subagent",
-        allowed_tools=[
-            "read_file",
-            "search_code",
-            "list_files",
-        ],
-        denied_tools=[
-            "write_file",
-            "edit_file",
-            "execute_bash",
-        ],
-        system_prompt="""你是一个安全扫描专家。专注于:
-
-1. 查找常见安全漏洞:
-   - SQL 注入
-   - XSS 攻击
-   - CSRF 漏洞
-   - 不安全的序列化
-   - 路径遍历
-
-2. 检查敏感信息泄露:
-   - 硬编码的密钥和密码
-   - API token
-   - 数据库凭据
-
-3. 依赖项安全:
-   - 过时的依赖
-   - 已知漏洞的包
-
-输出格式:
-- **漏洞类型**: [类型名称]
-- **严重程度**: [高/中/低]
-- **位置**: [文件路径:行号]
-- **描述**: [详细说明]
-- **修复建议**: [如何修复]
-""",
-        max_iterations=25,
-        temperature=0.2,  # 更精确的分析
-    )
-
-    # 2. 注册到全局注册表
-    get_agent_registry().register(security_scanner)
-    print(f"✓ 注册自定义 Sub-Agent: {security_scanner.name}")
-
-    # 3. 使用自定义 Sub-Agent
-    config = AgentConfig(
-        agent_type="primary",
-        max_iterations=20,
-    )
-
-    runner = AgentRunner(
-        llm_call=create_gemini_llm_call(os.getenv("GEMINI_API_KEY")),
-        config=config,
-    )
-
-    task = """
-    对这个项目进行安全审计,使用 security-scanner sub-agent。
-    重点关注:
-    1. 认证和授权相关代码
-    2. 数据库交互代码
-    3. 用户输入处理
+    请使用 subagent explore 模式来探索代码库。
     """
     """
 
 
-    async for event in runner.run(task=task, model="gemini-2.0-flash-exp"):
-        if event.type == "tool_call" and event.data.get("tool") == "task":
-            print(f"🔍 启动安全扫描...")
-
-        elif event.type == "conclusion":
-            print(f"\n🔒 安全审计报告:\n{event.data['content']}")
-
-
-async def example_multiple_subagents():
-    """示例 3: 并行使用多个 Sub-Agent"""
-    print("\n=== 示例 3: 并行使用多个 Sub-Agent ===\n")
-
-    config = AgentConfig(
-        agent_type="primary",
-        max_iterations=30,
-    )
-
-    runner = AgentRunner(
-        llm_call=create_gemini_llm_call(os.getenv("GEMINI_API_KEY")),
-        config=config,
-    )
-
-    task = """
-    完整分析这个项目:
-
-    1. 使用 explore sub-agent 探索代码结构
-    2. 使用 analyst sub-agent 分析技术栈和依赖
-    3. 综合两个分析结果,给出项目概览
-
-    可以并行启动多个 sub-agent 来加快速度。
-    """
-
-    subagent_results = {}
-
-    async for event in runner.run(task=task, model="gemini-2.0-flash-exp"):
-        if event.type == "tool_call" and event.data.get("tool") == "task":
-            agent_type = event.data['args']['subagent_type']
-            print(f"🔄 启动 Sub-Agent: {agent_type}")
-
-        elif event.type == "tool_result" and event.data.get("tool") == "task":
-            metadata = event.data.get("metadata", {})
-            agent_type = metadata.get("subagent_type")
-            subagent_results[agent_type] = {
-                "sub_trace_id": metadata.get("sub_trace_id"),
-                "summary": metadata.get("tool_summary"),
-            }
-            print(f"✅ {agent_type} 完成: {metadata.get('tool_summary')}")
-
-        elif event.type == "conclusion":
-            print(f"\n📊 综合分析报告:\n{event.data['content']}")
-            print(f"\n📈 Sub-Agent 使用统计:")
-            for agent_type, result in subagent_results.items():
-                print(f"  - {agent_type}: {result['summary']}")
-
-
-async def example_subagent_config_file():
-    """示例 4: 从配置文件加载 Sub-Agent"""
-    print("\n=== 示例 4: 配置文件方式 ===\n")
-
-    # 1. 从配置文件加载
-    # 假设有 sub_agents.json:
-    # {
-    #   "agents": {
-    #     "code-reviewer": {
-    #       "description": "代码审查专家",
-    #       "mode": "subagent",
-    #       "allowed_tools": ["read_file", "search_code"],
-    #       "system_prompt": "你是代码审查专家..."
-    #     }
-    #   }
-    # }
-
-    registry = get_agent_registry()
-
-    # 如果配置文件存在
-    config_path = "sub_agents.json"
-    if os.path.exists(config_path):
-        registry.load_from_config(config_path)
-        print(f"✓ 从 {config_path} 加载 Agent 配置")
-
-    # 2. 列出所有可用的 Sub-Agent
-    print("\n可用的 Sub-Agent:")
-    for agent in registry.list_subagents():
-        print(f"  - {agent.name}: {agent.description}")
+    async for item in runner.run(
+        messages=[{"role": "user", "content": task}],
+        config=RunConfig(
+            model="gemini-2.0-flash-exp",
+            max_iterations=20,
+            name="项目架构分析",
+        ),
+    ):
+        if isinstance(item, Trace):
+            if item.status == "running":
+                print(f"[Trace] 开始: {item.trace_id[:8]}")
+            elif item.status == "completed":
+                print(f"[Trace] 完成 (tokens: {item.total_tokens})")
+        elif isinstance(item, Message):
+            if item.role == "assistant":
+                content = item.content
+                if isinstance(content, dict):
+                    text = content.get("text", "")
+                    tool_calls = content.get("tool_calls")
+                    if tool_calls:
+                        for tc in tool_calls:
+                            tool_name = tc.get("function", {}).get("name", "")
+                            if tool_name == "subagent":
+                                print(f"  启动 Sub-Agent...")
+                    elif text:
+                        print(f"\n最终结果:\n{text[:500]}")
 
 
 
 
 async def main():
 async def main():
-    """运行所有示例"""
-    # 示例 1: 基本使用
+    """运行示例"""
     await example_basic_subagent()
     await example_basic_subagent()
 
 
-    # 示例 2: 自定义 Sub-Agent
-    await example_custom_subagent()
-
-    # 示例 3: 并行使用多个 Sub-Agent
-    await example_multiple_subagents()
-
-    # 示例 4: 配置文件方式
-    await example_subagent_config_file()
-
 
 
 if __name__ == "__main__":
 if __name__ == "__main__":
     asyncio.run(main())
     asyncio.run(main())

+ 8 - 6
examples/test_subagent_real/run.py

@@ -19,7 +19,7 @@ from dotenv import load_dotenv
 load_dotenv()
 load_dotenv()
 
 
 from agent.llm.prompts import SimplePrompt
 from agent.llm.prompts import SimplePrompt
-from agent.core.runner import AgentRunner
+from agent.core.runner import AgentRunner, RunConfig
 from agent.trace import (
 from agent.trace import (
     FileSystemTraceStore,
     FileSystemTraceStore,
     Trace,
     Trace,
@@ -83,12 +83,14 @@ async def main():
     subagent_calls = []
     subagent_calls = []
 
 
     async for item in runner.run(
     async for item in runner.run(
-        task=user_task,
         messages=messages,
         messages=messages,
-        system_prompt=system_prompt,
-        model=f"google/{model_name}",
-        temperature=temperature,
-        max_iterations=30,  # 增加迭代次数以支持多个 subagent 调用
+        config=RunConfig(
+            system_prompt=system_prompt,
+            model=f"google/{model_name}",
+            temperature=temperature,
+            max_iterations=30,
+            name=user_task[:50],
+        ),
     ):
     ):
         # 处理 Trace 对象
         # 处理 Trace 对象
         if isinstance(item, Trace):
         if isinstance(item, Trace):