Преглед на файлове

Merge branch 'main' of https://git.yishihui.com/howard/Agent

max_liu преди 3 седмици
родител
ревизия
02a445eb97

+ 8 - 0
.env.template

@@ -0,0 +1,8 @@
+# 完成配置后,将 .env.template 重命名为 .env
+
+
+# OpenRouter API Key
+OPEN_ROUTER_API_KEY=
+
+# BrowserUse API Key
+BROWSER_USE_API_KEY=

+ 1 - 1
.gitignore

@@ -52,7 +52,7 @@ Thumbs.db
 .env
 debug.log
 info.log
-.browser_use_files
+.cache
 output
 
 

+ 215 - 0
README.md

@@ -0,0 +1,215 @@
+# Reson Agent
+
+可扩展的 Agent 框架。支持多步工具调用、计划管理、子 Agent 协作、回溯重跑和上下文压缩。
+
+## Quick Start
+
+```bash
+pip install -r requirements.txt
+
+# 配置 LLM API Key
+cp .env.template .env  # 编辑填入 API Key
+```
+
+### 最小示例
+
+```python
+import asyncio
+from agent import AgentRunner, RunConfig
+from agent.trace import FileSystemTraceStore
+from agent.llm import create_openrouter_llm_call
+
+runner = AgentRunner(
+    trace_store=FileSystemTraceStore(base_path=".trace"),
+    llm_call=create_openrouter_llm_call(model="anthropic/claude-sonnet-4.5"),
+)
+
+async def main():
+    async for item in runner.run(
+        messages=[{"role": "user", "content": "列出当前目录的文件"}],
+        config=RunConfig(model="anthropic/claude-sonnet-4.5"),
+    ):
+        print(item)
+
+asyncio.run(main())
+```
+
+## 自定义工具
+
+用 `@tool` 装饰器注册。`RunConfig(tools=None)`(默认)时所有已注册工具自动对 LLM 可用,无需额外配置。
+
+```python
+from agent import tool, ToolResult
+
+@tool(description="查询产品库存")
+async def check_inventory(product_id: str, warehouse: str = "default") -> ToolResult:
+    """查询指定仓库的产品库存
+
+    Args:
+        product_id: 产品唯一标识符
+        warehouse: 仓库编码,默认为主仓库
+    """
+    stock = await query_db(product_id, warehouse)
+    return ToolResult(output=f"库存: {stock}")
+
+# 确保此模块在 runner.run() 之前被 import
+```
+
+**注意**: `@tool` 通过副作用注册到全局 registry,必须确保定义工具的模块在调用 `runner.run()` 前被 import。
+
+### 参数 Schema 生成
+
+框架从函数签名和 docstring 自动生成 OpenAI Tool Schema,无需手写 JSON:
+
+- **参数类型**:从类型注解推断(`str`/`int`/`float`/`bool`/`list`/`dict`,支持 `Optional`、`Literal`、`List[T]`)
+- **参数描述**:从 Google 风格 docstring 的 `Args:` 段提取
+- **必填/可选**:有默认值的参数为可选,否则为必填
+- **工具描述**:优先使用 `@tool(description=...)` 参数,其次取 docstring 首行
+- `uid` 和 `context` 参数由框架自动注入,不会出现在 Schema 中
+
+上面的 `check_inventory` 会生成:
+
+```json
+{
+  "type": "function",
+  "function": {
+    "name": "check_inventory",
+    "description": "查询产品库存",
+    "parameters": {
+      "type": "object",
+      "properties": {
+        "product_id": {"type": "string", "description": "产品唯一标识符"},
+        "warehouse": {"type": "string", "description": "仓库编码,默认为主仓库", "default": "default"}
+      },
+      "required": ["product_id"]
+    }
+  }
+}
+```
+
+### 限制工具范围
+
+```python
+# 启用内置工具 + 指定工具(在内置工具基础上追加;其余已注册的自定义工具不会启用)
+config = RunConfig(tools=["check_inventory", "another_tool"])
+```
+
+## 自定义 Skills
+
+Skills 是 Markdown 文件,提供领域知识,注入到 system prompt。
+
+```
+my_project/
+└── skills/
+    └── my_domain.md
+```
+
+```markdown
+---
+name: my-domain-skill
+description: 领域专属知识
+---
+
+## Guidelines
+- 规则 1
+- 规则 2
+```
+
+```python
+runner = AgentRunner(
+    llm_call=...,
+    trace_store=...,
+    skills_dir="./skills",  # 指向你的 skills 目录
+)
+```
+
+内置 skills(`agent/memory/skills/`)始终自动加载,`skills_dir` 的内容额外追加。
+
+## AgentRunner 参数
+
+```python
+AgentRunner(
+    llm_call,                # 必需:LLM 调用函数
+    trace_store=None,        # Trace 持久化(推荐 FileSystemTraceStore)
+    tool_registry=None,      # 工具注册表(默认:全局 registry)
+    skills_dir=None,         # 自定义 skills 目录
+    experiences_path="./cache/experiences.md",  # 经验文件路径
+    memory_store=None,       # 记忆存储
+    utility_llm_call=None,   # 轻量 LLM(生成任务标题等)
+)
+```
+
+## RunConfig 参数
+
+```python
+RunConfig(
+    model="gpt-4o",          # 模型标识
+    temperature=0.3,
+    max_iterations=200,       # Agent loop 最大轮数
+    tools=None,               # None=全部已注册工具,List[str]=内置+指定工具
+    system_prompt=None,       # None=从 skills 自动构建
+    agent_type="default",     # 预设类型:default / explore / analyst
+    trace_id=None,            # 续跑/回溯时传入已有 trace ID
+    insert_after=None,        # 回溯插入点(message sequence)
+)
+```
+
+## LLM Providers
+
+框架内置两个 provider:
+
+```python
+from agent.llm import create_openrouter_llm_call, create_gemini_llm_call
+
+# OpenRouter(支持多种模型)
+llm = create_openrouter_llm_call(model="anthropic/claude-sonnet-4.5")
+
+# Google Gemini
+llm = create_gemini_llm_call(model="gemini-2.5-flash")
+```
+
+自定义 provider 只需实现签名:
+
+```python
+async def my_llm_call(messages, model, tools, temperature, **kwargs) -> dict:
+    # 调用你的 LLM
+    return {
+        "content": "...",
+        "tool_calls": [...] or None,
+        "prompt_tokens": 100,
+        "completion_tokens": 50,
+        "cost": 0.001,
+        "finish_reason": "stop",
+    }
+```
+
+## API Server
+
+```bash
+python api_server.py
+```
+
+| 方法 | 路径 | 说明 |
+|------|------|------|
+| GET | `/api/traces` | 列出 Traces |
+| GET | `/api/traces/{id}` | Trace 详情 |
+| GET | `/api/traces/{id}/messages` | 消息列表 |
+| POST | `/api/traces` | 新建并执行 |
+| POST | `/api/traces/{id}/run` | 续跑/回溯 |
+| POST | `/api/traces/{id}/stop` | 停止 |
+| WS | `/api/traces/{id}/watch` | 实时事件 |
+
+需在 `api_server.py` 中配置 Runner 才能启用 POST 端点。
+
+## 项目结构
+
+```
+agent/
+├── core/           # AgentRunner + 预设
+├── tools/          # 工具系统(registry + 内置工具)
+├── trace/          # 执行追踪 + 计划(GoalTree)+ API
+├── memory/         # Skills + Experiences
+└── llm/            # LLM Provider 适配
+```
+
+详细架构文档:[docs/README.md](./docs/README.md)

+ 4 - 1
agent/__init__.py

@@ -15,7 +15,7 @@ from agent.core.runner import AgentRunner, AgentConfig, CallResult, RunConfig
 from agent.core.presets import AgentPreset, AGENT_PRESETS, get_preset
 
 # 执行追踪
-from agent.trace.models import Trace, Message, Step, StepType, StepStatus
+from agent.trace.models import Trace, Message, Step, StepType, StepStatus, ChatMessage, Messages, MessageContent
 from agent.trace.goal_models import Goal, GoalTree, GoalStatus
 from agent.trace.protocols import TraceStore
 from agent.trace.store import FileSystemTraceStore
@@ -43,6 +43,9 @@ __all__ = [
     # Trace
     "Trace",
     "Message",
+    "ChatMessage",
+    "Messages",
+    "MessageContent",
     "Step",
     "StepType",
     "StepStatus",

+ 198 - 92
agent/core/runner.py

@@ -14,8 +14,10 @@ Agent Runner - Agent 执行引擎
 - Messages: OpenAI SDK 格式的任务消息
 """
 
+import asyncio
 import json
 import logging
+import os
 import uuid
 from dataclasses import dataclass, field
 from datetime import datetime
@@ -24,7 +26,7 @@ from typing import AsyncIterator, Optional, Dict, Any, List, Callable, Literal,
 from agent.trace.models import Trace, Message
 from agent.trace.protocols import TraceStore
 from agent.trace.goal_models import GoalTree
-from agent.memory.models import Experience, Skill
+from agent.memory.models import Skill
 from agent.memory.protocols import MemoryStore, StateStore
 from agent.memory.skill_loader import load_skills_from_dir
 from agent.tools import ToolRegistry, get_tool_registry
@@ -45,7 +47,7 @@ class RunConfig:
     model: str = "gpt-4o"
     temperature: float = 0.3
     max_iterations: int = 200
-    tools: Optional[List[str]] = None          # None = 全部内置工具
+    tools: Optional[List[str]] = None          # None = 全部已注册工具
 
     # --- 框架层参数 ---
     agent_type: str = "default"
@@ -83,7 +85,8 @@ BUILTIN_TOOLS = [
     "skill",
     "list_skills",
     "goal",
-    "subagent",
+    "agent",
+    "evaluate",
 
     # 搜索工具
     "search_posts",
@@ -113,12 +116,15 @@ BUILTIN_TOOLS = [
     "browser_select_dropdown_option",
     "browser_extract_content",
     "browser_read_long_content",
+    "browser_download_direct_url",
     "browser_get_page_html",
-    "browser_get_selector_map",
+    "browser_get_visual_selector_map",
     "browser_evaluate",
     "browser_ensure_login_with_cookies",
     "browser_wait_for_user_action",
     "browser_done",
+    "browser_export_cookies",
+    "browser_load_cookies"
 ]
 
 
@@ -169,6 +175,7 @@ class AgentRunner:
         utility_llm_call: Optional[Callable] = None,
         config: Optional[AgentConfig] = None,
         skills_dir: Optional[str] = None,
+        experiences_path: Optional[str] = "./cache/experiences.md",
         goal_tree: Optional[GoalTree] = None,
         debug: bool = False,
     ):
@@ -184,6 +191,7 @@ class AgentRunner:
             utility_llm_call: 轻量 LLM(用于生成任务标题等),可选
             config: [向后兼容] AgentConfig
             skills_dir: Skills 目录路径
+            experiences_path: 经验文件路径(默认 ./cache/experiences.md)
             goal_tree: 初始 GoalTree(可选)
             debug: 保留参数(已废弃)
         """
@@ -195,8 +203,10 @@ class AgentRunner:
         self.utility_llm_call = utility_llm_call
         self.config = config or AgentConfig()
         self.skills_dir = skills_dir
+        self.experiences_path = experiences_path
         self.goal_tree = goal_tree
         self.debug = debug
+        self._cancel_events: Dict[str, asyncio.Event] = {}  # trace_id → cancel event
 
     # ===== 核心公开方法 =====
 
@@ -227,12 +237,16 @@ class AgentRunner:
         try:
             # Phase 1: PREPARE TRACE
             trace, goal_tree, sequence = await self._prepare_trace(messages, config)
+            # 注册取消事件
+            self._cancel_events[trace.trace_id] = asyncio.Event()
             yield trace
 
             # Phase 2: BUILD HISTORY
-            history, sequence, created_messages = await self._build_history(
+            history, sequence, created_messages, head_seq = await self._build_history(
                 trace.trace_id, messages, goal_tree, config, sequence
             )
+            # Update trace's head_sequence in memory
+            trace.head_sequence = head_seq
             for msg in created_messages:
                 yield msg
 
@@ -254,6 +268,10 @@ class AgentRunner:
                 if trace_obj:
                     yield trace_obj
             raise
+        finally:
+            # 清理取消事件
+            if trace:
+                self._cancel_events.pop(trace.trace_id, None)
 
     async def run_result(
         self,
@@ -263,7 +281,7 @@ class AgentRunner:
         """
         结果模式 — 消费 run(),返回结构化结果。
 
-        主要用于 subagent 工具内部。
+        主要用于 agent/evaluate 工具内部。
         """
         last_assistant_text = ""
         final_trace: Optional[Trace] = None
@@ -305,6 +323,22 @@ class AgentRunner:
             },
         }
 
+    async def stop(self, trace_id: str) -> bool:
+        """
+        停止运行中的 Trace
+
+        设置取消信号,agent loop 在下一个 LLM 调用前检查并退出。
+        Trace 状态置为 "stopped"。
+
+        Returns:
+            True 如果成功发送停止信号,False 如果该 trace 不在运行中
+        """
+        cancel_event = self._cancel_events.get(trace_id)
+        if cancel_event is None:
+            return False
+        cancel_event.set()
+        return True
+
     # ===== 单次调用(保留)=====
 
     async def call(
@@ -325,12 +359,7 @@ class AgentRunner:
         trace_id = None
         message_id = None
 
-        tool_names = BUILTIN_TOOLS.copy()
-        if tools:
-            for tool in tools:
-                if tool not in tool_names:
-                    tool_names.append(tool)
-        tool_schemas = self.tools.get_schemas(tool_names)
+        tool_schemas = self._get_tool_schemas(tools)
 
         if trace and self.trace_store:
             trace_obj = Trace.create(mode="call", uid=uid, model=model, tools=tool_schemas, llm_params=kwargs)
@@ -431,11 +460,8 @@ class AgentRunner:
             # 回溯模式
             sequence = await self._rewind(config.trace_id, config.insert_after, goal_tree)
         else:
-            # 续跑模式:从最大 sequence + 1 开始
-            all_messages = await self.trace_store.get_trace_messages(
-                config.trace_id, include_abandoned=True
-            )
-            sequence = max((m.sequence for m in all_messages), default=0) + 1
+            # 续跑模式:从 last_sequence + 1 开始
+            sequence = trace_obj.last_sequence + 1
 
         # 状态置为 running
         await self.trace_store.update_trace(
@@ -460,21 +486,30 @@ class AgentRunner:
         """
         构建完整的 LLM 消息历史
 
-        1. 加载已有 active messages(续跑/回溯场景)
-        2. 构建 system prompt(新建时注入 skills/experiences)
-        3. 追加 input messages
+        1. 从 head_sequence 沿 parent chain 加载主路径消息(续跑/回溯场景)
+        2. 构建 system prompt(新建时注入 skills)
+        3. 新建时:在第一条 user message 末尾注入当前经验
+        4. 追加 input messages(设置 parent_sequence 链接到当前 head)
 
         Returns:
-            (history, next_sequence, created_messages)
+            (history, next_sequence, created_messages, head_sequence)
             created_messages: 本次新创建并持久化的 Message 列表,供 run() yield 给调用方
+            head_sequence: 当前主路径头节点的 sequence
         """
         history: List[Dict] = []
         created_messages: List[Message] = []
+        head_seq: Optional[int] = None  # 当前主路径的头节点 sequence
 
-        # 1. 加载已有 messages
+        # 1. 加载已有 messages(通过主路径遍历)
         if config.trace_id and self.trace_store:
-            existing_messages = await self.trace_store.get_trace_messages(trace_id)
-            history = [msg.to_llm_dict() for msg in existing_messages]
+            trace_obj = await self.trace_store.get_trace(trace_id)
+            if trace_obj and trace_obj.head_sequence > 0:
+                main_path = await self.trace_store.get_main_path_messages(
+                    trace_id, trace_obj.head_sequence
+                )
+                history = [msg.to_llm_dict() for msg in main_path]
+                if main_path:
+                    head_seq = main_path[-1].sequence
 
         # 2. 构建 system prompt(如果历史中没有 system message)
         has_system = any(m.get("role") == "system" for m in history)
@@ -489,24 +524,41 @@ class AgentRunner:
                     system_msg = Message.create(
                         trace_id=trace_id, role="system", sequence=sequence,
                         goal_id=None, content=system_prompt,
+                        parent_sequence=None,  # system message 是 root
                     )
                     await self.trace_store.add_message(system_msg)
                     created_messages.append(system_msg)
+                    head_seq = sequence
                     sequence += 1
 
-        # 3. 追加新 messages
+        # 3. 新建时:在第一条 user message 末尾注入当前经验
+        if not config.trace_id:  # 新建模式
+            experiences_text = self._load_experiences()
+            if experiences_text:
+                for msg in new_messages:
+                    if msg.get("role") == "user" and isinstance(msg.get("content"), str):
+                        msg["content"] += f"\n\n## 参考经验\n\n{experiences_text}"
+                        break
+
+        # 4. 追加新 messages(设置 parent_sequence 链接到当前 head)
         for msg_dict in new_messages:
             history.append(msg_dict)
 
             if self.trace_store:
                 stored_msg = Message.from_llm_dict(
-                    msg_dict, trace_id=trace_id, sequence=sequence, goal_id=None
+                    msg_dict, trace_id=trace_id, sequence=sequence,
+                    goal_id=None, parent_sequence=head_seq,
                 )
                 await self.trace_store.add_message(stored_msg)
                 created_messages.append(stored_msg)
+                head_seq = sequence
                 sequence += 1
 
-        return history, sequence, created_messages
+        # 5. 更新 trace 的 head_sequence
+        if self.trace_store and head_seq is not None:
+            await self.trace_store.update_trace(trace_id, head_sequence=head_seq)
+
+        return history, sequence, created_messages, head_seq or 0
 
     # ===== Phase 3: AGENT LOOP =====
 
@@ -522,12 +574,30 @@ class AgentRunner:
         trace_id = trace.trace_id
         tool_schemas = self._get_tool_schemas(config.tools)
 
+        # 当前主路径头节点的 sequence(用于设置 parent_sequence)
+        head_seq = trace.head_sequence
+
         # 设置 goal_tree 到 goal 工具
         if goal_tree and self.trace_store:
             from agent.trace.goal_tool import set_goal_tree
             set_goal_tree(goal_tree)
 
         for iteration in range(config.max_iterations):
+            # 检查取消信号
+            cancel_event = self._cancel_events.get(trace_id)
+            if cancel_event and cancel_event.is_set():
+                logger.info(f"Trace {trace_id} stopped by user")
+                if self.trace_store:
+                    await self.trace_store.update_trace(
+                        trace_id,
+                        status="stopped",
+                        completed_at=datetime.now(),
+                    )
+                    trace_obj = await self.trace_store.get_trace(trace_id)
+                    if trace_obj:
+                        yield trace_obj
+                return
+
             # 构建 LLM messages(注入上下文)
             llm_messages = list(history)
 
@@ -576,12 +646,13 @@ class AgentRunner:
             # 获取当前 goal_id
             current_goal_id = goal_tree.current_id if (goal_tree and goal_tree.current_id) else None
 
-            # 记录 assistant Message
+            # 记录 assistant Message(parent_sequence 指向当前 head)
             assistant_msg = Message.create(
                 trace_id=trace_id,
                 role="assistant",
                 sequence=sequence,
                 goal_id=current_goal_id,
+                parent_sequence=head_seq if head_seq > 0 else None,
                 content={"text": response_content, "tool_calls": tool_calls},
                 prompt_tokens=prompt_tokens,
                 completion_tokens=completion_tokens,
@@ -593,6 +664,7 @@ class AgentRunner:
                 await self.trace_store.add_message(assistant_msg)
 
             yield assistant_msg
+            head_seq = sequence
             sequence += 1
 
             # 处理工具调用
@@ -626,38 +698,72 @@ class AgentRunner:
                         }
                     )
 
+                    # --- 支持多模态工具反馈 ---
+                    # execute() 返回 dict{"text","images"} 或 str
+                    if isinstance(tool_result, dict) and tool_result.get("images"):
+                        tool_result_text = tool_result["text"]
+                        # 构建多模态消息格式
+                        tool_content_for_llm = [{"type": "text", "text": tool_result_text}]
+                        for img in tool_result["images"]:
+                            if img.get("type") == "base64" and img.get("data"):
+                                media_type = img.get("media_type", "image/png")
+                                tool_content_for_llm.append({
+                                    "type": "image_url",
+                                    "image_url": {
+                                        "url": f"data:{media_type};base64,{img['data']}"
+                                    }
+                                })
+                        img_count = len(tool_content_for_llm) - 1  # 减去 text 块
+                        print(f"[Runner] 多模态工具反馈: tool={tool_name}, images={img_count}, text_len={len(tool_result_text)}")
+                    else:
+                        tool_result_text = str(tool_result)
+                        tool_content_for_llm = tool_result_text
+
                     tool_msg = Message.create(
                         trace_id=trace_id,
                         role="tool",
                         sequence=sequence,
                         goal_id=current_goal_id,
+                        parent_sequence=head_seq,
                         tool_call_id=tc["id"],
-                        content={"tool_name": tool_name, "result": tool_result},
+                        content={"tool_name": tool_name, "result": tool_result_text},
                     )
 
                     if self.trace_store:
                         await self.trace_store.add_message(tool_msg)
+                        # 截图单独存为同名 PNG 文件
+                        if isinstance(tool_result, dict) and tool_result.get("images"):
+                            import base64 as b64mod
+                            for img in tool_result["images"]:
+                                if img.get("data"):
+                                    png_path = self.trace_store._get_messages_dir(trace_id) / f"{tool_msg.message_id}.png"
+                                    png_path.write_bytes(b64mod.b64decode(img["data"]))
+                                    print(f"[Runner] 截图已保存: {png_path.name}")
+                                    break  # 只存第一张
 
                     yield tool_msg
+                    head_seq = sequence
                     sequence += 1
 
                     history.append({
                         "role": "tool",
                         "tool_call_id": tc["id"],
                         "name": tool_name,
-                        "content": str(tool_result),
+                        "content": tool_content_for_llm, # 这里传入 list 即可触发模型的视觉能力
                     })
+                    # ------------------------------------------
 
                 continue  # 继续循环
 
             # 无工具调用,任务完成
             break
 
-        # 完成 Trace
+        # 更新 head_sequence 并完成 Trace
         if self.trace_store:
             await self.trace_store.update_trace(
                 trace_id,
                 status="completed",
+                head_sequence=head_seq,
                 completed_at=datetime.now(),
             )
             trace_obj = await self.trace_store.get_trace(trace_id)
@@ -673,7 +779,9 @@ class AgentRunner:
         goal_tree: Optional[GoalTree],
     ) -> int:
         """
-        执行回溯:标记 insert_after 之后的 messages 和 goals 为 abandoned
+        执行回溯:快照 GoalTree,重建干净树,设置 head_sequence
+
+        新消息的 parent_sequence 将指向 rewind 点,旧消息通过树结构自然脱离主路径。
 
         Returns:
             下一个可用的 sequence 号
@@ -681,7 +789,7 @@ class AgentRunner:
         if not self.trace_store:
             raise ValueError("trace_store required for rewind")
 
-        # 1. 加载所有 messages(含已 abandoned 的)
+        # 1. 加载所有 messages
         all_messages = await self.trace_store.get_trace_messages(
             trace_id, include_abandoned=True
         )
@@ -692,40 +800,37 @@ class AgentRunner:
         # 2. 找到安全截断点(确保不截断在 tool_call 和 tool response 之间)
         cutoff = self._find_safe_cutoff(all_messages, insert_after)
 
-        # 3. 批量标记 messages 为 abandoned
-        abandoned_ids = await self.trace_store.abandon_messages_after(trace_id, cutoff)
-
-        # 4. 处理 Goals
+        # 3. 快照并重建 GoalTree
         if goal_tree:
-            active_messages = [m for m in all_messages if m.sequence <= cutoff]
-            active_goal_ids = {m.goal_id for m in active_messages if m.goal_id}
-
+            # 找出 rewind 点之前已完成的 goal IDs
+            # 通过主路径消息来判断:cutoff 之前的消息引用的 completed goals
+            messages_before = [m for m in all_messages if m.sequence <= cutoff]
+            completed_goal_ids = set()
             for goal in goal_tree.goals:
-                if goal.status == "abandoned":
-                    continue  # 已 abandoned,跳过
-                if goal.status == "completed" and goal.id in active_goal_ids:
-                    continue  # 已完成且有截断点之前的 messages → 保留
-                # 其余全部 abandon(含无 active messages 的 completed goal)
-                goal.status = "abandoned"
-                goal.summary = "回溯导致放弃"
-
-            # 重置 current_id
-            goal_tree._current_id = None
-
-            await self.trace_store.update_goal_tree(trace_id, goal_tree)
-
-        # 5. 记录 rewind 事件
-        abandoned_sequences = [
-            m.sequence for m in all_messages
-            if m.sequence > cutoff and m.status != "abandoned"  # 本次新 abandon 的
-        ]
-        await self.trace_store.append_event(trace_id, "rewind", {
-            "insert_after_sequence": cutoff,
-            "abandoned_message_count": len(abandoned_ids),
-            "abandoned_sequences": abandoned_sequences[:20],  # 只记前 20 条
-        })
-
-        # 6. 返回 next sequence
+                if goal.status == "completed":
+                    # 检查该 goal 是否在 rewind 点之前就已完成(有关联消息在 cutoff 之前)
+                    goal_msgs = [m for m in messages_before if m.goal_id == goal.id]
+                    if goal_msgs:
+                        completed_goal_ids.add(goal.id)
+
+            # 快照到 events
+            await self.trace_store.append_event(trace_id, "rewind", {
+                "insert_after_sequence": cutoff,
+                "goal_tree_snapshot": goal_tree.to_dict(),
+            })
+
+            # 重建干净的 GoalTree
+            new_tree = goal_tree.rebuild_for_rewind(completed_goal_ids)
+            await self.trace_store.update_goal_tree(trace_id, new_tree)
+
+            # 更新内存中的引用
+            goal_tree.goals = new_tree.goals
+            goal_tree.current_id = new_tree.current_id
+
+        # 4. 更新 head_sequence 到 rewind 点
+        await self.trace_store.update_trace(trace_id, head_sequence=cutoff)
+
+        # 5. 返回 next sequence(全局递增,不复用)
         max_seq = max((m.sequence for m in all_messages), default=0)
         return max_seq + 1
 
@@ -796,16 +901,25 @@ class AgentRunner:
     # ===== 辅助方法 =====
 
     def _get_tool_schemas(self, tools: Optional[List[str]]) -> List[Dict]:
-        """获取工具 Schema"""
-        tool_names = BUILTIN_TOOLS.copy()
-        if tools:
-            for tool in tools:
-                if tool not in tool_names:
-                    tool_names.append(tool)
+        """
+        获取工具 Schema
+
+        - tools=None: 使用 registry 中全部已注册工具(含内置 + 外部注册的)
+        - tools=["a", "b"]: 在 BUILTIN_TOOLS 基础上追加指定工具
+        """
+        if tools is None:
+            # 全部已注册工具
+            tool_names = self.tools.get_tool_names()
+        else:
+            # BUILTIN_TOOLS + 显式指定的额外工具
+            tool_names = BUILTIN_TOOLS.copy()
+            for t in tools:
+                if t not in tool_names:
+                    tool_names.append(t)
         return self.tools.get_schemas(tool_names)
 
     async def _build_system_prompt(self, config: RunConfig) -> Optional[str]:
-        """构建 system prompt(注入 skills 和 experiences)"""
+        """构建 system prompt(注入 skills)"""
         system_prompt = config.system_prompt
 
         # 加载 Skills
@@ -814,27 +928,12 @@ class AgentRunner:
         if skills:
             skills_text = self._format_skills(skills)
 
-        # 加载 Experiences
-        experiences_text = ""
-        if config.enable_memory and self.memory_store:
-            scope = f"agent:{config.agent_type}"
-            # 从 messages 提取文本作为查询
-            experiences = await self.memory_store.search_experiences(scope, system_prompt or "")
-            experiences_text = self._format_experiences(experiences)
-
         # 拼装
         if system_prompt:
             if skills_text:
                 system_prompt += f"\n\n## Skills\n{skills_text}"
-            if experiences_text:
-                system_prompt += f"\n\n## 相关经验\n{experiences_text}"
-        elif skills_text or experiences_text:
-            parts = []
-            if skills_text:
-                parts.append(f"## Skills\n{skills_text}")
-            if experiences_text:
-                parts.append(f"## 相关经验\n{experiences_text}")
-            system_prompt = "\n\n".join(parts)
+        elif skills_text:
+            system_prompt = f"## Skills\n{skills_text}"
 
         return system_prompt
 
@@ -879,7 +978,14 @@ class AgentRunner:
             return ""
         return "\n\n".join(s.to_prompt_text() for s in skills)
 
-    def _format_experiences(self, experiences: List[Experience]) -> str:
-        if not experiences:
+    def _load_experiences(self) -> str:
+        """从文件加载经验(./cache/experiences.md)"""
+        if not self.experiences_path:
             return ""
-        return "\n".join(f"- {e.to_prompt_text()}" for e in experiences)
+        try:
+            if os.path.exists(self.experiences_path):
+                with open(self.experiences_path, "r", encoding="utf-8") as f:
+                    return f.read().strip()
+        except Exception as e:
+            logger.warning(f"Failed to load experiences from {self.experiences_path}: {e}")
+        return ""

+ 6 - 4
agent/memory/skills/core.md

@@ -78,7 +78,7 @@ goal(abandon="方案A需要Redis,环境没有")
 
 1. **页面导航**: 使用 `browser_navigate_to_url` 或 `browser_search_web` 到达目标页面
 2. **等待加载**: 页面跳转后调用 `browser_wait(seconds=2)` 等待内容加载
-3. **获取元素索引**: 调用 `browser_get_selector_map` 获取可交互元素的索引映射
+3. **获取元素索引**: 调用 `browser_get_visual_selector_map` 获取可交互元素的索引映射和当前界面的截图
 4. **执行交互**: 使用 `browser_click_element`、`browser_input_text` 等工具操作页面
 5. **提取内容**: 使用 `browser_extract_content`, `browser_read_long_content`, `browser_get_page_html` 获取数据
 
@@ -88,7 +88,9 @@ goal(abandon="方案A需要Redis,环境没有")
 - **必须先获取索引**: 所有 `index` 参数都需要先通过 `browser_get_selector_map` 获取
 - **高级工具**:优先使用`browser_extract_content`, `browser_read_long_content`等工具获取数据,而不是使用`browser_get_selector_map`获取索引后手动解析
 - **操作后等待**: 任何可能触发页面变化的操作(点击、输入、滚动)后都要调用 `browser_wait`
-- **登录处理**: 需要登录的网站使用 `browser_ensure_login_with_cookies(cookie_type="xhs")` 注入Cookie
+- **登录处理**:
+  - **正常登录**:当遇到需要登录的网页时,使用`browser_load_cookies`来登录
+  - **首次登录**:当没有该网站的cookie时,点击进入登录界面,然后等待人类来登录,登录后使用`browser_export_cookies`将账户信息存储下来
 - **复杂操作用JS**: 当标准工具无法满足时,使用 `browser_evaluate` 执行JavaScript代码
 
 ### 工具分类
@@ -96,5 +98,5 @@ goal(abandon="方案A需要Redis,环境没有")
 **导航**: browser_navigate_to_url, browser_search_web, browser_go_back, browser_wait
 **交互**: browser_click_element, browser_input_text, browser_send_keys, browser_upload_file
 **视图**: browser_scroll_page, browser_find_text, browser_screenshot
-**提取**: browser_extract_content, browser_read_long_content, browser_get_page_html,    browser_get_selector_map
-**高级**: browser_evaluate, browser_ensure_login_with_cookies, browser_wait_for_user_action
+**提取**: browser_extract_content, browser_read_long_content, browser_get_page_html, browser_get_selector_map, browser_get_visual_selector_map
+**高级**: browser_evaluate, browser_load_cookies, browser_export_cookies, browser_wait_for_user_action, browser_download_direct_url

+ 3 - 2
agent/tools/builtin/__init__.py

@@ -14,7 +14,7 @@ from agent.tools.builtin.glob_tool import glob_files
 from agent.tools.builtin.file.grep import grep_content
 from agent.tools.builtin.bash import bash_command
 from agent.tools.builtin.skill import skill, list_skills
-from agent.tools.builtin.subagent import subagent
+from agent.tools.builtin.subagent import agent, evaluate
 from agent.tools.builtin.search import search_posts, get_search_suggestions
 from agent.tools.builtin.sandbox import (sandbox_create_environment, sandbox_run_shell,
                                          sandbox_rebuild_with_ports,sandbox_destroy_environment)
@@ -35,7 +35,8 @@ __all__ = [
     "bash_command",
     "skill",
     "list_skills",
-    "subagent",
+    "agent",
+    "evaluate",
     "search_posts",
     "get_search_suggestions",
     "sandbox_create_environment",

+ 12 - 0
agent/tools/builtin/browser/__init__.py

@@ -40,7 +40,9 @@ from agent.tools.builtin.browser.baseClass import (
     browser_extract_content,
     browser_read_long_content,
     browser_get_page_html,
+    browser_download_direct_url,
     browser_get_selector_map,
+    browser_get_visual_selector_map,
 
     # JavaScript 执行工具
     browser_evaluate,
@@ -51,6 +53,10 @@ from agent.tools.builtin.browser.baseClass import (
 
     # 任务完成
     browser_done,
+
+    # Cookie 持久化
+    browser_export_cookies,
+    browser_load_cookies,
 )
 
 __all__ = [
@@ -88,8 +94,10 @@ __all__ = [
     # 内容提取工具
     'browser_extract_content',
     'browser_read_long_content',
+    'browser_download_direct_url',
     'browser_get_page_html',
     'browser_get_selector_map',
+    'browser_get_visual_selector_map',
 
     # JavaScript 执行工具
     'browser_evaluate',
@@ -100,4 +108,8 @@ __all__ = [
 
     # 任务完成
     'browser_done',
+
+    # Cookie 持久化
+    'browser_export_cookies',
+    'browser_load_cookies',
 ]

+ 376 - 135
agent/tools/builtin/browser/baseClass.py

@@ -37,21 +37,22 @@ Native Browser-Use Tools Adapter
 3. 任务结束时调用 cleanup_browser_session()
 
 文件操作说明:
-- 浏览器专用文件目录:.browser_use_files/ (在当前工作目录下)
+- 浏览器专用文件目录:.cache/.browser_use_files/ (在当前工作目录下)
   用于存储浏览器会话产生的临时文件(下载、上传、截图等)
 - 一般文件操作:请使用 agent.tools.builtin 中的文件工具 (read_file, write_file, edit_file)
   这些工具功能更完善,支持diff预览、智能匹配、分页读取等
 """
-
+import logging
 import sys
 import os
 import json
+import httpx
 import asyncio
 import aiohttp
 import re
 import base64
 from urllib.parse import urlparse, parse_qs, unquote
-from typing import Optional, List, Dict, Any, Tuple
+from typing import Optional, List, Dict, Any, Tuple, Union
 from pathlib import Path
 from langchain_core.runnables import RunnableLambda
 from argparse import Namespace # 使用 Namespace 快速构造带属性的对象
@@ -229,153 +230,71 @@ async def init_browser_session(
     browser_profile: Optional[BrowserProfile] = None,
     **kwargs
 ) -> tuple[BrowserSession, Tools]:
-    """
-    初始化全局浏览器会话 - 支持三种浏览器类型
-
-    Args:
-        browser_type: 浏览器类型 ("local", "cloud", "container")
-        headless: 是否无头模式
-        url: 初始访问URL(可选)
-             - local/cloud: 初始化后会自动导航到此URL
-             - container: 必需,容器启动时访问的URL
-        profile_name: 配置文件/账户名称(默认 "default")
-                     - local: 用于创建用户数据目录路径
-                     - cloud: 云浏览器配置ID
-                     - container: 容器账户名称
-        user_data_dir: 用户数据目录(仅 local 模式,高级用法)
-                      如果提供则覆盖 profile_name 生成的路径
-        browser_profile: BrowserProfile 对象(通用,高级用法)
-                        用于预设 cookies 等
-        **kwargs: 其他 BrowserSession 参数
-
-    Returns:
-        (BrowserSession, Tools) 元组
-
-    Examples:
-        # 本地浏览器
-        browser, tools = await init_browser_session(
-            browser_type="local",
-            url="https://www.baidu.com"  # 可选
-        )
-
-        # 云浏览器
-        browser, tools = await init_browser_session(
-            browser_type="cloud",
-            profile_name="my_cloud_profile"  # 可选
-        )
-
-        # 容器浏览器
-        browser, tools = await init_browser_session(
-            browser_type="container",
-            url="https://www.xiaohongshu.com",  # 必需
-            profile_name="my_account"  # 可选
-        )
-    """
     global _browser_session, _browser_tools, _file_system
 
     if _browser_session is not None:
         return _browser_session, _browser_tools
 
-    # 验证 browser_type
     valid_types = ["local", "cloud", "container"]
     if browser_type not in valid_types:
-        raise ValueError(f"无效的 browser_type: {browser_type},必须是 {valid_types} 之一")
+        raise ValueError(f"无效的 browser_type: {browser_type}")
+
+    # --- 核心:定义本地统一存储路径 ---
+    save_dir = Path.cwd() / ".cache/.browser_use_files"
+    save_dir.mkdir(parents=True, exist_ok=True)
 
-    # 创建浏览器会话参数
+    # 基础参数配置
     session_params = {
         "headless": headless,
+        # 告诉 Playwright 所有的下载临时流先存入此本地目录
+        "downloads_path": str(save_dir), 
     }
 
-    # === Container 模式 ===
     if browser_type == "container":
         print("🐳 使用容器浏览器模式")
-
-        # container 模式必须提供 URL
-        if not url:
-            url = "about:blank"  # 使用默认空白页
-            print("⚠️  未提供 url 参数,使用默认空白页")
-
-        # 创建容器并获取 CDP URL
-        print(f"📦 正在创建容器...")
-        container_info = await create_container(
-            url=url,
-            account_name=profile_name
-        )
-
+        if not url: url = "about:blank"
+        container_info = await create_container(url=url, account_name=profile_name)
         if not container_info["success"]:
             raise RuntimeError(f"容器创建失败: {container_info['error']}")
-
-        cdp_url = container_info["cdp"]
-        print(f"✅ 容器创建成功")
-        print(f"   CDP URL: {cdp_url}")
-        print(f"   Container ID: {container_info['container_id']}")
-        print(f"   Connection ID: {container_info.get('connection_id')}")
-
-        # 使用容器的 CDP URL 连接
-        session_params["cdp_url"] = cdp_url
-
-        # 等待容器完全启动
-        print("⏳ 等待容器浏览器启动...")
+        session_params["cdp_url"] = container_info["cdp"]
         await asyncio.sleep(3)
 
-    # === Cloud 模式 ===
     elif browser_type == "cloud":
         print("🌐 使用云浏览器模式")
         session_params["use_cloud"] = True
-
-        # profile_name 作为云配置ID
         if profile_name and profile_name != "default":
             session_params["cloud_profile_id"] = profile_name
 
-    # === Local 模式 ===
     else:  # local
         print("💻 使用本地浏览器模式")
         session_params["is_local"] = True
-
-        # 设置用户数据目录(持久化登录状态)
         if user_data_dir is None and profile_name:
             user_data_dir = str(Path.home() / ".browser_use" / "profiles" / profile_name)
             Path(user_data_dir).mkdir(parents=True, exist_ok=True)
-
-        # macOS 上显式指定 Chrome 路径
+            session_params["user_data_dir"] = user_data_dir
+        
+        # macOS 路径兼容
         import platform
-        if platform.system() == "Darwin":  # macOS
+        if platform.system() == "Darwin":
             chrome_path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
             if Path(chrome_path).exists():
                 session_params["executable_path"] = chrome_path
 
-        # 只在有值时才添加 user_data_dir
-        if user_data_dir:
-            session_params["user_data_dir"] = user_data_dir
-
-    # 只在有值时才添加 browser_profile (适用于所有模式)
     if browser_profile:
         session_params["browser_profile"] = browser_profile
 
-    # 合并其他参数
     session_params.update(kwargs)
 
-    # 创建浏览器会话
+    # 创建会话
     _browser_session = BrowserSession(**session_params)
-
-    # 启动浏览器
     await _browser_session.start()
 
-    # 创建工具实例
     _browser_tools = Tools()
+    _file_system = FileSystem(base_dir=str(save_dir))
 
-    # 创建文件系统实例(用于浏览器会话产生的文件)
-    # 注意:这个目录仅用于浏览器操作相关的临时文件(下载、上传、截图等)
-    # 对于一般文件读写操作,请使用 agent.tools.builtin 中的文件工具
-    base_dir = Path.cwd() / ".browser_use_files"
-    base_dir.mkdir(parents=True, exist_ok=True)
-    _file_system = FileSystem(base_dir=str(base_dir))
-
-    print("✅ 浏览器会话初始化成功")
+    print(f"✅ 浏览器会话初始化成功 | 默认下载路径: {save_dir}")
 
-    # 如果是 local 或 cloud 模式且提供了 URL,导航到该 URL
     if browser_type in ["local", "cloud"] and url:
-        print(f"🔗 导航到: {url}")
         await _browser_tools.navigate(url=url, browser_session=_browser_session)
 
     return _browser_session, _browser_tools
@@ -713,33 +632,132 @@ async def browser_wait(seconds: int = 3) -> ToolResult:
 # 元素交互工具 (Element Interaction Tools)
 # ============================================================
 
-@tool()
-async def browser_click_element(index: int) -> ToolResult:
-    """
-    通过索引点击页面元素
-    Click an element by index
-
-    Args:
-        index: 元素索引(从浏览器状态中获取)
+# Logging handler dedicated to capturing direct download links
+class DownloadLinkCaptureHandler(logging.Handler):
+    """
+    Logging handler that captures the first complete download URL seen in log records.
+
+    Only records whose message contains "redirection?filename=" or
+    "Failed to download" are inspected. A URL containing "..." is treated as a
+    truncated display form and ignored. Once a URL is captured, later records
+    are skipped.
+
+    Attributes:
+        captured_url: The captured URL, or None if nothing has been captured yet.
+    """
+
+    # Compiled once; a URL is any http(s) run of non-whitespace characters.
+    _URL_PATTERN = re.compile(r"https?://\S+")
+
+    def __init__(self):
+        super().__init__()
+        self.captured_url = None
+
+    def emit(self, record):
+        """Inspect one log record and store the first non-truncated URL it contains."""
+        # The first capture wins; ignore everything that is logged afterwards
+        if self.captured_url:
+            return
+
+        try:
+            message = record.getMessage()
+        except Exception:
+            # A malformed record must not raise into the code that logged it
+            self.handleError(record)
+            return
+
+        if "redirection?filename=" not in message and "Failed to download" not in message:
+            return
+
+        # Scan every URL in the message: a truncated ("...") URL may be
+        # followed by the complete one within the same record.
+        for match in self._URL_PATTERN.finditer(message):
+            url = match.group(0)
+            if "..." not in url:
+                self.captured_url = url
+                return
 
-    Returns:
-        ToolResult: 包含点击操作结果的工具返回对象
+@tool()
+async def browser_download_direct_url(url: str, save_name: str = "book.epub") -> ToolResult:
+    """
+    Download a file from a direct URL into the browser file directory.
+
+    The response is streamed with httpx (redirects are followed) and written
+    to ``.cache/.browser_use_files/`` under the current working directory.
+
+    Args:
+        url: Direct download URL.
+        save_name: File name to save as. Only the final path component is
+            used. When empty, the name is derived from the URL path.
+
+    Returns:
+        ToolResult: On success ``metadata["path"]`` holds the saved file path;
+        on an HTTP error or exception ``error`` describes the failure.
+    """
+    import time
+
+    save_dir = Path.cwd() / ".cache/.browser_use_files"
+    save_dir.mkdir(parents=True, exist_ok=True)
+
+    # Use the site root as Referer; some hosts reject downloads without one
+    parsed_url = urlparse(url)
+    base_url = f"{parsed_url.scheme}://{parsed_url.netloc}/"
+
+    fallback_name = f"download_{int(time.time())}"
+    if not save_name:
+        # Derive the name from the URL path and decode percent-escapes (e.g. non-ASCII names)
+        save_name = unquote(Path(parsed_url.path).name) or fallback_name
+
+    # Keep only the final component so the file cannot land outside save_dir
+    save_name = Path(save_name).name
+    if save_name in ("", ".", ".."):
+        save_name = fallback_name
+
+    target_path = save_dir / save_name
+
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+        "Accept": "*/*",
+        "Referer": base_url,
+        "Range": "bytes=0-",
+    }
+    progress_step = 1024 * 1024  # report progress once per MiB
+
+    try:
+        print(f"🚀 开始下载: {url[:60]}...")
+
+        async with httpx.AsyncClient(headers=headers, follow_redirects=True, timeout=60.0) as client:
+            async with client.stream("GET", url) as response:
+                # A Range request is normally answered with 206 Partial Content,
+                # so both 200 and 206 mean the body is the requested file.
+                if response.status_code not in (200, 206):
+                    failure_msg = f"❌ 下载失败,HTTP 状态码: {response.status_code}"
+                    print(failure_msg)
+                    return ToolResult(
+                        title="下载失败",
+                        output="",
+                        error=failure_msg,
+                        long_term_memory=f"下载失败,HTTP 状态码: {response.status_code}"
+                    )
+
+                downloaded_bytes = 0
+                next_report = progress_step
+                with open(target_path, "wb") as f:
+                    async for chunk in response.aiter_bytes():
+                        f.write(chunk)
+                        downloaded_bytes += len(chunk)
+                        # Chunk sizes are arbitrary, so compare against a
+                        # threshold instead of testing for an exact multiple
+                        if downloaded_bytes >= next_report:
+                            print(f"📥 已下载: {downloaded_bytes // progress_step} MB")
+                            next_report = (downloaded_bytes // progress_step + 1) * progress_step
+
+        success_msg = f"✅ 下载完成!文件已存至: {target_path}"
+        print(success_msg)
+        return ToolResult(
+            title="直链下载成功",
+            output=success_msg,
+            long_term_memory=success_msg,
+            metadata={"path": str(target_path)}
+        )
+
+    except Exception as e:
+        # Report any network or file-system failure to the agent instead of raising
+        return ToolResult(
+            title="下载异常",
+            output="",
+            error=f"💥 发生错误: {str(e)}",
+            long_term_memory=f"下载任务由于异常中断: {str(e)}"
+        )
+    
+@tool()
+async def browser_click_element(index: int) -> ToolResult:
     """
+    点击页面元素,并自动通过拦截内部日志获取下载直链。
+    """
+    # 1. 挂载日志窃听器
+    capture_handler = DownloadLinkCaptureHandler()
+    logger = logging.getLogger("browser_use") # 拦截整个 browser_use 命名空间
+    logger.addHandler(capture_handler)
+    
     try:
         browser, tools = await get_browser_session()
 
+        # 2. 执行原生的点击动作
         result = await tools.click(
             index=index,
             browser_session=browser
         )
 
-        return action_result_to_tool_result(result, f"点击元素 {index}")
+        # 3. 检查是否有“意外收获”
+        download_msg = ""
+        if capture_handler.captured_url:
+            captured_url = capture_handler.captured_url
+            download_msg = f"\n\n⚠️ 系统检测到浏览器下载被拦截,已自动捕获准确直链:\n{captured_url}\n\n建议:你可以直接使用 browser_download_direct_url 工具下载此链接。"
+            
+            # 如果你想更激进一点,甚至可以在这里直接自动触发本地下载逻辑
+            # await auto_download_file(captured_url)
+
+        # 4. 转换结果并附加捕获的信息
+        tool_result = action_result_to_tool_result(result, f"点击元素 {index}")
+        
+        if download_msg:
+            # 关键:把日志里的信息塞进 output,这样 LLM 就能看到了!
+            tool_result.output = (tool_result.output or "") + download_msg
+            tool_result.long_term_memory = (tool_result.long_term_memory or "") + f" 捕获下载链接: {captured_url}"
+
+        return tool_result
 
     except Exception as e:
         return ToolResult(
@@ -748,6 +766,9 @@ async def browser_click_element(index: int) -> ToolResult:
             error=f"Failed to click element {index}: {str(e)}",
             long_term_memory=f"点击元素 {index} 失败"
         )
+    finally:
+        # 5. 务必移除监听器,防止内存泄漏和日志污染
+        logger.removeHandler(capture_handler)
 
 
 @tool()
@@ -868,20 +889,20 @@ async def browser_upload_file(index: int, path: str) -> ToolResult:
             long_term_memory=f"上传文件 {path} 失败"
         )
 
-
 # ============================================================
 # 滚动和视图工具 (Scroll & View Tools)
 # ============================================================
 @tool()
 async def browser_scroll_page(down: bool = True, pages: float = 1.0, index: Optional[int] = None) -> ToolResult:
     try:
+        # 限制单次滚动幅度,避免 agent 一次滚 100 页
+        MAX_PAGES = 10
+        if pages > MAX_PAGES:
+            pages = MAX_PAGES
+
         browser, tools = await get_browser_session()
-        
-        # --- 核心修复 1: 必须先 await 拿到 session 实例 ---
         cdp_session = await browser.get_or_create_cdp_session()
-        
-        # 这里的执行方式建议参考你已有的 cdp 调用逻辑
-        # 如果 cdp_session 没有直接封装 .eval(),使用 Runtime.evaluate
+
         before_y_result = await cdp_session.cdp_client.send.Runtime.evaluate(
             params={'expression': 'window.scrollY'},
             session_id=cdp_session.session_id
@@ -890,25 +911,36 @@ async def browser_scroll_page(down: bool = True, pages: float = 1.0, index: Opti
 
         # 执行滚动
         result = await tools.scroll(down=down, pages=pages, index=index, browser_session=browser)
-        
-        # 等待渲染并检查偏移
-        await asyncio.sleep(1)
-        
+
+        # 等待渲染(懒加载页面需要更长时间)
+        await asyncio.sleep(2)
+
         after_y_result = await cdp_session.cdp_client.send.Runtime.evaluate(
             params={'expression': 'window.scrollY'},
             session_id=cdp_session.session_id
         )
         after_y = after_y_result.get('result', {}).get('value', 0)
 
-        # 3. 验证是否真的动了
+        # 如果第一次检测没动,再等一轮(应对懒加载触发后的延迟滚动)
         if before_y == after_y and index is None:
+            await asyncio.sleep(2)
+            retry_result = await cdp_session.cdp_client.send.Runtime.evaluate(
+                params={'expression': 'window.scrollY'},
+                session_id=cdp_session.session_id
+            )
+            after_y = retry_result.get('result', {}).get('value', 0)
+
+        if before_y == after_y and index is None:
+            direction = "下" if down else "上"
             return ToolResult(
-                title="滚动无效", 
-                output="页面已到达边界或滚动被拦截", 
+                title="滚动无效",
+                output=f"页面已到达{direction}边界,无法继续滚动",
                 error="No movement detected"
             )
 
-        return action_result_to_tool_result(result, f"已滚动")
+        delta = abs(after_y - before_y)
+        direction = "下" if down else "上"
+        return action_result_to_tool_result(result, f"已向{direction}滚动 {delta}px")
 
     except Exception as e:
         # --- 核心修复 2: 必须补全 output 参数,否则框架会报错 ---
@@ -919,6 +951,7 @@ async def browser_scroll_page(down: bool = True, pages: float = 1.0, index: Opti
         )
 
 
+
 @tool()
 async def browser_find_text(text: str) -> ToolResult:
     """
@@ -954,7 +987,103 @@ async def browser_find_text(text: str) -> ToolResult:
             long_term_memory=f"查找文本 '{text}' 失败"
         )
 
+@tool()
+async def browser_get_visual_selector_map() -> ToolResult:
+    """
+    Capture an index-annotated screenshot of the current page together with its selector map.
 
+    One browser state request returns both the DOM selector map and a
+    screenshot. The screenshot is annotated with element indices through
+    browser-use's highlighting helper; if annotation fails, or there is
+    nothing to annotate, the raw screenshot is used instead. A text listing
+    of every indexed element (with href/type/role/name when present) is
+    built for the agent to read.
+
+    Returns:
+        ToolResult: the element listing in ``output``, the screenshot in
+        ``images``, and the first 100 stringified selector-map entries plus
+        the page url and title in ``metadata``. On failure ``error`` carries
+        the exception text.
+    """
+    try:
+        browser, _ = await get_browser_session()
+
+        from browser_use.browser.events import BrowserStateRequestEvent
+        from browser_use.browser.python_highlights import create_highlighted_screenshot_async
+
+        # Ask for DOM and screenshot in a single state request
+        state_request = BrowserStateRequestEvent(
+            include_dom=True,
+            include_screenshot=True,
+            include_recent_events=False
+        )
+        event = browser.event_bus.dispatch(state_request)
+        browser_state = await event.event_result(raise_if_none=True, raise_if_any=True)
+
+        selector_map = browser_state.dom_state.selector_map if browser_state.dom_state else {}
+
+        # Start from the raw screenshot and upgrade to the annotated one when possible
+        screenshot_b64 = browser_state.screenshot or ""
+        highlighted_b64 = screenshot_b64
+        if screenshot_b64 and selector_map:
+            try:
+                cdp_session = await browser.get_or_create_cdp_session()
+                highlighted_b64 = await create_highlighted_screenshot_async(
+                    screenshot_b64, selector_map,
+                    cdp_session=cdp_session,
+                    filter_highlight_ids=False
+                )
+            except Exception:
+                # Best effort: annotation failed, keep the raw screenshot
+                highlighted_b64 = screenshot_b64
+
+        # One line per indexed element: tag, short description, selected attributes
+        elements_info = []
+        for index, node in selector_map.items():
+            tag = node.tag_name
+            attrs = node.attributes or {}
+            desc = attrs.get('aria-label') or attrs.get('placeholder') or attrs.get('title') or node.get_all_children_text(max_depth=1) or ""
+            extra_parts = []
+            for attr_name in ('href', 'type', 'role', 'name'):
+                attr_value = attrs.get(attr_name)
+                if not attr_value:
+                    continue
+                if attr_name == 'href':
+                    # Long links are cut to 60 characters in the listing
+                    attr_value = attr_value[:60]
+                extra_parts.append(f"{attr_name}={attr_value}")
+            extra = f" ({', '.join(extra_parts)})" if extra_parts else ""
+            elements_info.append(f"Index {index}: <{tag}> \"{desc[:50]}\"{extra}")
+
+        element_count = len(selector_map)
+        output = f"页面截图已捕获(含元素索引标注)\n找到 {element_count} 个交互元素\n\n"
+        output += "元素列表:\n" + "\n".join(elements_info)
+
+        images = (
+            [{"type": "base64", "media_type": "image/png", "data": highlighted_b64}]
+            if highlighted_b64
+            else []
+        )
+
+        # Only the first 100 entries go into metadata
+        first_entries = list(selector_map.items())[:100]
+        return ToolResult(
+            title="视觉元素观察",
+            output=output,
+            long_term_memory=f"在页面观察到 {element_count} 个元素并保存了截图",
+            images=images,
+            metadata={
+                "selector_map": {k: str(v) for k, v in first_entries},
+                "url": browser_state.url,
+                "title": browser_state.title
+            }
+        )
+
+    except Exception as e:
+        return ToolResult(
+            title="视觉观察失败",
+            output="",
+            error=f"Failed to get visual selector map: {str(e)}",
+            long_term_memory="获取视觉元素映射失败"
+        )
+    
 @tool()
 async def browser_screenshot() -> ToolResult:
     """
@@ -1325,7 +1454,7 @@ async def _detect_and_download_pdf_via_cdp(browser) -> Optional[str]:
         pdf_bytes = base64.b64decode(base64_data)
 
         # 保存到本地
-        save_dir = Path.cwd() / ".browser_use_files"
+        save_dir = Path.cwd() / ".cache/.browser_use_files"
         save_dir.mkdir(parents=True, exist_ok=True)
 
         filename = Path(parsed.path).name if parsed.path else ""
@@ -1347,9 +1476,9 @@ async def _detect_and_download_pdf_via_cdp(browser) -> Optional[str]:
 
 @tool()
 async def browser_read_long_content(
-    goal: Any,
+    goal: Union[str, dict],
     source: str = "page",
-    context: Any = "",
+    context: str = "",
     **kwargs
 ) -> ToolResult:
     """
@@ -1787,6 +1916,112 @@ async def browser_done(text: str, success: bool = True,
         )
 
 
+# ============================================================
+# Cookie 持久化工具
+# ============================================================
+
+_COOKIES_DIR = Path(__file__).parent.parent.parent.parent.parent / ".cache/.cookies"
+
+@tool()
+async def browser_export_cookies(name: str = "", account: str = "") -> ToolResult:
+    """
+    Export the current site's cookies to a JSON file in the local cookie directory (_COOKIES_DIR).
+
+    The file is named ``{domain}_{account}.json`` (or ``{domain}.json`` without
+    an account), e.g. ``bilibili.com_zhangsan.json``. Call this after a
+    successful login; browser_load_cookies can restore the session later.
+
+    Only cookies belonging to the current site are kept: the cookie domain
+    equals the current host, is a parent domain of it, or is a subdomain of it.
+
+    Args:
+        name: Custom file name without extension (optional; overrides automatic naming).
+        account: Account label (optional; distinguishes accounts on the same site).
+
+    Returns:
+        ToolResult: A summary of how many cookies were saved, or ``error`` on failure.
+    """
+    try:
+        browser, _ = await get_browser_session()
+
+        # All cookies known to the browser, in CDP format
+        all_cookies = await browser._cdp_get_cookies()
+        if not all_cookies:
+            return ToolResult(title="Cookie 导出", output="当前浏览器没有 Cookie", long_term_memory="无 Cookie 可导出")
+
+        from urllib.parse import urlparse
+        current_url = await browser.get_current_page_url() or ''
+        parsed = urlparse(current_url)
+        # File naming keeps the netloc-based form that browser_load_cookies looks up
+        domain = parsed.netloc.replace("www.", "") or "default"
+
+        if not name:
+            name = f"{domain}_{account}" if account else domain
+
+        # Cookie matching uses the bare host name (no port, no leading "www.")
+        host = (parsed.hostname or "").lower()
+        if host.startswith("www."):
+            host = host[4:]
+
+        def _belongs_to_site(cookie_domain: str) -> bool:
+            """Return True if cookie_domain is the host, its parent domain, or its subdomain."""
+            cd = cookie_domain.lstrip(".").lower()
+            if not cd or not host:
+                return False
+            # Compare on label boundaries: a plain substring test would drop
+            # parent-domain cookies on subdomain pages and accept look-alike domains
+            return cd == host or host.endswith("." + cd) or cd.endswith("." + host)
+
+        # Keep only the current site's cookies (drops third-party ones)
+        cookies = [c for c in all_cookies if _belongs_to_site(c.get("domain", ""))]
+
+        _COOKIES_DIR.mkdir(parents=True, exist_ok=True)
+        cookie_file = _COOKIES_DIR / f"{name}.json"
+        cookie_file.write_text(json.dumps(cookies, ensure_ascii=False, indent=2), encoding="utf-8")
+
+        return ToolResult(
+            title="Cookie 已导出",
+            output=f"已保存 {len(cookies)} 条 Cookie 到 .cookies/{name}.json(从 {len(all_cookies)} 条中过滤当前域名)",
+            long_term_memory=f"导出 {len(cookies)} 条 Cookie 到 .cookies/{name}.json"
+        )
+    except Exception as e:
+        return ToolResult(title="Cookie 导出失败", output="", error=str(e), long_term_memory="导出 Cookie 失败")
+
+
+@tool()
+async def browser_load_cookies(url: str, name: str = "") -> ToolResult:
+    """
+    Inject saved cookies for the target URL, then navigate there to restore the login state.
+
+    This tool performs the navigation itself; browser_navigate_to_url does not
+    need to be called first. Cookie files are read from the local cookie
+    directory (_COOKIES_DIR).
+
+    Args:
+        url: Target URL (required; also used to locate the cookie file).
+            "https://" is prepended when no http/https scheme is present.
+        name: Cookie file name without extension (optional). When omitted,
+            the file is found by the URL's domain: an exact ``{domain}.json``
+            is preferred, then ``{domain}_*.json``, then any other file whose
+            name starts with the domain, in alphabetical order.
+
+    Returns:
+        ToolResult: A summary of the injected cookies, or ``error`` when no
+        cookie file is found or loading fails.
+    """
+    try:
+        browser, tools = await get_browser_session()
+
+        # Test for the full scheme so hosts such as "httpbin.org" still get one
+        if not url.startswith(("http://", "https://")):
+            url = f"https://{url}"
+
+        if not name:
+            # Locate the cookie file by domain
+            from urllib.parse import urlparse
+            domain = urlparse(url).netloc.replace("www.", "")
+            if not _COOKIES_DIR.exists():
+                return ToolResult(title="未找到 Cookie", output=".cookies 目录不存在", error="Cookie 目录不存在")
+
+            all_files = list(_COOKIES_DIR.glob("*.json"))
+            matches = [f for f in all_files if f.stem.startswith(domain)]
+            if not matches:
+                available = [f.stem for f in all_files]
+                return ToolResult(title="未找到 Cookie", output=f"没有匹配 {domain} 的文件,可用: {available}", error=f"无 {domain} 的 Cookie 文件")
+
+            # Glob order is unspecified; rank the candidates so the choice is deterministic
+            matches.sort(key=lambda f: (f.stem != domain, not f.stem.startswith(f"{domain}_"), f.name))
+            cookie_file = matches[0]
+        else:
+            cookie_file = _COOKIES_DIR / f"{name}.json"
+            if not cookie_file.exists():
+                available = [f.stem for f in _COOKIES_DIR.glob("*.json")] if _COOKIES_DIR.exists() else []
+                return ToolResult(title="文件不存在", output=f"可用: {available}", error=f"未找到 .cookies/{name}.json")
+
+        cookies = json.loads(cookie_file.read_text(encoding="utf-8"))
+
+        # The file holds what browser_export_cookies wrote (CDP format), so it is injected as-is
+        await browser._cdp_set_cookies(cookies)
+
+        # Navigate with the freshly injected cookies
+        await tools.navigate(url=url, browser_session=browser)
+        await tools.wait(seconds=3, browser_session=browser)
+
+        return ToolResult(
+            title="Cookie 注入并导航完成",
+            output=f"从 {cookie_file.name} 注入 {len(cookies)} 条 Cookie,已导航到 {url}",
+            long_term_memory=f"已从 {cookie_file.name} 注入 Cookie 并导航到 {url},登录态已恢复"
+        )
+    except Exception as e:
+        return ToolResult(title="Cookie 加载失败", output="", error=str(e), long_term_memory="加载 Cookie 失败")
+
+
 # ============================================================
 # 导出所有工具函数(供外部使用)
 # ============================================================
@@ -1827,7 +2062,9 @@ __all__ = [
     'browser_extract_content',
     'browser_get_page_html',
     'browser_read_long_content',
+    'browser_download_direct_url',
     'browser_get_selector_map',
+    'browser_get_visual_selector_map',
 
     # JavaScript 执行工具
     'browser_evaluate',
@@ -1838,4 +2075,8 @@ __all__ = [
 
     # 任务完成
     'browser_done',
+
+    # Cookie 持久化
+    'browser_export_cookies',
+    'browser_load_cookies',
 ]

+ 2 - 1
agent/tools/builtin/feishu/chat.py

@@ -6,6 +6,7 @@ import asyncio
 from typing import Optional, List, Dict, Any, Union
 from .feishu_client import FeishuClient, FeishuDomain
 from agent.tools import tool, ToolResult, ToolContext
+from agent.trace.models import MessageContent
 
 # 从环境变量获取飞书配置
 # 也可以在此设置硬编码的默认值,但推荐使用环境变量
@@ -176,7 +177,7 @@ async def feishu_get_contact_list(context: Optional[ToolContext] = None) -> Tool
 )
 async def feishu_send_message_to_contact(
     contact_name: str,
-    content: Any,
+    content: MessageContent,
     context: Optional[ToolContext] = None
 ) -> ToolResult:
     """

+ 437 - 470
agent/tools/builtin/subagent.py

@@ -1,15 +1,16 @@
 """
-Sub-Agent 工具 - 统一 explore/delegate/evaluate
+Sub-Agent 工具 - agent / evaluate
 
-作为普通工具运行:创建(或继承)子 Trace,执行并返回结构化结果。
+agent: 创建 Agent 执行任务(单任务 delegate 或多任务并行 explore)
+evaluate: 评估目标执行结果是否满足要求
 """
 
 import asyncio
 from datetime import datetime
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union
 
 from agent.tools import tool
-from agent.trace.models import Trace
+from agent.trace.models import Trace, Messages
 from agent.trace.trace_id import generate_sub_trace_id
 from agent.trace.goal_models import GoalTree
 from agent.trace.websocket import broadcast_sub_trace_started, broadcast_sub_trace_completed
@@ -21,69 +22,6 @@ def _make_run_config(**kwargs):
     return RunConfig(**kwargs)
 
 
-def _build_explore_prompt(branches: List[str], background: Optional[str]) -> str:
-    lines = ["# 探索任务", ""]
-    if background:
-        lines.extend([background, ""])
-    lines.append("请探索以下方案:")
-    for i, branch in enumerate(branches, 1):
-        lines.append(f"{i}. {branch}")
-    return "\n".join(lines)
-
-
-async def _build_evaluate_prompt(
-    store,
-    trace_id: str,
-    target_goal_id: str,
-    evaluation_input: Dict[str, Any],
-    requirements: Optional[str],
-) -> str:
-    goal_tree = await store.get_goal_tree(trace_id)
-    target_desc = ""
-    if goal_tree:
-        target_goal = goal_tree.find(target_goal_id)
-        if target_goal:
-            target_desc = target_goal.description
-
-    goal_description = evaluation_input.get("goal_description") or target_desc or f"Goal {target_goal_id}"
-    actual_result = evaluation_input.get("actual_result", "(无执行结果)")
-
-    lines = [
-        "# 评估任务",
-        "",
-        "请评估以下任务的执行结果是否满足要求。",
-        "",
-        "## 目标描述",
-        "",
-        str(goal_description),
-        "",
-        "## 执行结果",
-        "",
-        str(actual_result),
-        "",
-    ]
-
-    if requirements:
-        lines.extend(["## 评估要求", "", requirements, ""])
-
-    lines.extend(
-        [
-            "## 输出格式",
-            "",
-            "## 评估结论",
-            "[通过/不通过]",
-            "",
-            "## 评估理由",
-            "[详细说明通过或不通过原因]",
-            "",
-            "## 修改建议(如果不通过)",
-            "1. [建议1]",
-            "2. [建议2]",
-        ]
-    )
-    return "\n".join(lines)
-
-
 # ===== 辅助函数 =====
 
 async def _update_collaborator(
@@ -125,6 +63,7 @@ async def _update_collaborator(
     trace.context["collaborators"] = collaborators
     await store.update_trace(trace_id, context=trace.context)
 
+
 async def _update_goal_start(
     store, trace_id: str, goal_id: str, mode: str, sub_trace_ids: List[str]
 ) -> None:
@@ -155,20 +94,76 @@ async def _update_goal_complete(
     )
 
 
-def _format_explore_results(
-    branches: List[str], results: List[Dict[str, Any]]
-) -> str:
-    """格式化 explore 模式的汇总结果(Markdown)"""
-    lines = ["## 探索结果\n"]
+def _aggregate_stats(results: List[Dict[str, Any]]) -> Dict[str, Any]:
+    """Sum the message, token and cost counters over all results.
+
+    Entries that are not dicts, or that have no "stats" key, are ignored.
+    Counters missing from a stats dict count as zero.
+    """
+    totals = {"total_messages": 0, "total_tokens": 0, "total_cost": 0.0}
+
+    for entry in results:
+        if not isinstance(entry, dict) or "stats" not in entry:
+            continue
+        entry_stats = entry["stats"]
+        totals["total_messages"] += entry_stats.get("total_messages", 0)
+        totals["total_tokens"] += entry_stats.get("total_tokens", 0)
+        totals["total_cost"] += entry_stats.get("total_cost", 0.0)
+
+    return totals
+
+
+def _get_allowed_tools(single: bool, context: dict) -> Optional[List[str]]:
+    """Return the tool names a sub-agent may use.
+
+    single=False (multi-task explore): a fixed read-only tool list.
+    single=True (delegate): every tool in the runner's registry except
+    "agent" and "evaluate"; None when the context has no runner exposing
+    ``tools.registry``.
+    """
+    if single:
+        runner = context.get("runner")
+        if not (runner and hasattr(runner, "tools") and hasattr(runner.tools, "registry")):
+            return None
+        excluded = ("agent", "evaluate")
+        return [tool_name for tool_name in runner.tools.registry.keys() if tool_name not in excluded]
+    return ["read_file", "grep_content", "glob_files", "goal"]
+
+
+def _format_single_result(result: Dict[str, Any], sub_trace_id: str, continued: bool) -> Dict[str, Any]:
+    """Build the return payload for a single (delegate) task.
+
+    The result's own fields are kept, with "summary" replaced by a Markdown
+    report containing the original summary and the execution statistics.
+    """
+    report = ["## 委托任务完成\n"]
+    raw_summary = result.get("summary", "")
+    if raw_summary:
+        report += [raw_summary, ""]
+    report += ["---\n", "**执行统计**:"]
+    run_stats = result.get("stats", {})
+    if run_stats:
+        report.append(f"- 消息数: {run_stats.get('total_messages', 0)}")
+        report.append(f"- Tokens: {run_stats.get('total_tokens', 0)}")
+        report.append(f"- 成本: ${run_stats.get('total_cost', 0.0):.4f}")
+    formatted_summary = "\n".join(report)
 
+    # Fields from the result override the defaults; the summary is always the report
+    payload = {
+        "mode": "delegate",
+        "sub_trace_id": sub_trace_id,
+        "continue_from": continued,
+    }
+    payload.update(result)
+    payload["summary"] = formatted_summary
+    return payload
+
+
+def _format_multi_result(
+    tasks: List[str], results: List[Dict[str, Any]], sub_trace_ids: List[Dict]
+) -> Dict[str, Any]:
+    """格式化多任务(explore)聚合结果"""
+    lines = ["## 探索结果\n"]
     successful = 0
     failed = 0
     total_tokens = 0
     total_cost = 0.0
 
-    for i, (branch, result) in enumerate(zip(branches, results)):
-        branch_name = chr(ord('A') + i)  # A, B, C...
-        lines.append(f"### 方案 {branch_name}: {branch}")
+    for i, (task_item, result) in enumerate(zip(tasks, results)):
+        branch_name = chr(ord('A') + i)
+        lines.append(f"### 方案 {branch_name}: {task_item}")
 
         if isinstance(result, dict):
             status = result.get("status", "unknown")
@@ -181,7 +176,7 @@ def _format_explore_results(
 
             summary = result.get("summary", "")
             if summary:
-                lines.append(f"**摘要**: {summary[:200]}...")  # 限制长度
+                lines.append(f"**摘要**: {summary[:200]}...")
 
             stats = result.get("stats", {})
             if stats:
@@ -199,545 +194,517 @@ def _format_explore_results(
 
     lines.append("---\n")
     lines.append("## 总结")
-    lines.append(f"- 总分支数: {len(branches)}")
+    lines.append(f"- 总分支数: {len(tasks)}")
     lines.append(f"- 成功: {successful}")
     lines.append(f"- 失败: {failed}")
     lines.append(f"- 总 tokens: {total_tokens}")
     lines.append(f"- 总成本: ${total_cost:.4f}")
 
-    return "\n".join(lines)
-
-
-def _format_delegate_result(result: Dict[str, Any]) -> str:
-    """格式化 delegate 模式的详细结果"""
-    lines = ["## 委托任务完成\n"]
-
-    summary = result.get("summary", "")
-    if summary:
-        lines.append(summary)
-        lines.append("")
-
-    lines.append("---\n")
-    lines.append("**执行统计**:")
-
-    stats = result.get("stats", {})
-    if stats:
-        lines.append(f"- 消息数: {stats.get('total_messages', 0)}")
-        lines.append(f"- Tokens: {stats.get('total_tokens', 0)}")
-        lines.append(f"- 成本: ${stats.get('total_cost', 0.0):.4f}")
-
-    return "\n".join(lines)
-
+    aggregated_summary = "\n".join(lines)
+    overall_status = "completed" if successful > 0 else "failed"
 
-def _format_evaluate_result(result: Dict[str, Any]) -> str:
-    """格式化 evaluate 模式的评估结果"""
-    summary = result.get("summary", "")
-    return summary  # evaluate 的 summary 已经是格式化的评估结果
+    return {
+        "mode": "explore",
+        "status": overall_status,
+        "summary": aggregated_summary,
+        "sub_trace_ids": sub_trace_ids,
+        "tasks": tasks,
+        "stats": _aggregate_stats(results),
+    }
 
 
-def _get_allowed_tools_for_mode(mode: str, context: dict) -> Optional[List[str]]:
-    """获取模式对应的允许工具列表"""
-    if mode == "explore":
-        return ["read_file", "grep_content", "glob_files", "goal"]
-    elif mode in ["delegate", "evaluate"]:
-        # 获取所有工具,排除 subagent
-        runner = context.get("runner")
-        if runner and hasattr(runner, "tools") and hasattr(runner.tools, "registry"):
-            all_tools = list(runner.tools.registry.keys())
-            return [t for t in all_tools if t != "subagent"]
-    return None  # 使用默认(所有工具)
+async def _get_goal_description(store, trace_id: str, goal_id: str) -> str:
+    """Look up the description of a goal in a trace's goal tree.
+
+    Args:
+        store: Object exposing an awaitable ``get_goal_tree(trace_id)``.
+        trace_id: Trace whose goal tree is searched.
+        goal_id: Goal to look up; a falsy value short-circuits the lookup.
+
+    Returns:
+        ``""`` when ``goal_id`` is falsy, the goal's ``description`` when both
+        the tree and the goal are found, otherwise the placeholder
+        ``"Goal <goal_id>"``.
+    """
+    if goal_id:
+        tree = await store.get_goal_tree(trace_id)
+        matched = tree.find(goal_id) if tree else None
+        if matched:
+            return matched.description
+        return f"Goal {goal_id}"
+    return ""
 
 
-def _aggregate_stats(results: List[Dict[str, Any]]) -> Dict[str, Any]:
-    """聚合多个结果的统计信息"""
-    total_messages = 0
-    total_tokens = 0
-    total_cost = 0.0
+def _build_evaluate_prompt(goal_description: str, messages: Optional[Messages]) -> str:
+    """
+    Assemble the prompt handed to the evaluator sub-agent.
 
-    for result in results:
-        if isinstance(result, dict) and "stats" in result:
-            stats = result["stats"]
-            total_messages += stats.get("total_messages", 0)
-            total_tokens += stats.get("total_tokens", 0)
-            total_cost += stats.get("total_cost", 0.0)
+    Args:
+        goal_description: Goal text the result is evaluated against.
+        messages: Optional message dicts carrying the execution result and
+            context; only their textual content is used.
+
+    Returns:
+        Prompt text made of the goal description, the text extracted from
+        ``messages`` (or a placeholder when there is none) and the expected
+        answer template.
+    """
+    # Flatten message contents to plain text: string contents are taken
+    # verbatim, list contents contribute only their {"type": "text"} items.
+    texts = []
+    for msg in messages or []:
+        content = msg.get("content", "")
+        if isinstance(content, str):
+            texts.append(content)
+        elif isinstance(content, list):
+            texts.extend(
+                item.get("text", "")
+                for item in content
+                if isinstance(item, dict) and item.get("type") == "text"
+            )
+    result_text = "\n".join(texts)
 
-    return {
-        "total_messages": total_messages,
-        "total_tokens": total_tokens,
-        "total_cost": total_cost
-    }
+    shown_result = result_text if result_text else "(无执行结果)"
+    sections = [
+        "# 评估任务",
+        "",
+        "请评估以下任务的执行结果是否满足要求。",
+        "",
+        "## 目标描述",
+        "",
+        goal_description,
+        "",
+        "## 执行结果",
+        "",
+        shown_result,
+        "",
+        "## 输出格式",
+        "",
+        "## 评估结论",
+        "[通过/不通过]",
+        "",
+        "## 评估理由",
+        "[详细说明通过或不通过原因]",
+        "",
+        "## 修改建议(如果不通过)",
+        "1. [建议1]",
+        "2. [建议2]",
+    ]
+    return "\n".join(sections)
 
 
-# ===== 模式处理函数 =====
+# ===== 统一内部执行函数 =====
 
-async def _handle_explore_mode(
-    branches: List[str],
-    background: Optional[str],
+async def _run_agents(
+    tasks: List[str],
+    per_agent_msgs: List[Messages],
     continue_from: Optional[str],
-    store, current_trace_id: str, current_goal_id: str, runner
+    store, trace_id: str, goal_id: str, runner, context: dict,
 ) -> Dict[str, Any]:
-    """Explore 模式:并行探索多个方案"""
+    """
+    统一 agent 执行逻辑。
 
-    # 1. 检查 continue_from(不支持)
-    if continue_from:
-        return {
-            "status": "failed",
-            "error": "explore mode does not support continue_from parameter"
-        }
+    single (len(tasks)==1): delegate 模式,全量工具(排除 agent/evaluate)
+    multi (len(tasks)>1): explore 模式,只读工具,并行执行
+    """
+    single = len(tasks) == 1
+    parent_trace = await store.get_trace(trace_id)
+
+    # continue_from: 复用已有 trace(仅 single)
+    sub_trace_id = None
+    continued = False
+    if single and continue_from:
+        existing = await store.get_trace(continue_from)
+        if not existing:
+            return {"status": "failed", "error": f"Continue-from trace not found: {continue_from}"}
+        sub_trace_id = continue_from
+        continued = True
+        goal_tree = await store.get_goal_tree(continue_from)
+        mission = goal_tree.mission if goal_tree else tasks[0]
+        sub_trace_ids = [{"trace_id": sub_trace_id, "mission": mission}]
+    else:
+        sub_trace_ids = []
 
-    # 2. 获取父 Trace 信息(用于继承 uid、model)
-    parent_trace = await store.get_trace(current_trace_id)
+    # 创建 sub-traces 和执行协程
+    coros = []
+    all_sub_trace_ids = list(sub_trace_ids)  # copy for continue_from case
 
-    # 3. 创建所有 Sub-Traces
-    sub_trace_ids = []
-    tasks = []
+    for i, (task_item, msgs) in enumerate(zip(tasks, per_agent_msgs)):
+        if single and continued:
+            # continue_from 已经设置了 sub_trace_id
+            pass
+        else:
+            agent_type = "delegate" if single else "explore"
+            suffix = "delegate" if single else f"explore-{i+1:03d}"
+            stid = generate_sub_trace_id(trace_id, suffix)
+
+            sub_trace = Trace(
+                trace_id=stid,
+                mode="agent",
+                task=task_item,
+                parent_trace_id=trace_id,
+                parent_goal_id=goal_id,
+                agent_type=agent_type,
+                uid=parent_trace.uid if parent_trace else None,
+                model=parent_trace.model if parent_trace else None,
+                status="running",
+                context={"created_by_tool": "agent"},
+                created_at=datetime.now(),
+            )
+            await store.create_trace(sub_trace)
+            await store.update_goal_tree(stid, GoalTree(mission=task_item))
 
-    for i, branch in enumerate(branches):
-        # 生成唯一的 sub_trace_id
-        sub_trace_id = generate_sub_trace_id(current_trace_id, f"explore-{i+1:03d}")
-        sub_trace_ids.append({
-            "trace_id": sub_trace_id,
-            "mission": branch
-        })
+            all_sub_trace_ids.append({"trace_id": stid, "mission": task_item})
 
-        # 创建 Sub-Trace
-        sub_trace = Trace(
-            trace_id=sub_trace_id,
-            mode="agent",
-            task=branch,
-            parent_trace_id=current_trace_id,
-            parent_goal_id=current_goal_id,
-            agent_type="explore",
-            uid=parent_trace.uid if parent_trace else None,
-            model=parent_trace.model if parent_trace else None,
-            status="running",
-            context={"subagent_mode": "explore", "created_by_tool": "subagent"},
-            created_at=datetime.now(),
-        )
-        await store.create_trace(sub_trace)
-        await store.update_goal_tree(sub_trace_id, GoalTree(mission=branch))
+            # 广播 sub_trace_started
+            await broadcast_sub_trace_started(
+                trace_id, stid, goal_id or "",
+                agent_type, task_item,
+            )
 
-        # 广播 sub_trace_started
-        await broadcast_sub_trace_started(
-            current_trace_id, sub_trace_id, current_goal_id or "",
-            "explore", branch
-        )
+            if single:
+                sub_trace_id = stid
 
         # 注册为活跃协作者
+        cur_stid = sub_trace_id if single else all_sub_trace_ids[-1]["trace_id"]
+        collab_name = task_item[:30] if single and not continued else (
+            f"delegate-{cur_stid[:8]}" if single else f"explore-{i+1}"
+        )
         await _update_collaborator(
-            store, current_trace_id,
-            name=f"explore-{i+1}", sub_trace_id=sub_trace_id,
-            status="running", summary=branch[:80],
+            store, trace_id,
+            name=collab_name, sub_trace_id=cur_stid,
+            status="running", summary=task_item[:80],
         )
 
-        # 创建执行任务
-        task_coro = runner.run_result(
-            messages=[{"role": "user", "content": branch}],
+        # 构建消息
+        agent_msgs = list(msgs) + [{"role": "user", "content": task_item}]
+        allowed_tools = _get_allowed_tools(single, context)
+
+        coro = runner.run_result(
+            messages=agent_msgs,
             config=_make_run_config(
-                trace_id=sub_trace_id,
-                agent_type="explore",
+                trace_id=cur_stid,
+                agent_type="delegate" if single else "explore",
                 model=parent_trace.model if parent_trace else "gpt-4o",
                 uid=parent_trace.uid if parent_trace else None,
-                tools=["read_file", "grep_content", "glob_files", "goal"],
-                name=branch,
+                tools=allowed_tools,
+                name=task_item[:50],
             ),
         )
-        tasks.append(task_coro)
-
-    # 4. 更新主 Goal 为 in_progress
-    await _update_goal_start(store, current_trace_id, current_goal_id, "explore", sub_trace_ids)
+        coros.append((i, cur_stid, collab_name, coro))
 
-    # 5. 并行执行所有分支
-    results = await asyncio.gather(*tasks, return_exceptions=True)
+    # 更新主 Goal 为 in_progress
+    await _update_goal_start(
+        store, trace_id, goal_id,
+        "delegate" if single else "explore",
+        all_sub_trace_ids,
+    )
 
-    # 6. 处理结果并广播完成事件
-    processed_results = []
+    # 执行
+    if single:
+        # 单任务直接执行(带异常处理)
+        _, stid, collab_name, coro = coros[0]
+        try:
+            result = await coro
 
-    for i, result in enumerate(results):
-        sub_tid = sub_trace_ids[i]["trace_id"]
-        if isinstance(result, Exception):
-            # 异常处理
-            error_result = {
-                "status": "failed",
-                "summary": f"执行出错: {str(result)}",
-                "stats": {"total_messages": 0, "total_tokens": 0, "total_cost": 0.0}
-            }
-            processed_results.append(error_result)
-            await broadcast_sub_trace_completed(
-                current_trace_id, sub_tid, "failed", str(result), {}
-            )
-            await _update_collaborator(
-                store, current_trace_id,
-                name=f"explore-{i+1}", sub_trace_id=sub_tid,
-                status="failed", summary=str(result)[:80],
-            )
-        else:
-            processed_results.append(result)
             await broadcast_sub_trace_completed(
-                current_trace_id, sub_tid,
+                trace_id, stid,
                 result.get("status", "completed"),
                 result.get("summary", ""),
-                result.get("stats", {})
+                result.get("stats", {}),
             )
             await _update_collaborator(
-                store, current_trace_id,
-                name=f"explore-{i+1}", sub_trace_id=sub_tid,
+                store, trace_id,
+                name=collab_name, sub_trace_id=stid,
                 status=result.get("status", "completed"),
                 summary=result.get("summary", "")[:80],
             )
 
-    # 7. 格式化汇总结果
-    aggregated_summary = _format_explore_results(branches, processed_results)
+            formatted = _format_single_result(result, stid, continued)
 
-    # 8. 更新主 Goal 为 completed
-    overall_status = "completed" if any(
-        r.get("status") == "completed" for r in processed_results if isinstance(r, dict)
-    ) else "failed"
+            await _update_goal_complete(
+                store, trace_id, goal_id,
+                result.get("status", "completed"),
+                formatted["summary"],
+                all_sub_trace_ids,
+            )
+            return formatted
 
-    await _update_goal_complete(
-        store, current_trace_id, current_goal_id,
-        overall_status, aggregated_summary, sub_trace_ids
-    )
+        except Exception as e:
+            error_msg = str(e)
+            await broadcast_sub_trace_completed(
+                trace_id, stid, "failed", error_msg, {},
+            )
+            await _update_collaborator(
+                store, trace_id,
+                name=collab_name, sub_trace_id=stid,
+                status="failed", summary=error_msg[:80],
+            )
+            await _update_goal_complete(
+                store, trace_id, goal_id,
+                "failed", f"委托任务失败: {error_msg}",
+                all_sub_trace_ids,
+            )
+            return {
+                "mode": "delegate",
+                "status": "failed",
+                "error": error_msg,
+                "sub_trace_id": stid,
+            }
+    else:
+        # 多任务并行执行
+        raw_results = await asyncio.gather(
+            *(coro for _, _, _, coro in coros),
+            return_exceptions=True,
+        )
 
-    # 9. 返回结果
-    return {
-        "mode": "explore",
-        "status": overall_status,
-        "summary": aggregated_summary,
-        "sub_trace_ids": sub_trace_ids,
-        "branches": branches,
-        "stats": _aggregate_stats(processed_results)
-    }
+        processed_results = []
+        for idx, raw in enumerate(raw_results):
+            _, stid, collab_name, _ = coros[idx]
+            if isinstance(raw, Exception):
+                error_result = {
+                    "status": "failed",
+                    "summary": f"执行出错: {str(raw)}",
+                    "stats": {"total_messages": 0, "total_tokens": 0, "total_cost": 0.0},
+                }
+                processed_results.append(error_result)
+                await broadcast_sub_trace_completed(
+                    trace_id, stid, "failed", str(raw), {},
+                )
+                await _update_collaborator(
+                    store, trace_id,
+                    name=collab_name, sub_trace_id=stid,
+                    status="failed", summary=str(raw)[:80],
+                )
+            else:
+                processed_results.append(raw)
+                await broadcast_sub_trace_completed(
+                    trace_id, stid,
+                    raw.get("status", "completed"),
+                    raw.get("summary", ""),
+                    raw.get("stats", {}),
+                )
+                await _update_collaborator(
+                    store, trace_id,
+                    name=collab_name, sub_trace_id=stid,
+                    status=raw.get("status", "completed"),
+                    summary=raw.get("summary", "")[:80],
+                )
+
+        formatted = _format_multi_result(tasks, processed_results, all_sub_trace_ids)
 
+        await _update_goal_complete(
+            store, trace_id, goal_id,
+            formatted["status"],
+            formatted["summary"],
+            all_sub_trace_ids,
+        )
+        return formatted
 
-async def _handle_delegate_mode(
-    task: str,
-    continue_from: Optional[str],
-    store, current_trace_id: str, current_goal_id: str, runner, context: dict
+
+# ===== 工具定义 =====
+
+@tool(description="创建 Agent 执行任务")
+async def agent(
+    task: Union[str, List[str]],
+    messages: Optional[Union[Messages, List[Messages]]] = None,
+    continue_from: Optional[str] = None,
+    context: Optional[dict] = None,
 ) -> Dict[str, Any]:
-    """Delegate 模式:委托单个任务"""
+    """
+    创建 Agent 执行任务。
 
-    # 1. 获取父 Trace 信息
-    parent_trace = await store.get_trace(current_trace_id)
+    单任务 (task: str): delegate 模式,全量工具
+    多任务 (task: List[str]): explore 模式,只读工具,并行执行
 
-    # 2. 处理 continue_from 或创建新 Sub-Trace
-    if continue_from:
-        existing_trace = await store.get_trace(continue_from)
-        if not existing_trace:
-            return {"status": "failed", "error": f"Continue-from trace not found: {continue_from}"}
-        sub_trace_id = continue_from
-        # 获取 mission
-        goal_tree = await store.get_goal_tree(continue_from)
-        mission = goal_tree.mission if goal_tree else task
-        sub_trace_ids = [{"trace_id": sub_trace_id, "mission": mission}]
-    else:
-        sub_trace_id = generate_sub_trace_id(current_trace_id, "delegate")
-        sub_trace = Trace(
-            trace_id=sub_trace_id,
-            mode="agent",
-            task=task,
-            parent_trace_id=current_trace_id,
-            parent_goal_id=current_goal_id,
-            agent_type="delegate",
-            uid=parent_trace.uid if parent_trace else None,
-            model=parent_trace.model if parent_trace else None,
-            status="running",
-            context={"subagent_mode": "delegate", "created_by_tool": "subagent"},
-            created_at=datetime.now(),
-        )
-        await store.create_trace(sub_trace)
-        await store.update_goal_tree(sub_trace_id, GoalTree(mission=task))
-        sub_trace_ids = [{"trace_id": sub_trace_id, "mission": task}]
+    Args:
+        task: 任务描述。字符串=单任务,列表=多任务并行
+        messages: 预置消息。1D 列表=所有 agent 共享;2D 列表=per-agent
+        continue_from: 继续已有 trace(仅单任务)
+        context: 框架自动注入的上下文
+    """
+    # NOTE: the docstring above is kept in its original language on purpose:
+    # the tool schema generator reads parameter descriptions from it, so it
+    # is runtime text rather than documentation only.
+    #
+    # Summary: a string task runs one delegate sub-agent, a list of tasks
+    # runs them in parallel. Every failure is reported as
+    # {"status": "failed", "error": ...} instead of raising; otherwise the
+    # result of _run_agents is returned unchanged.
+    if not context:
+        return {"status": "failed", "error": "context is required"}
 
-        # 广播 sub_trace_started
-        await broadcast_sub_trace_started(
-            current_trace_id, sub_trace_id, current_goal_id or "",
-            "delegate", task
-        )
+    store = context.get("store")
+    trace_id = context.get("trace_id")
+    goal_id = context.get("goal_id")
+    runner = context.get("runner")
 
-    # 注册为活跃协作者
-    delegate_name = task[:30] if not continue_from else f"delegate-{sub_trace_id[:8]}"
-    await _update_collaborator(
-        store, current_trace_id,
-        name=delegate_name, sub_trace_id=sub_trace_id,
-        status="running", summary=task[:80],
-    )
+    missing = []
+    if not store:
+        missing.append("store")
+    if not trace_id:
+        missing.append("trace_id")
+    if not runner:
+        missing.append("runner")
+    if missing:
+        return {"status": "failed", "error": f"Missing required context: {', '.join(missing)}"}
 
-    # 3. 更新主 Goal 为 in_progress
-    await _update_goal_start(store, current_trace_id, current_goal_id, "delegate", sub_trace_ids)
+    # Normalise task to a list.
+    single = isinstance(task, str)
+    tasks = [task] if single else task
 
-    # 4. 执行任务
-    try:
-        allowed_tools = _get_allowed_tools_for_mode("delegate", context)
-        result = await runner.run_result(
-            messages=[{"role": "user", "content": task}],
-            config=_make_run_config(
-                trace_id=sub_trace_id,
-                agent_type="delegate",
-                model=parent_trace.model if parent_trace else "gpt-4o",
-                uid=parent_trace.uid if parent_trace else None,
-                tools=allowed_tools,
-                name=task[:50],
-            ),
-        )
+    # Reject an empty list and empty entries alike: "" wrapped into [""] is a
+    # non-empty list, and would otherwise start a sub-agent with no instruction.
+    if not tasks or any(not item for item in tasks):
+        return {"status": "failed", "error": "task is required"}
 
-        # 4. 广播 sub_trace_completed
-        await broadcast_sub_trace_completed(
-            current_trace_id, sub_trace_id,
-            result.get("status", "completed"),
-            result.get("summary", ""),
-            result.get("stats", {})
-        )
+    # Normalise messages to one message list per agent.
+    if messages is None:
+        per_agent_msgs: List[Messages] = [[] for _ in tasks]
+    elif messages and isinstance(messages[0], list):
+        # 2D input: one message list per task. Tasks and message lists are
+        # paired with zip() downstream, which stops at the shorter input, so
+        # too few message lists would silently drop the trailing tasks.
+        if len(messages) < len(tasks):
+            return {
+                "status": "failed",
+                "error": (
+                    f"messages provides {len(messages)} per-agent lists "
+                    f"for {len(tasks)} tasks"
+                ),
+            }
+        per_agent_msgs = messages
+    else:
+        per_agent_msgs = [messages] * len(tasks)  # 1D: shared by every agent
 
-        # 更新协作者状态
-        await _update_collaborator(
-            store, current_trace_id,
-            name=delegate_name, sub_trace_id=sub_trace_id,
-            status=result.get("status", "completed"),
-            summary=result.get("summary", "")[:80],
-        )
+    if continue_from and not single:
+        return {"status": "failed", "error": "continue_from requires single task"}
 
-        # 5. 格式化结果
-        formatted_summary = _format_delegate_result(result)
+    return await _run_agents(
+        tasks, per_agent_msgs, continue_from,
+        store, trace_id, goal_id, runner, context,
+    )
 
-        # 6. 更新主 Goal 为 completed
-        await _update_goal_complete(
-            store, current_trace_id, current_goal_id,
-            result.get("status", "completed"), formatted_summary, sub_trace_ids
-        )
 
-        # 7. 返回结果
-        return {
-            "mode": "delegate",
-            "sub_trace_id": sub_trace_id,
-            "continue_from": bool(continue_from),
-            **result,
-            "summary": formatted_summary
-        }
+@tool(description="评估目标执行结果是否满足要求")
+async def evaluate(
+    messages: Optional[Messages] = None,
+    target_goal_id: Optional[str] = None,
+    continue_from: Optional[str] = None,
+    context: Optional[dict] = None,
+) -> Dict[str, Any]:
+    """
+    评估目标执行结果是否满足要求。
 
-    except Exception as e:
-        # 错误处理
-        error_msg = str(e)
-        await broadcast_sub_trace_completed(
-            current_trace_id, sub_trace_id,
-            "failed", error_msg, {}
-        )
+    代码自动从 GoalTree 注入目标描述。模型把执行结果和上下文放在 messages 中。
 
-        await _update_collaborator(
-            store, current_trace_id,
-            name=delegate_name, sub_trace_id=sub_trace_id,
-            status="failed", summary=error_msg[:80],
-        )
+    Args:
+        messages: 执行结果和上下文消息(OpenAI 格式)
+        target_goal_id: 要评估的目标 ID(默认当前 goal_id)
+        continue_from: 继续已有评估 trace
+        context: 框架自动注入的上下文
+    """
+    if not context:
+        return {"status": "failed", "error": "context is required"}
 
-        await _update_goal_complete(
-            store, current_trace_id, current_goal_id,
-            "failed", f"委托任务失败: {error_msg}", sub_trace_ids
-        )
+    store = context.get("store")
+    trace_id = context.get("trace_id")
+    current_goal_id = context.get("goal_id")
+    runner = context.get("runner")
 
-        return {
-            "mode": "delegate",
-            "status": "failed",
-            "error": error_msg,
-            "sub_trace_id": sub_trace_id
-        }
+    missing = []
+    if not store:
+        missing.append("store")
+    if not trace_id:
+        missing.append("trace_id")
+    if not runner:
+        missing.append("runner")
+    if missing:
+        return {"status": "failed", "error": f"Missing required context: {', '.join(missing)}"}
 
+    # target_goal_id 默认 context["goal_id"]
+    goal_id = target_goal_id or current_goal_id
 
-async def _handle_evaluate_mode(
-    target_goal_id: str,
-    evaluation_input: Dict[str, Any],
-    requirements: Optional[str],
-    continue_from: Optional[str],
-    store, current_trace_id: str, current_goal_id: str, runner, context: dict
-) -> Dict[str, Any]:
-    """Evaluate 模式:评估任务结果"""
+    # 从 GoalTree 获取目标描述
+    goal_desc = await _get_goal_description(store, trace_id, goal_id)
 
-    # 1. 构建评估 prompt
-    task_prompt = await _build_evaluate_prompt(
-        store, current_trace_id, target_goal_id,
-        evaluation_input, requirements
-    )
+    # 构建 evaluator prompt
+    eval_prompt = _build_evaluate_prompt(goal_desc, messages)
 
-    # 2. 获取父 Trace 信息
-    parent_trace = await store.get_trace(current_trace_id)
+    # 获取父 Trace 信息
+    parent_trace = await store.get_trace(trace_id)
 
-    # 3. 处理 continue_from 或创建新 Sub-Trace
+    # 处理 continue_from 或创建新 Sub-Trace
     if continue_from:
         existing_trace = await store.get_trace(continue_from)
         if not existing_trace:
             return {"status": "failed", "error": f"Continue-from trace not found: {continue_from}"}
         sub_trace_id = continue_from
-        # 获取 mission
         goal_tree = await store.get_goal_tree(continue_from)
-        mission = goal_tree.mission if goal_tree else task_prompt
+        mission = goal_tree.mission if goal_tree else eval_prompt
         sub_trace_ids = [{"trace_id": sub_trace_id, "mission": mission}]
     else:
-        sub_trace_id = generate_sub_trace_id(current_trace_id, "evaluate")
+        sub_trace_id = generate_sub_trace_id(trace_id, "evaluate")
         sub_trace = Trace(
             trace_id=sub_trace_id,
             mode="agent",
-            task=task_prompt,
-            parent_trace_id=current_trace_id,
+            task=eval_prompt,
+            parent_trace_id=trace_id,
             parent_goal_id=current_goal_id,
             agent_type="evaluate",
             uid=parent_trace.uid if parent_trace else None,
             model=parent_trace.model if parent_trace else None,
             status="running",
-            context={"subagent_mode": "evaluate", "created_by_tool": "subagent"},
+            context={"created_by_tool": "evaluate"},
             created_at=datetime.now(),
         )
         await store.create_trace(sub_trace)
-        await store.update_goal_tree(sub_trace_id, GoalTree(mission=task_prompt))
-        sub_trace_ids = [{"trace_id": sub_trace_id, "mission": task_prompt}]
+        await store.update_goal_tree(sub_trace_id, GoalTree(mission=eval_prompt))
+        sub_trace_ids = [{"trace_id": sub_trace_id, "mission": eval_prompt}]
 
         # 广播 sub_trace_started
         await broadcast_sub_trace_started(
-            current_trace_id, sub_trace_id, current_goal_id or "",
-            "evaluate", task_prompt
+            trace_id, sub_trace_id, current_goal_id or "",
+            "evaluate", eval_prompt,
         )
 
-    # 4. 更新主 Goal 为 in_progress
-    await _update_goal_start(store, current_trace_id, current_goal_id, "evaluate", sub_trace_ids)
+    # 更新主 Goal 为 in_progress
+    await _update_goal_start(store, trace_id, current_goal_id, "evaluate", sub_trace_ids)
 
     # 注册为活跃协作者
-    eval_name = f"评估: {target_goal_id[:20]}"
+    eval_name = f"评估: {(goal_id or 'unknown')[:20]}"
     await _update_collaborator(
-        store, current_trace_id,
+        store, trace_id,
         name=eval_name, sub_trace_id=sub_trace_id,
-        status="running", summary=f"评估 Goal {target_goal_id}",
+        status="running", summary=f"评估 Goal {goal_id}",
     )
 
-    # 5. 执行评估
+    # 执行评估
     try:
-        allowed_tools = _get_allowed_tools_for_mode("evaluate", context)
+        # evaluate 使用只读工具 + goal
+        allowed_tools = ["read_file", "grep_content", "glob_files", "goal"]
         result = await runner.run_result(
-            messages=[{"role": "user", "content": task_prompt}],
+            messages=[{"role": "user", "content": eval_prompt}],
             config=_make_run_config(
                 trace_id=sub_trace_id,
                 agent_type="evaluate",
                 model=parent_trace.model if parent_trace else "gpt-4o",
                 uid=parent_trace.uid if parent_trace else None,
                 tools=allowed_tools,
-                name=f"评估: {target_goal_id}",
+                name=f"评估: {goal_id}",
             ),
         )
 
-        # 5. 广播 sub_trace_completed
         await broadcast_sub_trace_completed(
-            current_trace_id, sub_trace_id,
+            trace_id, sub_trace_id,
             result.get("status", "completed"),
             result.get("summary", ""),
-            result.get("stats", {})
+            result.get("stats", {}),
         )
-
-        # 更新协作者状态
         await _update_collaborator(
-            store, current_trace_id,
+            store, trace_id,
             name=eval_name, sub_trace_id=sub_trace_id,
             status=result.get("status", "completed"),
             summary=result.get("summary", "")[:80],
         )
 
-        # 6. 格式化结果
-        formatted_summary = _format_evaluate_result(result)
+        formatted_summary = result.get("summary", "")
 
-        # 7. 更新主 Goal 为 completed
         await _update_goal_complete(
-            store, current_trace_id, current_goal_id,
-            result.get("status", "completed"), formatted_summary, sub_trace_ids
+            store, trace_id, current_goal_id,
+            result.get("status", "completed"),
+            formatted_summary,
+            sub_trace_ids,
         )
 
-        # 8. 返回结果
         return {
             "mode": "evaluate",
             "sub_trace_id": sub_trace_id,
             "continue_from": bool(continue_from),
             **result,
-            "summary": formatted_summary
+            "summary": formatted_summary,
         }
 
     except Exception as e:
-        # 错误处理
         error_msg = str(e)
         await broadcast_sub_trace_completed(
-            current_trace_id, sub_trace_id,
-            "failed", error_msg, {}
+            trace_id, sub_trace_id, "failed", error_msg, {},
         )
-
         await _update_collaborator(
-            store, current_trace_id,
+            store, trace_id,
             name=eval_name, sub_trace_id=sub_trace_id,
             status="failed", summary=error_msg[:80],
         )
-
         await _update_goal_complete(
-            store, current_trace_id, current_goal_id,
-            "failed", f"评估任务失败: {error_msg}", sub_trace_ids
+            store, trace_id, current_goal_id,
+            "failed", f"评估任务失败: {error_msg}",
+            sub_trace_ids,
         )
-
         return {
             "mode": "evaluate",
             "status": "failed",
             "error": error_msg,
-            "sub_trace_id": sub_trace_id
+            "sub_trace_id": sub_trace_id,
         }
-
-
-@tool(description="创建 Sub-Agent 执行任务(evaluate/delegate/explore)")
-async def subagent(
-    mode: str,
-    task: Optional[str] = None,
-    target_goal_id: Optional[str] = None,
-    evaluation_input: Optional[Dict[str, Any]] = None,
-    requirements: Optional[str] = None,
-    branches: Optional[List[str]] = None,
-    background: Optional[str] = None,
-    continue_from: Optional[str] = None,
-    context: Optional[dict] = None,
-) -> Dict[str, Any]:
-    # 1. 验证 context
-    if not context:
-        return {"status": "failed", "error": "context is required"}
-
-    store = context.get("store")
-    current_trace_id = context.get("trace_id")
-    current_goal_id = context.get("goal_id")
-    runner = context.get("runner")
-
-    missing = []
-    if not store:
-        missing.append("store")
-    if not current_trace_id:
-        missing.append("trace_id")
-    if not runner:
-        missing.append("runner")
-    if missing:
-        return {"status": "failed", "error": f"Missing required context: {', '.join(missing)}"}
-
-    # 2. 验证 mode
-    if mode not in {"evaluate", "delegate", "explore"}:
-        return {"status": "failed", "error": "Invalid mode: must be evaluate/delegate/explore"}
-
-    # 3. 验证模式特定参数
-    if mode == "delegate" and not task:
-        return {"status": "failed", "error": "delegate mode requires task"}
-    if mode == "explore" and not branches:
-        return {"status": "failed", "error": "explore mode requires branches"}
-    if mode == "evaluate" and (not target_goal_id or evaluation_input is None):
-        return {"status": "failed", "error": "evaluate mode requires target_goal_id and evaluation_input"}
-
-    # 4. 路由到模式处理函数
-    if mode == "explore":
-        return await _handle_explore_mode(
-            branches, background, continue_from,
-            store, current_trace_id, current_goal_id, runner
-        )
-    elif mode == "delegate":
-        return await _handle_delegate_mode(
-            task, continue_from,
-            store, current_trace_id, current_goal_id, runner, context
-        )
-    else:  # evaluate
-        return await _handle_evaluate_mode(
-            target_goal_id, evaluation_input, requirements, continue_from,
-            store, current_trace_id, current_goal_id, runner, context
-        )

+ 4 - 1
agent/tools/registry.py

@@ -234,13 +234,16 @@ class ToolRegistry:
 			duration = time.time() - start_time
 			stats.total_duration += duration
 
-			# 返回 JSON 字符串或文本
+			# 返回结果:ToolResult 转为可序列化格式
 			if isinstance(result, str):
 				return result
 
 			# 处理 ToolResult 对象
 			from agent.tools.models import ToolResult
 			if isinstance(result, ToolResult):
+				# 有图片时返回 dict 以便 runner 构建多模态消息
+				if result.images:
+					return {"text": result.to_llm_message(), "images": result.images}
 				return result.to_llm_message()
 
 			return json.dumps(result, ensure_ascii=False, indent=2)

+ 20 - 7
agent/tools/schema.py

@@ -11,7 +11,7 @@ Schema Generator - 从函数签名自动生成 OpenAI Tool Schema
 
 import inspect
 import logging
-from typing import Any, Dict, List, Optional, get_args, get_origin
+from typing import Any, Dict, List, Literal, Optional, Union, get_args, get_origin
 
 logger = logging.getLogger(__name__)
 
@@ -142,16 +142,29 @@ class SchemaGenerator:
     @classmethod
     def _type_to_schema(cls, python_type: Any) -> Dict[str, Any]:
         """将 Python 类型转换为 JSON Schema"""
-        # 处理 Optional[T]
+        if python_type is Any:
+            return {}
+
         origin = get_origin(python_type)
         args = get_args(python_type)
 
-        if origin is Optional.__class__ or (origin and str(origin) == "typing.Union"):
-            # Optional[T] = Union[T, None]
+        # 处理 Literal[...]
+        if origin is Literal:
+            values = list(args)
+            if all(isinstance(v, str) for v in values):
+                return {"type": "string", "enum": values}
+            elif all(isinstance(v, int) for v in values):
+                return {"type": "integer", "enum": values}
+            return {"enum": values}
+
+        # 处理 Union[T, ...] 和 Optional[T]
+        if origin is Union:
             if len(args) == 2 and type(None) in args:
-                inner_type = args[0] if args[1] is type(None) else args[1]
-                schema = cls._type_to_schema(inner_type)
-                return schema
+                # Optional[T] = Union[T, None]
+                inner = args[0] if args[1] is type(None) else args[1]
+                return cls._type_to_schema(inner)
+            non_none = [a for a in args if a is not type(None)]
+            return {"oneOf": [cls._type_to_schema(a) for a in non_none]}
 
         # 处理 List[T]
         if origin is list or origin is List:

+ 145 - 102
agent/trace/compaction.py

@@ -1,135 +1,115 @@
 """
-Context 压缩
+Context 压缩 — 两级压缩策略
 
-基于 Goal 状态进行增量压缩:
-- 当 Goal 完成或放弃时,将相关的详细 messages 替换为 summary
+Level 1: GoalTree 过滤(确定性,零成本)
+  - 跳过 completed/abandoned goals 的消息(信息已在 GoalTree summary 中)
+  - 始终保留:system prompt、第一条 user message、当前 focus goal 的消息
+
+Level 2: LLM 总结(仅在 Level 1 后仍超限时触发)
+  - 在消息列表末尾追加压缩 prompt → 主模型回复 → summary 存为新消息
+  - summary 的 parent_sequence 跳过被压缩的范围
+
+压缩不修改存储:原始消息永远保留在 messages/,通过 parent_sequence 树结构实现跳过。
 """
 
-from typing import List, Dict, Any, Optional
-from .goal_models import GoalTree, Goal
+from dataclasses import dataclass
+from typing import List, Dict, Any, Optional, Set
+
+from .goal_models import GoalTree
+from .models import Message
+
+
+# ===== 配置 =====
 
+@dataclass
+class CompressionConfig:
+    """压缩配置"""
+    max_tokens: int = 100000           # 最大 token 数
+    threshold_ratio: float = 0.8       # 触发 Level 2 的阈值比例(80%)
+    keep_recent_messages: int = 10     # Level 1 中始终保留最近 N 条消息
 
-def compress_messages_for_goal(
-    messages: List[Dict[str, Any]],
-    goal_id: str,
-    summary: str,
-) -> List[Dict[str, Any]]:
+
+# ===== Level 1: GoalTree 过滤 =====
+
+def filter_by_goal_status(
+    messages: List[Message],
+    goal_tree: Optional[GoalTree],
+) -> List[Message]:
     """
-    压缩指定 goal 关联的 messages
+    Level 1 过滤:跳过 completed/abandoned goals 的消息
+
+    始终保留:
+    - goal_id 为 None 的消息(system prompt、初始 user message)
+    - 当前 focus goal 及其祖先链上的消息
+    - in_progress 和 pending goals 的消息
 
-    将 goal_id 关联的所有详细 messages 替换为一条 summary message。
+    跳过:
+    - completed 且不在焦点路径上的 goals 的消息
+    - abandoned goals 的消息
 
     Args:
-        messages: 原始消息列表
-        goal_id: 要压缩的 goal ID
-        summary: 压缩后的摘要
+        messages: 主路径上的有序消息列表
+        goal_tree: GoalTree 实例
 
     Returns:
-        压缩后的消息列表
+        过滤后的消息列表
     """
-    # 分离:关联的 messages vs 其他 messages
-    related = []
-    other = []
+    if not goal_tree or not goal_tree.goals:
+        return messages
 
-    for msg in messages:
-        if msg.get("goal_id") == goal_id:
-            related.append(msg)
-        else:
-            other.append(msg)
+    # 构建焦点路径(当前焦点 + 父链 + 直接子节点)
+    focus_path = _get_focus_path(goal_tree)
 
-    # 如果没有关联的消息,直接返回
-    if not related:
-        return messages
+    # 构建需要跳过的 goal IDs
+    skip_goal_ids: Set[str] = set()
+    for goal in goal_tree.goals:
+        if goal.id in focus_path:
+            continue  # 焦点路径上的 goal 始终保留
+        if goal.status in ("completed", "abandoned"):
+            skip_goal_ids.add(goal.id)
 
-    # 找到第一条关联消息的位置(用于插入 summary)
-    first_related_index = None
-    for i, msg in enumerate(messages):
-        if msg.get("goal_id") == goal_id:
-            first_related_index = i
-            break
-
-    # 创建 summary message
-    summary_message = {
-        "role": "assistant",
-        "content": f"[Goal {goal_id} Summary] {summary}",
-        "goal_id": goal_id,
-        "is_summary": True,
-    }
-
-    # 构建新的消息列表
+    # 过滤消息
     result = []
-    summary_inserted = False
-
-    for i, msg in enumerate(messages):
-        if msg.get("goal_id") == goal_id:
-            # 跳过关联的详细消息,在第一个位置插入 summary
-            if not summary_inserted:
-                result.append(summary_message)
-                summary_inserted = True
-        else:
-            result.append(msg)
+    for msg in messages:
+        if msg.goal_id is None:
+            result.append(msg)  # 无 goal 的消息始终保留
+        elif msg.goal_id not in skip_goal_ids:
+            result.append(msg)  # 不在跳过列表中的消息保留
 
     return result
 
 
-def should_compress(goal: Goal) -> bool:
-    """判断 goal 是否需要压缩"""
-    return goal.status in ("completed", "abandoned") and goal.summary is not None
+def _get_focus_path(goal_tree: GoalTree) -> Set[str]:
+    """获取焦点路径上的所有 goal IDs(焦点 + 父链 + 直接子节点)"""
+    focus_ids: Set[str] = set()
 
+    if not goal_tree.current_id:
+        return focus_ids
 
-def compress_all_completed(
-    messages: List[Dict[str, Any]],
-    tree: GoalTree,
-) -> List[Dict[str, Any]]:
-    """
-    压缩所有已完成/已放弃的 goals
-
-    遍历 GoalTree,对所有需要压缩的 goal 执行压缩。
+    # 焦点自身
+    focus_ids.add(goal_tree.current_id)
 
-    Args:
-        messages: 原始消息列表
-        tree: GoalTree 实例
-
-    Returns:
-        压缩后的消息列表
-    """
-    result = messages
-
-    def process_goal(goal: Goal):
-        nonlocal result
-        if should_compress(goal):
-            # 检查是否已经压缩过(避免重复压缩)
-            already_compressed = any(
-                msg.get("goal_id") == goal.id and msg.get("is_summary")
-                for msg in result
-            )
-            if not already_compressed:
-                result = compress_messages_for_goal(result, goal.id, goal.summary)
-
-        # 递归处理子目标
-        for child in goal.children:
-            process_goal(child)
-
-    for goal in tree.goals:
-        process_goal(goal)
+    # 父链
+    goal = goal_tree.find(goal_tree.current_id)
+    while goal and goal.parent_id:
+        focus_ids.add(goal.parent_id)
+        goal = goal_tree.find(goal.parent_id)
 
-    return result
+    # 直接子节点
+    children = goal_tree.get_children(goal_tree.current_id)
+    for child in children:
+        focus_ids.add(child.id)
 
+    return focus_ids
 
-def get_messages_for_goal(
-    messages: List[Dict[str, Any]],
-    goal_id: str,
-) -> List[Dict[str, Any]]:
-    """获取指定 goal 关联的所有 messages"""
-    return [msg for msg in messages if msg.get("goal_id") == goal_id]
 
+# ===== Token 估算 =====
 
-def count_tokens_estimate(messages: List[Dict[str, Any]]) -> int:
+def estimate_tokens(messages: List[Dict[str, Any]]) -> int:
     """
-    估算消息的 token 数量(简单估算)
+    估算消息列表的 token 数量
 
-    实际使用时应该用 tiktoken 或 API 返回的 token 数。
-    这里用简单的字符数 / 4 来估算。
+    简单估算:字符数 / 4。实际使用时应该用 tiktoken 或 API 返回的 token 数。
     """
     total_chars = 0
     for msg in messages:
@@ -137,9 +117,72 @@ def count_tokens_estimate(messages: List[Dict[str, Any]]) -> int:
         if isinstance(content, str):
             total_chars += len(content)
         elif isinstance(content, list):
-            # 多模态消息
             for part in content:
                 if isinstance(part, dict) and part.get("type") == "text":
                     total_chars += len(part.get("text", ""))
+        # tool_calls
+        tool_calls = msg.get("tool_calls")
+        if tool_calls and isinstance(tool_calls, list):
+            for tc in tool_calls:
+                if isinstance(tc, dict):
+                    func = tc.get("function", {})
+                    total_chars += len(func.get("name", ""))
+                    args = func.get("arguments", "")
+                    if isinstance(args, str):
+                        total_chars += len(args)
 
     return total_chars // 4
+
+
+def estimate_tokens_from_messages(messages: List[Message]) -> int:
+    """从 Message 对象列表估算 token 数"""
+    return estimate_tokens([msg.to_llm_dict() for msg in messages])
+
+
+def needs_level2_compression(
+    token_count: int,
+    config: CompressionConfig,
+) -> bool:
+    """判断是否需要触发 Level 2 压缩"""
+    return token_count > config.max_tokens * config.threshold_ratio
+
+
+# ===== Level 2: 压缩 Prompt =====
+
+COMPRESSION_PROMPT = """请对以上对话历史进行压缩总结。
+
+要求:
+1. 保留关键决策、结论和产出(如创建的文件、修改的代码、得出的分析结论)
+2. 保留重要的上下文(如用户的要求、约束条件、之前的讨论结果)
+3. 省略中间探索过程、重复的工具调用细节
+4. 使用结构化格式(标题 + 要点)
+5. 控制在 2000 字以内
+
+当前 GoalTree 状态(完整版,含 summary):
+{goal_tree_prompt}
+"""
+
+REFLECT_PROMPT = """请回顾以上整个执行过程,提取有价值的经验教训。
+
+关注以下方面:
+1. **人工干预**:如果有用户中途修改了指令或纠正了方向,说明之前的决策哪里有问题
+2. **弯路**:哪些尝试是不必要的,有没有更直接的方法
+3. **好的决策**:哪些判断和选择是正确的,值得记住
+4. **工具使用**:哪些工具用法是高效的,哪些可以改进
+
+请以简洁的规则列表形式输出,每条规则格式为:
+- 当遇到 [条件] 时,应该 [动作](原因:[简短说明])
+"""
+
+
+def build_compression_prompt(goal_tree: Optional[GoalTree]) -> str:
+    """构建 Level 2 压缩 prompt"""
+    goal_prompt = ""
+    if goal_tree:
+        goal_prompt = goal_tree.to_prompt(include_summary=True)
+    return COMPRESSION_PROMPT.format(goal_tree_prompt=goal_prompt)
+
+
+def build_reflect_prompt() -> str:
+    """构建反思 prompt"""
+    return REFLECT_PROMPT

+ 49 - 3
agent/trace/goal_models.py

@@ -326,15 +326,22 @@ class GoalTree:
 
         return goal
 
-    def to_prompt(self, include_abandoned: bool = False) -> str:
+    def to_prompt(self, include_abandoned: bool = False, include_summary: bool = False) -> str:
         """
         格式化为 Prompt 注入文本
 
+        Args:
+            include_abandoned: 是否包含已废弃的目标
+            include_summary: 是否显示 completed/abandoned goals 的 summary 详情
+                False(默认)= 精简视图,用于日常周期性注入
+                True = 完整视图(含 summary),用于压缩时提供上下文
+
         展示策略:
         - 过滤掉 abandoned 目标(除非明确要求)
         - 完整展示所有顶层目标
         - 完整展示当前 focus 目标的父链及其所有子孙
         - 其他分支的子目标折叠显示(只显示数量和状态)
+        - include_summary=True 时不折叠,全部展开并显示 summary
         """
         lines = []
         lines.append(f"**Mission**: {self.mission}")
@@ -384,13 +391,19 @@ class GoalTree:
 
             result = [f"{prefix}{icon} {display_id}. {goal.description}{current_mark}"]
 
-            # 显示 summary(如果有)
-            if goal.summary:
+            # 显示 summary:include_summary=True 时全部显示,否则只在焦点路径上显示
+            if goal.summary and (include_summary or goal.id in current_path):
                 result.append(f"{prefix}    → {goal.summary}")
 
             # 递归处理子目标
             children = self.get_children(goal.id)
 
+            # include_summary 模式下不折叠,全部展开
+            if include_summary:
+                for child in children:
+                    result.extend(format_goal(child, indent + 1))
+                return result
+
             # 判断是否需要折叠
             # 如果当前 goal 或其子孙在焦点路径上,完整展示
             should_expand = goal.id in current_path or any(
@@ -464,6 +477,39 @@ class GoalTree:
             created_at=created_at or datetime.now(),
         )
 
+    def rebuild_for_rewind(self, completed_goal_ids: set) -> "GoalTree":
+        """
+        为 Rewind 重建干净的 GoalTree
+
+        保留 rewind 点之前已 completed 的 goals,丢弃其余。
+        清空 current_id,让 Agent 重新选择焦点。
+
+        Args:
+            completed_goal_ids: rewind 点之前已 completed 的 Goal ID 集合
+
+        Returns:
+            新的干净 GoalTree
+        """
+        surviving_goals = []
+        for goal in self.goals:
+            if goal.id in completed_goal_ids and goal.status == "completed":
+                surviving_goals.append(goal)
+
+        # 清理 parent_id 引用:如果 parent 不在存活列表中,设为 None
+        surviving_ids = {g.id for g in surviving_goals}
+        for goal in surviving_goals:
+            if goal.parent_id and goal.parent_id not in surviving_ids:
+                goal.parent_id = None
+
+        new_tree = GoalTree(
+            mission=self.mission,
+            goals=surviving_goals,
+            current_id=None,
+            _next_id=self._next_id,
+            created_at=self.created_at,
+        )
+        return new_tree
+
     def save(self, path: str) -> None:
         """保存到 JSON 文件"""
         with open(path, "w", encoding="utf-8") as f:

+ 25 - 4
agent/trace/models.py

@@ -7,9 +7,18 @@ Message: Trace 中的 LLM 消息,对应 LLM API 格式
 
 from dataclasses import dataclass, field
 from datetime import datetime
-from typing import Dict, Any, List, Optional, Literal
+from typing import Dict, Any, List, Optional, Literal, Union
 import uuid
 
+# ===== 消息线格式类型别名 =====
+# 轻量 wire-format 类型,用于工具参数和 runner/LLM API 接口。
+# 内部存储使用下方的 Message dataclass。
+
+ChatMessage = Dict[str, Any]                          # 单条 OpenAI 格式消息
+Messages = List[ChatMessage]                          # 消息列表
+MessageContent = Union[str, List[Dict[str, str]]]     # content 字段(文本或多模态)
+
+
 # 导入 TokenUsage(延迟导入避免循环依赖)
 def _get_token_usage_class():
     from ..llm.usage import TokenUsage
@@ -42,7 +51,7 @@ class Trace:
     parent_goal_id: Optional[str] = None      # 哪个 Goal 启动的
 
     # 状态
-    status: Literal["running", "completed", "failed"] = "running"
+    status: Literal["running", "completed", "failed", "stopped"] = "running"
 
     # 统计
     total_messages: int = 0      # 消息总数(改名自 total_steps)
@@ -57,6 +66,7 @@ class Trace:
 
     # 进度追踪(head)
     last_sequence: int = 0      # 最新 message 的 sequence
+    head_sequence: int = 0      # 当前主路径的头节点 sequence(用于 build_llm_messages)
     last_event_id: int = 0      # 最新事件 ID(用于 WS 续传)
 
     # 配置
@@ -111,6 +121,7 @@ class Trace:
             "total_cost": self.total_cost,
             "total_duration_ms": self.total_duration_ms,
             "last_sequence": self.last_sequence,
+            "head_sequence": self.head_sequence,
             "last_event_id": self.last_event_id,
             "uid": self.uid,
             "model": self.model,
@@ -142,7 +153,8 @@ class Message:
     trace_id: str
     role: Literal["system", "user", "assistant", "tool"]   # 和 LLM API 一致
     sequence: int                        # 全局顺序
-    status: Literal["active", "abandoned"] = "active"  # 回溯时后续消息标记为 abandoned
+    parent_sequence: Optional[int] = None  # 父消息的 sequence(构成消息树)
+    status: Literal["active", "abandoned"] = "active"  # [已弃用] 由 parent_sequence 树结构替代
     goal_id: Optional[str] = None        # 关联的 Goal 内部 ID(None = 还没有创建 Goal)
     description: str = ""                # 消息描述(系统自动生成)
     tool_call_id: Optional[str] = None   # tool 消息关联对应的 tool_call
@@ -157,7 +169,7 @@ class Message:
     cost: Optional[float] = None
     duration_ms: Optional[int] = None
     created_at: datetime = field(default_factory=datetime.now)
-    abandoned_at: Optional[datetime] = None  # 回溯标记时间
+    abandoned_at: Optional[datetime] = None  # [已弃用] 由 parent_sequence 树结构替代
 
     # LLM 响应信息(仅 role="assistant" 时使用)
     finish_reason: Optional[str] = None  # stop, length, tool_calls, content_filter 等
@@ -221,6 +233,7 @@ class Message:
         trace_id: str,
         sequence: int,
         goal_id: Optional[str] = None,
+        parent_sequence: Optional[int] = None,
     ) -> "Message":
         """从 OpenAI SDK 格式创建 Message"""
         role = d["role"]
@@ -237,6 +250,7 @@ class Message:
             role=role,
             sequence=sequence,
             goal_id=goal_id,
+            parent_sequence=parent_sequence,
             content=content,
             tool_call_id=d.get("tool_call_id"),
         )
@@ -257,6 +271,10 @@ class Message:
         if "status" not in filtered_data:
             filtered_data["status"] = "active"
 
+        # 向后兼容:旧消息没有 parent_sequence 字段
+        if "parent_sequence" not in filtered_data:
+            filtered_data["parent_sequence"] = None
+
         return cls(**filtered_data)
 
     @classmethod
@@ -268,6 +286,7 @@ class Message:
         goal_id: Optional[str] = None,
         content: Any = None,
         tool_call_id: Optional[str] = None,
+        parent_sequence: Optional[int] = None,
         prompt_tokens: Optional[int] = None,
         completion_tokens: Optional[int] = None,
         reasoning_tokens: Optional[int] = None,
@@ -285,6 +304,7 @@ class Message:
             trace_id=trace_id,
             role=role,
             sequence=sequence,
+            parent_sequence=parent_sequence,
             goal_id=goal_id,
             content=content,
             description=description,
@@ -368,6 +388,7 @@ class Message:
             "trace_id": self.trace_id,
             "role": self.role,
             "sequence": self.sequence,
+            "parent_sequence": self.parent_sequence,
             "status": self.status,
             "goal_id": self.goal_id,
             "tool_call_id": self.tool_call_id,

+ 17 - 0
agent/trace/protocols.py

@@ -135,6 +135,23 @@ class TraceStore(Protocol):
         """
         ...
 
+    async def get_main_path_messages(
+        self,
+        trace_id: str,
+        head_sequence: int
+    ) -> List[Message]:
+        """
+        获取主路径上的消息(从 head_sequence 沿 parent_sequence 链回溯到 root)
+
+        Args:
+            trace_id: Trace ID
+            head_sequence: 主路径头节点的 sequence
+
+        Returns:
+            按 sequence 正序排列的主路径 Message 列表
+        """
+        ...
+
     async def get_messages_by_goal(
         self,
         trace_id: str,

+ 144 - 43
agent/trace/run_api.py

@@ -1,12 +1,22 @@
 """
-Trace 操作 API — 新建 / 续跑 / 回溯
+Trace 控制 API — 新建 / 运行 / 停止 / 反思
 
-提供 POST 端点触发 Agent 执行。需要通过 set_runner() 注入 AgentRunner 实例。
+提供 POST 端点触发 Agent 执行和控制。需要通过 set_runner() 注入 AgentRunner 实例。
 执行在后台异步进行,客户端通过 WebSocket (/api/traces/{trace_id}/watch) 监听实时更新。
+
+端点:
+  POST /api/traces              — 新建 Trace 并执行
+  POST /api/traces/{id}/run     — 运行(统一续跑 + 回溯)
+  POST /api/traces/{id}/stop    — 停止运行中的 Trace
+  POST /api/traces/{id}/reflect — 反思,在 trace 末尾追加反思 prompt 运行,结果追加到 experiences 文件
+  GET  /api/traces/running      — 列出正在运行的 Trace
+  GET  /api/experiences         — 读取经验文件内容
 """
 
 import asyncio
 import logging
+import os
+from datetime import datetime
 from typing import Any, Dict, List, Optional
 
 from fastapi import APIRouter, HTTPException
@@ -16,6 +26,9 @@ logger = logging.getLogger(__name__)
 
 router = APIRouter(prefix="/api/traces", tags=["run"])
 
+# 经验 API 使用独立 prefix
+experiences_router = APIRouter(prefix="/api", tags=["experiences"])
+
 
 # ===== 全局 Runner(由 api_server.py 注入)=====
 
@@ -40,7 +53,7 @@ def _get_runner():
 # ===== Request / Response 模型 =====
 
 
-class RunRequest(BaseModel):
+class CreateRequest(BaseModel):
     """新建执行"""
     messages: List[Dict[str, Any]] = Field(..., description="OpenAI SDK 格式的输入消息")
     model: str = Field("gpt-4o", description="模型名称")
@@ -52,31 +65,42 @@ class RunRequest(BaseModel):
     uid: Optional[str] = Field(None)
 
 
-class ContinueRequest(BaseModel):
-    """续跑"""
+class TraceRunRequest(BaseModel):
+    """运行(统一续跑 + 回溯)"""
     messages: List[Dict[str, Any]] = Field(
-        default=[{"role": "user", "content": "继续"}],
-        description="追加到末尾的新消息",
+        default_factory=list,
+        description="追加的新消息(可为空,用于重新生成场景)",
+    )
+    insert_after: Optional[int] = Field(
+        None,
+        description="回溯插入点的 message sequence。None = 从末尾续跑,int = 回溯到该 sequence 后运行",
     )
 
 
-class RewindRequest(BaseModel):
-    """回溯重放"""
-    insert_after: int = Field(..., description="截断点的 message sequence(保留该 sequence 及之前的消息)")
-    messages: List[Dict[str, Any]] = Field(
-        default=[{"role": "user", "content": "继续"}],
-        description="在截断点之后插入的新消息",
-    )
+class ReflectRequest(BaseModel):
+    """反思请求"""
+    focus: Optional[str] = Field(None, description="反思重点(可选)")
 
 
 class RunResponse(BaseModel):
     """操作响应(立即返回,后台执行)"""
     trace_id: str
-    mode: str  # "new" | "continue" | "rewind"
     status: str = "started"
     message: str = ""
 
 
+class StopResponse(BaseModel):
+    """停止响应"""
+    trace_id: str
+    status: str  # "stopping" | "not_running"
+
+
+class ReflectResponse(BaseModel):
+    """反思响应"""
+    trace_id: str
+    reflection: str
+
+
 # ===== 后台执行 =====
 
 _running_tasks: Dict[str, asyncio.Task] = {}
@@ -120,7 +144,7 @@ async def _run_with_trace_signal(
 
 
 @router.post("", response_model=RunResponse)
-async def create_and_run(req: RunRequest):
+async def create_and_run(req: CreateRequest):
     """
     新建 Trace 并开始执行
 
@@ -152,18 +176,22 @@ async def create_and_run(req: RunRequest):
 
     return RunResponse(
         trace_id=trace_id,
-        mode="new",
         status="started",
         message=f"Execution started. Watch via WebSocket: /api/traces/{trace_id}/watch",
     )
 
 
-@router.post("/{trace_id}/continue", response_model=RunResponse)
-async def continue_trace(trace_id: str, req: ContinueRequest):
+@router.post("/{trace_id}/run", response_model=RunResponse)
+async def run_trace(trace_id: str, req: TraceRunRequest):
     """
-    续跑已有 Trace
+    运行已有 Trace(统一续跑 + 回溯)
+
+    - insert_after 为 null(或省略):从末尾续跑
+    - insert_after 为 int:回溯到该 sequence 后运行
+    - messages 为空 + insert_after 为 int:重新生成(从该位置重跑,不插入新消息)
 
-    在已有 trace 末尾追加消息,继续执行。
+    insert_after 的值是 message 的 sequence 号。如果指定的 sequence 是一条带
+    tool_calls 的 assistant 消息,系统会自动扩展截断点到其所有 tool response 之后。
     """
     from agent.core.runner import RunConfig
 
@@ -179,50 +207,105 @@ async def continue_trace(trace_id: str, req: ContinueRequest):
     if trace_id in _running_tasks and not _running_tasks[trace_id].done():
         raise HTTPException(status_code=409, detail="Trace is already running")
 
-    config = RunConfig(trace_id=trace_id)
+    config = RunConfig(trace_id=trace_id, insert_after=req.insert_after)
     task = asyncio.create_task(_run_in_background(trace_id, req.messages, config))
     _running_tasks[trace_id] = task
 
+    mode = "rewind" if req.insert_after is not None else "continue"
     return RunResponse(
         trace_id=trace_id,
-        mode="continue",
         status="started",
-        message=f"Continue started. Watch via WebSocket: /api/traces/{trace_id}/watch",
+        message=f"Run ({mode}) started. Watch via WebSocket: /api/traces/{trace_id}/watch",
     )
 
 
-@router.post("/{trace_id}/rewind", response_model=RunResponse)
-async def rewind_trace(trace_id: str, req: RewindRequest):
+@router.post("/{trace_id}/stop", response_model=StopResponse)
+async def stop_trace(trace_id: str):
     """
-    回溯重放
+    停止运行中的 Trace
 
-    从指定 sequence 处截断,abandon 后续消息和 goals,插入新消息重新执行。
-    insert_after 的值是 message 的 sequence 号,可通过 GET /api/traces/{trace_id}/messages 查看。
-    如果指定的 sequence 是一条带 tool_calls 的 assistant 消息,系统会自动扩展截断点到其所有 tool response 之后。
+    设置取消信号,agent loop 在下一个 LLM 调用前检查并退出。
+    Trace 状态置为 "stopped"。
+    """
+    runner = _get_runner()
+
+    # 通过 runner 的 stop 方法设置取消信号
+    stopped = await runner.stop(trace_id)
+
+    if not stopped:
+        # 检查是否在 _running_tasks 但 runner 不知道(可能已完成)
+        if trace_id in _running_tasks:
+            task = _running_tasks[trace_id]
+            if not task.done():
+                task.cancel()
+                _running_tasks.pop(trace_id, None)
+                return StopResponse(trace_id=trace_id, status="stopping")
+        return StopResponse(trace_id=trace_id, status="not_running")
+
+    return StopResponse(trace_id=trace_id, status="stopping")
+
+
+@router.post("/{trace_id}/reflect", response_model=ReflectResponse)
+async def reflect_trace(trace_id: str, req: ReflectRequest):
+    """
+    触发反思
+
+    在 trace 末尾追加一条包含反思 prompt 的 user message,运行 agent 获取反思结果,
+    将结果追加到 experiences 文件(默认 ./cache/experiences.md)。
+
+    反思消息作为侧枝(side branch):运行前保存 head_sequence,运行后恢复。
+    这样反思消息不会出现在主对话路径上。
     """
     from agent.core.runner import RunConfig
+    from agent.trace.compaction import build_reflect_prompt
 
     runner = _get_runner()
 
+    if not runner.trace_store:
+        raise HTTPException(status_code=503, detail="TraceStore not configured")
+
     # 验证 trace 存在
-    if runner.trace_store:
-        trace = await runner.trace_store.get_trace(trace_id)
-        if not trace:
-            raise HTTPException(status_code=404, detail=f"Trace not found: {trace_id}")
+    trace = await runner.trace_store.get_trace(trace_id)
+    if not trace:
+        raise HTTPException(status_code=404, detail=f"Trace not found: {trace_id}")
 
-    # 检查是否已在运行
+    # 检查是否在运行
     if trace_id in _running_tasks and not _running_tasks[trace_id].done():
-        raise HTTPException(status_code=409, detail="Trace is already running")
+        raise HTTPException(status_code=409, detail="Cannot reflect on a running trace. Stop it first.")
 
-    config = RunConfig(trace_id=trace_id, insert_after=req.insert_after)
-    task = asyncio.create_task(_run_in_background(trace_id, req.messages, config))
-    _running_tasks[trace_id] = task
+    # 保存当前 head_sequence(反思完成后恢复,使反思消息成为侧枝)
+    saved_head_sequence = trace.head_sequence
 
-    return RunResponse(
+    # 构建反思 prompt
+    prompt = build_reflect_prompt()
+    if req.focus:
+        prompt += f"\n\n请特别关注:{req.focus}"
+
+    # 以续跑方式运行:追加 user message,agent 回复反思内容
+    config = RunConfig(trace_id=trace_id)
+    result = await runner.run_result(
+        messages=[{"role": "user", "content": prompt}],
+        config=config,
+    )
+
+    reflection_text = result.get("summary", "")
+
+    # 恢复 head_sequence(反思消息成为侧枝,不影响主路径)
+    await runner.trace_store.update_trace(trace_id, head_sequence=saved_head_sequence)
+
+    # 追加到 experiences 文件
+    if reflection_text:
+        experiences_path = getattr(runner, "experiences_path", "./cache/experiences.md")
+        if experiences_path:
+            os.makedirs(os.path.dirname(experiences_path), exist_ok=True)
+            header = f"\n\n---\n\n## {trace_id} ({datetime.now().strftime('%Y-%m-%d %H:%M')})\n\n"
+            with open(experiences_path, "a", encoding="utf-8") as f:
+                f.write(header + reflection_text + "\n")
+            logger.info(f"Reflection appended to {experiences_path}")
+
+    return ReflectResponse(
         trace_id=trace_id,
-        mode="rewind",
-        status="started",
-        message=f"Rewind to sequence {req.insert_after} started. Watch via WebSocket: /api/traces/{trace_id}/watch",
+        reflection=reflection_text,
     )
 
 
@@ -236,3 +319,21 @@ async def list_running():
         else:
             running.append(tid)
     return {"running": running}
+
+
+# ===== 经验 API =====
+
+
+@experiences_router.get("/experiences")
+async def list_experiences():
+    """读取经验文件内容"""
+    runner = _get_runner()
+    experiences_path = getattr(runner, "experiences_path", "./cache/experiences.md")
+
+    if not experiences_path or not os.path.exists(experiences_path):
+        return {"content": "", "path": experiences_path}
+
+    with open(experiences_path, "r", encoding="utf-8") as f:
+        content = f.read()
+
+    return {"content": content, "path": experiences_path}

+ 29 - 0
agent/trace/store.py

@@ -491,6 +491,35 @@ class FileSystemTraceStore:
         messages.sort(key=lambda m: m.sequence)
         return messages
 
+    async def get_main_path_messages(
+        self,
+        trace_id: str,
+        head_sequence: int
+    ) -> List[Message]:
+        """
+        获取主路径上的消息(从 head_sequence 沿 parent_sequence 链回溯到 root)
+
+        Returns:
+            按 sequence 正序排列的主路径 Message 列表
+        """
+        # 加载所有消息,建立 sequence -> Message 索引
+        all_messages = await self.get_trace_messages(trace_id, include_abandoned=True)
+        messages_by_seq = {m.sequence: m for m in all_messages}
+
+        # 从 head 沿 parent chain 回溯
+        path = []
+        seq = head_sequence
+        while seq is not None:
+            msg = messages_by_seq.get(seq)
+            if not msg:
+                break
+            path.append(msg)
+            seq = msg.parent_sequence
+
+        # 反转为正序(root → head)
+        path.reverse()
+        return path
+
     async def get_messages_by_goal(
         self,
         trace_id: str,

+ 17 - 12
api_server.py

@@ -3,8 +3,9 @@ API Server - FastAPI 应用入口
 
 聚合所有模块的 API 路由:
 - GET  /api/traces — 查询(trace/api.py)
-- POST /api/traces — 执行(trace/run_api.py,需配置 Runner)
+- POST /api/traces — 执行控制(trace/run_api.py,需配置 Runner)
 - WS   /api/traces/{id}/watch — 实时推送(trace/websocket.py)
+- GET  /api/experiences — 经验查询(trace/run_api.py,需配置 Runner)
 """
 
 import logging
@@ -16,7 +17,7 @@ import uvicorn
 
 from agent.trace import FileSystemTraceStore
 from agent.trace.api import router as api_router, set_trace_store as set_api_trace_store
-from agent.trace.run_api import router as run_router, set_runner
+from agent.trace.run_api import router as run_router, experiences_router, set_runner
 from agent.trace.websocket import router as ws_router, set_trace_store as set_ws_trace_store
 
 
@@ -59,16 +60,17 @@ set_ws_trace_store(trace_store)
 
 # ===== 可选:配置 Runner(启用执行 API)=====
 
-# 如需启用 POST /api/traces(新建/续跑/回溯),取消以下注释并配置 LLM:
-#
-# from agent.core.runner import AgentRunner
-# from agent.llm import create_openrouter_llm_call
-#
-# runner = AgentRunner(
-#     trace_store=trace_store,
-#     llm_call=create_openrouter_llm_call(model="google/gemini-2.5-flash"),
-# )
-# set_runner(runner)
+# 如需启用 POST /api/traces(新建/运行/停止/反思),取消以下注释并配置 LLM:
+
+from agent.core.runner import AgentRunner
+from agent.llm import create_openrouter_llm_call
+
+runner = AgentRunner(
+    trace_store=trace_store,
+    llm_call=create_openrouter_llm_call(model="anthropic/claude-sonnet-4.5"),
+    experiences_path="./cache/experiences.md",  # 经验文件路径
+)
+set_runner(runner)
 
 
 # ===== 注册路由 =====
@@ -77,6 +79,9 @@ set_ws_trace_store(trace_store)
 # 注意:run_router 必须在 api_router 之前注册,否则 GET /running 会被 /{trace_id} 捕获
 app.include_router(run_router)
 
+# 经验 API(GET /api/experiences,需配置 Runner)
+app.include_router(experiences_router)
+
 # Trace 查询 API(GET)
 app.include_router(api_router)
 

+ 221 - 94
docs/README.md

@@ -15,7 +15,7 @@
 | 类型 | 创建方式 | 父子关系 | 状态 |
 |------|---------|---------|------|
 | 主 Agent | 直接调用 `runner.run()` | 无 parent | 正常执行 |
-| 子 Agent | 通过 `subagent` 工具 | `parent_trace_id` / `parent_goal_id` 指向父 | 正常执行 |
+| 子 Agent | 通过 `agent` 工具 | `parent_trace_id` / `parent_goal_id` 指向父 | 正常执行 |
 | 人类协助 | 通过 `ask_human` 工具 | `parent_trace_id` 指向父 | 阻塞等待 |
 
 ---
@@ -53,7 +53,7 @@ agent/
 │       ├── search.py      # 网络搜索
 │       ├── webfetch.py    # 网页抓取
 │       ├── skill.py       # 技能加载
-│       └── subagent.py    # 子 Agent 统一入口(evaluate/delegate/explore)
+│       └── subagent.py    # agent / evaluate 工具(子 Agent 创建与评估)
 ├── memory/                # 跨会话记忆
 │   ├── models.py          # Experience, Skill
@@ -131,7 +131,7 @@ class RunConfig:
     model: str = "gpt-4o"
     temperature: float = 0.3
     max_iterations: int = 200
-    tools: Optional[List[str]] = None          # None = 全部内置工具
+    tools: Optional[List[str]] = None          # None = 全部已注册工具
 
     # 框架层参数
     agent_type: str = "default"
@@ -156,11 +156,11 @@ class RunConfig:
 
 通过 RunConfig 参数自然区分,统一入口 `run(messages, config)`:
 
-| 模式 | trace_id | insert_after | messages 含义 |
-|------|----------|-------------|--------------|
-| 新建 | None | - | 初始任务消息 |
-| 续跑 | 已有 ID | None | 追加到末尾的新消息 |
-| 回溯 | 已有 ID | 指定 sequence | 在插入点之后追加的新消息 |
+| 模式 | trace_id | insert_after | messages 含义 | API 端点 |
+|------|----------|-------------|--------------|----------|
+| 新建 | None | - | 初始任务消息 | `POST /api/traces` |
+| 续跑 | 已有 ID | None | 追加到末尾的新消息 | `POST /api/traces/{id}/run` |
+| 回溯 | 已有 ID | 指定 sequence | 在插入点之后追加的新消息 | `POST /api/traces/{id}/run` |
 
 ### 执行流程
 
@@ -169,14 +169,14 @@ async def run(messages: List[Dict], config: RunConfig = None) -> AsyncIterator[U
     # Phase 1: PREPARE TRACE
     #   无 trace_id → 创建新 Trace(生成 name,初始化 GoalTree)
     #   有 trace_id + 无 insert_after → 加载已有 Trace,状态置为 running
-    #   有 trace_id + 有 insert_after → 加载 Trace,执行 rewind(标记后续 msgs/goals 为 abandoned
+    #   有 trace_id + 有 insert_after → 加载 Trace,执行 rewind(快照 GoalTree,重建,设 parent_sequence)
     trace = await _prepare_trace(config)
     yield trace
 
     # Phase 2: BUILD HISTORY
-    #   加载已有 active messages(续跑/回溯场景)
+    #   从 head_sequence 沿 parent chain 回溯构建主路径消息
     #   构建 system prompt(新建时注入 skills/experiences;续跑时复用已有)
-    #   追加 input messages
+    #   追加 input messages(设置 parent_sequence 指向当前 head)
     history, sequence = await _build_history(trace, messages, config)
 
     # Phase 3: AGENT LOOP
@@ -205,12 +205,12 @@ async def run(messages: List[Dict], config: RunConfig = None) -> AsyncIterator[U
 回溯通过 `RunConfig(trace_id=..., insert_after=N)` 触发,在 Phase 1 中执行:
 
 1. **验证插入点**:确保不截断在 assistant(tool_calls) 和 tool response 之间
-2. **标记 Messages**:sequence > cutoff 的 messages 标记 `status="abandoned"`
-3. **处理 Goals**:已完成且消息均在插入点之前的保留,其余 abandon
-4. **记录事件**:events.jsonl 追加 `rewind` 事件
-5. **更新 Trace**:status 改回 running
+2. **快照 GoalTree**:将当前完整 GoalTree 存入 `events.jsonl`(rewind 事件的 `goal_tree_snapshot` 字段)
+3. **重建 GoalTree**:保留 rewind 点之前已 completed 的 goals,丢弃其余,清空 `current_id`
+4. **设置 parent_sequence**:新消息的 `parent_sequence` 指向 rewind 点,旧消息自动脱离主路径
+5. **更新 Trace**:`head_sequence` 更新为新消息的 sequence,status 改回 running
 
-新消息的 sequence 从 `max(all_sequences) + 1` 开始,不复用被 abandon 的序号
+新消息的 sequence 从 `last_sequence + 1` 开始(全局递增,不复用)。旧消息无需标记 abandoned,通过消息树结构自然隔离。
 
 ### 调用接口
 
@@ -238,23 +238,53 @@ async for item in runner.run(
     config=RunConfig(trace_id="existing-trace-id", insert_after=5),
 ):
     ...
+
+# 重新生成:回溯后不插入新消息,直接基于已有消息重跑
+async for item in runner.run(
+    messages=[],
+    config=RunConfig(trace_id="existing-trace-id", insert_after=5),
+):
+    ...
 ```
 
 `insert_after` 的值是 message 的 `sequence` 号,可通过 `GET /api/traces/{trace_id}/messages` 查看。如果指定的 sequence 是一条带 `tool_calls` 的 assistant 消息,系统会自动将截断点扩展到其所有对应的 tool response 之后(安全截断)。
 
+**停止运行**:
+
+```python
+# 停止正在运行的 Trace
+await runner.stop(trace_id)
+```
+
+调用后 agent loop 在下一个检查点退出,Trace 状态置为 `stopped`。
+
 - `run(messages, config)`:**核心方法**,流式返回 `AsyncIterator[Union[Trace, Message]]`
-- `run_result(messages, config)`:便利方法,内部消费 `run()`,返回结构化结果。主要用于 `subagent` 工具内部
+- `run_result(messages, config)`:便利方法,内部消费 `run()`,返回结构化结果。主要用于 `agent`/`evaluate` 工具内部
 
 ### REST API
 
-操作型端点(需在 `api_server.py` 中配置 Runner)。执行在后台异步进行,通过 WebSocket 监听进度。
+#### 查询端点
 
 | 方法 | 路径 | 说明 |
 |------|------|------|
-| POST | `/api/traces` | 新建 Trace 并执行 |
-| POST | `/api/traces/{id}/continue` | 续跑 |
-| POST | `/api/traces/{id}/rewind` | 回溯重放 |
+| GET  | `/api/traces` | 列出 Traces |
+| GET  | `/api/traces/{id}` | 获取 Trace 详情(含 GoalTree、Sub-Traces) |
+| GET  | `/api/traces/{id}/messages` | 获取 Messages |
 | GET  | `/api/traces/running` | 列出正在运行的 Trace |
+| WS   | `/api/traces/{id}/watch` | 实时事件推送 |
+
+**实现**:`agent/trace/api.py`, `agent/trace/websocket.py`
+
+#### 控制端点
+
+需在 `api_server.py` 中配置 Runner。执行在后台异步进行,通过 WebSocket 监听进度。
+
+| 方法 | 路径 | 说明 |
+|------|------|------|
+| POST | `/api/traces` | 新建 Trace 并执行 |
+| POST | `/api/traces/{id}/run` | 运行(统一续跑 + 回溯) |
+| POST | `/api/traces/{id}/stop` | 停止运行中的 Trace |
+| POST | `/api/traces/{id}/reflect` | 触发反思,从执行历史中提取经验 |
 
 ```bash
 # 新建
@@ -262,16 +292,35 @@ curl -X POST http://localhost:8000/api/traces \
   -H "Content-Type: application/json" \
   -d '{"messages": [{"role": "user", "content": "分析项目架构"}], "model": "gpt-4o"}'
 
-# 续跑
-curl -X POST http://localhost:8000/api/traces/{trace_id}/continue \
+# 续跑(insert_after 为 null 或省略)
+curl -X POST http://localhost:8000/api/traces/{trace_id}/run \
   -d '{"messages": [{"role": "user", "content": "继续深入分析"}]}'
 
 # 回溯:从 sequence 5 处截断,插入新消息重新执行
-curl -X POST http://localhost:8000/api/traces/{trace_id}/rewind \
+curl -X POST http://localhost:8000/api/traces/{trace_id}/run \
   -d '{"insert_after": 5, "messages": [{"role": "user", "content": "换一个方案"}]}'
+
+# 重新生成:回溯到 sequence 5,不插入新消息,直接重跑
+curl -X POST http://localhost:8000/api/traces/{trace_id}/run \
+  -d '{"insert_after": 5, "messages": []}'
+
+# 停止
+curl -X POST http://localhost:8000/api/traces/{trace_id}/stop
+
+# 反思:追加反思 prompt 运行,结果追加到 experiences 文件
+curl -X POST http://localhost:8000/api/traces/{trace_id}/reflect \
+  -d '{"focus": "为什么第三步选择了错误的方案"}'
 ```
 
-响应立即返回 `{"trace_id": "...", "mode": "rewind", "status": "started"}`,通过 `WS /api/traces/{trace_id}/watch` 监听实时事件。
+响应立即返回 `{"trace_id": "...", "status": "started"}`,通过 `WS /api/traces/{trace_id}/watch` 监听实时事件。
+
+**实现**:`agent/trace/run_api.py`
+
+#### 经验端点
+
+| 方法 | 路径 | 说明 |
+|------|------|------|
+| GET  | `/api/experiences` | 读取经验文件内容 |
 
 **实现**:`agent/trace/run_api.py`
 
@@ -301,7 +350,7 @@ class Trace:
     parent_goal_id: Optional[str] = None     # 哪个 Goal 启动的
 
     # 状态
-    status: Literal["running", "completed", "failed"] = "running"
+    status: Literal["running", "completed", "failed", "stopped"] = "running"
 
     # 统计
     total_messages: int = 0
@@ -312,7 +361,8 @@ class Trace:
     total_duration_ms: int = 0
 
     # 进度追踪
-    last_sequence: int = 0                   # 最新 message 的 sequence
+    last_sequence: int = 0                   # 最新 message 的 sequence(全局递增,不复用)
+    head_sequence: int = 0                   # 当前主路径的头节点 sequence(用于 build_llm_messages)
     last_event_id: int = 0                   # 最新事件 ID(用于 WS 续传)
 
     # 配置
@@ -365,10 +415,10 @@ class Goal:
 
 **Goal 类型**:
 - `normal` - 普通目标,由 Agent 直接执行
-- `agent_call` - 通过 subagent 工具创建的目标,会启动 Sub-Trace
+- `agent_call` - 通过 `agent`/`evaluate` 工具创建的目标,会启动 Sub-Trace
 
 **agent_call 类型的 Goal**:
-- 调用 subagent 工具时自动设置
+- 调用 `agent`/`evaluate` 工具时自动设置
 - `agent_call_mode` 记录使用的模式(explore/delegate/evaluate)
 - `sub_trace_ids` 记录创建的所有 Sub-Trace ID
 - 状态转换:pending → in_progress(Sub-Trace 启动)→ completed(Sub-Trace 完成)
@@ -386,7 +436,7 @@ class Goal:
 
 ### Message(执行消息)
 
-对应 LLM API 的消息,每条 Message 关联一个 Goal。
+对应 LLM API 的消息,每条 Message 关联一个 Goal。消息通过 `parent_sequence` 形成树结构。
 
 ```python
 @dataclass
@@ -394,8 +444,8 @@ class Message:
     message_id: str                          # 格式:{trace_id}-{sequence:04d}
     trace_id: str
     role: Literal["system", "user", "assistant", "tool"]
-    sequence: int                            # 全局顺序
-    status: Literal["active", "abandoned"] = "active"  # 回溯时后续消息标记为 abandoned
+    sequence: int                            # 全局顺序(递增,不复用)
+    parent_sequence: Optional[int] = None    # 父消息的 sequence(构成消息树)
     goal_id: Optional[str] = None            # 关联的 Goal ID(初始消息为 None,系统会按需自动创建 root goal 兜底)
     description: str = ""                    # 系统自动生成的摘要
     tool_call_id: Optional[str] = None
@@ -411,15 +461,29 @@ class Message:
     finish_reason: Optional[str] = None
 
     created_at: datetime
-    abandoned_at: Optional[datetime] = None  # 回溯标记时间
+
+    # [已弃用] 由 parent_sequence 树结构替代
+    status: Literal["active", "abandoned"] = "active"
+    abandoned_at: Optional[datetime] = None
 ```
 
+**消息树(Message Tree)**:
+
+消息通过 `parent_sequence` 形成树。主路径 = 从 `trace.head_sequence` 沿 parent chain 回溯到 root。
+
+```
+正常对话:1 → 2 → 3 → 4 → 5       (每条的 parent 指向前一条)
+Rewind 到 3:3 → 6(parent=3) → 7   (新主路径,4-5 自动脱离)
+压缩 1-3:   8(summary, parent=None) → 6 → 7  (summary 跳过被压缩的消息)
+反思分支:   5 → 9(reflect, parent=5) → 10     (侧枝,不在主路径上)
+```
+
+`build_llm_messages` = 从 head 沿 parent_sequence 链回溯到 root,反转后返回。
+
 Message 提供格式转换方法:
 - `to_llm_dict()` → OpenAI 格式 Dict(用于 LLM 调用)
 - `from_llm_dict(d, trace_id, sequence, goal_id)` → 从 OpenAI 格式创建 Message
 
-加载 messages 时,默认只返回 `status="active"` 的消息。
-
 **实现**:`agent/trace/models.py`
 
 ---
@@ -468,38 +532,67 @@ AGENT_PRESETS = {
 
 ## 子 Trace 机制
 
-通过 `subagent` 工具创建子 Agent 执行任务,支持三种模式。子 Agent 通过 `name` 参数命名,便于后续引用和续跑
+通过 `agent` 工具创建子 Agent 执行任务。`task` 参数为字符串时为单任务(delegate),为列表时并行执行多任务(explore)。支持通过 `messages` 参数预置消息,通过 `continue_from` 参数续跑已有 Sub-Trace。
 
-`subagent` 工具负责创建 Sub-Trace 和初始化 GoalTree(因为需要设置自定义 context 元数据和命名规则),创建完成后将 `trace_id` 传给 `RunConfig`,由 Runner 接管后续执行。工具同时维护父 Trace 的 `context["collaborators"]` 列表。
+`agent` 工具负责创建 Sub-Trace 和初始化 GoalTree(因为需要设置自定义 context 元数据和命名规则),创建完成后将 `trace_id` 传给 `RunConfig`,由 Runner 接管后续执行。工具同时维护父 Trace 的 `context["collaborators"]` 列表。
 
-### explore 模式
+### agent 工具
 
-并行探索多个分支,适合技术选型、方案对比等场景。
+```python
+@tool(description="创建 Agent 执行任务")
+async def agent(
+    task: Union[str, List[str]],
+    messages: Optional[Union[Messages, List[Messages]]] = None,
+    continue_from: Optional[str] = None,
+    context: Optional[dict] = None,
+) -> Dict[str, Any]:
+```
 
-- 使用 `asyncio.gather()` 并行执行所有分支
-- 每个分支创建独立的 Sub-Trace
+**单任务(delegate)**:`task: str`
+- 创建单个 Sub-Trace
+- 完整工具权限(除 agent/evaluate 外,防止递归)
+- 支持 `continue_from` 续跑已有 Sub-Trace
+- 支持 `messages` 预置上下文消息
+
+**多任务(explore)**:`task: List[str]`
+- 使用 `asyncio.gather()` 并行执行所有任务
+- 每个任务创建独立的 Sub-Trace
 - 只读工具权限(read_file, grep_content, glob_files, goal)
+- `messages` 支持 1D(共享)或 2D(per-agent)
+- 不支持 `continue_from`
 - 汇总所有分支结果返回
 
-### delegate 模式
+### evaluate 工具
 
-委派单个任务给子 Agent 执行,适合代码分析、文档生成等场景。
+```python
+@tool(description="评估目标执行结果是否满足要求")
+async def evaluate(
+    messages: Optional[Messages] = None,
+    target_goal_id: Optional[str] = None,
+    continue_from: Optional[str] = None,
+    context: Optional[dict] = None,
+) -> Dict[str, Any]:
+```
 
-- 创建单个 Sub-Trace
-- 完整工具权限(除 subagent 外,防止递归)
-- 支持通过 `name` 续跑已有子 Agent:`subagent(name="coder", task="继续")`
+- 代码自动从 GoalTree 注入目标描述(无需 criteria 参数)
+- 模型把执行结果和上下文放在 `messages` 中
+- `target_goal_id` 默认为当前 goal_id
+- 只读工具权限
+- 返回评估结论和改进建议
 
-### evaluate 模式
+### 消息类型别名
 
-评估指定 Goal 的执行结果,提供质量评估和改进建议。
+定义在 `agent/trace/models.py`,用于工具参数和 runner/LLM API 接口:
 
-- 访问目标 Goal 的执行结果
-- 完整工具权限
-- 返回评估结论和建议
+```python
+ChatMessage = Dict[str, Any]                          # 单条 OpenAI 格式消息
+Messages = List[ChatMessage]                          # 消息列表
+MessageContent = Union[str, List[Dict[str, str]]]     # content 字段(文本或多模态)
+```
 
 **实现位置**:`agent/tools/builtin/subagent.py`
 
-**详细文档**:[工具系统 - Subagent 工具](./tools.md#subagent-工具)
+**详细文档**:[工具系统 - Agent/Evaluate 工具](./tools.md#agent-工具)
 
 ### ask_human 工具
 
@@ -548,7 +641,7 @@ AGENT_PRESETS = {
 ### 维护
 
 各工具负责更新 collaborators 列表(通过 `context["store"]` 写入 trace.context):
-- `subagent` 工具:创建/续跑子 Agent 时更新
+- `agent` 工具:创建/续跑子 Agent 时更新
 - `feishu` 工具:发送消息/收到回复时更新
 - Runner 只负责读取和注入
 
@@ -583,7 +676,7 @@ async def my_tool(arg: str, ctx: ToolContext) -> ToolResult:
 | 目录 | 工具 | 说明 |
 |-----|------|------|
 | `trace/` | goal | Agent 内部计划管理 |
-| `builtin/` | subagent | 子 Trace 创建(explore/delegate/evaluate) |
+| `builtin/` | agent, evaluate | 子 Agent 创建与评估 |
 | `builtin/file/` | read, write, edit, glob, grep | 文件操作 |
 | `builtin/browser/` | browser actions | 浏览器自动化 |
 | `builtin/` | bash, sandbox, search, webfetch, skill, ask_human | 其他工具 |
@@ -633,62 +726,94 @@ agent/memory/skills/
 
 从执行历史中提取的经验规则,用于指导未来任务。
 
-### 数据结构
+### 存储
 
-```python
-@dataclass
-class Experience:
-    id: str
-    scope: str           # "agent:executor" 或 "user:123"
-    condition: str       # "当遇到数据库连接超时"
-    rule: str            # "增加重试次数到5次"
-    evidence: Dict       # 证据(trace_ids)
-    confidence: float
-    usage_count: int
-    success_rate: float
-    embedding: List[float]  # 向量,用于检索
+经验以 Markdown 文件存储(默认 `./cache/experiences.md`),人类可读、可编辑、可版本控制。
+
+文件格式:
+
+```markdown
+---
+
+## trace-id-xxx (2026-02-12 15:30)
+
+- 当遇到 X 情况时,应该先 Y 再 Z
+- 分析代码前应先读取项目结构
+
+---
+
+## trace-id-yyy (2026-02-12 16:00)
+
+- 执行 bash 命令前应检查路径是否存在
 ```
 
-### 检索和注入
+### 反思机制(Reflect)
 
-```python
-# 1. 检索相关 Experiences
-experiences = await db.query(
-    "SELECT * FROM experiences WHERE scope = $1 ORDER BY embedding <-> $2 LIMIT 10",
-    f"agent:{agent_type}", embed(task)
-)
+通过 `POST /api/traces/{id}/reflect` 触发:
+
+1. 在 trace 末尾追加一条 user message(内置反思 prompt),**作为侧枝**(parent_sequence 分叉,不在主路径上)
+2. Agent 回顾整个执行过程,生成经验总结
+3. 将 assistant 的反思内容追加到 `./cache/experiences.md`
+
+反思消息不影响主对话路径。正常 continue/rewind 时看不到反思消息。
+
+反思 prompt 引导 Agent 关注:人工干预说明做错了什么、走了哪些弯路、哪些决策是对的。
+
+**实现**:`agent/trace/run_api.py:reflect_trace`
 
-# 2. 注入到 system prompt
-system_prompt += "\n# Learned Experiences\n" + format_experiences(experiences)
+### 注入
+
+新建 Trace 时,Runner 自动读取 `./cache/experiences.md` 并追加到第一条 user message 末尾:
+
+```python
+# _build_history 中(仅新建模式):
+if not config.trace_id:
+    experiences_text = self._load_experiences()  # 读取文件
+    if experiences_text:
+        first_user_msg["content"] += f"\n\n## 参考经验\n\n{experiences_text}"
 ```
 
-**存储**:PostgreSQL + pgvector
+后续 continue/rewind 不重新注入(经验已在初始消息中)。
 
-**实现**:`agent/memory/stores.py:ExperienceStore`
+**实现**:`agent/core/runner.py:AgentRunner._build_history`
 
 ---
 
 ## Context 压缩
 
-### 压缩时机
+### 两级压缩策略
 
-Goal 完成(done)或放弃(abandon)时,将详细 Messages 替换为 Summary Message。
+#### Level 1:GoalTree 过滤(确定性,零成本)
 
-### 压缩策略
+每轮 agent loop 构建 `llm_messages` 时自动执行:
+- 始终保留:system prompt、第一条 user message(含 GoalTree 精简视图)、当前 focus goal 的消息
+- 跳过 completed/abandoned goals 的消息(信息已在 GoalTree summary 中)
+- 通过 Message Tree 的 parent_sequence 实现跳过
 
-```
-Goal 状态变化
-
-收集该 Goal 下的所有 Messages
-
-生成 Summary(由 LLM 提供)
-
-替换原始 Messages 为单条 Summary Message
-
-更新统计信息
-```
+大多数情况下 Level 1 足够。
+
+#### Level 2:LLM 总结(仅在 Level 1 后仍超限时触发)
+
+触发条件:Level 1 之后 token 数仍超过阈值(默认 `max_tokens × 0.8`)。
+
+流程:
+1. **经验提取**:先在消息列表末尾追加反思 prompt → 主模型回复 → 追加到 `./cache/experiences.md`。反思消息为侧枝(parent_sequence 分叉,不在主路径上)
+2. **压缩**:在消息列表末尾追加压缩 prompt(含 GoalTree 完整视图) → 主模型回复 → summary 存为新消息,其 `parent_sequence` 跳过被压缩的范围
+
+### GoalTree 双视图
 
-**实现**:`agent/trace/compaction.py`
+`to_prompt()` 支持两种模式:
+- `include_summary=False`(默认):精简视图,用于日常周期性注入
+- `include_summary=True`:含所有 completed goals 的 summary,用于 Level 2 压缩时提供上下文
+
+### 压缩存储
+
+- 原始消息永远保留在 `messages/`
+- 压缩 summary 作为普通 Message 存储
+- 通过 `parent_sequence` 树结构实现跳过,无需 compression events 或 skip list
+- Rewind 到压缩区域内时,summary 脱离主路径,原始消息自动恢复
+
+**实现**:`agent/trace/compaction.py`, `agent/trace/goal_models.py`
 
 **详细文档**:[Context 管理](./context-management.md)
 
@@ -703,11 +828,13 @@ class TraceStore(Protocol):
     async def update_trace(self, trace_id: str, **updates) -> None: ...
     async def add_message(self, message: Message) -> None: ...
     async def get_trace_messages(self, trace_id: str, include_abandoned: bool = False) -> List[Message]: ...
+    async def get_main_path_messages(self, trace_id: str, head_sequence: int) -> List[Message]: ...
     async def get_messages_by_goal(self, trace_id: str, goal_id: str) -> List[Message]: ...
-    async def abandon_messages_after(self, trace_id: str, cutoff_sequence: int) -> List[str]: ...
     async def append_event(self, trace_id: str, event_type: str, payload: Dict) -> int: ...
 ```
 
+`get_main_path_messages` 从 `head_sequence` 沿 `parent_sequence` 链回溯,返回主路径上的有序消息列表。
+
 **实现**:
 - 协议定义:`agent/trace/protocols.py`
 - 文件存储:`agent/trace/store.py:FileSystemTraceStore`

+ 326 - 0
docs/decisions.md

@@ -773,4 +773,330 @@ Agent(含 sub-agent)有时不创建 goal 就直接执行工具调用,导
 
 **实现**:`agent/core/runner.py:AgentRunner._build_context_injection`
 
+---
+
+## 18. 统一 Message 类型 + 重构 Agent/Evaluate 工具
+
+**日期**: 2026-02-12
+
+### 问题
+
+原 `subagent` 工具存在几个问题:
+1. **概念冗余**:单一工具通过 `mode` 参数区分三种行为(explore/delegate/evaluate),参数组合复杂,模型容易用错
+2. **evaluate 的 criteria 参数多余**:模型既要在 `evaluation_input` 里放结果,又要在 `criteria` 里放标准,信息分散
+3. **缺少消息线格式类型**:工具参数和 runner 接口使用裸 `Dict`/`List[Dict]`/`Any`,无语义类型
+4. **SchemaGenerator 不支持 `Literal`/`Union`**:无法为新工具签名生成正确的 JSON Schema
+
+### 决策
+
+#### 18a. 拆分 `subagent` → `agent` + `evaluate` 两个独立工具
+
+- `agent(task, messages, continue_from)` — 创建 Agent 执行任务
+  - `task: str` → 单任务(delegate),全量工具(排除 agent/evaluate)
+  - `task: List[str]` → 多任务并行(explore),只读工具
+  - 通过 `isinstance(task, str)` 判断,无需 `mode` 参数
+- `evaluate(messages, target_goal_id, continue_from)` — 评估目标执行结果
+  - 代码自动从 GoalTree 注入目标描述,无 `criteria` 参数
+  - 模型把所有上下文放在 `messages` 中
+
+内部统一为 `_run_agents()` 函数,`single = len(tasks)==1` 区分 delegate/explore 行为。
+
+#### 18b. 增加消息线格式类型别名(`agent/trace/models.py`)
+
+```python
+ChatMessage = Dict[str, Any]                          # 单条 OpenAI 格式消息
+Messages = List[ChatMessage]                          # 消息列表
+MessageContent = Union[str, List[Dict[str, str]]]     # content 字段(文本或多模态)
+```
+
+放在 `models.py` 而非新文件——与存储层 `Message` dataclass 描述同一概念的不同层次。
+
+#### 18c. SchemaGenerator 支持 `Literal`/`Union`
+
+`_type_to_schema()` 新增:
+- `Literal["a", "b"]` → `{"type": "string", "enum": ["a", "b"]}`
+- `Union[str, List[str]]` → `{"oneOf": [...]}`
+- `Any` → `{}`(无约束)
+
+### 理由
+
+1. **最少概念**:两个单职责工具比一个多 mode 工具更易理解和使用
+2. **最少参数**:evaluate 无需 criteria(GoalTree 已有目标描述),agent 的 messages 支持 1D/2D 避免额外参数
+3. **模型/代码职责分离**:模型只管给 messages,代码自动注入 goal 上下文
+4. **类型安全**:`Union[str, List[str]]` 在 Schema 中生成 `oneOf`,LLM 能正确理解参数格式
+
+### 变更范围
+
+- `agent/trace/models.py` — 类型别名
+- `agent/tools/schema.py` — `Literal`/`Union` 支持
+- `agent/tools/builtin/subagent.py` — `agent` + `evaluate` 工具,`_run_agents()` 统一函数
+- `agent/tools/builtin/__init__.py`, `agent/core/runner.py` — 注册表更新
+- `agent/tools/builtin/feishu/chat.py`, `agent/tools/builtin/browser/baseClass.py` — 类型注解修正
+- `agent/__init__.py` — 导出新类型
+
+**实现**:`agent/tools/builtin/subagent.py`, `agent/trace/models.py`, `agent/tools/schema.py`
+
+---
+
+## 19. 前端控制 API:统一 run + stop + reflect
+
+**日期**: 2026-02-12
+
+### 问题
+
+需要从前端控制 Agent 的创建、启动(含从任意位置重放)、插入用户消息、打断运行。原有 API 将 `continue` 和 `rewind` 拆分为两个独立端点,但它们本质上是同一操作(在某个位置运行),仅 `insert_after` 是否为 null 的区别。此外,缺少停止和反思机制。
+
+### 决策
+
+#### 19a. 合并 `continue` + `rewind` → 统一 `run` 端点
+
+```
+POST /api/traces/{id}/run
+{
+  "messages": [...],
+  "insert_after": null | int
+}
+```
+
+- `insert_after: null` → 从末尾续跑(原 continue)
+- `insert_after: N` → 回溯到 sequence N 后运行(原 rewind)
+- `messages: []` + `insert_after: N` → 重新生成(从 N 处重跑,不插入新消息)
+
+删除 `POST /{id}/continue` 和 `POST /{id}/rewind` 两个端点。
+
+#### 19b. 新增 `stop` 端点 + Runner 取消机制
+
+```
+POST /api/traces/{id}/stop
+```
+
+Runner 内部维护 `_cancel_events: Dict[str, asyncio.Event]`,agent loop 在每次 LLM 调用前检查。`stop()` 方法设置事件,loop 退出,Trace 状态置为 `stopped`。
+
+Trace.status 新增 `"stopped"` 值。
+
+#### 19c. 新增 `reflect` 端点 — 追加反思 prompt 运行
+
+```
+POST /api/traces/{id}/reflect
+{
+  "focus": "optional, 反思重点"
+}
+```
+
+在 trace 末尾追加一条内置反思 prompt 的 user message,以续跑方式运行 agent。Agent 回顾整个执行过程后生成经验总结,结果自动追加到 `./cache/experiences.md`。
+
+不单独调用 LLM、不解析结构化数据——反思就是一次普通的 agent 运行,只是 user message 是预置的反思 prompt。
+
+#### 19d. 经验存储简化为文件
+
+经验存储从 MemoryStore(内存/数据库)简化为 `./cache/experiences.md` 文件:
+- 人类可读可编辑(Markdown)
+- 可版本控制(git)
+- 新建 Trace 时由 Runner 读取并注入到第一条 user message 末尾
+- `GET /api/experiences` 直接读取文件内容返回
+
+### 最终 API 设计
+
+```
+控制类(3 个端点,替代原来的 3 个):
+  POST /api/traces              → 创建并运行(不变)
+  POST /api/traces/{id}/run     → 运行(合并 continue + rewind)
+  POST /api/traces/{id}/stop    → 停止(新增)
+
+学习类(2 个端点,全新):
+  POST /api/traces/{id}/reflect → 追加反思 prompt 运行,结果追加到 experiences 文件
+  GET  /api/experiences         → 读取经验文件内容
+```
+
+### 理由
+
+1. **API 精简**:`continue` 和 `rewind` 合并为 `run` 后,即使新增了 `stop`,控制类端点总数仍为 3 个(3 → 3),另新增 2 个学习类端点
+2. **概念统一**:`run` 就是"在某个位置运行",`insert_after` 自然区分续跑和回溯,与 `RunConfig` 设计一致
+3. **前端简化**:`sendMessage()` 直接透传 `branchPoint` 作为 `insert_after`,无需判断调哪个 API
+4. **停止机制**:asyncio.Event 轻量可靠,每次 LLM 调用前检查,不会在工具执行中途被打断
+5. **反思闭环**:Run → Observe → Intervene → Reflect → Run,形成完整的学习循环
+6. **经验存储极简**:一个 Markdown 文件,不需要数据库,人类可读可编辑可版本控制
+
+### 变更范围
+
+- `agent/trace/models.py` — Trace.status 增加 `"stopped"`
+- `agent/core/runner.py` — `_cancel_events` 字典,`stop()` 方法,agent loop 检查取消;`experiences_path` 参数,`_load_experiences()` 方法,新建时注入经验到 user message
+- `agent/trace/run_api.py` — 合并 `continue`/`rewind` 为 `run`,新增 `stop`/`reflect` 端点,`GET /api/experiences` 读取文件
+- `api_server.py` — 注入 experiences_router
+
+**实现**:`agent/trace/run_api.py`, `agent/core/runner.py`, `agent/trace/models.py`
+
+---
+
+## 20. Message Tree:用 parent_sequence 构建消息树
+
+**日期**: 2026-02-13
+
+### 问题
+
+原有的消息管理使用线性列表 + `status=abandoned` 标记,导致:
+1. 压缩需要独立的 compression events + skip list 来标记跳过哪些消息
+2. 反思消息掺入主对话列表,需要额外过滤
+3. Rewind 需要标记 abandoned + 维护 GoalTree 快照
+4. `build_llm_messages` 逻辑复杂(过滤 abandoned + 应用 skip + 排除反思)
+
+### 决策
+
+**选择:Message 新增 `parent_sequence` 字段,消息形成树结构**
+
+核心规则:**`build_llm_messages` = 从 head 沿 parent_sequence 链回溯到 root**。
+
+**压缩**:summary 的 `parent_sequence` 指向压缩范围起点的前一条消息,旧消息自然脱离主路径。
+
+```
+压缩前主路径:1 → 2 → 3 → ... → 41 → 42 → ...
+压缩后:
+  1 → 2 → 3 → ... → 41 (旧路径,脱离主路径)
+       ↓
+  2 → 45(summary, parent=2) → 46 → ...  (新主路径)
+```
+
+**反思**:反思消息从当前消息分出侧枝,不汇入主路径,天然隔离。
+
+**Rewind**:新消息的 `parent_sequence` 指向 rewind 点,旧路径自动变成死胡同。
+
+```
+Rewind 到 seq 20:
+  主路径原本:1 → ... → 20 → 21 → ... → 50
+  Rewind 后:20 → 51(新, parent=20) → 52 → ...
+  新主路径:1 → ... → 20 → 51 → 52 → ...
+  旧消息 21-50 脱离主路径,无需标记 abandoned
+```
+
+**build_llm_messages**:
+
+```python
+def build_llm_messages(head_sequence, messages_by_seq):
+    path = []
+    seq = head_sequence
+    while seq is not None:
+        msg = messages_by_seq[seq]
+        path.append(msg)
+        seq = msg.parent_sequence
+    path.reverse()
+    return [m.to_llm_dict() for m in path]
+```
+
+### 不再需要的机制
+
+- ~~Message.status (abandoned)~~ → 树结构替代
+- ~~Message.abandoned_at~~ → 树结构替代
+- ~~compression events in events.jsonl~~ → summary.parent_sequence 替代
+- ~~abandon_messages_after()~~ → 新消息设 parent_sequence 即可
+- ~~skip list / 过滤逻辑~~ → parent chain 遍历替代
+
+### 变更范围
+
+- `agent/trace/models.py` — Message 新增 `parent_sequence`,`status`/`abandoned_at` 保留但标记弃用
+- `agent/trace/store.py` — 新增 `get_main_path_messages()`,Trace 追踪 `head_sequence`
+- `agent/trace/protocols.py` — 新增 `get_main_path_messages()` 接口
+- `agent/core/runner.py` — agent loop 中设置 parent_sequence,rewind 使用新模型
+
+**实现**:`agent/trace/models.py`, `agent/trace/store.py`, `agent/core/runner.py`
+
+---
+
+## 21. GoalTree Rewind:快照 + 重建
+
+**日期**: 2026-02-13
+
+### 问题
+
+Message Tree 解决了消息层面的分支问题,但 GoalTree 是独立的状态,不适合从消息树派生(压缩会使目标创建消息脱离主路径,但目标应该保留)。
+
+### 决策
+
+**选择:GoalTree 保持独立管理,rewind 时快照 + 重建**
+
+**Rewind 流程**:
+1. 把当前完整 GoalTree 快照存入 `events.jsonl`(rewind 事件的 `goal_tree_snapshot` 字段)
+2. 重建干净的 GoalTree:保留 rewind 点之前已 completed 的 goals,丢弃其余
+3. 清空 `current_id`,让 Agent 重新选择焦点
+
+**快照用途**:仅用于非运行态下查看历史版本,运行时和前端展示只使用当前干净的 goal.json。
+
+**Agent 自主废弃**:Agent 调用 `goal(abandon=...)` 时,abandoned goals 正常保留在 GoalTree 中,前端逐一收到事件,可以展示废弃的分支。
+
+**用户 Rewind**:不展示废弃的分支。GoalTree 被清理为只包含存活 goals,用户可通过"历史版本"页面查看快照。
+
+### 理由
+
+1. GoalTree 和 Messages 的生命周期不同——压缩可以移除消息但不能移除目标
+2. 快照 + 重建逻辑简单可靠,不需要 event sourcing
+3. 干净的 goal.json 让运行时和前端展示始终一致
+
+### 变更范围
+
+- `agent/core/runner.py:_rewind()` — 快照旧树到事件,重建干净树
+- `agent/trace/store.py` — rewind 事件增加 `goal_tree_snapshot`
+
+**实现**:`agent/core/runner.py`
+
+---
+
+## 22. Context 压缩:GoalTree 双视图 + 两级压缩
+
+**日期**: 2026-02-13
+
+### 问题
+
+长时间运行的 Agent 会累积大量 messages,超出 LLM 上下文窗口。需要在保留关键信息的前提下压缩历史。
+
+### 决策
+
+**选择:Level 1 确定性过滤 + Level 2 LLM 总结,压缩不修改存储**
+
+#### 22a. GoalTree 双视图
+
+`to_prompt()` 支持两种模式:
+- `include_summary=False`(默认):精简视图,用于日常周期性注入
+- `include_summary=True`:含所有 completed goals 的 summary,用于压缩时提供上下文
+
+压缩视图追加到第一条 user message 末尾(构建 `llm_messages` 时的内存操作,不修改存储)。
+
+#### 22b. Level 1:GoalTree 过滤(确定性,零成本)
+
+每轮 agent loop 构建 `llm_messages` 时:
+- 始终保留:system prompt、第一条 user message、focus goal 的消息
+- 跳过 completed/abandoned goals 的消息(信息已在 GoalTree summary 中)
+- 通过 Message Tree 的 parent_sequence 实现(压缩 summary 的 parent 跳过被压缩的消息)
+
+大多数情况下 Level 1 足够。
+
+#### 22c. Level 2:LLM 总结(仅在 Level 1 后仍超限时触发)
+
+触发条件:Level 1 之后 token 数仍超过阈值(默认 max_tokens × 0.8)。
+
+做法:在当前消息列表末尾追加压缩 prompt → 主模型回复 → summary 作为新消息存入 messages/,其 parent_sequence 跳过被压缩的范围。
+
+不使用 utility_llm,就用主模型。压缩和反思都是"在消息列表末尾追加 prompt,主模型回复"。
+
+#### 22d. 压缩前经验提取
+
+触发 Level 2 压缩之前,先在消息列表末尾追加反思 prompt → 主模型回复 → 结果追加到 `./cache/experiences.md`。反思消息为侧枝(parent_sequence 分叉,不在主路径上)。
+
+#### 22e. 压缩不修改存储
+
+- `messages/` 始终保留原始消息
+- 压缩结果(summary)作为新消息存入 messages/
+- 通过 parent_sequence 树结构实现"跳过",不需要 compression events 或 skip list
+- Rewind 到压缩区域内时,原始消息自动恢复到主路径(summary 脱离新主路径)
+
+#### 22f. 多次压缩的恢复
+
+每次压缩的 summary 消息通过 parent_sequence 跳过被压缩的范围。Rewind 时,如果 rewind 点在某次压缩之后,该压缩的 summary 仍在主路径上,压缩保持生效;如果 rewind 点在压缩之前,summary 脱离新主路径,原始消息自动恢复。无需特殊恢复逻辑。
+
+### 变更范围
+
+- `agent/trace/goal_models.py` — `to_prompt(include_summary)` 双视图
+- `agent/trace/compaction.py` — 压缩触发逻辑、Level 1/Level 2 实现
+- `agent/core/runner.py` — agent loop 中集成压缩
+
+**实现**:`agent/trace/compaction.py`, `agent/trace/goal_models.py`, `agent/core/runner.py`
+
 ---

+ 53 - 20
docs/tools.md

@@ -712,42 +712,75 @@ print(f"Success rate: {stats['success_rate']:.1%}")
 | `bash_command` | 执行 shell 命令 | opencode bash.ts |
 | `glob_files` | 文件模式匹配 | opencode glob.ts |
 | `grep_content` | 内容搜索(正则表达式) | opencode grep.ts |
-| `subagent` | 统一子 Agent 调用(evaluate/delegate/explore) | main 自研 |
+| `agent` | 创建 Agent 执行任务(单任务 delegate / 多任务并行 explore) | 自研 |
+| `evaluate` | 评估目标执行结果是否满足要求 | 自研 |
 
-### Subagent 工具
+### Agent 工具
 
-创建子 Agent 执行任务,支持三种模式:
+创建子 Agent 执行任务。通过 `task` 参数的类型自动区分模式:
 
-| 模式 | 用途 | 并行执行 | 工具权限 |
-|------|------|---------|---------|
-| **explore** | 并行探索多个方案 | ✅ | 只读(read_file, grep_content, glob_files, goal) |
-| **delegate** | 委托单个任务 | ❌ | 完整(除 subagent 外) |
-| **evaluate** | 评估任务结果 | ❌ | 完整(除 subagent 外) |
+| task 类型 | 模式 | 并行执行 | 工具权限 |
+|-----------|------|---------|---------|
+| `str`(单任务) | delegate | ❌ | 完整(除 agent/evaluate 外) |
+| `List[str]`(多任务) | explore | ✅ | 只读(read_file, grep_content, glob_files, goal) |
 
-**Explore 模式**:
+```python
+@tool(description="创建 Agent 执行任务")
+async def agent(
+    task: Union[str, List[str]],
+    messages: Optional[Union[Messages, List[Messages]]] = None,
+    continue_from: Optional[str] = None,
+    context: Optional[dict] = None,
+) -> Dict[str, Any]:
+```
+
+**messages 参数**:
+- `None`:无预置消息
+- `Messages`(1D 列表):所有 agent 共享
+- `List[Messages]`(2D 列表):per-agent 独立消息
+
+运行时判断:`messages[0]` 是 dict → 1D 共享;是 list → 2D per-agent。
+
+**单任务(delegate)**:
+- 适合委托专门任务(如代码分析、文档生成)
+- 完整工具权限,可执行复杂操作
+- 支持 `continue_from` 参数续跑已有 Sub-Trace
+
+**多任务(explore)**:
 - 适合对比多个方案(如技术选型、架构设计)
 - 使用 `asyncio.gather()` 并行执行,显著提升效率
-- 每个分支创建独立的 Sub-Trace,互不干扰
+- 每个任务创建独立的 Sub-Trace,互不干扰
 - 只读权限(文件系统层面),可使用 goal 工具管理计划
+- 不支持 `continue_from`
 
-**Delegate 模式**:
-- 适合委托专门任务(如代码分析、文档生成)
-- 完整工具权限,可执行复杂操作
-- 支持 `continue_from` 参数继续执行
+### Evaluate 工具
+
+评估指定 Goal 的执行结果,提供质量评估和改进建议。
+
+```python
+@tool(description="评估目标执行结果是否满足要求")
+async def evaluate(
+    messages: Optional[Messages] = None,
+    target_goal_id: Optional[str] = None,
+    continue_from: Optional[str] = None,
+    context: Optional[dict] = None,
+) -> Dict[str, Any]:
+```
 
-**Evaluate 模式**:
-- 适合评估任务完成质量
-- 可访问目标 Goal 的执行结果
-- 提供评估结论和改进建议
+- 无 `criteria` 参数——代码自动从 GoalTree 注入目标描述
+- 模型把执行结果和上下文放在 `messages` 中
+- `target_goal_id` 默认为当前 `goal_id`
+- 只读工具权限
+- 返回评估结论和改进建议
 
 **Sub-Trace 结构**:
-- 每个 subagent 调用创建独立的 Sub-Trace
+- 每个 `agent`/`evaluate` 调用创建独立的 Sub-Trace
 - Sub-Trace ID 格式:`{parent_id}@{mode}-{序号}-{timestamp}-001`
 - 通过 `parent_trace_id` 和 `parent_goal_id` 建立父子关系
 - Sub-Trace 信息存储在独立的 trace 目录中
 
 **Goal 集成**:
-- Subagent 调用会将 Goal 标记为 `type: "agent_call"`
+- `agent`/`evaluate` 调用会将 Goal 标记为 `type: "agent_call"`
 - `agent_call_mode` 记录使用的模式
 - `sub_trace_ids` 记录所有创建的 Sub-Trace
 - Goal 完成后,`summary` 包含格式化的汇总结果

+ 108 - 18
docs/trace-api.md

@@ -6,16 +6,20 @@
 
 ## 架构概览
 
-**职责定位**:`agent/execution` 模块负责所有 Trace/Message 相关功能
+**职责定位**:`agent/trace` 模块负责所有 Trace/Message 相关功能
 
 ```
-agent/execution/
+agent/trace/
 ├── models.py          # Trace/Message 数据模型
+├── goal_models.py     # Goal/GoalTree 数据模型
 ├── protocols.py       # TraceStore 存储接口
-├── fs_store.py        # 文件系统存储实现
+├── store.py           # 文件系统存储实现
 ├── trace_id.py        # Trace ID 生成工具
-├── api.py             # RESTful API
-└── websocket.py       # WebSocket 实时推送
+├── api.py             # RESTful 查询 API
+├── run_api.py         # 控制 API(run/stop/reflect)
+├── websocket.py       # WebSocket 实时推送
+├── goal_tool.py       # goal 工具(计划管理)
+└── compaction.py      # Context 压缩
 ```
 
 **设计原则**:
@@ -54,22 +58,23 @@ trace.task            # 任务描述
 trace.parent_trace_id # 父 Trace ID(Sub-Trace 专用)
 trace.parent_goal_id  # 触发的父 Goal ID(Sub-Trace 专用)
 trace.agent_type      # Agent 类型:explore, delegate 等
-trace.status          # "running" | "completed" | "failed"
+trace.status          # "running" | "completed" | "failed" | "stopped"
 trace.total_messages  # Message 总数
 trace.total_tokens    # Token 总数
 trace.total_cost      # 总成本
 trace.current_goal_id # 当前焦点 goal
+trace.head_sequence   # 当前主路径头节点 sequence(用于 build_llm_messages)
 ```
 
 **Trace ID 格式**:
 - **主 Trace**:标准 UUID,例如 `2f8d3a1c-4b6e-4f9a-8c2d-1e5b7a9f3c4d`
 - **Sub-Trace**:`{parent_uuid}@{mode}-{timestamp}-{seq}`,例如 `2f8d3a1c...@explore-20260204220012-001`
 
-**实现**:`agent/execution/models.py:Trace`
+**实现**:`agent/trace/models.py:Trace`
 
 ### Message - 执行消息
 
-对应 LLM API 消息,加上元数据。通过 `goal_id` 关联 GoalTree 中的目标。
+对应 LLM API 消息,加上元数据。通过 `goal_id` 关联 GoalTree 中的目标。通过 `parent_sequence` 形成消息树。
 
 ```python
 # assistant 消息(模型返回,可能含 text + tool_calls)
@@ -78,6 +83,7 @@ assistant_msg = Message.create(
     role="assistant",
     goal_id="3",                    # Goal ID(Trace 内部自增)
     content={"text": "...", "tool_calls": [...]},
+    parent_sequence=5,              # 父消息的 sequence
 )
 
 # tool 消息
@@ -87,14 +93,17 @@ tool_msg = Message.create(
     goal_id="5",
     tool_call_id="call_abc123",
     content="工具执行结果",
+    parent_sequence=6,
 )
 ```
 
+**parent_sequence**:指向父消息的 sequence,构成消息树。主路径 = 从 `trace.head_sequence` 沿 parent chain 回溯到 root。
+
 **description 字段**(系统自动生成):
 - `assistant` 消息:优先取 content 中的 text,若无 text 则生成 "tool call: XX, XX"
 - `tool` 消息:使用 tool name
 
-**实现**:`agent/execution/models.py:Message`
+**实现**:`agent/trace/models.py:Message`
 
 ---
 
@@ -120,6 +129,7 @@ class TraceStore(Protocol):
     async def add_message(self, message: Message) -> str: ...
     async def get_message(self, message_id: str) -> Optional[Message]: ...
     async def get_trace_messages(self, trace_id: str) -> List[Message]: ...
+    async def get_main_path_messages(self, trace_id: str, head_sequence: int) -> List[Message]: ...
     async def get_messages_by_goal(self, trace_id: str, goal_id: str) -> List[Message]: ...
     async def update_message(self, message_id: str, **updates) -> None: ...
 
@@ -128,12 +138,12 @@ class TraceStore(Protocol):
     async def append_event(self, trace_id: str, event_type: str, payload: Dict) -> int: ...
 ```
 
-**实现**:`agent/execution/protocols.py`
+**实现**:`agent/trace/protocols.py`
 
 ### FileSystemTraceStore
 
 ```python
-from agent.execution import FileSystemTraceStore
+from agent.trace import FileSystemTraceStore
 
 store = FileSystemTraceStore(base_path=".trace")
 ```
@@ -164,13 +174,15 @@ store = FileSystemTraceStore(base_path=".trace")
 - ✅ 每个 Sub-Trace 是顶层独立目录
 - ✅ Sub-Trace 有完整的 Trace 结构(meta + goal + messages + events)
 
-**实现**:`agent/execution/fs_store.py`
+**实现**:`agent/trace/store.py`
 
 ---
 
 ## REST API 端点
 
-### 1. 列出 Traces
+### 查询端点
+
+#### 1. 列出 Traces
 
 ```http
 GET /api/traces?mode=agent&status=running&limit=20
@@ -178,7 +190,7 @@ GET /api/traces?mode=agent&status=running&limit=20
 
 返回所有 Traces(包括主 Trace 和 Sub-Traces)。
 
-### 2. 获取 Trace + GoalTree + Sub-Traces
+#### 2. 获取 Trace + GoalTree + Sub-Traces
 
 ```http
 GET /api/traces/{trace_id}
@@ -189,7 +201,7 @@ GET /api/traces/{trace_id}
 - GoalTree(该 Trace 的完整 Goal 树)
 - Sub-Traces 元数据(查询所有 `parent_trace_id == trace_id` 的 Traces)
 
-### 3. 获取 Messages
+#### 3. 获取 Messages
 
 ```http
 GET /api/traces/{trace_id}/messages?goal_id=3
@@ -197,7 +209,85 @@ GET /api/traces/{trace_id}/messages?goal_id=3
 
 返回指定 Trace 的 Messages,可选按 Goal 过滤。
 
-**实现**:`agent/execution/api.py`
+**实现**:`agent/trace/api.py`
+
+### 控制端点
+
+需在 `api_server.py` 中配置 Runner。执行在后台异步进行,通过 WebSocket 监听进度。
+
+#### 4. 新建 Trace 并执行
+
+```http
+POST /api/traces
+Content-Type: application/json
+
+{
+  "messages": [{"role": "user", "content": "分析项目架构"}],
+  "model": "gpt-4o",
+  "temperature": 0.3,
+  "max_iterations": 200,
+  "system_prompt": null,
+  "tools": null,
+  "name": "任务名称",
+  "uid": "user_id"
+}
+```
+
+#### 5. 运行(统一续跑 + 回溯)
+
+```http
+POST /api/traces/{trace_id}/run
+Content-Type: application/json
+
+{
+  "messages": [{"role": "user", "content": "..."}],
+  "insert_after": null
+}
+```
+
+- `insert_after: null`(或省略) → 从末尾续跑
+- `insert_after: N` → 回溯到 sequence N 后运行
+- `messages: []` + `insert_after: N` → 重新生成
+
+#### 6. 停止运行中的 Trace
+
+```http
+POST /api/traces/{trace_id}/stop
+```
+
+设置取消信号,agent loop 在下一个检查点退出,Trace 状态置为 `stopped`。
+
+#### 7. 列出正在运行的 Trace
+
+```http
+GET /api/traces/running
+```
+
+#### 8. 反思(提取经验)
+
+```http
+POST /api/traces/{trace_id}/reflect
+Content-Type: application/json
+
+{
+  "focus": "可选,反思重点"
+}
+```
+
+在 trace 末尾追加一条包含反思 prompt 的 user message,以续跑方式运行 agent。
+Agent 回顾整个执行过程后生成经验总结,结果自动追加到 `./cache/experiences.md`。
+
+### 经验端点
+
+#### 9. 读取经验文件
+
+```http
+GET /api/experiences
+```
+
+返回 `./cache/experiences.md` 的文件内容。
+
+**实现**:`agent/trace/run_api.py`
 
 ---
 
@@ -235,7 +325,7 @@ ws://localhost:8000/api/traces/{trace_id}/watch?since_event_id=0
 2. 自动设置父 Goal 的 `status = "completed"`
 3. 在 `goal_updated` 事件的 `affected_goals` 中包含级联完成的父节点
 
-**实现**:`agent/execution/websocket.py`
+**实现**:`agent/trace/websocket.py`
 
 ---
 
@@ -288,7 +378,7 @@ result = await delegate_tool(
 
 ```python
 from agent import AgentRunner
-from agent.execution import FileSystemTraceStore
+from agent.trace import FileSystemTraceStore
 
 store = FileSystemTraceStore(base_path=".trace")
 runner = AgentRunner(trace_store=store, llm_call=my_llm_fn)

+ 0 - 354
examples/cloud_browser_demo_db.py

@@ -1,354 +0,0 @@
-"""
-小红书云浏览器数据获取脚本(数据库配置版)
-从数据库 agent_channel_cookies 获取 Cookie 和 cloud_profile_id
-"""
-
-import sys
-import os
-import asyncio
-import json
-import re
-from datetime import datetime
-from pathlib import Path
-from urllib.parse import quote
-from dotenv import load_dotenv
-
-load_dotenv()
-
-project_root = Path(__file__).parent.parent
-sys.path.insert(0, str(project_root))
-
-from agent.tools.builtin.browser.baseClass import (
-    init_browser_session,
-    cleanup_browser_session,
-    kill_browser_session,
-    browser_navigate_to_url,
-    browser_scroll_page,
-    browser_evaluate,
-    browser_wait,
-    browser_get_page_html,
-    _fetch_cookie_row,
-    _fetch_profile_id,
-    _normalize_cookies,
-    _cookie_domain_for_type,
-    _extract_cookie_value,
-)
-
-
-async def example_xhs_fitness_search(cookie_type: str = "xhs") -> dict:
-    """
-    小红书搜索示例
-
-    Args:
-        cookie_type: Cookie 类型,用于从数据库获取配置
-    """
-    print("\n" + "="*60)
-    print("示例: 小红书云浏览器搜索 - 健身")
-    print("="*60)
-
-    api_key = os.getenv("BROWSER_USE_API_KEY")
-    if not api_key:
-        raise RuntimeError("未找到 BROWSER_USE_API_KEY")
-
-    keyword = "健身"
-    search_url = f"https://www.xiaohongshu.com/search_result?keyword={quote(keyword)}&type=51"
-    last_data: dict = {
-        "success": False,
-        "keyword": keyword,
-        "count": 0,
-        "results": [],
-        "error": "未知错误",
-        "timestamp": datetime.now().isoformat(),
-    }
-
-    # 从数据库获取配置
-    print(f"\n🔍 从数据库获取配置 (type={cookie_type})...")
-    profile_id = _fetch_profile_id(cookie_type)
-    cookie_row = _fetch_cookie_row(cookie_type)
-
-    if profile_id:
-        print(f"✅ 获取到 cloud_profile_id: {profile_id}")
-    else:
-        print("⚠️  未找到 cloud_profile_id,将使用环境变量或默认值")
-        profile_id = os.getenv("XHS_PROFILE_ID")
-
-    if cookie_row:
-        print(f"✅ 获取到 Cookie 配置")
-    else:
-        print("⚠️  未找到 Cookie 配置")
-
-    for attempt in range(3):
-        try:
-            # 确保每次重试都清理旧会话
-            if attempt > 0:
-                try:
-                    await kill_browser_session()
-                except Exception:
-                    pass
-                await asyncio.sleep(2)  # 等待清理完成
-
-            print(f"\n🌐 启动云浏览器 (尝试 {attempt + 1}/3)...")
-            browser, tools = await init_browser_session(
-                headless=False,
-                use_cloud=True,
-                cloud_profile_id=profile_id,
-                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
-                disable_security=False,
-            )
-            if browser is None or tools is None:
-                raise RuntimeError("浏览器初始化失败")
-
-            print("✅ 云浏览器启动成功")
-
-            # 访问首页
-            print("\n🏠 访问小红书首页...")
-            nav_result = await browser_navigate_to_url("https://www.xiaohongshu.com")
-            if nav_result.error:
-                raise RuntimeError(nav_result.error)
-            await browser_wait(3)
-
-            # 注入 Cookie(如果有)
-            if cookie_row:
-                print("\n🍪 注入 Cookie...")
-                cookie_value = _extract_cookie_value(cookie_row)
-                if cookie_value:
-                    domain, base_url = _cookie_domain_for_type(cookie_type, "https://www.xiaohongshu.com")
-                    cookies = _normalize_cookies(cookie_value, domain, base_url)
-                    if cookies:
-                        await browser._cdp_set_cookies(cookies)
-                        print(f"✅ 成功注入 {len(cookies)} 个 Cookie")
-                        # 刷新页面使 Cookie 生效
-                        await navigate_to_url("https://www.xiaohongshu.com")
-                        await browser_wait(2)
-                    else:
-                        print("⚠️  Cookie 解析失败")
-                else:
-                    print("⚠️  未找到 Cookie 值")
-
-            # 访问搜索页面
-            print(f"\n🔗 访问搜索页面: {keyword}")
-            nav_result = await browser_navigate_to_url(search_url)
-            if nav_result.error:
-                raise RuntimeError(nav_result.error)
-            await browser_wait(8)
-
-            # 滚动页面
-            print("\n📜 滚动页面...")
-            for i in range(3):
-                await browser_scroll_page(down=True, pages=2.0)
-                await browser_wait(2)
-
-            # 提取数据
-            print("\n🔍 提取数据...")
-            html_result = await browser_get_page_html()
-            if html_result.error:
-                raise RuntimeError(html_result.error)
-            html = html_result.metadata.get("html", "")
-            output_dir = project_root / "output"
-            output_dir.mkdir(parents=True, exist_ok=True)
-            output_path = output_dir / "xhs.html"
-            output_path.write_text(html or "", encoding="utf-8")
-            print(f"✅ 已保存页面 HTML: {output_path}")
-
-            extract_js = """
-        (function(){
-            const maxCount = 20;
-            const seen = new Set();
-            const results = [];
-
-            function pushItem(item){
-                if (!item || !item.link || seen.has(item.link)) return;
-                seen.add(item.link);
-                results.push(item);
-            }
-
-            const anchors = document.querySelectorAll('a[href*="/explore/"]');
-            anchors.forEach(a => {
-                if (results.length >= maxCount) return;
-                const link = a.href || '';
-                const img = a.querySelector('img');
-                const title = ((img && img.alt) || a.textContent || '').trim();
-                const cover = (img && img.src) || '';
-                if (link && title) {
-                    pushItem({ title, link, cover });
-                }
-            });
-
-            const scriptNodes = document.querySelectorAll('script[type="application/json"], script#__NEXT_DATA__, script#__NUXT__');
-            const walk = (node) => {
-                if (!node || results.length >= maxCount) return;
-                if (Array.isArray(node)) {
-                    for (const item of node) {
-                        walk(item);
-                        if (results.length >= maxCount) return;
-                    }
-                    return;
-                }
-                if (typeof node === 'object') {
-                    const title = (node.title || node.desc || node.name || node.noteTitle || '').toString().trim();
-                    const id = node.noteId || node.note_id || node.id || node.noteID;
-                    const cover = (node.cover && (node.cover.url || node.cover.urlDefault)) || node.coverUrl || node.image || '';
-                    let link = '';
-                    if (id) {
-                        link = `https://www.xiaohongshu.com/explore/${id}`;
-                    }
-                    if (title && link) {
-                        pushItem({ title, link, cover });
-                    }
-                    for (const key in node) {
-                        if (typeof node[key] === 'object') walk(node[key]);
-                    }
-                }
-            };
-
-            scriptNodes.forEach(node => {
-                if (results.length >= maxCount) return;
-                const text = node.textContent || '';
-                if (!text) return;
-                try {
-                    const data = JSON.parse(text);
-                    walk(data);
-                } catch (e) {}
-            });
-
-            return {
-                success: true,
-                keyword: __KEYWORD__,
-                count: results.length,
-                results: results,
-                timestamp: new Date().toISOString(),
-            };
-        })()
-        """
-            extract_js = extract_js.replace("__KEYWORD__", json.dumps(keyword, ensure_ascii=False))
-
-            async def run_extract() -> dict:
-                result = await browser_evaluate(extract_js)
-                if result.error:
-                    raise RuntimeError(result.error)
-                output = result.output
-                if isinstance(output, str) and output.startswith("Result: "):
-                    output = output[8:]
-                if not output:
-                    return {
-                        "success": False,
-                        "keyword": keyword,
-                        "count": 0,
-                        "results": [],
-                        "error": "可能被登录或验证码拦截",
-                        "timestamp": datetime.now().isoformat(),
-                    }
-
-                try:
-                    data = json.loads(output)
-                except Exception:
-                    data = {
-                        "success": False,
-                        "keyword": keyword,
-                        "count": 0,
-                        "results": [],
-                        "error": "JSON 解析失败",
-                        "raw_output": str(output)[:2000],
-                        "timestamp": datetime.now().isoformat(),
-                    }
-
-                if isinstance(data, dict) and data.get("count", 0) == 0:
-                    html_result = await browser_get_page_html()
-                    if html_result.error:
-                        raise RuntimeError(html_result.error)
-                    html = html_result.metadata.get("html", "")
-                    blocked_markers = ["登录", "验证", "验证码", "请先登录", "异常访问"]
-                    if html and any(marker in html for marker in blocked_markers):
-                        data = {
-                            "success": False,
-                            "keyword": keyword,
-                            "count": 0,
-                            "results": [],
-                            "error": "可能被登录或验证码拦截",
-                            "timestamp": datetime.now().isoformat(),
-                        }
-                    elif html:
-                        results = []
-                        seen = set()
-                        pattern = re.compile(r'"noteId":"(.*?)".*?"title":"(.*?)"', re.S)
-                        for match in pattern.finditer(html):
-                            note_id = match.group(1)
-                            title = match.group(2).encode("utf-8", "ignore").decode("unicode_escape").strip()
-                            link = f"https://www.xiaohongshu.com/explore/{note_id}"
-                            if note_id and link not in seen and title:
-                                seen.add(link)
-                                results.append({"title": title, "link": link})
-                            if len(results) >= 20:
-                                break
-                        if results:
-                            data = {
-                                "success": True,
-                                "keyword": keyword,
-                                "count": len(results),
-                                "results": results,
-                                "timestamp": datetime.now().isoformat(),
-                                "source": "html_fallback",
-                            }
-
-                return data
-
-            data = await run_extract()
-
-            last_data = data if isinstance(data, dict) else last_data
-
-            # 输出结果
-            if isinstance(last_data, dict) and last_data.get("count", 0) > 0:
-                print(f"\n✅ 成功获取 {last_data['count']} 条数据")
-                print(f"数据来源: {last_data.get('source', 'javascript')}")
-                print("\n前 5 条结果:")
-                for i, item in enumerate(last_data["results"][:5], 1):
-                    print(f"{i}. {item['title'][:50]}...")
-
-                # 成功获取数据,清理并返回
-                await cleanup_browser_session()
-                return last_data
-
-            if isinstance(last_data, dict) and last_data.get("error") == "可能被登录或验证码拦截":
-                print("\n⚠️  检测到登录或验证码拦截")
-                print("💡 建议:在数据库中配置有效的 Cookie")
-
-        except Exception as e:
-            err_text = str(e)
-            print(f"⚠️  尝试 {attempt + 1}/3 失败: {err_text}")
-            last_data = {
-                "success": False,
-                "keyword": keyword,
-                "count": 0,
-                "results": [],
-                "error": err_text,
-                "timestamp": datetime.now().isoformat(),
-            }
-        finally:
-            # 清理当前会话
-            try:
-                await cleanup_browser_session()
-            except Exception:
-                pass
-
-        # 如果不是最后一次尝试,等待后继续
-        if attempt < 2:
-            print(f"等待 5 秒后重试...")
-            await asyncio.sleep(5)
-
-    return last_data
-
-
-async def main():
-    # 可以通过命令行参数指定 cookie_type
-    cookie_type = sys.argv[1] if len(sys.argv) > 1 else "xhs"
-
-    data = await example_xhs_fitness_search(cookie_type)
-
-    print("\n" + "="*60)
-    print("📊 最终结果")
-    print("="*60)
-    print(json.dumps(data, ensure_ascii=False, indent=2))
-
-
-if __name__ == "__main__":
-    asyncio.run(main())

+ 7 - 3
examples/research/run.py

@@ -34,12 +34,13 @@ from agent.trace import (
 from agent.llm import create_openrouter_llm_call
 
 # 导入浏览器清理工具
-from agent.tools.builtin.browser.baseClass import kill_browser_session
+from agent.tools.builtin.browser.baseClass import get_browser_session,kill_browser_session,init_browser_session 
 
 async def main():
     # 路径配置
     base_dir = Path(__file__).parent
     project_root = base_dir.parent.parent
+    trace_dir = project_root / ".trace"
     prompt_path = base_dir / "test.prompt"
     output_dir = base_dir / "output"
     output_dir.mkdir(exist_ok=True)
@@ -64,7 +65,7 @@ async def main():
 
     print(f"   - 任务: {user_task[:80]}...")
     print(f"   - 模型: {model_name}")
-
+    
     # 2. 构建消息
     print("2. 构建任务消息...")
     messages = prompt.build_messages()
@@ -83,7 +84,10 @@ async def main():
 
     # 4. Agent 模式执行(使用 try...finally 确保清理)
     try:
-        print(f"4. 启动 Agent 模式执行...")
+        print(f"4. 初始化云浏览器...")                              
+        await init_browser_session(browser_type="cloud", headless=True)                                                          
+
+        print(f"5. 启动 Agent 模式执行...")    
         print()
 
         async for item in runner.run(

+ 1 - 1
examples/research/test.prompt

@@ -7,4 +7,4 @@ $system$
 你是最顶尖的AI助手,可以拆分并调用工具逐步解决复杂问题。
 
 $user$
-使用浏览器帮我做个调研:一张图片中的构图可以如何表示?我希望寻找一些构图特征的表示方法。尝试查阅一些论文pdf, 网页等资料,最后输出一份调研报告
+去zh.zlib.li网页找一些构图相关的书(可以用load_cookies登录),并下载下来

+ 0 - 70
examples/subagent_example.py

@@ -1,70 +0,0 @@
-"""
-Sub-Agent 使用示例
-
-演示如何使用 Sub-Agent 机制处理复杂任务。
-
-注意:本示例中的 AgentDefinition 和 get_agent_registry 尚未实现,
-此处仅用于演示未来的设计方向。当前可用的 subagent 功能通过
-runner.run(messages, config=RunConfig(...)) 和工具层的 subagent 工具实现。
-"""
-
-import asyncio
-import os
-from agent import AgentRunner
-from agent.core.runner import RunConfig
-from agent.trace import Trace, Message
-from agent.llm import create_gemini_llm_call
-
-
-async def example_basic_subagent():
-    """示例 1: 使用 Agent 执行任务(通过 subagent 工具自动委托子任务)"""
-    print("=== 示例 1: 基本 Agent 执行 ===\n")
-
-    runner = AgentRunner(
-        llm_call=create_gemini_llm_call(os.getenv("GEMINI_API_KEY")),
-    )
-
-    task = """
-    分析这个 Python 项目的架构:
-    1. 找出所有主要的模块和它们的职责
-    2. 识别核心的数据流
-    3. 列出使用的外部依赖
-
-    请使用 subagent explore 模式来探索代码库。
-    """
-
-    async for item in runner.run(
-        messages=[{"role": "user", "content": task}],
-        config=RunConfig(
-            model="gemini-2.0-flash-exp",
-            max_iterations=20,
-            name="项目架构分析",
-        ),
-    ):
-        if isinstance(item, Trace):
-            if item.status == "running":
-                print(f"[Trace] 开始: {item.trace_id[:8]}")
-            elif item.status == "completed":
-                print(f"[Trace] 完成 (tokens: {item.total_tokens})")
-        elif isinstance(item, Message):
-            if item.role == "assistant":
-                content = item.content
-                if isinstance(content, dict):
-                    text = content.get("text", "")
-                    tool_calls = content.get("tool_calls")
-                    if tool_calls:
-                        for tc in tool_calls:
-                            tool_name = tc.get("function", {}).get("name", "")
-                            if tool_name == "subagent":
-                                print(f"  启动 Sub-Agent...")
-                    elif text:
-                        print(f"\n最终结果:\n{text[:500]}")
-
-
-async def main():
-    """运行示例"""
-    await example_basic_subagent()
-
-
-if __name__ == "__main__":
-    asyncio.run(main())

+ 0 - 129
examples/test_skill.py

@@ -1,129 +0,0 @@
-import json
-import subprocess
-import time
-from pathlib import Path
-
-
-def run_cli(session: str, args: list[str]) -> dict:
-    command = ["browser-use", "--session", session, "--json"] + args
-    result = subprocess.run(command, capture_output=True, text=True)
-    if result.returncode != 0:
-        raise RuntimeError(result.stderr.strip() or "browser-use command failed")
-    payload = result.stdout.strip()
-    if not payload:
-        raise RuntimeError("browser-use returned empty output")
-    data = json.loads(payload)
-    if not data.get("success", False):
-        raise RuntimeError(data.get("error", "browser-use command error"))
-    return data.get("data", {})
-
-
-def stop_session_server(session: str) -> None:
-    subprocess.run(
-        ["browser-use", "--session", session, "server", "stop"],
-        capture_output=True,
-        text=True,
-    )
-
-
-def main():
-    project_root = Path(__file__).resolve().parents[1]
-    output_dir = project_root / "output"
-    output_dir.mkdir(parents=True, exist_ok=True)
-
-    json_file = output_dir / "skill_baidu.json"
-    html_file = output_dir / "skill_baidu_page.html"
-
-    session = "skill_baidu"
-    keyword = "瑜伽美女"
-
-    try:
-        stop_session_server(session)
-        try:
-            run_cli(session, ["open", "https://www.baidu.com"])
-        except RuntimeError:
-            stop_session_server(session)
-            run_cli(session, ["open", "https://www.baidu.com"])
-
-        search_js = (
-            "(function(){"
-            "const input=document.querySelector('#kw');"
-            "const btn=document.querySelector('#su');"
-            "if(input){input.value='" + keyword + "';}"
-            "if(btn){btn.click();}"
-            "else if(input&&input.form){input.form.submit();}"
-            "return {hasInput:!!input,hasButton:!!btn};"
-            "})()"
-        )
-        run_cli(session, ["eval", search_js])
-
-        wait_js = (
-            "(function(){"
-            "const items=document.querySelectorAll('#content_left .result, #content_left .c-container, #content_left .result-op');"
-            "const bodyReady=!!document.body;"
-            "const bodyLen=bodyReady?(document.body.innerText||'').length:0;"
-            "return {count:items.length, bodyReady:bodyReady, bodyLen:bodyLen};"
-            "})()"
-        )
-
-        count = 0
-        for _ in range(12):
-            data = run_cli(session, ["eval", wait_js])
-            result = data.get("result") if isinstance(data, dict) else {}
-            count = int(result.get("count") or 0)
-            body_len = int(result.get("bodyLen") or 0)
-            if count >= 3 or body_len > 1000:
-                break
-            time.sleep(1)
-
-        extract_js = (
-            "(function(){"
-            "const items=Array.from(document.querySelectorAll('#content_left .result, #content_left .c-container, #content_left .result-op'));"
-            "const results=[];"
-            "for(const item of items){"
-            "const a=item.querySelector('h3 a')||item.querySelector('a[data-click]')||item.querySelector('a');"
-            "if(!a) continue;"
-            "const title=(a.textContent||'').trim();"
-            "const link=a.href||'';"
-            "const summaryEl=item.querySelector('.c-abstract, .content-right_8Zs40, .content-right_8Zs40_2gVt2');"
-            "const summary=(summaryEl?summaryEl.textContent:'').trim();"
-            "results.push({index:results.length+1,title,link,summary});"
-            "if(results.length>=10) break;"
-            "}"
-            "return {success:true,keyword:'" + keyword + "',count:results.length,timestamp:new Date().toISOString(),results:results};"
-            "})()"
-        )
-
-        data = run_cli(session, ["eval", extract_js])
-        extracted = data.get("result") if isinstance(data, dict) else data
-
-        if not extracted:
-            extracted = {
-                "success": False,
-                "keyword": keyword,
-                "count": 0,
-                "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S"),
-                "results": [],
-            }
-
-        with open(json_file, "w", encoding="utf-8") as f:
-            json.dump(extracted, f, ensure_ascii=False, indent=2)
-
-        html_data = run_cli(session, ["eval", "document.documentElement.outerHTML"])
-        html_content = html_data.get("result") if isinstance(html_data, dict) else html_data
-
-        with open(html_file, "w", encoding="utf-8") as f:
-            f.write(html_content or "")
-
-        print(f"✅ 数据已保存到: {json_file}")
-        print(f"✅ HTML 已保存到: {html_file}")
-
-    finally:
-        try:
-            run_cli(session, ["close"])
-        except Exception:
-            pass
-
-
-if __name__ == "__main__":
-    main()

+ 0 - 61
examples/test_subagent_real/README.md

@@ -1,61 +0,0 @@
-# Subagent 工具真实测试
-
-本测试用例用于验证 subagent 工具在真实 LLM 环境下的表现。
-
-## 测试目标
-
-测试 subagent 工具的三种核心模式:
-
-1. **delegate 模式** - 委托子任务给专门的 agent 处理
-2. **explore 模式** - 并行探索多个可能的方案
-3. **evaluate 模式** - 评估任务完成情况
-
-## 测试场景
-
-分析 Agent-main 项目的架构,这个任务自然需要:
-- 委托不同模块的分析(delegate)
-- 并行探索改进方案(explore)
-- 评估分析完整性(evaluate)
-
-## 运行方式
-
-```bash
-cd /path/to/Agent-main  # 替换为本地的项目根目录
-python examples/test_subagent_real/run.py
-```
-
-## 前置要求
-
-1. 配置 `.env` 文件,设置 OpenRouter API Key:
-   ```
-   OPEN_ROUTER_API_KEY=your_key_here
-   ```
-
-2. 确保已安装依赖:
-   ```bash
-   pip install -r requirements.txt
-   ```
-
-## 预期结果
-
-Agent 应该:
-1. 使用 delegate 模式委托 2-4 个子任务分析不同模块
-2. 使用 explore 模式并行探索 2-3 个改进方案
-3. 使用 evaluate 模式评估分析的完整性
-4. 生成完整的架构分析报告
-
-## 输出
-
-- 控制台:实时显示 agent 执行过程和 subagent 调用
-- 文件:`output/subagent_test_result.txt` 包含最终结果和统计
-- Trace:`.trace/` 目录保存完整执行记录
-
-## 可视化
-
-启动 API Server 查看 trace tree:
-
-```bash
-python3 api_server.py
-```
-
-访问:http://localhost:8000/api/traces

+ 0 - 218
examples/test_subagent_real/run.py

@@ -1,218 +0,0 @@
-"""
-Subagent 工具真实测试
-
-使用真实 LLM 测试 subagent 工具的三种模式:
-1. delegate - 委托子任务
-2. explore - 并行探索方案
-3. evaluate - 评估结果
-"""
-
-import os
-import sys
-import asyncio
-from pathlib import Path
-
-# 添加项目根目录到 Python 路径
-sys.path.insert(0, str(Path(__file__).parent.parent.parent))
-
-from dotenv import load_dotenv
-load_dotenv()
-
-from agent.llm.prompts import SimplePrompt
-from agent.core.runner import AgentRunner, RunConfig
-from agent.trace import (
-    FileSystemTraceStore,
-    Trace,
-    Message,
-)
-from agent.llm import create_openrouter_llm_call
-
-
-async def main():
-    # 路径配置
-    base_dir = Path(__file__).parent
-    project_root = base_dir.parent.parent
-    prompt_path = base_dir / "test.prompt"
-    output_dir = base_dir / "output"
-    output_dir.mkdir(exist_ok=True)
-
-    print("=" * 60)
-    print("Subagent 工具测试 (真实 LLM)")
-    print("=" * 60)
-    print()
-
-    # 1. 加载 prompt
-    print("1. 加载 prompt...")
-    prompt = SimplePrompt(prompt_path)
-
-    # 提取配置
-    system_prompt = prompt._messages.get("system", "")
-    user_task = prompt._messages.get("user", "")
-    model_name = prompt.config.get('model', 'gemini-2.5-flash')
-    temperature = float(prompt.config.get('temperature', 0.3))
-
-    print(f"   - 任务: {user_task[:80]}...")
-    print(f"   - 模型: {model_name}")
-
-    # 2. 构建消息
-    print("2. 构建任务消息...")
-    messages = prompt.build_messages()
-
-    # 3. 创建 Agent Runner
-    print("3. 创建 Agent Runner...")
-    print(f"   - 模型: {model_name} (via OpenRouter)")
-
-    # Trace 输出到测试目录
-    trace_dir = base_dir / ".trace"
-    trace_dir.mkdir(exist_ok=True)
-    print(f"   - Trace 目录: {trace_dir}")
-
-    runner = AgentRunner(
-        trace_store=FileSystemTraceStore(base_path=str(trace_dir)),
-        llm_call=create_openrouter_llm_call(model=f"google/{model_name}"),
-        skills_dir=None,
-        debug=True
-    )
-
-    # 4. Agent 模式执行
-    print(f"4. 启动 Agent 模式...")
-    print()
-
-    final_response = ""
-    current_trace_id = None
-    subagent_calls = []
-
-    async for item in runner.run(
-        messages=messages,
-        config=RunConfig(
-            system_prompt=system_prompt,
-            model=f"google/{model_name}",
-            temperature=temperature,
-            max_iterations=30,
-            name=user_task[:50],
-        ),
-    ):
-        # 处理 Trace 对象
-        if isinstance(item, Trace):
-            current_trace_id = item.trace_id
-            if item.status == "running":
-                print(f"[Trace] 开始: {item.trace_id[:8]}")
-            elif item.status == "completed":
-                print(f"[Trace] 完成")
-                print(f"  - Total messages: {item.total_messages}")
-                print(f"  - Total tokens: {item.total_tokens}")
-                print(f"  - Total cost: ${item.total_cost:.4f}")
-            elif item.status == "failed":
-                print(f"[Trace] 失败: {item.error_message}")
-
-        # 处理 Message 对象
-        elif isinstance(item, Message):
-            if item.role == "assistant":
-                content = item.content
-                if isinstance(content, dict):
-                    text = content.get("text", "")
-                    tool_calls = content.get("tool_calls")
-
-                    if text and not tool_calls:
-                        final_response = text
-                        print(f"[Response] Agent 完成")
-                    elif text:
-                        print(f"[Assistant] {text[:100]}...")
-
-                    if tool_calls:
-                        for tc in tool_calls:
-                            tool_name = tc.get("function", {}).get("name", "unknown")
-                            print(f"[Tool Call] {tool_name}")
-
-                            # 记录 subagent 调用
-                            if tool_name == "subagent":
-                                import json
-                                args = tc.get("function", {}).get("arguments", {})
-                                # arguments 可能是字符串,需要解析
-                                if isinstance(args, str):
-                                    try:
-                                        args = json.loads(args)
-                                    except:
-                                        args = {}
-                                mode = args.get("mode", "unknown")
-                                subagent_calls.append({
-                                    "mode": mode,
-                                    "task": args.get("task", args.get("background", ""))[:50]
-                                })
-                                print(f"  → mode: {mode}")
-
-            elif item.role == "tool":
-                content = item.content
-                if isinstance(content, dict):
-                    tool_name = content.get("tool_name", "unknown")
-                    print(f"[Tool Result] {tool_name}")
-                if item.description:
-                    desc = item.description[:80] if len(item.description) > 80 else item.description
-                    print(f"  {desc}...")
-
-    # 5. 输出结果
-    print()
-    print("=" * 60)
-    print("Agent 响应:")
-    print("=" * 60)
-    print(final_response)
-    print("=" * 60)
-    print()
-
-    # 6. 统计 subagent 调用
-    print("=" * 60)
-    print("Subagent 调用统计:")
-    print("=" * 60)
-    delegate_count = sum(1 for call in subagent_calls if call["mode"] == "delegate")
-    explore_count = sum(1 for call in subagent_calls if call["mode"] == "explore")
-    evaluate_count = sum(1 for call in subagent_calls if call["mode"] == "evaluate")
-
-    print(f"  - delegate 模式: {delegate_count} 次")
-    print(f"  - explore 模式: {explore_count} 次")
-    print(f"  - evaluate 模式: {evaluate_count} 次")
-    print(f"  - 总计: {len(subagent_calls)} 次")
-    print()
-
-    for i, call in enumerate(subagent_calls, 1):
-        print(f"  {i}. [{call['mode']}] {call['task']}...")
-    print("=" * 60)
-    print()
-
-    # 7. 保存结果
-    output_file = output_dir / "subagent_test_result.txt"
-    with open(output_file, 'w', encoding='utf-8') as f:
-        f.write("=" * 60 + "\n")
-        f.write("Agent 响应\n")
-        f.write("=" * 60 + "\n\n")
-        f.write(final_response)
-        f.write("\n\n" + "=" * 60 + "\n")
-        f.write("Subagent 调用统计\n")
-        f.write("=" * 60 + "\n\n")
-        f.write(f"delegate 模式: {delegate_count} 次\n")
-        f.write(f"explore 模式: {explore_count} 次\n")
-        f.write(f"evaluate 模式: {evaluate_count} 次\n")
-        f.write(f"总计: {len(subagent_calls)} 次\n\n")
-        for i, call in enumerate(subagent_calls, 1):
-            f.write(f"{i}. [{call['mode']}] {call['task']}...\n")
-
-    print(f"✓ 结果已保存到: {output_file}")
-    print()
-
-    # 8. 可视化提示
-    print("=" * 60)
-    print("Trace 信息:")
-    print("=" * 60)
-    print(f"Trace ID: {current_trace_id}")
-    print(f"Trace 目录: {trace_dir}")
-    print()
-    print("查看 trace 文件:")
-    print(f"   ls -la {trace_dir}")
-    print()
-    print("或启动 API Server 可视化:")
-    print("   python3 api_server.py")
-    print("   访问: http://localhost:8000/api/traces")
-    print("=" * 60)
-
-
-if __name__ == "__main__":
-    asyncio.run(main())

+ 0 - 28
examples/test_subagent_real/test.prompt

@@ -1,28 +0,0 @@
----
-model: gemini-2.5-flash
-temperature: 0.3
----
-
-$system$
-你是一个专业的代码分析助手,擅长使用 subagent 工具来分解复杂任务。
-
-你有以下工具可用:
-- subagent: 用于委托子任务、并行探索方案、评估结果
-- read_file, glob_files, grep_content: 用于代码分析
-- goal: 用于任务规划和进度追踪
-
-**重要规则**:
-- 在任务完成前,必须始终保持至少一个活跃的 goal
-- 当所有 goal 完成后,如果任务还未完全结束,必须立即创建新的 goal
-- 只有在确认任务完全完成后,才能让 goal 列表为空
-- goal 为空表示任务已完成,系统将结束执行
-
-$user$
-请分析 /Users/elksmmx/Desktop/agent_2.9/Agent-main 项目的架构,并提出改进建议。
-
-具体要求:
-1. 使用 subagent 的 delegate 模式,委托子 agent 分析不同模块(core、trace、tools、memory)
-2. 使用 subagent 的 explore 模式,并行探索 2-3 个可能的架构改进方案
-3. 使用 subagent 的 evaluate 模式,评估你的分析是否完整
-
-请充分利用 subagent 工具的各种模式来完成这个任务。

+ 0 - 28
examples/test_subagent_real/test_continue.prompt

@@ -1,28 +0,0 @@
----
-model: gemini-2.5-flash
-temperature: 0.3
----
-
-$system$
-你是一个专业的代码分析助手,擅长使用 subagent 工具来分解复杂任务。
-
-你有以下工具可用:
-- subagent: 用于委托子任务、并行探索方案、评估结果
-  - mode="delegate": 委托子任务
-  - mode="explore": 并行探索多个方案
-  - mode="evaluate": 评估结果
-  - continue_from: 继续已有的 trace(用于迭代改进)
-- read_file, glob_files, grep_content: 用于代码分析
-- goal: 用于任务规划和进度追踪
-
-$user$
-请分析 /Users/elksmmx/Desktop/agent_2.9/Agent-main 项目的 core 模块架构,并提出改进建议。
-
-具体要求:
-1. 使用 subagent 的 delegate 模式,委托子 agent 分析 core 模块的基本架构
-2. 使用 subagent 的 delegate 模式 + continue_from 参数,继续深入分析 core 模块的设计模式和最佳实践
-3. 使用 subagent 的 explore 模式,并行探索 2-3 个可能的改进方案
-4. 使用 subagent 的 evaluate 模式,评估你的分析是否完整
-5. 如果评估不通过,使用 continue_from 继续改进分析
-
-**重点测试 continue_from 参数的使用**,展示如何在同一个 trace 上迭代改进任务。

+ 0 - 187
examples/test_subagent_real/visualize_trace.py

@@ -1,187 +0,0 @@
-"""
-Trace 树可视化工具
-
-读取 trace 目录并生成树形结构的可视化输出
-"""
-
-import json
-import sys
-from pathlib import Path
-from datetime import datetime
-
-
-def load_trace_meta(trace_dir):
-    """加载 trace 的 meta.json"""
-    meta_file = trace_dir / "meta.json"
-    if not meta_file.exists():
-        return None
-    with open(meta_file, 'r', encoding='utf-8') as f:
-        return json.load(f)
-
-
-def format_duration(start_str, end_str):
-    """计算并格式化持续时间"""
-    if not start_str or not end_str:
-        return "N/A"
-    try:
-        start = datetime.fromisoformat(start_str)
-        end = datetime.fromisoformat(end_str)
-        duration = (end - start).total_seconds()
-        return f"{duration:.1f}s"
-    except:
-        return "N/A"
-
-
-def extract_mode_from_trace_id(trace_id):
-    """从 trace_id 中提取模式"""
-    if '@delegate-' in trace_id:
-        return 'delegate'
-    elif '@explore-' in trace_id:
-        return 'explore'
-    elif '@evaluate-' in trace_id:
-        return 'evaluate'
-    return 'main'
-
-
-def print_trace_tree(trace_base_path, output_file=None):
-    """打印 trace 树结构"""
-    trace_base = Path(trace_base_path)
-
-    if not trace_base.exists():
-        print(f"错误: Trace 目录不存在: {trace_base}")
-        return
-
-    # 查找所有 trace 目录
-    all_traces = {}
-    main_trace_id = None
-
-    for trace_dir in sorted(trace_base.iterdir()):
-        if not trace_dir.is_dir():
-            continue
-
-        meta = load_trace_meta(trace_dir)
-        if not meta:
-            continue
-
-        trace_id = meta['trace_id']
-        all_traces[trace_id] = {
-            'meta': meta,
-            'dir': trace_dir,
-            'children': []
-        }
-
-        # 找到主 trace
-        if meta.get('parent_trace_id') is None:
-            main_trace_id = trace_id
-
-    if not main_trace_id:
-        print("错误: 未找到主 trace")
-        return
-
-    # 构建树结构
-    for trace_id, trace_info in all_traces.items():
-        parent_id = trace_info['meta'].get('parent_trace_id')
-        if parent_id and parent_id in all_traces:
-            all_traces[parent_id]['children'].append(trace_id)
-
-    # 输出函数
-    def output(text):
-        print(text)
-        if output_file:
-            output_file.write(text + '\n')
-
-    # 打印树
-    output("=" * 80)
-    output("Trace 执行树")
-    output("=" * 80)
-    output("")
-
-    def print_node(trace_id, prefix="", is_last=True):
-        trace_info = all_traces[trace_id]
-        meta = trace_info['meta']
-
-        # 树形连接符
-        connector = "└── " if is_last else "├── "
-
-        # 提取信息
-        mode = extract_mode_from_trace_id(trace_id)
-        task = meta.get('task', 'N/A')
-        if len(task) > 60:
-            task = task[:60] + "..."
-        status = meta.get('status', 'unknown')
-        messages = meta.get('total_messages', 0)
-        tokens = meta.get('total_tokens', 0)
-        duration = format_duration(
-            meta.get('created_at'),
-            meta.get('completed_at')
-        )
-
-        # 状态符号
-        status_symbol = {
-            'completed': '✓',
-            'failed': '✗',
-            'running': '⟳',
-        }.get(status, '?')
-
-        # 打印节点
-        output(f"{prefix}{connector}[{mode}] {status_symbol} {trace_id[:8]}")
-        output(f"{prefix}{'    ' if is_last else '│   '}Task: {task}")
-        output(f"{prefix}{'    ' if is_last else '│   '}Stats: {messages} msgs, {tokens:,} tokens, {duration}")
-
-        # 打印子节点
-        children = trace_info['children']
-        for i, child_id in enumerate(children):
-            is_last_child = (i == len(children) - 1)
-            child_prefix = prefix + ("    " if is_last else "│   ")
-            print_node(child_id, child_prefix, is_last_child)
-
-    # 从主 trace 开始打印
-    print_node(main_trace_id)
-
-    output("")
-    output("=" * 80)
-    output("统计信息")
-    output("=" * 80)
-
-    # 统计各模式的数量
-    mode_counts = {}
-    total_messages = 0
-    total_tokens = 0
-
-    for trace_info in all_traces.values():
-        meta = trace_info['meta']
-        mode = extract_mode_from_trace_id(meta['trace_id'])
-        mode_counts[mode] = mode_counts.get(mode, 0) + 1
-        total_messages += meta.get('total_messages', 0)
-        total_tokens += meta.get('total_tokens', 0)
-
-    output(f"总 Trace 数: {len(all_traces)}")
-    output(f"  - main: {mode_counts.get('main', 0)}")
-    output(f"  - delegate: {mode_counts.get('delegate', 0)}")
-    output(f"  - explore: {mode_counts.get('explore', 0)}")
-    output(f"  - evaluate: {mode_counts.get('evaluate', 0)}")
-    output(f"")
-    output(f"总消息数: {total_messages}")
-    output(f"总 Token 数: {total_tokens:,}")
-    output("=" * 80)
-
-
-if __name__ == "__main__":
-    if len(sys.argv) < 2:
-        print("用法: python visualize_trace.py <trace_directory> [output_file]")
-        print("示例: python visualize_trace.py .trace")
-        sys.exit(1)
-
-    trace_dir = sys.argv[1]
-    output_path = sys.argv[2] if len(sys.argv) > 2 else None
-
-    output_file = None
-    if output_path:
-        output_file = open(output_path, 'w', encoding='utf-8')
-
-    try:
-        print_trace_tree(trace_dir, output_file)
-    finally:
-        if output_file:
-            output_file.close()
-            print(f"\n✓ 输出已保存到: {output_path}")

+ 0 - 141
examples/test_tools_baidu.py

@@ -1,141 +0,0 @@
-import asyncio
-import json
-import os
-import sys
-from datetime import datetime
-from pathlib import Path
-from urllib.parse import quote
-
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from agent.tools.builtin.browser.baseClass import (
-    init_browser_session,
-    browser_navigate_to_url,
-    browser_wait,
-    browser_get_page_html,
-    browser_evaluate,
-    browser_scroll_page,
-    cleanup_browser_session,
-)
-
-
-async def run_task():
-    project_root = Path(__file__).resolve().parents[1]
-    output_dir = project_root / "output"
-    output_dir.mkdir(parents=True, exist_ok=True)
-
-    json_file = output_dir / "baidu.json"
-    html_file = output_dir / "baidu_page.html"
-
-    try:
-        await init_browser_session(headless=False, profile_name="baidu_profile")
-
-        await browser_navigate_to_url("https://www.baidu.com")
-        await browser_wait(seconds=2)
-
-        keyword = "Python 教程"
-        search_url = f"https://www.baidu.com/s?wd={quote(keyword)}"
-        await browser_navigate_to_url(search_url)
-        await browser_wait(seconds=3)
-        await browser_scroll_page(down=True, pages=1.0)
-        await browser_wait(seconds=2)
-
-        extract_js = """
-        (function(){
-            try {
-                const results = [];
-                const resultItems = document.querySelectorAll('#content_left > div[class*="result"]');
-                resultItems.forEach((item, index) => {
-                    if (index >= 10) return;
-                    try {
-                        const titleEl = item.querySelector('h3 a, .t a');
-                        const title = titleEl ? titleEl.textContent.trim() : '';
-                        const link = titleEl ? titleEl.href : '';
-                        const summaryEl = item.querySelector('.c-abstract, .content-right_8Zs40');
-                        const summary = summaryEl ? summaryEl.textContent.trim() : '';
-                        const sourceEl = item.querySelector('.c-color-gray, .source_1Vdff');
-                        const source = sourceEl ? sourceEl.textContent.trim() : '';
-                        if (title || link) {
-                            results.push({
-                                index: index + 1,
-                                title: title,
-                                link: link,
-                                summary: summary.substring(0, 200),
-                                source: source
-                            });
-                        }
-                    } catch (e) {
-                    }
-                });
-                return {
-                    success: true,
-                    count: results.length,
-                    keyword: 'Python 教程',
-                    timestamp: new Date().toISOString(),
-                    results: results
-                };
-            } catch (e) {
-                return {
-                    success: false,
-                    error: e.message,
-                    stack: e.stack
-                };
-            }
-        })()
-        """
-
-        result = await browser_evaluate(code=extract_js)
-        output = result.output
-        if output.startswith("Result: "):
-            output = output[8:]
-
-        try:
-            data = json.loads(output)
-        except json.JSONDecodeError:
-            data = {
-                "success": False,
-                "error": "JSON解析失败",
-                "raw_output": output[:1000],
-                "keyword": keyword,
-                "timestamp": datetime.now().isoformat(),
-            }
-
-        with open(json_file, "w", encoding="utf-8") as f:
-            json.dump(data, f, ensure_ascii=False, indent=2)
-
-        html_result = await browser_get_page_html()
-        html_content = html_result.metadata.get("html", "")
-        page_url = html_result.metadata.get("url", "")
-        page_title = html_result.metadata.get("title", "")
-        meta_info = (
-            "\n".join(
-                [
-                    "<!--",
-                    f"    页面标题: {page_title}",
-                    f"    页面URL: {page_url}",
-                    f"    保存时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
-                    f"    搜索关键词: {keyword}",
-                    "-->",
-                    "",
-                ]
-            )
-            + "\n"
-        )
-
-        with open(html_file, "w", encoding="utf-8") as f:
-            f.write(meta_info)
-            f.write(html_content)
-
-        print(f"✅ 数据已保存到: {json_file}")
-        print(f"✅ HTML 已保存到: {html_file}")
-
-    finally:
-        await cleanup_browser_session()
-
-
-def main():
-    asyncio.run(run_task())
-
-
-if __name__ == "__main__":
-    main()

+ 0 - 247
examples/test_xhs_container.py

@@ -1,247 +0,0 @@
-"""
-小红书容器测试脚本
-演示容器浏览器的使用:
-1. 初始化容器浏览器(自动创建容器并连接)
-2. 搜索健身
-3. 随机进入一个详情页
-4. 获取详情页的HTML和iframe并保存到output
-"""
-
-import sys
-import os
-import asyncio
-import json
-import random
-from datetime import datetime
-from pathlib import Path
-from urllib.parse import quote
-from dotenv import load_dotenv
-
-load_dotenv()
-
-project_root = Path(__file__).parent.parent
-sys.path.insert(0, str(project_root))
-
-from agent.tools.builtin.browser.baseClass import (
-    init_browser_session,
-    cleanup_browser_session,
-    browser_navigate_to_url,
-    browser_scroll_page,
-    browser_evaluate,
-    browser_wait,
-    browser_get_page_html,
-    browser_switch_tab,
-)
-
-
-async def test_xhs_container():
-    """
-    测试小红书容器功能
-    """
-    print("\n" + "="*60)
-    print("小红书容器测试")
-    print("="*60)
-
-    keyword = "健身"
-    search_url = f"https://www.xiaohongshu.com/search_result?keyword={quote(keyword)}&type=51"
-
-    # 创建输出目录
-    output_dir = project_root / "output"
-    output_dir.mkdir(parents=True, exist_ok=True)
-
-    try:
-        # 初始化容器浏览器(一步完成)
-        print(f"\n🚀 初始化容器浏览器...")
-        browser, tools = await init_browser_session(
-            browser_type="container",
-            url="https://www.xiaohongshu.com",  # 容器启动时访问的URL
-            headless=True
-        )
-
-        print("✅ 容器浏览器初始化成功")
-
-        # 等待页面完全加载
-        await browser_wait(3)
-
-        # 步骤1: 搜索健身
-        print(f"\n🔍 搜索关键词: {keyword}")
-        try:
-            nav_result = await browser_navigate_to_url(search_url)
-            if nav_result.error:
-                print(f"⚠️  导航警告: {nav_result.error[:100]}")
-        except Exception as e:
-            print(f"⚠️  导航异常: {str(e)[:100]}")
-
-        await browser_wait(10)
-
-        # 滚动页面加载更多内容
-        print("\n📜 滚动页面...")
-        for i in range(2):
-            await browser_scroll_page(down=True, pages=2.0)
-            await browser_wait(2)
-
-        # 提取搜索结果
-        print("\n🔍 提取搜索结果...")
-
-        # 先保存HTML看看页面内容
-        html_result = await browser_get_page_html()
-        if not html_result.error:
-            html = html_result.metadata.get("html", "")
-            debug_html_path = output_dir / "search_page_debug.html"
-            debug_html_path.write_text(html or "", encoding="utf-8")
-            print(f"   💾 已保存搜索页HTML用于调试: {debug_html_path}")
-
-        extract_js = """
-        (function(){
-            const results = [];
-            const seen = new Set();
-
-            const anchors = document.querySelectorAll('a[href*="/explore/"]');
-            anchors.forEach(a => {
-                const link = a.href || '';
-                if (link && !seen.has(link)) {
-                    seen.add(link);
-                    const img = a.querySelector('img');
-                    const title = ((img && img.alt) || a.textContent || '').trim();
-                    results.push({ title, link });
-                }
-            });
-
-            return results;
-        })()
-        """
-
-        eval_result = await browser_evaluate(extract_js)
-        if eval_result.error:
-            raise RuntimeError(f"提取搜索结果失败: {eval_result.error}")
-
-        output = eval_result.output
-        if isinstance(output, str) and output.startswith("Result: "):
-            output = output[8:]
-
-        posts = json.loads(output) if isinstance(output, str) else output
-
-        if not posts or len(posts) == 0:
-            raise RuntimeError("未找到任何帖子")
-
-        print(f"✅ 找到 {len(posts)} 个帖子")
-
-        # 步骤2: 随机进入一个详情页
-        selected_post = random.choice(posts)
-        post_url = selected_post["link"]
-
-        print(f"\n🎲 随机选择帖子: {selected_post['title'][:50]}...")
-        print(f"🔗 访问帖子详情页: {post_url}")
-
-        try:
-            nav_result = await browser_navigate_to_url(post_url)
-            if nav_result.error:
-                print(f"⚠️  导航警告: {nav_result.error[:100]}")
-        except Exception as e:
-            print(f"⚠️  导航异常: {str(e)[:100]}")
-
-        await browser_wait(8)
-
-        # 滚动详情页
-        print("\n📜 滚动详情页...")
-        for i in range(3):
-            await browser_scroll_page(down=True, pages=1.5)
-            await browser_wait(2)
-
-        # 步骤3: 保存详情页HTML
-        print("\n💾 保存详情页 HTML...")
-        html_result = await browser_get_page_html()
-        if html_result.error:
-            print(f"⚠️  获取HTML失败: {html_result.error}")
-        else:
-            html = html_result.metadata.get("html", "")
-            html_path = output_dir / "container_post_detail.html"
-            html_path.write_text(html or "", encoding="utf-8")
-            print(f"✅ 已保存详情页 HTML: {html_path}")
-
-        # 查找并保存iframe
-        print("\n🔍 查找页面中的iframe...")
-        iframe_js = """
-        (function(){
-            const iframes = document.querySelectorAll('iframe');
-            const results = [];
-            iframes.forEach((iframe, index) => {
-                results.push({
-                    index: index,
-                    src: iframe.src || '',
-                    id: iframe.id || '',
-                    name: iframe.name || ''
-                });
-            });
-            return results;
-        })()
-        """
-
-        iframe_result = await browser_evaluate(iframe_js)
-        if not iframe_result.error:
-            iframe_output = iframe_result.output
-            if isinstance(iframe_output, str) and iframe_output.startswith("Result: "):
-                iframe_output = iframe_output[8:]
-
-            try:
-                iframes = json.loads(iframe_output) if isinstance(iframe_output, str) else iframe_output
-
-                if iframes and len(iframes) > 0:
-                    print(f"✅ 找到 {len(iframes)} 个iframe")
-
-                    for idx, iframe_info in enumerate(iframes):
-                        print(f"\n📄 处理iframe {idx + 1}/{len(iframes)}")
-                        print(f"   src: {iframe_info.get('src', 'N/A')[:80]}")
-
-                        # 获取iframe HTML
-                        get_iframe_html_js = f"""
-                        (function(){{
-                            const iframe = document.querySelectorAll('iframe')[{idx}];
-                            if (!iframe) return null;
-                            try {{
-                                const iframeDoc = iframe.contentDocument || iframe.contentWindow.document;
-                                return iframeDoc.documentElement.outerHTML;
-                            }} catch(e) {{
-                                return 'Error: ' + e.message;
-                            }}
-                        }})()
-                        """
-
-                        iframe_html_result = await browser_evaluate(get_iframe_html_js)
-                        if not iframe_html_result.error:
-                            iframe_html = iframe_html_result.output
-                            if isinstance(iframe_html, str) and iframe_html.startswith("Result: "):
-                                iframe_html = iframe_html[8:]
-
-                            if iframe_html and not iframe_html.startswith("Error:"):
-                                iframe_path = output_dir / f"container_iframe_{idx}.html"
-                                iframe_path.write_text(iframe_html, encoding="utf-8")
-                                print(f"   ✅ 已保存iframe HTML: {iframe_path}")
-                            else:
-                                print(f"   ⚠️  iframe内容为空或无法访问")
-                else:
-                    print("⚠️  页面中没有找到iframe")
-            except Exception as e:
-                print(f"⚠️  处理iframe失败: {str(e)}")
-
-        print("\n✅ 测试完成!")
-
-    except Exception as e:
-        print(f"\n❌ 发生错误: {str(e)}")
-        import traceback
-        traceback.print_exc()
-
-    finally:
-        # 清理浏览器会话
-        try:
-            await cleanup_browser_session()
-        except Exception:
-            pass
-
-
-async def main():
-    await test_xhs_container()
-
-
-if __name__ == "__main__":
-    asyncio.run(main())