Просмотр исходного кода

Merge remote-tracking branch 'origin/main'

tanjingyu 1 месяц назад
Родитель
Коммит
d265d89022
42 измененных файлов с 4970 добавлено и 1233 удалено
  1. 14 0
      .claude/settings.local.json
  2. 11 0
      .gitignore
  3. 2 2
      agent/__init__.py
  4. 9 0
      agent/debug/__init__.py
  5. 614 0
      agent/debug/tree_dump.py
  6. 5 0
      agent/llm/providers/__init__.py
  7. 96 25
      agent/llm/providers/gemini.py
  8. 130 0
      agent/llm/providers/openrouter.py
  9. 4 3
      agent/models/__init__.py
  10. 14 1
      agent/models/memory.py
  11. 158 51
      agent/runner.py
  12. 69 0
      agent/skills/core.md
  13. 4 4
      agent/storage/__init__.py
  14. 2 81
      agent/storage/memory_impl.py
  15. 2 71
      agent/storage/protocols.py
  16. 25 0
      agent/storage/skill_loader.py
  17. 3 0
      agent/tools/__init__.py
  18. 1299 0
      agent/tools/builtin/baseClass.py
  19. 66 0
      agent/trace/__init__.py
  20. 275 0
      agent/trace/api.py
  21. 89 0
      agent/trace/memory_store.py
  22. 90 32
      agent/trace/models.py
  23. 79 0
      agent/trace/protocols.py
  24. 181 0
      agent/trace/websocket.py
  25. 85 0
      api_server.py
  26. 58 34
      docs/README.md
  27. 162 0
      docs/decisions.md
  28. 35 2
      docs/skills.md
  29. 624 0
      docs/step-tree.md
  30. 383 0
      docs/trace-api.md
  31. 0 3
      examples/__init__.py
  32. 0 62
      examples/browser_use_setup_demo.py
  33. 0 53
      examples/feature_extract/output_1/result.txt
  34. 91 27
      examples/feature_extract/run.py
  35. 1 1
      examples/feature_extract/test.prompt
  36. 129 0
      examples/test_skill.py
  37. 141 0
      examples/test_tools_baidu.py
  38. 0 188
      examples/tools_complete_demo.py
  39. 0 581
      examples/tools_examples.py
  40. 5 0
      requirements.txt
  41. 15 12
      tests/test_runner.py
  42. 0 0
      tools/__init__.py

+ 14 - 0
.claude/settings.local.json

@@ -0,0 +1,14 @@
+{
+  "permissions": {
+    "allow": [
+      "Bash(cat:*)",
+      "Bash(python:*)",
+      "Bash(pip show:*)",
+      "Read(//usr/local/anaconda3/lib/python3.13/site-packages/browser_use/**)",
+      "Bash(tee:*)",
+      "Bash(browser-use:*)"
+    ],
+    "deny": [],
+    "ask": []
+  }
+}

+ 11 - 0
.gitignore

@@ -47,3 +47,14 @@ htmlcov/
 # Misc
 .DS_Store
 Thumbs.db
+
+.env
+debug.log
+info.log
+.browser_use_files
+output
+
+
+
+# Debug output
+.trace/

+ 2 - 2
agent/__init__.py

@@ -13,10 +13,10 @@ Reson Agent - 可扩展、可学习的 Agent 框架
 
 from agent.runner import AgentRunner, AgentConfig
 from agent.events import AgentEvent
-from agent.models.trace import Trace, Step, StepType
+from agent.trace import Trace, Step, StepType, TraceStore
 from agent.models.memory import Experience, Skill
 from agent.tools import tool, ToolRegistry, get_tool_registry
-from agent.storage.protocols import TraceStore, MemoryStore, StateStore
+from agent.storage.protocols import MemoryStore, StateStore
 
 __version__ = "0.1.0"
 

+ 9 - 0
agent/debug/__init__.py

@@ -0,0 +1,9 @@
+"""
+Debug 工具模块
+
+提供 Step 树的实时查看功能,用于开发调试。
+"""
+
+from .tree_dump import StepTreeDumper, dump_tree, dump_markdown, dump_json
+
+__all__ = ["StepTreeDumper", "dump_tree", "dump_markdown", "dump_json"]

+ 614 - 0
agent/debug/tree_dump.py

@@ -0,0 +1,614 @@
+"""
+Step 树 Debug 输出
+
+将 Step 树以完整格式输出到文件,便于开发调试。
+
+使用方式:
+    1. 命令行实时查看:
+       watch -n 0.5 cat .trace/tree.txt
+
+    2. VS Code 打开文件自动刷新:
+       code .trace/tree.txt
+
+    3. 代码中使用:
+       from agent.debug import dump_tree
+       dump_tree(trace, steps)
+"""
+
+import json
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+# 默认输出路径
+DEFAULT_DUMP_PATH = ".trace/tree.txt"
+DEFAULT_JSON_PATH = ".trace/tree.json"
+DEFAULT_MD_PATH = ".trace/tree.md"
+
+
+class StepTreeDumper:
+    """Step 树 Debug 输出器"""
+
+    def __init__(self, output_path: str = DEFAULT_DUMP_PATH):
+        """
+        Remember the dump target and make sure its directory exists.
+
+        Args:
+            output_path: File the text dump is written to; missing parent
+                directories are created right away.
+        """
+        target = Path(output_path)
+        target.parent.mkdir(parents=True, exist_ok=True)
+        self.output_path = target
+
+    def dump(
+        self,
+        trace: Optional[Dict[str, Any]] = None,
+        steps: Optional[List[Dict[str, Any]]] = None,
+        title: str = "Step Tree Debug",
+    ) -> str:
+        """
+        Render the trace header and the step tree as plain text and save it.
+
+        The text is written to ``self.output_path`` (UTF-8) and also returned.
+
+        Args:
+            trace: Trace dictionary; the "## Trace" section is omitted when falsy.
+            steps: Step dictionaries; the "## Steps" section is omitted when falsy.
+            title: Heading shown inside the banner.
+
+        Returns:
+            The text that was written to the file.
+        """
+        banner = "=" * 60
+        generated = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+        out = [banner, f" {title}", f" Generated: {generated}", banner, ""]
+
+        # Trace summary section
+        if trace:
+            out += [
+                "## Trace",
+                f"  trace_id: {trace.get('trace_id', 'N/A')}",
+                f"  task: {trace.get('task', 'N/A')}",
+                f"  status: {trace.get('status', 'N/A')}",
+                f"  total_steps: {trace.get('total_steps', 0)}",
+                f"  total_tokens: {trace.get('total_tokens', 0)}",
+                f"  total_cost: {trace.get('total_cost', 0.0):.4f}",
+                "",
+            ]
+
+        # Step tree section
+        if steps:
+            hierarchy = self._build_tree(steps)
+            out += ["## Steps", "", self._render_tree(hierarchy, steps)]
+
+        content = "\n".join(out)
+        self.output_path.write_text(content, encoding="utf-8")
+        return content
+
+    def _build_tree(self, steps: List[Dict[str, Any]]) -> Dict[str, List[str]]:
+        """
+        Group step ids by their parent id.
+
+        Steps whose ``parent_id`` is ``None`` (or absent) are filed under the
+        synthetic ``"__root__"`` key, which is always present in the result.
+        A missing ``step_id`` is recorded as the empty string.
+
+        Returns:
+            Mapping of parent id -> child step ids, in input order.
+        """
+        children: Dict[str, List[str]] = {"__root__": []}
+
+        for step in steps:
+            parent_id = step.get("parent_id")
+            bucket = "__root__" if parent_id is None else parent_id
+            children.setdefault(bucket, []).append(step.get("step_id", ""))
+
+        return children
+
+    def _render_tree(
+        self,
+        tree: Dict[str, List[str]],
+        steps: List[Dict[str, Any]],
+        parent_id: str = "__root__",
+        indent: int = 0,
+    ) -> str:
+        """
+        Render the subtree below ``parent_id`` as text.
+
+        Args:
+            tree: Parent id -> child ids mapping (see ``_build_tree``).
+            steps: All step dictionaries; used to look up each id.
+            parent_id: Node whose children are rendered.
+            indent: Nesting depth of those children.
+
+        Returns:
+            The rendered nodes joined with newlines; an id with no matching
+            step is rendered from an empty dict.
+        """
+        # Build the id -> step lookup once; the recursion below reuses it
+        # instead of rebuilding it for every subtree.
+        step_map = {s.get("step_id"): s for s in steps}
+
+        def render(node_id: str, depth: int) -> str:
+            rendered = []
+            child_ids = tree.get(node_id, [])
+            last_index = len(child_ids) - 1
+
+            for i, child_id in enumerate(child_ids):
+                step = step_map.get(child_id, {})
+                rendered.append(self._render_node(step, depth, i == last_index))
+
+                # Descend only into ids that have children of their own.
+                if child_id in tree:
+                    rendered.append(render(child_id, depth + 1))
+
+            return "\n".join(rendered)
+
+        return render(parent_id, indent)
+
+    def _render_node(self, step: Dict[str, Any], indent: int, is_last: bool) -> str:
+        """
+        Render one step as a tree line followed by indented detail lines.
+
+        Args:
+            step: Step dictionary.
+            indent: Nesting depth (two spaces per level).
+            is_last: Whether this is the last child of its parent; selects
+                the connector glyph and the detail-line prefix.
+
+        Returns:
+            The node text, ending with an empty separator line.
+        """
+        icons = {
+            "completed": "✓",
+            "in_progress": "→",
+            "planned": "○",
+            "failed": "✗",
+            "skipped": "⊘",
+        }
+
+        base = "  " * indent
+        if is_last:
+            branch, detail = "└── ", base + "    "
+        else:
+            branch, detail = "├── ", base + "│   "
+
+        icon = icons.get(step.get("status", "unknown"), "?")
+        kind = step.get("step_type", "unknown")
+        text = step.get("description", "")
+
+        # Header line, then the id shortened to its first 8 characters.
+        out = [
+            f"{base}{branch}[{icon}] {kind}: {text}",
+            f"{detail}id: {step.get('step_id', '')[:8]}...",
+        ]
+
+        # Execution metrics, each shown only when present.
+        duration = step.get("duration_ms")
+        if duration is not None:
+            out.append(f"{detail}duration: {duration}ms")
+        tokens = step.get("tokens")
+        if tokens is not None:
+            out.append(f"{detail}tokens: {tokens}")
+        cost = step.get("cost")
+        if cost is not None:
+            out.append(f"{detail}cost: ${cost:.4f}")
+
+        # Summary, cut to 100 characters.
+        summary = step.get("summary")
+        if summary:
+            shown = summary if len(summary) <= 100 else summary[:100] + "..."
+            out.append(f"{detail}summary: {shown}")
+
+        # Data payload, formatted one level deeper.
+        data = step.get("data", {})
+        if data:
+            out.append(f"{detail}data:")
+            out.append(self._format_data(data, detail + "  "))
+
+        # Creation time: the clock part of an ISO string, or HH:MM:SS of a
+        # datetime-like object.
+        created_at = step.get("created_at", "")
+        if created_at:
+            if not isinstance(created_at, str):
+                clock = created_at.strftime("%H:%M:%S")
+            elif "T" in created_at:
+                clock = created_at.split("T")[-1][:8]
+            else:
+                clock = created_at
+            out.append(f"{detail}time: {clock}")
+
+        out.append("")  # blank separator line
+        return "\n".join(out)
+
+    def _format_data(self, data: Dict[str, Any], prefix: str, max_value_len: int = 200) -> str:
+        """
+        Format a step's ``data`` dict as ``key: value`` lines.
+
+        Strings are cut to ``max_value_len`` characters and multi-line strings
+        are collapsed to their first line. The line and character counts in
+        the note always describe the original value, not the truncated
+        preview. Dicts and lists are shown as indented JSON, cut to
+        ``max_value_len`` characters; everything else goes through ``str()``.
+
+        Args:
+            data: Mapping to format.
+            prefix: Text placed in front of every key line.
+            max_value_len: Maximum number of characters shown per value.
+
+        Returns:
+            The formatted lines joined with newlines.
+        """
+        out = []
+
+        for key, value in data.items():
+            if isinstance(value, str):
+                truncated = len(value) > max_value_len
+                preview = value[:max_value_len] if truncated else value
+                if "\n" in preview:
+                    # Collapse to the first line, reporting the size of the
+                    # whole value.
+                    first_line = preview.split("\n", 1)[0]
+                    total_lines = value.count("\n") + 1
+                    size_note = f", {len(value)} chars" if truncated else ""
+                    value_str = f"{first_line}... ({total_lines} lines{size_note})"
+                elif truncated:
+                    value_str = f"{preview}... ({len(value)} chars)"
+                else:
+                    value_str = preview
+            elif isinstance(value, (dict, list)):
+                value_str = json.dumps(value, ensure_ascii=False, indent=2)
+                if len(value_str) > max_value_len:
+                    value_str = value_str[:max_value_len] + "..."
+                # Indent continuation lines so they sit under the key.
+                value_str = value_str.replace("\n", "\n" + prefix + "  ")
+            else:
+                value_str = str(value)
+
+            out.append(f"{prefix}{key}: {value_str}")
+
+        return "\n".join(out)
+
+    def dump_markdown(
+        self,
+        trace: Optional[Dict[str, Any]] = None,
+        steps: Optional[List[Dict[str, Any]]] = None,
+        title: str = "Step Tree Debug",
+        output_path: Optional[str] = None,
+    ) -> str:
+        """
+        Render the trace and step tree as Markdown and save it.
+
+        Args:
+            trace: Trace dictionary; the "## Trace" section is omitted when falsy.
+            steps: Step dictionaries; the "## Steps" section is omitted when falsy.
+            title: Top-level heading.
+            output_path: Destination file. When ``None`` the file sits next to
+                ``self.output_path`` with its suffix replaced by ``.md``.
+
+        Returns:
+            The Markdown text that was written.
+        """
+        lines = [
+            f"# {title}",
+            "",
+            f"*Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*",
+            "",
+        ]
+
+        # Trace summary section
+        if trace:
+            lines.append("## Trace")
+            lines.append("")
+            lines.append(f"- **trace_id**: `{trace.get('trace_id', 'N/A')}`")
+            lines.append(f"- **task**: {trace.get('task', 'N/A')}")
+            lines.append(f"- **status**: {trace.get('status', 'N/A')}")
+            lines.append(f"- **total_steps**: {trace.get('total_steps', 0)}")
+            lines.append(f"- **total_tokens**: {trace.get('total_tokens', 0)}")
+            lines.append(f"- **total_cost**: ${trace.get('total_cost', 0.0):.4f}")
+            lines.append("")
+
+        # Step tree section; top-level steps start at heading level 3.
+        if steps:
+            lines.append("## Steps")
+            lines.append("")
+            tree = self._build_tree(steps)
+            step_map = {s.get("step_id"): s for s in steps}
+            lines.append(self._render_markdown_tree(tree, step_map, level=3))
+
+        content = "\n".join(lines)
+
+        # Swap only the file suffix: a textual ".txt" -> ".md" replace would
+        # also touch directory names and, for paths without ".txt", would
+        # overwrite the plain-text dump.
+        if output_path is None:
+            target = self.output_path.with_suffix(".md")
+        else:
+            target = Path(output_path)
+
+        # An explicit path may point into a directory that does not exist yet.
+        target.parent.mkdir(parents=True, exist_ok=True)
+        target.write_text(content, encoding="utf-8")
+        return content
+
+    def _render_markdown_tree(
+        self,
+        tree: Dict[str, List[str]],
+        step_map: Dict[str, Dict[str, Any]],
+        parent_id: str = "__root__",
+        level: int = 3,
+    ) -> str:
+        """
+        Render the subtree below ``parent_id`` as nested Markdown sections.
+
+        Args:
+            tree: Parent id -> child ids mapping.
+            step_map: Step id -> step dictionary lookup.
+            parent_id: Node whose children are rendered.
+            level: Heading level for those children; each deeper generation
+                uses one more ``#``.
+
+        Returns:
+            The rendered sections joined with newlines.
+        """
+        sections = []
+
+        for child_id in tree.get(parent_id, []):
+            sections.append(self._render_markdown_node(step_map.get(child_id, {}), level))
+
+            # Leaves have no entry in the mapping.
+            if child_id not in tree:
+                continue
+            sections.append(self._render_markdown_tree(tree, step_map, child_id, level + 1))
+
+        return "\n".join(sections)
+
+    def _render_markdown_node(self, step: Dict[str, Any], level: int) -> str:
+        """
+        Render one step as a Markdown section.
+
+        The section holds a heading, a bullet list of basic facts, an optional
+        collapsible summary, and the step's ``data`` rendered by
+        ``_render_markdown_data``.
+
+        Args:
+            step: Step dictionary.
+            level: Number of ``#`` characters in the heading.
+
+        Returns:
+            The Markdown text for the step.
+        """
+        icons = {
+            "completed": "✓",
+            "in_progress": "→",
+            "planned": "○",
+            "failed": "✗",
+            "skipped": "⊘",
+        }
+        icon = icons.get(step.get("status", "unknown"), "?")
+        kind = step.get("step_type", "unknown")
+        text = step.get("description", "")
+
+        # Heading plus the basic-info bullet list (id cut to 16 characters).
+        out = [
+            f"{'#' * level} [{icon}] {kind}: {text}",
+            "",
+            "**基本信息**",
+            "",
+            f"- **id**: `{step.get('step_id', '')[:16]}...`",
+        ]
+
+        duration = step.get("duration_ms")
+        if duration is not None:
+            out.append(f"- **duration**: {duration}ms")
+        tokens = step.get("tokens")
+        if tokens is not None:
+            out.append(f"- **tokens**: {tokens}")
+        cost = step.get("cost")
+        if cost is not None:
+            out.append(f"- **cost**: ${cost:.4f}")
+
+        # Clock part of an ISO string, or HH:MM:SS of a datetime-like object.
+        created_at = step.get("created_at", "")
+        if created_at:
+            if not isinstance(created_at, str):
+                clock = created_at.strftime("%H:%M:%S")
+            elif "T" in created_at:
+                clock = created_at.split("T")[-1][:8]
+            else:
+                clock = created_at
+            out.append(f"- **time**: {clock}")
+
+        out.append("")
+
+        # Collapsible summary block
+        summary = step.get("summary")
+        if summary:
+            out += [
+                "<details>",
+                "<summary><b>📝 Summary</b></summary>",
+                "",
+                f"```\n{summary}\n```",
+                "",
+                "</details>",
+                "",
+            ]
+
+        # Data payload
+        data = step.get("data", {})
+        if data:
+            out += [self._render_markdown_data(data), ""]
+
+        return "\n".join(out)
+
+    def _render_markdown_data(self, data: Dict[str, Any]) -> str:
+        """
+        Render every entry of a step's ``data`` dict as Markdown.
+
+        A fixed list of well-known keys is emitted first, in that order;
+        all other keys follow in sorted order.
+
+        Returns:
+            The rendered entries joined with newlines.
+        """
+        priority = ["messages", "tools", "response", "content", "tool_calls", "model"]
+
+        ordered = [key for key in priority if key in data]
+        ordered += sorted(set(data.keys()) - set(ordered))
+
+        return "\n".join(self._render_data_item(key, data[key]) for key in ordered)
+
+    def _render_data_item(self, key: str, value: Any) -> str:
+        """
+        Render one ``data`` entry as Markdown.
+
+        Short scalar values become a single bullet. Strings longer than 100
+        characters or containing a newline, and all dicts and lists, are
+        wrapped in a collapsible ``<details>`` block. Strings containing
+        ``data:image`` and strings longer than 10000 characters are cut to a
+        200-character preview; only the former are labelled as image data.
+
+        Args:
+            key: Entry name; also selects the icon.
+            value: Entry value; ``None`` renders as an empty string.
+
+        Returns:
+            The Markdown for the entry.
+        """
+        icon_map = {
+            "messages": "📨",
+            "response": "🤖",
+            "tools": "🛠️",
+            "tool_calls": "🔧",
+            "model": "🎯",
+            "error": "❌",
+            "content": "💬",
+        }
+        icon = icon_map.get(key, "📄")
+
+        if value is None:
+            return ""
+
+        is_text = isinstance(value, str)
+        is_container = isinstance(value, (dict, list))
+
+        # Collapse long or multi-line text and every nested structure.
+        if is_text:
+            needs_collapse = len(value) > 100 or "\n" in value
+        else:
+            needs_collapse = is_container
+
+        if not needs_collapse:
+            return f"- **{icon} {key}**: `{value}`"
+
+        lines = [
+            "<details>",
+            f"<summary><b>{icon} {key.capitalize()}</b></summary>",
+            "",
+        ]
+
+        if is_text:
+            lines.append("```")
+            if "data:image" in value:
+                lines.append(f"[IMAGE DATA: {len(value)} chars, truncated for display]")
+                lines.append(value[:200] + "...")
+            elif len(value) > 10000:
+                # Very long text is cut as well, but must not be labelled as
+                # an image.
+                lines.append(f"[LONG TEXT: {len(value)} chars, truncated for display]")
+                lines.append(value[:200] + "...")
+            else:
+                lines.append(value)
+            lines.append("```")
+        else:
+            # Shorten embedded base64 images before serialising.
+            truncated_value = self._truncate_image_data(value)
+            lines.append("```json")
+            lines.append(json.dumps(truncated_value, ensure_ascii=False, indent=2))
+            lines.append("```")
+
+        lines.append("")
+        lines.append("</details>")
+        return "\n".join(lines)
+
+    def _truncate_image_data(self, obj: Any, max_length: int = 200) -> Any:
+        """
+        Return a copy of ``obj`` with embedded base64 image data shortened.
+
+        Dicts and lists are walked recursively. Every string that starts with
+        ``data:image`` is replaced by a short placeholder, whether it is a
+        dict value, a list element, or ``obj`` itself. Any other string
+        longer than 100000 characters is cut to ``max_length`` characters.
+
+        Args:
+            obj: Value to sanitise; dicts and lists are rebuilt as new objects.
+            max_length: Characters kept from strings cut without a parsed
+                data-URL header.
+
+        Returns:
+            The sanitised value; other types are returned unchanged.
+        """
+        if isinstance(obj, dict):
+            return {key: self._truncate_image_data(value, max_length) for key, value in obj.items()}
+
+        if isinstance(obj, list):
+            return [self._truncate_image_data(item, max_length) for item in obj]
+
+        if isinstance(obj, str):
+            if obj.startswith("data:image"):
+                # Split "data:image/...;base64" from the payload.
+                header, comma, payload = obj.partition(",")
+                if comma:
+                    size_kb = len(payload) / 1024
+                    return f"<IMAGE_DATA: {size_kb:.1f}KB, {header}, preview: {payload[:50]}...>"
+                return obj[:max_length] + f"... ({len(obj)} chars)"
+
+            if len(obj) > 100000:
+                # Huge string, possibly base64 that was not recognised.
+                return obj[:max_length] + f"... (TRUNCATED: {len(obj)} chars total)"
+
+        return obj
+
+
+def dump_tree(
+    trace: Optional[Any] = None,
+    steps: Optional[List[Any]] = None,
+    output_path: str = DEFAULT_DUMP_PATH,
+    title: str = "Step Tree Debug",
+) -> str:
+    """
+    Convenience wrapper: write the step tree as plain text to a file.
+
+    Objects that expose ``to_dict()`` are converted with it; anything else is
+    passed through unchanged.
+
+    Args:
+        trace: Trace object or dictionary.
+        steps: Step objects or dictionaries.
+        output_path: Destination file.
+        title: Heading shown in the dump.
+
+    Returns:
+        The text that was written.
+
+    Example:
+        from agent.debug import dump_tree
+
+        # call after every step change
+        dump_tree(trace, steps)
+
+        # custom destination
+        dump_tree(trace, steps, output_path=".debug/my_trace.txt")
+    """
+    if trace is not None and hasattr(trace, "to_dict"):
+        trace_dict = trace.to_dict()
+    else:
+        trace_dict = trace
+
+    steps_list = [
+        step.to_dict() if hasattr(step, "to_dict") else step
+        for step in (steps or [])
+    ]
+
+    return StepTreeDumper(output_path).dump(trace_dict, steps_list, title)
+
+
+def dump_json(
+    trace: Optional[Any] = None,
+    steps: Optional[List[Any]] = None,
+    output_path: str = DEFAULT_JSON_PATH,
+) -> str:
+    """
+    Write the trace and steps as one JSON document, for programmatic analysis.
+
+    Objects that expose ``to_dict()`` are converted with it; anything else is
+    passed through unchanged. The parent directory of ``output_path`` is
+    created if needed.
+
+    Args:
+        trace: Trace object or dictionary.
+        steps: Step objects or dictionaries.
+        output_path: Destination file.
+
+    Returns:
+        The JSON string that was written.
+    """
+    target = Path(output_path)
+    target.parent.mkdir(parents=True, exist_ok=True)
+
+    if trace is not None and hasattr(trace, "to_dict"):
+        trace_dict = trace.to_dict()
+    else:
+        trace_dict = trace
+
+    steps_list = [
+        step.to_dict() if hasattr(step, "to_dict") else step
+        for step in (steps or [])
+    ]
+
+    document = {
+        "generated_at": datetime.now().isoformat(),
+        "trace": trace_dict,
+        "steps": steps_list,
+    }
+
+    content = json.dumps(document, ensure_ascii=False, indent=2)
+    target.write_text(content, encoding="utf-8")
+    return content
+
+
+def dump_markdown(
+    trace: Optional[Any] = None,
+    steps: Optional[List[Any]] = None,
+    output_path: str = DEFAULT_MD_PATH,
+    title: str = "Step Tree Debug",
+) -> str:
+    """
+    Convenience wrapper: write the step tree as collapsible Markdown.
+
+    Objects that expose ``to_dict()`` are converted with it; anything else is
+    passed through unchanged.
+
+    Args:
+        trace: Trace object or dictionary.
+        steps: Step objects or dictionaries.
+        output_path: Destination file (default ``.trace/tree.md``).
+        title: Top-level heading.
+
+    Returns:
+        The Markdown text that was written.
+
+    Example:
+        from agent.debug import dump_markdown
+
+        # full, collapsible Markdown dump
+        dump_markdown(trace, steps)
+
+        # custom destination
+        dump_markdown(trace, steps, output_path=".debug/debug.md")
+    """
+    if trace is not None and hasattr(trace, "to_dict"):
+        trace_dict = trace.to_dict()
+    else:
+        trace_dict = trace
+
+    steps_list = [
+        step.to_dict() if hasattr(step, "to_dict") else step
+        for step in (steps or [])
+    ]
+
+    return StepTreeDumper(output_path).dump_markdown(trace_dict, steps_list, title, output_path)

+ 5 - 0
agent/llm/providers/__init__.py

@@ -3,3 +3,8 @@ LLM Providers
 
 各个 LLM 提供商的适配器
 """
+
+from .gemini import create_gemini_llm_call
+from .openrouter import create_openrouter_llm_call
+
+__all__ = ["create_gemini_llm_call", "create_openrouter_llm_call"]

+ 96 - 25
agent/llm/providers/gemini.py

@@ -8,10 +8,60 @@ Gemini Provider (HTTP API)
 
 import os
 import json
+import sys
 import httpx
 from typing import List, Dict, Any, Optional
 
 
+def _dump_llm_request(endpoint: str, payload: Dict[str, Any], model: str):
+    """
+    Print the full LLM request to stderr for debugging.
+
+    Does nothing unless the ``AGENT_DEBUG`` environment variable is set to a
+    non-empty value. Inline base64 image payloads are replaced by their size
+    and a 50-character preview; everything else, including tool schemas, is
+    printed unchanged.
+
+    Args:
+        endpoint: URL the request is sent to.
+        payload: Request body; it is not modified.
+        model: Model name.
+    """
+    if not os.getenv("AGENT_DEBUG"):
+        return
+
+    def truncate_images(obj):
+        """Return a copy of ``obj`` with inline image payloads shortened."""
+        if isinstance(obj, dict):
+            result = {}
+            for key, value in obj.items():
+                # Accept both spellings of the inline-image part:
+                # snake_case "inline_data" and camelCase "inlineData".
+                if key in ("inline_data", "inlineData") and isinstance(value, dict):
+                    mime_key = "mime_type" if key == "inline_data" else "mimeType"
+                    mime_type = value.get("mime_type", value.get("mimeType", "unknown"))
+                    # "data" may be present but None; treat it as empty so
+                    # the preview slice below cannot fail.
+                    data = value.get("data") or ""
+                    data_size_kb = len(data) / 1024
+                    result[key] = {
+                        mime_key: mime_type,
+                        "data": f"<BASE64_IMAGE: {data_size_kb:.1f}KB, preview: {data[:50]}...>"
+                    }
+                else:
+                    result[key] = truncate_images(value)
+            return result
+        elif isinstance(obj, list):
+            return [truncate_images(item) for item in obj]
+        else:
+            return obj
+
+    debug_info = {
+        "endpoint": endpoint,
+        "model": model,
+        "payload": truncate_images(payload)
+    }
+
+    # stderr keeps the dump out of the program's normal output.
+    print("\n" + "="*80, file=sys.stderr)
+    print("[AGENT_DEBUG] LLM Request Dump", file=sys.stderr)
+    print("="*80, file=sys.stderr)
+    print(json.dumps(debug_info, indent=2, ensure_ascii=False), file=sys.stderr)
+    print("="*80 + "\n", file=sys.stderr)
+
+
 def _convert_messages_to_gemini(messages: List[Dict]) -> tuple[List[Dict], Optional[str]]:
     """
     将 OpenAI 格式消息转换为 Gemini 格式
@@ -299,6 +349,9 @@ def create_gemini_llm_call(
             if gemini_tools:
                 payload["tools"] = gemini_tools
 
+        # Debug: dump完整请求(需要设置 AGENT_DEBUG=1)
+        _dump_llm_request(endpoint, payload, model)
+
         # 调用 API
         try:
             response = await client.post(endpoint, json=payload)
@@ -313,37 +366,55 @@ def create_gemini_llm_call(
             print(f"[Gemini HTTP] Request failed: {e}")
             raise
 
+        # Debug: 输出原始响应(如果启用)
+        if os.getenv("AGENT_DEBUG"):
+            print("\n[AGENT_DEBUG] Gemini Response:", file=sys.stderr)
+            print(json.dumps(gemini_resp, ensure_ascii=False, indent=2)[:2000], file=sys.stderr)
+            print("\n", file=sys.stderr)
+
         # 解析响应
         content = ""
         tool_calls = None
 
         candidates = gemini_resp.get("candidates", [])
         if candidates:
-            parts = candidates[0].get("content", {}).get("parts", [])
-
-            # 提取文本
-            for part in parts:
-                if "text" in part:
-                    content += part.get("text", "")
-
-            # 提取 functionCall
-            for i, part in enumerate(parts):
-                if "functionCall" in part:
-                    if tool_calls is None:
-                        tool_calls = []
-
-                    fc = part["functionCall"]
-                    name = fc.get("name", "")
-                    args = fc.get("args", {})
-
-                    tool_calls.append({
-                        "id": f"call_{i}",
-                        "type": "function",
-                        "function": {
-                            "name": name,
-                            "arguments": json.dumps(args, ensure_ascii=False)
-                        }
-                    })
+            candidate = candidates[0]
+
+            # 检查是否有错误
+            finish_reason = candidate.get("finishReason")
+            if finish_reason == "MALFORMED_FUNCTION_CALL":
+                # Gemini 返回了格式错误的函数调用
+                # 提取 finishMessage 中的内容作为 content
+                finish_message = candidate.get("finishMessage", "")
+                print(f"[Gemini HTTP] Warning: MALFORMED_FUNCTION_CALL\n{finish_message}")
+                content = f"[模型尝试调用工具但格式错误]\n\n{finish_message}"
+            else:
+                # 正常解析
+                parts = candidate.get("content", {}).get("parts", [])
+
+                # 提取文本
+                for part in parts:
+                    if "text" in part:
+                        content += part.get("text", "")
+
+                # 提取 functionCall
+                for i, part in enumerate(parts):
+                    if "functionCall" in part:
+                        if tool_calls is None:
+                            tool_calls = []
+
+                        fc = part["functionCall"]
+                        name = fc.get("name", "")
+                        args = fc.get("args", {})
+
+                        tool_calls.append({
+                            "id": f"call_{i}",
+                            "type": "function",
+                            "function": {
+                                "name": name,
+                                "arguments": json.dumps(args, ensure_ascii=False)
+                            }
+                        })
 
         # 提取 usage
         usage_meta = gemini_resp.get("usageMetadata", {})

+ 130 - 0
agent/llm/providers/openrouter.py

@@ -0,0 +1,130 @@
+"""
+OpenRouter Provider
+
+使用 OpenRouter API 调用各种模型(包括 Claude Sonnet 4.5)
+支持 OpenAI 兼容的 API 格式
+"""
+
+import os
+import json
+import httpx
+from typing import List, Dict, Any, Optional
+
+
+async def openrouter_llm_call(
+    messages: List[Dict[str, Any]],
+    model: str = "anthropic/claude-sonnet-4.5",
+    tools: Optional[List[Dict]] = None,
+    **kwargs
+) -> Dict[str, Any]:
+    """
+    Call the OpenRouter chat-completions endpoint and normalise the reply.
+
+    Args:
+        messages: Message list in OpenAI format.
+        model: Model name (e.g. "anthropic/claude-sonnet-4.5").
+        tools: Tool definitions in OpenAI format.
+        **kwargs: Optional settings; only ``temperature`` and ``max_tokens``
+            are forwarded.
+
+    Returns:
+        {
+            "content": str,               # "" when the reply has no text
+            "tool_calls": List[Dict] | None,
+            "prompt_tokens": int,
+            "completion_tokens": int,
+            "cost": float                 # usage.cost if reported, else 0.0
+        }
+
+    Raises:
+        ValueError: If ``OPEN_ROUTER_API_KEY`` is not set.
+        httpx.HTTPStatusError: If the API answers with an error status.
+    """
+    api_key = os.getenv("OPEN_ROUTER_API_KEY")
+    if not api_key:
+        raise ValueError("OPEN_ROUTER_API_KEY environment variable not set")
+
+    base_url = "https://openrouter.ai/api/v1"
+    endpoint = f"{base_url}/chat/completions"
+
+    # Request body
+    payload = {
+        "model": model,
+        "messages": messages,
+    }
+    if tools:
+        payload["tools"] = tools
+    for option in ("temperature", "max_tokens"):
+        if option in kwargs:
+            payload[option] = kwargs[option]
+
+    # OpenRouter-specific headers
+    headers = {
+        "Authorization": f"Bearer {api_key}",
+        "HTTP-Referer": "https://github.com/your-repo",  # optional, used for statistics
+        "X-Title": "Agent Framework",  # optional, shown on the OpenRouter dashboard
+    }
+
+    async with httpx.AsyncClient(timeout=120.0) as client:
+        try:
+            response = await client.post(endpoint, json=payload, headers=headers)
+            response.raise_for_status()
+            result = response.json()
+
+        except httpx.HTTPStatusError as e:
+            error_body = e.response.text
+            print(f"[OpenRouter] Error {e.response.status_code}: {error_body}")
+            raise
+        except Exception as e:
+            print(f"[OpenRouter] Request failed: {e}")
+            raise
+
+    # Parse the OpenAI-style response. Fields may be missing or null, so
+    # each one falls back to an empty value.
+    choices = result.get("choices") or []
+    if not choices and result.get("error"):
+        # Surface an error body instead of returning an empty reply silently.
+        print(f"[OpenRouter] Response contained an error: {result.get('error')}")
+
+    choice = choices[0] if choices else {}
+    message = choice.get("message") or {}
+
+    # "content" is null when the model answers only with tool calls;
+    # callers are promised a string.
+    content = message.get("content") or ""
+    tool_calls = message.get("tool_calls")
+
+    usage = result.get("usage") or {}
+    prompt_tokens = usage.get("prompt_tokens", 0)
+    completion_tokens = usage.get("completion_tokens", 0)
+
+    # Use the cost reported in the usage block when present; otherwise keep
+    # the previous behaviour of reporting 0.
+    cost = float(usage.get("cost") or 0.0)
+
+    return {
+        "content": content,
+        "tool_calls": tool_calls,
+        "prompt_tokens": prompt_tokens,
+        "completion_tokens": completion_tokens,
+        "cost": cost
+    }
+
+
+def create_openrouter_llm_call(
+    model: str = "anthropic/claude-sonnet-4.5"
+):
+    """
+    Build an async LLM call function bound to a default OpenRouter model.
+
+    Args:
+        model: Default model name, for example
+            - "anthropic/claude-sonnet-4.5"
+            - "anthropic/claude-opus-4.5"
+            - "openai/gpt-4o"
+
+    Returns:
+        An async function ``llm_call(messages, model=<default>, tools=None,
+        **kwargs)`` that forwards to ``openrouter_llm_call``.
+    """
+    default_model = model
+
+    async def llm_call(
+        messages: List[Dict[str, Any]],
+        model: str = default_model,
+        tools: Optional[List[Dict]] = None,
+        **kwargs
+    ) -> Dict[str, Any]:
+        reply = await openrouter_llm_call(messages, model, tools, **kwargs)
+        return reply
+
+    return llm_call

+ 4 - 3
agent/models/__init__.py

@@ -1,8 +1,9 @@
 """
-Models 包
+Models 包 - 记忆相关模型
+
+Trace/Step 模型已移动到 agent.trace 模块
 """
 
-from agent.models.trace import Trace, Step, StepType
 from agent.models.memory import Experience, Skill
 
-__all__ = ["Trace", "Step", "StepType", "Experience", "Skill"]
+__all__ = ["Experience", "Skill"]

+ 14 - 1
agent/models/memory.py

@@ -102,6 +102,7 @@ class Skill:
     parent_id: Optional[str] = None
 
     # 内容
+    content: Optional[str] = None  # 完整的 skill 内容(Markdown)
     guidelines: List[str] = field(default_factory=list)
     derived_from: List[str] = field(default_factory=list)  # experience_ids
 
@@ -119,6 +120,7 @@ class Skill:
         name: str,
         description: str,
         category: str = "general",
+        content: Optional[str] = None,
         guidelines: List[str] = None,
         derived_from: List[str] = None,
         parent_id: Optional[str] = None,
@@ -132,6 +134,7 @@ class Skill:
             description=description,
             category=category,
             parent_id=parent_id,
+            content=content,
             guidelines=guidelines or [],
             derived_from=derived_from or [],
             created_at=now,
@@ -147,6 +150,7 @@ class Skill:
             "description": self.description,
             "category": self.category,
             "parent_id": self.parent_id,
+            "content": self.content,
             "guidelines": self.guidelines,
             "derived_from": self.derived_from,
             "version": self.version,
@@ -155,7 +159,16 @@ class Skill:
         }
 
     def to_prompt_text(self) -> str:
-        """转换为可注入 Prompt 的文本"""
+        """
+        转换为可注入 Prompt 的文本
+
+        优先使用完整的 content(如果有),否则使用 description + guidelines
+        """
+        # 如果有完整的 content,直接使用
+        if self.content:
+            return self.content.strip()
+
+        # 否则使用旧的格式(向后兼容)
         lines = [f"### {self.name}", self.description]
         if self.guidelines:
             lines.append("指导原则:")

+ 158 - 51
agent/runner.py

@@ -14,15 +14,29 @@ from datetime import datetime
 from typing import AsyncIterator, Optional, Dict, Any, List, Callable, Literal
 
 from agent.events import AgentEvent
-from agent.models.trace import Trace, Step
+from agent.trace import Trace, Step, TraceStore
 from agent.models.memory import Experience, Skill
-from agent.storage.protocols import TraceStore, MemoryStore, StateStore
+from agent.storage.protocols import MemoryStore, StateStore
 from agent.storage.skill_loader import load_skills_from_dir
 from agent.tools import ToolRegistry, get_tool_registry
+from agent.debug import dump_tree, dump_markdown
 
 logger = logging.getLogger(__name__)
 
 
+# 内置工具列表(始终自动加载)
+BUILTIN_TOOLS = [
+    "read_file",
+    "edit_file",
+    "write_file",
+    "glob_files",
+    "grep_content",
+    "bash_command",
+    "skill",
+    "list_skills",
+]
+
+
 @dataclass
 class AgentConfig:
     """Agent 配置"""
@@ -60,6 +74,8 @@ class AgentRunner:
         tool_registry: Optional[ToolRegistry] = None,
         llm_call: Optional[Callable] = None,
         config: Optional[AgentConfig] = None,
+        skills_dir: Optional[str] = None,
+        debug: bool = False,
     ):
         """
         初始化 AgentRunner
@@ -71,6 +87,8 @@ class AgentRunner:
             tool_registry: 工具注册表(可选,默认使用全局注册表)
             llm_call: LLM 调用函数(必须提供,用于实际调用 LLM)
             config: Agent 配置
+            skills_dir: Skills 目录路径(可选,不提供则不加载 skills)
+            debug: 是否启用 debug 模式(输出 step tree 到 .trace/tree.txt,并同时生成 Markdown 格式的 tree.md)
         """
         self.trace_store = trace_store
         self.memory_store = memory_store
@@ -78,12 +96,27 @@ class AgentRunner:
         self.tools = tool_registry or get_tool_registry()
         self.llm_call = llm_call
         self.config = config or AgentConfig()
+        self.skills_dir = skills_dir
+        self.debug = debug
 
     def _generate_id(self) -> str:
         """生成唯一 ID"""
         import uuid
         return str(uuid.uuid4())
 
+    async def _dump_debug(self, trace_id: str) -> None:
+        """Debug 模式下输出 step tree(txt + markdown 两种格式)"""
+        if not self.debug or not self.trace_store:
+            return
+        trace = await self.trace_store.get_trace(trace_id)
+        steps = await self.trace_store.get_trace_steps(trace_id)
+
+        # 输出 tree.txt(简洁格式,兼容旧版)
+        dump_tree(trace, steps)
+
+        # 输出 tree.md(完整可折叠格式)
+        dump_markdown(trace, steps)
+
     # ===== 单次调用 =====
 
     async def call(
@@ -125,9 +158,15 @@ class AgentRunner:
             trace_id = await self.trace_store.create_trace(trace_obj)
 
         # 准备工具 Schema
-        tool_schemas = None
+        # 合并内置工具 + 用户指定工具
+        tool_names = BUILTIN_TOOLS.copy()
         if tools:
-            tool_schemas = self.tools.get_schemas(tools)
+            # 添加用户指定的工具(去重)
+            for tool in tools:
+                if tool not in tool_names:
+                    tool_names.append(tool)
+
+        tool_schemas = self.tools.get_schemas(tool_names)
 
         # 调用 LLM
         result = await self.llm_call(
@@ -141,19 +180,22 @@ class AgentRunner:
         if trace and self.trace_store and trace_id:
             step = Step.create(
                 trace_id=trace_id,
-                step_type="llm_call",
+                step_type="thought",
                 sequence=0,
+                status="completed",
+                description=f"LLM 调用 ({model})",
                 data={
                     "messages": messages,
                     "response": result.get("content", ""),
                     "model": model,
+                    "tools": tool_schemas,  # 记录传给模型的 tools schema
                     "tool_calls": result.get("tool_calls"),
-                    "prompt_tokens": result.get("prompt_tokens", 0),
-                    "completion_tokens": result.get("completion_tokens", 0),
-                    "cost": result.get("cost", 0),
-                }
+                },
+                tokens=result.get("prompt_tokens", 0) + result.get("completion_tokens", 0),
+                cost=result.get("cost", 0),
             )
             step_id = await self.trace_store.add_step(step)
+            await self._dump_debug(trace_id)
 
             # 完成 Trace
             await self.trace_store.update_trace(
@@ -240,8 +282,9 @@ class AgentRunner:
         })
 
         try:
-            # 加载记忆(Experience)
+            # 加载记忆(Experience 和 Skill)
             experiences_text = ""
+            skills_text = ""
 
             if enable_memory and self.memory_store:
                 scope = f"agent:{agent_type}"
@@ -254,24 +297,36 @@ class AgentRunner:
                         trace_id=trace_id,
                         step_type="memory_read",
                         sequence=0,
+                        status="completed",
+                        description=f"加载 {len(experiences)} 条经验",
                         data={
                             "experiences_count": len(experiences),
                             "experiences": [e.to_dict() for e in experiences],
                         }
                     )
                     await self.trace_store.add_step(mem_step)
+                    await self._dump_debug(trace_id)
 
                 yield AgentEvent("memory_loaded", {
                     "experiences_count": len(experiences)
                 })
 
+            # 加载 Skills(如果提供了 skills_dir)
+            if self.skills_dir:
+                skills = load_skills_from_dir(self.skills_dir)
+                if skills:
+                    skills_text = self._format_skills(skills)
+                    logger.info(f"加载 {len(skills)} 个 skills 从 {self.skills_dir}")
+
             # 构建初始消息
             if messages is None:
                 messages = []
 
             if system_prompt:
-                # 注入记忆到 system prompt
+                # 注入记忆和 skills 到 system prompt
                 full_system = system_prompt
+                if skills_text:
+                    full_system += f"\n\n## Skills\n{skills_text}"
                 if experiences_text:
                     full_system += f"\n\n## 相关经验\n{experiences_text}"
 
@@ -280,13 +335,19 @@ class AgentRunner:
             # 添加任务描述
             messages.append({"role": "user", "content": task})
 
-            # 准备工具
-            tool_schemas = None
+            # 准备工具 Schema
+            # 合并内置工具 + 用户指定工具
+            tool_names = BUILTIN_TOOLS.copy()
             if tools:
-                tool_schemas = self.tools.get_schemas(tools)
+                # 添加用户指定的工具(去重)
+                for tool in tools:
+                    if tool not in tool_names:
+                        tool_names.append(tool)
+
+            tool_schemas = self.tools.get_schemas(tool_names)
 
             # 执行循环
-            parent_step_ids = []
+            current_goal_id = None  # 当前焦点 goal
             sequence = 1
             total_tokens = 0
             total_cost = 0.0
@@ -294,7 +355,7 @@ class AgentRunner:
             for iteration in range(max_iterations):
                 yield AgentEvent("step_started", {
                     "iteration": iteration,
-                    "step_type": "llm_call"
+                    "step_type": "thought"
                 })
 
                 # 调用 LLM
@@ -307,42 +368,51 @@ class AgentRunner:
 
                 response_content = result.get("content", "")
                 tool_calls = result.get("tool_calls")
-                tokens = result.get("prompt_tokens", 0) + result.get("completion_tokens", 0)
-                cost = result.get("cost", 0)
+                step_tokens = result.get("prompt_tokens", 0) + result.get("completion_tokens", 0)
+                step_cost = result.get("cost", 0)
 
-                total_tokens += tokens
-                total_cost += cost
+                total_tokens += step_tokens
+                total_cost += step_cost
 
                 # 记录 LLM 调用 Step
                 llm_step_id = self._generate_id()
                 if self.trace_store:
+                    # 推断 step_type
+                    step_type = "thought"
+                    if tool_calls:
+                        step_type = "thought"  # 有工具调用的思考
+                    elif not tool_calls and iteration > 0:
+                        step_type = "response"  # 无工具调用,可能是最终回复
+
                     llm_step = Step(
                         step_id=llm_step_id,
                         trace_id=trace_id,
-                        step_type="llm_call",
+                        step_type=step_type,
+                        status="completed",
                         sequence=sequence,
-                        parent_ids=parent_step_ids,
+                        parent_id=current_goal_id,
+                        description=response_content[:100] + "..." if len(response_content) > 100 else response_content,
                         data={
-                            "messages": messages,
-                            "response": response_content,
+                            "messages": messages,  # 记录完整的 messages(包含 system prompt)
+                            "content": response_content,
                             "model": model,
+                            "tools": tool_schemas,  # 记录传给模型的 tools schema
                             "tool_calls": tool_calls,
-                            "prompt_tokens": result.get("prompt_tokens", 0),
-                            "completion_tokens": result.get("completion_tokens", 0),
-                            "cost": cost,
-                        }
+                        },
+                        tokens=step_tokens,
+                        cost=step_cost,
                     )
                     await self.trace_store.add_step(llm_step)
+                    await self._dump_debug(trace_id)
 
                 sequence += 1
-                parent_step_ids = [llm_step_id]
 
                 yield AgentEvent("llm_call_completed", {
                     "step_id": llm_step_id,
                     "content": response_content,
                     "tool_calls": tool_calls,
-                    "tokens": tokens,
-                    "cost": cost
+                    "tokens": step_tokens,
+                    "cost": step_cost
                 })
 
                 # 处理工具调用
@@ -379,28 +449,50 @@ class AgentRunner:
                             uid=uid or ""
                         )
 
-                        # 记录 tool_call Step
-                        tool_step_id = self._generate_id()
+                        # 记录 action Step
+                        action_step_id = self._generate_id()
                         if self.trace_store:
-                            tool_step = Step(
-                                step_id=tool_step_id,
+                            action_step = Step(
+                                step_id=action_step_id,
                                 trace_id=trace_id,
-                                step_type="tool_call",
+                                step_type="action",
+                                status="completed",
                                 sequence=sequence,
-                                parent_ids=[llm_step_id],
+                                parent_id=llm_step_id,
+                                description=f"{tool_name}({', '.join(f'{k}={v}' for k, v in list(tool_args.items())[:2])})",
                                 data={
                                     "tool_name": tool_name,
                                     "arguments": tool_args,
-                                    "result": tool_result,
                                 }
                             )
-                            await self.trace_store.add_step(tool_step)
+                            await self.trace_store.add_step(action_step)
+                            await self._dump_debug(trace_id)
+
+                        sequence += 1
+
+                        # 记录 result Step
+                        result_step_id = self._generate_id()
+                        if self.trace_store:
+                            result_step = Step(
+                                step_id=result_step_id,
+                                trace_id=trace_id,
+                                step_type="result",
+                                status="completed",
+                                sequence=sequence,
+                                parent_id=action_step_id,
+                                description=str(tool_result)[:100] if tool_result else "",
+                                data={
+                                    "tool_name": tool_name,
+                                    "output": tool_result,
+                                }
+                            )
+                            await self.trace_store.add_step(result_step)
+                            await self._dump_debug(trace_id)
 
                         sequence += 1
-                        parent_step_ids.append(tool_step_id)
 
                         yield AgentEvent("tool_result", {
-                            "step_id": tool_step_id,
+                            "step_id": result_step_id,
                             "tool_name": tool_name,
                             "result": tool_result
                         })
@@ -416,24 +508,27 @@ class AgentRunner:
                     continue  # 继续循环
 
                 # 无工具调用,任务完成
-                # 记录 conclusion Step
-                conclusion_step_id = self._generate_id()
+                # 记录 response Step
+                response_step_id = self._generate_id()
                 if self.trace_store:
-                    conclusion_step = Step(
-                        step_id=conclusion_step_id,
+                    response_step = Step(
+                        step_id=response_step_id,
                         trace_id=trace_id,
-                        step_type="conclusion",
+                        step_type="response",
+                        status="completed",
                         sequence=sequence,
-                        parent_ids=parent_step_ids,
+                        parent_id=current_goal_id,
+                        description=response_content[:100] + "..." if len(response_content) > 100 else response_content,
                         data={
                             "content": response_content,
                             "is_final": True
                         }
                     )
-                    await self.trace_store.add_step(conclusion_step)
+                    await self.trace_store.add_step(response_step)
+                    await self._dump_debug(trace_id)
 
                 yield AgentEvent("conclusion", {
-                    "step_id": conclusion_step_id,
+                    "step_id": response_step_id,
                     "content": response_content,
                     "is_final": True
                 })
@@ -511,7 +606,9 @@ class AgentRunner:
             trace_id=trace_id,
             step_type="feedback",
             sequence=max_seq + 1,
-            parent_ids=[target_step_id],
+            status="completed",
+            description=f"{feedback_type}: {content[:50]}...",
+            parent_id=target_step_id,
             data={
                 "target_step_id": target_step_id,
                 "feedback_type": feedback_type,
@@ -519,6 +616,7 @@ class AgentRunner:
             }
         )
         await self.trace_store.add_step(feedback_step)
+        await self._dump_debug(trace_id)
 
         # 提取经验
         exp_id = None
@@ -538,7 +636,9 @@ class AgentRunner:
                 trace_id=trace_id,
                 step_type="memory_write",
                 sequence=max_seq + 2,
-                parent_ids=[feedback_step.step_id],
+                status="completed",
+                description=f"保存经验: {exp.condition[:30]}...",
+                parent_id=feedback_step.step_id,
                 data={
                     "experience_id": exp_id,
                     "condition": exp.condition,
@@ -546,6 +646,7 @@ class AgentRunner:
                 }
             )
             await self.trace_store.add_step(mem_step)
+            await self._dump_debug(trace_id)
 
         return exp_id
 
@@ -562,3 +663,9 @@ class AgentRunner:
         if not experiences:
             return ""
         return "\n".join(f"- {e.to_prompt_text()}" for e in experiences)
+
+    def _format_skills(self, skills: List[Skill]) -> str:
+        """格式化 Skills 为 Prompt 文本"""
+        if not skills:
+            return ""
+        return "\n\n".join(s.to_prompt_text() for s in skills)

+ 69 - 0
agent/skills/core.md

@@ -0,0 +1,69 @@
+---
+name: core
+type: core
+description: 核心系统功能,自动加载到 System Prompt
+---
+
+# Core Skills
+
+本文档描述 Agent 的核心系统功能。
+
+---
+
+## Step 管理
+
+你可以使用 `step` 工具来管理执行计划和进度。
+
+### 何时使用
+
+- **复杂任务**(3 个以上步骤):先制定计划再执行
+- **简单任务**:直接执行,无需计划
+
+### 创建计划
+
+当任务复杂时,先制定计划:
+
+```
+step(plan=["探索代码库", "修改配置", "运行测试"])
+```
+
+### 开始执行
+
+聚焦到某个目标开始执行:
+
+```
+step(focus="探索代码库")
+```
+
+### 完成并切换
+
+完成当前目标,提供总结,切换到下一个:
+
+```
+step(complete=True, summary="主配置在 /src/config.yaml,包含数据库连接配置", focus="修改配置")
+```
+
+### 调整计划
+
+执行中发现需要增加步骤:
+
+```
+step(plan=["备份原配置"])  # 追加新目标
+```
+
+### 查看进度
+
+查看当前执行进度:
+
+```
+read_progress()
+```
+
+---
+
+## 使用规范
+
+1. **同时只有一个目标处于执行中**:完成当前目标后再切换
+2. **summary 应简洁**:记录关键结论和发现,不要冗长
+3. **计划可调整**:根据执行情况追加或跳过目标
+4. **简单任务不需要计划**:单步操作直接执行即可

+ 4 - 4
agent/storage/__init__.py

@@ -1,15 +1,15 @@
 """
 Storage 包 - 存储接口和实现
+
+TraceStore 和 MemoryTraceStore 已移动到 agent.trace 模块
 """
 
-from agent.storage.protocols import TraceStore, MemoryStore, StateStore
-from agent.storage.memory_impl import MemoryTraceStore, MemoryMemoryStore, MemoryStateStore
+from agent.storage.protocols import MemoryStore, StateStore
+from agent.storage.memory_impl import MemoryMemoryStore, MemoryStateStore
 
 __all__ = [
-    "TraceStore",
     "MemoryStore",
     "StateStore",
-    "MemoryTraceStore",
     "MemoryMemoryStore",
     "MemoryStateStore",
 ]

+ 2 - 81
agent/storage/memory_impl.py

@@ -2,95 +2,16 @@
 Memory Implementation - 内存存储实现
 
 用于测试和简单场景,数据不持久化
+
+MemoryTraceStore 已移动到 agent.trace.memory_store
 """
 
 from typing import Dict, List, Optional, Any
 from datetime import datetime
 
-from agent.models.trace import Trace, Step
 from agent.models.memory import Experience, Skill
 
 
-class MemoryTraceStore:
-    """内存 Trace 存储"""
-
-    def __init__(self):
-        self._traces: Dict[str, Trace] = {}
-        self._steps: Dict[str, Step] = {}
-        self._trace_steps: Dict[str, List[str]] = {}  # trace_id -> [step_ids]
-
-    async def create_trace(self, trace: Trace) -> str:
-        self._traces[trace.trace_id] = trace
-        self._trace_steps[trace.trace_id] = []
-        return trace.trace_id
-
-    async def get_trace(self, trace_id: str) -> Optional[Trace]:
-        return self._traces.get(trace_id)
-
-    async def update_trace(self, trace_id: str, **updates) -> None:
-        trace = self._traces.get(trace_id)
-        if trace:
-            for key, value in updates.items():
-                if hasattr(trace, key):
-                    setattr(trace, key, value)
-
-    async def list_traces(
-        self,
-        mode: Optional[str] = None,
-        agent_type: Optional[str] = None,
-        uid: Optional[str] = None,
-        status: Optional[str] = None,
-        limit: int = 50
-    ) -> List[Trace]:
-        traces = list(self._traces.values())
-
-        # 过滤
-        if mode:
-            traces = [t for t in traces if t.mode == mode]
-        if agent_type:
-            traces = [t for t in traces if t.agent_type == agent_type]
-        if uid:
-            traces = [t for t in traces if t.uid == uid]
-        if status:
-            traces = [t for t in traces if t.status == status]
-
-        # 排序(最新的在前)
-        traces.sort(key=lambda t: t.created_at, reverse=True)
-
-        return traces[:limit]
-
-    async def add_step(self, step: Step) -> str:
-        self._steps[step.step_id] = step
-
-        # 添加到 trace 的 steps 列表
-        if step.trace_id in self._trace_steps:
-            self._trace_steps[step.trace_id].append(step.step_id)
-
-        # 更新 trace 的 total_steps
-        trace = self._traces.get(step.trace_id)
-        if trace:
-            trace.total_steps += 1
-
-        return step.step_id
-
-    async def get_step(self, step_id: str) -> Optional[Step]:
-        return self._steps.get(step_id)
-
-    async def get_trace_steps(self, trace_id: str) -> List[Step]:
-        step_ids = self._trace_steps.get(trace_id, [])
-        steps = [self._steps[sid] for sid in step_ids if sid in self._steps]
-        steps.sort(key=lambda s: s.sequence)
-        return steps
-
-    async def get_step_children(self, step_id: str) -> List[Step]:
-        children = []
-        for step in self._steps.values():
-            if step_id in step.parent_ids:
-                children.append(step)
-        children.sort(key=lambda s: s.sequence)
-        return children
-
-
 class MemoryMemoryStore:
     """内存 Memory 存储(Experience + Skill)"""
 

+ 2 - 71
agent/storage/protocols.py

@@ -2,84 +2,15 @@
 Storage Protocols - 存储接口定义
 
 使用 Protocol 定义接口,允许不同的存储实现(内存、PostgreSQL、Neo4j 等)
+
+TraceStore 已移动到 agent.trace.protocols
 """
 
 from typing import Protocol, List, Optional, Dict, Any, runtime_checkable
 
-from agent.models.trace import Trace, Step
 from agent.models.memory import Experience, Skill
 
 
-@runtime_checkable
-class TraceStore(Protocol):
-    """Trace + Step 存储接口"""
-
-    # ===== Trace 操作 =====
-
-    async def create_trace(self, trace: Trace) -> str:
-        """
-        创建新的 Trace
-
-        Args:
-            trace: Trace 对象
-
-        Returns:
-            trace_id
-        """
-        ...
-
-    async def get_trace(self, trace_id: str) -> Optional[Trace]:
-        """获取 Trace"""
-        ...
-
-    async def update_trace(self, trace_id: str, **updates) -> None:
-        """
-        更新 Trace
-
-        Args:
-            trace_id: Trace ID
-            **updates: 要更新的字段
-        """
-        ...
-
-    async def list_traces(
-        self,
-        mode: Optional[str] = None,
-        agent_type: Optional[str] = None,
-        uid: Optional[str] = None,
-        status: Optional[str] = None,
-        limit: int = 50
-    ) -> List[Trace]:
-        """列出 Traces"""
-        ...
-
-    # ===== Step 操作 =====
-
-    async def add_step(self, step: Step) -> str:
-        """
-        添加 Step
-
-        Args:
-            step: Step 对象
-
-        Returns:
-            step_id
-        """
-        ...
-
-    async def get_step(self, step_id: str) -> Optional[Step]:
-        """获取 Step"""
-        ...
-
-    async def get_trace_steps(self, trace_id: str) -> List[Step]:
-        """获取 Trace 的所有 Steps(按 sequence 排序)"""
-        ...
-
-    async def get_step_children(self, step_id: str) -> List[Step]:
-        """获取 Step 的子节点"""
-        ...
-
-
 @runtime_checkable
 class MemoryStore(Protocol):
     """Experience + Skill 存储接口"""

+ 25 - 0
agent/storage/skill_loader.py

@@ -200,12 +200,16 @@ class SkillLoader:
         # 提取 Guidelines
         guidelines = self._extract_list_items(remaining_lines, "Guidelines")
 
+        # 保存完整的内容(去掉 frontmatter)
+        content = remaining_content.strip()
+
         # 创建 Skill
         return Skill.create(
             scope=scope,
             name=name,
             description=description.strip(),
             category=category,
+            content=content,  # 完整的 Markdown 内容
             guidelines=guidelines,
             parent_id=parent_id,
         )
@@ -242,12 +246,33 @@ class SkillLoader:
         # 提取指导原则
         guidelines = self._extract_list_items(lines, "Guidelines")
 
+        # 提取完整内容(去掉元数据行和标题行)
+        content_lines = []
+        skip_metadata = False
+        for line in lines:
+            stripped = line.strip()
+            # 跳过标题
+            if stripped.startswith("# "):
+                continue
+            # 跳过元数据
+            if stripped.startswith(">"):
+                skip_metadata = True
+                continue
+            # 如果之前是元数据,跳过后续的空行
+            if skip_metadata and not stripped:
+                skip_metadata = False
+                continue
+            content_lines.append(line)
+
+        content = "\n".join(content_lines).strip()
+
         # 创建 Skill
         return Skill.create(
             scope=scope,
             name=name,
             description=description.strip(),
             category=category,
+            content=content,  # 完整的 Markdown 内容
             guidelines=guidelines,
             parent_id=parent_id,
         )

+ 3 - 0
agent/tools/__init__.py

@@ -6,6 +6,9 @@ from agent.tools.registry import ToolRegistry, tool, get_tool_registry
 from agent.tools.schema import SchemaGenerator
 from agent.tools.models import ToolResult, ToolContext, ToolContextImpl
 
+# 导入 builtin 工具以触发 @tool 装饰器注册
+# noqa: F401 表示这是故意的副作用导入
+import agent.tools.builtin  # noqa: F401
 
 __all__ = [
 	"ToolRegistry",

+ 1299 - 0
agent/tools/builtin/baseClass.py

@@ -0,0 +1,1299 @@
+"""
+Browser-Use 原生工具适配器
+Native Browser-Use Tools Adapter
+
+直接使用 browser-use 的原生类(BrowserSession, Tools)实现所有浏览器操作工具。
+不依赖 Playwright,完全基于 CDP 协议。
+
+核心特性:
+1. 浏览器会话持久化 - 只启动一次浏览器
+2. 状态自动保持 - 登录状态、Cookie、LocalStorage 等
+3. 完整的底层访问 - 可以直接使用 CDP 协议
+4. 性能优异 - 避免频繁创建/销毁浏览器实例
+
+使用方法:
+1. 在 Agent 初始化时调用 init_browser_session()
+2. 使用各个工具函数执行浏览器操作
+3. 任务结束时调用 cleanup_browser_session()
+"""
+
+import sys
+import os
+from typing import Optional, List
+from pathlib import Path
+
+# 将项目根目录添加到 Python 路径
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+# 导入框架的工具装饰器和结果类
+from agent.tools import tool, ToolResult
+
+# 导入 browser-use 的核心类
+from browser_use import BrowserSession, BrowserProfile
+from browser_use.tools.service import Tools
+from browser_use.agent.views import ActionResult
+from browser_use.filesystem.file_system import FileSystem
+
+# ============================================================
+# 全局浏览器会话管理
+# ============================================================
+
+# Module-level state shared by every tool below: the browser session, the
+# browser-use Tools instance and the FileSystem used for file operations.
+# All three are None until init_browser_session() has succeeded.
+_browser_session: Optional[BrowserSession] = None
+_browser_tools: Optional[Tools] = None
+_file_system: Optional[FileSystem] = None
+
+
+async def init_browser_session(
+    headless: bool = False,
+    user_data_dir: Optional[str] = None,
+    profile_name: str = "default",
+    browser_profile: Optional[BrowserProfile] = None,
+    **kwargs
+) -> tuple[BrowserSession, Tools]:
+    """
+    初始化全局浏览器会话
+
+    Args:
+        headless: 是否无头模式
+        user_data_dir: 用户数据目录(用于保存登录状态)
+        profile_name: 配置文件名称
+        browser_profile: BrowserProfile 对象(用于预设 cookies 等)
+        **kwargs: 其他 BrowserSession 参数
+
+    Returns:
+        (BrowserSession, Tools) 元组
+    """
+    global _browser_session, _browser_tools, _file_system
+
+    if _browser_session is not None:
+        return _browser_session, _browser_tools
+
+    # 设置用户数据目录(持久化登录状态)
+    if user_data_dir is None and profile_name:
+        user_data_dir = str(Path.home() / ".browser_use" / "profiles" / profile_name)
+        Path(user_data_dir).mkdir(parents=True, exist_ok=True)
+
+    # 创建浏览器会话
+    # 明确指定 is_local=True 以确保本地浏览器启动
+    session_params = {
+        "headless": headless,
+        "is_local": True,  # 明确指定本地浏览器
+    }
+
+    # macOS 上显式指定 Chrome 路径
+    import platform
+    if platform.system() == "Darwin":  # macOS
+        chrome_path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
+        if Path(chrome_path).exists():
+            session_params["executable_path"] = chrome_path
+
+    # 只在有值时才添加 user_data_dir
+    if user_data_dir:
+        session_params["user_data_dir"] = user_data_dir
+
+    # 只在有值时才添加 browser_profile
+    if browser_profile:
+        session_params["browser_profile"] = browser_profile
+
+    # 合并其他参数
+    session_params.update(kwargs)
+
+    _browser_session = BrowserSession(**session_params)
+
+    # 启动浏览器
+    await _browser_session.start()
+
+    # 创建工具实例
+    _browser_tools = Tools()
+
+    # 创建文件系统实例(用于文件操作)
+    base_dir = Path.cwd() / ".browser_use_files"
+    base_dir.mkdir(parents=True, exist_ok=True)
+    _file_system = FileSystem(base_dir=str(base_dir))
+
+    return _browser_session, _browser_tools
+
+
+async def get_browser_session() -> tuple[BrowserSession, Tools]:
+    """
+    获取当前浏览器会话,如果不存在则自动创建
+
+    Returns:
+        (BrowserSession, Tools) 元组
+    """
+    global _browser_session, _browser_tools
+
+    if _browser_session is None:
+        await init_browser_session()
+
+    return _browser_session, _browser_tools
+
+
+async def cleanup_browser_session():
+    """
+    清理浏览器会话
+    优雅地停止浏览器但保留会话状态
+    """
+    global _browser_session, _browser_tools, _file_system
+
+    if _browser_session is not None:
+        await _browser_session.stop()
+        _browser_session = None
+        _browser_tools = None
+        _file_system = None
+
+
+async def kill_browser_session():
+    """
+    强制终止浏览器会话
+    完全关闭浏览器进程
+    """
+    global _browser_session, _browser_tools, _file_system
+
+    if _browser_session is not None:
+        await _browser_session.kill()
+        _browser_session = None
+        _browser_tools = None
+        _file_system = None
+
+
+# ============================================================
+# 辅助函数:ActionResult 转 ToolResult
+# ============================================================
+
+def action_result_to_tool_result(result: ActionResult, title: str = None) -> ToolResult:
+    """
+    将 browser-use 的 ActionResult 转换为框架的 ToolResult
+
+    Args:
+        result: browser-use 的 ActionResult
+        title: 可选的标题(如果不提供则从 result 推断)
+
+    Returns:
+        ToolResult
+    """
+    if result.error:
+        return ToolResult(
+            title=title or "操作失败",
+            output="",
+            error=result.error,
+            long_term_memory=result.long_term_memory or result.error
+        )
+
+    return ToolResult(
+        title=title or "操作成功",
+        output=result.extracted_content or "",
+        long_term_memory=result.long_term_memory or result.extracted_content or "",
+        metadata=result.metadata or {}
+    )
+
+
+# ============================================================
+# 导航类工具 (Navigation Tools)
+# ============================================================
+
+@tool()
+async def navigate_to_url(url: str, new_tab: bool = False, uid: str = "") -> ToolResult:
+    """
+    导航到指定的 URL
+    Navigate to a specific URL
+
+    使用 browser-use 的原生导航功能,支持在新标签页打开。
+
+    Args:
+        url: 要访问的 URL 地址
+        new_tab: 是否在新标签页中打开(默认 False)
+        uid: 用户 ID(由框架自动注入)
+
+    Returns:
+        ToolResult: 包含导航结果的工具返回对象
+
+    Example:
+        navigate_to_url("https://www.baidu.com")
+        navigate_to_url("https://www.google.com", new_tab=True)
+    """
+    try:
+        browser, tools = await get_browser_session()
+
+        # 使用 browser-use 的 navigate 工具
+        result = await tools.navigate(
+            url=url,
+            new_tab=new_tab,
+            browser_session=browser
+        )
+
+        return action_result_to_tool_result(result, f"导航到 {url}")
+
+    except Exception as e:
+        return ToolResult(
+            title="导航失败",
+            output="",
+            error=f"Failed to navigate to {url}: {str(e)}",
+            long_term_memory=f"导航到 {url} 失败"
+        )
+
+
+@tool()
+async def search_web(query: str, engine: str = "google", uid: str = "") -> ToolResult:
+    """
+    使用搜索引擎搜索
+    Search the web using a search engine
+
+    Args:
+        query: 搜索关键词
+        engine: 搜索引擎 (google, duckduckgo, bing) - 默认: google
+        uid: 用户 ID(由框架自动注入)
+
+    Returns:
+        ToolResult: 搜索结果
+
+    Example:
+        search_web("Python async programming", engine="google")
+    """
+    try:
+        browser, tools = await get_browser_session()
+
+        # 使用 browser-use 的 search 工具
+        result = await tools.search(
+            query=query,
+            engine=engine,
+            browser_session=browser
+        )
+
+        return action_result_to_tool_result(result, f"搜索: {query}")
+
+    except Exception as e:
+        return ToolResult(
+            title="搜索失败",
+            output="",
+            error=f"Search failed: {str(e)}",
+            long_term_memory=f"搜索 '{query}' 失败"
+        )
+
+
+@tool()
+async def go_back(uid: str = "") -> ToolResult:
+    """
+    返回到上一个页面
+    Go back to the previous page
+
+    模拟浏览器的"后退"按钮功能。
+
+    Args:
+        uid: 用户 ID(由框架自动注入)
+
+    Returns:
+        ToolResult: 包含返回操作结果的工具返回对象
+    """
+    try:
+        browser, tools = await get_browser_session()
+
+        result = await tools.go_back(browser_session=browser)
+
+        return action_result_to_tool_result(result, "返回上一页")
+
+    except Exception as e:
+        return ToolResult(
+            title="返回失败",
+            output="",
+            error=f"Failed to go back: {str(e)}",
+            long_term_memory="返回上一页失败"
+        )
+
+
+@tool()
+async def wait(seconds: int = 3, uid: str = "") -> ToolResult:
+    """
+    等待指定的秒数
+    Wait for a specified number of seconds
+
+    用于等待页面加载、动画完成或其他异步操作。
+
+    Args:
+        seconds: 等待时间(秒),最大30秒
+        uid: 用户 ID(由框架自动注入)
+
+    Returns:
+        ToolResult: 包含等待操作结果的工具返回对象
+
+    Example:
+        wait(5)  # 等待5秒
+    """
+    try:
+        browser, tools = await get_browser_session()
+
+        result = await tools.wait(seconds=seconds, browser_session=browser)
+
+        return action_result_to_tool_result(result, f"等待 {seconds} 秒")
+
+    except Exception as e:
+        return ToolResult(
+            title="等待失败",
+            output="",
+            error=f"Failed to wait: {str(e)}",
+            long_term_memory="等待失败"
+        )
+
+
+# ============================================================
+# 元素交互工具 (Element Interaction Tools)
+# ============================================================
+
+@tool()
+async def click_element(index: int, uid: str = "") -> ToolResult:
+    """
+    通过索引点击页面元素
+    Click an element by index
+
+    Args:
+        index: 元素索引(从浏览器状态中获取)
+        uid: 用户 ID(由框架自动注入)
+
+    Returns:
+        ToolResult: 包含点击操作结果的工具返回对象
+
+    Example:
+        click_element(index=5)
+
+    Note:
+        需要先通过 get_selector_map 获取页面元素索引
+    """
+    try:
+        browser, tools = await get_browser_session()
+
+        result = await tools.click(
+            index=index,
+            browser_session=browser
+        )
+
+        return action_result_to_tool_result(result, f"点击元素 {index}")
+
+    except Exception as e:
+        return ToolResult(
+            title="点击失败",
+            output="",
+            error=f"Failed to click element {index}: {str(e)}",
+            long_term_memory=f"点击元素 {index} 失败"
+        )
+
+
+@tool()
+async def input_text(index: int, text: str, clear: bool = True, uid: str = "") -> ToolResult:
+    """
+    在指定元素中输入文本
+    Input text into an element
+
+    Args:
+        index: 元素索引(从浏览器状态中获取)
+        text: 要输入的文本内容
+        clear: 是否先清除现有文本(默认 True)
+        uid: 用户 ID(由框架自动注入)
+
+    Returns:
+        ToolResult: 包含输入操作结果的工具返回对象
+
+    Example:
+        input_text(index=0, text="Hello World", clear=True)
+    """
+    try:
+        browser, tools = await get_browser_session()
+
+        result = await tools.input(
+            index=index,
+            text=text,
+            clear=clear,
+            browser_session=browser
+        )
+
+        return action_result_to_tool_result(result, f"输入文本到元素 {index}")
+
+    except Exception as e:
+        return ToolResult(
+            title="输入失败",
+            output="",
+            error=f"Failed to input text into element {index}: {str(e)}",
+            long_term_memory=f"输入文本失败"
+        )
+
+
+@tool()
+async def send_keys(keys: str, uid: str = "") -> ToolResult:
+    """
+    发送键盘按键或快捷键
+    Send keyboard keys or shortcuts
+
+    支持发送单个按键、组合键和快捷键。
+
+    Args:
+        keys: 要发送的按键字符串
+              - 单个按键: "Enter", "Escape", "PageDown", "Tab"
+              - 组合键: "Control+o", "Shift+Tab", "Alt+F4"
+              - 功能键: "F1", "F2", ..., "F12"
+        uid: 用户 ID(由框架自动注入)
+
+    Returns:
+        ToolResult: 包含按键操作结果的工具返回对象
+
+    Example:
+        send_keys("Enter")
+        send_keys("Control+A")
+    """
+    try:
+        browser, tools = await get_browser_session()
+
+        result = await tools.send_keys(
+            keys=keys,
+            browser_session=browser
+        )
+
+        return action_result_to_tool_result(result, f"发送按键: {keys}")
+
+    except Exception as e:
+        return ToolResult(
+            title="发送按键失败",
+            output="",
+            error=f"Failed to send keys: {str(e)}",
+            long_term_memory="发送按键失败"
+        )
+
+
+@tool()
+async def upload_file(index: int, path: str, uid: str = "") -> ToolResult:
+    """
+    上传文件到文件输入元素
+    Upload a file to a file input element
+
+    Args:
+        index: 文件输入框的元素索引
+        path: 要上传的文件路径(绝对路径)
+        uid: 用户 ID(由框架自动注入)
+
+    Returns:
+        ToolResult: 包含上传操作结果的工具返回对象
+
+    Example:
+        upload_file(index=7, path="/path/to/file.pdf")
+
+    Note:
+        文件必须存在且路径必须是绝对路径
+    """
+    try:
+        browser, tools = await get_browser_session()
+
+        result = await tools.upload_file(
+            index=index,
+            path=path,
+            browser_session=browser,
+            available_file_paths=[path],
+            file_system=_file_system
+        )
+
+        return action_result_to_tool_result(result, f"上传文件: {path}")
+
+    except Exception as e:
+        return ToolResult(
+            title="上传失败",
+            output="",
+            error=f"Failed to upload file: {str(e)}",
+            long_term_memory=f"上传文件 {path} 失败"
+        )
+
+
+# ============================================================
+# 滚动和视图工具 (Scroll & View Tools)
+# ============================================================
+
+@tool()
+async def scroll_page(down: bool = True, pages: float = 1.0,
+                     index: Optional[int] = None, uid: str = "") -> ToolResult:
+    """
+    滚动页面或元素
+    Scroll the page or a specific element
+
+    Args:
+        down: True 向下滚动,False 向上滚动
+        pages: 滚动页数(0.5=半页,1=全页,10=滚动到底部/顶部)
+        index: 可选,滚动特定元素(如下拉框内部)
+        uid: 用户 ID(由框架自动注入)
+
+    Returns:
+        ToolResult: 滚动结果
+
+    Example:
+        scroll_page(down=True, pages=2.0)  # 向下滚动2页
+        scroll_page(down=False, pages=1.0)  # 向上滚动1页
+    """
+    try:
+        browser, tools = await get_browser_session()
+
+        result = await tools.scroll(
+            down=down,
+            pages=pages,
+            index=index,
+            browser_session=browser
+        )
+
+        direction = "向下" if down else "向上"
+        return action_result_to_tool_result(result, f"{direction}滚动 {pages} 页")
+
+    except Exception as e:
+        return ToolResult(
+            title="滚动失败",
+            output="",
+            error=f"Failed to scroll: {str(e)}",
+            long_term_memory="滚动失败"
+        )
+
+
+@tool()
+async def find_text(text: str, uid: str = "") -> ToolResult:
+    """
+    查找页面中的文本并滚动到该位置
+    Find text on the page and scroll to it
+
+    在页面中搜索指定的文本,找到后自动滚动到该位置。
+
+    Args:
+        text: 要查找的文本内容
+        uid: 用户 ID(由框架自动注入)
+
+    Returns:
+        ToolResult: 包含查找结果的工具返回对象
+
+    Example:
+        find_text("Privacy Policy")
+    """
+    try:
+        browser, tools = await get_browser_session()
+
+        result = await tools.find_text(
+            text=text,
+            browser_session=browser
+        )
+
+        return action_result_to_tool_result(result, f"查找文本: {text}")
+
+    except Exception as e:
+        return ToolResult(
+            title="查找失败",
+            output="",
+            error=f"Failed to find text: {str(e)}",
+            long_term_memory=f"查找文本 '{text}' 失败"
+        )
+
+
+@tool()
+async def screenshot(uid: str = "") -> ToolResult:
+    """
+    请求在下次观察中包含页面截图
+    Request a screenshot to be included in the next observation
+
+    用于视觉检查页面状态,帮助理解页面布局和内容。
+
+    Args:
+        uid: 用户 ID(由框架自动注入)
+
+    Returns:
+        ToolResult: 包含截图请求结果的工具返回对象
+
+    Example:
+        screenshot()
+
+    Note:
+        截图会在下次页面观察时自动包含在结果中。
+    """
+    try:
+        browser, tools = await get_browser_session()
+
+        result = await tools.screenshot(browser_session=browser)
+
+        return action_result_to_tool_result(result, "截图请求")
+
+    except Exception as e:
+        return ToolResult(
+            title="截图失败",
+            output="",
+            error=f"Failed to capture screenshot: {str(e)}",
+            long_term_memory="截图失败"
+        )
+
+
+# ============================================================
+# 标签页管理工具 (Tab Management Tools)
+# ============================================================
+
+@tool()
+async def switch_tab(tab_id: str, uid: str = "") -> ToolResult:
+    """
+    切换到指定标签页
+    Switch to a different browser tab
+
+    Args:
+        tab_id: 4字符标签ID(target_id 的最后4位)
+        uid: 用户 ID(由框架自动注入)
+
+    Returns:
+        ToolResult: 切换结果
+
+    Example:
+        switch_tab(tab_id="a3f2")
+    """
+    try:
+        browser, tools = await get_browser_session()
+
+        result = await tools.switch(
+            tab_id=tab_id,
+            browser_session=browser
+        )
+
+        return action_result_to_tool_result(result, f"切换到标签页 {tab_id}")
+
+    except Exception as e:
+        return ToolResult(
+            title="切换标签页失败",
+            output="",
+            error=f"Failed to switch tab: {str(e)}",
+            long_term_memory=f"切换到标签页 {tab_id} 失败"
+        )
+
+
+@tool()
+async def close_tab(tab_id: str, uid: str = "") -> ToolResult:
+    """
+    关闭指定标签页
+    Close a browser tab
+
+    Args:
+        tab_id: 4字符标签ID
+        uid: 用户 ID(由框架自动注入)
+
+    Returns:
+        ToolResult: 关闭结果
+
+    Example:
+        close_tab(tab_id="a3f2")
+    """
+    try:
+        browser, tools = await get_browser_session()
+
+        result = await tools.close(
+            tab_id=tab_id,
+            browser_session=browser
+        )
+
+        return action_result_to_tool_result(result, f"关闭标签页 {tab_id}")
+
+    except Exception as e:
+        return ToolResult(
+            title="关闭标签页失败",
+            output="",
+            error=f"Failed to close tab: {str(e)}",
+            long_term_memory=f"关闭标签页 {tab_id} 失败"
+        )
+
+
+# ============================================================
+# 下拉框工具 (Dropdown Tools)
+# ============================================================
+
+@tool()
+async def get_dropdown_options(index: int, uid: str = "") -> ToolResult:
+    """
+    获取下拉框的所有选项
+    Get options from a dropdown element
+
+    Args:
+        index: 下拉框的元素索引
+        uid: 用户 ID(由框架自动注入)
+
+    Returns:
+        ToolResult: 包含所有选项的结果
+
+    Example:
+        get_dropdown_options(index=8)
+    """
+    try:
+        browser, tools = await get_browser_session()
+
+        result = await tools.dropdown_options(
+            index=index,
+            browser_session=browser
+        )
+
+        return action_result_to_tool_result(result, f"获取下拉框选项: {index}")
+
+    except Exception as e:
+        return ToolResult(
+            title="获取下拉框选项失败",
+            output="",
+            error=f"Failed to get dropdown options: {str(e)}",
+            long_term_memory=f"获取下拉框 {index} 选项失败"
+        )
+
+
+@tool()
+async def select_dropdown_option(index: int, text: str, uid: str = "") -> ToolResult:
+    """
+    选择下拉框选项
+    Select an option from a dropdown
+
+    Args:
+        index: 下拉框的元素索引
+        text: 要选择的选项文本(精确匹配)
+        uid: 用户 ID(由框架自动注入)
+
+    Returns:
+        ToolResult: 选择结果
+
+    Example:
+        select_dropdown_option(index=8, text="Option 2")
+    """
+    try:
+        browser, tools = await get_browser_session()
+
+        result = await tools.select_dropdown(
+            index=index,
+            text=text,
+            browser_session=browser
+        )
+
+        return action_result_to_tool_result(result, f"选择下拉框选项: {text}")
+
+    except Exception as e:
+        return ToolResult(
+            title="选择下拉框选项失败",
+            output="",
+            error=f"Failed to select dropdown option: {str(e)}",
+            long_term_memory=f"选择选项 '{text}' 失败"
+        )
+
+
+# ============================================================
+# 内容提取工具 (Content Extraction Tools)
+# ============================================================
+
+@tool()
+async def extract_content(query: str, extract_links: bool = False,
+                         start_from_char: int = 0, uid: str = "") -> ToolResult:
+    """
+    使用 LLM 从页面提取结构化数据
+    Extract content from the current page using LLM
+
+    Args:
+        query: 提取查询(告诉 LLM 要提取什么内容)
+        extract_links: 是否提取链接(默认 False,节省 token)
+        start_from_char: 从哪个字符开始提取(用于分页提取大内容)
+        uid: 用户 ID(由框架自动注入)
+
+    Returns:
+        ToolResult: 提取的内容
+
+    Example:
+        extract_content(query="提取页面上所有产品的名称和价格", extract_links=True)
+
+    Note:
+        需要配置 page_extraction_llm,否则会失败
+        支持分页提取,最大100k字符
+    """
+    try:
+        browser, tools = await get_browser_session()
+
+        # 注意:extract 需要 page_extraction_llm 参数
+        # 这里我们假设用户会在初始化时配置 LLM
+        # 如果没有配置,会抛出异常
+        result = await tools.extract(
+            query=query,
+            extract_links=extract_links,
+            start_from_char=start_from_char,
+            browser_session=browser,
+            page_extraction_llm=None,  # 需要用户配置
+            file_system=_file_system
+        )
+
+        return action_result_to_tool_result(result, f"提取内容: {query}")
+
+    except Exception as e:
+        return ToolResult(
+            title="内容提取失败",
+            output="",
+            error=f"Failed to extract content: {str(e)}",
+            long_term_memory=f"提取内容失败: {query}"
+        )
+
+
+@tool()
+async def get_page_html(uid: str = "") -> ToolResult:
+    """
+    获取当前页面的完整 HTML
+    Get the full HTML of the current page
+
+    返回当前页面的完整 HTML 源代码。
+
+    Args:
+        uid: 用户 ID(由框架自动注入)
+
+    Returns:
+        ToolResult: 包含页面 HTML 的工具返回对象
+
+    Example:
+        get_page_html()
+
+    Note:
+        - 返回的是完整的 HTML 源代码
+        - 输出会被限制在 10000 字符以内(完整内容保存在 metadata 中)
+    """
+    try:
+        browser, tools = await get_browser_session()
+
+        # 使用 CDP 获取页面 HTML
+        cdp = await browser.get_or_create_cdp_session()
+
+        # 获取页面内容
+        result = await cdp.cdp_client.send.Runtime.evaluate(
+            params={'expression': 'document.documentElement.outerHTML'},
+            session_id=cdp.session_id
+        )
+
+        html = result.get('result', {}).get('value', '')
+
+        # 获取 URL 和标题
+        url = await browser.get_current_page_url()
+
+        title_result = await cdp.cdp_client.send.Runtime.evaluate(
+            params={'expression': 'document.title'},
+            session_id=cdp.session_id
+        )
+        title = title_result.get('result', {}).get('value', '')
+
+        # 限制输出大小
+        output_html = html
+        if len(html) > 10000:
+            output_html = html[:10000] + "... (truncated)"
+
+        return ToolResult(
+            title=f"获取 HTML: {url}",
+            output=f"页面: {title}\nURL: {url}\n\nHTML:\n{output_html}",
+            long_term_memory=f"获取 HTML: {url}",
+            metadata={"url": url, "title": title, "html": html}
+        )
+
+    except Exception as e:
+        return ToolResult(
+            title="获取 HTML 失败",
+            output="",
+            error=f"Failed to get page HTML: {str(e)}",
+            long_term_memory="获取 HTML 失败"
+        )
+
+
+@tool()
+async def get_selector_map(uid: str = "") -> ToolResult:
+    """
+    获取当前页面的元素索引映射
+    Get the selector map of interactive elements on the current page
+
+    返回页面所有可交互元素的索引字典,用于后续的元素操作。
+
+    Args:
+        uid: 用户 ID(由框架自动注入)
+
+    Returns:
+        ToolResult: 包含元素映射的工具返回对象
+
+    Example:
+        get_selector_map()
+
+    Note:
+        返回的索引可以用于 click_element, input_text 等操作
+    """
+    try:
+        browser, tools = await get_browser_session()
+
+        # 获取选择器映射
+        selector_map = await browser.get_selector_map()
+
+        # 构建输出信息
+        elements_info = []
+        for index, node in list(selector_map.items())[:20]:  # 只显示前20个
+            tag = node.tag_name
+            attrs = node.attributes or {}
+            text = attrs.get('aria-label') or attrs.get('placeholder') or attrs.get('value', '')
+            elements_info.append(f"索引 {index}: <{tag}> {text[:50]}")
+
+        output = f"找到 {len(selector_map)} 个交互元素\n\n"
+        output += "\n".join(elements_info)
+        if len(selector_map) > 20:
+            output += f"\n... 还有 {len(selector_map) - 20} 个元素"
+
+        return ToolResult(
+            title="获取元素映射",
+            output=output,
+            long_term_memory=f"获取到 {len(selector_map)} 个交互元素",
+            metadata={"selector_map": {k: str(v) for k, v in list(selector_map.items())[:100]}}
+        )
+
+    except Exception as e:
+        return ToolResult(
+            title="获取元素映射失败",
+            output="",
+            error=f"Failed to get selector map: {str(e)}",
+            long_term_memory="获取元素映射失败"
+        )
+
+
+# ============================================================
+# JavaScript 执行工具 (JavaScript Tools)
+# ============================================================
+
+@tool()
+async def evaluate(code: str, uid: str = "") -> ToolResult:
+    """
+    在页面中执行 JavaScript 代码
+    Execute JavaScript code in the page context
+
+    允许在当前页面中执行任意 JavaScript 代码,用于复杂的页面操作或数据提取。
+
+    Args:
+        code: 要执行的 JavaScript 代码字符串
+        uid: 用户 ID(由框架自动注入)
+
+    Returns:
+        ToolResult: 包含执行结果的工具返回对象
+
+    Example:
+        evaluate("document.title")
+        evaluate("document.querySelectorAll('a').length")
+
+    Note:
+        - 代码在页面上下文中执行,可以访问 DOM 和全局变量
+        - 返回值会被自动序列化为字符串
+        - 执行结果限制在 20k 字符以内
+    """
+    try:
+        browser, tools = await get_browser_session()
+
+        result = await tools.evaluate(
+            code=code,
+            browser_session=browser
+        )
+
+        return action_result_to_tool_result(result, "执行 JavaScript")
+
+    except Exception as e:
+        return ToolResult(
+            title="JavaScript 执行失败",
+            output="",
+            error=f"Failed to execute JavaScript: {str(e)}",
+            long_term_memory="JavaScript 执行失败"
+        )
+
+
+# ============================================================
+# 文件系统工具 (File System Tools)
+# ============================================================
+
+@tool()
+async def write_file(file_name: str, content: str, append: bool = False, uid: str = "") -> ToolResult:
+    """
+    写入文件到本地文件系统
+    Write content to a local file
+
+    支持多种文件格式的写入操作。
+
+    Args:
+        file_name: 文件名(包含扩展名)
+        content: 要写入的文件内容
+        append: 是否追加模式(默认 False,覆盖写入)
+        uid: 用户 ID(由框架自动注入)
+
+    Returns:
+        ToolResult: 包含写入结果的工具返回对象
+
+    Example:
+        write_file("output.txt", "Hello World")
+        write_file("data.json", '{"key": "value"}')
+
+    Note:
+        支持的文件格式: .txt, .md, .json, .jsonl, .csv, .pdf
+    """
+    try:
+        browser, tools = await get_browser_session()
+
+        result = await tools.write_file(
+            file_name=file_name,
+            content=content,
+            append=append,
+            file_system=_file_system
+        )
+
+        return action_result_to_tool_result(result, f"写入文件: {file_name}")
+
+    except Exception as e:
+        return ToolResult(
+            title="写入文件失败",
+            output="",
+            error=f"Failed to write file: {str(e)}",
+            long_term_memory=f"写入文件 {file_name} 失败"
+        )
+
+
+@tool()
+async def read_file(file_name: str, uid: str = "") -> ToolResult:
+    """
+    读取文件内容
+    Read content from a local file
+
+    支持多种文件格式的读取操作。
+
+    Args:
+        file_name: 文件名(包含扩展名)
+        uid: 用户 ID(由框架自动注入)
+
+    Returns:
+        ToolResult: 包含文件内容的工具返回对象
+
+    Example:
+        read_file("input.txt")
+        read_file("data.json")
+
+    Note:
+        支持的文件格式: 文本文件、PDF、DOCX、图片等
+    """
+    try:
+        browser, tools = await get_browser_session()
+
+        result = await tools.read_file(
+            file_name=file_name,
+            available_file_paths=[],
+            file_system=_file_system
+        )
+
+        return action_result_to_tool_result(result, f"读取文件: {file_name}")
+
+    except Exception as e:
+        return ToolResult(
+            title="读取文件失败",
+            output="",
+            error=f"Failed to read file: {str(e)}",
+            long_term_memory=f"读取文件 {file_name} 失败"
+        )
+
+
+@tool()
+async def replace_file(file_name: str, old_str: str, new_str: str, uid: str = "") -> ToolResult:
+    """
+    替换文件中的特定文本
+    Replace specific text in a file
+
+    在文件中查找并替换指定的文本内容。
+
+    Args:
+        file_name: 文件名(包含扩展名)
+        old_str: 要替换的文本
+        new_str: 新文本
+        uid: 用户 ID(由框架自动注入)
+
+    Returns:
+        ToolResult: 包含替换结果的工具返回对象
+
+    Example:
+        replace_file("config.txt", "old_value", "new_value")
+
+    Note:
+        - 会替换文件中所有匹配的文本
+        - 如果找不到要替换的文本,会返回警告
+    """
+    try:
+        browser, tools = await get_browser_session()
+
+        result = await tools.replace_file(
+            file_name=file_name,
+            old_str=old_str,
+            new_str=new_str,
+            file_system=_file_system
+        )
+
+        return action_result_to_tool_result(result, f"替换文件内容: {file_name}")
+
+    except Exception as e:
+        return ToolResult(
+            title="替换文件失败",
+            output="",
+            error=f"Failed to replace file content: {str(e)}",
+            long_term_memory=f"替换文件 {file_name} 失败"
+        )
+
+
+# ============================================================
+# 等待用户操作工具 (Wait for User Action)
+# ============================================================
+
+@tool()
+async def wait_for_user_action(message: str = "Please complete the action in browser",
+                               timeout: int = 300, uid: str = "") -> ToolResult:
+    """
+    等待用户在浏览器中完成操作(如登录)
+    Wait for user to complete an action in the browser (e.g., login)
+
+    暂停自动化流程,等待用户手动完成某些操作(如登录、验证码等)。
+
+    Args:
+        message: 提示用户需要完成的操作
+        timeout: 最大等待时间(秒),默认 300 秒(5 分钟)
+        uid: 用户 ID(由框架自动注入)
+
+    Returns:
+        ToolResult: 包含等待结果的工具返回对象
+
+    Example:
+        wait_for_user_action("Please login to Xiaohongshu", timeout=180)
+        wait_for_user_action("Please complete the CAPTCHA", timeout=60)
+
+    Note:
+        - 用户需要在浏览器窗口中手动完成操作
+        - 完成后按回车键继续
+        - 超时后会自动继续执行
+    """
+    try:
+        import asyncio
+
+        print(f"\n{'='*60}")
+        print(f"⏸️  WAITING FOR USER ACTION")
+        print(f"{'='*60}")
+        print(f"📝 {message}")
+        print(f"⏱️  Timeout: {timeout} seconds")
+        print(f"\n👉 Please complete the action in the browser window")
+        print(f"👉 Press ENTER when done, or wait for timeout")
+        print(f"{'='*60}\n")
+
+        # Wait for user input or timeout
+        try:
+            loop = asyncio.get_event_loop()
+
+            # Wait for either user input or timeout
+            await asyncio.wait_for(
+                loop.run_in_executor(None, input),
+                timeout=timeout
+            )
+
+            return ToolResult(
+                title="用户操作完成",
+                output=f"User completed: {message}",
+                long_term_memory=f"用户完成操作: {message}"
+            )
+        except asyncio.TimeoutError:
+            return ToolResult(
+                title="用户操作超时",
+                output=f"Timeout waiting for: {message}",
+                long_term_memory=f"等待用户操作超时: {message}"
+            )
+
+    except Exception as e:
+        return ToolResult(
+            title="等待用户操作失败",
+            output="",
+            error=f"Failed to wait for user action: {str(e)}",
+            long_term_memory="等待用户操作失败"
+        )
+
+
+# ============================================================
+# 任务完成工具 (Task Completion)
+# ============================================================
+
+@tool()
+async def done(text: str, success: bool = True,
+              files_to_display: Optional[List[str]] = None, uid: str = "") -> ToolResult:
+    """
+    标记任务完成并返回最终消息
+    Mark the task as complete and return final message to user
+
+    Args:
+        text: 给用户的最终消息
+        success: 任务是否成功完成
+        files_to_display: 可选的要显示的文件路径列表
+        uid: 用户 ID(由框架自动注入)
+
+    Returns:
+        ToolResult: 完成结果
+
+    Example:
+        done("任务已完成,提取了10个产品信息", success=True)
+    """
+    try:
+        browser, tools = await get_browser_session()
+
+        result = await tools.done(
+            text=text,
+            success=success,
+            files_to_display=files_to_display,
+            file_system=_file_system
+        )
+
+        return action_result_to_tool_result(result, "任务完成")
+
+    except Exception as e:
+        return ToolResult(
+            title="标记任务完成失败",
+            output="",
+            error=f"Failed to complete task: {str(e)}",
+            long_term_memory="标记任务完成失败"
+        )
+
+
+# ============================================================
+# 导出所有工具函数(供外部使用)
+# ============================================================
+
+__all__ = [
+    # Session management
+    'init_browser_session',
+    'get_browser_session',
+    'cleanup_browser_session',
+    'kill_browser_session',
+
+    # Navigation tools
+    'navigate_to_url',
+    'search_web',
+    'go_back',
+    'wait',
+
+    # Element interaction tools
+    'click_element',
+    'input_text',
+    'send_keys',
+    'upload_file',
+
+    # Scroll and view tools
+    'scroll_page',
+    'find_text',
+    'screenshot',
+
+    # Tab management tools
+    'switch_tab',
+    'close_tab',
+
+    # Dropdown tools
+    'get_dropdown_options',
+    'select_dropdown_option',
+
+    # Content extraction tools
+    'extract_content',
+    'get_page_html',
+    'get_selector_map',
+
+    # JavaScript execution tool
+    'evaluate',
+
+    # File system tools
+    'write_file',
+    'read_file',
+    'replace_file',
+
+    # Wait for user action
+    'wait_for_user_action',
+
+    # Task completion
+    'done',
+]

+ 66 - 0
agent/trace/__init__.py

@@ -0,0 +1,66 @@
+"""
+Trace 模块 - Context 管理 + 可视化
+
+核心职责:
+1. Trace/Step 模型定义
+2. 存储接口和实现(内存/数据库)
+3. RESTful API(可视化查询)
+4. WebSocket 推送(实时更新)
+"""
+
+# 模型(核心,无依赖)
+from agent.trace.models import Trace, Step, StepType, Status
+
+# 存储接口(核心,无依赖)
+from agent.trace.protocols import TraceStore
+
+# 内存存储实现(核心,无依赖)
+from agent.trace.memory_store import MemoryTraceStore
+
+
+# API 路由(可选,需要 FastAPI)
+def _get_api_router():
+    """Lazily import the REST API router so FastAPI is not a hard dependency."""
+    from agent.trace.api import router
+    return router
+
+
+def _get_ws_router():
+    """Lazily import the WebSocket router so FastAPI is not a hard dependency."""
+    from agent.trace.websocket import router
+    return router
+
+
+# WebSocket 广播函数(可选,需要 FastAPI)
+def _get_broadcast_functions():
+    """Lazily import the WebSocket broadcast functions and return them as a 3-tuple."""
+    from agent.trace.websocket import (
+        broadcast_step_added,
+        broadcast_step_updated,
+        broadcast_trace_completed,
+    )
+    return broadcast_step_added, broadcast_step_updated, broadcast_trace_completed
+
+
+# Convenience attributes (imported only when accessed).
+# ``@property`` has no effect on module-level functions: the names would just
+# be bound to ``property`` objects. A PEP 562 module-level ``__getattr__``
+# gives real lazy attributes, so ``agent.trace.api_router`` and
+# ``agent.trace.ws_router`` resolve on first access.
+def __getattr__(name):
+    """Resolve ``api_router`` / ``ws_router`` lazily, keeping FastAPI optional."""
+    if name == "api_router":
+        return _get_api_router()
+    if name == "ws_router":
+        return _get_ws_router()
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+
+
+__all__ = [
+    # Models
+    "Trace",
+    "Step",
+    "StepType",
+    "Status",
+    # Storage
+    "TraceStore",
+    "MemoryTraceStore",
+]
+

+ 275 - 0
agent/trace/api.py

@@ -0,0 +1,275 @@
+"""
+Step 树 RESTful API
+
+提供 Trace 和 Step 的查询接口,支持懒加载
+"""
+
+from typing import List, Optional, Dict, Any
+from fastapi import APIRouter, HTTPException, Query
+from pydantic import BaseModel
+
+from agent.trace.protocols import TraceStore
+
+
+router = APIRouter(prefix="/api/traces", tags=["traces"])
+
+
+# ===== Response 模型 =====
+
+
+class TraceListResponse(BaseModel):
+    """Response body for the trace list endpoint: one dict per trace."""
+    traces: List[Dict[str, Any]]
+
+
+class TraceResponse(BaseModel):
+    """Trace metadata response; built from ``trace.to_dict()`` in ``get_trace``."""
+    trace_id: str
+    mode: str
+    task: Optional[str] = None
+    agent_type: Optional[str] = None
+    status: str
+    total_steps: int
+    total_tokens: int
+    total_cost: float
+    created_at: str
+    completed_at: Optional[str] = None
+
+
+class StepNode(BaseModel):
+    """Step node (recursive structure): a step's fields plus its child nodes."""
+    step_id: str
+    step_type: str
+    status: str
+    description: str
+    sequence: int
+    parent_id: Optional[str] = None
+    data: Optional[Dict[str, Any]] = None
+    summary: Optional[str] = None
+    duration_ms: Optional[int] = None
+    tokens: Optional[int] = None
+    cost: Optional[float] = None
+    created_at: str
+    # Nested child nodes; left empty when children were not expanded.
+    children: List["StepNode"] = []
+
+
+class TreeResponse(BaseModel):
+    """Full-tree response: the trace ID and its root-level step nodes."""
+    trace_id: str
+    root_steps: List[StepNode]
+
+
+class NodeResponse(BaseModel):
+    """Single-node response; the scalar fields are None for the virtual root."""
+    step_id: Optional[str]
+    step_type: Optional[str]
+    description: Optional[str]
+    children: List[StepNode]
+
+
+# ===== 全局 TraceStore(由 api_server.py 注入)=====
+
+
+_trace_store: Optional[TraceStore] = None
+
+
+def set_trace_store(store: TraceStore):
+    """Set the module-wide TraceStore instance used by the route handlers."""
+    global _trace_store
+    _trace_store = store
+
+
+def get_trace_store() -> TraceStore:
+    """Return the configured TraceStore.
+
+    Raises:
+        RuntimeError: If ``set_trace_store`` has not been called yet.
+    """
+    if _trace_store is None:
+        raise RuntimeError("TraceStore not initialized")
+    return _trace_store
+
+
+# ===== 路由 =====
+
+
+@router.get("", response_model=TraceListResponse)
+async def list_traces(
+    mode: Optional[str] = None,
+    agent_type: Optional[str] = None,
+    uid: Optional[str] = None,
+    status: Optional[str] = None,
+    limit: int = Query(20, le=100)
+):
+    """
+    List traces, optionally filtered.
+
+    Args:
+        mode: Mode filter (call/agent).
+        agent_type: Agent type filter.
+        uid: User ID filter.
+        status: Status filter (running/completed/failed).
+        limit: Maximum number of traces to return (default 20, at most 100).
+
+    Returns:
+        TraceListResponse wrapping each trace's ``to_dict()`` output.
+    """
+    store = get_trace_store()
+    traces = await store.list_traces(
+        mode=mode,
+        agent_type=agent_type,
+        uid=uid,
+        status=status,
+        limit=limit
+    )
+    return TraceListResponse(
+        traces=[t.to_dict() for t in traces]
+    )
+
+
+@router.get("/{trace_id}", response_model=TraceResponse)
+async def get_trace(trace_id: str):
+    """
+    Return the metadata of a single trace.
+
+    Args:
+        trace_id: Trace ID.
+
+    Raises:
+        HTTPException: 404 if the store has no trace with this ID.
+    """
+    store = get_trace_store()
+    trace = await store.get_trace(trace_id)
+    if not trace:
+        raise HTTPException(status_code=404, detail="Trace not found")
+
+    return TraceResponse(**trace.to_dict())
+
+
+@router.get("/{trace_id}/tree", response_model=TreeResponse)
+async def get_full_tree(trace_id: str):
+    """
+    Return the complete step tree of a trace (recommended for small traces).
+
+    Args:
+        trace_id: Trace ID.
+
+    Returns:
+        TreeResponse with every root step and its descendants expanded.
+
+    Raises:
+        HTTPException: 404 if the trace does not exist.
+    """
+    store = get_trace_store()
+
+    # Make sure the trace exists before building anything.
+    trace = await store.get_trace(trace_id)
+    if not trace:
+        raise HTTPException(status_code=404, detail="Trace not found")
+
+    # _build_tree loads the steps itself, so no separate fetch is needed here.
+    root_nodes = await _build_tree(store, trace_id, None, expand=True, max_depth=999)
+
+    return TreeResponse(
+        trace_id=trace_id,
+        root_steps=root_nodes
+    )
+
+
+@router.get("/{trace_id}/node/{step_id}", response_model=NodeResponse)
+async def get_node(
+    trace_id: str,
+    step_id: str,
+    expand: bool = Query(False, description="是否加载子节点"),
+    max_depth: int = Query(1, ge=1, le=10, description="递归深度")
+):
+    """
+    Lazily load one node together with its children (recommended for large traces).
+
+    Args:
+        trace_id: Trace ID.
+        step_id: Step ID; the literal string "null" addresses the virtual root.
+        expand: Whether to recurse below the direct children.
+        max_depth: Recursion depth limit used when ``expand`` is true.
+
+    Returns:
+        NodeResponse for the requested step, or one with ``None`` fields and the
+        root-level steps as children when the root was requested.
+
+    Raises:
+        HTTPException: 404 if the trace does not exist, or if the step does
+            not exist or belongs to a different trace.
+    """
+    store = get_trace_store()
+
+    # Make sure the trace exists.
+    trace = await store.get_trace(trace_id)
+    if not trace:
+        raise HTTPException(status_code=404, detail="Trace not found")
+
+    # step_id == "null" addresses the virtual root.
+    actual_step_id = None if step_id == "null" else step_id
+
+    # Validate the step (non-root only) and keep it, so the response below does
+    # not have to fetch it a second time.
+    step = None
+    if actual_step_id:
+        step = await store.get_step(actual_step_id)
+        if not step or step.trace_id != trace_id:
+            raise HTTPException(status_code=404, detail="Step not found")
+
+    # Build the subtree below the requested node.
+    children = await _build_tree(store, trace_id, actual_step_id, expand, max_depth)
+
+    # Root request: return all root-level steps under an empty node.
+    if step is None:
+        return NodeResponse(
+            step_id=None,
+            step_type=None,
+            description=None,
+            children=children
+        )
+
+    # Otherwise return the node itself plus its children.
+    return NodeResponse(
+        step_id=step.step_id,
+        step_type=step.step_type,
+        description=step.description,
+        children=children
+    )
+
+
+# ===== 核心算法:懒加载树构建 =====
+
+
+async def _build_tree(
+    store: TraceStore,
+    trace_id: str,
+    step_id: Optional[str],
+    expand: bool = False,
+    max_depth: int = 1,
+    current_depth: int = 0
+) -> List[StepNode]:
+    """
+    Core of the lazy loader: a plain recursive traversal.
+
+    Returns the nodes directly below ``step_id`` (or the root-level steps of
+    the trace when ``step_id`` is None). When ``expand`` is true and
+    ``current_depth < max_depth``, each returned node also carries its own
+    children, built recursively.
+
+    Args:
+        store: TraceStore instance.
+        trace_id: Trace ID.
+        step_id: Step whose children are listed (None means the root level).
+        expand: Whether to recurse into child nodes.
+        max_depth: Maximum recursion depth.
+        current_depth: Depth of the current call.
+
+    Returns:
+        List[StepNode]: Nodes of the current level.
+    """
+    # 1. Collect the nodes of the current level.
+    if step_id is None:
+        # Root level: every step of the trace without a parent.
+        steps = await store.get_trace_steps(trace_id)
+        current_nodes = [s for s in steps if s.parent_id is None]
+    else:
+        # Non-root: the direct children of the given step.
+        current_nodes = await store.get_step_children(step_id)
+
+    # The recursion decision is the same for every node on this level.
+    descend = expand and current_depth < max_depth
+
+    # 2. Build the response nodes.
+    result_nodes = []
+    for step in current_nodes:
+        node_dict = step.to_dict()
+
+        # 3. Recurse when requested. The recursive call fetches the children
+        #    itself and returns [] for a leaf, so no separate lookup is needed.
+        if descend:
+            node_dict["children"] = await _build_tree(
+                store, trace_id, step.step_id,
+                expand=True, max_depth=max_depth,
+                current_depth=current_depth + 1
+            )
+        else:
+            node_dict["children"] = []
+
+        result_nodes.append(StepNode(**node_dict))
+
+    return result_nodes

+ 89 - 0
agent/trace/memory_store.py

@@ -0,0 +1,89 @@
+"""
+Memory Trace Store - 内存存储实现
+
+用于测试和简单场景,数据不持久化
+"""
+
+from typing import Dict, List, Optional
+
+from agent.trace.models import Trace, Step
+
+
+class MemoryTraceStore:
+    """In-memory trace store; nothing is persisted."""
+
+    def __init__(self):
+        self._traces: Dict[str, Trace] = {}
+        self._steps: Dict[str, Step] = {}
+        self._trace_steps: Dict[str, List[str]] = {}  # trace_id -> [step_ids]
+
+    async def create_trace(self, trace: Trace) -> str:
+        """Register a trace with an empty step list and return its ID."""
+        trace_id = trace.trace_id
+        self._traces[trace_id] = trace
+        self._trace_steps[trace_id] = []
+        return trace_id
+
+    async def get_trace(self, trace_id: str) -> Optional[Trace]:
+        """Return the trace with this ID, or None if unknown."""
+        return self._traces.get(trace_id)
+
+    async def update_trace(self, trace_id: str, **updates) -> None:
+        """Set existing attributes on a trace; unknown IDs and fields are ignored."""
+        target = self._traces.get(trace_id)
+        if not target:
+            return
+        for field_name, new_value in updates.items():
+            if hasattr(target, field_name):
+                setattr(target, field_name, new_value)
+
+    async def list_traces(
+        self,
+        mode: Optional[str] = None,
+        agent_type: Optional[str] = None,
+        uid: Optional[str] = None,
+        status: Optional[str] = None,
+        limit: int = 50
+    ) -> List[Trace]:
+        """Return up to ``limit`` traces matching the given filters, newest first."""
+
+        def _matches(candidate: Trace) -> bool:
+            # A falsy filter value means "do not filter on this field".
+            if mode and candidate.mode != mode:
+                return False
+            if agent_type and candidate.agent_type != agent_type:
+                return False
+            if uid and candidate.uid != uid:
+                return False
+            if status and candidate.status != status:
+                return False
+            return True
+
+        selected = [t for t in self._traces.values() if _matches(t)]
+
+        # Newest first.
+        selected.sort(key=lambda t: t.created_at, reverse=True)
+
+        return selected[:limit]
+
+    async def add_step(self, step: Step) -> str:
+        """Store a step, link it to its trace if known, and return its ID."""
+        self._steps[step.step_id] = step
+
+        # Append to the owning trace's step list (only for registered traces).
+        step_ids = self._trace_steps.get(step.trace_id)
+        if step_ids is not None:
+            step_ids.append(step.step_id)
+
+        # Keep the trace's running step counter in sync.
+        owner = self._traces.get(step.trace_id)
+        if owner:
+            owner.total_steps += 1
+
+        return step.step_id
+
+    async def get_step(self, step_id: str) -> Optional[Step]:
+        """Return the step with this ID, or None if unknown."""
+        return self._steps.get(step_id)
+
+    async def get_trace_steps(self, trace_id: str) -> List[Step]:
+        """Return all steps of a trace ordered by ``sequence``."""
+        known_ids = self._trace_steps.get(trace_id, [])
+        return sorted(
+            (self._steps[sid] for sid in known_ids if sid in self._steps),
+            key=lambda s: s.sequence,
+        )
+
+    async def get_step_children(self, step_id: str) -> List[Step]:
+        """Return the steps whose ``parent_id`` is ``step_id``, ordered by ``sequence``."""
+        return sorted(
+            (s for s in self._steps.values() if s.parent_id == step_id),
+            key=lambda s: s.sequence,
+        )

+ 90 - 32
agent/models/trace.py → agent/trace/models.py

@@ -2,7 +2,7 @@
 Trace 和 Step 数据模型
 
 Trace: 一次完整的 LLM 交互(单次调用或 Agent 任务)
-Step: Trace 中的一个原子操作
+Step: Trace 中的一个原子操作,形成树结构
 """
 
 from dataclasses import dataclass, field
@@ -11,14 +11,34 @@ from typing import Dict, Any, List, Optional, Literal
 import uuid
 
 
+# Step 类型
 StepType = Literal[
-    "llm_call",      # LLM 调用
-    "tool_call",     # 工具调用
-    "tool_result",   # 工具结果
-    "conclusion",    # 中间/最终结论
-    "feedback",      # 人工反馈
+    # 计划相关
+    "goal",        # 目标/计划项(可以有子 steps)
+
+    # LLM 输出
+    "thought",     # 思考/分析(中间过程)
+    "evaluation",  # 评估总结(需要 summary)
+    "response",    # 最终回复
+
+    # 工具相关
+    "action",      # 工具调用(tool_call)
+    "result",      # 工具结果(tool_result)
+
+    # 系统相关
     "memory_read",   # 读取记忆(经验/技能)
     "memory_write",  # 写入记忆
+    "feedback",      # 人工反馈
+]
+
+
+# Step 状态
+Status = Literal[
+    "planned",      # 计划中(未执行)
+    "in_progress",  # 执行中
+    "completed",    # 已完成
+    "failed",       # 失败
+    "skipped",      # 跳过
 ]
 
 
@@ -28,7 +48,7 @@ class Trace:
     执行轨迹 - 一次完整的 LLM 交互
 
     单次调用: mode="call", 只有 1 个 Step
-    Agent 模式: mode="agent", 多个 Steps 形成 DAG
+    Agent 模式: mode="agent", 多个 Steps 形成树结构
     """
     trace_id: str
     mode: Literal["call", "agent"]
@@ -52,6 +72,9 @@ class Trace:
     uid: Optional[str] = None
     context: Dict[str, Any] = field(default_factory=dict)
 
+    # 当前焦点 goal(用于 step 工具)
+    current_goal_id: Optional[str] = None
+
     # 时间
     created_at: datetime = field(default_factory=datetime.now)
     completed_at: Optional[datetime] = None
@@ -83,6 +106,7 @@ class Trace:
             "total_cost": self.total_cost,
             "uid": self.uid,
             "context": self.context,
+            "current_goal_id": self.current_goal_id,
             "created_at": self.created_at.isoformat() if self.created_at else None,
             "completed_at": self.completed_at.isoformat() if self.completed_at else None,
         }
@@ -93,19 +117,31 @@ class Step:
     """
     执行步骤 - Trace 中的一个原子操作
 
-    Step 之间通过 parent_ids 形成 DAG 结构
+    Step 之间通过 parent_id 形成树结构(单父节点)
     """
     step_id: str
     trace_id: str
     step_type: StepType
+    status: Status
     sequence: int  # 在 Trace 中的顺序
 
-    # DAG 结构(支持多父节点)
-    parent_ids: List[str] = field(default_factory=list)
+    # 树结构(单父节点)
+    parent_id: Optional[str] = None
+
+    # 内容
+    description: str = ""  # 所有节点都有,系统自动提取
 
     # 类型相关数据
     data: Dict[str, Any] = field(default_factory=dict)
 
+    # 仅 evaluation 类型需要
+    summary: Optional[str] = None
+
+    # 执行指标
+    duration_ms: Optional[int] = None
+    tokens: Optional[int] = None
+    cost: Optional[float] = None
+
     # 时间
     created_at: datetime = field(default_factory=datetime.now)
 
@@ -115,17 +151,29 @@ class Step:
         trace_id: str,
         step_type: StepType,
         sequence: int,
+        status: Status = "completed",
+        description: str = "",
         data: Dict[str, Any] = None,
-        parent_ids: List[str] = None,
+        parent_id: Optional[str] = None,
+        summary: Optional[str] = None,
+        duration_ms: Optional[int] = None,
+        tokens: Optional[int] = None,
+        cost: Optional[float] = None,
     ) -> "Step":
         """创建新的 Step"""
         return cls(
             step_id=str(uuid.uuid4()),
             trace_id=trace_id,
             step_type=step_type,
+            status=status,
             sequence=sequence,
-            parent_ids=parent_ids or [],
+            parent_id=parent_id,
+            description=description,
             data=data or {},
+            summary=summary,
+            duration_ms=duration_ms,
+            tokens=tokens,
+            cost=cost,
         )
 
     def to_dict(self) -> Dict[str, Any]:
@@ -134,44 +182,54 @@ class Step:
             "step_id": self.step_id,
             "trace_id": self.trace_id,
             "step_type": self.step_type,
+            "status": self.status,
             "sequence": self.sequence,
-            "parent_ids": self.parent_ids,
+            "parent_id": self.parent_id,
+            "description": self.description,
             "data": self.data,
+            "summary": self.summary,
+            "duration_ms": self.duration_ms,
+            "tokens": self.tokens,
+            "cost": self.cost,
             "created_at": self.created_at.isoformat() if self.created_at else None,
         }
 
 
 # Step.data 结构说明
 #
-# llm_call:
+# goal:
+#   {
+#       "description": "探索代码库",
+#   }
+#
+# thought:
+#   {
+#       "content": "需要先了解项目结构...",
+#   }
+#
+# action:
 #   {
-#       "messages": [...],
-#       "response": "...",
-#       "model": "gpt-4o",
-#       "prompt_tokens": 100,
-#       "completion_tokens": 50,
-#       "cost": 0.01,
-#       "tool_calls": [...]  # 如果有
+#       "tool_name": "glob_files",
+#       "arguments": {"pattern": "**/*.py"},
 #   }
 #
-# tool_call:
+# result:
 #   {
-#       "tool_name": "search_blocks",
-#       "arguments": {...},
-#       "llm_step_id": "..."  # 哪个 LLM 调用触发的
+#       "tool_name": "glob_files",
+#       "output": ["src/main.py", ...],
+#       "title": "找到 15 个文件",
 #   }
 #
-# tool_result:
+# evaluation:
 #   {
-#       "tool_call_step_id": "...",
-#       "result": "...",
-#       "duration_ms": 123
+#       "content": "分析完成...",
 #   }
+#   # summary 字段存储简短总结
 #
-# conclusion:
+# response:
 #   {
-#       "content": "...",
-#       "is_final": True/False
+#       "content": "任务已完成...",
+#       "is_final": True,
 #   }
 #
 # feedback:

+ 79 - 0
agent/trace/protocols.py

@@ -0,0 +1,79 @@
+"""
+Trace Storage Protocol - Trace 存储接口定义
+
+使用 Protocol 定义接口,允许不同的存储实现(内存、PostgreSQL、Neo4j 等)
+"""
+
+from typing import Protocol, List, Optional, runtime_checkable
+
+from agent.trace.models import Trace, Step
+
+
+@runtime_checkable
+class TraceStore(Protocol):
+    """Storage interface for Traces and their Steps."""
+
+    # ===== Trace operations =====
+
+    async def create_trace(self, trace: Trace) -> str:
+        """
+        Create a new Trace.
+
+        Args:
+            trace: Trace object.
+
+        Returns:
+            trace_id
+        """
+        ...
+
+    async def get_trace(self, trace_id: str) -> Optional[Trace]:
+        """Fetch a Trace by ID."""
+        ...
+
+    async def update_trace(self, trace_id: str, **updates) -> None:
+        """
+        Update a Trace.
+
+        Args:
+            trace_id: Trace ID.
+            **updates: Fields to update.
+        """
+        ...
+
+    async def list_traces(
+        self,
+        mode: Optional[str] = None,
+        agent_type: Optional[str] = None,
+        uid: Optional[str] = None,
+        status: Optional[str] = None,
+        limit: int = 50
+    ) -> List[Trace]:
+        """List Traces, optionally filtered by the given fields."""
+        ...
+
+    # ===== Step operations =====
+
+    async def add_step(self, step: Step) -> str:
+        """
+        Add a Step.
+
+        Args:
+            step: Step object.
+
+        Returns:
+            step_id
+        """
+        ...
+
+    async def get_step(self, step_id: str) -> Optional[Step]:
+        """Fetch a Step by ID."""
+        ...
+
+    async def get_trace_steps(self, trace_id: str) -> List[Step]:
+        """Return all Steps of a Trace (sorted by sequence)."""
+        ...
+
+    async def get_step_children(self, step_id: str) -> List[Step]:
+        """Return the child Steps of a Step."""
+        ...

+ 181 - 0
agent/trace/websocket.py

@@ -0,0 +1,181 @@
+"""
+Step 树 WebSocket 推送
+
+实时推送进行中 Trace 的 Step 更新
+"""
+
+from typing import Dict, Set
+from fastapi import APIRouter, WebSocket, WebSocketDisconnect
+
+from agent.trace.protocols import TraceStore
+
+
+router = APIRouter(prefix="/api/traces", tags=["websocket"])
+
+
+# ===== 全局状态 =====
+
+
+_trace_store: TraceStore = None
+_active_connections: Dict[str, Set[WebSocket]] = {}  # trace_id -> Set[WebSocket]
+
+
+def set_trace_store(store: TraceStore):
+    """Set the module-wide TraceStore instance used by the WebSocket route."""
+    global _trace_store
+    _trace_store = store
+
+
+def get_trace_store() -> TraceStore:
+    """Return the configured TraceStore.
+
+    Raises:
+        RuntimeError: If ``set_trace_store`` has not been called yet.
+    """
+    if _trace_store is None:
+        raise RuntimeError("TraceStore not initialized")
+    return _trace_store
+
+
+# ===== WebSocket 路由 =====
+
+
+@router.websocket("/{trace_id}/watch")
+async def watch_trace(websocket: WebSocket, trace_id: str):
+    """
+    Watch the Step updates of a Trace over a WebSocket.
+
+    The connection is accepted first; if the trace is unknown, an error event
+    is sent and the socket is closed. Otherwise the socket is registered for
+    broadcasts and kept open until the client disconnects.
+
+    Args:
+        websocket: The incoming WebSocket connection.
+        trace_id: Trace ID.
+    """
+    await websocket.accept()
+
+    # Make sure the trace exists.
+    store = get_trace_store()
+    trace = await store.get_trace(trace_id)
+    if not trace:
+        await websocket.send_json({
+            "event": "error",
+            "message": "Trace not found"
+        })
+        await websocket.close()
+        return
+
+    # Register the connection under its trace.
+    if trace_id not in _active_connections:
+        _active_connections[trace_id] = set()
+    _active_connections[trace_id].add(websocket)
+
+    try:
+        # Confirm the subscription to the client.
+        await websocket.send_json({
+            "event": "connected",
+            "trace_id": trace_id
+        })
+
+        # Keep the connection open until the client disconnects.
+        while True:
+            try:
+                # Client messages double as a heartbeat.
+                data = await websocket.receive_text()
+                # Client requests could be handled here (e.g. full state);
+                # currently only "ping" is answered.
+                if data == "ping":
+                    await websocket.send_json({"event": "pong"})
+            except WebSocketDisconnect:
+                break
+
+    finally:
+        # Unregister the connection; drop the trace entry once it is empty.
+        if trace_id in _active_connections:
+            _active_connections[trace_id].discard(websocket)
+            if not _active_connections[trace_id]:
+                del _active_connections[trace_id]
+
+
+# ===== 广播函数(由 AgentRunner 调用)=====
+
+
+async def _broadcast(trace_id: str, message: Dict) -> None:
+    """Send ``message`` to every socket watching ``trace_id`` (best effort)."""
+    connections = _active_connections.get(trace_id)
+    if not connections:
+        return
+
+    # Iterate over a snapshot: other coroutines may add or remove watchers
+    # while we are suspended in ``send_json``, and mutating a set during
+    # iteration raises RuntimeError.
+    for websocket in list(connections):
+        try:
+            await websocket.send_json(message)
+        except Exception:
+            # A failed send is treated as a dead connection. Discard it from
+            # the set object we hold, which stays valid even if the trace
+            # entry was removed from the registry in the meantime.
+            connections.discard(websocket)
+
+
+async def broadcast_step_added(trace_id: str, step_dict: Dict):
+    """
+    Broadcast a "step_added" event.
+
+    Args:
+        trace_id: Trace ID.
+        step_dict: Step dictionary (from ``step.to_dict()``).
+    """
+    await _broadcast(trace_id, {
+        "event": "step_added",
+        "step": step_dict
+    })
+
+
+async def broadcast_step_updated(trace_id: str, step_id: str, updates: Dict):
+    """
+    Broadcast a "step_updated" event.
+
+    Args:
+        trace_id: Trace ID.
+        step_id: Step ID.
+        updates: Updated fields.
+    """
+    await _broadcast(trace_id, {
+        "event": "step_updated",
+        "step_id": step_id,
+        "updates": updates
+    })
+
+
+async def broadcast_trace_completed(trace_id: str, total_steps: int):
+    """
+    Broadcast a "trace_completed" event and forget the trace's watchers.
+
+    Args:
+        trace_id: Trace ID.
+        total_steps: Total number of steps.
+    """
+    await _broadcast(trace_id, {
+        "event": "trace_completed",
+        "trace_id": trace_id,
+        "total_steps": total_steps
+    })
+
+    # Drop all registrations once the trace is complete. ``pop`` tolerates the
+    # entry having been removed already by a disconnecting watcher.
+    _active_connections.pop(trace_id, None)

+ 85 - 0
api_server.py

@@ -0,0 +1,85 @@
+"""
+API Server - FastAPI 应用入口
+
+聚合所有模块的 API 路由(step_tree、未来的 memory 等)
+"""
+
+import logging
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+import uvicorn
+
+from agent.trace import MemoryTraceStore
+from agent.trace.api import router as api_router, set_trace_store as set_api_trace_store
+from agent.trace.websocket import router as ws_router, set_trace_store as set_ws_trace_store
+
+
+# ===== 日志配置 =====
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s"
+)
+logger = logging.getLogger(__name__)
+
+
+# ===== FastAPI application =====
+
+app = FastAPI(
+    title="Agent Step Tree API",
+    description="Step 树可视化 API",
+    version="1.0.0"
+)
+
+# CORS configuration (lets a frontend on another origin call the API)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Production deployments should list specific origins
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+
+# ===== Storage initialisation =====
+
+# In-memory store (can be replaced with PostgreSQL later)
+trace_store = MemoryTraceStore()
+
+# Inject the same store into the REST API and WebSocket modules
+set_api_trace_store(trace_store)
+set_ws_trace_store(trace_store)
+
+
+# ===== Route registration =====
+
+# Step tree RESTful API
+app.include_router(api_router)
+
+# Step tree WebSocket
+app.include_router(ws_router)
+
+# ===== 健康检查 =====
+
+@app.get("/health")
+async def health_check():
+    """Health check: return a static status payload."""
+    return {
+        "status": "ok",
+        "service": "Agent Step Tree API",
+        "version": "1.0.0"
+    }
+
+
+# ===== 启动服务 =====
+
+if __name__ == "__main__":
+    logger.info("Starting API server...")
+    uvicorn.run(
+        "api_server:app",
+        host="0.0.0.0",
+        port=8000,
+        reload=True,  # Development mode (auto-reload)
+        log_level="info"
+    )

+ 58 - 34
docs/README.md

@@ -185,13 +185,18 @@ class Trace:
 class Step:
     step_id: str
     trace_id: str
-    step_type: StepType  # "llm_call", "tool_call", "tool_result", ...
-    parent_ids: List[str] = field(default_factory=list)
+    step_type: StepType    # "goal", "thought", "action", "result", "evaluation", "response"
+    status: Status         # "planned", "in_progress", "completed", "failed", "skipped"
+    parent_id: Optional[str] = None  # 树结构(单父节点)
+    description: str = ""            # 系统自动提取
     data: Dict[str, Any] = field(default_factory=dict)
+    summary: Optional[str] = None    # 仅 evaluation 类型需要
 ```
 
 **实现**:`agent/trace/models.py:Step`
 
+**详细设计**:参考 [`docs/step-tree.md`](./step-tree.md)
+
 ---
 
 ## 模块详情
@@ -213,6 +218,13 @@ class Step:
 
 **使用示例**:`examples/subagent_example.py`
 
+### [Step 树与 Context 管理](./step-tree.md)
+- Step 类型:goal、thought、action、result、evaluation、response
+- Step 状态:planned、in_progress、completed、failed、skipped
+- 树结构:统一表达计划和执行
+- step 工具:计划管理和进度更新
+- Context 压缩:基于树结构的历史消息压缩
+
 ### [工具系统](./tools.md)
 - 工具定义和注册
 - 双层记忆管理
@@ -304,51 +316,40 @@ messages = prompt.build_messages(text="...", images="img.png")
 
 ### Skills(技能库)
 
-**存储**:Markdown 文件 + 环境配置代码
-
-```
-./agent/skills/                # Skills 目录
-├── browser_use/              # browser-use skill
-│   ├── browser-use.md        # 使用文档
-│   ├── setup.py              # 环境配置(依赖检查和安装)
-│   └── __init__.py           # 模块导出
-└── [其他 skills]/
-```
-
-**格式**:
+**分类**:
 
-```markdown
----
-name: error-handling
-description: Error handling best practices
----
+| 类型 | 加载位置 | 加载时机 |
+|------|---------|---------|
+| **Core Skill** | System Prompt | Agent 启动时自动 |
+| **普通 Skill** | 对话消息 | 模型调用 `skill` 工具时 |
 
-## When to use
-- Analyzing error logs
-- Debugging production issues
+**目录结构**:
 
-## Guidelines
-- Look for stack traces first
-- Check error frequency
-- Group by error type
+```
+./agent/skills/
+├── core.md                   # Core Skill(自动加载到 System Prompt)
+└── browser_use/              # 普通 Skill(按需加载到对话消息)
+    ├── browser-use.md
+    ├── setup.py
+    └── __init__.py
 ```
 
-**加载**:通过 `skill` 工具动态加载
+**Core Skill**(`agent/skills/core.md`):
+- 核心系统功能:Step 管理、进度追踪
+- 框架自动注入到 System Prompt
+
+**普通 Skill**:通过 `skill` 工具动态加载
 
-Agent 在需要时调用 `skill` 工具:
 ```python
-# Agent 运行时
+# Agent 运行时调用
 await tools.execute("skill", {"skill_name": "browser-use"})
-# 自动检查环境依赖,加载使用文档
+# 内容注入到对话历史
 ```
 
-工具会读取文件并返回内容,注入到对话历史中。
-
 **实现**:
 - `agent/storage/skill_loader.py:SkillLoader` - Markdown 解析器
 - `agent/tools/builtin/skill.py:skill()` - skill 工具实现
 - `agent/tools/builtin/skill.py:list_skills()` - 列出可用 skills
-- `agent/skills/*/setup.py` - 环境配置(可选,每个 skill 可自定义)
 
 **详细文档**:参考 [`docs/skills.md`](./skills.md)
 
@@ -485,6 +486,28 @@ agent/
 
 ---
 
+## Debug 工具
+
+开发调试时可实时查看 Step 树:
+
+```python
+from agent.debug import dump_tree
+
+# 每次 step 变化后调用
+dump_tree(trace, steps)
+```
+
+```bash
+# 终端实时查看
+watch -n 0.5 cat .trace/tree.txt
+```
+
+**实现**:`agent/debug/tree_dump.py`
+
+**详细说明**:参考 [`docs/step-tree.md`](./step-tree.md#debug-工具)
+
+---
+
 ## 测试
 
 详见 [测试指南](./testing.md)
@@ -513,7 +536,8 @@ GEMINI_API_KEY=xxx pytest tests/e2e/ -v -m e2e
 | 概念 | 定义 | 存储 | 实现 |
 |------|------|------|------|
 | **Trace** | 一次任务执行 | 文件系统(JSON) | `trace/models.py` |
-| **Step** | 执行步骤 | 文件系统(JSON) | `models/trace.py` |
+| **Step** | 执行步骤(树结构) | 文件系统(JSON) | `trace/models.py` |
+| **Goal Step** | 计划项/目标 | Step 的一种类型 | `trace/models.py` |
 | **Sub-Agent** | 专门化的子代理 | 独立 Trace | `tools/builtin/task.py` |
 | **AgentDefinition** | Agent 类型定义 | 配置文件/代码 | `models/agent.py` |
 | **Skill** | 能力描述(Markdown) | 文件系统 | `storage/skill_fs.py` |

+ 162 - 0
docs/decisions.md

@@ -347,6 +347,168 @@ async def advanced_search(
 
 ---
 
+## 11. Step 树结构 vs DAG
+
+### 问题
+Step 之间的关系应该是树(单父节点)还是 DAG(多父节点)?
+
+### 方案对比
+
+| 方案 | 优点 | 缺点 |
+|------|------|------|
+| **DAG(多父节点)** | 能精确表达并行汇合 | 复杂,难以折叠/展开 |
+| **树(单父节点)** | 简单,天然支持折叠 | 并行汇合需要其他方式表达 |
+
+### 决策
+**选择:树结构(单父节点)**
+
+**理由**:
+1. **可视化友好**:树结构天然支持折叠/展开
+2. **足够表达**:并行工具调用可以是同一父节点的多个子节点
+3. **简化实现**:不需要处理复杂的 DAG 遍历
+
+**实现**:`Step.parent_id: Optional[str]`(单个值,不是列表)
+
+---
+
+## 12. 计划管理:统一到 Step 树 vs 独立 TODO 列表
+
+### 问题
+Agent 的计划(TODO)应该如何管理?
+
+### 方案对比
+
+| 方案 | 优点 | 缺点 |
+|------|------|------|
+| **独立 TODO 列表**(OpenCode 方式) | 简单,与执行分离 | 计划与执行无结构化关联 |
+| **统一到 Step 树** | 计划和执行在同一结构中,可追踪关联 | 稍复杂 |
+
+### 决策
+**选择:统一到 Step 树**
+
+**设计**:
+- `Step.status = "planned"` 表示计划中的步骤
+- `Step.step_type = "goal"` 表示计划项/目标
+- 模型通过 `step` 工具管理计划
+
+**理由**:
+1. **统一模型**:不需要额外的 TODO 数据结构
+2. **可追踪**:执行步骤自动关联到计划项
+3. **可视化**:计划和执行在同一棵树中展示
+
+**参考**:OpenCode 的 `todowrite`/`todoread` 工具(`src/tool/todo.ts`)
+
+---
+
+## 13. Summary 生成策略
+
+### 问题
+哪些 Step 需要生成 summary?
+
+### 决策
+**选择:仅 evaluation 类型节点需要 summary**
+
+**理由**:
+1. **避免浪费**:不是每个 step 都需要总结
+2. **有意义的总结**:evaluation 是对一组操作的评估,值得总结
+3. **节省资源**:减少 LLM 调用次数
+
+**实现**:
+- `Step.summary` 字段可选
+- 仅在 `step_type == "evaluation"` 时填充
+- `action`/`result`(即工具调用/工具结果)不需要 summary,直接从 `data` 提取关键信息
+
+---
+
+## 14. Context 压缩策略
+
+### 问题
+当消息历史过长时,如何压缩?
+
+### 决策
+**选择:基于树结构的分层压缩**
+
+**设计**:
+- **Todo 格式(简略)**:仅选择 `goal` 类型节点
+- **历史压缩格式(详细)**:选择 `goal` + `result` + `evaluation` 节点
+
+**触发时机**:
+- 正常情况:模型通过工具按需读取进度
+- 压缩时(context 超 70%):自动注入详细历史摘要
+
+**理由**:
+1. **信息分层**:不同用途需要不同详略程度
+2. **节点选择**:关键是选择哪些节点,而非每个节点展示什么
+3. **按需读取**:正常情况不浪费 context
+
+---
+
+## 15. Step 元数据设置策略
+
+### 问题
+Step 的元数据(step_type、description、parent_id 等)如何设置?
+
+### 方案对比
+
+| 方案 | 优点 | 缺点 |
+|------|------|------|
+| **LLM 显式输出** | 准确 | 需要 LLM 配合特定格式,增加复杂度 |
+| **系统自动推断** | 简单,不需要 LLM 额外输出 | 可能不够准确 |
+| **混合** | 平衡准确性和简洁性 | 需要明确划分 |
+
+### 决策
+**选择:系统自动推断为主,显式工具调用为辅**
+
+**设计**:
+- **系统自动记录**:`step_id`、`parent_id`、`tokens`、`cost`、`duration_ms`、`created_at`
+- **系统推断**:`step_type`(基于输出内容)、`description`(从输出提取)
+- **显式声明**(通过 step 工具):`goal`、`evaluation`(summary)
+
+**step_type 推断规则**:
+1. 有工具调用 → `action`
+2. 调用 step 工具且 complete=True → `evaluation`
+3. 调用 step 工具且 plan 不为空 → `goal`
+4. 最终回复 → `response`
+5. 默认 → `thought`
+
+**理由**:
+1. **简化 LLM 负担**:不需要输出特定格式的元数据
+2. **step 工具是显式意图**:计划和评估通过工具明确声明
+3. **其他类型自动推断**:`thought`、`action`、`result`、`response` 可从输出内容判断
+
+---
+
+## 16. Skill 分层:Core Skill vs 普通 Skill
+
+### 问题
+Step 工具等核心功能如何让 Agent 知道?
+
+### 方案对比
+
+| 方案 | 优点 | 缺点 |
+|------|------|------|
+| **写在 System Prompt** | 始终可见 | 每次消耗 token,内容膨胀 |
+| **作为普通 Skill** | 按需加载 | 模型不知道存在就不会加载 |
+| **分层:Core + 普通** | 核心功能始终可见,其他按需 | 需要区分两类 |
+
+### 决策
+**选择:Skill 分层**
+
+**设计**:
+- **Core Skill**:`agent/skills/core.md`,自动注入到 System Prompt
+- **普通 Skill**:`agent/skills/{name}/`,通过 `skill` 工具加载到对话消息
+
+**理由**:
+1. **核心功能必须可见**:Step 管理等功能,模型需要始终知道
+2. **避免 System Prompt 膨胀**:只有核心内容在 System Prompt
+3. **普通 Skill 按需加载**:领域知识在需要时才加载,节省 token
+
+**实现**:
+- Core Skill:框架在 `build_system_prompt()` 时自动读取并拼接
+- 普通 Skill:模型调用 `skill` 工具时返回内容到对话消息
+
+---
+
 ## 总结
 
 这些设计决策的核心原则:

+ 35 - 2
docs/skills.md

@@ -1,8 +1,41 @@
 # Skills 使用指南
 
-Skills 是 Agent 的领域知识库,存储在 Markdown 文件中。Agent 通过 `skill` 工具按需加载。
+Skills 是 Agent 的领域知识库,存储在 Markdown 文件中。
 
-## Skill 文件格式
+---
+
+## Skill 分类
+
+| 类型 | 加载位置 | 加载时机 | 文件位置 |
+|------|---------|---------|---------|
+| **Core Skill** | System Prompt | Agent 启动时自动加载 | `agent/skills/core.md` |
+| **普通 Skill** | 对话消息 | 模型调用 `skill` 工具时 | `agent/skills/{name}/` |
+
+### Core Skill
+
+核心系统功能,每个 Agent 都需要了解:
+
+- Step 管理(计划、执行、进度)
+- 其他系统级功能
+
+**位置**:`agent/skills/core.md`
+
+**加载方式**:框架自动注入到 System Prompt
+
+### 普通 Skill
+
+特定领域能力,按需加载:
+
+- browser_use(浏览器自动化)
+- 其他领域 skills
+
+**位置**:`agent/skills/{name}/`
+
+**加载方式**:模型调用 `skill` 工具
+
+---
+
+## 普通 Skill 文件格式
 
 ```markdown
 ---

+ 624 - 0
docs/step-tree.md

@@ -0,0 +1,624 @@
+# Step 树结构与 Context 管理
+
+> 本文档描述 Agent 执行过程的结构化记录、计划管理和 Context 压缩机制。
+
+---
+
+## 设计目标
+
+1. **可视化**:支持执行路径的树状展示,可折叠/展开
+2. **计划管理**:统一表达"已执行"和"计划中"的步骤
+3. **Context 优化**:基于树结构压缩历史消息,节省 token
+
+---
+
+## 核心设计:Step 树
+
+### Step 类型
+
+```python
+StepType = Literal[
+    # 计划相关
+    "goal",        # 目标/计划项(可以有子 steps)
+
+    # LLM 输出
+    "thought",     # 思考/分析(中间过程)
+    "evaluation",  # 评估总结(需要 summary)
+    "response",    # 最终回复
+
+    # 工具相关
+    "action",      # 工具调用(tool_call)
+    "result",      # 工具结果(tool_result)
+]
+```
+
+| 类型 | 来源 | 说明 |
+|------|------|------|
+| `goal` | LLM(通过 step 工具) | 设定目标/计划 |
+| `thought` | LLM | 中间思考,不产生工具调用 |
+| `evaluation` | LLM(通过 step 工具) | 对一组操作的总结,需要 summary |
+| `response` | LLM | 最终给用户的回复 |
+| `action` | System | LLM 决定调用工具,系统记录 |
+| `result` | System | 工具执行结果 |
+
+### Step 状态
+
+```python
+Status = Literal[
+    "planned",      # 计划中(未执行)
+    "in_progress",  # 执行中
+    "completed",    # 已完成
+    "failed",       # 失败
+    "skipped",      # 跳过
+]
+```
+
+### Step 模型
+
+```python
+@dataclass
+class Step:
+    step_id: str
+    trace_id: str
+    step_type: StepType
+    status: Status
+    sequence: int
+
+    # 树结构(单父节点)
+    parent_id: Optional[str] = None
+
+    # 内容
+    description: str                      # 所有节点都有
+    data: Dict[str, Any] = field(default_factory=dict)
+
+    # 仅 evaluation 类型需要
+    summary: Optional[str] = None
+
+    # 执行指标
+    duration_ms: Optional[int] = None
+    cost: Optional[float] = None
+    tokens: Optional[int] = None
+
+    # 时间
+    created_at: datetime = field(default_factory=datetime.now)
+```
+
+**关键点**:
+- `parent_id` 是单个值(树结构),不是列表(DAG)
+- `summary` 仅在 `evaluation` 类型节点填充,不是每个节点都需要
+- `planned` 状态的 step 相当于 TODO item
+
+---
+
+## 树结构示例
+
+```
+Trace
+├── goal: "探索代码库" (completed)
+│   ├── thought: "需要先了解项目结构"
+│   ├── action: glob_files
+│   ├── result: [15 files...]
+│   ├── thought: "发现配置文件,需要查看内容"
+│   ├── action: read_file
+│   ├── result: [content...]
+│   └── evaluation: "主配置在 /src/config.yaml" ← summary
+│
+├── goal: "修改配置" (in_progress)
+│   ├── action: read_file
+│   └── result: [content...]
+│
+└── goal: "运行测试" (planned)
+```
+
+### Parent 关系规则
+
+| Step 类型 | parent 是谁 |
+|----------|------------|
+| `goal` | 上一个 `goal`(或 None) |
+| `thought` | 当前 `in_progress` 的 `goal` |
+| `action` | 当前 `in_progress` 的 `goal` |
+| `result` | 对应的 `action` |
+| `evaluation` | 所属的 `goal` |
+| `response` | 当前 `in_progress` 的 `goal`(或 None) |
+
+---
+
+## 元数据设置
+
+### 系统自动记录
+
+以下字段由系统自动填充,不需要 LLM 参与:
+
+```python
+step_id: str          # 自动生成
+parent_id: str        # 根据当前 focus 的 goal 自动设置
+step_type: StepType   # 根据 LLM 输出推断(见下)
+sequence: int         # 递增序号
+tokens: int           # API 返回
+cost: float           # 计算得出
+duration_ms: int      # 计时
+created_at: datetime  # 当前时间
+```
+
+### Step 类型推断
+
+系统根据 LLM 输出内容自动推断类型,不需要显式声明:
+
+```python
+def infer_step_type(llm_response) -> StepType:
+    # 有工具调用 → action
+    if llm_response.tool_calls:
+        return "action"
+
+    # 调用了 step 工具且 complete=True → evaluation
+    if called_step_tool(llm_response, complete=True):
+        return "evaluation"
+
+    # 调用了 step 工具且 plan 不为空 → goal
+    if called_step_tool(llm_response, plan=True):
+        return "goal"
+
+    # 最终回复(无后续工具调用,对话结束)
+    if is_final_response(llm_response):
+        return "response"
+
+    # 默认:中间思考
+    return "thought"
+```
+
+### description 提取
+
+`description` 字段由系统从 LLM 输出中提取:
+
+| Step 类型 | description 来源 |
+|----------|-----------------|
+| `goal` | step 工具的 plan 参数 |
+| `thought` | LLM 输出的第一句话(或截断) |
+| `action` | 工具名 + 关键参数 |
+| `result` | 工具返回的 title 或简要输出 |
+| `evaluation` | step 工具的 summary 参数 |
+| `response` | LLM 输出的第一句话(或截断) |
+
+---
+
+## 计划管理工具
+
+### step 工具
+
+模型通过 `step` 工具管理执行进度:
+
+```python
+@tool
+def step(
+    plan: Optional[List[str]] = None,     # 添加 planned goals
+    focus: Optional[str] = None,          # 切换焦点到哪个 goal
+    complete: bool = False,               # 完成当前 goal
+    summary: Optional[str] = None,        # 评估总结(配合 complete)
+):
+    """管理执行步骤"""
+```
+
+### 使用示例
+
+```python
+# 1. 创建计划
+step(plan=["探索代码库", "修改配置", "运行测试"])
+
+# 2. 开始执行第一个
+step(focus="探索代码库")
+
+# 3. [执行各种 tool_call...]
+
+# 4. 完成并切换到下一个
+step(complete=True, summary="主配置在 /src/config.yaml", focus="修改配置")
+
+# 5. 中途调整计划
+step(plan=["备份配置"])  # 追加新的 goal
+```
+
+### 状态变化
+
+```
+调用 step(plan=["A", "B", "C"]) 后:
+├── goal: "A" (planned)
+├── goal: "B" (planned)
+└── goal: "C" (planned)
+
+调用 step(focus="A") 后:
+├── goal: "A" (in_progress) ← 当前焦点
+├── goal: "B" (planned)
+└── goal: "C" (planned)
+
+调用 step(complete=True, summary="...", focus="B") 后:
+├── goal: "A" (completed)
+│   └── evaluation: "..." ← 自动创建
+├── goal: "B" (in_progress) ← 新焦点
+└── goal: "C" (planned)
+```
+
+---
+
+## Context 管理
+
+### 信息分层
+
+不同用途需要不同的信息粒度:
+
+| 用途 | 选择哪些节点 | 详略程度 |
+|------|-------------|---------|
+| **Todo 列表** | 仅 `goal` 类型 | 简略:描述 + 状态 |
+| **历史压缩** | `goal` + `result` + `evaluation` | 详细:包含关键结果 |
+
+### Todo 格式(简略)
+
+```python
+def to_todo_string(tree: StepTree) -> str:
+    lines = []
+    for goal in tree.filter(step_type="goal"):
+        icon = {"completed": "✓", "in_progress": "→", "planned": " "}[goal.status]
+        lines.append(f"[{icon}] {goal.description}")
+    return "\n".join(lines)
+```
+
+输出:
+```
+[✓] 探索代码库
+[→] 修改配置
+[ ] 运行测试
+```
+
+### 历史压缩格式(详细)
+
+```python
+def to_history_string(tree: StepTree) -> str:
+    lines = []
+    for goal in tree.filter(step_type="goal"):
+        status_label = {"completed": "完成", "in_progress": "进行中", "planned": "待做"}
+        lines.append(f"[{status_label[goal.status]}] {goal.description}")
+
+        if goal.status == "completed":
+            # 选择关键结果节点
+            for step in goal.children():
+                if step.step_type == "result":
+                    lines.append(f"  → {extract_brief(step.data)}")
+                elif step.step_type == "evaluation":
+                    lines.append(f"  总结: {step.summary}")
+
+    return "\n".join(lines)
+```
+
+输出:
+```
+[完成] 探索代码库
+  → glob_files: 找到 15 个文件
+  → read_file(config.yaml): db_host=prod.db.com
+  总结: 主配置在 /src/config.yaml,包含数据库连接配置
+
+[进行中] 修改配置
+  → read_file(config.yaml): 已读取
+
+[待做] 运行测试
+```
+
+### 压缩触发
+
+```python
+def build_messages(messages: List, tree: StepTree) -> List:
+    # 正常情况:不压缩
+    if estimate_tokens(messages) < MAX_CONTEXT * 0.7:
+        return messages
+
+    # 超限时:用树摘要替代历史详情
+    history_summary = tree.to_history_string()
+    summary_msg = {"role": "assistant", "content": history_summary}
+
+    # 保留最近的详细消息
+    return [summary_msg] + recent_messages(messages)
+```
+
+### 按需读取
+
+模型可通过工具读取当前进度,而非每次都注入:
+
+```python
+@tool
+def read_progress() -> str:
+    """读取当前执行进度"""
+    return tree.to_todo_string()
+```
+
+**策略**:
+- 正常情况:模型通过 `read_progress` 按需读取(省 context)
+- 压缩时:自动注入详细历史摘要(保证不丢失)
+
+---
+
+## 可视化支持
+
+树结构天然支持可视化:
+
+- **折叠**:折叠某个 `goal` 节点 → 隐藏其子节点
+- **展开**:展示子节点详情
+- **回溯**:`failed` 或 `skipped` 状态的分支
+- **并行**:同一 `goal` 下的多个 `action`(并行工具调用)
+
+### 边的信息
+
+可视化时,边(连接线)可展示:
+- 执行时间:`Step.duration_ms`
+- 成本:`Step.cost`
+- 简要描述:`Step.description`
+
+---
+
+## 与 OpenCode 的对比
+
+| 方面 | OpenCode | 本设计 |
+|------|----------|--------|
+| 计划存储 | Markdown 文件 + Todo 列表 | Step 树(`planned` 状态) |
+| 计划与执行关联 | 无结构化关联 | 统一在树结构中 |
+| 进度读取 | `todoread` 工具 | `read_progress` 工具 |
+| 进度更新 | `todowrite` 工具 | `step` 工具 |
+| Context 压缩 | 无 | 基于树结构自动压缩 |
+
+**参考**:OpenCode 的实现见 `src/tool/todo.ts`、`src/session/prompt.ts`
+
+---
+
+## Debug 工具
+
+### 实时查看 Step 树
+
+开发调试时,可以将 Step 树导出为三种格式:
+
+```python
+from agent.debug import dump_tree, dump_markdown, dump_json
+
+# 1. 文本格式(简洁,带截断)
+dump_tree(trace, steps)  # 输出到 .trace/tree.txt
+
+# 2. Markdown 格式(完整,可折叠)
+dump_markdown(trace, steps)  # 输出到 .trace/tree.md
+
+# 3. JSON 格式(程序化分析)
+dump_json(trace, steps)  # 输出到 .trace/tree.json
+```
+
+**自动生成**:在 `AgentRunner` 的 debug 模式下,会自动生成 `tree.txt` 和 `tree.md` 两个文件。
+
+### 三种格式对比
+
+| 格式 | 文件大小 | 内容完整性 | 适用场景 |
+|-----|---------|----------|---------|
+| **tree.txt** | 小(1-2KB) | 截断长内容 | 快速预览、终端查看 |
+| **tree.md** | 中(5-10KB) | 完整内容 | 详细调试、编辑器查看 |
+| **tree.json** | 大(可能>10KB) | 完整结构化 | 程序化分析、工具处理 |
+
+### Markdown 格式特性
+
+**完整可折叠**:使用 HTML `<details>` 标签实现原生折叠
+
+```markdown
+<details>
+<summary><b>📨 Messages</b></summary>
+
+```json
+[完整的 messages 内容]
+```
+
+</details>
+```
+
+**智能截断**:
+- ✅ **文本内容**:完整显示,不截断
+- ✅ **工具调用**:完整显示 JSON schema
+- ✅ **图片 base64**:智能截断,显示大小和预览
+
+示例输出:
+```json
+{
+  "type": "image_url",
+  "image_url": {
+    "url": "<IMAGE_DATA: 2363.7KB, data:image/png;base64, preview: iVBORw0KGgo...>"
+  }
+}
+```
+
+### 查看方式
+
+```bash
+# 方式1:终端实时刷新(tree.txt)
+watch -n 0.5 cat .trace/tree.txt
+
+# 方式2:VS Code 打开(tree.md,支持折叠)
+code .trace/tree.md
+
+# 方式3:浏览器预览(tree.md)
+# 在 VS Code 中右键 → "Open Preview" 或使用 Markdown 预览插件
+```
+
+### tree.txt 输出示例
+
+```
+============================================================
+ Step Tree Debug
+ Generated: 2024-01-15 14:30:25
+============================================================
+
+## Trace
+  trace_id: abc123
+  task: 修改配置文件
+  status: running
+  total_steps: 5
+  total_tokens: 1234
+  total_cost: 0.0150
+
+## Steps
+
+├── [✓] goal: 探索代码库
+│   id: a1b2c3d4...
+│   duration: 1234ms
+│   tokens: 500
+│   cost: $0.0050
+│   data:
+│     description: 探索代码库
+│   time: 14:30:10
+│
+│   ├── [✓] thought: 需要先了解项目结构
+│   │   id: e5f6g7h8...
+│   │   data:
+│   │     content: 让我先看看项目的目录结构...
+│   │   time: 14:30:11
+│   │
+│   ├── [✓] action: glob_files
+│   │   id: i9j0k1l2...
+│   │   duration: 50ms
+│   │   data:
+│   │     tool_name: glob_files
+│   │     arguments: {"pattern": "**/*.py"}
+│   │   time: 14:30:12
+│   │
+│   └── [✓] result: 找到 15 个文件
+│       id: m3n4o5p6...
+│       data:
+│         output: ["src/main.py", "src/config.py", ...]
+│       time: 14:30:12
+│
+└── [→] goal: 修改配置
+    id: q7r8s9t0...
+    time: 14:30:15
+```
+
+**实现**:`agent/debug/tree_dump.py`
+
+---
+
+## 实现位置
+
+- Step 模型:`agent/trace/models.py:Step`(已实现)
+- Trace 模型:`agent/trace/models.py:Trace`(已实现)
+- 存储接口:`agent/trace/protocols.py:TraceStore`(已实现)
+- 内存存储:`agent/trace/memory_store.py:MemoryTraceStore`(已实现)
+- Debug 工具:`agent/debug/tree_dump.py`(已实现)
+- **Core Skill**:`agent/skills/core.md`(已实现)
+- step 工具:`agent/tools/builtin/step.py`(待实现)
+- read_progress 工具:`agent/tools/builtin/step.py`(待实现)
+- Context 压缩:`agent/context/compressor.py`(待实现)
+
+---
+
+## 可视化 API
+
+### 设计目标
+
+为前端提供 Step 树的查询和实时推送接口,支持:
+1. 历史任务和进行中任务的查询
+2. 大型 Trace(上千 Step)的按需加载
+3. WebSocket 实时推送进行中任务的更新
+
+### 核心设计
+
+**简化原则**:消除"批次计算"和"同层完整性检查"的复杂逻辑,使用简单的层级懒加载
+
+**数据结构**:返回树形 JSON,前端无需自行构建
+
+**性能策略**:
+- 小型 Trace(<100 Steps):用 `/tree` 一次性返回完整树
+- 大型 Trace(≥100 Steps):用 `/node/{step_id}` 按需懒加载
+- 进行中任务:WebSocket 推送增量更新
+
+### API 端点
+
+```
+GET  /api/traces                          # 列出 Traces(支持过滤)
+GET  /api/traces/{trace_id}               # 获取 Trace 元数据
+GET  /api/traces/{trace_id}/tree          # 获取完整树(小型 Trace)
+GET  /api/traces/{trace_id}/node/{step_id}  # 懒加载节点 + 子节点
+WS   /api/traces/{trace_id}/watch         # 监听进行中的更新
+```
+
+### 懒加载核心逻辑
+
+```python
+async def get_node_with_children(
+    store: TraceStore,
+    step_id: Optional[str],  # None = 根节点
+    trace_id: str,
+    expand: bool = False,
+    max_depth: int = 1
+) -> dict:
+    # 1. 获取当前层节点
+    if step_id is None:
+        steps = await store.get_trace_steps(trace_id)
+        current_nodes = [s for s in steps if s.parent_id is None]
+    else:
+        current_nodes = await store.get_step_children(step_id)
+
+    # 2. 构建响应
+    result = []
+    for step in current_nodes:
+        node = step.to_dict()
+        node["children"] = []
+
+        # 3. 递归加载子节点(可选)
+        if expand and current_depth < max_depth:
+            children = await store.get_step_children(step.step_id)
+            if children:
+                node["children"] = [...]  # 递归
+
+        result.append(node)
+
+    return result
+```
+
+**品味评分**:🟢 好品味(逻辑清晰,< 30 行,无特殊情况)
+
+### WebSocket 事件
+
+```json
+// 新增 Step
+{"event": "step_added", "step": {...}}
+
+// Step 更新
+{"event": "step_updated", "step_id": "...", "updates": {...}}
+
+// Trace 完成
+{"event": "trace_completed", "trace_id": "..."}
+```
+
+### 实现位置(待定)
+
+两种方案:
+
+**方案 1:独立 API 模块**(推荐,如果未来需要多种 API)
+```
+agent/api/
+├── server.py           # FastAPI 应用
+├── routes/
+│   ├── traces.py       # Step 树路由
+│   └── websocket.py    # WebSocket 推送
+└── schemas.py          # Pydantic 模型
+```
+
+**方案 2:Step 树专用模块**(推荐,如果只用于 Step 树可视化)
+```
+agent/step_tree/
+├── api.py              # FastAPI 路由
+├── websocket.py        # WebSocket 推送
+└── server.py           # 独立服务入口
+```
+
+决策依据:
+- 如果系统未来需要提供多种 API(Experience 管理、Agent 控制等)→ 方案 1
+- 如果 API 仅用于 Step 树可视化 → 方案 2
+
+**详细设计**:参见 `/Users/sunlit/.claude/plans/starry-yawning-zebra.md`(作者本地文件,未纳入仓库)
+
+---
+
+## 未来扩展
+
+- 重试原因、重试次数、是否降级/兜底
+- 为什么选择某个动作、是否触发了 skills、系统 prompt 中的策略
+- 数据库持久化(PostgreSQL/Neo4j)
+- 递归查询优化(PostgreSQL CTE)

+ 383 - 0
docs/trace-api.md

@@ -0,0 +1,383 @@
+# Trace 模块 - Context 管理 + 可视化
+
+> 执行轨迹记录、存储和可视化 API
+
+---
+
+## 架构概览
+
+**职责定位**:`agent/trace` 模块负责所有 Trace/Step 相关功能
+
+```
+agent/trace/
+├── models.py          # Trace/Step 数据模型
+├── protocols.py       # TraceStore 存储接口
+├── memory_store.py    # 内存存储实现
+├── api.py             # RESTful API(懒加载)
+└── websocket.py       # WebSocket 实时推送
+```
+
+**设计原则**:
+- ✅ **高内聚**:所有 Trace 相关代码在一个模块
+- ✅ **松耦合**:核心模型不依赖 FastAPI
+- ✅ **可扩展**:易于添加 PostgreSQL/Neo4j 实现
+
+---
+
+## 核心模型
+
+### Trace - 执行轨迹
+
+一次完整的 LLM 交互(单次调用或 Agent 任务)
+
+```python
+from agent.trace import Trace
+
+trace = Trace.create(
+    mode="agent",
+    task="探索代码库",
+    agent_type="researcher"
+)
+
+# 字段说明
+trace.trace_id        # UUID
+trace.mode            # "call" | "agent"
+trace.task            # 任务描述
+trace.status          # "running" | "completed" | "failed"
+trace.total_steps     # Step 总数
+trace.total_tokens    # Token 总数
+trace.total_cost      # 总成本
+```
+
+### Step - 执行步骤
+
+Trace 中的原子操作,形成树结构
+
+```python
+from agent.trace import Step
+
+step = Step.create(
+    trace_id=trace.trace_id,
+    step_type="action",
+    sequence=1,
+    description="glob_files",
+    parent_id=parent_step_id,  # 树结构
+    data={
+        "tool_name": "glob_files",
+        "arguments": {"pattern": "**/*.py"}
+    }
+)
+
+# Step 类型
+# - goal: 目标/计划项
+# - thought: 思考/分析
+# - action: 工具调用
+# - result: 工具结果
+# - response: 最终回复
+# - memory_read/write: 记忆操作
+# - feedback: 人工反馈
+```
+
+---
+
+## 存储接口
+
+### TraceStore Protocol
+
+定义所有存储实现必须遵守的接口
+
+```python
+from agent.trace import TraceStore
+
+class MyCustomStore:
+    """实现 TraceStore 接口的所有方法"""
+
+    async def create_trace(self, trace: Trace) -> str: ...
+    async def get_trace(self, trace_id: str) -> Optional[Trace]: ...
+    async def list_traces(self, ...) -> List[Trace]: ...
+
+    async def add_step(self, step: Step) -> str: ...
+    async def get_step(self, step_id: str) -> Optional[Step]: ...
+    async def get_trace_steps(self, trace_id: str) -> List[Step]: ...
+    async def get_step_children(self, step_id: str) -> List[Step]: ...
+```
+
+### MemoryTraceStore
+
+内存存储实现(用于开发和测试)
+
+```python
+from agent.trace import MemoryTraceStore
+
+store = MemoryTraceStore()
+
+# 使用方法
+trace_id = await store.create_trace(trace)
+trace = await store.get_trace(trace_id)
+steps = await store.get_trace_steps(trace_id)
+```
+
+---
+
+## API 服务
+
+### 启动服务
+
+```bash
+# 1. 安装依赖
+pip install -r requirements.txt
+
+# 2. 启动服务
+python api_server.py
+
+# 3. 访问 API 文档
+open http://localhost:8000/docs
+```
+
+### RESTful 端点
+
+#### 1. 列出 Traces
+
+```http
+GET /api/traces?mode=agent&status=running&limit=20
+```
+
+**响应**:
+```json
+{
+  "traces": [
+    {
+      "trace_id": "abc123",
+      "mode": "agent",
+      "task": "探索代码库",
+      "status": "running",
+      "total_steps": 15,
+      "total_tokens": 5000,
+      "total_cost": 0.05
+    }
+  ]
+}
+```
+
+#### 2. 获取完整树(小型 Trace)
+
+```http
+GET /api/traces/{trace_id}/tree
+```
+
+**响应**:递归 Step 树(完整)
+
+#### 3. 懒加载节点(大型 Trace)
+
+```http
+GET /api/traces/{trace_id}/node/{step_id}?expand=true&max_depth=2
+```
+
+**参数**:
+- `step_id`: Step ID(`null` 表示根节点)
+- `expand`: 是否加载子节点
+- `max_depth`: 递归深度(1-10)
+
+**核心算法**:简洁的层级懒加载(< 30 行)
+
+```python
+async def _build_tree(store, trace_id, step_id, expand, max_depth, current_depth):
+    # 1. 获取当前层节点
+    if step_id is None:
+        nodes = [s for s in steps if s.parent_id is None]
+    else:
+        nodes = await store.get_step_children(step_id)
+
+    # 2. 构建响应
+    result = []
+    for step in nodes:
+        node_dict = step.to_dict()
+        node_dict["children"] = []
+
+        # 3. 递归加载子节点(可选)
+        if expand and current_depth < max_depth:
+            node_dict["children"] = await _build_tree(...)
+
+        result.append(node_dict)
+
+    return result
+```
+
+### WebSocket 推送
+
+实时监听进行中 Trace 的更新
+
+```javascript
+// 连接
+ws = new WebSocket(`/api/traces/${trace_id}/watch`)
+
+// 事件
+ws.onmessage = (e) => {
+  const event = JSON.parse(e.data)
+
+  switch (event.event) {
+    case "connected":
+      console.log("已连接")
+      break
+    case "step_added":
+      // 新增 Step
+      addStepToTree(event.step)
+      break
+    case "step_updated":
+      // Step 状态更新
+      updateStep(event.step_id, event.updates)
+      break
+    case "trace_completed":
+      // Trace 完成
+      console.log("完成")
+      ws.close()
+      break
+  }
+}
+```
+
+---
+
+## 使用场景
+
+### 1. Agent 执行时记录 Trace
+
+```python
+from agent import AgentRunner
+from agent.trace import MemoryTraceStore
+
+# 初始化
+store = MemoryTraceStore()
+runner = AgentRunner(trace_store=store, llm_call=my_llm_fn)
+
+# 执行 Agent(自动记录 Trace)
+async for event in runner.run(task="探索代码库"):
+    print(event)
+
+# 查询 Trace
+traces = await store.list_traces(mode="agent", limit=10)
+steps = await store.get_trace_steps(traces[0].trace_id)
+```
+
+### 2. 前端可视化(小型 Trace)
+
+```javascript
+// 一次性加载完整树
+const response = await fetch(`/api/traces/${traceId}/tree`)
+const { root_steps } = await response.json()
+
+// 渲染树
+renderTree(root_steps)
+```
+
+### 3. 前端可视化(大型 Trace)
+
+```javascript
+// 懒加载:只加载根节点
+const response = await fetch(`/api/traces/${traceId}/node/null?expand=false`)
+const { children } = await response.json()
+
+// 用户点击展开时
+async function expandNode(stepId) {
+  const response = await fetch(
+    `/api/traces/${traceId}/node/${stepId}?expand=true&max_depth=1`
+  )
+  const { children } = await response.json()
+  return children
+}
+```
+
+### 4. 实时监控进行中的任务
+
+```javascript
+// WebSocket 监听
+ws = new WebSocket(`/api/traces/${traceId}/watch`)
+ws.onmessage = (e) => {
+  const event = JSON.parse(e.data)
+  if (event.event === "step_added") {
+    // 实时添加新 Step 到 UI
+    appendStep(event.step)
+  }
+}
+```
+
+---
+
+## 扩展存储实现
+
+### PostgreSQL 实现(未来)
+
+```python
+from agent.trace import TraceStore, Trace, Step
+
+class PostgreSQLTraceStore:
+    """PostgreSQL 存储实现"""
+
+    def __init__(self, connection_string: str):
+        self.pool = create_pool(connection_string)
+
+    async def create_trace(self, trace: Trace) -> str:
+        async with self.pool.acquire() as conn:
+            await conn.execute(
+                "INSERT INTO traces (...) VALUES (...)",
+                trace.to_dict()
+            )
+        return trace.trace_id
+
+    async def get_step_children(self, step_id: str) -> List[Step]:
+        # 使用递归 CTE 优化查询
+        query = """
+        WITH RECURSIVE subtree AS (
+          SELECT * FROM steps WHERE parent_id = $1
+        )
+        SELECT * FROM subtree ORDER BY sequence
+        """
+        # ...
+```
+
+---
+
+## 导入路径(唯一正确方式)
+
+```python
+# ✅ 推荐导入
+from agent.trace import Trace, Step, StepType, Status
+from agent.trace import TraceStore, MemoryTraceStore
+
+# ✅ 顶层导入(等价)
+from agent import Trace, Step, TraceStore
+
+# ❌ 旧导入(已删除,会报错)
+from agent.models.trace import Trace  # ModuleNotFoundError
+from agent.storage.protocols import TraceStore  # ImportError
+```
+
+---
+
+## 性能优化
+
+### 小型 Trace(< 100 Steps)
+
+- **推荐**:使用 `/tree` 一次性加载
+- **优点**:最少请求数,前端体验最优
+- **缺点**:单次响应较大
+
+### 大型 Trace(≥ 100 Steps)
+
+- **推荐**:使用 `/node/{step_id}` 懒加载
+- **优点**:按需加载,内存占用小
+- **缺点**:需要多次请求
+
+### WebSocket vs 轮询
+
+- **进行中任务**:使用 WebSocket(实时推送)
+- **历史任务**:使用 RESTful(静态数据)
+
+---
+
+## 相关文档
+
+- [agent/trace/models.py](../agent/trace/models.py) - Trace/Step 模型定义
+- [agent/trace/api.py](../agent/trace/api.py) - RESTful API 实现
+- [api_server.py](../api_server.py) - FastAPI 应用入口
+- [requirements.txt](../requirements.txt) - FastAPI 依赖

+ 0 - 3
examples/__init__.py

@@ -1,3 +0,0 @@
-"""
-Examples 包 - 使用样例
-"""

+ 0 - 62
examples/browser_use_setup_demo.py

@@ -1,62 +0,0 @@
-"""
-Browser-Use 自动设置演示
-
-展示如何使用自动检查和安装工具
-"""
-
-import asyncio
-from agent.skills.browser_use import (
-    check_browser_use,
-    install_browser_use_chromium
-)
-from agent.tools.builtin import skill
-
-
-async def demo():
-    """演示 browser-use 设置流程"""
-
-    print("=" * 60)
-    print("Browser-Use 自动设置演示")
-    print("=" * 60)
-
-    # 1. 加载 skill(会自动检查依赖)
-    print("\n1. 加载 browser-use skill(自动检查依赖)")
-    result = await skill(skill_name="browser-use")
-    print(f"✅ {result.title}")
-    if "⚠️" in result.output:
-        print("   检测到缺失的依赖,输出中包含安装提示")
-
-    # 2. 手动检查依赖
-    print("\n2. 手动检查依赖状态")
-    result = await check_browser_use()
-    print(f"✅ {result.title}")
-    print(f"   CLI 已安装: {result.metadata.get('cli_installed', False)}")
-    print(f"   Chromium 已安装: {result.metadata.get('chromium_installed', False)}")
-    print(f"   状态: {result.metadata.get('status', 'unknown')}")
-
-    # 3. 自动安装 Chromium(如果需要)
-    if not result.metadata.get("chromium_installed", False):
-        print("\n3. 安装 Chromium 浏览器(可选)")
-        print("   注意:这会下载 200-300MB 数据")
-
-        # 用户确认
-        confirm = input("   是否继续安装?(y/N): ")
-        if confirm.lower() == "y":
-            result = await install_browser_use_chromium()
-            print(f"   {result.title}")
-            if result.metadata.get("installed"):
-                print("   ✅ 安装成功")
-            else:
-                print("   ❌ 安装失败,请查看输出")
-        else:
-            print("   跳过安装")
-    else:
-        print("\n3. Chromium 已安装,跳过")
-
-    print("\n" + "=" * 60)
-    print("演示完成!")
-    print("=" * 60)
-
-
-if __name__ == "__main__":
-    asyncio.run(demo())

+ 0 - 53
examples/feature_extract/output_1/result.txt

@@ -1,53 +0,0 @@
-作为一位计算机视觉专家和社媒博主,我将根据您提供的“整体构图”特征描述,对其进行结构化和精炼,以便于内容生成、聚类分析和模型训练。
-
-# 特征表示分析
-
-为了在保留关键信息的同时,使特征既能用于生成类似内容(需要细节),又能用于聚类和模型训练(需要精简和标准化),我将采用嵌套的JSON结构。
-
-*   **顶层**:明确指出这是关于“整体构图”的特征,并包含原始的“评分”。
-*   **构图手法列表**:使用一个列表来列出主要的构图类型,这对于快速分类和聚类非常有用。
-*   **具体构图元素**:对于引导线、对称和框架构图,我将它们分别抽象为独立的子对象,每个子对象包含:
-    *   `存在`:布尔值,表示该构图手法是否存在(便于快速判断和训练)。
-    *   `元素`:构成该手法的具体物体(例如“道路”、“树木”)。
-    *   `形式`:该元素的具体呈现方式(例如“拱形通道”、“水坑倒影”)。
-    *   `方向/位置`:描述元素在画面中的相对位置或方向。
-    *   `目标/对象`:该构图手法旨在强调或引导的对象。
-*   **主体放置**:对画面中主体(如骑行者)的相对位置进行概括性描述。
-*   **补充说明**:提供一段概括性的文字,用于在内容生成时增加语境和艺术性。
-
-# 提取的特征
-
-```json
-{
-  "整体构图": {
-    "主要构图手法": [
-      "中心构图",
-      "引导线构图",
-      "对称构图",
-      "框架构图"
-    ],
-    "引导线": {
-      "存在": true,
-      "元素": "道路",
-      "方向": "从画面底部中央向远方延伸",
-      "引导目标": ["骑行者", "光束"]
-    },
-    "对称构图": {
-      "存在": true,
-      "类型": "水平倒影对称",
-      "对称元素": "水坑倒影",
-      "位置描述": "画面下半部,倒影区域宽度覆盖整个画面"
-    },
-    "框架构图": {
-      "存在": true,
-      "元素": "高大树木",
-      "形式": "形成自然的拱形通道",
-      "位置": "画面左右两侧",
-      "框选对象": "骑行者 (主体)"
-    },
-    "主体放置": "中心偏远"
-  },
-  "补充说明": "多种构图手法巧妙结合,营造出强烈的纵深感和空间感,特别是光线和倒影的运用,极大地增强了画面的艺术感染力,使得整体画面富有叙事性和沉浸感。",
-  "评分": 0.652
-}
-```

+ 91 - 27
examples/feature_extract/run.py

@@ -1,7 +1,7 @@
 """
 特征提取示例
 
-使用 Agent 框架 + Prompt loader + 多模态支持
+使用 Agent 模式 + Skills + 多模态支持
 """
 
 import os
@@ -17,20 +17,25 @@ load_dotenv()
 
 from agent.prompts import SimplePrompt
 from agent.runner import AgentRunner
-from agent.llm.providers.gemini import create_gemini_llm_call
+from agent.storage import MemoryTraceStore
+from agent.llm.providers.openrouter import create_openrouter_llm_call
 
 
 async def main():
     # 路径配置
     base_dir = Path(__file__).parent
+    project_root = base_dir.parent.parent
     prompt_path = base_dir / "test.prompt"
     feature_md_path = base_dir / "input_1" / "feature.md"
     image_path = base_dir / "input_1" / "image.png"
     output_dir = base_dir / "output_1"
     output_dir.mkdir(exist_ok=True)
 
+    # Skills 目录
+    skills_dir = project_root / "agent" / "skills"
+
     print("=" * 60)
-    print("特征提取任务")
+    print("特征提取任务 (Agent 模式)")
     print("=" * 60)
     print()
 
@@ -38,60 +43,119 @@ async def main():
     print("1. 加载 prompt...")
     prompt = SimplePrompt(prompt_path)
 
+    # 提取 system prompt 和 user template
+    system_prompt = prompt._messages.get("system", "")
+    user_template = prompt._messages.get("user", "")
+
     # 2. 读取特征描述
     print("2. 读取特征描述...")
     with open(feature_md_path, 'r', encoding='utf-8') as f:
         feature_text = f.read()
 
-    # 3. 构建多模态消息
-    print("3. 构建多模态消息(文本 + 图片)...")
-    messages = prompt.build_messages(
+    # 3. 构建任务文本(包含图片)
+    print("3. 构建任务(文本 + 图片)...")
+
+    # 使用 prompt 构建多模态消息
+    temp_messages = prompt.build_messages(
         text=feature_text,
-        images=image_path  # 框架自动处理图片
+        images=image_path
     )
 
-    print(f"   - 消息数量: {len(messages)}")
-    print(f"   - 图片: {image_path.name}")
+    # 提取用户消息(包含文本和图片)
+    user_message_with_image = None
+    for msg in temp_messages:
+        if msg["role"] == "user":
+            user_message_with_image = msg
+            break
+
+    if not user_message_with_image:
+        raise ValueError("No user message found in prompt")
+
+    print(f"   - 任务已构建(包含图片: {image_path.name})")
 
-    # 4. 创建 Agent Runner
+    # 4. 创建 Agent Runner(配置 skills)
     print("4. 创建 Agent Runner...")
+    print(f"   - Skills 目录: {skills_dir}")
+    print(f"   - 模型: Claude Sonnet 4.5 (via OpenRouter)")
+
     runner = AgentRunner(
-        llm_call=create_gemini_llm_call()
+        trace_store=MemoryTraceStore(),
+        llm_call=create_openrouter_llm_call(model="anthropic/claude-sonnet-4.5"),
+        skills_dir=str(skills_dir),  # 恢复加载 skills,测试 Claude 是否能处理
+        debug=True  # 启用 debug,输出到 .trace/
     )
 
-    # 5. 调用 Agent
-    print(f"5. 调用模型: {prompt.config.get('model', 'gemini-2.5-flash')}...")
+    # 5. Agent 模式执行
+    # 注意:使用 OpenRouter 时,模型在创建 llm_call 时已指定
+    # 这里传入的 model 参数会被忽略(由 llm_call 内部控制)
+    print(f"5. 启动 Agent 模式...")
     print()
 
-    result = await runner.call(
-        messages=messages,
-        model=prompt.config.get('model', 'gemini-2.5-flash'),
+    final_response = ""
+
+    async for event in runner.run(
+        task="[图片和特征描述已包含在 messages 中]",  # 占位符
+        messages=[user_message_with_image],  # 传入包含图片的用户消息
+        system_prompt=system_prompt,
+        model="anthropic/claude-sonnet-4.5",  # OpenRouter 模型名称
         temperature=float(prompt.config.get('temperature', 0.3)),
-        trace=False  # 暂不记录 trace
-    )
+        max_iterations=10,
+        # tools 参数不传入,测试自动加载内置工具
+    ):
+        event_type = event.type
+        event_data = event.data
+
+        if event_type == "trace_started":
+            print(f"[Trace] 开始: {event_data.get('trace_id', '')[:8]}")
+
+        elif event_type == "memory_loaded":
+            exp_count = event_data.get('experiences_count', 0)
+            if exp_count > 0:
+                print(f"[Memory] 加载 {exp_count} 条经验")
+
+        elif event_type == "step_started":
+            step_type = event_data.get('step_type', '')
+            print(f"[Step] {step_type}...")
+
+        elif event_type == "thought":
+            content = event_data.get('content', '')
+            if content:
+                print(f"[Thought] {content[:100]}...")
+
+        elif event_type == "tool_execution":
+            tool_name = event_data.get('tool_name', '')
+            print(f"[Tool] 执行 {tool_name}")
+
+        elif event_type == "conclusion":
+            final_response = event_data.get('content', '')  # 修正:字段名是 content 不是 response
+            print(f"[Conclusion] Agent 完成")
+
+        elif event_type == "trace_completed":
+            print(f"[Trace] 完成")
+            print(f"  - Total tokens: {event_data.get('total_tokens', 0)}")
+            print(f"  - Total cost: ${event_data.get('total_cost', 0.0):.4f}")
 
     # 6. 输出结果
+    print()
     print("=" * 60)
-    print("模型响应:")
+    print("Agent 响应:")
     print("=" * 60)
-    print(result.reply)
+    print(final_response)
     print("=" * 60)
     print()
 
     # 7. 保存结果
     output_file = output_dir / "result.txt"
     with open(output_file, 'w', encoding='utf-8') as f:
-        f.write(result.reply)
+        f.write(final_response)
 
     print(f"✓ 结果已保存到: {output_file}")
     print()
 
-    # 8. 打印统计信息
-    print("统计信息:")
-    if result.tokens:
-        print(f"  输入 tokens: {result.tokens.get('prompt', 0)}")
-        print(f"  输出 tokens: {result.tokens.get('completion', 0)}")
-    print(f"  费用: ${result.cost:.4f}")
+    # 8. 提示查看 debug 文件
+    print("Debug 文件:")
+    print(f"  - 完整可折叠: {Path.cwd() / '.trace' / 'tree.md'}")
+    print(f"  - 简洁文本: {Path.cwd() / '.trace' / 'tree.txt'}")
 
 
 if __name__ == "__main__":

+ 1 - 1
examples/feature_extract/test.prompt

@@ -8,7 +8,7 @@ $system$
 你是一位计算机视觉专家,也是一位才华横溢的社媒博主、内容创作者。
 
 # 任务
-分析一个优质内容的指定特征适合如何表示,并完成该特征的提取。
+分析一个优质内容的指定特征适合什么样的表示(仅仅语言描述是不够的),并完成该特征的提取。
 提取的特征将用于在生成类似内容时作为参考内容(所以要保留重要信息),也会和其他内容的同一维度的特征放在一起聚类发现规律(所以特征表示要尽量精简、不要过于具体),或用于模型训练。
 
 # 工具

+ 129 - 0
examples/test_skill.py

@@ -0,0 +1,129 @@
+import json
+import subprocess
+import time
+from pathlib import Path
+
+
+def run_cli(session: str, args: list[str]) -> dict:
+    command = ["browser-use", "--session", session, "--json"] + args
+    result = subprocess.run(command, capture_output=True, text=True)
+    if result.returncode != 0:
+        raise RuntimeError(result.stderr.strip() or "browser-use command failed")
+    payload = result.stdout.strip()
+    if not payload:
+        raise RuntimeError("browser-use returned empty output")
+    data = json.loads(payload)
+    if not data.get("success", False):
+        raise RuntimeError(data.get("error", "browser-use command error"))
+    return data.get("data", {})
+
+
+def stop_session_server(session: str) -> None:
+    subprocess.run(
+        ["browser-use", "--session", session, "server", "stop"],
+        capture_output=True,
+        text=True,
+    )
+
+
+def main():
+    project_root = Path(__file__).resolve().parents[1]
+    output_dir = project_root / "output"
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    json_file = output_dir / "skill_baidu.json"
+    html_file = output_dir / "skill_baidu_page.html"
+
+    session = "skill_baidu"
+    keyword = "瑜伽美女"
+
+    try:
+        stop_session_server(session)
+        try:
+            run_cli(session, ["open", "https://www.baidu.com"])
+        except RuntimeError:
+            stop_session_server(session)
+            run_cli(session, ["open", "https://www.baidu.com"])
+
+        search_js = (
+            "(function(){"
+            "const input=document.querySelector('#kw');"
+            "const btn=document.querySelector('#su');"
+            "if(input){input.value='" + keyword + "';}"
+            "if(btn){btn.click();}"
+            "else if(input&&input.form){input.form.submit();}"
+            "return {hasInput:!!input,hasButton:!!btn};"
+            "})()"
+        )
+        run_cli(session, ["eval", search_js])
+
+        wait_js = (
+            "(function(){"
+            "const items=document.querySelectorAll('#content_left .result, #content_left .c-container, #content_left .result-op');"
+            "const bodyReady=!!document.body;"
+            "const bodyLen=bodyReady?(document.body.innerText||'').length:0;"
+            "return {count:items.length, bodyReady:bodyReady, bodyLen:bodyLen};"
+            "})()"
+        )
+
+        count = 0
+        for _ in range(12):
+            data = run_cli(session, ["eval", wait_js])
+            result = data.get("result") if isinstance(data, dict) else {}
+            count = int(result.get("count") or 0)
+            body_len = int(result.get("bodyLen") or 0)
+            if count >= 3 or body_len > 1000:
+                break
+            time.sleep(1)
+
+        extract_js = (
+            "(function(){"
+            "const items=Array.from(document.querySelectorAll('#content_left .result, #content_left .c-container, #content_left .result-op'));"
+            "const results=[];"
+            "for(const item of items){"
+            "const a=item.querySelector('h3 a')||item.querySelector('a[data-click]')||item.querySelector('a');"
+            "if(!a) continue;"
+            "const title=(a.textContent||'').trim();"
+            "const link=a.href||'';"
+            "const summaryEl=item.querySelector('.c-abstract, .content-right_8Zs40, .content-right_8Zs40_2gVt2');"
+            "const summary=(summaryEl?summaryEl.textContent:'').trim();"
+            "results.push({index:results.length+1,title,link,summary});"
+            "if(results.length>=10) break;"
+            "}"
+            "return {success:true,keyword:'" + keyword + "',count:results.length,timestamp:new Date().toISOString(),results:results};"
+            "})()"
+        )
+
+        data = run_cli(session, ["eval", extract_js])
+        extracted = data.get("result") if isinstance(data, dict) else data
+
+        if not extracted:
+            extracted = {
+                "success": False,
+                "keyword": keyword,
+                "count": 0,
+                "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S"),
+                "results": [],
+            }
+
+        with open(json_file, "w", encoding="utf-8") as f:
+            json.dump(extracted, f, ensure_ascii=False, indent=2)
+
+        html_data = run_cli(session, ["eval", "document.documentElement.outerHTML"])
+        html_content = html_data.get("result") if isinstance(html_data, dict) else html_data
+
+        with open(html_file, "w", encoding="utf-8") as f:
+            f.write(html_content or "")
+
+        print(f"✅ 数据已保存到: {json_file}")
+        print(f"✅ HTML 已保存到: {html_file}")
+
+    finally:
+        try:
+            run_cli(session, ["close"])
+        except Exception:
+            pass
+
+
+if __name__ == "__main__":
+    main()

+ 141 - 0
examples/test_tools_baidu.py

@@ -0,0 +1,141 @@
+import asyncio
+import json
+import os
+import sys
+from datetime import datetime
+from pathlib import Path
+from urllib.parse import quote
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from agent.tools.builtin.baseClass import (
+    init_browser_session,
+    navigate_to_url,
+    wait,
+    get_page_html,
+    evaluate,
+    scroll_page,
+    cleanup_browser_session,
+)
+
+
+async def run_task():
+    project_root = Path(__file__).resolve().parents[1]
+    output_dir = project_root / "output"
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    json_file = output_dir / "baidu.json"
+    html_file = output_dir / "baidu_page.html"
+
+    try:
+        await init_browser_session(headless=False, profile_name="baidu_profile")
+
+        await navigate_to_url("https://www.baidu.com")
+        await wait(seconds=2)
+
+        keyword = "Python 教程"
+        search_url = f"https://www.baidu.com/s?wd={quote(keyword)}"
+        await navigate_to_url(search_url)
+        await wait(seconds=3)
+        await scroll_page(down=True, pages=1.0)
+        await wait(seconds=2)
+
+        extract_js = """
+        (function(){
+            try {
+                const results = [];
+                const resultItems = document.querySelectorAll('#content_left > div[class*="result"]');
+                resultItems.forEach((item, index) => {
+                    if (index >= 10) return;
+                    try {
+                        const titleEl = item.querySelector('h3 a, .t a');
+                        const title = titleEl ? titleEl.textContent.trim() : '';
+                        const link = titleEl ? titleEl.href : '';
+                        const summaryEl = item.querySelector('.c-abstract, .content-right_8Zs40');
+                        const summary = summaryEl ? summaryEl.textContent.trim() : '';
+                        const sourceEl = item.querySelector('.c-color-gray, .source_1Vdff');
+                        const source = sourceEl ? sourceEl.textContent.trim() : '';
+                        if (title || link) {
+                            results.push({
+                                index: index + 1,
+                                title: title,
+                                link: link,
+                                summary: summary.substring(0, 200),
+                                source: source
+                            });
+                        }
+                    } catch (e) {
+                    }
+                });
+                return {
+                    success: true,
+                    count: results.length,
+                    keyword: 'Python 教程',
+                    timestamp: new Date().toISOString(),
+                    results: results
+                };
+            } catch (e) {
+                return {
+                    success: false,
+                    error: e.message,
+                    stack: e.stack
+                };
+            }
+        })()
+        """
+
+        result = await evaluate(code=extract_js)
+        output = result.output
+        if output.startswith("Result: "):
+            output = output[8:]
+
+        try:
+            data = json.loads(output)
+        except json.JSONDecodeError:
+            data = {
+                "success": False,
+                "error": "JSON解析失败",
+                "raw_output": output[:1000],
+                "keyword": keyword,
+                "timestamp": datetime.now().isoformat(),
+            }
+
+        with open(json_file, "w", encoding="utf-8") as f:
+            json.dump(data, f, ensure_ascii=False, indent=2)
+
+        html_result = await get_page_html()
+        html_content = html_result.metadata.get("html", "")
+        page_url = html_result.metadata.get("url", "")
+        page_title = html_result.metadata.get("title", "")
+        meta_info = (
+            "\n".join(
+                [
+                    "<!--",
+                    f"    页面标题: {page_title}",
+                    f"    页面URL: {page_url}",
+                    f"    保存时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
+                    f"    搜索关键词: {keyword}",
+                    "-->",
+                    "",
+                ]
+            )
+            + "\n"
+        )
+
+        with open(html_file, "w", encoding="utf-8") as f:
+            f.write(meta_info)
+            f.write(html_content)
+
+        print(f"✅ 数据已保存到: {json_file}")
+        print(f"✅ HTML 已保存到: {html_file}")
+
+    finally:
+        await cleanup_browser_session()
+
+
+def main():  # Synchronous entry point: runs the async run_task() to completion.
+    asyncio.run(run_task())
+
+
+if __name__ == "__main__":
+    main()

+ 0 - 188
examples/tools_complete_demo.py

@@ -1,188 +0,0 @@
-"""
-完整工具系统使用示例
-
-演示基础工具和高级工具的使用。
-"""
-
-import asyncio
-from agent.tools.builtin import (
-    read_file,
-    edit_file,
-    write_file,
-    bash_command,
-    glob_files,
-    grep_content
-)
-
-
-async def demo_basic_tools():
-    """演示基础工具(Python 实现)"""
-
-    print("=" * 60)
-    print("基础工具演示")
-    print("=" * 60)
-
-    # 1. 读取文件
-    print("\n1. 读取文件")
-    result = await read_file(file_path="README.md", limit=20)
-    print(f"✅ {result.title}")
-    print(f"   前 5 行: {result.output[:200]}...")
-
-    # 2. 搜索文件
-    print("\n2. Glob 搜索")
-    result = await glob_files(pattern="**/*.py", path="agent/tools")
-    print(f"✅ {result.title}")
-    print(f"   找到 {result.metadata['count']} 个文件")
-
-    # 3. 内容搜索
-    print("\n3. Grep 搜索")
-    result = await grep_content(
-        pattern="async def",
-        path="agent/tools/builtin",
-        include="*.py"
-    )
-    print(f"✅ {result.title}")
-    print(f"   找到 {result.metadata['matches']} 个匹配")
-
-    # 4. 执行命令
-    print("\n4. Bash 命令")
-    result = await bash_command(
-        command="git status --short",
-        timeout=10
-    )
-    print(f"✅ {result.title}")
-    print(f"   退出码: {result.metadata['exit_code']}")
-
-    # 5. 编辑文件(演示智能匹配)
-    print("\n5. 智能编辑(9 种策略)")
-
-    # 创建测试文件
-    test_content = """
-def hello():
-    print("Hello")
-
-def world():
-    print("World")
-"""
-    await write_file(file_path="/tmp/test_edit.py", content=test_content)
-
-    # 编辑:忽略缩进(会使用 IndentationFlexibleReplacer)
-    result = await edit_file(
-        file_path="/tmp/test_edit.py",
-        old_string='def hello():\nprint("Hello")',  # 缩进不同
-        new_string='def hello():\n    print("Hello, World!")'
-    )
-    print(f"✅ {result.title}")
-    print(f"   Diff:\n{result.metadata['diff'][:200]}...")
-
-
-async def demo_advanced_tools():
-    """演示高级工具(Bun 适配器)"""
-
-    print("\n" + "=" * 60)
-    print("高级工具演示(需要 Bun)")
-    print("=" * 60)
-
-    try:
-        from agent.tools.advanced import webfetch, lsp_diagnostics
-
-        # 1. 网页抓取
-        print("\n1. 网页抓取 (HTML -> Markdown)")
-        result = await webfetch(
-            url="https://example.com",
-            format="markdown"
-        )
-        print(f"✅ {result.title}")
-        print(f"   内容长度: {len(result.output)} 字符")
-
-        # 2. LSP 诊断
-        print("\n2. LSP 诊断")
-        result = await lsp_diagnostics(
-            file_path="agent/tools/builtin/edit.py"
-        )
-        print(f"✅ {result.title}")
-        print(f"   诊断结果: {result.output[:200]}...")
-
-    except Exception as e:
-        print(f"⚠️  高级工具需要 Bun 运行时: {e}")
-        print("   安装: curl -fsSL https://bun.sh/install | bash")
-
-
-async def demo_edit_strategies():
-    """演示 edit_file 的 9 种匹配策略"""
-
-    print("\n" + "=" * 60)
-    print("edit_file 策略演示")
-    print("=" * 60)
-
-    test_cases = [
-        {
-            "name": "策略 1: 精确匹配",
-            "content": "DEBUG = True\nVERBOSE = False",
-            "old": "DEBUG = True",
-            "new": "DEBUG = False"
-        },
-        {
-            "name": "策略 2: 忽略行首尾空白",
-            "content": "  DEBUG = True  \nVERBOSE = False",
-            "old": "DEBUG = True",  # 无空白
-            "new": "DEBUG = False"
-        },
-        {
-            "name": "策略 4: 空白归一化",
-            "content": "DEBUG  =   True",
-            "old": "DEBUG = True",  # 单空格
-            "new": "DEBUG = False"
-        },
-        {
-            "name": "策略 5: 灵活缩进",
-            "content": """
-def foo():
-    if True:
-        print("hello")
-""",
-            "old": "if True:\nprint(\"hello\")",  # 无缩进
-            "new": "if True:\n    print(\"world\")"
-        }
-    ]
-
-    for i, test in enumerate(test_cases, 1):
-        print(f"\n{i}. {test['name']}")
-
-        # 创建测试文件
-        test_file = f"/tmp/test_strategy_{i}.py"
-        await write_file(file_path=test_file, content=test["content"])
-
-        # 执行编辑
-        try:
-            result = await edit_file(
-                file_path=test_file,
-                old_string=test["old"],
-                new_string=test["new"]
-            )
-            print(f"   ✅ 成功匹配")
-        except Exception as e:
-            print(f"   ❌ 失败: {e}")
-
-
-async def main():
-    """运行所有演示"""
-
-    print("\n🚀 工具系统完整演示\n")
-
-    # 基础工具
-    await demo_basic_tools()
-
-    # 编辑策略
-    await demo_edit_strategies()
-
-    # 高级工具
-    await demo_advanced_tools()
-
-    print("\n" + "=" * 60)
-    print("演示完成!")
-    print("=" * 60)
-
-
-if __name__ == "__main__":
-    asyncio.run(main())

+ 0 - 581
examples/tools_examples.py

@@ -1,581 +0,0 @@
-"""
-工具系统完整示例
-
-本文件展示 @tool 装饰器的所有用法,包括:
-
-## 基础功能
-1. 最简形式
-2. 带 i18n 展示信息
-3. 带可编辑参数
-4. 需要用户确认
-5. 带 context 参数
-6. 同步工具
-7. 复杂返回类型
-
-## 高级功能
-8. 域名过滤(URL Patterns)
-9. 敏感数据处理(<secret> 占位符 + TOTP)
-10. 工具使用统计
-11. 组合所有功能
-
-注意:
-- uid 参数会由框架自动注入,不需要用户传递
-- context 参数用于传递额外上下文(如浏览器会话、当前 URL 等)
-- 返回值可以是字符串、字典或 ToolResult
-"""
-
-import asyncio
-import json
-from typing import List, Dict, Any, Optional
-from agent import tool, ToolResult, ToolContext, get_tool_registry
-
-
-# ============================================================
-# 基础功能示例
-# ============================================================
-
-# 1. 最简形式
-@tool()
-async def hello_world(name: str, uid: str = "") -> Dict[str, str]:
-	"""
-	最简单的工具示例
-
-	Args:
-		name: 要问候的名字
-		uid: 用户ID(自动注入)
-
-	Returns:
-		包含问候语的字典
-	"""
-	return {"greeting": f"Hello, {name}!"}
-
-
-# 2. 带 i18n 展示信息的工具
-@tool(
-	display={
-		"zh": {
-			"name": "搜索内容",
-			"params": {
-				"query": "搜索关键词",
-				"limit": "返回数量"
-			}
-		},
-		"en": {
-			"name": "Search Content",
-			"params": {
-				"query": "Search query",
-				"limit": "Number of results"
-			}
-		}
-	}
-)
-async def search_content(
-	query: str,
-	limit: int = 10,
-	uid: str = ""
-) -> List[Dict[str, Any]]:
-	"""
-	搜索用户的内容
-
-	使用语义搜索查找相关内容。display 参数用于前端展示:
-	- 工具名称会根据用户语言显示为"搜索内容"或"Search Content"
-	- 参数名称也会相应翻译
-
-	Args:
-		query: 搜索查询文本
-		limit: 返回结果数量(默认10)
-		uid: 用户ID(自动注入)
-
-	Returns:
-		搜索结果列表,每个包含 id, title, content, score
-	"""
-	# 实际实现中会调用向量搜索
-	return [
-		{
-			"id": "doc_001",
-			"title": f"关于 {query} 的文档",
-			"content": f"这是与 {query} 相关的内容...",
-			"score": 0.95
-		}
-	]
-
-
-# 3. 带可编辑参数的工具
-@tool(
-	editable_params=["query", "filters"],
-	display={
-		"zh": {
-			"name": "高级搜索",
-			"params": {
-				"query": "搜索关键词",
-				"filters": "过滤条件",
-				"sort_by": "排序方式"
-			}
-		}
-	}
-)
-async def advanced_search(
-	query: str,
-	filters: Optional[Dict[str, Any]] = None,
-	sort_by: str = "relevance",
-	limit: int = 20,
-	uid: str = ""
-) -> Dict[str, Any]:
-	"""
-	高级搜索工具(允许用户编辑参数)
-
-	editable_params 指定哪些参数允许用户在 LLM 生成后编辑:
-	- LLM 会先生成 query 和 filters
-	- 用户可以在确认前修改这些参数
-	- 适用于搜索、创建等需要用户微调的场景
-
-	Args:
-		query: 搜索查询
-		filters: 过滤条件(如 {"type": "note", "date_range": "7d"})
-		sort_by: 排序方式(relevance/date/title)
-		limit: 返回数量
-		uid: 用户ID(自动注入)
-
-	Returns:
-		搜索结果和元数据
-	"""
-	return {
-		"results": [
-			{"id": "1", "title": "Result 1", "score": 0.9},
-			{"id": "2", "title": "Result 2", "score": 0.8},
-		],
-		"total": 42,
-		"query": query,
-		"filters_applied": filters or {},
-		"sort_by": sort_by
-	}
-
-
-# 4. 需要用户确认的危险操作
-@tool(
-	requires_confirmation=True,
-	display={
-		"zh": {
-			"name": "删除内容",
-			"params": {
-				"content_id": "内容ID",
-				"permanent": "永久删除"
-			}
-		}
-	}
-)
-async def delete_content(
-	content_id: str,
-	permanent: bool = False,
-	uid: str = ""
-) -> Dict[str, Any]:
-	"""
-	删除内容(需要用户确认)
-
-	requires_confirmation=True 表示这是一个危险操作:
-	- LLM 调用此工具时,不会立即执行
-	- 会先向用户展示操作详情,等待确认
-	- 用户确认后才会真正执行
-
-	适用场景:删除操作、发送消息、修改重要设置、任何不可逆操作
-
-	Args:
-		content_id: 要删除的内容ID
-		permanent: 是否永久删除(False=移到回收站)
-		uid: 用户ID(自动注入)
-
-	Returns:
-		删除结果
-	"""
-	return {
-		"success": True,
-		"content_id": content_id,
-		"permanent": permanent,
-		"message": f"内容 {content_id} 已{'永久删除' if permanent else '移到回收站'}"
-	}
-
-
-# 5. 带 context 参数的工具
-@tool(
-	display={
-		"zh": {"name": "获取相关推荐", "params": {"top_k": "推荐数量"}}
-	}
-)
-async def get_recommendations(
-	top_k: int = 5,
-	uid: str = "",
-	context: Optional[Dict[str, Any]] = None
-) -> List[Dict[str, Any]]:
-	"""
-	获取相关推荐(使用 context 获取额外信息)
-
-	context 参数用于传递执行上下文,由框架自动注入:
-	- 当前阅读位置 (current_location)
-	- 当前会话 ID (session_id)
-	- 排除的内容 ID (exclude_ids)
-
-	Args:
-		top_k: 返回推荐数量
-		uid: 用户ID(自动注入)
-		context: 执行上下文(自动注入)
-
-	Returns:
-		推荐列表
-	"""
-	current_location = None
-	if context:
-		current_location = context.get("current_location")
-
-	return [
-		{
-			"id": "rec_001",
-			"title": "推荐内容 1",
-			"reason": f"基于当前位置 {current_location}" if current_location else "基于您的兴趣"
-		}
-	]
-
-
-# 6. 同步工具(非 async)
-@tool()
-def format_text(
-	text: str,
-	format_type: str = "markdown",
-	uid: str = ""
-) -> str:
-	"""
-	格式化文本(同步工具)
-
-	不需要 async 的工具可以定义为普通函数。
-	框架会自动检测并正确调用。
-
-	适用于:纯计算操作、文本处理、不需要 I/O 的操作
-
-	Args:
-		text: 要格式化的文本
-		format_type: 格式类型(markdown/plain/html)
-		uid: 用户ID(自动注入)
-
-	Returns:
-		格式化后的文本
-	"""
-	if format_type == "markdown":
-		return f"**{text}**"
-	elif format_type == "html":
-		return f"<p>{text}</p>"
-	else:
-		return text
-
-
-# 7. 使用 ToolResult 的工具
-@tool()
-async def analyze_content(
-	content_id: str,
-	analysis_types: Optional[List[str]] = None,
-	uid: str = ""
-) -> ToolResult:
-	"""
-	分析内容(使用 ToolResult)
-
-	ToolResult 支持双层记忆管理:
-	- output: 完整结果(可能很长)
-	- long_term_memory: 简短摘要(永久保存)
-
-	Args:
-		content_id: 要分析的内容ID
-		analysis_types: 分析类型列表(sentiment/keywords/summary)
-		uid: 用户ID(自动注入)
-
-	Returns:
-		ToolResult 包含分析结果
-	"""
-	types = analysis_types or ["sentiment", "keywords"]
-
-	result = {
-		"content_id": content_id,
-		"analyses": {}
-	}
-
-	if "sentiment" in types:
-		result["analyses"]["sentiment"] = {
-			"score": 0.8,
-			"label": "positive",
-			"confidence": 0.92
-		}
-
-	if "keywords" in types:
-		result["analyses"]["keywords"] = [
-			{"word": "AI", "weight": 0.9},
-			{"word": "学习", "weight": 0.7}
-		]
-
-	return ToolResult(
-		title=f"Analysis of {content_id}",
-		output=json.dumps(result, indent=2, ensure_ascii=False),
-		long_term_memory=f"Analyzed {content_id}: {', '.join(types)}",
-		metadata={"types": types}
-	)
-
-
-# ============================================================
-# 高级功能示例
-# ============================================================
-
-# 8. 域名过滤示例
-@tool(url_patterns=["*.google.com", "www.google.*"])
-async def google_search(query: str, uid: str = "") -> ToolResult:
-	"""
-	Google 搜索(仅在 Google 页面可用)
-
-	使用 url_patterns 限制工具只在特定域名显示。
-	在 Google 页面时,此工具会出现在可用工具列表中。
-	在其他页面时,此工具会被过滤掉。
-
-	Args:
-		query: 搜索查询
-		uid: 用户ID(自动注入)
-
-	Returns:
-		搜索结果
-	"""
-	return ToolResult(
-		title="Google Search",
-		output=f"Searching Google for: {query}",
-		long_term_memory=f"Searched Google for '{query}'"
-	)
-
-
-@tool(url_patterns=["*.github.com"])
-async def create_github_issue(
-	title: str,
-	body: str,
-	uid: str = ""
-) -> ToolResult:
-	"""
-	创建 GitHub Issue(仅在 GitHub 页面可用)
-
-	Args:
-		title: Issue 标题
-		body: Issue 内容
-		uid: 用户ID(自动注入)
-
-	Returns:
-		创建结果
-	"""
-	return ToolResult(
-		title="Issue Created",
-		output=f"Created issue: {title}",
-		long_term_memory=f"Created GitHub issue: {title}"
-	)
-
-
-@tool()  # 无 url_patterns,所有页面都可用
-async def take_screenshot(uid: str = "") -> ToolResult:
-	"""截图(所有页面都可用)"""
-	return ToolResult(
-		title="Screenshot",
-		output="Screenshot taken",
-		attachments=["screenshot_001.png"]
-	)
-
-
-# 9. 敏感数据处理示例
-@tool(url_patterns=["*.github.com"])
-async def github_login(
-	username: str,
-	password: str,
-	totp_code: str,
-	uid: str = ""
-) -> ToolResult:
-	"""
-	GitHub 登录(支持敏感数据占位符)
-
-	LLM 会输出类似:
-	{
-		"username": "user@example.com",
-		"password": "<secret>github_password</secret>",
-		"totp_code": "<secret>github_2fa_bu_2fa_code</secret>"
-	}
-
-	执行时会自动替换为实际值。
-
-	Args:
-		username: 用户名
-		password: 密码(可以是占位符)
-		totp_code: TOTP 验证码(可以是占位符,自动生成)
-		uid: 用户ID(自动注入)
-
-	Returns:
-		登录结果
-	"""
-	# 注意:password 和 totp_code 在到达这里时已经被替换
-	return ToolResult(
-		title="Login Successful",
-		output=f"Logged in as {username}",
-		long_term_memory=f"Logged in to GitHub as {username}"
-	)
-
-
-# 10. 组合所有功能
-@tool(
-	url_patterns=["*.example.com"],
-	requires_confirmation=True,
-	editable_params=["message"],
-	display={
-		"zh": {
-			"name": "发送认证消息",
-			"params": {
-				"recipient": "接收者",
-				"message": "消息内容",
-				"api_key": "API密钥"
-			}
-		}
-	}
-)
-async def send_authenticated_message(
-	recipient: str,
-	message: str,
-	api_key: str,
-	ctx: ToolContext,
-	uid: str = ""
-) -> ToolResult:
-	"""
-	发送消息(组合多个功能)
-
-	展示所有高级功能:
-	- 仅在 example.com 可用(域名过滤)
-	- 需要用户确认(危险操作)
-	- 消息可编辑(用户微调)
-	- API key 使用敏感数据占位符
-	- 使用 ToolContext 获取上下文
-
-	Args:
-		recipient: 接收者
-		message: 消息内容
-		api_key: API密钥(可以是占位符)
-		ctx: 工具上下文
-		uid: 用户ID(自动注入)
-
-	Returns:
-		发送结果
-	"""
-	# api_key 会从 <secret>api_key</secret> 替换为实际值
-	# ctx 包含 page_url, browser_session 等信息
-
-	return ToolResult(
-		title="Message Sent",
-		output=f"Sent to {recipient}: {message}",
-		long_term_memory=f"Sent message to {recipient} on {ctx.page_url}",
-		metadata={"recipient": recipient}
-	)
-
-
-# ============================================================
-# 使用示例
-# ============================================================
-
-async def main():
-	registry = get_tool_registry()
-
-	print("=" * 60)
-	print("工具系统完整示例")
-	print("=" * 60)
-
-	# ============================================================
-	# 示例 1:基础工具调用
-	# ============================================================
-	print("\n1. 基础工具调用")
-	print("-" * 60)
-
-	result = await registry.execute("hello_world", {"name": "Alice"})
-	print(f"hello_world: {result}")
-
-	result = await registry.execute("search_content", {"query": "Python", "limit": 5})
-	print(f"search_content: {result}")
-
-	# ============================================================
-	# 示例 2:域名过滤
-	# ============================================================
-	print("\n\n2. 域名过滤示例")
-	print("-" * 60)
-
-	# 在 Google 页面
-	google_url = "https://www.google.com/search?q=test"
-	google_tools = registry.get_tool_names(google_url)
-	print(f"在 {google_url} 可用的工具:")
-	print(f"  包含 google_search: {'google_search' in google_tools}")
-
-	# 在 GitHub 页面
-	github_url = "https://github.com/user/repo"
-	github_tools = registry.get_tool_names(github_url)
-	print(f"\n在 {github_url} 可用的工具:")
-	print(f"  包含 create_github_issue: {'create_github_issue' in github_tools}")
-	print(f"  包含 google_search: {'google_search' in github_tools}")
-
-	# ============================================================
-	# 示例 3:敏感数据处理
-	# ============================================================
-	print("\n\n3. 敏感数据处理示例")
-	print("-" * 60)
-
-	# 配置敏感数据
-	sensitive_data = {
-		"*.github.com": {
-			"github_password": "my_secret_password",
-			"github_2fa_bu_2fa_code": "JBSWY3DPEHPK3PXP"  # TOTP secret
-		}
-	}
-
-	# 模拟 LLM 输出(包含占位符)
-	llm_output_args = {
-		"username": "user@example.com",
-		"password": "<secret>github_password</secret>",
-		"totp_code": "<secret>github_2fa_bu_2fa_code</secret>"
-	}
-
-	print("LLM 输出的参数(包含占位符):")
-	print(f"  {llm_output_args}")
-
-	# 执行工具(自动替换敏感数据)
-	result = await registry.execute(
-		"github_login",
-		llm_output_args,
-		context={"page_url": "https://github.com/login"},
-		sensitive_data=sensitive_data
-	)
-
-	print(f"\n执行结果(密码已替换):")
-	print(f"  {result}")
-
-	# ============================================================
-	# 示例 4:工具统计
-	# ============================================================
-	print("\n\n4. 工具统计示例")
-	print("-" * 60)
-
-	# 模拟多次调用
-	for i in range(5):
-		await registry.execute("google_search", {"query": f"test {i}"})
-
-	await registry.execute("take_screenshot", {})
-	await registry.execute("take_screenshot", {})
-
-	# 查看统计
-	stats = registry.get_stats()
-	print("工具使用统计:")
-	for tool_name, tool_stats in stats.items():
-		if tool_stats["call_count"] > 0:
-			print(f"\n  {tool_name}:")
-			print(f"    调用次数: {tool_stats['call_count']}")
-			print(f"    成功率: {tool_stats['success_rate']:.1%}")
-			print(f"    平均执行时间: {tool_stats['average_duration']:.3f}s")
-
-	# 获取 Top 工具
-	print("\n\nTop 3 最常用工具:")
-	top_tools = registry.get_top_tools(limit=3, by="call_count")
-	for i, tool_name in enumerate(top_tools, 1):
-		tool_stats = stats[tool_name]
-		print(f"  {i}. {tool_name} ({tool_stats['call_count']} 次调用)")
-
-
-if __name__ == "__main__":
-	asyncio.run(main())

+ 5 - 0
requirements.txt

@@ -6,3 +6,8 @@ python-dotenv>=1.0.0
 # 推荐安装方式: uv add browser-use && uv sync
 # 或使用: pip install browser-use
 browser-use>=0.11.0
+
+# API Server (Step Tree Visualization)
+fastapi>=0.115.0
+uvicorn[standard]>=0.32.0
+websockets>=13.0

+ 15 - 12
tests/test_runner.py

@@ -13,7 +13,7 @@ from agent import (
     tool,
     get_tool_registry,
 )
-from reson_agent.storage import MemoryTraceStore, MemoryMemoryStore
+from agent.storage import MemoryTraceStore, MemoryMemoryStore
 
 
 # 测试工具
@@ -21,7 +21,7 @@ from reson_agent.storage import MemoryTraceStore, MemoryMemoryStore
     editable_params=["query"],
     display={"zh": {"name": "测试搜索", "params": {"query": "关键词"}}}
 )
-async def test_search(query: str, limit: int = 10, uid: str = "") -> dict:
+async def search_tool(query: str, limit: int = 10, uid: str = "") -> dict:
     """测试搜索工具"""
     return {"results": [f"结果: {query}"], "count": 1}
 
@@ -43,7 +43,7 @@ async def mock_llm_call(
             "tool_calls": [{
                 "id": "call_123",
                 "function": {
-                    "name": "test_search",
+                    "name": "search_tool",
                     "arguments": '{"query": "测试查询"}'
                 }
             }],
@@ -74,14 +74,17 @@ class TestTraceAndStep:
     def test_step_create(self):
         step = Step.create(
             trace_id="trace_123",
-            step_type="llm_call",
+            step_type="thought",
             sequence=0,
-            data={"response": "hello"}
+            status="completed",
+            description="测试步骤",
+            data={"content": "hello"}
         )
         assert step.step_id is not None
         assert step.trace_id == "trace_123"
-        assert step.step_type == "llm_call"
-        assert step.data["response"] == "hello"
+        assert step.step_type == "thought"
+        assert step.status == "completed"
+        assert step.data["content"] == "hello"
 
 
 class TestMemoryStore:
@@ -136,18 +139,18 @@ class TestToolRegistry:
 
     def test_tool_registered(self):
         registry = get_tool_registry()
-        assert registry.is_registered("test_search")
+        assert registry.is_registered("search_tool")
 
     def test_get_schemas(self):
         registry = get_tool_registry()
-        schemas = registry.get_schemas(["test_search"])
+        schemas = registry.get_schemas(["search_tool"])
         assert len(schemas) == 1
-        assert schemas[0]["function"]["name"] == "test_search"
+        assert schemas[0]["function"]["name"] == "search_tool"
 
     @pytest.mark.asyncio
     async def test_execute_tool(self):
         registry = get_tool_registry()
-        result = await registry.execute("test_search", {"query": "hello"}, uid="test")
+        result = await registry.execute("search_tool", {"query": "hello"}, uid="test")
         assert "结果" in result
 
 
@@ -202,7 +205,7 @@ class TestAgentRunner:
         events = []
         async for event in runner.run(
             task="请搜索相关内容",
-            tools=["test_search"],
+            tools=["search_tool"],
             agent_type="test"
         ):
             events.append(event)

+ 0 - 0
tools/__init__.py