Просмотр исходного кода

Merge branch 'main' of https://git.yishihui.com/howard/Agent

max_liu 1 месяц назад
Родитель
Сommit
ee727bea75

+ 101 - 24
agent/core/runner.py

@@ -28,15 +28,54 @@ logger = logging.getLogger(__name__)
 
 # 内置工具列表(始终自动加载)
 BUILTIN_TOOLS = [
+    # 文件操作工具
     "read_file",
     "edit_file",
     "write_file",
     "glob_files",
     "grep_content",
+
+    # 系统工具
     "bash_command",
+
+    # 技能和目标管理
     "skill",
     "list_skills",
     "goal",
+
+    # 搜索工具
+    "search_posts",
+    "get_search_suggestions",
+
+    # 沙箱工具
+    "sandbox_create_environment",
+    "sandbox_run_shell",
+    "sandbox_rebuild_with_ports",
+    "sandbox_destroy_environment",
+
+    # 浏览器工具
+    "browser_navigate_to_url",
+    "browser_search_web",
+    "browser_go_back",
+    "browser_wait",
+    "browser_click_element",
+    "browser_input_text",
+    "browser_send_keys",
+    "browser_upload_file",
+    "browser_scroll_page",
+    "browser_find_text",
+    "browser_screenshot",
+    "browser_switch_tab",
+    "browser_close_tab",
+    "browser_get_dropdown_options",
+    "browser_select_dropdown_option",
+    "browser_extract_content",
+    "browser_get_page_html",
+    "browser_get_selector_map",
+    "browser_evaluate",
+    "browser_ensure_login_with_cookies",
+    "browser_wait_for_user_action",
+    "browser_done",
 ]
 
 
@@ -121,15 +160,6 @@ class AgentRunner:
         trace_id = None
         message_id = None
 
-        # 创建 Trace
-        if trace and self.trace_store:
-            trace_obj = Trace.create(
-                mode="call",
-                uid=uid,
-                context={"model": model}
-            )
-            trace_id = await self.trace_store.create_trace(trace_obj)
-
         # 准备工具 Schema
         tool_names = BUILTIN_TOOLS.copy()
         if tools:
@@ -139,6 +169,17 @@ class AgentRunner:
 
         tool_schemas = self.tools.get_schemas(tool_names)
 
+        # 创建 Trace
+        if trace and self.trace_store:
+            trace_obj = Trace.create(
+                mode="call",
+                uid=uid,
+                model=model,
+                tools=tool_schemas,  # 保存工具定义
+                llm_params=kwargs,  # 保存 LLM 参数
+            )
+            trace_id = await self.trace_store.create_trace(trace_obj)
+
         # 调用 LLM
         result = await self.llm_call(
             messages=messages,
@@ -155,7 +196,9 @@ class AgentRunner:
                 sequence=1,
                 goal_id=None,  # 单次调用没有 goal
                 content={"text": result.get("content", ""), "tool_calls": result.get("tool_calls")},
-                tokens=result.get("prompt_tokens", 0) + result.get("completion_tokens", 0),
+                prompt_tokens=result.get("prompt_tokens", 0),
+                completion_tokens=result.get("completion_tokens", 0),
+                finish_reason=result.get("finish_reason"),
                 cost=result.get("cost", 0),
             )
             message_id = await self.trace_store.add_message(msg)
@@ -223,6 +266,14 @@ class AgentRunner:
         enable_memory = enable_memory if enable_memory is not None else self.config.enable_memory
         auto_execute_tools = auto_execute_tools if auto_execute_tools is not None else self.config.auto_execute_tools
 
+        # 准备工具 Schema(提前准备,用于 Trace)
+        tool_names = BUILTIN_TOOLS.copy()
+        if tools:
+            for tool in tools:
+                if tool not in tool_names:
+                    tool_names.append(tool)
+        tool_schemas = self.tools.get_schemas(tool_names)
+
         # 创建 Trace
         trace_id = self._generate_id()
         trace_obj = Trace(
@@ -231,7 +282,9 @@ class AgentRunner:
             task=task,
             agent_type=agent_type,
             uid=uid,
-            context={"model": model, **kwargs},
+            model=model,
+            tools=tool_schemas,  # 保存工具定义
+            llm_params=kwargs,  # 保存 LLM 参数
             status="running"
         )
 
@@ -269,6 +322,9 @@ class AgentRunner:
             if messages is None:
                 messages = []
 
+            # 记录初始 system 和 user 消息到 trace
+            sequence = 1
+
             if system_prompt:
                 # 注入记忆和 skills 到 system prompt
                 full_system = system_prompt
@@ -279,9 +335,35 @@ class AgentRunner:
 
                 messages = [{"role": "system", "content": full_system}] + messages
 
+                # 保存 system 消息
+                if self.trace_store:
+                    system_msg = Message.create(
+                        trace_id=trace_id,
+                        role="system",
+                        sequence=sequence,
+                        goal_id=None,  # 初始消息没有 goal
+                        content=full_system,
+                    )
+                    await self.trace_store.add_message(system_msg)
+                    yield system_msg
+                    sequence += 1
+
             # 添加任务描述
             messages.append({"role": "user", "content": task})
 
+            # 保存 user 消息(任务描述)
+            if self.trace_store:
+                user_msg = Message.create(
+                    trace_id=trace_id,
+                    role="user",
+                    sequence=sequence,
+                    goal_id=None,  # 初始消息没有 goal
+                    content=task,
+                )
+                await self.trace_store.add_message(user_msg)
+                yield user_msg
+                sequence += 1
+
             # 获取 GoalTree
             goal_tree = None
             if self.trace_store:
@@ -291,18 +373,7 @@ class AgentRunner:
                 from agent.tools.builtin.goal import set_goal_tree
                 set_goal_tree(goal_tree)
 
-            # 准备工具 Schema
-            tool_names = BUILTIN_TOOLS.copy()
-            if tools:
-                for tool in tools:
-                    if tool not in tool_names:
-                        tool_names.append(tool)
-
-            tool_schemas = self.tools.get_schemas(tool_names)
-
             # 执行循环
-            sequence = 1
-
             for iteration in range(max_iterations):
                 # 注入当前计划到 messages(如果有 goals)
                 llm_messages = list(messages)
@@ -321,7 +392,10 @@ class AgentRunner:
 
                 response_content = result.get("content", "")
                 tool_calls = result.get("tool_calls")
-                step_tokens = result.get("prompt_tokens", 0) + result.get("completion_tokens", 0)
+                finish_reason = result.get("finish_reason")
+                prompt_tokens = result.get("prompt_tokens", 0)
+                completion_tokens = result.get("completion_tokens", 0)
+                step_tokens = prompt_tokens + completion_tokens
                 step_cost = result.get("cost", 0)
 
                 # 获取当前 goal_id
@@ -334,7 +408,9 @@ class AgentRunner:
                     sequence=sequence,
                     goal_id=current_goal_id,
                     content={"text": response_content, "tool_calls": tool_calls},
-                    tokens=step_tokens,
+                    prompt_tokens=prompt_tokens,
+                    completion_tokens=completion_tokens,
+                    finish_reason=finish_reason,
                     cost=step_cost,
                 )
 
@@ -429,6 +505,7 @@ class AgentRunner:
                 await self.trace_store.update_trace(
                     trace_id,
                     status="failed",
+                    error_message=str(e),
                     completed_at=datetime.now()
                 )
                 trace_obj = await self.trace_store.get_trace(trace_id)

+ 13 - 6
agent/execution/fs_store.py

@@ -316,8 +316,17 @@ class FileSystemTraceStore:
             trace.total_messages += 1
             trace.last_sequence = max(trace.last_sequence, message.sequence)
 
+            # 累计 tokens(拆分)
+            if message.prompt_tokens:
+                trace.total_prompt_tokens += message.prompt_tokens
+            if message.completion_tokens:
+                trace.total_completion_tokens += message.completion_tokens
+            # 向后兼容:也更新 total_tokens
             if message.tokens:
                 trace.total_tokens += message.tokens
+            elif message.prompt_tokens or message.completion_tokens:
+                trace.total_tokens += (message.prompt_tokens or 0) + (message.completion_tokens or 0)
+
             if message.cost:
                 trace.total_cost += message.cost
             if message.duration_ms:
@@ -329,6 +338,8 @@ class FileSystemTraceStore:
                 total_messages=trace.total_messages,
                 last_sequence=trace.last_sequence,
                 total_tokens=trace.total_tokens,
+                total_prompt_tokens=trace.total_prompt_tokens,
+                total_completion_tokens=trace.total_completion_tokens,
                 total_cost=trace.total_cost,
                 total_duration_ms=trace.total_duration_ms
             )
@@ -439,9 +450,7 @@ class FileSystemTraceStore:
             if message_file.exists():
                 try:
                     data = json.loads(message_file.read_text())
-                    if data.get("created_at"):
-                        data["created_at"] = datetime.fromisoformat(data["created_at"])
-                    return Message(**data)
+                    return Message.from_dict(data)
                 except Exception:
                     pass
 
@@ -461,9 +470,7 @@ class FileSystemTraceStore:
         for message_file in messages_dir.glob("*.json"):
             try:
                 data = json.loads(message_file.read_text())
-                if data.get("created_at"):
-                    data["created_at"] = datetime.fromisoformat(data["created_at"])
-                messages.append(Message(**data))
+                messages.append(Message.from_dict(data))
             except Exception:
                 continue
 

+ 72 - 12
agent/execution/models.py

@@ -41,21 +41,30 @@ class Trace:
 
     # 统计
     total_messages: int = 0      # 消息总数(改名自 total_steps)
-    total_tokens: int = 0
+    total_tokens: int = 0        # 总 tokens(向后兼容,= prompt + completion)
+    total_prompt_tokens: int = 0      # 总输入 tokens
+    total_completion_tokens: int = 0  # 总输出 tokens
     total_cost: float = 0.0
-    total_duration_ms: int = 0  # 总耗时(毫秒)
+    total_duration_ms: int = 0   # 总耗时(毫秒)
 
     # 进度追踪(head)
     last_sequence: int = 0      # 最新 message 的 sequence
     last_event_id: int = 0      # 最新事件 ID(用于 WS 续传)
 
-    # 上下文
+    # 配置
     uid: Optional[str] = None
-    context: Dict[str, Any] = field(default_factory=dict)
+    model: Optional[str] = None              # 默认模型
+    tools: Optional[List[Dict]] = None       # 工具定义(整个 trace 共享)
+    llm_params: Dict[str, Any] = field(default_factory=dict)  # LLM 参数(temperature 等)
+    context: Dict[str, Any] = field(default_factory=dict)     # 其他元数据
 
     # 当前焦点 goal
     current_goal_id: Optional[str] = None
 
+    # 结果
+    result_summary: Optional[str] = None     # 执行结果摘要
+    error_message: Optional[str] = None      # 错误信息
+
     # 时间
     created_at: datetime = field(default_factory=datetime.now)
     completed_at: Optional[datetime] = None
@@ -86,13 +95,20 @@ class Trace:
             "status": self.status,
             "total_messages": self.total_messages,
             "total_tokens": self.total_tokens,
+            "total_prompt_tokens": self.total_prompt_tokens,
+            "total_completion_tokens": self.total_completion_tokens,
             "total_cost": self.total_cost,
             "total_duration_ms": self.total_duration_ms,
             "last_sequence": self.last_sequence,
             "last_event_id": self.last_event_id,
             "uid": self.uid,
+            "model": self.model,
+            "tools": self.tools,
+            "llm_params": self.llm_params,
             "context": self.context,
             "current_goal_id": self.current_goal_id,
+            "result_summary": self.result_summary,
+            "error_message": self.error_message,
             "created_at": self.created_at.isoformat() if self.created_at else None,
             "completed_at": self.completed_at.isoformat() if self.completed_at else None,
         }
@@ -103,15 +119,17 @@ class Message:
     """
     执行消息 - Trace 中的 LLM 消息
 
-    对应 LLM API 消息格式(assistant/tool),通过 goal_id 关联 Goal。
+    对应 LLM API 消息格式(system/user/assistant/tool),通过 goal_id 关联 Goal。
 
     description 字段自动生成规则:
+    - system: 取 content 前 200 字符
+    - user: 取 content 前 200 字符
     - assistant: 优先取 content,若无 content 则生成 "tool call: XX, XX"
     - tool: 使用 tool name
     """
     message_id: str
     trace_id: str
-    role: Literal["assistant", "tool"]   # 和 LLM API 一致
+    role: Literal["system", "user", "assistant", "tool"]   # 和 LLM API 一致
     sequence: int                        # 全局顺序
     goal_id: Optional[str] = None        # 关联的 Goal 内部 ID(None = 还没有创建 Goal)
     description: str = ""                # 消息描述(系统自动生成)
@@ -119,23 +137,46 @@ class Message:
     content: Any = None                  # 消息内容(和 LLM API 格式一致)
 
     # 元数据
-    tokens: Optional[int] = None
+    prompt_tokens: Optional[int] = None  # 输入 tokens
+    completion_tokens: Optional[int] = None  # 输出 tokens
     cost: Optional[float] = None
     duration_ms: Optional[int] = None
     created_at: datetime = field(default_factory=datetime.now)
 
+    # LLM 响应信息(仅 role="assistant" 时使用)
+    finish_reason: Optional[str] = None  # stop, length, tool_calls, content_filter 等
+
+    @property
+    def tokens(self) -> int:
+        """动态计算总 tokens(向后兼容)"""
+        return (self.prompt_tokens or 0) + (self.completion_tokens or 0)
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "Message":
+        """从字典创建 Message(处理向后兼容)"""
+        # 过滤掉已删除的字段
+        filtered_data = {k: v for k, v in data.items() if k not in ["tokens", "available_tools"]}
+
+        # 解析 datetime
+        if filtered_data.get("created_at") and isinstance(filtered_data["created_at"], str):
+            filtered_data["created_at"] = datetime.fromisoformat(filtered_data["created_at"])
+
+        return cls(**filtered_data)
+
     @classmethod
     def create(
         cls,
         trace_id: str,
-        role: Literal["assistant", "tool"],
+        role: Literal["system", "user", "assistant", "tool"],
         sequence: int,
         goal_id: Optional[str] = None,
         content: Any = None,
         tool_call_id: Optional[str] = None,
-        tokens: Optional[int] = None,
+        prompt_tokens: Optional[int] = None,
+        completion_tokens: Optional[int] = None,
         cost: Optional[float] = None,
         duration_ms: Optional[int] = None,
+        finish_reason: Optional[str] = None,
     ) -> "Message":
         """创建新的 Message,自动生成 description"""
         description = cls._generate_description(role, content)
@@ -149,9 +190,11 @@ class Message:
             content=content,
             description=description,
             tool_call_id=tool_call_id,
-            tokens=tokens,
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
             cost=cost,
             duration_ms=duration_ms,
+            finish_reason=finish_reason,
         )
 
     @staticmethod
@@ -159,10 +202,24 @@ class Message:
         """
         自动生成 description
 
+        - system: 取 content 前 200 字符
+        - user: 取 content 前 200 字符
         - assistant: 优先取 content,若无 content 则生成 "tool call: XX, XX"
         - tool: 使用 tool name
         """
-        if role == "assistant":
+        if role == "system":
+            # system 消息:直接截取文本
+            if isinstance(content, str):
+                return content[:200] + "..." if len(content) > 200 else content
+            return "system prompt"
+
+        elif role == "user":
+            # user 消息:直接截取文本
+            if isinstance(content, str):
+                return content[:200] + "..." if len(content) > 200 else content
+            return "user message"
+
+        elif role == "assistant":
             # assistant 消息:content 是字典,可能包含 text 和 tool_calls
             if isinstance(content, dict):
                 # 优先返回文本内容
@@ -213,9 +270,12 @@ class Message:
             "tool_call_id": self.tool_call_id,
             "content": self.content,
             "description": self.description,
-            "tokens": self.tokens,
+            "tokens": self.tokens,  # 使用 @property 动态计算
+            "prompt_tokens": self.prompt_tokens,
+            "completion_tokens": self.completion_tokens,
             "cost": self.cost,
             "duration_ms": self.duration_ms,
+            "finish_reason": self.finish_reason,
             "created_at": self.created_at.isoformat() if self.created_at else None,
         }
 

+ 18 - 4
agent/goal/models.py

@@ -142,6 +142,13 @@ class GoalTree:
                 return goal
         return None
 
+    def find_by_display_id(self, display_id: str) -> Optional[Goal]:
+        """按显示 ID 查找 Goal(如 "1", "2.1", "2.2")"""
+        for goal in self.goals:
+            if self._generate_display_id(goal) == display_id:
+                return goal
+        return None
+
     def find_parent(self, goal_id: str) -> Optional[Goal]:
         """查找指定 Goal 的父节点"""
         goal = self.find(goal_id)
@@ -270,8 +277,15 @@ class GoalTree:
         self.current_id = goal_id
         return goal
 
-    def complete(self, goal_id: str, summary: str) -> Goal:
-        """完成指定 Goal"""
+    def complete(self, goal_id: str, summary: str, clear_focus: bool = True) -> Goal:
+        """
+        完成指定 Goal
+
+        Args:
+            goal_id: 要完成的目标 ID
+            summary: 完成总结
+            clear_focus: 如果完成的是当前焦点,是否清除焦点(默认 True)
+        """
         goal = self.find(goal_id)
         if not goal:
             raise ValueError(f"Goal not found: {goal_id}")
@@ -279,8 +293,8 @@ class GoalTree:
         goal.status = "completed"
         goal.summary = summary
 
-        # 如果完成的是当前焦点,清除焦点
-        if self.current_id == goal_id:
+        # 如果完成的是当前焦点,根据参数决定是否清除焦点
+        if clear_focus and self.current_id == goal_id:
             self.current_id = None
 
         # 检查是否所有兄弟都完成了,如果是则自动完成父节点

+ 46 - 60
agent/goal/tool.py

@@ -36,33 +36,23 @@ async def goal_tool(
         under: 为指定目标添加子目标
         done: 完成当前目标,值为 summary
         abandon: 放弃当前目标,值为原因
-        focus: 切换焦点到指定内部 id
+        focus: 切换焦点到指定 ID
 
     Returns:
         更新后的计划状态文本
     """
     changes = []
 
-    # 1. 处理 abandon(先处理,因为可能需要在 add 新目标前放弃旧的)
-    if abandon is not None:
-        if not tree.current_id:
-            return "错误:没有当前目标可以放弃"
-        goal = tree.abandon(tree.current_id, abandon)
-        display_id = tree._generate_display_id(goal)
-        changes.append(f"已放弃: {display_id}. {goal.description}")
-
-        # 推送事件
-        if store and trace_id:
-            print(f"[DEBUG] goal_tool: calling store.update_goal for abandon: goal_id={goal.id}")
-            await store.update_goal(trace_id, goal.id, status="abandoned", summary=abandon)
-        else:
-            print(f"[DEBUG] goal_tool: skip event push (store={store}, trace_id={trace_id})")
-
-    # 2. 处理 done
+    # 1. 处理 done(完成当前目标)
     if done is not None:
         if not tree.current_id:
-            return "错误:没有当前目标可以完成"
-        goal = tree.complete(tree.current_id, done)
+            return f"错误:没有当前目标可以完成。当前焦点为空,请先使用 focus 参数切换到要完成的目标。\n\n当前计划:\n{tree.to_prompt()}"
+
+        # 完成当前目标
+        # 如果同时指定了 focus,则不清空焦点(后面会切换到新目标)
+        # 如果只有 done,则清空焦点
+        clear_focus = (focus is None)
+        goal = tree.complete(tree.current_id, done, clear_focus=clear_focus)
         display_id = tree._generate_display_id(goal)
         changes.append(f"已完成: {display_id}. {goal.description}")
 
@@ -73,34 +63,39 @@ async def goal_tool(
         else:
             print(f"[DEBUG] goal_tool: skip event push (store={store}, trace_id={trace_id})")
 
-        # 检查是否有级联完成的父目标
+        # 检查是否有级联完成的父目标(complete方法已经处理,这里只需要记录)
         if goal.parent_id:
             parent = tree.find(goal.parent_id)
             if parent and parent.status == "completed":
                 parent_display_id = tree._generate_display_id(parent)
                 changes.append(f"自动完成: {parent_display_id}. {parent.description}(所有子目标已完成)")
 
-    # 3. 处理 focus(在 add 之前,这样 add 可以添加到新焦点下
+    # 2. 处理 focus(切换焦点到新目标)
     if focus is not None:
-        # focus 参数可以是内部 ID 或显示 ID
-        # 先尝试作为内部 ID 查找
-        goal = tree.find(focus)
-
-        # 如果找不到,尝试根据显示 ID 查找
-        if not goal:
-            # 通过遍历所有 goal 查找匹配的显示 ID
-            for g in tree.goals:
-                if tree._generate_display_id(g) == focus:
-                    goal = g
-                    break
+        goal = tree.find_by_display_id(focus)
 
         if not goal:
-            return f"错误:找不到目标 {focus}"
+            return f"错误:找不到目标 {focus}\n\n当前计划:\n{tree.to_prompt()}"
 
         tree.focus(goal.id)
         display_id = tree._generate_display_id(goal)
         changes.append(f"切换焦点: {display_id}. {goal.description}")
 
+    # 3. 处理 abandon(放弃当前目标)
+    if abandon is not None:
+        if not tree.current_id:
+            return f"错误:没有当前目标可以放弃。当前焦点为空。\n\n当前计划:\n{tree.to_prompt()}"
+        goal = tree.abandon(tree.current_id, abandon)
+        display_id = tree._generate_display_id(goal)
+        changes.append(f"已放弃: {display_id}. {goal.description}")
+
+        # 推送事件
+        if store and trace_id:
+            print(f"[DEBUG] goal_tool: calling store.update_goal for abandon: goal_id={goal.id}")
+            await store.update_goal(trace_id, goal.id, status="abandoned", summary=abandon)
+        else:
+            print(f"[DEBUG] goal_tool: skip event push (store={store}, trace_id={trace_id})")
+
     # 4. 处理 add
     if add is not None:
         # 检查 after 和 under 互斥
@@ -120,34 +115,20 @@ async def goal_tool(
             # 确定添加位置
             if after is not None:
                 # 在指定 goal 后面添加(同层级)
-                # after 参数可以是内部 ID 或显示 ID
-                target_goal = tree.find(after)
-                if not target_goal:
-                    # 尝试根据显示 ID 查找
-                    for g in tree.goals:
-                        if tree._generate_display_id(g) == after:
-                            target_goal = g
-                            break
+                target_goal = tree.find_by_display_id(after)
 
                 if not target_goal:
-                    return f"错误:找不到目标 {after}"
+                    return f"错误:找不到目标 {after}\n\n当前计划:\n{tree.to_prompt()}"
 
                 new_goals = tree.add_goals_after(target_goal.id, descriptions, reasons=reasons)
                 changes.append(f"在 {tree._generate_display_id(target_goal)} 后面添加 {len(new_goals)} 个同级目标")
 
             elif under is not None:
                 # 为指定 goal 添加子目标
-                # under 参数可以是内部 ID 或显示 ID
-                parent_goal = tree.find(under)
-                if not parent_goal:
-                    # 尝试根据显示 ID 查找
-                    for g in tree.goals:
-                        if tree._generate_display_id(g) == under:
-                            parent_goal = g
-                            break
+                parent_goal = tree.find_by_display_id(under)
 
                 if not parent_goal:
-                    return f"错误:找不到目标 {under}"
+                    return f"错误:找不到目标 {under}\n\n当前计划:\n{tree.to_prompt()}"
 
                 new_goals = tree.add_goals(descriptions, reasons=reasons, parent_id=parent_goal.id)
                 changes.append(f"在 {tree._generate_display_id(parent_goal)} 下添加 {len(new_goals)} 个子目标")
@@ -198,11 +179,11 @@ def create_goal_tool_schema() -> dict:
 
 - add: 添加目标(逗号分隔多个)
 - reason: 创建理由(逗号分隔多个,与 add 一一对应)。说明为什么要做这些目标。
-- after: 在指定目标后面添加(同层级)。可以是内部 ID 或显示 ID。
-- under: 为指定目标添加子目标。可以是内部 ID 或显示 ID。如已有子目标,追加到最后。
+- after: 在指定目标后面添加(同层级)。使用目标的 ID。
+- under: 为指定目标添加子目标。使用目标的 ID。如已有子目标,追加到最后。
 - done: 完成当前目标,值为 summary
 - abandon: 放弃当前目标,值为原因(会触发 context 压缩)
-- focus: 切换焦点到指定 id(可以是内部 ID 或显示 ID)
+- focus: 切换焦点到指定目标。使用目标的 ID。
 
 位置控制(优先使用 after):
 - 不指定 after/under: 添加到当前 focus 下作为子目标(无 focus 时添加到顶层)
@@ -210,17 +191,22 @@ def create_goal_tool_schema() -> dict:
 - under="X": 为目标 X 添加子目标
 - after 和 under 不能同时指定
 
+执行顺序:
+- done → focus → abandon → add
+- 如果同时指定 done 和 focus,会先完成当前目标,再切换焦点到新目标
+
 示例:
 - goal(add="分析代码, 实现功能, 测试") - 添加顶层目标
 - goal(add="设计接口, 实现代码", under="2") - 为目标2添加子目标
 - goal(add="编写文档", after="3") - 在目标3后面添加同级任务
 - goal(add="集成测试", after="2.2") - 在目标2.2后面添加同级任务
 - goal(done="发现用户模型在 models/user.py") - 完成当前目标
+- goal(done="已完成调研", focus="2") - 完成当前目标,切换到目标2
 - goal(abandon="方案A需要Redis,环境没有") - 放弃当前目标
 
-注意:内部 ID 是纯自增数字("1", "2", "3"),显示 ID 是带层级的("1", "2.1", "2.2")。
-所有 ID 参数都可以使用任意格式的 ID。
-reason 应该与 add 的目标数量一致,如果数量不一致,缺少的 reason 将为空
+注意:
+- 目标 ID 的格式为 "1", "2", "2.1", "2.2" 等,在计划视图中可以看到
+- reason 应该与 add 的目标数量一致,如果数量不一致,缺少的 reason 将为空
 """,
         "parameters": {
             "type": "object",
@@ -235,11 +221,11 @@ reason 应该与 add 的目标数量一致,如果数量不一致,缺少的 r
                 },
                 "after": {
                     "type": "string",
-                    "description": "在指定目标后面添加(同层级)。可以是内部 ID 或显示 ID。"
+                    "description": "在指定目标后面添加(同层级)。使用目标的 ID,如 \"2\" 或 \"2.1\"。"
                 },
                 "under": {
                     "type": "string",
-                    "description": "为指定目标添加子目标。可以是内部 ID 或显示 ID。"
+                    "description": "为指定目标添加子目标。使用目标的 ID,如 \"2\" 或 \"2.1\"。"
                 },
                 "done": {
                     "type": "string",
@@ -251,7 +237,7 @@ reason 应该与 add 的目标数量一致,如果数量不一致,缺少的 r
                 },
                 "focus": {
                     "type": "string",
-                    "description": "切换焦点到指定 goal id(可以是内部 ID 或显示 ID)"
+                    "description": "切换焦点到指定目标。使用目标的 ID,如 \"2\" 或 \"2.1\"。"
                 }
             },
             "required": []

+ 17 - 2
agent/llm/gemini.py

@@ -327,6 +327,7 @@ def create_gemini_llm_call(
                 "tool_calls": List[Dict] | None,
                 "prompt_tokens": int,
                 "completion_tokens": int,
+                "finish_reason": str,
                 "cost": float
             }
         """
@@ -375,14 +376,27 @@ def create_gemini_llm_call(
         # 解析响应
         content = ""
         tool_calls = None
+        finish_reason = "stop"  # 默认值
 
         candidates = gemini_resp.get("candidates", [])
         if candidates:
             candidate = candidates[0]
 
+            # 提取 finish_reason(Gemini -> OpenAI 格式映射)
+            gemini_finish_reason = candidate.get("finishReason", "STOP")
+            if gemini_finish_reason == "STOP":
+                finish_reason = "stop"
+            elif gemini_finish_reason == "MAX_TOKENS":
+                finish_reason = "length"
+            elif gemini_finish_reason in ("SAFETY", "RECITATION"):
+                finish_reason = "content_filter"
+            elif gemini_finish_reason == "MALFORMED_FUNCTION_CALL":
+                finish_reason = "stop"  # 映射为 stop,但在 content 中包含错误信息
+            else:
+                finish_reason = gemini_finish_reason.lower()  # 保持原值,转小写
+
             # 检查是否有错误
-            finish_reason = candidate.get("finishReason")
-            if finish_reason == "MALFORMED_FUNCTION_CALL":
+            if gemini_finish_reason == "MALFORMED_FUNCTION_CALL":
                 # Gemini 返回了格式错误的函数调用
                 # 提取 finishMessage 中的内容作为 content
                 finish_message = candidate.get("finishMessage", "")
@@ -426,6 +440,7 @@ def create_gemini_llm_call(
             "tool_calls": tool_calls,
             "prompt_tokens": prompt_tokens,
             "completion_tokens": completion_tokens,
+            "finish_reason": finish_reason,
             "cost": 0.0
         }
 

+ 3 - 0
agent/llm/openrouter.py

@@ -32,6 +32,7 @@ async def openrouter_llm_call(
             "tool_calls": List[Dict] | None,
             "prompt_tokens": int,
             "completion_tokens": int,
+            "finish_reason": str,
             "cost": float
         }
     """
@@ -85,6 +86,7 @@ async def openrouter_llm_call(
 
     content = message.get("content", "")
     tool_calls = message.get("tool_calls")
+    finish_reason = choice.get("finish_reason")  # stop, length, tool_calls, content_filter 等
 
     # 提取 usage
     usage = result.get("usage", {})
@@ -99,6 +101,7 @@ async def openrouter_llm_call(
         "tool_calls": tool_calls,
         "prompt_tokens": prompt_tokens,
         "completion_tokens": completion_tokens,
+        "finish_reason": finish_reason,
         "cost": cost
     }
 

+ 59 - 12
agent/skills/core.md

@@ -12,38 +12,56 @@ description: 核心系统能力,自动加载到 System Prompt
 
 ## 计划与执行
 
-对于复杂任务,你要先分析需求,并使用 `step` 工具来管理执行计划和进度。这一工具会形成一棵思维树。
+对于复杂任务,你要先分析需求,并使用 `goal` 工具来管理执行计划和进度。这一工具会形成一棵目标树。
 
 ### 创建计划:拆分任务步骤,创建TODO
 
 ```
-step(plan=["调研并确定方案", "执行方案", "评估结果"])
+goal(add="调研并确定方案, 执行方案, 评估结果")
 ```
 
-这将在当前节点下增加3个plan节点。你可以在执行过程中设置进一步的sub plan
+这将创建3个目标。你可以在执行过程中继续添加子目标。
 
 ### 开始执行
 
-聚焦到某个目标开始执行:
+聚焦到某个目标开始执行(使用目标的 ID):
 
 ```
-step(focus="调研并确定方案")
+goal(focus="1")
+```
+
+目标的 ID 会显示在计划视图中,格式如 "1", "2", "2.1", "2.2" 等。
+
+### 完成目标
+
+完成当前目标并提供总结:
+
+```
+goal(done="人物姿势的最佳提取工具是openpose")
 ```
 
 ### 完成并切换
 
-完成当前目标,提供总结,切换到下一个:
+先完成当前目标,再切换焦点到下一个:
 
 ```
-step(complete=True, summary="人物姿势的最佳提取工具是openpose", focus="执行方案")
+goal(done="人物姿势的最佳提取工具是openpose", focus="2")
 ```
 
-### 查看进度
+这会先完成当前正在执行的目标,然后切换焦点到目标 "2"。
+
+### 添加子目标
 
-查看当前执行进度:
+为指定目标添加子目标:
 
 ```
-read_progress()
+goal(add="设计接口, 实现代码", under="2")
+```
+
+在指定目标后面添加同级目标:
+
+```
+goal(add="编写文档", after="2")
 ```
 
 ### 使用规范
@@ -52,9 +70,38 @@ read_progress()
 2. **summary 应简洁**:记录关键结论和发现,不要冗长
 3. **计划可调整**:根据执行情况追加或跳过目标
 4. **简单任务不需要计划**:单步操作直接执行即可
+5. **使用 ID 进行定位**:focus、after、under 参数都使用目标的 ID(如 "1", "2.1")
 
 ## 信息调研
 
-你可以通过联网搜索工具`search_posts`获取来自Github、小红书、微信公众号、知乎等渠道的信息。
+你可以通过联网搜索工具`search_posts`获取来自Github、小红书、微信公众号、知乎等渠道的信息。对于需要深度交互的网页内容,使用浏览器工具进行操作。
+
+调研过程可能需要多次搜索,比如基于搜索结果中获得的启发或信息启动新的搜索,直到得到令人满意的答案。你可以使用`goal`工具管理搜索的过程,或者使用文档记录搜索的中间或最终结果。
+
+## 浏览器工具使用指南
+
+所有浏览器工具都以 `browser_` 为前缀。浏览器会话会持久化,无需每次重新启动。
+
+### 基本工作流程
+
+1. **页面导航**: 使用 `browser_navigate_to_url` 或 `browser_search_web` 到达目标页面
+2. **等待加载**: 页面跳转后调用 `browser_wait(seconds=2)` 等待内容加载
+3. **获取元素索引**: 调用 `browser_get_selector_map` 获取可交互元素的索引映射
+4. **执行交互**: 使用 `browser_click_element`、`browser_input_text` 等工具操作页面
+5. **提取内容**: 使用 `browser_extract_content` 或 `browser_get_page_html` 获取数据
+
+### 关键原则
+
+- **必须先获取索引**: 所有 `index` 参数都需要先通过 `browser_get_selector_map` 获取
+- **操作后等待**: 任何可能触发页面变化的操作(点击、输入、滚动)后都要调用 `browser_wait`
+- **优先用高级工具**: 优先使用 `browser_extract_content` 而不是手动解析HTML
+- **登录处理**: 需要登录的网站使用 `browser_ensure_login_with_cookies(cookie_type="xhs")` 注入Cookie
+- **复杂操作用JS**: 当标准工具无法满足时,使用 `browser_evaluate` 执行JavaScript代码
+
+### 工具分类
 
-调研过程可能需要多次搜索,比如基于搜索结果中获得的启发或信息启动新的搜索,直到得到令人满意的答案。你可以使用`step`工具管理搜索的过程,或者使用文档记录搜索的中间或最终结果。
+- **导航**: browser_navigate_to_url, browser_search_web, browser_go_back, browser_wait
+- **交互**: browser_click_element, browser_input_text, browser_send_keys, browser_upload_file
+- **视图**: browser_scroll_page, browser_find_text, browser_screenshot
+- **提取**: browser_extract_content, browser_get_page_html, browser_get_selector_map
+- **高级**: browser_evaluate, browser_ensure_login_with_cookies, browser_wait_for_user_action

+ 3 - 0
agent/tools/builtin/__init__.py

@@ -19,6 +19,9 @@ from agent.tools.builtin.search import search_posts, get_search_suggestions
 from agent.tools.builtin.sandbox import (sandbox_create_environment, sandbox_run_shell,
                                          sandbox_rebuild_with_ports,sandbox_destroy_environment)
 
+# 导入浏览器工具以触发注册
+import agent.tools.builtin.browser  # noqa: F401
+
 __all__ = [
     "read_file",
     "edit_file",

+ 101 - 0
agent/tools/builtin/browser/__init__.py

@@ -0,0 +1,101 @@
+"""
+浏览器工具 - Browser-Use 原生工具适配器
+
+基于 browser-use 实现的浏览器自动化工具集。
+"""
+
+from agent.tools.builtin.browser.baseClass import (
+    # 会话管理
+    init_browser_session,
+    get_browser_session,
+    cleanup_browser_session,
+    kill_browser_session,
+
+    # 导航类工具
+    browser_navigate_to_url,
+    browser_search_web,
+    browser_go_back,
+    browser_wait,
+
+    # 元素交互工具
+    browser_click_element,
+    browser_input_text,
+    browser_send_keys,
+    browser_upload_file,
+
+    # 滚动和视图工具
+    browser_scroll_page,
+    browser_find_text,
+    browser_screenshot,
+
+    # 标签页管理工具
+    browser_switch_tab,
+    browser_close_tab,
+
+    # 下拉框工具
+    browser_get_dropdown_options,
+    browser_select_dropdown_option,
+
+    # 内容提取工具
+    browser_extract_content,
+    browser_get_page_html,
+    browser_get_selector_map,
+
+    # JavaScript 执行工具
+    browser_evaluate,
+    browser_ensure_login_with_cookies,
+
+    # 等待用户操作
+    browser_wait_for_user_action,
+
+    # 任务完成
+    browser_done,
+)
+
+__all__ = [
+    # 会话管理
+    'init_browser_session',
+    'get_browser_session',
+    'cleanup_browser_session',
+    'kill_browser_session',
+
+    # 导航类工具
+    'browser_navigate_to_url',
+    'browser_search_web',
+    'browser_go_back',
+    'browser_wait',
+
+    # 元素交互工具
+    'browser_click_element',
+    'browser_input_text',
+    'browser_send_keys',
+    'browser_upload_file',
+
+    # 滚动和视图工具
+    'browser_scroll_page',
+    'browser_find_text',
+    'browser_screenshot',
+
+    # 标签页管理工具
+    'browser_switch_tab',
+    'browser_close_tab',
+
+    # 下拉框工具
+    'browser_get_dropdown_options',
+    'browser_select_dropdown_option',
+
+    # 内容提取工具
+    'browser_extract_content',
+    'browser_get_page_html',
+    'browser_get_selector_map',
+
+    # JavaScript 执行工具
+    'browser_evaluate',
+    'browser_ensure_login_with_cookies',
+
+    # 等待用户操作
+    'browser_wait_for_user_action',
+
+    # 任务完成
+    'browser_done',
+]

+ 298 - 198
agent/tools/builtin/browser/baseClass.py

@@ -10,9 +10,29 @@ Native Browser-Use Tools Adapter
 2. 状态自动保持 - 登录状态、Cookie、LocalStorage 等
 3. 完整的底层访问 - 可以直接使用 CDP 协议
 4. 性能优异 - 避免频繁创建/销毁浏览器实例
+5. 多种浏览器类型 - 支持 local、cloud、container 三种模式
+
+支持的浏览器类型:
+1. Local (本地浏览器):
+   - 在本地运行 Chrome
+   - 支持可视化调试
+   - 速度最快
+   - 示例: init_browser_session(browser_type="local")
+
+2. Cloud (云浏览器):
+   - 在云端运行
+   - 不占用本地资源
+   - 适合生产环境
+   - 示例: init_browser_session(browser_type="cloud")
+
+3. Container (容器浏览器):
+   - 在独立容器中运行
+   - 隔离性好
+   - 支持预配置账户
+   - 示例: init_browser_session(browser_type="container", url="https://example.com")
 
 使用方法:
-1. 在 Agent 初始化时调用 init_browser_session()
+1. 在 Agent 初始化时调用 init_browser_session() 并指定 browser_type
 2. 使用各个工具函数执行浏览器操作
 3. 任务结束时调用 cleanup_browser_session()
 
@@ -27,6 +47,7 @@ import sys
 import os
 import json
 import asyncio
+import aiohttp
 from typing import Optional, List, Dict, Any, Tuple
 from pathlib import Path
 from urllib.parse import urlparse
@@ -44,6 +65,12 @@ from browser_use.tools.service import Tools
 from browser_use.agent.views import ActionResult
 from browser_use.filesystem.file_system import FileSystem
 
+
+# ============================================================
+# 无需注册的内部辅助函数
+# ============================================================
+
+
 # ============================================================
 # 全局浏览器会话管理
 # ============================================================
@@ -53,52 +80,248 @@ _browser_session: Optional[BrowserSession] = None
 _browser_tools: Optional[Tools] = None
 _file_system: Optional[FileSystem] = None
 
+async def create_container(url: str, account_name: str = "liuwenwu") -> Dict[str, Any]:
+    """
+    创建浏览器容器并导航到指定URL
+
+    按照 test.md 的要求:
+    1.1 调用接口创建容器
+    1.2 调用接口创建窗口并导航到URL
+
+    Args:
+        url: 要导航的URL地址
+        account_name: 账户名称
+
+    Returns:
+        包含容器信息的字典:
+        - success: 是否成功
+        - container_id: 容器ID
+        - vnc: VNC访问URL
+        - cdp: CDP协议URL(用于浏览器连接)
+        - connection_id: 窗口连接ID
+        - error: 错误信息(如果失败)
+    """
+    result = {
+        "success": False,
+        "container_id": None,
+        "vnc": None,
+        "cdp": None,
+        "connection_id": None,
+        "error": None
+    }
+
+    try:
+        async with aiohttp.ClientSession() as session:
+            # 步骤1.1: 创建容器
+            print("📦 步骤1.1: 创建容器...")
+            create_url = "http://47.84.182.56:8200/api/v1/container/create"
+            create_payload = {
+                "auto_remove": True,
+                "need_port_binding": True,
+                "max_lifetime_seconds": 900
+            }
+
+            async with session.post(create_url, json=create_payload) as resp:
+                if resp.status != 200:
+                    raise RuntimeError(f"创建容器失败: HTTP {resp.status}")
+
+                create_result = await resp.json()
+                if create_result.get("code") != 0:
+                    raise RuntimeError(f"创建容器失败: {create_result.get('msg')}")
+
+                data = create_result.get("data", {})
+                result["container_id"] = data.get("container_id")
+                result["vnc"] = data.get("vnc")
+                result["cdp"] = data.get("cdp")
+
+                print(f"✅ 容器创建成功")
+                print(f"   Container ID: {result['container_id']}")
+                print(f"   VNC: {result['vnc']}")
+                print(f"   CDP: {result['cdp']}")
+
+            # 等待容器内的浏览器启动
+            print(f"\n⏳ 等待容器内浏览器启动...")
+            await asyncio.sleep(5)
+
+            # 步骤1.2: 创建页面并导航
+            print(f"\n📱 步骤1.2: 创建页面并导航到 {url}...")
+
+            page_create_url = "http://47.84.182.56:8200/api/v1/browser/page/create"
+            page_payload = {
+                "container_id": result["container_id"],
+                "url": url,
+                "account_name": account_name,
+                "need_wait": True,
+                "timeout": 30
+            }
+
+            # 重试机制:最多尝试3次
+            max_retries = 3
+            page_created = False
+            last_error = None
+
+            for attempt in range(max_retries):
+                try:
+                    if attempt > 0:
+                        print(f"   重试 {attempt + 1}/{max_retries}...")
+                        await asyncio.sleep(3)  # 重试前等待
+
+                    async with session.post(page_create_url, json=page_payload, timeout=aiohttp.ClientTimeout(total=60)) as resp:
+                        if resp.status != 200:
+                            response_text = await resp.text()
+                            last_error = f"HTTP {resp.status}: {response_text[:200]}"
+                            continue
+
+                        page_result = await resp.json()
+                        if page_result.get("code") != 0:
+                            last_error = f"{page_result.get('msg')}"
+                            continue
+
+                        page_data = page_result.get("data", {})
+                        result["connection_id"] = page_data.get("connection_id")
+                        result["success"] = True
+                        page_created = True
+
+                        print(f"✅ 页面创建成功")
+                        print(f"   Connection ID: {result['connection_id']}")
+                        break
+
+                except asyncio.TimeoutError:
+                    last_error = "请求超时"
+                    continue
+                except aiohttp.ClientError as e:
+                    last_error = f"网络错误: {str(e)}"
+                    continue
+                except Exception as e:
+                    last_error = f"未知错误: {str(e)}"
+                    continue
+
+            if not page_created:
+                raise RuntimeError(f"创建页面失败(尝试{max_retries}次后): {last_error}")
+
+    except Exception as e:
+        result["error"] = str(e)
+        print(f"❌ 错误: {str(e)}")
+
+    return result
 
 async def init_browser_session(
+    browser_type: str = "local",
     headless: bool = False,
-    user_data_dir: Optional[str] = None,
+    url: Optional[str] = None,
     profile_name: str = "default",
+    user_data_dir: Optional[str] = None,
     browser_profile: Optional[BrowserProfile] = None,
-    use_cloud: bool = False,
     **kwargs
 ) -> tuple[BrowserSession, Tools]:
     """
-    初始化全局浏览器会话
+    初始化全局浏览器会话 - 支持三种浏览器类型
 
     Args:
+        browser_type: 浏览器类型 ("local", "cloud", "container")
         headless: 是否无头模式
-        user_data_dir: 用户数据目录(用于保存登录状态)
-        profile_name: 配置文件名称
-        browser_profile: BrowserProfile 对象(用于预设 cookies 等)
-        use_cloud: 是否使用云浏览器(默认 False,使用本地浏览器)
+        url: 初始访问URL(可选)
+             - local/cloud: 初始化后会自动导航到此URL
+             - container: 容器启动时访问的URL;未提供时回退为 about:blank
+        profile_name: 配置文件/账户名称(默认 "default")
+                     - local: 用于创建用户数据目录路径
+                     - cloud: 云浏览器配置ID
+                     - container: 容器账户名称
+        user_data_dir: 用户数据目录(仅 local 模式,高级用法)
+                      如果提供则覆盖 profile_name 生成的路径
+        browser_profile: BrowserProfile 对象(通用,高级用法)
+                        用于预设 cookies 等
         **kwargs: 其他 BrowserSession 参数
 
     Returns:
         (BrowserSession, Tools) 元组
+
+    Examples:
+        # 本地浏览器
+        browser, tools = await init_browser_session(
+            browser_type="local",
+            url="https://www.baidu.com"  # 可选
+        )
+
+        # 云浏览器
+        browser, tools = await init_browser_session(
+            browser_type="cloud",
+            profile_name="my_cloud_profile"  # 可选
+        )
+
+        # 容器浏览器
+        browser, tools = await init_browser_session(
+            browser_type="container",
+            url="https://www.xiaohongshu.com",  # 建议提供;缺省为 about:blank
+            profile_name="my_account"  # 可选
+        )
     """
     global _browser_session, _browser_tools, _file_system
 
     if _browser_session is not None:
         return _browser_session, _browser_tools
 
-    # 设置用户数据目录(持久化登录状态)
-    if user_data_dir is None and profile_name and not use_cloud:
-        user_data_dir = str(Path.home() / ".browser_use" / "profiles" / profile_name)
-        Path(user_data_dir).mkdir(parents=True, exist_ok=True)
+    # 验证 browser_type
+    valid_types = ["local", "cloud", "container"]
+    if browser_type not in valid_types:
+        raise ValueError(f"无效的 browser_type: {browser_type},必须是 {valid_types} 之一")
 
-    # 创建浏览器会话
+    # 创建浏览器会话参数
     session_params = {
         "headless": headless,
     }
 
-    if use_cloud:
-        # 云浏览器模式
-        session_params["use_cloud"] = True
+    # === Container 模式 ===
+    if browser_type == "container":
+        print("🐳 使用容器浏览器模式")
+
+        # container 模式需要初始 URL;未提供时回退到空白页
+        if not url:
+            url = "about:blank"  # 使用默认空白页
+            print("⚠️  未提供 url 参数,使用默认空白页")
+
+        # 创建容器并获取 CDP URL
+        print(f"📦 正在创建容器...")
+        container_info = await create_container(
+            url=url,
+            account_name=profile_name
+        )
+
+        if not container_info["success"]:
+            raise RuntimeError(f"容器创建失败: {container_info['error']}")
+
+        cdp_url = container_info["cdp"]
+        print(f"✅ 容器创建成功")
+        print(f"   CDP URL: {cdp_url}")
+        print(f"   Container ID: {container_info['container_id']}")
+        print(f"   Connection ID: {container_info.get('connection_id')}")
+
+        # 使用容器的 CDP URL 连接
+        session_params["cdp_url"] = cdp_url
+
+        # 等待容器完全启动
+        print("⏳ 等待容器浏览器启动...")
+        await asyncio.sleep(3)
+
+    # === Cloud 模式 ===
+    elif browser_type == "cloud":
         print("🌐 使用云浏览器模式")
-    else:
-        # 本地浏览器模式
+        session_params["use_cloud"] = True
+
+        # profile_name 作为云配置ID
+        if profile_name and profile_name != "default":
+            session_params["cloud_profile_id"] = profile_name
+
+    # === Local 模式 ===
+    else:  # local
+        print("💻 使用本地浏览器模式")
         session_params["is_local"] = True
 
+        # 设置用户数据目录(持久化登录状态)
+        if user_data_dir is None and profile_name:
+            user_data_dir = str(Path.home() / ".browser_use" / "profiles" / profile_name)
+            Path(user_data_dir).mkdir(parents=True, exist_ok=True)
+
         # macOS 上显式指定 Chrome 路径
         import platform
         if platform.system() == "Darwin":  # macOS
@@ -110,13 +333,14 @@ async def init_browser_session(
         if user_data_dir:
             session_params["user_data_dir"] = user_data_dir
 
-    # 只在有值时才添加 browser_profile
+    # 只在有值时才添加 browser_profile (适用于所有模式)
     if browser_profile:
         session_params["browser_profile"] = browser_profile
 
     # 合并其他参数
     session_params.update(kwargs)
 
+    # 创建浏览器会话
     _browser_session = BrowserSession(**session_params)
 
     # 启动浏览器
@@ -132,6 +356,13 @@ async def init_browser_session(
     base_dir.mkdir(parents=True, exist_ok=True)
     _file_system = FileSystem(base_dir=str(base_dir))
 
+    print("✅ 浏览器会话初始化成功")
+
+    # 如果是 local 或 cloud 模式且提供了 URL,导航到该 URL
+    if browser_type in ["local", "cloud"] and url:
+        print(f"🔗 导航到: {url}")
+        await _browser_tools.navigate(url=url, browser_session=_browser_session)
+
     return _browser_session, _browser_tools
 
 
@@ -318,12 +549,16 @@ def _fetch_profile_id(cookie_type: str) -> Optional[str]:
         return None
 
 
+# ============================================================
+# 需要注册的工具
+# ============================================================
+
 # ============================================================
 # 导航类工具 (Navigation Tools)
 # ============================================================
 
 @tool()
-async def navigate_to_url(url: str, new_tab: bool = False) -> ToolResult:
+async def browser_navigate_to_url(url: str, new_tab: bool = False) -> ToolResult:
     """
     导航到指定的 URL
     Navigate to a specific URL
@@ -363,7 +598,7 @@ async def navigate_to_url(url: str, new_tab: bool = False) -> ToolResult:
 
 
 @tool()
-async def search_web(query: str, engine: str = "google") -> ToolResult:
+async def browser_search_web(query: str, engine: str = "google") -> ToolResult:
     """
     使用搜索引擎搜索
     Search the web using a search engine
@@ -400,7 +635,7 @@ async def search_web(query: str, engine: str = "google") -> ToolResult:
 
 
 @tool()
-async def go_back() -> ToolResult:
+async def browser_go_back() -> ToolResult:
     """
     返回到上一个页面
     Go back to the previous page
@@ -427,7 +662,7 @@ async def go_back() -> ToolResult:
 
 
 @tool()
-async def wait(seconds: int = 3) -> ToolResult:
+async def browser_wait(seconds: int = 3) -> ToolResult:
     """
     等待指定的秒数
     Wait for a specified number of seconds
@@ -464,7 +699,7 @@ async def wait(seconds: int = 3) -> ToolResult:
 # ============================================================
 
 @tool()
-async def click_element(index: int) -> ToolResult:
+async def browser_click_element(index: int) -> ToolResult:
     """
     通过索引点击页面元素
     Click an element by index
@@ -501,7 +736,7 @@ async def click_element(index: int) -> ToolResult:
 
 
 @tool()
-async def input_text(index: int, text: str, clear: bool = True) -> ToolResult:
+async def browser_input_text(index: int, text: str, clear: bool = True) -> ToolResult:
     """
     在指定元素中输入文本
     Input text into an element
@@ -539,7 +774,7 @@ async def input_text(index: int, text: str, clear: bool = True) -> ToolResult:
 
 
 @tool()
-async def send_keys(keys: str) -> ToolResult:
+async def browser_send_keys(keys: str) -> ToolResult:
     """
     发送键盘按键或快捷键
     Send keyboard keys or shortcuts
@@ -579,7 +814,7 @@ async def send_keys(keys: str) -> ToolResult:
 
 
 @tool()
-async def upload_file(index: int, path: str) -> ToolResult:
+async def browser_upload_file(index: int, path: str) -> ToolResult:
     """
     上传文件到文件输入元素
     Upload a file to a file input element
@@ -624,7 +859,7 @@ async def upload_file(index: int, path: str) -> ToolResult:
 # ============================================================
 
 @tool()
-async def scroll_page(down: bool = True, pages: float = 1.0,
+async def browser_scroll_page(down: bool = True, pages: float = 1.0,
                      index: Optional[int] = None) -> ToolResult:
     """
     滚动页面或元素
@@ -665,7 +900,7 @@ async def scroll_page(down: bool = True, pages: float = 1.0,
 
 
 @tool()
-async def find_text(text: str) -> ToolResult:
+async def browser_find_text(text: str) -> ToolResult:
     """
     查找页面中的文本并滚动到该位置
     Find text on the page and scroll to it
@@ -701,7 +936,7 @@ async def find_text(text: str) -> ToolResult:
 
 
 @tool()
-async def screenshot() -> ToolResult:
+async def browser_screenshot() -> ToolResult:
     """
     请求在下次观察中包含页面截图
     Request a screenshot to be included in the next observation
@@ -738,7 +973,7 @@ async def screenshot() -> ToolResult:
 # ============================================================
 
 @tool()
-async def switch_tab(tab_id: str) -> ToolResult:
+async def browser_switch_tab(tab_id: str) -> ToolResult:
     """
     切换到指定标签页
     Switch to a different browser tab
@@ -773,7 +1008,7 @@ async def switch_tab(tab_id: str) -> ToolResult:
 
 
 @tool()
-async def close_tab(tab_id: str) -> ToolResult:
+async def browser_close_tab(tab_id: str) -> ToolResult:
     """
     关闭指定标签页
     Close a browser tab
@@ -812,7 +1047,7 @@ async def close_tab(tab_id: str) -> ToolResult:
 # ============================================================
 
 @tool()
-async def get_dropdown_options(index: int) -> ToolResult:
+async def browser_get_dropdown_options(index: int) -> ToolResult:
     """
     获取下拉框的所有选项
     Get options from a dropdown element
@@ -846,7 +1081,7 @@ async def get_dropdown_options(index: int) -> ToolResult:
 
 
 @tool()
-async def select_dropdown_option(index: int, text: str) -> ToolResult:
+async def browser_select_dropdown_option(index: int, text: str) -> ToolResult:
     """
     选择下拉框选项
     Select an option from a dropdown
@@ -886,7 +1121,7 @@ async def select_dropdown_option(index: int, text: str) -> ToolResult:
 # ============================================================
 
 @tool()
-async def extract_content(query: str, extract_links: bool = False,
+async def browser_extract_content(query: str, extract_links: bool = False,
                          start_from_char: int = 0) -> ToolResult:
     """
     使用 LLM 从页面提取结构化数据
@@ -934,7 +1169,7 @@ async def extract_content(query: str, extract_links: bool = False,
 
 
 @tool()
-async def get_page_html() -> ToolResult:
+async def browser_get_page_html() -> ToolResult:
     """
     获取当前页面的完整 HTML
     Get the full HTML of the current page
@@ -996,7 +1231,7 @@ async def get_page_html() -> ToolResult:
 
 
 @tool()
-async def get_selector_map() -> ToolResult:
+async def browser_get_selector_map() -> ToolResult:
     """
     获取当前页面的元素索引映射
     Get the selector map of interactive elements on the current page
@@ -1052,7 +1287,7 @@ async def get_selector_map() -> ToolResult:
 # ============================================================
 
 @tool()
-async def evaluate(code: str) -> ToolResult:
+async def browser_evaluate(code: str) -> ToolResult:
     """
     在页面中执行 JavaScript 代码
     Execute JavaScript code in the page context
@@ -1094,7 +1329,7 @@ async def evaluate(code: str) -> ToolResult:
 
 
 @tool()
-async def ensure_login_with_cookies(cookie_type: str, url: str = "https://www.xiaohongshu.com") -> ToolResult:
+async def browser_ensure_login_with_cookies(cookie_type: str, url: str = "https://www.xiaohongshu.com") -> ToolResult:
     """
     检查登录状态并在需要时注入 cookies
     """
@@ -1190,7 +1425,7 @@ async def ensure_login_with_cookies(cookie_type: str, url: str = "https://www.xi
 # ============================================================
 
 @tool()
-async def wait_for_user_action(message: str = "Please complete the action in browser",
+async def browser_wait_for_user_action(message: str = "Please complete the action in browser",
                                timeout: int = 300) -> ToolResult:
     """
     等待用户在浏览器中完成操作(如登录)
@@ -1262,7 +1497,7 @@ async def wait_for_user_action(message: str = "Please complete the action in bro
 # ============================================================
 
 @tool()
-async def done(text: str, success: bool = True,
+async def browser_done(text: str, success: bool = True,
               files_to_display: Optional[List[str]] = None) -> ToolResult:
     """
     标记任务完成并返回最终消息
@@ -1300,138 +1535,6 @@ async def done(text: str, success: bool = True,
         )
 
 
-# ============================================================
-# 容器管理工具 (Container Management Tools)
-# ============================================================
-
-import aiohttp
-
-async def create_container(url: str, account_name: str = "liuwenwu") -> Dict[str, Any]:
-    """
-    创建浏览器容器并导航到指定URL
-
-    按照 test.md 的要求:
-    1.1 调用接口创建容器
-    1.2 调用接口创建窗口并导航到URL
-
-    Args:
-        url: 要导航的URL地址
-        account_name: 账户名称
-
-    Returns:
-        包含容器信息的字典:
-        - success: 是否成功
-        - container_id: 容器ID
-        - vnc: VNC访问URL
-        - cdp: CDP协议URL(用于浏览器连接)
-        - connection_id: 窗口连接ID
-        - error: 错误信息(如果失败)
-    """
-    result = {
-        "success": False,
-        "container_id": None,
-        "vnc": None,
-        "cdp": None,
-        "connection_id": None,
-        "error": None
-    }
-
-    try:
-        async with aiohttp.ClientSession() as session:
-            # 步骤1.1: 创建容器
-            print("📦 步骤1.1: 创建容器...")
-            create_url = "http://47.84.182.56:8200/api/v1/container/create"
-            create_payload = {
-                "auto_remove": True,
-                "need_port_binding": True,
-                "max_lifetime_seconds": 900
-            }
-
-            async with session.post(create_url, json=create_payload) as resp:
-                if resp.status != 200:
-                    raise RuntimeError(f"创建容器失败: HTTP {resp.status}")
-
-                create_result = await resp.json()
-                if create_result.get("code") != 0:
-                    raise RuntimeError(f"创建容器失败: {create_result.get('msg')}")
-
-                data = create_result.get("data", {})
-                result["container_id"] = data.get("container_id")
-                result["vnc"] = data.get("vnc")
-                result["cdp"] = data.get("cdp")
-
-                print(f"✅ 容器创建成功")
-                print(f"   Container ID: {result['container_id']}")
-                print(f"   VNC: {result['vnc']}")
-                print(f"   CDP: {result['cdp']}")
-
-            # 等待容器内的浏览器启动
-            print(f"\n⏳ 等待容器内浏览器启动...")
-            await asyncio.sleep(5)
-
-            # 步骤1.2: 创建页面并导航
-            print(f"\n📱 步骤1.2: 创建页面并导航到 {url}...")
-
-            page_create_url = "http://47.84.182.56:8200/api/v1/browser/page/create"
-            page_payload = {
-                "container_id": result["container_id"],
-                "url": url,
-                "account_name": account_name,
-                "need_wait": True,
-                "timeout": 30
-            }
-
-            # 重试机制:最多尝试3次
-            max_retries = 3
-            page_created = False
-            last_error = None
-
-            for attempt in range(max_retries):
-                try:
-                    if attempt > 0:
-                        print(f"   重试 {attempt + 1}/{max_retries}...")
-                        await asyncio.sleep(3)  # 重试前等待
-
-                    async with session.post(page_create_url, json=page_payload, timeout=aiohttp.ClientTimeout(total=60)) as resp:
-                        if resp.status != 200:
-                            response_text = await resp.text()
-                            last_error = f"HTTP {resp.status}: {response_text[:200]}"
-                            continue
-
-                        page_result = await resp.json()
-                        if page_result.get("code") != 0:
-                            last_error = f"{page_result.get('msg')}"
-                            continue
-
-                        page_data = page_result.get("data", {})
-                        result["connection_id"] = page_data.get("connection_id")
-                        result["success"] = True
-                        page_created = True
-
-                        print(f"✅ 页面创建成功")
-                        print(f"   Connection ID: {result['connection_id']}")
-                        break
-
-                except asyncio.TimeoutError:
-                    last_error = "请求超时"
-                    continue
-                except aiohttp.ClientError as e:
-                    last_error = f"网络错误: {str(e)}"
-                    continue
-                except Exception as e:
-                    last_error = f"未知错误: {str(e)}"
-                    continue
-
-            if not page_created:
-                raise RuntimeError(f"创建页面失败(尝试{max_retries}次后): {last_error}")
-
-    except Exception as e:
-        result["error"] = str(e)
-        print(f"❌ 错误: {str(e)}")
-
-    return result
-
-
 # ============================================================
 # 导出所有工具函数(供外部使用)
 # ============================================================
@@ -1444,45 +1547,42 @@ __all__ = [
     'kill_browser_session',
 
     # 导航类工具
-    'navigate_to_url',
-    'search_web',
-    'go_back',
-    'wait',
+    'browser_navigate_to_url',
+    'browser_search_web',
+    'browser_go_back',
+    'browser_wait',
 
     # 元素交互工具
-    'click_element',
-    'input_text',
-    'send_keys',
-    'upload_file',
+    'browser_click_element',
+    'browser_input_text',
+    'browser_send_keys',
+    'browser_upload_file',
 
     # 滚动和视图工具
-    'scroll_page',
-    'find_text',
-    'screenshot',
+    'browser_scroll_page',
+    'browser_find_text',
+    'browser_screenshot',
 
     # 标签页管理工具
-    'switch_tab',
-    'close_tab',
+    'browser_switch_tab',
+    'browser_close_tab',
 
     # 下拉框工具
-    'get_dropdown_options',
-    'select_dropdown_option',
+    'browser_get_dropdown_options',
+    'browser_select_dropdown_option',
 
     # 内容提取工具
-    'extract_content',
-    'get_page_html',
-    'get_selector_map',
+    'browser_extract_content',
+    'browser_get_page_html',
+    'browser_get_selector_map',
 
     # JavaScript 执行工具
-    'evaluate',
-    'ensure_login_with_cookies',
+    'browser_evaluate',
+    'browser_ensure_login_with_cookies',
 
     # 等待用户操作
-    'wait_for_user_action',
+    'browser_wait_for_user_action',
 
     # 任务完成
-    'done',
-
-    # 容器管理
-    'create_container',
+    'browser_done',
 ]

+ 2 - 1
agent/tools/builtin/goal.py

@@ -40,13 +40,14 @@ async def goal(
         reason: 创建理由(逗号分隔多个,与 add 一一对应)。说明为什么要做这些目标。
         done: 完成当前目标,值为 summary
         abandon: 放弃当前目标,值为原因(会触发 context 压缩)
-        focus: 切换焦点到指定 id(可以是内部 ID 或显示 ID
+        focus: 切换焦点到指定 ID(如 "1", "2.1", "2.2")
         context: 工具执行上下文(包含 store 和 trace_id)
 
     Examples:
         goal(add="分析代码, 实现功能, 测试", reason="了解现有结构, 完成需求, 确保质量")
         goal(focus="2", add="设计接口, 实现代码", reason="明确API规范, 编写核心逻辑")
         goal(done="发现用户模型在 models/user.py")
+        goal(done="已完成调研", focus="2")
         goal(abandon="方案A需要Redis,环境没有", add="实现方案B", reason="使用现有技术栈")
 
     Returns:

+ 31 - 0
docs/ref/skills.md

@@ -0,0 +1,31 @@
+Skill structure
+Every Skill requires a SKILL.md file with YAML frontmatter:
+
+---
+name: your-skill-name
+description: Brief description of what this Skill does and when to use it
+---
+
+# Your Skill Name
+
+## Instructions
+[Clear, step-by-step guidance for Claude to follow]
+
+## Examples
+[Concrete examples of using this Skill]
+Required fields: name and description
+
+Field requirements:
+
+name:
+
+Maximum 64 characters
+Must contain only lowercase letters, numbers, and hyphens
+Cannot contain XML tags
+Cannot contain reserved words: "anthropic", "claude"
+description:
+
+Must be non-empty
+Maximum 1024 characters
+Cannot contain XML tags
+The description should include both what the Skill does and when Claude should use it. For complete authoring guidance, see the best practices guide.

+ 15 - 15
examples/cloud_browser_demo_db.py

@@ -22,11 +22,11 @@ from agent.tools.builtin.browser.baseClass import (
     init_browser_session,
     cleanup_browser_session,
     kill_browser_session,
-    navigate_to_url,
-    scroll_page,
-    evaluate,
-    wait,
-    get_page_html,
+    browser_navigate_to_url,
+    browser_scroll_page,
+    browser_evaluate,
+    browser_wait,
+    browser_get_page_html,
     _fetch_cookie_row,
     _fetch_profile_id,
     _normalize_cookies,
@@ -102,10 +102,10 @@ async def example_xhs_fitness_search(cookie_type: str = "xhs") -> dict:
 
             # 访问首页
             print("\n🏠 访问小红书首页...")
-            nav_result = await navigate_to_url("https://www.xiaohongshu.com")
+            nav_result = await browser_navigate_to_url("https://www.xiaohongshu.com")
             if nav_result.error:
                 raise RuntimeError(nav_result.error)
-            await wait(3)
+            await browser_wait(3)
 
             # 注入 Cookie(如果有)
             if cookie_row:
@@ -119,7 +119,7 @@ async def example_xhs_fitness_search(cookie_type: str = "xhs") -> dict:
                         print(f"✅ 成功注入 {len(cookies)} 个 Cookie")
                         # 刷新页面使 Cookie 生效
                         await navigate_to_url("https://www.xiaohongshu.com")
-                        await wait(2)
+                        await browser_wait(2)
                     else:
                         print("⚠️  Cookie 解析失败")
                 else:
@@ -127,20 +127,20 @@ async def example_xhs_fitness_search(cookie_type: str = "xhs") -> dict:
 
             # 访问搜索页面
             print(f"\n🔗 访问搜索页面: {keyword}")
-            nav_result = await navigate_to_url(search_url)
+            nav_result = await browser_navigate_to_url(search_url)
             if nav_result.error:
                 raise RuntimeError(nav_result.error)
-            await wait(8)
+            await browser_wait(8)
 
             # 滚动页面
             print("\n📜 滚动页面...")
             for i in range(3):
-                await scroll_page(down=True, pages=2.0)
-                await wait(2)
+                await browser_scroll_page(down=True, pages=2.0)
+                await browser_wait(2)
 
             # 提取数据
             print("\n🔍 提取数据...")
-            html_result = await get_page_html()
+            html_result = await browser_get_page_html()
             if html_result.error:
                 raise RuntimeError(html_result.error)
             html = html_result.metadata.get("html", "")
@@ -223,7 +223,7 @@ async def example_xhs_fitness_search(cookie_type: str = "xhs") -> dict:
             extract_js = extract_js.replace("__KEYWORD__", json.dumps(keyword, ensure_ascii=False))
 
             async def run_extract() -> dict:
-                result = await evaluate(extract_js)
+                result = await browser_evaluate(extract_js)
                 if result.error:
                     raise RuntimeError(result.error)
                 output = result.output
@@ -253,7 +253,7 @@ async def example_xhs_fitness_search(cookie_type: str = "xhs") -> dict:
                     }
 
                 if isinstance(data, dict) and data.get("count", 0) == 0:
-                    html_result = await get_page_html()
+                    html_result = await browser_get_page_html()
                     if html_result.error:
                         raise RuntimeError(html_result.error)
                     html = html_result.metadata.get("html", "")

+ 164 - 0
examples/research/run.py

@@ -0,0 +1,164 @@
+"""
+浏览器调研示例
+
+使用 Agent 模式 + 浏览器工具进行网络调研
+"""
+
+import os
+import sys
+import asyncio
+from pathlib import Path
+
+# 添加项目根目录到 Python 路径
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+from dotenv import load_dotenv
+load_dotenv()
+
+from agent.llm.prompts import SimplePrompt
+from agent.core.runner import AgentRunner
+from agent.execution import (
+    FileSystemTraceStore,
+    Trace,
+    Message,
+)
+from agent.llm import create_openrouter_llm_call
+
+
+async def main():
+    # 路径配置
+    base_dir = Path(__file__).parent
+    project_root = base_dir.parent.parent
+    prompt_path = base_dir / "test.prompt"
+    output_dir = base_dir / "output"
+    output_dir.mkdir(exist_ok=True)
+
+    # Skills 目录(可选:用户自定义 skills)
+    # 注意:内置 skills(agent/skills/core.md)会自动加载
+    skills_dir = None  # 或者指定自定义 skills 目录,如: project_root / "skills"
+
+    print("=" * 60)
+    print("浏览器调研任务 (Agent 模式)")
+    print("=" * 60)
+    print()
+
+    # 1. 加载 prompt
+    print("1. 加载 prompt...")
+    prompt = SimplePrompt(prompt_path)
+
+    # 提取配置
+    system_prompt = prompt._messages.get("system", "")
+    user_task = prompt._messages.get("user", "")
+    model_name = prompt.config.get('model', 'gemini-2.5-flash')
+    temperature = float(prompt.config.get('temperature', 0.3))
+
+    print(f"   - 任务: {user_task[:80]}...")
+    print(f"   - 模型: {model_name}")
+
+    # 2. 构建消息
+    print("2. 构建任务消息...")
+    messages = prompt.build_messages()
+
+    # 3. 创建 Agent Runner(配置 skills 和浏览器工具)
+    print("3. 创建 Agent Runner...")
+    print(f"   - Skills 目录: {skills_dir}")
+    print(f"   - 模型: {model_name} (via OpenRouter)")
+
+    # 使用 OpenRouter 的 Gemini 模型
+    runner = AgentRunner(
+        trace_store=FileSystemTraceStore(base_path=".trace"),
+        llm_call=create_openrouter_llm_call(model=f"google/{model_name}"),
+        skills_dir=skills_dir,
+        debug=True  # 启用 debug,输出到 .trace/
+    )
+
+    # 4. Agent 模式执行
+    print(f"4. 启动 Agent 模式...")
+    print()
+
+    final_response = ""
+    current_trace_id = None
+
+    async for item in runner.run(
+        task=user_task,
+        messages=messages,
+        system_prompt=system_prompt,
+        model=f"google/{model_name}",
+        temperature=temperature,
+        max_iterations=20,  # 调研任务可能需要更多迭代
+    ):
+        # 处理 Trace 对象(整体状态变化)
+        if isinstance(item, Trace):
+            current_trace_id = item.trace_id
+            if item.status == "running":
+                print(f"[Trace] 开始: {item.trace_id[:8]}")
+            elif item.status == "completed":
+                print(f"[Trace] 完成")
+                print(f"  - Total messages: {item.total_messages}")
+                print(f"  - Total tokens: {item.total_tokens}")
+                print(f"  - Total cost: ${item.total_cost:.4f}")
+            elif item.status == "failed":
+                print(f"[Trace] 失败: {item.error_message}")
+
+        # 处理 Message 对象(执行过程)
+        elif isinstance(item, Message):
+            if item.role == "assistant":
+                content = item.content
+                if isinstance(content, dict):
+                    text = content.get("text", "")
+                    tool_calls = content.get("tool_calls")
+
+                    if text and not tool_calls:
+                        # 纯文本回复(最终响应)
+                        final_response = text
+                        print(f"[Response] Agent 完成")
+                    elif text:
+                        print(f"[Assistant] {text[:100]}...")
+
+                    if tool_calls:
+                        for tc in tool_calls:
+                            tool_name = tc.get("function", {}).get("name", "unknown")
+                            print(f"[Tool Call] {tool_name}")
+
+            elif item.role == "tool":
+                content = item.content
+                if isinstance(content, dict):
+                    tool_name = content.get("tool_name", "unknown")
+                    print(f"[Tool Result] {tool_name}")
+                if item.description:
+                    desc = item.description[:80] if len(item.description) > 80 else item.description
+                    print(f"  {desc}...")
+
+    # 5. 输出结果
+    print()
+    print("=" * 60)
+    print("Agent 响应:")
+    print("=" * 60)
+    print(final_response)
+    print("=" * 60)
+    print()
+
+    # 6. 保存结果
+    output_file = output_dir / "research_result.txt"
+    with open(output_file, 'w', encoding='utf-8') as f:
+        f.write(final_response)
+
+    print(f"✓ 结果已保存到: {output_file}")
+    print()
+
+    # 提示使用 API 可视化
+    print("=" * 60)
+    print("可视化 Step Tree:")
+    print("=" * 60)
+    print("1. 启动 API Server:")
+    print("   python3 api_server.py")
+    print()
+    print("2. 浏览器访问:")
+    print("   http://localhost:8000/api/traces")
+    print()
+    print(f"3. Trace ID: {current_trace_id}")
+    print("=" * 60)
+
+
+# Run the async entry point only when executed as a script, not on import.
+if __name__ == "__main__":
+    asyncio.run(main())

+ 11 - 0
examples/research/test.prompt

@@ -0,0 +1,11 @@
+---
+model: gemini-2.5-flash
+temperature: 0.3
+---
+
+$system$
+你是最顶尖的AI助手,可以拆分并调用工具逐步解决复杂问题。
+
+$user$
+使用浏览器帮我做个调研:一张图片中的构图可以如何表示?我希望寻找一些构图特征的表示方法。
+注意使用explore工具,在合适的时候调用多个分支并行探索。

+ 13 - 13
examples/test_tools_baidu.py

@@ -10,11 +10,11 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 from agent.tools.builtin.browser.baseClass import (
     init_browser_session,
-    navigate_to_url,
-    wait,
-    get_page_html,
-    evaluate,
-    scroll_page,
+    browser_navigate_to_url,
+    browser_wait,
+    browser_get_page_html,
+    browser_evaluate,
+    browser_scroll_page,
     cleanup_browser_session,
 )
 
@@ -30,15 +30,15 @@ async def run_task():
     try:
         await init_browser_session(headless=False, profile_name="baidu_profile")
 
-        await navigate_to_url("https://www.baidu.com")
-        await wait(seconds=2)
+        await browser_navigate_to_url("https://www.baidu.com")
+        await browser_wait(seconds=2)
 
         keyword = "Python 教程"
         search_url = f"https://www.baidu.com/s?wd={quote(keyword)}"
-        await navigate_to_url(search_url)
-        await wait(seconds=3)
-        await scroll_page(down=True, pages=1.0)
-        await wait(seconds=2)
+        await browser_navigate_to_url(search_url)
+        await browser_wait(seconds=3)
+        await browser_scroll_page(down=True, pages=1.0)
+        await browser_wait(seconds=2)
 
         extract_js = """
         (function(){
@@ -84,7 +84,7 @@ async def run_task():
         })()
         """
 
-        result = await evaluate(code=extract_js)
+        result = await browser_evaluate(code=extract_js)
         output = result.output
         if output.startswith("Result: "):
             output = output[8:]
@@ -103,7 +103,7 @@ async def run_task():
         with open(json_file, "w", encoding="utf-8") as f:
             json.dump(data, f, ensure_ascii=False, indent=2)
 
-        html_result = await get_page_html()
+        html_result = await browser_get_page_html()
         html_content = html_result.metadata.get("html", "")
         page_url = html_result.metadata.get("url", "")
         page_title = html_result.metadata.get("title", "")

+ 35 - 79
examples/test_xhs_container.py

@@ -1,12 +1,10 @@
 """
 小红书容器测试脚本
-根据 test.md 要求实现:
-1. 创建容器并导航到小红书
-2. 初始化浏览器会话
-3. 切换到指定窗口
-4. 搜索健身
-5. 随机进入一个详情页
-6. 获取详情页的HTML和iframe并保存到output
+演示容器浏览器的使用:
+1. 初始化容器浏览器(自动创建容器并连接)
+2. 搜索健身
+3. 随机进入一个详情页
+4. 获取详情页的HTML和iframe并保存到output
 """
 
 import sys
@@ -25,14 +23,14 @@ project_root = Path(__file__).parent.parent
 sys.path.insert(0, str(project_root))
 
 from agent.tools.builtin.browser.baseClass import (
-    create_container,
     init_browser_session,
     cleanup_browser_session,
-    navigate_to_url,
-    scroll_page,
-    evaluate,
-    wait,
-    get_page_html,
+    browser_navigate_to_url,
+    browser_scroll_page,
+    browser_evaluate,
+    browser_wait,
+    browser_get_page_html,
+    browser_switch_tab,
 )
 
 
@@ -52,83 +50,41 @@ async def test_xhs_container():
     output_dir.mkdir(parents=True, exist_ok=True)
 
     try:
-        # 步骤1: 创建容器并导航到小红书
-        container_info = await create_container(url="https://www.xiaohongshu.com")
-
-        if not container_info["success"]:
-            raise RuntimeError(f"容器创建失败: {container_info['error']}")
-
-        cdp_url = container_info["cdp"]
-        container_id = container_info["container_id"]
-        connection_id = container_info.get("connection_id")
-
-        print(f"\n📋 容器信息:")
-        print(f"   CDP URL: {cdp_url}")
-        print(f"   Container ID: {container_id}")
-        print(f"   Connection ID: {connection_id}")
-
-        # 等待容器完全启动
-        print(f"\n⏳ 等待容器启动...")
-        await asyncio.sleep(3)
-
-        # 步骤2: 初始化浏览器会话
-        print(f"\n🌐 初始化浏览器会话...")
+        # 初始化容器浏览器(一步完成)
+        print(f"\n🚀 初始化容器浏览器...")
         browser, tools = await init_browser_session(
-            headless=True,
-            cdp_url=cdp_url
+            browser_type="container",
+            url="https://www.xiaohongshu.com",  # 容器启动时访问的URL
+            headless=True
         )
 
-        if browser is None or tools is None:
-            raise RuntimeError("浏览器初始化失败")
-
-        print("✅ 浏览器会话初始化成功")
-
-        # 步骤3: 如果有 connection_id,切换到对应窗口
-        if connection_id:
-            print(f"\n🔄 切换到窗口: {connection_id}")
-            await wait(2)
-
-            # 获取当前浏览器状态
-            try:
-                state = await browser.get_browser_state_summary(cached=False)
-                print(f"   当前标签页数: {len(state.tabs)}")
-                for tab in state.tabs:
-                    print(f"   - Tab ID: {tab.target_id[-4:]}, URL: {tab.url}")
-
-                # 尝试切换到 connection_id 对应的标签页
-                # connection_id 可能是完整ID,取最后4位
-                from agent.tools.builtin.browser.baseClass import switch_tab
-                await switch_tab(connection_id[-4:] if len(connection_id) > 4 else connection_id)
-                await wait(2)
-                print(f"✅ 已切换到窗口")
-            except Exception as e:
-                print(f"⚠️  切换窗口警告: {str(e)[:100]}")
-                print(f"   将继续使用当前窗口")
+        print("✅ 容器浏览器初始化成功")
 
-        await wait(3)
+        # 等待页面完全加载
+        await browser_wait(3)
 
-        # 步骤4: 搜索健身
+        # 步骤1: 搜索健身
         print(f"\n🔍 搜索关键词: {keyword}")
         try:
-            nav_result = await navigate_to_url(search_url)
+            nav_result = await browser_navigate_to_url(search_url)
             if nav_result.error:
                 print(f"⚠️  导航警告: {nav_result.error[:100]}")
         except Exception as e:
             print(f"⚠️  导航异常: {str(e)[:100]}")
 
-        await wait(10)
+        await browser_wait(10)
 
         # 滚动页面加载更多内容
         print("\n📜 滚动页面...")
         for i in range(2):
-            await scroll_page(down=True, pages=2.0)
-            await wait(2)
+            await browser_scroll_page(down=True, pages=2.0)
+            await browser_wait(2)
 
         # 提取搜索结果
         print("\n🔍 提取搜索结果...")
 
         # 先保存HTML看看页面内容
-        html_result = await get_page_html()
+        html_result = await browser_get_page_html()
         if not html_result.error:
             html = html_result.metadata.get("html", "")
             debug_html_path = output_dir / "search_page_debug.html"
@@ -155,7 +111,7 @@ async def test_xhs_container():
         })()
         """
 
-        eval_result = await evaluate(extract_js)
+        eval_result = await browser_evaluate(extract_js)
         if eval_result.error:
             raise RuntimeError(f"提取搜索结果失败: {eval_result.error}")
 
@@ -170,7 +126,7 @@ async def test_xhs_container():
 
         print(f"✅ 找到 {len(posts)} 个帖子")
 
-        # 步骤5: 随机进入一个详情页
+        # 步骤2: 随机进入一个详情页
         selected_post = random.choice(posts)
         post_url = selected_post["link"]
 
@@ -178,23 +134,23 @@ async def test_xhs_container():
         print(f"🔗 访问帖子详情页: {post_url}")
 
         try:
-            nav_result = await navigate_to_url(post_url)
+            nav_result = await browser_navigate_to_url(post_url)
             if nav_result.error:
                 print(f"⚠️  导航警告: {nav_result.error[:100]}")
         except Exception as e:
             print(f"⚠️  导航异常: {str(e)[:100]}")
 
-        await wait(8)
+        await browser_wait(8)
 
         # 滚动详情页
         print("\n📜 滚动详情页...")
         for i in range(3):
-            await scroll_page(down=True, pages=1.5)
-            await wait(2)
+            await browser_scroll_page(down=True, pages=1.5)
+            await browser_wait(2)
 
-        # 步骤6: 保存详情页HTML
+        # 步骤3: 保存详情页HTML
         print("\n💾 保存详情页 HTML...")
-        html_result = await get_page_html()
+        html_result = await browser_get_page_html()
         if html_result.error:
             print(f"⚠️  获取HTML失败: {html_result.error}")
         else:
@@ -221,7 +177,7 @@ async def test_xhs_container():
         })()
         """
 
-        iframe_result = await evaluate(iframe_js)
+        iframe_result = await browser_evaluate(iframe_js)
         if not iframe_result.error:
             iframe_output = iframe_result.output
             if isinstance(iframe_output, str) and iframe_output.startswith("Result: "):
@@ -251,7 +207,7 @@ async def test_xhs_container():
                         }})()
                         """
 
-                        iframe_html_result = await evaluate(get_iframe_html_js)
+                        iframe_html_result = await browser_evaluate(get_iframe_html_js)
                         if not iframe_html_result.error:
                             iframe_html = iframe_html_result.output
                             if isinstance(iframe_html, str) and iframe_html.startswith("Result: "):