Просмотр исходного кода

fix: goal tool description & store model

Talegorithm 1 месяц назад
Родитель
Коммит
ab8fbeb2d8

+ 101 - 24
agent/core/runner.py

@@ -28,15 +28,54 @@ logger = logging.getLogger(__name__)
 
 
 # 内置工具列表(始终自动加载)
 # 内置工具列表(始终自动加载)
 BUILTIN_TOOLS = [
 BUILTIN_TOOLS = [
+    # 文件操作工具
     "read_file",
     "read_file",
     "edit_file",
     "edit_file",
     "write_file",
     "write_file",
     "glob_files",
     "glob_files",
     "grep_content",
     "grep_content",
+
+    # 系统工具
     "bash_command",
     "bash_command",
+
+    # 技能和目标管理
     "skill",
     "skill",
     "list_skills",
     "list_skills",
     "goal",
     "goal",
+
+    # 搜索工具
+    "search_posts",
+    "get_search_suggestions",
+
+    # 沙箱工具
+    "sandbox_create_environment",
+    "sandbox_run_shell",
+    "sandbox_rebuild_with_ports",
+    "sandbox_destroy_environment",
+
+    # 浏览器工具
+    "browser_navigate_to_url",
+    "browser_search_web",
+    "browser_go_back",
+    "browser_wait",
+    "browser_click_element",
+    "browser_input_text",
+    "browser_send_keys",
+    "browser_upload_file",
+    "browser_scroll_page",
+    "browser_find_text",
+    "browser_screenshot",
+    "browser_switch_tab",
+    "browser_close_tab",
+    "browser_get_dropdown_options",
+    "browser_select_dropdown_option",
+    "browser_extract_content",
+    "browser_get_page_html",
+    "browser_get_selector_map",
+    "browser_evaluate",
+    "browser_ensure_login_with_cookies",
+    "browser_wait_for_user_action",
+    "browser_done",
 ]
 ]
 
 
 
 
@@ -121,15 +160,6 @@ class AgentRunner:
         trace_id = None
         trace_id = None
         message_id = None
         message_id = None
 
 
-        # 创建 Trace
-        if trace and self.trace_store:
-            trace_obj = Trace.create(
-                mode="call",
-                uid=uid,
-                context={"model": model}
-            )
-            trace_id = await self.trace_store.create_trace(trace_obj)
-
         # 准备工具 Schema
         # 准备工具 Schema
         tool_names = BUILTIN_TOOLS.copy()
         tool_names = BUILTIN_TOOLS.copy()
         if tools:
         if tools:
@@ -139,6 +169,17 @@ class AgentRunner:
 
 
         tool_schemas = self.tools.get_schemas(tool_names)
         tool_schemas = self.tools.get_schemas(tool_names)
 
 
+        # 创建 Trace
+        if trace and self.trace_store:
+            trace_obj = Trace.create(
+                mode="call",
+                uid=uid,
+                model=model,
+                tools=tool_schemas,  # 保存工具定义
+                llm_params=kwargs,  # 保存 LLM 参数
+            )
+            trace_id = await self.trace_store.create_trace(trace_obj)
+
         # 调用 LLM
         # 调用 LLM
         result = await self.llm_call(
         result = await self.llm_call(
             messages=messages,
             messages=messages,
@@ -155,7 +196,9 @@ class AgentRunner:
                 sequence=1,
                 sequence=1,
                 goal_id=None,  # 单次调用没有 goal
                 goal_id=None,  # 单次调用没有 goal
                 content={"text": result.get("content", ""), "tool_calls": result.get("tool_calls")},
                 content={"text": result.get("content", ""), "tool_calls": result.get("tool_calls")},
-                tokens=result.get("prompt_tokens", 0) + result.get("completion_tokens", 0),
+                prompt_tokens=result.get("prompt_tokens", 0),
+                completion_tokens=result.get("completion_tokens", 0),
+                finish_reason=result.get("finish_reason"),
                 cost=result.get("cost", 0),
                 cost=result.get("cost", 0),
             )
             )
             message_id = await self.trace_store.add_message(msg)
             message_id = await self.trace_store.add_message(msg)
@@ -223,6 +266,14 @@ class AgentRunner:
         enable_memory = enable_memory if enable_memory is not None else self.config.enable_memory
         enable_memory = enable_memory if enable_memory is not None else self.config.enable_memory
         auto_execute_tools = auto_execute_tools if auto_execute_tools is not None else self.config.auto_execute_tools
         auto_execute_tools = auto_execute_tools if auto_execute_tools is not None else self.config.auto_execute_tools
 
 
+        # 准备工具 Schema(提前准备,用于 Trace)
+        tool_names = BUILTIN_TOOLS.copy()
+        if tools:
+            for tool in tools:
+                if tool not in tool_names:
+                    tool_names.append(tool)
+        tool_schemas = self.tools.get_schemas(tool_names)
+
         # 创建 Trace
         # 创建 Trace
         trace_id = self._generate_id()
         trace_id = self._generate_id()
         trace_obj = Trace(
         trace_obj = Trace(
@@ -231,7 +282,9 @@ class AgentRunner:
             task=task,
             task=task,
             agent_type=agent_type,
             agent_type=agent_type,
             uid=uid,
             uid=uid,
-            context={"model": model, **kwargs},
+            model=model,
+            tools=tool_schemas,  # 保存工具定义
+            llm_params=kwargs,  # 保存 LLM 参数
             status="running"
             status="running"
         )
         )
 
 
@@ -269,6 +322,9 @@ class AgentRunner:
             if messages is None:
             if messages is None:
                 messages = []
                 messages = []
 
 
+            # 记录初始 system 和 user 消息到 trace
+            sequence = 1
+
             if system_prompt:
             if system_prompt:
                 # 注入记忆和 skills 到 system prompt
                 # 注入记忆和 skills 到 system prompt
                 full_system = system_prompt
                 full_system = system_prompt
@@ -279,9 +335,35 @@ class AgentRunner:
 
 
                 messages = [{"role": "system", "content": full_system}] + messages
                 messages = [{"role": "system", "content": full_system}] + messages
 
 
+                # 保存 system 消息
+                if self.trace_store:
+                    system_msg = Message.create(
+                        trace_id=trace_id,
+                        role="system",
+                        sequence=sequence,
+                        goal_id=None,  # 初始消息没有 goal
+                        content=full_system,
+                    )
+                    await self.trace_store.add_message(system_msg)
+                    yield system_msg
+                    sequence += 1
+
             # 添加任务描述
             # 添加任务描述
             messages.append({"role": "user", "content": task})
             messages.append({"role": "user", "content": task})
 
 
+            # 保存 user 消息(任务描述)
+            if self.trace_store:
+                user_msg = Message.create(
+                    trace_id=trace_id,
+                    role="user",
+                    sequence=sequence,
+                    goal_id=None,  # 初始消息没有 goal
+                    content=task,
+                )
+                await self.trace_store.add_message(user_msg)
+                yield user_msg
+                sequence += 1
+
             # 获取 GoalTree
             # 获取 GoalTree
             goal_tree = None
             goal_tree = None
             if self.trace_store:
             if self.trace_store:
@@ -291,18 +373,7 @@ class AgentRunner:
                 from agent.tools.builtin.goal import set_goal_tree
                 from agent.tools.builtin.goal import set_goal_tree
                 set_goal_tree(goal_tree)
                 set_goal_tree(goal_tree)
 
 
-            # 准备工具 Schema
-            tool_names = BUILTIN_TOOLS.copy()
-            if tools:
-                for tool in tools:
-                    if tool not in tool_names:
-                        tool_names.append(tool)
-
-            tool_schemas = self.tools.get_schemas(tool_names)
-
             # 执行循环
             # 执行循环
-            sequence = 1
-
             for iteration in range(max_iterations):
             for iteration in range(max_iterations):
                 # 注入当前计划到 messages(如果有 goals)
                 # 注入当前计划到 messages(如果有 goals)
                 llm_messages = list(messages)
                 llm_messages = list(messages)
@@ -321,7 +392,10 @@ class AgentRunner:
 
 
                 response_content = result.get("content", "")
                 response_content = result.get("content", "")
                 tool_calls = result.get("tool_calls")
                 tool_calls = result.get("tool_calls")
-                step_tokens = result.get("prompt_tokens", 0) + result.get("completion_tokens", 0)
+                finish_reason = result.get("finish_reason")
+                prompt_tokens = result.get("prompt_tokens", 0)
+                completion_tokens = result.get("completion_tokens", 0)
+                step_tokens = prompt_tokens + completion_tokens
                 step_cost = result.get("cost", 0)
                 step_cost = result.get("cost", 0)
 
 
                 # 获取当前 goal_id
                 # 获取当前 goal_id
@@ -334,7 +408,9 @@ class AgentRunner:
                     sequence=sequence,
                     sequence=sequence,
                     goal_id=current_goal_id,
                     goal_id=current_goal_id,
                     content={"text": response_content, "tool_calls": tool_calls},
                     content={"text": response_content, "tool_calls": tool_calls},
-                    tokens=step_tokens,
+                    prompt_tokens=prompt_tokens,
+                    completion_tokens=completion_tokens,
+                    finish_reason=finish_reason,
                     cost=step_cost,
                     cost=step_cost,
                 )
                 )
 
 
@@ -429,6 +505,7 @@ class AgentRunner:
                 await self.trace_store.update_trace(
                 await self.trace_store.update_trace(
                     trace_id,
                     trace_id,
                     status="failed",
                     status="failed",
+                    error_message=str(e),
                     completed_at=datetime.now()
                     completed_at=datetime.now()
                 )
                 )
                 trace_obj = await self.trace_store.get_trace(trace_id)
                 trace_obj = await self.trace_store.get_trace(trace_id)

+ 13 - 6
agent/execution/fs_store.py

@@ -316,8 +316,17 @@ class FileSystemTraceStore:
             trace.total_messages += 1
             trace.total_messages += 1
             trace.last_sequence = max(trace.last_sequence, message.sequence)
             trace.last_sequence = max(trace.last_sequence, message.sequence)
 
 
+            # 累计 tokens(拆分)
+            if message.prompt_tokens:
+                trace.total_prompt_tokens += message.prompt_tokens
+            if message.completion_tokens:
+                trace.total_completion_tokens += message.completion_tokens
+            # 向后兼容:也更新 total_tokens
             if message.tokens:
             if message.tokens:
                 trace.total_tokens += message.tokens
                 trace.total_tokens += message.tokens
+            elif message.prompt_tokens or message.completion_tokens:
+                trace.total_tokens += (message.prompt_tokens or 0) + (message.completion_tokens or 0)
+
             if message.cost:
             if message.cost:
                 trace.total_cost += message.cost
                 trace.total_cost += message.cost
             if message.duration_ms:
             if message.duration_ms:
@@ -329,6 +338,8 @@ class FileSystemTraceStore:
                 total_messages=trace.total_messages,
                 total_messages=trace.total_messages,
                 last_sequence=trace.last_sequence,
                 last_sequence=trace.last_sequence,
                 total_tokens=trace.total_tokens,
                 total_tokens=trace.total_tokens,
+                total_prompt_tokens=trace.total_prompt_tokens,
+                total_completion_tokens=trace.total_completion_tokens,
                 total_cost=trace.total_cost,
                 total_cost=trace.total_cost,
                 total_duration_ms=trace.total_duration_ms
                 total_duration_ms=trace.total_duration_ms
             )
             )
@@ -439,9 +450,7 @@ class FileSystemTraceStore:
             if message_file.exists():
             if message_file.exists():
                 try:
                 try:
                     data = json.loads(message_file.read_text())
                     data = json.loads(message_file.read_text())
-                    if data.get("created_at"):
-                        data["created_at"] = datetime.fromisoformat(data["created_at"])
-                    return Message(**data)
+                    return Message.from_dict(data)
                 except Exception:
                 except Exception:
                     pass
                     pass
 
 
@@ -461,9 +470,7 @@ class FileSystemTraceStore:
         for message_file in messages_dir.glob("*.json"):
         for message_file in messages_dir.glob("*.json"):
             try:
             try:
                 data = json.loads(message_file.read_text())
                 data = json.loads(message_file.read_text())
-                if data.get("created_at"):
-                    data["created_at"] = datetime.fromisoformat(data["created_at"])
-                messages.append(Message(**data))
+                messages.append(Message.from_dict(data))
             except Exception:
             except Exception:
                 continue
                 continue
 
 

+ 72 - 12
agent/execution/models.py

@@ -41,21 +41,30 @@ class Trace:
 
 
     # 统计
     # 统计
     total_messages: int = 0      # 消息总数(改名自 total_steps)
     total_messages: int = 0      # 消息总数(改名自 total_steps)
-    total_tokens: int = 0
+    total_tokens: int = 0        # 总 tokens(向后兼容,= prompt + completion)
+    total_prompt_tokens: int = 0      # 总输入 tokens
+    total_completion_tokens: int = 0  # 总输出 tokens
     total_cost: float = 0.0
     total_cost: float = 0.0
-    total_duration_ms: int = 0  # 总耗时(毫秒)
+    total_duration_ms: int = 0   # 总耗时(毫秒)
 
 
     # 进度追踪(head)
     # 进度追踪(head)
     last_sequence: int = 0      # 最新 message 的 sequence
     last_sequence: int = 0      # 最新 message 的 sequence
     last_event_id: int = 0      # 最新事件 ID(用于 WS 续传)
     last_event_id: int = 0      # 最新事件 ID(用于 WS 续传)
 
 
-    # 上下文
+    # 配置
     uid: Optional[str] = None
     uid: Optional[str] = None
-    context: Dict[str, Any] = field(default_factory=dict)
+    model: Optional[str] = None              # 默认模型
+    tools: Optional[List[Dict]] = None       # 工具定义(整个 trace 共享)
+    llm_params: Dict[str, Any] = field(default_factory=dict)  # LLM 参数(temperature 等)
+    context: Dict[str, Any] = field(default_factory=dict)     # 其他元数据
 
 
     # 当前焦点 goal
     # 当前焦点 goal
     current_goal_id: Optional[str] = None
     current_goal_id: Optional[str] = None
 
 
+    # 结果
+    result_summary: Optional[str] = None     # 执行结果摘要
+    error_message: Optional[str] = None      # 错误信息
+
     # 时间
     # 时间
     created_at: datetime = field(default_factory=datetime.now)
     created_at: datetime = field(default_factory=datetime.now)
     completed_at: Optional[datetime] = None
     completed_at: Optional[datetime] = None
@@ -86,13 +95,20 @@ class Trace:
             "status": self.status,
             "status": self.status,
             "total_messages": self.total_messages,
             "total_messages": self.total_messages,
             "total_tokens": self.total_tokens,
             "total_tokens": self.total_tokens,
+            "total_prompt_tokens": self.total_prompt_tokens,
+            "total_completion_tokens": self.total_completion_tokens,
             "total_cost": self.total_cost,
             "total_cost": self.total_cost,
             "total_duration_ms": self.total_duration_ms,
             "total_duration_ms": self.total_duration_ms,
             "last_sequence": self.last_sequence,
             "last_sequence": self.last_sequence,
             "last_event_id": self.last_event_id,
             "last_event_id": self.last_event_id,
             "uid": self.uid,
             "uid": self.uid,
+            "model": self.model,
+            "tools": self.tools,
+            "llm_params": self.llm_params,
             "context": self.context,
             "context": self.context,
             "current_goal_id": self.current_goal_id,
             "current_goal_id": self.current_goal_id,
+            "result_summary": self.result_summary,
+            "error_message": self.error_message,
             "created_at": self.created_at.isoformat() if self.created_at else None,
             "created_at": self.created_at.isoformat() if self.created_at else None,
             "completed_at": self.completed_at.isoformat() if self.completed_at else None,
             "completed_at": self.completed_at.isoformat() if self.completed_at else None,
         }
         }
@@ -103,15 +119,17 @@ class Message:
     """
     """
     执行消息 - Trace 中的 LLM 消息
     执行消息 - Trace 中的 LLM 消息
 
 
-    对应 LLM API 消息格式(assistant/tool),通过 goal_id 关联 Goal。
+    对应 LLM API 消息格式(system/user/assistant/tool),通过 goal_id 关联 Goal。
 
 
     description 字段自动生成规则:
     description 字段自动生成规则:
+    - system: 取 content 前 200 字符
+    - user: 取 content 前 200 字符
     - assistant: 优先取 content,若无 content 则生成 "tool call: XX, XX"
     - assistant: 优先取 content,若无 content 则生成 "tool call: XX, XX"
     - tool: 使用 tool name
     - tool: 使用 tool name
     """
     """
     message_id: str
     message_id: str
     trace_id: str
     trace_id: str
-    role: Literal["assistant", "tool"]   # 和 LLM API 一致
+    role: Literal["system", "user", "assistant", "tool"]   # 和 LLM API 一致
     sequence: int                        # 全局顺序
     sequence: int                        # 全局顺序
     goal_id: Optional[str] = None        # 关联的 Goal 内部 ID(None = 还没有创建 Goal)
     goal_id: Optional[str] = None        # 关联的 Goal 内部 ID(None = 还没有创建 Goal)
     description: str = ""                # 消息描述(系统自动生成)
     description: str = ""                # 消息描述(系统自动生成)
@@ -119,23 +137,46 @@ class Message:
     content: Any = None                  # 消息内容(和 LLM API 格式一致)
     content: Any = None                  # 消息内容(和 LLM API 格式一致)
 
 
     # 元数据
     # 元数据
-    tokens: Optional[int] = None
+    prompt_tokens: Optional[int] = None  # 输入 tokens
+    completion_tokens: Optional[int] = None  # 输出 tokens
     cost: Optional[float] = None
     cost: Optional[float] = None
     duration_ms: Optional[int] = None
     duration_ms: Optional[int] = None
     created_at: datetime = field(default_factory=datetime.now)
     created_at: datetime = field(default_factory=datetime.now)
 
 
+    # LLM 响应信息(仅 role="assistant" 时使用)
+    finish_reason: Optional[str] = None  # stop, length, tool_calls, content_filter 等
+
+    @property
+    def tokens(self) -> int:
+        """动态计算总 tokens(向后兼容)"""
+        return (self.prompt_tokens or 0) + (self.completion_tokens or 0)
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "Message":
+        """从字典创建 Message(处理向后兼容)"""
+        # 过滤掉已删除的字段
+        filtered_data = {k: v for k, v in data.items() if k not in ["tokens", "available_tools"]}
+
+        # 解析 datetime
+        if filtered_data.get("created_at") and isinstance(filtered_data["created_at"], str):
+            filtered_data["created_at"] = datetime.fromisoformat(filtered_data["created_at"])
+
+        return cls(**filtered_data)
+
     @classmethod
     @classmethod
     def create(
     def create(
         cls,
         cls,
         trace_id: str,
         trace_id: str,
-        role: Literal["assistant", "tool"],
+        role: Literal["system", "user", "assistant", "tool"],
         sequence: int,
         sequence: int,
         goal_id: Optional[str] = None,
         goal_id: Optional[str] = None,
         content: Any = None,
         content: Any = None,
         tool_call_id: Optional[str] = None,
         tool_call_id: Optional[str] = None,
-        tokens: Optional[int] = None,
+        prompt_tokens: Optional[int] = None,
+        completion_tokens: Optional[int] = None,
         cost: Optional[float] = None,
         cost: Optional[float] = None,
         duration_ms: Optional[int] = None,
         duration_ms: Optional[int] = None,
+        finish_reason: Optional[str] = None,
     ) -> "Message":
     ) -> "Message":
         """创建新的 Message,自动生成 description"""
         """创建新的 Message,自动生成 description"""
         description = cls._generate_description(role, content)
         description = cls._generate_description(role, content)
@@ -149,9 +190,11 @@ class Message:
             content=content,
             content=content,
             description=description,
             description=description,
             tool_call_id=tool_call_id,
             tool_call_id=tool_call_id,
-            tokens=tokens,
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
             cost=cost,
             cost=cost,
             duration_ms=duration_ms,
             duration_ms=duration_ms,
+            finish_reason=finish_reason,
         )
         )
 
 
     @staticmethod
     @staticmethod
@@ -159,10 +202,24 @@ class Message:
         """
         """
         自动生成 description
         自动生成 description
 
 
+        - system: 取 content 前 200 字符
+        - user: 取 content 前 200 字符
         - assistant: 优先取 content,若无 content 则生成 "tool call: XX, XX"
         - assistant: 优先取 content,若无 content 则生成 "tool call: XX, XX"
         - tool: 使用 tool name
         - tool: 使用 tool name
         """
         """
-        if role == "assistant":
+        if role == "system":
+            # system 消息:直接截取文本
+            if isinstance(content, str):
+                return content[:200] + "..." if len(content) > 200 else content
+            return "system prompt"
+
+        elif role == "user":
+            # user 消息:直接截取文本
+            if isinstance(content, str):
+                return content[:200] + "..." if len(content) > 200 else content
+            return "user message"
+
+        elif role == "assistant":
             # assistant 消息:content 是字典,可能包含 text 和 tool_calls
             # assistant 消息:content 是字典,可能包含 text 和 tool_calls
             if isinstance(content, dict):
             if isinstance(content, dict):
                 # 优先返回文本内容
                 # 优先返回文本内容
@@ -213,9 +270,12 @@ class Message:
             "tool_call_id": self.tool_call_id,
             "tool_call_id": self.tool_call_id,
             "content": self.content,
             "content": self.content,
             "description": self.description,
             "description": self.description,
-            "tokens": self.tokens,
+            "tokens": self.tokens,  # 使用 @property 动态计算
+            "prompt_tokens": self.prompt_tokens,
+            "completion_tokens": self.completion_tokens,
             "cost": self.cost,
             "cost": self.cost,
             "duration_ms": self.duration_ms,
             "duration_ms": self.duration_ms,
+            "finish_reason": self.finish_reason,
             "created_at": self.created_at.isoformat() if self.created_at else None,
             "created_at": self.created_at.isoformat() if self.created_at else None,
         }
         }
 
 

+ 18 - 4
agent/goal/models.py

@@ -142,6 +142,13 @@ class GoalTree:
                 return goal
                 return goal
         return None
         return None
 
 
+    def find_by_display_id(self, display_id: str) -> Optional[Goal]:
+        """按显示 ID 查找 Goal(如 "1", "2.1", "2.2")"""
+        for goal in self.goals:
+            if self._generate_display_id(goal) == display_id:
+                return goal
+        return None
+
     def find_parent(self, goal_id: str) -> Optional[Goal]:
     def find_parent(self, goal_id: str) -> Optional[Goal]:
         """查找指定 Goal 的父节点"""
         """查找指定 Goal 的父节点"""
         goal = self.find(goal_id)
         goal = self.find(goal_id)
@@ -270,8 +277,15 @@ class GoalTree:
         self.current_id = goal_id
         self.current_id = goal_id
         return goal
         return goal
 
 
-    def complete(self, goal_id: str, summary: str) -> Goal:
-        """完成指定 Goal"""
+    def complete(self, goal_id: str, summary: str, clear_focus: bool = True) -> Goal:
+        """
+        完成指定 Goal
+
+        Args:
+            goal_id: 要完成的目标 ID
+            summary: 完成总结
+            clear_focus: 如果完成的是当前焦点,是否清除焦点(默认 True)
+        """
         goal = self.find(goal_id)
         goal = self.find(goal_id)
         if not goal:
         if not goal:
             raise ValueError(f"Goal not found: {goal_id}")
             raise ValueError(f"Goal not found: {goal_id}")
@@ -279,8 +293,8 @@ class GoalTree:
         goal.status = "completed"
         goal.status = "completed"
         goal.summary = summary
         goal.summary = summary
 
 
-        # 如果完成的是当前焦点,清除焦点
-        if self.current_id == goal_id:
+        # 如果完成的是当前焦点,根据参数决定是否清除焦点
+        if clear_focus and self.current_id == goal_id:
             self.current_id = None
             self.current_id = None
 
 
         # 检查是否所有兄弟都完成了,如果是则自动完成父节点
         # 检查是否所有兄弟都完成了,如果是则自动完成父节点

+ 46 - 60
agent/goal/tool.py

@@ -36,33 +36,23 @@ async def goal_tool(
         under: 为指定目标添加子目标
         under: 为指定目标添加子目标
         done: 完成当前目标,值为 summary
         done: 完成当前目标,值为 summary
         abandon: 放弃当前目标,值为原因
         abandon: 放弃当前目标,值为原因
-        focus: 切换焦点到指定内部 id
+        focus: 切换焦点到指定 ID
 
 
     Returns:
     Returns:
         更新后的计划状态文本
         更新后的计划状态文本
     """
     """
     changes = []
     changes = []
 
 
-    # 1. 处理 abandon(先处理,因为可能需要在 add 新目标前放弃旧的)
-    if abandon is not None:
-        if not tree.current_id:
-            return "错误:没有当前目标可以放弃"
-        goal = tree.abandon(tree.current_id, abandon)
-        display_id = tree._generate_display_id(goal)
-        changes.append(f"已放弃: {display_id}. {goal.description}")
-
-        # 推送事件
-        if store and trace_id:
-            print(f"[DEBUG] goal_tool: calling store.update_goal for abandon: goal_id={goal.id}")
-            await store.update_goal(trace_id, goal.id, status="abandoned", summary=abandon)
-        else:
-            print(f"[DEBUG] goal_tool: skip event push (store={store}, trace_id={trace_id})")
-
-    # 2. 处理 done
+    # 1. 处理 done(完成当前目标)
     if done is not None:
     if done is not None:
         if not tree.current_id:
         if not tree.current_id:
-            return "错误:没有当前目标可以完成"
-        goal = tree.complete(tree.current_id, done)
+            return f"错误:没有当前目标可以完成。当前焦点为空,请先使用 focus 参数切换到要完成的目标。\n\n当前计划:\n{tree.to_prompt()}"
+
+        # 完成当前目标
+        # 如果同时指定了 focus,则不清空焦点(后面会切换到新目标)
+        # 如果只有 done,则清空焦点
+        clear_focus = (focus is None)
+        goal = tree.complete(tree.current_id, done, clear_focus=clear_focus)
         display_id = tree._generate_display_id(goal)
         display_id = tree._generate_display_id(goal)
         changes.append(f"已完成: {display_id}. {goal.description}")
         changes.append(f"已完成: {display_id}. {goal.description}")
 
 
@@ -73,34 +63,39 @@ async def goal_tool(
         else:
         else:
             print(f"[DEBUG] goal_tool: skip event push (store={store}, trace_id={trace_id})")
             print(f"[DEBUG] goal_tool: skip event push (store={store}, trace_id={trace_id})")
 
 
-        # 检查是否有级联完成的父目标
+        # 检查是否有级联完成的父目标(complete方法已经处理,这里只需要记录)
         if goal.parent_id:
         if goal.parent_id:
             parent = tree.find(goal.parent_id)
             parent = tree.find(goal.parent_id)
             if parent and parent.status == "completed":
             if parent and parent.status == "completed":
                 parent_display_id = tree._generate_display_id(parent)
                 parent_display_id = tree._generate_display_id(parent)
                 changes.append(f"自动完成: {parent_display_id}. {parent.description}(所有子目标已完成)")
                 changes.append(f"自动完成: {parent_display_id}. {parent.description}(所有子目标已完成)")
 
 
-    # 3. 处理 focus(在 add 之前,这样 add 可以添加到新焦点下)
+    # 2. 处理 focus(切换焦点到新目标)
     if focus is not None:
     if focus is not None:
-        # focus 参数可以是内部 ID 或显示 ID
-        # 先尝试作为内部 ID 查找
-        goal = tree.find(focus)
-
-        # 如果找不到,尝试根据显示 ID 查找
-        if not goal:
-            # 通过遍历所有 goal 查找匹配的显示 ID
-            for g in tree.goals:
-                if tree._generate_display_id(g) == focus:
-                    goal = g
-                    break
+        goal = tree.find_by_display_id(focus)
 
 
         if not goal:
         if not goal:
-            return f"错误:找不到目标 {focus}"
+            return f"错误:找不到目标 {focus}\n\n当前计划:\n{tree.to_prompt()}"
 
 
         tree.focus(goal.id)
         tree.focus(goal.id)
         display_id = tree._generate_display_id(goal)
         display_id = tree._generate_display_id(goal)
         changes.append(f"切换焦点: {display_id}. {goal.description}")
         changes.append(f"切换焦点: {display_id}. {goal.description}")
 
 
+    # 3. 处理 abandon(放弃当前目标)
+    if abandon is not None:
+        if not tree.current_id:
+            return f"错误:没有当前目标可以放弃。当前焦点为空。\n\n当前计划:\n{tree.to_prompt()}"
+        goal = tree.abandon(tree.current_id, abandon)
+        display_id = tree._generate_display_id(goal)
+        changes.append(f"已放弃: {display_id}. {goal.description}")
+
+        # 推送事件
+        if store and trace_id:
+            print(f"[DEBUG] goal_tool: calling store.update_goal for abandon: goal_id={goal.id}")
+            await store.update_goal(trace_id, goal.id, status="abandoned", summary=abandon)
+        else:
+            print(f"[DEBUG] goal_tool: skip event push (store={store}, trace_id={trace_id})")
+
     # 4. 处理 add
     # 4. 处理 add
     if add is not None:
     if add is not None:
         # 检查 after 和 under 互斥
         # 检查 after 和 under 互斥
@@ -120,34 +115,20 @@ async def goal_tool(
             # 确定添加位置
             # 确定添加位置
             if after is not None:
             if after is not None:
                 # 在指定 goal 后面添加(同层级)
                 # 在指定 goal 后面添加(同层级)
-                # after 参数可以是内部 ID 或显示 ID
-                target_goal = tree.find(after)
-                if not target_goal:
-                    # 尝试根据显示 ID 查找
-                    for g in tree.goals:
-                        if tree._generate_display_id(g) == after:
-                            target_goal = g
-                            break
+                target_goal = tree.find_by_display_id(after)
 
 
                 if not target_goal:
                 if not target_goal:
-                    return f"错误:找不到目标 {after}"
+                    return f"错误:找不到目标 {after}\n\n当前计划:\n{tree.to_prompt()}"
 
 
                 new_goals = tree.add_goals_after(target_goal.id, descriptions, reasons=reasons)
                 new_goals = tree.add_goals_after(target_goal.id, descriptions, reasons=reasons)
                 changes.append(f"在 {tree._generate_display_id(target_goal)} 后面添加 {len(new_goals)} 个同级目标")
                 changes.append(f"在 {tree._generate_display_id(target_goal)} 后面添加 {len(new_goals)} 个同级目标")
 
 
             elif under is not None:
             elif under is not None:
                 # 为指定 goal 添加子目标
                 # 为指定 goal 添加子目标
-                # under 参数可以是内部 ID 或显示 ID
-                parent_goal = tree.find(under)
-                if not parent_goal:
-                    # 尝试根据显示 ID 查找
-                    for g in tree.goals:
-                        if tree._generate_display_id(g) == under:
-                            parent_goal = g
-                            break
+                parent_goal = tree.find_by_display_id(under)
 
 
                 if not parent_goal:
                 if not parent_goal:
-                    return f"错误:找不到目标 {under}"
+                    return f"错误:找不到目标 {under}\n\n当前计划:\n{tree.to_prompt()}"
 
 
                 new_goals = tree.add_goals(descriptions, reasons=reasons, parent_id=parent_goal.id)
                 new_goals = tree.add_goals(descriptions, reasons=reasons, parent_id=parent_goal.id)
                 changes.append(f"在 {tree._generate_display_id(parent_goal)} 下添加 {len(new_goals)} 个子目标")
                 changes.append(f"在 {tree._generate_display_id(parent_goal)} 下添加 {len(new_goals)} 个子目标")
@@ -198,11 +179,11 @@ def create_goal_tool_schema() -> dict:
 
 
 - add: 添加目标(逗号分隔多个)
 - add: 添加目标(逗号分隔多个)
 - reason: 创建理由(逗号分隔多个,与 add 一一对应)。说明为什么要做这些目标。
 - reason: 创建理由(逗号分隔多个,与 add 一一对应)。说明为什么要做这些目标。
-- after: 在指定目标后面添加(同层级)。可以是内部 ID 或显示 ID。
-- under: 为指定目标添加子目标。可以是内部 ID 或显示 ID。如已有子目标,追加到最后。
+- after: 在指定目标后面添加(同层级)。使用目标的 ID。
+- under: 为指定目标添加子目标。使用目标的 ID。如已有子目标,追加到最后。
 - done: 完成当前目标,值为 summary
 - done: 完成当前目标,值为 summary
 - abandon: 放弃当前目标,值为原因(会触发 context 压缩)
 - abandon: 放弃当前目标,值为原因(会触发 context 压缩)
-- focus: 切换焦点到指定 id(可以是内部 ID 或显示 ID)
+- focus: 切换焦点到指定目标。使用目标的 ID。
 
 
 位置控制(优先使用 after):
 位置控制(优先使用 after):
 - 不指定 after/under: 添加到当前 focus 下作为子目标(无 focus 时添加到顶层)
 - 不指定 after/under: 添加到当前 focus 下作为子目标(无 focus 时添加到顶层)
@@ -210,17 +191,22 @@ def create_goal_tool_schema() -> dict:
 - under="X": 为目标 X 添加子目标
 - under="X": 为目标 X 添加子目标
 - after 和 under 不能同时指定
 - after 和 under 不能同时指定
 
 
+执行顺序:
+- done → focus → abandon → add
+- 如果同时指定 done 和 focus,会先完成当前目标,再切换焦点到新目标
+
 示例:
 示例:
 - goal(add="分析代码, 实现功能, 测试") - 添加顶层目标
 - goal(add="分析代码, 实现功能, 测试") - 添加顶层目标
 - goal(add="设计接口, 实现代码", under="2") - 为目标2添加子目标
 - goal(add="设计接口, 实现代码", under="2") - 为目标2添加子目标
 - goal(add="编写文档", after="3") - 在目标3后面添加同级任务
 - goal(add="编写文档", after="3") - 在目标3后面添加同级任务
 - goal(add="集成测试", after="2.2") - 在目标2.2后面添加同级任务
 - goal(add="集成测试", after="2.2") - 在目标2.2后面添加同级任务
 - goal(done="发现用户模型在 models/user.py") - 完成当前目标
 - goal(done="发现用户模型在 models/user.py") - 完成当前目标
+- goal(done="已完成调研", focus="2") - 完成当前目标,切换到目标2
 - goal(abandon="方案A需要Redis,环境没有") - 放弃当前目标
 - goal(abandon="方案A需要Redis,环境没有") - 放弃当前目标
 
 
-注意:内部 ID 是纯自增数字("1", "2", "3"),显示 ID 是带层级的("1", "2.1", "2.2")。
-所有 ID 参数都可以使用任意格式的 ID。
-reason 应该与 add 的目标数量一致,如果数量不一致,缺少的 reason 将为空
+注意:
+- 目标 ID 的格式为 "1", "2", "2.1", "2.2" 等,在计划视图中可以看到
+- reason 应该与 add 的目标数量一致,如果数量不一致,缺少的 reason 将为空
 """,
 """,
         "parameters": {
         "parameters": {
             "type": "object",
             "type": "object",
@@ -235,11 +221,11 @@ reason 应该与 add 的目标数量一致,如果数量不一致,缺少的 r
                 },
                 },
                 "after": {
                 "after": {
                     "type": "string",
                     "type": "string",
-                    "description": "在指定目标后面添加(同层级)。可以是内部 ID 或显示 ID。"
+                    "description": "在指定目标后面添加(同层级)。使用目标的 ID,如 \"2\" 或 \"2.1\"。"
                 },
                 },
                 "under": {
                 "under": {
                     "type": "string",
                     "type": "string",
-                    "description": "为指定目标添加子目标。可以是内部 ID 或显示 ID。"
+                    "description": "为指定目标添加子目标。使用目标的 ID,如 \"2\" 或 \"2.1\"。"
                 },
                 },
                 "done": {
                 "done": {
                     "type": "string",
                     "type": "string",
@@ -251,7 +237,7 @@ reason 应该与 add 的目标数量一致,如果数量不一致,缺少的 r
                 },
                 },
                 "focus": {
                 "focus": {
                     "type": "string",
                     "type": "string",
-                    "description": "切换焦点到指定 goal id(可以是内部 ID 或显示 ID)"
+                    "description": "切换焦点到指定目标。使用目标的 ID,如 \"2\" 或 \"2.1\"。"
                 }
                 }
             },
             },
             "required": []
             "required": []

+ 17 - 2
agent/llm/gemini.py

@@ -327,6 +327,7 @@ def create_gemini_llm_call(
                 "tool_calls": List[Dict] | None,
                 "tool_calls": List[Dict] | None,
                 "prompt_tokens": int,
                 "prompt_tokens": int,
                 "completion_tokens": int,
                 "completion_tokens": int,
+                "finish_reason": str,
                 "cost": float
                 "cost": float
             }
             }
         """
         """
@@ -375,14 +376,27 @@ def create_gemini_llm_call(
         # 解析响应
         # 解析响应
         content = ""
         content = ""
         tool_calls = None
         tool_calls = None
+        finish_reason = "stop"  # 默认值
 
 
         candidates = gemini_resp.get("candidates", [])
         candidates = gemini_resp.get("candidates", [])
         if candidates:
         if candidates:
             candidate = candidates[0]
             candidate = candidates[0]
 
 
+            # 提取 finish_reason(Gemini -> OpenAI 格式映射)
+            gemini_finish_reason = candidate.get("finishReason", "STOP")
+            if gemini_finish_reason == "STOP":
+                finish_reason = "stop"
+            elif gemini_finish_reason == "MAX_TOKENS":
+                finish_reason = "length"
+            elif gemini_finish_reason in ("SAFETY", "RECITATION"):
+                finish_reason = "content_filter"
+            elif gemini_finish_reason == "MALFORMED_FUNCTION_CALL":
+                finish_reason = "stop"  # 映射为 stop,但在 content 中包含错误信息
+            else:
+                finish_reason = gemini_finish_reason.lower()  # 保持原值,转小写
+
             # 检查是否有错误
             # 检查是否有错误
-            finish_reason = candidate.get("finishReason")
-            if finish_reason == "MALFORMED_FUNCTION_CALL":
+            if gemini_finish_reason == "MALFORMED_FUNCTION_CALL":
                 # Gemini 返回了格式错误的函数调用
                 # Gemini 返回了格式错误的函数调用
                 # 提取 finishMessage 中的内容作为 content
                 # 提取 finishMessage 中的内容作为 content
                 finish_message = candidate.get("finishMessage", "")
                 finish_message = candidate.get("finishMessage", "")
@@ -426,6 +440,7 @@ def create_gemini_llm_call(
             "tool_calls": tool_calls,
             "tool_calls": tool_calls,
             "prompt_tokens": prompt_tokens,
             "prompt_tokens": prompt_tokens,
             "completion_tokens": completion_tokens,
             "completion_tokens": completion_tokens,
+            "finish_reason": finish_reason,
             "cost": 0.0
             "cost": 0.0
         }
         }
 
 

+ 3 - 0
agent/llm/openrouter.py

@@ -32,6 +32,7 @@ async def openrouter_llm_call(
             "tool_calls": List[Dict] | None,
             "tool_calls": List[Dict] | None,
             "prompt_tokens": int,
             "prompt_tokens": int,
             "completion_tokens": int,
             "completion_tokens": int,
+            "finish_reason": str,
             "cost": float
             "cost": float
         }
         }
     """
     """
@@ -85,6 +86,7 @@ async def openrouter_llm_call(
 
 
     content = message.get("content", "")
     content = message.get("content", "")
     tool_calls = message.get("tool_calls")
     tool_calls = message.get("tool_calls")
+    finish_reason = choice.get("finish_reason")  # stop, length, tool_calls, content_filter 等
 
 
     # 提取 usage
     # 提取 usage
     usage = result.get("usage", {})
     usage = result.get("usage", {})
@@ -99,6 +101,7 @@ async def openrouter_llm_call(
         "tool_calls": tool_calls,
         "tool_calls": tool_calls,
         "prompt_tokens": prompt_tokens,
         "prompt_tokens": prompt_tokens,
         "completion_tokens": completion_tokens,
         "completion_tokens": completion_tokens,
+        "finish_reason": finish_reason,
         "cost": cost
         "cost": cost
     }
     }
 
 

+ 59 - 12
agent/skills/core.md

@@ -12,38 +12,56 @@ description: 核心系统能力,自动加载到 System Prompt
 
 
 ## 计划与执行
 ## 计划与执行
 
 
-对于复杂任务,你要先分析需求,并使用 `step` 工具来管理执行计划和进度。这一工具会形成一棵思维树。
+对于复杂任务,你要先分析需求,并使用 `goal` 工具来管理执行计划和进度。这一工具会形成一棵目标树。
 
 
 ### 创建计划:拆分任务步骤,创建TODO
 ### 创建计划:拆分任务步骤,创建TODO
 
 
 ```
 ```
-step(plan=["调研并确定方案", "执行方案", "评估结果"])
+goal(add="调研并确定方案, 执行方案, 评估结果")
 ```
 ```
 
 
-这将在当前节点下增加3个plan节点。你可以在执行过程中设置进一步的sub plan
+这将创建3个目标。你可以在执行过程中继续添加子目标。
 
 
 ### 开始执行
 ### 开始执行
 
 
-聚焦到某个目标开始执行:
+聚焦到某个目标开始执行(使用目标的 ID):
 
 
 ```
 ```
-step(focus="调研并确定方案")
+goal(focus="1")
+```
+
+目标的 ID 会显示在计划视图中,格式如 "1", "2", "2.1", "2.2" 等。
+
+### 完成目标
+
+完成当前目标并提供总结:
+
+```
+goal(done="人物姿势的最佳提取工具是openpose")
 ```
 ```
 
 
 ### 完成并切换
 ### 完成并切换
 
 
-完成当前目标,提供总结,切换到下一个:
+先完成当前目标,再切换焦点到下一个:
 
 
 ```
 ```
-step(complete=True, summary="人物姿势的最佳提取工具是openpose", focus="执行方案")
+goal(done="人物姿势的最佳提取工具是openpose", focus="2")
 ```
 ```
 
 
-### 查看进度
+这会先完成当前正在执行的目标,然后切换焦点到目标 "2"。
+
+### 添加子目标
 
 
-查看当前执行进度:
+为指定目标添加子目标:
 
 
 ```
 ```
-read_progress()
+goal(add="设计接口, 实现代码", under="2")
+```
+
+在指定目标后面添加同级目标:
+
+```
+goal(add="编写文档", after="2")
 ```
 ```
 
 
 ### 使用规范
 ### 使用规范
@@ -52,9 +70,38 @@ read_progress()
 2. **summary 应简洁**:记录关键结论和发现,不要冗长
 2. **summary 应简洁**:记录关键结论和发现,不要冗长
 3. **计划可调整**:根据执行情况追加或跳过目标
 3. **计划可调整**:根据执行情况追加或跳过目标
 4. **简单任务不需要计划**:单步操作直接执行即可
 4. **简单任务不需要计划**:单步操作直接执行即可
+5. **使用 ID 进行定位**:focus、after、under 参数都使用目标的 ID(如 "1", "2.1")
 
 
 ## 信息调研
 ## 信息调研
 
 
-你可以通过联网搜索工具`search_posts`获取来自Github、小红书、微信公众号、知乎等渠道的信息。
+你可以通过联网搜索工具`search_posts`获取来自Github、小红书、微信公众号、知乎等渠道的信息。对于需要深度交互的网页内容,使用浏览器工具进行操作。
+
+调研过程可能需要多次搜索,比如基于搜索结果中获得的启发或信息启动新的搜索,直到得到令人满意的答案。你可以使用`goal`工具管理搜索的过程,或者使用文档记录搜索的中间或最终结果。
+
+## 浏览器工具使用指南
+
+所有浏览器工具都以 `browser_` 为前缀。浏览器会话会持久化,无需每次重新启动。
+
+### 基本工作流程
+
+1. **页面导航**: 使用 `browser_navigate_to_url` 或 `browser_search_web` 到达目标页面
+2. **等待加载**: 页面跳转后调用 `browser_wait(seconds=2)` 等待内容加载
+3. **获取元素索引**: 调用 `browser_get_selector_map` 获取可交互元素的索引映射
+4. **执行交互**: 使用 `browser_click_element`、`browser_input_text` 等工具操作页面
+5. **提取内容**: 使用 `browser_extract_content` 或 `browser_get_page_html` 获取数据
+
+### 关键原则
+
+- **必须先获取索引**: 所有 `index` 参数都需要先通过 `browser_get_selector_map` 获取
+- **操作后等待**: 任何可能触发页面变化的操作(点击、输入、滚动)后都要调用 `browser_wait`
+- **优先用高级工具**: 优先使用 `browser_extract_content` 而不是手动解析HTML
+- **登录处理**: 需要登录的网站使用 `browser_ensure_login_with_cookies(cookie_type="xhs")` 注入Cookie
+- **复杂操作用JS**: 当标准工具无法满足时,使用 `browser_evaluate` 执行JavaScript代码
+
+### 工具分类
 
 
-调研过程可能需要多次搜索,比如基于搜索结果中获得的启发或信息启动新的搜索,直到得到令人满意的答案。你可以使用`step`工具管理搜索的过程,或者使用文档记录搜索的中间或最终结果。
+- **导航**: browser_navigate_to_url, browser_search_web, browser_go_back, browser_wait
+- **交互**: browser_click_element, browser_input_text, browser_send_keys, browser_upload_file
+- **视图**: browser_scroll_page, browser_find_text, browser_screenshot
+- **提取**: browser_extract_content, browser_get_page_html, browser_get_selector_map
+- **高级**: browser_evaluate, browser_ensure_login_with_cookies, browser_wait_for_user_action

+ 3 - 0
agent/tools/builtin/__init__.py

@@ -19,6 +19,9 @@ from agent.tools.builtin.search import search_posts, get_search_suggestions
 from agent.tools.builtin.sandbox import (sandbox_create_environment, sandbox_run_shell,
 from agent.tools.builtin.sandbox import (sandbox_create_environment, sandbox_run_shell,
                                          sandbox_rebuild_with_ports,sandbox_destroy_environment)
                                          sandbox_rebuild_with_ports,sandbox_destroy_environment)
 
 
+# 导入浏览器工具以触发注册
+import agent.tools.builtin.browser  # noqa: F401
+
 __all__ = [
 __all__ = [
     "read_file",
     "read_file",
     "edit_file",
     "edit_file",

+ 101 - 0
agent/tools/builtin/browser/__init__.py

@@ -0,0 +1,101 @@
+"""
+浏览器工具 - Browser-Use 原生工具适配器
+
+基于 browser-use 实现的浏览器自动化工具集。
+"""
+
+from agent.tools.builtin.browser.baseClass import (
+    # 会话管理
+    init_browser_session,
+    get_browser_session,
+    cleanup_browser_session,
+    kill_browser_session,
+
+    # 导航类工具
+    browser_navigate_to_url,
+    browser_search_web,
+    browser_go_back,
+    browser_wait,
+
+    # 元素交互工具
+    browser_click_element,
+    browser_input_text,
+    browser_send_keys,
+    browser_upload_file,
+
+    # 滚动和视图工具
+    browser_scroll_page,
+    browser_find_text,
+    browser_screenshot,
+
+    # 标签页管理工具
+    browser_switch_tab,
+    browser_close_tab,
+
+    # 下拉框工具
+    browser_get_dropdown_options,
+    browser_select_dropdown_option,
+
+    # 内容提取工具
+    browser_extract_content,
+    browser_get_page_html,
+    browser_get_selector_map,
+
+    # JavaScript 执行工具
+    browser_evaluate,
+    browser_ensure_login_with_cookies,
+
+    # 等待用户操作
+    browser_wait_for_user_action,
+
+    # 任务完成
+    browser_done,
+)
+
+__all__ = [
+    # 会话管理
+    'init_browser_session',
+    'get_browser_session',
+    'cleanup_browser_session',
+    'kill_browser_session',
+
+    # 导航类工具
+    'browser_navigate_to_url',
+    'browser_search_web',
+    'browser_go_back',
+    'browser_wait',
+
+    # 元素交互工具
+    'browser_click_element',
+    'browser_input_text',
+    'browser_send_keys',
+    'browser_upload_file',
+
+    # 滚动和视图工具
+    'browser_scroll_page',
+    'browser_find_text',
+    'browser_screenshot',
+
+    # 标签页管理工具
+    'browser_switch_tab',
+    'browser_close_tab',
+
+    # 下拉框工具
+    'browser_get_dropdown_options',
+    'browser_select_dropdown_option',
+
+    # 内容提取工具
+    'browser_extract_content',
+    'browser_get_page_html',
+    'browser_get_selector_map',
+
+    # JavaScript 执行工具
+    'browser_evaluate',
+    'browser_ensure_login_with_cookies',
+
+    # 等待用户操作
+    'browser_wait_for_user_action',
+
+    # 任务完成
+    'browser_done',
+]

+ 298 - 198
agent/tools/builtin/browser/baseClass.py

@@ -10,9 +10,29 @@ Native Browser-Use Tools Adapter
 2. 状态自动保持 - 登录状态、Cookie、LocalStorage 等
 2. 状态自动保持 - 登录状态、Cookie、LocalStorage 等
 3. 完整的底层访问 - 可以直接使用 CDP 协议
 3. 完整的底层访问 - 可以直接使用 CDP 协议
 4. 性能优异 - 避免频繁创建/销毁浏览器实例
 4. 性能优异 - 避免频繁创建/销毁浏览器实例
+5. 多种浏览器类型 - 支持 local、cloud、container 三种模式
+
+支持的浏览器类型:
+1. Local (本地浏览器):
+   - 在本地运行 Chrome
+   - 支持可视化调试
+   - 速度最快
+   - 示例: init_browser_session(browser_type="local")
+
+2. Cloud (云浏览器):
+   - 在云端运行
+   - 不占用本地资源
+   - 适合生产环境
+   - 示例: init_browser_session(browser_type="cloud")
+
+3. Container (容器浏览器):
+   - 在独立容器中运行
+   - 隔离性好
+   - 支持预配置账户
+   - 示例: init_browser_session(browser_type="container", container_url="https://example.com")
 
 
 使用方法:
 使用方法:
-1. 在 Agent 初始化时调用 init_browser_session()
+1. 在 Agent 初始化时调用 init_browser_session() 并指定 browser_type
 2. 使用各个工具函数执行浏览器操作
 2. 使用各个工具函数执行浏览器操作
 3. 任务结束时调用 cleanup_browser_session()
 3. 任务结束时调用 cleanup_browser_session()
 
 
@@ -27,6 +47,7 @@ import sys
 import os
 import os
 import json
 import json
 import asyncio
 import asyncio
+import aiohttp
 from typing import Optional, List, Dict, Any, Tuple
 from typing import Optional, List, Dict, Any, Tuple
 from pathlib import Path
 from pathlib import Path
 from urllib.parse import urlparse
 from urllib.parse import urlparse
@@ -44,6 +65,12 @@ from browser_use.tools.service import Tools
 from browser_use.agent.views import ActionResult
 from browser_use.agent.views import ActionResult
 from browser_use.filesystem.file_system import FileSystem
 from browser_use.filesystem.file_system import FileSystem
 
 
+
+# ============================================================
+# 无需注册的内部辅助函数
+# ============================================================
+
+
 # ============================================================
 # ============================================================
 # 全局浏览器会话管理
 # 全局浏览器会话管理
 # ============================================================
 # ============================================================
@@ -53,52 +80,248 @@ _browser_session: Optional[BrowserSession] = None
 _browser_tools: Optional[Tools] = None
 _browser_tools: Optional[Tools] = None
 _file_system: Optional[FileSystem] = None
 _file_system: Optional[FileSystem] = None
 
 
+async def create_container(url: str, account_name: str = "liuwenwu") -> Dict[str, Any]:
+    """
+    创建浏览器容器并导航到指定URL
+
+    按照 test.md 的要求:
+    1.1 调用接口创建容器
+    1.2 调用接口创建窗口并导航到URL
+
+    Args:
+        url: 要导航的URL地址
+        account_name: 账户名称
+
+    Returns:
+        包含容器信息的字典:
+        - success: 是否成功
+        - container_id: 容器ID
+        - vnc: VNC访问URL
+        - cdp: CDP协议URL(用于浏览器连接)
+        - connection_id: 窗口连接ID
+        - error: 错误信息(如果失败)
+    """
+    result = {
+        "success": False,
+        "container_id": None,
+        "vnc": None,
+        "cdp": None,
+        "connection_id": None,
+        "error": None
+    }
+
+    try:
+        async with aiohttp.ClientSession() as session:
+            # 步骤1.1: 创建容器
+            print("📦 步骤1.1: 创建容器...")
+            create_url = "http://47.84.182.56:8200/api/v1/container/create"
+            create_payload = {
+                "auto_remove": True,
+                "need_port_binding": True,
+                "max_lifetime_seconds": 900
+            }
+
+            async with session.post(create_url, json=create_payload) as resp:
+                if resp.status != 200:
+                    raise RuntimeError(f"创建容器失败: HTTP {resp.status}")
+
+                create_result = await resp.json()
+                if create_result.get("code") != 0:
+                    raise RuntimeError(f"创建容器失败: {create_result.get('msg')}")
+
+                data = create_result.get("data", {})
+                result["container_id"] = data.get("container_id")
+                result["vnc"] = data.get("vnc")
+                result["cdp"] = data.get("cdp")
+
+                print(f"✅ 容器创建成功")
+                print(f"   Container ID: {result['container_id']}")
+                print(f"   VNC: {result['vnc']}")
+                print(f"   CDP: {result['cdp']}")
+
+            # 等待容器内的浏览器启动
+            print(f"\n⏳ 等待容器内浏览器启动...")
+            await asyncio.sleep(5)
+
+            # 步骤1.2: 创建页面并导航
+            print(f"\n📱 步骤1.2: 创建页面并导航到 {url}...")
+
+            page_create_url = "http://47.84.182.56:8200/api/v1/browser/page/create"
+            page_payload = {
+                "container_id": result["container_id"],
+                "url": url,
+                "account_name": account_name,
+                "need_wait": True,
+                "timeout": 30
+            }
+
+            # 重试机制:最多尝试3次
+            max_retries = 3
+            page_created = False
+            last_error = None
+
+            for attempt in range(max_retries):
+                try:
+                    if attempt > 0:
+                        print(f"   重试 {attempt + 1}/{max_retries}...")
+                        await asyncio.sleep(3)  # 重试前等待
+
+                    async with session.post(page_create_url, json=page_payload, timeout=aiohttp.ClientTimeout(total=60)) as resp:
+                        if resp.status != 200:
+                            response_text = await resp.text()
+                            last_error = f"HTTP {resp.status}: {response_text[:200]}"
+                            continue
+
+                        page_result = await resp.json()
+                        if page_result.get("code") != 0:
+                            last_error = f"{page_result.get('msg')}"
+                            continue
+
+                        page_data = page_result.get("data", {})
+                        result["connection_id"] = page_data.get("connection_id")
+                        result["success"] = True
+                        page_created = True
+
+                        print(f"✅ 页面创建成功")
+                        print(f"   Connection ID: {result['connection_id']}")
+                        break
+
+                except asyncio.TimeoutError:
+                    last_error = "请求超时"
+                    continue
+                except aiohttp.ClientError as e:
+                    last_error = f"网络错误: {str(e)}"
+                    continue
+                except Exception as e:
+                    last_error = f"未知错误: {str(e)}"
+                    continue
+
+            if not page_created:
+                raise RuntimeError(f"创建页面失败(尝试{max_retries}次后): {last_error}")
+
+    except Exception as e:
+        result["error"] = str(e)
+        print(f"❌ 错误: {str(e)}")
+
+    return result
 
 
 async def init_browser_session(
 async def init_browser_session(
+    browser_type: str = "local",
     headless: bool = False,
     headless: bool = False,
-    user_data_dir: Optional[str] = None,
+    url: Optional[str] = None,
     profile_name: str = "default",
     profile_name: str = "default",
+    user_data_dir: Optional[str] = None,
     browser_profile: Optional[BrowserProfile] = None,
     browser_profile: Optional[BrowserProfile] = None,
-    use_cloud: bool = False,
     **kwargs
     **kwargs
 ) -> tuple[BrowserSession, Tools]:
 ) -> tuple[BrowserSession, Tools]:
     """
     """
-    初始化全局浏览器会话
+    初始化全局浏览器会话 - 支持三种浏览器类型
 
 
     Args:
     Args:
+        browser_type: 浏览器类型 ("local", "cloud", "container")
         headless: 是否无头模式
         headless: 是否无头模式
-        user_data_dir: 用户数据目录(用于保存登录状态)
-        profile_name: 配置文件名称
-        browser_profile: BrowserProfile 对象(用于预设 cookies 等)
-        use_cloud: 是否使用云浏览器(默认 False,使用本地浏览器)
+        url: 初始访问URL(可选)
+             - local/cloud: 初始化后会自动导航到此URL
+             - container: 容器启动时访问的URL;未提供时回退到 about:blank
+        profile_name: 配置文件/账户名称(默认 "default")
+                     - local: 用于创建用户数据目录路径
+                     - cloud: 云浏览器配置ID
+                     - container: 容器账户名称
+        user_data_dir: 用户数据目录(仅 local 模式,高级用法)
+                      如果提供则覆盖 profile_name 生成的路径
+        browser_profile: BrowserProfile 对象(通用,高级用法)
+                        用于预设 cookies 等
         **kwargs: 其他 BrowserSession 参数
         **kwargs: 其他 BrowserSession 参数
 
 
     Returns:
     Returns:
         (BrowserSession, Tools) 元组
         (BrowserSession, Tools) 元组
+
+    Examples:
+        # 本地浏览器
+        browser, tools = await init_browser_session(
+            browser_type="local",
+            url="https://www.baidu.com"  # 可选
+        )
+
+        # 云浏览器
+        browser, tools = await init_browser_session(
+            browser_type="cloud",
+            profile_name="my_cloud_profile"  # 可选
+        )
+
+        # 容器浏览器
+        browser, tools = await init_browser_session(
+            browser_type="container",
+            url="https://www.xiaohongshu.com",  # 必需
+            profile_name="my_account"  # 可选
+        )
     """
     """
     global _browser_session, _browser_tools, _file_system
     global _browser_session, _browser_tools, _file_system
 
 
     if _browser_session is not None:
     if _browser_session is not None:
         return _browser_session, _browser_tools
         return _browser_session, _browser_tools
 
 
-    # 设置用户数据目录(持久化登录状态)
-    if user_data_dir is None and profile_name and not use_cloud:
-        user_data_dir = str(Path.home() / ".browser_use" / "profiles" / profile_name)
-        Path(user_data_dir).mkdir(parents=True, exist_ok=True)
+    # 验证 browser_type
+    valid_types = ["local", "cloud", "container"]
+    if browser_type not in valid_types:
+        raise ValueError(f"无效的 browser_type: {browser_type},必须是 {valid_types} 之一")
 
 
-    # 创建浏览器会话
+    # 创建浏览器会话参数
     session_params = {
     session_params = {
         "headless": headless,
         "headless": headless,
     }
     }
 
 
-    if use_cloud:
-        # 云浏览器模式
-        session_params["use_cloud"] = True
+    # === Container 模式 ===
+    if browser_type == "container":
+        print("🐳 使用容器浏览器模式")
+
+        # container 模式需要初始 URL;未提供时回退到空白页
+        if not url:
+            url = "about:blank"  # 使用默认空白页
+            print("⚠️  未提供 url 参数,使用默认空白页")
+
+        # 创建容器并获取 CDP URL
+        print(f"📦 正在创建容器...")
+        container_info = await create_container(
+            url=url,
+            account_name=profile_name
+        )
+
+        if not container_info["success"]:
+            raise RuntimeError(f"容器创建失败: {container_info['error']}")
+
+        cdp_url = container_info["cdp"]
+        print(f"✅ 容器创建成功")
+        print(f"   CDP URL: {cdp_url}")
+        print(f"   Container ID: {container_info['container_id']}")
+        print(f"   Connection ID: {container_info.get('connection_id')}")
+
+        # 使用容器的 CDP URL 连接
+        session_params["cdp_url"] = cdp_url
+
+        # 等待容器完全启动
+        print("⏳ 等待容器浏览器启动...")
+        await asyncio.sleep(3)
+
+    # === Cloud 模式 ===
+    elif browser_type == "cloud":
         print("🌐 使用云浏览器模式")
         print("🌐 使用云浏览器模式")
-    else:
-        # 本地浏览器模式
+        session_params["use_cloud"] = True
+
+        # profile_name 作为云配置ID
+        if profile_name and profile_name != "default":
+            session_params["cloud_profile_id"] = profile_name
+
+    # === Local 模式 ===
+    else:  # local
+        print("💻 使用本地浏览器模式")
         session_params["is_local"] = True
         session_params["is_local"] = True
 
 
+        # 设置用户数据目录(持久化登录状态)
+        if user_data_dir is None and profile_name:
+            user_data_dir = str(Path.home() / ".browser_use" / "profiles" / profile_name)
+            Path(user_data_dir).mkdir(parents=True, exist_ok=True)
+
         # macOS 上显式指定 Chrome 路径
         # macOS 上显式指定 Chrome 路径
         import platform
         import platform
         if platform.system() == "Darwin":  # macOS
         if platform.system() == "Darwin":  # macOS
@@ -110,13 +333,14 @@ async def init_browser_session(
         if user_data_dir:
         if user_data_dir:
             session_params["user_data_dir"] = user_data_dir
             session_params["user_data_dir"] = user_data_dir
 
 
-    # 只在有值时才添加 browser_profile
+    # 只在有值时才添加 browser_profile (适用于所有模式)
     if browser_profile:
     if browser_profile:
         session_params["browser_profile"] = browser_profile
         session_params["browser_profile"] = browser_profile
 
 
     # 合并其他参数
     # 合并其他参数
     session_params.update(kwargs)
     session_params.update(kwargs)
 
 
+    # 创建浏览器会话
     _browser_session = BrowserSession(**session_params)
     _browser_session = BrowserSession(**session_params)
 
 
     # 启动浏览器
     # 启动浏览器
@@ -132,6 +356,13 @@ async def init_browser_session(
     base_dir.mkdir(parents=True, exist_ok=True)
     base_dir.mkdir(parents=True, exist_ok=True)
     _file_system = FileSystem(base_dir=str(base_dir))
     _file_system = FileSystem(base_dir=str(base_dir))
 
 
+    print("✅ 浏览器会话初始化成功")
+
+    # 如果是 local 或 cloud 模式且提供了 URL,导航到该 URL
+    if browser_type in ["local", "cloud"] and url:
+        print(f"🔗 导航到: {url}")
+        await _browser_tools.navigate(url=url, browser_session=_browser_session)
+
     return _browser_session, _browser_tools
     return _browser_session, _browser_tools
 
 
 
 
@@ -318,12 +549,16 @@ def _fetch_profile_id(cookie_type: str) -> Optional[str]:
         return None
         return None
 
 
 
 
+# ============================================================
+# 需要注册的工具
+# ============================================================
+
 # ============================================================
 # ============================================================
 # 导航类工具 (Navigation Tools)
 # 导航类工具 (Navigation Tools)
 # ============================================================
 # ============================================================
 
 
 @tool()
 @tool()
-async def navigate_to_url(url: str, new_tab: bool = False) -> ToolResult:
+async def browser_navigate_to_url(url: str, new_tab: bool = False) -> ToolResult:
     """
     """
     导航到指定的 URL
     导航到指定的 URL
     Navigate to a specific URL
     Navigate to a specific URL
@@ -363,7 +598,7 @@ async def navigate_to_url(url: str, new_tab: bool = False) -> ToolResult:
 
 
 
 
 @tool()
 @tool()
-async def search_web(query: str, engine: str = "google") -> ToolResult:
+async def browser_search_web(query: str, engine: str = "google") -> ToolResult:
     """
     """
     使用搜索引擎搜索
     使用搜索引擎搜索
     Search the web using a search engine
     Search the web using a search engine
@@ -400,7 +635,7 @@ async def search_web(query: str, engine: str = "google") -> ToolResult:
 
 
 
 
 @tool()
 @tool()
-async def go_back() -> ToolResult:
+async def browser_go_back() -> ToolResult:
     """
     """
     返回到上一个页面
     返回到上一个页面
     Go back to the previous page
     Go back to the previous page
@@ -427,7 +662,7 @@ async def go_back() -> ToolResult:
 
 
 
 
 @tool()
 @tool()
-async def wait(seconds: int = 3) -> ToolResult:
+async def browser_wait(seconds: int = 3) -> ToolResult:
     """
     """
     等待指定的秒数
     等待指定的秒数
     Wait for a specified number of seconds
     Wait for a specified number of seconds
@@ -464,7 +699,7 @@ async def wait(seconds: int = 3) -> ToolResult:
 # ============================================================
 # ============================================================
 
 
 @tool()
 @tool()
-async def click_element(index: int) -> ToolResult:
+async def browser_click_element(index: int) -> ToolResult:
     """
     """
     通过索引点击页面元素
     通过索引点击页面元素
     Click an element by index
     Click an element by index
@@ -501,7 +736,7 @@ async def click_element(index: int) -> ToolResult:
 
 
 
 
 @tool()
 @tool()
-async def input_text(index: int, text: str, clear: bool = True) -> ToolResult:
+async def browser_input_text(index: int, text: str, clear: bool = True) -> ToolResult:
     """
     """
     在指定元素中输入文本
     在指定元素中输入文本
     Input text into an element
     Input text into an element
@@ -539,7 +774,7 @@ async def input_text(index: int, text: str, clear: bool = True) -> ToolResult:
 
 
 
 
 @tool()
 @tool()
-async def send_keys(keys: str) -> ToolResult:
+async def browser_send_keys(keys: str) -> ToolResult:
     """
     """
     发送键盘按键或快捷键
     发送键盘按键或快捷键
     Send keyboard keys or shortcuts
     Send keyboard keys or shortcuts
@@ -579,7 +814,7 @@ async def send_keys(keys: str) -> ToolResult:
 
 
 
 
 @tool()
 @tool()
-async def upload_file(index: int, path: str) -> ToolResult:
+async def browser_upload_file(index: int, path: str) -> ToolResult:
     """
     """
     上传文件到文件输入元素
     上传文件到文件输入元素
     Upload a file to a file input element
     Upload a file to a file input element
@@ -624,7 +859,7 @@ async def upload_file(index: int, path: str) -> ToolResult:
 # ============================================================
 # ============================================================
 
 
 @tool()
 @tool()
-async def scroll_page(down: bool = True, pages: float = 1.0,
+async def browser_scroll_page(down: bool = True, pages: float = 1.0,
                      index: Optional[int] = None) -> ToolResult:
                      index: Optional[int] = None) -> ToolResult:
     """
     """
     滚动页面或元素
     滚动页面或元素
@@ -665,7 +900,7 @@ async def scroll_page(down: bool = True, pages: float = 1.0,
 
 
 
 
 @tool()
 @tool()
-async def find_text(text: str) -> ToolResult:
+async def browser_find_text(text: str) -> ToolResult:
     """
     """
     查找页面中的文本并滚动到该位置
     查找页面中的文本并滚动到该位置
     Find text on the page and scroll to it
     Find text on the page and scroll to it
@@ -701,7 +936,7 @@ async def find_text(text: str) -> ToolResult:
 
 
 
 
 @tool()
 @tool()
-async def screenshot() -> ToolResult:
+async def browser_screenshot() -> ToolResult:
     """
     """
     请求在下次观察中包含页面截图
     请求在下次观察中包含页面截图
     Request a screenshot to be included in the next observation
     Request a screenshot to be included in the next observation
@@ -738,7 +973,7 @@ async def screenshot() -> ToolResult:
 # ============================================================
 # ============================================================
 
 
 @tool()
 @tool()
-async def switch_tab(tab_id: str) -> ToolResult:
+async def browser_switch_tab(tab_id: str) -> ToolResult:
     """
     """
     切换到指定标签页
     切换到指定标签页
     Switch to a different browser tab
     Switch to a different browser tab
@@ -773,7 +1008,7 @@ async def switch_tab(tab_id: str) -> ToolResult:
 
 
 
 
 @tool()
 @tool()
-async def close_tab(tab_id: str) -> ToolResult:
+async def browser_close_tab(tab_id: str) -> ToolResult:
     """
     """
     关闭指定标签页
     关闭指定标签页
     Close a browser tab
     Close a browser tab
@@ -812,7 +1047,7 @@ async def close_tab(tab_id: str) -> ToolResult:
 # ============================================================
 # ============================================================
 
 
 @tool()
 @tool()
-async def get_dropdown_options(index: int) -> ToolResult:
+async def browser_get_dropdown_options(index: int) -> ToolResult:
     """
     """
     获取下拉框的所有选项
     获取下拉框的所有选项
     Get options from a dropdown element
     Get options from a dropdown element
@@ -846,7 +1081,7 @@ async def get_dropdown_options(index: int) -> ToolResult:
 
 
 
 
 @tool()
 @tool()
-async def select_dropdown_option(index: int, text: str) -> ToolResult:
+async def browser_select_dropdown_option(index: int, text: str) -> ToolResult:
     """
     """
     选择下拉框选项
     选择下拉框选项
     Select an option from a dropdown
     Select an option from a dropdown
@@ -886,7 +1121,7 @@ async def select_dropdown_option(index: int, text: str) -> ToolResult:
 # ============================================================
 # ============================================================
 
 
 @tool()
 @tool()
-async def extract_content(query: str, extract_links: bool = False,
+async def browser_extract_content(query: str, extract_links: bool = False,
                          start_from_char: int = 0) -> ToolResult:
                          start_from_char: int = 0) -> ToolResult:
     """
     """
     使用 LLM 从页面提取结构化数据
     使用 LLM 从页面提取结构化数据
@@ -934,7 +1169,7 @@ async def extract_content(query: str, extract_links: bool = False,
 
 
 
 
 @tool()
 @tool()
-async def get_page_html() -> ToolResult:
+async def browser_get_page_html() -> ToolResult:
     """
     """
     获取当前页面的完整 HTML
     获取当前页面的完整 HTML
     Get the full HTML of the current page
     Get the full HTML of the current page
@@ -996,7 +1231,7 @@ async def get_page_html() -> ToolResult:
 
 
 
 
 @tool()
 @tool()
-async def get_selector_map() -> ToolResult:
+async def browser_get_selector_map() -> ToolResult:
     """
     """
     获取当前页面的元素索引映射
     获取当前页面的元素索引映射
     Get the selector map of interactive elements on the current page
     Get the selector map of interactive elements on the current page
@@ -1052,7 +1287,7 @@ async def get_selector_map() -> ToolResult:
 # ============================================================
 # ============================================================
 
 
 @tool()
 @tool()
-async def evaluate(code: str) -> ToolResult:
+async def browser_evaluate(code: str) -> ToolResult:
     """
     """
     在页面中执行 JavaScript 代码
     在页面中执行 JavaScript 代码
     Execute JavaScript code in the page context
     Execute JavaScript code in the page context
@@ -1094,7 +1329,7 @@ async def evaluate(code: str) -> ToolResult:
 
 
 
 
 @tool()
 @tool()
-async def ensure_login_with_cookies(cookie_type: str, url: str = "https://www.xiaohongshu.com") -> ToolResult:
+async def browser_ensure_login_with_cookies(cookie_type: str, url: str = "https://www.xiaohongshu.com") -> ToolResult:
     """
     """
     检查登录状态并在需要时注入 cookies
     检查登录状态并在需要时注入 cookies
     """
     """
@@ -1190,7 +1425,7 @@ async def ensure_login_with_cookies(cookie_type: str, url: str = "https://www.xi
 # ============================================================
 # ============================================================
 
 
 @tool()
 @tool()
-async def wait_for_user_action(message: str = "Please complete the action in browser",
+async def browser_wait_for_user_action(message: str = "Please complete the action in browser",
                                timeout: int = 300) -> ToolResult:
                                timeout: int = 300) -> ToolResult:
     """
     """
     等待用户在浏览器中完成操作(如登录)
     等待用户在浏览器中完成操作(如登录)
@@ -1262,7 +1497,7 @@ async def wait_for_user_action(message: str = "Please complete the action in bro
 # ============================================================
 # ============================================================
 
 
 @tool()
 @tool()
-async def done(text: str, success: bool = True,
+async def browser_done(text: str, success: bool = True,
               files_to_display: Optional[List[str]] = None) -> ToolResult:
               files_to_display: Optional[List[str]] = None) -> ToolResult:
     """
     """
     标记任务完成并返回最终消息
     标记任务完成并返回最终消息
@@ -1300,138 +1535,6 @@ async def done(text: str, success: bool = True,
         )
         )
 
 
 
 
-# ============================================================
-# 容器管理工具 (Container Management Tools)
-# ============================================================
-
-import aiohttp
-
-async def create_container(url: str, account_name: str = "liuwenwu") -> Dict[str, Any]:
-    """
-    创建浏览器容器并导航到指定URL
-
-    按照 test.md 的要求:
-    1.1 调用接口创建容器
-    1.2 调用接口创建窗口并导航到URL
-
-    Args:
-        url: 要导航的URL地址
-        account_name: 账户名称
-
-    Returns:
-        包含容器信息的字典:
-        - success: 是否成功
-        - container_id: 容器ID
-        - vnc: VNC访问URL
-        - cdp: CDP协议URL(用于浏览器连接)
-        - connection_id: 窗口连接ID
-        - error: 错误信息(如果失败)
-    """
-    result = {
-        "success": False,
-        "container_id": None,
-        "vnc": None,
-        "cdp": None,
-        "connection_id": None,
-        "error": None
-    }
-
-    try:
-        async with aiohttp.ClientSession() as session:
-            # 步骤1.1: 创建容器
-            print("📦 步骤1.1: 创建容器...")
-            create_url = "http://47.84.182.56:8200/api/v1/container/create"
-            create_payload = {
-                "auto_remove": True,
-                "need_port_binding": True,
-                "max_lifetime_seconds": 900
-            }
-
-            async with session.post(create_url, json=create_payload) as resp:
-                if resp.status != 200:
-                    raise RuntimeError(f"创建容器失败: HTTP {resp.status}")
-
-                create_result = await resp.json()
-                if create_result.get("code") != 0:
-                    raise RuntimeError(f"创建容器失败: {create_result.get('msg')}")
-
-                data = create_result.get("data", {})
-                result["container_id"] = data.get("container_id")
-                result["vnc"] = data.get("vnc")
-                result["cdp"] = data.get("cdp")
-
-                print(f"✅ 容器创建成功")
-                print(f"   Container ID: {result['container_id']}")
-                print(f"   VNC: {result['vnc']}")
-                print(f"   CDP: {result['cdp']}")
-
-            # 等待容器内的浏览器启动
-            print(f"\n⏳ 等待容器内浏览器启动...")
-            await asyncio.sleep(5)
-
-            # 步骤1.2: 创建页面并导航
-            print(f"\n📱 步骤1.2: 创建页面并导航到 {url}...")
-
-            page_create_url = "http://47.84.182.56:8200/api/v1/browser/page/create"
-            page_payload = {
-                "container_id": result["container_id"],
-                "url": url,
-                "account_name": account_name,
-                "need_wait": True,
-                "timeout": 30
-            }
-
-            # 重试机制:最多尝试3次
-            max_retries = 3
-            page_created = False
-            last_error = None
-
-            for attempt in range(max_retries):
-                try:
-                    if attempt > 0:
-                        print(f"   重试 {attempt + 1}/{max_retries}...")
-                        await asyncio.sleep(3)  # 重试前等待
-
-                    async with session.post(page_create_url, json=page_payload, timeout=aiohttp.ClientTimeout(total=60)) as resp:
-                        if resp.status != 200:
-                            response_text = await resp.text()
-                            last_error = f"HTTP {resp.status}: {response_text[:200]}"
-                            continue
-
-                        page_result = await resp.json()
-                        if page_result.get("code") != 0:
-                            last_error = f"{page_result.get('msg')}"
-                            continue
-
-                        page_data = page_result.get("data", {})
-                        result["connection_id"] = page_data.get("connection_id")
-                        result["success"] = True
-                        page_created = True
-
-                        print(f"✅ 页面创建成功")
-                        print(f"   Connection ID: {result['connection_id']}")
-                        break
-
-                except asyncio.TimeoutError:
-                    last_error = "请求超时"
-                    continue
-                except aiohttp.ClientError as e:
-                    last_error = f"网络错误: {str(e)}"
-                    continue
-                except Exception as e:
-                    last_error = f"未知错误: {str(e)}"
-                    continue
-
-            if not page_created:
-                raise RuntimeError(f"创建页面失败(尝试{max_retries}次后): {last_error}")
-
-    except Exception as e:
-        result["error"] = str(e)
-        print(f"❌ 错误: {str(e)}")
-
-    return result
-
-
 # ============================================================
 # ============================================================
 # 导出所有工具函数(供外部使用)
 # 导出所有工具函数(供外部使用)
 # ============================================================
 # ============================================================
@@ -1444,45 +1547,42 @@ __all__ = [
     'kill_browser_session',
     'kill_browser_session',
 
 
     # 导航类工具
     # 导航类工具
-    'navigate_to_url',
-    'search_web',
-    'go_back',
-    'wait',
+    'browser_navigate_to_url',
+    'browser_search_web',
+    'browser_go_back',
+    'browser_wait',
 
 
     # 元素交互工具
     # 元素交互工具
-    'click_element',
-    'input_text',
-    'send_keys',
-    'upload_file',
+    'browser_click_element',
+    'browser_input_text',
+    'browser_send_keys',
+    'browser_upload_file',
 
 
     # 滚动和视图工具
     # 滚动和视图工具
-    'scroll_page',
-    'find_text',
-    'screenshot',
+    'browser_scroll_page',
+    'browser_find_text',
+    'browser_screenshot',
 
 
     # 标签页管理工具
     # 标签页管理工具
-    'switch_tab',
-    'close_tab',
+    'browser_switch_tab',
+    'browser_close_tab',
 
 
     # 下拉框工具
     # 下拉框工具
-    'get_dropdown_options',
-    'select_dropdown_option',
+    'browser_get_dropdown_options',
+    'browser_select_dropdown_option',
 
 
     # 内容提取工具
     # 内容提取工具
-    'extract_content',
-    'get_page_html',
-    'get_selector_map',
+    'browser_extract_content',
+    'browser_get_page_html',
+    'browser_get_selector_map',
 
 
     # JavaScript 执行工具
     # JavaScript 执行工具
-    'evaluate',
-    'ensure_login_with_cookies',
+    'browser_evaluate',
+    'browser_ensure_login_with_cookies',
 
 
     # 等待用户操作
     # 等待用户操作
-    'wait_for_user_action',
+    'browser_wait_for_user_action',
 
 
     # 任务完成
     # 任务完成
-    'done',
-
-    # 容器管理
-    'create_container',
+    'browser_done',
 ]
 ]

+ 2 - 1
agent/tools/builtin/goal.py

@@ -40,13 +40,14 @@ async def goal(
         reason: 创建理由(逗号分隔多个,与 add 一一对应)。说明为什么要做这些目标。
         reason: 创建理由(逗号分隔多个,与 add 一一对应)。说明为什么要做这些目标。
         done: 完成当前目标,值为 summary
         done: 完成当前目标,值为 summary
         abandon: 放弃当前目标,值为原因(会触发 context 压缩)
         abandon: 放弃当前目标,值为原因(会触发 context 压缩)
-        focus: 切换焦点到指定 id(可以是内部 ID 或显示 ID
+        focus: 切换焦点到指定 ID(如 "1", "2.1", "2.2")
         context: 工具执行上下文(包含 store 和 trace_id)
         context: 工具执行上下文(包含 store 和 trace_id)
 
 
     Examples:
     Examples:
         goal(add="分析代码, 实现功能, 测试", reason="了解现有结构, 完成需求, 确保质量")
         goal(add="分析代码, 实现功能, 测试", reason="了解现有结构, 完成需求, 确保质量")
         goal(focus="2", add="设计接口, 实现代码", reason="明确API规范, 编写核心逻辑")
         goal(focus="2", add="设计接口, 实现代码", reason="明确API规范, 编写核心逻辑")
         goal(done="发现用户模型在 models/user.py")
         goal(done="发现用户模型在 models/user.py")
+        goal(done="已完成调研", focus="2")
         goal(abandon="方案A需要Redis,环境没有", add="实现方案B", reason="使用现有技术栈")
         goal(abandon="方案A需要Redis,环境没有", add="实现方案B", reason="使用现有技术栈")
 
 
     Returns:
     Returns:

+ 31 - 0
docs/ref/skills.md

@@ -0,0 +1,31 @@
+Skill structure
+Every Skill requires a SKILL.md file with YAML frontmatter:
+
+---
+name: your-skill-name
+description: Brief description of what this Skill does and when to use it
+---
+
+# Your Skill Name
+
+## Instructions
+[Clear, step-by-step guidance for Claude to follow]
+
+## Examples
+[Concrete examples of using this Skill]
+Required fields: name and description
+
+Field requirements:
+
+name:
+
+Maximum 64 characters
+Must contain only lowercase letters, numbers, and hyphens
+Cannot contain XML tags
+Cannot contain reserved words: "anthropic", "claude"
+description:
+
+Must be non-empty
+Maximum 1024 characters
+Cannot contain XML tags
+The description should include both what the Skill does and when Claude should use it. For complete authoring guidance, see the best practices guide.

+ 15 - 15
examples/cloud_browser_demo_db.py

@@ -22,11 +22,11 @@ from agent.tools.builtin.browser.baseClass import (
     init_browser_session,
     init_browser_session,
     cleanup_browser_session,
     cleanup_browser_session,
     kill_browser_session,
     kill_browser_session,
-    navigate_to_url,
-    scroll_page,
-    evaluate,
-    wait,
-    get_page_html,
+    browser_navigate_to_url,
+    browser_scroll_page,
+    browser_evaluate,
+    browser_wait,
+    browser_get_page_html,
     _fetch_cookie_row,
     _fetch_cookie_row,
     _fetch_profile_id,
     _fetch_profile_id,
     _normalize_cookies,
     _normalize_cookies,
@@ -102,10 +102,10 @@ async def example_xhs_fitness_search(cookie_type: str = "xhs") -> dict:
 
 
             # 访问首页
             # 访问首页
             print("\n🏠 访问小红书首页...")
             print("\n🏠 访问小红书首页...")
-            nav_result = await navigate_to_url("https://www.xiaohongshu.com")
+            nav_result = await browser_navigate_to_url("https://www.xiaohongshu.com")
             if nav_result.error:
             if nav_result.error:
                 raise RuntimeError(nav_result.error)
                 raise RuntimeError(nav_result.error)
-            await wait(3)
+            await browser_wait(3)
 
 
             # 注入 Cookie(如果有)
             # 注入 Cookie(如果有)
             if cookie_row:
             if cookie_row:
@@ -119,7 +119,7 @@ async def example_xhs_fitness_search(cookie_type: str = "xhs") -> dict:
                         print(f"✅ 成功注入 {len(cookies)} 个 Cookie")
                         print(f"✅ 成功注入 {len(cookies)} 个 Cookie")
                         # 刷新页面使 Cookie 生效
                         # 刷新页面使 Cookie 生效
                         await browser_navigate_to_url("https://www.xiaohongshu.com")
                         await browser_navigate_to_url("https://www.xiaohongshu.com")
-                        await wait(2)
+                        await browser_wait(2)
                     else:
                     else:
                         print("⚠️  Cookie 解析失败")
                         print("⚠️  Cookie 解析失败")
                 else:
                 else:
@@ -127,20 +127,20 @@ async def example_xhs_fitness_search(cookie_type: str = "xhs") -> dict:
 
 
             # 访问搜索页面
             # 访问搜索页面
             print(f"\n🔗 访问搜索页面: {keyword}")
             print(f"\n🔗 访问搜索页面: {keyword}")
-            nav_result = await navigate_to_url(search_url)
+            nav_result = await browser_navigate_to_url(search_url)
             if nav_result.error:
             if nav_result.error:
                 raise RuntimeError(nav_result.error)
                 raise RuntimeError(nav_result.error)
-            await wait(8)
+            await browser_wait(8)
 
 
             # 滚动页面
             # 滚动页面
             print("\n📜 滚动页面...")
             print("\n📜 滚动页面...")
             for i in range(3):
             for i in range(3):
-                await scroll_page(down=True, pages=2.0)
-                await wait(2)
+                await browser_scroll_page(down=True, pages=2.0)
+                await browser_wait(2)
 
 
             # 提取数据
             # 提取数据
             print("\n🔍 提取数据...")
             print("\n🔍 提取数据...")
-            html_result = await get_page_html()
+            html_result = await browser_get_page_html()
             if html_result.error:
             if html_result.error:
                 raise RuntimeError(html_result.error)
                 raise RuntimeError(html_result.error)
             html = html_result.metadata.get("html", "")
             html = html_result.metadata.get("html", "")
@@ -223,7 +223,7 @@ async def example_xhs_fitness_search(cookie_type: str = "xhs") -> dict:
             extract_js = extract_js.replace("__KEYWORD__", json.dumps(keyword, ensure_ascii=False))
             extract_js = extract_js.replace("__KEYWORD__", json.dumps(keyword, ensure_ascii=False))
 
 
             async def run_extract() -> dict:
             async def run_extract() -> dict:
-                result = await evaluate(extract_js)
+                result = await browser_evaluate(extract_js)
                 if result.error:
                 if result.error:
                     raise RuntimeError(result.error)
                     raise RuntimeError(result.error)
                 output = result.output
                 output = result.output
@@ -253,7 +253,7 @@ async def example_xhs_fitness_search(cookie_type: str = "xhs") -> dict:
                     }
                     }
 
 
                 if isinstance(data, dict) and data.get("count", 0) == 0:
                 if isinstance(data, dict) and data.get("count", 0) == 0:
-                    html_result = await get_page_html()
+                    html_result = await browser_get_page_html()
                     if html_result.error:
                     if html_result.error:
                         raise RuntimeError(html_result.error)
                         raise RuntimeError(html_result.error)
                     html = html_result.metadata.get("html", "")
                     html = html_result.metadata.get("html", "")

+ 164 - 0
examples/research/run.py

@@ -0,0 +1,164 @@
+"""
+浏览器调研示例
+
+使用 Agent 模式 + 浏览器工具进行网络调研
+"""
+
+import os
+import sys
+import asyncio
+from pathlib import Path
+
+# 添加项目根目录到 Python 路径
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+from dotenv import load_dotenv
+load_dotenv()
+
+from agent.llm.prompts import SimplePrompt
+from agent.core.runner import AgentRunner
+from agent.execution import (
+    FileSystemTraceStore,
+    Trace,
+    Message,
+)
+from agent.llm import create_openrouter_llm_call
+
+
+async def main():
+    # 路径配置
+    base_dir = Path(__file__).parent
+    project_root = base_dir.parent.parent
+    prompt_path = base_dir / "test.prompt"
+    output_dir = base_dir / "output"
+    output_dir.mkdir(exist_ok=True)
+
+    # Skills 目录(可选:用户自定义 skills)
+    # 注意:内置 skills(agent/skills/core.md)会自动加载
+    skills_dir = None  # 或者指定自定义 skills 目录,如: project_root / "skills"
+
+    print("=" * 60)
+    print("浏览器调研任务 (Agent 模式)")
+    print("=" * 60)
+    print()
+
+    # 1. 加载 prompt
+    print("1. 加载 prompt...")
+    prompt = SimplePrompt(prompt_path)
+
+    # 提取配置
+    system_prompt = prompt._messages.get("system", "")
+    user_task = prompt._messages.get("user", "")
+    model_name = prompt.config.get('model', 'gemini-2.5-flash')
+    temperature = float(prompt.config.get('temperature', 0.3))
+
+    print(f"   - 任务: {user_task[:80]}...")
+    print(f"   - 模型: {model_name}")
+
+    # 2. 构建消息
+    print("2. 构建任务消息...")
+    messages = prompt.build_messages()
+
+    # 3. 创建 Agent Runner(配置 skills 和浏览器工具)
+    print("3. 创建 Agent Runner...")
+    print(f"   - Skills 目录: {skills_dir}")
+    print(f"   - 模型: {model_name} (via OpenRouter)")
+
+    # 使用 OpenRouter 的 Gemini 模型
+    runner = AgentRunner(
+        trace_store=FileSystemTraceStore(base_path=".trace"),
+        llm_call=create_openrouter_llm_call(model=f"google/{model_name}"),
+        skills_dir=skills_dir,
+        debug=True  # 启用 debug,输出到 .trace/
+    )
+
+    # 4. Agent 模式执行
+    print(f"4. 启动 Agent 模式...")
+    print()
+
+    final_response = ""
+    current_trace_id = None
+
+    async for item in runner.run(
+        task=user_task,
+        messages=messages,
+        system_prompt=system_prompt,
+        model=f"google/{model_name}",
+        temperature=temperature,
+        max_iterations=20,  # 调研任务可能需要更多迭代
+    ):
+        # 处理 Trace 对象(整体状态变化)
+        if isinstance(item, Trace):
+            current_trace_id = item.trace_id
+            if item.status == "running":
+                print(f"[Trace] 开始: {item.trace_id[:8]}")
+            elif item.status == "completed":
+                print(f"[Trace] 完成")
+                print(f"  - Total messages: {item.total_messages}")
+                print(f"  - Total tokens: {item.total_tokens}")
+                print(f"  - Total cost: ${item.total_cost:.4f}")
+            elif item.status == "failed":
+                print(f"[Trace] 失败: {item.error_message}")
+
+        # 处理 Message 对象(执行过程)
+        elif isinstance(item, Message):
+            if item.role == "assistant":
+                content = item.content
+                if isinstance(content, dict):
+                    text = content.get("text", "")
+                    tool_calls = content.get("tool_calls")
+
+                    if text and not tool_calls:
+                        # 纯文本回复(最终响应)
+                        final_response = text
+                        print(f"[Response] Agent 完成")
+                    elif text:
+                        print(f"[Assistant] {text[:100]}...")
+
+                    if tool_calls:
+                        for tc in tool_calls:
+                            tool_name = tc.get("function", {}).get("name", "unknown")
+                            print(f"[Tool Call] {tool_name}")
+
+            elif item.role == "tool":
+                content = item.content
+                if isinstance(content, dict):
+                    tool_name = content.get("tool_name", "unknown")
+                    print(f"[Tool Result] {tool_name}")
+                if item.description:
+                    desc = item.description[:80] if len(item.description) > 80 else item.description
+                    print(f"  {desc}...")
+
+    # 5. 输出结果
+    print()
+    print("=" * 60)
+    print("Agent 响应:")
+    print("=" * 60)
+    print(final_response)
+    print("=" * 60)
+    print()
+
+    # 6. 保存结果
+    output_file = output_dir / "research_result.txt"
+    with open(output_file, 'w', encoding='utf-8') as f:
+        f.write(final_response)
+
+    print(f"✓ 结果已保存到: {output_file}")
+    print()
+
+    # 提示使用 API 可视化
+    print("=" * 60)
+    print("可视化 Step Tree:")
+    print("=" * 60)
+    print("1. 启动 API Server:")
+    print("   python3 api_server.py")
+    print()
+    print("2. 浏览器访问:")
+    print("   http://localhost:8000/api/traces")
+    print()
+    print(f"3. Trace ID: {current_trace_id}")
+    print("=" * 60)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())

+ 11 - 0
examples/research/test.prompt

@@ -0,0 +1,11 @@
+---
+model: gemini-2.5-flash
+temperature: 0.3
+---
+
+$system$
+你是最顶尖的AI助手,可以拆分并调用工具逐步解决复杂问题。
+
+$user$
+使用浏览器帮我做个调研:一张图片中的构图可以如何表示?我希望寻找一些构图特征的表示方法。
+注意使用explore工具,在合适的时候调用多个分支并行探索。

+ 13 - 13
examples/test_tools_baidu.py

@@ -10,11 +10,11 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 
 from agent.tools.builtin.browser.baseClass import (
 from agent.tools.builtin.browser.baseClass import (
     init_browser_session,
     init_browser_session,
-    navigate_to_url,
-    wait,
-    get_page_html,
-    evaluate,
-    scroll_page,
+    browser_navigate_to_url,
+    browser_wait,
+    browser_get_page_html,
+    browser_evaluate,
+    browser_scroll_page,
     cleanup_browser_session,
     cleanup_browser_session,
 )
 )
 
 
@@ -30,15 +30,15 @@ async def run_task():
     try:
     try:
         await init_browser_session(headless=False, profile_name="baidu_profile")
         await init_browser_session(headless=False, profile_name="baidu_profile")
 
 
-        await navigate_to_url("https://www.baidu.com")
-        await wait(seconds=2)
+        await browser_navigate_to_url("https://www.baidu.com")
+        await browser_wait(seconds=2)
 
 
         keyword = "Python 教程"
         keyword = "Python 教程"
         search_url = f"https://www.baidu.com/s?wd={quote(keyword)}"
         search_url = f"https://www.baidu.com/s?wd={quote(keyword)}"
-        await navigate_to_url(search_url)
-        await wait(seconds=3)
-        await scroll_page(down=True, pages=1.0)
-        await wait(seconds=2)
+        await browser_navigate_to_url(search_url)
+        await browser_wait(seconds=3)
+        await browser_scroll_page(down=True, pages=1.0)
+        await browser_wait(seconds=2)
 
 
         extract_js = """
         extract_js = """
         (function(){
         (function(){
@@ -84,7 +84,7 @@ async def run_task():
         })()
         })()
         """
         """
 
 
-        result = await evaluate(code=extract_js)
+        result = await browser_evaluate(code=extract_js)
         output = result.output
         output = result.output
         if output.startswith("Result: "):
         if output.startswith("Result: "):
             output = output[8:]
             output = output[8:]
@@ -103,7 +103,7 @@ async def run_task():
         with open(json_file, "w", encoding="utf-8") as f:
         with open(json_file, "w", encoding="utf-8") as f:
             json.dump(data, f, ensure_ascii=False, indent=2)
             json.dump(data, f, ensure_ascii=False, indent=2)
 
 
-        html_result = await get_page_html()
+        html_result = await browser_get_page_html()
         html_content = html_result.metadata.get("html", "")
         html_content = html_result.metadata.get("html", "")
         page_url = html_result.metadata.get("url", "")
         page_url = html_result.metadata.get("url", "")
         page_title = html_result.metadata.get("title", "")
         page_title = html_result.metadata.get("title", "")

+ 35 - 79
examples/test_xhs_container.py

@@ -1,12 +1,10 @@
 """
 """
 小红书容器测试脚本
 小红书容器测试脚本
-根据 test.md 要求实现:
-1. 创建容器并导航到小红书
-2. 初始化浏览器会话
-3. 切换到指定窗口
-4. 搜索健身
-5. 随机进入一个详情页
-6. 获取详情页的HTML和iframe并保存到output
+演示容器浏览器的使用:
+1. 初始化容器浏览器(自动创建容器并连接)
+2. 搜索健身
+3. 随机进入一个详情页
+4. 获取详情页的HTML和iframe并保存到output
 """
 """
 
 
 import sys
 import sys
@@ -25,14 +23,14 @@ project_root = Path(__file__).parent.parent
 sys.path.insert(0, str(project_root))
 sys.path.insert(0, str(project_root))
 
 
 from agent.tools.builtin.browser.baseClass import (
 from agent.tools.builtin.browser.baseClass import (
-    create_container,
     init_browser_session,
     init_browser_session,
     cleanup_browser_session,
     cleanup_browser_session,
-    navigate_to_url,
-    scroll_page,
-    evaluate,
-    wait,
-    get_page_html,
+    browser_navigate_to_url,
+    browser_scroll_page,
+    browser_evaluate,
+    browser_wait,
+    browser_get_page_html,
+    browser_switch_tab,
 )
 )
 
 
 
 
@@ -52,83 +50,41 @@ async def test_xhs_container():
     output_dir.mkdir(parents=True, exist_ok=True)
     output_dir.mkdir(parents=True, exist_ok=True)
 
 
     try:
     try:
-        # 步骤1: 创建容器并导航到小红书
-        container_info = await create_container(url="https://www.xiaohongshu.com")
-
-        if not container_info["success"]:
-            raise RuntimeError(f"容器创建失败: {container_info['error']}")
-
-        cdp_url = container_info["cdp"]
-        container_id = container_info["container_id"]
-        connection_id = container_info.get("connection_id")
-
-        print(f"\n📋 容器信息:")
-        print(f"   CDP URL: {cdp_url}")
-        print(f"   Container ID: {container_id}")
-        print(f"   Connection ID: {connection_id}")
-
-        # 等待容器完全启动
-        print(f"\n⏳ 等待容器启动...")
-        await asyncio.sleep(3)
-
-        # 步骤2: 初始化浏览器会话
-        print(f"\n🌐 初始化浏览器会话...")
+        # 初始化容器浏览器(一步完成)
+        print(f"\n🚀 初始化容器浏览器...")
         browser, tools = await init_browser_session(
         browser, tools = await init_browser_session(
-            headless=True,
-            cdp_url=cdp_url
+            browser_type="container",
+            url="https://www.xiaohongshu.com",  # 容器启动时访问的URL
+            headless=True
         )
         )
 
 
-        if browser is None or tools is None:
-            raise RuntimeError("浏览器初始化失败")
-
-        print("✅ 浏览器会话初始化成功")
-
-        # 步骤3: 如果有 connection_id,切换到对应窗口
-        if connection_id:
-            print(f"\n🔄 切换到窗口: {connection_id}")
-            await wait(2)
-
-            # 获取当前浏览器状态
-            try:
-                state = await browser.get_browser_state_summary(cached=False)
-                print(f"   当前标签页数: {len(state.tabs)}")
-                for tab in state.tabs:
-                    print(f"   - Tab ID: {tab.target_id[-4:]}, URL: {tab.url}")
-
-                # 尝试切换到 connection_id 对应的标签页
-                # connection_id 可能是完整ID,取最后4位
-                from agent.tools.builtin.browser.baseClass import switch_tab
-                await switch_tab(connection_id[-4:] if len(connection_id) > 4 else connection_id)
-                await wait(2)
-                print(f"✅ 已切换到窗口")
-            except Exception as e:
-                print(f"⚠️  切换窗口警告: {str(e)[:100]}")
-                print(f"   将继续使用当前窗口")
+        print("✅ 容器浏览器初始化成功")
 
 
-        await wait(3)
+        # 等待页面完全加载
+        await browser_wait(3)
 
 
-        # 步骤4: 搜索健身
+        # 步骤1: 搜索健身
         print(f"\n🔍 搜索关键词: {keyword}")
         print(f"\n🔍 搜索关键词: {keyword}")
         try:
         try:
-            nav_result = await navigate_to_url(search_url)
+            nav_result = await browser_navigate_to_url(search_url)
             if nav_result.error:
             if nav_result.error:
                 print(f"⚠️  导航警告: {nav_result.error[:100]}")
                 print(f"⚠️  导航警告: {nav_result.error[:100]}")
         except Exception as e:
         except Exception as e:
             print(f"⚠️  导航异常: {str(e)[:100]}")
             print(f"⚠️  导航异常: {str(e)[:100]}")
 
 
-        await wait(10)
+        await browser_wait(10)
 
 
         # 滚动页面加载更多内容
         # 滚动页面加载更多内容
         print("\n📜 滚动页面...")
         print("\n📜 滚动页面...")
         for i in range(2):
         for i in range(2):
-            await scroll_page(down=True, pages=2.0)
-            await wait(2)
+            await browser_scroll_page(down=True, pages=2.0)
+            await browser_wait(2)
 
 
         # 提取搜索结果
         # 提取搜索结果
         print("\n🔍 提取搜索结果...")
         print("\n🔍 提取搜索结果...")
 
 
         # 先保存HTML看看页面内容
         # 先保存HTML看看页面内容
-        html_result = await get_page_html()
+        html_result = await browser_get_page_html()
         if not html_result.error:
         if not html_result.error:
             html = html_result.metadata.get("html", "")
             html = html_result.metadata.get("html", "")
             debug_html_path = output_dir / "search_page_debug.html"
             debug_html_path = output_dir / "search_page_debug.html"
@@ -155,7 +111,7 @@ async def test_xhs_container():
         })()
         })()
         """
         """
 
 
-        eval_result = await evaluate(extract_js)
+        eval_result = await browser_evaluate(extract_js)
         if eval_result.error:
         if eval_result.error:
             raise RuntimeError(f"提取搜索结果失败: {eval_result.error}")
             raise RuntimeError(f"提取搜索结果失败: {eval_result.error}")
 
 
@@ -170,7 +126,7 @@ async def test_xhs_container():
 
 
         print(f"✅ 找到 {len(posts)} 个帖子")
         print(f"✅ 找到 {len(posts)} 个帖子")
 
 
-        # 步骤5: 随机进入一个详情页
+        # 步骤2: 随机进入一个详情页
         selected_post = random.choice(posts)
         selected_post = random.choice(posts)
         post_url = selected_post["link"]
         post_url = selected_post["link"]
 
 
@@ -178,23 +134,23 @@ async def test_xhs_container():
         print(f"🔗 访问帖子详情页: {post_url}")
         print(f"🔗 访问帖子详情页: {post_url}")
 
 
         try:
         try:
-            nav_result = await navigate_to_url(post_url)
+            nav_result = await browser_navigate_to_url(post_url)
             if nav_result.error:
             if nav_result.error:
                 print(f"⚠️  导航警告: {nav_result.error[:100]}")
                 print(f"⚠️  导航警告: {nav_result.error[:100]}")
         except Exception as e:
         except Exception as e:
             print(f"⚠️  导航异常: {str(e)[:100]}")
             print(f"⚠️  导航异常: {str(e)[:100]}")
 
 
-        await wait(8)
+        await browser_wait(8)
 
 
         # 滚动详情页
         # 滚动详情页
         print("\n📜 滚动详情页...")
         print("\n📜 滚动详情页...")
         for i in range(3):
         for i in range(3):
-            await scroll_page(down=True, pages=1.5)
-            await wait(2)
+            await browser_scroll_page(down=True, pages=1.5)
+            await browser_wait(2)
 
 
-        # 步骤6: 保存详情页HTML
+        # 步骤3: 保存详情页HTML
         print("\n💾 保存详情页 HTML...")
         print("\n💾 保存详情页 HTML...")
-        html_result = await get_page_html()
+        html_result = await browser_get_page_html()
         if html_result.error:
         if html_result.error:
             print(f"⚠️  获取HTML失败: {html_result.error}")
             print(f"⚠️  获取HTML失败: {html_result.error}")
         else:
         else:
@@ -221,7 +177,7 @@ async def test_xhs_container():
         })()
         })()
         """
         """
 
 
-        iframe_result = await evaluate(iframe_js)
+        iframe_result = await browser_evaluate(iframe_js)
         if not iframe_result.error:
         if not iframe_result.error:
             iframe_output = iframe_result.output
             iframe_output = iframe_result.output
             if isinstance(iframe_output, str) and iframe_output.startswith("Result: "):
             if isinstance(iframe_output, str) and iframe_output.startswith("Result: "):
@@ -251,7 +207,7 @@ async def test_xhs_container():
                         }})()
                         }})()
                         """
                         """
 
 
-                        iframe_html_result = await evaluate(get_iframe_html_js)
+                        iframe_html_result = await browser_evaluate(get_iframe_html_js)
                         if not iframe_html_result.error:
                         if not iframe_html_result.error:
                             iframe_html = iframe_html_result.output
                             iframe_html = iframe_html_result.output
                             if isinstance(iframe_html, str) and iframe_html.startswith("Result: "):
                             if isinstance(iframe_html, str) and iframe_html.startswith("Result: "):