Просмотр исходного кода

Merge remote-tracking branch 'refs/remotes/origin/main'

Talegorithm 1 месяц назад
Родитель
Commit
d62037e9a9

+ 2 - 1
.claude/settings.local.json

@@ -7,7 +7,8 @@
       "Read(//usr/local/anaconda3/lib/python3.13/site-packages/browser_use/**)",
       "Bash(tee:*)",
       "Bash(browser-use:*)",
-      "Bash(pip install:*)"
+      "Bash(pip install:*)",
+      "Bash(timeout 60 python:*)"
     ],
     "deny": [],
     "ask": []

+ 1 - 0
.gitignore

@@ -59,3 +59,4 @@ output
 
 # Debug output
 .trace/
+cloud_xhs/

+ 206 - 1
agent/tools/builtin/baseClass.py → agent/tools/builtin/browser/baseClass.py

@@ -19,14 +19,17 @@ Native Browser-Use Tools Adapter
 
 import sys
 import os
-from typing import Optional, List
+import json
+from typing import Optional, List, Dict, Any, Tuple
 from pathlib import Path
+from urllib.parse import urlparse
 
 # 将项目根目录添加到 Python 路径
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 # 导入框架的工具装饰器和结果类
 from agent.tools import tool, ToolResult
+from agent.tools.builtin.browser.sync_mysql_help import mysql
 
 # 导入 browser-use 的核心类
 from browser_use import BrowserSession, BrowserProfile
@@ -197,6 +200,115 @@ def action_result_to_tool_result(result: ActionResult, title: str = None) -> Too
     )
 
 
+def _cookie_domain_for_type(cookie_type: str, url: str) -> Tuple[str, str]:
+    """Return the cookie domain and base URL to use for a cookie type.
+
+    The Xiaohongshu aliases ("xiaohongshu" / "xhs", case-insensitive) map to
+    a fixed domain and origin. Any other type falls back to values derived
+    from ``url``.
+
+    Args:
+        cookie_type: Channel identifier; may be empty.
+        url: Page URL used as the fallback source; may be empty.
+
+    Returns:
+        ``(domain, base_url)``. ``domain`` is the lower-cased host with a
+        leading dot and without a leading "www." label, or "" when ``url``
+        has no host. ``base_url`` is ``scheme://netloc`` when both parts are
+        present, otherwise ``url`` unchanged.
+    """
+    if cookie_type and cookie_type.lower() in {"xiaohongshu", "xhs"}:
+        return ".xiaohongshu.com", "https://www.xiaohongshu.com"
+
+    parsed = urlparse(url or "")
+    # Use hostname rather than netloc: netloc may include a port or userinfo,
+    # neither of which belongs in a cookie domain.
+    host = parsed.hostname or ""
+    # Strip only a leading "www." label; a blanket str.replace would also
+    # corrupt hosts such as "awww.example.com".
+    if host.startswith("www."):
+        host = host[len("www."):]
+    domain = f".{host}" if host else ""
+    if parsed.scheme and parsed.netloc:
+        base_url = f"{parsed.scheme}://{parsed.netloc}"
+    else:
+        base_url = url
+    return domain, base_url
+
+
+def _parse_cookie_string(cookie_str: str, domain: str, url: str) -> List[Dict[str, Any]]:
+    """Parse a ``name=value; name2=value2`` string into cookie dicts.
+
+    Args:
+        cookie_str: Cookie pairs separated by ";". May be empty.
+        domain: Value stored under the "domain" key of every cookie.
+        url: When non-empty, stored under the "url" key of every cookie.
+
+    Returns:
+        One dict per valid pair, in input order. Segments that are empty,
+        have no "=", or have an empty name are skipped. Only the first "="
+        splits a pair, so values may themselves contain "=".
+    """
+    cookies: List[Dict[str, Any]] = []
+    if not cookie_str:
+        return cookies
+    for part in cookie_str.split(";"):
+        name, sep, value = part.partition("=")
+        name = name.strip()
+        # A cookie needs a name; "=value" or a bare token is not usable.
+        if not sep or not name:
+            continue
+        cookie: Dict[str, Any] = {
+            "name": name,
+            "value": value.strip(),
+            "domain": domain,
+            "path": "/",
+            "expires": -1,
+            "httpOnly": False,
+            "secure": True,
+            "sameSite": "None",
+        }
+        if url:
+            cookie["url"] = url
+        cookies.append(cookie)
+    return cookies
+
+
+def _normalize_cookies(cookie_value: Any, domain: str, url: str) -> List[Dict[str, Any]]:
+    """Coerce a stored cookie value into a list of cookie dicts.
+
+    Accepted inputs:
+      * ``None`` -> ``[]``
+      * list -> its dict entries that carry both "name" and "value"
+      * dict with a "cookies" key -> that value, normalized recursively
+      * dict with "name" and "value" -> a one-element list
+      * bytes / bytearray -> decoded as UTF-8 (undecodable bytes dropped),
+        then handled as a string
+      * str -> parsed as JSON when possible, otherwise parsed as a
+        ``name=value; ...`` cookie string using ``domain`` and ``url``
+
+    Any other input yields ``[]``.
+    """
+    if cookie_value is None:
+        return []
+    if isinstance(cookie_value, list):
+        # Drop malformed entries (non-dicts, missing name/value) instead of
+        # handing them on to the caller that sets the cookies.
+        return [
+            entry for entry in cookie_value
+            if isinstance(entry, dict) and "name" in entry and "value" in entry
+        ]
+    if isinstance(cookie_value, dict):
+        if "cookies" in cookie_value:
+            return _normalize_cookies(cookie_value.get("cookies"), domain, url)
+        if "name" in cookie_value and "value" in cookie_value:
+            return [cookie_value]
+        return []
+    if isinstance(cookie_value, (bytes, bytearray)):
+        cookie_value = cookie_value.decode("utf-8", errors="ignore")
+    if isinstance(cookie_value, str):
+        text = cookie_value.strip()
+        if not text:
+            return []
+        try:
+            parsed = json.loads(text)
+        except (ValueError, RecursionError):
+            # Not JSON (json.JSONDecodeError is a ValueError) or too deeply
+            # nested: fall through to cookie-string parsing.
+            parsed = None
+        if parsed is not None:
+            return _normalize_cookies(parsed, domain, url)
+        return _parse_cookie_string(text, domain, url)
+    return []
+
+
+def _extract_cookie_value(row: Optional[Dict[str, Any]]) -> Any:
+    """Pick the cookie payload out of a database row.
+
+    Returns ``row["cookies"]`` when that column exists; otherwise the value
+    of the first column whose name contains "cookie" (case-insensitive);
+    otherwise ``None``. An empty or missing row also yields ``None``.
+    """
+    if not row:
+        return None
+    # The "cookies" column takes priority.
+    try:
+        return row["cookies"]
+    except KeyError:
+        pass
+    # Fall back to any other column that looks like a cookie field.
+    candidates = (val for col, val in row.items() if "cookie" in col.lower())
+    return next(candidates, None)
+
+
+def _fetch_cookie_row(cookie_type: str) -> Optional[Dict[str, Any]]:
+    """Fetch the ``agent_channel_cookies`` row for a cookie type.
+
+    Args:
+        cookie_type: Value matched against the ``type`` column.
+
+    Returns:
+        The first matching row as returned by ``mysql.fetchone``, or ``None``
+        when ``cookie_type`` is empty, no row matches, or the query raises.
+    """
+    if not cookie_type:
+        return None
+    try:
+        return mysql.fetchone(
+            "select * from agent_channel_cookies where type=%s limit 1",
+            (cookie_type,)
+        )
+    except Exception:
+        # Best-effort lookup: callers treat None as "no cookies configured".
+        # Log the failure so a database outage is not mistaken for that.
+        import logging
+        logging.getLogger(__name__).exception(
+            "Failed to fetch cookie row for type %r", cookie_type
+        )
+        return None
+
+
+def _fetch_profile_id(cookie_type: str) -> Optional[str]:
+    """Fetch the ``profileId`` stored for a cookie type.
+
+    Args:
+        cookie_type: Value matched against the ``type`` column of
+            ``agent_channel_cookies``.
+
+    Returns:
+        The ``profileId`` column of the first matching row, or ``None`` when
+        ``cookie_type`` is empty, no row matches, the column is absent, or
+        the query raises.
+    """
+    if not cookie_type:
+        return None
+    try:
+        row = mysql.fetchone(
+            "select profileId from agent_channel_cookies where type=%s limit 1",
+            (cookie_type,)
+        )
+    except Exception:
+        # Best-effort lookup: callers fall back to other sources on None.
+        # Log the failure so a database outage is not mistaken for that.
+        import logging
+        logging.getLogger(__name__).exception(
+            "Failed to fetch profileId for type %r", cookie_type
+        )
+        return None
+    if row and "profileId" in row:
+        return row["profileId"]
+    return None
+
+
 # ============================================================
 # 导航类工具 (Navigation Tools)
 # ============================================================
@@ -997,6 +1109,98 @@ async def evaluate(code: str, uid: str = "") -> ToolResult:
         )
 
 
+@tool()
+async def ensure_login_with_cookies(cookie_type: str, url: str = "https://www.xiaohongshu.com", uid: str = "") -> ToolResult:
+    """
+    Check the login state and inject stored cookies when a login is required.
+
+    Navigates to ``url`` (when non-empty), runs a DOM heuristic in the page
+    and, if the page looks logged out, loads the cookies stored for
+    ``cookie_type`` from the database, sets them on the browser session and
+    reloads ``url``.
+
+    Args:
+        cookie_type: Value matched against ``agent_channel_cookies.type``.
+        url: Page to open for the check and to reload after injection.
+        uid: Not used by this function.
+
+    Returns:
+        A ToolResult whose ``output`` is JSON: ``{"need_login": false}`` when
+        no login is needed, otherwise ``{"need_login": true,
+        "cookies_count": N}``. ``error`` is set when no cookies were found,
+        when they could not be parsed, or when any step raised.
+    """
+    def _cookie_failure(message: str) -> ToolResult:
+        # Shared result shape for "login needed but nothing was injected".
+        output = json.dumps({"need_login": True, "cookies_count": 0}, ensure_ascii=False)
+        return ToolResult(
+            title=message,
+            output=output,
+            error=message,
+            long_term_memory=output
+        )
+
+    try:
+        browser, tools = await get_browser_session()
+
+        if url:
+            await tools.navigate(url=url, browser_session=browser)
+            await tools.wait(seconds=2, browser_session=browser)
+
+        # Heuristic: a login control is present and no user/avatar element is.
+        check_login_js = """
+        (function() {
+            const loginBtn = document.querySelector('[class*="login"]') ||
+                           document.querySelector('[href*="login"]') ||
+                           Array.from(document.querySelectorAll('button, a')).find(el => (el.textContent || '').includes('登录'));
+
+            const userInfo = document.querySelector('[class*="user"]') ||
+                           document.querySelector('[class*="avatar"]');
+
+            return {
+                needLogin: !!loginBtn && !userInfo,
+                hasLoginBtn: !!loginBtn,
+                hasUserInfo: !!userInfo
+            };
+        })()
+        """
+
+        result = await tools.evaluate(code=check_login_js, browser_session=browser)
+        status_output = result.extracted_content
+        prefix = "Result: "
+        if isinstance(status_output, str) and status_output.startswith(prefix):
+            status_output = status_output[len(prefix):]
+
+        login_info: Dict[str, Any] = {}
+        if isinstance(status_output, str):
+            try:
+                parsed_status = json.loads(status_output)
+            except ValueError:
+                parsed_status = None
+            # The script returns an object. Anything else (JSON null, a list)
+            # is treated like an unparseable result rather than crashing on
+            # .get() below.
+            if isinstance(parsed_status, dict):
+                login_info = parsed_status
+        elif isinstance(status_output, dict):
+            login_info = status_output
+
+        # NOTE(review): an unparseable check result leaves login_info empty
+        # and is therefore reported as "logged in" — confirm this is intended.
+        if not login_info.get("needLogin"):
+            output = json.dumps({"need_login": False}, ensure_ascii=False)
+            return ToolResult(
+                title="已登录",
+                output=output,
+                long_term_memory=output
+            )
+
+        row = _fetch_cookie_row(cookie_type)
+        cookie_value = _extract_cookie_value(row)
+        if not cookie_value:
+            return _cookie_failure("未找到 cookies")
+
+        domain, base_url = _cookie_domain_for_type(cookie_type, url)
+        cookies = _normalize_cookies(cookie_value, domain, base_url)
+        if not cookies:
+            return _cookie_failure("cookies 解析失败")
+
+        await browser._cdp_set_cookies(cookies)
+        # Reload so the page is requested with the injected cookies.
+        if url:
+            await tools.navigate(url=url, browser_session=browser)
+            await tools.wait(seconds=2, browser_session=browser)
+
+        output = json.dumps({"need_login": True, "cookies_count": len(cookies)}, ensure_ascii=False)
+        return ToolResult(
+            title="已注入 cookies",
+            output=output,
+            long_term_memory=output
+        )
+    except Exception as e:
+        # Tool boundary: report the failure to the caller instead of raising.
+        return ToolResult(
+            title="登录检查失败",
+            output="",
+            error=str(e),
+            long_term_memory="登录检查失败"
+        )
+
+
 # ============================================================
 # 文件系统工具 (File System Tools)
 # ============================================================
@@ -1293,6 +1497,7 @@ __all__ = [
 
     # JavaScript 执行工具
     'evaluate',
+    'ensure_login_with_cookies',
 
     # 文件系统工具
     'write_file',

+ 86 - 0
agent/tools/builtin/browser/sync_mysql_help.py

@@ -0,0 +1,86 @@
+import pymysql
+
+
+from typing import Tuple, Any, Dict, Literal, Optional
+from dbutils.pooled_db import PooledDB, PooledDedicatedDBConnection
+from dbutils.steady_db import SteadyDBCursor
+from pymysql.cursors import DictCursor
+
+
+class SyncMySQLHelper(object):
+    """Singleton helper that runs synchronous MySQL queries through a pool.
+
+    The pool is created on first use. Every query method borrows a connection
+    from the pool, runs one statement with a ``DictCursor`` and releases the
+    connection when its ``with`` block exits.
+    """
+    # Shared connection pool, created by get_pool() on first use.
+    _pool: Optional[PooledDB] = None
+    # The single instance handed out by __new__.
+    _instance = None
+
+    def __new__(cls, *args, **kwargs):
+        """Return the single shared instance, creating it on first call."""
+        if cls._instance is None:
+            # object.__new__ rejects extra arguments when __new__ is
+            # overridden, so the arguments are deliberately not forwarded.
+            cls._instance = super().__new__(cls)
+        return cls._instance
+
+    def get_pool(self):
+        """Return the connection pool, creating it on first call.
+
+        Connection settings are read from the AGENT_MYSQL_HOST, _PORT, _USER,
+        _PASSWORD and _DATABASE environment variables when set.
+        """
+        if self._pool is None:
+            import os  # local import: this module has no file-level os import
+
+            # SECURITY: the fallback values below are production credentials
+            # committed to source control. Rotate them, supply the settings
+            # through the environment, and then delete these defaults.
+            self._pool = PooledDB(
+                creator=pymysql,
+                mincached=10,
+                maxconnections=20,
+                blocking=True,
+                host=os.getenv(
+                    'AGENT_MYSQL_HOST',
+                    'rm-t4na9qj85v7790tf84o.mysql.singapore.rds.aliyuncs.com'),
+                port=int(os.getenv('AGENT_MYSQL_PORT', '3306')),
+                user=os.getenv('AGENT_MYSQL_USER', 'crawler_admin'),
+                password=os.getenv('AGENT_MYSQL_PASSWORD', 'cyber#crawler_2023'),
+                database=os.getenv('AGENT_MYSQL_DATABASE', 'aigc-admin-prod'))
+
+        return self._pool
+
+    def fetchone(self, sql: str, data: Optional[Tuple[Any, ...]] = None) -> Optional[Dict[str, Any]]:
+        """Run ``sql`` with ``data`` and return the first row, or None."""
+        pool = self.get_pool()
+        with pool.connection() as conn:
+            with conn.cursor(DictCursor) as cursor:
+                cursor.execute(sql, data)
+                return cursor.fetchone()
+
+    def fetchall(self, sql: str, data: Optional[Tuple[Any, ...]] = None) -> Tuple[Dict[str, Any]]:
+        """Run ``sql`` with ``data`` and return every row."""
+        pool = self.get_pool()
+        with pool.connection() as conn:
+            with conn.cursor(DictCursor) as cursor:
+                cursor.execute(sql, data)
+                return cursor.fetchall()
+
+    def fetchmany(self,
+                  sql: str,
+                  data: Optional[Tuple[Any, ...]] = None,
+                  size: Optional[int] = None) -> Tuple[Dict[str, Any]]:
+        """Run ``sql`` with ``data`` and return up to ``size`` rows.
+
+        ``size`` is passed straight to the cursor's ``fetchmany``.
+        """
+        pool = self.get_pool()
+        with pool.connection() as conn:
+            with conn.cursor(DictCursor) as cursor:
+                cursor.execute(sql, data)
+                return cursor.fetchmany(size=size)
+
+    def execute(self, sql: str, data: Optional[Tuple[Any, ...]] = None):
+        """Run a write statement and commit it.
+
+        Returns:
+            None. Duplicate-key errors (1062) and lock-wait timeouts (1205)
+            are rolled back and swallowed.
+
+        Raises:
+            pymysql.err.IntegrityError: for integrity errors other than 1062.
+            pymysql.err.OperationalError: for operational errors other than
+                1205.
+        """
+        pool = self.get_pool()
+        with pool.connection() as conn:
+            with conn.cursor(DictCursor) as cursor:
+                try:
+                    cursor.execute(sql, data)
+                    result = conn.commit()
+                    return result
+                except pymysql.err.IntegrityError as e:
+                    if e.args[0] == 1062:  # ER_DUP_ENTRY: duplicate key
+                        # Leave no open transaction on the pooled connection.
+                        conn.rollback()
+                        return None
+                    raise
+                except pymysql.err.OperationalError as e:
+                    if e.args[0] == 1205:  # ER_LOCK_WAIT_TIMEOUT
+                        conn.rollback()
+                        return None
+                    raise
+
+
+mysql = SyncMySQLHelper()
+
+
+

+ 25 - 12
docs/cloud_browser_guide.md

@@ -1,6 +1,7 @@
 # Browser-Use 云浏览器模式使用指南
 
 ## 目录
+
 - [简介](#简介)
 - [云浏览器 vs 本地浏览器](#云浏览器-vs-本地浏览器)
 - [环境配置](#环境配置)
@@ -27,15 +28,15 @@ Browser-Use 云浏览器模式允许你在云端运行浏览器自动化任务
 
 ## 云浏览器 vs 本地浏览器
 
-| 特性 | 云浏览器 | 本地浏览器 |
-|------|---------|-----------|
-| **安装要求** | 无需安装 Chrome | 需要安装 Chrome/Chromium |
-| **运行环境** | 云端 | 本地机器 |
-| **资源占用** | 不占用本地资源 | 占用本地 CPU/内存 |
-| **网络延迟** | 可能有轻微延迟 | 无网络延迟 |
-| **成本** | 需要 API 配额 | 免费 |
-| **调试** | 提供 Live URL 实时查看 | 可以直接看到浏览器窗口 |
-| **适用场景** | 服务器部署、分布式任务 | 本地开发、调试 |
+| 特性         | 云浏览器               | 本地浏览器               |
+| ------------ | ---------------------- | ------------------------ |
+| **安装要求** | 无需安装 Chrome        | 需要安装 Chrome/Chromium |
+| **运行环境** | 云端                   | 本地机器                 |
+| **资源占用** | 不占用本地资源         | 占用本地 CPU/内存        |
+| **网络延迟** | 可能有轻微延迟         | 无网络延迟               |
+| **成本**     | 需要 API 配额          | 免费                     |
+| **调试**     | 提供 Live URL 实时查看 | 可以直接看到浏览器窗口   |
+| **适用场景** | 服务器部署、分布式任务 | 本地开发、调试           |
 
 ---
 
@@ -80,7 +81,7 @@ GEMINI_API_KEY=your_gemini_api_key
 import asyncio
 import os
 from dotenv import load_dotenv
-from agent.tools.builtin.baseClass import (
+from agent.tools.builtin.browser.baseClass import (
     init_browser_session,
     cleanup_browser_session,
     navigate_to_url,
@@ -130,7 +131,7 @@ python examples/cloud_browser_example.py --all
 ### 1. 初始化云浏览器会话
 
 ```python
-from agent.tools.builtin.baseClass import init_browser_session
+from agent.tools.builtin.browser.baseClass import init_browser_session
 
 # 云浏览器模式
 browser, tools = await init_browser_session(
@@ -140,6 +141,7 @@ browser, tools = await init_browser_session(
 ```
 
 **参数说明:**
+
 - `headless`: 是否使用无头模式(云浏览器推荐 True)
 - `use_cloud`: 是否使用云浏览器(True=云浏览器,False=本地浏览器)
 - `browser_profile`: 可选,预设 cookies、localStorage 等
@@ -184,6 +186,7 @@ browser, tools = await init_browser_session(
 项目提供了丰富的浏览器操作工具,所有工具都支持云浏览器:
 
 #### 导航类工具
+
 ```python
 # 导航到 URL
 await navigate_to_url("https://example.com")
@@ -202,6 +205,7 @@ await wait(seconds=3)
 ```
 
 #### 元素交互工具
+
 ```python
 # 点击元素(需要先获取元素索引)
 await click_element(index=5)
@@ -218,6 +222,7 @@ await upload_file(index=7, path="/path/to/file.pdf")
 ```
 
 #### 页面操作工具
+
 ```python
 # 滚动页面
 await scroll_page(down=True, pages=2.0)
@@ -239,6 +244,7 @@ result = await evaluate("document.title")
 ```
 
 #### 标签页管理
+
 ```python
 # 切换标签页
 await switch_tab(tab_id="a3f2")
@@ -248,6 +254,7 @@ await close_tab(tab_id="a3f2")
 ```
 
 #### 文件操作
+
 ```python
 # 写入文件
 await write_file("output.txt", "Hello World")
@@ -439,7 +446,7 @@ assert browser1 is browser2
 ### 4. 强制终止浏览器
 
 ```python
-from agent.tools.builtin.baseClass import kill_browser_session
+from agent.tools.builtin.browser.baseClass import kill_browser_session
 
 # 优雅关闭(推荐)
 await cleanup_browser_session()
@@ -457,6 +464,7 @@ await kill_browser_session()
 **问题:** `python-socks is required to use a SOCKS proxy`
 
 **解决:**
+
 ```bash
 pip install python-socks
 ```
@@ -466,6 +474,7 @@ pip install python-socks
 **问题:** `未找到 BROWSER_USE_API_KEY`
 
 **解决:**
+
 1. 确保 `.env` 文件在项目根目录
 2. 确保 API Key 格式正确
 3. 确保代码中调用了 `load_dotenv()`
@@ -475,6 +484,7 @@ pip install python-socks
 **问题:** 云浏览器启动后无法连接
 
 **解决:**
+
 1. 检查网络连接
 2. 检查防火墙设置
 3. 尝试使用代理
@@ -482,6 +492,7 @@ pip install python-socks
 ### Q4: 如何切换回本地浏览器
 
 **解决:**
+
 ```python
 # 使用本地浏览器
 browser, tools = await init_browser_session(
@@ -494,6 +505,7 @@ browser, tools = await init_browser_session(
 **问题:** API 配额用完了怎么办
 
 **解决:**
+
 1. 查看 Browser-Use 官网的定价计划
 2. 升级到更高的配额
 3. 优化代码,减少不必要的浏览器操作
@@ -593,6 +605,7 @@ await cleanup_browser_session()
 ## 更新日志
 
 ### v1.0.0 (2026-01-30)
+
 - ✅ 初始版本
 - ✅ 支持云浏览器模式
 - ✅ 提供 5 个完整示例

+ 354 - 0
examples/cloud_browser_demo_db.py

@@ -0,0 +1,354 @@
+"""
+小红书云浏览器数据获取脚本(数据库配置版)
+从数据库 agent_channel_cookies 获取 Cookie 和 cloud_profile_id
+"""
+
+import sys
+import os
+import asyncio
+import json
+import re
+from datetime import datetime
+from pathlib import Path
+from urllib.parse import quote
+from dotenv import load_dotenv
+
+load_dotenv()
+
+project_root = Path(__file__).parent.parent
+sys.path.insert(0, str(project_root))
+
+from agent.tools.builtin.browser.baseClass import (
+    init_browser_session,
+    cleanup_browser_session,
+    kill_browser_session,
+    navigate_to_url,
+    scroll_page,
+    evaluate,
+    wait,
+    get_page_html,
+    _fetch_cookie_row,
+    _fetch_profile_id,
+    _normalize_cookies,
+    _cookie_domain_for_type,
+    _extract_cookie_value,
+)
+
+
+async def example_xhs_fitness_search(cookie_type: str = "xhs") -> dict:
+    """
+    Search Xiaohongshu for a fixed keyword in a cloud browser.
+
+    Reads the cloud profile id and cookies for ``cookie_type`` from the
+    database, then makes up to three attempts to open a cloud browser,
+    inject the cookies, load the search page and extract note results.
+
+    Args:
+        cookie_type: Cookie type used to look up configuration in the database.
+
+    Returns:
+        A result dict with "success", "keyword", "count", "results" and
+        "timestamp"; failures also carry "error". The first attempt that
+        yields results is returned; otherwise the last attempt's dict.
+
+    Raises:
+        RuntimeError: when BROWSER_USE_API_KEY is not set.
+    """
+    print("\n" + "="*60)
+    print("示例: 小红书云浏览器搜索 - 健身")
+    print("="*60)
+
+    api_key = os.getenv("BROWSER_USE_API_KEY")
+    if not api_key:
+        raise RuntimeError("未找到 BROWSER_USE_API_KEY")
+
+    keyword = "健身"
+    search_url = f"https://www.xiaohongshu.com/search_result?keyword={quote(keyword)}&type=51"
+    last_data: dict = {
+        "success": False,
+        "keyword": keyword,
+        "count": 0,
+        "results": [],
+        "error": "未知错误",
+        "timestamp": datetime.now().isoformat(),
+    }
+
+    # Load configuration from the database.
+    print(f"\n🔍 从数据库获取配置 (type={cookie_type})...")
+    profile_id = _fetch_profile_id(cookie_type)
+    cookie_row = _fetch_cookie_row(cookie_type)
+
+    if profile_id:
+        print(f"✅ 获取到 cloud_profile_id: {profile_id}")
+    else:
+        print("⚠️  未找到 cloud_profile_id,将使用环境变量或默认值")
+        profile_id = os.getenv("XHS_PROFILE_ID")
+
+    if cookie_row:
+        print("✅ 获取到 Cookie 配置")
+    else:
+        print("⚠️  未找到 Cookie 配置")
+
+    for attempt in range(3):
+        try:
+            # Make sure a retry never reuses the previous session.
+            if attempt > 0:
+                try:
+                    await kill_browser_session()
+                except Exception:
+                    pass
+                await asyncio.sleep(2)  # give the teardown time to finish
+
+            print(f"\n🌐 启动云浏览器 (尝试 {attempt + 1}/3)...")
+            browser, tools = await init_browser_session(
+                headless=False,
+                use_cloud=True,
+                cloud_profile_id=profile_id,
+                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+                disable_security=False,
+            )
+            if browser is None or tools is None:
+                raise RuntimeError("浏览器初始化失败")
+
+            print("✅ 云浏览器启动成功")
+
+            # Open the home page first.
+            print("\n🏠 访问小红书首页...")
+            nav_result = await navigate_to_url("https://www.xiaohongshu.com")
+            if nav_result.error:
+                raise RuntimeError(nav_result.error)
+            await wait(3)
+
+            # Inject cookies when the database provided any.
+            if cookie_row:
+                print("\n🍪 注入 Cookie...")
+                cookie_value = _extract_cookie_value(cookie_row)
+                if cookie_value:
+                    domain, base_url = _cookie_domain_for_type(cookie_type, "https://www.xiaohongshu.com")
+                    cookies = _normalize_cookies(cookie_value, domain, base_url)
+                    if cookies:
+                        await browser._cdp_set_cookies(cookies)
+                        print(f"✅ 成功注入 {len(cookies)} 个 Cookie")
+                        # Reload so the page is requested with the cookies.
+                        await navigate_to_url("https://www.xiaohongshu.com")
+                        await wait(2)
+                    else:
+                        print("⚠️  Cookie 解析失败")
+                else:
+                    print("⚠️  未找到 Cookie 值")
+
+            # Open the search page.
+            print(f"\n🔗 访问搜索页面: {keyword}")
+            nav_result = await navigate_to_url(search_url)
+            if nav_result.error:
+                raise RuntimeError(nav_result.error)
+            await wait(8)
+
+            # Scroll to trigger lazy loading of more results.
+            print("\n📜 滚动页面...")
+            for _ in range(3):
+                await scroll_page(down=True, pages=2.0)
+                await wait(2)
+
+            # Save the page HTML for debugging, then extract the data.
+            print("\n🔍 提取数据...")
+            html_result = await get_page_html()
+            if html_result.error:
+                raise RuntimeError(html_result.error)
+            html = html_result.metadata.get("html", "")
+            output_dir = project_root / "output"
+            output_dir.mkdir(parents=True, exist_ok=True)
+            output_path = output_dir / "xhs.html"
+            output_path.write_text(html or "", encoding="utf-8")
+            print(f"✅ 已保存页面 HTML: {output_path}")
+
+            extract_js = """
+        (function(){
+            const maxCount = 20;
+            const seen = new Set();
+            const results = [];
+
+            function pushItem(item){
+                if (!item || !item.link || seen.has(item.link)) return;
+                seen.add(item.link);
+                results.push(item);
+            }
+
+            const anchors = document.querySelectorAll('a[href*="/explore/"]');
+            anchors.forEach(a => {
+                if (results.length >= maxCount) return;
+                const link = a.href || '';
+                const img = a.querySelector('img');
+                const title = ((img && img.alt) || a.textContent || '').trim();
+                const cover = (img && img.src) || '';
+                if (link && title) {
+                    pushItem({ title, link, cover });
+                }
+            });
+
+            const scriptNodes = document.querySelectorAll('script[type="application/json"], script#__NEXT_DATA__, script#__NUXT__');
+            const walk = (node) => {
+                if (!node || results.length >= maxCount) return;
+                if (Array.isArray(node)) {
+                    for (const item of node) {
+                        walk(item);
+                        if (results.length >= maxCount) return;
+                    }
+                    return;
+                }
+                if (typeof node === 'object') {
+                    const title = (node.title || node.desc || node.name || node.noteTitle || '').toString().trim();
+                    const id = node.noteId || node.note_id || node.id || node.noteID;
+                    const cover = (node.cover && (node.cover.url || node.cover.urlDefault)) || node.coverUrl || node.image || '';
+                    let link = '';
+                    if (id) {
+                        link = `https://www.xiaohongshu.com/explore/${id}`;
+                    }
+                    if (title && link) {
+                        pushItem({ title, link, cover });
+                    }
+                    for (const key in node) {
+                        if (typeof node[key] === 'object') walk(node[key]);
+                    }
+                }
+            };
+
+            scriptNodes.forEach(node => {
+                if (results.length >= maxCount) return;
+                const text = node.textContent || '';
+                if (!text) return;
+                try {
+                    const data = JSON.parse(text);
+                    walk(data);
+                } catch (e) {}
+            });
+
+            return {
+                success: true,
+                keyword: __KEYWORD__,
+                count: results.length,
+                results: results,
+                timestamp: new Date().toISOString(),
+            };
+        })()
+        """
+            # json.dumps produces a valid, safely quoted JS string literal.
+            extract_js = extract_js.replace("__KEYWORD__", json.dumps(keyword, ensure_ascii=False))
+
+            async def run_extract() -> dict:
+                """Run the in-page extractor; fall back to scanning the HTML."""
+                result = await evaluate(extract_js)
+                if result.error:
+                    raise RuntimeError(result.error)
+                output = result.output
+                if isinstance(output, str) and output.startswith("Result: "):
+                    output = output[8:]
+                if not output:
+                    return {
+                        "success": False,
+                        "keyword": keyword,
+                        "count": 0,
+                        "results": [],
+                        "error": "可能被登录或验证码拦截",
+                        "timestamp": datetime.now().isoformat(),
+                    }
+
+                try:
+                    data = json.loads(output)
+                except Exception:
+                    data = {
+                        "success": False,
+                        "keyword": keyword,
+                        "count": 0,
+                        "results": [],
+                        "error": "JSON 解析失败",
+                        "raw_output": str(output)[:2000],
+                        "timestamp": datetime.now().isoformat(),
+                    }
+
+                if isinstance(data, dict) and data.get("count", 0) == 0:
+                    html_result = await get_page_html()
+                    if html_result.error:
+                        raise RuntimeError(html_result.error)
+                    html = html_result.metadata.get("html", "")
+                    blocked_markers = ["登录", "验证", "验证码", "请先登录", "异常访问"]
+                    if html and any(marker in html for marker in blocked_markers):
+                        data = {
+                            "success": False,
+                            "keyword": keyword,
+                            "count": 0,
+                            "results": [],
+                            "error": "可能被登录或验证码拦截",
+                            "timestamp": datetime.now().isoformat(),
+                        }
+                    elif html:
+                        results = []
+                        seen = set()
+                        pattern = re.compile(r'"noteId":"(.*?)".*?"title":"(.*?)"', re.S)
+                        for match in pattern.finditer(html):
+                            note_id = match.group(1)
+                            # Decode JSON escapes without mangling non-ASCII
+                            # text (the unicode_escape codec reads UTF-8
+                            # bytes as Latin-1 and corrupts Chinese titles).
+                            title = _decode_json_string_body(match.group(2)).strip()
+                            link = f"https://www.xiaohongshu.com/explore/{note_id}"
+                            if note_id and link not in seen and title:
+                                seen.add(link)
+                                results.append({"title": title, "link": link})
+                            if len(results) >= 20:
+                                break
+                        if results:
+                            data = {
+                                "success": True,
+                                "keyword": keyword,
+                                "count": len(results),
+                                "results": results,
+                                "timestamp": datetime.now().isoformat(),
+                                "source": "html_fallback",
+                            }
+
+                return data
+
+            data = await run_extract()
+
+            last_data = data if isinstance(data, dict) else last_data
+
+            # Report the outcome of this attempt.
+            if isinstance(last_data, dict) and last_data.get("count", 0) > 0:
+                print(f"\n✅ 成功获取 {last_data['count']} 条数据")
+                print(f"数据来源: {last_data.get('source', 'javascript')}")
+                print("\n前 5 条结果:")
+                for i, item in enumerate(last_data["results"][:5], 1):
+                    print(f"{i}. {item['title'][:50]}...")
+
+                # Success: the finally clause below closes the session once,
+                # so no explicit cleanup call is needed here.
+                return last_data
+
+            if isinstance(last_data, dict) and last_data.get("error") == "可能被登录或验证码拦截":
+                print("\n⚠️  检测到登录或验证码拦截")
+                print("💡 建议:在数据库中配置有效的 Cookie")
+
+        except Exception as e:
+            err_text = str(e)
+            print(f"⚠️  尝试 {attempt + 1}/3 失败: {err_text}")
+            last_data = {
+                "success": False,
+                "keyword": keyword,
+                "count": 0,
+                "results": [],
+                "error": err_text,
+                "timestamp": datetime.now().isoformat(),
+            }
+        finally:
+            # Always release the current session, including on early return.
+            try:
+                await cleanup_browser_session()
+            except Exception:
+                pass
+
+        # Pause before the next attempt, unless this was the last one.
+        if attempt < 2:
+            print("等待 5 秒后重试...")
+            await asyncio.sleep(5)
+
+    return last_data
+
+
+def _decode_json_string_body(raw: str) -> str:
+    """Decode the text found between the quotes of a JSON string literal.
+
+    Returns ``raw`` unchanged when it is not a valid JSON string body, for
+    example when a regex match cut it off in the middle of an escape.
+    """
+    try:
+        # strict=False tolerates raw control characters such as newlines.
+        return json.loads(f'"{raw}"', strict=False)
+    except ValueError:
+        return raw
+
+
+async def main():
+    """Run the search example and print the final result as indented JSON.
+
+    The cookie type comes from the first command-line argument and defaults
+    to "xhs".
+    """
+    cli_args = sys.argv[1:]
+    cookie_type = cli_args[0] if cli_args else "xhs"
+
+    data = await example_xhs_fitness_search(cookie_type)
+
+    banner = "=" * 60
+    print("\n" + banner)
+    print("📊 最终结果")
+    print(banner)
+    print(json.dumps(data, ensure_ascii=False, indent=2))
+
+
+if __name__ == "__main__":
+    asyncio.run(main())

+ 0 - 802
examples/cloud_browser_example.py

@@ -1,802 +0,0 @@
-"""
-云浏览器模式示例
-Cloud Browser Mode Example
-
-本示例展示如何使用 browser-use 的云浏览器模式进行网页自动化操作。
-云浏览器模式的优势:
-1. 无需本地安装 Chrome/Chromium
-2. 可以在无头服务器上运行
-3. 更好的稳定性和性能
-4. 支持分布式部署
-
-使用前提:
-1. 在 .env 文件中配置 BROWSER_USE_API_KEY
-2. 确保网络连接正常
-"""
-
-import sys
-import os
-import asyncio
-import json
-import re
-from datetime import datetime
-from pathlib import Path
-from urllib.parse import quote
-from dotenv import load_dotenv
-
-# 加载环境变量
-load_dotenv()
-
-# 将项目根目录添加到 Python 路径
-project_root = Path(__file__).parent.parent
-sys.path.insert(0, str(project_root))
-
-# 导入 browser-use 核心类
-from browser_use import BrowserSession, BrowserProfile
-from browser_use.tools.service import Tools
-
-# 导入框架的工具函数
-from agent.tools.builtin.baseClass import (
-    init_browser_session,
-    cleanup_browser_session,
-    navigate_to_url,
-    search_web,
-    get_selector_map,
-    click_element,
-    input_text,
-    screenshot,
-    get_page_html,
-    evaluate,
-    wait,
-    scroll_page,
-    wait_for_user_action,
-)
-
-
-async def example_1_basic_navigation():
-    """
-    Example 1: basic navigation.
-
-    Starts a cloud browser session, opens Baidu, prints the page title
-    obtained by evaluating JavaScript, and takes a screenshot. Any exception
-    is printed instead of raised, and the browser session cleanup always
-    runs in the ``finally`` block.
-    """
-    print("\n" + "="*60)
-    print("示例 1: 基础导航操作")
-    print("="*60)
-
-    try:
-        # The cloud browser needs an API key from the environment.
-        api_key = os.getenv("BROWSER_USE_API_KEY")
-        if not api_key:
-            print("❌ 错误: 未找到 BROWSER_USE_API_KEY,请在 .env 文件中配置")
-            return
-
-        # Show only a short masked prefix so the key does not end up in
-        # terminal scrollback or captured logs.
-        print(f"✅ 使用云浏览器 API Key: {api_key[:4]}***")
-
-        # use_cloud=True selects the cloud browser. The return value is not
-        # needed: the helpers below are called without a session argument.
-        await init_browser_session(
-            headless=True,
-            use_cloud=True,
-        )
-
-        print("✅ 云浏览器会话已启动")
-
-        # Navigate to Baidu.
-        print("\n📍 导航到百度...")
-        result = await navigate_to_url("https://www.baidu.com")
-        print(f"   结果: {result.title}")
-
-        # Give the page time to load.
-        await wait(2)
-
-        # Read the page title through JavaScript.
-        print("\n📄 获取页面信息...")
-        title_result = await evaluate("document.title")
-        print(f"   页面标题: {title_result.output}")
-
-        # Take a screenshot.
-        print("\n📸 截图...")
-        screenshot_result = await screenshot()
-        print(f"   截图结果: {screenshot_result.title}")
-
-        print("\n✅ 示例 1 完成")
-
-    except Exception as e:
-        print(f"❌ 错误: {str(e)}")
-    finally:
-        # Always release the browser session.
-        await cleanup_browser_session()
-        print("🧹 浏览器会话已清理")
-
-
-async def example_2_search_and_extract():
-    """
-    Example 2: search and content extraction.
-
-    Runs a web search in a cloud browser session, then prints the length of
-    the page HTML and the first 200 characters of the selector-map output.
-    Exceptions are printed, and the session is cleaned up in ``finally``.
-    """
-    banner = "=" * 60
-    print("\n" + banner)
-    print("示例 2: 搜索和内容提取")
-    print(banner)
-
-    try:
-        # Without an API key the cloud browser cannot be used.
-        if not os.getenv("BROWSER_USE_API_KEY"):
-            print("❌ 错误: 未找到 BROWSER_USE_API_KEY")
-            return
-
-        await init_browser_session(headless=True, use_cloud=True)
-        print("✅ 云浏览器会话已启动")
-
-        # Search through the search-engine helper.
-        query = "Python async programming"
-        print("\n🔍 搜索: Python async programming...")
-        search_outcome = await search_web(query, engine="google")
-        print(f"   搜索结果: {search_outcome.title}")
-
-        # Let the results finish loading.
-        await wait(3)
-
-        # Fetch the page HTML and report its size.
-        print("\n📄 获取页面 HTML...")
-        page = await get_page_html()
-        html_text = page.metadata.get('html', '')
-        print(f"   HTML 长度: {len(html_text)} 字符")
-
-        # Fetch the interactive elements and show a short preview.
-        print("\n🎯 获取页面元素...")
-        selectors = await get_selector_map()
-        preview = selectors.output[:200]
-        print(f"   {preview}...")
-
-        print("\n✅ 示例 2 完成")
-
-    except Exception as exc:
-        print(f"❌ 错误: {exc!s}")
-    finally:
-        await cleanup_browser_session()
-        print("🧹 浏览器会话已清理")
-
-
-async def example_3_with_browser_profile():
-    """
-    Example 3: start the session from a preconfigured BrowserProfile.
-
-    Builds a BrowserProfile carrying a cookie, a localStorage mapping and a
-    user agent, starts a cloud browser with it, opens httpbin's header page
-    and prints the first 100 characters of ``navigator.userAgent``.
-    """
-    banner = "=" * 60
-    print("\n" + banner)
-    print("示例 3: 使用 BrowserProfile 预设配置")
-    print(banner)
-
-    try:
-        if not os.getenv("BROWSER_USE_API_KEY"):
-            print("❌ 错误: 未找到 BROWSER_USE_API_KEY")
-            return
-
-        # Preset cookie handed to the profile.
-        preset_cookie = {
-            "name": "test_cookie",
-            "value": "test_value",
-            "domain": ".example.com",
-            "path": "/",
-        }
-        # Preset localStorage entries, keyed by site.
-        preset_storage = {
-            "example.com": {
-                "key1": "value1",
-                "key2": "value2",
-            }
-        }
-        # Custom user agent string.
-        preset_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"
-
-        profile = BrowserProfile(
-            cookies=[preset_cookie],
-            local_storage=preset_storage,
-            user_agent=preset_agent,
-        )
-        print("✅ 创建了 BrowserProfile 配置")
-
-        # Start the cloud browser with the profile applied.
-        await init_browser_session(
-            headless=True,
-            use_cloud=True,
-            browser_profile=profile,
-        )
-        print("✅ 云浏览器会话已启动(带预设配置)")
-
-        # Visit a page.
-        print("\n📍 导航到示例网站...")
-        nav_outcome = await navigate_to_url("https://httpbin.org/headers")
-        print(f"   结果: {nav_outcome.title}")
-
-        await wait(2)
-
-        # Print the user agent the page reports.
-        print("\n🔍 检查 User-Agent...")
-        agent_outcome = await evaluate("navigator.userAgent")
-        agent_preview = agent_outcome.output[:100]
-        print(f"   User-Agent: {agent_preview}...")
-
-        print("\n✅ 示例 3 完成")
-
-    except Exception as exc:
-        print(f"❌ 错误: {exc!s}")
-    finally:
-        await cleanup_browser_session()
-        print("🧹 浏览器会话已清理")
-
-
-async def example_4_form_interaction():
-    """
-    Example 4: form interaction.
-
-    Opens httpbin's form test page in a cloud browser and prints the
-    ``long_term_memory`` field of the selector-map result. The form is not
-    actually filled in; the example only demonstrates the flow.
-    """
-    banner = "=" * 60
-    print("\n" + banner)
-    print("示例 4: 表单交互")
-    print(banner)
-
-    try:
-        if not os.getenv("BROWSER_USE_API_KEY"):
-            print("❌ 错误: 未找到 BROWSER_USE_API_KEY")
-            return
-
-        await init_browser_session(headless=True, use_cloud=True)
-        print("✅ 云浏览器会话已启动")
-
-        # Open a test page that contains a form.
-        print("\n📍 导航到表单测试页面...")
-        nav_outcome = await navigate_to_url("https://httpbin.org/forms/post")
-        print(f"   结果: {nav_outcome.title}")
-
-        await wait(2)
-
-        # List the page's elements.
-        print("\n🎯 获取页面元素...")
-        selectors = await get_selector_map()
-        print(f"   找到 {selectors.long_term_memory}")
-
-        # NOTE: real usage must pick the element index from the page
-        # structure; this example stops after listing the elements.
-
-        print("\n✅ 示例 4 完成")
-
-    except Exception as exc:
-        print(f"❌ 错误: {exc!s}")
-    finally:
-        await cleanup_browser_session()
-        print("🧹 浏览器会话已清理")
-
-
-async def example_5_multi_tab():
-    """
-    Example 5: working with multiple tabs.
-
-    Opens Baidu, then opens Google with ``new_tab=True``, and prints the
-    document title evaluated afterwards.
-    """
-    banner = "=" * 60
-    print("\n" + banner)
-    print("示例 5: 多标签页操作")
-    print(banner)
-
-    try:
-        if not os.getenv("BROWSER_USE_API_KEY"):
-            print("❌ 错误: 未找到 BROWSER_USE_API_KEY")
-            return
-
-        await init_browser_session(headless=True, use_cloud=True)
-        print("✅ 云浏览器会话已启动")
-
-        # First tab: Baidu.
-        print("\n📍 标签页 1: 打开百度...")
-        first_tab = await navigate_to_url("https://www.baidu.com")
-        print(f"   结果: {first_tab.title}")
-
-        await wait(2)
-
-        # Second tab: Google, opened as a new tab.
-        print("\n📍 标签页 2: 打开谷歌(新标签页)...")
-        second_tab = await navigate_to_url("https://www.google.com", new_tab=True)
-        print(f"   结果: {second_tab.title}")
-
-        await wait(2)
-
-        # Report the title of the page that is current now.
-        print("\n📄 当前页面信息...")
-        current_title = await evaluate("document.title")
-        print(f"   当前标题: {current_title.output}")
-
-        print("\n✅ 示例 5 完成")
-
-    except Exception as exc:
-        print(f"❌ 错误: {exc!s}")
-    finally:
-        await cleanup_browser_session()
-        print("🧹 浏览器会话已清理")
-
- 
-def load_cookies(cookie_str, domain, url=None):
-    """Parse a ``name=value; name=value`` cookie string into cookie dicts.
-
-    Args:
-        cookie_str: Cookie header style string. ``None``, an empty string or
-            a non-string value yields an empty list.
-        domain: Value stored under the "domain" key of every cookie.
-        url: Optional value stored under the "url" key of every cookie.
-
-    Returns:
-        A list of dicts with the keys name, value, domain, path, expires,
-        httpOnly, secure, sameSite and, when ``url`` is given, url.
-        Fragments without "=" or with an empty name are skipped, so one
-        malformed fragment no longer discards the cookies that follow it.
-    """
-    cookies = []
-    if not cookie_str or not isinstance(cookie_str, str):
-        return cookies
-
-    for cookie_part in cookie_str.split(';'):
-        # Split on the first "=" only: cookie values may contain "=".
-        name, sep, value = cookie_part.partition('=')
-        name = name.strip()
-        if not sep or not name:
-            continue
-        cookie = {
-            "name": name,
-            "value": value.strip(),
-            "domain": domain,
-            "path": "/",
-            "expires": -1,
-            "httpOnly": False,
-            "secure": True,
-            "sameSite": "None",
-        }
-        if url:
-            cookie["url"] = url
-        cookies.append(cookie)
-    return cookies
-
-async def example_6_xhs_search_save():
-    """
-    Example 6: Xiaohongshu search with login handling, saved to JSON.
-
-    Flow: start a cloud browser, optionally inject cookies, check the login
-    state, wait for a manual login when needed, open the search page,
-    scroll, extract results with JavaScript (falling back to a regex over
-    the page HTML), and write the data to ``output/xhs.json``.
-
-    Cookies are read from the ``XHS_COOKIE`` environment variable as a
-    ``name=value; name=value`` string. Session cookies are credentials and
-    must not be committed to source control. Without the variable the
-    example relies on the manual login step.
-    """
-    print("\n" + "="*60)
-    print("示例 6: 小红书搜索并保存结果(带登录)")
-    print("="*60)
-
-    try:
-        api_key = os.getenv("BROWSER_USE_API_KEY")
-        if not api_key:
-            print("❌ 错误: 未找到 BROWSER_USE_API_KEY")
-            return
-
-        # Take the session cookies from the environment instead of keeping a
-        # live session in the source file.
-        cookies_str = os.getenv("XHS_COOKIE", "")
-
-        cookie_url = "https://www.xiaohongshu.com"
-        cookies = load_cookies(cookies_str, ".xiaohongshu.com", cookie_url)
-        if not cookies:
-            print("⚠️  未配置 XHS_COOKIE,将依赖手动登录")
-
-        profile = BrowserProfile(
-            user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
-        )
-
-        # Start the cloud browser with headless=False so the login page is
-        # visible to the user.
-        browser, _ = await init_browser_session(
-            headless=False,
-            use_cloud=True,
-            browser_profile=profile,
-        )
-
-        print("✅ 云浏览器会话已启动")
-        print("📝 提示: 云浏览器启动时会输出 Live URL,你可以在浏览器中打开查看")
-
-        # Step 1: open the home page, inject cookies, then reload.
-        print("\n📍 步骤 1: 访问小红书首页...")
-        await navigate_to_url("https://www.xiaohongshu.com")
-        await wait(3)
-        if cookies:
-            await browser._cdp_set_cookies(cookies)
-            await wait(1)
-            # Reload so the page is requested again with the cookies set.
-            await navigate_to_url("https://www.xiaohongshu.com")
-            await wait(3)
-
-        # Check whether a login is required.
-        print("\n🔍 检查登录状态...")
-        check_login_js = """
-        (function() {
-            // 检查是否有登录按钮或登录相关元素
-            const loginBtn = document.querySelector('[class*="login"]') ||
-                           document.querySelector('[href*="login"]') ||
-                           Array.from(document.querySelectorAll('button, a')).find(el => (el.textContent || '').includes('登录'));
-
-            // 检查是否有用户信息(已登录)
-            const userInfo = document.querySelector('[class*="user"]') ||
-                           document.querySelector('[class*="avatar"]');
-
-            return {
-                needLogin: !!loginBtn && !userInfo,
-                hasLoginBtn: !!loginBtn,
-                hasUserInfo: !!userInfo
-            };
-        })()
-        """
-
-        login_status = await evaluate(check_login_js)
-        print(f"   登录状态检查: {login_status.output}")
-        status_output = login_status.output
-        # Strip the "Result: " prefix the evaluate output may carry.
-        if isinstance(status_output, str) and status_output.startswith("Result: "):
-            status_output = status_output[8:]
-        login_info = None
-        if isinstance(status_output, str):
-            try:
-                login_info = json.loads(status_output)
-            except Exception:
-                login_info = None
-        elif isinstance(status_output, dict):
-            login_info = status_output
-
-        if login_info and login_info.get("needLogin"):
-            print("\n👤 步骤 2: 登录处理...")
-            print("   如果小红书需要登录,请在云浏览器中完成以下操作:")
-            print("   1. 打开上面输出的 Live URL(在日志中查找 '🔗 Live URL')")
-            print("   2. 在 Live URL 页面中完成登录(扫码或账号密码)")
-            print("   3. 登录成功后,回到这里按 Enter 继续")
-            await wait_for_user_action(
-                message="请在云浏览器中完成小红书登录,完成后按 Enter 继续",
-                timeout=300
-            )
-            print("\n✅ 用户已确认登录完成,继续执行...")
-        else:
-            print("\n✅ 已检测为登录状态,跳过手动登录")
-
-        # Step 3: run the search.
-        keyword = "瑜伽美女"
-        search_url = f"https://www.xiaohongshu.com/search_result?keyword={quote(keyword)}&type=51"
-        print(f"\n📍 步骤 3: 导航到搜索页: {keyword} ...")
-        await navigate_to_url(search_url)
-        await wait(6)
-
-        # Scroll to load more content.
-        print("\n📜 滚动页面加载更多内容...")
-        for i in range(3):
-            print(f"   滚动 {i+1}/3...")
-            await scroll_page(down=True, pages=2.0)
-            await wait(2)
-
-        # Step 4: extract the data.
-        print("\n📊 步骤 4: 提取搜索结果...")
-        extract_js = """
-        (function(){
-            const maxCount = 20;
-            const seen = new Set();
-            const results = [];
-
-            function pushItem(item){
-                if (!item || !item.link || seen.has(item.link)) return;
-                seen.add(item.link);
-                results.push(item);
-            }
-
-            // 方法 1: 从 DOM 中提取
-            const anchors = document.querySelectorAll('a[href*="/explore/"]');
-            anchors.forEach(a => {
-                if (results.length >= maxCount) return;
-                const link = a.href || '';
-                const img = a.querySelector('img');
-                const title = ((img && img.alt) || a.textContent || '').trim();
-                const cover = (img && img.src) || '';
-                if (link && title) {
-                    pushItem({ title, link, cover });
-                }
-            });
-
-            // 方法 2: 从 JSON 数据中提取
-            const scriptNodes = document.querySelectorAll('script[type="application/json"], script#__NEXT_DATA__, script#__NUXT__');
-            const walk = (node) => {
-                if (!node || results.length >= maxCount) return;
-                if (Array.isArray(node)) {
-                    for (const item of node) {
-                        walk(item);
-                        if (results.length >= maxCount) return;
-                    }
-                    return;
-                }
-                if (typeof node === 'object') {
-                    const title = (node.title || node.desc || node.name || node.noteTitle || '').toString().trim();
-                    const id = node.noteId || node.note_id || node.id || node.noteID;
-                    const cover = (node.cover && (node.cover.url || node.cover.urlDefault)) || node.coverUrl || node.image || '';
-                    let link = '';
-                    if (id) {
-                        link = `https://www.xiaohongshu.com/explore/${id}`;
-                    }
-                    if (title && link) {
-                        pushItem({ title, link, cover });
-                    }
-                    for (const key in node) {
-                        if (typeof node[key] === 'object') walk(node[key]);
-                    }
-                }
-            };
-
-            scriptNodes.forEach(node => {
-                if (results.length >= maxCount) return;
-                const text = node.textContent || '';
-                if (!text) return;
-                try {
-                    const data = JSON.parse(text);
-                    walk(data);
-                } catch (e) {}
-            });
-
-            return {
-                success: true,
-                keyword: '瑜伽美女',
-                count: results.length,
-                results: results,
-                timestamp: new Date().toISOString(),
-            };
-        })()
-        """
-
-        async def run_extract():
-            # Run the JS extractor; fall back to a regex over the page HTML
-            # when it yields no results.
-            result = await evaluate(extract_js)
-            output = result.output
-            if isinstance(output, str) and output.startswith("Result: "):
-                output = output[8:]
-
-            try:
-                data = json.loads(output)
-            except Exception:
-                data = {
-                    "success": False,
-                    "keyword": keyword,
-                    "error": "JSON 解析失败",
-                    "raw_output": str(output)[:2000],
-                    "timestamp": datetime.now().isoformat(),
-                }
-
-            if isinstance(data, dict) and data.get("count", 0) == 0:
-                print("   JS 提取结果为空,尝试从 HTML 中提取...")
-                html_result = await get_page_html()
-                html = html_result.metadata.get("html", "")
-                if html:
-                    def decode_text(value: str) -> str:
-                        # The captured text is the body of a JSON string
-                        # literal, so decode it as JSON. This resolves \uXXXX
-                        # escapes and keeps literal non-ASCII characters
-                        # intact, which "unicode_escape" on UTF-8 bytes
-                        # turned into mojibake.
-                        try:
-                            return json.loads(f'"{value}"')
-                        except ValueError:
-                            return value
-
-                    results = []
-                    seen = set()
-                    pattern = re.compile(r'"noteId":"(.*?)".*?"title":"(.*?)"', re.S)
-                    for match in pattern.finditer(html):
-                        note_id = match.group(1)
-                        title = decode_text(match.group(2)).strip()
-                        link = f"https://www.xiaohongshu.com/explore/{note_id}"
-                        if note_id and link not in seen and title:
-                            seen.add(link)
-                            results.append({"title": title, "link": link})
-                        if len(results) >= 20:
-                            break
-
-                    if results:
-                        data = {
-                            "success": True,
-                            "keyword": keyword,
-                            "count": len(results),
-                            "results": results,
-                            "timestamp": datetime.now().isoformat(),
-                            "source": "html_fallback",
-                        }
-                    else:
-                        # No results: look for text that suggests a login or
-                        # captcha wall.
-                        blocked_markers = ["登录", "验证", "验证码", "请先登录", "异常访问"]
-                        if any(marker in html for marker in blocked_markers):
-                            data = {
-                                "success": False,
-                                "keyword": keyword,
-                                "count": 0,
-                                "results": [],
-                                "error": "可能被登录或验证码拦截",
-                                "timestamp": datetime.now().isoformat(),
-                            }
-            return data
-
-        data = await run_extract()
-        if isinstance(data, dict) and data.get("count", 0) == 0 and data.get("error") == "可能被登录或验证码拦截":
-            print("\n👤 检测到拦截,请在云浏览器中完成登录或验证码验证")
-            await wait_for_user_action(
-                message="完成后按 Enter 继续,将重新提取搜索结果",
-                timeout=300
-            )
-            data = await run_extract()
-
-        # Step 5: save the result.
-        print(f"\n💾 步骤 5: 保存结果...")
-        print(f"   提取到 {data.get('count', 0)} 条数据")
-
-        output_dir = Path(__file__).parent.parent / "output"
-        output_dir.mkdir(parents=True, exist_ok=True)
-        output_path = output_dir / "xhs.json"
-        with open(output_path, "w", encoding="utf-8") as f:
-            json.dump(data, f, ensure_ascii=False, indent=2)
-
-        print(f"✅ 数据已保存到: {output_path}")
-
-        # Preview the first few results.
-        if data.get("results"):
-            print(f"\n📋 前 3 条结果预览:")
-            for i, item in enumerate(data["results"][:3], 1):
-                print(f"   {i}. {item.get('title', 'N/A')[:50]}")
-                print(f"      {item.get('link', 'N/A')}")
-
-        print("\n✅ 示例 6 完成")
-
-    except Exception as e:
-        print(f"❌ 错误: {str(e)}")
-        import traceback
-        traceback.print_exc()
-    finally:
-        await cleanup_browser_session()
-        print("🧹 浏览器会话已清理")
-
-
-async def example_7_baidu_search_save():
-    """
-    Example 7: Baidu search saved to JSON.
-
-    Opens the Baidu result page for a fixed keyword in a cloud browser,
-    extracts up to the first ten result entries with JavaScript, writes the
-    parsed data to ``output/baidu.json`` and previews the first three.
-    """
-    banner = "=" * 60
-    print("\n" + banner)
-    print("示例 7: 百度搜索并保存结果")
-    print(banner)
-
-    try:
-        if not os.getenv("BROWSER_USE_API_KEY"):
-            print("❌ 错误: 未找到 BROWSER_USE_API_KEY")
-            return
-
-        await init_browser_session(headless=True, use_cloud=True)
-        print("✅ 云浏览器会话已启动")
-
-        keyword = "瑜伽美女"
-        print(f"\n📍 导航到百度搜索页: {keyword} ...")
-        await navigate_to_url(f"https://www.baidu.com/s?wd={quote(keyword)}")
-        await wait(3)
-        await scroll_page(down=True, pages=1.5)
-        await wait(2)
-
-        # JavaScript run in the page to collect the result entries.
-        extract_js = """
-        (function(){
-            const results = [];
-            const items = document.querySelectorAll('#content_left > div[class*="result"]');
-            items.forEach((item, index) => {
-                if (index >= 10) return;
-                const titleEl = item.querySelector('h3 a, .t a');
-                const title = titleEl ? titleEl.textContent.trim() : '';
-                const link = titleEl ? titleEl.href : '';
-                const summaryEl = item.querySelector('.c-abstract, .content-right_8Zs40');
-                const summary = summaryEl ? summaryEl.textContent.trim() : '';
-                const sourceEl = item.querySelector('.c-color-gray, .source_1Vdff');
-                const source = sourceEl ? sourceEl.textContent.trim() : '';
-                if (title || link) {
-                    results.push({
-                        index: index + 1,
-                        title,
-                        link,
-                        summary: summary.substring(0, 200),
-                        source,
-                    });
-                }
-            });
-            return {
-                success: true,
-                keyword: '瑜伽美女',
-                count: results.length,
-                results,
-                timestamp: new Date().toISOString(),
-            };
-        })()
-        """
-
-        evaluation = await evaluate(extract_js)
-        raw = evaluation.output
-        # Drop the "Result: " prefix when the output carries it.
-        prefix = "Result: "
-        if isinstance(raw, str) and raw.startswith(prefix):
-            raw = raw[len(prefix):]
-
-        try:
-            data = json.loads(raw)
-        except Exception:
-            # Keep a truncated copy of the unparsable output for inspection.
-            data = {
-                "success": False,
-                "keyword": keyword,
-                "error": "JSON 解析失败",
-                "raw_output": str(raw)[:2000],
-                "timestamp": datetime.now().isoformat(),
-            }
-
-        target_dir = Path(__file__).parent.parent / "output"
-        target_dir.mkdir(parents=True, exist_ok=True)
-        output_path = target_dir / "baidu.json"
-        with open(output_path, "w", encoding="utf-8") as handle:
-            json.dump(data, handle, ensure_ascii=False, indent=2)
-
-        print(f"✅ 数据已保存到: {output_path}")
-        if data.get("results"):
-            print("\n📋 前 3 条结果预览:")
-            for position, entry in enumerate(data["results"][:3], 1):
-                print(f"   {position}. {entry.get('title', 'N/A')[:50]}")
-                print(f"      {entry.get('link', 'N/A')}")
-
-        print("\n✅ 示例 7 完成")
-
-    except Exception as exc:
-        print(f"❌ 错误: {exc!s}")
-    finally:
-        await cleanup_browser_session()
-        print("🧹 浏览器会话已清理")
-
-
-async def main():
-    """
-    Entry point: run one or all of the cloud browser examples.
-
-    Command-line options:
-        --example N   run example N (1-based index into the example list)
-        --all         run every example in order
-    With neither option the first example is run. Returns early with an
-    error message when BROWSER_USE_API_KEY is not set.
-    """
-    import argparse
-
-    print("\n" + "="*60)
-    print("🌐 Browser-Use 云浏览器模式示例")
-    print("="*60)
-
-    # Check the API key.
-    api_key = os.getenv("BROWSER_USE_API_KEY")
-    if not api_key:
-        print("\n❌ 错误: 未找到 BROWSER_USE_API_KEY")
-        print("请在 .env 文件中配置 BROWSER_USE_API_KEY")
-        return
-
-    # Show only a short masked prefix so the key does not end up in
-    # terminal scrollback or captured logs.
-    print(f"\n✅ 已加载 API Key: {api_key[:4]}***")
-
-    # Available examples as (display name, coroutine function) pairs.
-    examples = [
-        ("基础导航操作", example_1_basic_navigation),
-        ("搜索和内容提取", example_2_search_and_extract),
-        ("使用 BrowserProfile", example_3_with_browser_profile),
-        ("表单交互", example_4_form_interaction),
-        ("多标签页操作", example_5_multi_tab),
-        ("小红书搜索并保存结果", example_6_xhs_search_save),
-        ("百度搜索并保存结果", example_7_baidu_search_save),
-    ]
-
-    # Parse the command-line arguments.
-    parser = argparse.ArgumentParser(description="Browser-Use 云浏览器模式示例")
-    parser.add_argument(
-        "--example",
-        type=int,
-        choices=range(1, len(examples) + 1),
-        # The range in the help text follows the list length.
-        help=f"选择要运行的示例 (1-{len(examples)}),不指定则运行第一个示例"
-    )
-    parser.add_argument(
-        "--all",
-        action="store_true",
-        help="运行所有示例"
-    )
-    args = parser.parse_args()
-
-    print("\n可用示例:")
-    for i, (name, _) in enumerate(examples, 1):
-        print(f"  {i}. {name}")
-
-    if args.all:
-        # Run every example in order.
-        print("\n运行所有示例...")
-        for _, func in examples:
-            await func()
-            print("\n" + "-"*60)
-    elif args.example:
-        # Run the selected example.
-        name, func = examples[args.example - 1]
-        print(f"\n运行示例 {args.example}: {name}")
-        await func()
-    else:
-        # Default: run the first example.
-        name, func = examples[0]
-        print(f"\n运行默认示例: {name}")
-        print("(使用 --example N 运行其他示例,或 --all 运行所有示例)")
-        await func()
-
-    print("\n" + "="*60)
-    print("✅ 示例运行完成")
-    print("="*60)
-
-
-if __name__ == "__main__":
-    # Run the async main function when executed as a script.
-    asyncio.run(main())

+ 1 - 1
examples/test_tools_baidu.py

@@ -8,7 +8,7 @@ from urllib.parse import quote
 
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from agent.tools.builtin.baseClass import (
+from agent.tools.builtin.browser.baseClass import (
     init_browser_session,
     navigate_to_url,
     wait,