baseClass.py 58 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833
  1. """
  2. Browser-Use 原生工具适配器
  3. Native Browser-Use Tools Adapter
  4. 直接使用 browser-use 的原生类(BrowserSession, Tools)实现所有浏览器操作工具。
  5. 不依赖 Playwright,完全基于 CDP 协议。
  6. 核心特性:
  7. 1. 浏览器会话持久化 - 只启动一次浏览器
  8. 2. 状态自动保持 - 登录状态、Cookie、LocalStorage 等
  9. 3. 完整的底层访问 - 可以直接使用 CDP 协议
  10. 4. 性能优异 - 避免频繁创建/销毁浏览器实例
  11. 5. 多种浏览器类型 - 支持 local、cloud、container 三种模式
  12. 支持的浏览器类型:
  13. 1. Local (本地浏览器):
  14. - 在本地运行 Chrome
  15. - 支持可视化调试
  16. - 速度最快
  17. - 示例: init_browser_session(browser_type="local")
  18. 2. Cloud (云浏览器):
  19. - 在云端运行
  20. - 不占用本地资源
  21. - 适合生产环境
  22. - 示例: init_browser_session(browser_type="cloud")
  23. 3. Container (容器浏览器):
  24. - 在独立容器中运行
  25. - 隔离性好
  26. - 支持预配置账户
  27. - 示例: init_browser_session(browser_type="container", container_url="https://example.com")
  28. 使用方法:
  29. 1. 在 Agent 初始化时调用 init_browser_session() 并指定 browser_type
  30. 2. 使用各个工具函数执行浏览器操作
  31. 3. 任务结束时调用 cleanup_browser_session()
  32. 文件操作说明:
  33. - 浏览器专用文件目录:.browser_use_files/ (在当前工作目录下)
  34. 用于存储浏览器会话产生的临时文件(下载、上传、截图等)
  35. - 一般文件操作:请使用 agent.tools.builtin 中的文件工具 (read_file, write_file, edit_file)
  36. 这些工具功能更完善,支持diff预览、智能匹配、分页读取等
  37. """
  38. import sys
  39. import os
  40. import json
  41. import asyncio
  42. import aiohttp
  43. import re
  44. import base64
  45. from urllib.parse import urlparse, parse_qs, unquote
  46. from typing import Optional, List, Dict, Any, Tuple
  47. from pathlib import Path
  48. from langchain_core.runnables import RunnableLambda
  49. from argparse import Namespace # 使用 Namespace 快速构造带属性的对象
  50. from langchain_core.messages import AIMessage
  51. from ....llm.openrouter import openrouter_llm_call
  52. # 将项目根目录添加到 Python 路径
  53. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  54. # 导入框架的工具装饰器和结果类
  55. from agent.tools import tool, ToolResult
  56. from agent.tools.builtin.browser.sync_mysql_help import mysql
  57. # 导入 browser-use 的核心类
  58. from browser_use import BrowserSession, BrowserProfile
  59. from browser_use.tools.service import Tools
  60. from browser_use.tools.views import ReadContentAction
  61. from browser_use.agent.views import ActionResult
  62. from browser_use.filesystem.file_system import FileSystem
  63. # ============================================================
  64. # 无需注册的内部辅助函数
  65. # ============================================================
  66. # ============================================================
  67. # 全局浏览器会话管理
  68. # ============================================================
  # Module-level singletons shared by every tool in this file.
  # init_browser_session() populates them; cleanup_browser_session() and
  # kill_browser_session() reset them to None.
  _browser_session: Optional[BrowserSession] = None
  _browser_tools: Optional[Tools] = None
  _file_system: Optional[FileSystem] = None
  async def create_container(url: str, account_name: str = "liuwenwu") -> Dict[str, Any]:
      """Create a remote browser container and open ``url`` inside it.

      Two HTTP calls are made against the container service:

      1. ``POST .../container/create`` allocates the container.
      2. ``POST .../browser/page/create`` opens a page on ``url``; this call
         is attempted up to three times with a 3 second pause between tries.

      No exception escapes this function: every failure is caught, printed
      and reported through the ``error`` key of the returned dict.

      Args:
          url: Address the new page navigates to.
          account_name: Account name forwarded to the page-creation call.

      Returns:
          A dict with the keys ``success``, ``container_id``, ``vnc``,
          ``cdp``, ``connection_id`` and ``error``. ``success`` is True only
          when both the container and the page were created.
      """
      # NOTE(review): the service endpoint is hard-coded; consider moving it
      # to configuration.
      api_base = "http://47.84.182.56:8200/api/v1"
      result: Dict[str, Any] = {
          "success": False,
          "container_id": None,
          "vnc": None,
          "cdp": None,
          "connection_id": None,
          "error": None,
      }
      try:
          async with aiohttp.ClientSession() as session:
              # Step 1.1: create the container.
              print("📦 步骤1.1: 创建容器...")
              create_payload = {
                  "auto_remove": True,
                  "need_port_binding": True,
                  "max_lifetime_seconds": 900,
              }
              async with session.post(f"{api_base}/container/create", json=create_payload) as resp:
                  if resp.status != 200:
                      raise RuntimeError(f"创建容器失败: HTTP {resp.status}")
                  create_result = await resp.json()
              if create_result.get("code") != 0:
                  raise RuntimeError(f"创建容器失败: {create_result.get('msg')}")
              # ``get("data", {})`` would still return None for an explicit
              # ``"data": null`` payload, so normalise with ``or``.
              data = create_result.get("data") or {}
              result["container_id"] = data.get("container_id")
              result["vnc"] = data.get("vnc")
              result["cdp"] = data.get("cdp")
              if not result["container_id"]:
                  # Fail now instead of burning three page-creation retries
                  # on a request that cannot name its container.
                  raise RuntimeError("创建容器失败: 响应中缺少 container_id")
              print("✅ 容器创建成功")
              print(f" Container ID: {result['container_id']}")
              print(f" VNC: {result['vnc']}")
              print(f" CDP: {result['cdp']}")

              # Give the browser inside the container time to start.
              print("\n⏳ 等待容器内浏览器启动...")
              await asyncio.sleep(5)

              # Step 1.2: create the page and navigate.
              print(f"\n📱 步骤1.2: 创建页面并导航到 {url}...")
              page_payload = {
                  "container_id": result["container_id"],
                  "url": url,
                  "account_name": account_name,
                  "need_wait": True,
                  "timeout": 30,
              }
              max_retries = 3
              page_created = False
              last_error = None
              for attempt in range(max_retries):
                  try:
                      if attempt > 0:
                          print(f" 重试 {attempt + 1}/{max_retries}...")
                          await asyncio.sleep(3)  # pause before retrying
                      async with session.post(
                          f"{api_base}/browser/page/create",
                          json=page_payload,
                          timeout=aiohttp.ClientTimeout(total=60),
                      ) as resp:
                          if resp.status != 200:
                              response_text = await resp.text()
                              last_error = f"HTTP {resp.status}: {response_text[:200]}"
                              continue
                          page_result = await resp.json()
                      if page_result.get("code") != 0:
                          last_error = f"{page_result.get('msg')}"
                          continue
                      page_data = page_result.get("data") or {}
                      result["connection_id"] = page_data.get("connection_id")
                      result["success"] = True
                      page_created = True
                      print("✅ 页面创建成功")
                      print(f" Connection ID: {result['connection_id']}")
                      break
                  except asyncio.TimeoutError:
                      last_error = "请求超时"
                  except aiohttp.ClientError as e:
                      last_error = f"网络错误: {str(e)}"
                  except Exception as e:
                      last_error = f"未知错误: {str(e)}"
              if not page_created:
                  raise RuntimeError(f"创建页面失败(尝试{max_retries}次后): {last_error}")
      except Exception as e:
          # Best-effort contract: report the failure in the result dict.
          result["error"] = str(e)
          print(f"❌ 错误: {str(e)}")
      return result
  async def init_browser_session(
      browser_type: str = "local",
      headless: bool = False,
      url: Optional[str] = None,
      profile_name: str = "default",
      user_data_dir: Optional[str] = None,
      browser_profile: Optional[BrowserProfile] = None,
      **kwargs
  ) -> tuple[BrowserSession, Tools]:
      """Initialise the module-wide browser session (idempotent).

      If a session already exists it is returned unchanged. Otherwise a
      ``BrowserSession`` is configured for the requested mode, started, and
      stored in the module globals together with a ``Tools`` instance and a
      ``FileSystem`` rooted at ``<cwd>/.browser_use_files``.

      Args:
          browser_type: One of ``"local"``, ``"cloud"`` or ``"container"``.
          headless: Passed to ``BrowserSession`` as ``headless``.
          url: Optional start URL. In local/cloud mode the session navigates
              to it after start-up. In container mode it is handed to
              ``create_container``; when omitted, ``about:blank`` is used.
          profile_name: In local mode, names the profile directory under
              ``~/.browser_use/profiles``; in cloud mode it is sent as
              ``cloud_profile_id`` unless it is ``"default"``; in container
              mode it is the container account name.
          user_data_dir: Local mode only. Explicit user-data directory that
              overrides the one derived from ``profile_name``.
          browser_profile: Optional ``BrowserProfile`` forwarded in every mode.
          **kwargs: Extra ``BrowserSession`` arguments; they override the
              values computed here.

      Returns:
          The ``(BrowserSession, Tools)`` pair held in the module globals.

      Raises:
          ValueError: ``browser_type`` is not one of the supported values.
          RuntimeError: Container creation failed.

      Examples:
          browser, tools = await init_browser_session(
              browser_type="local",
              url="https://www.baidu.com",
          )
          browser, tools = await init_browser_session(
              browser_type="container",
              url="https://www.xiaohongshu.com",
              profile_name="my_account",
          )
      """
      global _browser_session, _browser_tools, _file_system
      if _browser_session is not None:
          return _browser_session, _browser_tools

      valid_types = ["local", "cloud", "container"]
      if browser_type not in valid_types:
          raise ValueError(f"无效的 browser_type: {browser_type},必须是 {valid_types} 之一")

      session_params = {
          "headless": headless,
      }

      if browser_type == "container":
          print("🐳 使用容器浏览器模式")
          if not url:
              url = "about:blank"  # fall back to a blank page
              print("⚠️ 未提供 url 参数,使用默认空白页")
          print("📦 正在创建容器...")
          container_info = await create_container(
              url=url,
              account_name=profile_name
          )
          if not container_info["success"]:
              raise RuntimeError(f"容器创建失败: {container_info['error']}")
          cdp_url = container_info["cdp"]
          print("✅ 容器创建成功")
          print(f" CDP URL: {cdp_url}")
          print(f" Container ID: {container_info['container_id']}")
          print(f" Connection ID: {container_info.get('connection_id')}")
          # Attach to the container's browser over its CDP endpoint.
          session_params["cdp_url"] = cdp_url
          print("⏳ 等待容器浏览器启动...")
          await asyncio.sleep(3)
      elif browser_type == "cloud":
          print("🌐 使用云浏览器模式")
          session_params["use_cloud"] = True
          if profile_name and profile_name != "default":
              session_params["cloud_profile_id"] = profile_name
      else:  # local
          print("💻 使用本地浏览器模式")
          session_params["is_local"] = True
          # A per-profile data directory keeps login state between runs.
          if user_data_dir is None and profile_name:
              user_data_dir = str(Path.home() / ".browser_use" / "profiles" / profile_name)
              Path(user_data_dir).mkdir(parents=True, exist_ok=True)
          # On macOS point explicitly at the installed Chrome, if present.
          import platform
          if platform.system() == "Darwin":
              chrome_path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
              if Path(chrome_path).exists():
                  session_params["executable_path"] = chrome_path
          if user_data_dir:
              session_params["user_data_dir"] = user_data_dir

      if browser_profile:
          session_params["browser_profile"] = browser_profile
      session_params.update(kwargs)

      # Start the session BEFORE publishing it: if start() raises, the
      # globals stay None, so the next call retries instead of handing out
      # a dead session paired with ``_browser_tools is None``.
      session = BrowserSession(**session_params)
      await session.start()
      _browser_session = session
      _browser_tools = Tools()

      # Scratch directory for files produced by the browser session
      # (downloads, uploads, screenshots).
      base_dir = Path.cwd() / ".browser_use_files"
      base_dir.mkdir(parents=True, exist_ok=True)
      _file_system = FileSystem(base_dir=str(base_dir))
      print("✅ 浏览器会话初始化成功")

      # Container mode already opened the URL when the page was created.
      if browser_type in ["local", "cloud"] and url:
          print(f"🔗 导航到: {url}")
          await _browser_tools.navigate(url=url, browser_session=_browser_session)
      return _browser_session, _browser_tools
  async def get_browser_session() -> tuple[BrowserSession, Tools]:
      """Return the shared browser session, creating it on first use.

      When no session exists yet, ``init_browser_session()`` is awaited with
      its default arguments before the module globals are read.

      Returns:
          The ``(BrowserSession, Tools)`` pair held in the module globals.
      """
      if _browser_session is not None:
          return _browser_session, _browser_tools
      await init_browser_session()
      return _browser_session, _browser_tools
  async def cleanup_browser_session():
      """Stop the shared browser session and clear the module globals.

      Awaits ``stop()`` on the current session, if there is one. The
      globals are reset even when ``stop()`` raises, so a failed shutdown
      cannot leave later callers holding a half-stopped session; the
      exception still propagates to the caller.
      """
      global _browser_session, _browser_tools, _file_system
      if _browser_session is None:
          return
      try:
          await _browser_session.stop()
      finally:
          _browser_session = None
          _browser_tools = None
          _file_system = None
  async def kill_browser_session():
      """Forcefully terminate the shared browser session.

      Awaits ``kill()`` on the current session, if there is one. The
      globals are reset even when ``kill()`` raises, so a failed
      termination cannot leave later callers holding a dead session; the
      exception still propagates to the caller.
      """
      global _browser_session, _browser_tools, _file_system
      if _browser_session is None:
          return
      try:
          await _browser_session.kill()
      finally:
          _browser_session = None
          _browser_tools = None
          _file_system = None
  337. # ============================================================
  338. # 辅助函数:ActionResult 转 ToolResult
  339. # ============================================================
  def action_result_to_tool_result(result: ActionResult, title: str = None) -> ToolResult:
      """Convert a browser-use ``ActionResult`` into the framework ``ToolResult``.

      Args:
          result: The ``ActionResult`` returned by a browser-use tool.
          title: Optional title; a generic success/failure title is used
              when it is omitted or empty.

      Returns:
          A failure ``ToolResult`` (empty output, ``error`` set) when
          ``result.error`` is truthy; otherwise a success ``ToolResult``
          built from the extracted content, long-term memory and metadata.
      """
      failure = result.error
      if not failure:
          content = result.extracted_content
          return ToolResult(
              title=title or "操作成功",
              output=content or "",
              long_term_memory=result.long_term_memory or content or "",
              metadata=result.metadata or {},
          )
      return ToolResult(
          title=title or "操作失败",
          output="",
          error=failure,
          long_term_memory=result.long_term_memory or failure,
      )
  362. def _cookie_domain_for_type(cookie_type: str, url: str) -> Tuple[str, str]:
  363. if cookie_type:
  364. key = cookie_type.lower()
  365. if key in {"xiaohongshu", "xhs"}:
  366. return ".xiaohongshu.com", "https://www.xiaohongshu.com"
  367. parsed = urlparse(url or "")
  368. domain = parsed.netloc or ""
  369. domain = domain.replace("www.", "")
  370. if domain:
  371. domain = f".{domain}"
  372. base_url = f"{parsed.scheme}://{parsed.netloc}" if parsed.scheme and parsed.netloc else url
  373. return domain, base_url
  374. def _parse_cookie_string(cookie_str: str, domain: str, url: str) -> List[Dict[str, Any]]:
  375. cookies: List[Dict[str, Any]] = []
  376. if not cookie_str:
  377. return cookies
  378. parts = cookie_str.split(";")
  379. for part in parts:
  380. if not part:
  381. continue
  382. if "=" not in part:
  383. continue
  384. name, value = part.split("=", 1)
  385. cookie = {
  386. "name": str(name).strip(),
  387. "value": str(value).strip(),
  388. "domain": domain,
  389. "path": "/",
  390. "expires": -1,
  391. "httpOnly": False,
  392. "secure": True,
  393. "sameSite": "None"
  394. }
  395. if url:
  396. cookie["url"] = url
  397. cookies.append(cookie)
  398. return cookies
  399. def _normalize_cookies(cookie_value: Any, domain: str, url: str) -> List[Dict[str, Any]]:
  400. if cookie_value is None:
  401. return []
  402. if isinstance(cookie_value, list):
  403. return cookie_value
  404. if isinstance(cookie_value, dict):
  405. if "cookies" in cookie_value:
  406. return _normalize_cookies(cookie_value.get("cookies"), domain, url)
  407. if "name" in cookie_value and "value" in cookie_value:
  408. return [cookie_value]
  409. return []
  410. if isinstance(cookie_value, (bytes, bytearray)):
  411. cookie_value = cookie_value.decode("utf-8", errors="ignore")
  412. if isinstance(cookie_value, str):
  413. text = cookie_value.strip()
  414. if not text:
  415. return []
  416. try:
  417. parsed = json.loads(text)
  418. except Exception:
  419. parsed = None
  420. if parsed is not None:
  421. return _normalize_cookies(parsed, domain, url)
  422. return _parse_cookie_string(text, domain, url)
  423. return []
  424. def _extract_cookie_value(row: Optional[Dict[str, Any]]) -> Any:
  425. if not row:
  426. return None
  427. # 优先使用 cookies 字段
  428. if "cookies" in row:
  429. return row["cookies"]
  430. # 兼容其他可能的字段名
  431. for key, value in row.items():
  432. if "cookie" in key.lower():
  433. return value
  434. return None
  435. def _fetch_cookie_row(cookie_type: str) -> Optional[Dict[str, Any]]:
  436. if not cookie_type:
  437. return None
  438. try:
  439. return mysql.fetchone(
  440. "select * from agent_channel_cookies where type=%s limit 1",
  441. (cookie_type,)
  442. )
  443. except Exception:
  444. return None
  445. def _fetch_profile_id(cookie_type: str) -> Optional[str]:
  446. """从数据库获取 cloud_profile_id"""
  447. if not cookie_type:
  448. return None
  449. try:
  450. row = mysql.fetchone(
  451. "select profileId from agent_channel_cookies where type=%s limit 1",
  452. (cookie_type,)
  453. )
  454. if row and "profileId" in row:
  455. return row["profileId"]
  456. return None
  457. except Exception:
  458. return None
  459. # ============================================================
  460. # 需要注册的工具
  461. # ============================================================
  462. # ============================================================
  463. # 导航类工具 (Navigation Tools)
  464. # ============================================================
  @tool()
  async def browser_navigate_to_url(url: str, new_tab: bool = False) -> ToolResult:
      """Navigate the shared browser session to a URL.

      Delegates to the browser-use ``navigate`` tool.

      Args:
          url: Address to open.
          new_tab: Open the address in a new tab instead of the current one
              (default False).

      Returns:
          ToolResult: The converted navigation outcome, or a failure result
          carrying the exception message if anything raises.

      Example:
          browser_navigate_to_url("https://www.baidu.com")
          browser_navigate_to_url("https://www.google.com", new_tab=True)
      """
      try:
          session, toolkit = await get_browser_session()
          outcome = await toolkit.navigate(url=url, new_tab=new_tab, browser_session=session)
          return action_result_to_tool_result(outcome, f"导航到 {url}")
      except Exception as exc:
          failure_text = f"Failed to navigate to {url}: {exc!s}"
          return ToolResult(
              title="导航失败",
              output="",
              error=failure_text,
              long_term_memory=f"导航到 {url} 失败",
          )
  @tool()
  async def browser_search_web(query: str, engine: str = "bing") -> ToolResult:
      """Search the web with a search engine.

      Delegates to the browser-use ``search`` tool.

      Args:
          query: Search keywords.
          engine: Search engine name, e.g. ``google``, ``duckduckgo`` or
              ``bing`` (default: ``bing``).

      Returns:
          ToolResult: The converted search outcome, or a failure result
          carrying the exception message if anything raises.

      Example:
          browser_search_web("Python async programming", engine="google")
      """
      try:
          session, toolkit = await get_browser_session()
          outcome = await toolkit.search(query=query, engine=engine, browser_session=session)
          return action_result_to_tool_result(outcome, f"搜索: {query}")
      except Exception as exc:
          failure_text = f"Search failed: {exc!s}"
          return ToolResult(
              title="搜索失败",
              output="",
              error=failure_text,
              long_term_memory=f"搜索 '{query}' 失败",
          )
  @tool()
  async def browser_go_back() -> ToolResult:
      """Go back to the previous page.

      Delegates to the browser-use ``go_back`` tool, i.e. the equivalent of
      the browser's "back" button.

      Returns:
          ToolResult: The converted outcome, or a failure result carrying
          the exception message if anything raises.
      """
      try:
          session, toolkit = await get_browser_session()
          outcome = await toolkit.go_back(browser_session=session)
          return action_result_to_tool_result(outcome, "返回上一页")
      except Exception as exc:
          failure_text = f"Failed to go back: {exc!s}"
          return ToolResult(
              title="返回失败",
              output="",
              error=failure_text,
              long_term_memory="返回上一页失败",
          )
  @tool()
  async def browser_wait(seconds: int = 3) -> ToolResult:
      """Wait for a number of seconds.

      Useful while a page loads, an animation finishes or another
      asynchronous operation completes. Delegates to the browser-use
      ``wait`` tool.

      Args:
          seconds: How long to wait, in seconds. A 30-second maximum is
              documented for this tool; this wrapper does not enforce it
              itself.

      Returns:
          ToolResult: The converted outcome, or a failure result carrying
          the exception message if anything raises.

      Example:
          browser_wait(5)  # wait five seconds
      """
      try:
          session, toolkit = await get_browser_session()
          outcome = await toolkit.wait(seconds=seconds, browser_session=session)
          return action_result_to_tool_result(outcome, f"等待 {seconds} 秒")
      except Exception as exc:
          failure_text = f"Failed to wait: {exc!s}"
          return ToolResult(
              title="等待失败",
              output="",
              error=failure_text,
              long_term_memory="等待失败",
          )
  569. # ============================================================
  570. # 元素交互工具 (Element Interaction Tools)
  571. # ============================================================
  @tool()
  async def browser_click_element(index: int) -> ToolResult:
      """Click a page element identified by its index.

      Delegates to the browser-use ``click`` tool.

      Args:
          index: Element index taken from the current browser state.

      Returns:
          ToolResult: The converted click outcome, or a failure result
          carrying the exception message if anything raises.

      Example:
          browser_click_element(index=5)

      Note:
          Element indices must be obtained first (the original notes
          mention ``get_selector_map``).
      """
      try:
          session, toolkit = await get_browser_session()
          outcome = await toolkit.click(index=index, browser_session=session)
          return action_result_to_tool_result(outcome, f"点击元素 {index}")
      except Exception as exc:
          failure_text = f"Failed to click element {index}: {exc!s}"
          return ToolResult(
              title="点击失败",
              output="",
              error=failure_text,
              long_term_memory=f"点击元素 {index} 失败",
          )
  @tool()
  async def browser_input_text(index: int, text: str, clear: bool = True) -> ToolResult:
      """Type text into a page element.

      Delegates to the browser-use ``input`` tool.

      Args:
          index: Element index taken from the current browser state.
          text: Text to type.
          clear: Clear the existing content first (default True).

      Returns:
          ToolResult: The converted input outcome, or a failure result
          carrying the exception message if anything raises.

      Example:
          browser_input_text(index=0, text="Hello World", clear=True)
      """
      try:
          session, toolkit = await get_browser_session()
          outcome = await toolkit.input(
              index=index, text=text, clear=clear, browser_session=session
          )
          return action_result_to_tool_result(outcome, f"输入文本到元素 {index}")
      except Exception as exc:
          failure_text = f"Failed to input text into element {index}: {exc!s}"
          return ToolResult(
              title="输入失败",
              output="",
              error=failure_text,
              long_term_memory="输入文本失败",
          )
  @tool()
  async def browser_send_keys(keys: str) -> ToolResult:
      """Send keyboard keys or shortcuts to the browser.

      Delegates to the browser-use ``send_keys`` tool.

      Args:
          keys: Key string to send, for example:
              - single keys: "Enter", "Escape", "PageDown", "Tab"
              - combinations: "Control+o", "Shift+Tab", "Alt+F4"
              - function keys: "F1", "F2", ..., "F12"

      Returns:
          ToolResult: The converted outcome, or a failure result carrying
          the exception message if anything raises.

      Example:
          browser_send_keys("Enter")
          browser_send_keys("Control+A")
      """
      try:
          session, toolkit = await get_browser_session()
          outcome = await toolkit.send_keys(keys=keys, browser_session=session)
          return action_result_to_tool_result(outcome, f"发送按键: {keys}")
      except Exception as exc:
          failure_text = f"Failed to send keys: {exc!s}"
          return ToolResult(
              title="发送按键失败",
              output="",
              error=failure_text,
              long_term_memory="发送按键失败",
          )
  @tool()
  async def browser_upload_file(index: int, path: str) -> ToolResult:
      """Upload a file through a file-input element.

      Delegates to the browser-use ``upload_file`` tool, passing ``path`` as
      the only available file path together with the module's shared file
      system instance.

      Args:
          index: Element index of the file input.
          path: Path of the file to upload; the original notes require an
              absolute path to an existing file.

      Returns:
          ToolResult: The converted upload outcome, or a failure result
          carrying the exception message if anything raises.

      Example:
          browser_upload_file(index=7, path="/path/to/file.pdf")
      """
      try:
          session, toolkit = await get_browser_session()
          upload_arguments = {
              "index": index,
              "path": path,
              "browser_session": session,
              "available_file_paths": [path],
              "file_system": _file_system,
          }
          outcome = await toolkit.upload_file(**upload_arguments)
          return action_result_to_tool_result(outcome, f"上传文件: {path}")
      except Exception as exc:
          failure_text = f"Failed to upload file: {exc!s}"
          return ToolResult(
              title="上传失败",
              output="",
              error=failure_text,
              long_term_memory=f"上传文件 {path} 失败",
          )
  693. # ============================================================
  694. # 滚动和视图工具 (Scroll & View Tools)
  695. # ============================================================
  @tool()
  async def browser_scroll_page(down: bool = True, pages: float = 1.0, index: Optional[int] = None) -> ToolResult:
      """Scroll the page (or one element) and verify that the page moved.

      ``window.scrollY`` is read over CDP before the scroll and again one
      second after it. For a page-level scroll (``index`` is None) an
      unchanged offset is reported as an ineffective scroll.

      Args:
          down: Forwarded to the browser-use ``scroll`` tool (default True).
          pages: Forwarded to the browser-use ``scroll`` tool (default 1.0).
          index: Optional element index forwarded to the ``scroll`` tool.
              When given, the movement check is skipped.

      Returns:
          ToolResult: The converted scroll outcome; a failure result when
          the scroll tool reports an error, when no movement is detected on
          a page-level scroll, or when anything raises.

      Example:
          browser_scroll_page(down=True, pages=2)
      """
      async def _read_scroll_y(cdp_session) -> Any:
          # Evaluate window.scrollY in the page through the CDP session.
          reply = await cdp_session.cdp_client.send.Runtime.evaluate(
              params={'expression': 'window.scrollY'},
              session_id=cdp_session.session_id
          )
          return reply.get('result', {}).get('value', 0)

      try:
          browser, tools = await get_browser_session()
          cdp_session = await browser.get_or_create_cdp_session()
          before_y = await _read_scroll_y(cdp_session)

          result = await tools.scroll(down=down, pages=pages, index=index, browser_session=browser)
          if result.error:
              # Surface the real scroll error instead of masking it with
              # the generic "No movement detected" result below.
              return action_result_to_tool_result(result, "滚动失败")

          # Let the page render before sampling the offset again.
          await asyncio.sleep(1)
          after_y = await _read_scroll_y(cdp_session)

          if before_y == after_y and index is None:
              return ToolResult(
                  title="滚动无效",
                  output="页面已到达边界或滚动被拦截",
                  error="No movement detected"
              )
          return action_result_to_tool_result(result, "已滚动")
      except Exception as e:
          # ``output`` must be passed explicitly: the original code notes
          # that the framework errors without it.
          return ToolResult(
              title="滚动失败",
              output="",
              error=str(e)
          )
  @tool()
  async def browser_find_text(text: str) -> ToolResult:
      """Find text on the page and scroll to it.

      Delegates to the browser-use ``find_text`` tool.

      Args:
          text: Text to look for.

      Returns:
          ToolResult: The converted lookup outcome, or a failure result
          carrying the exception message if anything raises.

      Example:
          browser_find_text("Privacy Policy")
      """
      try:
          session, toolkit = await get_browser_session()
          outcome = await toolkit.find_text(text=text, browser_session=session)
          return action_result_to_tool_result(outcome, f"查找文本: {text}")
      except Exception as exc:
          failure_text = f"Failed to find text: {exc!s}"
          return ToolResult(
              title="查找失败",
              output="",
              error=failure_text,
              long_term_memory=f"查找文本 '{text}' 失败",
          )
  @tool()
  async def browser_screenshot() -> ToolResult:
      """Request a screenshot to be included in the next observation.

      Intended for visually checking page state and layout. Delegates to
      the browser-use ``screenshot`` tool.

      Returns:
          ToolResult: The converted outcome of the request, or a failure
          result carrying the exception message if anything raises.

      Example:
          browser_screenshot()

      Note:
          Per the original notes, the screenshot is attached to the next
          page observation rather than returned here.
      """
      try:
          session, toolkit = await get_browser_session()
          outcome = await toolkit.screenshot(browser_session=session)
          return action_result_to_tool_result(outcome, "截图请求")
      except Exception as exc:
          failure_text = f"Failed to capture screenshot: {exc!s}"
          return ToolResult(
              title="截图失败",
              output="",
              error=failure_text,
              long_term_memory="截图失败",
          )
  784. # ============================================================
  785. # 标签页管理工具 (Tab Management Tools)
  786. # ============================================================
  787. @tool()
  788. async def browser_switch_tab(tab_id: str) -> ToolResult:
  789. """
  790. 切换到指定标签页
  791. Switch to a different browser tab
  792. Args:
  793. tab_id: 4字符标签ID(target_id 的最后4位)
  794. Returns:
  795. ToolResult: 切换结果
  796. Example:
  797. switch_tab(tab_id="a3f2")
  798. """
  799. try:
  800. browser, tools = await get_browser_session()
  801. normalized_tab_id = tab_id[-4:] if tab_id else tab_id
  802. result = await tools.switch(
  803. tab_id=normalized_tab_id,
  804. browser_session=browser
  805. )
  806. return action_result_to_tool_result(result, f"切换到标签页 {normalized_tab_id}")
  807. except Exception as e:
  808. return ToolResult(
  809. title="切换标签页失败",
  810. output="",
  811. error=f"Failed to switch tab: {str(e)}",
  812. long_term_memory=f"切换到标签页 {tab_id} 失败"
  813. )
  814. @tool()
  815. async def browser_close_tab(tab_id: str) -> ToolResult:
  816. """
  817. 关闭指定标签页
  818. Close a browser tab
  819. Args:
  820. tab_id: 4字符标签ID
  821. Returns:
  822. ToolResult: 关闭结果
  823. Example:
  824. close_tab(tab_id="a3f2")
  825. """
  826. try:
  827. browser, tools = await get_browser_session()
  828. normalized_tab_id = tab_id[-4:] if tab_id else tab_id
  829. result = await tools.close(
  830. tab_id=normalized_tab_id,
  831. browser_session=browser
  832. )
  833. return action_result_to_tool_result(result, f"关闭标签页 {normalized_tab_id}")
  834. except Exception as e:
  835. return ToolResult(
  836. title="关闭标签页失败",
  837. output="",
  838. error=f"Failed to close tab: {str(e)}",
  839. long_term_memory=f"关闭标签页 {tab_id} 失败"
  840. )
  841. # ============================================================
  842. # 下拉框工具 (Dropdown Tools)
  843. # ============================================================
  844. @tool()
  845. async def browser_get_dropdown_options(index: int) -> ToolResult:
  846. """
  847. 获取下拉框的所有选项
  848. Get options from a dropdown element
  849. Args:
  850. index: 下拉框的元素索引
  851. Returns:
  852. ToolResult: 包含所有选项的结果
  853. Example:
  854. get_dropdown_options(index=8)
  855. """
  856. try:
  857. browser, tools = await get_browser_session()
  858. result = await tools.dropdown_options(
  859. index=index,
  860. browser_session=browser
  861. )
  862. return action_result_to_tool_result(result, f"获取下拉框选项: {index}")
  863. except Exception as e:
  864. return ToolResult(
  865. title="获取下拉框选项失败",
  866. output="",
  867. error=f"Failed to get dropdown options: {str(e)}",
  868. long_term_memory=f"获取下拉框 {index} 选项失败"
  869. )
  870. @tool()
  871. async def browser_select_dropdown_option(index: int, text: str) -> ToolResult:
  872. """
  873. 选择下拉框选项
  874. Select an option from a dropdown
  875. Args:
  876. index: 下拉框的元素索引
  877. text: 要选择的选项文本(精确匹配)
  878. Returns:
  879. ToolResult: 选择结果
  880. Example:
  881. select_dropdown_option(index=8, text="Option 2")
  882. """
  883. try:
  884. browser, tools = await get_browser_session()
  885. result = await tools.select_dropdown(
  886. index=index,
  887. text=text,
  888. browser_session=browser
  889. )
  890. return action_result_to_tool_result(result, f"选择下拉框选项: {text}")
  891. except Exception as e:
  892. return ToolResult(
  893. title="选择下拉框选项失败",
  894. output="",
  895. error=f"Failed to select dropdown option: {str(e)}",
  896. long_term_memory=f"选择选项 '{text}' 失败"
  897. )
  898. # ============================================================
  899. # 内容提取工具 (Content Extraction Tools)
  900. # ============================================================
  901. def scrub_search_redirect_url(url: str) -> str:
  902. """
  903. 自动检测并解析 Bing/Google 等搜索引擎的重定向链接,提取真实目标 URL。
  904. """
  905. if not url or not isinstance(url, str):
  906. return url
  907. try:
  908. parsed = urlparse(url)
  909. # 1. 处理 Bing 重定向 (特征:u 参数带 Base64)
  910. # 示例:...&u=a1aHR0cHM6Ly96aHVhbmxhbi56aGlodS5jb20vcC8zODYxMjgwOQ&...
  911. if "bing.com" in parsed.netloc:
  912. u_param = parse_qs(parsed.query).get('u', [None])[0]
  913. if u_param:
  914. # 移除开头的 'a1', 'a0' 等标识符
  915. b64_str = u_param[2:]
  916. # 补齐 Base64 填充符
  917. padding = '=' * (4 - len(b64_str) % 4)
  918. decoded = base64.b64decode(b64_str + padding).decode('utf-8', errors='ignore')
  919. if decoded.startswith('http'):
  920. return decoded
  921. # 2. 处理 Google 重定向 (特征:url 参数)
  922. if "google.com" in parsed.netloc:
  923. url_param = parse_qs(parsed.query).get('url', [None])[0]
  924. if url_param:
  925. return unquote(url_param)
  926. # 3. 兜底:处理常见的跳转参数
  927. for param in ['target', 'dest', 'destination', 'link']:
  928. found = parse_qs(parsed.query).get(param, [None])[0]
  929. if found and found.startswith('http'):
  930. return unquote(found)
  931. except Exception:
  932. pass # 解析失败则返回原链接
  933. return url
  934. async def extraction_adapter(input_data):
  935. # 提取字符串
  936. if isinstance(input_data, list):
  937. prompt = input_data[-1].content if hasattr(input_data[-1], 'content') else str(input_data[-1])
  938. else:
  939. prompt = str(input_data)
  940. response = await openrouter_llm_call(
  941. messages=[{"role": "user", "content": prompt}]
  942. )
  943. content = response["content"]
  944. # --- 核心改进:URL 自动修复 ---
  945. # 使用正则表达式匹配内容中的所有 URL,并尝试进行洗涤
  946. urls = re.findall(r'https?://[^\s<>"\']+', content)
  947. for original_url in urls:
  948. clean_url = scrub_search_redirect_url(original_url)
  949. if clean_url != original_url:
  950. content = content.replace(original_url, clean_url)
  951. from argparse import Namespace
  952. return Namespace(completion=content)
  953. @tool()
  954. async def browser_extract_content(query: str, extract_links: bool = False,
  955. start_from_char: int = 0) -> ToolResult:
  956. """
  957. 使用 LLM 从页面提取结构化数据
  958. Extract content from the current page using LLM
  959. Args:
  960. query: 提取查询(告诉 LLM 要提取什么内容)
  961. extract_links: 是否提取链接(默认 False,节省 token)
  962. start_from_char: 从哪个字符开始提取(用于分页提取大内容)
  963. Returns:
  964. ToolResult: 提取的内容
  965. Example:
  966. extract_content(query="提取页面上所有产品的名称和价格", extract_links=True)
  967. Note:
  968. 需要配置 page_extraction_llm,否则会失败
  969. 支持分页提取,最大100k字符
  970. """
  971. try:
  972. browser, tools = await get_browser_session()
  973. # 注意:extract 需要 page_extraction_llm 参数
  974. # 这里我们假设用户会在初始化时配置 LLM
  975. # 如果没有配置,会抛出异常
  976. result = await tools.extract(
  977. query=query,
  978. extract_links=extract_links,
  979. start_from_char=start_from_char,
  980. browser_session=browser,
  981. page_extraction_llm=RunnableLambda(extraction_adapter), # 需要用户配置
  982. file_system=_file_system
  983. )
  984. return action_result_to_tool_result(result, f"提取内容: {query}")
  985. except Exception as e:
  986. return ToolResult(
  987. title="内容提取失败",
  988. output="",
  989. error=f"Failed to extract content: {str(e)}",
  990. long_term_memory=f"提取内容失败: {query}"
  991. )
  992. async def _detect_and_download_pdf_via_cdp(browser) -> Optional[str]:
  993. """
  994. 检测当前页面是否为 PDF,如果是则通过 CDP(浏览器内 fetch)下载到本地。
  995. 优势:自动携带浏览器的 cookies/session,可访问需要登录的 PDF。
  996. 返回本地文件路径,非 PDF 页面返回 None。
  997. """
  998. try:
  999. current_url = await browser.get_current_page_url()
  1000. if not current_url:
  1001. return None
  1002. parsed = urlparse(current_url)
  1003. is_pdf = parsed.path.lower().endswith('.pdf')
  1004. # URL 不明显是 PDF 时,通过 CDP 检查 content-type
  1005. if not is_pdf:
  1006. try:
  1007. cdp = await browser.get_or_create_cdp_session()
  1008. ct_result = await cdp.cdp_client.send.Runtime.evaluate(
  1009. params={'expression': 'document.contentType'},
  1010. session_id=cdp.session_id
  1011. )
  1012. content_type = ct_result.get('result', {}).get('value', '')
  1013. is_pdf = 'pdf' in content_type.lower()
  1014. except Exception:
  1015. pass
  1016. if not is_pdf:
  1017. return None
  1018. # 通过浏览器内 fetch API 下载 PDF(自动携带 cookies)
  1019. cdp = await browser.get_or_create_cdp_session()
  1020. js_code = """
  1021. (async () => {
  1022. try {
  1023. const resp = await fetch(window.location.href);
  1024. if (!resp.ok) return JSON.stringify({error: 'HTTP ' + resp.status});
  1025. const blob = await resp.blob();
  1026. return new Promise((resolve, reject) => {
  1027. const reader = new FileReader();
  1028. reader.onloadend = () => resolve(JSON.stringify({data: reader.result}));
  1029. reader.onerror = () => resolve(JSON.stringify({error: 'FileReader failed'}));
  1030. reader.readAsDataURL(blob);
  1031. });
  1032. } catch(e) {
  1033. return JSON.stringify({error: e.message});
  1034. }
  1035. })()
  1036. """
  1037. result = await cdp.cdp_client.send.Runtime.evaluate(
  1038. params={
  1039. 'expression': js_code,
  1040. 'awaitPromise': True,
  1041. 'returnByValue': True,
  1042. 'timeout': 60000
  1043. },
  1044. session_id=cdp.session_id
  1045. )
  1046. value = result.get('result', {}).get('value', '')
  1047. if not value:
  1048. print("⚠️ CDP fetch PDF: 无返回值")
  1049. return None
  1050. data = json.loads(value)
  1051. if 'error' in data:
  1052. print(f"⚠️ CDP fetch PDF 失败: {data['error']}")
  1053. return None
  1054. # 从 data URL 中提取 base64 并解码
  1055. data_url = data['data'] # data:application/pdf;base64,JVBERi0...
  1056. base64_data = data_url.split(',', 1)[1]
  1057. pdf_bytes = base64.b64decode(base64_data)
  1058. # 保存到本地
  1059. save_dir = Path.cwd() / ".browser_use_files"
  1060. save_dir.mkdir(parents=True, exist_ok=True)
  1061. filename = Path(parsed.path).name if parsed.path else ""
  1062. if not filename or not filename.lower().endswith('.pdf'):
  1063. import time
  1064. filename = f"downloaded_{int(time.time())}.pdf"
  1065. save_path = str(save_dir / filename)
  1066. with open(save_path, 'wb') as f:
  1067. f.write(pdf_bytes)
  1068. print(f"📄 PDF 已通过 CDP 下载到: {save_path} ({len(pdf_bytes)} bytes)")
  1069. return save_path
  1070. except Exception as e:
  1071. print(f"⚠️ PDF 检测/下载异常: {e}")
  1072. return None
  1073. @tool()
  1074. async def browser_read_long_content(
  1075. goal: Any,
  1076. source: str = "page",
  1077. context: Any = "",
  1078. **kwargs
  1079. ) -> ToolResult:
  1080. """
  1081. 智能读取长内容。支持自动检测并读取网页上的 PDF 文件。
  1082. 当 source="page" 且当前页面是 PDF 时,会通过 CDP 下载 PDF 并用 pypdf 解析,
  1083. 而非使用 DOM 提取(DOM 无法读取浏览器内置 PDF Viewer 的内容)。
  1084. 通过 CDP 下载可自动携带浏览器的 cookies/session,支持需要登录的 PDF。
  1085. """
  1086. try:
  1087. browser, tools = await get_browser_session()
  1088. # 1. 提取目标文本 (针对 GoalTree 字典结构)
  1089. final_goal_text = ""
  1090. if isinstance(goal, dict):
  1091. final_goal_text = goal.get("mission") or goal.get("goal") or str(goal)
  1092. else:
  1093. final_goal_text = str(goal)
  1094. # 2. 清洗业务背景 (过滤框架注入的 dict 类型 context)
  1095. business_context = context if isinstance(context, str) else ""
  1096. # 3. PDF 自动检测:当 source="page" 时检查是否为 PDF 页面
  1097. available_files = []
  1098. if source.lower() == "page":
  1099. pdf_path = await _detect_and_download_pdf_via_cdp(browser)
  1100. if pdf_path:
  1101. source = pdf_path
  1102. available_files.append(pdf_path)
  1103. # 4. 验证并实例化
  1104. action_params = ReadContentAction(
  1105. goal=final_goal_text,
  1106. source=source,
  1107. context=business_context
  1108. )
  1109. # 5. 解包参数调用底层方法
  1110. result = await tools.read_long_content(
  1111. **action_params.model_dump(),
  1112. browser_session=browser,
  1113. page_extraction_llm=RunnableLambda(extraction_adapter),
  1114. available_file_paths=available_files
  1115. )
  1116. return action_result_to_tool_result(result, f"深度读取: {source}")
  1117. except Exception as e:
  1118. return ToolResult(
  1119. title="深度读取失败",
  1120. output="",
  1121. error=f"Read long content failed: {str(e)}",
  1122. long_term_memory="参数解析或校验失败,请检查输入"
  1123. )
  1124. @tool()
  1125. async def browser_get_page_html() -> ToolResult:
  1126. """
  1127. 获取当前页面的完整 HTML
  1128. Get the full HTML of the current page
  1129. 返回当前页面的完整 HTML 源代码。
  1130. Returns:
  1131. ToolResult: 包含页面 HTML 的工具返回对象
  1132. Example:
  1133. get_page_html()
  1134. Note:
  1135. - 返回的是完整的 HTML 源代码
  1136. - 输出会被限制在 10000 字符以内(完整内容保存在 metadata 中)
  1137. """
  1138. try:
  1139. browser, tools = await get_browser_session()
  1140. # 使用 CDP 获取页面 HTML
  1141. cdp = await browser.get_or_create_cdp_session()
  1142. # 获取页面内容
  1143. result = await cdp.cdp_client.send.Runtime.evaluate(
  1144. params={'expression': 'document.documentElement.outerHTML'},
  1145. session_id=cdp.session_id
  1146. )
  1147. html = result.get('result', {}).get('value', '')
  1148. # 获取 URL 和标题
  1149. url = await browser.get_current_page_url()
  1150. title_result = await cdp.cdp_client.send.Runtime.evaluate(
  1151. params={'expression': 'document.title'},
  1152. session_id=cdp.session_id
  1153. )
  1154. title = title_result.get('result', {}).get('value', '')
  1155. # 限制输出大小
  1156. output_html = html
  1157. if len(html) > 10000:
  1158. output_html = html[:10000] + "... (truncated)"
  1159. return ToolResult(
  1160. title=f"获取 HTML: {url}",
  1161. output=f"页面: {title}\nURL: {url}\n\nHTML:\n{output_html}",
  1162. long_term_memory=f"获取 HTML: {url}",
  1163. metadata={"url": url, "title": title, "html": html}
  1164. )
  1165. except Exception as e:
  1166. return ToolResult(
  1167. title="获取 HTML 失败",
  1168. output="",
  1169. error=f"Failed to get page HTML: {str(e)}",
  1170. long_term_memory="获取 HTML 失败"
  1171. )
  1172. @tool()
  1173. async def browser_get_selector_map() -> ToolResult:
  1174. """
  1175. 获取当前页面的元素索引映射
  1176. Get the selector map of interactive elements on the current page
  1177. 返回页面所有可交互元素的索引字典,用于后续的元素操作。
  1178. Returns:
  1179. ToolResult: 包含元素映射的工具返回对象
  1180. Example:
  1181. get_selector_map()
  1182. Note:
  1183. 返回的索引可以用于 click_element, input_text 等操作
  1184. """
  1185. try:
  1186. browser, tools = await get_browser_session()
  1187. # 关键修复:先触发 BrowserStateRequestEvent 来更新 DOM 状态
  1188. # 这会触发 DOM watchdog 重新构建 DOM 树并更新 selector_map
  1189. from browser_use.browser.events import BrowserStateRequestEvent
  1190. # 触发事件并等待结果
  1191. event = browser.event_bus.dispatch(
  1192. BrowserStateRequestEvent(
  1193. include_dom=True,
  1194. include_screenshot=False, # 不需要截图,节省时间
  1195. include_recent_events=False
  1196. )
  1197. )
  1198. # 等待 DOM 更新完成
  1199. browser_state = await event.event_result(raise_if_none=True, raise_if_any=True)
  1200. # 从更新后的状态中获取 selector_map
  1201. selector_map = browser_state.dom_state.selector_map if browser_state.dom_state else {}
  1202. # 构建输出信息
  1203. elements_info = []
  1204. for index, node in list(selector_map.items())[:20]: # 只显示前20个
  1205. tag = node.tag_name
  1206. attrs = node.attributes or {}
  1207. text = attrs.get('aria-label') or attrs.get('placeholder') or attrs.get('value', '')
  1208. elements_info.append(f"索引 {index}: <{tag}> {text[:50]}")
  1209. output = f"找到 {len(selector_map)} 个交互元素\n\n"
  1210. output += "\n".join(elements_info)
  1211. if len(selector_map) > 20:
  1212. output += f"\n... 还有 {len(selector_map) - 20} 个元素"
  1213. return ToolResult(
  1214. title="获取元素映射",
  1215. output=output,
  1216. long_term_memory=f"获取到 {len(selector_map)} 个交互元素",
  1217. metadata={"selector_map": {k: str(v) for k, v in list(selector_map.items())[:100]}}
  1218. )
  1219. except Exception as e:
  1220. return ToolResult(
  1221. title="获取元素映射失败",
  1222. output="",
  1223. error=f"Failed to get selector map: {str(e)}",
  1224. long_term_memory="获取元素映射失败"
  1225. )
  1226. # ============================================================
  1227. # JavaScript 执行工具 (JavaScript Tools)
  1228. # ============================================================
  1229. @tool()
  1230. async def browser_evaluate(code: str) -> ToolResult:
  1231. """
  1232. 在页面中执行 JavaScript 代码
  1233. Execute JavaScript code in the page context
  1234. 允许在当前页面中执行任意 JavaScript 代码,用于复杂的页面操作或数据提取。
  1235. Args:
  1236. code: 要执行的 JavaScript 代码字符串
  1237. Returns:
  1238. ToolResult: 包含执行结果的工具返回对象
  1239. Example:
  1240. evaluate("document.title")
  1241. evaluate("document.querySelectorAll('a').length")
  1242. Note:
  1243. - 代码在页面上下文中执行,可以访问 DOM 和全局变量
  1244. - 返回值会被自动序列化为字符串
  1245. - 执行结果限制在 20k 字符以内
  1246. """
  1247. try:
  1248. browser, tools = await get_browser_session()
  1249. result = await tools.evaluate(
  1250. code=code,
  1251. browser_session=browser
  1252. )
  1253. return action_result_to_tool_result(result, "执行 JavaScript")
  1254. except Exception as e:
  1255. return ToolResult(
  1256. title="JavaScript 执行失败",
  1257. output="",
  1258. error=f"Failed to execute JavaScript: {str(e)}",
  1259. long_term_memory="JavaScript 执行失败"
  1260. )
  1261. @tool()
  1262. async def browser_ensure_login_with_cookies(cookie_type: str, url: str = "https://www.xiaohongshu.com") -> ToolResult:
  1263. """
  1264. 检查登录状态并在需要时注入 cookies
  1265. """
  1266. try:
  1267. browser, tools = await get_browser_session()
  1268. if url:
  1269. await tools.navigate(url=url, browser_session=browser)
  1270. await tools.wait(seconds=2, browser_session=browser)
  1271. check_login_js = """
  1272. (function() {
  1273. const loginBtn = document.querySelector('[class*="login"]') ||
  1274. document.querySelector('[href*="login"]') ||
  1275. Array.from(document.querySelectorAll('button, a')).find(el => (el.textContent || '').includes('登录'));
  1276. const userInfo = document.querySelector('[class*="user"]') ||
  1277. document.querySelector('[class*="avatar"]');
  1278. return {
  1279. needLogin: !!loginBtn && !userInfo,
  1280. hasLoginBtn: !!loginBtn,
  1281. hasUserInfo: !!userInfo
  1282. };
  1283. })()
  1284. """
  1285. result = await tools.evaluate(code=check_login_js, browser_session=browser)
  1286. status_output = result.extracted_content
  1287. if isinstance(status_output, str) and status_output.startswith("Result: "):
  1288. status_output = status_output[8:]
  1289. login_info: Dict[str, Any] = {}
  1290. if isinstance(status_output, str):
  1291. try:
  1292. login_info = json.loads(status_output)
  1293. except Exception:
  1294. login_info = {}
  1295. elif isinstance(status_output, dict):
  1296. login_info = status_output
  1297. if not login_info.get("needLogin"):
  1298. output = json.dumps({"need_login": False}, ensure_ascii=False)
  1299. return ToolResult(
  1300. title="已登录",
  1301. output=output,
  1302. long_term_memory=output
  1303. )
  1304. row = _fetch_cookie_row(cookie_type)
  1305. cookie_value = _extract_cookie_value(row)
  1306. if not cookie_value:
  1307. output = json.dumps({"need_login": True, "cookies_count": 0}, ensure_ascii=False)
  1308. return ToolResult(
  1309. title="未找到 cookies",
  1310. output=output,
  1311. error="未找到 cookies",
  1312. long_term_memory=output
  1313. )
  1314. domain, base_url = _cookie_domain_for_type(cookie_type, url)
  1315. cookies = _normalize_cookies(cookie_value, domain, base_url)
  1316. if not cookies:
  1317. output = json.dumps({"need_login": True, "cookies_count": 0}, ensure_ascii=False)
  1318. return ToolResult(
  1319. title="cookies 解析失败",
  1320. output=output,
  1321. error="cookies 解析失败",
  1322. long_term_memory=output
  1323. )
  1324. await browser._cdp_set_cookies(cookies)
  1325. if url:
  1326. await tools.navigate(url=url, browser_session=browser)
  1327. await tools.wait(seconds=2, browser_session=browser)
  1328. output = json.dumps({"need_login": True, "cookies_count": len(cookies)}, ensure_ascii=False)
  1329. return ToolResult(
  1330. title="已注入 cookies",
  1331. output=output,
  1332. long_term_memory=output
  1333. )
  1334. except Exception as e:
  1335. return ToolResult(
  1336. title="登录检查失败",
  1337. output="",
  1338. error=str(e),
  1339. long_term_memory="登录检查失败"
  1340. )
  1341. # ============================================================
  1342. # 等待用户操作工具 (Wait for User Action)
  1343. # ============================================================
  1344. @tool()
  1345. async def browser_wait_for_user_action(message: str = "Please complete the action in browser",
  1346. timeout: int = 300) -> ToolResult:
  1347. """
  1348. 等待用户在浏览器中完成操作(如登录)
  1349. Wait for user to complete an action in the browser (e.g., login)
  1350. 暂停自动化流程,等待用户手动完成某些操作(如登录、验证码等)。
  1351. Args:
  1352. message: 提示用户需要完成的操作
  1353. timeout: 最大等待时间(秒),默认 300 秒(5 分钟)
  1354. Returns:
  1355. ToolResult: 包含等待结果的工具返回对象
  1356. Example:
  1357. wait_for_user_action("Please login to Xiaohongshu", timeout=180)
  1358. wait_for_user_action("Please complete the CAPTCHA", timeout=60)
  1359. Note:
  1360. - 用户需要在浏览器窗口中手动完成操作
  1361. - 完成后按回车键继续
  1362. - 超时后会自动继续执行
  1363. """
  1364. try:
  1365. import asyncio
  1366. print(f"\n{'='*60}")
  1367. print(f"⏸️ WAITING FOR USER ACTION")
  1368. print(f"{'='*60}")
  1369. print(f"📝 {message}")
  1370. print(f"⏱️ Timeout: {timeout} seconds")
  1371. print(f"\n👉 Please complete the action in the browser window")
  1372. print(f"👉 Press ENTER when done, or wait for timeout")
  1373. print(f"{'='*60}\n")
  1374. # Wait for user input or timeout
  1375. try:
  1376. loop = asyncio.get_event_loop()
  1377. # Wait for either user input or timeout
  1378. await asyncio.wait_for(
  1379. loop.run_in_executor(None, input),
  1380. timeout=timeout
  1381. )
  1382. return ToolResult(
  1383. title="用户操作完成",
  1384. output=f"User completed: {message}",
  1385. long_term_memory=f"用户完成操作: {message}"
  1386. )
  1387. except asyncio.TimeoutError:
  1388. return ToolResult(
  1389. title="用户操作超时",
  1390. output=f"Timeout waiting for: {message}",
  1391. long_term_memory=f"等待用户操作超时: {message}"
  1392. )
  1393. except Exception as e:
  1394. return ToolResult(
  1395. title="等待用户操作失败",
  1396. output="",
  1397. error=f"Failed to wait for user action: {str(e)}",
  1398. long_term_memory="等待用户操作失败"
  1399. )
  1400. # ============================================================
  1401. # 任务完成工具 (Task Completion)
  1402. # ============================================================
  1403. @tool()
  1404. async def browser_done(text: str, success: bool = True,
  1405. files_to_display: Optional[List[str]] = None) -> ToolResult:
  1406. """
  1407. 标记任务完成并返回最终消息
  1408. Mark the task as complete and return final message to user
  1409. Args:
  1410. text: 给用户的最终消息
  1411. success: 任务是否成功完成
  1412. files_to_display: 可选的要显示的文件路径列表
  1413. Returns:
  1414. ToolResult: 完成结果
  1415. Example:
  1416. done("任务已完成,提取了10个产品信息", success=True)
  1417. """
  1418. try:
  1419. browser, tools = await get_browser_session()
  1420. result = await tools.done(
  1421. text=text,
  1422. success=success,
  1423. files_to_display=files_to_display,
  1424. file_system=_file_system
  1425. )
  1426. return action_result_to_tool_result(result, "任务完成")
  1427. except Exception as e:
  1428. return ToolResult(
  1429. title="标记任务完成失败",
  1430. output="",
  1431. error=f"Failed to complete task: {str(e)}",
  1432. long_term_memory="标记任务完成失败"
  1433. )
  1434. # ============================================================
  1435. # 导出所有工具函数(供外部使用)
  1436. # ============================================================
  1437. __all__ = [
  1438. # 会话管理
  1439. 'init_browser_session',
  1440. 'get_browser_session',
  1441. 'cleanup_browser_session',
  1442. 'kill_browser_session',
  1443. # 导航类工具
  1444. 'browser_navigate_to_url',
  1445. 'browser_search_web',
  1446. 'browser_go_back',
  1447. 'browser_wait',
  1448. # 元素交互工具
  1449. 'browser_click_element',
  1450. 'browser_input_text',
  1451. 'browser_send_keys',
  1452. 'browser_upload_file',
  1453. # 滚动和视图工具
  1454. 'browser_scroll_page',
  1455. 'browser_find_text',
  1456. 'browser_screenshot',
  1457. # 标签页管理工具
  1458. 'browser_switch_tab',
  1459. 'browser_close_tab',
  1460. # 下拉框工具
  1461. 'browser_get_dropdown_options',
  1462. 'browser_select_dropdown_option',
  1463. # 内容提取工具
  1464. 'browser_extract_content',
  1465. 'browser_get_page_html',
  1466. 'browser_read_long_content',
  1467. 'browser_get_selector_map',
  1468. # JavaScript 执行工具
  1469. 'browser_evaluate',
  1470. 'browser_ensure_login_with_cookies',
  1471. # 等待用户操作
  1472. 'browser_wait_for_user_action',
  1473. # 任务完成
  1474. 'browser_done',
  1475. ]