# baseClassTools.py
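"""
Native Browser-Use Tools Adapter

Implements all browser-operation tools directly on top of browser-use's native
classes (BrowserSession, Tools). Does not depend on Playwright; it is built
entirely on the CDP protocol.

Core features:
1. Persistent browser session - the browser is launched only once
2. Automatic state retention - login state, cookies, LocalStorage, etc.
3. Full low-level access - the CDP protocol can be used directly
4. Good performance - avoids repeatedly creating/destroying browser instances

Usage:
1. Call init_browser_session() when the Agent is initialized
2. Use the individual tool functions to perform browser operations
3. Call cleanup_browser_session() when the task ends
"""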
  16. import sys
  17. import os
  18. from typing import Optional, List
  19. from pathlib import Path
  20. # 将项目根目录添加到 Python 路径
  21. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  22. # 导入框架的工具装饰器和结果类
  23. from agent.tools import tool, ToolResult
  24. # 导入 browser-use 的核心类
  25. from browser_use import BrowserSession, BrowserProfile
  26. from browser_use.tools.service import Tools
  27. from browser_use.agent.views import ActionResult
  28. from browser_use.filesystem.file_system import FileSystem
  29. # ============================================================
  30. # 全局浏览器会话管理
  31. # ============================================================
  32. # 全局变量:浏览器会话和工具实例
  33. _browser_session: Optional[BrowserSession] = None
  34. _browser_tools: Optional[Tools] = None
  35. _file_system: Optional[FileSystem] = None
  36. async def init_browser_session(
  37. headless: bool = False,
  38. user_data_dir: Optional[str] = None,
  39. profile_name: str = "default",
  40. browser_profile: Optional[BrowserProfile] = None,
  41. **kwargs
  42. ) -> tuple[BrowserSession, Tools]:
  43. """
  44. 初始化全局浏览器会话
  45. Args:
  46. headless: 是否无头模式
  47. user_data_dir: 用户数据目录(用于保存登录状态)
  48. profile_name: 配置文件名称
  49. browser_profile: BrowserProfile 对象(用于预设 cookies 等)
  50. **kwargs: 其他 BrowserSession 参数
  51. Returns:
  52. (BrowserSession, Tools) 元组
  53. """
  54. global _browser_session, _browser_tools, _file_system
  55. if _browser_session is not None:
  56. return _browser_session, _browser_tools
  57. # 设置用户数据目录(持久化登录状态)
  58. if user_data_dir is None and profile_name:
  59. user_data_dir = str(Path.home() / ".browser_use" / "profiles" / profile_name)
  60. Path(user_data_dir).mkdir(parents=True, exist_ok=True)
  61. # 创建浏览器会话
  62. # 明确指定 is_local=True 以确保本地浏览器启动
  63. session_params = {
  64. "headless": headless,
  65. "is_local": True, # 明确指定本地浏览器
  66. }
  67. # macOS 上显式指定 Chrome 路径
  68. import platform
  69. if platform.system() == "Darwin": # macOS
  70. chrome_path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
  71. if Path(chrome_path).exists():
  72. session_params["executable_path"] = chrome_path
  73. # 只在有值时才添加 user_data_dir
  74. if user_data_dir:
  75. session_params["user_data_dir"] = user_data_dir
  76. # 只在有值时才添加 browser_profile
  77. if browser_profile:
  78. session_params["browser_profile"] = browser_profile
  79. # 合并其他参数
  80. session_params.update(kwargs)
  81. _browser_session = BrowserSession(**session_params)
  82. # 启动浏览器
  83. await _browser_session.start()
  84. # 创建工具实例
  85. _browser_tools = Tools()
  86. # 创建文件系统实例(用于文件操作)
  87. base_dir = Path.cwd() / ".browser_use_files"
  88. base_dir.mkdir(parents=True, exist_ok=True)
  89. _file_system = FileSystem(base_dir=str(base_dir))
  90. return _browser_session, _browser_tools
  91. async def get_browser_session() -> tuple[BrowserSession, Tools]:
  92. """
  93. 获取当前浏览器会话,如果不存在则自动创建
  94. Returns:
  95. (BrowserSession, Tools) 元组
  96. """
  97. global _browser_session, _browser_tools
  98. if _browser_session is None:
  99. await init_browser_session()
  100. return _browser_session, _browser_tools
  101. async def cleanup_browser_session():
  102. """
  103. 清理浏览器会话
  104. 优雅地停止浏览器但保留会话状态
  105. """
  106. global _browser_session, _browser_tools, _file_system
  107. if _browser_session is not None:
  108. await _browser_session.stop()
  109. _browser_session = None
  110. _browser_tools = None
  111. _file_system = None
  112. async def kill_browser_session():
  113. """
  114. 强制终止浏览器会话
  115. 完全关闭浏览器进程
  116. """
  117. global _browser_session, _browser_tools, _file_system
  118. if _browser_session is not None:
  119. await _browser_session.kill()
  120. _browser_session = None
  121. _browser_tools = None
  122. _file_system = None
  123. # ============================================================
  124. # 辅助函数:ActionResult 转 ToolResult
  125. # ============================================================
  126. def action_result_to_tool_result(result: ActionResult, title: str = None) -> ToolResult:
  127. """
  128. 将 browser-use 的 ActionResult 转换为框架的 ToolResult
  129. Args:
  130. result: browser-use 的 ActionResult
  131. title: 可选的标题(如果不提供则从 result 推断)
  132. Returns:
  133. ToolResult
  134. """
  135. if result.error:
  136. return ToolResult(
  137. title=title or "操作失败",
  138. output="",
  139. error=result.error,
  140. long_term_memory=result.long_term_memory or result.error
  141. )
  142. return ToolResult(
  143. title=title or "操作成功",
  144. output=result.extracted_content or "",
  145. long_term_memory=result.long_term_memory or result.extracted_content or "",
  146. metadata=result.metadata or {}
  147. )
  148. # ============================================================
  149. # 导航类工具 (Navigation Tools)
  150. # ============================================================
  151. @tool()
  152. async def navigate_to_url(url: str, new_tab: bool = False, uid: str = "") -> ToolResult:
  153. """
  154. 导航到指定的 URL
  155. Navigate to a specific URL
  156. 使用 browser-use 的原生导航功能,支持在新标签页打开。
  157. Args:
  158. url: 要访问的 URL 地址
  159. new_tab: 是否在新标签页中打开(默认 False)
  160. uid: 用户 ID(由框架自动注入)
  161. Returns:
  162. ToolResult: 包含导航结果的工具返回对象
  163. Example:
  164. navigate_to_url("https://www.baidu.com")
  165. navigate_to_url("https://www.google.com", new_tab=True)
  166. """
  167. try:
  168. browser, tools = await get_browser_session()
  169. # 使用 browser-use 的 navigate 工具
  170. result = await tools.navigate(
  171. url=url,
  172. new_tab=new_tab,
  173. browser_session=browser
  174. )
  175. return action_result_to_tool_result(result, f"导航到 {url}")
  176. except Exception as e:
  177. return ToolResult(
  178. title="导航失败",
  179. output="",
  180. error=f"Failed to navigate to {url}: {str(e)}",
  181. long_term_memory=f"导航到 {url} 失败"
  182. )
  183. @tool()
  184. async def search_web(query: str, engine: str = "google", uid: str = "") -> ToolResult:
  185. """
  186. 使用搜索引擎搜索
  187. Search the web using a search engine
  188. Args:
  189. query: 搜索关键词
  190. engine: 搜索引擎 (google, duckduckgo, bing) - 默认: google
  191. uid: 用户 ID(由框架自动注入)
  192. Returns:
  193. ToolResult: 搜索结果
  194. Example:
  195. search_web("Python async programming", engine="google")
  196. """
  197. try:
  198. browser, tools = await get_browser_session()
  199. # 使用 browser-use 的 search 工具
  200. result = await tools.search(
  201. query=query,
  202. engine=engine,
  203. browser_session=browser
  204. )
  205. return action_result_to_tool_result(result, f"搜索: {query}")
  206. except Exception as e:
  207. return ToolResult(
  208. title="搜索失败",
  209. output="",
  210. error=f"Search failed: {str(e)}",
  211. long_term_memory=f"搜索 '{query}' 失败"
  212. )
  213. @tool()
  214. async def go_back(uid: str = "") -> ToolResult:
  215. """
  216. 返回到上一个页面
  217. Go back to the previous page
  218. 模拟浏览器的"后退"按钮功能。
  219. Args:
  220. uid: 用户 ID(由框架自动注入)
  221. Returns:
  222. ToolResult: 包含返回操作结果的工具返回对象
  223. """
  224. try:
  225. browser, tools = await get_browser_session()
  226. result = await tools.go_back(browser_session=browser)
  227. return action_result_to_tool_result(result, "返回上一页")
  228. except Exception as e:
  229. return ToolResult(
  230. title="返回失败",
  231. output="",
  232. error=f"Failed to go back: {str(e)}",
  233. long_term_memory="返回上一页失败"
  234. )
  235. @tool()
  236. async def wait(seconds: int = 3, uid: str = "") -> ToolResult:
  237. """
  238. 等待指定的秒数
  239. Wait for a specified number of seconds
  240. 用于等待页面加载、动画完成或其他异步操作。
  241. Args:
  242. seconds: 等待时间(秒),最大30秒
  243. uid: 用户 ID(由框架自动注入)
  244. Returns:
  245. ToolResult: 包含等待操作结果的工具返回对象
  246. Example:
  247. wait(5) # 等待5秒
  248. """
  249. try:
  250. browser, tools = await get_browser_session()
  251. result = await tools.wait(seconds=seconds, browser_session=browser)
  252. return action_result_to_tool_result(result, f"等待 {seconds} 秒")
  253. except Exception as e:
  254. return ToolResult(
  255. title="等待失败",
  256. output="",
  257. error=f"Failed to wait: {str(e)}",
  258. long_term_memory="等待失败"
  259. )
  260. # ============================================================
  261. # 元素交互工具 (Element Interaction Tools)
  262. # ============================================================
  263. @tool()
  264. async def click_element(index: int, uid: str = "") -> ToolResult:
  265. """
  266. 通过索引点击页面元素
  267. Click an element by index
  268. Args:
  269. index: 元素索引(从浏览器状态中获取)
  270. uid: 用户 ID(由框架自动注入)
  271. Returns:
  272. ToolResult: 包含点击操作结果的工具返回对象
  273. Example:
  274. click_element(index=5)
  275. Note:
  276. 需要先通过 get_selector_map 获取页面元素索引
  277. """
  278. try:
  279. browser, tools = await get_browser_session()
  280. result = await tools.click(
  281. index=index,
  282. browser_session=browser
  283. )
  284. return action_result_to_tool_result(result, f"点击元素 {index}")
  285. except Exception as e:
  286. return ToolResult(
  287. title="点击失败",
  288. output="",
  289. error=f"Failed to click element {index}: {str(e)}",
  290. long_term_memory=f"点击元素 {index} 失败"
  291. )
  292. @tool()
  293. async def input_text(index: int, text: str, clear: bool = True, uid: str = "") -> ToolResult:
  294. """
  295. 在指定元素中输入文本
  296. Input text into an element
  297. Args:
  298. index: 元素索引(从浏览器状态中获取)
  299. text: 要输入的文本内容
  300. clear: 是否先清除现有文本(默认 True)
  301. uid: 用户 ID(由框架自动注入)
  302. Returns:
  303. ToolResult: 包含输入操作结果的工具返回对象
  304. Example:
  305. input_text(index=0, text="Hello World", clear=True)
  306. """
  307. try:
  308. browser, tools = await get_browser_session()
  309. result = await tools.input(
  310. index=index,
  311. text=text,
  312. clear=clear,
  313. browser_session=browser
  314. )
  315. return action_result_to_tool_result(result, f"输入文本到元素 {index}")
  316. except Exception as e:
  317. return ToolResult(
  318. title="输入失败",
  319. output="",
  320. error=f"Failed to input text into element {index}: {str(e)}",
  321. long_term_memory=f"输入文本失败"
  322. )
  323. @tool()
  324. async def send_keys(keys: str, uid: str = "") -> ToolResult:
  325. """
  326. 发送键盘按键或快捷键
  327. Send keyboard keys or shortcuts
  328. 支持发送单个按键、组合键和快捷键。
  329. Args:
  330. keys: 要发送的按键字符串
  331. - 单个按键: "Enter", "Escape", "PageDown", "Tab"
  332. - 组合键: "Control+o", "Shift+Tab", "Alt+F4"
  333. - 功能键: "F1", "F2", ..., "F12"
  334. uid: 用户 ID(由框架自动注入)
  335. Returns:
  336. ToolResult: 包含按键操作结果的工具返回对象
  337. Example:
  338. send_keys("Enter")
  339. send_keys("Control+A")
  340. """
  341. try:
  342. browser, tools = await get_browser_session()
  343. result = await tools.send_keys(
  344. keys=keys,
  345. browser_session=browser
  346. )
  347. return action_result_to_tool_result(result, f"发送按键: {keys}")
  348. except Exception as e:
  349. return ToolResult(
  350. title="发送按键失败",
  351. output="",
  352. error=f"Failed to send keys: {str(e)}",
  353. long_term_memory="发送按键失败"
  354. )
  355. @tool()
  356. async def upload_file(index: int, path: str, uid: str = "") -> ToolResult:
  357. """
  358. 上传文件到文件输入元素
  359. Upload a file to a file input element
  360. Args:
  361. index: 文件输入框的元素索引
  362. path: 要上传的文件路径(绝对路径)
  363. uid: 用户 ID(由框架自动注入)
  364. Returns:
  365. ToolResult: 包含上传操作结果的工具返回对象
  366. Example:
  367. upload_file(index=7, path="/path/to/file.pdf")
  368. Note:
  369. 文件必须存在且路径必须是绝对路径
  370. """
  371. try:
  372. browser, tools = await get_browser_session()
  373. result = await tools.upload_file(
  374. index=index,
  375. path=path,
  376. browser_session=browser,
  377. available_file_paths=[path],
  378. file_system=_file_system
  379. )
  380. return action_result_to_tool_result(result, f"上传文件: {path}")
  381. except Exception as e:
  382. return ToolResult(
  383. title="上传失败",
  384. output="",
  385. error=f"Failed to upload file: {str(e)}",
  386. long_term_memory=f"上传文件 {path} 失败"
  387. )
  388. # ============================================================
  389. # 滚动和视图工具 (Scroll & View Tools)
  390. # ============================================================
  391. @tool()
  392. async def scroll_page(down: bool = True, pages: float = 1.0,
  393. index: Optional[int] = None, uid: str = "") -> ToolResult:
  394. """
  395. 滚动页面或元素
  396. Scroll the page or a specific element
  397. Args:
  398. down: True 向下滚动,False 向上滚动
  399. pages: 滚动页数(0.5=半页,1=全页,10=滚动到底部/顶部)
  400. index: 可选,滚动特定元素(如下拉框内部)
  401. uid: 用户 ID(由框架自动注入)
  402. Returns:
  403. ToolResult: 滚动结果
  404. Example:
  405. scroll_page(down=True, pages=2.0) # 向下滚动2页
  406. scroll_page(down=False, pages=1.0) # 向上滚动1页
  407. """
  408. try:
  409. browser, tools = await get_browser_session()
  410. result = await tools.scroll(
  411. down=down,
  412. pages=pages,
  413. index=index,
  414. browser_session=browser
  415. )
  416. direction = "向下" if down else "向上"
  417. return action_result_to_tool_result(result, f"{direction}滚动 {pages} 页")
  418. except Exception as e:
  419. return ToolResult(
  420. title="滚动失败",
  421. output="",
  422. error=f"Failed to scroll: {str(e)}",
  423. long_term_memory="滚动失败"
  424. )
  425. @tool()
  426. async def find_text(text: str, uid: str = "") -> ToolResult:
  427. """
  428. 查找页面中的文本并滚动到该位置
  429. Find text on the page and scroll to it
  430. 在页面中搜索指定的文本,找到后自动滚动到该位置。
  431. Args:
  432. text: 要查找的文本内容
  433. uid: 用户 ID(由框架自动注入)
  434. Returns:
  435. ToolResult: 包含查找结果的工具返回对象
  436. Example:
  437. find_text("Privacy Policy")
  438. """
  439. try:
  440. browser, tools = await get_browser_session()
  441. result = await tools.find_text(
  442. text=text,
  443. browser_session=browser
  444. )
  445. return action_result_to_tool_result(result, f"查找文本: {text}")
  446. except Exception as e:
  447. return ToolResult(
  448. title="查找失败",
  449. output="",
  450. error=f"Failed to find text: {str(e)}",
  451. long_term_memory=f"查找文本 '{text}' 失败"
  452. )
  453. @tool()
  454. async def screenshot(uid: str = "") -> ToolResult:
  455. """
  456. 请求在下次观察中包含页面截图
  457. Request a screenshot to be included in the next observation
  458. 用于视觉检查页面状态,帮助理解页面布局和内容。
  459. Args:
  460. uid: 用户 ID(由框架自动注入)
  461. Returns:
  462. ToolResult: 包含截图请求结果的工具返回对象
  463. Example:
  464. screenshot()
  465. Note:
  466. 截图会在下次页面观察时自动包含在结果中。
  467. """
  468. try:
  469. browser, tools = await get_browser_session()
  470. result = await tools.screenshot(browser_session=browser)
  471. return action_result_to_tool_result(result, "截图请求")
  472. except Exception as e:
  473. return ToolResult(
  474. title="截图失败",
  475. output="",
  476. error=f"Failed to capture screenshot: {str(e)}",
  477. long_term_memory="截图失败"
  478. )
  479. # ============================================================
  480. # 标签页管理工具 (Tab Management Tools)
  481. # ============================================================
  482. @tool()
  483. async def switch_tab(tab_id: str, uid: str = "") -> ToolResult:
  484. """
  485. 切换到指定标签页
  486. Switch to a different browser tab
  487. Args:
  488. tab_id: 4字符标签ID(target_id 的最后4位)
  489. uid: 用户 ID(由框架自动注入)
  490. Returns:
  491. ToolResult: 切换结果
  492. Example:
  493. switch_tab(tab_id="a3f2")
  494. """
  495. try:
  496. browser, tools = await get_browser_session()
  497. result = await tools.switch(
  498. tab_id=tab_id,
  499. browser_session=browser
  500. )
  501. return action_result_to_tool_result(result, f"切换到标签页 {tab_id}")
  502. except Exception as e:
  503. return ToolResult(
  504. title="切换标签页失败",
  505. output="",
  506. error=f"Failed to switch tab: {str(e)}",
  507. long_term_memory=f"切换到标签页 {tab_id} 失败"
  508. )
  509. @tool()
  510. async def close_tab(tab_id: str, uid: str = "") -> ToolResult:
  511. """
  512. 关闭指定标签页
  513. Close a browser tab
  514. Args:
  515. tab_id: 4字符标签ID
  516. uid: 用户 ID(由框架自动注入)
  517. Returns:
  518. ToolResult: 关闭结果
  519. Example:
  520. close_tab(tab_id="a3f2")
  521. """
  522. try:
  523. browser, tools = await get_browser_session()
  524. result = await tools.close(
  525. tab_id=tab_id,
  526. browser_session=browser
  527. )
  528. return action_result_to_tool_result(result, f"关闭标签页 {tab_id}")
  529. except Exception as e:
  530. return ToolResult(
  531. title="关闭标签页失败",
  532. output="",
  533. error=f"Failed to close tab: {str(e)}",
  534. long_term_memory=f"关闭标签页 {tab_id} 失败"
  535. )
  536. # ============================================================
  537. # 下拉框工具 (Dropdown Tools)
  538. # ============================================================
  539. @tool()
  540. async def get_dropdown_options(index: int, uid: str = "") -> ToolResult:
  541. """
  542. 获取下拉框的所有选项
  543. Get options from a dropdown element
  544. Args:
  545. index: 下拉框的元素索引
  546. uid: 用户 ID(由框架自动注入)
  547. Returns:
  548. ToolResult: 包含所有选项的结果
  549. Example:
  550. get_dropdown_options(index=8)
  551. """
  552. try:
  553. browser, tools = await get_browser_session()
  554. result = await tools.dropdown_options(
  555. index=index,
  556. browser_session=browser
  557. )
  558. return action_result_to_tool_result(result, f"获取下拉框选项: {index}")
  559. except Exception as e:
  560. return ToolResult(
  561. title="获取下拉框选项失败",
  562. output="",
  563. error=f"Failed to get dropdown options: {str(e)}",
  564. long_term_memory=f"获取下拉框 {index} 选项失败"
  565. )
  566. @tool()
  567. async def select_dropdown_option(index: int, text: str, uid: str = "") -> ToolResult:
  568. """
  569. 选择下拉框选项
  570. Select an option from a dropdown
  571. Args:
  572. index: 下拉框的元素索引
  573. text: 要选择的选项文本(精确匹配)
  574. uid: 用户 ID(由框架自动注入)
  575. Returns:
  576. ToolResult: 选择结果
  577. Example:
  578. select_dropdown_option(index=8, text="Option 2")
  579. """
  580. try:
  581. browser, tools = await get_browser_session()
  582. result = await tools.select_dropdown(
  583. index=index,
  584. text=text,
  585. browser_session=browser
  586. )
  587. return action_result_to_tool_result(result, f"选择下拉框选项: {text}")
  588. except Exception as e:
  589. return ToolResult(
  590. title="选择下拉框选项失败",
  591. output="",
  592. error=f"Failed to select dropdown option: {str(e)}",
  593. long_term_memory=f"选择选项 '{text}' 失败"
  594. )
  595. # ============================================================
  596. # 内容提取工具 (Content Extraction Tools)
  597. # ============================================================
  598. @tool()
  599. async def extract_content(query: str, extract_links: bool = False,
  600. start_from_char: int = 0, uid: str = "") -> ToolResult:
  601. """
  602. 使用 LLM 从页面提取结构化数据
  603. Extract content from the current page using LLM
  604. Args:
  605. query: 提取查询(告诉 LLM 要提取什么内容)
  606. extract_links: 是否提取链接(默认 False,节省 token)
  607. start_from_char: 从哪个字符开始提取(用于分页提取大内容)
  608. uid: 用户 ID(由框架自动注入)
  609. Returns:
  610. ToolResult: 提取的内容
  611. Example:
  612. extract_content(query="提取页面上所有产品的名称和价格", extract_links=True)
  613. Note:
  614. 需要配置 page_extraction_llm,否则会失败
  615. 支持分页提取,最大100k字符
  616. """
  617. try:
  618. browser, tools = await get_browser_session()
  619. # 注意:extract 需要 page_extraction_llm 参数
  620. # 这里我们假设用户会在初始化时配置 LLM
  621. # 如果没有配置,会抛出异常
  622. result = await tools.extract(
  623. query=query,
  624. extract_links=extract_links,
  625. start_from_char=start_from_char,
  626. browser_session=browser,
  627. page_extraction_llm=None, # 需要用户配置
  628. file_system=_file_system
  629. )
  630. return action_result_to_tool_result(result, f"提取内容: {query}")
  631. except Exception as e:
  632. return ToolResult(
  633. title="内容提取失败",
  634. output="",
  635. error=f"Failed to extract content: {str(e)}",
  636. long_term_memory=f"提取内容失败: {query}"
  637. )
  638. @tool()
  639. async def get_page_html(uid: str = "") -> ToolResult:
  640. """
  641. 获取当前页面的完整 HTML
  642. Get the full HTML of the current page
  643. 返回当前页面的完整 HTML 源代码。
  644. Args:
  645. uid: 用户 ID(由框架自动注入)
  646. Returns:
  647. ToolResult: 包含页面 HTML 的工具返回对象
  648. Example:
  649. get_page_html()
  650. Note:
  651. - 返回的是完整的 HTML 源代码
  652. - 输出会被限制在 10000 字符以内(完整内容保存在 metadata 中)
  653. """
  654. try:
  655. browser, tools = await get_browser_session()
  656. # 使用 CDP 获取页面 HTML
  657. cdp = await browser.get_or_create_cdp_session()
  658. # 获取页面内容
  659. result = await cdp.cdp_client.send.Runtime.evaluate(
  660. params={'expression': 'document.documentElement.outerHTML'},
  661. session_id=cdp.session_id
  662. )
  663. html = result.get('result', {}).get('value', '')
  664. # 获取 URL 和标题
  665. url = await browser.get_current_page_url()
  666. title_result = await cdp.cdp_client.send.Runtime.evaluate(
  667. params={'expression': 'document.title'},
  668. session_id=cdp.session_id
  669. )
  670. title = title_result.get('result', {}).get('value', '')
  671. # 限制输出大小
  672. output_html = html
  673. if len(html) > 10000:
  674. output_html = html[:10000] + "... (truncated)"
  675. return ToolResult(
  676. title=f"获取 HTML: {url}",
  677. output=f"页面: {title}\nURL: {url}\n\nHTML:\n{output_html}",
  678. long_term_memory=f"获取 HTML: {url}",
  679. metadata={"url": url, "title": title, "html": html}
  680. )
  681. except Exception as e:
  682. return ToolResult(
  683. title="获取 HTML 失败",
  684. output="",
  685. error=f"Failed to get page HTML: {str(e)}",
  686. long_term_memory="获取 HTML 失败"
  687. )
  688. @tool()
  689. async def get_selector_map(uid: str = "") -> ToolResult:
  690. """
  691. 获取当前页面的元素索引映射
  692. Get the selector map of interactive elements on the current page
  693. 返回页面所有可交互元素的索引字典,用于后续的元素操作。
  694. Args:
  695. uid: 用户 ID(由框架自动注入)
  696. Returns:
  697. ToolResult: 包含元素映射的工具返回对象
  698. Example:
  699. get_selector_map()
  700. Note:
  701. 返回的索引可以用于 click_element, input_text 等操作
  702. """
  703. try:
  704. browser, tools = await get_browser_session()
  705. # 获取选择器映射
  706. selector_map = await browser.get_selector_map()
  707. # 构建输出信息
  708. elements_info = []
  709. for index, node in list(selector_map.items())[:20]: # 只显示前20个
  710. tag = node.tag_name
  711. attrs = node.attributes or {}
  712. text = attrs.get('aria-label') or attrs.get('placeholder') or attrs.get('value', '')
  713. elements_info.append(f"索引 {index}: <{tag}> {text[:50]}")
  714. output = f"找到 {len(selector_map)} 个交互元素\n\n"
  715. output += "\n".join(elements_info)
  716. if len(selector_map) > 20:
  717. output += f"\n... 还有 {len(selector_map) - 20} 个元素"
  718. return ToolResult(
  719. title="获取元素映射",
  720. output=output,
  721. long_term_memory=f"获取到 {len(selector_map)} 个交互元素",
  722. metadata={"selector_map": {k: str(v) for k, v in list(selector_map.items())[:100]}}
  723. )
  724. except Exception as e:
  725. return ToolResult(
  726. title="获取元素映射失败",
  727. output="",
  728. error=f"Failed to get selector map: {str(e)}",
  729. long_term_memory="获取元素映射失败"
  730. )
  731. # ============================================================
  732. # JavaScript 执行工具 (JavaScript Tools)
  733. # ============================================================
  734. @tool()
  735. async def evaluate(code: str, uid: str = "") -> ToolResult:
  736. """
  737. 在页面中执行 JavaScript 代码
  738. Execute JavaScript code in the page context
  739. 允许在当前页面中执行任意 JavaScript 代码,用于复杂的页面操作或数据提取。
  740. Args:
  741. code: 要执行的 JavaScript 代码字符串
  742. uid: 用户 ID(由框架自动注入)
  743. Returns:
  744. ToolResult: 包含执行结果的工具返回对象
  745. Example:
  746. evaluate("document.title")
  747. evaluate("document.querySelectorAll('a').length")
  748. Note:
  749. - 代码在页面上下文中执行,可以访问 DOM 和全局变量
  750. - 返回值会被自动序列化为字符串
  751. - 执行结果限制在 20k 字符以内
  752. """
  753. try:
  754. browser, tools = await get_browser_session()
  755. result = await tools.evaluate(
  756. code=code,
  757. browser_session=browser
  758. )
  759. return action_result_to_tool_result(result, "执行 JavaScript")
  760. except Exception as e:
  761. return ToolResult(
  762. title="JavaScript 执行失败",
  763. output="",
  764. error=f"Failed to execute JavaScript: {str(e)}",
  765. long_term_memory="JavaScript 执行失败"
  766. )
  767. # ============================================================
  768. # 文件系统工具 (File System Tools)
  769. # ============================================================
  770. @tool()
  771. async def write_file(file_name: str, content: str, append: bool = False, uid: str = "") -> ToolResult:
  772. """
  773. 写入文件到本地文件系统
  774. Write content to a local file
  775. 支持多种文件格式的写入操作。
  776. Args:
  777. file_name: 文件名(包含扩展名)
  778. content: 要写入的文件内容
  779. append: 是否追加模式(默认 False,覆盖写入)
  780. uid: 用户 ID(由框架自动注入)
  781. Returns:
  782. ToolResult: 包含写入结果的工具返回对象
  783. Example:
  784. write_file("output.txt", "Hello World")
  785. write_file("data.json", '{"key": "value"}')
  786. Note:
  787. 支持的文件格式: .txt, .md, .json, .jsonl, .csv, .pdf
  788. """
  789. try:
  790. browser, tools = await get_browser_session()
  791. result = await tools.write_file(
  792. file_name=file_name,
  793. content=content,
  794. append=append,
  795. file_system=_file_system
  796. )
  797. return action_result_to_tool_result(result, f"写入文件: {file_name}")
  798. except Exception as e:
  799. return ToolResult(
  800. title="写入文件失败",
  801. output="",
  802. error=f"Failed to write file: {str(e)}",
  803. long_term_memory=f"写入文件 {file_name} 失败"
  804. )
  805. @tool()
  806. async def read_file(file_name: str, uid: str = "") -> ToolResult:
  807. """
  808. 读取文件内容
  809. Read content from a local file
  810. 支持多种文件格式的读取操作。
  811. Args:
  812. file_name: 文件名(包含扩展名)
  813. uid: 用户 ID(由框架自动注入)
  814. Returns:
  815. ToolResult: 包含文件内容的工具返回对象
  816. Example:
  817. read_file("input.txt")
  818. read_file("data.json")
  819. Note:
  820. 支持的文件格式: 文本文件、PDF、DOCX、图片等
  821. """
  822. try:
  823. browser, tools = await get_browser_session()
  824. result = await tools.read_file(
  825. file_name=file_name,
  826. available_file_paths=[],
  827. file_system=_file_system
  828. )
  829. return action_result_to_tool_result(result, f"读取文件: {file_name}")
  830. except Exception as e:
  831. return ToolResult(
  832. title="读取文件失败",
  833. output="",
  834. error=f"Failed to read file: {str(e)}",
  835. long_term_memory=f"读取文件 {file_name} 失败"
  836. )
  837. @tool()
  838. async def replace_file(file_name: str, old_str: str, new_str: str, uid: str = "") -> ToolResult:
  839. """
  840. 替换文件中的特定文本
  841. Replace specific text in a file
  842. 在文件中查找并替换指定的文本内容。
  843. Args:
  844. file_name: 文件名(包含扩展名)
  845. old_str: 要替换的文本
  846. new_str: 新文本
  847. uid: 用户 ID(由框架自动注入)
  848. Returns:
  849. ToolResult: 包含替换结果的工具返回对象
  850. Example:
  851. replace_file("config.txt", "old_value", "new_value")
  852. Note:
  853. - 会替换文件中所有匹配的文本
  854. - 如果找不到要替换的文本,会返回警告
  855. """
  856. try:
  857. browser, tools = await get_browser_session()
  858. result = await tools.replace_file(
  859. file_name=file_name,
  860. old_str=old_str,
  861. new_str=new_str,
  862. file_system=_file_system
  863. )
  864. return action_result_to_tool_result(result, f"替换文件内容: {file_name}")
  865. except Exception as e:
  866. return ToolResult(
  867. title="替换文件失败",
  868. output="",
  869. error=f"Failed to replace file content: {str(e)}",
  870. long_term_memory=f"替换文件 {file_name} 失败"
  871. )
  872. # ============================================================
  873. # 等待用户操作工具 (Wait for User Action)
  874. # ============================================================
  875. @tool()
  876. async def wait_for_user_action(message: str = "Please complete the action in browser",
  877. timeout: int = 300, uid: str = "") -> ToolResult:
  878. """
  879. 等待用户在浏览器中完成操作(如登录)
  880. Wait for user to complete an action in the browser (e.g., login)
  881. 暂停自动化流程,等待用户手动完成某些操作(如登录、验证码等)。
  882. Args:
  883. message: 提示用户需要完成的操作
  884. timeout: 最大等待时间(秒),默认 300 秒(5 分钟)
  885. uid: 用户 ID(由框架自动注入)
  886. Returns:
  887. ToolResult: 包含等待结果的工具返回对象
  888. Example:
  889. wait_for_user_action("Please login to Xiaohongshu", timeout=180)
  890. wait_for_user_action("Please complete the CAPTCHA", timeout=60)
  891. Note:
  892. - 用户需要在浏览器窗口中手动完成操作
  893. - 完成后按回车键继续
  894. - 超时后会自动继续执行
  895. """
  896. try:
  897. import asyncio
  898. print(f"\n{'='*60}")
  899. print(f"⏸️ WAITING FOR USER ACTION")
  900. print(f"{'='*60}")
  901. print(f"📝 {message}")
  902. print(f"⏱️ Timeout: {timeout} seconds")
  903. print(f"\n👉 Please complete the action in the browser window")
  904. print(f"👉 Press ENTER when done, or wait for timeout")
  905. print(f"{'='*60}\n")
  906. # Wait for user input or timeout
  907. try:
  908. loop = asyncio.get_event_loop()
  909. # Wait for either user input or timeout
  910. await asyncio.wait_for(
  911. loop.run_in_executor(None, input),
  912. timeout=timeout
  913. )
  914. return ToolResult(
  915. title="用户操作完成",
  916. output=f"User completed: {message}",
  917. long_term_memory=f"用户完成操作: {message}"
  918. )
  919. except asyncio.TimeoutError:
  920. return ToolResult(
  921. title="用户操作超时",
  922. output=f"Timeout waiting for: {message}",
  923. long_term_memory=f"等待用户操作超时: {message}"
  924. )
  925. except Exception as e:
  926. return ToolResult(
  927. title="等待用户操作失败",
  928. output="",
  929. error=f"Failed to wait for user action: {str(e)}",
  930. long_term_memory="等待用户操作失败"
  931. )
  932. # ============================================================
  933. # 任务完成工具 (Task Completion)
  934. # ============================================================
  935. @tool()
  936. async def done(text: str, success: bool = True,
  937. files_to_display: Optional[List[str]] = None, uid: str = "") -> ToolResult:
  938. """
  939. 标记任务完成并返回最终消息
  940. Mark the task as complete and return final message to user
  941. Args:
  942. text: 给用户的最终消息
  943. success: 任务是否成功完成
  944. files_to_display: 可选的要显示的文件路径列表
  945. uid: 用户 ID(由框架自动注入)
  946. Returns:
  947. ToolResult: 完成结果
  948. Example:
  949. done("任务已完成,提取了10个产品信息", success=True)
  950. """
  951. try:
  952. browser, tools = await get_browser_session()
  953. result = await tools.done(
  954. text=text,
  955. success=success,
  956. files_to_display=files_to_display,
  957. file_system=_file_system
  958. )
  959. return action_result_to_tool_result(result, "任务完成")
  960. except Exception as e:
  961. return ToolResult(
  962. title="标记任务完成失败",
  963. output="",
  964. error=f"Failed to complete task: {str(e)}",
  965. long_term_memory="标记任务完成失败"
  966. )
  967. # ============================================================
  968. # 导出所有工具函数(供外部使用)
  969. # ============================================================
  970. __all__ = [
  971. # 会话管理
  972. 'init_browser_session',
  973. 'get_browser_session',
  974. 'cleanup_browser_session',
  975. 'kill_browser_session',
  976. # 导航类工具
  977. 'navigate_to_url',
  978. 'search_web',
  979. 'go_back',
  980. 'wait',
  981. # 元素交互工具
  982. 'click_element',
  983. 'input_text',
  984. 'send_keys',
  985. 'upload_file',
  986. # 滚动和视图工具
  987. 'scroll_page',
  988. 'find_text',
  989. 'screenshot',
  990. # 标签页管理工具
  991. 'switch_tab',
  992. 'close_tab',
  993. # 下拉框工具
  994. 'get_dropdown_options',
  995. 'select_dropdown_option',
  996. # 内容提取工具
  997. 'extract_content',
  998. 'get_page_html',
  999. 'get_selector_map',
  1000. # JavaScript 执行工具
  1001. 'evaluate',
  1002. # 文件系统工具
  1003. 'write_file',
  1004. 'read_file',
  1005. 'replace_file',
  1006. # 等待用户操作
  1007. 'wait_for_user_action',
  1008. # 任务完成
  1009. 'done',
  1010. ]