baseClass.py 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307
  1. """
  2. Browser-Use 原生工具适配器
  3. Native Browser-Use Tools Adapter
  4. 直接使用 browser-use 的原生类(BrowserSession, Tools)实现所有浏览器操作工具。
  5. 不依赖 Playwright,完全基于 CDP 协议。
  6. 核心特性:
  7. 1. 浏览器会话持久化 - 只启动一次浏览器
  8. 2. 状态自动保持 - 登录状态、Cookie、LocalStorage 等
  9. 3. 完整的底层访问 - 可以直接使用 CDP 协议
  10. 4. 性能优异 - 避免频繁创建/销毁浏览器实例
  11. 使用方法:
  12. 1. 在 Agent 初始化时调用 init_browser_session()
  13. 2. 使用各个工具函数执行浏览器操作
  14. 3. 任务结束时调用 cleanup_browser_session()
  15. """
  16. import sys
  17. import os
  18. from typing import Optional, List
  19. from pathlib import Path
  20. # 将项目根目录添加到 Python 路径
  21. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  22. # 导入框架的工具装饰器和结果类
  23. from agent.tools import tool, ToolResult
  24. # 导入 browser-use 的核心类
  25. from browser_use import BrowserSession, BrowserProfile
  26. from browser_use.tools.service import Tools
  27. from browser_use.agent.views import ActionResult
  28. from browser_use.filesystem.file_system import FileSystem
# ============================================================
# Global browser session management
# ============================================================
# Module-level singletons. They are populated by init_browser_session() and
# reset to None by cleanup_browser_session() / kill_browser_session().
_browser_session: Optional[BrowserSession] = None
_browser_tools: Optional[Tools] = None
_file_system: Optional[FileSystem] = None
  36. async def init_browser_session(
  37. headless: bool = False,
  38. user_data_dir: Optional[str] = None,
  39. profile_name: str = "default",
  40. browser_profile: Optional[BrowserProfile] = None,
  41. use_cloud: bool = False,
  42. **kwargs
  43. ) -> tuple[BrowserSession, Tools]:
  44. """
  45. 初始化全局浏览器会话
  46. Args:
  47. headless: 是否无头模式
  48. user_data_dir: 用户数据目录(用于保存登录状态)
  49. profile_name: 配置文件名称
  50. browser_profile: BrowserProfile 对象(用于预设 cookies 等)
  51. use_cloud: 是否使用云浏览器(默认 False,使用本地浏览器)
  52. **kwargs: 其他 BrowserSession 参数
  53. Returns:
  54. (BrowserSession, Tools) 元组
  55. """
  56. global _browser_session, _browser_tools, _file_system
  57. if _browser_session is not None:
  58. return _browser_session, _browser_tools
  59. # 设置用户数据目录(持久化登录状态)
  60. if user_data_dir is None and profile_name and not use_cloud:
  61. user_data_dir = str(Path.home() / ".browser_use" / "profiles" / profile_name)
  62. Path(user_data_dir).mkdir(parents=True, exist_ok=True)
  63. # 创建浏览器会话
  64. session_params = {
  65. "headless": headless,
  66. }
  67. if use_cloud:
  68. # 云浏览器模式
  69. session_params["use_cloud"] = True
  70. print("🌐 使用云浏览器模式")
  71. else:
  72. # 本地浏览器模式
  73. session_params["is_local"] = True
  74. # macOS 上显式指定 Chrome 路径
  75. import platform
  76. if platform.system() == "Darwin": # macOS
  77. chrome_path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
  78. if Path(chrome_path).exists():
  79. session_params["executable_path"] = chrome_path
  80. # 只在有值时才添加 user_data_dir
  81. if user_data_dir:
  82. session_params["user_data_dir"] = user_data_dir
  83. # 只在有值时才添加 browser_profile
  84. if browser_profile:
  85. session_params["browser_profile"] = browser_profile
  86. # 合并其他参数
  87. session_params.update(kwargs)
  88. _browser_session = BrowserSession(**session_params)
  89. # 启动浏览器
  90. await _browser_session.start()
  91. # 创建工具实例
  92. _browser_tools = Tools()
  93. # 创建文件系统实例(用于文件操作)
  94. base_dir = Path.cwd() / ".browser_use_files"
  95. base_dir.mkdir(parents=True, exist_ok=True)
  96. _file_system = FileSystem(base_dir=str(base_dir))
  97. return _browser_session, _browser_tools
  98. async def get_browser_session() -> tuple[BrowserSession, Tools]:
  99. """
  100. 获取当前浏览器会话,如果不存在则自动创建
  101. Returns:
  102. (BrowserSession, Tools) 元组
  103. """
  104. global _browser_session, _browser_tools
  105. if _browser_session is None:
  106. await init_browser_session()
  107. return _browser_session, _browser_tools
  108. async def cleanup_browser_session():
  109. """
  110. 清理浏览器会话
  111. 优雅地停止浏览器但保留会话状态
  112. """
  113. global _browser_session, _browser_tools, _file_system
  114. if _browser_session is not None:
  115. await _browser_session.stop()
  116. _browser_session = None
  117. _browser_tools = None
  118. _file_system = None
  119. async def kill_browser_session():
  120. """
  121. 强制终止浏览器会话
  122. 完全关闭浏览器进程
  123. """
  124. global _browser_session, _browser_tools, _file_system
  125. if _browser_session is not None:
  126. await _browser_session.kill()
  127. _browser_session = None
  128. _browser_tools = None
  129. _file_system = None
  130. # ============================================================
  131. # 辅助函数:ActionResult 转 ToolResult
  132. # ============================================================
  133. def action_result_to_tool_result(result: ActionResult, title: str = None) -> ToolResult:
  134. """
  135. 将 browser-use 的 ActionResult 转换为框架的 ToolResult
  136. Args:
  137. result: browser-use 的 ActionResult
  138. title: 可选的标题(如果不提供则从 result 推断)
  139. Returns:
  140. ToolResult
  141. """
  142. if result.error:
  143. return ToolResult(
  144. title=title or "操作失败",
  145. output="",
  146. error=result.error,
  147. long_term_memory=result.long_term_memory or result.error
  148. )
  149. return ToolResult(
  150. title=title or "操作成功",
  151. output=result.extracted_content or "",
  152. long_term_memory=result.long_term_memory or result.extracted_content or "",
  153. metadata=result.metadata or {}
  154. )
  155. # ============================================================
  156. # 导航类工具 (Navigation Tools)
  157. # ============================================================
  158. @tool()
  159. async def navigate_to_url(url: str, new_tab: bool = False, uid: str = "") -> ToolResult:
  160. """
  161. 导航到指定的 URL
  162. Navigate to a specific URL
  163. 使用 browser-use 的原生导航功能,支持在新标签页打开。
  164. Args:
  165. url: 要访问的 URL 地址
  166. new_tab: 是否在新标签页中打开(默认 False)
  167. uid: 用户 ID(由框架自动注入)
  168. Returns:
  169. ToolResult: 包含导航结果的工具返回对象
  170. Example:
  171. navigate_to_url("https://www.baidu.com")
  172. navigate_to_url("https://www.google.com", new_tab=True)
  173. """
  174. try:
  175. browser, tools = await get_browser_session()
  176. # 使用 browser-use 的 navigate 工具
  177. result = await tools.navigate(
  178. url=url,
  179. new_tab=new_tab,
  180. browser_session=browser
  181. )
  182. return action_result_to_tool_result(result, f"导航到 {url}")
  183. except Exception as e:
  184. return ToolResult(
  185. title="导航失败",
  186. output="",
  187. error=f"Failed to navigate to {url}: {str(e)}",
  188. long_term_memory=f"导航到 {url} 失败"
  189. )
  190. @tool()
  191. async def search_web(query: str, engine: str = "google", uid: str = "") -> ToolResult:
  192. """
  193. 使用搜索引擎搜索
  194. Search the web using a search engine
  195. Args:
  196. query: 搜索关键词
  197. engine: 搜索引擎 (google, duckduckgo, bing) - 默认: google
  198. uid: 用户 ID(由框架自动注入)
  199. Returns:
  200. ToolResult: 搜索结果
  201. Example:
  202. search_web("Python async programming", engine="google")
  203. """
  204. try:
  205. browser, tools = await get_browser_session()
  206. # 使用 browser-use 的 search 工具
  207. result = await tools.search(
  208. query=query,
  209. engine=engine,
  210. browser_session=browser
  211. )
  212. return action_result_to_tool_result(result, f"搜索: {query}")
  213. except Exception as e:
  214. return ToolResult(
  215. title="搜索失败",
  216. output="",
  217. error=f"Search failed: {str(e)}",
  218. long_term_memory=f"搜索 '{query}' 失败"
  219. )
  220. @tool()
  221. async def go_back(uid: str = "") -> ToolResult:
  222. """
  223. 返回到上一个页面
  224. Go back to the previous page
  225. 模拟浏览器的"后退"按钮功能。
  226. Args:
  227. uid: 用户 ID(由框架自动注入)
  228. Returns:
  229. ToolResult: 包含返回操作结果的工具返回对象
  230. """
  231. try:
  232. browser, tools = await get_browser_session()
  233. result = await tools.go_back(browser_session=browser)
  234. return action_result_to_tool_result(result, "返回上一页")
  235. except Exception as e:
  236. return ToolResult(
  237. title="返回失败",
  238. output="",
  239. error=f"Failed to go back: {str(e)}",
  240. long_term_memory="返回上一页失败"
  241. )
  242. @tool()
  243. async def wait(seconds: int = 3, uid: str = "") -> ToolResult:
  244. """
  245. 等待指定的秒数
  246. Wait for a specified number of seconds
  247. 用于等待页面加载、动画完成或其他异步操作。
  248. Args:
  249. seconds: 等待时间(秒),最大30秒
  250. uid: 用户 ID(由框架自动注入)
  251. Returns:
  252. ToolResult: 包含等待操作结果的工具返回对象
  253. Example:
  254. wait(5) # 等待5秒
  255. """
  256. try:
  257. browser, tools = await get_browser_session()
  258. result = await tools.wait(seconds=seconds, browser_session=browser)
  259. return action_result_to_tool_result(result, f"等待 {seconds} 秒")
  260. except Exception as e:
  261. return ToolResult(
  262. title="等待失败",
  263. output="",
  264. error=f"Failed to wait: {str(e)}",
  265. long_term_memory="等待失败"
  266. )
  267. # ============================================================
  268. # 元素交互工具 (Element Interaction Tools)
  269. # ============================================================
  270. @tool()
  271. async def click_element(index: int, uid: str = "") -> ToolResult:
  272. """
  273. 通过索引点击页面元素
  274. Click an element by index
  275. Args:
  276. index: 元素索引(从浏览器状态中获取)
  277. uid: 用户 ID(由框架自动注入)
  278. Returns:
  279. ToolResult: 包含点击操作结果的工具返回对象
  280. Example:
  281. click_element(index=5)
  282. Note:
  283. 需要先通过 get_selector_map 获取页面元素索引
  284. """
  285. try:
  286. browser, tools = await get_browser_session()
  287. result = await tools.click(
  288. index=index,
  289. browser_session=browser
  290. )
  291. return action_result_to_tool_result(result, f"点击元素 {index}")
  292. except Exception as e:
  293. return ToolResult(
  294. title="点击失败",
  295. output="",
  296. error=f"Failed to click element {index}: {str(e)}",
  297. long_term_memory=f"点击元素 {index} 失败"
  298. )
  299. @tool()
  300. async def input_text(index: int, text: str, clear: bool = True, uid: str = "") -> ToolResult:
  301. """
  302. 在指定元素中输入文本
  303. Input text into an element
  304. Args:
  305. index: 元素索引(从浏览器状态中获取)
  306. text: 要输入的文本内容
  307. clear: 是否先清除现有文本(默认 True)
  308. uid: 用户 ID(由框架自动注入)
  309. Returns:
  310. ToolResult: 包含输入操作结果的工具返回对象
  311. Example:
  312. input_text(index=0, text="Hello World", clear=True)
  313. """
  314. try:
  315. browser, tools = await get_browser_session()
  316. result = await tools.input(
  317. index=index,
  318. text=text,
  319. clear=clear,
  320. browser_session=browser
  321. )
  322. return action_result_to_tool_result(result, f"输入文本到元素 {index}")
  323. except Exception as e:
  324. return ToolResult(
  325. title="输入失败",
  326. output="",
  327. error=f"Failed to input text into element {index}: {str(e)}",
  328. long_term_memory=f"输入文本失败"
  329. )
  330. @tool()
  331. async def send_keys(keys: str, uid: str = "") -> ToolResult:
  332. """
  333. 发送键盘按键或快捷键
  334. Send keyboard keys or shortcuts
  335. 支持发送单个按键、组合键和快捷键。
  336. Args:
  337. keys: 要发送的按键字符串
  338. - 单个按键: "Enter", "Escape", "PageDown", "Tab"
  339. - 组合键: "Control+o", "Shift+Tab", "Alt+F4"
  340. - 功能键: "F1", "F2", ..., "F12"
  341. uid: 用户 ID(由框架自动注入)
  342. Returns:
  343. ToolResult: 包含按键操作结果的工具返回对象
  344. Example:
  345. send_keys("Enter")
  346. send_keys("Control+A")
  347. """
  348. try:
  349. browser, tools = await get_browser_session()
  350. result = await tools.send_keys(
  351. keys=keys,
  352. browser_session=browser
  353. )
  354. return action_result_to_tool_result(result, f"发送按键: {keys}")
  355. except Exception as e:
  356. return ToolResult(
  357. title="发送按键失败",
  358. output="",
  359. error=f"Failed to send keys: {str(e)}",
  360. long_term_memory="发送按键失败"
  361. )
  362. @tool()
  363. async def upload_file(index: int, path: str, uid: str = "") -> ToolResult:
  364. """
  365. 上传文件到文件输入元素
  366. Upload a file to a file input element
  367. Args:
  368. index: 文件输入框的元素索引
  369. path: 要上传的文件路径(绝对路径)
  370. uid: 用户 ID(由框架自动注入)
  371. Returns:
  372. ToolResult: 包含上传操作结果的工具返回对象
  373. Example:
  374. upload_file(index=7, path="/path/to/file.pdf")
  375. Note:
  376. 文件必须存在且路径必须是绝对路径
  377. """
  378. try:
  379. browser, tools = await get_browser_session()
  380. result = await tools.upload_file(
  381. index=index,
  382. path=path,
  383. browser_session=browser,
  384. available_file_paths=[path],
  385. file_system=_file_system
  386. )
  387. return action_result_to_tool_result(result, f"上传文件: {path}")
  388. except Exception as e:
  389. return ToolResult(
  390. title="上传失败",
  391. output="",
  392. error=f"Failed to upload file: {str(e)}",
  393. long_term_memory=f"上传文件 {path} 失败"
  394. )
  395. # ============================================================
  396. # 滚动和视图工具 (Scroll & View Tools)
  397. # ============================================================
  398. @tool()
  399. async def scroll_page(down: bool = True, pages: float = 1.0,
  400. index: Optional[int] = None, uid: str = "") -> ToolResult:
  401. """
  402. 滚动页面或元素
  403. Scroll the page or a specific element
  404. Args:
  405. down: True 向下滚动,False 向上滚动
  406. pages: 滚动页数(0.5=半页,1=全页,10=滚动到底部/顶部)
  407. index: 可选,滚动特定元素(如下拉框内部)
  408. uid: 用户 ID(由框架自动注入)
  409. Returns:
  410. ToolResult: 滚动结果
  411. Example:
  412. scroll_page(down=True, pages=2.0) # 向下滚动2页
  413. scroll_page(down=False, pages=1.0) # 向上滚动1页
  414. """
  415. try:
  416. browser, tools = await get_browser_session()
  417. result = await tools.scroll(
  418. down=down,
  419. pages=pages,
  420. index=index,
  421. browser_session=browser
  422. )
  423. direction = "向下" if down else "向上"
  424. return action_result_to_tool_result(result, f"{direction}滚动 {pages} 页")
  425. except Exception as e:
  426. return ToolResult(
  427. title="滚动失败",
  428. output="",
  429. error=f"Failed to scroll: {str(e)}",
  430. long_term_memory="滚动失败"
  431. )
  432. @tool()
  433. async def find_text(text: str, uid: str = "") -> ToolResult:
  434. """
  435. 查找页面中的文本并滚动到该位置
  436. Find text on the page and scroll to it
  437. 在页面中搜索指定的文本,找到后自动滚动到该位置。
  438. Args:
  439. text: 要查找的文本内容
  440. uid: 用户 ID(由框架自动注入)
  441. Returns:
  442. ToolResult: 包含查找结果的工具返回对象
  443. Example:
  444. find_text("Privacy Policy")
  445. """
  446. try:
  447. browser, tools = await get_browser_session()
  448. result = await tools.find_text(
  449. text=text,
  450. browser_session=browser
  451. )
  452. return action_result_to_tool_result(result, f"查找文本: {text}")
  453. except Exception as e:
  454. return ToolResult(
  455. title="查找失败",
  456. output="",
  457. error=f"Failed to find text: {str(e)}",
  458. long_term_memory=f"查找文本 '{text}' 失败"
  459. )
  460. @tool()
  461. async def screenshot(uid: str = "") -> ToolResult:
  462. """
  463. 请求在下次观察中包含页面截图
  464. Request a screenshot to be included in the next observation
  465. 用于视觉检查页面状态,帮助理解页面布局和内容。
  466. Args:
  467. uid: 用户 ID(由框架自动注入)
  468. Returns:
  469. ToolResult: 包含截图请求结果的工具返回对象
  470. Example:
  471. screenshot()
  472. Note:
  473. 截图会在下次页面观察时自动包含在结果中。
  474. """
  475. try:
  476. browser, tools = await get_browser_session()
  477. result = await tools.screenshot(browser_session=browser)
  478. return action_result_to_tool_result(result, "截图请求")
  479. except Exception as e:
  480. return ToolResult(
  481. title="截图失败",
  482. output="",
  483. error=f"Failed to capture screenshot: {str(e)}",
  484. long_term_memory="截图失败"
  485. )
  486. # ============================================================
  487. # 标签页管理工具 (Tab Management Tools)
  488. # ============================================================
  489. @tool()
  490. async def switch_tab(tab_id: str, uid: str = "") -> ToolResult:
  491. """
  492. 切换到指定标签页
  493. Switch to a different browser tab
  494. Args:
  495. tab_id: 4字符标签ID(target_id 的最后4位)
  496. uid: 用户 ID(由框架自动注入)
  497. Returns:
  498. ToolResult: 切换结果
  499. Example:
  500. switch_tab(tab_id="a3f2")
  501. """
  502. try:
  503. browser, tools = await get_browser_session()
  504. result = await tools.switch(
  505. tab_id=tab_id,
  506. browser_session=browser
  507. )
  508. return action_result_to_tool_result(result, f"切换到标签页 {tab_id}")
  509. except Exception as e:
  510. return ToolResult(
  511. title="切换标签页失败",
  512. output="",
  513. error=f"Failed to switch tab: {str(e)}",
  514. long_term_memory=f"切换到标签页 {tab_id} 失败"
  515. )
  516. @tool()
  517. async def close_tab(tab_id: str, uid: str = "") -> ToolResult:
  518. """
  519. 关闭指定标签页
  520. Close a browser tab
  521. Args:
  522. tab_id: 4字符标签ID
  523. uid: 用户 ID(由框架自动注入)
  524. Returns:
  525. ToolResult: 关闭结果
  526. Example:
  527. close_tab(tab_id="a3f2")
  528. """
  529. try:
  530. browser, tools = await get_browser_session()
  531. result = await tools.close(
  532. tab_id=tab_id,
  533. browser_session=browser
  534. )
  535. return action_result_to_tool_result(result, f"关闭标签页 {tab_id}")
  536. except Exception as e:
  537. return ToolResult(
  538. title="关闭标签页失败",
  539. output="",
  540. error=f"Failed to close tab: {str(e)}",
  541. long_term_memory=f"关闭标签页 {tab_id} 失败"
  542. )
  543. # ============================================================
  544. # 下拉框工具 (Dropdown Tools)
  545. # ============================================================
  546. @tool()
  547. async def get_dropdown_options(index: int, uid: str = "") -> ToolResult:
  548. """
  549. 获取下拉框的所有选项
  550. Get options from a dropdown element
  551. Args:
  552. index: 下拉框的元素索引
  553. uid: 用户 ID(由框架自动注入)
  554. Returns:
  555. ToolResult: 包含所有选项的结果
  556. Example:
  557. get_dropdown_options(index=8)
  558. """
  559. try:
  560. browser, tools = await get_browser_session()
  561. result = await tools.dropdown_options(
  562. index=index,
  563. browser_session=browser
  564. )
  565. return action_result_to_tool_result(result, f"获取下拉框选项: {index}")
  566. except Exception as e:
  567. return ToolResult(
  568. title="获取下拉框选项失败",
  569. output="",
  570. error=f"Failed to get dropdown options: {str(e)}",
  571. long_term_memory=f"获取下拉框 {index} 选项失败"
  572. )
  573. @tool()
  574. async def select_dropdown_option(index: int, text: str, uid: str = "") -> ToolResult:
  575. """
  576. 选择下拉框选项
  577. Select an option from a dropdown
  578. Args:
  579. index: 下拉框的元素索引
  580. text: 要选择的选项文本(精确匹配)
  581. uid: 用户 ID(由框架自动注入)
  582. Returns:
  583. ToolResult: 选择结果
  584. Example:
  585. select_dropdown_option(index=8, text="Option 2")
  586. """
  587. try:
  588. browser, tools = await get_browser_session()
  589. result = await tools.select_dropdown(
  590. index=index,
  591. text=text,
  592. browser_session=browser
  593. )
  594. return action_result_to_tool_result(result, f"选择下拉框选项: {text}")
  595. except Exception as e:
  596. return ToolResult(
  597. title="选择下拉框选项失败",
  598. output="",
  599. error=f"Failed to select dropdown option: {str(e)}",
  600. long_term_memory=f"选择选项 '{text}' 失败"
  601. )
  602. # ============================================================
  603. # 内容提取工具 (Content Extraction Tools)
  604. # ============================================================
  605. @tool()
  606. async def extract_content(query: str, extract_links: bool = False,
  607. start_from_char: int = 0, uid: str = "") -> ToolResult:
  608. """
  609. 使用 LLM 从页面提取结构化数据
  610. Extract content from the current page using LLM
  611. Args:
  612. query: 提取查询(告诉 LLM 要提取什么内容)
  613. extract_links: 是否提取链接(默认 False,节省 token)
  614. start_from_char: 从哪个字符开始提取(用于分页提取大内容)
  615. uid: 用户 ID(由框架自动注入)
  616. Returns:
  617. ToolResult: 提取的内容
  618. Example:
  619. extract_content(query="提取页面上所有产品的名称和价格", extract_links=True)
  620. Note:
  621. 需要配置 page_extraction_llm,否则会失败
  622. 支持分页提取,最大100k字符
  623. """
  624. try:
  625. browser, tools = await get_browser_session()
  626. # 注意:extract 需要 page_extraction_llm 参数
  627. # 这里我们假设用户会在初始化时配置 LLM
  628. # 如果没有配置,会抛出异常
  629. result = await tools.extract(
  630. query=query,
  631. extract_links=extract_links,
  632. start_from_char=start_from_char,
  633. browser_session=browser,
  634. page_extraction_llm=None, # 需要用户配置
  635. file_system=_file_system
  636. )
  637. return action_result_to_tool_result(result, f"提取内容: {query}")
  638. except Exception as e:
  639. return ToolResult(
  640. title="内容提取失败",
  641. output="",
  642. error=f"Failed to extract content: {str(e)}",
  643. long_term_memory=f"提取内容失败: {query}"
  644. )
  645. @tool()
  646. async def get_page_html(uid: str = "") -> ToolResult:
  647. """
  648. 获取当前页面的完整 HTML
  649. Get the full HTML of the current page
  650. 返回当前页面的完整 HTML 源代码。
  651. Args:
  652. uid: 用户 ID(由框架自动注入)
  653. Returns:
  654. ToolResult: 包含页面 HTML 的工具返回对象
  655. Example:
  656. get_page_html()
  657. Note:
  658. - 返回的是完整的 HTML 源代码
  659. - 输出会被限制在 10000 字符以内(完整内容保存在 metadata 中)
  660. """
  661. try:
  662. browser, tools = await get_browser_session()
  663. # 使用 CDP 获取页面 HTML
  664. cdp = await browser.get_or_create_cdp_session()
  665. # 获取页面内容
  666. result = await cdp.cdp_client.send.Runtime.evaluate(
  667. params={'expression': 'document.documentElement.outerHTML'},
  668. session_id=cdp.session_id
  669. )
  670. html = result.get('result', {}).get('value', '')
  671. # 获取 URL 和标题
  672. url = await browser.get_current_page_url()
  673. title_result = await cdp.cdp_client.send.Runtime.evaluate(
  674. params={'expression': 'document.title'},
  675. session_id=cdp.session_id
  676. )
  677. title = title_result.get('result', {}).get('value', '')
  678. # 限制输出大小
  679. output_html = html
  680. if len(html) > 10000:
  681. output_html = html[:10000] + "... (truncated)"
  682. return ToolResult(
  683. title=f"获取 HTML: {url}",
  684. output=f"页面: {title}\nURL: {url}\n\nHTML:\n{output_html}",
  685. long_term_memory=f"获取 HTML: {url}",
  686. metadata={"url": url, "title": title, "html": html}
  687. )
  688. except Exception as e:
  689. return ToolResult(
  690. title="获取 HTML 失败",
  691. output="",
  692. error=f"Failed to get page HTML: {str(e)}",
  693. long_term_memory="获取 HTML 失败"
  694. )
  695. @tool()
  696. async def get_selector_map(uid: str = "") -> ToolResult:
  697. """
  698. 获取当前页面的元素索引映射
  699. Get the selector map of interactive elements on the current page
  700. 返回页面所有可交互元素的索引字典,用于后续的元素操作。
  701. Args:
  702. uid: 用户 ID(由框架自动注入)
  703. Returns:
  704. ToolResult: 包含元素映射的工具返回对象
  705. Example:
  706. get_selector_map()
  707. Note:
  708. 返回的索引可以用于 click_element, input_text 等操作
  709. """
  710. try:
  711. browser, tools = await get_browser_session()
  712. # 获取选择器映射
  713. selector_map = await browser.get_selector_map()
  714. # 构建输出信息
  715. elements_info = []
  716. for index, node in list(selector_map.items())[:20]: # 只显示前20个
  717. tag = node.tag_name
  718. attrs = node.attributes or {}
  719. text = attrs.get('aria-label') or attrs.get('placeholder') or attrs.get('value', '')
  720. elements_info.append(f"索引 {index}: <{tag}> {text[:50]}")
  721. output = f"找到 {len(selector_map)} 个交互元素\n\n"
  722. output += "\n".join(elements_info)
  723. if len(selector_map) > 20:
  724. output += f"\n... 还有 {len(selector_map) - 20} 个元素"
  725. return ToolResult(
  726. title="获取元素映射",
  727. output=output,
  728. long_term_memory=f"获取到 {len(selector_map)} 个交互元素",
  729. metadata={"selector_map": {k: str(v) for k, v in list(selector_map.items())[:100]}}
  730. )
  731. except Exception as e:
  732. return ToolResult(
  733. title="获取元素映射失败",
  734. output="",
  735. error=f"Failed to get selector map: {str(e)}",
  736. long_term_memory="获取元素映射失败"
  737. )
  738. # ============================================================
  739. # JavaScript 执行工具 (JavaScript Tools)
  740. # ============================================================
  741. @tool()
  742. async def evaluate(code: str, uid: str = "") -> ToolResult:
  743. """
  744. 在页面中执行 JavaScript 代码
  745. Execute JavaScript code in the page context
  746. 允许在当前页面中执行任意 JavaScript 代码,用于复杂的页面操作或数据提取。
  747. Args:
  748. code: 要执行的 JavaScript 代码字符串
  749. uid: 用户 ID(由框架自动注入)
  750. Returns:
  751. ToolResult: 包含执行结果的工具返回对象
  752. Example:
  753. evaluate("document.title")
  754. evaluate("document.querySelectorAll('a').length")
  755. Note:
  756. - 代码在页面上下文中执行,可以访问 DOM 和全局变量
  757. - 返回值会被自动序列化为字符串
  758. - 执行结果限制在 20k 字符以内
  759. """
  760. try:
  761. browser, tools = await get_browser_session()
  762. result = await tools.evaluate(
  763. code=code,
  764. browser_session=browser
  765. )
  766. return action_result_to_tool_result(result, "执行 JavaScript")
  767. except Exception as e:
  768. return ToolResult(
  769. title="JavaScript 执行失败",
  770. output="",
  771. error=f"Failed to execute JavaScript: {str(e)}",
  772. long_term_memory="JavaScript 执行失败"
  773. )
  774. # ============================================================
  775. # 文件系统工具 (File System Tools)
  776. # ============================================================
  777. @tool()
  778. async def write_file(file_name: str, content: str, append: bool = False, uid: str = "") -> ToolResult:
  779. """
  780. 写入文件到本地文件系统
  781. Write content to a local file
  782. 支持多种文件格式的写入操作。
  783. Args:
  784. file_name: 文件名(包含扩展名)
  785. content: 要写入的文件内容
  786. append: 是否追加模式(默认 False,覆盖写入)
  787. uid: 用户 ID(由框架自动注入)
  788. Returns:
  789. ToolResult: 包含写入结果的工具返回对象
  790. Example:
  791. write_file("output.txt", "Hello World")
  792. write_file("data.json", '{"key": "value"}')
  793. Note:
  794. 支持的文件格式: .txt, .md, .json, .jsonl, .csv, .pdf
  795. """
  796. try:
  797. browser, tools = await get_browser_session()
  798. result = await tools.write_file(
  799. file_name=file_name,
  800. content=content,
  801. append=append,
  802. file_system=_file_system
  803. )
  804. return action_result_to_tool_result(result, f"写入文件: {file_name}")
  805. except Exception as e:
  806. return ToolResult(
  807. title="写入文件失败",
  808. output="",
  809. error=f"Failed to write file: {str(e)}",
  810. long_term_memory=f"写入文件 {file_name} 失败"
  811. )
  812. @tool()
  813. async def read_file(file_name: str, uid: str = "") -> ToolResult:
  814. """
  815. 读取文件内容
  816. Read content from a local file
  817. 支持多种文件格式的读取操作。
  818. Args:
  819. file_name: 文件名(包含扩展名)
  820. uid: 用户 ID(由框架自动注入)
  821. Returns:
  822. ToolResult: 包含文件内容的工具返回对象
  823. Example:
  824. read_file("input.txt")
  825. read_file("data.json")
  826. Note:
  827. 支持的文件格式: 文本文件、PDF、DOCX、图片等
  828. """
  829. try:
  830. browser, tools = await get_browser_session()
  831. result = await tools.read_file(
  832. file_name=file_name,
  833. available_file_paths=[],
  834. file_system=_file_system
  835. )
  836. return action_result_to_tool_result(result, f"读取文件: {file_name}")
  837. except Exception as e:
  838. return ToolResult(
  839. title="读取文件失败",
  840. output="",
  841. error=f"Failed to read file: {str(e)}",
  842. long_term_memory=f"读取文件 {file_name} 失败"
  843. )
  844. @tool()
  845. async def replace_file(file_name: str, old_str: str, new_str: str, uid: str = "") -> ToolResult:
  846. """
  847. 替换文件中的特定文本
  848. Replace specific text in a file
  849. 在文件中查找并替换指定的文本内容。
  850. Args:
  851. file_name: 文件名(包含扩展名)
  852. old_str: 要替换的文本
  853. new_str: 新文本
  854. uid: 用户 ID(由框架自动注入)
  855. Returns:
  856. ToolResult: 包含替换结果的工具返回对象
  857. Example:
  858. replace_file("config.txt", "old_value", "new_value")
  859. Note:
  860. - 会替换文件中所有匹配的文本
  861. - 如果找不到要替换的文本,会返回警告
  862. """
  863. try:
  864. browser, tools = await get_browser_session()
  865. result = await tools.replace_file(
  866. file_name=file_name,
  867. old_str=old_str,
  868. new_str=new_str,
  869. file_system=_file_system
  870. )
  871. return action_result_to_tool_result(result, f"替换文件内容: {file_name}")
  872. except Exception as e:
  873. return ToolResult(
  874. title="替换文件失败",
  875. output="",
  876. error=f"Failed to replace file content: {str(e)}",
  877. long_term_memory=f"替换文件 {file_name} 失败"
  878. )
  879. # ============================================================
  880. # 等待用户操作工具 (Wait for User Action)
  881. # ============================================================
  882. @tool()
  883. async def wait_for_user_action(message: str = "Please complete the action in browser",
  884. timeout: int = 300, uid: str = "") -> ToolResult:
  885. """
  886. 等待用户在浏览器中完成操作(如登录)
  887. Wait for user to complete an action in the browser (e.g., login)
  888. 暂停自动化流程,等待用户手动完成某些操作(如登录、验证码等)。
  889. Args:
  890. message: 提示用户需要完成的操作
  891. timeout: 最大等待时间(秒),默认 300 秒(5 分钟)
  892. uid: 用户 ID(由框架自动注入)
  893. Returns:
  894. ToolResult: 包含等待结果的工具返回对象
  895. Example:
  896. wait_for_user_action("Please login to Xiaohongshu", timeout=180)
  897. wait_for_user_action("Please complete the CAPTCHA", timeout=60)
  898. Note:
  899. - 用户需要在浏览器窗口中手动完成操作
  900. - 完成后按回车键继续
  901. - 超时后会自动继续执行
  902. """
  903. try:
  904. import asyncio
  905. print(f"\n{'='*60}")
  906. print(f"⏸️ WAITING FOR USER ACTION")
  907. print(f"{'='*60}")
  908. print(f"📝 {message}")
  909. print(f"⏱️ Timeout: {timeout} seconds")
  910. print(f"\n👉 Please complete the action in the browser window")
  911. print(f"👉 Press ENTER when done, or wait for timeout")
  912. print(f"{'='*60}\n")
  913. # Wait for user input or timeout
  914. try:
  915. loop = asyncio.get_event_loop()
  916. # Wait for either user input or timeout
  917. await asyncio.wait_for(
  918. loop.run_in_executor(None, input),
  919. timeout=timeout
  920. )
  921. return ToolResult(
  922. title="用户操作完成",
  923. output=f"User completed: {message}",
  924. long_term_memory=f"用户完成操作: {message}"
  925. )
  926. except asyncio.TimeoutError:
  927. return ToolResult(
  928. title="用户操作超时",
  929. output=f"Timeout waiting for: {message}",
  930. long_term_memory=f"等待用户操作超时: {message}"
  931. )
  932. except Exception as e:
  933. return ToolResult(
  934. title="等待用户操作失败",
  935. output="",
  936. error=f"Failed to wait for user action: {str(e)}",
  937. long_term_memory="等待用户操作失败"
  938. )
  939. # ============================================================
  940. # 任务完成工具 (Task Completion)
  941. # ============================================================
  942. @tool()
  943. async def done(text: str, success: bool = True,
  944. files_to_display: Optional[List[str]] = None, uid: str = "") -> ToolResult:
  945. """
  946. 标记任务完成并返回最终消息
  947. Mark the task as complete and return final message to user
  948. Args:
  949. text: 给用户的最终消息
  950. success: 任务是否成功完成
  951. files_to_display: 可选的要显示的文件路径列表
  952. uid: 用户 ID(由框架自动注入)
  953. Returns:
  954. ToolResult: 完成结果
  955. Example:
  956. done("任务已完成,提取了10个产品信息", success=True)
  957. """
  958. try:
  959. browser, tools = await get_browser_session()
  960. result = await tools.done(
  961. text=text,
  962. success=success,
  963. files_to_display=files_to_display,
  964. file_system=_file_system
  965. )
  966. return action_result_to_tool_result(result, "任务完成")
  967. except Exception as e:
  968. return ToolResult(
  969. title="标记任务完成失败",
  970. output="",
  971. error=f"Failed to complete task: {str(e)}",
  972. long_term_memory="标记任务完成失败"
  973. )
  974. # ============================================================
  975. # 导出所有工具函数(供外部使用)
  976. # ============================================================
  977. __all__ = [
  978. # 会话管理
  979. 'init_browser_session',
  980. 'get_browser_session',
  981. 'cleanup_browser_session',
  982. 'kill_browser_session',
  983. # 导航类工具
  984. 'navigate_to_url',
  985. 'search_web',
  986. 'go_back',
  987. 'wait',
  988. # 元素交互工具
  989. 'click_element',
  990. 'input_text',
  991. 'send_keys',
  992. 'upload_file',
  993. # 滚动和视图工具
  994. 'scroll_page',
  995. 'find_text',
  996. 'screenshot',
  997. # 标签页管理工具
  998. 'switch_tab',
  999. 'close_tab',
  1000. # 下拉框工具
  1001. 'get_dropdown_options',
  1002. 'select_dropdown_option',
  1003. # 内容提取工具
  1004. 'extract_content',
  1005. 'get_page_html',
  1006. 'get_selector_map',
  1007. # JavaScript 执行工具
  1008. 'evaluate',
  1009. # 文件系统工具
  1010. 'write_file',
  1011. 'read_file',
  1012. 'replace_file',
  1013. # 等待用户操作
  1014. 'wait_for_user_action',
  1015. # 任务完成
  1016. 'done',
  1017. ]