baseClass.py 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492
  1. """
  2. Browser-Use 原生工具适配器
  3. Native Browser-Use Tools Adapter
  4. 直接使用 browser-use 的原生类(BrowserSession, Tools)实现所有浏览器操作工具。
  5. 不依赖 Playwright,完全基于 CDP 协议。
  6. 核心特性:
  7. 1. 浏览器会话持久化 - 只启动一次浏览器
  8. 2. 状态自动保持 - 登录状态、Cookie、LocalStorage 等
  9. 3. 完整的底层访问 - 可以直接使用 CDP 协议
  10. 4. 性能优异 - 避免频繁创建/销毁浏览器实例
  11. 使用方法:
  12. 1. 在 Agent 初始化时调用 init_browser_session()
  13. 2. 使用各个工具函数执行浏览器操作
  14. 3. 任务结束时调用 cleanup_browser_session()
  15. """
  16. import sys
  17. import os
  18. import json
  19. from typing import Optional, List, Dict, Any, Tuple
  20. from pathlib import Path
  21. from urllib.parse import urlparse
  22. # 将项目根目录添加到 Python 路径
  23. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  24. # 导入框架的工具装饰器和结果类
  25. from agent.tools import tool, ToolResult
  26. from agent.tools.builtin.browser.sync_mysql_help import mysql
  27. # 导入 browser-use 的核心类
  28. from browser_use import BrowserSession, BrowserProfile
  29. from browser_use.tools.service import Tools
  30. from browser_use.agent.views import ActionResult
  31. from browser_use.filesystem.file_system import FileSystem
  32. # ============================================================
  33. # 全局浏览器会话管理
  34. # ============================================================
# Module-level singletons shared by the tool functions in this file.
# init_browser_session() populates all three; cleanup_browser_session() and
# kill_browser_session() reset them to None.
_browser_session: Optional[BrowserSession] = None
_browser_tools: Optional[Tools] = None
_file_system: Optional[FileSystem] = None
  39. async def init_browser_session(
  40. headless: bool = False,
  41. user_data_dir: Optional[str] = None,
  42. profile_name: str = "default",
  43. browser_profile: Optional[BrowserProfile] = None,
  44. use_cloud: bool = False,
  45. **kwargs
  46. ) -> tuple[BrowserSession, Tools]:
  47. """
  48. 初始化全局浏览器会话
  49. Args:
  50. headless: 是否无头模式
  51. user_data_dir: 用户数据目录(用于保存登录状态)
  52. profile_name: 配置文件名称
  53. browser_profile: BrowserProfile 对象(用于预设 cookies 等)
  54. use_cloud: 是否使用云浏览器(默认 False,使用本地浏览器)
  55. **kwargs: 其他 BrowserSession 参数
  56. Returns:
  57. (BrowserSession, Tools) 元组
  58. """
  59. global _browser_session, _browser_tools, _file_system
  60. if _browser_session is not None:
  61. return _browser_session, _browser_tools
  62. # 设置用户数据目录(持久化登录状态)
  63. if user_data_dir is None and profile_name and not use_cloud:
  64. user_data_dir = str(Path.home() / ".browser_use" / "profiles" / profile_name)
  65. Path(user_data_dir).mkdir(parents=True, exist_ok=True)
  66. # 创建浏览器会话
  67. session_params = {
  68. "headless": headless,
  69. }
  70. if use_cloud:
  71. # 云浏览器模式
  72. session_params["use_cloud"] = True
  73. print("🌐 使用云浏览器模式")
  74. else:
  75. # 本地浏览器模式
  76. session_params["is_local"] = True
  77. # macOS 上显式指定 Chrome 路径
  78. import platform
  79. if platform.system() == "Darwin": # macOS
  80. chrome_path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
  81. if Path(chrome_path).exists():
  82. session_params["executable_path"] = chrome_path
  83. # 只在有值时才添加 user_data_dir
  84. if user_data_dir:
  85. session_params["user_data_dir"] = user_data_dir
  86. # 只在有值时才添加 browser_profile
  87. if browser_profile:
  88. session_params["browser_profile"] = browser_profile
  89. # 合并其他参数
  90. session_params.update(kwargs)
  91. _browser_session = BrowserSession(**session_params)
  92. # 启动浏览器
  93. await _browser_session.start()
  94. # 创建工具实例
  95. _browser_tools = Tools()
  96. # 创建文件系统实例(用于文件操作)
  97. base_dir = Path.cwd() / ".browser_use_files"
  98. base_dir.mkdir(parents=True, exist_ok=True)
  99. _file_system = FileSystem(base_dir=str(base_dir))
  100. return _browser_session, _browser_tools
  101. async def get_browser_session() -> tuple[BrowserSession, Tools]:
  102. """
  103. 获取当前浏览器会话,如果不存在则自动创建
  104. Returns:
  105. (BrowserSession, Tools) 元组
  106. """
  107. global _browser_session, _browser_tools
  108. if _browser_session is None:
  109. await init_browser_session()
  110. return _browser_session, _browser_tools
  111. async def cleanup_browser_session():
  112. """
  113. 清理浏览器会话
  114. 优雅地停止浏览器但保留会话状态
  115. """
  116. global _browser_session, _browser_tools, _file_system
  117. if _browser_session is not None:
  118. await _browser_session.stop()
  119. _browser_session = None
  120. _browser_tools = None
  121. _file_system = None
  122. async def kill_browser_session():
  123. """
  124. 强制终止浏览器会话
  125. 完全关闭浏览器进程
  126. """
  127. global _browser_session, _browser_tools, _file_system
  128. if _browser_session is not None:
  129. await _browser_session.kill()
  130. _browser_session = None
  131. _browser_tools = None
  132. _file_system = None
  133. # ============================================================
  134. # 辅助函数:ActionResult 转 ToolResult
  135. # ============================================================
  136. def action_result_to_tool_result(result: ActionResult, title: str = None) -> ToolResult:
  137. """
  138. 将 browser-use 的 ActionResult 转换为框架的 ToolResult
  139. Args:
  140. result: browser-use 的 ActionResult
  141. title: 可选的标题(如果不提供则从 result 推断)
  142. Returns:
  143. ToolResult
  144. """
  145. if result.error:
  146. return ToolResult(
  147. title=title or "操作失败",
  148. output="",
  149. error=result.error,
  150. long_term_memory=result.long_term_memory or result.error
  151. )
  152. return ToolResult(
  153. title=title or "操作成功",
  154. output=result.extracted_content or "",
  155. long_term_memory=result.long_term_memory or result.extracted_content or "",
  156. metadata=result.metadata or {}
  157. )
  158. def _cookie_domain_for_type(cookie_type: str, url: str) -> Tuple[str, str]:
  159. if cookie_type:
  160. key = cookie_type.lower()
  161. if key in {"xiaohongshu", "xhs"}:
  162. return ".xiaohongshu.com", "https://www.xiaohongshu.com"
  163. parsed = urlparse(url or "")
  164. domain = parsed.netloc or ""
  165. domain = domain.replace("www.", "")
  166. if domain:
  167. domain = f".{domain}"
  168. base_url = f"{parsed.scheme}://{parsed.netloc}" if parsed.scheme and parsed.netloc else url
  169. return domain, base_url
  170. def _parse_cookie_string(cookie_str: str, domain: str, url: str) -> List[Dict[str, Any]]:
  171. cookies: List[Dict[str, Any]] = []
  172. if not cookie_str:
  173. return cookies
  174. parts = cookie_str.split(";")
  175. for part in parts:
  176. if not part:
  177. continue
  178. if "=" not in part:
  179. continue
  180. name, value = part.split("=", 1)
  181. cookie = {
  182. "name": str(name).strip(),
  183. "value": str(value).strip(),
  184. "domain": domain,
  185. "path": "/",
  186. "expires": -1,
  187. "httpOnly": False,
  188. "secure": True,
  189. "sameSite": "None"
  190. }
  191. if url:
  192. cookie["url"] = url
  193. cookies.append(cookie)
  194. return cookies
  195. def _normalize_cookies(cookie_value: Any, domain: str, url: str) -> List[Dict[str, Any]]:
  196. if cookie_value is None:
  197. return []
  198. if isinstance(cookie_value, list):
  199. return cookie_value
  200. if isinstance(cookie_value, dict):
  201. if "cookies" in cookie_value:
  202. return _normalize_cookies(cookie_value.get("cookies"), domain, url)
  203. if "name" in cookie_value and "value" in cookie_value:
  204. return [cookie_value]
  205. return []
  206. if isinstance(cookie_value, (bytes, bytearray)):
  207. cookie_value = cookie_value.decode("utf-8", errors="ignore")
  208. if isinstance(cookie_value, str):
  209. text = cookie_value.strip()
  210. if not text:
  211. return []
  212. try:
  213. parsed = json.loads(text)
  214. except Exception:
  215. parsed = None
  216. if parsed is not None:
  217. return _normalize_cookies(parsed, domain, url)
  218. return _parse_cookie_string(text, domain, url)
  219. return []
  220. def _extract_cookie_value(row: Optional[Dict[str, Any]]) -> Any:
  221. if not row:
  222. return None
  223. for key, value in row.items():
  224. if "cookie" in key.lower():
  225. return value
  226. return None
  227. def _fetch_cookie_row(cookie_type: str) -> Optional[Dict[str, Any]]:
  228. if not cookie_type:
  229. return None
  230. try:
  231. return mysql.fetchone(
  232. "select * from agent_channel_cookies where type=%s order by id desc limit 1",
  233. (cookie_type,)
  234. )
  235. except Exception:
  236. return None
  237. # ============================================================
  238. # 导航类工具 (Navigation Tools)
  239. # ============================================================
  240. @tool()
  241. async def navigate_to_url(url: str, new_tab: bool = False, uid: str = "") -> ToolResult:
  242. """
  243. 导航到指定的 URL
  244. Navigate to a specific URL
  245. 使用 browser-use 的原生导航功能,支持在新标签页打开。
  246. Args:
  247. url: 要访问的 URL 地址
  248. new_tab: 是否在新标签页中打开(默认 False)
  249. uid: 用户 ID(由框架自动注入)
  250. Returns:
  251. ToolResult: 包含导航结果的工具返回对象
  252. Example:
  253. navigate_to_url("https://www.baidu.com")
  254. navigate_to_url("https://www.google.com", new_tab=True)
  255. """
  256. try:
  257. browser, tools = await get_browser_session()
  258. # 使用 browser-use 的 navigate 工具
  259. result = await tools.navigate(
  260. url=url,
  261. new_tab=new_tab,
  262. browser_session=browser
  263. )
  264. return action_result_to_tool_result(result, f"导航到 {url}")
  265. except Exception as e:
  266. return ToolResult(
  267. title="导航失败",
  268. output="",
  269. error=f"Failed to navigate to {url}: {str(e)}",
  270. long_term_memory=f"导航到 {url} 失败"
  271. )
  272. @tool()
  273. async def search_web(query: str, engine: str = "google", uid: str = "") -> ToolResult:
  274. """
  275. 使用搜索引擎搜索
  276. Search the web using a search engine
  277. Args:
  278. query: 搜索关键词
  279. engine: 搜索引擎 (google, duckduckgo, bing) - 默认: google
  280. uid: 用户 ID(由框架自动注入)
  281. Returns:
  282. ToolResult: 搜索结果
  283. Example:
  284. search_web("Python async programming", engine="google")
  285. """
  286. try:
  287. browser, tools = await get_browser_session()
  288. # 使用 browser-use 的 search 工具
  289. result = await tools.search(
  290. query=query,
  291. engine=engine,
  292. browser_session=browser
  293. )
  294. return action_result_to_tool_result(result, f"搜索: {query}")
  295. except Exception as e:
  296. return ToolResult(
  297. title="搜索失败",
  298. output="",
  299. error=f"Search failed: {str(e)}",
  300. long_term_memory=f"搜索 '{query}' 失败"
  301. )
  302. @tool()
  303. async def go_back(uid: str = "") -> ToolResult:
  304. """
  305. 返回到上一个页面
  306. Go back to the previous page
  307. 模拟浏览器的"后退"按钮功能。
  308. Args:
  309. uid: 用户 ID(由框架自动注入)
  310. Returns:
  311. ToolResult: 包含返回操作结果的工具返回对象
  312. """
  313. try:
  314. browser, tools = await get_browser_session()
  315. result = await tools.go_back(browser_session=browser)
  316. return action_result_to_tool_result(result, "返回上一页")
  317. except Exception as e:
  318. return ToolResult(
  319. title="返回失败",
  320. output="",
  321. error=f"Failed to go back: {str(e)}",
  322. long_term_memory="返回上一页失败"
  323. )
  324. @tool()
  325. async def wait(seconds: int = 3, uid: str = "") -> ToolResult:
  326. """
  327. 等待指定的秒数
  328. Wait for a specified number of seconds
  329. 用于等待页面加载、动画完成或其他异步操作。
  330. Args:
  331. seconds: 等待时间(秒),最大30秒
  332. uid: 用户 ID(由框架自动注入)
  333. Returns:
  334. ToolResult: 包含等待操作结果的工具返回对象
  335. Example:
  336. wait(5) # 等待5秒
  337. """
  338. try:
  339. browser, tools = await get_browser_session()
  340. result = await tools.wait(seconds=seconds, browser_session=browser)
  341. return action_result_to_tool_result(result, f"等待 {seconds} 秒")
  342. except Exception as e:
  343. return ToolResult(
  344. title="等待失败",
  345. output="",
  346. error=f"Failed to wait: {str(e)}",
  347. long_term_memory="等待失败"
  348. )
  349. # ============================================================
  350. # 元素交互工具 (Element Interaction Tools)
  351. # ============================================================
  352. @tool()
  353. async def click_element(index: int, uid: str = "") -> ToolResult:
  354. """
  355. 通过索引点击页面元素
  356. Click an element by index
  357. Args:
  358. index: 元素索引(从浏览器状态中获取)
  359. uid: 用户 ID(由框架自动注入)
  360. Returns:
  361. ToolResult: 包含点击操作结果的工具返回对象
  362. Example:
  363. click_element(index=5)
  364. Note:
  365. 需要先通过 get_selector_map 获取页面元素索引
  366. """
  367. try:
  368. browser, tools = await get_browser_session()
  369. result = await tools.click(
  370. index=index,
  371. browser_session=browser
  372. )
  373. return action_result_to_tool_result(result, f"点击元素 {index}")
  374. except Exception as e:
  375. return ToolResult(
  376. title="点击失败",
  377. output="",
  378. error=f"Failed to click element {index}: {str(e)}",
  379. long_term_memory=f"点击元素 {index} 失败"
  380. )
  381. @tool()
  382. async def input_text(index: int, text: str, clear: bool = True, uid: str = "") -> ToolResult:
  383. """
  384. 在指定元素中输入文本
  385. Input text into an element
  386. Args:
  387. index: 元素索引(从浏览器状态中获取)
  388. text: 要输入的文本内容
  389. clear: 是否先清除现有文本(默认 True)
  390. uid: 用户 ID(由框架自动注入)
  391. Returns:
  392. ToolResult: 包含输入操作结果的工具返回对象
  393. Example:
  394. input_text(index=0, text="Hello World", clear=True)
  395. """
  396. try:
  397. browser, tools = await get_browser_session()
  398. result = await tools.input(
  399. index=index,
  400. text=text,
  401. clear=clear,
  402. browser_session=browser
  403. )
  404. return action_result_to_tool_result(result, f"输入文本到元素 {index}")
  405. except Exception as e:
  406. return ToolResult(
  407. title="输入失败",
  408. output="",
  409. error=f"Failed to input text into element {index}: {str(e)}",
  410. long_term_memory=f"输入文本失败"
  411. )
  412. @tool()
  413. async def send_keys(keys: str, uid: str = "") -> ToolResult:
  414. """
  415. 发送键盘按键或快捷键
  416. Send keyboard keys or shortcuts
  417. 支持发送单个按键、组合键和快捷键。
  418. Args:
  419. keys: 要发送的按键字符串
  420. - 单个按键: "Enter", "Escape", "PageDown", "Tab"
  421. - 组合键: "Control+o", "Shift+Tab", "Alt+F4"
  422. - 功能键: "F1", "F2", ..., "F12"
  423. uid: 用户 ID(由框架自动注入)
  424. Returns:
  425. ToolResult: 包含按键操作结果的工具返回对象
  426. Example:
  427. send_keys("Enter")
  428. send_keys("Control+A")
  429. """
  430. try:
  431. browser, tools = await get_browser_session()
  432. result = await tools.send_keys(
  433. keys=keys,
  434. browser_session=browser
  435. )
  436. return action_result_to_tool_result(result, f"发送按键: {keys}")
  437. except Exception as e:
  438. return ToolResult(
  439. title="发送按键失败",
  440. output="",
  441. error=f"Failed to send keys: {str(e)}",
  442. long_term_memory="发送按键失败"
  443. )
  444. @tool()
  445. async def upload_file(index: int, path: str, uid: str = "") -> ToolResult:
  446. """
  447. 上传文件到文件输入元素
  448. Upload a file to a file input element
  449. Args:
  450. index: 文件输入框的元素索引
  451. path: 要上传的文件路径(绝对路径)
  452. uid: 用户 ID(由框架自动注入)
  453. Returns:
  454. ToolResult: 包含上传操作结果的工具返回对象
  455. Example:
  456. upload_file(index=7, path="/path/to/file.pdf")
  457. Note:
  458. 文件必须存在且路径必须是绝对路径
  459. """
  460. try:
  461. browser, tools = await get_browser_session()
  462. result = await tools.upload_file(
  463. index=index,
  464. path=path,
  465. browser_session=browser,
  466. available_file_paths=[path],
  467. file_system=_file_system
  468. )
  469. return action_result_to_tool_result(result, f"上传文件: {path}")
  470. except Exception as e:
  471. return ToolResult(
  472. title="上传失败",
  473. output="",
  474. error=f"Failed to upload file: {str(e)}",
  475. long_term_memory=f"上传文件 {path} 失败"
  476. )
  477. # ============================================================
  478. # 滚动和视图工具 (Scroll & View Tools)
  479. # ============================================================
  480. @tool()
  481. async def scroll_page(down: bool = True, pages: float = 1.0,
  482. index: Optional[int] = None, uid: str = "") -> ToolResult:
  483. """
  484. 滚动页面或元素
  485. Scroll the page or a specific element
  486. Args:
  487. down: True 向下滚动,False 向上滚动
  488. pages: 滚动页数(0.5=半页,1=全页,10=滚动到底部/顶部)
  489. index: 可选,滚动特定元素(如下拉框内部)
  490. uid: 用户 ID(由框架自动注入)
  491. Returns:
  492. ToolResult: 滚动结果
  493. Example:
  494. scroll_page(down=True, pages=2.0) # 向下滚动2页
  495. scroll_page(down=False, pages=1.0) # 向上滚动1页
  496. """
  497. try:
  498. browser, tools = await get_browser_session()
  499. result = await tools.scroll(
  500. down=down,
  501. pages=pages,
  502. index=index,
  503. browser_session=browser
  504. )
  505. direction = "向下" if down else "向上"
  506. return action_result_to_tool_result(result, f"{direction}滚动 {pages} 页")
  507. except Exception as e:
  508. return ToolResult(
  509. title="滚动失败",
  510. output="",
  511. error=f"Failed to scroll: {str(e)}",
  512. long_term_memory="滚动失败"
  513. )
  514. @tool()
  515. async def find_text(text: str, uid: str = "") -> ToolResult:
  516. """
  517. 查找页面中的文本并滚动到该位置
  518. Find text on the page and scroll to it
  519. 在页面中搜索指定的文本,找到后自动滚动到该位置。
  520. Args:
  521. text: 要查找的文本内容
  522. uid: 用户 ID(由框架自动注入)
  523. Returns:
  524. ToolResult: 包含查找结果的工具返回对象
  525. Example:
  526. find_text("Privacy Policy")
  527. """
  528. try:
  529. browser, tools = await get_browser_session()
  530. result = await tools.find_text(
  531. text=text,
  532. browser_session=browser
  533. )
  534. return action_result_to_tool_result(result, f"查找文本: {text}")
  535. except Exception as e:
  536. return ToolResult(
  537. title="查找失败",
  538. output="",
  539. error=f"Failed to find text: {str(e)}",
  540. long_term_memory=f"查找文本 '{text}' 失败"
  541. )
  542. @tool()
  543. async def screenshot(uid: str = "") -> ToolResult:
  544. """
  545. 请求在下次观察中包含页面截图
  546. Request a screenshot to be included in the next observation
  547. 用于视觉检查页面状态,帮助理解页面布局和内容。
  548. Args:
  549. uid: 用户 ID(由框架自动注入)
  550. Returns:
  551. ToolResult: 包含截图请求结果的工具返回对象
  552. Example:
  553. screenshot()
  554. Note:
  555. 截图会在下次页面观察时自动包含在结果中。
  556. """
  557. try:
  558. browser, tools = await get_browser_session()
  559. result = await tools.screenshot(browser_session=browser)
  560. return action_result_to_tool_result(result, "截图请求")
  561. except Exception as e:
  562. return ToolResult(
  563. title="截图失败",
  564. output="",
  565. error=f"Failed to capture screenshot: {str(e)}",
  566. long_term_memory="截图失败"
  567. )
  568. # ============================================================
  569. # 标签页管理工具 (Tab Management Tools)
  570. # ============================================================
  571. @tool()
  572. async def switch_tab(tab_id: str, uid: str = "") -> ToolResult:
  573. """
  574. 切换到指定标签页
  575. Switch to a different browser tab
  576. Args:
  577. tab_id: 4字符标签ID(target_id 的最后4位)
  578. uid: 用户 ID(由框架自动注入)
  579. Returns:
  580. ToolResult: 切换结果
  581. Example:
  582. switch_tab(tab_id="a3f2")
  583. """
  584. try:
  585. browser, tools = await get_browser_session()
  586. result = await tools.switch(
  587. tab_id=tab_id,
  588. browser_session=browser
  589. )
  590. return action_result_to_tool_result(result, f"切换到标签页 {tab_id}")
  591. except Exception as e:
  592. return ToolResult(
  593. title="切换标签页失败",
  594. output="",
  595. error=f"Failed to switch tab: {str(e)}",
  596. long_term_memory=f"切换到标签页 {tab_id} 失败"
  597. )
  598. @tool()
  599. async def close_tab(tab_id: str, uid: str = "") -> ToolResult:
  600. """
  601. 关闭指定标签页
  602. Close a browser tab
  603. Args:
  604. tab_id: 4字符标签ID
  605. uid: 用户 ID(由框架自动注入)
  606. Returns:
  607. ToolResult: 关闭结果
  608. Example:
  609. close_tab(tab_id="a3f2")
  610. """
  611. try:
  612. browser, tools = await get_browser_session()
  613. result = await tools.close(
  614. tab_id=tab_id,
  615. browser_session=browser
  616. )
  617. return action_result_to_tool_result(result, f"关闭标签页 {tab_id}")
  618. except Exception as e:
  619. return ToolResult(
  620. title="关闭标签页失败",
  621. output="",
  622. error=f"Failed to close tab: {str(e)}",
  623. long_term_memory=f"关闭标签页 {tab_id} 失败"
  624. )
  625. # ============================================================
  626. # 下拉框工具 (Dropdown Tools)
  627. # ============================================================
  628. @tool()
  629. async def get_dropdown_options(index: int, uid: str = "") -> ToolResult:
  630. """
  631. 获取下拉框的所有选项
  632. Get options from a dropdown element
  633. Args:
  634. index: 下拉框的元素索引
  635. uid: 用户 ID(由框架自动注入)
  636. Returns:
  637. ToolResult: 包含所有选项的结果
  638. Example:
  639. get_dropdown_options(index=8)
  640. """
  641. try:
  642. browser, tools = await get_browser_session()
  643. result = await tools.dropdown_options(
  644. index=index,
  645. browser_session=browser
  646. )
  647. return action_result_to_tool_result(result, f"获取下拉框选项: {index}")
  648. except Exception as e:
  649. return ToolResult(
  650. title="获取下拉框选项失败",
  651. output="",
  652. error=f"Failed to get dropdown options: {str(e)}",
  653. long_term_memory=f"获取下拉框 {index} 选项失败"
  654. )
  655. @tool()
  656. async def select_dropdown_option(index: int, text: str, uid: str = "") -> ToolResult:
  657. """
  658. 选择下拉框选项
  659. Select an option from a dropdown
  660. Args:
  661. index: 下拉框的元素索引
  662. text: 要选择的选项文本(精确匹配)
  663. uid: 用户 ID(由框架自动注入)
  664. Returns:
  665. ToolResult: 选择结果
  666. Example:
  667. select_dropdown_option(index=8, text="Option 2")
  668. """
  669. try:
  670. browser, tools = await get_browser_session()
  671. result = await tools.select_dropdown(
  672. index=index,
  673. text=text,
  674. browser_session=browser
  675. )
  676. return action_result_to_tool_result(result, f"选择下拉框选项: {text}")
  677. except Exception as e:
  678. return ToolResult(
  679. title="选择下拉框选项失败",
  680. output="",
  681. error=f"Failed to select dropdown option: {str(e)}",
  682. long_term_memory=f"选择选项 '{text}' 失败"
  683. )
  684. # ============================================================
  685. # 内容提取工具 (Content Extraction Tools)
  686. # ============================================================
  687. @tool()
  688. async def extract_content(query: str, extract_links: bool = False,
  689. start_from_char: int = 0, uid: str = "") -> ToolResult:
  690. """
  691. 使用 LLM 从页面提取结构化数据
  692. Extract content from the current page using LLM
  693. Args:
  694. query: 提取查询(告诉 LLM 要提取什么内容)
  695. extract_links: 是否提取链接(默认 False,节省 token)
  696. start_from_char: 从哪个字符开始提取(用于分页提取大内容)
  697. uid: 用户 ID(由框架自动注入)
  698. Returns:
  699. ToolResult: 提取的内容
  700. Example:
  701. extract_content(query="提取页面上所有产品的名称和价格", extract_links=True)
  702. Note:
  703. 需要配置 page_extraction_llm,否则会失败
  704. 支持分页提取,最大100k字符
  705. """
  706. try:
  707. browser, tools = await get_browser_session()
  708. # 注意:extract 需要 page_extraction_llm 参数
  709. # 这里我们假设用户会在初始化时配置 LLM
  710. # 如果没有配置,会抛出异常
  711. result = await tools.extract(
  712. query=query,
  713. extract_links=extract_links,
  714. start_from_char=start_from_char,
  715. browser_session=browser,
  716. page_extraction_llm=None, # 需要用户配置
  717. file_system=_file_system
  718. )
  719. return action_result_to_tool_result(result, f"提取内容: {query}")
  720. except Exception as e:
  721. return ToolResult(
  722. title="内容提取失败",
  723. output="",
  724. error=f"Failed to extract content: {str(e)}",
  725. long_term_memory=f"提取内容失败: {query}"
  726. )
  727. @tool()
  728. async def get_page_html(uid: str = "") -> ToolResult:
  729. """
  730. 获取当前页面的完整 HTML
  731. Get the full HTML of the current page
  732. 返回当前页面的完整 HTML 源代码。
  733. Args:
  734. uid: 用户 ID(由框架自动注入)
  735. Returns:
  736. ToolResult: 包含页面 HTML 的工具返回对象
  737. Example:
  738. get_page_html()
  739. Note:
  740. - 返回的是完整的 HTML 源代码
  741. - 输出会被限制在 10000 字符以内(完整内容保存在 metadata 中)
  742. """
  743. try:
  744. browser, tools = await get_browser_session()
  745. # 使用 CDP 获取页面 HTML
  746. cdp = await browser.get_or_create_cdp_session()
  747. # 获取页面内容
  748. result = await cdp.cdp_client.send.Runtime.evaluate(
  749. params={'expression': 'document.documentElement.outerHTML'},
  750. session_id=cdp.session_id
  751. )
  752. html = result.get('result', {}).get('value', '')
  753. # 获取 URL 和标题
  754. url = await browser.get_current_page_url()
  755. title_result = await cdp.cdp_client.send.Runtime.evaluate(
  756. params={'expression': 'document.title'},
  757. session_id=cdp.session_id
  758. )
  759. title = title_result.get('result', {}).get('value', '')
  760. # 限制输出大小
  761. output_html = html
  762. if len(html) > 10000:
  763. output_html = html[:10000] + "... (truncated)"
  764. return ToolResult(
  765. title=f"获取 HTML: {url}",
  766. output=f"页面: {title}\nURL: {url}\n\nHTML:\n{output_html}",
  767. long_term_memory=f"获取 HTML: {url}",
  768. metadata={"url": url, "title": title, "html": html}
  769. )
  770. except Exception as e:
  771. return ToolResult(
  772. title="获取 HTML 失败",
  773. output="",
  774. error=f"Failed to get page HTML: {str(e)}",
  775. long_term_memory="获取 HTML 失败"
  776. )
  777. @tool()
  778. async def get_selector_map(uid: str = "") -> ToolResult:
  779. """
  780. 获取当前页面的元素索引映射
  781. Get the selector map of interactive elements on the current page
  782. 返回页面所有可交互元素的索引字典,用于后续的元素操作。
  783. Args:
  784. uid: 用户 ID(由框架自动注入)
  785. Returns:
  786. ToolResult: 包含元素映射的工具返回对象
  787. Example:
  788. get_selector_map()
  789. Note:
  790. 返回的索引可以用于 click_element, input_text 等操作
  791. """
  792. try:
  793. browser, tools = await get_browser_session()
  794. # 获取选择器映射
  795. selector_map = await browser.get_selector_map()
  796. # 构建输出信息
  797. elements_info = []
  798. for index, node in list(selector_map.items())[:20]: # 只显示前20个
  799. tag = node.tag_name
  800. attrs = node.attributes or {}
  801. text = attrs.get('aria-label') or attrs.get('placeholder') or attrs.get('value', '')
  802. elements_info.append(f"索引 {index}: <{tag}> {text[:50]}")
  803. output = f"找到 {len(selector_map)} 个交互元素\n\n"
  804. output += "\n".join(elements_info)
  805. if len(selector_map) > 20:
  806. output += f"\n... 还有 {len(selector_map) - 20} 个元素"
  807. return ToolResult(
  808. title="获取元素映射",
  809. output=output,
  810. long_term_memory=f"获取到 {len(selector_map)} 个交互元素",
  811. metadata={"selector_map": {k: str(v) for k, v in list(selector_map.items())[:100]}}
  812. )
  813. except Exception as e:
  814. return ToolResult(
  815. title="获取元素映射失败",
  816. output="",
  817. error=f"Failed to get selector map: {str(e)}",
  818. long_term_memory="获取元素映射失败"
  819. )
  820. # ============================================================
  821. # JavaScript 执行工具 (JavaScript Tools)
  822. # ============================================================
  823. @tool()
  824. async def evaluate(code: str, uid: str = "") -> ToolResult:
  825. """
  826. 在页面中执行 JavaScript 代码
  827. Execute JavaScript code in the page context
  828. 允许在当前页面中执行任意 JavaScript 代码,用于复杂的页面操作或数据提取。
  829. Args:
  830. code: 要执行的 JavaScript 代码字符串
  831. uid: 用户 ID(由框架自动注入)
  832. Returns:
  833. ToolResult: 包含执行结果的工具返回对象
  834. Example:
  835. evaluate("document.title")
  836. evaluate("document.querySelectorAll('a').length")
  837. Note:
  838. - 代码在页面上下文中执行,可以访问 DOM 和全局变量
  839. - 返回值会被自动序列化为字符串
  840. - 执行结果限制在 20k 字符以内
  841. """
  842. try:
  843. browser, tools = await get_browser_session()
  844. result = await tools.evaluate(
  845. code=code,
  846. browser_session=browser
  847. )
  848. return action_result_to_tool_result(result, "执行 JavaScript")
  849. except Exception as e:
  850. return ToolResult(
  851. title="JavaScript 执行失败",
  852. output="",
  853. error=f"Failed to execute JavaScript: {str(e)}",
  854. long_term_memory="JavaScript 执行失败"
  855. )
  856. @tool()
  857. async def ensure_login_with_cookies(cookie_type: str, url: str = "https://www.xiaohongshu.com", uid: str = "") -> ToolResult:
  858. """
  859. 检查登录状态并在需要时注入 cookies
  860. """
  861. try:
  862. browser, tools = await get_browser_session()
  863. if url:
  864. await tools.navigate(url=url, browser_session=browser)
  865. await tools.wait(seconds=2, browser_session=browser)
  866. check_login_js = """
  867. (function() {
  868. const loginBtn = document.querySelector('[class*="login"]') ||
  869. document.querySelector('[href*="login"]') ||
  870. Array.from(document.querySelectorAll('button, a')).find(el => (el.textContent || '').includes('登录'));
  871. const userInfo = document.querySelector('[class*="user"]') ||
  872. document.querySelector('[class*="avatar"]');
  873. return {
  874. needLogin: !!loginBtn && !userInfo,
  875. hasLoginBtn: !!loginBtn,
  876. hasUserInfo: !!userInfo
  877. };
  878. })()
  879. """
  880. result = await tools.evaluate(code=check_login_js, browser_session=browser)
  881. status_output = result.extracted_content
  882. if isinstance(status_output, str) and status_output.startswith("Result: "):
  883. status_output = status_output[8:]
  884. login_info: Dict[str, Any] = {}
  885. if isinstance(status_output, str):
  886. try:
  887. login_info = json.loads(status_output)
  888. except Exception:
  889. login_info = {}
  890. elif isinstance(status_output, dict):
  891. login_info = status_output
  892. if not login_info.get("needLogin"):
  893. output = json.dumps({"need_login": False}, ensure_ascii=False)
  894. return ToolResult(
  895. title="已登录",
  896. output=output,
  897. long_term_memory=output
  898. )
  899. row = _fetch_cookie_row(cookie_type)
  900. cookie_value = _extract_cookie_value(row)
  901. if not cookie_value:
  902. output = json.dumps({"need_login": True, "cookies_count": 0}, ensure_ascii=False)
  903. return ToolResult(
  904. title="未找到 cookies",
  905. output=output,
  906. error="未找到 cookies",
  907. long_term_memory=output
  908. )
  909. domain, base_url = _cookie_domain_for_type(cookie_type, url)
  910. cookies = _normalize_cookies(cookie_value, domain, base_url)
  911. if not cookies:
  912. output = json.dumps({"need_login": True, "cookies_count": 0}, ensure_ascii=False)
  913. return ToolResult(
  914. title="cookies 解析失败",
  915. output=output,
  916. error="cookies 解析失败",
  917. long_term_memory=output
  918. )
  919. await browser._cdp_set_cookies(cookies)
  920. if url:
  921. await tools.navigate(url=url, browser_session=browser)
  922. await tools.wait(seconds=2, browser_session=browser)
  923. output = json.dumps({"need_login": True, "cookies_count": len(cookies)}, ensure_ascii=False)
  924. return ToolResult(
  925. title="已注入 cookies",
  926. output=output,
  927. long_term_memory=output
  928. )
  929. except Exception as e:
  930. return ToolResult(
  931. title="登录检查失败",
  932. output="",
  933. error=str(e),
  934. long_term_memory="登录检查失败"
  935. )
  936. # ============================================================
  937. # 文件系统工具 (File System Tools)
  938. # ============================================================
  939. @tool()
  940. async def write_file(file_name: str, content: str, append: bool = False, uid: str = "") -> ToolResult:
  941. """
  942. 写入文件到本地文件系统
  943. Write content to a local file
  944. 支持多种文件格式的写入操作。
  945. Args:
  946. file_name: 文件名(包含扩展名)
  947. content: 要写入的文件内容
  948. append: 是否追加模式(默认 False,覆盖写入)
  949. uid: 用户 ID(由框架自动注入)
  950. Returns:
  951. ToolResult: 包含写入结果的工具返回对象
  952. Example:
  953. write_file("output.txt", "Hello World")
  954. write_file("data.json", '{"key": "value"}')
  955. Note:
  956. 支持的文件格式: .txt, .md, .json, .jsonl, .csv, .pdf
  957. """
  958. try:
  959. browser, tools = await get_browser_session()
  960. result = await tools.write_file(
  961. file_name=file_name,
  962. content=content,
  963. append=append,
  964. file_system=_file_system
  965. )
  966. return action_result_to_tool_result(result, f"写入文件: {file_name}")
  967. except Exception as e:
  968. return ToolResult(
  969. title="写入文件失败",
  970. output="",
  971. error=f"Failed to write file: {str(e)}",
  972. long_term_memory=f"写入文件 {file_name} 失败"
  973. )
  974. @tool()
  975. async def read_file(file_name: str, uid: str = "") -> ToolResult:
  976. """
  977. 读取文件内容
  978. Read content from a local file
  979. 支持多种文件格式的读取操作。
  980. Args:
  981. file_name: 文件名(包含扩展名)
  982. uid: 用户 ID(由框架自动注入)
  983. Returns:
  984. ToolResult: 包含文件内容的工具返回对象
  985. Example:
  986. read_file("input.txt")
  987. read_file("data.json")
  988. Note:
  989. 支持的文件格式: 文本文件、PDF、DOCX、图片等
  990. """
  991. try:
  992. browser, tools = await get_browser_session()
  993. result = await tools.read_file(
  994. file_name=file_name,
  995. available_file_paths=[],
  996. file_system=_file_system
  997. )
  998. return action_result_to_tool_result(result, f"读取文件: {file_name}")
  999. except Exception as e:
  1000. return ToolResult(
  1001. title="读取文件失败",
  1002. output="",
  1003. error=f"Failed to read file: {str(e)}",
  1004. long_term_memory=f"读取文件 {file_name} 失败"
  1005. )
  1006. @tool()
  1007. async def replace_file(file_name: str, old_str: str, new_str: str, uid: str = "") -> ToolResult:
  1008. """
  1009. 替换文件中的特定文本
  1010. Replace specific text in a file
  1011. 在文件中查找并替换指定的文本内容。
  1012. Args:
  1013. file_name: 文件名(包含扩展名)
  1014. old_str: 要替换的文本
  1015. new_str: 新文本
  1016. uid: 用户 ID(由框架自动注入)
  1017. Returns:
  1018. ToolResult: 包含替换结果的工具返回对象
  1019. Example:
  1020. replace_file("config.txt", "old_value", "new_value")
  1021. Note:
  1022. - 会替换文件中所有匹配的文本
  1023. - 如果找不到要替换的文本,会返回警告
  1024. """
  1025. try:
  1026. browser, tools = await get_browser_session()
  1027. result = await tools.replace_file(
  1028. file_name=file_name,
  1029. old_str=old_str,
  1030. new_str=new_str,
  1031. file_system=_file_system
  1032. )
  1033. return action_result_to_tool_result(result, f"替换文件内容: {file_name}")
  1034. except Exception as e:
  1035. return ToolResult(
  1036. title="替换文件失败",
  1037. output="",
  1038. error=f"Failed to replace file content: {str(e)}",
  1039. long_term_memory=f"替换文件 {file_name} 失败"
  1040. )
  1041. # ============================================================
  1042. # 等待用户操作工具 (Wait for User Action)
  1043. # ============================================================
  1044. @tool()
  1045. async def wait_for_user_action(message: str = "Please complete the action in browser",
  1046. timeout: int = 300, uid: str = "") -> ToolResult:
  1047. """
  1048. 等待用户在浏览器中完成操作(如登录)
  1049. Wait for user to complete an action in the browser (e.g., login)
  1050. 暂停自动化流程,等待用户手动完成某些操作(如登录、验证码等)。
  1051. Args:
  1052. message: 提示用户需要完成的操作
  1053. timeout: 最大等待时间(秒),默认 300 秒(5 分钟)
  1054. uid: 用户 ID(由框架自动注入)
  1055. Returns:
  1056. ToolResult: 包含等待结果的工具返回对象
  1057. Example:
  1058. wait_for_user_action("Please login to Xiaohongshu", timeout=180)
  1059. wait_for_user_action("Please complete the CAPTCHA", timeout=60)
  1060. Note:
  1061. - 用户需要在浏览器窗口中手动完成操作
  1062. - 完成后按回车键继续
  1063. - 超时后会自动继续执行
  1064. """
  1065. try:
  1066. import asyncio
  1067. print(f"\n{'='*60}")
  1068. print(f"⏸️ WAITING FOR USER ACTION")
  1069. print(f"{'='*60}")
  1070. print(f"📝 {message}")
  1071. print(f"⏱️ Timeout: {timeout} seconds")
  1072. print(f"\n👉 Please complete the action in the browser window")
  1073. print(f"👉 Press ENTER when done, or wait for timeout")
  1074. print(f"{'='*60}\n")
  1075. # Wait for user input or timeout
  1076. try:
  1077. loop = asyncio.get_event_loop()
  1078. # Wait for either user input or timeout
  1079. await asyncio.wait_for(
  1080. loop.run_in_executor(None, input),
  1081. timeout=timeout
  1082. )
  1083. return ToolResult(
  1084. title="用户操作完成",
  1085. output=f"User completed: {message}",
  1086. long_term_memory=f"用户完成操作: {message}"
  1087. )
  1088. except asyncio.TimeoutError:
  1089. return ToolResult(
  1090. title="用户操作超时",
  1091. output=f"Timeout waiting for: {message}",
  1092. long_term_memory=f"等待用户操作超时: {message}"
  1093. )
  1094. except Exception as e:
  1095. return ToolResult(
  1096. title="等待用户操作失败",
  1097. output="",
  1098. error=f"Failed to wait for user action: {str(e)}",
  1099. long_term_memory="等待用户操作失败"
  1100. )
  1101. # ============================================================
  1102. # 任务完成工具 (Task Completion)
  1103. # ============================================================
  1104. @tool()
  1105. async def done(text: str, success: bool = True,
  1106. files_to_display: Optional[List[str]] = None, uid: str = "") -> ToolResult:
  1107. """
  1108. 标记任务完成并返回最终消息
  1109. Mark the task as complete and return final message to user
  1110. Args:
  1111. text: 给用户的最终消息
  1112. success: 任务是否成功完成
  1113. files_to_display: 可选的要显示的文件路径列表
  1114. uid: 用户 ID(由框架自动注入)
  1115. Returns:
  1116. ToolResult: 完成结果
  1117. Example:
  1118. done("任务已完成,提取了10个产品信息", success=True)
  1119. """
  1120. try:
  1121. browser, tools = await get_browser_session()
  1122. result = await tools.done(
  1123. text=text,
  1124. success=success,
  1125. files_to_display=files_to_display,
  1126. file_system=_file_system
  1127. )
  1128. return action_result_to_tool_result(result, "任务完成")
  1129. except Exception as e:
  1130. return ToolResult(
  1131. title="标记任务完成失败",
  1132. output="",
  1133. error=f"Failed to complete task: {str(e)}",
  1134. long_term_memory="标记任务完成失败"
  1135. )
  1136. # ============================================================
  1137. # 导出所有工具函数(供外部使用)
  1138. # ============================================================
  1139. __all__ = [
  1140. # 会话管理
  1141. 'init_browser_session',
  1142. 'get_browser_session',
  1143. 'cleanup_browser_session',
  1144. 'kill_browser_session',
  1145. # 导航类工具
  1146. 'navigate_to_url',
  1147. 'search_web',
  1148. 'go_back',
  1149. 'wait',
  1150. # 元素交互工具
  1151. 'click_element',
  1152. 'input_text',
  1153. 'send_keys',
  1154. 'upload_file',
  1155. # 滚动和视图工具
  1156. 'scroll_page',
  1157. 'find_text',
  1158. 'screenshot',
  1159. # 标签页管理工具
  1160. 'switch_tab',
  1161. 'close_tab',
  1162. # 下拉框工具
  1163. 'get_dropdown_options',
  1164. 'select_dropdown_option',
  1165. # 内容提取工具
  1166. 'extract_content',
  1167. 'get_page_html',
  1168. 'get_selector_map',
  1169. # JavaScript 执行工具
  1170. 'evaluate',
  1171. 'ensure_login_with_cookies',
  1172. # 文件系统工具
  1173. 'write_file',
  1174. 'read_file',
  1175. 'replace_file',
  1176. # 等待用户操作
  1177. 'wait_for_user_action',
  1178. # 任务完成
  1179. 'done',
  1180. ]