baseClass.py 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512
  1. """
  2. Browser-Use 原生工具适配器
  3. Native Browser-Use Tools Adapter
  4. 直接使用 browser-use 的原生类(BrowserSession, Tools)实现所有浏览器操作工具。
  5. 不依赖 Playwright,完全基于 CDP 协议。
  6. 核心特性:
  7. 1. 浏览器会话持久化 - 只启动一次浏览器
  8. 2. 状态自动保持 - 登录状态、Cookie、LocalStorage 等
  9. 3. 完整的底层访问 - 可以直接使用 CDP 协议
  10. 4. 性能优异 - 避免频繁创建/销毁浏览器实例
  11. 使用方法:
  12. 1. 在 Agent 初始化时调用 init_browser_session()
  13. 2. 使用各个工具函数执行浏览器操作
  14. 3. 任务结束时调用 cleanup_browser_session()
  15. """
  16. import sys
  17. import os
  18. import json
  19. from typing import Optional, List, Dict, Any, Tuple
  20. from pathlib import Path
  21. from urllib.parse import urlparse
  22. # 将项目根目录添加到 Python 路径
  23. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  24. # 导入框架的工具装饰器和结果类
  25. from agent.tools import tool, ToolResult
  26. from agent.tools.builtin.browser.sync_mysql_help import mysql
  27. # 导入 browser-use 的核心类
  28. from browser_use import BrowserSession, BrowserProfile
  29. from browser_use.tools.service import Tools
  30. from browser_use.agent.views import ActionResult
  31. from browser_use.filesystem.file_system import FileSystem
  32. # ============================================================
  33. # 全局浏览器会话管理
  34. # ============================================================
# Module-level singletons shared by every tool in this file.
# All three start as None; init_browser_session() fills them in and
# cleanup_browser_session() / kill_browser_session() reset them to None.
_browser_session: Optional[BrowserSession] = None
_browser_tools: Optional[Tools] = None
_file_system: Optional[FileSystem] = None
  39. async def init_browser_session(
  40. headless: bool = False,
  41. user_data_dir: Optional[str] = None,
  42. profile_name: str = "default",
  43. browser_profile: Optional[BrowserProfile] = None,
  44. use_cloud: bool = False,
  45. **kwargs
  46. ) -> tuple[BrowserSession, Tools]:
  47. """
  48. 初始化全局浏览器会话
  49. Args:
  50. headless: 是否无头模式
  51. user_data_dir: 用户数据目录(用于保存登录状态)
  52. profile_name: 配置文件名称
  53. browser_profile: BrowserProfile 对象(用于预设 cookies 等)
  54. use_cloud: 是否使用云浏览器(默认 False,使用本地浏览器)
  55. **kwargs: 其他 BrowserSession 参数
  56. Returns:
  57. (BrowserSession, Tools) 元组
  58. """
  59. global _browser_session, _browser_tools, _file_system
  60. if _browser_session is not None:
  61. return _browser_session, _browser_tools
  62. # 设置用户数据目录(持久化登录状态)
  63. if user_data_dir is None and profile_name and not use_cloud:
  64. user_data_dir = str(Path.home() / ".browser_use" / "profiles" / profile_name)
  65. Path(user_data_dir).mkdir(parents=True, exist_ok=True)
  66. # 创建浏览器会话
  67. session_params = {
  68. "headless": headless,
  69. }
  70. if use_cloud:
  71. # 云浏览器模式
  72. session_params["use_cloud"] = True
  73. print("🌐 使用云浏览器模式")
  74. else:
  75. # 本地浏览器模式
  76. session_params["is_local"] = True
  77. # macOS 上显式指定 Chrome 路径
  78. import platform
  79. if platform.system() == "Darwin": # macOS
  80. chrome_path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
  81. if Path(chrome_path).exists():
  82. session_params["executable_path"] = chrome_path
  83. # 只在有值时才添加 user_data_dir
  84. if user_data_dir:
  85. session_params["user_data_dir"] = user_data_dir
  86. # 只在有值时才添加 browser_profile
  87. if browser_profile:
  88. session_params["browser_profile"] = browser_profile
  89. # 合并其他参数
  90. session_params.update(kwargs)
  91. _browser_session = BrowserSession(**session_params)
  92. # 启动浏览器
  93. await _browser_session.start()
  94. # 创建工具实例
  95. _browser_tools = Tools()
  96. # 创建文件系统实例(用于文件操作)
  97. base_dir = Path.cwd() / ".browser_use_files"
  98. base_dir.mkdir(parents=True, exist_ok=True)
  99. _file_system = FileSystem(base_dir=str(base_dir))
  100. return _browser_session, _browser_tools
  101. async def get_browser_session() -> tuple[BrowserSession, Tools]:
  102. """
  103. 获取当前浏览器会话,如果不存在则自动创建
  104. Returns:
  105. (BrowserSession, Tools) 元组
  106. """
  107. global _browser_session, _browser_tools
  108. if _browser_session is None:
  109. await init_browser_session()
  110. return _browser_session, _browser_tools
  111. async def cleanup_browser_session():
  112. """
  113. 清理浏览器会话
  114. 优雅地停止浏览器但保留会话状态
  115. """
  116. global _browser_session, _browser_tools, _file_system
  117. if _browser_session is not None:
  118. await _browser_session.stop()
  119. _browser_session = None
  120. _browser_tools = None
  121. _file_system = None
  122. async def kill_browser_session():
  123. """
  124. 强制终止浏览器会话
  125. 完全关闭浏览器进程
  126. """
  127. global _browser_session, _browser_tools, _file_system
  128. if _browser_session is not None:
  129. await _browser_session.kill()
  130. _browser_session = None
  131. _browser_tools = None
  132. _file_system = None
  133. # ============================================================
  134. # 辅助函数:ActionResult 转 ToolResult
  135. # ============================================================
  136. def action_result_to_tool_result(result: ActionResult, title: str = None) -> ToolResult:
  137. """
  138. 将 browser-use 的 ActionResult 转换为框架的 ToolResult
  139. Args:
  140. result: browser-use 的 ActionResult
  141. title: 可选的标题(如果不提供则从 result 推断)
  142. Returns:
  143. ToolResult
  144. """
  145. if result.error:
  146. return ToolResult(
  147. title=title or "操作失败",
  148. output="",
  149. error=result.error,
  150. long_term_memory=result.long_term_memory or result.error
  151. )
  152. return ToolResult(
  153. title=title or "操作成功",
  154. output=result.extracted_content or "",
  155. long_term_memory=result.long_term_memory or result.extracted_content or "",
  156. metadata=result.metadata or {}
  157. )
  158. def _cookie_domain_for_type(cookie_type: str, url: str) -> Tuple[str, str]:
  159. if cookie_type:
  160. key = cookie_type.lower()
  161. if key in {"xiaohongshu", "xhs"}:
  162. return ".xiaohongshu.com", "https://www.xiaohongshu.com"
  163. parsed = urlparse(url or "")
  164. domain = parsed.netloc or ""
  165. domain = domain.replace("www.", "")
  166. if domain:
  167. domain = f".{domain}"
  168. base_url = f"{parsed.scheme}://{parsed.netloc}" if parsed.scheme and parsed.netloc else url
  169. return domain, base_url
  170. def _parse_cookie_string(cookie_str: str, domain: str, url: str) -> List[Dict[str, Any]]:
  171. cookies: List[Dict[str, Any]] = []
  172. if not cookie_str:
  173. return cookies
  174. parts = cookie_str.split(";")
  175. for part in parts:
  176. if not part:
  177. continue
  178. if "=" not in part:
  179. continue
  180. name, value = part.split("=", 1)
  181. cookie = {
  182. "name": str(name).strip(),
  183. "value": str(value).strip(),
  184. "domain": domain,
  185. "path": "/",
  186. "expires": -1,
  187. "httpOnly": False,
  188. "secure": True,
  189. "sameSite": "None"
  190. }
  191. if url:
  192. cookie["url"] = url
  193. cookies.append(cookie)
  194. return cookies
  195. def _normalize_cookies(cookie_value: Any, domain: str, url: str) -> List[Dict[str, Any]]:
  196. if cookie_value is None:
  197. return []
  198. if isinstance(cookie_value, list):
  199. return cookie_value
  200. if isinstance(cookie_value, dict):
  201. if "cookies" in cookie_value:
  202. return _normalize_cookies(cookie_value.get("cookies"), domain, url)
  203. if "name" in cookie_value and "value" in cookie_value:
  204. return [cookie_value]
  205. return []
  206. if isinstance(cookie_value, (bytes, bytearray)):
  207. cookie_value = cookie_value.decode("utf-8", errors="ignore")
  208. if isinstance(cookie_value, str):
  209. text = cookie_value.strip()
  210. if not text:
  211. return []
  212. try:
  213. parsed = json.loads(text)
  214. except Exception:
  215. parsed = None
  216. if parsed is not None:
  217. return _normalize_cookies(parsed, domain, url)
  218. return _parse_cookie_string(text, domain, url)
  219. return []
  220. def _extract_cookie_value(row: Optional[Dict[str, Any]]) -> Any:
  221. if not row:
  222. return None
  223. # 优先使用 cookies 字段
  224. if "cookies" in row:
  225. return row["cookies"]
  226. # 兼容其他可能的字段名
  227. for key, value in row.items():
  228. if "cookie" in key.lower():
  229. return value
  230. return None
  231. def _fetch_cookie_row(cookie_type: str) -> Optional[Dict[str, Any]]:
  232. if not cookie_type:
  233. return None
  234. try:
  235. return mysql.fetchone(
  236. "select * from agent_channel_cookies where type=%s limit 1",
  237. (cookie_type,)
  238. )
  239. except Exception:
  240. return None
  241. def _fetch_profile_id(cookie_type: str) -> Optional[str]:
  242. """从数据库获取 cloud_profile_id"""
  243. if not cookie_type:
  244. return None
  245. try:
  246. row = mysql.fetchone(
  247. "select profileId from agent_channel_cookies where type=%s limit 1",
  248. (cookie_type,)
  249. )
  250. if row and "profileId" in row:
  251. return row["profileId"]
  252. return None
  253. except Exception:
  254. return None
  255. # ============================================================
  256. # 导航类工具 (Navigation Tools)
  257. # ============================================================
  258. @tool()
  259. async def navigate_to_url(url: str, new_tab: bool = False, uid: str = "") -> ToolResult:
  260. """
  261. 导航到指定的 URL
  262. Navigate to a specific URL
  263. 使用 browser-use 的原生导航功能,支持在新标签页打开。
  264. Args:
  265. url: 要访问的 URL 地址
  266. new_tab: 是否在新标签页中打开(默认 False)
  267. uid: 用户 ID(由框架自动注入)
  268. Returns:
  269. ToolResult: 包含导航结果的工具返回对象
  270. Example:
  271. navigate_to_url("https://www.baidu.com")
  272. navigate_to_url("https://www.google.com", new_tab=True)
  273. """
  274. try:
  275. browser, tools = await get_browser_session()
  276. # 使用 browser-use 的 navigate 工具
  277. result = await tools.navigate(
  278. url=url,
  279. new_tab=new_tab,
  280. browser_session=browser
  281. )
  282. return action_result_to_tool_result(result, f"导航到 {url}")
  283. except Exception as e:
  284. return ToolResult(
  285. title="导航失败",
  286. output="",
  287. error=f"Failed to navigate to {url}: {str(e)}",
  288. long_term_memory=f"导航到 {url} 失败"
  289. )
  290. @tool()
  291. async def search_web(query: str, engine: str = "google", uid: str = "") -> ToolResult:
  292. """
  293. 使用搜索引擎搜索
  294. Search the web using a search engine
  295. Args:
  296. query: 搜索关键词
  297. engine: 搜索引擎 (google, duckduckgo, bing) - 默认: google
  298. uid: 用户 ID(由框架自动注入)
  299. Returns:
  300. ToolResult: 搜索结果
  301. Example:
  302. search_web("Python async programming", engine="google")
  303. """
  304. try:
  305. browser, tools = await get_browser_session()
  306. # 使用 browser-use 的 search 工具
  307. result = await tools.search(
  308. query=query,
  309. engine=engine,
  310. browser_session=browser
  311. )
  312. return action_result_to_tool_result(result, f"搜索: {query}")
  313. except Exception as e:
  314. return ToolResult(
  315. title="搜索失败",
  316. output="",
  317. error=f"Search failed: {str(e)}",
  318. long_term_memory=f"搜索 '{query}' 失败"
  319. )
  320. @tool()
  321. async def go_back(uid: str = "") -> ToolResult:
  322. """
  323. 返回到上一个页面
  324. Go back to the previous page
  325. 模拟浏览器的"后退"按钮功能。
  326. Args:
  327. uid: 用户 ID(由框架自动注入)
  328. Returns:
  329. ToolResult: 包含返回操作结果的工具返回对象
  330. """
  331. try:
  332. browser, tools = await get_browser_session()
  333. result = await tools.go_back(browser_session=browser)
  334. return action_result_to_tool_result(result, "返回上一页")
  335. except Exception as e:
  336. return ToolResult(
  337. title="返回失败",
  338. output="",
  339. error=f"Failed to go back: {str(e)}",
  340. long_term_memory="返回上一页失败"
  341. )
  342. @tool()
  343. async def wait(seconds: int = 3, uid: str = "") -> ToolResult:
  344. """
  345. 等待指定的秒数
  346. Wait for a specified number of seconds
  347. 用于等待页面加载、动画完成或其他异步操作。
  348. Args:
  349. seconds: 等待时间(秒),最大30秒
  350. uid: 用户 ID(由框架自动注入)
  351. Returns:
  352. ToolResult: 包含等待操作结果的工具返回对象
  353. Example:
  354. wait(5) # 等待5秒
  355. """
  356. try:
  357. browser, tools = await get_browser_session()
  358. result = await tools.wait(seconds=seconds, browser_session=browser)
  359. return action_result_to_tool_result(result, f"等待 {seconds} 秒")
  360. except Exception as e:
  361. return ToolResult(
  362. title="等待失败",
  363. output="",
  364. error=f"Failed to wait: {str(e)}",
  365. long_term_memory="等待失败"
  366. )
  367. # ============================================================
  368. # 元素交互工具 (Element Interaction Tools)
  369. # ============================================================
  370. @tool()
  371. async def click_element(index: int, uid: str = "") -> ToolResult:
  372. """
  373. 通过索引点击页面元素
  374. Click an element by index
  375. Args:
  376. index: 元素索引(从浏览器状态中获取)
  377. uid: 用户 ID(由框架自动注入)
  378. Returns:
  379. ToolResult: 包含点击操作结果的工具返回对象
  380. Example:
  381. click_element(index=5)
  382. Note:
  383. 需要先通过 get_selector_map 获取页面元素索引
  384. """
  385. try:
  386. browser, tools = await get_browser_session()
  387. result = await tools.click(
  388. index=index,
  389. browser_session=browser
  390. )
  391. return action_result_to_tool_result(result, f"点击元素 {index}")
  392. except Exception as e:
  393. return ToolResult(
  394. title="点击失败",
  395. output="",
  396. error=f"Failed to click element {index}: {str(e)}",
  397. long_term_memory=f"点击元素 {index} 失败"
  398. )
  399. @tool()
  400. async def input_text(index: int, text: str, clear: bool = True, uid: str = "") -> ToolResult:
  401. """
  402. 在指定元素中输入文本
  403. Input text into an element
  404. Args:
  405. index: 元素索引(从浏览器状态中获取)
  406. text: 要输入的文本内容
  407. clear: 是否先清除现有文本(默认 True)
  408. uid: 用户 ID(由框架自动注入)
  409. Returns:
  410. ToolResult: 包含输入操作结果的工具返回对象
  411. Example:
  412. input_text(index=0, text="Hello World", clear=True)
  413. """
  414. try:
  415. browser, tools = await get_browser_session()
  416. result = await tools.input(
  417. index=index,
  418. text=text,
  419. clear=clear,
  420. browser_session=browser
  421. )
  422. return action_result_to_tool_result(result, f"输入文本到元素 {index}")
  423. except Exception as e:
  424. return ToolResult(
  425. title="输入失败",
  426. output="",
  427. error=f"Failed to input text into element {index}: {str(e)}",
  428. long_term_memory=f"输入文本失败"
  429. )
  430. @tool()
  431. async def send_keys(keys: str, uid: str = "") -> ToolResult:
  432. """
  433. 发送键盘按键或快捷键
  434. Send keyboard keys or shortcuts
  435. 支持发送单个按键、组合键和快捷键。
  436. Args:
  437. keys: 要发送的按键字符串
  438. - 单个按键: "Enter", "Escape", "PageDown", "Tab"
  439. - 组合键: "Control+o", "Shift+Tab", "Alt+F4"
  440. - 功能键: "F1", "F2", ..., "F12"
  441. uid: 用户 ID(由框架自动注入)
  442. Returns:
  443. ToolResult: 包含按键操作结果的工具返回对象
  444. Example:
  445. send_keys("Enter")
  446. send_keys("Control+A")
  447. """
  448. try:
  449. browser, tools = await get_browser_session()
  450. result = await tools.send_keys(
  451. keys=keys,
  452. browser_session=browser
  453. )
  454. return action_result_to_tool_result(result, f"发送按键: {keys}")
  455. except Exception as e:
  456. return ToolResult(
  457. title="发送按键失败",
  458. output="",
  459. error=f"Failed to send keys: {str(e)}",
  460. long_term_memory="发送按键失败"
  461. )
  462. @tool()
  463. async def upload_file(index: int, path: str, uid: str = "") -> ToolResult:
  464. """
  465. 上传文件到文件输入元素
  466. Upload a file to a file input element
  467. Args:
  468. index: 文件输入框的元素索引
  469. path: 要上传的文件路径(绝对路径)
  470. uid: 用户 ID(由框架自动注入)
  471. Returns:
  472. ToolResult: 包含上传操作结果的工具返回对象
  473. Example:
  474. upload_file(index=7, path="/path/to/file.pdf")
  475. Note:
  476. 文件必须存在且路径必须是绝对路径
  477. """
  478. try:
  479. browser, tools = await get_browser_session()
  480. result = await tools.upload_file(
  481. index=index,
  482. path=path,
  483. browser_session=browser,
  484. available_file_paths=[path],
  485. file_system=_file_system
  486. )
  487. return action_result_to_tool_result(result, f"上传文件: {path}")
  488. except Exception as e:
  489. return ToolResult(
  490. title="上传失败",
  491. output="",
  492. error=f"Failed to upload file: {str(e)}",
  493. long_term_memory=f"上传文件 {path} 失败"
  494. )
  495. # ============================================================
  496. # 滚动和视图工具 (Scroll & View Tools)
  497. # ============================================================
  498. @tool()
  499. async def scroll_page(down: bool = True, pages: float = 1.0,
  500. index: Optional[int] = None, uid: str = "") -> ToolResult:
  501. """
  502. 滚动页面或元素
  503. Scroll the page or a specific element
  504. Args:
  505. down: True 向下滚动,False 向上滚动
  506. pages: 滚动页数(0.5=半页,1=全页,10=滚动到底部/顶部)
  507. index: 可选,滚动特定元素(如下拉框内部)
  508. uid: 用户 ID(由框架自动注入)
  509. Returns:
  510. ToolResult: 滚动结果
  511. Example:
  512. scroll_page(down=True, pages=2.0) # 向下滚动2页
  513. scroll_page(down=False, pages=1.0) # 向上滚动1页
  514. """
  515. try:
  516. browser, tools = await get_browser_session()
  517. result = await tools.scroll(
  518. down=down,
  519. pages=pages,
  520. index=index,
  521. browser_session=browser
  522. )
  523. direction = "向下" if down else "向上"
  524. return action_result_to_tool_result(result, f"{direction}滚动 {pages} 页")
  525. except Exception as e:
  526. return ToolResult(
  527. title="滚动失败",
  528. output="",
  529. error=f"Failed to scroll: {str(e)}",
  530. long_term_memory="滚动失败"
  531. )
  532. @tool()
  533. async def find_text(text: str, uid: str = "") -> ToolResult:
  534. """
  535. 查找页面中的文本并滚动到该位置
  536. Find text on the page and scroll to it
  537. 在页面中搜索指定的文本,找到后自动滚动到该位置。
  538. Args:
  539. text: 要查找的文本内容
  540. uid: 用户 ID(由框架自动注入)
  541. Returns:
  542. ToolResult: 包含查找结果的工具返回对象
  543. Example:
  544. find_text("Privacy Policy")
  545. """
  546. try:
  547. browser, tools = await get_browser_session()
  548. result = await tools.find_text(
  549. text=text,
  550. browser_session=browser
  551. )
  552. return action_result_to_tool_result(result, f"查找文本: {text}")
  553. except Exception as e:
  554. return ToolResult(
  555. title="查找失败",
  556. output="",
  557. error=f"Failed to find text: {str(e)}",
  558. long_term_memory=f"查找文本 '{text}' 失败"
  559. )
  560. @tool()
  561. async def screenshot(uid: str = "") -> ToolResult:
  562. """
  563. 请求在下次观察中包含页面截图
  564. Request a screenshot to be included in the next observation
  565. 用于视觉检查页面状态,帮助理解页面布局和内容。
  566. Args:
  567. uid: 用户 ID(由框架自动注入)
  568. Returns:
  569. ToolResult: 包含截图请求结果的工具返回对象
  570. Example:
  571. screenshot()
  572. Note:
  573. 截图会在下次页面观察时自动包含在结果中。
  574. """
  575. try:
  576. browser, tools = await get_browser_session()
  577. result = await tools.screenshot(browser_session=browser)
  578. return action_result_to_tool_result(result, "截图请求")
  579. except Exception as e:
  580. return ToolResult(
  581. title="截图失败",
  582. output="",
  583. error=f"Failed to capture screenshot: {str(e)}",
  584. long_term_memory="截图失败"
  585. )
  586. # ============================================================
  587. # 标签页管理工具 (Tab Management Tools)
  588. # ============================================================
  589. @tool()
  590. async def switch_tab(tab_id: str, uid: str = "") -> ToolResult:
  591. """
  592. 切换到指定标签页
  593. Switch to a different browser tab
  594. Args:
  595. tab_id: 4字符标签ID(target_id 的最后4位)
  596. uid: 用户 ID(由框架自动注入)
  597. Returns:
  598. ToolResult: 切换结果
  599. Example:
  600. switch_tab(tab_id="a3f2")
  601. """
  602. try:
  603. browser, tools = await get_browser_session()
  604. result = await tools.switch(
  605. tab_id=tab_id,
  606. browser_session=browser
  607. )
  608. return action_result_to_tool_result(result, f"切换到标签页 {tab_id}")
  609. except Exception as e:
  610. return ToolResult(
  611. title="切换标签页失败",
  612. output="",
  613. error=f"Failed to switch tab: {str(e)}",
  614. long_term_memory=f"切换到标签页 {tab_id} 失败"
  615. )
  616. @tool()
  617. async def close_tab(tab_id: str, uid: str = "") -> ToolResult:
  618. """
  619. 关闭指定标签页
  620. Close a browser tab
  621. Args:
  622. tab_id: 4字符标签ID
  623. uid: 用户 ID(由框架自动注入)
  624. Returns:
  625. ToolResult: 关闭结果
  626. Example:
  627. close_tab(tab_id="a3f2")
  628. """
  629. try:
  630. browser, tools = await get_browser_session()
  631. result = await tools.close(
  632. tab_id=tab_id,
  633. browser_session=browser
  634. )
  635. return action_result_to_tool_result(result, f"关闭标签页 {tab_id}")
  636. except Exception as e:
  637. return ToolResult(
  638. title="关闭标签页失败",
  639. output="",
  640. error=f"Failed to close tab: {str(e)}",
  641. long_term_memory=f"关闭标签页 {tab_id} 失败"
  642. )
  643. # ============================================================
  644. # 下拉框工具 (Dropdown Tools)
  645. # ============================================================
  646. @tool()
  647. async def get_dropdown_options(index: int, uid: str = "") -> ToolResult:
  648. """
  649. 获取下拉框的所有选项
  650. Get options from a dropdown element
  651. Args:
  652. index: 下拉框的元素索引
  653. uid: 用户 ID(由框架自动注入)
  654. Returns:
  655. ToolResult: 包含所有选项的结果
  656. Example:
  657. get_dropdown_options(index=8)
  658. """
  659. try:
  660. browser, tools = await get_browser_session()
  661. result = await tools.dropdown_options(
  662. index=index,
  663. browser_session=browser
  664. )
  665. return action_result_to_tool_result(result, f"获取下拉框选项: {index}")
  666. except Exception as e:
  667. return ToolResult(
  668. title="获取下拉框选项失败",
  669. output="",
  670. error=f"Failed to get dropdown options: {str(e)}",
  671. long_term_memory=f"获取下拉框 {index} 选项失败"
  672. )
  673. @tool()
  674. async def select_dropdown_option(index: int, text: str, uid: str = "") -> ToolResult:
  675. """
  676. 选择下拉框选项
  677. Select an option from a dropdown
  678. Args:
  679. index: 下拉框的元素索引
  680. text: 要选择的选项文本(精确匹配)
  681. uid: 用户 ID(由框架自动注入)
  682. Returns:
  683. ToolResult: 选择结果
  684. Example:
  685. select_dropdown_option(index=8, text="Option 2")
  686. """
  687. try:
  688. browser, tools = await get_browser_session()
  689. result = await tools.select_dropdown(
  690. index=index,
  691. text=text,
  692. browser_session=browser
  693. )
  694. return action_result_to_tool_result(result, f"选择下拉框选项: {text}")
  695. except Exception as e:
  696. return ToolResult(
  697. title="选择下拉框选项失败",
  698. output="",
  699. error=f"Failed to select dropdown option: {str(e)}",
  700. long_term_memory=f"选择选项 '{text}' 失败"
  701. )
  702. # ============================================================
  703. # 内容提取工具 (Content Extraction Tools)
  704. # ============================================================
  705. @tool()
  706. async def extract_content(query: str, extract_links: bool = False,
  707. start_from_char: int = 0, uid: str = "") -> ToolResult:
  708. """
  709. 使用 LLM 从页面提取结构化数据
  710. Extract content from the current page using LLM
  711. Args:
  712. query: 提取查询(告诉 LLM 要提取什么内容)
  713. extract_links: 是否提取链接(默认 False,节省 token)
  714. start_from_char: 从哪个字符开始提取(用于分页提取大内容)
  715. uid: 用户 ID(由框架自动注入)
  716. Returns:
  717. ToolResult: 提取的内容
  718. Example:
  719. extract_content(query="提取页面上所有产品的名称和价格", extract_links=True)
  720. Note:
  721. 需要配置 page_extraction_llm,否则会失败
  722. 支持分页提取,最大100k字符
  723. """
  724. try:
  725. browser, tools = await get_browser_session()
  726. # 注意:extract 需要 page_extraction_llm 参数
  727. # 这里我们假设用户会在初始化时配置 LLM
  728. # 如果没有配置,会抛出异常
  729. result = await tools.extract(
  730. query=query,
  731. extract_links=extract_links,
  732. start_from_char=start_from_char,
  733. browser_session=browser,
  734. page_extraction_llm=None, # 需要用户配置
  735. file_system=_file_system
  736. )
  737. return action_result_to_tool_result(result, f"提取内容: {query}")
  738. except Exception as e:
  739. return ToolResult(
  740. title="内容提取失败",
  741. output="",
  742. error=f"Failed to extract content: {str(e)}",
  743. long_term_memory=f"提取内容失败: {query}"
  744. )
  745. @tool()
  746. async def get_page_html(uid: str = "") -> ToolResult:
  747. """
  748. 获取当前页面的完整 HTML
  749. Get the full HTML of the current page
  750. 返回当前页面的完整 HTML 源代码。
  751. Args:
  752. uid: 用户 ID(由框架自动注入)
  753. Returns:
  754. ToolResult: 包含页面 HTML 的工具返回对象
  755. Example:
  756. get_page_html()
  757. Note:
  758. - 返回的是完整的 HTML 源代码
  759. - 输出会被限制在 10000 字符以内(完整内容保存在 metadata 中)
  760. """
  761. try:
  762. browser, tools = await get_browser_session()
  763. # 使用 CDP 获取页面 HTML
  764. cdp = await browser.get_or_create_cdp_session()
  765. # 获取页面内容
  766. result = await cdp.cdp_client.send.Runtime.evaluate(
  767. params={'expression': 'document.documentElement.outerHTML'},
  768. session_id=cdp.session_id
  769. )
  770. html = result.get('result', {}).get('value', '')
  771. # 获取 URL 和标题
  772. url = await browser.get_current_page_url()
  773. title_result = await cdp.cdp_client.send.Runtime.evaluate(
  774. params={'expression': 'document.title'},
  775. session_id=cdp.session_id
  776. )
  777. title = title_result.get('result', {}).get('value', '')
  778. # 限制输出大小
  779. output_html = html
  780. if len(html) > 10000:
  781. output_html = html[:10000] + "... (truncated)"
  782. return ToolResult(
  783. title=f"获取 HTML: {url}",
  784. output=f"页面: {title}\nURL: {url}\n\nHTML:\n{output_html}",
  785. long_term_memory=f"获取 HTML: {url}",
  786. metadata={"url": url, "title": title, "html": html}
  787. )
  788. except Exception as e:
  789. return ToolResult(
  790. title="获取 HTML 失败",
  791. output="",
  792. error=f"Failed to get page HTML: {str(e)}",
  793. long_term_memory="获取 HTML 失败"
  794. )
  795. @tool()
  796. async def get_selector_map(uid: str = "") -> ToolResult:
  797. """
  798. 获取当前页面的元素索引映射
  799. Get the selector map of interactive elements on the current page
  800. 返回页面所有可交互元素的索引字典,用于后续的元素操作。
  801. Args:
  802. uid: 用户 ID(由框架自动注入)
  803. Returns:
  804. ToolResult: 包含元素映射的工具返回对象
  805. Example:
  806. get_selector_map()
  807. Note:
  808. 返回的索引可以用于 click_element, input_text 等操作
  809. """
  810. try:
  811. browser, tools = await get_browser_session()
  812. # 获取选择器映射
  813. selector_map = await browser.get_selector_map()
  814. # 构建输出信息
  815. elements_info = []
  816. for index, node in list(selector_map.items())[:20]: # 只显示前20个
  817. tag = node.tag_name
  818. attrs = node.attributes or {}
  819. text = attrs.get('aria-label') or attrs.get('placeholder') or attrs.get('value', '')
  820. elements_info.append(f"索引 {index}: <{tag}> {text[:50]}")
  821. output = f"找到 {len(selector_map)} 个交互元素\n\n"
  822. output += "\n".join(elements_info)
  823. if len(selector_map) > 20:
  824. output += f"\n... 还有 {len(selector_map) - 20} 个元素"
  825. return ToolResult(
  826. title="获取元素映射",
  827. output=output,
  828. long_term_memory=f"获取到 {len(selector_map)} 个交互元素",
  829. metadata={"selector_map": {k: str(v) for k, v in list(selector_map.items())[:100]}}
  830. )
  831. except Exception as e:
  832. return ToolResult(
  833. title="获取元素映射失败",
  834. output="",
  835. error=f"Failed to get selector map: {str(e)}",
  836. long_term_memory="获取元素映射失败"
  837. )
  838. # ============================================================
  839. # JavaScript 执行工具 (JavaScript Tools)
  840. # ============================================================
  841. @tool()
  842. async def evaluate(code: str, uid: str = "") -> ToolResult:
  843. """
  844. 在页面中执行 JavaScript 代码
  845. Execute JavaScript code in the page context
  846. 允许在当前页面中执行任意 JavaScript 代码,用于复杂的页面操作或数据提取。
  847. Args:
  848. code: 要执行的 JavaScript 代码字符串
  849. uid: 用户 ID(由框架自动注入)
  850. Returns:
  851. ToolResult: 包含执行结果的工具返回对象
  852. Example:
  853. evaluate("document.title")
  854. evaluate("document.querySelectorAll('a').length")
  855. Note:
  856. - 代码在页面上下文中执行,可以访问 DOM 和全局变量
  857. - 返回值会被自动序列化为字符串
  858. - 执行结果限制在 20k 字符以内
  859. """
  860. try:
  861. browser, tools = await get_browser_session()
  862. result = await tools.evaluate(
  863. code=code,
  864. browser_session=browser
  865. )
  866. return action_result_to_tool_result(result, "执行 JavaScript")
  867. except Exception as e:
  868. return ToolResult(
  869. title="JavaScript 执行失败",
  870. output="",
  871. error=f"Failed to execute JavaScript: {str(e)}",
  872. long_term_memory="JavaScript 执行失败"
  873. )
  874. @tool()
  875. async def ensure_login_with_cookies(cookie_type: str, url: str = "https://www.xiaohongshu.com", uid: str = "") -> ToolResult:
  876. """
  877. 检查登录状态并在需要时注入 cookies
  878. """
  879. try:
  880. browser, tools = await get_browser_session()
  881. if url:
  882. await tools.navigate(url=url, browser_session=browser)
  883. await tools.wait(seconds=2, browser_session=browser)
  884. check_login_js = """
  885. (function() {
  886. const loginBtn = document.querySelector('[class*="login"]') ||
  887. document.querySelector('[href*="login"]') ||
  888. Array.from(document.querySelectorAll('button, a')).find(el => (el.textContent || '').includes('登录'));
  889. const userInfo = document.querySelector('[class*="user"]') ||
  890. document.querySelector('[class*="avatar"]');
  891. return {
  892. needLogin: !!loginBtn && !userInfo,
  893. hasLoginBtn: !!loginBtn,
  894. hasUserInfo: !!userInfo
  895. };
  896. })()
  897. """
  898. result = await tools.evaluate(code=check_login_js, browser_session=browser)
  899. status_output = result.extracted_content
  900. if isinstance(status_output, str) and status_output.startswith("Result: "):
  901. status_output = status_output[8:]
  902. login_info: Dict[str, Any] = {}
  903. if isinstance(status_output, str):
  904. try:
  905. login_info = json.loads(status_output)
  906. except Exception:
  907. login_info = {}
  908. elif isinstance(status_output, dict):
  909. login_info = status_output
  910. if not login_info.get("needLogin"):
  911. output = json.dumps({"need_login": False}, ensure_ascii=False)
  912. return ToolResult(
  913. title="已登录",
  914. output=output,
  915. long_term_memory=output
  916. )
  917. row = _fetch_cookie_row(cookie_type)
  918. cookie_value = _extract_cookie_value(row)
  919. if not cookie_value:
  920. output = json.dumps({"need_login": True, "cookies_count": 0}, ensure_ascii=False)
  921. return ToolResult(
  922. title="未找到 cookies",
  923. output=output,
  924. error="未找到 cookies",
  925. long_term_memory=output
  926. )
  927. domain, base_url = _cookie_domain_for_type(cookie_type, url)
  928. cookies = _normalize_cookies(cookie_value, domain, base_url)
  929. if not cookies:
  930. output = json.dumps({"need_login": True, "cookies_count": 0}, ensure_ascii=False)
  931. return ToolResult(
  932. title="cookies 解析失败",
  933. output=output,
  934. error="cookies 解析失败",
  935. long_term_memory=output
  936. )
  937. await browser._cdp_set_cookies(cookies)
  938. if url:
  939. await tools.navigate(url=url, browser_session=browser)
  940. await tools.wait(seconds=2, browser_session=browser)
  941. output = json.dumps({"need_login": True, "cookies_count": len(cookies)}, ensure_ascii=False)
  942. return ToolResult(
  943. title="已注入 cookies",
  944. output=output,
  945. long_term_memory=output
  946. )
  947. except Exception as e:
  948. return ToolResult(
  949. title="登录检查失败",
  950. output="",
  951. error=str(e),
  952. long_term_memory="登录检查失败"
  953. )
  954. # ============================================================
  955. # 文件系统工具 (File System Tools)
  956. # ============================================================
  957. @tool()
  958. async def write_file(file_name: str, content: str, append: bool = False, uid: str = "") -> ToolResult:
  959. """
  960. 写入文件到本地文件系统
  961. Write content to a local file
  962. 支持多种文件格式的写入操作。
  963. Args:
  964. file_name: 文件名(包含扩展名)
  965. content: 要写入的文件内容
  966. append: 是否追加模式(默认 False,覆盖写入)
  967. uid: 用户 ID(由框架自动注入)
  968. Returns:
  969. ToolResult: 包含写入结果的工具返回对象
  970. Example:
  971. write_file("output.txt", "Hello World")
  972. write_file("data.json", '{"key": "value"}')
  973. Note:
  974. 支持的文件格式: .txt, .md, .json, .jsonl, .csv, .pdf
  975. """
  976. try:
  977. browser, tools = await get_browser_session()
  978. result = await tools.write_file(
  979. file_name=file_name,
  980. content=content,
  981. append=append,
  982. file_system=_file_system
  983. )
  984. return action_result_to_tool_result(result, f"写入文件: {file_name}")
  985. except Exception as e:
  986. return ToolResult(
  987. title="写入文件失败",
  988. output="",
  989. error=f"Failed to write file: {str(e)}",
  990. long_term_memory=f"写入文件 {file_name} 失败"
  991. )
  992. @tool()
  993. async def read_file(file_name: str, uid: str = "") -> ToolResult:
  994. """
  995. 读取文件内容
  996. Read content from a local file
  997. 支持多种文件格式的读取操作。
  998. Args:
  999. file_name: 文件名(包含扩展名)
  1000. uid: 用户 ID(由框架自动注入)
  1001. Returns:
  1002. ToolResult: 包含文件内容的工具返回对象
  1003. Example:
  1004. read_file("input.txt")
  1005. read_file("data.json")
  1006. Note:
  1007. 支持的文件格式: 文本文件、PDF、DOCX、图片等
  1008. """
  1009. try:
  1010. browser, tools = await get_browser_session()
  1011. result = await tools.read_file(
  1012. file_name=file_name,
  1013. available_file_paths=[],
  1014. file_system=_file_system
  1015. )
  1016. return action_result_to_tool_result(result, f"读取文件: {file_name}")
  1017. except Exception as e:
  1018. return ToolResult(
  1019. title="读取文件失败",
  1020. output="",
  1021. error=f"Failed to read file: {str(e)}",
  1022. long_term_memory=f"读取文件 {file_name} 失败"
  1023. )
  1024. @tool()
  1025. async def replace_file(file_name: str, old_str: str, new_str: str, uid: str = "") -> ToolResult:
  1026. """
  1027. 替换文件中的特定文本
  1028. Replace specific text in a file
  1029. 在文件中查找并替换指定的文本内容。
  1030. Args:
  1031. file_name: 文件名(包含扩展名)
  1032. old_str: 要替换的文本
  1033. new_str: 新文本
  1034. uid: 用户 ID(由框架自动注入)
  1035. Returns:
  1036. ToolResult: 包含替换结果的工具返回对象
  1037. Example:
  1038. replace_file("config.txt", "old_value", "new_value")
  1039. Note:
  1040. - 会替换文件中所有匹配的文本
  1041. - 如果找不到要替换的文本,会返回警告
  1042. """
  1043. try:
  1044. browser, tools = await get_browser_session()
  1045. result = await tools.replace_file(
  1046. file_name=file_name,
  1047. old_str=old_str,
  1048. new_str=new_str,
  1049. file_system=_file_system
  1050. )
  1051. return action_result_to_tool_result(result, f"替换文件内容: {file_name}")
  1052. except Exception as e:
  1053. return ToolResult(
  1054. title="替换文件失败",
  1055. output="",
  1056. error=f"Failed to replace file content: {str(e)}",
  1057. long_term_memory=f"替换文件 {file_name} 失败"
  1058. )
  1059. # ============================================================
  1060. # 等待用户操作工具 (Wait for User Action)
  1061. # ============================================================
  1062. @tool()
  1063. async def wait_for_user_action(message: str = "Please complete the action in browser",
  1064. timeout: int = 300, uid: str = "") -> ToolResult:
  1065. """
  1066. 等待用户在浏览器中完成操作(如登录)
  1067. Wait for user to complete an action in the browser (e.g., login)
  1068. 暂停自动化流程,等待用户手动完成某些操作(如登录、验证码等)。
  1069. Args:
  1070. message: 提示用户需要完成的操作
  1071. timeout: 最大等待时间(秒),默认 300 秒(5 分钟)
  1072. uid: 用户 ID(由框架自动注入)
  1073. Returns:
  1074. ToolResult: 包含等待结果的工具返回对象
  1075. Example:
  1076. wait_for_user_action("Please login to Xiaohongshu", timeout=180)
  1077. wait_for_user_action("Please complete the CAPTCHA", timeout=60)
  1078. Note:
  1079. - 用户需要在浏览器窗口中手动完成操作
  1080. - 完成后按回车键继续
  1081. - 超时后会自动继续执行
  1082. """
  1083. try:
  1084. import asyncio
  1085. print(f"\n{'='*60}")
  1086. print(f"⏸️ WAITING FOR USER ACTION")
  1087. print(f"{'='*60}")
  1088. print(f"📝 {message}")
  1089. print(f"⏱️ Timeout: {timeout} seconds")
  1090. print(f"\n👉 Please complete the action in the browser window")
  1091. print(f"👉 Press ENTER when done, or wait for timeout")
  1092. print(f"{'='*60}\n")
  1093. # Wait for user input or timeout
  1094. try:
  1095. loop = asyncio.get_event_loop()
  1096. # Wait for either user input or timeout
  1097. await asyncio.wait_for(
  1098. loop.run_in_executor(None, input),
  1099. timeout=timeout
  1100. )
  1101. return ToolResult(
  1102. title="用户操作完成",
  1103. output=f"User completed: {message}",
  1104. long_term_memory=f"用户完成操作: {message}"
  1105. )
  1106. except asyncio.TimeoutError:
  1107. return ToolResult(
  1108. title="用户操作超时",
  1109. output=f"Timeout waiting for: {message}",
  1110. long_term_memory=f"等待用户操作超时: {message}"
  1111. )
  1112. except Exception as e:
  1113. return ToolResult(
  1114. title="等待用户操作失败",
  1115. output="",
  1116. error=f"Failed to wait for user action: {str(e)}",
  1117. long_term_memory="等待用户操作失败"
  1118. )
  1119. # ============================================================
  1120. # 任务完成工具 (Task Completion)
  1121. # ============================================================
  1122. @tool()
  1123. async def done(text: str, success: bool = True,
  1124. files_to_display: Optional[List[str]] = None, uid: str = "") -> ToolResult:
  1125. """
  1126. 标记任务完成并返回最终消息
  1127. Mark the task as complete and return final message to user
  1128. Args:
  1129. text: 给用户的最终消息
  1130. success: 任务是否成功完成
  1131. files_to_display: 可选的要显示的文件路径列表
  1132. uid: 用户 ID(由框架自动注入)
  1133. Returns:
  1134. ToolResult: 完成结果
  1135. Example:
  1136. done("任务已完成,提取了10个产品信息", success=True)
  1137. """
  1138. try:
  1139. browser, tools = await get_browser_session()
  1140. result = await tools.done(
  1141. text=text,
  1142. success=success,
  1143. files_to_display=files_to_display,
  1144. file_system=_file_system
  1145. )
  1146. return action_result_to_tool_result(result, "任务完成")
  1147. except Exception as e:
  1148. return ToolResult(
  1149. title="标记任务完成失败",
  1150. output="",
  1151. error=f"Failed to complete task: {str(e)}",
  1152. long_term_memory="标记任务完成失败"
  1153. )
  1154. # ============================================================
  1155. # 导出所有工具函数(供外部使用)
  1156. # ============================================================
  1157. __all__ = [
  1158. # 会话管理
  1159. 'init_browser_session',
  1160. 'get_browser_session',
  1161. 'cleanup_browser_session',
  1162. 'kill_browser_session',
  1163. # 导航类工具
  1164. 'navigate_to_url',
  1165. 'search_web',
  1166. 'go_back',
  1167. 'wait',
  1168. # 元素交互工具
  1169. 'click_element',
  1170. 'input_text',
  1171. 'send_keys',
  1172. 'upload_file',
  1173. # 滚动和视图工具
  1174. 'scroll_page',
  1175. 'find_text',
  1176. 'screenshot',
  1177. # 标签页管理工具
  1178. 'switch_tab',
  1179. 'close_tab',
  1180. # 下拉框工具
  1181. 'get_dropdown_options',
  1182. 'select_dropdown_option',
  1183. # 内容提取工具
  1184. 'extract_content',
  1185. 'get_page_html',
  1186. 'get_selector_map',
  1187. # JavaScript 执行工具
  1188. 'evaluate',
  1189. 'ensure_login_with_cookies',
  1190. # 文件系统工具
  1191. 'write_file',
  1192. 'read_file',
  1193. 'replace_file',
  1194. # 等待用户操作
  1195. 'wait_for_user_action',
  1196. # 任务完成
  1197. 'done',
  1198. ]