baseClass.py 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650
  1. """
  2. Browser-Use 原生工具适配器
  3. Native Browser-Use Tools Adapter
  4. 直接使用 browser-use 的原生类(BrowserSession, Tools)实现所有浏览器操作工具。
  5. 不依赖 Playwright,完全基于 CDP 协议。
  6. 核心特性:
  7. 1. 浏览器会话持久化 - 只启动一次浏览器
  8. 2. 状态自动保持 - 登录状态、Cookie、LocalStorage 等
  9. 3. 完整的底层访问 - 可以直接使用 CDP 协议
  10. 4. 性能优异 - 避免频繁创建/销毁浏览器实例
  11. 使用方法:
  12. 1. 在 Agent 初始化时调用 init_browser_session()
  13. 2. 使用各个工具函数执行浏览器操作
  14. 3. 任务结束时调用 cleanup_browser_session()
  15. """
  16. import sys
  17. import os
  18. import json
  19. import asyncio
  20. from typing import Optional, List, Dict, Any, Tuple
  21. from pathlib import Path
  22. from urllib.parse import urlparse
  23. # 将项目根目录添加到 Python 路径
  24. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  25. # 导入框架的工具装饰器和结果类
  26. from agent.tools import tool, ToolResult
  27. from agent.tools.builtin.browser.sync_mysql_help import mysql
  28. # 导入 browser-use 的核心类
  29. from browser_use import BrowserSession, BrowserProfile
  30. from browser_use.tools.service import Tools
  31. from browser_use.agent.views import ActionResult
  32. from browser_use.filesystem.file_system import FileSystem
  33. # ============================================================
  34. # 全局浏览器会话管理
  35. # ============================================================
# Module-level singletons shared by every tool in this file.
# They are populated by init_browser_session() and reset to None by
# cleanup_browser_session() / kill_browser_session().
_browser_session: Optional[BrowserSession] = None  # active browser-use session, if any
_browser_tools: Optional[Tools] = None  # browser-use Tools instance created alongside the session
_file_system: Optional[FileSystem] = None  # FileSystem passed to the upload/extract actions
  40. async def init_browser_session(
  41. headless: bool = False,
  42. user_data_dir: Optional[str] = None,
  43. profile_name: str = "default",
  44. browser_profile: Optional[BrowserProfile] = None,
  45. use_cloud: bool = False,
  46. **kwargs
  47. ) -> tuple[BrowserSession, Tools]:
  48. """
  49. 初始化全局浏览器会话
  50. Args:
  51. headless: 是否无头模式
  52. user_data_dir: 用户数据目录(用于保存登录状态)
  53. profile_name: 配置文件名称
  54. browser_profile: BrowserProfile 对象(用于预设 cookies 等)
  55. use_cloud: 是否使用云浏览器(默认 False,使用本地浏览器)
  56. **kwargs: 其他 BrowserSession 参数
  57. Returns:
  58. (BrowserSession, Tools) 元组
  59. """
  60. global _browser_session, _browser_tools, _file_system
  61. if _browser_session is not None:
  62. return _browser_session, _browser_tools
  63. # 设置用户数据目录(持久化登录状态)
  64. if user_data_dir is None and profile_name and not use_cloud:
  65. user_data_dir = str(Path.home() / ".browser_use" / "profiles" / profile_name)
  66. Path(user_data_dir).mkdir(parents=True, exist_ok=True)
  67. # 创建浏览器会话
  68. session_params = {
  69. "headless": headless,
  70. }
  71. if use_cloud:
  72. # 云浏览器模式
  73. session_params["use_cloud"] = True
  74. print("🌐 使用云浏览器模式")
  75. else:
  76. # 本地浏览器模式
  77. session_params["is_local"] = True
  78. # macOS 上显式指定 Chrome 路径
  79. import platform
  80. if platform.system() == "Darwin": # macOS
  81. chrome_path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
  82. if Path(chrome_path).exists():
  83. session_params["executable_path"] = chrome_path
  84. # 只在有值时才添加 user_data_dir
  85. if user_data_dir:
  86. session_params["user_data_dir"] = user_data_dir
  87. # 只在有值时才添加 browser_profile
  88. if browser_profile:
  89. session_params["browser_profile"] = browser_profile
  90. # 合并其他参数
  91. session_params.update(kwargs)
  92. _browser_session = BrowserSession(**session_params)
  93. # 启动浏览器
  94. await _browser_session.start()
  95. # 创建工具实例
  96. _browser_tools = Tools()
  97. # 创建文件系统实例(用于文件操作)
  98. base_dir = Path.cwd() / ".browser_use_files"
  99. base_dir.mkdir(parents=True, exist_ok=True)
  100. _file_system = FileSystem(base_dir=str(base_dir))
  101. return _browser_session, _browser_tools
  102. async def get_browser_session() -> tuple[BrowserSession, Tools]:
  103. """
  104. 获取当前浏览器会话,如果不存在则自动创建
  105. Returns:
  106. (BrowserSession, Tools) 元组
  107. """
  108. global _browser_session, _browser_tools
  109. if _browser_session is None:
  110. await init_browser_session()
  111. return _browser_session, _browser_tools
  112. async def cleanup_browser_session():
  113. """
  114. 清理浏览器会话
  115. 优雅地停止浏览器但保留会话状态
  116. """
  117. global _browser_session, _browser_tools, _file_system
  118. if _browser_session is not None:
  119. await _browser_session.stop()
  120. _browser_session = None
  121. _browser_tools = None
  122. _file_system = None
  123. async def kill_browser_session():
  124. """
  125. 强制终止浏览器会话
  126. 完全关闭浏览器进程
  127. """
  128. global _browser_session, _browser_tools, _file_system
  129. if _browser_session is not None:
  130. await _browser_session.kill()
  131. _browser_session = None
  132. _browser_tools = None
  133. _file_system = None
  134. # ============================================================
  135. # 辅助函数:ActionResult 转 ToolResult
  136. # ============================================================
  137. def action_result_to_tool_result(result: ActionResult, title: str = None) -> ToolResult:
  138. """
  139. 将 browser-use 的 ActionResult 转换为框架的 ToolResult
  140. Args:
  141. result: browser-use 的 ActionResult
  142. title: 可选的标题(如果不提供则从 result 推断)
  143. Returns:
  144. ToolResult
  145. """
  146. if result.error:
  147. return ToolResult(
  148. title=title or "操作失败",
  149. output="",
  150. error=result.error,
  151. long_term_memory=result.long_term_memory or result.error
  152. )
  153. return ToolResult(
  154. title=title or "操作成功",
  155. output=result.extracted_content or "",
  156. long_term_memory=result.long_term_memory or result.extracted_content or "",
  157. metadata=result.metadata or {}
  158. )
  159. def _cookie_domain_for_type(cookie_type: str, url: str) -> Tuple[str, str]:
  160. if cookie_type:
  161. key = cookie_type.lower()
  162. if key in {"xiaohongshu", "xhs"}:
  163. return ".xiaohongshu.com", "https://www.xiaohongshu.com"
  164. parsed = urlparse(url or "")
  165. domain = parsed.netloc or ""
  166. domain = domain.replace("www.", "")
  167. if domain:
  168. domain = f".{domain}"
  169. base_url = f"{parsed.scheme}://{parsed.netloc}" if parsed.scheme and parsed.netloc else url
  170. return domain, base_url
  171. def _parse_cookie_string(cookie_str: str, domain: str, url: str) -> List[Dict[str, Any]]:
  172. cookies: List[Dict[str, Any]] = []
  173. if not cookie_str:
  174. return cookies
  175. parts = cookie_str.split(";")
  176. for part in parts:
  177. if not part:
  178. continue
  179. if "=" not in part:
  180. continue
  181. name, value = part.split("=", 1)
  182. cookie = {
  183. "name": str(name).strip(),
  184. "value": str(value).strip(),
  185. "domain": domain,
  186. "path": "/",
  187. "expires": -1,
  188. "httpOnly": False,
  189. "secure": True,
  190. "sameSite": "None"
  191. }
  192. if url:
  193. cookie["url"] = url
  194. cookies.append(cookie)
  195. return cookies
  196. def _normalize_cookies(cookie_value: Any, domain: str, url: str) -> List[Dict[str, Any]]:
  197. if cookie_value is None:
  198. return []
  199. if isinstance(cookie_value, list):
  200. return cookie_value
  201. if isinstance(cookie_value, dict):
  202. if "cookies" in cookie_value:
  203. return _normalize_cookies(cookie_value.get("cookies"), domain, url)
  204. if "name" in cookie_value and "value" in cookie_value:
  205. return [cookie_value]
  206. return []
  207. if isinstance(cookie_value, (bytes, bytearray)):
  208. cookie_value = cookie_value.decode("utf-8", errors="ignore")
  209. if isinstance(cookie_value, str):
  210. text = cookie_value.strip()
  211. if not text:
  212. return []
  213. try:
  214. parsed = json.loads(text)
  215. except Exception:
  216. parsed = None
  217. if parsed is not None:
  218. return _normalize_cookies(parsed, domain, url)
  219. return _parse_cookie_string(text, domain, url)
  220. return []
  221. def _extract_cookie_value(row: Optional[Dict[str, Any]]) -> Any:
  222. if not row:
  223. return None
  224. # 优先使用 cookies 字段
  225. if "cookies" in row:
  226. return row["cookies"]
  227. # 兼容其他可能的字段名
  228. for key, value in row.items():
  229. if "cookie" in key.lower():
  230. return value
  231. return None
  232. def _fetch_cookie_row(cookie_type: str) -> Optional[Dict[str, Any]]:
  233. if not cookie_type:
  234. return None
  235. try:
  236. return mysql.fetchone(
  237. "select * from agent_channel_cookies where type=%s limit 1",
  238. (cookie_type,)
  239. )
  240. except Exception:
  241. return None
  242. def _fetch_profile_id(cookie_type: str) -> Optional[str]:
  243. """从数据库获取 cloud_profile_id"""
  244. if not cookie_type:
  245. return None
  246. try:
  247. row = mysql.fetchone(
  248. "select profileId from agent_channel_cookies where type=%s limit 1",
  249. (cookie_type,)
  250. )
  251. if row and "profileId" in row:
  252. return row["profileId"]
  253. return None
  254. except Exception:
  255. return None
  256. # ============================================================
  257. # 导航类工具 (Navigation Tools)
  258. # ============================================================
  259. @tool()
  260. async def navigate_to_url(url: str, new_tab: bool = False, uid: str = "") -> ToolResult:
  261. """
  262. 导航到指定的 URL
  263. Navigate to a specific URL
  264. 使用 browser-use 的原生导航功能,支持在新标签页打开。
  265. Args:
  266. url: 要访问的 URL 地址
  267. new_tab: 是否在新标签页中打开(默认 False)
  268. uid: 用户 ID(由框架自动注入)
  269. Returns:
  270. ToolResult: 包含导航结果的工具返回对象
  271. Example:
  272. navigate_to_url("https://www.baidu.com")
  273. navigate_to_url("https://www.google.com", new_tab=True)
  274. """
  275. try:
  276. browser, tools = await get_browser_session()
  277. # 使用 browser-use 的 navigate 工具
  278. result = await tools.navigate(
  279. url=url,
  280. new_tab=new_tab,
  281. browser_session=browser
  282. )
  283. return action_result_to_tool_result(result, f"导航到 {url}")
  284. except Exception as e:
  285. return ToolResult(
  286. title="导航失败",
  287. output="",
  288. error=f"Failed to navigate to {url}: {str(e)}",
  289. long_term_memory=f"导航到 {url} 失败"
  290. )
  291. @tool()
  292. async def search_web(query: str, engine: str = "google", uid: str = "") -> ToolResult:
  293. """
  294. 使用搜索引擎搜索
  295. Search the web using a search engine
  296. Args:
  297. query: 搜索关键词
  298. engine: 搜索引擎 (google, duckduckgo, bing) - 默认: google
  299. uid: 用户 ID(由框架自动注入)
  300. Returns:
  301. ToolResult: 搜索结果
  302. Example:
  303. search_web("Python async programming", engine="google")
  304. """
  305. try:
  306. browser, tools = await get_browser_session()
  307. # 使用 browser-use 的 search 工具
  308. result = await tools.search(
  309. query=query,
  310. engine=engine,
  311. browser_session=browser
  312. )
  313. return action_result_to_tool_result(result, f"搜索: {query}")
  314. except Exception as e:
  315. return ToolResult(
  316. title="搜索失败",
  317. output="",
  318. error=f"Search failed: {str(e)}",
  319. long_term_memory=f"搜索 '{query}' 失败"
  320. )
  321. @tool()
  322. async def go_back(uid: str = "") -> ToolResult:
  323. """
  324. 返回到上一个页面
  325. Go back to the previous page
  326. 模拟浏览器的"后退"按钮功能。
  327. Args:
  328. uid: 用户 ID(由框架自动注入)
  329. Returns:
  330. ToolResult: 包含返回操作结果的工具返回对象
  331. """
  332. try:
  333. browser, tools = await get_browser_session()
  334. result = await tools.go_back(browser_session=browser)
  335. return action_result_to_tool_result(result, "返回上一页")
  336. except Exception as e:
  337. return ToolResult(
  338. title="返回失败",
  339. output="",
  340. error=f"Failed to go back: {str(e)}",
  341. long_term_memory="返回上一页失败"
  342. )
  343. @tool()
  344. async def wait(seconds: int = 3, uid: str = "") -> ToolResult:
  345. """
  346. 等待指定的秒数
  347. Wait for a specified number of seconds
  348. 用于等待页面加载、动画完成或其他异步操作。
  349. Args:
  350. seconds: 等待时间(秒),最大30秒
  351. uid: 用户 ID(由框架自动注入)
  352. Returns:
  353. ToolResult: 包含等待操作结果的工具返回对象
  354. Example:
  355. wait(5) # 等待5秒
  356. """
  357. try:
  358. browser, tools = await get_browser_session()
  359. result = await tools.wait(seconds=seconds, browser_session=browser)
  360. return action_result_to_tool_result(result, f"等待 {seconds} 秒")
  361. except Exception as e:
  362. return ToolResult(
  363. title="等待失败",
  364. output="",
  365. error=f"Failed to wait: {str(e)}",
  366. long_term_memory="等待失败"
  367. )
  368. # ============================================================
  369. # 元素交互工具 (Element Interaction Tools)
  370. # ============================================================
  371. @tool()
  372. async def click_element(index: int, uid: str = "") -> ToolResult:
  373. """
  374. 通过索引点击页面元素
  375. Click an element by index
  376. Args:
  377. index: 元素索引(从浏览器状态中获取)
  378. uid: 用户 ID(由框架自动注入)
  379. Returns:
  380. ToolResult: 包含点击操作结果的工具返回对象
  381. Example:
  382. click_element(index=5)
  383. Note:
  384. 需要先通过 get_selector_map 获取页面元素索引
  385. """
  386. try:
  387. browser, tools = await get_browser_session()
  388. result = await tools.click(
  389. index=index,
  390. browser_session=browser
  391. )
  392. return action_result_to_tool_result(result, f"点击元素 {index}")
  393. except Exception as e:
  394. return ToolResult(
  395. title="点击失败",
  396. output="",
  397. error=f"Failed to click element {index}: {str(e)}",
  398. long_term_memory=f"点击元素 {index} 失败"
  399. )
  400. @tool()
  401. async def input_text(index: int, text: str, clear: bool = True, uid: str = "") -> ToolResult:
  402. """
  403. 在指定元素中输入文本
  404. Input text into an element
  405. Args:
  406. index: 元素索引(从浏览器状态中获取)
  407. text: 要输入的文本内容
  408. clear: 是否先清除现有文本(默认 True)
  409. uid: 用户 ID(由框架自动注入)
  410. Returns:
  411. ToolResult: 包含输入操作结果的工具返回对象
  412. Example:
  413. input_text(index=0, text="Hello World", clear=True)
  414. """
  415. try:
  416. browser, tools = await get_browser_session()
  417. result = await tools.input(
  418. index=index,
  419. text=text,
  420. clear=clear,
  421. browser_session=browser
  422. )
  423. return action_result_to_tool_result(result, f"输入文本到元素 {index}")
  424. except Exception as e:
  425. return ToolResult(
  426. title="输入失败",
  427. output="",
  428. error=f"Failed to input text into element {index}: {str(e)}",
  429. long_term_memory=f"输入文本失败"
  430. )
  431. @tool()
  432. async def send_keys(keys: str, uid: str = "") -> ToolResult:
  433. """
  434. 发送键盘按键或快捷键
  435. Send keyboard keys or shortcuts
  436. 支持发送单个按键、组合键和快捷键。
  437. Args:
  438. keys: 要发送的按键字符串
  439. - 单个按键: "Enter", "Escape", "PageDown", "Tab"
  440. - 组合键: "Control+o", "Shift+Tab", "Alt+F4"
  441. - 功能键: "F1", "F2", ..., "F12"
  442. uid: 用户 ID(由框架自动注入)
  443. Returns:
  444. ToolResult: 包含按键操作结果的工具返回对象
  445. Example:
  446. send_keys("Enter")
  447. send_keys("Control+A")
  448. """
  449. try:
  450. browser, tools = await get_browser_session()
  451. result = await tools.send_keys(
  452. keys=keys,
  453. browser_session=browser
  454. )
  455. return action_result_to_tool_result(result, f"发送按键: {keys}")
  456. except Exception as e:
  457. return ToolResult(
  458. title="发送按键失败",
  459. output="",
  460. error=f"Failed to send keys: {str(e)}",
  461. long_term_memory="发送按键失败"
  462. )
  463. @tool()
  464. async def upload_file(index: int, path: str, uid: str = "") -> ToolResult:
  465. """
  466. 上传文件到文件输入元素
  467. Upload a file to a file input element
  468. Args:
  469. index: 文件输入框的元素索引
  470. path: 要上传的文件路径(绝对路径)
  471. uid: 用户 ID(由框架自动注入)
  472. Returns:
  473. ToolResult: 包含上传操作结果的工具返回对象
  474. Example:
  475. upload_file(index=7, path="/path/to/file.pdf")
  476. Note:
  477. 文件必须存在且路径必须是绝对路径
  478. """
  479. try:
  480. browser, tools = await get_browser_session()
  481. result = await tools.upload_file(
  482. index=index,
  483. path=path,
  484. browser_session=browser,
  485. available_file_paths=[path],
  486. file_system=_file_system
  487. )
  488. return action_result_to_tool_result(result, f"上传文件: {path}")
  489. except Exception as e:
  490. return ToolResult(
  491. title="上传失败",
  492. output="",
  493. error=f"Failed to upload file: {str(e)}",
  494. long_term_memory=f"上传文件 {path} 失败"
  495. )
  496. # ============================================================
  497. # 滚动和视图工具 (Scroll & View Tools)
  498. # ============================================================
  499. @tool()
  500. async def scroll_page(down: bool = True, pages: float = 1.0,
  501. index: Optional[int] = None, uid: str = "") -> ToolResult:
  502. """
  503. 滚动页面或元素
  504. Scroll the page or a specific element
  505. Args:
  506. down: True 向下滚动,False 向上滚动
  507. pages: 滚动页数(0.5=半页,1=全页,10=滚动到底部/顶部)
  508. index: 可选,滚动特定元素(如下拉框内部)
  509. uid: 用户 ID(由框架自动注入)
  510. Returns:
  511. ToolResult: 滚动结果
  512. Example:
  513. scroll_page(down=True, pages=2.0) # 向下滚动2页
  514. scroll_page(down=False, pages=1.0) # 向上滚动1页
  515. """
  516. try:
  517. browser, tools = await get_browser_session()
  518. result = await tools.scroll(
  519. down=down,
  520. pages=pages,
  521. index=index,
  522. browser_session=browser
  523. )
  524. direction = "向下" if down else "向上"
  525. return action_result_to_tool_result(result, f"{direction}滚动 {pages} 页")
  526. except Exception as e:
  527. return ToolResult(
  528. title="滚动失败",
  529. output="",
  530. error=f"Failed to scroll: {str(e)}",
  531. long_term_memory="滚动失败"
  532. )
  533. @tool()
  534. async def find_text(text: str, uid: str = "") -> ToolResult:
  535. """
  536. 查找页面中的文本并滚动到该位置
  537. Find text on the page and scroll to it
  538. 在页面中搜索指定的文本,找到后自动滚动到该位置。
  539. Args:
  540. text: 要查找的文本内容
  541. uid: 用户 ID(由框架自动注入)
  542. Returns:
  543. ToolResult: 包含查找结果的工具返回对象
  544. Example:
  545. find_text("Privacy Policy")
  546. """
  547. try:
  548. browser, tools = await get_browser_session()
  549. result = await tools.find_text(
  550. text=text,
  551. browser_session=browser
  552. )
  553. return action_result_to_tool_result(result, f"查找文本: {text}")
  554. except Exception as e:
  555. return ToolResult(
  556. title="查找失败",
  557. output="",
  558. error=f"Failed to find text: {str(e)}",
  559. long_term_memory=f"查找文本 '{text}' 失败"
  560. )
  561. @tool()
  562. async def screenshot(uid: str = "") -> ToolResult:
  563. """
  564. 请求在下次观察中包含页面截图
  565. Request a screenshot to be included in the next observation
  566. 用于视觉检查页面状态,帮助理解页面布局和内容。
  567. Args:
  568. uid: 用户 ID(由框架自动注入)
  569. Returns:
  570. ToolResult: 包含截图请求结果的工具返回对象
  571. Example:
  572. screenshot()
  573. Note:
  574. 截图会在下次页面观察时自动包含在结果中。
  575. """
  576. try:
  577. browser, tools = await get_browser_session()
  578. result = await tools.screenshot(browser_session=browser)
  579. return action_result_to_tool_result(result, "截图请求")
  580. except Exception as e:
  581. return ToolResult(
  582. title="截图失败",
  583. output="",
  584. error=f"Failed to capture screenshot: {str(e)}",
  585. long_term_memory="截图失败"
  586. )
  587. # ============================================================
  588. # 标签页管理工具 (Tab Management Tools)
  589. # ============================================================
  590. @tool()
  591. async def switch_tab(tab_id: str, uid: str = "") -> ToolResult:
  592. """
  593. 切换到指定标签页
  594. Switch to a different browser tab
  595. Args:
  596. tab_id: 4字符标签ID(target_id 的最后4位)
  597. uid: 用户 ID(由框架自动注入)
  598. Returns:
  599. ToolResult: 切换结果
  600. Example:
  601. switch_tab(tab_id="a3f2")
  602. """
  603. try:
  604. browser, tools = await get_browser_session()
  605. normalized_tab_id = tab_id[-4:] if tab_id else tab_id
  606. result = await tools.switch(
  607. tab_id=normalized_tab_id,
  608. browser_session=browser
  609. )
  610. return action_result_to_tool_result(result, f"切换到标签页 {normalized_tab_id}")
  611. except Exception as e:
  612. return ToolResult(
  613. title="切换标签页失败",
  614. output="",
  615. error=f"Failed to switch tab: {str(e)}",
  616. long_term_memory=f"切换到标签页 {tab_id} 失败"
  617. )
  618. @tool()
  619. async def close_tab(tab_id: str, uid: str = "") -> ToolResult:
  620. """
  621. 关闭指定标签页
  622. Close a browser tab
  623. Args:
  624. tab_id: 4字符标签ID
  625. uid: 用户 ID(由框架自动注入)
  626. Returns:
  627. ToolResult: 关闭结果
  628. Example:
  629. close_tab(tab_id="a3f2")
  630. """
  631. try:
  632. browser, tools = await get_browser_session()
  633. normalized_tab_id = tab_id[-4:] if tab_id else tab_id
  634. result = await tools.close(
  635. tab_id=normalized_tab_id,
  636. browser_session=browser
  637. )
  638. return action_result_to_tool_result(result, f"关闭标签页 {normalized_tab_id}")
  639. except Exception as e:
  640. return ToolResult(
  641. title="关闭标签页失败",
  642. output="",
  643. error=f"Failed to close tab: {str(e)}",
  644. long_term_memory=f"关闭标签页 {tab_id} 失败"
  645. )
  646. # ============================================================
  647. # 下拉框工具 (Dropdown Tools)
  648. # ============================================================
  649. @tool()
  650. async def get_dropdown_options(index: int, uid: str = "") -> ToolResult:
  651. """
  652. 获取下拉框的所有选项
  653. Get options from a dropdown element
  654. Args:
  655. index: 下拉框的元素索引
  656. uid: 用户 ID(由框架自动注入)
  657. Returns:
  658. ToolResult: 包含所有选项的结果
  659. Example:
  660. get_dropdown_options(index=8)
  661. """
  662. try:
  663. browser, tools = await get_browser_session()
  664. result = await tools.dropdown_options(
  665. index=index,
  666. browser_session=browser
  667. )
  668. return action_result_to_tool_result(result, f"获取下拉框选项: {index}")
  669. except Exception as e:
  670. return ToolResult(
  671. title="获取下拉框选项失败",
  672. output="",
  673. error=f"Failed to get dropdown options: {str(e)}",
  674. long_term_memory=f"获取下拉框 {index} 选项失败"
  675. )
  676. @tool()
  677. async def select_dropdown_option(index: int, text: str, uid: str = "") -> ToolResult:
  678. """
  679. 选择下拉框选项
  680. Select an option from a dropdown
  681. Args:
  682. index: 下拉框的元素索引
  683. text: 要选择的选项文本(精确匹配)
  684. uid: 用户 ID(由框架自动注入)
  685. Returns:
  686. ToolResult: 选择结果
  687. Example:
  688. select_dropdown_option(index=8, text="Option 2")
  689. """
  690. try:
  691. browser, tools = await get_browser_session()
  692. result = await tools.select_dropdown(
  693. index=index,
  694. text=text,
  695. browser_session=browser
  696. )
  697. return action_result_to_tool_result(result, f"选择下拉框选项: {text}")
  698. except Exception as e:
  699. return ToolResult(
  700. title="选择下拉框选项失败",
  701. output="",
  702. error=f"Failed to select dropdown option: {str(e)}",
  703. long_term_memory=f"选择选项 '{text}' 失败"
  704. )
  705. # ============================================================
  706. # 内容提取工具 (Content Extraction Tools)
  707. # ============================================================
  708. @tool()
  709. async def extract_content(query: str, extract_links: bool = False,
  710. start_from_char: int = 0, uid: str = "") -> ToolResult:
  711. """
  712. 使用 LLM 从页面提取结构化数据
  713. Extract content from the current page using LLM
  714. Args:
  715. query: 提取查询(告诉 LLM 要提取什么内容)
  716. extract_links: 是否提取链接(默认 False,节省 token)
  717. start_from_char: 从哪个字符开始提取(用于分页提取大内容)
  718. uid: 用户 ID(由框架自动注入)
  719. Returns:
  720. ToolResult: 提取的内容
  721. Example:
  722. extract_content(query="提取页面上所有产品的名称和价格", extract_links=True)
  723. Note:
  724. 需要配置 page_extraction_llm,否则会失败
  725. 支持分页提取,最大100k字符
  726. """
  727. try:
  728. browser, tools = await get_browser_session()
  729. # 注意:extract 需要 page_extraction_llm 参数
  730. # 这里我们假设用户会在初始化时配置 LLM
  731. # 如果没有配置,会抛出异常
  732. result = await tools.extract(
  733. query=query,
  734. extract_links=extract_links,
  735. start_from_char=start_from_char,
  736. browser_session=browser,
  737. page_extraction_llm=None, # 需要用户配置
  738. file_system=_file_system
  739. )
  740. return action_result_to_tool_result(result, f"提取内容: {query}")
  741. except Exception as e:
  742. return ToolResult(
  743. title="内容提取失败",
  744. output="",
  745. error=f"Failed to extract content: {str(e)}",
  746. long_term_memory=f"提取内容失败: {query}"
  747. )
  748. @tool()
  749. async def get_page_html(uid: str = "") -> ToolResult:
  750. """
  751. 获取当前页面的完整 HTML
  752. Get the full HTML of the current page
  753. 返回当前页面的完整 HTML 源代码。
  754. Args:
  755. uid: 用户 ID(由框架自动注入)
  756. Returns:
  757. ToolResult: 包含页面 HTML 的工具返回对象
  758. Example:
  759. get_page_html()
  760. Note:
  761. - 返回的是完整的 HTML 源代码
  762. - 输出会被限制在 10000 字符以内(完整内容保存在 metadata 中)
  763. """
  764. try:
  765. browser, tools = await get_browser_session()
  766. # 使用 CDP 获取页面 HTML
  767. cdp = await browser.get_or_create_cdp_session()
  768. # 获取页面内容
  769. result = await cdp.cdp_client.send.Runtime.evaluate(
  770. params={'expression': 'document.documentElement.outerHTML'},
  771. session_id=cdp.session_id
  772. )
  773. html = result.get('result', {}).get('value', '')
  774. # 获取 URL 和标题
  775. url = await browser.get_current_page_url()
  776. title_result = await cdp.cdp_client.send.Runtime.evaluate(
  777. params={'expression': 'document.title'},
  778. session_id=cdp.session_id
  779. )
  780. title = title_result.get('result', {}).get('value', '')
  781. # 限制输出大小
  782. output_html = html
  783. if len(html) > 10000:
  784. output_html = html[:10000] + "... (truncated)"
  785. return ToolResult(
  786. title=f"获取 HTML: {url}",
  787. output=f"页面: {title}\nURL: {url}\n\nHTML:\n{output_html}",
  788. long_term_memory=f"获取 HTML: {url}",
  789. metadata={"url": url, "title": title, "html": html}
  790. )
  791. except Exception as e:
  792. return ToolResult(
  793. title="获取 HTML 失败",
  794. output="",
  795. error=f"Failed to get page HTML: {str(e)}",
  796. long_term_memory="获取 HTML 失败"
  797. )
  798. @tool()
  799. async def get_selector_map(uid: str = "") -> ToolResult:
  800. """
  801. 获取当前页面的元素索引映射
  802. Get the selector map of interactive elements on the current page
  803. 返回页面所有可交互元素的索引字典,用于后续的元素操作。
  804. Args:
  805. uid: 用户 ID(由框架自动注入)
  806. Returns:
  807. ToolResult: 包含元素映射的工具返回对象
  808. Example:
  809. get_selector_map()
  810. Note:
  811. 返回的索引可以用于 click_element, input_text 等操作
  812. """
  813. try:
  814. browser, tools = await get_browser_session()
  815. # 获取选择器映射
  816. selector_map = await browser.get_selector_map()
  817. # 构建输出信息
  818. elements_info = []
  819. for index, node in list(selector_map.items())[:20]: # 只显示前20个
  820. tag = node.tag_name
  821. attrs = node.attributes or {}
  822. text = attrs.get('aria-label') or attrs.get('placeholder') or attrs.get('value', '')
  823. elements_info.append(f"索引 {index}: <{tag}> {text[:50]}")
  824. output = f"找到 {len(selector_map)} 个交互元素\n\n"
  825. output += "\n".join(elements_info)
  826. if len(selector_map) > 20:
  827. output += f"\n... 还有 {len(selector_map) - 20} 个元素"
  828. return ToolResult(
  829. title="获取元素映射",
  830. output=output,
  831. long_term_memory=f"获取到 {len(selector_map)} 个交互元素",
  832. metadata={"selector_map": {k: str(v) for k, v in list(selector_map.items())[:100]}}
  833. )
  834. except Exception as e:
  835. return ToolResult(
  836. title="获取元素映射失败",
  837. output="",
  838. error=f"Failed to get selector map: {str(e)}",
  839. long_term_memory="获取元素映射失败"
  840. )
  841. # ============================================================
  842. # JavaScript 执行工具 (JavaScript Tools)
  843. # ============================================================
  844. @tool()
  845. async def evaluate(code: str, uid: str = "") -> ToolResult:
  846. """
  847. 在页面中执行 JavaScript 代码
  848. Execute JavaScript code in the page context
  849. 允许在当前页面中执行任意 JavaScript 代码,用于复杂的页面操作或数据提取。
  850. Args:
  851. code: 要执行的 JavaScript 代码字符串
  852. uid: 用户 ID(由框架自动注入)
  853. Returns:
  854. ToolResult: 包含执行结果的工具返回对象
  855. Example:
  856. evaluate("document.title")
  857. evaluate("document.querySelectorAll('a').length")
  858. Note:
  859. - 代码在页面上下文中执行,可以访问 DOM 和全局变量
  860. - 返回值会被自动序列化为字符串
  861. - 执行结果限制在 20k 字符以内
  862. """
  863. try:
  864. browser, tools = await get_browser_session()
  865. result = await tools.evaluate(
  866. code=code,
  867. browser_session=browser
  868. )
  869. return action_result_to_tool_result(result, "执行 JavaScript")
  870. except Exception as e:
  871. return ToolResult(
  872. title="JavaScript 执行失败",
  873. output="",
  874. error=f"Failed to execute JavaScript: {str(e)}",
  875. long_term_memory="JavaScript 执行失败"
  876. )
  877. @tool()
  878. async def ensure_login_with_cookies(cookie_type: str, url: str = "https://www.xiaohongshu.com", uid: str = "") -> ToolResult:
  879. """
  880. 检查登录状态并在需要时注入 cookies
  881. """
  882. try:
  883. browser, tools = await get_browser_session()
  884. if url:
  885. await tools.navigate(url=url, browser_session=browser)
  886. await tools.wait(seconds=2, browser_session=browser)
  887. check_login_js = """
  888. (function() {
  889. const loginBtn = document.querySelector('[class*="login"]') ||
  890. document.querySelector('[href*="login"]') ||
  891. Array.from(document.querySelectorAll('button, a')).find(el => (el.textContent || '').includes('登录'));
  892. const userInfo = document.querySelector('[class*="user"]') ||
  893. document.querySelector('[class*="avatar"]');
  894. return {
  895. needLogin: !!loginBtn && !userInfo,
  896. hasLoginBtn: !!loginBtn,
  897. hasUserInfo: !!userInfo
  898. };
  899. })()
  900. """
  901. result = await tools.evaluate(code=check_login_js, browser_session=browser)
  902. status_output = result.extracted_content
  903. if isinstance(status_output, str) and status_output.startswith("Result: "):
  904. status_output = status_output[8:]
  905. login_info: Dict[str, Any] = {}
  906. if isinstance(status_output, str):
  907. try:
  908. login_info = json.loads(status_output)
  909. except Exception:
  910. login_info = {}
  911. elif isinstance(status_output, dict):
  912. login_info = status_output
  913. if not login_info.get("needLogin"):
  914. output = json.dumps({"need_login": False}, ensure_ascii=False)
  915. return ToolResult(
  916. title="已登录",
  917. output=output,
  918. long_term_memory=output
  919. )
  920. row = _fetch_cookie_row(cookie_type)
  921. cookie_value = _extract_cookie_value(row)
  922. if not cookie_value:
  923. output = json.dumps({"need_login": True, "cookies_count": 0}, ensure_ascii=False)
  924. return ToolResult(
  925. title="未找到 cookies",
  926. output=output,
  927. error="未找到 cookies",
  928. long_term_memory=output
  929. )
  930. domain, base_url = _cookie_domain_for_type(cookie_type, url)
  931. cookies = _normalize_cookies(cookie_value, domain, base_url)
  932. if not cookies:
  933. output = json.dumps({"need_login": True, "cookies_count": 0}, ensure_ascii=False)
  934. return ToolResult(
  935. title="cookies 解析失败",
  936. output=output,
  937. error="cookies 解析失败",
  938. long_term_memory=output
  939. )
  940. await browser._cdp_set_cookies(cookies)
  941. if url:
  942. await tools.navigate(url=url, browser_session=browser)
  943. await tools.wait(seconds=2, browser_session=browser)
  944. output = json.dumps({"need_login": True, "cookies_count": len(cookies)}, ensure_ascii=False)
  945. return ToolResult(
  946. title="已注入 cookies",
  947. output=output,
  948. long_term_memory=output
  949. )
  950. except Exception as e:
  951. return ToolResult(
  952. title="登录检查失败",
  953. output="",
  954. error=str(e),
  955. long_term_memory="登录检查失败"
  956. )
  957. # ============================================================
  958. # 文件系统工具 (File System Tools)
  959. # ============================================================
  960. @tool()
  961. async def write_file(file_name: str, content: str, append: bool = False, uid: str = "") -> ToolResult:
  962. """
  963. 写入文件到本地文件系统
  964. Write content to a local file
  965. 支持多种文件格式的写入操作。
  966. Args:
  967. file_name: 文件名(包含扩展名)
  968. content: 要写入的文件内容
  969. append: 是否追加模式(默认 False,覆盖写入)
  970. uid: 用户 ID(由框架自动注入)
  971. Returns:
  972. ToolResult: 包含写入结果的工具返回对象
  973. Example:
  974. write_file("output.txt", "Hello World")
  975. write_file("data.json", '{"key": "value"}')
  976. Note:
  977. 支持的文件格式: .txt, .md, .json, .jsonl, .csv, .pdf
  978. """
  979. try:
  980. browser, tools = await get_browser_session()
  981. result = await tools.write_file(
  982. file_name=file_name,
  983. content=content,
  984. append=append,
  985. file_system=_file_system
  986. )
  987. return action_result_to_tool_result(result, f"写入文件: {file_name}")
  988. except Exception as e:
  989. return ToolResult(
  990. title="写入文件失败",
  991. output="",
  992. error=f"Failed to write file: {str(e)}",
  993. long_term_memory=f"写入文件 {file_name} 失败"
  994. )
  995. @tool()
  996. async def read_file(file_name: str, uid: str = "") -> ToolResult:
  997. """
  998. 读取文件内容
  999. Read content from a local file
  1000. 支持多种文件格式的读取操作。
  1001. Args:
  1002. file_name: 文件名(包含扩展名)
  1003. uid: 用户 ID(由框架自动注入)
  1004. Returns:
  1005. ToolResult: 包含文件内容的工具返回对象
  1006. Example:
  1007. read_file("input.txt")
  1008. read_file("data.json")
  1009. Note:
  1010. 支持的文件格式: 文本文件、PDF、DOCX、图片等
  1011. """
  1012. try:
  1013. browser, tools = await get_browser_session()
  1014. result = await tools.read_file(
  1015. file_name=file_name,
  1016. available_file_paths=[],
  1017. file_system=_file_system
  1018. )
  1019. return action_result_to_tool_result(result, f"读取文件: {file_name}")
  1020. except Exception as e:
  1021. return ToolResult(
  1022. title="读取文件失败",
  1023. output="",
  1024. error=f"Failed to read file: {str(e)}",
  1025. long_term_memory=f"读取文件 {file_name} 失败"
  1026. )
  1027. @tool()
  1028. async def replace_file(file_name: str, old_str: str, new_str: str, uid: str = "") -> ToolResult:
  1029. """
  1030. 替换文件中的特定文本
  1031. Replace specific text in a file
  1032. 在文件中查找并替换指定的文本内容。
  1033. Args:
  1034. file_name: 文件名(包含扩展名)
  1035. old_str: 要替换的文本
  1036. new_str: 新文本
  1037. uid: 用户 ID(由框架自动注入)
  1038. Returns:
  1039. ToolResult: 包含替换结果的工具返回对象
  1040. Example:
  1041. replace_file("config.txt", "old_value", "new_value")
  1042. Note:
  1043. - 会替换文件中所有匹配的文本
  1044. - 如果找不到要替换的文本,会返回警告
  1045. """
  1046. try:
  1047. browser, tools = await get_browser_session()
  1048. result = await tools.replace_file(
  1049. file_name=file_name,
  1050. old_str=old_str,
  1051. new_str=new_str,
  1052. file_system=_file_system
  1053. )
  1054. return action_result_to_tool_result(result, f"替换文件内容: {file_name}")
  1055. except Exception as e:
  1056. return ToolResult(
  1057. title="替换文件失败",
  1058. output="",
  1059. error=f"Failed to replace file content: {str(e)}",
  1060. long_term_memory=f"替换文件 {file_name} 失败"
  1061. )
  1062. # ============================================================
  1063. # 等待用户操作工具 (Wait for User Action)
  1064. # ============================================================
  1065. @tool()
  1066. async def wait_for_user_action(message: str = "Please complete the action in browser",
  1067. timeout: int = 300, uid: str = "") -> ToolResult:
  1068. """
  1069. 等待用户在浏览器中完成操作(如登录)
  1070. Wait for user to complete an action in the browser (e.g., login)
  1071. 暂停自动化流程,等待用户手动完成某些操作(如登录、验证码等)。
  1072. Args:
  1073. message: 提示用户需要完成的操作
  1074. timeout: 最大等待时间(秒),默认 300 秒(5 分钟)
  1075. uid: 用户 ID(由框架自动注入)
  1076. Returns:
  1077. ToolResult: 包含等待结果的工具返回对象
  1078. Example:
  1079. wait_for_user_action("Please login to Xiaohongshu", timeout=180)
  1080. wait_for_user_action("Please complete the CAPTCHA", timeout=60)
  1081. Note:
  1082. - 用户需要在浏览器窗口中手动完成操作
  1083. - 完成后按回车键继续
  1084. - 超时后会自动继续执行
  1085. """
  1086. try:
  1087. import asyncio
  1088. print(f"\n{'='*60}")
  1089. print(f"⏸️ WAITING FOR USER ACTION")
  1090. print(f"{'='*60}")
  1091. print(f"📝 {message}")
  1092. print(f"⏱️ Timeout: {timeout} seconds")
  1093. print(f"\n👉 Please complete the action in the browser window")
  1094. print(f"👉 Press ENTER when done, or wait for timeout")
  1095. print(f"{'='*60}\n")
  1096. # Wait for user input or timeout
  1097. try:
  1098. loop = asyncio.get_event_loop()
  1099. # Wait for either user input or timeout
  1100. await asyncio.wait_for(
  1101. loop.run_in_executor(None, input),
  1102. timeout=timeout
  1103. )
  1104. return ToolResult(
  1105. title="用户操作完成",
  1106. output=f"User completed: {message}",
  1107. long_term_memory=f"用户完成操作: {message}"
  1108. )
  1109. except asyncio.TimeoutError:
  1110. return ToolResult(
  1111. title="用户操作超时",
  1112. output=f"Timeout waiting for: {message}",
  1113. long_term_memory=f"等待用户操作超时: {message}"
  1114. )
  1115. except Exception as e:
  1116. return ToolResult(
  1117. title="等待用户操作失败",
  1118. output="",
  1119. error=f"Failed to wait for user action: {str(e)}",
  1120. long_term_memory="等待用户操作失败"
  1121. )
  1122. # ============================================================
  1123. # 任务完成工具 (Task Completion)
  1124. # ============================================================
  1125. @tool()
  1126. async def done(text: str, success: bool = True,
  1127. files_to_display: Optional[List[str]] = None, uid: str = "") -> ToolResult:
  1128. """
  1129. 标记任务完成并返回最终消息
  1130. Mark the task as complete and return final message to user
  1131. Args:
  1132. text: 给用户的最终消息
  1133. success: 任务是否成功完成
  1134. files_to_display: 可选的要显示的文件路径列表
  1135. uid: 用户 ID(由框架自动注入)
  1136. Returns:
  1137. ToolResult: 完成结果
  1138. Example:
  1139. done("任务已完成,提取了10个产品信息", success=True)
  1140. """
  1141. try:
  1142. browser, tools = await get_browser_session()
  1143. result = await tools.done(
  1144. text=text,
  1145. success=success,
  1146. files_to_display=files_to_display,
  1147. file_system=_file_system
  1148. )
  1149. return action_result_to_tool_result(result, "任务完成")
  1150. except Exception as e:
  1151. return ToolResult(
  1152. title="标记任务完成失败",
  1153. output="",
  1154. error=f"Failed to complete task: {str(e)}",
  1155. long_term_memory="标记任务完成失败"
  1156. )
  1157. # ============================================================
  1158. # 容器管理工具 (Container Management Tools)
  1159. # ============================================================
  1160. import aiohttp
  1161. async def create_container(url: str, account_name: str = "liuwenwu") -> Dict[str, Any]:
  1162. """
  1163. 创建浏览器容器并导航到指定URL
  1164. 按照 test.md 的要求:
  1165. 1.1 调用接口创建容器
  1166. 1.2 调用接口创建窗口并导航到URL
  1167. Args:
  1168. url: 要导航的URL地址
  1169. account_name: 账户名称
  1170. Returns:
  1171. 包含容器信息的字典:
  1172. - success: 是否成功
  1173. - container_id: 容器ID
  1174. - vnc: VNC访问URL
  1175. - cdp: CDP协议URL(用于浏览器连接)
  1176. - connection_id: 窗口连接ID
  1177. - error: 错误信息(如果失败)
  1178. """
  1179. result = {
  1180. "success": False,
  1181. "container_id": None,
  1182. "vnc": None,
  1183. "cdp": None,
  1184. "connection_id": None,
  1185. "error": None
  1186. }
  1187. try:
  1188. async with aiohttp.ClientSession() as session:
  1189. # 步骤1.1: 创建容器
  1190. print("📦 步骤1.1: 创建容器...")
  1191. create_url = "http://47.84.182.56:8200/api/v1/container/create"
  1192. create_payload = {
  1193. "auto_remove": True,
  1194. "need_port_binding": True,
  1195. "max_lifetime_seconds": 900
  1196. }
  1197. async with session.post(create_url, json=create_payload) as resp:
  1198. if resp.status != 200:
  1199. raise RuntimeError(f"创建容器失败: HTTP {resp.status}")
  1200. create_result = await resp.json()
  1201. if create_result.get("code") != 0:
  1202. raise RuntimeError(f"创建容器失败: {create_result.get('msg')}")
  1203. data = create_result.get("data", {})
  1204. result["container_id"] = data.get("container_id")
  1205. result["vnc"] = data.get("vnc")
  1206. result["cdp"] = data.get("cdp")
  1207. print(f"✅ 容器创建成功")
  1208. print(f" Container ID: {result['container_id']}")
  1209. print(f" VNC: {result['vnc']}")
  1210. print(f" CDP: {result['cdp']}")
  1211. # 等待容器内的浏览器启动
  1212. print(f"\n⏳ 等待容器内浏览器启动...")
  1213. await asyncio.sleep(5)
  1214. # 步骤1.2: 创建页面并导航
  1215. print(f"\n📱 步骤1.2: 创建页面并导航到 {url}...")
  1216. page_create_url = "http://47.84.182.56:8200/api/v1/browser/page/create"
  1217. page_payload = {
  1218. "container_id": result["container_id"],
  1219. "url": url,
  1220. "account_name": account_name,
  1221. "need_wait": True,
  1222. "timeout": 30
  1223. }
  1224. # 重试机制:最多尝试3次
  1225. max_retries = 3
  1226. page_created = False
  1227. last_error = None
  1228. for attempt in range(max_retries):
  1229. try:
  1230. if attempt > 0:
  1231. print(f" 重试 {attempt + 1}/{max_retries}...")
  1232. await asyncio.sleep(3) # 重试前等待
  1233. async with session.post(page_create_url, json=page_payload, timeout=aiohttp.ClientTimeout(total=60)) as resp:
  1234. if resp.status != 200:
  1235. response_text = await resp.text()
  1236. last_error = f"HTTP {resp.status}: {response_text[:200]}"
  1237. continue
  1238. page_result = await resp.json()
  1239. if page_result.get("code") != 0:
  1240. last_error = f"{page_result.get('msg')}"
  1241. continue
  1242. page_data = page_result.get("data", {})
  1243. result["connection_id"] = page_data.get("connection_id")
  1244. result["success"] = True
  1245. page_created = True
  1246. print(f"✅ 页面创建成功")
  1247. print(f" Connection ID: {result['connection_id']}")
  1248. break
  1249. except asyncio.TimeoutError:
  1250. last_error = "请求超时"
  1251. continue
  1252. except aiohttp.ClientError as e:
  1253. last_error = f"网络错误: {str(e)}"
  1254. continue
  1255. except Exception as e:
  1256. last_error = f"未知错误: {str(e)}"
  1257. continue
  1258. if not page_created:
  1259. raise RuntimeError(f"创建页面失败(尝试{max_retries}次后): {last_error}")
  1260. except Exception as e:
  1261. result["error"] = str(e)
  1262. print(f"❌ 错误: {str(e)}")
  1263. return result
  1264. # ============================================================
  1265. # 导出所有工具函数(供外部使用)
  1266. # ============================================================
  1267. __all__ = [
  1268. # 会话管理
  1269. 'init_browser_session',
  1270. 'get_browser_session',
  1271. 'cleanup_browser_session',
  1272. 'kill_browser_session',
  1273. # 导航类工具
  1274. 'navigate_to_url',
  1275. 'search_web',
  1276. 'go_back',
  1277. 'wait',
  1278. # 元素交互工具
  1279. 'click_element',
  1280. 'input_text',
  1281. 'send_keys',
  1282. 'upload_file',
  1283. # 滚动和视图工具
  1284. 'scroll_page',
  1285. 'find_text',
  1286. 'screenshot',
  1287. # 标签页管理工具
  1288. 'switch_tab',
  1289. 'close_tab',
  1290. # 下拉框工具
  1291. 'get_dropdown_options',
  1292. 'select_dropdown_option',
  1293. # 内容提取工具
  1294. 'extract_content',
  1295. 'get_page_html',
  1296. 'get_selector_map',
  1297. # JavaScript 执行工具
  1298. 'evaluate',
  1299. 'ensure_login_with_cookies',
  1300. # 文件系统工具
  1301. 'write_file',
  1302. 'read_file',
  1303. 'replace_file',
  1304. # 等待用户操作
  1305. 'wait_for_user_action',
  1306. # 任务完成
  1307. 'done',
  1308. # 容器管理
  1309. 'create_container',
  1310. ]