baseClass.py 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588
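"""
Native Browser-Use Tools Adapter

Implements every browser-operation tool directly on top of browser-use's
native classes (BrowserSession, Tools). There is no Playwright dependency;
everything goes through the CDP protocol.

Key features:
1. Persistent browser session - the browser is launched only once.
2. State is kept automatically - login state, cookies, LocalStorage, etc.
3. Full low-level access - the CDP protocol can be used directly.
4. Good performance - browser instances are not repeatedly created/destroyed.
5. Several browser types - local, cloud and container modes are supported.

Supported browser types:
1. Local (local browser):
   - Runs Chrome on the local machine
   - Supports visual debugging
   - Fastest option
   - Example: init_browser_session(browser_type="local")
2. Cloud (cloud browser):
   - Runs in the cloud
   - Uses no local resources
   - Suited to production environments
   - Example: init_browser_session(browser_type="cloud")
3. Container (container browser):
   - Runs in a separate container
   - Good isolation
   - Supports pre-configured accounts
   - Example: init_browser_session(browser_type="container", url="https://example.com")

Usage:
1. Call init_browser_session() with the desired browser_type when the Agent
   is initialised.
2. Use the individual tool functions to drive the browser.
3. Call cleanup_browser_session() when the task is finished.

File handling:
- Browser-specific file directory: .browser_use_files/ (under the current
  working directory). It stores temporary files produced by the browser
  session (downloads, uploads, screenshots, etc.).
- General file operations: use the file tools in agent.tools.builtin
  (read_file, write_file, edit_file). They are more complete and support
  diff preview, smart matching, paged reading, etc.
"""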
  38. import sys
  39. import os
  40. import json
  41. import asyncio
  42. import aiohttp
  43. from typing import Optional, List, Dict, Any, Tuple
  44. from pathlib import Path
  45. from urllib.parse import urlparse
  46. # 将项目根目录添加到 Python 路径
  47. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  48. # 导入框架的工具装饰器和结果类
  49. from agent.tools import tool, ToolResult
  50. from agent.tools.builtin.browser.sync_mysql_help import mysql
  51. # 导入 browser-use 的核心类
  52. from browser_use import BrowserSession, BrowserProfile
  53. from browser_use.tools.service import Tools
  54. from browser_use.agent.views import ActionResult
  55. from browser_use.filesystem.file_system import FileSystem
  56. # ============================================================
  57. # 无需注册的内部辅助函数
  58. # ============================================================
  59. # ============================================================
  60. # 全局浏览器会话管理
  61. # ============================================================
# Module-level singletons shared by the tools in this file. All three start
# as None, are populated by init_browser_session(), and are reset to None by
# cleanup_browser_session() / kill_browser_session().
_browser_session: Optional[BrowserSession] = None
_browser_tools: Optional[Tools] = None
_file_system: Optional[FileSystem] = None
  66. async def create_container(url: str, account_name: str = "liuwenwu") -> Dict[str, Any]:
  67. """
  68. 创建浏览器容器并导航到指定URL
  69. 按照 test.md 的要求:
  70. 1.1 调用接口创建容器
  71. 1.2 调用接口创建窗口并导航到URL
  72. Args:
  73. url: 要导航的URL地址
  74. account_name: 账户名称
  75. Returns:
  76. 包含容器信息的字典:
  77. - success: 是否成功
  78. - container_id: 容器ID
  79. - vnc: VNC访问URL
  80. - cdp: CDP协议URL(用于浏览器连接)
  81. - connection_id: 窗口连接ID
  82. - error: 错误信息(如果失败)
  83. """
  84. result = {
  85. "success": False,
  86. "container_id": None,
  87. "vnc": None,
  88. "cdp": None,
  89. "connection_id": None,
  90. "error": None
  91. }
  92. try:
  93. async with aiohttp.ClientSession() as session:
  94. # 步骤1.1: 创建容器
  95. print("📦 步骤1.1: 创建容器...")
  96. create_url = "http://47.84.182.56:8200/api/v1/container/create"
  97. create_payload = {
  98. "auto_remove": True,
  99. "need_port_binding": True,
  100. "max_lifetime_seconds": 900
  101. }
  102. async with session.post(create_url, json=create_payload) as resp:
  103. if resp.status != 200:
  104. raise RuntimeError(f"创建容器失败: HTTP {resp.status}")
  105. create_result = await resp.json()
  106. if create_result.get("code") != 0:
  107. raise RuntimeError(f"创建容器失败: {create_result.get('msg')}")
  108. data = create_result.get("data", {})
  109. result["container_id"] = data.get("container_id")
  110. result["vnc"] = data.get("vnc")
  111. result["cdp"] = data.get("cdp")
  112. print(f"✅ 容器创建成功")
  113. print(f" Container ID: {result['container_id']}")
  114. print(f" VNC: {result['vnc']}")
  115. print(f" CDP: {result['cdp']}")
  116. # 等待容器内的浏览器启动
  117. print(f"\n⏳ 等待容器内浏览器启动...")
  118. await asyncio.sleep(5)
  119. # 步骤1.2: 创建页面并导航
  120. print(f"\n📱 步骤1.2: 创建页面并导航到 {url}...")
  121. page_create_url = "http://47.84.182.56:8200/api/v1/browser/page/create"
  122. page_payload = {
  123. "container_id": result["container_id"],
  124. "url": url,
  125. "account_name": account_name,
  126. "need_wait": True,
  127. "timeout": 30
  128. }
  129. # 重试机制:最多尝试3次
  130. max_retries = 3
  131. page_created = False
  132. last_error = None
  133. for attempt in range(max_retries):
  134. try:
  135. if attempt > 0:
  136. print(f" 重试 {attempt + 1}/{max_retries}...")
  137. await asyncio.sleep(3) # 重试前等待
  138. async with session.post(page_create_url, json=page_payload, timeout=aiohttp.ClientTimeout(total=60)) as resp:
  139. if resp.status != 200:
  140. response_text = await resp.text()
  141. last_error = f"HTTP {resp.status}: {response_text[:200]}"
  142. continue
  143. page_result = await resp.json()
  144. if page_result.get("code") != 0:
  145. last_error = f"{page_result.get('msg')}"
  146. continue
  147. page_data = page_result.get("data", {})
  148. result["connection_id"] = page_data.get("connection_id")
  149. result["success"] = True
  150. page_created = True
  151. print(f"✅ 页面创建成功")
  152. print(f" Connection ID: {result['connection_id']}")
  153. break
  154. except asyncio.TimeoutError:
  155. last_error = "请求超时"
  156. continue
  157. except aiohttp.ClientError as e:
  158. last_error = f"网络错误: {str(e)}"
  159. continue
  160. except Exception as e:
  161. last_error = f"未知错误: {str(e)}"
  162. continue
  163. if not page_created:
  164. raise RuntimeError(f"创建页面失败(尝试{max_retries}次后): {last_error}")
  165. except Exception as e:
  166. result["error"] = str(e)
  167. print(f"❌ 错误: {str(e)}")
  168. return result
  169. async def init_browser_session(
  170. browser_type: str = "local",
  171. headless: bool = False,
  172. url: Optional[str] = None,
  173. profile_name: str = "default",
  174. user_data_dir: Optional[str] = None,
  175. browser_profile: Optional[BrowserProfile] = None,
  176. **kwargs
  177. ) -> tuple[BrowserSession, Tools]:
  178. """
  179. 初始化全局浏览器会话 - 支持三种浏览器类型
  180. Args:
  181. browser_type: 浏览器类型 ("local", "cloud", "container")
  182. headless: 是否无头模式
  183. url: 初始访问URL(可选)
  184. - local/cloud: 初始化后会自动导航到此URL
  185. - container: 必需,容器启动时访问的URL
  186. profile_name: 配置文件/账户名称(默认 "default")
  187. - local: 用于创建用户数据目录路径
  188. - cloud: 云浏览器配置ID
  189. - container: 容器账户名称
  190. user_data_dir: 用户数据目录(仅 local 模式,高级用法)
  191. 如果提供则覆盖 profile_name 生成的路径
  192. browser_profile: BrowserProfile 对象(通用,高级用法)
  193. 用于预设 cookies 等
  194. **kwargs: 其他 BrowserSession 参数
  195. Returns:
  196. (BrowserSession, Tools) 元组
  197. Examples:
  198. # 本地浏览器
  199. browser, tools = await init_browser_session(
  200. browser_type="local",
  201. url="https://www.baidu.com" # 可选
  202. )
  203. # 云浏览器
  204. browser, tools = await init_browser_session(
  205. browser_type="cloud",
  206. profile_name="my_cloud_profile" # 可选
  207. )
  208. # 容器浏览器
  209. browser, tools = await init_browser_session(
  210. browser_type="container",
  211. url="https://www.xiaohongshu.com", # 必需
  212. profile_name="my_account" # 可选
  213. )
  214. """
  215. global _browser_session, _browser_tools, _file_system
  216. if _browser_session is not None:
  217. return _browser_session, _browser_tools
  218. # 验证 browser_type
  219. valid_types = ["local", "cloud", "container"]
  220. if browser_type not in valid_types:
  221. raise ValueError(f"无效的 browser_type: {browser_type},必须是 {valid_types} 之一")
  222. # 创建浏览器会话参数
  223. session_params = {
  224. "headless": headless,
  225. }
  226. # === Container 模式 ===
  227. if browser_type == "container":
  228. print("🐳 使用容器浏览器模式")
  229. # container 模式必须提供 URL
  230. if not url:
  231. url = "about:blank" # 使用默认空白页
  232. print("⚠️ 未提供 url 参数,使用默认空白页")
  233. # 创建容器并获取 CDP URL
  234. print(f"📦 正在创建容器...")
  235. container_info = await create_container(
  236. url=url,
  237. account_name=profile_name
  238. )
  239. if not container_info["success"]:
  240. raise RuntimeError(f"容器创建失败: {container_info['error']}")
  241. cdp_url = container_info["cdp"]
  242. print(f"✅ 容器创建成功")
  243. print(f" CDP URL: {cdp_url}")
  244. print(f" Container ID: {container_info['container_id']}")
  245. print(f" Connection ID: {container_info.get('connection_id')}")
  246. # 使用容器的 CDP URL 连接
  247. session_params["cdp_url"] = cdp_url
  248. # 等待容器完全启动
  249. print("⏳ 等待容器浏览器启动...")
  250. await asyncio.sleep(3)
  251. # === Cloud 模式 ===
  252. elif browser_type == "cloud":
  253. print("🌐 使用云浏览器模式")
  254. session_params["use_cloud"] = True
  255. # profile_name 作为云配置ID
  256. if profile_name and profile_name != "default":
  257. session_params["cloud_profile_id"] = profile_name
  258. # === Local 模式 ===
  259. else: # local
  260. print("💻 使用本地浏览器模式")
  261. session_params["is_local"] = True
  262. # 设置用户数据目录(持久化登录状态)
  263. if user_data_dir is None and profile_name:
  264. user_data_dir = str(Path.home() / ".browser_use" / "profiles" / profile_name)
  265. Path(user_data_dir).mkdir(parents=True, exist_ok=True)
  266. # macOS 上显式指定 Chrome 路径
  267. import platform
  268. if platform.system() == "Darwin": # macOS
  269. chrome_path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
  270. if Path(chrome_path).exists():
  271. session_params["executable_path"] = chrome_path
  272. # 只在有值时才添加 user_data_dir
  273. if user_data_dir:
  274. session_params["user_data_dir"] = user_data_dir
  275. # 只在有值时才添加 browser_profile (适用于所有模式)
  276. if browser_profile:
  277. session_params["browser_profile"] = browser_profile
  278. # 合并其他参数
  279. session_params.update(kwargs)
  280. # 创建浏览器会话
  281. _browser_session = BrowserSession(**session_params)
  282. # 启动浏览器
  283. await _browser_session.start()
  284. # 创建工具实例
  285. _browser_tools = Tools()
  286. # 创建文件系统实例(用于浏览器会话产生的文件)
  287. # 注意:这个目录仅用于浏览器操作相关的临时文件(下载、上传、截图等)
  288. # 对于一般文件读写操作,请使用 agent.tools.builtin 中的文件工具
  289. base_dir = Path.cwd() / ".browser_use_files"
  290. base_dir.mkdir(parents=True, exist_ok=True)
  291. _file_system = FileSystem(base_dir=str(base_dir))
  292. print("✅ 浏览器会话初始化成功")
  293. # 如果是 local 或 cloud 模式且提供了 URL,导航到该 URL
  294. if browser_type in ["local", "cloud"] and url:
  295. print(f"🔗 导航到: {url}")
  296. await _browser_tools.navigate(url=url, browser_session=_browser_session)
  297. return _browser_session, _browser_tools
  298. async def get_browser_session() -> tuple[BrowserSession, Tools]:
  299. """
  300. 获取当前浏览器会话,如果不存在则自动创建
  301. Returns:
  302. (BrowserSession, Tools) 元组
  303. """
  304. global _browser_session, _browser_tools
  305. if _browser_session is None:
  306. await init_browser_session()
  307. return _browser_session, _browser_tools
  308. async def cleanup_browser_session():
  309. """
  310. 清理浏览器会话
  311. 优雅地停止浏览器但保留会话状态
  312. """
  313. global _browser_session, _browser_tools, _file_system
  314. if _browser_session is not None:
  315. await _browser_session.stop()
  316. _browser_session = None
  317. _browser_tools = None
  318. _file_system = None
  319. async def kill_browser_session():
  320. """
  321. 强制终止浏览器会话
  322. 完全关闭浏览器进程
  323. """
  324. global _browser_session, _browser_tools, _file_system
  325. if _browser_session is not None:
  326. await _browser_session.kill()
  327. _browser_session = None
  328. _browser_tools = None
  329. _file_system = None
  330. # ============================================================
  331. # 辅助函数:ActionResult 转 ToolResult
  332. # ============================================================
  333. def action_result_to_tool_result(result: ActionResult, title: str = None) -> ToolResult:
  334. """
  335. 将 browser-use 的 ActionResult 转换为框架的 ToolResult
  336. Args:
  337. result: browser-use 的 ActionResult
  338. title: 可选的标题(如果不提供则从 result 推断)
  339. Returns:
  340. ToolResult
  341. """
  342. if result.error:
  343. return ToolResult(
  344. title=title or "操作失败",
  345. output="",
  346. error=result.error,
  347. long_term_memory=result.long_term_memory or result.error
  348. )
  349. return ToolResult(
  350. title=title or "操作成功",
  351. output=result.extracted_content or "",
  352. long_term_memory=result.long_term_memory or result.extracted_content or "",
  353. metadata=result.metadata or {}
  354. )
  355. def _cookie_domain_for_type(cookie_type: str, url: str) -> Tuple[str, str]:
  356. if cookie_type:
  357. key = cookie_type.lower()
  358. if key in {"xiaohongshu", "xhs"}:
  359. return ".xiaohongshu.com", "https://www.xiaohongshu.com"
  360. parsed = urlparse(url or "")
  361. domain = parsed.netloc or ""
  362. domain = domain.replace("www.", "")
  363. if domain:
  364. domain = f".{domain}"
  365. base_url = f"{parsed.scheme}://{parsed.netloc}" if parsed.scheme and parsed.netloc else url
  366. return domain, base_url
  367. def _parse_cookie_string(cookie_str: str, domain: str, url: str) -> List[Dict[str, Any]]:
  368. cookies: List[Dict[str, Any]] = []
  369. if not cookie_str:
  370. return cookies
  371. parts = cookie_str.split(";")
  372. for part in parts:
  373. if not part:
  374. continue
  375. if "=" not in part:
  376. continue
  377. name, value = part.split("=", 1)
  378. cookie = {
  379. "name": str(name).strip(),
  380. "value": str(value).strip(),
  381. "domain": domain,
  382. "path": "/",
  383. "expires": -1,
  384. "httpOnly": False,
  385. "secure": True,
  386. "sameSite": "None"
  387. }
  388. if url:
  389. cookie["url"] = url
  390. cookies.append(cookie)
  391. return cookies
  392. def _normalize_cookies(cookie_value: Any, domain: str, url: str) -> List[Dict[str, Any]]:
  393. if cookie_value is None:
  394. return []
  395. if isinstance(cookie_value, list):
  396. return cookie_value
  397. if isinstance(cookie_value, dict):
  398. if "cookies" in cookie_value:
  399. return _normalize_cookies(cookie_value.get("cookies"), domain, url)
  400. if "name" in cookie_value and "value" in cookie_value:
  401. return [cookie_value]
  402. return []
  403. if isinstance(cookie_value, (bytes, bytearray)):
  404. cookie_value = cookie_value.decode("utf-8", errors="ignore")
  405. if isinstance(cookie_value, str):
  406. text = cookie_value.strip()
  407. if not text:
  408. return []
  409. try:
  410. parsed = json.loads(text)
  411. except Exception:
  412. parsed = None
  413. if parsed is not None:
  414. return _normalize_cookies(parsed, domain, url)
  415. return _parse_cookie_string(text, domain, url)
  416. return []
  417. def _extract_cookie_value(row: Optional[Dict[str, Any]]) -> Any:
  418. if not row:
  419. return None
  420. # 优先使用 cookies 字段
  421. if "cookies" in row:
  422. return row["cookies"]
  423. # 兼容其他可能的字段名
  424. for key, value in row.items():
  425. if "cookie" in key.lower():
  426. return value
  427. return None
  428. def _fetch_cookie_row(cookie_type: str) -> Optional[Dict[str, Any]]:
  429. if not cookie_type:
  430. return None
  431. try:
  432. return mysql.fetchone(
  433. "select * from agent_channel_cookies where type=%s limit 1",
  434. (cookie_type,)
  435. )
  436. except Exception:
  437. return None
  438. def _fetch_profile_id(cookie_type: str) -> Optional[str]:
  439. """从数据库获取 cloud_profile_id"""
  440. if not cookie_type:
  441. return None
  442. try:
  443. row = mysql.fetchone(
  444. "select profileId from agent_channel_cookies where type=%s limit 1",
  445. (cookie_type,)
  446. )
  447. if row and "profileId" in row:
  448. return row["profileId"]
  449. return None
  450. except Exception:
  451. return None
  452. # ============================================================
  453. # 需要注册的工具
  454. # ============================================================
  455. # ============================================================
  456. # 导航类工具 (Navigation Tools)
  457. # ============================================================
  458. @tool()
  459. async def browser_navigate_to_url(url: str, new_tab: bool = False) -> ToolResult:
  460. """
  461. 导航到指定的 URL
  462. Navigate to a specific URL
  463. 使用 browser-use 的原生导航功能,支持在新标签页打开。
  464. Args:
  465. url: 要访问的 URL 地址
  466. new_tab: 是否在新标签页中打开(默认 False)
  467. Returns:
  468. ToolResult: 包含导航结果的工具返回对象
  469. Example:
  470. navigate_to_url("https://www.baidu.com")
  471. navigate_to_url("https://www.google.com", new_tab=True)
  472. """
  473. try:
  474. browser, tools = await get_browser_session()
  475. # 使用 browser-use 的 navigate 工具
  476. result = await tools.navigate(
  477. url=url,
  478. new_tab=new_tab,
  479. browser_session=browser
  480. )
  481. return action_result_to_tool_result(result, f"导航到 {url}")
  482. except Exception as e:
  483. return ToolResult(
  484. title="导航失败",
  485. output="",
  486. error=f"Failed to navigate to {url}: {str(e)}",
  487. long_term_memory=f"导航到 {url} 失败"
  488. )
  489. @tool()
  490. async def browser_search_web(query: str, engine: str = "google") -> ToolResult:
  491. """
  492. 使用搜索引擎搜索
  493. Search the web using a search engine
  494. Args:
  495. query: 搜索关键词
  496. engine: 搜索引擎 (google, duckduckgo, bing) - 默认: google
  497. Returns:
  498. ToolResult: 搜索结果
  499. Example:
  500. search_web("Python async programming", engine="google")
  501. """
  502. try:
  503. browser, tools = await get_browser_session()
  504. # 使用 browser-use 的 search 工具
  505. result = await tools.search(
  506. query=query,
  507. engine=engine,
  508. browser_session=browser
  509. )
  510. return action_result_to_tool_result(result, f"搜索: {query}")
  511. except Exception as e:
  512. return ToolResult(
  513. title="搜索失败",
  514. output="",
  515. error=f"Search failed: {str(e)}",
  516. long_term_memory=f"搜索 '{query}' 失败"
  517. )
  518. @tool()
  519. async def browser_go_back() -> ToolResult:
  520. """
  521. 返回到上一个页面
  522. Go back to the previous page
  523. 模拟浏览器的"后退"按钮功能。
  524. Returns:
  525. ToolResult: 包含返回操作结果的工具返回对象
  526. """
  527. try:
  528. browser, tools = await get_browser_session()
  529. result = await tools.go_back(browser_session=browser)
  530. return action_result_to_tool_result(result, "返回上一页")
  531. except Exception as e:
  532. return ToolResult(
  533. title="返回失败",
  534. output="",
  535. error=f"Failed to go back: {str(e)}",
  536. long_term_memory="返回上一页失败"
  537. )
  538. @tool()
  539. async def browser_wait(seconds: int = 3) -> ToolResult:
  540. """
  541. 等待指定的秒数
  542. Wait for a specified number of seconds
  543. 用于等待页面加载、动画完成或其他异步操作。
  544. Args:
  545. seconds: 等待时间(秒),最大30秒
  546. Returns:
  547. ToolResult: 包含等待操作结果的工具返回对象
  548. Example:
  549. wait(5) # 等待5秒
  550. """
  551. try:
  552. browser, tools = await get_browser_session()
  553. result = await tools.wait(seconds=seconds, browser_session=browser)
  554. return action_result_to_tool_result(result, f"等待 {seconds} 秒")
  555. except Exception as e:
  556. return ToolResult(
  557. title="等待失败",
  558. output="",
  559. error=f"Failed to wait: {str(e)}",
  560. long_term_memory="等待失败"
  561. )
  562. # ============================================================
  563. # 元素交互工具 (Element Interaction Tools)
  564. # ============================================================
  565. @tool()
  566. async def browser_click_element(index: int) -> ToolResult:
  567. """
  568. 通过索引点击页面元素
  569. Click an element by index
  570. Args:
  571. index: 元素索引(从浏览器状态中获取)
  572. Returns:
  573. ToolResult: 包含点击操作结果的工具返回对象
  574. Example:
  575. click_element(index=5)
  576. Note:
  577. 需要先通过 get_selector_map 获取页面元素索引
  578. """
  579. try:
  580. browser, tools = await get_browser_session()
  581. result = await tools.click(
  582. index=index,
  583. browser_session=browser
  584. )
  585. return action_result_to_tool_result(result, f"点击元素 {index}")
  586. except Exception as e:
  587. return ToolResult(
  588. title="点击失败",
  589. output="",
  590. error=f"Failed to click element {index}: {str(e)}",
  591. long_term_memory=f"点击元素 {index} 失败"
  592. )
  593. @tool()
  594. async def browser_input_text(index: int, text: str, clear: bool = True) -> ToolResult:
  595. """
  596. 在指定元素中输入文本
  597. Input text into an element
  598. Args:
  599. index: 元素索引(从浏览器状态中获取)
  600. text: 要输入的文本内容
  601. clear: 是否先清除现有文本(默认 True)
  602. Returns:
  603. ToolResult: 包含输入操作结果的工具返回对象
  604. Example:
  605. input_text(index=0, text="Hello World", clear=True)
  606. """
  607. try:
  608. browser, tools = await get_browser_session()
  609. result = await tools.input(
  610. index=index,
  611. text=text,
  612. clear=clear,
  613. browser_session=browser
  614. )
  615. return action_result_to_tool_result(result, f"输入文本到元素 {index}")
  616. except Exception as e:
  617. return ToolResult(
  618. title="输入失败",
  619. output="",
  620. error=f"Failed to input text into element {index}: {str(e)}",
  621. long_term_memory=f"输入文本失败"
  622. )
  623. @tool()
  624. async def browser_send_keys(keys: str) -> ToolResult:
  625. """
  626. 发送键盘按键或快捷键
  627. Send keyboard keys or shortcuts
  628. 支持发送单个按键、组合键和快捷键。
  629. Args:
  630. keys: 要发送的按键字符串
  631. - 单个按键: "Enter", "Escape", "PageDown", "Tab"
  632. - 组合键: "Control+o", "Shift+Tab", "Alt+F4"
  633. - 功能键: "F1", "F2", ..., "F12"
  634. Returns:
  635. ToolResult: 包含按键操作结果的工具返回对象
  636. Example:
  637. send_keys("Enter")
  638. send_keys("Control+A")
  639. """
  640. try:
  641. browser, tools = await get_browser_session()
  642. result = await tools.send_keys(
  643. keys=keys,
  644. browser_session=browser
  645. )
  646. return action_result_to_tool_result(result, f"发送按键: {keys}")
  647. except Exception as e:
  648. return ToolResult(
  649. title="发送按键失败",
  650. output="",
  651. error=f"Failed to send keys: {str(e)}",
  652. long_term_memory="发送按键失败"
  653. )
  654. @tool()
  655. async def browser_upload_file(index: int, path: str) -> ToolResult:
  656. """
  657. 上传文件到文件输入元素
  658. Upload a file to a file input element
  659. Args:
  660. index: 文件输入框的元素索引
  661. path: 要上传的文件路径(绝对路径)
  662. Returns:
  663. ToolResult: 包含上传操作结果的工具返回对象
  664. Example:
  665. upload_file(index=7, path="/path/to/file.pdf")
  666. Note:
  667. 文件必须存在且路径必须是绝对路径
  668. """
  669. try:
  670. browser, tools = await get_browser_session()
  671. result = await tools.upload_file(
  672. index=index,
  673. path=path,
  674. browser_session=browser,
  675. available_file_paths=[path],
  676. file_system=_file_system
  677. )
  678. return action_result_to_tool_result(result, f"上传文件: {path}")
  679. except Exception as e:
  680. return ToolResult(
  681. title="上传失败",
  682. output="",
  683. error=f"Failed to upload file: {str(e)}",
  684. long_term_memory=f"上传文件 {path} 失败"
  685. )
  686. # ============================================================
  687. # 滚动和视图工具 (Scroll & View Tools)
  688. # ============================================================
  689. @tool()
  690. async def browser_scroll_page(down: bool = True, pages: float = 1.0,
  691. index: Optional[int] = None) -> ToolResult:
  692. """
  693. 滚动页面或元素
  694. Scroll the page or a specific element
  695. Args:
  696. down: True 向下滚动,False 向上滚动
  697. pages: 滚动页数(0.5=半页,1=全页,10=滚动到底部/顶部)
  698. index: 可选,滚动特定元素(如下拉框内部)
  699. Returns:
  700. ToolResult: 滚动结果
  701. Example:
  702. scroll_page(down=True, pages=2.0) # 向下滚动2页
  703. scroll_page(down=False, pages=1.0) # 向上滚动1页
  704. """
  705. try:
  706. browser, tools = await get_browser_session()
  707. result = await tools.scroll(
  708. down=down,
  709. pages=pages,
  710. index=index,
  711. browser_session=browser
  712. )
  713. direction = "向下" if down else "向上"
  714. return action_result_to_tool_result(result, f"{direction}滚动 {pages} 页")
  715. except Exception as e:
  716. return ToolResult(
  717. title="滚动失败",
  718. output="",
  719. error=f"Failed to scroll: {str(e)}",
  720. long_term_memory="滚动失败"
  721. )
  722. @tool()
  723. async def browser_find_text(text: str) -> ToolResult:
  724. """
  725. 查找页面中的文本并滚动到该位置
  726. Find text on the page and scroll to it
  727. 在页面中搜索指定的文本,找到后自动滚动到该位置。
  728. Args:
  729. text: 要查找的文本内容
  730. Returns:
  731. ToolResult: 包含查找结果的工具返回对象
  732. Example:
  733. find_text("Privacy Policy")
  734. """
  735. try:
  736. browser, tools = await get_browser_session()
  737. result = await tools.find_text(
  738. text=text,
  739. browser_session=browser
  740. )
  741. return action_result_to_tool_result(result, f"查找文本: {text}")
  742. except Exception as e:
  743. return ToolResult(
  744. title="查找失败",
  745. output="",
  746. error=f"Failed to find text: {str(e)}",
  747. long_term_memory=f"查找文本 '{text}' 失败"
  748. )
  749. @tool()
  750. async def browser_screenshot() -> ToolResult:
  751. """
  752. 请求在下次观察中包含页面截图
  753. Request a screenshot to be included in the next observation
  754. 用于视觉检查页面状态,帮助理解页面布局和内容。
  755. Returns:
  756. ToolResult: 包含截图请求结果的工具返回对象
  757. Example:
  758. screenshot()
  759. Note:
  760. 截图会在下次页面观察时自动包含在结果中。
  761. """
  762. try:
  763. browser, tools = await get_browser_session()
  764. result = await tools.screenshot(browser_session=browser)
  765. return action_result_to_tool_result(result, "截图请求")
  766. except Exception as e:
  767. return ToolResult(
  768. title="截图失败",
  769. output="",
  770. error=f"Failed to capture screenshot: {str(e)}",
  771. long_term_memory="截图失败"
  772. )
  773. # ============================================================
  774. # 标签页管理工具 (Tab Management Tools)
  775. # ============================================================
  776. @tool()
  777. async def browser_switch_tab(tab_id: str) -> ToolResult:
  778. """
  779. 切换到指定标签页
  780. Switch to a different browser tab
  781. Args:
  782. tab_id: 4字符标签ID(target_id 的最后4位)
  783. Returns:
  784. ToolResult: 切换结果
  785. Example:
  786. switch_tab(tab_id="a3f2")
  787. """
  788. try:
  789. browser, tools = await get_browser_session()
  790. normalized_tab_id = tab_id[-4:] if tab_id else tab_id
  791. result = await tools.switch(
  792. tab_id=normalized_tab_id,
  793. browser_session=browser
  794. )
  795. return action_result_to_tool_result(result, f"切换到标签页 {normalized_tab_id}")
  796. except Exception as e:
  797. return ToolResult(
  798. title="切换标签页失败",
  799. output="",
  800. error=f"Failed to switch tab: {str(e)}",
  801. long_term_memory=f"切换到标签页 {tab_id} 失败"
  802. )
  @tool()
  async def browser_close_tab(tab_id: str) -> ToolResult:
      """
      Close a browser tab.

      Args:
          tab_id: 4-character tab ID. A longer value is cut down to its last
              4 characters before the close is requested.

      Returns:
          ToolResult: Outcome of the close, or an error result when any
          exception is raised while obtaining the session or closing.

      Example:
          close_tab(tab_id="a3f2")
      """
      try:
          session, toolkit = await get_browser_session()

          short_id = tab_id
          if tab_id:
              # Only the trailing 4 characters identify the tab.
              short_id = tab_id[-4:]

          outcome = await toolkit.close(tab_id=short_id, browser_session=session)
          return action_result_to_tool_result(outcome, f"关闭标签页 {short_id}")
      except Exception as exc:
          # The failure memory reports the ID exactly as the caller passed it.
          failure = ToolResult(
              title="关闭标签页失败",
              output="",
              error=f"Failed to close tab: {exc!s}",
              long_term_memory=f"关闭标签页 {tab_id} 失败",
          )
          return failure
  # ============================================================
  # Dropdown tools
  # ============================================================
  @tool()
  async def browser_get_dropdown_options(index: int) -> ToolResult:
      """
      Get all options of a dropdown element.

      Args:
          index: Element index of the dropdown.

      Returns:
          ToolResult: Result carrying the dropdown's options, or an error
          result when any exception is raised.

      Example:
          get_dropdown_options(index=8)
      """
      try:
          session, toolkit = await get_browser_session()
          outcome = await toolkit.dropdown_options(index=index, browser_session=session)
          summary = f"获取下拉框选项: {index}"
          return action_result_to_tool_result(outcome, summary)
      except Exception as exc:
          failure = ToolResult(
              title="获取下拉框选项失败",
              output="",
              error=f"Failed to get dropdown options: {exc!s}",
              long_term_memory=f"获取下拉框 {index} 选项失败",
          )
          return failure
  @tool()
  async def browser_select_dropdown_option(index: int, text: str) -> ToolResult:
      """
      Select an option from a dropdown.

      Args:
          index: Element index of the dropdown.
          text: Text of the option to select (the original notes describe this
              as an exact match; matching itself happens in the underlying
              tool, not here).

      Returns:
          ToolResult: Outcome of the selection, or an error result when any
          exception is raised.

      Example:
          select_dropdown_option(index=8, text="Option 2")
      """
      try:
          session, toolkit = await get_browser_session()
          outcome = await toolkit.select_dropdown(
              index=index,
              text=text,
              browser_session=session,
          )
          summary = f"选择下拉框选项: {text}"
          return action_result_to_tool_result(outcome, summary)
      except Exception as exc:
          failure = ToolResult(
              title="选择下拉框选项失败",
              output="",
              error=f"Failed to select dropdown option: {exc!s}",
              long_term_memory=f"选择选项 '{text}' 失败",
          )
          return failure
  # ============================================================
  # Content extraction tools
  # ============================================================
  @tool()
  async def browser_extract_content(query: str, extract_links: bool = False,
                                    start_from_char: int = 0) -> ToolResult:
      """
      Extract structured content from the current page using an LLM.

      Args:
          query: Extraction query (tells the LLM what to extract).
          extract_links: Whether to extract links as well (default False).
          start_from_char: Character offset to start extracting from; intended
              for paging through large content.

      Returns:
          ToolResult: The extracted content, or an error result when any
          exception is raised.

      Example:
          extract_content(query="提取页面上所有产品的名称和价格", extract_links=True)

      Note:
          The underlying extract call expects a page-extraction LLM. The
          original notes also mention paged extraction with a 100k-character
          cap; no such limit is enforced in this function itself.
      """
      try:
          session, toolkit = await get_browser_session()

          # NOTE(review): page_extraction_llm is hard-coded to None here, while
          # the original comments say the user is expected to configure an LLM
          # at initialisation. Unless the extract tool falls back to a
          # configured default, this call presumably always fails - confirm.
          request = {
              "query": query,
              "extract_links": extract_links,
              "start_from_char": start_from_char,
              "browser_session": session,
              "page_extraction_llm": None,
              "file_system": _file_system,
          }
          outcome = await toolkit.extract(**request)
          return action_result_to_tool_result(outcome, f"提取内容: {query}")
      except Exception as exc:
          failure = ToolResult(
              title="内容提取失败",
              output="",
              error=f"Failed to extract content: {exc!s}",
              long_term_memory=f"提取内容失败: {query}",
          )
          return failure
  @tool()
  async def browser_get_page_html() -> ToolResult:
      """
      Get the full HTML of the current page.

      The HTML and the page title are read over CDP by evaluating
      ``document.documentElement.outerHTML`` and ``document.title``.

      Returns:
          ToolResult: Result whose output holds the page title, URL and HTML,
          or an error result when any exception is raised.

      Example:
          get_page_html()

      Note:
          - The HTML shown in ``output`` is cut to its first 10000 characters
            followed by a "... (truncated)" marker.
          - The untruncated HTML is always available in ``metadata["html"]``.
      """
      try:
          session, _toolkit = await get_browser_session()
          cdp = await session.get_or_create_cdp_session()

          async def _evaluate(expression: str):
              # Evaluate one JS expression in the page; '' when no value came back.
              reply = await cdp.cdp_client.send.Runtime.evaluate(
                  params={'expression': expression},
                  session_id=cdp.session_id,
              )
              return reply.get('result', {}).get('value', '')

          html = await _evaluate('document.documentElement.outerHTML')
          url = await session.get_current_page_url()
          title = await _evaluate('document.title')

          # Keep the textual output small; metadata still carries everything.
          shown_html = html if len(html) <= 10000 else html[:10000] + "... (truncated)"

          return ToolResult(
              title=f"获取 HTML: {url}",
              output=f"页面: {title}\nURL: {url}\n\nHTML:\n{shown_html}",
              long_term_memory=f"获取 HTML: {url}",
              metadata={"url": url, "title": title, "html": html},
          )
      except Exception as exc:
          failure = ToolResult(
              title="获取 HTML 失败",
              output="",
              error=f"Failed to get page HTML: {exc!s}",
              long_term_memory="获取 HTML 失败",
          )
          return failure
  @tool()
  async def browser_get_selector_map() -> ToolResult:
      """
      Get the selector map of interactive elements on the current page.

      Builds a short human-readable preview of the first elements and puts a
      stringified copy of a larger prefix of the map into the result metadata.

      Returns:
          ToolResult: Result whose output lists the total element count and a
          preview of up to 20 elements, with up to 100 stringified entries in
          ``metadata["selector_map"]``; or an error result when any exception
          is raised.

      Example:
          get_selector_map()

      Note:
          The original notes say the returned indices can be used with
          click_element, input_text and similar operations.
      """
      from itertools import islice

      preview_limit = 20    # elements listed in the textual output
      metadata_limit = 100  # elements stringified into metadata
      label_width = 50      # max characters of label shown per element

      try:
          browser, _tools = await get_browser_session()
          selector_map = await browser.get_selector_map()
          total = len(selector_map)

          preview_lines = []
          # islice takes the prefix without copying the whole item view.
          for index, node in islice(selector_map.items(), preview_limit):
              attrs = node.attributes or {}
              label = (
                  attrs.get('aria-label')
                  or attrs.get('placeholder')
                  or attrs.get('value')
                  or ''
              )
              # str() guards against a non-string attribute value, which would
              # otherwise make the slice raise and fail the whole call.
              preview_lines.append(
                  f"索引 {index}: <{node.tag_name}> {str(label)[:label_width]}"
              )

          output = f"找到 {total} 个交互元素\n\n" + "\n".join(preview_lines)
          if total > preview_limit:
              output += f"\n... 还有 {total - preview_limit} 个元素"

          return ToolResult(
              title="获取元素映射",
              output=output,
              long_term_memory=f"获取到 {total} 个交互元素",
              metadata={
                  "selector_map": {
                      key: str(value)
                      for key, value in islice(selector_map.items(), metadata_limit)
                  }
              },
          )
      except Exception as e:
          return ToolResult(
              title="获取元素映射失败",
              output="",
              error=f"Failed to get selector map: {str(e)}",
              long_term_memory="获取元素映射失败",
          )
  # ============================================================
  # JavaScript execution tools
  # ============================================================
  @tool()
  async def browser_evaluate(code: str) -> ToolResult:
      """
      Execute JavaScript code in the page context.

      Forwards the code to the underlying evaluate tool of the current browser
      session; intended for complex page operations or data extraction.

      Args:
          code: JavaScript source to execute.

      Returns:
          ToolResult: Result carrying the evaluation outcome, or an error
          result when any exception is raised.

      Example:
          evaluate("document.title")
          evaluate("document.querySelectorAll('a').length")

      Note:
          Per the original notes (not enforced in this function): the code runs
          in the page context with access to the DOM and globals, the return
          value is serialised to a string, and the result is limited to 20k
          characters.
      """
      try:
          session, toolkit = await get_browser_session()
          outcome = await toolkit.evaluate(code=code, browser_session=session)
          return action_result_to_tool_result(outcome, "执行 JavaScript")
      except Exception as exc:
          failure = ToolResult(
              title="JavaScript 执行失败",
              output="",
              error=f"Failed to execute JavaScript: {exc!s}",
              long_term_memory="JavaScript 执行失败",
          )
          return failure
  @tool()
  async def browser_ensure_login_with_cookies(cookie_type: str, url: str = "https://www.xiaohongshu.com") -> ToolResult:
      """
      Check the login state and inject stored cookies when a login is needed.

      Flow: open ``url`` (when non-empty), probe the page with a JavaScript
      heuristic, and if the probe reports that a login is needed, load the
      cookies stored for ``cookie_type``, set them through CDP and reload.

      Args:
          cookie_type: Key used to look up the stored cookie row and to derive
              the cookie domain.
          url: Page to open before probing and to reload after injection.
              Navigation is skipped when this is empty.

      Returns:
          ToolResult: Output is a JSON string with ``need_login`` and, when
          cookies were looked up, ``cookies_count``. An error result is
          returned when no cookies are found, when they cannot be normalised,
          or when any exception is raised.
      """
      try:
          session, toolkit = await get_browser_session()

          async def _open_target() -> None:
              # Load the target page and give it two seconds to settle.
              if url:
                  await toolkit.navigate(url=url, browser_session=session)
                  await toolkit.wait(seconds=2, browser_session=session)

          await _open_target()

          # Heuristic: a login control is visible and no user/avatar element is.
          check_login_js = """
          (function() {
              const loginBtn = document.querySelector('[class*="login"]') ||
                  document.querySelector('[href*="login"]') ||
                  Array.from(document.querySelectorAll('button, a')).find(el => (el.textContent || '').includes('登录'));
              const userInfo = document.querySelector('[class*="user"]') ||
                  document.querySelector('[class*="avatar"]');
              return {
                  needLogin: !!loginBtn && !userInfo,
                  hasLoginBtn: !!loginBtn,
                  hasUserInfo: !!userInfo
              };
          })()
          """
          probe = await toolkit.evaluate(code=check_login_js, browser_session=session)

          status = probe.extracted_content
          login_info: Dict[str, Any] = {}
          if isinstance(status, str):
              prefix = "Result: "
              if status.startswith(prefix):
                  status = status[len(prefix):]
              try:
                  login_info = json.loads(status)
              except Exception:
                  # NOTE(review): an unparseable probe result leaves login_info
                  # empty, which the check below reports as "already logged
                  # in" - confirm this is the intended fallback.
                  login_info = {}
          elif isinstance(status, dict):
              login_info = status

          if not login_info.get("needLogin"):
              payload = json.dumps({"need_login": False}, ensure_ascii=False)
              return ToolResult(
                  title="已登录",
                  output=payload,
                  long_term_memory=payload,
              )

          cookie_value = _extract_cookie_value(_fetch_cookie_row(cookie_type))
          if not cookie_value:
              payload = json.dumps({"need_login": True, "cookies_count": 0}, ensure_ascii=False)
              return ToolResult(
                  title="未找到 cookies",
                  output=payload,
                  error="未找到 cookies",
                  long_term_memory=payload,
              )

          domain, base_url = _cookie_domain_for_type(cookie_type, url)
          cookies = _normalize_cookies(cookie_value, domain, base_url)
          if not cookies:
              payload = json.dumps({"need_login": True, "cookies_count": 0}, ensure_ascii=False)
              return ToolResult(
                  title="cookies 解析失败",
                  output=payload,
                  error="cookies 解析失败",
                  long_term_memory=payload,
              )

          await session._cdp_set_cookies(cookies)
          # Reload so the page is requested again with the injected cookies.
          # The login state is not probed a second time after this reload.
          await _open_target()

          payload = json.dumps({"need_login": True, "cookies_count": len(cookies)}, ensure_ascii=False)
          return ToolResult(
              title="已注入 cookies",
              output=payload,
              long_term_memory=payload,
          )
      except Exception as exc:
          failure = ToolResult(
              title="登录检查失败",
              output="",
              error=str(exc),
              long_term_memory="登录检查失败",
          )
          return failure
  # ============================================================
  # Wait-for-user-action tool
  # ============================================================
  @tool()
  async def browser_wait_for_user_action(message: str = "Please complete the action in browser",
                                         timeout: int = 300) -> ToolResult:
      """
      Wait for the user to complete an action in the browser (e.g. login).

      Pauses the automated flow: prints a banner with ``message`` to stdout and
      then waits until the user presses ENTER on stdin or ``timeout`` seconds
      pass, whichever comes first.

      Args:
          message: Prompt telling the user what needs to be done.
          timeout: Maximum wait in seconds; defaults to 300 (5 minutes).

      Returns:
          ToolResult: A "completed" result after ENTER, a "timeout" result
          (without an error field) when the wait expires, or an error result
          when any other exception is raised (for example, stdin is closed).

      Example:
          wait_for_user_action("Please login to Xiaohongshu", timeout=180)
          wait_for_user_action("Please complete the CAPTCHA", timeout=60)

      Note:
          - The user completes the action manually in the browser window and
            then presses ENTER in the terminal running this process.
          - After a timeout the flow simply continues.
          - The blocking ``input()`` call runs in the default executor. A
            timeout does not interrupt that thread, so it stays blocked on
            stdin until a line arrives.
      """
      import asyncio

      rule = '=' * 60
      try:
          print(f"\n{rule}")
          print("⏸️ WAITING FOR USER ACTION")
          print(rule)
          print(f"📝 {message}")
          print(f"⏱️ Timeout: {timeout} seconds")
          print("\n👉 Please complete the action in the browser window")
          print("👉 Press ENTER when done, or wait for timeout")
          print(f"{rule}\n")

          # We are inside a coroutine, so a loop is guaranteed to be running;
          # get_running_loop() is the documented way to obtain it here.
          loop = asyncio.get_running_loop()

          # Run the blocking input() off the event loop and race it with the timeout.
          await asyncio.wait_for(
              loop.run_in_executor(None, input),
              timeout=timeout,
          )
          return ToolResult(
              title="用户操作完成",
              output=f"User completed: {message}",
              long_term_memory=f"用户完成操作: {message}",
          )
      except asyncio.TimeoutError:
          # A timeout is an expected outcome, so it is reported without an error.
          return ToolResult(
              title="用户操作超时",
              output=f"Timeout waiting for: {message}",
              long_term_memory=f"等待用户操作超时: {message}",
          )
      except Exception as e:
          return ToolResult(
              title="等待用户操作失败",
              output="",
              error=f"Failed to wait for user action: {str(e)}",
              long_term_memory="等待用户操作失败",
          )
  # ============================================================
  # Task completion tool
  # ============================================================
  @tool()
  async def browser_done(text: str, success: bool = True,
                         files_to_display: Optional[List[str]] = None) -> ToolResult:
      """
      Mark the task as complete and return the final message to the user.

      Args:
          text: Final message for the user.
          success: Whether the task completed successfully.
          files_to_display: Optional list of file paths to display.

      Returns:
          ToolResult: The completion result, or an error result when any
          exception is raised.

      Example:
          done("任务已完成,提取了10个产品信息", success=True)
      """
      try:
          # The session is still fetched, although only the tools object is used.
          _session, toolkit = await get_browser_session()
          completion = await toolkit.done(
              text=text,
              success=success,
              files_to_display=files_to_display,
              file_system=_file_system,
          )
          return action_result_to_tool_result(completion, "任务完成")
      except Exception as exc:
          failure = ToolResult(
              title="标记任务完成失败",
              output="",
              error=f"Failed to complete task: {exc!s}",
              long_term_memory="标记任务完成失败",
          )
          return failure
  # ============================================================
  # Public exports (all tool functions, for external use)
  # ============================================================
  1229. __all__ = [
  1230. # 会话管理
  1231. 'init_browser_session',
  1232. 'get_browser_session',
  1233. 'cleanup_browser_session',
  1234. 'kill_browser_session',
  1235. # 导航类工具
  1236. 'browser_navigate_to_url',
  1237. 'browser_search_web',
  1238. 'browser_go_back',
  1239. 'browser_wait',
  1240. # 元素交互工具
  1241. 'browser_click_element',
  1242. 'browser_input_text',
  1243. 'browser_send_keys',
  1244. 'browser_upload_file',
  1245. # 滚动和视图工具
  1246. 'browser_scroll_page',
  1247. 'browser_find_text',
  1248. 'browser_screenshot',
  1249. # 标签页管理工具
  1250. 'browser_switch_tab',
  1251. 'browser_close_tab',
  1252. # 下拉框工具
  1253. 'browser_get_dropdown_options',
  1254. 'browser_select_dropdown_option',
  1255. # 内容提取工具
  1256. 'browser_extract_content',
  1257. 'browser_get_page_html',
  1258. 'browser_get_selector_map',
  1259. # JavaScript 执行工具
  1260. 'browser_evaluate',
  1261. 'browser_ensure_login_with_cookies',
  1262. # 等待用户操作
  1263. 'browser_wait_for_user_action',
  1264. # 任务完成
  1265. 'browser_done',
  1266. ]