baseClass.py 87 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494
  1. """
  2. Browser-Use 原生工具适配器
  3. Native Browser-Use Tools Adapter
  4. 直接使用 browser-use 的原生类(BrowserSession, Tools)实现所有浏览器操作工具。
  5. 不依赖 Playwright,完全基于 CDP 协议。
  6. 核心特性:
  7. 1. 浏览器会话持久化 - 只启动一次浏览器
  8. 2. 状态自动保持 - 登录状态、Cookie、LocalStorage 等
  9. 3. 完整的底层访问 - 可以直接使用 CDP 协议
  10. 4. 性能优异 - 避免频繁创建/销毁浏览器实例
  11. 5. 多种浏览器类型 - 支持 local、cloud、container 三种模式
  12. 支持的浏览器类型:
  13. 1. Local (本地浏览器):
  14. - 在本地运行 Chrome
  15. - 支持可视化调试
  16. - 速度最快
  17. - 示例: init_browser_session(browser_type="local")
  18. 2. Cloud (云浏览器):
  19. - 在云端运行
  20. - 不占用本地资源
  21. - 适合生产环境
  22. - 示例: init_browser_session(browser_type="cloud")
  23. 3. Container (容器浏览器):
  24. - 在独立容器中运行
  25. - 隔离性好
  26. - 支持预配置账户
  27. - 示例: init_browser_session(browser_type="container", container_url="https://example.com")
  28. 使用方法:
  29. 1. 在 Agent 初始化时调用 init_browser_session() 并指定 browser_type
  30. 2. 使用各个工具函数执行浏览器操作
  31. 3. 任务结束时调用 cleanup_browser_session()
  32. 文件操作说明:
  33. - 浏览器专用文件目录:.cache/.browser_use_files/ (在当前工作目录下)
  34. 用于存储浏览器会话产生的临时文件(下载、上传、截图等)
  35. - 一般文件操作:请使用 agent.tools.builtin 中的文件工具 (read_file, write_file, edit_file)
  36. 这些工具功能更完善,支持diff预览、智能匹配、分页读取等
  37. """
  38. import logging
  39. import sys
  40. import os
  41. import json
  42. import httpx
  43. import asyncio
  44. import aiohttp
  45. import re
  46. import base64
  47. from urllib.parse import urlparse, parse_qs, unquote
  48. from typing import Literal, Optional, List, Dict, Any, Tuple, Union
  49. from pathlib import Path
  50. from langchain_core.runnables import RunnableLambda
  51. from argparse import Namespace # 使用 Namespace 快速构造带属性的对象
  52. from langchain_core.messages import AIMessage
  53. from ....llm.qwen import qwen_llm_call
  54. # 将项目根目录添加到 Python 路径
  55. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  56. # 配置日志
  57. logger = logging.getLogger(__name__)
  58. # 导入框架的工具装饰器和结果类
  59. from agent.tools import tool, ToolResult
  60. from agent.tools.builtin.browser.sync_mysql_help import mysql
  61. # 导入 browser-use 的核心类
  62. from browser_use import BrowserSession, BrowserProfile
  63. from browser_use.tools.service import Tools
  64. try:
  65. from browser_use.tools.views import ReadContentAction # type: ignore
  66. except Exception:
  67. from pydantic import BaseModel
  68. class ReadContentAction(BaseModel):
  69. goal: str
  70. source: str = "page"
  71. context: str = ""
  72. from browser_use.agent.views import ActionResult
  73. from browser_use.filesystem.file_system import FileSystem
  74. # ============================================================
  75. # 无需注册的内部辅助函数
  76. # ============================================================
  77. # ============================================================
  78. # 全局浏览器会话管理
  79. # ============================================================
  80. # 全局变量:浏览器会话和工具实例
  81. _browser_session: Optional[BrowserSession] = None
  82. _browser_tools: Optional[Tools] = None
  83. _file_system: Optional[FileSystem] = None
  84. _last_browser_type: str = "local"
  85. _last_headless: bool = True
  86. _live_url: Optional[str] = None
  87. async def create_container(url: str, account_name: str = "liuwenwu") -> Dict[str, Any]:
  88. """
  89. 创建浏览器容器并导航到指定URL
  90. 按照 test.md 的要求:
  91. 1.1 调用接口创建容器
  92. 1.2 调用接口创建窗口并导航到URL
  93. Args:
  94. url: 要导航的URL地址
  95. account_name: 账户名称
  96. Returns:
  97. 包含容器信息的字典:
  98. - success: 是否成功
  99. - container_id: 容器ID
  100. - vnc: VNC访问URL
  101. - cdp: CDP协议URL(用于浏览器连接)
  102. - connection_id: 窗口连接ID
  103. - error: 错误信息(如果失败)
  104. """
  105. result = {
  106. "success": False,
  107. "container_id": None,
  108. "vnc": None,
  109. "cdp": None,
  110. "connection_id": None,
  111. "error": None
  112. }
  113. try:
  114. async with aiohttp.ClientSession() as session:
  115. # 步骤1.1: 创建容器
  116. print("📦 步骤1.1: 创建容器...")
  117. create_url = "http://47.84.182.56:8200/api/v1/container/create"
  118. create_payload = {
  119. "auto_remove": True,
  120. "need_port_binding": True,
  121. "max_lifetime_seconds": 900
  122. }
  123. async with session.post(create_url, json=create_payload) as resp:
  124. if resp.status != 200:
  125. raise RuntimeError(f"创建容器失败: HTTP {resp.status}")
  126. create_result = await resp.json()
  127. if create_result.get("code") != 0:
  128. raise RuntimeError(f"创建容器失败: {create_result.get('msg')}")
  129. data = create_result.get("data", {})
  130. result["container_id"] = data.get("container_id")
  131. result["vnc"] = data.get("vnc")
  132. result["cdp"] = data.get("cdp")
  133. print(f"✅ 容器创建成功")
  134. print(f" Container ID: {result['container_id']}")
  135. print(f" VNC: {result['vnc']}")
  136. print(f" CDP: {result['cdp']}")
  137. # 等待容器内的浏览器启动
  138. print(f"\n⏳ 等待容器内浏览器启动...")
  139. await asyncio.sleep(5)
  140. # 步骤1.2: 创建页面并导航
  141. print(f"\n📱 步骤1.2: 创建页面并导航到 {url}...")
  142. page_create_url = "http://47.84.182.56:8200/api/v1/browser/page/create"
  143. page_payload = {
  144. "container_id": result["container_id"],
  145. "url": url,
  146. "account_name": account_name,
  147. "need_wait": True,
  148. "timeout": 30
  149. }
  150. # 重试机制:最多尝试3次
  151. max_retries = 3
  152. page_created = False
  153. last_error = None
  154. for attempt in range(max_retries):
  155. try:
  156. if attempt > 0:
  157. print(f" 重试 {attempt + 1}/{max_retries}...")
  158. await asyncio.sleep(3) # 重试前等待
  159. async with session.post(page_create_url, json=page_payload, timeout=aiohttp.ClientTimeout(total=60)) as resp:
  160. if resp.status != 200:
  161. response_text = await resp.text()
  162. last_error = f"HTTP {resp.status}: {response_text[:200]}"
  163. continue
  164. page_result = await resp.json()
  165. if page_result.get("code") != 0:
  166. last_error = f"{page_result.get('msg')}"
  167. continue
  168. page_data = page_result.get("data", {})
  169. result["connection_id"] = page_data.get("connection_id")
  170. result["success"] = True
  171. page_created = True
  172. print(f"✅ 页面创建成功")
  173. print(f" Connection ID: {result['connection_id']}")
  174. break
  175. except asyncio.TimeoutError:
  176. last_error = "请求超时"
  177. continue
  178. except aiohttp.ClientError as e:
  179. last_error = f"网络错误: {str(e)}"
  180. continue
  181. except Exception as e:
  182. last_error = f"未知错误: {str(e)}"
  183. continue
  184. if not page_created:
  185. raise RuntimeError(f"创建页面失败(尝试{max_retries}次后): {last_error}")
  186. except Exception as e:
  187. result["error"] = str(e)
  188. print(f"❌ 错误: {str(e)}")
  189. return result
  190. async def init_browser_session(
  191. browser_type: str = "local",
  192. headless: bool = False,
  193. url: Optional[str] = None,
  194. profile_name: str = "default",
  195. user_data_dir: Optional[str] = None,
  196. browser_profile: Optional[BrowserProfile] = None,
  197. **kwargs
  198. ) -> tuple[BrowserSession, Tools]:
  199. global _browser_session, _browser_tools, _file_system, _last_browser_type, _last_headless, _live_url
  200. if _browser_session is not None:
  201. return _browser_session, _browser_tools
  202. _last_browser_type = browser_type
  203. _last_headless = headless
  204. valid_types = ["local", "cloud", "container"]
  205. if browser_type not in valid_types:
  206. raise ValueError(f"无效的 browser_type: {browser_type}")
  207. # --- 核心:定义本地统一存储路径 ---
  208. save_dir = Path.cwd() / ".cache/.browser_use_files"
  209. save_dir.mkdir(parents=True, exist_ok=True)
  210. # 基础参数配置
  211. session_params = {
  212. "headless": headless,
  213. # 告诉 Playwright 所有的下载临时流先存入此本地目录
  214. "downloads_path": str(save_dir),
  215. }
  216. if browser_type == "container":
  217. print("🐳 使用容器浏览器模式")
  218. if not url: url = "about:blank"
  219. container_info = await create_container(url=url, account_name=profile_name)
  220. if not container_info["success"]:
  221. raise RuntimeError(f"容器创建失败: {container_info['error']}")
  222. session_params["cdp_url"] = container_info["cdp"]
  223. await asyncio.sleep(3)
  224. elif browser_type == "cloud":
  225. print("🌐 使用云浏览器模式")
  226. session_params["use_cloud"] = True
  227. if profile_name and profile_name != "default":
  228. session_params["cloud_profile_id"] = profile_name
  229. else: # local
  230. print("💻 使用本地浏览器模式")
  231. session_params["is_local"] = True
  232. if user_data_dir is None and profile_name:
  233. user_data_dir = str(Path.home() / ".browser_use" / "profiles" / profile_name)
  234. Path(user_data_dir).mkdir(parents=True, exist_ok=True)
  235. session_params["user_data_dir"] = user_data_dir
  236. # macOS 路径兼容
  237. import platform
  238. if platform.system() == "Darwin":
  239. chrome_path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
  240. if Path(chrome_path).exists():
  241. session_params["executable_path"] = chrome_path
  242. if browser_profile:
  243. session_params["browser_profile"] = browser_profile
  244. session_params.update(kwargs)
  245. # 创建会话
  246. _browser_session = BrowserSession(**session_params)
  247. # 添加短暂延迟,确保 Chrome CDP 端点完全就绪
  248. await asyncio.sleep(1)
  249. await _browser_session.start()
  250. _browser_tools = Tools()
  251. _file_system = FileSystem(base_dir=str(save_dir))
  252. print(f"✅ 浏览器会话初始化成功 | 默认下载路径: {save_dir}")
  253. # 云浏览器:捕获 live URL
  254. if browser_type == "cloud":
  255. import urllib.parse
  256. cdp_url = getattr(_browser_session, 'cdp_url', '') or ''
  257. if 'browser-use.com' in cdp_url:
  258. # 从 cdp_url (wss://xxx.cdp1.browser-use.com/...) 提取主机名,用 https:// 拼接
  259. parsed = urllib.parse.urlparse(cdp_url)
  260. host_url = f"https://{parsed.hostname}"
  261. _live_url = f"https://live.browser-use.com?wss={urllib.parse.quote(host_url)}"
  262. print(f"📡 实时画面链接: {_live_url}")
  263. if browser_type in ["local", "cloud"] and url:
  264. await _browser_tools.navigate(url=url, browser_session=_browser_session)
  265. return _browser_session, _browser_tools
  266. def get_browser_live_url() -> Optional[str]:
  267. """获取云浏览器的实时画面链接"""
  268. return _live_url
  269. async def get_browser_session() -> tuple[BrowserSession, Tools]:
  270. """
  271. 获取当前浏览器会话,如果不存在或连接已断开则自动重新创建
  272. Returns:
  273. (BrowserSession, Tools) 元组
  274. """
  275. global _browser_session, _browser_tools, _file_system
  276. if _browser_session is not None:
  277. # 检查底层 CDP 连接是否仍然存活
  278. # 当 runner.stop() 暂停后用户在菜单停留较久,WebSocket 可能超时断开,
  279. # 但 _browser_session 对象仍然存在,导致后续操作抛出 ConnectionClosedError
  280. alive = False
  281. try:
  282. cdp_root = getattr(_browser_session, '_cdp_client_root', None)
  283. sess_mgr = getattr(_browser_session, 'session_manager', None)
  284. if cdp_root is not None and sess_mgr is not None:
  285. cdp_session = await _browser_session.get_or_create_cdp_session()
  286. await asyncio.wait_for(
  287. cdp_session.cdp_client.send.Runtime.evaluate(
  288. params={'expression': '1+1'},
  289. session_id=cdp_session.session_id
  290. ),
  291. timeout=3.0,
  292. )
  293. alive = True
  294. except Exception:
  295. pass
  296. if not alive:
  297. print("⚠️ 浏览器会话连接已断开,正在重新初始化...")
  298. try:
  299. await cleanup_browser_session()
  300. except Exception:
  301. _browser_session = None
  302. _browser_tools = None
  303. _file_system = None
  304. if _browser_session is None:
  305. await init_browser_session(browser_type=_last_browser_type, headless=_last_headless)
  306. return _browser_session, _browser_tools
  307. async def cleanup_browser_session():
  308. """
  309. 清理浏览器会话
  310. 优雅地停止浏览器但保留会话状态
  311. """
  312. global _browser_session, _browser_tools, _file_system
  313. if _browser_session is not None:
  314. await _browser_session.stop()
  315. _browser_session = None
  316. _browser_tools = None
  317. _file_system = None
  318. async def kill_browser_session():
  319. """
  320. 强制终止浏览器会话
  321. 完全关闭浏览器进程
  322. """
  323. global _browser_session, _browser_tools, _file_system
  324. if _browser_session is not None:
  325. await _browser_session.kill()
  326. _browser_session = None
  327. _browser_tools = None
  328. _file_system = None
  329. # ============================================================
  330. # 辅助函数:ActionResult 转 ToolResult
  331. # ============================================================
  332. def action_result_to_tool_result(result: ActionResult, title: str = None) -> ToolResult:
  333. """
  334. 将 browser-use 的 ActionResult 转换为框架的 ToolResult
  335. Args:
  336. result: browser-use 的 ActionResult
  337. title: 可选的标题(如果不提供则从 result 推断)
  338. Returns:
  339. ToolResult
  340. """
  341. if result.error:
  342. return ToolResult(
  343. title=title or "操作失败",
  344. output="",
  345. error=result.error,
  346. long_term_memory=result.long_term_memory or result.error
  347. )
  348. return ToolResult(
  349. title=title or "操作成功",
  350. output=result.extracted_content or "",
  351. long_term_memory=result.long_term_memory or result.extracted_content or "",
  352. metadata=result.metadata or {}
  353. )
  354. def _cookie_domain_for_type(cookie_type: str, url: str) -> Tuple[str, str]:
  355. if cookie_type:
  356. key = cookie_type.lower()
  357. if key in {"xiaohongshu", "xhs"}:
  358. return ".xiaohongshu.com", "https://www.xiaohongshu.com"
  359. parsed = urlparse(url or "")
  360. domain = parsed.netloc or ""
  361. domain = domain.replace("www.", "")
  362. if domain:
  363. domain = f".{domain}"
  364. base_url = f"{parsed.scheme}://{parsed.netloc}" if parsed.scheme and parsed.netloc else url
  365. return domain, base_url
  366. def _parse_cookie_string(cookie_str: str, domain: str, url: str) -> List[Dict[str, Any]]:
  367. cookies: List[Dict[str, Any]] = []
  368. if not cookie_str:
  369. return cookies
  370. parts = cookie_str.split(";")
  371. for part in parts:
  372. if not part:
  373. continue
  374. if "=" not in part:
  375. continue
  376. name, value = part.split("=", 1)
  377. cookie = {
  378. "name": str(name).strip(),
  379. "value": str(value).strip(),
  380. "domain": domain,
  381. "path": "/",
  382. "expires": -1,
  383. "httpOnly": False,
  384. "secure": True,
  385. "sameSite": "None"
  386. }
  387. if url:
  388. cookie["url"] = url
  389. cookies.append(cookie)
  390. return cookies
  391. def _normalize_cookies(cookie_value: Any, domain: str, url: str) -> List[Dict[str, Any]]:
  392. if cookie_value is None:
  393. return []
  394. if isinstance(cookie_value, list):
  395. return cookie_value
  396. if isinstance(cookie_value, dict):
  397. if "cookies" in cookie_value:
  398. return _normalize_cookies(cookie_value.get("cookies"), domain, url)
  399. if "name" in cookie_value and "value" in cookie_value:
  400. return [cookie_value]
  401. return []
  402. if isinstance(cookie_value, (bytes, bytearray)):
  403. cookie_value = cookie_value.decode("utf-8", errors="ignore")
  404. if isinstance(cookie_value, str):
  405. text = cookie_value.strip()
  406. if not text:
  407. return []
  408. try:
  409. parsed = json.loads(text)
  410. except Exception:
  411. parsed = None
  412. if parsed is not None:
  413. return _normalize_cookies(parsed, domain, url)
  414. return _parse_cookie_string(text, domain, url)
  415. return []
  416. def _extract_cookie_value(row: Optional[Dict[str, Any]]) -> Any:
  417. if not row:
  418. return None
  419. # 优先使用 cookies 字段
  420. if "cookies" in row:
  421. return row["cookies"]
  422. # 兼容其他可能的字段名
  423. for key, value in row.items():
  424. if "cookie" in key.lower():
  425. return value
  426. return None
  427. def _fetch_cookie_row(cookie_type: str) -> Optional[Dict[str, Any]]:
  428. if not cookie_type:
  429. return None
  430. try:
  431. return mysql.fetchone(
  432. "select * from agent_channel_cookies where type=%s limit 1",
  433. (cookie_type,)
  434. )
  435. except Exception:
  436. return None
  437. def _fetch_profile_id(cookie_type: str) -> Optional[str]:
  438. """从数据库获取 cloud_profile_id"""
  439. if not cookie_type:
  440. return None
  441. try:
  442. row = mysql.fetchone(
  443. "select profileId from agent_channel_cookies where type=%s limit 1",
  444. (cookie_type,)
  445. )
  446. if row and "profileId" in row:
  447. return row["profileId"]
  448. return None
  449. except Exception:
  450. return None
  451. # ============================================================
  452. # 需要注册的工具
  453. # ============================================================
  454. # ============================================================
  455. # 导航类工具 (Navigation Tools)
  456. # ============================================================
  457. async def browser_get_live_url() -> ToolResult:
  458. """
  459. 获取云浏览器的实时画面链接(Live URL),可用于在本地浏览器中查看或分享给他人操作。
  460. 仅在云浏览器模式下有效,本地浏览器返回空。
  461. """
  462. url = get_browser_live_url()
  463. if url:
  464. return ToolResult(
  465. title="云浏览器实时画面链接",
  466. output=url,
  467. metadata={"live_url": url}
  468. )
  469. return ToolResult(
  470. title="无可用链接",
  471. output="当前未使用云浏览器,或浏览器尚未初始化",
  472. )
  473. async def browser_navigate_to_url(url: str, new_tab: bool = False) -> ToolResult:
  474. """
  475. 导航到指定的 URL
  476. Navigate to a specific URL
  477. 使用 browser-use 的原生导航功能,支持在新标签页打开。
  478. Args:
  479. url: 要访问的 URL 地址
  480. new_tab: 是否在新标签页中打开(默认 False)
  481. Returns:
  482. ToolResult: 包含导航结果的工具返回对象
  483. Example:
  484. navigate_to_url("https://www.baidu.com")
  485. navigate_to_url("https://www.google.com", new_tab=True)
  486. """
  487. try:
  488. browser, tools = await get_browser_session()
  489. # 使用 browser-use 的 navigate 工具
  490. result = await tools.navigate(
  491. url=url,
  492. new_tab=new_tab,
  493. browser_session=browser
  494. )
  495. return action_result_to_tool_result(result, f"导航到 {url}")
  496. except Exception as e:
  497. return ToolResult(
  498. title="导航失败",
  499. output="",
  500. error=f"Failed to navigate to {url}: {str(e)}",
  501. long_term_memory=f"导航到 {url} 失败"
  502. )
  503. async def browser_search_web(query: str, engine: str = "bing") -> ToolResult:
  504. """
  505. 使用搜索引擎搜索
  506. Search the web using a search engine
  507. Args:
  508. query: 搜索关键词
  509. engine: 搜索引擎 (google, duckduckgo, bing) - 默认: google
  510. Returns:
  511. ToolResult: 搜索结果
  512. Example:
  513. search_web("Python async programming", engine="google")
  514. """
  515. try:
  516. browser, tools = await get_browser_session()
  517. # 使用 browser-use 的 search 工具
  518. result = await tools.search(
  519. query=query,
  520. engine=engine,
  521. browser_session=browser
  522. )
  523. return action_result_to_tool_result(result, f"搜索: {query}")
  524. except Exception as e:
  525. return ToolResult(
  526. title="搜索失败",
  527. output="",
  528. error=f"Search failed: {str(e)}",
  529. long_term_memory=f"搜索 '{query}' 失败"
  530. )
  531. async def browser_go_back() -> ToolResult:
  532. """
  533. 返回到上一个页面
  534. Go back to the previous page
  535. 模拟浏览器的"后退"按钮功能。
  536. Returns:
  537. ToolResult: 包含返回操作结果的工具返回对象
  538. """
  539. try:
  540. browser, tools = await get_browser_session()
  541. result = await tools.go_back(browser_session=browser)
  542. return action_result_to_tool_result(result, "返回上一页")
  543. except Exception as e:
  544. return ToolResult(
  545. title="返回失败",
  546. output="",
  547. error=f"Failed to go back: {str(e)}",
  548. long_term_memory="返回上一页失败"
  549. )
  550. async def browser_wait_impl(seconds: int = 3) -> ToolResult:
  551. """
  552. 等待指定的秒数(内部实现)
  553. Wait for a specified number of seconds
  554. 用于等待页面加载、动画完成或其他异步操作。
  555. Args:
  556. seconds: 等待时间(秒),最大30秒
  557. Returns:
  558. ToolResult: 包含等待操作结果的工具返回对象
  559. Example:
  560. wait(5) # 等待5秒
  561. """
  562. try:
  563. browser, tools = await get_browser_session()
  564. result = await tools.wait(seconds=seconds, browser_session=browser)
  565. return action_result_to_tool_result(result, f"等待 {seconds} 秒")
  566. except Exception as e:
  567. return ToolResult(
  568. title="等待失败",
  569. output="",
  570. error=f"Failed to wait: {str(e)}",
  571. long_term_memory="等待失败"
  572. )
  573. # ============================================================
  574. # 元素交互工具 (Element Interaction Tools)
  575. # ============================================================
  576. # 定义一个专门捕获下载链接的 Handler
  577. class DownloadLinkCaptureHandler(logging.Handler):
  578. def __init__(self):
  579. super().__init__()
  580. self.captured_url = None
  581. def emit(self, record):
  582. # 如果已经捕获到了(通常第一条是最完整的),就不再处理后续日志
  583. if self.captured_url:
  584. return
  585. message = record.getMessage()
  586. # 寻找包含下载信息的日志
  587. if "redirection?filename=" in message or "Failed to download" in message:
  588. # 使用更严格的正则,确保不抓取带省略号(...)的截断链接
  589. # 排除掉末尾带有三个点的干扰
  590. match = re.search(r"https?://[^\s]+(?!\.\.\.)", message)
  591. if match:
  592. url = match.group(0)
  593. # 再次过滤:如果发现提取出的 URL 确实包含三个点,说明依然抓到了截断版,跳过
  594. if "..." not in url:
  595. self.captured_url = url
  596. # print(f"🎯 成功锁定完整直链: {url[:50]}...") # 调试用
  597. async def browser_download_direct_url(url: str, save_name: str = "book.epub") -> ToolResult:
  598. save_dir = Path.cwd() / ".cache/.browser_use_files"
  599. save_dir.mkdir(parents=True, exist_ok=True)
  600. # 提取域名作为 Referer,这能骗过 90% 的防盗链校验
  601. from urllib.parse import urlparse
  602. parsed_url = urlparse(url)
  603. base_url = f"{parsed_url.scheme}://{parsed_url.netloc}/"
  604. # 如果没传 save_name,自动从 URL 获取
  605. if not save_name:
  606. import unquote
  607. # 尝试从 URL 路径获取文件名并解码(处理中文)
  608. save_name = Path(urlparse(url).path).name or f"download_{int(time.time())}"
  609. save_name = unquote(save_name)
  610. target_path = save_dir / save_name
  611. headers = {
  612. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  613. "Accept": "*/*",
  614. "Referer": base_url, # 动态设置 Referer
  615. "Range": "bytes=0-", # 有时对大文件下载有奇效
  616. }
  617. try:
  618. print(f"🚀 开始下载: {url[:60]}...")
  619. # 使用 follow_redirects=True 处理链接中的 redirection
  620. async with httpx.AsyncClient(headers=headers, follow_redirects=True, timeout=60.0) as client:
  621. async with client.stream("GET", url) as response:
  622. if response.status_code != 200:
  623. print(f"❌ 下载失败,HTTP 状态码: {response.status_code}")
  624. return
  625. # 获取实际文件名(如果服务器提供了)
  626. # 这里会优先使用你指定的 save_name
  627. with open(target_path, "wb") as f:
  628. downloaded_bytes = 0
  629. async for chunk in response.aiter_bytes():
  630. f.write(chunk)
  631. downloaded_bytes += len(chunk)
  632. if downloaded_bytes % (1024 * 1024) == 0: # 每下载 1MB 打印一次
  633. print(f"📥 已下载: {downloaded_bytes // (1024 * 1024)} MB")
  634. print(f"✅ 下载完成!文件已存至: {target_path}")
  635. success_msg = f"✅ 下载完成!文件已存至: {target_path}"
  636. return ToolResult(
  637. title="直链下载成功",
  638. output=success_msg,
  639. long_term_memory=success_msg,
  640. metadata={"path": str(target_path)}
  641. )
  642. except Exception as e:
  643. # 异常捕获返回
  644. return ToolResult(
  645. title="下载异常",
  646. output="",
  647. error=f"💥 发生错误: {str(e)}",
  648. long_term_memory=f"下载任务由于异常中断: {str(e)}"
  649. )
  650. async def browser_click_element(index: int) -> ToolResult:
  651. """
  652. 点击页面元素,并自动通过拦截内部日志获取下载直链。
  653. """
  654. # 1. 挂载日志窃听器
  655. capture_handler = DownloadLinkCaptureHandler()
  656. logger = logging.getLogger("browser_use") # 拦截整个 browser_use 命名空间
  657. logger.addHandler(capture_handler)
  658. try:
  659. browser, tools = await get_browser_session()
  660. # 2. 执行原生的点击动作
  661. result = await tools.click(
  662. index=index,
  663. browser_session=browser
  664. )
  665. # 3. 检查是否有“意外收获”
  666. download_msg = ""
  667. if capture_handler.captured_url:
  668. captured_url = capture_handler.captured_url
  669. download_msg = f"\n\n⚠️ 系统检测到浏览器下载被拦截,已自动捕获准确直链:\n{captured_url}\n\n建议:你可以直接使用 browser_download_direct_url 工具下载此链接。"
  670. # 如果你想更激进一点,甚至可以在这里直接自动触发本地下载逻辑
  671. # await auto_download_file(captured_url)
  672. # 4. 转换结果并附加捕获的信息
  673. tool_result = action_result_to_tool_result(result, f"点击元素 {index}")
  674. if download_msg:
  675. # 关键:把日志里的信息塞进 output,这样 LLM 就能看到了!
  676. tool_result.output = (tool_result.output or "") + download_msg
  677. tool_result.long_term_memory = (tool_result.long_term_memory or "") + f" 捕获下载链接: {captured_url}"
  678. return tool_result
  679. except Exception as e:
  680. return ToolResult(
  681. title="点击失败",
  682. output="",
  683. error=f"Failed to click element {index}: {str(e)}",
  684. long_term_memory=f"点击元素 {index} 失败"
  685. )
  686. finally:
  687. # 5. 务必移除监听器,防止内存泄漏和日志污染
  688. logger.removeHandler(capture_handler)
  689. async def browser_input_text(index: int, text: str, clear: bool = True) -> ToolResult:
  690. """
  691. 在指定元素中输入文本
  692. Input text into an element
  693. Args:
  694. index: 元素索引(从浏览器状态中获取)
  695. text: 要输入的文本内容
  696. clear: 是否先清除现有文本(默认 True)
  697. Returns:
  698. ToolResult: 包含输入操作结果的工具返回对象
  699. Example:
  700. input_text(index=0, text="Hello World", clear=True)
  701. """
  702. try:
  703. browser, tools = await get_browser_session()
  704. result = await tools.input(
  705. index=index,
  706. text=text,
  707. clear=clear,
  708. browser_session=browser
  709. )
  710. return action_result_to_tool_result(result, f"输入文本到元素 {index}")
  711. except Exception as e:
  712. return ToolResult(
  713. title="输入失败",
  714. output="",
  715. error=f"Failed to input text into element {index}: {str(e)}",
  716. long_term_memory=f"输入文本失败"
  717. )
  718. async def browser_send_keys(keys: str) -> ToolResult:
  719. """
  720. 发送键盘按键或快捷键
  721. Send keyboard keys or shortcuts
  722. 支持发送单个按键、组合键和快捷键。
  723. Args:
  724. keys: 要发送的按键字符串
  725. - 单个按键: "Enter", "Escape", "PageDown", "Tab"
  726. - 组合键: "Control+o", "Shift+Tab", "Alt+F4"
  727. - 功能键: "F1", "F2", ..., "F12"
  728. Returns:
  729. ToolResult: 包含按键操作结果的工具返回对象
  730. Example:
  731. send_keys("Enter")
  732. send_keys("Control+A")
  733. """
  734. try:
  735. browser, tools = await get_browser_session()
  736. result = await tools.send_keys(
  737. keys=keys,
  738. browser_session=browser
  739. )
  740. return action_result_to_tool_result(result, f"发送按键: {keys}")
  741. except Exception as e:
  742. return ToolResult(
  743. title="发送按键失败",
  744. output="",
  745. error=f"Failed to send keys: {str(e)}",
  746. long_term_memory="发送按键失败"
  747. )
  748. async def browser_upload_file(index: int, path: str) -> ToolResult:
  749. """
  750. 上传文件到文件输入元素
  751. Upload a file to a file input element
  752. Args:
  753. index: 文件输入框的元素索引
  754. path: 要上传的文件路径(绝对路径)
  755. Returns:
  756. ToolResult: 包含上传操作结果的工具返回对象
  757. Example:
  758. upload_file(index=7, path="/path/to/file.pdf")
  759. Note:
  760. 文件必须存在且路径必须是绝对路径
  761. """
  762. try:
  763. browser, tools = await get_browser_session()
  764. result = await tools.upload_file(
  765. index=index,
  766. path=path,
  767. browser_session=browser,
  768. available_file_paths=[path],
  769. file_system=_file_system
  770. )
  771. return action_result_to_tool_result(result, f"上传文件: {path}")
  772. except Exception as e:
  773. return ToolResult(
  774. title="上传失败",
  775. output="",
  776. error=f"Failed to upload file: {str(e)}",
  777. long_term_memory=f"上传文件 {path} 失败"
  778. )
  779. # ============================================================
  780. # 滚动和视图工具 (Scroll & View Tools)
  781. # ============================================================
  782. async def browser_scroll_page(down: bool = True, pages: float = 1.0, index: Optional[int] = None) -> ToolResult:
  783. try:
  784. # 限制单次滚动幅度,避免 agent 一次滚 100 页
  785. MAX_PAGES = 10
  786. if pages > MAX_PAGES:
  787. pages = MAX_PAGES
  788. browser, tools = await get_browser_session()
  789. cdp_session = await browser.get_or_create_cdp_session()
  790. before_y_result = await cdp_session.cdp_client.send.Runtime.evaluate(
  791. params={'expression': 'window.scrollY'},
  792. session_id=cdp_session.session_id
  793. )
  794. before_y = before_y_result.get('result', {}).get('value', 0)
  795. # 执行滚动
  796. result = await tools.scroll(down=down, pages=pages, index=index, browser_session=browser)
  797. # 等待渲染(懒加载页面需要更长时间)
  798. await asyncio.sleep(2)
  799. after_y_result = await cdp_session.cdp_client.send.Runtime.evaluate(
  800. params={'expression': 'window.scrollY'},
  801. session_id=cdp_session.session_id
  802. )
  803. after_y = after_y_result.get('result', {}).get('value', 0)
  804. # 如果第一次检测没动,再等一轮(应对懒加载触发后的延迟滚动)
  805. if before_y == after_y and index is None:
  806. await asyncio.sleep(2)
  807. retry_result = await cdp_session.cdp_client.send.Runtime.evaluate(
  808. params={'expression': 'window.scrollY'},
  809. session_id=cdp_session.session_id
  810. )
  811. after_y = retry_result.get('result', {}).get('value', 0)
  812. if before_y == after_y and index is None:
  813. direction = "下" if down else "上"
  814. return ToolResult(
  815. title="滚动无效",
  816. output=f"页面已到达{direction}边界,无法继续滚动",
  817. error="No movement detected"
  818. )
  819. delta = abs(after_y - before_y)
  820. direction = "下" if down else "上"
  821. return action_result_to_tool_result(result, f"已向{direction}滚动 {delta}px")
  822. except Exception as e:
  823. # --- 核心修复 2: 必须补全 output 参数,否则框架会报错 ---
  824. return ToolResult(
  825. title="滚动失败",
  826. output="", # 补全这个缺失的必填参数
  827. error=str(e)
  828. )
  829. async def browser_find_text(text: str) -> ToolResult:
  830. """
  831. 查找页面中的文本并滚动到该位置
  832. Find text on the page and scroll to it
  833. 在页面中搜索指定的文本,找到后自动滚动到该位置。
  834. Args:
  835. text: 要查找的文本内容
  836. Returns:
  837. ToolResult: 包含查找结果的工具返回对象
  838. Example:
  839. find_text("Privacy Policy")
  840. """
  841. try:
  842. browser, tools = await get_browser_session()
  843. result = await tools.find_text(
  844. text=text,
  845. browser_session=browser
  846. )
  847. return action_result_to_tool_result(result, f"查找文本: {text}")
  848. except Exception as e:
  849. return ToolResult(
  850. title="查找失败",
  851. output="",
  852. error=f"Failed to find text: {str(e)}",
  853. long_term_memory=f"查找文本 '{text}' 失败"
  854. )
  855. async def browser_get_visual_selector_map() -> ToolResult:
  856. """
  857. 获取当前页面的视觉快照和交互元素索引映射。
  858. Get visual snapshot and selector map of interactive elements.
  859. 该工具会同时执行两个操作:
  860. 1. 捕捉当前页面的截图,并用 browser-use 内置方法在截图上标注元素索引号。
  861. 2. 生成页面所有可交互元素的索引字典(含 href、type 等属性信息)。
  862. Returns:
  863. ToolResult: 包含高亮截图(在 images 中)和元素列表的工具返回对象。
  864. """
  865. try:
  866. browser, _ = await get_browser_session()
  867. # 1. 构造同时包含 DOM 和 截图 的请求
  868. from browser_use.browser.events import BrowserStateRequestEvent
  869. from browser_use.browser.python_highlights import create_highlighted_screenshot_async
  870. event = browser.event_bus.dispatch(
  871. BrowserStateRequestEvent(
  872. include_dom=True,
  873. include_screenshot=True,
  874. include_recent_events=False
  875. )
  876. )
  877. # 2. 等待浏览器返回完整状态
  878. browser_state = await event.event_result(raise_if_none=True, raise_if_any=True)
  879. # 3. 提取 Selector Map
  880. selector_map = browser_state.dom_state.selector_map if browser_state.dom_state else {}
  881. # 4. 提取截图并生成带索引标注的高亮截图(通过 CDP 获取精确 DPI 和滚动偏移)
  882. screenshot_b64 = browser_state.screenshot or ""
  883. highlighted_b64 = ""
  884. if screenshot_b64 and selector_map:
  885. try:
  886. cdp_session = await browser.get_or_create_cdp_session()
  887. highlighted_b64 = await create_highlighted_screenshot_async(
  888. screenshot_b64, selector_map,
  889. cdp_session=cdp_session,
  890. filter_highlight_ids=False
  891. )
  892. except Exception:
  893. highlighted_b64 = screenshot_b64 # fallback to raw screenshot
  894. else:
  895. highlighted_b64 = screenshot_b64
  896. # 5. 构建供 Agent 阅读的完整元素列表,包含丰富的属性信息
  897. elements_info = []
  898. for index, node in selector_map.items():
  899. tag = node.tag_name
  900. attrs = node.attributes or {}
  901. desc = attrs.get('aria-label') or attrs.get('placeholder') or attrs.get('title') or node.get_all_children_text(max_depth=1) or ""
  902. # 收集有用的属性片段
  903. extra_parts = []
  904. if attrs.get('href'):
  905. extra_parts.append(f"href={attrs['href'][:60]}")
  906. if attrs.get('type'):
  907. extra_parts.append(f"type={attrs['type']}")
  908. if attrs.get('role'):
  909. extra_parts.append(f"role={attrs['role']}")
  910. if attrs.get('name'):
  911. extra_parts.append(f"name={attrs['name']}")
  912. extra = f" ({', '.join(extra_parts)})" if extra_parts else ""
  913. elements_info.append(f"Index {index}: <{tag}> \"{desc[:50]}\"{extra}")
  914. output = f"页面截图已捕获(含元素索引标注)\n找到 {len(selector_map)} 个交互元素\n\n"
  915. output += "元素列表:\n" + "\n".join(elements_info)
  916. # 6. 将高亮截图存入 images 字段,metadata 保留结构化数据
  917. images = []
  918. if highlighted_b64:
  919. images.append({"type": "base64", "media_type": "image/png", "data": highlighted_b64})
  920. return ToolResult(
  921. title="视觉元素观察",
  922. output=output,
  923. long_term_memory=f"在页面观察到 {len(selector_map)} 个元素并保存了截图",
  924. images=images,
  925. metadata={
  926. "selector_map": {k: str(v) for k, v in list(selector_map.items())[:100]},
  927. "url": browser_state.url,
  928. "title": browser_state.title
  929. }
  930. )
  931. except Exception as e:
  932. return ToolResult(
  933. title="视觉观察失败",
  934. output="",
  935. error=f"Failed to get visual selector map: {str(e)}",
  936. long_term_memory="获取视觉元素映射失败"
  937. )
  938. async def browser_screenshot_impl() -> ToolResult:
  939. """
  940. 请求在下次观察中包含页面截图(内部实现)
  941. Request a screenshot to be included in the next observation
  942. 用于视觉检查页面状态,帮助理解页面布局和内容。
  943. Returns:
  944. ToolResult: 包含截图请求结果的工具返回对象
  945. Example:
  946. screenshot()
  947. Note:
  948. 截图会在下次页面观察时自动包含在结果中。
  949. """
  950. try:
  951. browser, tools = await get_browser_session()
  952. result = await tools.screenshot(browser_session=browser, file_system=_file_system)
  953. return action_result_to_tool_result(result, "截图请求")
  954. except Exception as e:
  955. return ToolResult(
  956. title="截图失败",
  957. output="",
  958. error=f"Failed to capture screenshot: {str(e)}",
  959. long_term_memory="截图失败"
  960. )
  961. # ============================================================
  962. # 标签页管理工具 (Tab Management Tools)
  963. # ============================================================
  964. async def browser_switch_tab(tab_id: str) -> ToolResult:
  965. """
  966. 切换到指定标签页
  967. Switch to a different browser tab
  968. Args:
  969. tab_id: 4字符标签ID(target_id 的最后4位)
  970. Returns:
  971. ToolResult: 切换结果
  972. Example:
  973. switch_tab(tab_id="a3f2")
  974. """
  975. try:
  976. browser, tools = await get_browser_session()
  977. normalized_tab_id = tab_id[-4:] if tab_id else tab_id
  978. result = await tools.switch(
  979. tab_id=normalized_tab_id,
  980. browser_session=browser
  981. )
  982. return action_result_to_tool_result(result, f"切换到标签页 {normalized_tab_id}")
  983. except Exception as e:
  984. return ToolResult(
  985. title="切换标签页失败",
  986. output="",
  987. error=f"Failed to switch tab: {str(e)}",
  988. long_term_memory=f"切换到标签页 {tab_id} 失败"
  989. )
  990. async def browser_close_tab(tab_id: str) -> ToolResult:
  991. """
  992. 关闭指定标签页
  993. Close a browser tab
  994. Args:
  995. tab_id: 4字符标签ID
  996. Returns:
  997. ToolResult: 关闭结果
  998. Example:
  999. close_tab(tab_id="a3f2")
  1000. """
  1001. try:
  1002. browser, tools = await get_browser_session()
  1003. normalized_tab_id = tab_id[-4:] if tab_id else tab_id
  1004. result = await tools.close(
  1005. tab_id=normalized_tab_id,
  1006. browser_session=browser
  1007. )
  1008. return action_result_to_tool_result(result, f"关闭标签页 {normalized_tab_id}")
  1009. except Exception as e:
  1010. return ToolResult(
  1011. title="关闭标签页失败",
  1012. output="",
  1013. error=f"Failed to close tab: {str(e)}",
  1014. long_term_memory=f"关闭标签页 {tab_id} 失败"
  1015. )
  1016. # ============================================================
  1017. # 下拉框工具 (Dropdown Tools)
  1018. # ============================================================
  1019. async def browser_get_dropdown_options(index: int) -> ToolResult:
  1020. """
  1021. 获取下拉框的所有选项
  1022. Get options from a dropdown element
  1023. Args:
  1024. index: 下拉框的元素索引
  1025. Returns:
  1026. ToolResult: 包含所有选项的结果
  1027. Example:
  1028. get_dropdown_options(index=8)
  1029. """
  1030. try:
  1031. browser, tools = await get_browser_session()
  1032. result = await tools.dropdown_options(
  1033. index=index,
  1034. browser_session=browser
  1035. )
  1036. return action_result_to_tool_result(result, f"获取下拉框选项: {index}")
  1037. except Exception as e:
  1038. return ToolResult(
  1039. title="获取下拉框选项失败",
  1040. output="",
  1041. error=f"Failed to get dropdown options: {str(e)}",
  1042. long_term_memory=f"获取下拉框 {index} 选项失败"
  1043. )
  1044. async def browser_select_dropdown_option(index: int, text: str) -> ToolResult:
  1045. """
  1046. 选择下拉框选项
  1047. Select an option from a dropdown
  1048. Args:
  1049. index: 下拉框的元素索引
  1050. text: 要选择的选项文本(精确匹配)
  1051. Returns:
  1052. ToolResult: 选择结果
  1053. Example:
  1054. select_dropdown_option(index=8, text="Option 2")
  1055. """
  1056. try:
  1057. browser, tools = await get_browser_session()
  1058. result = await tools.select_dropdown(
  1059. index=index,
  1060. text=text,
  1061. browser_session=browser
  1062. )
  1063. return action_result_to_tool_result(result, f"选择下拉框选项: {text}")
  1064. except Exception as e:
  1065. return ToolResult(
  1066. title="选择下拉框选项失败",
  1067. output="",
  1068. error=f"Failed to select dropdown option: {str(e)}",
  1069. long_term_memory=f"选择选项 '{text}' 失败"
  1070. )
  1071. # ============================================================
  1072. # 内容提取工具 (Content Extraction Tools)
  1073. # ============================================================
  1074. def scrub_search_redirect_url(url: str) -> str:
  1075. """
  1076. 自动检测并解析 Bing/Google 等搜索引擎的重定向链接,提取真实目标 URL。
  1077. """
  1078. if not url or not isinstance(url, str):
  1079. return url
  1080. try:
  1081. parsed = urlparse(url)
  1082. # 1. 处理 Bing 重定向 (特征:u 参数带 Base64)
  1083. # 示例:...&u=a1aHR0cHM6Ly96aHVhbmxhbi56aGlodS5jb20vcC8zODYxMjgwOQ&...
  1084. if "bing.com" in parsed.netloc:
  1085. u_param = parse_qs(parsed.query).get('u', [None])[0]
  1086. if u_param:
  1087. # 移除开头的 'a1', 'a0' 等标识符
  1088. b64_str = u_param[2:]
  1089. # 补齐 Base64 填充符
  1090. padding = '=' * (4 - len(b64_str) % 4)
  1091. decoded = base64.b64decode(b64_str + padding).decode('utf-8', errors='ignore')
  1092. if decoded.startswith('http'):
  1093. return decoded
  1094. # 2. 处理 Google 重定向 (特征:url 参数)
  1095. if "google.com" in parsed.netloc:
  1096. url_param = parse_qs(parsed.query).get('url', [None])[0]
  1097. if url_param:
  1098. return unquote(url_param)
  1099. # 3. 兜底:处理常见的跳转参数
  1100. for param in ['target', 'dest', 'destination', 'link']:
  1101. found = parse_qs(parsed.query).get(param, [None])[0]
  1102. if found and found.startswith('http'):
  1103. return unquote(found)
  1104. except Exception:
  1105. pass # 解析失败则返回原链接
  1106. return url
  1107. async def extraction_adapter(input_data):
  1108. # 提取字符串
  1109. if isinstance(input_data, list):
  1110. prompt = input_data[-1].content if hasattr(input_data[-1], 'content') else str(input_data[-1])
  1111. else:
  1112. prompt = str(input_data)
  1113. response = await qwen_llm_call(
  1114. messages=[{"role": "user", "content": prompt}]
  1115. )
  1116. content = response["content"]
  1117. # --- 核心改进:URL 自动修复 ---
  1118. # 使用正则表达式匹配内容中的所有 URL,并尝试进行洗涤
  1119. urls = re.findall(r'https?://[^\s<>"\']+', content)
  1120. for original_url in urls:
  1121. clean_url = scrub_search_redirect_url(original_url)
  1122. if clean_url != original_url:
  1123. content = content.replace(original_url, clean_url)
  1124. from argparse import Namespace
  1125. return Namespace(completion=content)
  1126. async def browser_extract_content(query: str, extract_links: bool = False,
  1127. start_from_char: int = 0) -> ToolResult:
  1128. """
  1129. 使用 LLM 从页面提取结构化数据
  1130. Extract content from the current page using LLM
  1131. Args:
  1132. query: 提取查询(告诉 LLM 要提取什么内容)
  1133. extract_links: 是否提取链接(默认 False,节省 token)
  1134. start_from_char: 从哪个字符开始提取(用于分页提取大内容)
  1135. Returns:
  1136. ToolResult: 提取的内容
  1137. Example:
  1138. extract_content(query="提取页面上所有产品的名称和价格", extract_links=True)
  1139. Note:
  1140. 需要配置 page_extraction_llm,否则会失败
  1141. 支持分页提取,最大100k字符
  1142. """
  1143. try:
  1144. browser, tools = await get_browser_session()
  1145. # 注意:extract 需要 page_extraction_llm 参数
  1146. # 这里我们假设用户会在初始化时配置 LLM
  1147. # 如果没有配置,会抛出异常
  1148. result = await tools.extract(
  1149. query=query,
  1150. extract_links=extract_links,
  1151. start_from_char=start_from_char,
  1152. browser_session=browser,
  1153. page_extraction_llm=RunnableLambda(extraction_adapter), # 需要用户配置
  1154. file_system=_file_system
  1155. )
  1156. return action_result_to_tool_result(result, f"提取内容: {query}")
  1157. except Exception as e:
  1158. return ToolResult(
  1159. title="内容提取失败",
  1160. output="",
  1161. error=f"Failed to extract content: {str(e)}",
  1162. long_term_memory=f"提取内容失败: {query}"
  1163. )
  1164. async def _detect_and_download_pdf_via_cdp(browser) -> Optional[str]:
  1165. """
  1166. 检测当前页面是否为 PDF,如果是则通过 CDP(浏览器内 fetch)下载到本地。
  1167. 优势:自动携带浏览器的 cookies/session,可访问需要登录的 PDF。
  1168. 返回本地文件路径,非 PDF 页面返回 None。
  1169. """
  1170. try:
  1171. current_url = await browser.get_current_page_url()
  1172. if not current_url:
  1173. return None
  1174. parsed = urlparse(current_url)
  1175. is_pdf = parsed.path.lower().endswith('.pdf')
  1176. # URL 不明显是 PDF 时,通过 CDP 检查 content-type
  1177. if not is_pdf:
  1178. try:
  1179. cdp = await browser.get_or_create_cdp_session()
  1180. ct_result = await cdp.cdp_client.send.Runtime.evaluate(
  1181. params={'expression': 'document.contentType'},
  1182. session_id=cdp.session_id
  1183. )
  1184. content_type = ct_result.get('result', {}).get('value', '')
  1185. is_pdf = 'pdf' in content_type.lower()
  1186. except Exception:
  1187. pass
  1188. if not is_pdf:
  1189. return None
  1190. # 通过浏览器内 fetch API 下载 PDF(自动携带 cookies)
  1191. cdp = await browser.get_or_create_cdp_session()
  1192. js_code = """
  1193. (async () => {
  1194. try {
  1195. const resp = await fetch(window.location.href);
  1196. if (!resp.ok) return JSON.stringify({error: 'HTTP ' + resp.status});
  1197. const blob = await resp.blob();
  1198. return new Promise((resolve, reject) => {
  1199. const reader = new FileReader();
  1200. reader.onloadend = () => resolve(JSON.stringify({data: reader.result}));
  1201. reader.onerror = () => resolve(JSON.stringify({error: 'FileReader failed'}));
  1202. reader.readAsDataURL(blob);
  1203. });
  1204. } catch(e) {
  1205. return JSON.stringify({error: e.message});
  1206. }
  1207. })()
  1208. """
  1209. result = await cdp.cdp_client.send.Runtime.evaluate(
  1210. params={
  1211. 'expression': js_code,
  1212. 'awaitPromise': True,
  1213. 'returnByValue': True,
  1214. 'timeout': 60000
  1215. },
  1216. session_id=cdp.session_id
  1217. )
  1218. value = result.get('result', {}).get('value', '')
  1219. if not value:
  1220. print("⚠️ CDP fetch PDF: 无返回值")
  1221. return None
  1222. data = json.loads(value)
  1223. if 'error' in data:
  1224. print(f"⚠️ CDP fetch PDF 失败: {data['error']}")
  1225. return None
  1226. # 从 data URL 中提取 base64 并解码
  1227. data_url = data['data'] # data:application/pdf;base64,JVBERi0...
  1228. base64_data = data_url.split(',', 1)[1]
  1229. pdf_bytes = base64.b64decode(base64_data)
  1230. # 保存到本地
  1231. save_dir = Path.cwd() / ".cache/.browser_use_files"
  1232. save_dir.mkdir(parents=True, exist_ok=True)
  1233. filename = Path(parsed.path).name if parsed.path else ""
  1234. if not filename or not filename.lower().endswith('.pdf'):
  1235. import time
  1236. filename = f"downloaded_{int(time.time())}.pdf"
  1237. save_path = str(save_dir / filename)
  1238. with open(save_path, 'wb') as f:
  1239. f.write(pdf_bytes)
  1240. print(f"📄 PDF 已通过 CDP 下载到: {save_path} ({len(pdf_bytes)} bytes)")
  1241. return save_path
  1242. except Exception as e:
  1243. print(f"⚠️ PDF 检测/下载异常: {e}")
  1244. return None
  1245. async def browser_read_long_content(
  1246. goal: Union[str, dict],
  1247. source: str = "page",
  1248. context: str = "",
  1249. **kwargs
  1250. ) -> ToolResult:
  1251. """
  1252. 智能读取长内容。支持自动检测并读取网页上的 PDF 文件。
  1253. 当 source="page" 且当前页面是 PDF 时,会通过 CDP 下载 PDF 并用 pypdf 解析,
  1254. 而非使用 DOM 提取(DOM 无法读取浏览器内置 PDF Viewer 的内容)。
  1255. 通过 CDP 下载可自动携带浏览器的 cookies/session,支持需要登录的 PDF。
  1256. """
  1257. try:
  1258. browser, tools = await get_browser_session()
  1259. # 1. 提取目标文本 (针对 GoalTree 字典结构)
  1260. final_goal_text = ""
  1261. if isinstance(goal, dict):
  1262. final_goal_text = goal.get("mission") or goal.get("goal") or str(goal)
  1263. else:
  1264. final_goal_text = str(goal)
  1265. # 2. 清洗业务背景 (过滤框架注入的 dict 类型 context)
  1266. business_context = context if isinstance(context, str) else ""
  1267. # 3. PDF 自动检测:当 source="page" 时检查是否为 PDF 页面
  1268. available_files = []
  1269. if source.lower() == "page":
  1270. pdf_path = await _detect_and_download_pdf_via_cdp(browser)
  1271. if pdf_path:
  1272. source = pdf_path
  1273. available_files.append(pdf_path)
  1274. # 4. 验证并实例化
  1275. action_params = ReadContentAction(
  1276. goal=final_goal_text,
  1277. source=source,
  1278. context=business_context
  1279. )
  1280. # 5. 解包参数调用底层方法
  1281. result = await tools.read_long_content(
  1282. **action_params.model_dump(),
  1283. browser_session=browser,
  1284. page_extraction_llm=RunnableLambda(extraction_adapter),
  1285. available_file_paths=available_files
  1286. )
  1287. return action_result_to_tool_result(result, f"深度读取: {source}")
  1288. except Exception as e:
  1289. return ToolResult(
  1290. title="深度读取失败",
  1291. output="",
  1292. error=f"Read long content failed: {str(e)}",
  1293. long_term_memory="参数解析或校验失败,请检查输入"
  1294. )
  1295. async def browser_get_page_html() -> ToolResult:
  1296. """
  1297. 获取当前页面的完整 HTML
  1298. Get the full HTML of the current page
  1299. 返回当前页面的完整 HTML 源代码。
  1300. Returns:
  1301. ToolResult: 包含页面 HTML 的工具返回对象
  1302. Example:
  1303. get_page_html()
  1304. Note:
  1305. - 返回的是完整的 HTML 源代码
  1306. - 输出会被限制在 10000 字符以内(完整内容保存在 metadata 中)
  1307. """
  1308. try:
  1309. browser, tools = await get_browser_session()
  1310. # 使用 CDP 获取页面 HTML
  1311. cdp = await browser.get_or_create_cdp_session()
  1312. # 获取页面内容
  1313. result = await cdp.cdp_client.send.Runtime.evaluate(
  1314. params={'expression': 'document.documentElement.outerHTML'},
  1315. session_id=cdp.session_id
  1316. )
  1317. html = result.get('result', {}).get('value', '')
  1318. # 获取 URL 和标题
  1319. url = await browser.get_current_page_url()
  1320. title_result = await cdp.cdp_client.send.Runtime.evaluate(
  1321. params={'expression': 'document.title'},
  1322. session_id=cdp.session_id
  1323. )
  1324. title = title_result.get('result', {}).get('value', '')
  1325. # 限制输出大小
  1326. output_html = html
  1327. if len(html) > 10000:
  1328. output_html = html[:10000] + "... (truncated)"
  1329. return ToolResult(
  1330. title=f"获取 HTML: {url}",
  1331. output=f"页面: {title}\nURL: {url}\n\nHTML:\n{output_html}",
  1332. long_term_memory=f"获取 HTML: {url}",
  1333. metadata={"url": url, "title": title, "html": html}
  1334. )
  1335. except Exception as e:
  1336. return ToolResult(
  1337. title="获取 HTML 失败",
  1338. output="",
  1339. error=f"Failed to get page HTML: {str(e)}",
  1340. long_term_memory="获取 HTML 失败"
  1341. )
  1342. async def browser_get_selector_map() -> ToolResult:
  1343. """
  1344. 获取当前页面的元素索引映射
  1345. Get the selector map of interactive elements on the current page
  1346. 返回页面所有可交互元素的索引字典,用于后续的元素操作。
  1347. Returns:
  1348. ToolResult: 包含元素映射的工具返回对象
  1349. Example:
  1350. get_selector_map()
  1351. Note:
  1352. 返回的索引可以用于 click_element, input_text 等操作
  1353. """
  1354. try:
  1355. browser, tools = await get_browser_session()
  1356. # 关键修复:先触发 BrowserStateRequestEvent 来更新 DOM 状态
  1357. # 这会触发 DOM watchdog 重新构建 DOM 树并更新 selector_map
  1358. from browser_use.browser.events import BrowserStateRequestEvent
  1359. # 触发事件并等待结果
  1360. event = browser.event_bus.dispatch(
  1361. BrowserStateRequestEvent(
  1362. include_dom=True,
  1363. include_screenshot=False, # 不需要截图,节省时间
  1364. include_recent_events=False
  1365. )
  1366. )
  1367. # 等待 DOM 更新完成
  1368. browser_state = await event.event_result(raise_if_none=True, raise_if_any=True)
  1369. # 从更新后的状态中获取 selector_map
  1370. selector_map = browser_state.dom_state.selector_map if browser_state.dom_state else {}
  1371. # 构建输出信息
  1372. elements_info = []
  1373. for index, node in list(selector_map.items())[:20]: # 只显示前20个
  1374. tag = node.tag_name
  1375. attrs = node.attributes or {}
  1376. text = attrs.get('aria-label') or attrs.get('placeholder') or attrs.get('value', '')
  1377. elements_info.append(f"索引 {index}: <{tag}> {text[:50]}")
  1378. output = f"找到 {len(selector_map)} 个交互元素\n\n"
  1379. output += "\n".join(elements_info)
  1380. if len(selector_map) > 20:
  1381. output += f"\n... 还有 {len(selector_map) - 20} 个元素"
  1382. return ToolResult(
  1383. title="获取元素映射",
  1384. output=output,
  1385. long_term_memory=f"获取到 {len(selector_map)} 个交互元素",
  1386. metadata={"selector_map": {k: str(v) for k, v in list(selector_map.items())[:100]}}
  1387. )
  1388. except Exception as e:
  1389. return ToolResult(
  1390. title="获取元素映射失败",
  1391. output="",
  1392. error=f"Failed to get selector map: {str(e)}",
  1393. long_term_memory="获取元素映射失败"
  1394. )
  1395. # ============================================================
  1396. # JavaScript 执行工具 (JavaScript Tools)
  1397. # ============================================================
  1398. async def browser_evaluate(code: str) -> ToolResult:
  1399. """
  1400. 在页面中执行 JavaScript 代码
  1401. Execute JavaScript code in the page context
  1402. 允许在当前页面中执行任意 JavaScript 代码,用于复杂的页面操作或数据提取。
  1403. Args:
  1404. code: 要执行的 JavaScript 代码字符串
  1405. Returns:
  1406. ToolResult: 包含执行结果的工具返回对象
  1407. Example:
  1408. evaluate("document.title")
  1409. evaluate("document.querySelectorAll('a').length")
  1410. Note:
  1411. - 代码在页面上下文中执行,可以访问 DOM 和全局变量
  1412. - 返回值会被自动序列化为字符串
  1413. - 执行结果限制在 20k 字符以内
  1414. """
  1415. try:
  1416. browser, tools = await get_browser_session()
  1417. result = await tools.evaluate(
  1418. code=code,
  1419. browser_session=browser
  1420. )
  1421. return action_result_to_tool_result(result, "执行 JavaScript")
  1422. except Exception as e:
  1423. return ToolResult(
  1424. title="JavaScript 执行失败",
  1425. output="",
  1426. error=f"Failed to execute JavaScript: {str(e)}",
  1427. long_term_memory="JavaScript 执行失败"
  1428. )
  1429. async def browser_ensure_login_with_cookies(cookie_type: str, url: str = "https://www.xiaohongshu.com") -> ToolResult:
  1430. """
  1431. 检查登录状态并在需要时注入 cookies
  1432. """
  1433. try:
  1434. browser, tools = await get_browser_session()
  1435. if url:
  1436. await tools.navigate(url=url, browser_session=browser)
  1437. await tools.wait(seconds=2, browser_session=browser)
  1438. check_login_js = """
  1439. (function() {
  1440. const loginBtn = document.querySelector('[class*="login"]') ||
  1441. document.querySelector('[href*="login"]') ||
  1442. Array.from(document.querySelectorAll('button, a')).find(el => (el.textContent || '').includes('登录'));
  1443. const userInfo = document.querySelector('[class*="user"]') ||
  1444. document.querySelector('[class*="avatar"]');
  1445. return {
  1446. needLogin: !!loginBtn && !userInfo,
  1447. hasLoginBtn: !!loginBtn,
  1448. hasUserInfo: !!userInfo
  1449. };
  1450. })()
  1451. """
  1452. result = await tools.evaluate(code=check_login_js, browser_session=browser)
  1453. status_output = result.extracted_content
  1454. if isinstance(status_output, str) and status_output.startswith("Result: "):
  1455. status_output = status_output[8:]
  1456. login_info: Dict[str, Any] = {}
  1457. if isinstance(status_output, str):
  1458. try:
  1459. login_info = json.loads(status_output)
  1460. except Exception:
  1461. login_info = {}
  1462. elif isinstance(status_output, dict):
  1463. login_info = status_output
  1464. if not login_info.get("needLogin"):
  1465. output = json.dumps({"need_login": False}, ensure_ascii=False)
  1466. return ToolResult(
  1467. title="已登录",
  1468. output=output,
  1469. long_term_memory=output
  1470. )
  1471. row = _fetch_cookie_row(cookie_type)
  1472. cookie_value = _extract_cookie_value(row)
  1473. if not cookie_value:
  1474. output = json.dumps({"need_login": True, "cookies_count": 0}, ensure_ascii=False)
  1475. return ToolResult(
  1476. title="未找到 cookies",
  1477. output=output,
  1478. error="未找到 cookies",
  1479. long_term_memory=output
  1480. )
  1481. domain, base_url = _cookie_domain_for_type(cookie_type, url)
  1482. cookies = _normalize_cookies(cookie_value, domain, base_url)
  1483. if not cookies:
  1484. output = json.dumps({"need_login": True, "cookies_count": 0}, ensure_ascii=False)
  1485. return ToolResult(
  1486. title="cookies 解析失败",
  1487. output=output,
  1488. error="cookies 解析失败",
  1489. long_term_memory=output
  1490. )
  1491. await browser._cdp_set_cookies(cookies)
  1492. if url:
  1493. await tools.navigate(url=url, browser_session=browser)
  1494. await tools.wait(seconds=2, browser_session=browser)
  1495. output = json.dumps({"need_login": True, "cookies_count": len(cookies)}, ensure_ascii=False)
  1496. return ToolResult(
  1497. title="已注入 cookies",
  1498. output=output,
  1499. long_term_memory=output
  1500. )
  1501. except Exception as e:
  1502. return ToolResult(
  1503. title="登录检查失败",
  1504. output="",
  1505. error=str(e),
  1506. long_term_memory="登录检查失败"
  1507. )
  1508. # ============================================================
  1509. # 等待用户操作工具 (Wait for User Action)
  1510. # ============================================================
  1511. async def browser_wait_for_user_action(message: str = "Please complete the action in browser",
  1512. timeout: int = 300) -> ToolResult:
  1513. """
  1514. 等待用户在浏览器中完成操作(如登录)
  1515. Wait for user to complete an action in the browser (e.g., login)
  1516. 暂停自动化流程,等待用户手动完成某些操作(如登录、验证码等)。
  1517. Args:
  1518. message: 提示用户需要完成的操作
  1519. timeout: 最大等待时间(秒),默认 300 秒(5 分钟)
  1520. Returns:
  1521. ToolResult: 包含等待结果的工具返回对象
  1522. Example:
  1523. wait_for_user_action("Please login to Xiaohongshu", timeout=180)
  1524. wait_for_user_action("Please complete the CAPTCHA", timeout=60)
  1525. Note:
  1526. - 用户需要在浏览器窗口中手动完成操作
  1527. - 完成后按回车键继续
  1528. - 超时后会自动继续执行
  1529. """
  1530. try:
  1531. import asyncio
  1532. print(f"\n{'='*60}")
  1533. print(f"⏸️ WAITING FOR USER ACTION")
  1534. print(f"{'='*60}")
  1535. print(f"📝 {message}")
  1536. print(f"⏱️ Timeout: {timeout} seconds")
  1537. print(f"\n👉 Please complete the action in the browser window")
  1538. print(f"👉 Press ENTER when done, or wait for timeout")
  1539. print(f"{'='*60}\n")
  1540. # Wait for user input or timeout
  1541. try:
  1542. loop = asyncio.get_event_loop()
  1543. # Wait for either user input or timeout
  1544. await asyncio.wait_for(
  1545. loop.run_in_executor(None, input),
  1546. timeout=timeout
  1547. )
  1548. return ToolResult(
  1549. title="用户操作完成",
  1550. output=f"User completed: {message}",
  1551. long_term_memory=f"用户完成操作: {message}"
  1552. )
  1553. except asyncio.TimeoutError:
  1554. return ToolResult(
  1555. title="用户操作超时",
  1556. output=f"Timeout waiting for: {message}",
  1557. long_term_memory=f"等待用户操作超时: {message}"
  1558. )
  1559. except Exception as e:
  1560. return ToolResult(
  1561. title="等待用户操作失败",
  1562. output="",
  1563. error=f"Failed to wait for user action: {str(e)}",
  1564. long_term_memory="等待用户操作失败"
  1565. )
  1566. # ============================================================
  1567. # 任务完成工具 (Task Completion)
  1568. # ============================================================
  1569. async def browser_done(text: str, success: bool = True,
  1570. files_to_display: Optional[List[str]] = None) -> ToolResult:
  1571. """
  1572. 标记任务完成并返回最终消息
  1573. Mark the task as complete and return final message to user
  1574. Args:
  1575. text: 给用户的最终消息
  1576. success: 任务是否成功完成
  1577. files_to_display: 可选的要显示的文件路径列表
  1578. Returns:
  1579. ToolResult: 完成结果
  1580. Example:
  1581. done("任务已完成,提取了10个产品信息", success=True)
  1582. """
  1583. try:
  1584. browser, tools = await get_browser_session()
  1585. result = await tools.done(
  1586. text=text,
  1587. success=success,
  1588. files_to_display=files_to_display,
  1589. file_system=_file_system
  1590. )
  1591. return action_result_to_tool_result(result, "任务完成")
  1592. except Exception as e:
  1593. return ToolResult(
  1594. title="标记任务完成失败",
  1595. output="",
  1596. error=f"Failed to complete task: {str(e)}",
  1597. long_term_memory="标记任务完成失败"
  1598. )
  1599. # ============================================================
  1600. # Cookie 持久化工具
  1601. # ============================================================
# Directory where exported cookie files are stored: the path obtained by
# applying `.parent` five times to this module's file path, then appending
# ".cache/.cookies".
_COOKIES_DIR = Path(__file__).parent.parent.parent.parent.parent / ".cache/.cookies"
  1603. async def browser_export_cookies(name: str = "", account: str = "") -> ToolResult:
  1604. """
  1605. 导出当前浏览器的所有 Cookie 到本地 .cookies/ 目录。
  1606. 文件命名格式:{域名}_{账号名}.json,如 bilibili.com_zhangsan.json
  1607. 登录成功后调用此工具,下次可通过 browser_load_cookies 恢复登录态。
  1608. Args:
  1609. name: 自定义文件名(可选,提供则忽略自动命名)
  1610. account: 账号名称(可选,用于区分同一网站的不同账号)
  1611. """
  1612. try:
  1613. browser, _ = await get_browser_session()
  1614. # 获取所有 Cookie(CDP 格式)
  1615. all_cookies = await browser._cdp_get_cookies()
  1616. if not all_cookies:
  1617. return ToolResult(title="Cookie 导出", output="当前浏览器没有 Cookie", long_term_memory="无 Cookie 可导出")
  1618. # 获取当前域名,用于过滤和命名
  1619. from urllib.parse import urlparse
  1620. current_url = await browser.get_current_page_url() or ''
  1621. domain = urlparse(current_url).netloc.replace("www.", "") or "default"
  1622. if not name:
  1623. name = f"{domain}_{account}" if account else domain
  1624. # 只保留当前域名的 cookie(过滤第三方)
  1625. cookies = [c for c in all_cookies if domain in c.get("domain", "").lstrip(".")]
  1626. # 保存
  1627. _COOKIES_DIR.mkdir(parents=True, exist_ok=True)
  1628. cookie_file = _COOKIES_DIR / f"{name}.json"
  1629. cookie_file.write_text(json.dumps(cookies, ensure_ascii=False, indent=2), encoding="utf-8")
  1630. return ToolResult(
  1631. title="Cookie 已导出",
  1632. output=f"已保存 {len(cookies)} 条 Cookie 到 .cookies/{name}.json(从 {len(all_cookies)} 条中过滤当前域名)",
  1633. long_term_memory=f"导出 {len(cookies)} 条 Cookie 到 .cookies/{name}.json"
  1634. )
  1635. except Exception as e:
  1636. return ToolResult(title="Cookie 导出失败", output="", error=str(e), long_term_memory="导出 Cookie 失败")
  1637. async def browser_load_cookies(url: str, name: str = "", auto_navigate: bool = True) -> ToolResult:
  1638. """
  1639. 根据目标 URL 自动查找本地 Cookie 文件,注入浏览器并导航到目标页面恢复登录态。
  1640. 如果找不到 Cookie 文件,会根据 auto_navigate 参数决定是否直接导航到目标页面。
  1641. 重要:此工具会自动完成导航,调用前不需要先调用 browser_navigate_to_url。
  1642. Args:
  1643. url: 目标 URL(必须提供,同时用于自动匹配 Cookie 文件)
  1644. name: Cookie 文件名(可选,不传则根据 URL 域名自动查找)
  1645. auto_navigate: 找不到 Cookie 时是否自动导航到目标页面(默认 True)
  1646. """
  1647. try:
  1648. browser, tools = await get_browser_session()
  1649. if not url.startswith("http"):
  1650. url = f"https://{url}"
  1651. # 根据域名自动查找 Cookie 文件
  1652. if not name:
  1653. from urllib.parse import urlparse
  1654. domain = urlparse(url).netloc.replace("www.", "")
  1655. if _COOKIES_DIR.exists():
  1656. # 尝试多种匹配模式
  1657. matches = []
  1658. # 1. 精确匹配完整域名(如 xiaohongshu.com.json)
  1659. exact_match = _COOKIES_DIR / f"{domain}.json"
  1660. if exact_match.exists():
  1661. matches.append(exact_match)
  1662. logger.info(f"Cookie 精确匹配成功: {exact_match.name}")
  1663. # 2. 匹配域名前缀(如 xiaohongshu.com*.json)
  1664. if not matches:
  1665. prefix_matches = list(_COOKIES_DIR.glob(f"{domain}*.json"))
  1666. if prefix_matches:
  1667. matches = prefix_matches
  1668. logger.info(f"Cookie 前缀匹配成功: {[m.name for m in matches]}")
  1669. # 3. 模糊匹配:提取主域名(如 xiaohongshu)
  1670. if not matches:
  1671. main_domain = domain.split('.')[0] # 提取第一部分
  1672. fuzzy_matches = list(_COOKIES_DIR.glob(f"{main_domain}*.json"))
  1673. if fuzzy_matches:
  1674. matches = fuzzy_matches
  1675. logger.info(f"Cookie 模糊匹配成功: {[m.name for m in matches]} (主域名: {main_domain})")
  1676. if matches:
  1677. cookie_file = matches[0] # 取第一个匹配的
  1678. logger.info(f"使用 Cookie 文件: {cookie_file.name}")
  1679. else:
  1680. available = [f.stem for f in _COOKIES_DIR.glob("*.json")]
  1681. logger.warning(f"未找到匹配的 Cookie 文件。域名: {domain}, 可用: {available}")
  1682. hint = f"可用的 Cookie 文件: {available}" if available else "提示:首次使用需要先手动登录,然后使用 browser_export_cookies 保存 Cookie"
  1683. # 如果启用自动导航,直接访问目标页面
  1684. if auto_navigate:
  1685. await tools.navigate(url=url, browser_session=browser)
  1686. await tools.wait(seconds=2, browser_session=browser)
  1687. return ToolResult(
  1688. title="未找到 Cookie,已导航到目标页面",
  1689. output=f"没有找到 {domain} 的 Cookie 文件,已自动导航到 {url}。\n\n{hint}\n\n建议:如需保持登录态,请手动登录后使用 browser_export_cookies 保存 Cookie。",
  1690. error=None,
  1691. long_term_memory=f"未找到 {domain} 的 Cookie,已导航到 {url}"
  1692. )
  1693. else:
  1694. return ToolResult(
  1695. title="未找到 Cookie",
  1696. output=f"没有匹配 {domain} 的 Cookie 文件。{hint}\n\n建议:使用 browser_navigate_to_url 访问 {url} 并手动登录,或使用 browser_export_cookies 保存当前 Cookie。",
  1697. error=None,
  1698. long_term_memory=f"未找到 {domain} 的 Cookie 文件"
  1699. )
  1700. else:
  1701. # Cookie 目录不存在
  1702. if auto_navigate:
  1703. await tools.navigate(url=url, browser_session=browser)
  1704. await tools.wait(seconds=2, browser_session=browser)
  1705. return ToolResult(
  1706. title="首次使用 Cookie 功能,已导航到目标页面",
  1707. output=f"这是首次使用 Cookie 功能,已自动导航到 {url}。\n\n建议:手动完成登录后,使用 browser_export_cookies 保存 Cookie 供下次使用。",
  1708. error=None,
  1709. long_term_memory="首次使用 Cookie 功能,已导航到目标页面"
  1710. )
  1711. else:
  1712. return ToolResult(
  1713. title="Cookie 目录不存在",
  1714. output=f"这是首次使用 Cookie 功能。建议:\n1. 使用 browser_navigate_to_url 访问 {url}\n2. 手动完成登录\n3. 使用 browser_export_cookies 保存 Cookie 供下次使用",
  1715. error=None,
  1716. long_term_memory="Cookie 目录不存在,这是首次使用"
  1717. )
  1718. else:
  1719. cookie_file = _COOKIES_DIR / f"{name}.json"
  1720. if not cookie_file.exists():
  1721. available = [f.stem for f in _COOKIES_DIR.glob("*.json")] if _COOKIES_DIR.exists() else []
  1722. hint = f"可用的 Cookie 文件: {available}" if available else "提示:使用 browser_export_cookies 保存 Cookie"
  1723. if auto_navigate:
  1724. await tools.navigate(url=url, browser_session=browser)
  1725. await tools.wait(seconds=2, browser_session=browser)
  1726. return ToolResult(
  1727. title="Cookie 文件不存在,已导航到目标页面",
  1728. output=f"未找到 .cookies/{name}.json,已自动导航到 {url}。\n\n{hint}",
  1729. error=None,
  1730. long_term_memory=f"未找到 {name}.json,已导航到目标页面"
  1731. )
  1732. else:
  1733. return ToolResult(
  1734. title="Cookie 文件不存在",
  1735. output=f"未找到 .cookies/{name}.json。{hint}",
  1736. error=None,
  1737. long_term_memory=f"未找到 {name}.json Cookie 文件"
  1738. )
  1739. cookies = json.loads(cookie_file.read_text(encoding="utf-8"))
  1740. # 直接注入(export 和 load 使用相同的 CDP 格式,无需标准化)
  1741. await browser._cdp_set_cookies(cookies)
  1742. # 导航到目标页面(带上刚注入的 Cookie)
  1743. if url:
  1744. if not url.startswith("http"):
  1745. url = f"https://{url}"
  1746. await tools.navigate(url=url, browser_session=browser)
  1747. await tools.wait(seconds=3, browser_session=browser)
  1748. return ToolResult(
  1749. title="Cookie 注入并导航完成",
  1750. output=f"从 {cookie_file.name} 注入 {len(cookies)} 条 Cookie,已导航到 {url}",
  1751. long_term_memory=f"已从 {cookie_file.name} 注入 Cookie 并导航到 {url},登录态已恢复"
  1752. )
  1753. except Exception as e:
  1754. return ToolResult(title="Cookie 加载失败", output="", error=str(e), long_term_memory="加载 Cookie 失败")
# ============================================================
# 新版统一入口(14 个 @tool,替代原来 28 个)
# ============================================================
  1758. @tool()
  1759. async def browser_navigate(url: str, new_tab: bool = False) -> ToolResult:
  1760. """
  1761. 导航到指定 URL。
  1762. Args:
  1763. url: 目标 URL
  1764. new_tab: 是否在新标签页打开(默认 False)
  1765. """
  1766. return await browser_navigate_to_url(url=url, new_tab=new_tab)
  1767. @tool()
  1768. async def browser_search(query: str, engine: str = "bing") -> ToolResult:
  1769. """
  1770. 使用搜索引擎搜索。
  1771. Args:
  1772. query: 搜索关键词
  1773. engine: 搜索引擎,可选 google / bing / duckduckgo,默认 bing
  1774. """
  1775. return await browser_search_web(query=query, engine=engine)
  1776. @tool()
  1777. async def browser_back() -> ToolResult:
  1778. """返回上一页。"""
  1779. return await browser_go_back()
  1780. @tool()
  1781. async def browser_interact(
  1782. action: Literal["click", "type", "send_keys", "upload", "dropdown_list", "dropdown_select"],
  1783. index: Optional[int] = None,
  1784. text: Optional[str] = None,
  1785. path: Optional[str] = None,
  1786. keys: Optional[str] = None,
  1787. clear: bool = True,
  1788. ) -> ToolResult:
  1789. """
  1790. 与页面元素交互。根据 action 选择具体操作:
  1791. - click: 点击元素。需要 index。
  1792. - type: 在输入框输入文本。需要 index + text。clear 控制是否先清空。
  1793. - send_keys: 发送键盘按键(如 Enter、Control+A)。需要 keys,不需要 index。
  1794. - upload: 上传文件到文件输入框。需要 index + path(绝对路径)。
  1795. - dropdown_list: 列出下拉框选项。需要 index。
  1796. - dropdown_select: 选择下拉框选项。需要 index + text(选项文本)。
  1797. Args:
  1798. action: 交互类型
  1799. index: 元素索引(从 browser_elements 或 browser_screenshot(highlight=True) 获取)
  1800. text: 输入文本 / 下拉框选项文本
  1801. path: 上传文件的绝对路径
  1802. keys: 键盘按键字符串(如 "Enter"、"Control+A")
  1803. clear: type 时是否先清空(默认 True)
  1804. """
  1805. if action == "click":
  1806. if index is None:
  1807. return ToolResult(title="参数错误", output="", error="click 需要 index 参数")
  1808. return await browser_click_element(index=index)
  1809. elif action == "type":
  1810. if index is None or text is None:
  1811. return ToolResult(title="参数错误", output="", error="type 需要 index 和 text 参数")
  1812. return await browser_input_text(index=index, text=text, clear=clear)
  1813. elif action == "send_keys":
  1814. if keys is None:
  1815. return ToolResult(title="参数错误", output="", error="send_keys 需要 keys 参数")
  1816. return await browser_send_keys(keys=keys)
  1817. elif action == "upload":
  1818. if index is None or path is None:
  1819. return ToolResult(title="参数错误", output="", error="upload 需要 index 和 path 参数")
  1820. return await browser_upload_file(index=index, path=path)
  1821. elif action == "dropdown_list":
  1822. if index is None:
  1823. return ToolResult(title="参数错误", output="", error="dropdown_list 需要 index 参数")
  1824. return await browser_get_dropdown_options(index=index)
  1825. elif action == "dropdown_select":
  1826. if index is None or text is None:
  1827. return ToolResult(title="参数错误", output="", error="dropdown_select 需要 index 和 text 参数")
  1828. return await browser_select_dropdown_option(index=index, text=text)
  1829. else:
  1830. return ToolResult(title="未知 action", output="", error=f"不支持的 action: {action}")
  1831. @tool()
  1832. async def browser_scroll(
  1833. down: bool = True,
  1834. pages: float = 1.0,
  1835. into_view_index: Optional[int] = None,
  1836. ) -> ToolResult:
  1837. """
  1838. 滚动页面。
  1839. Args:
  1840. down: True 向下滚动,False 向上(默认 True)
  1841. pages: 滚动的页面数(默认 1.0)
  1842. into_view_index: 传入元素索引则滚动到该元素可见(忽略 down 和 pages)
  1843. """
  1844. return await browser_scroll_page(down=down, pages=pages, index=into_view_index)
  1845. @tool()
  1846. async def browser_screenshot(highlight_elements: bool = False) -> ToolResult:
  1847. """
  1848. 截取当前页面。
  1849. Args:
  1850. highlight_elements: False 返回纯截图;True 返回带交互元素编号标注的截图
  1851. + 元素列表(原 visual_selector_map 功能)
  1852. """
  1853. if highlight_elements:
  1854. return await browser_get_visual_selector_map()
  1855. else:
  1856. return await browser_screenshot_impl()
  1857. @tool()
  1858. async def browser_elements() -> ToolResult:
  1859. """
  1860. 获取当前页面的可交互元素列表(纯文本,不截图)。
  1861. 返回的 index 用于 browser_interact / browser_scroll 等操作。
  1862. """
  1863. return await browser_get_selector_map()
  1864. @tool()
  1865. async def browser_read(
  1866. mode: Literal["html", "find", "long"],
  1867. query: Optional[str] = None,
  1868. source: str = "page",
  1869. context: str = "",
  1870. ) -> ToolResult:
  1871. """
  1872. 读取页面内容,三种模式:
  1873. - html: 获取当前页面的 HTML 源码(大页面会截断到 10000 字符)
  1874. - find: 在页面中查找文本。需要 query。
  1875. - long: 智能分页读取长内容(支持自动检测 PDF)。query 描述阅读目标。
  1876. Args:
  1877. mode: 读取模式
  1878. query: find 模式下的查找文本;long 模式下的阅读目标描述
  1879. source: long 模式的内容来源("page" 或文件路径),默认 "page"
  1880. context: long 模式的业务背景(可选)
  1881. """
  1882. if mode == "html":
  1883. return await browser_get_page_html()
  1884. elif mode == "find":
  1885. if not query:
  1886. return ToolResult(title="参数错误", output="", error="find 模式需要 query 参数")
  1887. return await browser_find_text(text=query)
  1888. elif mode == "long":
  1889. return await browser_read_long_content(
  1890. goal=query or "阅读页面内容",
  1891. source=source,
  1892. context=context,
  1893. )
  1894. else:
  1895. return ToolResult(title="未知 mode", output="", error=f"不支持的 mode: {mode}")
  1896. @tool()
  1897. async def browser_extract(
  1898. query: str,
  1899. extract_links: bool = False,
  1900. start_from_char: int = 0,
  1901. ) -> ToolResult:
  1902. """
  1903. 使用 LLM 从当前页面提取结构化数据。
  1904. 与 browser_read 不同,此工具会调用 LLM 分析页面内容并返回结构化结果。
  1905. 适合"提取所有产品价格"、"总结文章要点"等需要理解语义的场景。
  1906. Args:
  1907. query: 提取指令(如"提取页面上所有产品名称和价格")
  1908. extract_links: 是否同时提取链接(默认 False)
  1909. start_from_char: 从第几个字符开始提取(用于分页处理大内容)
  1910. """
  1911. return await browser_extract_content(
  1912. query=query,
  1913. extract_links=extract_links,
  1914. start_from_char=start_from_char,
  1915. )
  1916. @tool()
  1917. async def browser_tabs(
  1918. action: Literal["switch", "close"],
  1919. tab_id: str = "",
  1920. ) -> ToolResult:
  1921. """
  1922. 管理浏览器标签页。
  1923. Args:
  1924. action: "switch" 切换到指定标签页;"close" 关闭指定标签页
  1925. tab_id: 标签页 ID(4 字符)
  1926. """
  1927. if not tab_id:
  1928. return ToolResult(title="参数错误", output="", error="需要 tab_id 参数")
  1929. if action == "switch":
  1930. return await browser_switch_tab(tab_id=tab_id)
  1931. elif action == "close":
  1932. return await browser_close_tab(tab_id=tab_id)
  1933. else:
  1934. return ToolResult(title="未知 action", output="", error=f"不支持的 action: {action}")
  1935. @tool()
  1936. async def browser_cookies(
  1937. action: Literal["load", "export", "ensure_login"],
  1938. url: str = "",
  1939. name: str = "",
  1940. account: str = "",
  1941. cookie_type: str = "",
  1942. auto_navigate: bool = True,
  1943. ) -> ToolResult:
  1944. """
  1945. Cookie / 登录态管理:
  1946. - load: 从本地加载已保存的 cookie 并注入浏览器。需要 url(自动匹配 cookie 文件)。
  1947. - export: 导出当前浏览器 cookie 到本地。可选 name 和 account 标识。
  1948. - ensure_login: 检查登录状态,未登录时自动注入 cookie。需要 cookie_type 和 url。
  1949. Args:
  1950. action: 操作类型
  1951. url: 目标 URL(load / ensure_login 必填)
  1952. name: cookie 文件名(可选)
  1953. account: 账号名(export 时可选)
  1954. cookie_type: cookie 类型标识(ensure_login 必填)
  1955. auto_navigate: load 时找不到 cookie 是否自动导航到目标页面(默认 True)
  1956. """
  1957. if action == "load":
  1958. if not url:
  1959. return ToolResult(title="参数错误", output="", error="load 需要 url 参数")
  1960. return await browser_load_cookies(url=url, name=name, auto_navigate=auto_navigate)
  1961. elif action == "export":
  1962. return await browser_export_cookies(name=name, account=account)
  1963. elif action == "ensure_login":
  1964. if not cookie_type:
  1965. return ToolResult(title="参数错误", output="", error="ensure_login 需要 cookie_type 参数")
  1966. return await browser_ensure_login_with_cookies(
  1967. cookie_type=cookie_type,
  1968. url=url or "https://www.xiaohongshu.com",
  1969. )
  1970. else:
  1971. return ToolResult(title="未知 action", output="", error=f"不支持的 action: {action}")
  1972. @tool()
  1973. async def browser_wait(
  1974. seconds: Optional[int] = None,
  1975. user_message: Optional[str] = None,
  1976. timeout: int = 300,
  1977. ) -> ToolResult:
  1978. """
  1979. 等待。两种模式:
  1980. - 传 seconds: 纯等待指定秒数(默认 3 秒)
  1981. - 传 user_message: 暂停并提示用户在浏览器中完成操作(如登录、验证码),
  1982. 用户完成后按回车继续。timeout 控制最长等待时间。
  1983. - 两者都不传: 默认等待 3 秒
  1984. Args:
  1985. seconds: 等待秒数
  1986. user_message: 用户操作提示消息
  1987. timeout: user_message 模式的最长等待(秒),默认 300
  1988. """
  1989. if user_message:
  1990. return await browser_wait_for_user_action(message=user_message, timeout=timeout)
  1991. else:
  1992. return await browser_wait_impl(seconds=seconds or 3)
  1993. @tool()
  1994. async def browser_js(code: str) -> ToolResult:
  1995. """
  1996. 在当前页面执行 JavaScript 代码。
  1997. Args:
  1998. code: JavaScript 代码字符串。返回值会被自动序列化。
  1999. """
  2000. return await browser_evaluate(code=code)
  2001. @tool()
  2002. async def browser_download(url: str, save_name: str = "") -> ToolResult:
  2003. """
  2004. 下载指定 URL 的文件到本地。
  2005. Args:
  2006. url: 文件 URL
  2007. save_name: 保存文件名(可选,默认自动推断)
  2008. """
  2009. return await browser_download_direct_url(url=url, save_name=save_name or "download")
  2010. # ============================================================
  2011. # 导出(供外部使用)
  2012. # ============================================================
# Public API of this module: the session-management helpers plus the
# @tool-decorated entry points defined above.
__all__ = [
    # Session management (not @tool)
    'init_browser_session',
    'get_browser_session',
    'get_browser_live_url',
    'cleanup_browser_session',
    'kill_browser_session',
    # The 14 @tool entry points
    'browser_navigate',
    'browser_search',
    'browser_back',
    'browser_interact',
    'browser_scroll',
    'browser_screenshot',
    'browser_elements',
    'browser_read',
    'browser_extract',
    'browser_tabs',
    'browser_cookies',
    'browser_wait',
    'browser_js',
    'browser_download',
]