# cloud_browser_example.py
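"""
Cloud Browser Mode Example (云浏览器模式示例)

This example shows how to automate web pages with browser-use in cloud
browser mode.

Advantages of cloud browser mode:
1. No local Chrome/Chromium installation is required
2. It can run on headless servers
3. Better stability and performance
4. Supports distributed deployment

Prerequisites:
1. Set BROWSER_USE_API_KEY in the .env file
2. Make sure the network connection is working
"""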
  14. import sys
  15. import os
  16. import asyncio
  17. import json
  18. import re
  19. from datetime import datetime
  20. from pathlib import Path
  21. from urllib.parse import quote
  22. from dotenv import load_dotenv
# Load environment variables (e.g. BROWSER_USE_API_KEY) from a .env file.
load_dotenv()
# Put the project root (two levels above this file) at the front of the
# Python import path so the project-local `agent` package can be imported.
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))
  28. # 导入 browser-use 核心类
  29. from browser_use import BrowserSession, BrowserProfile
  30. from browser_use.tools.service import Tools
  31. # 导入框架的工具函数
  32. from agent.tools.builtin.baseClass import (
  33. init_browser_session,
  34. cleanup_browser_session,
  35. navigate_to_url,
  36. search_web,
  37. get_selector_map,
  38. click_element,
  39. input_text,
  40. screenshot,
  41. get_page_html,
  42. evaluate,
  43. wait,
  44. scroll_page,
  45. wait_for_user_action,
  46. )
  47. async def example_1_basic_navigation():
  48. """
  49. 示例 1: 基础导航操作
  50. 演示如何使用云浏览器访问网页
  51. """
  52. print("\n" + "="*60)
  53. print("示例 1: 基础导航操作")
  54. print("="*60)
  55. try:
  56. # 初始化云浏览器会话
  57. # 关键参数:is_local=False 表示使用云浏览器
  58. api_key = os.getenv("BROWSER_USE_API_KEY")
  59. if not api_key:
  60. print("❌ 错误: 未找到 BROWSER_USE_API_KEY,请在 .env 文件中配置")
  61. return
  62. print(f"✅ 使用云浏览器 API Key: {api_key[:20]}...")
  63. # 初始化浏览器会话(云模式)
  64. # 注意:API key 会自动从环境变量 BROWSER_USE_API_KEY 读取
  65. browser, tools = await init_browser_session(
  66. headless=True, # 云浏览器通常使用无头模式
  67. use_cloud=True, # 关键:设置为 True 使用云浏览器
  68. )
  69. print("✅ 云浏览器会话已启动")
  70. # 导航到百度
  71. print("\n📍 导航到百度...")
  72. result = await navigate_to_url("https://www.baidu.com")
  73. print(f" 结果: {result.title}")
  74. # 等待页面加载
  75. await wait(2)
  76. # 获取页面标题
  77. print("\n📄 获取页面信息...")
  78. title_result = await evaluate("document.title")
  79. print(f" 页面标题: {title_result.output}")
  80. # 截图
  81. print("\n📸 截图...")
  82. screenshot_result = await screenshot()
  83. print(f" 截图结果: {screenshot_result.title}")
  84. print("\n✅ 示例 1 完成")
  85. except Exception as e:
  86. print(f"❌ 错误: {str(e)}")
  87. finally:
  88. # 清理浏览器会话
  89. await cleanup_browser_session()
  90. print("🧹 浏览器会话已清理")
  91. async def example_2_search_and_extract():
  92. """
  93. 示例 2: 搜索和内容提取
  94. 演示如何使用云浏览器进行搜索并提取内容
  95. """
  96. print("\n" + "="*60)
  97. print("示例 2: 搜索和内容提取")
  98. print("="*60)
  99. try:
  100. # 初始化云浏览器
  101. api_key = os.getenv("BROWSER_USE_API_KEY")
  102. if not api_key:
  103. print("❌ 错误: 未找到 BROWSER_USE_API_KEY")
  104. return
  105. browser, tools = await init_browser_session(
  106. headless=True,
  107. use_cloud=True,
  108. )
  109. print("✅ 云浏览器会话已启动")
  110. # 使用搜索引擎搜索
  111. print("\n🔍 搜索: Python async programming...")
  112. result = await search_web("Python async programming", engine="google")
  113. print(f" 搜索结果: {result.title}")
  114. # 等待搜索结果加载
  115. await wait(3)
  116. # 获取页面 HTML(部分)
  117. print("\n📄 获取页面 HTML...")
  118. html_result = await get_page_html()
  119. print(f" HTML 长度: {len(html_result.metadata.get('html', ''))} 字符")
  120. # 获取可交互元素
  121. print("\n🎯 获取页面元素...")
  122. selector_result = await get_selector_map()
  123. print(f" {selector_result.output[:200]}...")
  124. print("\n✅ 示例 2 完成")
  125. except Exception as e:
  126. print(f"❌ 错误: {str(e)}")
  127. finally:
  128. await cleanup_browser_session()
  129. print("🧹 浏览器会话已清理")
  130. async def example_3_with_browser_profile():
  131. """
  132. 示例 3: 使用 BrowserProfile 预设配置
  133. 演示如何使用 BrowserProfile 预设 cookies、localStorage 等
  134. """
  135. print("\n" + "="*60)
  136. print("示例 3: 使用 BrowserProfile 预设配置")
  137. print("="*60)
  138. try:
  139. api_key = os.getenv("BROWSER_USE_API_KEY")
  140. if not api_key:
  141. print("❌ 错误: 未找到 BROWSER_USE_API_KEY")
  142. return
  143. # 创建 BrowserProfile 并预设一些配置
  144. profile = BrowserProfile(
  145. # 可以预设 cookies
  146. cookies=[
  147. {
  148. "name": "test_cookie",
  149. "value": "test_value",
  150. "domain": ".example.com",
  151. "path": "/",
  152. }
  153. ],
  154. # 可以预设 localStorage
  155. local_storage={
  156. "example.com": {
  157. "key1": "value1",
  158. "key2": "value2",
  159. }
  160. },
  161. # 可以设置用户代理
  162. user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
  163. )
  164. print("✅ 创建了 BrowserProfile 配置")
  165. # 使用 profile 初始化浏览器
  166. browser, tools = await init_browser_session(
  167. headless=True,
  168. use_cloud=True,
  169. browser_profile=profile, # 传入 profile
  170. )
  171. print("✅ 云浏览器会话已启动(带预设配置)")
  172. # 访问一个网页
  173. print("\n📍 导航到示例网站...")
  174. result = await navigate_to_url("https://httpbin.org/headers")
  175. print(f" 结果: {result.title}")
  176. await wait(2)
  177. # 检查 User-Agent 是否生效
  178. print("\n🔍 检查 User-Agent...")
  179. ua_result = await evaluate("navigator.userAgent")
  180. print(f" User-Agent: {ua_result.output[:100]}...")
  181. print("\n✅ 示例 3 完成")
  182. except Exception as e:
  183. print(f"❌ 错误: {str(e)}")
  184. finally:
  185. await cleanup_browser_session()
  186. print("🧹 浏览器会话已清理")
  187. async def example_4_form_interaction():
  188. """
  189. 示例 4: 表单交互
  190. 演示如何在云浏览器中进行表单填写和提交
  191. """
  192. print("\n" + "="*60)
  193. print("示例 4: 表单交互")
  194. print("="*60)
  195. try:
  196. api_key = os.getenv("BROWSER_USE_API_KEY")
  197. if not api_key:
  198. print("❌ 错误: 未找到 BROWSER_USE_API_KEY")
  199. return
  200. browser, tools = await init_browser_session(
  201. headless=True,
  202. use_cloud=True,
  203. )
  204. print("✅ 云浏览器会话已启动")
  205. # 访问一个有表单的测试页面
  206. print("\n📍 导航到表单测试页面...")
  207. result = await navigate_to_url("https://httpbin.org/forms/post")
  208. print(f" 结果: {result.title}")
  209. await wait(2)
  210. # 获取页面元素
  211. print("\n🎯 获取页面元素...")
  212. selector_result = await get_selector_map()
  213. print(f" 找到 {selector_result.long_term_memory}")
  214. # 注意:实际使用时需要根据页面结构找到正确的元素索引
  215. # 这里只是演示流程
  216. print("\n✅ 示例 4 完成")
  217. except Exception as e:
  218. print(f"❌ 错误: {str(e)}")
  219. finally:
  220. await cleanup_browser_session()
  221. print("🧹 浏览器会话已清理")
  222. async def example_5_multi_tab():
  223. """
  224. 示例 5: 多标签页操作
  225. 演示如何在云浏览器中管理多个标签页
  226. """
  227. print("\n" + "="*60)
  228. print("示例 5: 多标签页操作")
  229. print("="*60)
  230. try:
  231. api_key = os.getenv("BROWSER_USE_API_KEY")
  232. if not api_key:
  233. print("❌ 错误: 未找到 BROWSER_USE_API_KEY")
  234. return
  235. browser, tools = await init_browser_session(
  236. headless=True,
  237. use_cloud=True,
  238. )
  239. print("✅ 云浏览器会话已启动")
  240. # 在第一个标签页打开百度
  241. print("\n📍 标签页 1: 打开百度...")
  242. result1 = await navigate_to_url("https://www.baidu.com")
  243. print(f" 结果: {result1.title}")
  244. await wait(2)
  245. # 在新标签页打开谷歌
  246. print("\n📍 标签页 2: 打开谷歌(新标签页)...")
  247. result2 = await navigate_to_url("https://www.google.com", new_tab=True)
  248. print(f" 结果: {result2.title}")
  249. await wait(2)
  250. # 获取当前页面信息
  251. print("\n📄 当前页面信息...")
  252. title_result = await evaluate("document.title")
  253. print(f" 当前标题: {title_result.output}")
  254. print("\n✅ 示例 5 完成")
  255. except Exception as e:
  256. print(f"❌ 错误: {str(e)}")
  257. finally:
  258. await cleanup_browser_session()
  259. print("🧹 浏览器会话已清理")
  260. def load_cookies(cookie_str, domain, url=None):
  261. cookies = []
  262. try:
  263. for cookie_part in cookie_str.split(';'):
  264. if cookie_part:
  265. name, value = cookie_part.split('=', 1)
  266. cookie = {"name": str(name).strip(), "value": str(value).strip(), "domain": domain,
  267. "path":"/",
  268. "expires":-1,
  269. "httpOnly": False,
  270. "secure": True,
  271. "sameSite":"None"}
  272. if url:
  273. cookie["url"] = url
  274. cookies.append(cookie)
  275. except:
  276. pass
  277. return cookies
  278. async def example_6_xhs_search_save():
  279. """
  280. 示例 6: 小红书搜索并保存结果(带登录)
  281. 演示如何处理需要登录的网站
  282. """
  283. print("\n" + "="*60)
  284. print("示例 6: 小红书搜索并保存结果(带登录)")
  285. print("="*60)
  286. try:
  287. api_key = os.getenv("BROWSER_USE_API_KEY")
  288. if not api_key:
  289. print("❌ 错误: 未找到 BROWSER_USE_API_KEY")
  290. return
  291. # 创建 BrowserProfile
  292. cookiesStr = "gid=yjJiiqSqKKf8yjJiiqSJiWMKyJvfq2vIJxYDh4EfAyCW9Sq89uUhxI888y4JW8y8WJS448Kj; a1=19a5821e25frfgqcz1g48ktmjilzla6dvt8saird230000337474; webId=bf5a89012d3e96b8e8317a9158d2237b; abRequestId=bf5a89012d3e96b8e8317a9158d2237b; x-user-id-pgy.xiaohongshu.com=64cb5fa2000000002b00a903; x-user-id-ad.xiaohongshu.com=67078bac000000001d022a25; x-user-id-mcc.xiaohongshu.com=67078bac000000001d022a25; web_session=040069b5bf1ceafef95542ee0a3b4b114d9a59; x-user-id-pro.xiaohongshu.com=67078bac000000001d022a25; x-user-id-creator.xiaohongshu.com=64cb5fa2000000002b00a903; webBuild=5.8.0; unread={%22ub%22:%226972cc62000000001a032ef0%22%2C%22ue%22:%226978c695000000001a030baf%22%2C%22uc%22:25}; acw_tc=0a0d0d6817697823078311273e2749a170e3d6e7c28bc3c6b3df1b05366b21; xsecappid=ugc; websectiga=f47eda31ec99545da40c2f731f0630efd2b0959e1dd10d5fedac3dce0bd1e04d; sec_poison_id=8f37e824-4cf9-4c1a-8a6b-1297a36d51ba; customer-sso-sid=68c517601157138359418885nha1gpvvujwqbhia; customerClientId=609975161834570; access-token-creator.xiaohongshu.com=customer.creator.AT-68c517601157138359418887mosxcziw5qwkllrs; galaxy_creator_session_id=NIUNVxmv6LPmZ31jZ2DoKYgyUutPOItjJ24t; galaxy.creator.beaker.session.id=1769782309631057230248; loadts=1769782310288"
  293. cookie_url = "https://www.xiaohongshu.com"
  294. cookies = load_cookies(cookiesStr, ".xiaohongshu.com", cookie_url)
  295. profile = BrowserProfile(
  296. user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
  297. )
  298. # 初始化云浏览器(非无头模式,方便用户看到登录界面)
  299. browser, tools = await init_browser_session(
  300. headless=False, # 设置为 False,方便用户看到浏览器界面
  301. use_cloud=True,
  302. browser_profile=profile,
  303. )
  304. print("✅ 云浏览器会话已启动")
  305. print("📝 提示: 云浏览器启动时会输出 Live URL,你可以在浏览器中打开查看")
  306. # 步骤 1: 先访问小红书首页,检查是否需要登录
  307. print("\n📍 步骤 1: 访问小红书首页...")
  308. await navigate_to_url("https://www.xiaohongshu.com")
  309. await wait(3)
  310. await browser._cdp_set_cookies(cookies)
  311. await wait(1)
  312. await navigate_to_url("https://www.xiaohongshu.com")
  313. await wait(3)
  314. # 检查是否需要登录
  315. print("\n🔍 检查登录状态...")
  316. check_login_js = """
  317. (function() {
  318. // 检查是否有登录按钮或登录相关元素
  319. const loginBtn = document.querySelector('[class*="login"]') ||
  320. document.querySelector('[href*="login"]') ||
  321. Array.from(document.querySelectorAll('button, a')).find(el => (el.textContent || '').includes('登录'));
  322. // 检查是否有用户信息(已登录)
  323. const userInfo = document.querySelector('[class*="user"]') ||
  324. document.querySelector('[class*="avatar"]');
  325. return {
  326. needLogin: !!loginBtn && !userInfo,
  327. hasLoginBtn: !!loginBtn,
  328. hasUserInfo: !!userInfo
  329. };
  330. })()
  331. """
  332. login_status = await evaluate(check_login_js)
  333. print(f" 登录状态检查: {login_status.output}")
  334. status_output = login_status.output
  335. if isinstance(status_output, str) and status_output.startswith("Result: "):
  336. status_output = status_output[8:]
  337. login_info = None
  338. if isinstance(status_output, str):
  339. try:
  340. login_info = json.loads(status_output)
  341. except Exception:
  342. login_info = None
  343. elif isinstance(status_output, dict):
  344. login_info = status_output
  345. if login_info and login_info.get("needLogin"):
  346. print("\n👤 步骤 2: 登录处理...")
  347. print(" 如果小红书需要登录,请在云浏览器中完成以下操作:")
  348. print(" 1. 打开上面输出的 Live URL(在日志中查找 '🔗 Live URL')")
  349. print(" 2. 在 Live URL 页面中完成登录(扫码或账号密码)")
  350. print(" 3. 登录成功后,回到这里按 Enter 继续")
  351. await wait_for_user_action(
  352. message="请在云浏览器中完成小红书登录,完成后按 Enter 继续",
  353. timeout=300
  354. )
  355. print("\n✅ 用户已确认登录完成,继续执行...")
  356. else:
  357. print("\n✅ 已检测为登录状态,跳过手动登录")
  358. # 步骤 3: 执行搜索
  359. keyword = "瑜伽美女"
  360. search_url = f"https://www.xiaohongshu.com/search_result?keyword={quote(keyword)}&type=51"
  361. print(f"\n📍 步骤 3: 导航到搜索页: {keyword} ...")
  362. await navigate_to_url(search_url)
  363. await wait(6)
  364. # 滚动页面加载更多内容
  365. print("\n📜 滚动页面加载更多内容...")
  366. for i in range(3):
  367. print(f" 滚动 {i+1}/3...")
  368. await scroll_page(down=True, pages=2.0)
  369. await wait(2)
  370. # 步骤 4: 提取数据
  371. print("\n📊 步骤 4: 提取搜索结果...")
  372. extract_js = """
  373. (function(){
  374. const maxCount = 20;
  375. const seen = new Set();
  376. const results = [];
  377. function pushItem(item){
  378. if (!item || !item.link || seen.has(item.link)) return;
  379. seen.add(item.link);
  380. results.push(item);
  381. }
  382. // 方法 1: 从 DOM 中提取
  383. const anchors = document.querySelectorAll('a[href*="/explore/"]');
  384. anchors.forEach(a => {
  385. if (results.length >= maxCount) return;
  386. const link = a.href || '';
  387. const img = a.querySelector('img');
  388. const title = ((img && img.alt) || a.textContent || '').trim();
  389. const cover = (img && img.src) || '';
  390. if (link && title) {
  391. pushItem({ title, link, cover });
  392. }
  393. });
  394. // 方法 2: 从 JSON 数据中提取
  395. const scriptNodes = document.querySelectorAll('script[type="application/json"], script#__NEXT_DATA__, script#__NUXT__');
  396. const walk = (node) => {
  397. if (!node || results.length >= maxCount) return;
  398. if (Array.isArray(node)) {
  399. for (const item of node) {
  400. walk(item);
  401. if (results.length >= maxCount) return;
  402. }
  403. return;
  404. }
  405. if (typeof node === 'object') {
  406. const title = (node.title || node.desc || node.name || node.noteTitle || '').toString().trim();
  407. const id = node.noteId || node.note_id || node.id || node.noteID;
  408. const cover = (node.cover && (node.cover.url || node.cover.urlDefault)) || node.coverUrl || node.image || '';
  409. let link = '';
  410. if (id) {
  411. link = `https://www.xiaohongshu.com/explore/${id}`;
  412. }
  413. if (title && link) {
  414. pushItem({ title, link, cover });
  415. }
  416. for (const key in node) {
  417. if (typeof node[key] === 'object') walk(node[key]);
  418. }
  419. }
  420. };
  421. scriptNodes.forEach(node => {
  422. if (results.length >= maxCount) return;
  423. const text = node.textContent || '';
  424. if (!text) return;
  425. try {
  426. const data = JSON.parse(text);
  427. walk(data);
  428. } catch (e) {}
  429. });
  430. return {
  431. success: true,
  432. keyword: '瑜伽美女',
  433. count: results.length,
  434. results: results,
  435. timestamp: new Date().toISOString(),
  436. };
  437. })()
  438. """
  439. async def run_extract():
  440. result = await evaluate(extract_js)
  441. output = result.output
  442. if isinstance(output, str) and output.startswith("Result: "):
  443. output = output[8:]
  444. try:
  445. data = json.loads(output)
  446. except Exception:
  447. data = {
  448. "success": False,
  449. "keyword": keyword,
  450. "error": "JSON 解析失败",
  451. "raw_output": str(output)[:2000],
  452. "timestamp": datetime.now().isoformat(),
  453. }
  454. if isinstance(data, dict) and data.get("count", 0) == 0:
  455. print(" JS 提取结果为空,尝试从 HTML 中提取...")
  456. html_result = await get_page_html()
  457. html = html_result.metadata.get("html", "")
  458. if html:
  459. def decode_text(value: str) -> str:
  460. try:
  461. return bytes(value, "utf-8").decode("unicode_escape")
  462. except Exception:
  463. return value
  464. results = []
  465. seen = set()
  466. pattern = re.compile(r'"noteId":"(.*?)".*?"title":"(.*?)"', re.S)
  467. for match in pattern.finditer(html):
  468. note_id = match.group(1)
  469. title = decode_text(match.group(2)).strip()
  470. link = f"https://www.xiaohongshu.com/explore/{note_id}"
  471. if note_id and link not in seen and title:
  472. seen.add(link)
  473. results.append({"title": title, "link": link})
  474. if len(results) >= 20:
  475. break
  476. if results:
  477. data = {
  478. "success": True,
  479. "keyword": keyword,
  480. "count": len(results),
  481. "results": results,
  482. "timestamp": datetime.now().isoformat(),
  483. "source": "html_fallback",
  484. }
  485. else:
  486. blocked_markers = ["登录", "验证", "验证码", "请先登录", "异常访问"]
  487. if any(marker in html for marker in blocked_markers):
  488. data = {
  489. "success": False,
  490. "keyword": keyword,
  491. "count": 0,
  492. "results": [],
  493. "error": "可能被登录或验证码拦截",
  494. "timestamp": datetime.now().isoformat(),
  495. }
  496. return data
  497. data = await run_extract()
  498. if isinstance(data, dict) and data.get("count", 0) == 0 and data.get("error") == "可能被登录或验证码拦截":
  499. print("\n👤 检测到拦截,请在云浏览器中完成登录或验证码验证")
  500. await wait_for_user_action(
  501. message="完成后按 Enter 继续,将重新提取搜索结果",
  502. timeout=300
  503. )
  504. data = await run_extract()
  505. # 步骤 5: 保存结果
  506. print(f"\n💾 步骤 5: 保存结果...")
  507. print(f" 提取到 {data.get('count', 0)} 条数据")
  508. output_dir = Path(__file__).parent.parent / "output"
  509. output_dir.mkdir(parents=True, exist_ok=True)
  510. output_path = output_dir / "xhs.json"
  511. with open(output_path, "w", encoding="utf-8") as f:
  512. json.dump(data, f, ensure_ascii=False, indent=2)
  513. print(f"✅ 数据已保存到: {output_path}")
  514. # 显示部分结果
  515. if data.get("results"):
  516. print(f"\n📋 前 3 条结果预览:")
  517. for i, item in enumerate(data["results"][:3], 1):
  518. print(f" {i}. {item.get('title', 'N/A')[:50]}")
  519. print(f" {item.get('link', 'N/A')}")
  520. print("\n✅ 示例 6 完成")
  521. except Exception as e:
  522. print(f"❌ 错误: {str(e)}")
  523. import traceback
  524. traceback.print_exc()
  525. finally:
  526. await cleanup_browser_session()
  527. print("🧹 浏览器会话已清理")
  528. async def example_7_baidu_search_save():
  529. print("\n" + "="*60)
  530. print("示例 7: 百度搜索并保存结果")
  531. print("="*60)
  532. try:
  533. api_key = os.getenv("BROWSER_USE_API_KEY")
  534. if not api_key:
  535. print("❌ 错误: 未找到 BROWSER_USE_API_KEY")
  536. return
  537. await init_browser_session(
  538. headless=True,
  539. use_cloud=True,
  540. )
  541. print("✅ 云浏览器会话已启动")
  542. keyword = "瑜伽美女"
  543. search_url = f"https://www.baidu.com/s?wd={quote(keyword)}"
  544. print(f"\n📍 导航到百度搜索页: {keyword} ...")
  545. await navigate_to_url(search_url)
  546. await wait(3)
  547. await scroll_page(down=True, pages=1.5)
  548. await wait(2)
  549. extract_js = """
  550. (function(){
  551. const results = [];
  552. const items = document.querySelectorAll('#content_left > div[class*="result"]');
  553. items.forEach((item, index) => {
  554. if (index >= 10) return;
  555. const titleEl = item.querySelector('h3 a, .t a');
  556. const title = titleEl ? titleEl.textContent.trim() : '';
  557. const link = titleEl ? titleEl.href : '';
  558. const summaryEl = item.querySelector('.c-abstract, .content-right_8Zs40');
  559. const summary = summaryEl ? summaryEl.textContent.trim() : '';
  560. const sourceEl = item.querySelector('.c-color-gray, .source_1Vdff');
  561. const source = sourceEl ? sourceEl.textContent.trim() : '';
  562. if (title || link) {
  563. results.push({
  564. index: index + 1,
  565. title,
  566. link,
  567. summary: summary.substring(0, 200),
  568. source,
  569. });
  570. }
  571. });
  572. return {
  573. success: true,
  574. keyword: '瑜伽美女',
  575. count: results.length,
  576. results,
  577. timestamp: new Date().toISOString(),
  578. };
  579. })()
  580. """
  581. result = await evaluate(extract_js)
  582. output = result.output
  583. if isinstance(output, str) and output.startswith("Result: "):
  584. output = output[8:]
  585. try:
  586. data = json.loads(output)
  587. except Exception:
  588. data = {
  589. "success": False,
  590. "keyword": keyword,
  591. "error": "JSON 解析失败",
  592. "raw_output": str(output)[:2000],
  593. "timestamp": datetime.now().isoformat(),
  594. }
  595. output_dir = Path(__file__).parent.parent / "output"
  596. output_dir.mkdir(parents=True, exist_ok=True)
  597. output_path = output_dir / "baidu.json"
  598. with open(output_path, "w", encoding="utf-8") as f:
  599. json.dump(data, f, ensure_ascii=False, indent=2)
  600. print(f"✅ 数据已保存到: {output_path}")
  601. if data.get("results"):
  602. print("\n📋 前 3 条结果预览:")
  603. for i, item in enumerate(data["results"][:3], 1):
  604. print(f" {i}. {item.get('title', 'N/A')[:50]}")
  605. print(f" {item.get('link', 'N/A')}")
  606. print("\n✅ 示例 7 完成")
  607. except Exception as e:
  608. print(f"❌ 错误: {str(e)}")
  609. finally:
  610. await cleanup_browser_session()
  611. print("🧹 浏览器会话已清理")
  612. async def main():
  613. """
  614. 主函数:运行所有示例
  615. """
  616. import argparse
  617. print("\n" + "="*60)
  618. print("🌐 Browser-Use 云浏览器模式示例")
  619. print("="*60)
  620. # 检查 API Key
  621. api_key = os.getenv("BROWSER_USE_API_KEY")
  622. if not api_key:
  623. print("\n❌ 错误: 未找到 BROWSER_USE_API_KEY")
  624. print("请在 .env 文件中配置 BROWSER_USE_API_KEY")
  625. return
  626. print(f"\n✅ 已加载 API Key: {api_key[:20]}...")
  627. # 运行示例(可以选择运行哪些示例)
  628. examples = [
  629. ("基础导航操作", example_1_basic_navigation),
  630. ("搜索和内容提取", example_2_search_and_extract),
  631. ("使用 BrowserProfile", example_3_with_browser_profile),
  632. ("表单交互", example_4_form_interaction),
  633. ("多标签页操作", example_5_multi_tab),
  634. ("小红书搜索并保存结果", example_6_xhs_search_save),
  635. ("百度搜索并保存结果", example_7_baidu_search_save),
  636. ]
  637. # 解析命令行参数
  638. parser = argparse.ArgumentParser(description="Browser-Use 云浏览器模式示例")
  639. parser.add_argument(
  640. "--example",
  641. type=int,
  642. choices=range(1, len(examples) + 1),
  643. help="选择要运行的示例 (1-7),不指定则运行第一个示例"
  644. )
  645. parser.add_argument(
  646. "--all",
  647. action="store_true",
  648. help="运行所有示例"
  649. )
  650. args = parser.parse_args()
  651. print("\n可用示例:")
  652. for i, (name, _) in enumerate(examples, 1):
  653. print(f" {i}. {name}")
  654. if args.all:
  655. # 运行所有示例
  656. print("\n运行所有示例...")
  657. for name, func in examples:
  658. await func()
  659. print("\n" + "-"*60)
  660. elif args.example:
  661. # 运行指定示例
  662. name, func = examples[args.example - 1]
  663. print(f"\n运行示例 {args.example}: {name}")
  664. await func()
  665. else:
  666. # 默认运行第一个示例
  667. name, func = examples[0]
  668. print(f"\n运行默认示例: {name}")
  669. print("(使用 --example N 运行其他示例,或 --all 运行所有示例)")
  670. await func()
  671. print("\n" + "="*60)
  672. print("✅ 示例运行完成")
  673. print("="*60)
if __name__ == "__main__":
    # Script entry point: run the async main() to completion.
    asyncio.run(main())