test_browser_config.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. #!/usr/bin/env python3
  2. """
  3. 使用 Browser + BrowserConfig 方式测试 browser-use
  4. 参考 GitHub issue #1520 的解决方案
  5. """
  6. import asyncio
  7. import json
  8. from pathlib import Path
  9. from datetime import datetime
  10. async def test_browser_config():
  11. """使用 BrowserConfig 测试"""
  12. print("="*60)
  13. print("🧪 测试 browser-use (BrowserConfig 方式)")
  14. print("="*60)
  15. print()
  16. from browser_use import Browser, BrowserConfig
  17. from browser_use.browser.context import BrowserContextConfig
  18. try:
  19. # 创建上下文配置
  20. print("📌 步骤 1: 创建配置...")
  21. context_cfg = BrowserContextConfig(
  22. disable_security=True,
  23. minimum_wait_page_load_time=0.5,
  24. wait_for_network_idle_page_load_time=0.5,
  25. )
  26. # 创建浏览器配置
  27. browser = Browser(
  28. config=BrowserConfig(
  29. chrome_instance_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
  30. new_context_config=context_cfg,
  31. headless=False # 非无头模式,便于调试
  32. )
  33. )
  34. print("✅ 配置已创建\n")
  35. # 获取浏览器会话
  36. print("📌 步骤 2: 获取浏览器会话...")
  37. session = await browser.get_session()
  38. print("✅ 浏览器会话已获取\n")
  39. # 导入工具
  40. from browser_use import Tools
  41. tools = Tools()
  42. # 导航到百度
  43. print("📌 步骤 3: 导航到百度...")
  44. result = await tools.navigate(
  45. url="https://www.baidu.com",
  46. browser_session=session
  47. )
  48. print(f"✅ {result.long_term_memory}\n")
  49. await asyncio.sleep(2)
  50. # 搜索
  51. search_keyword = "Python 教程"
  52. search_url = f"https://www.baidu.com/s?wd={search_keyword}"
  53. print(f"📌 步骤 4: 搜索 '{search_keyword}'...")
  54. result = await tools.navigate(
  55. url=search_url,
  56. browser_session=session
  57. )
  58. print(f"✅ {result.long_term_memory}\n")
  59. await asyncio.sleep(3)
  60. # 滚动页面
  61. print("📌 步骤 5: 滚动页面...")
  62. await tools.scroll(
  63. down=True,
  64. pages=1.0,
  65. browser_session=session
  66. )
  67. await asyncio.sleep(2)
  68. print("✅ 页面滚动完成\n")
  69. # 提取数据
  70. print("📌 步骤 6: 提取搜索结果...")
  71. extract_js = """
  72. (function(){
  73. try {
  74. const results = [];
  75. const resultItems = document.querySelectorAll('#content_left > div[class*="result"]');
  76. resultItems.forEach((item, index) => {
  77. if (index >= 10) return;
  78. const titleEl = item.querySelector('h3 a, .t a');
  79. const title = titleEl ? titleEl.textContent.trim() : '';
  80. const link = titleEl ? titleEl.href : '';
  81. const summaryEl = item.querySelector('.c-abstract, .content-right_8Zs40');
  82. const summary = summaryEl ? summaryEl.textContent.trim() : '';
  83. const sourceEl = item.querySelector('.c-color-gray, .source_1Vdff');
  84. const source = sourceEl ? sourceEl.textContent.trim() : '';
  85. if (title || link) {
  86. results.push({
  87. index: index + 1,
  88. title: title,
  89. link: link,
  90. summary: summary.substring(0, 200),
  91. source: source
  92. });
  93. }
  94. });
  95. return {
  96. success: true,
  97. count: results.length,
  98. keyword: 'Python 教程',
  99. timestamp: new Date().toISOString(),
  100. results: results
  101. };
  102. } catch (e) {
  103. return {
  104. success: false,
  105. error: e.message
  106. };
  107. }
  108. })()
  109. """
  110. result = await tools.evaluate(
  111. code=extract_js,
  112. browser_session=session
  113. )
  114. # 解析结果
  115. output = result.extracted_content or str(result.metadata)
  116. if isinstance(output, str) and output.startswith("Result: "):
  117. output = output[8:]
  118. data = json.loads(output) if isinstance(output, str) else output
  119. if data.get('success'):
  120. print(f"✅ 成功提取 {data.get('count', 0)} 条结果\n")
  121. # 保存数据
  122. json_file = Path("baidu.json")
  123. with open(json_file, 'w', encoding='utf-8') as f:
  124. json.dump(data, f, ensure_ascii=False, indent=2)
  125. print(f"✅ 数据已保存: {json_file}\n")
  126. # 显示前3条结果
  127. if data.get('results'):
  128. print("📋 前3条结果:")
  129. for item in data['results'][:3]:
  130. print(f" {item.get('index')}. {item.get('title', '无标题')[:50]}...")
  131. print()
  132. else:
  133. print(f"⚠️ 提取失败: {data.get('error')}\n")
  134. # 保存 HTML
  135. print("📌 步骤 7: 保存页面 HTML...")
  136. cdp = await session.get_or_create_cdp_session()
  137. html_result = await cdp.cdp_client.send.Runtime.evaluate(
  138. params={'expression': 'document.documentElement.outerHTML'},
  139. session_id=cdp.session_id
  140. )
  141. html_content = html_result.get('result', {}).get('value', '')
  142. html_file = Path("baidu_page.html")
  143. with open(html_file, 'w', encoding='utf-8') as f:
  144. f.write(f"<!-- 保存时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} -->\n")
  145. f.write(html_content)
  146. print(f"✅ HTML 已保存: {html_file}")
  147. print(f" 大小: {len(html_content):,} 字符\n")
  148. print("="*60)
  149. print("🎉 测试成功!")
  150. print("="*60)
  151. print("✅ browser-use 使用 BrowserConfig 方式正常工作")
  152. print("✅ 生成文件:")
  153. print(" • baidu.json")
  154. print(" • baidu_page.html")
  155. print("="*60)
  156. # 关闭浏览器
  157. await browser.close()
  158. except Exception as e:
  159. print(f"\n❌ 错误: {e}")
  160. import traceback
  161. traceback.print_exc()
  162. if __name__ == "__main__":
  163. asyncio.run(test_browser_config())