| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196 |
- #!/usr/bin/env python3
- """
- 使用 Browser + BrowserConfig 方式测试 browser-use
- 参考 GitHub issue #1520 的解决方案
- """
- import asyncio
- import json
- from pathlib import Path
- from datetime import datetime
# JavaScript evaluated inside the Baidu results page. It collects up to the
# first ten result cards under #content_left and returns a plain object.
# The __KEYWORD__ placeholder is replaced with a JSON-encoded string literal
# so the reported keyword always matches the one that was actually searched.
_EXTRACT_JS_TEMPLATE = """
(function(){
    try {
        const results = [];
        const resultItems = document.querySelectorAll('#content_left > div[class*="result"]');
        resultItems.forEach((item, index) => {
            if (index >= 10) return;
            const titleEl = item.querySelector('h3 a, .t a');
            const title = titleEl ? titleEl.textContent.trim() : '';
            const link = titleEl ? titleEl.href : '';
            const summaryEl = item.querySelector('.c-abstract, .content-right_8Zs40');
            const summary = summaryEl ? summaryEl.textContent.trim() : '';
            const sourceEl = item.querySelector('.c-color-gray, .source_1Vdff');
            const source = sourceEl ? sourceEl.textContent.trim() : '';
            if (title || link) {
                results.push({
                    index: index + 1,
                    title: title,
                    link: link,
                    summary: summary.substring(0, 200),
                    source: source
                });
            }
        });
        return {
            success: true,
            count: results.length,
            keyword: __KEYWORD__,
            timestamp: new Date().toISOString(),
            results: results
        };
    } catch (e) {
        return {
            success: false,
            error: e.message
        };
    }
})()
"""


def _build_search_url(keyword):
    """Return the Baidu results URL for *keyword* with the query percent-encoded."""
    from urllib.parse import quote

    # Spaces, non-ASCII text and reserved characters such as '&' or '#'
    # must be escaped, otherwise they truncate or corrupt the query string.
    return f"https://www.baidu.com/s?wd={quote(keyword, safe='')}"


def _parse_evaluate_output(result):
    """Decode the value produced by ``tools.evaluate`` into Python data.

    Reads ``result.extracted_content`` and falls back to
    ``str(result.metadata)`` when that is empty. A leading ``"Result: "``
    prefix is stripped before the text is parsed as JSON. Non-string
    payloads are returned unchanged.

    Raises:
        json.JSONDecodeError: if the text payload is not valid JSON.
    """
    output = result.extracted_content or str(result.metadata)
    if not isinstance(output, str):
        return output
    prefix = "Result: "
    if output.startswith(prefix):
        output = output[len(prefix):]
    return json.loads(output)


async def test_browser_config() -> None:
    """Run an end-to-end browser-use smoke test using Browser + BrowserConfig.

    Steps: build the configuration, obtain a browser session, open Baidu,
    run a search, scroll one page, extract the results with JavaScript into
    ``baidu.json`` and dump the page markup into ``baidu_page.html``.

    Any exception raised after the browser_use imports is printed together
    with its traceback instead of being propagated, and the browser is
    closed in all cases.
    """
    print("="*60)
    print("🧪 测试 browser-use (BrowserConfig 方式)")
    print("="*60)
    print()
    from browser_use import Browser, BrowserConfig
    from browser_use.browser.context import BrowserContextConfig

    browser = None  # stays None if construction fails, so cleanup can skip it
    generated_files = []  # names of the files that were actually written
    try:
        # Build the context configuration.
        print("📌 步骤 1: 创建配置...")
        context_cfg = BrowserContextConfig(
            disable_security=True,
            minimum_wait_page_load_time=0.5,
            wait_for_network_idle_page_load_time=0.5,
        )
        # Build the browser configuration.
        browser = Browser(
            config=BrowserConfig(
                chrome_instance_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
                new_context_config=context_cfg,
                headless=False  # headed mode, easier to debug
            )
        )
        print("✅ 配置已创建\n")

        # Obtain the browser session.
        print("📌 步骤 2: 获取浏览器会话...")
        session = await browser.get_session()
        print("✅ 浏览器会话已获取\n")

        # Import the tool set.
        from browser_use import Tools
        tools = Tools()

        # Navigate to Baidu.
        print("📌 步骤 3: 导航到百度...")
        result = await tools.navigate(
            url="https://www.baidu.com",
            browser_session=session
        )
        print(f"✅ {result.long_term_memory}\n")
        await asyncio.sleep(2)

        # Run the search.
        search_keyword = "Python 教程"
        search_url = _build_search_url(search_keyword)
        print(f"📌 步骤 4: 搜索 '{search_keyword}'...")
        result = await tools.navigate(
            url=search_url,
            browser_session=session
        )
        print(f"✅ {result.long_term_memory}\n")
        await asyncio.sleep(3)

        # Scroll the page.
        print("📌 步骤 5: 滚动页面...")
        await tools.scroll(
            down=True,
            pages=1.0,
            browser_session=session
        )
        await asyncio.sleep(2)
        print("✅ 页面滚动完成\n")

        # Extract the data.
        print("📌 步骤 6: 提取搜索结果...")
        # json.dumps yields a double-quoted, fully escaped literal that is
        # also a valid JavaScript string literal.
        extract_js = _EXTRACT_JS_TEMPLATE.replace(
            "__KEYWORD__", json.dumps(search_keyword)
        )
        result = await tools.evaluate(
            code=extract_js,
            browser_session=session
        )
        # Parse the result.
        data = _parse_evaluate_output(result)
        if data.get('success'):
            print(f"✅ 成功提取 {data.get('count', 0)} 条结果\n")
            # Save the data.
            json_file = Path("baidu.json")
            with open(json_file, 'w', encoding='utf-8') as f:
                json.dump(data, f, ensure_ascii=False, indent=2)
            generated_files.append(str(json_file))
            print(f"✅ 数据已保存: {json_file}\n")
            # Show the first three results.
            if data.get('results'):
                print("📋 前3条结果:")
                for item in data['results'][:3]:
                    print(f" {item.get('index')}. {item.get('title', '无标题')[:50]}...")
                print()
        else:
            print(f"⚠️ 提取失败: {data.get('error')}\n")

        # Save the HTML.
        print("📌 步骤 7: 保存页面 HTML...")
        cdp = await session.get_or_create_cdp_session()
        html_result = await cdp.cdp_client.send.Runtime.evaluate(
            params={'expression': 'document.documentElement.outerHTML'},
            session_id=cdp.session_id
        )
        html_content = html_result.get('result', {}).get('value', '')
        html_file = Path("baidu_page.html")
        with open(html_file, 'w', encoding='utf-8') as f:
            f.write(f"<!-- 保存时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} -->\n")
            f.write(html_content)
        generated_files.append(str(html_file))
        print(f"✅ HTML 已保存: {html_file}")
        print(f" 大小: {len(html_content):,} 字符\n")

        print("="*60)
        print("🎉 测试成功!")
        print("="*60)
        print("✅ browser-use 使用 BrowserConfig 方式正常工作")
        print("✅ 生成文件:")
        # Only list what was really written; baidu.json is absent when the
        # extraction step reported a failure.
        for name in generated_files:
            print(f" • {name}")
        print("="*60)
    except Exception as e:
        print(f"\n❌ 错误: {e}")
        import traceback
        traceback.print_exc()
    finally:
        # Close the browser on every path so a failed step does not leave
        # the Chrome instance running.
        if browser is not None:
            try:
                await browser.close()
            except Exception as close_err:
                print(f"⚠️ 关闭浏览器失败: {close_err}")
if __name__ == "__main__":
    # Script entry point: build the coroutine, then let asyncio create an
    # event loop, run it to completion and tear the loop down.
    smoke_test = test_browser_config()
    asyncio.run(smoke_test)
|