"""
Usage examples for baseClassTools.py.

This file demonstrates how to use the toolset built on the native
browser-use classes.
"""

import asyncio
from tools.baseClassTools import (
    # 会话管理
    init_browser_session,
    cleanup_browser_session,

    # 工具函数
    navigate_to_url,
    click_element,
    input_text,
    send_keys,
    scroll_page,
    get_page_html,
    get_selector_map,
    extract_content,
    wait,
    wait_for_user_action,
    search_web,
    screenshot,
    evaluate,
)


# ============================================================
# 示例 1: 基础使用 - 简单的网页导航和交互
# ============================================================

async def example_1_basic_usage():
    """
    Basic example: open a page, then read its HTML and element map.

    Starts a browser session with the ``example_profile`` profile, navigates
    to Baidu, waits two seconds, then prints the length of the HTML found in
    the result metadata and the first line of the selector-map output.
    The browser session is cleaned up in ``finally`` whether or not the
    steps above succeed.
    """
    print("\n" + "="*60)
    print("示例 1: 基础使用")
    print("="*60 + "\n")

    try:
        # 1. Initialise the browser session (only needs to be called once).
        await init_browser_session(
            headless=False,
            profile_name="example_profile"
        )

        # 2. Navigate to the page.
        result = await navigate_to_url("https://www.baidu.com")
        print(f"✅ 导航结果: {result.output}")

        # 3. Give the page time to load.
        await wait(seconds=2)

        # 4. Fetch the page HTML and report its length.
        html_result = await get_page_html()
        print(f"✅ 获取到 HTML，长度: {len(html_result.metadata.get('html', ''))}")

        # 5. Fetch the element map and show only its first line.
        selector_result = await get_selector_map()
        # Split on a real newline, and do it outside the f-string: the
        # previous '\\n' matched a literal backslash + "n" (so nothing was
        # split), and a backslash inside an f-string expression is a
        # SyntaxError before Python 3.12.
        first_line = selector_result.output.split("\n")[0]
        print(f"✅ 找到元素: {first_line}")

    finally:
        # 6. Clean up the session.
        await cleanup_browser_session()
        print("\n✅ 浏览器会话已清理")


# ============================================================
# 示例 2: 搜索和数据提取
# ============================================================

async def example_2_search_and_extract():
    """
    Search example: run a web search, scroll, screenshot and report the URL.

    Every step runs inside ``try`` so the browser session is always cleaned
    up afterwards.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("示例 2: 搜索和数据提取")
    print(rule + "\n")

    try:
        # Start the browser with a visible window.
        await init_browser_session(headless=False)

        # Run the query through the Google engine.
        found = await search_web(
            engine="google",
            query="Python async programming",
        )
        print(f"✅ 搜索完成: {found.long_term_memory}")

        # Let the results load before interacting with the page.
        await wait(seconds=3)

        # Scroll one page down to reveal more results.
        await scroll_page(down=True, pages=1.0)
        print("✅ 滚动页面完成")

        # Request a screenshot of the current page state.
        await screenshot()
        print("✅ 截图请求已发送")

        # Read the page HTML result and report the URL stored in its metadata.
        page = await get_page_html()
        current_url = page.metadata.get('url', '')
        print(f"✅ 当前页面: {current_url}")

    finally:
        await cleanup_browser_session()


# ============================================================
# 示例 3: 表单填写和提交
# ============================================================

async def example_3_form_interaction():
    """
    Form example: type into an input element, press Enter, then scroll.

    Baidu stands in for a real form page. The element index passed to
    ``input_text`` is a placeholder; real code should look it up in the
    selector-map output first.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("示例 3: 表单填写和提交")
    print(rule + "\n")

    try:
        # Start the browser with a visible window.
        await init_browser_session(headless=False)

        # Open the page that hosts the "form" and let it load.
        await navigate_to_url("https://www.baidu.com")
        await wait(seconds=2)

        # Fetch the element map and print only its first line.
        elements = await get_selector_map()
        first_line = elements.output.split("\n")[0]
        print(f"✅ {first_line}")

        # Type into the search box, assumed here to be at index 1.
        # NOTE: inspect the selector map to find the correct index in practice.
        await input_text(index=1, text="browser automation", clear=True)
        print("✅ 输入文本完成")

        # Submit by pressing Enter.
        await send_keys(keys="Enter")
        print("✅ 按下回车键")

        # Wait for the search results to load.
        await wait(seconds=3)

        # Scroll two pages down through the results.
        await scroll_page(down=True, pages=2.0)
        print("✅ 滚动完成")

    finally:
        await cleanup_browser_session()


# ============================================================
# 示例 4: 需要登录的场景
# ============================================================

async def example_4_login_scenario():
    """
    Login example: open a site that may require login and wait for the user.

    The page HTML is scanned for the words "登录" / "login". When either is
    present, the user is asked to log in manually, with a 180-second timeout.
    The session is always cleaned up at the end.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("示例 4: 需要登录的场景")
    print(rule + "\n")

    try:
        # Use a dedicated named profile for this site.
        # NOTE(review): presumably the profile is what persists the login
        # state between runs -- confirm against init_browser_session.
        await init_browser_session(
            profile_name="xiaohongshu_profile",
            headless=False,
        )

        # Open Xiaohongshu and let the page load.
        await navigate_to_url("https://www.xiaohongshu.com")
        await wait(seconds=2)

        # Crude login check: look for a login keyword anywhere in the HTML.
        page = await get_page_html()
        markup = page.metadata.get('html', '')
        login_needed = "登录" in markup or "login" in markup.lower()

        if not login_needed:
            print("✅ 已经登录或不需要登录")
        else:
            print("⚠️ 检测到需要登录")

            # Hand control to the user; 180 seconds is a three-minute timeout.
            outcome = await wait_for_user_action(
                message="请在浏览器中登录小红书 (Please login to Xiaohongshu)",
                timeout=180,
            )

            # A title containing "完成" is treated as "user finished";
            # anything else is reported as a timeout.
            finished = "完成" in outcome.title
            print("✅ 用户已完成登录" if finished else "⚠️ 等待超时，继续执行")

        # Placeholder for the follow-up work a real task would do.
        await wait(seconds=2)
        print("✅ 可以继续执行后续任务")

    finally:
        # The session is closed here on every run; the message below states
        # that the next run is expected to log in automatically.
        await cleanup_browser_session()
        print("\n✅ 会话已保存，下次运行会自动登录")


# ============================================================
# 示例 5: JavaScript 执行和高级操作
# ============================================================

async def example_5_javascript_execution():
    """
    JavaScript example: evaluate three snippets in the page via ``evaluate``.

    The snippets collect basic page info, scroll to the middle of the
    document, and extract the first five links. The session is always
    cleaned up at the end.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("示例 5: JavaScript 执行")
    print(rule + "\n")

    # Snippet 1: page title, URL and counts of <a> / <img> elements.
    page_info_js = """
        (function(){
            try {
                return {
                    title: document.title,
                    url: window.location.href,
                    links: document.querySelectorAll('a').length,
                    images: document.querySelectorAll('img').length
                };
            } catch(e) {
                return 'Error: ' + e.message;
            }
        })()
        """

    # Snippet 2: scroll to half of the document height.
    half_scroll_js = "window.scrollTo(0, document.body.scrollHeight / 2)"

    # Snippet 3: text and href of the first five links.
    first_links_js = """
        (function(){
            const links = Array.from(document.querySelectorAll('a'));
            return links.slice(0, 5).map(a => ({
                text: a.textContent.trim(),
                href: a.href
            }));
        })()
        """

    try:
        # Start the browser with a visible window and open the page.
        await init_browser_session(headless=False)
        await navigate_to_url("https://www.baidu.com")
        await wait(seconds=2)

        # Evaluate the page-info snippet and print its output.
        info = await evaluate(code=page_info_js)
        print(f"✅ JavaScript 执行结果:\n{info.output}")

        # Scroll using JavaScript.
        await evaluate(code=half_scroll_js)
        print("✅ 使用 JS 滚动完成")

        # Extract links; only the first 200 characters of output are shown.
        extracted = await evaluate(code=first_links_js)
        print(f"✅ 提取链接:\n{extracted.output[:200]}...")

    finally:
        await cleanup_browser_session()


# ============================================================
# 示例 6: 在 Agent 类中集成使用
# ============================================================

class MyBrowserAgent:
    """
    Example agent showing how to wrap the baseClassTools helpers in a class.

    The browser session is started lazily: every public action calls
    ``initialize`` first, and the ``initialized`` flag prevents the session
    from being started or cleaned up twice by this object.
    """

    def __init__(self, profile_name: str = "default"):
        # No browser work happens here; the session starts on first use.
        self.initialized = False
        self.profile_name = profile_name

    async def initialize(self):
        """Start the browser session unless this agent already did."""
        if self.initialized:
            return

        await init_browser_session(
            headless=False,
            profile_name=self.profile_name,
        )
        self.initialized = True
        print("✅ Agent 已初始化")

    async def cleanup(self):
        """Clean up the browser session if this agent started one."""
        if not self.initialized:
            return

        await cleanup_browser_session()
        self.initialized = False
        print("✅ Agent 已清理")

    async def navigate_and_get_info(self, url: str):
        """
        Open ``url`` and return the page's title and URL.

        Returns a dict with keys ``"title"`` and ``"url"``, both read from
        the metadata of the page-HTML result (empty string when missing).
        """
        await self.initialize()

        # Navigate and give the page two seconds to load.
        await navigate_to_url(url)
        await wait(seconds=2)

        # The reported URL comes from the page metadata, not the argument.
        page = await get_page_html()
        return {
            "title": page.metadata.get('title', ''),
            "url": page.metadata.get('url', ''),
        }

    async def search_and_click(self, query: str, element_index: int):
        """Search Google for ``query``, click ``element_index``; return True."""
        await self.initialize()

        # Run the search and wait for the results.
        await search_web(query=query, engine="google")
        await wait(seconds=3)

        # Click the requested element and wait for the page to react.
        await click_element(index=element_index)
        await wait(seconds=2)

        return True

    async def extract_with_login(self, url: str, need_login: bool = False):
        """
        Open ``url`` and return its HTML, optionally pausing for manual login.

        When ``need_login`` is true and the HTML contains "登录" or "login",
        the user gets up to 180 seconds to log in before the HTML is fetched
        again and returned.
        """
        await self.initialize()

        await navigate_to_url(url)
        await wait(seconds=2)

        if need_login:
            # Crude login check: look for a login keyword in the HTML.
            probe = await get_page_html()
            markup = probe.metadata.get('html', '')
            login_needed = "登录" in markup or "login" in markup.lower()

            if login_needed:
                print("⚠️ 检测到需要登录")
                await wait_for_user_action("请登录", timeout=180)

        # Fetch the HTML again so the result reflects any login just done.
        final_page = await get_page_html()
        return final_page.metadata.get('html', '')


async def example_6_agent_integration():
    """
    Agent example: drive the tools through a ``MyBrowserAgent`` instance.

    The agent is created up front and its ``cleanup`` runs in ``finally``.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("示例 6: Agent 集成")
    print(rule + "\n")

    browser_agent = MyBrowserAgent(profile_name="agent_profile")

    try:
        # 1. Navigate and print the returned page info.
        page_info = await browser_agent.navigate_and_get_info("https://www.baidu.com")
        print(f"✅ 页面信息: {page_info}")

        # 2. Optional: search and interact (uncomment to try).
        # await browser_agent.search_and_click("Python", element_index=5)
        # print("✅ 搜索和点击完成")

    finally:
        await browser_agent.cleanup()


# ============================================================
# 运行所有示例
# ============================================================

async def run_all_examples():
    """
    Run the enabled examples between a start banner and an end banner.

    Only example 1 is enabled; the others are left commented out so they
    can be switched on one at a time.
    """
    wide_rule = "=" * 80

    print("\n" + wide_rule)
    print("开始运行 baseClassTools.py 使用示例")
    print(wide_rule)

    # Choose which examples to run (uncomment a line to enable it).
    await example_1_basic_usage()
    # await example_2_search_and_extract()
    # await example_3_form_interaction()
    # await example_4_login_scenario()
    # await example_5_javascript_execution()
    # await example_6_agent_integration()

    print("\n" + wide_rule)
    print("所有示例运行完成")
    print(wide_rule + "\n")


if __name__ == "__main__":
    # Script entry point: run the example runner with asyncio.run.
    asyncio.run(run_all_examples())