run.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209
"""
Browser research example (supports switching between a cloud and a local browser).

Features:
1. Automated research in Agent mode
2. Manual takeover: press [Enter] at any time to pause the Agent and operate the browser by hand
3. Automatic cleanup: the browser process is shut down whether the run succeeds or crashes
4. Flexible switching: a configuration variable selects the cloud or the local browser

Browser mode configuration:
- Change the BROWSER_TYPE variable below to switch modes
- "cloud": cloud browser mode; uses no local resources, requires the browser-use cloud service to be configured
- "local": local browser mode; runs Chrome locally, is faster, and supports visual debugging
"""
# ===== Browser mode configuration =====
# Allowed values: "cloud" (cloud browser) or "local" (local browser)
BROWSER_TYPE = "cloud"  # change this value to switch the browser mode
HEADLESS = False  # whether to run the browser in headless mode
  17. import os
  18. import sys
  19. import asyncio
  20. import logging
  21. import re
  22. import uuid
  23. from pathlib import Path
  24. from datetime import datetime
  25. from argparse import Namespace
# Put the directory three levels above this file (the project root) at the
# front of sys.path; presumably this is what lets the `agent` package
# imports below resolve when the script is run directly.
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from dotenv import load_dotenv
load_dotenv()
# --- Logging configuration ---
# Default to WARNING, but keep INFO output for the message manager and tools loggers.
logging.basicConfig(level=logging.WARNING)
logging.getLogger("agent.core.message_manager").setLevel(logging.INFO)
logging.getLogger("tools").setLevel(logging.INFO)
  34. from agent.llm.prompts import SimplePrompt
  35. from agent.core.runner import AgentRunner, RunConfig
  36. from agent.trace import FileSystemTraceStore, Trace, Message
  37. from agent.llm import create_openrouter_llm_call
  38. from agent.tools.builtin.browser.baseClass import kill_browser_session, init_browser_session
# ===== Global interaction control =====
# Set by listen_for_interrupt() when a line arrives on stdin; polled by the
# agent loop, which clears it again once the manual-takeover step has finished.
pause_event = asyncio.Event()
  41. async def listen_for_interrupt():
  42. """后台协程:监听标准输入,按下回车即触发暂停"""
  43. while True:
  44. # 在执行器中运行同步的 readline,避免阻塞事件循环
  45. await asyncio.get_event_loop().run_in_executor(None, sys.stdin.readline)
  46. if not pause_event.is_set():
  47. print("\n" + "!" * 40)
  48. print("🛑 检测到手动干预请求!")
  49. print("Agent 将在完成当前动作后暂停,请准备接管浏览器。")
  50. print("!" * 40 + "\n")
  51. pause_event.set()
  52. # ===== 核心逻辑 =====
  53. async def main():
  54. # 1. 环境准备
  55. base_dir = Path(__file__).parent
  56. project_root = base_dir.parent.parent
  57. trace_dir = project_root / ".trace"
  58. prompt_path = base_dir / "test.prompt"
  59. output_dir = base_dir / "output"
  60. output_dir.mkdir(exist_ok=True)
  61. print("=" * 60)
  62. print("🚀 交互式浏览器调研 Agent")
  63. print(f"🌐 浏览器模式: {'云浏览器 (Cloud)' if BROWSER_TYPE == 'cloud' else '本地浏览器 (Local)'}")
  64. print("👉 操作指南:")
  65. print(" - 运行中随时按下 [Enter] 键进入手动接管模式")
  66. print(" - 在浏览器完成操作后,点击页面上的 'Done' 或回车返回")
  67. print("=" * 60 + "\n")
  68. # 2. 加载任务
  69. prompt = SimplePrompt(prompt_path)
  70. system_prompt = prompt._messages.get("system", "")
  71. user_task = prompt._messages.get("user", "")
  72. # 默认使用 cheap 模型进行调研,如 gemini-3-flash-preview
  73. model_name = prompt.config.get('model', 'gemini-3-flash-preview')
  74. temperature = float(prompt.config.get('temperature', 0.3))
  75. messages = prompt.build_messages()
  76. # 3. 初始化浏览器会话
  77. browser_mode_name = "云浏览器" if BROWSER_TYPE == "cloud" else "本地浏览器"
  78. print(f"🌐 正在初始化{browser_mode_name}...")
  79. await init_browser_session(
  80. browser_type=BROWSER_TYPE,
  81. headless=HEADLESS,
  82. url="about:blank"
  83. )
  84. print(f"✅ {browser_mode_name}初始化完成\n")
  85. # 4. 初始化 Runner
  86. # 注意:确保你的 openrouter 配置正确
  87. runner = AgentRunner(
  88. trace_store=FileSystemTraceStore(base_path=str(trace_dir)),
  89. llm_call=create_openrouter_llm_call(model=f"google/{model_name}"),
  90. skills_dir=None,
  91. debug=True
  92. )
  93. # 5. 启动监听任务
  94. interrupt_task = asyncio.create_task(listen_for_interrupt())
  95. final_response = ""
  96. current_trace_id = None
  97. try:
  98. # 启动 Agent 迭代
  99. agent_stream = runner.run(
  100. messages=messages,
  101. config=RunConfig(
  102. system_prompt=system_prompt,
  103. model=f"google/{model_name}",
  104. temperature=temperature,
  105. max_iterations=30,
  106. name=user_task[:50],
  107. ),
  108. )
  109. async for item in agent_stream:
  110. # --- 检查手动暂停信号 ---
  111. if pause_event.is_set():
  112. print("\n" + "🛠️" * 20)
  113. print(">>> 人工接管模式激活 <<<")
  114. print("1. 请在浏览器窗口进行必要操作(登录、过验证码等)")
  115. print("2. 操作完成后,请在终端按 [Enter] 或在页面点击交互按钮继续")
  116. try:
  117. # 调用内置的等待交互工具
  118. await runner.tools.execute(
  119. "browser_wait_for_user_action",
  120. {"message": "人工干预中,请完成操作后恢复 Agent"},
  121. uid="human_admin",
  122. context={"runner": runner}
  123. )
  124. except Exception as e:
  125. print(f"⚠️ 交互工具调用失败: {e}")
  126. print(">>> 交互结束,交还控制权给 Agent <<<")
  127. print("🛠️" * 20 + "\n")
  128. pause_event.clear()
  129. # --- 正常处理 Agent 消息输出 ---
  130. if isinstance(item, Trace):
  131. current_trace_id = item.trace_id
  132. if item.status == "running":
  133. print(f"[{datetime.now().strftime('%H:%M:%S')}] 🛰️ Trace 启动: {item.trace_id[:8]}")
  134. elif item.status == "completed":
  135. print(f"\n✅ 任务圆满完成!Cost: ${item.total_cost:.4f}")
  136. elif isinstance(item, Message):
  137. if item.role == "assistant":
  138. content = item.content
  139. if isinstance(content, dict):
  140. text = content.get("text", "")
  141. tool_calls = content.get("tool_calls")
  142. if text:
  143. # 打印摘要,带点 Wit
  144. print(f"\n🤖 Agent: {text[:200]}..." if len(text) > 200 else f"\n🤖 Agent: {text}")
  145. if tool_calls:
  146. for tc in tool_calls:
  147. t_name = tc.get("function", {}).get("name", "unknown")
  148. print(f" 🛠️ 执行工具: {t_name}")
  149. elif item.role == "tool":
  150. t_content = item.content
  151. if isinstance(t_content, dict):
  152. t_name = t_content.get("tool_name", "unknown")
  153. print(f" ✅ 工具返回: {t_name}")
  154. except Exception as e:
  155. print(f"\n🔥 发生严重错误: {e}")
  156. import traceback
  157. traceback.print_exc()
  158. finally:
  159. # 停止监听协程
  160. interrupt_task.cancel()
  161. # 6. 强制清理浏览器环境
  162. print("\n" + "·" * 40)
  163. print("🧹 正在执行环境清理...")
  164. try:
  165. await kill_browser_session()
  166. print(f"✨ {browser_mode_name}进程已安全终止。")
  167. except Exception as err:
  168. print(f"❌ 清理失败: {err}")
  169. print("·" * 40 + "\n")
  170. # 7. 结果展示
  171. if current_trace_id:
  172. print(f"🔍 任务 Trace ID: {current_trace_id}")
  173. print(f"📊 访问可视化面板查看详情。")
  174. if __name__ == "__main__":
  175. try:
  176. asyncio.run(main())
  177. except KeyboardInterrupt:
  178. print("\n👋 收到退出信号,程序已停止。")