# run.py
"""
Browser research example (interactive edition).

Features:
1. Automated research in Agent mode.
2. Manual takeover: press [Enter] at any time to pause the Agent and
   operate the browser by hand.
3. Automatic cleanup: the browser process is shut down safely whether the
   run succeeds or crashes.
"""
import os
import sys
import asyncio
import logging
import re
import uuid
from pathlib import Path
from datetime import datetime
from argparse import Namespace
# Add the project root directory to the Python path.
# This must run before the `agent.*` imports below so they can be resolved.
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from dotenv import load_dotenv
load_dotenv()
# --- Logging configuration ---
# WARNING by default; INFO for the message manager and the tools loggers.
logging.basicConfig(level=logging.WARNING)
logging.getLogger("agent.core.message_manager").setLevel(logging.INFO)
logging.getLogger("tools").setLevel(logging.INFO)
from agent.llm.prompts import SimplePrompt
from agent.core.runner import AgentRunner, RunConfig
from agent.trace import FileSystemTraceStore, Trace, Message
from agent.llm import create_openrouter_llm_call
from agent.tools.builtin.browser.baseClass import kill_browser_session
# ===== Global interaction control =====
# Set by listen_for_interrupt() when the user presses Enter; checked and
# cleared by main() between streamed agent items.
pause_event = asyncio.Event()
  32. async def listen_for_interrupt():
  33. """后台协程:监听标准输入,按下回车即触发暂停"""
  34. while True:
  35. # 在执行器中运行同步的 readline,避免阻塞事件循环
  36. await asyncio.get_event_loop().run_in_executor(None, sys.stdin.readline)
  37. if not pause_event.is_set():
  38. print("\n" + "!" * 40)
  39. print("🛑 检测到手动干预请求!")
  40. print("Agent 将在完成当前动作后暂停,请准备接管浏览器。")
  41. print("!" * 40 + "\n")
  42. pause_event.set()
# ===== Core logic =====
  44. async def main():
  45. # 1. 环境准备
  46. base_dir = Path(__file__).parent
  47. project_root = base_dir.parent.parent
  48. trace_dir = project_root / ".trace"
  49. prompt_path = base_dir / "test.prompt"
  50. output_dir = base_dir / "output"
  51. output_dir.mkdir(exist_ok=True)
  52. print("=" * 60)
  53. print("🚀 交互式浏览器调研 Agent")
  54. print("👉 操作指南:")
  55. print(" - 运行中随时按下 [Enter] 键进入手动接管模式")
  56. print(" - 在浏览器完成操作后,点击页面上的 'Done' 或回车返回")
  57. print("=" * 60 + "\n")
  58. # 2. 加载任务
  59. prompt = SimplePrompt(prompt_path)
  60. system_prompt = prompt._messages.get("system", "")
  61. user_task = prompt._messages.get("user", "")
  62. # 默认使用 cheap 模型进行调研,如 gemini-3-flash-preview
  63. model_name = prompt.config.get('model', 'gemini-3-flash-preview')
  64. temperature = float(prompt.config.get('temperature', 0.3))
  65. messages = prompt.build_messages()
  66. # 3. 初始化 Runner
  67. # 注意:确保你的 openrouter 配置正确
  68. runner = AgentRunner(
  69. trace_store=FileSystemTraceStore(base_path=str(trace_dir)),
  70. llm_call=create_openrouter_llm_call(model=f"google/{model_name}"),
  71. skills_dir=None,
  72. debug=True
  73. )
  74. # 4. 启动监听任务
  75. interrupt_task = asyncio.create_task(listen_for_interrupt())
  76. final_response = ""
  77. current_trace_id = None
  78. try:
  79. # 启动 Agent 迭代
  80. agent_stream = runner.run(
  81. messages=messages,
  82. config=RunConfig(
  83. system_prompt=system_prompt,
  84. model=f"google/{model_name}",
  85. temperature=temperature,
  86. max_iterations=30,
  87. name=user_task[:50],
  88. ),
  89. )
  90. async for item in agent_stream:
  91. # --- 检查手动暂停信号 ---
  92. if pause_event.is_set():
  93. print("\n" + "🛠️" * 20)
  94. print(">>> 人工接管模式激活 <<<")
  95. print("1. 请在浏览器窗口进行必要操作(登录、过验证码等)")
  96. print("2. 操作完成后,请在终端按 [Enter] 或在页面点击交互按钮继续")
  97. try:
  98. # 调用内置的等待交互工具
  99. await runner.tools.execute(
  100. "browser_wait_for_user_action",
  101. {"message": "人工干预中,请完成操作后恢复 Agent"},
  102. uid="human_admin",
  103. context={"runner": runner}
  104. )
  105. except Exception as e:
  106. print(f"⚠️ 交互工具调用失败: {e}")
  107. print(">>> 交互结束,交还控制权给 Agent <<<")
  108. print("🛠️" * 20 + "\n")
  109. pause_event.clear()
  110. # --- 正常处理 Agent 消息输出 ---
  111. if isinstance(item, Trace):
  112. current_trace_id = item.trace_id
  113. if item.status == "running":
  114. print(f"[{datetime.now().strftime('%H:%M:%S')}] 🛰️ Trace 启动: {item.trace_id[:8]}")
  115. elif item.status == "completed":
  116. print(f"\n✅ 任务圆满完成!Cost: ${item.total_cost:.4f}")
  117. elif isinstance(item, Message):
  118. if item.role == "assistant":
  119. content = item.content
  120. if isinstance(content, dict):
  121. text = content.get("text", "")
  122. tool_calls = content.get("tool_calls")
  123. if text:
  124. # 打印摘要,带点 Wit
  125. print(f"\n🤖 Agent: {text[:200]}..." if len(text) > 200 else f"\n🤖 Agent: {text}")
  126. if tool_calls:
  127. for tc in tool_calls:
  128. t_name = tc.get("function", {}).get("name", "unknown")
  129. print(f" 🛠️ 执行工具: {t_name}")
  130. elif item.role == "tool":
  131. t_content = item.content
  132. if isinstance(t_content, dict):
  133. t_name = t_content.get("tool_name", "unknown")
  134. print(f" ✅ 工具返回: {t_name}")
  135. except Exception as e:
  136. print(f"\n🔥 发生严重错误: {e}")
  137. import traceback
  138. traceback.print_exc()
  139. finally:
  140. # 停止监听协程
  141. interrupt_task.cancel()
  142. # 5. 强制清理浏览器环境
  143. print("\n" + "·" * 40)
  144. print("🧹 正在执行环境清理...")
  145. try:
  146. await kill_browser_session()
  147. print("✨ 浏览器进程已安全终止。")
  148. except Exception as err:
  149. print(f"❌ 清理失败: {err}")
  150. print("·" * 40 + "\n")
  151. # 6. 结果展示
  152. if current_trace_id:
  153. print(f"🔍 任务 Trace ID: {current_trace_id}")
  154. print(f"📊 访问可视化面板查看详情。")
# Script entry point: run main() on a fresh event loop and turn Ctrl+C into
# a short farewell message instead of a traceback.
if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\n👋 收到退出信号,程序已停止。")