run.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422
  1. """
  2. 图片模态特征提取研究示例
  3. 使用 Agent 模式 + Skills,研究应该提取什么样的图片模态特征
  4. """
  5. import argparse
  6. import os
  7. import sys
  8. import select
  9. import asyncio
  10. from pathlib import Path
  11. # Clash Verge TUN 模式兼容:禁止 httpx/urllib 自动检测系统 HTTP 代理
  12. os.environ.setdefault("no_proxy", "*")
  13. os.environ.setdefault("NO_PROXY", "*")
  14. # 添加项目根目录到 Python 路径
  15. sys.path.insert(0, str(Path(__file__).parent.parent.parent))
  16. from dotenv import load_dotenv
  17. load_dotenv()
  18. from agent.llm.prompts import SimplePrompt
  19. from agent.core.runner import AgentRunner, RunConfig
  20. from agent.trace import (
  21. FileSystemTraceStore,
  22. Trace,
  23. Message,
  24. )
  25. from agent.llm import create_claude_code_llm_call
  26. # 导入自定义工具模块,触发 @tool 装饰器注册
  27. sys.path.insert(0, str(Path(__file__).parent))
  28. import tool # noqa: E402
  29. def check_stdin() -> str | None:
  30. """非阻塞检查 stdin 是否有输入"""
  31. ready, _, _ = select.select([sys.stdin], [], [], 0)
  32. if ready:
  33. line = sys.stdin.readline().strip().lower()
  34. if line in ('p', 'pause'):
  35. return 'pause'
  36. if line in ('q', 'quit'):
  37. return 'quit'
  38. return None
  39. def _read_multiline() -> str:
  40. """读取多行输入,以连续两次回车(空行)结束"""
  41. print("\n请输入干预消息(连续输入两次回车结束):")
  42. lines: list[str] = []
  43. blank_count = 0
  44. while True:
  45. line = input()
  46. if line == "":
  47. blank_count += 1
  48. if blank_count >= 2:
  49. break
  50. lines.append("")
  51. else:
  52. blank_count = 0
  53. lines.append(line)
  54. while lines and lines[-1] == "":
  55. lines.pop()
  56. return "\n".join(lines)
  57. async def show_interactive_menu(
  58. runner: AgentRunner,
  59. trace_id: str,
  60. current_sequence: int,
  61. store: FileSystemTraceStore,
  62. ):
  63. """显示交互式菜单"""
  64. print("\n" + "=" * 60)
  65. print(" 执行已暂停")
  66. print("=" * 60)
  67. print("请选择操作:")
  68. print(" 1. 插入干预消息并继续")
  69. print(" 2. 查看当前 GoalTree")
  70. print(" 3. 继续执行")
  71. print(" 4. 停止执行")
  72. print("=" * 60)
  73. while True:
  74. choice = input("请输入选项 (1-4): ").strip()
  75. if choice == "1":
  76. text = _read_multiline()
  77. if not text:
  78. print("未输入任何内容,取消操作")
  79. continue
  80. print(f"\n将插入干预消息并继续执行...")
  81. live_trace = await store.get_trace(trace_id)
  82. actual_sequence = live_trace.last_sequence if live_trace and live_trace.last_sequence else current_sequence
  83. return {
  84. "action": "continue",
  85. "messages": [{"role": "user", "content": text}],
  86. "after_sequence": actual_sequence,
  87. }
  88. elif choice == "2":
  89. goal_tree = await store.get_goal_tree(trace_id)
  90. if goal_tree and goal_tree.goals:
  91. print("\n当前 GoalTree:")
  92. print(goal_tree.to_prompt())
  93. else:
  94. print("\n当前没有 Goal")
  95. continue
  96. elif choice == "3":
  97. print("\n继续执行...")
  98. return {"action": "continue"}
  99. elif choice == "4":
  100. print("\n停止执行...")
  101. return {"action": "stop"}
  102. else:
  103. print("无效选项,请重新输入")
  104. async def main():
  105. parser = argparse.ArgumentParser(description="图片模态特征提取研究")
  106. parser.add_argument(
  107. "--trace", type=str, default=None,
  108. help="已有的 Trace ID,用于恢复继续执行",
  109. )
  110. args = parser.parse_args()
  111. # 路径配置
  112. base_dir = Path(__file__).parent
  113. project_root = base_dir.parent.parent
  114. prompt_path = base_dir / "test.prompt"
  115. output_dir = base_dir / "output"
  116. output_dir.mkdir(exist_ok=True)
  117. # 确保 input 和 knowledge 目录存在
  118. input_dir = base_dir / "input"
  119. knowledge_dir = base_dir / "knowledge"
  120. input_dir.mkdir(exist_ok=True)
  121. knowledge_dir.mkdir(exist_ok=True)
  122. print("=" * 60)
  123. print("图片模态特征提取研究 (Agent 模式)")
  124. print("=" * 60)
  125. print()
  126. print("💡 交互提示:")
  127. print(" - 执行过程中输入 'p' 或 'pause' 暂停并进入交互模式")
  128. print(" - 执行过程中输入 'q' 或 'quit' 停止执行")
  129. print("=" * 60)
  130. print()
  131. # 加载 prompt
  132. print("1. 加载 prompt 配置...")
  133. prompt = SimplePrompt(prompt_path)
  134. # 构建消息
  135. print("2. 构建任务消息...")
  136. messages = prompt.build_messages()
  137. # 创建 Agent Runner
  138. print("3. 创建 Agent Runner...")
  139. model_name = prompt.config.get('model', 'anthropic/claude-sonnet-4.6')
  140. print(f" - 模型: {model_name}")
  141. store = FileSystemTraceStore(base_path=".trace")
  142. runner = AgentRunner(
  143. trace_store=store,
  144. llm_call=create_claude_code_llm_call(model=model_name),
  145. skills_dir=None,
  146. debug=True
  147. )
  148. # 判断是新建还是恢复
  149. resume_trace_id = args.trace
  150. if resume_trace_id:
  151. existing_trace = await store.get_trace(resume_trace_id)
  152. if not existing_trace:
  153. print(f"\n错误: Trace 不存在: {resume_trace_id}")
  154. sys.exit(1)
  155. print(f"4. 恢复已有 Trace: {resume_trace_id[:8]}...")
  156. print(f" - 状态: {existing_trace.status}")
  157. print(f" - 消息数: {existing_trace.total_messages}")
  158. else:
  159. print(f"4. 启动新 Agent 模式...")
  160. print()
  161. final_response = ""
  162. current_trace_id = resume_trace_id
  163. current_sequence = 0
  164. should_exit = False
  165. try:
  166. model_name = prompt.config.get('model', 'anthropic/claude-sonnet-4.6')
  167. if resume_trace_id:
  168. initial_messages = None
  169. config = RunConfig(
  170. model=model_name,
  171. temperature=float(prompt.config.get('temperature', 0.3)),
  172. max_iterations=1000,
  173. trace_id=resume_trace_id,
  174. enable_thinking=prompt.config.get('enable_thinking', False),
  175. thinking_budget_tokens=prompt.config.get('thinking_budget_tokens', 10000),
  176. )
  177. else:
  178. initial_messages = messages
  179. config = RunConfig(
  180. model=model_name,
  181. temperature=float(prompt.config.get('temperature', 0.3)),
  182. max_iterations=1000,
  183. name="图片模态特征提取研究",
  184. enable_thinking=prompt.config.get('enable_thinking', False),
  185. thinking_budget_tokens=prompt.config.get('thinking_budget_tokens', 10000),
  186. )
  187. while not should_exit:
  188. if current_trace_id:
  189. config.trace_id = current_trace_id
  190. final_response = ""
  191. # 检查 trace 状态
  192. if current_trace_id and initial_messages is None:
  193. check_trace = await store.get_trace(current_trace_id)
  194. if check_trace and check_trace.status in ("completed", "failed"):
  195. if check_trace.status == "completed":
  196. print(f"\n[Trace] ✅ 已完成")
  197. print(f" - Total messages: {check_trace.total_messages}")
  198. print(f" - Total cost: ${check_trace.total_cost:.4f}")
  199. else:
  200. print(f"\n[Trace] ❌ 已失败: {check_trace.error_message}")
  201. current_sequence = check_trace.head_sequence
  202. menu_result = await show_interactive_menu(
  203. runner, current_trace_id, current_sequence, store
  204. )
  205. if menu_result["action"] == "stop":
  206. break
  207. elif menu_result["action"] == "continue":
  208. new_messages = menu_result.get("messages", [])
  209. if new_messages:
  210. initial_messages = new_messages
  211. config.after_sequence = menu_result.get("after_sequence")
  212. else:
  213. initial_messages = []
  214. config.after_sequence = None
  215. continue
  216. break
  217. initial_messages = []
  218. print(f"{'▶️ 开始执行...' if not current_trace_id else '▶️ 继续执行...'}")
  219. # 执行 Agent
  220. paused = False
  221. try:
  222. async for item in runner.run(messages=initial_messages, config=config):
  223. # 检查用户中断
  224. cmd = check_stdin()
  225. if cmd == 'pause':
  226. print("\n⏸️ 正在暂停执行...")
  227. if current_trace_id:
  228. await runner.stop(current_trace_id)
  229. await asyncio.sleep(0.5)
  230. menu_result = await show_interactive_menu(
  231. runner, current_trace_id, current_sequence, store
  232. )
  233. if menu_result["action"] == "stop":
  234. should_exit = True
  235. paused = True
  236. break
  237. elif menu_result["action"] == "continue":
  238. new_messages = menu_result.get("messages", [])
  239. if new_messages:
  240. initial_messages = new_messages
  241. after_seq = menu_result.get("after_sequence")
  242. if after_seq is not None:
  243. config.after_sequence = after_seq
  244. paused = True
  245. break
  246. else:
  247. initial_messages = []
  248. config.after_sequence = None
  249. paused = True
  250. break
  251. elif cmd == 'quit':
  252. print("\n🛑 用户请求停止...")
  253. if current_trace_id:
  254. await runner.stop(current_trace_id)
  255. should_exit = True
  256. break
  257. # 处理 Trace 对象
  258. if isinstance(item, Trace):
  259. current_trace_id = item.trace_id
  260. if item.status == "running":
  261. print(f"[Trace] 开始: {item.trace_id[:8]}...")
  262. elif item.status == "completed":
  263. print(f"\n[Trace] ✅ 完成")
  264. print(f" - Total messages: {item.total_messages}")
  265. print(f" - Total tokens: {item.total_tokens}")
  266. print(f" - Total cost: ${item.total_cost:.4f}")
  267. elif item.status == "failed":
  268. print(f"\n[Trace] ❌ 失败: {item.error_message}")
  269. elif item.status == "stopped":
  270. print(f"\n[Trace] ⏸️ 已停止")
  271. # 处理 Message 对象
  272. elif isinstance(item, Message):
  273. current_sequence = item.sequence
  274. if item.role == "assistant":
  275. content = item.content
  276. if isinstance(content, dict):
  277. text = content.get("text", "")
  278. tool_calls = content.get("tool_calls")
  279. if text and not tool_calls:
  280. final_response = text
  281. print(f"\n[Response] Agent 回复:")
  282. print(text)
  283. elif text:
  284. preview = text[:150] + "..." if len(text) > 150 else text
  285. print(f"[Assistant] {preview}")
  286. if tool_calls:
  287. for tc in tool_calls:
  288. tool_name = tc.get("function", {}).get("name", "unknown")
  289. print(f"[Tool Call] 🛠️ {tool_name}")
  290. elif item.role == "tool":
  291. content = item.content
  292. if isinstance(content, dict):
  293. tool_name = content.get("tool_name", "unknown")
  294. print(f"[Tool Result] ✅ {tool_name}")
  295. if item.description:
  296. desc = item.description[:80] if len(item.description) > 80 else item.description
  297. print(f" {desc}...")
  298. except Exception as e:
  299. print(f"\n执行出错: {e}")
  300. import traceback
  301. traceback.print_exc()
  302. if paused:
  303. if should_exit:
  304. break
  305. continue
  306. if should_exit:
  307. break
  308. # Runner 退出后显示交互菜单
  309. if current_trace_id:
  310. menu_result = await show_interactive_menu(
  311. runner, current_trace_id, current_sequence, store
  312. )
  313. if menu_result["action"] == "stop":
  314. break
  315. elif menu_result["action"] == "continue":
  316. new_messages = menu_result.get("messages", [])
  317. if new_messages:
  318. initial_messages = new_messages
  319. config.after_sequence = menu_result.get("after_sequence")
  320. else:
  321. initial_messages = []
  322. config.after_sequence = None
  323. continue
  324. break
  325. except KeyboardInterrupt:
  326. print("\n\n用户中断 (Ctrl+C)")
  327. if current_trace_id:
  328. await runner.stop(current_trace_id)
  329. # 输出结果
  330. if final_response:
  331. print()
  332. print("=" * 60)
  333. print("Agent 响应:")
  334. print("=" * 60)
  335. print(final_response)
  336. print("=" * 60)
  337. print()
  338. # 保存结果
  339. output_file = output_dir / "result.txt"
  340. with open(output_file, 'w', encoding='utf-8') as f:
  341. f.write(final_response)
  342. print(f"✓ 结果已保存到: {output_file}")
  343. print()
  344. # 可视化提示
  345. if current_trace_id:
  346. print("=" * 60)
  347. print("可视化 Step Tree:")
  348. print("=" * 60)
  349. print("1. 启动 API Server:")
  350. print(" python3 api_server.py")
  351. print()
  352. print("2. 浏览器访问:")
  353. print(" http://localhost:8000/api/traces")
  354. print()
  355. print(f"3. Trace ID: {current_trace_id}")
  356. print("=" * 60)
# Script entry point: only when executed directly (not on import), drive the
# async main() coroutine to completion with asyncio.run.
if __name__ == "__main__":
    asyncio.run(main())