| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422 |
- """
- 图片模态特征提取研究示例
- 使用 Agent 模式 + Skills,研究应该提取什么样的图片模态特征
- """
import argparse
import os
import sys
import select
import asyncio
from pathlib import Path

# Clash Verge TUN-mode compatibility: keep httpx/urllib from auto-detecting
# the system HTTP proxy. setdefault() leaves any value already set by the
# user untouched.
os.environ.setdefault("no_proxy", "*")
os.environ.setdefault("NO_PROXY", "*")

# Add the project root (three directory levels above this file) to the Python
# path so the `agent` package imports below resolve.
sys.path.insert(0, str(Path(__file__).parent.parent.parent))

from dotenv import load_dotenv
load_dotenv()

from agent.llm.prompts import SimplePrompt
from agent.core.runner import AgentRunner, RunConfig
from agent.trace import (
    FileSystemTraceStore,
    Trace,
    Message,
)
from agent.llm import create_claude_code_llm_call

# Import the custom tool module for its side effect: importing it triggers the
# @tool decorator registration. The script's own directory is put on the path
# first so that `tool` resolves to the sibling module.
sys.path.insert(0, str(Path(__file__).parent))
import tool  # noqa: E402
- def check_stdin() -> str | None:
- """非阻塞检查 stdin 是否有输入"""
- ready, _, _ = select.select([sys.stdin], [], [], 0)
- if ready:
- line = sys.stdin.readline().strip().lower()
- if line in ('p', 'pause'):
- return 'pause'
- if line in ('q', 'quit'):
- return 'quit'
- return None
def _read_multiline() -> str:
    """Read a multi-line message from stdin.

    Input ends at two consecutive blank lines, or at end-of-file (for example
    Ctrl+D or an exhausted pipe). A single blank line inside the message is
    kept; trailing blank lines are dropped.

    Returns:
        The collected lines joined with ``"\\n"``; an empty string when
        nothing was entered.
    """
    print("\n请输入干预消息(连续输入两次回车结束):")
    lines: list[str] = []
    blank_count = 0
    while blank_count < 2:
        try:
            line = input()
        except EOFError:
            # No more input can arrive; finish with what was typed so far
            # instead of propagating the error out of the interactive menu.
            break
        if line:
            blank_count = 0
            lines.append(line)
        else:
            blank_count += 1
            # The first blank of a pair may be an intentional paragraph
            # break, so keep it; the second one only terminates the input.
            if blank_count < 2:
                lines.append("")

    # Drop the blank line(s) that preceded the terminator.
    while lines and lines[-1] == "":
        lines.pop()
    return "\n".join(lines)
async def show_interactive_menu(
    runner: AgentRunner,
    trace_id: str,
    current_sequence: int,
    store: FileSystemTraceStore,
):
    """Show the pause menu and keep prompting until the user picks an action.

    Args:
        runner: Accepted for interface compatibility; not used in this body.
        trace_id: Id of the trace whose state is looked up in ``store``.
        current_sequence: Fallback sequence used when the stored trace has no
            truthy ``last_sequence``.
        store: Trace store queried for the live trace and its goal tree.

    Returns:
        One of:
          * ``{"action": "continue", "messages": [...], "after_sequence": n}``
            for option 1 (insert a user message, then continue),
          * ``{"action": "continue"}`` for option 3,
          * ``{"action": "stop"}`` for option 4.
        Option 2 only prints the goal tree and prompts again.
    """
    divider = "=" * 60
    banner = (
        "\n" + divider,
        " 执行已暂停",
        divider,
        "请选择操作:",
        " 1. 插入干预消息并继续",
        " 2. 查看当前 GoalTree",
        " 3. 继续执行",
        " 4. 停止执行",
        divider,
    )
    for row in banner:
        print(row)

    while True:
        selection = input("请输入选项 (1-4): ").strip()

        if selection == "3":
            print("\n继续执行...")
            return {"action": "continue"}

        if selection == "4":
            print("\n停止执行...")
            return {"action": "stop"}

        if selection == "2":
            tree = await store.get_goal_tree(trace_id)
            if tree and tree.goals:
                print("\n当前 GoalTree:")
                print(tree.to_prompt())
            else:
                print("\n当前没有 Goal")
            continue

        if selection == "1":
            message = _read_multiline()
            if not message:
                print("未输入任何内容,取消操作")
                continue

            print("\n将插入干预消息并继续执行...")
            # Prefer the sequence recorded in the store; fall back to the
            # caller-supplied one when the trace is missing or has no
            # truthy last_sequence.
            snapshot = await store.get_trace(trace_id)
            if snapshot and snapshot.last_sequence:
                resume_after = snapshot.last_sequence
            else:
                resume_after = current_sequence
            return {
                "action": "continue",
                "messages": [{"role": "user", "content": message}],
                "after_sequence": resume_after,
            }

        print("无效选项,请重新输入")
async def main():
    """Run the research agent with interactive pause/resume support.

    Steps:
      1. Parse ``--trace`` (optional id of an existing trace to resume).
      2. Make sure the ``output``, ``input`` and ``knowledge`` directories
         exist next to this script and load ``test.prompt``.
      3. Create an ``AgentRunner`` on a ``FileSystemTraceStore`` with
         ``base_path=".trace"``.
      4. Repeatedly drive ``runner.run``; between items, poll stdin for
         ``pause`` / ``quit``. After a pause, and whenever the runner's
         iteration ends, show the interactive menu to decide whether to
         insert a message, continue, or stop.
      5. Print the last assistant reply that carried no tool calls, save it to
         ``output/result.txt``, and print how to inspect the trace.

    Exits the process with status 1 when ``--trace`` names a trace that the
    store cannot find. Ctrl+C stops the current trace (if any) and falls
    through to the result output.
    """
    parser = argparse.ArgumentParser(description="图片模态特征提取研究")
    parser.add_argument(
        "--trace", type=str, default=None,
        help="已有的 Trace ID,用于恢复继续执行",
    )
    args = parser.parse_args()

    # Path configuration (everything lives next to this script).
    base_dir = Path(__file__).parent
    prompt_path = base_dir / "test.prompt"
    output_dir = base_dir / "output"
    output_dir.mkdir(exist_ok=True)

    # Make sure the input and knowledge directories exist.
    input_dir = base_dir / "input"
    knowledge_dir = base_dir / "knowledge"
    input_dir.mkdir(exist_ok=True)
    knowledge_dir.mkdir(exist_ok=True)

    print("=" * 60)
    print("图片模态特征提取研究 (Agent 模式)")
    print("=" * 60)
    print()
    print("💡 交互提示:")
    print(" - 执行过程中输入 'p' 或 'pause' 暂停并进入交互模式")
    print(" - 执行过程中输入 'q' 或 'quit' 停止执行")
    print("=" * 60)
    print()

    # Load the prompt.
    print("1. 加载 prompt 配置...")
    prompt = SimplePrompt(prompt_path)

    # Build the task messages.
    print("2. 构建任务消息...")
    messages = prompt.build_messages()

    # Create the agent runner.
    print("3. 创建 Agent Runner...")
    model_name = prompt.config.get('model', 'anthropic/claude-sonnet-4.6')
    print(f" - 模型: {model_name}")
    store = FileSystemTraceStore(base_path=".trace")
    runner = AgentRunner(
        trace_store=store,
        llm_call=create_claude_code_llm_call(model=model_name),
        skills_dir=None,
        debug=True
    )

    # Decide between starting a new trace and resuming an existing one.
    resume_trace_id = args.trace
    if resume_trace_id:
        existing_trace = await store.get_trace(resume_trace_id)
        if not existing_trace:
            print(f"\n错误: Trace 不存在: {resume_trace_id}")
            sys.exit(1)
        print(f"4. 恢复已有 Trace: {resume_trace_id[:8]}...")
        print(f" - 状态: {existing_trace.status}")
        print(f" - 消息数: {existing_trace.total_messages}")
    else:
        print("4. 启动新 Agent 模式...")
    print()

    final_response = ""
    current_trace_id = resume_trace_id
    current_sequence = 0
    should_exit = False

    try:
        # Options shared by the "resume" and "new run" configurations.
        run_options = {
            "model": model_name,
            "temperature": float(prompt.config.get('temperature', 0.3)),
            "max_iterations": 1000,
            "enable_thinking": prompt.config.get('enable_thinking', False),
            "thinking_budget_tokens": prompt.config.get('thinking_budget_tokens', 10000),
        }
        if resume_trace_id:
            # None (as opposed to []) marks "resuming, nothing new to send
            # yet"; the loop below uses it to inspect the stored trace first.
            initial_messages = None
            config = RunConfig(trace_id=resume_trace_id, **run_options)
        else:
            initial_messages = messages
            config = RunConfig(name="图片模态特征提取研究", **run_options)

        while not should_exit:
            if current_trace_id:
                config.trace_id = current_trace_id

            final_response = ""

            # Check the trace status when resuming without new messages.
            if current_trace_id and initial_messages is None:
                check_trace = await store.get_trace(current_trace_id)
                if check_trace and check_trace.status in ("completed", "failed"):
                    if check_trace.status == "completed":
                        print("\n[Trace] ✅ 已完成")
                        print(f" - Total messages: {check_trace.total_messages}")
                        print(f" - Total cost: ${check_trace.total_cost:.4f}")
                    else:
                        print(f"\n[Trace] ❌ 已失败: {check_trace.error_message}")
                    current_sequence = check_trace.head_sequence

                    # The trace is already completed/failed: go straight to
                    # the menu instead of calling the runner.
                    menu_result = await show_interactive_menu(
                        runner, current_trace_id, current_sequence, store
                    )
                    if menu_result["action"] == "stop":
                        break
                    elif menu_result["action"] == "continue":
                        new_messages = menu_result.get("messages", [])
                        if new_messages:
                            initial_messages = new_messages
                            config.after_sequence = menu_result.get("after_sequence")
                        else:
                            initial_messages = []
                            config.after_sequence = None
                        continue
                    break

                # Trace missing or in any other status: continue it with no
                # extra messages.
                initial_messages = []

            print(f"{'▶️ 开始执行...' if not current_trace_id else '▶️ 继续执行...'}")

            # Run the agent.
            paused = False
            try:
                async for item in runner.run(messages=initial_messages, config=config):
                    # Record the trace id before handling user commands, so a
                    # pause/quit that arrives together with the first yielded
                    # Trace can still call runner.stop() and open the menu
                    # with a real trace id.
                    if isinstance(item, Trace):
                        current_trace_id = item.trace_id

                    # Check for a user interrupt typed on stdin.
                    cmd = check_stdin()
                    if cmd == 'pause':
                        print("\n⏸️ 正在暂停执行...")
                        if current_trace_id:
                            await runner.stop(current_trace_id)
                        await asyncio.sleep(0.5)
                        menu_result = await show_interactive_menu(
                            runner, current_trace_id, current_sequence, store
                        )
                        if menu_result["action"] == "stop":
                            should_exit = True
                            paused = True
                            break
                        elif menu_result["action"] == "continue":
                            new_messages = menu_result.get("messages", [])
                            if new_messages:
                                initial_messages = new_messages
                                after_seq = menu_result.get("after_sequence")
                                if after_seq is not None:
                                    config.after_sequence = after_seq
                            else:
                                initial_messages = []
                                config.after_sequence = None
                            # Leave this run; the outer loop calls
                            # runner.run() again with the updated messages.
                            paused = True
                            break
                    elif cmd == 'quit':
                        print("\n🛑 用户请求停止...")
                        if current_trace_id:
                            await runner.stop(current_trace_id)
                        should_exit = True
                        break

                    # Handle Trace objects.
                    if isinstance(item, Trace):
                        if item.status == "running":
                            print(f"[Trace] 开始: {item.trace_id[:8]}...")
                        elif item.status == "completed":
                            print("\n[Trace] ✅ 完成")
                            print(f" - Total messages: {item.total_messages}")
                            print(f" - Total tokens: {item.total_tokens}")
                            print(f" - Total cost: ${item.total_cost:.4f}")
                        elif item.status == "failed":
                            print(f"\n[Trace] ❌ 失败: {item.error_message}")
                        elif item.status == "stopped":
                            print("\n[Trace] ⏸️ 已停止")
                    # Handle Message objects.
                    elif isinstance(item, Message):
                        current_sequence = item.sequence
                        if item.role == "assistant":
                            content = item.content
                            if isinstance(content, dict):
                                text = content.get("text", "")
                                tool_calls = content.get("tool_calls")
                                if text and not tool_calls:
                                    # Text without tool calls is treated as
                                    # the agent's answer.
                                    final_response = text
                                    print("\n[Response] Agent 回复:")
                                    print(text)
                                elif text:
                                    preview = text[:150] + "..." if len(text) > 150 else text
                                    print(f"[Assistant] {preview}")
                                if tool_calls:
                                    for tc in tool_calls:
                                        tool_name = tc.get("function", {}).get("name", "unknown")
                                        print(f"[Tool Call] 🛠️ {tool_name}")
                        elif item.role == "tool":
                            content = item.content
                            if isinstance(content, dict):
                                tool_name = content.get("tool_name", "unknown")
                                print(f"[Tool Result] ✅ {tool_name}")
                            if item.description:
                                desc = item.description
                                # Add the ellipsis only when text was actually
                                # cut, matching the assistant preview above.
                                if len(desc) > 80:
                                    desc = desc[:80] + "..."
                                print(f" {desc}")
            except Exception as e:
                # Top-level boundary for one run: report the error and fall
                # through to the menu so the session itself survives.
                print(f"\n执行出错: {e}")
                import traceback
                traceback.print_exc()

            if paused:
                if should_exit:
                    break
                continue

            if should_exit:
                break

            # After the runner's iteration ends, show the interactive menu.
            if current_trace_id:
                menu_result = await show_interactive_menu(
                    runner, current_trace_id, current_sequence, store
                )
                if menu_result["action"] == "stop":
                    break
                elif menu_result["action"] == "continue":
                    new_messages = menu_result.get("messages", [])
                    if new_messages:
                        initial_messages = new_messages
                        config.after_sequence = menu_result.get("after_sequence")
                    else:
                        initial_messages = []
                        config.after_sequence = None
                    continue
            break
    except KeyboardInterrupt:
        print("\n\n用户中断 (Ctrl+C)")
        if current_trace_id:
            await runner.stop(current_trace_id)

    # Print the result.
    if final_response:
        print()
        print("=" * 60)
        print("Agent 响应:")
        print("=" * 60)
        print(final_response)
        print("=" * 60)
        print()

        # Save the result.
        output_file = output_dir / "result.txt"
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(final_response)
        print(f"✓ 结果已保存到: {output_file}")
        print()

    # Visualization hint.
    if current_trace_id:
        print("=" * 60)
        print("可视化 Step Tree:")
        print("=" * 60)
        print("1. 启动 API Server:")
        print(" python3 api_server.py")
        print()
        print("2. 浏览器访问:")
        print(" http://localhost:8000/api/traces")
        print()
        print(f"3. Trace ID: {current_trace_id}")
        print("=" * 60)
if __name__ == "__main__":
    # Script entry point: run the async main() coroutine to completion.
    asyncio.run(main())
|