|
@@ -31,10 +31,11 @@ from examples.process_research.config import (
|
|
|
from agent.utils import setup_logging
|
|
from agent.utils import setup_logging
|
|
|
|
|
|
|
|
async def run_agent_task(runner: AgentRunner, prompt_name: str, kwargs: dict, task_name: str, model_name: str):
|
|
async def run_agent_task(runner: AgentRunner, prompt_name: str, kwargs: dict, task_name: str, model_name: str):
|
|
|
|
|
+ from examples.process_pipeline.script.validate_schema import validate_case, validate_blueprint, validate_capabilities, validate_strategy
|
|
|
base_dir = Path(__file__).parent
|
|
base_dir = Path(__file__).parent
|
|
|
prompt_path = base_dir / "prompts" / f"{prompt_name}.prompt"
|
|
prompt_path = base_dir / "prompts" / f"{prompt_name}.prompt"
|
|
|
prompt = SimplePrompt(prompt_path)
|
|
prompt = SimplePrompt(prompt_path)
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
messages = prompt.build_messages(**kwargs)
|
|
messages = prompt.build_messages(**kwargs)
|
|
|
target_tools = []
|
|
target_tools = []
|
|
|
if prompt_name == "extract_capabilities":
|
|
if prompt_name == "extract_capabilities":
|
|
@@ -48,85 +49,198 @@ async def run_agent_task(runner: AgentRunner, prompt_name: str, kwargs: dict, ta
|
|
|
"assemble_strategy": ["core"], # 只需文件读写
|
|
"assemble_strategy": ["core"], # 只需文件读写
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- run_config = RunConfig(
|
|
|
|
|
- model=prompt.config.get("model") or model_name,
|
|
|
|
|
- temperature=prompt.config.get("temperature") or 0.3,
|
|
|
|
|
- name=task_name,
|
|
|
|
|
- agent_type=prompt_name,
|
|
|
|
|
- tools=target_tools,
|
|
|
|
|
- tool_groups=tool_groups_map.get(prompt_name, ["core"]),
|
|
|
|
|
- knowledge=KnowledgeConfig(enable_completion_extraction=False, enable_extraction=False, enable_injection=False)
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
- task_cost = 0.0
|
|
|
|
|
|
|
+ total_task_cost = 0.0
|
|
|
task_errors = []
|
|
task_errors = []
|
|
|
- last_trace_id = None
|
|
|
|
|
-
|
|
|
|
|
- print(f"🚀 [Launch] {task_name}")
|
|
|
|
|
- try:
|
|
|
|
|
- async for item in runner.run(messages=messages, config=run_config):
|
|
|
|
|
- if isinstance(item, Trace):
|
|
|
|
|
- last_trace_id = item.trace_id
|
|
|
|
|
- if item.status == "completed":
|
|
|
|
|
- print(f"✅ [Done] {task_name} (Cost: ${item.total_cost:.4f})")
|
|
|
|
|
- task_cost = item.total_cost
|
|
|
|
|
- elif item.status == "failed":
|
|
|
|
|
- print(f"❌ [Fail] {task_name}: {item.error_message}")
|
|
|
|
|
- task_errors.append(f"{task_name} Failed: {item.error_message}")
|
|
|
|
|
- if isinstance(item, Message):
|
|
|
|
|
- if item.role == "tool":
|
|
|
|
|
- content = item.content if isinstance(item.content, dict) else {}
|
|
|
|
|
- t_name = content.get("tool_name", "unknown")
|
|
|
|
|
- if t_name in ("write_file", "write_json"):
|
|
|
|
|
- print(f" 💾 [File Written by {task_name}]")
|
|
|
|
|
- except Exception as e:
|
|
|
|
|
- err_msg = f"{type(e).__name__}: {e}"
|
|
|
|
|
- print(f"❌ [Exception] {task_name} crashed: {err_msg}")
|
|
|
|
|
- task_errors.append(f"{task_name} crashed: {err_msg}")
|
|
|
|
|
-
|
|
|
|
|
- # Verification & Recovery block
|
|
|
|
|
out_file = kwargs.get("output_file")
|
|
out_file = kwargs.get("output_file")
|
|
|
- if out_file and not Path(out_file).exists() and last_trace_id:
|
|
|
|
|
- print(f"⚠️ [Recovery] {task_name} missing output file. Triggering forced wrap-up continuation...")
|
|
|
|
|
- recovery_messages = [{
|
|
|
|
|
- "role": "user",
|
|
|
|
|
- "content": f"【系统强制指令】你的任务阶段已终止,但尚未将结果写入文件。请立刻调用 write_json 工具,将你目前已经搜集或处理到的原生结构化内容直接作为 json_data 参数对象写入到绝对路径 `{out_file}`,如果搜集失败也请写入空的总结对象。必须立刻执行写入!"
|
|
|
|
|
- }]
|
|
|
|
|
- rec_config = RunConfig(
|
|
|
|
|
- model=model_name,
|
|
|
|
|
- temperature=0.1,
|
|
|
|
|
- name=task_name + "_Rec",
|
|
|
|
|
- agent_type=prompt_name,
|
|
|
|
|
- trace_id=last_trace_id,
|
|
|
|
|
- knowledge=KnowledgeConfig(enable_completion_extraction=False, enable_extraction=False, enable_injection=False)
|
|
|
|
|
- )
|
|
|
|
|
- try:
|
|
|
|
|
- async for r_item in runner.run(messages=recovery_messages, config=rec_config):
|
|
|
|
|
- if isinstance(r_item, Trace):
|
|
|
|
|
- if r_item.status == "completed":
|
|
|
|
|
- task_cost += r_item.total_cost
|
|
|
|
|
- elif r_item.status == "failed":
|
|
|
|
|
- task_errors.append(f"{task_name} Recovery Failed: {r_item.error_message}")
|
|
|
|
|
- if isinstance(r_item, Message) and r_item.role == "tool":
|
|
|
|
|
- content = r_item.content if isinstance(r_item.content, dict) else {}
|
|
|
|
|
- if content.get("tool_name") in ("write_file", "write_json"):
|
|
|
|
|
- print(f" 💾 [Recovery File Written by {task_name}]")
|
|
|
|
|
- except Exception as e:
|
|
|
|
|
- err_msg = f"{type(e).__name__}: {e}"
|
|
|
|
|
- print(f"❌ [Exception Recovery] {task_name} crashed: {err_msg}")
|
|
|
|
|
- task_errors.append(f"{task_name} recovery crashed: {err_msg}")
|
|
|
|
|
-
|
|
|
|
|
- if out_file and Path(out_file).exists() and str(out_file).endswith(".json"):
|
|
|
|
|
- try:
|
|
|
|
|
- with open(out_file, "r", encoding="utf-8") as f:
|
|
|
|
|
- json.loads(f.read())
|
|
|
|
|
- print(f" ✅ [JSON Validated] {Path(out_file).name}")
|
|
|
|
|
- except json.JSONDecodeError as e:
|
|
|
|
|
- err_msg = f"Invalid JSON syntax in {Path(out_file).name}: {e}"
|
|
|
|
|
- print(f"❌ [Validation Error] {task_name}: {err_msg}")
|
|
|
|
|
- task_errors.append(f"{task_name} JSON Error: {e}")
|
|
|
|
|
-
|
|
|
|
|
- return task_cost, task_errors
|
|
|
|
|
|
|
+
|
|
|
|
|
+ max_retries = 3
|
|
|
|
|
+ last_trace_id = None
|
|
|
|
|
+ last_validation_error = None
|
|
|
|
|
+ final_trace_id = None # 用于返回最终成功的 trace_id
|
|
|
|
|
+
|
|
|
|
|
+ for attempt in range(max_retries):
|
|
|
|
|
+ if attempt > 0 and last_trace_id and last_validation_error:
|
|
|
|
|
+ # 续跑模式:把错误信息告诉之前的 agent,让它修复
|
|
|
|
|
+ print(f"🔄 [Continue {attempt}/{max_retries-1}] {task_name} - sending fix instructions to existing agent")
|
|
|
|
|
+
|
|
|
|
|
+ fix_messages = [{
|
|
|
|
|
+ "role": "user",
|
|
|
|
|
+ "content": (
|
|
|
|
|
+ f"【系统校验失败】你上一次写入的文件 `{out_file}` 未通过 schema 校验。\n"
|
|
|
|
|
+ f"错误详情:{last_validation_error}\n\n"
|
|
|
|
|
+ f"请立刻读取该文件,根据以上错误信息修复内容,然后重新调用 write_json 写入到同一路径 `{out_file}`。"
|
|
|
|
|
+ f"只修复有问题的部分,不要丢弃已有的正确内容。"
|
|
|
|
|
+ )
|
|
|
|
|
+ }]
|
|
|
|
|
+
|
|
|
|
|
+ fix_config = RunConfig(
|
|
|
|
|
+ model=prompt.config.get("model") or model_name,
|
|
|
|
|
+ temperature=0.1,
|
|
|
|
|
+ name=f"{task_name}_Fix{attempt}",
|
|
|
|
|
+ agent_type=prompt_name,
|
|
|
|
|
+ tools=target_tools,
|
|
|
|
|
+ tool_groups=tool_groups_map.get(prompt_name, ["core"]),
|
|
|
|
|
+ trace_id=last_trace_id,
|
|
|
|
|
+ knowledge=KnowledgeConfig(enable_completion_extraction=False, enable_extraction=False, enable_injection=False)
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ try:
|
|
|
|
|
+ async for item in runner.run(messages=fix_messages, config=fix_config):
|
|
|
|
|
+ if isinstance(item, Trace):
|
|
|
|
|
+ last_trace_id = item.trace_id
|
|
|
|
|
+ if item.status == "completed":
|
|
|
|
|
+ total_task_cost += item.total_cost
|
|
|
|
|
+ elif item.status == "failed":
|
|
|
|
|
+ task_errors.append(f"{task_name} Fix Failed: {item.error_message}")
|
|
|
|
|
+ if isinstance(item, Message) and item.role == "tool":
|
|
|
|
|
+ content = item.content if isinstance(item.content, dict) else {}
|
|
|
|
|
+ if content.get("tool_name") in ("write_file", "write_json"):
|
|
|
|
|
+ print(f" 💾 [Fix File Written by {task_name}]")
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ err_msg = f"{type(e).__name__}: {e}"
|
|
|
|
|
+ print(f"❌ [Exception Fix] {task_name} crashed: {err_msg}")
|
|
|
|
|
+ task_errors.append(f"{task_name} fix crashed: {err_msg}")
|
|
|
|
|
+
|
|
|
|
|
+ elif attempt > 0:
|
|
|
|
|
+ # 没有 trace_id 或没有 validation error,只能完全重跑
|
|
|
|
|
+ print(f"🔄 [Retry {attempt}/{max_retries-1}] {task_name} - no prior trace, full restart")
|
|
|
|
|
+ if out_file and Path(out_file).exists():
|
|
|
|
|
+ Path(out_file).unlink()
|
|
|
|
|
+
|
|
|
|
|
+ run_config = RunConfig(
|
|
|
|
|
+ model=prompt.config.get("model") or model_name,
|
|
|
|
|
+ temperature=prompt.config.get("temperature") or 0.3,
|
|
|
|
|
+ name=f"{task_name}_A{attempt}",
|
|
|
|
|
+ agent_type=prompt_name,
|
|
|
|
|
+ tools=target_tools,
|
|
|
|
|
+ tool_groups=tool_groups_map.get(prompt_name, ["core"]),
|
|
|
|
|
+ knowledge=KnowledgeConfig(enable_completion_extraction=False, enable_extraction=False, enable_injection=False)
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ print(f"🚀 [Launch] {task_name} (Attempt {attempt+1})")
|
|
|
|
|
+
|
|
|
|
|
+ try:
|
|
|
|
|
+ async for item in runner.run(messages=messages, config=run_config):
|
|
|
|
|
+ if isinstance(item, Trace):
|
|
|
|
|
+ last_trace_id = item.trace_id
|
|
|
|
|
+ if item.status == "completed":
|
|
|
|
|
+ total_task_cost += item.total_cost
|
|
|
|
|
+ elif item.status == "failed":
|
|
|
|
|
+ task_errors.append(f"{task_name} Failed: {item.error_message}")
|
|
|
|
|
+ if isinstance(item, Message):
|
|
|
|
|
+ if item.role == "tool":
|
|
|
|
|
+ content = item.content if isinstance(item.content, dict) else {}
|
|
|
|
|
+ t_name = content.get("tool_name", "unknown")
|
|
|
|
|
+ if t_name in ("write_file", "write_json"):
|
|
|
|
|
+ print(f" 💾 [File Written by {task_name}]")
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ err_msg = f"{type(e).__name__}: {e}"
|
|
|
|
|
+ print(f"❌ [Exception] {task_name} crashed: {err_msg}")
|
|
|
|
|
+ task_errors.append(f"{task_name} crashed: {err_msg}")
|
|
|
|
|
+
|
|
|
|
|
+ else:
|
|
|
|
|
+ # 首次执行
|
|
|
|
|
+ run_config = RunConfig(
|
|
|
|
|
+ model=prompt.config.get("model") or model_name,
|
|
|
|
|
+ temperature=prompt.config.get("temperature") or 0.3,
|
|
|
|
|
+ name=f"{task_name}_A{attempt}",
|
|
|
|
|
+ agent_type=prompt_name,
|
|
|
|
|
+ tools=target_tools,
|
|
|
|
|
+ tool_groups=tool_groups_map.get(prompt_name, ["core"]),
|
|
|
|
|
+ knowledge=KnowledgeConfig(enable_completion_extraction=False, enable_extraction=False, enable_injection=False)
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ print(f"🚀 [Launch] {task_name} (Attempt {attempt+1})")
|
|
|
|
|
+
|
|
|
|
|
+ try:
|
|
|
|
|
+ async for item in runner.run(messages=messages, config=run_config):
|
|
|
|
|
+ if isinstance(item, Trace):
|
|
|
|
|
+ last_trace_id = item.trace_id
|
|
|
|
|
+ if item.status == "completed":
|
|
|
|
|
+ total_task_cost += item.total_cost
|
|
|
|
|
+ elif item.status == "failed":
|
|
|
|
|
+ task_errors.append(f"{task_name} Failed: {item.error_message}")
|
|
|
|
|
+ if isinstance(item, Message):
|
|
|
|
|
+ if item.role == "tool":
|
|
|
|
|
+ content = item.content if isinstance(item.content, dict) else {}
|
|
|
|
|
+ t_name = content.get("tool_name", "unknown")
|
|
|
|
|
+ if t_name in ("write_file", "write_json"):
|
|
|
|
|
+ print(f" 💾 [File Written by {task_name}]")
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ err_msg = f"{type(e).__name__}: {e}"
|
|
|
|
|
+ print(f"❌ [Exception] {task_name} crashed: {err_msg}")
|
|
|
|
|
+ task_errors.append(f"{task_name} crashed: {err_msg}")
|
|
|
|
|
+
|
|
|
|
|
+ # Verification & Recovery block
|
|
|
|
|
+ if out_file and not Path(out_file).exists() and last_trace_id:
|
|
|
|
|
+ print(f"⚠️ [Recovery] {task_name} missing output file. Triggering forced wrap-up continuation...")
|
|
|
|
|
+ recovery_messages = [{
|
|
|
|
|
+ "role": "user",
|
|
|
|
|
+ "content": f"【系统强制指令】你的任务阶段已终止,但尚未将结果写入文件。请立刻调用 write_json 工具,将你目前已经搜集或处理到的原生结构化内容直接作为 json_data 参数对象写入到绝对路径 `{out_file}`,如果搜集失败也请写入空的总结对象。必须立刻执行写入!"
|
|
|
|
|
+ }]
|
|
|
|
|
+ rec_config = RunConfig(
|
|
|
|
|
+ model=model_name,
|
|
|
|
|
+ temperature=0.1,
|
|
|
|
|
+ name=task_name + "_Rec",
|
|
|
|
|
+ agent_type=prompt_name,
|
|
|
|
|
+ trace_id=last_trace_id,
|
|
|
|
|
+ knowledge=KnowledgeConfig(enable_completion_extraction=False, enable_extraction=False, enable_injection=False)
|
|
|
|
|
+ )
|
|
|
|
|
+ try:
|
|
|
|
|
+ async for r_item in runner.run(messages=recovery_messages, config=rec_config):
|
|
|
|
|
+ if isinstance(r_item, Trace):
|
|
|
|
|
+ if r_item.status == "completed":
|
|
|
|
|
+ total_task_cost += r_item.total_cost
|
|
|
|
|
+ elif r_item.status == "failed":
|
|
|
|
|
+ task_errors.append(f"{task_name} Recovery Failed: {r_item.error_message}")
|
|
|
|
|
+ if isinstance(r_item, Message) and r_item.role == "tool":
|
|
|
|
|
+ content = r_item.content if isinstance(r_item.content, dict) else {}
|
|
|
|
|
+ if content.get("tool_name") in ("write_file", "write_json"):
|
|
|
|
|
+ print(f" 💾 [Recovery File Written by {task_name}]")
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ err_msg = f"{type(e).__name__}: {e}"
|
|
|
|
|
+ print(f"❌ [Exception Recovery] {task_name} crashed: {err_msg}")
|
|
|
|
|
+ task_errors.append(f"{task_name} recovery crashed: {err_msg}")
|
|
|
|
|
+
|
|
|
|
|
+ # Schema Validation
|
|
|
|
|
+ if out_file and Path(out_file).exists() and str(out_file).endswith(".json"):
|
|
|
|
|
+ try:
|
|
|
|
|
+ with open(out_file, "r", encoding="utf-8") as f:
|
|
|
|
|
+ data = json.loads(f.read())
|
|
|
|
|
+
|
|
|
|
|
+ filename = Path(out_file).name
|
|
|
|
|
+ err = None
|
|
|
|
|
+ if filename.startswith("case_"):
|
|
|
|
|
+ err = validate_case(data)
|
|
|
|
|
+ elif filename == "blueprint.json":
|
|
|
|
|
+ err = validate_blueprint(data)
|
|
|
|
|
+ elif filename == "capabilities_extracted.json":
|
|
|
|
|
+ err = validate_capabilities(data)
|
|
|
|
|
+ elif filename == "strategy.json":
|
|
|
|
|
+ err = validate_strategy(data)
|
|
|
|
|
+
|
|
|
|
|
+ if err:
|
|
|
|
|
+ raise ValueError(f"Schema Validation Failed: {err}")
|
|
|
|
|
+
|
|
|
|
|
+ print(f" ✅ [Schema Validated] {Path(out_file).name}")
|
|
|
|
|
+ final_trace_id = last_trace_id
|
|
|
|
|
+ return total_task_cost, task_errors, final_trace_id # Success! Exit retry loop.
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ err_msg = f"Invalid JSON or Schema in {Path(out_file).name}: {e}"
|
|
|
|
|
+ print(f"❌ [Validation Error] {task_name}: {err_msg}")
|
|
|
|
|
+ task_errors.append(f"{task_name} Error: {e}")
|
|
|
|
|
+ last_validation_error = str(e)
|
|
|
|
|
+
|
|
|
|
|
+ if attempt == max_retries - 1:
|
|
|
|
|
+ print(f"❌ [Retry Limit] {task_name} exhausted retries.")
|
|
|
|
|
+ return total_task_cost, task_errors, last_trace_id
|
|
|
|
|
+ else:
|
|
|
|
|
+ print(f"❌ [Missing File] {task_name} did not produce output file after recovery.")
|
|
|
|
|
+ last_validation_error = None
|
|
|
|
|
+ if attempt == max_retries - 1:
|
|
|
|
|
+ return total_task_cost, task_errors, last_trace_id
|
|
|
|
|
+
|
|
|
|
|
+ return total_task_cost, task_errors, last_trace_id
|
|
|
|
|
|
|
|
async def run_anthropic_sdk_task(prompt_name: str, kwargs: dict, task_name: str, model_name: str):
|
|
async def run_anthropic_sdk_task(prompt_name: str, kwargs: dict, task_name: str, model_name: str):
|
|
|
"""
|
|
"""
|
|
@@ -167,126 +281,165 @@ async def run_anthropic_sdk_task(prompt_name: str, kwargs: dict, task_name: str,
|
|
|
|
|
|
|
|
# 3. 初始化并开启 Loop
|
|
# 3. 初始化并开启 Loop
|
|
|
# 提示:你需要在你的终端中配置好 ANTHROPIC_API_KEY 环境变量
|
|
# 提示:你需要在你的终端中配置好 ANTHROPIC_API_KEY 环境变量
|
|
|
- client = AsyncAnthropic()
|
|
|
|
|
- task_cost = 0.0
|
|
|
|
|
|
|
+ client = AsyncAnthropic()
|
|
|
|
|
+ total_task_cost = 0.0
|
|
|
task_errors = []
|
|
task_errors = []
|
|
|
- print(f"🚀 [Launch Anthropic SDK] {task_name}")
|
|
|
|
|
|
|
+ sdk_trace_id = None # 用于记录 Anthropic SDK 的 response ID
|
|
|
|
|
|
|
|
- max_loops = 50
|
|
|
|
|
- for loop_idx in range(max_loops):
|
|
|
|
|
- try:
|
|
|
|
|
- # 去除前缀(兼容比如 openrouter 传入的名字)
|
|
|
|
|
- clean_model = model_name.split("/")[-1] if "/" in model_name else model_name
|
|
|
|
|
-
|
|
|
|
|
- # 这里专门将实际请求映射到其可用的特殊别名 claude-sonnet-4-5
|
|
|
|
|
- target_model = "claude-sonnet-4-5" if "claude" in clean_model else clean_model
|
|
|
|
|
-
|
|
|
|
|
- response = await client.messages.create(
|
|
|
|
|
- model=target_model,
|
|
|
|
|
- max_tokens=4096,
|
|
|
|
|
- temperature=0.2,
|
|
|
|
|
- system=system_prompt,
|
|
|
|
|
- messages=messages,
|
|
|
|
|
- tools=anthropic_tools
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
- # (简略预估,不代表真实官方开销)
|
|
|
|
|
- if hasattr(response, 'usage'):
|
|
|
|
|
- step_cost = (response.usage.input_tokens / 1e6 * 3.0) + (response.usage.output_tokens / 1e6 * 15.0)
|
|
|
|
|
- task_cost += step_cost
|
|
|
|
|
-
|
|
|
|
|
- # 加入助手回复
|
|
|
|
|
- assistant_content = []
|
|
|
|
|
- tool_uses = []
|
|
|
|
|
-
|
|
|
|
|
- for content_block in response.content:
|
|
|
|
|
- if content_block.type == "text":
|
|
|
|
|
- text_val = content_block.text
|
|
|
|
|
- if text_val:
|
|
|
|
|
- assistant_content.append({"type": "text", "text": text_val})
|
|
|
|
|
- print(f"\n🤖 [{task_name} Output]:\n{text_val}\n")
|
|
|
|
|
- elif content_block.type == "tool_use":
|
|
|
|
|
- assistant_content.append({
|
|
|
|
|
- "type": "tool_use",
|
|
|
|
|
- "id": content_block.id,
|
|
|
|
|
- "name": content_block.name,
|
|
|
|
|
- "input": content_block.input
|
|
|
|
|
|
|
+ from examples.process_pipeline.script.validate_schema import validate_case, validate_blueprint, validate_capabilities, validate_strategy
|
|
|
|
|
+ out_file = kwargs.get("output_file")
|
|
|
|
|
+
|
|
|
|
|
+ max_retries = 3
|
|
|
|
|
+ for attempt in range(max_retries):
|
|
|
|
|
+ if attempt > 0:
|
|
|
|
|
+ print(f"🔄 [Retry SDK {attempt}/{max_retries-1}] {task_name}")
|
|
|
|
|
+ if out_file and Path(out_file).exists():
|
|
|
|
|
+ Path(out_file).unlink()
|
|
|
|
|
+
|
|
|
|
|
+ print(f"🚀 [Launch Anthropic SDK] {task_name} (Attempt {attempt+1})")
|
|
|
|
|
+
|
|
|
|
|
+ # Reset messages for retry
|
|
|
|
|
+ messages_copy = list(messages)
|
|
|
|
|
+
|
|
|
|
|
+ max_loops = 50
|
|
|
|
|
+ for loop_idx in range(max_loops):
|
|
|
|
|
+ try:
|
|
|
|
|
+ # 去除前缀(兼容比如 openrouter 传入的名字)
|
|
|
|
|
+ clean_model = model_name.split("/")[-1] if "/" in model_name else model_name
|
|
|
|
|
+
|
|
|
|
|
+ # 这里专门将实际请求映射到其可用的特殊别名 claude-sonnet-4-5
|
|
|
|
|
+ target_model = "claude-sonnet-4-5" if "claude" in clean_model else clean_model
|
|
|
|
|
+
|
|
|
|
|
+ response = await client.messages.create(
|
|
|
|
|
+ model=target_model,
|
|
|
|
|
+ max_tokens=4096,
|
|
|
|
|
+ temperature=0.2,
|
|
|
|
|
+ system=system_prompt,
|
|
|
|
|
+ messages=messages_copy,
|
|
|
|
|
+ tools=anthropic_tools
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ # (简略预估,不代表真实官方开销)
|
|
|
|
|
+ if hasattr(response, 'usage'):
|
|
|
|
|
+ step_cost = (response.usage.input_tokens / 1e6 * 3.0) + (response.usage.output_tokens / 1e6 * 15.0)
|
|
|
|
|
+ total_task_cost += step_cost
|
|
|
|
|
+
|
|
|
|
|
+ # 加入助手回复
|
|
|
|
|
+ assistant_content = []
|
|
|
|
|
+ tool_uses = []
|
|
|
|
|
+
|
|
|
|
|
+ for content_block in response.content:
|
|
|
|
|
+ if content_block.type == "text":
|
|
|
|
|
+ text_val = content_block.text
|
|
|
|
|
+ if text_val:
|
|
|
|
|
+ assistant_content.append({"type": "text", "text": text_val})
|
|
|
|
|
+ print(f"\n🤖 [{task_name} Output]:\n{text_val}\n")
|
|
|
|
|
+ elif content_block.type == "tool_use":
|
|
|
|
|
+ assistant_content.append({
|
|
|
|
|
+ "type": "tool_use",
|
|
|
|
|
+ "id": content_block.id,
|
|
|
|
|
+ "name": content_block.name,
|
|
|
|
|
+ "input": content_block.input
|
|
|
|
|
+ })
|
|
|
|
|
+ tool_uses.append(content_block)
|
|
|
|
|
+
|
|
|
|
|
+ if not assistant_content:
|
|
|
|
|
+ assistant_content.append({"type": "text", "text": "(Thinking completed but no output)"})
|
|
|
|
|
+
|
|
|
|
|
+ messages_copy.append({"role": "assistant", "content": assistant_content})
|
|
|
|
|
+
|
|
|
|
|
+ # 出口:没有调用工具说明任务结束
|
|
|
|
|
+ if not tool_uses:
|
|
|
|
|
+ print(f"✅ [Done Anthropic SDK] {task_name} (Cost: ${total_task_cost:.4f})")
|
|
|
|
|
+ break
|
|
|
|
|
+
|
|
|
|
|
+ # 工具执行与回传
|
|
|
|
|
+ tool_results = []
|
|
|
|
|
+ for tu in tool_uses:
|
|
|
|
|
+ if tu.name in ("write_file", "write_json"):
|
|
|
|
|
+ print(f" 💾 [File Written by SDK] {task_name}")
|
|
|
|
|
+
|
|
|
|
|
+ print(f" 🛠️ [Tool Exec Debug] name_is={tu.name}, input_is={tu.input}, type_is={type(tu.input)}")
|
|
|
|
|
+ # 执行本地环境的函数
|
|
|
|
|
+ result_str = await registry.execute(tu.name, tu.input)
|
|
|
|
|
+
|
|
|
|
|
+ tool_results.append({
|
|
|
|
|
+ "type": "tool_result",
|
|
|
|
|
+ "tool_use_id": tu.id,
|
|
|
|
|
+ "content": result_str
|
|
|
})
|
|
})
|
|
|
- tool_uses.append(content_block)
|
|
|
|
|
-
|
|
|
|
|
- if not assistant_content:
|
|
|
|
|
- assistant_content.append({"type": "text", "text": "(Thinking completed but no output)"})
|
|
|
|
|
|
|
+
|
|
|
|
|
+ messages_copy.append({"role": "user", "content": tool_results})
|
|
|
|
|
|
|
|
- messages.append({"role": "assistant", "content": assistant_content})
|
|
|
|
|
-
|
|
|
|
|
- # 出口:没有调用工具说明任务结束
|
|
|
|
|
- if not tool_uses:
|
|
|
|
|
- print(f"✅ [Done Anthropic SDK] {task_name} (Cost: ${task_cost:.4f})")
|
|
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ err_msg = str(e)
|
|
|
|
|
+ print(f"❌ [Fail SDK Core] {task_name}: {err_msg}")
|
|
|
|
|
+ task_errors.append(err_msg)
|
|
|
break
|
|
break
|
|
|
|
|
|
|
|
- # 工具执行与回传
|
|
|
|
|
- tool_results = []
|
|
|
|
|
- for tu in tool_uses:
|
|
|
|
|
- if tu.name in ("write_file", "write_json"):
|
|
|
|
|
- print(f" 💾 [File Written by SDK] {task_name}")
|
|
|
|
|
|
|
+ # Verification & Recovery block for SDK (porting from AgentRunner)
|
|
|
|
|
+ if out_file and not Path(out_file).exists():
|
|
|
|
|
+ print(f"⚠️ [Recovery SDK] {task_name} missing output file. Triggering forced wrap-up continuation...")
|
|
|
|
|
+ messages_copy.append({
|
|
|
|
|
+ "role": "user",
|
|
|
|
|
+ "content": f"【系统强制指令】你的任务阶段已完成分析,但尚未将最终结果写入目标文件。请立刻调用 write_json (或 write_file) 工具,将你的成果数据直接写入到绝对路径 `{out_file}`,务必立刻执行写入动作!"
|
|
|
|
|
+ })
|
|
|
|
|
+ try:
|
|
|
|
|
+ target_model = "claude-sonnet-4-5" if "claude" in clean_model else clean_model
|
|
|
|
|
+ rec_response = await client.messages.create(
|
|
|
|
|
+ model=target_model,
|
|
|
|
|
+ max_tokens=4096,
|
|
|
|
|
+ temperature=0.1,
|
|
|
|
|
+ system=system_prompt,
|
|
|
|
|
+ messages=messages_copy,
|
|
|
|
|
+ tools=anthropic_tools,
|
|
|
|
|
+ tool_choice={"type": "any"}
|
|
|
|
|
+ )
|
|
|
|
|
+ for content_block in rec_response.content:
|
|
|
|
|
+ if content_block.type == "tool_use":
|
|
|
|
|
+ if content_block.name in ("write_file", "write_json"):
|
|
|
|
|
+ print(f" 💾 [Recovery File Written by SDK] {task_name}")
|
|
|
|
|
+ await registry.execute(content_block.name, content_block.input)
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ print(f"❌ [Fail SDK Recovery] {task_name}: {e}")
|
|
|
|
|
+ task_errors.append(str(e))
|
|
|
|
|
|
|
|
- print(f" 🛠️ [Tool Exec Debug] name_is={tu.name}, input_is={tu.input}, type_is={type(tu.input)}")
|
|
|
|
|
- # 执行本地环境的函数
|
|
|
|
|
- result_str = await registry.execute(tu.name, tu.input)
|
|
|
|
|
|
|
+ # Schema Validation
|
|
|
|
|
+ if out_file and Path(out_file).exists() and str(out_file).endswith(".json"):
|
|
|
|
|
+ try:
|
|
|
|
|
+ with open(out_file, "r", encoding="utf-8") as f:
|
|
|
|
|
+ data = json.loads(f.read())
|
|
|
|
|
+
|
|
|
|
|
+ filename = Path(out_file).name
|
|
|
|
|
+ err = None
|
|
|
|
|
+ if filename.startswith("case_"):
|
|
|
|
|
+ err = validate_case(data)
|
|
|
|
|
+ elif filename == "blueprint.json":
|
|
|
|
|
+ err = validate_blueprint(data)
|
|
|
|
|
+ elif filename == "capabilities_extracted.json":
|
|
|
|
|
+ err = validate_capabilities(data)
|
|
|
|
|
+ elif filename == "strategy.json":
|
|
|
|
|
+ err = validate_strategy(data)
|
|
|
|
|
+
|
|
|
|
|
+ if err:
|
|
|
|
|
+ raise ValueError(f"Schema Validation Failed: {err}")
|
|
|
|
|
+
|
|
|
|
|
+ print(f" ✅ [Schema Validated] {Path(out_file).name}")
|
|
|
|
|
+ return total_task_cost, task_errors # Success! Exit retry loop.
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ err_msg = f"Invalid JSON or Schema in {Path(out_file).name}: {e}"
|
|
|
|
|
+ print(f"❌ [Validation Error] {task_name}: {err_msg}")
|
|
|
|
|
+ task_errors.append(f"{task_name} Error: {e}")
|
|
|
|
|
|
|
|
- tool_results.append({
|
|
|
|
|
- "type": "tool_result",
|
|
|
|
|
- "tool_use_id": tu.id,
|
|
|
|
|
- "content": result_str
|
|
|
|
|
- })
|
|
|
|
|
|
|
+ if attempt == max_retries - 1:
|
|
|
|
|
+ print(f"❌ [Retry Limit] {task_name} exhausted retries.")
|
|
|
|
|
+ return total_task_cost, task_errors
|
|
|
|
|
+ else:
|
|
|
|
|
+ print(f"❌ [Missing File] {task_name} did not produce output file after recovery.")
|
|
|
|
|
+ if attempt == max_retries - 1:
|
|
|
|
|
+ return total_task_cost, task_errors
|
|
|
|
|
|
|
|
- messages.append({"role": "user", "content": tool_results})
|
|
|
|
|
-
|
|
|
|
|
- except Exception as e:
|
|
|
|
|
- err_msg = str(e)
|
|
|
|
|
- print(f"❌ [Fail SDK Core] {task_name}: {err_msg}")
|
|
|
|
|
- task_errors.append(err_msg)
|
|
|
|
|
- break
|
|
|
|
|
-
|
|
|
|
|
- # Verification & Recovery block for SDK (porting from AgentRunner)
|
|
|
|
|
- out_file = kwargs.get("output_file")
|
|
|
|
|
- if out_file and not Path(out_file).exists():
|
|
|
|
|
- print(f"⚠️ [Recovery SDK] {task_name} missing output file. Triggering forced wrap-up continuation...")
|
|
|
|
|
- messages.append({
|
|
|
|
|
- "role": "user",
|
|
|
|
|
- "content": f"【系统强制指令】你的任务阶段已完成分析,但尚未将最终结果写入目标文件。请立刻调用 write_json (或 write_file) 工具,将你的成果数据直接写入到绝对路径 `{out_file}`,务必立刻执行写入动作!"
|
|
|
|
|
- })
|
|
|
|
|
- try:
|
|
|
|
|
- target_model = "claude-sonnet-4-5" if "claude" in clean_model else clean_model
|
|
|
|
|
- rec_response = await client.messages.create(
|
|
|
|
|
- model=target_model,
|
|
|
|
|
- max_tokens=4096,
|
|
|
|
|
- temperature=0.1,
|
|
|
|
|
- system=system_prompt,
|
|
|
|
|
- messages=messages,
|
|
|
|
|
- tools=anthropic_tools,
|
|
|
|
|
- tool_choice={"type": "any"}
|
|
|
|
|
- )
|
|
|
|
|
- for content_block in rec_response.content:
|
|
|
|
|
- if content_block.type == "tool_use":
|
|
|
|
|
- if content_block.name in ("write_file", "write_json"):
|
|
|
|
|
- print(f" 💾 [Recovery File Written by SDK] {task_name}")
|
|
|
|
|
- await registry.execute(content_block.name, content_block.input)
|
|
|
|
|
- except Exception as e:
|
|
|
|
|
- print(f"❌ [Fail SDK Recovery] {task_name}: {e}")
|
|
|
|
|
- task_errors.append(str(e))
|
|
|
|
|
-
|
|
|
|
|
- if out_file and Path(out_file).exists() and str(out_file).endswith(".json"):
|
|
|
|
|
- try:
|
|
|
|
|
- with open(out_file, "r", encoding="utf-8") as f:
|
|
|
|
|
- json.loads(f.read())
|
|
|
|
|
- print(f" ✅ [JSON Validated] {Path(out_file).name}")
|
|
|
|
|
- except json.JSONDecodeError as e:
|
|
|
|
|
- err_msg = f"Invalid JSON syntax in {Path(out_file).name}: {e}"
|
|
|
|
|
- print(f"❌ [Validation Error] {task_name}: {err_msg}")
|
|
|
|
|
- task_errors.append(f"{task_name} JSON Error: {e}")
|
|
|
|
|
-
|
|
|
|
|
- return task_cost, task_errors
|
|
|
|
|
|
|
+ return total_task_cost, task_errors
|
|
|
|
|
|
|
|
async def main():
|
|
async def main():
|
|
|
parser = argparse.ArgumentParser()
|
|
parser = argparse.ArgumentParser()
|
|
@@ -295,6 +448,7 @@ async def main():
|
|
|
parser.add_argument("--research-only", action="store_true", help="Only run research phases, skip Phase 2 and 3")
|
|
parser.add_argument("--research-only", action="store_true", help="Only run research phases, skip Phase 2 and 3")
|
|
|
parser.add_argument("--platforms", type=str, default="xhs,youtube,bili,x", help="Comma-separated list of platforms to search")
|
|
parser.add_argument("--platforms", type=str, default="xhs,youtube,bili,x", help="Comma-separated list of platforms to search")
|
|
|
parser.add_argument("--use-claude-sdk", action="store_true", help="Use pure Anthropic SDK (run_anthropic_sdk_task) instead of internal AgentRunner for Phase 2/3")
|
|
parser.add_argument("--use-claude-sdk", action="store_true", help="Use pure Anthropic SDK (run_anthropic_sdk_task) instead of internal AgentRunner for Phase 2/3")
|
|
|
|
|
+ parser.add_argument("--restart-mode", type=str, default="smart", help="Granular restart mode for cascading deletions")
|
|
|
args = parser.parse_args()
|
|
args = parser.parse_args()
|
|
|
|
|
|
|
|
base_dir = Path(__file__).parent
|
|
base_dir = Path(__file__).parent
|
|
@@ -437,9 +591,11 @@ async def main():
|
|
|
phase1_tasks.append(run_agent_task(runner_qwen, "researcher", kwargs, f"P1_Research_{p}", qwen_model))
|
|
phase1_tasks.append(run_agent_task(runner_qwen, "researcher", kwargs, f"P1_Research_{p}", qwen_model))
|
|
|
|
|
|
|
|
phase1_results = await asyncio.gather(*phase1_tasks)
|
|
phase1_results = await asyncio.gather(*phase1_tasks)
|
|
|
- for (task_cost, task_errors), p in zip(phase1_results, platforms):
|
|
|
|
|
|
|
+ phase1_trace_ids = {}
|
|
|
|
|
+ for (task_cost, task_errors, trace_id), p in zip(phase1_results, platforms):
|
|
|
total_cost += task_cost
|
|
total_cost += task_cost
|
|
|
costs_breakdown[f"P1_Research_{p}"] = round(task_cost, 4)
|
|
costs_breakdown[f"P1_Research_{p}"] = round(task_cost, 4)
|
|
|
|
|
+ phase1_trace_ids[f"P1_Research_{p}"] = trace_id
|
|
|
global_errors.extend(task_errors)
|
|
global_errors.extend(task_errors)
|
|
|
|
|
|
|
|
# Check if cases actually got written
|
|
# Check if cases actually got written
|
|
@@ -512,9 +668,11 @@ async def main():
|
|
|
|
|
|
|
|
if to_await:
|
|
if to_await:
|
|
|
phase2_results = await asyncio.gather(*to_await)
|
|
phase2_results = await asyncio.gather(*to_await)
|
|
|
- for (cost, errs), t_name in zip(phase2_results, names_await):
|
|
|
|
|
|
|
+ phase2_trace_ids = {}
|
|
|
|
|
+ for (cost, errs, trace_id), t_name in zip(phase2_results, names_await):
|
|
|
total_cost += cost
|
|
total_cost += cost
|
|
|
costs_breakdown[t_name] = round(cost, 4)
|
|
costs_breakdown[t_name] = round(cost, 4)
|
|
|
|
|
+ phase2_trace_ids[t_name] = trace_id
|
|
|
global_errors.extend(errs)
|
|
global_errors.extend(errs)
|
|
|
|
|
|
|
|
# Phase 3: REDUCE 2 (Final Assembly) uses Claude
|
|
# Phase 3: REDUCE 2 (Final Assembly) uses Claude
|
|
@@ -529,7 +687,7 @@ async def main():
|
|
|
|
|
|
|
|
if args.use_claude_sdk:
|
|
if args.use_claude_sdk:
|
|
|
print(" > Using [Anthropic SDK Core]")
|
|
print(" > Using [Anthropic SDK Core]")
|
|
|
- phase3_cost, phase3_errs = await run_anthropic_sdk_task("assemble_strategy", {
|
|
|
|
|
|
|
+ phase3_cost, phase3_errs, phase3_trace_id = await run_anthropic_sdk_task("assemble_strategy", {
|
|
|
"requirement": requirement,
|
|
"requirement": requirement,
|
|
|
"blueprint_file": blueprint_file,
|
|
"blueprint_file": blueprint_file,
|
|
|
"capabilities_file": capabilities_file,
|
|
"capabilities_file": capabilities_file,
|
|
@@ -537,7 +695,7 @@ async def main():
|
|
|
}, "P3_Assembler", claude_model)
|
|
}, "P3_Assembler", claude_model)
|
|
|
else:
|
|
else:
|
|
|
print(" > Using [AgentRunner Core]")
|
|
print(" > Using [AgentRunner Core]")
|
|
|
- phase3_cost, phase3_errs = await run_agent_task(runner_claude, "assemble_strategy", {
|
|
|
|
|
|
|
+ phase3_cost, phase3_errs, phase3_trace_id = await run_agent_task(runner_claude, "assemble_strategy", {
|
|
|
"requirement": requirement,
|
|
"requirement": requirement,
|
|
|
"blueprint_file": blueprint_file,
|
|
"blueprint_file": blueprint_file,
|
|
|
"capabilities_file": capabilities_file,
|
|
"capabilities_file": capabilities_file,
|
|
@@ -562,12 +720,23 @@ async def main():
|
|
|
except json.JSONDecodeError:
|
|
except json.JSONDecodeError:
|
|
|
pass
|
|
pass
|
|
|
|
|
|
|
|
|
|
+ # Collect trace_ids from all phases
|
|
|
|
|
+ trace_ids = {}
|
|
|
|
|
+ if not args.skip_research and 'phase1_trace_ids' in dir():
|
|
|
|
|
+ trace_ids.update(phase1_trace_ids)
|
|
|
|
|
+ if not args.research_only:
|
|
|
|
|
+ if 'phase2_trace_ids' in dir():
|
|
|
|
|
+ trace_ids.update(phase2_trace_ids)
|
|
|
|
|
+ if 'phase3_trace_id' in dir() and phase3_trace_id:
|
|
|
|
|
+ trace_ids["P3_Assembler"] = phase3_trace_id
|
|
|
|
|
+
|
|
|
metrics_data.append({
|
|
metrics_data.append({
|
|
|
"index": args.index,
|
|
"index": args.index,
|
|
|
"requirement": requirement[:80] + "...",
|
|
"requirement": requirement[:80] + "...",
|
|
|
"duration_seconds": round(elapsed_sec, 2),
|
|
"duration_seconds": round(elapsed_sec, 2),
|
|
|
"total_cost_usd": round(total_cost, 4),
|
|
"total_cost_usd": round(total_cost, 4),
|
|
|
"costs_breakdown": costs_breakdown,
|
|
"costs_breakdown": costs_breakdown,
|
|
|
|
|
+ "trace_ids": trace_ids,
|
|
|
"errors": global_errors,
|
|
"errors": global_errors,
|
|
|
"timestamp": datetime.now().isoformat()
|
|
"timestamp": datetime.now().isoformat()
|
|
|
})
|
|
})
|