há 2 meses atrás · 5f21dfade3
--- a/agent/core/runner.py
+++ b/agent/core/runner.py
@@ -520,10 +520,10 @@ class AgentRunner:
 
															                     trace_id, trace_obj.head_sequence
														
 
															                 )
														
 
															-                # 清理尾部不完整的 tool_call/tool_result 对
														
 
															-                # 当 agent 被 stop 时，可能恰好存了 assistant(tool_calls) 但还没存 tool results，
														
 
															-                # 直接发给 LLM 会报 400（tool_use without tool_result）
														
 
															-                main_path = self._trim_dangling_tool_calls(main_path)
														
 
															+                # 修复 orphaned tool_calls（中断导致的 tool_call 无 tool_result）
														
 
															+                main_path, sequence = await self._heal_orphaned_tool_calls(
														
 
															+                    main_path, trace_id, goal_tree, sequence,
														
 
															+                )
														
 
															                 history = [msg.to_llm_dict() for msg in main_path]
														
 
															                 if main_path:
														
@@ -911,36 +911,154 @@ class AgentRunner:
 
															         return cutoff
														
 
															-    @staticmethod
														
 
															-    def _trim_dangling_tool_calls(messages: List[Message]) -> List[Message]:
														
 
															+    async def _heal_orphaned_tool_calls(
														
 
															+        self,
														
 
															+        messages: List[Message],
														
 
															+        trace_id: str,
														
 
															+        goal_tree: Optional[GoalTree],
														
 
															+        sequence: int,
														
 
															+    ) -> tuple:
														
 
															         """
														
 
															-        从消息列表尾部移除不完整的 tool_call/tool_result 对。
														
 
															+        检测并修复消息历史中的 orphaned tool_calls。
														
 
															+
														
 
															+        当 agent 被 stop/crash 中断时，可能有 assistant 的 tool_calls 没有对应的
														
 
															+        tool results（包括多 tool_call 部分完成的情况）。直接发给 LLM 会导致 400。
														
 
															-        当 agent 被 stop 中断时，可能最后一条消息是带 tool_calls 的 assistant，
														
 
															-        但对应的 tool results 尚未存储。直接发给 LLM 会导致 400 错误。
														
 
															-        此方法从尾部向前回退，直到最后一条消息不是悬空的 tool_call。
														
 
															+        修复策略：为每个缺失的 tool_result 插入合成的"中断通知"消息，而非裁剪。
														
 
															+        - 普通工具：简短中断提示
														
 
															+        - agent/evaluate：包含 sub_trace_id、执行统计、continue_from 指引
														
 
															+
														
 
															+        合成消息持久化到 store，确保幂等（下次续跑不再触发）。
														
 
															+
														
 
															+        Returns:
														
 
															+            (healed_messages, next_sequence)
														
 
															         """
														
 
															         if not messages:
														
 
															-            return messages
														
 
															+            return messages, sequence
														
 
															-        while messages:
														
 
															-            last = messages[-1]
														
 
															-            if last.role != "assistant":
														
 
															-                break
														
 
															+        # 收集所有 tool_call IDs → (assistant_msg, tool_call_dict)
														
 
															+        tc_map: Dict[str, tuple] = {}
														
 
															+        result_ids: set = set()
														
 
															-            content = last.content
														
 
															-            if not isinstance(content, dict) or not content.get("tool_calls"):
														
 
															-                break
														
 
															+        for msg in messages:
														
 
															+            if msg.role == "assistant":
														
 
															+                content = msg.content
														
 
															+                if isinstance(content, dict) and content.get("tool_calls"):
														
 
															+                    for tc in content["tool_calls"]:
														
 
															+                        tc_id = tc.get("id")
														
 
															+                        if tc_id:
														
 
															+                            tc_map[tc_id] = (msg, tc)
														
 
															+            elif msg.role == "tool" and msg.tool_call_id:
														
 
															+                result_ids.add(msg.tool_call_id)
														
 
															+
														
 
															+        orphaned_ids = [tc_id for tc_id in tc_map if tc_id not in result_ids]
														
 
															+        if not orphaned_ids:
														
 
															+            return messages, sequence
														
 
															+
														
 
															+        logger.info(
														
 
															+            "检测到 %d 个 orphaned tool_calls，生成合成中断通知",
														
 
															+            len(orphaned_ids),
														
 
															+        )
														
 
															+
														
 
															+        healed = list(messages)
														
 
															+        head_seq = messages[-1].sequence
														
 
															+
														
 
															+        for tc_id in orphaned_ids:
														
 
															+            assistant_msg, tc = tc_map[tc_id]
														
 
															+            tool_name = tc.get("function", {}).get("name", "unknown")
														
 
															+
														
 
															+            if tool_name in ("agent", "evaluate"):
														
 
															+                result_text = self._build_agent_interrupted_result(
														
 
															+                    tc, goal_tree, assistant_msg,
														
 
															+                )
														
 
															+            else:
														
 
															+                result_text = (
														
 
															+                    f"⚠️ 工具 {tool_name} 执行被中断（进程异常退出），"
														
 
															+                    "未获得执行结果。请根据需要重新调用。"
														
 
															+                )
														
 
															-            # 最后一条是 assistant + tool_calls，检查 tool_results 是否齐全
														
 
															-            # 既然它是最后一条，后面没有 tool results → 悬空，需要移除
														
 
															-            logger.info(
														
 
															-                "移除尾部悬空的 tool_call 消息 (sequence=%d)",
														
 
															-                last.sequence,
														
 
															+            synthetic_msg = Message.create(
														
 
															+                trace_id=trace_id,
														
 
															+                role="tool",
														
 
															+                sequence=sequence,
														
 
															+                goal_id=assistant_msg.goal_id,
														
 
															+                parent_sequence=head_seq,
														
 
															+                tool_call_id=tc_id,
														
 
															+                content={"tool_name": tool_name, "result": result_text},
														
 
															+            )
														
 
															+
														
 
															+            if self.trace_store:
														
 
															+                await self.trace_store.add_message(synthetic_msg)
														
 
															+
														
 
															+            healed.append(synthetic_msg)
														
 
															+            head_seq = sequence
														
 
															+            sequence += 1
														
 
															+
														
 
															+        # 更新 trace head/last sequence
														
 
															+        if self.trace_store:
														
 
															+            await self.trace_store.update_trace(
														
 
															+                trace_id,
														
 
															+                head_sequence=head_seq,
														
 
															+                last_sequence=max(head_seq, sequence - 1),
														
 
															+            )
														
 
															+
														
 
															+        return healed, sequence
														
 
															+
														
 
															+    def _build_agent_interrupted_result(
														
 
															+        self,
														
 
															+        tc: Dict,
														
 
															+        goal_tree: Optional[GoalTree],
														
 
															+        assistant_msg: Message,
														
 
															+    ) -> str:
														
 
															+        """为中断的 agent/evaluate 工具调用构建合成结果（对齐正常返回值格式）"""
														
 
															+        args_str = tc.get("function", {}).get("arguments", "{}")
														
 
															+        try:
														
 
															+            args = json.loads(args_str) if isinstance(args_str, str) else args_str
														
 
															+        except json.JSONDecodeError:
														
 
															+            args = {}
														
 
															+
														
 
															+        task = args.get("task", "未知任务")
														
 
															+        if isinstance(task, list):
														
 
															+            task = "; ".join(task)
														
 
															+
														
 
															+        tool_name = tc.get("function", {}).get("name", "agent")
														
 
															+        mode = "evaluate" if tool_name == "evaluate" else "delegate"
														
 
															+
														
 
															+        # 从 goal_tree 查找 sub_trace 信息
														
 
															+        sub_trace_id = None
														
 
															+        stats = None
														
 
															+        if goal_tree and assistant_msg.goal_id:
														
 
															+            goal = goal_tree.find(assistant_msg.goal_id)
														
 
															+            if goal and goal.sub_trace_ids:
														
 
															+                first = goal.sub_trace_ids[0]
														
 
															+                if isinstance(first, dict):
														
 
															+                    sub_trace_id = first.get("trace_id")
														
 
															+                elif isinstance(first, str):
														
 
															+                    sub_trace_id = first
														
 
															+                if goal.cumulative_stats:
														
 
															+                    s = goal.cumulative_stats
														
 
															+                    if s.message_count > 0:
														
 
															+                        stats = {
														
 
															+                            "message_count": s.message_count,
														
 
															+                            "total_tokens": s.total_tokens,
														
 
															+                            "total_cost": round(s.total_cost, 4),
														
 
															+                        }
														
 
															+
														
 
															+        result: Dict[str, Any] = {
														
 
															+            "mode": mode,
														
 
															+            "status": "interrupted",
														
 
															+            "summary": "⚠️ 子Agent执行被中断（进程异常退出）",
														
 
															+            "task": task,
														
 
															+        }
														
 
															+        if sub_trace_id:
														
 
															+            result["sub_trace_id"] = sub_trace_id
														
 
															+            result["hint"] = (
														
 
															+                f'使用 continue_from="{sub_trace_id}" 可继续执行，保留已有进度'
														
 
															             )
														
 
															-            messages = messages[:-1]
														
 
															+        if stats:
														
 
															+            result["stats"] = stats
														
 
															-        return messages
														
 
															+        return json.dumps(result, ensure_ascii=False, indent=2)
														
 
															     # ===== 上下文注入 =====
														
--- a/agent/llm/yescode.py
+++ b/agent/llm/yescode.py
@@ -37,15 +37,62 @@ _RETRYABLE_EXCEPTIONS = (
 
															     ConnectionError,
														
 
															 )
														
 
															-# 简短模型名 -> Anthropic API 模型名
														
 
															-MODEL_MAP = {
														
 
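															+# Model-name resolution tables. MODEL_EXACT maps known names/aliases directly to API model names.
														
 
															+# MODEL_FUZZY holds (keyword, target model) rules ordered from specific to broad; they are tried in order only when no exact entry matches.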
														
 
															+MODEL_EXACT = {
														
 
															+    "claude-sonnet-4-5-20250929": "claude-sonnet-4-5-20250929",
														
 
															+    "claude-sonnet-4-5": "claude-sonnet-4-5-20250929",
														
 
															     "claude-sonnet-4.5": "claude-sonnet-4-5-20250929",
														
 
															+    "claude-opus-4-6": "claude-opus-4-6",
														
 
															+    "claude-opus-4-5-20251101": "claude-opus-4-5-20251101",
														
 
															+    "claude-opus-4-5": "claude-opus-4-5-20251101",
														
 
															+    "claude-opus-4-1-20250805": "claude-opus-4-1-20250805",
														
 
															+    "claude-opus-4-1": "claude-opus-4-1-20250805",
														
 
															+    "claude-haiku-4-5-20251001": "claude-haiku-4-5-20251001",
														
 
															+    "claude-haiku-4-5": "claude-haiku-4-5-20251001",
														
 
															 }
														
 
															+MODEL_FUZZY = [
														
 
															+    # 版本+家族（精确）
														
 
															+    ("sonnet-4-5", "claude-sonnet-4-5-20250929"),
														
 
															+    ("sonnet-4.5", "claude-sonnet-4-5-20250929"),
														
 
															+    ("opus-4-6", "claude-opus-4-6"),
														
 
															+    ("opus-4.6", "claude-opus-4-6"),
														
 
															+    ("opus-4-5", "claude-opus-4-5-20251101"),
														
 
															+    ("opus-4.5", "claude-opus-4-5-20251101"),
														
 
															+    ("opus-4-1", "claude-opus-4-1-20250805"),
														
 
															+    ("opus-4.1", "claude-opus-4-1-20250805"),
														
 
															+    ("haiku-4-5", "claude-haiku-4-5-20251001"),
														
 
															+    ("haiku-4.5", "claude-haiku-4-5-20251001"),
														
 
															+    # 仅家族名 → 最新版本
														
 
															+    ("sonnet", "claude-sonnet-4-5-20250929"),
														
 
															+    ("opus", "claude-opus-4-6"),
														
 
															+    ("haiku", "claude-haiku-4-5-20251001"),
														
 
															+]
														
 
															+
														
 
															 def _resolve_model(model: str) -> str:
														
 
															-    """将简短模型名映射为实际 API 模型名"""
														
 
															-    return MODEL_MAP.get(model, model)
														
 
															+    """将任意格式的模型名映射为 Yescode API 接受的模型名。
														
 
															+    支持：OpenRouter 前缀(anthropic/xxx)、带点号(4.5)、纯家族名(sonnet)等。
														
 
															+    """
														
 
															+    # 1. 剥离 provider 前缀
														
 
															+    if "/" in model:
														
 
															+        model = model.split("/", 1)[1]
														
 
															+
														
 
															+    # 2. 精确匹配
														
 
															+    if model in MODEL_EXACT:
														
 
															+        return MODEL_EXACT[model]
														
 
															+
														
 
															+    # 3. 模糊匹配（大小写不敏感）
														
 
															+    model_lower = model.lower()
														
 
															+    for keyword, target in MODEL_FUZZY:
														
 
															+        if keyword in model_lower:
														
 
															+            logger.info("模型名模糊匹配: %s → %s", model, target)
														
 
															+            return target
														
 
															+
														
 
															+    # 4. 兜底：原样返回，让 API 报错
														
 
															+    logger.warning("未能匹配模型名: %s, 原样传递", model)
														
 
															+    return model
														
 
															 def _normalize_tool_call_ids(messages: List[Dict[str, Any]], target_prefix: str) -> List[Dict[str, Any]]:
														
@@ -298,7 +345,7 @@ async def yescode_llm_call(
 
															     }
														
 
															     # 调用 API（带重试）
														
 
															-    max_retries = 3
														
 
															+    max_retries = 5
														
 
															     last_exception = None
														
 
															     for attempt in range(max_retries):
														
 
															         async with httpx.AsyncClient(timeout=300.0) as client:
														
@@ -311,7 +358,7 @@ async def yescode_llm_call(
 
															             except httpx.HTTPStatusError as e:
														
 
															                 error_body = e.response.text
														
 
															                 status = e.response.status_code
														
 
															-                if status in (429, 500, 502, 503, 504, 529) and attempt < max_retries - 1:
														
 
															+                if status in (429, 500, 502, 503, 504, 524, 529) and attempt < max_retries - 1:
														
 
															                     wait = 2 ** attempt * 2
														
 
															                     logger.warning(
														
 
															                         "[Yescode] HTTP %d (attempt %d/%d), retrying in %ds: %s",
														
--- a/docs/README.md
+++ b/docs/README.md
@@ -290,7 +290,18 @@ await runner.stop(trace_id)
 
															 调用后 agent loop 在下一个检查点退出，Trace 状态置为 `stopped`，同时保存当前 `head_sequence`（确保续跑时能正确加载完整历史）。
														
 
															-**消息完整性保护**：续跑/回溯加载历史时，`_build_history` 自动裁剪尾部悬空的 tool_call 消息（`_trim_dangling_tool_calls`）。当 agent 被 stop 中断时，可能最后一条消息是带 `tool_calls` 的 assistant，但对应的 tool results 尚未存储；直接发给 LLM 会导致 400 错误，此机制在框架层自动处理。
														
 
															+**消息完整性保护（orphaned tool_call 修复）**：续跑加载历史时，`_build_history` 自动检测并修复 orphaned tool_calls（`_heal_orphaned_tool_calls`）。当 agent 被 stop/crash 中断时，可能存在 assistant 的 tool_calls 没有对应的 tool results（包括部分完成的情况：3 个 tool_call 只有 1 个 tool_result）。直接发给 LLM 会导致 400 错误。
														
 
															+
														
 
															+修复策略：为每个缺失的 tool_result **插入合成的中断通知**（而非裁剪 assistant 消息）：
														
 
															+
														
 
															+| 工具类型 | 合成 tool_result 内容 |
														
 
															+|----------|---------------------|
														
 
															+| 普通工具 | 简短中断提示，建议重新调用 |
														
 
															+| agent/evaluate | 结构化中断信息（JSON）：`status: "interrupted"` 与任务描述；能从 GoalTree 查到子 Trace 时，附带 `sub_trace_id`、执行统计和 `continue_from` 用法指引 |
														
 
															+
														
 
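															+agent/evaluate 工具的合成结果对齐正常返回值格式：当能从 GoalTree 查到子 Trace 时包含 `sub_trace_id` 字段，主 Agent 可直接使用 `agent(task=..., continue_from=sub_trace_id)` 续跑被中断的子 Agent；查不到时仅返回 `status: "interrupted"` 与任务描述。合成消息追加在历史末尾，并在配置了 trace store 时持久化存储，确保幂等（下次续跑不再触发）。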
														
 
															+
														
 
															+**实现**：`agent/core/runner.py:AgentRunner._heal_orphaned_tool_calls`
														
 
															 - `run(messages, config)`：**核心方法**，流式返回 `AsyncIterator[Union[Trace, Message]]`
														
 
															 - `run_result(messages, config)`：便利方法，内部消费 `run()`，返回结构化结果。主要用于 `agent`/`evaluate` 工具内部