Quellcode durchsuchen

fix: multi-modal msg format & goaltree overwriting

Talegorithm vor 1 Woche
Ursprung
Commit
791b24399c

+ 8 - 7
agent/core/runner.py

@@ -475,6 +475,10 @@ class AgentRunner:
             raise ValueError(f"Trace not found: {config.trace_id}")
             raise ValueError(f"Trace not found: {config.trace_id}")
 
 
         goal_tree = await self.trace_store.get_goal_tree(config.trace_id)
         goal_tree = await self.trace_store.get_goal_tree(config.trace_id)
+        if goal_tree is None:
+            # 防御性兜底:trace 存在但 goal.json 丢失时,创建空树
+            goal_tree = GoalTree(mission=trace_obj.task or "Agent task")
+            await self.trace_store.update_goal_tree(config.trace_id, goal_tree)
 
 
         # 自动判断行为:after_sequence 为 None 或 == head → 续跑;< head → 回溯
         # 自动判断行为:after_sequence 为 None 或 == head → 续跑;< head → 回溯
         after_seq = config.after_sequence
         after_seq = config.after_sequence
@@ -628,11 +632,6 @@ class AgentRunner:
         # 当前主路径头节点的 sequence(用于设置 parent_sequence)
         # 当前主路径头节点的 sequence(用于设置 parent_sequence)
         head_seq = trace.head_sequence
         head_seq = trace.head_sequence
 
 
-        # 设置 goal_tree 到 goal 工具
-        if goal_tree and self.trace_store:
-            from agent.trace.goal_tool import set_goal_tree
-            set_goal_tree(goal_tree)
-
         for iteration in range(config.max_iterations):
         for iteration in range(config.max_iterations):
             # 检查取消信号
             # 检查取消信号
             cancel_event = self._cancel_events.get(trace_id)
             cancel_event = self._cancel_events.get(trace_id)
@@ -742,8 +741,8 @@ class AgentRunner:
                     )
                     )
                     goal_tree.focus(goal_tree.goals[0].id)
                     goal_tree.focus(goal_tree.goals[0].id)
                     if self.trace_store:
                     if self.trace_store:
-                        await self.trace_store.update_goal_tree(trace_id, goal_tree)
                         await self.trace_store.add_goal(trace_id, goal_tree.goals[0])
                         await self.trace_store.add_goal(trace_id, goal_tree.goals[0])
+                        await self.trace_store.update_goal_tree(trace_id, goal_tree)
                     logger.info(f"自动创建 root goal: {goal_tree.goals[0].id}")
                     logger.info(f"自动创建 root goal: {goal_tree.goals[0].id}")
 
 
             # 获取当前 goal_id
             # 获取当前 goal_id
@@ -825,6 +824,7 @@ class AgentRunner:
                             "trace_id": trace_id,
                             "trace_id": trace_id,
                             "goal_id": current_goal_id,
                             "goal_id": current_goal_id,
                             "runner": self,
                             "runner": self,
+                            "goal_tree": goal_tree,
                         }
                         }
                     )
                     )
 
 
@@ -856,7 +856,8 @@ class AgentRunner:
                         goal_id=current_goal_id,
                         goal_id=current_goal_id,
                         parent_sequence=head_seq,
                         parent_sequence=head_seq,
                         tool_call_id=tc["id"],
                         tool_call_id=tc["id"],
-                        content={"tool_name": tool_name, "result": tool_result_text},
+                        # 存储完整内容:有图片时保留 list(含 image_url),纯文本时存字符串
+                        content={"tool_name": tool_name, "result": tool_content_for_llm},
                     )
                     )
 
 
                     if self.trace_store:
                     if self.trace_store:

+ 355 - 10
agent/llm/openrouter.py

@@ -2,7 +2,11 @@
 OpenRouter Provider
 OpenRouter Provider
 
 
 使用 OpenRouter API 调用各种模型(包括 Claude Sonnet 4.5)
 使用 OpenRouter API 调用各种模型(包括 Claude Sonnet 4.5)
-支持 OpenAI 兼容的 API 格式
+
+路由策略:
+- Claude 模型:走 OpenRouter 的 Anthropic 原生端点(/api/v1/messages),
+  使用自包含的格式转换逻辑,确保多模态工具结果(截图等)正确传递。
+- 其他模型:走 OpenAI 兼容端点(/api/v1/chat/completions)。
 
 
 OpenRouter 转发多种模型,需要根据实际模型处理不同的 usage 格式:
 OpenRouter 转发多种模型,需要根据实际模型处理不同的 usage 格式:
 - OpenAI 模型: prompt_tokens, completion_tokens, completion_tokens_details.reasoning_tokens
 - OpenAI 模型: prompt_tokens, completion_tokens, completion_tokens_details.reasoning_tokens
@@ -34,6 +38,245 @@ _RETRYABLE_EXCEPTIONS = (
 )
 )
 
 
 
 
# ── OpenRouter Anthropic endpoint: model name mapping ──────────────────────
# Local copy of yescode's model tables so this module is self-contained.
# Exact-match table: maps known aliases (including dot-notation variants such
# as "claude-sonnet-4.5") to the canonical model id sent to the API. Consulted
# first by _resolve_openrouter_model before any fuzzy matching.
_OR_MODEL_EXACT = {
    "claude-sonnet-4-6": "claude-sonnet-4-6",
    "claude-sonnet-4.6": "claude-sonnet-4-6",
    "claude-sonnet-4-5-20250929": "claude-sonnet-4-5-20250929",
    "claude-sonnet-4-5": "claude-sonnet-4-5-20250929",
    "claude-sonnet-4.5": "claude-sonnet-4-5-20250929",
    "claude-opus-4-6": "claude-opus-4-6",
    "claude-opus-4-5-20251101": "claude-opus-4-5-20251101",
    "claude-opus-4-5": "claude-opus-4-5-20251101",
    "claude-opus-4-1-20250805": "claude-opus-4-1-20250805",
    "claude-opus-4-1": "claude-opus-4-1-20250805",
    "claude-haiku-4-5-20251001": "claude-haiku-4-5-20251001",
    "claude-haiku-4-5": "claude-haiku-4-5-20251001",
}

# Fuzzy keyword table, scanned in order after an exact-table miss (substring
# match on the lowercased bare model name). Order matters: specific versioned
# keywords (e.g. "sonnet-4-6") must come before the bare family fallbacks
# ("sonnet", "opus", "haiku") at the end, which map to the newest release.
_OR_MODEL_FUZZY = [
    ("sonnet-4-6", "claude-sonnet-4-6"),
    ("sonnet-4.6", "claude-sonnet-4-6"),
    ("sonnet-4-5", "claude-sonnet-4-5-20250929"),
    ("sonnet-4.5", "claude-sonnet-4-5-20250929"),
    ("opus-4-6", "claude-opus-4-6"),
    ("opus-4.6", "claude-opus-4-6"),
    ("opus-4-5", "claude-opus-4-5-20251101"),
    ("opus-4.5", "claude-opus-4-5-20251101"),
    ("opus-4-1", "claude-opus-4-1-20250805"),
    ("opus-4.1", "claude-opus-4-1-20250805"),
    ("haiku-4-5", "claude-haiku-4-5-20251001"),
    ("haiku-4.5", "claude-haiku-4-5-20251001"),
    ("sonnet", "claude-sonnet-4-6"),
    ("opus", "claude-opus-4-6"),
    ("haiku", "claude-haiku-4-5-20251001"),
]
+
+
def _resolve_openrouter_model(model: str) -> str:
    """Normalize a model name for OpenRouter's Anthropic endpoint.

    Strips a leading ``anthropic/`` (or any provider) prefix, resolves
    aliases and dot-notation via the exact and fuzzy tables, and
    re-prepends ``anthropic/`` for OpenRouter routing. Unknown names are
    returned unchanged so the API can report the error itself.
    """
    # Drop only the first path segment ("provider/rest" → "rest").
    _provider, sep, rest = model.partition("/")
    bare = rest if sep else model

    # Exact alias table takes precedence over keyword matching.
    canonical = _OR_MODEL_EXACT.get(bare)
    if canonical is not None:
        return f"anthropic/{canonical}"

    # Fuzzy keyword scan, case-insensitive; table order encodes priority.
    lowered = bare.lower()
    for keyword, target in _OR_MODEL_FUZZY:
        if keyword in lowered:
            logger.info("[OpenRouter] Model fuzzy match: %s → anthropic/%s", model, target)
            return f"anthropic/{target}"

    # Nothing matched – pass through and let the API reject it if invalid.
    logger.warning("[OpenRouter] Could not resolve model name: %s, passing as-is", model)
    return model
+
+
+# ── OpenRouter Anthropic endpoint: format conversion helpers ───────────────
+
+def _to_anthropic_content(content: Any) -> Any:
+    """Convert OpenAI-style *content* (string or block list) to Anthropic format.
+
+    Handles ``image_url`` blocks → Anthropic ``image`` blocks (base64 or url).
+    Passes through ``text`` blocks and ``cache_control`` unchanged.
+    """
+    if not isinstance(content, list):
+        return content
+
+    result = []
+    for block in content:
+        if not isinstance(block, dict):
+            result.append(block)
+            continue
+
+        if block.get("type") == "image_url":
+            image_url_obj = block.get("image_url", {})
+            url = image_url_obj.get("url", "") if isinstance(image_url_obj, dict) else str(image_url_obj)
+            if url.startswith("data:"):
+                header, _, data = url.partition(",")
+                media_type = header.split(":")[1].split(";")[0] if ":" in header else "image/png"
+                result.append({
+                    "type": "image",
+                    "source": {
+                        "type": "base64",
+                        "media_type": media_type,
+                        "data": data,
+                    },
+                })
+            else:
+                result.append({
+                    "type": "image",
+                    "source": {"type": "url", "url": url},
+                })
+        else:
+            result.append(block)
+    return result
+
+
def _to_anthropic_messages(messages: List[Dict[str, Any]]) -> tuple:
    """Convert an OpenAI-format message list to Anthropic Messages API format.

    Returns ``(system_prompt, anthropic_messages)`` where *system_prompt* is
    ``None`` or the content extracted from ``role=system`` messages, and
    *anthropic_messages* is the converted list.

    Fixes over the naive conversion:
    - Multiple ``system`` messages are joined with blank lines instead of
      silently keeping only the last one (non-string contents fall back to
      the previous last-wins behavior).
    - The consecutive-tool-result merge verifies the first content block is
      a dict before calling ``.get`` on it, avoiding an AttributeError when
      a prior user message carried a list with non-dict items.
    """
    system_parts: List[Any] = []
    anthropic_messages: List[Dict[str, Any]] = []

    for msg in messages:
        role = msg.get("role", "")
        content = msg.get("content", "")

        if role == "system":
            # Collect all system messages; collapsed after the loop.
            system_parts.append(content)

        elif role == "user":
            anthropic_messages.append({
                "role": "user",
                "content": _to_anthropic_content(content),
            })

        elif role == "assistant":
            tool_calls = msg.get("tool_calls")
            if tool_calls:
                content_blocks: List[Dict[str, Any]] = []
                if content:
                    # Content may already be a block list (e.g. with
                    # cache_control) or a plain string.
                    converted = _to_anthropic_content(content)
                    if isinstance(converted, list):
                        content_blocks.extend(converted)
                    elif isinstance(converted, str) and converted.strip():
                        content_blocks.append({"type": "text", "text": converted})
                for tc in tool_calls:
                    func = tc.get("function", {})
                    args_str = func.get("arguments", "{}")
                    try:
                        args = json.loads(args_str) if isinstance(args_str, str) else args_str
                    except json.JSONDecodeError:
                        # Malformed arguments: send empty input instead of failing.
                        args = {}
                    content_blocks.append({
                        "type": "tool_use",
                        "id": tc.get("id", ""),
                        "name": func.get("name", ""),
                        "input": args,
                    })
                anthropic_messages.append({"role": "assistant", "content": content_blocks})
            else:
                anthropic_messages.append({"role": "assistant", "content": content})

        elif role == "tool":
            tool_result_block = {
                "type": "tool_result",
                "tool_use_id": msg.get("tool_call_id", ""),
                "content": _to_anthropic_content(content),
            }
            # Merge consecutive tool results into one user message.
            last = anthropic_messages[-1] if anthropic_messages else None
            if (last is not None
                    and last.get("role") == "user"
                    and isinstance(last.get("content"), list)
                    and last["content"]
                    and isinstance(last["content"][0], dict)
                    and last["content"][0].get("type") == "tool_result"):
                last["content"].append(tool_result_block)
            else:
                anthropic_messages.append({
                    "role": "user",
                    "content": [tool_result_block],
                })

    # Collapse collected system messages. Joining preserves every instruction;
    # mixed or non-string contents keep the previous last-wins behavior.
    if not system_parts:
        system_prompt = None
    elif len(system_parts) == 1:
        system_prompt = system_parts[0]
    elif all(isinstance(p, str) for p in system_parts):
        system_prompt = "\n\n".join(system_parts)
    else:
        system_prompt = system_parts[-1]

    return system_prompt, anthropic_messages
+
+
+def _to_anthropic_tools(tools: List[Dict]) -> List[Dict]:
+    """Convert OpenAI tool definitions to Anthropic format."""
+    anthropic_tools = []
+    for tool in tools:
+        if tool.get("type") == "function":
+            func = tool["function"]
+            anthropic_tools.append({
+                "name": func.get("name", ""),
+                "description": func.get("description", ""),
+                "input_schema": func.get("parameters", {"type": "object", "properties": {}}),
+            })
+    return anthropic_tools
+
+
def _parse_anthropic_response(result: Dict[str, Any]) -> Dict[str, Any]:
    """Parse an Anthropic Messages API response into the unified format.

    Returns a dict with keys: content, tool_calls, finish_reason, usage.
    ``tool_calls`` is ``None`` when the response contains no ``tool_use``
    blocks; text blocks are joined with newlines.
    """
    texts: List[str] = []
    calls: List[Dict[str, Any]] = []

    for block in result.get("content", []):
        kind = block.get("type")
        if kind == "text":
            texts.append(block.get("text", ""))
        elif kind == "tool_use":
            # Re-encode tool input as a JSON string, OpenAI-style.
            calls.append({
                "id": block.get("id", ""),
                "type": "function",
                "function": {
                    "name": block.get("name", ""),
                    "arguments": json.dumps(block.get("input", {}), ensure_ascii=False),
                },
            })

    # Map Anthropic stop reasons onto OpenAI-style finish reasons; unknown
    # values are passed through unchanged.
    stop_reason = result.get("stop_reason", "end_turn")
    finish_reason = {
        "end_turn": "stop",
        "tool_use": "tool_calls",
        "max_tokens": "length",
        "stop_sequence": "stop",
    }.get(stop_reason, stop_reason)

    raw_usage = result.get("usage", {})
    return {
        "content": "\n".join(texts),
        "tool_calls": calls or None,
        "finish_reason": finish_reason,
        "usage": TokenUsage(
            input_tokens=raw_usage.get("input_tokens", 0),
            output_tokens=raw_usage.get("output_tokens", 0),
            cache_creation_tokens=raw_usage.get("cache_creation_input_tokens", 0),
            cache_read_tokens=raw_usage.get("cache_read_input_tokens", 0),
        ),
    }
+
+
+# ── Provider detection / usage parsing ─────────────────────────────────────
+
 def _detect_provider_from_model(model: str) -> str:
 def _detect_provider_from_model(model: str) -> str:
     """根据模型名称检测提供商"""
     """根据模型名称检测提供商"""
     model_lower = model.lower()
     model_lower = model.lower()
@@ -139,6 +382,111 @@ def _normalize_tool_call_ids(messages: List[Dict[str, Any]], target_prefix: str)
     return result
     return result
 
 
 
 
async def _openrouter_anthropic_call(
    messages: List[Dict[str, Any]],
    model: str,
    tools: Optional[List[Dict]],
    api_key: str,
    **kwargs,
) -> Dict[str, Any]:
    """
    Call a Claude model through OpenRouter's native Anthropic endpoint.

    Uses the Anthropic Messages API format (/api/v1/messages) with
    self-contained format-conversion logic so that multimodal content
    (screenshots etc.) is passed through correctly.

    Args:
        messages: Conversation history in OpenAI chat format.
        model: Requested model name; resolved via _resolve_openrouter_model.
        tools: Optional OpenAI-format tool definitions.
        api_key: OpenRouter API key (sent as a Bearer token).
        **kwargs: Optional ``max_tokens`` (default 16384) and ``temperature``.

    Returns:
        Unified result dict: content, tool_calls, token counts, finish_reason,
        cost and the raw TokenUsage object.

    Raises:
        httpx.HTTPStatusError: non-retryable HTTP errors, or retryable ones
            (429/5xx) after the final attempt.
        The exceptions in _RETRYABLE_EXCEPTIONS after the final attempt.
    """
    endpoint = "https://openrouter.ai/api/v1/messages"

    # Resolve model name for OpenRouter (e.g. "claude-sonnet-4.5" → "anthropic/claude-sonnet-4-5-20250929")
    resolved_model = _resolve_openrouter_model(model)
    logger.info("[OpenRouter/Anthropic] model: %s → %s", model, resolved_model)

    # When resuming a trace across providers, rewrite incompatible
    # tool_call_ids to the Anthropic-style "toolu" prefix.
    messages = _normalize_tool_call_ids(messages, "toolu")

    # Convert OpenAI message format → Anthropic Messages format.
    system_prompt, anthropic_messages = _to_anthropic_messages(messages)

    payload: Dict[str, Any] = {
        "model": resolved_model,
        "messages": anthropic_messages,
        "max_tokens": kwargs.get("max_tokens", 16384),
    }
    if system_prompt is not None:
        payload["system"] = system_prompt
    if tools:
        payload["tools"] = _to_anthropic_tools(tools)
    if "temperature" in kwargs:
        payload["temperature"] = kwargs["temperature"]

    headers = {
        "Authorization": f"Bearer {api_key}",
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
        "HTTP-Referer": "https://github.com/your-repo",
        "X-Title": "Agent Framework",
    }

    # Retry loop: exponential backoff (2s, 4s) on 429/5xx and transient
    # network errors; other HTTP errors are raised immediately.
    max_retries = 3
    last_exception = None
    for attempt in range(max_retries):
        async with httpx.AsyncClient(timeout=300.0) as client:
            try:
                response = await client.post(endpoint, json=payload, headers=headers)
                response.raise_for_status()
                result = response.json()
                break

            except httpx.HTTPStatusError as e:
                status = e.response.status_code
                error_body = e.response.text
                if status in (429, 500, 502, 503, 504) and attempt < max_retries - 1:
                    wait = 2 ** attempt * 2
                    logger.warning(
                        "[OpenRouter/Anthropic] HTTP %d (attempt %d/%d), retrying in %ds: %s",
                        status, attempt + 1, max_retries, wait, error_body[:200],
                    )
                    await asyncio.sleep(wait)
                    last_exception = e
                    continue
                # Log AND print error body so it is visible in console output
                logger.error("[OpenRouter/Anthropic] HTTP %d error body: %s", status, error_body)
                print(f"[OpenRouter/Anthropic] API Error {status}: {error_body[:500]}")
                raise
            except _RETRYABLE_EXCEPTIONS as e:
                last_exception = e
                if attempt < max_retries - 1:
                    wait = 2 ** attempt * 2
                    logger.warning(
                        "[OpenRouter/Anthropic] %s (attempt %d/%d), retrying in %ds",
                        type(e).__name__, attempt + 1, max_retries, wait,
                    )
                    await asyncio.sleep(wait)
                    continue
                raise
    else:
        # NOTE(review): unreachable in practice — every failure path on the
        # last attempt re-raises above, so the loop always breaks or raises.
        raise last_exception  # type: ignore[misc]

    # Parse the Anthropic response into the unified result format.
    parsed = _parse_anthropic_response(result)
    usage = parsed["usage"]
    cost = calculate_cost(model, usage)

    return {
        "content": parsed["content"],
        "tool_calls": parsed["tool_calls"],
        "prompt_tokens": usage.input_tokens,
        "completion_tokens": usage.output_tokens,
        # NOTE(review): assumes TokenUsage defines reasoning_tokens with a
        # default (Anthropic responses carry no such field) — confirm.
        "reasoning_tokens": usage.reasoning_tokens,
        "cache_creation_tokens": usage.cache_creation_tokens,
        "cache_read_tokens": usage.cache_read_tokens,
        "finish_reason": parsed["finish_reason"],
        "cost": cost,
        "usage": usage,
    }
+
+
 async def openrouter_llm_call(
 async def openrouter_llm_call(
     messages: List[Dict[str, Any]],
     messages: List[Dict[str, Any]],
     model: str = "anthropic/claude-sonnet-4.5",
     model: str = "anthropic/claude-sonnet-4.5",
@@ -168,6 +516,12 @@ async def openrouter_llm_call(
     if not api_key:
     if not api_key:
         raise ValueError("OPEN_ROUTER_API_KEY environment variable not set")
         raise ValueError("OPEN_ROUTER_API_KEY environment variable not set")
 
 
+    # Claude 模型走 Anthropic 原生端点,其余走 OpenAI 兼容端点
+    provider = _detect_provider_from_model(model)
+    if provider == "anthropic":
+        logger.debug("[OpenRouter] Routing Claude model to Anthropic native endpoint")
+        return await _openrouter_anthropic_call(messages, model, tools, api_key, **kwargs)
+
     base_url = "https://openrouter.ai/api/v1"
     base_url = "https://openrouter.ai/api/v1"
     endpoint = f"{base_url}/chat/completions"
     endpoint = f"{base_url}/chat/completions"
 
 
@@ -189,15 +543,6 @@ async def openrouter_llm_call(
     if "max_tokens" in kwargs:
     if "max_tokens" in kwargs:
         payload["max_tokens"] = kwargs["max_tokens"]
         payload["max_tokens"] = kwargs["max_tokens"]
 
 
-    # 对于 Anthropic 模型,锁定 provider 以确保缓存生效
-    if "anthropic" in model.lower() or "claude" in model.lower():
-        payload["provider"] = {
-            "only": ["Anthropic"],
-            "allow_fallbacks": False,
-            "require_parameters": True
-        }
-        logger.debug("[OpenRouter] Locked provider to Anthropic for caching support")
-
     # OpenRouter 特定参数
     # OpenRouter 特定参数
     headers = {
     headers = {
         "Authorization": f"Bearer {api_key}",
         "Authorization": f"Bearer {api_key}",

+ 7 - 1
agent/llm/yescode.py

@@ -212,7 +212,13 @@ def _convert_messages_to_anthropic(messages: List[Dict[str, Any]]) -> tuple:
             if tool_calls:
             if tool_calls:
                 content_blocks = []
                 content_blocks = []
                 if content:
                 if content:
-                    content_blocks.append({"type": "text", "text": content})
+                    # content 可能已被 _add_cache_control 转成 list(含 cache_control),
+                    # 也可能是普通字符串。两者都需要正确处理,避免产生 {"type":"text","text":[...]}
+                    converted = _convert_content_to_anthropic(content)
+                    if isinstance(converted, list):
+                        content_blocks.extend(converted)
+                    elif isinstance(converted, str) and converted.strip():
+                        content_blocks.append({"type": "text", "text": converted})
                 for tc in tool_calls:
                 for tc in tool_calls:
                     func = tc.get("function", {})
                     func = tc.get("function", {})
                     args_str = func.get("arguments", "{}")
                     args_str = func.get("arguments", "{}")

+ 20 - 7
agent/tools/builtin/file/read.py

@@ -11,6 +11,7 @@ Read Tool - 文件读取工具
 """
 """
 
 
 import os
 import os
+import base64
 import mimetypes
 import mimetypes
 from pathlib import Path
 from pathlib import Path
 from typing import Optional
 from typing import Optional
@@ -79,13 +80,25 @@ async def read_file(
 
 
     # 图片文件(参考 opencode:66-91)
     # 图片文件(参考 opencode:66-91)
     if mime_type.startswith("image/") and mime_type not in ["image/svg+xml", "image/vnd.fastbidsheet"]:
     if mime_type.startswith("image/") and mime_type not in ["image/svg+xml", "image/vnd.fastbidsheet"]:
-        # 注意:实际项目中需要实现图片的 base64 编码
-        # 这里简化处理
-        return ToolResult(
-            title=path.name,
-            output=f"图片文件: {path.name} (MIME: {mime_type})",
-            metadata={"mime_type": mime_type, "truncated": False}
-        )
+        try:
+            raw = path.read_bytes()
+            b64_data = base64.b64encode(raw).decode("ascii")
+            return ToolResult(
+                title=path.name,
+                output=f"图片文件: {path.name} (MIME: {mime_type}, {len(raw)} bytes)",
+                metadata={"mime_type": mime_type, "truncated": False},
+                images=[{
+                    "type": "base64",
+                    "media_type": mime_type,
+                    "data": b64_data,
+                }],
+            )
+        except Exception as e:
+            return ToolResult(
+                title=path.name,
+                output=f"图片文件读取失败: {path.name}: {e}",
+                error=str(e),
+            )
 
 
     # PDF 文件
     # PDF 文件
     if mime_type == "application/pdf":
     if mime_type == "application/pdf":

+ 0 - 4
agent/trace/__init__.py

@@ -14,7 +14,6 @@ from .goal_models import Goal, GoalTree, GoalStatus, GoalType, GoalStats
 from .protocols import TraceStore
 from .protocols import TraceStore
 from .store import FileSystemTraceStore
 from .store import FileSystemTraceStore
 from .trace_id import generate_trace_id, generate_sub_trace_id, parse_parent_trace_id
 from .trace_id import generate_trace_id, generate_sub_trace_id, parse_parent_trace_id
-from .goal_tool import set_goal_tree, get_goal_tree
 
 
 __all__ = [
 __all__ = [
     # Models
     # Models
@@ -32,7 +31,4 @@ __all__ = [
     "generate_trace_id",
     "generate_trace_id",
     "generate_sub_trace_id",
     "generate_sub_trace_id",
     "parse_parent_trace_id",
     "parse_parent_trace_id",
-    # Goal tool
-    "set_goal_tree",
-    "get_goal_tree",
 ]
 ]

+ 8 - 27
agent/trace/goal_tool.py

@@ -13,22 +13,6 @@ if TYPE_CHECKING:
     from .protocols import TraceStore
     from .protocols import TraceStore
 
 
 
 
-# ===== 全局 GoalTree 状态管理 =====
-
-_current_goal_tree = None
-
-
-def set_goal_tree(tree):
-    """设置当前 GoalTree(由 AgentRunner 调用)"""
-    global _current_goal_tree
-    _current_goal_tree = tree
-
-
-def get_goal_tree():
-    """获取当前 GoalTree"""
-    return _current_goal_tree
-
-
 # ===== LLM 可调用的 goal 工具 =====
 # ===== LLM 可调用的 goal 工具 =====
 
 
 @tool(description="管理执行计划,添加/完成/放弃目标,切换焦点")
 @tool(description="管理执行计划,添加/完成/放弃目标,切换焦点")
@@ -53,12 +37,13 @@ async def goal(
         done: 完成当前目标,值为 summary
         done: 完成当前目标,值为 summary
         abandon: 放弃当前目标,值为原因
         abandon: 放弃当前目标,值为原因
         focus: 切换焦点到指定 ID
         focus: 切换焦点到指定 ID
-        context: 工具执行上下文(包含 store 和 trace_id
+        context: 工具执行上下文(包含 store、trace_id、goal_tree
 
 
     Returns:
     Returns:
         str: 更新后的计划状态文本
         str: 更新后的计划状态文本
     """
     """
-    tree = get_goal_tree()
+    # GoalTree 从 context 获取,每个 agent 实例独立,不再依赖全局变量
+    tree = context.get("goal_tree") if context else None
     if tree is None:
     if tree is None:
         return "错误:GoalTree 未初始化"
         return "错误:GoalTree 未初始化"
 
 
@@ -130,10 +115,7 @@ async def goal_tool(
 
 
         # 推送事件
         # 推送事件
         if store and trace_id:
         if store and trace_id:
-            print(f"[DEBUG] goal_tool: calling store.update_goal for done: goal_id={goal.id}")
             await store.update_goal(trace_id, goal.id, status="completed", summary=done)
             await store.update_goal(trace_id, goal.id, status="completed", summary=done)
-        else:
-            print(f"[DEBUG] goal_tool: skip event push (store={store}, trace_id={trace_id})")
 
 
         # 检查是否有级联完成的父目标(complete方法已经处理,这里只需要记录)
         # 检查是否有级联完成的父目标(complete方法已经处理,这里只需要记录)
         if goal.parent_id:
         if goal.parent_id:
@@ -163,10 +145,7 @@ async def goal_tool(
 
 
         # 推送事件
         # 推送事件
         if store and trace_id:
         if store and trace_id:
-            print(f"[DEBUG] goal_tool: calling store.update_goal for abandon: goal_id={goal.id}")
             await store.update_goal(trace_id, goal.id, status="abandoned", summary=abandon)
             await store.update_goal(trace_id, goal.id, status="abandoned", summary=abandon)
-        else:
-            print(f"[DEBUG] goal_tool: skip event push (store={store}, trace_id={trace_id})")
 
 
     # 4. 处理 add
     # 4. 处理 add
     if add is not None:
     if add is not None:
@@ -218,11 +197,8 @@ async def goal_tool(
 
 
             # 推送事件
             # 推送事件
             if store and trace_id:
             if store and trace_id:
-                print(f"[DEBUG] goal_tool: calling store.add_goal for {len(new_goals)} new goals")
                 for goal in new_goals:
                 for goal in new_goals:
                     await store.add_goal(trace_id, goal)
                     await store.add_goal(trace_id, goal)
-            else:
-                print(f"[DEBUG] goal_tool: skip event push (store={store}, trace_id={trace_id})")
 
 
             # 如果没有焦点且添加了目标,自动 focus 到第一个新目标
             # 如果没有焦点且添加了目标,自动 focus 到第一个新目标
             if not tree.current_id and new_goals:
             if not tree.current_id and new_goals:
@@ -230,6 +206,11 @@ async def goal_tool(
                 display_id = tree._generate_display_id(new_goals[0])
                 display_id = tree._generate_display_id(new_goals[0])
                 changes.append(f"自动切换焦点: {display_id}")
                 changes.append(f"自动切换焦点: {display_id}")
 
 
+    # 将完整内存树状态(含 current_id)同步到存储,
+    # 因为 store.add_goal / update_goal 各自从磁盘加载,不包含 focus 等内存变更
+    if store and trace_id and changes:
+        await store.update_goal_tree(trace_id, tree)
+
     # 返回当前状态
     # 返回当前状态
     result = []
     result = []
     if changes:
     if changes:

+ 4 - 2
agent/trace/models.py

@@ -200,12 +200,14 @@ class Message:
         msg: Dict[str, Any] = {"role": self.role}
         msg: Dict[str, Any] = {"role": self.role}
 
 
         if self.role == "tool":
         if self.role == "tool":
-            # tool message: tool_call_id + name + content(string)
+            # tool message: tool_call_id + name + content
             if self.tool_call_id:
             if self.tool_call_id:
                 msg["tool_call_id"] = self.tool_call_id
                 msg["tool_call_id"] = self.tool_call_id
                 msg["name"] = self.description or "unknown"
                 msg["name"] = self.description or "unknown"
             if isinstance(self.content, dict):
             if isinstance(self.content, dict):
-                msg["content"] = str(self.content.get("result", self.content))
+                result = self.content.get("result", self.content)
+                # result 可能是 list(含图片的多模态内容)或字符串
+                msg["content"] = result if isinstance(result, list) else str(result)
             else:
             else:
                 msg["content"] = str(self.content) if self.content is not None else ""
                 msg["content"] = str(self.content) if self.content is not None else ""
 
 

+ 3 - 1
examples/how/production.prompt

@@ -11,7 +11,9 @@ $system$
 ## 工作流程
 ## 工作流程
 
 
 **第一轮**:
 **第一轮**:
-1. 调用 deconstruct agent,传入原帖的完整多模态内容(文字+所有图片),获取 制作表
+1. 调用 deconstruct agent,传入原帖的完整多模态内容,获取 制作表;注意:
+    - 你可以直接给deconstruct agent输入文件夹路径
+    - 它会自动加载如何解构内容的skill:examples/how/skills/deconstruct.md作为system prompt
 2. 调用 construct agent,传入解构产物 制作表,得到生成内容
 2. 调用 construct agent,传入解构产物 制作表,得到生成内容
 3. 对比建构结果与原帖,做出评估
 3. 对比建构结果与原帖,做出评估
 
 

+ 1 - 1
examples/how/run.py

@@ -294,7 +294,7 @@ async def main():
                 model=f"claude-{prompt.config.get('model', 'sonnet-4.5')}",
                 model=f"claude-{prompt.config.get('model', 'sonnet-4.5')}",
                 temperature=float(prompt.config.get('temperature', 0.3)),
                 temperature=float(prompt.config.get('temperature', 0.3)),
                 max_iterations=1000,
                 max_iterations=1000,
-                name="mcp/skills 发现、获取、评价 分析任务",
+                name="社交媒体内容解构、建构、评估任务",
             )
             )
 
 
         while not should_exit:
         while not should_exit:

+ 1 - 0
examples/how/skills/deconstruct.md

@@ -113,6 +113,7 @@ description: 从制作层解构社交媒体帖子,提取视觉制作决策
 
 
 ## 原则
 ## 原则
 
 
+- **亲自读图**:你应该直接读取我们需要解构的内容中的多模态内容,仅在后续缺乏特征提取能力的情况下再继续使用其他工具来处理多模态内容
 - **选择性而非穷举**:只记录对还原质量有实质影响的信息                                          
 - **选择性而非穷举**:只记录对还原质量有实质影响的信息                                          
 - **泛化描述**:描述创作规律,而非内容细节("主体特写,背景虚化"优于"穿蓝衣服的女生")          
 - **泛化描述**:描述创作规律,而非内容细节("主体特写,背景虚化"优于"穿蓝衣服的女生")          
 - **制作视角**:从"如何制作出这个效果"出发,而非"这是什么内容"                                  
 - **制作视角**:从"如何制作出这个效果"出发,而非"这是什么内容"