Procházet zdrojové kódy

fix: openrouter provider

Talegorithm před 1 týdnem
rodič
revize
4bbe5f5933
Změněn 1 soubor: 40 přidání a 4 odebrání
  1. 40 4
      agent/llm/openrouter.py

+ 40 - 4
agent/llm/openrouter.py

@@ -191,22 +191,47 @@ def _to_anthropic_messages(messages: List[Dict[str, Any]]) -> tuple:
                 anthropic_messages.append({"role": "assistant", "content": content})
                 anthropic_messages.append({"role": "assistant", "content": content})
 
 
         elif role == "tool":
         elif role == "tool":
-            tool_result_block = {
+            # Split tool result into text-only tool_result + sibling image blocks.
+            # Images nested inside tool_result.content are not reliably passed
+            # through by all proxies (e.g. OpenRouter).  Placing them as sibling
+            # content blocks in the same user message is more compatible.
+            converted = _to_anthropic_content(content)
+            text_parts: List[Dict[str, Any]] = []
+            image_parts: List[Dict[str, Any]] = []
+            if isinstance(converted, list):
+                for block in converted:
+                    if isinstance(block, dict) and block.get("type") == "image":
+                        image_parts.append(block)
+                    else:
+                        text_parts.append(block)
+            elif isinstance(converted, str):
+                text_parts = [{"type": "text", "text": converted}] if converted else []
+
+            # tool_result keeps only text content
+            tool_result_block: Dict[str, Any] = {
                 "type": "tool_result",
                 "type": "tool_result",
                 "tool_use_id": msg.get("tool_call_id", ""),
                 "tool_use_id": msg.get("tool_call_id", ""),
-                "content": _to_anthropic_content(content),
             }
             }
+            if len(text_parts) == 1 and text_parts[0].get("type") == "text":
+                tool_result_block["content"] = text_parts[0]["text"]
+            elif text_parts:
+                tool_result_block["content"] = text_parts
+            # (omit content key entirely when empty – Anthropic accepts this)
+
+            # Build the blocks to append: tool_result first, then any images
+            new_blocks = [tool_result_block] + image_parts
+
             # Merge consecutive tool results into one user message
             # Merge consecutive tool results into one user message
             if (anthropic_messages
             if (anthropic_messages
                     and anthropic_messages[-1].get("role") == "user"
                     and anthropic_messages[-1].get("role") == "user"
                     and isinstance(anthropic_messages[-1].get("content"), list)
                     and isinstance(anthropic_messages[-1].get("content"), list)
                     and anthropic_messages[-1]["content"]
                     and anthropic_messages[-1]["content"]
                     and anthropic_messages[-1]["content"][0].get("type") == "tool_result"):
                     and anthropic_messages[-1]["content"][0].get("type") == "tool_result"):
-                anthropic_messages[-1]["content"].append(tool_result_block)
+                anthropic_messages[-1]["content"].extend(new_blocks)
             else:
             else:
                 anthropic_messages.append({
                 anthropic_messages.append({
                     "role": "user",
                     "role": "user",
-                    "content": [tool_result_block],
+                    "content": new_blocks,
                 })
                 })
 
 
     return system_prompt, anthropic_messages
     return system_prompt, anthropic_messages
@@ -407,6 +432,17 @@ async def _openrouter_anthropic_call(
     # OpenAI 格式 → Anthropic 格式
     # OpenAI 格式 → Anthropic 格式
     system_prompt, anthropic_messages = _to_anthropic_messages(messages)
     system_prompt, anthropic_messages = _to_anthropic_messages(messages)
 
 
+    # Diagnostic: count image blocks in the payload
+    _img_count = 0
+    for _m in anthropic_messages:
+        if isinstance(_m.get("content"), list):
+            for _b in _m["content"]:
+                if isinstance(_b, dict) and _b.get("type") == "image":
+                    _img_count += 1
+    if _img_count:
+        logger.info("[OpenRouter/Anthropic] payload contains %d image block(s)", _img_count)
+        print(f"[OpenRouter/Anthropic] payload contains {_img_count} image block(s)")
+
     payload: Dict[str, Any] = {
     payload: Dict[str, Any] = {
         "model": resolved_model,
         "model": resolved_model,
         "messages": anthropic_messages,
         "messages": anthropic_messages,