hai 3 meses · b16f501a01
--- a/agent/core/runner.py
+++ b/agent/core/runner.py
@@ -864,6 +864,16 @@ class AgentRunner:
 
				 
			
 
				             if self.trace_store:
			
 
				                 await self.trace_store.add_message(assistant_msg)
			
 
				+                # 记录模型使用
			
 
				+                await self.trace_store.record_model_usage(
			
 
				+                    trace_id=trace_id,
			
 
				+                    sequence=sequence - 1,  # assistant_msg的sequence
			
 
				+                    role="assistant",
			
 
				+                    model=config.model,
			
 
				+                    prompt_tokens=prompt_tokens,
			
 
				+                    completion_tokens=completion_tokens,
			
 
				+                    cache_read_tokens=cache_read_tokens or 0,
			
 
				+                )
			
 
				 
			
 
				             yield assistant_msg
			
 
				             head_seq = sequence
			
@@ -927,12 +937,21 @@ class AgentRunner:
 
				                     )
			
 
				 
			
 
				                     # --- 支持多模态工具反馈 ---
			
 
				-                    # execute() 返回 dict{"text","images"} 或 str
			
 
				-                    if isinstance(tool_result, dict) and tool_result.get("images"):
			
 
				-                        tool_result_text = tool_result["text"]
			
 
				+                    # execute() 返回 dict{"text","images","tool_usage"} 或 str
			
 
				+                    # 统一为dict格式
			
 
				+                    if isinstance(tool_result, str):
			
 
				+                        tool_result = {"text": tool_result}
			
 
				+
			
 
				+                    tool_text = tool_result.get("text", str(tool_result))
			
 
				+                    tool_images = tool_result.get("images", [])
			
 
				+                    tool_usage = tool_result.get("tool_usage")  # 新增：提取tool_usage
			
 
				+
			
 
				+                    # 处理多模态消息
			
 
				+                    if tool_images:
			
 
				+                        tool_result_text = tool_text
			
 
				                         # 构建多模态消息格式
			
 
				-                        tool_content_for_llm = [{"type": "text", "text": tool_result_text}]
			
 
				-                        for img in tool_result["images"]:
			
 
				+                        tool_content_for_llm = [{"type": "text", "text": tool_text}]
			
 
				+                        for img in tool_images:
			
 
				                             if img.get("type") == "base64" and img.get("data"):
			
 
				                                 media_type = img.get("media_type", "image/png")
			
 
				                                 tool_content_for_llm.append({
			
@@ -944,8 +963,8 @@ class AgentRunner:
 
				                         img_count = len(tool_content_for_llm) - 1  # 减去 text 块
			
 
				                         print(f"[Runner] 多模态工具反馈: tool={tool_name}, images={img_count}, text_len={len(tool_result_text)}")
			
 
				                     else:
			
 
				-                        tool_result_text = str(tool_result)
			
 
				-                        tool_content_for_llm = tool_result_text
			
 
				+                        tool_result_text = tool_text
			
 
				+                        tool_content_for_llm = tool_text
			
 
				 
			
 
				                     tool_msg = Message.create(
			
 
				                         trace_id=trace_id,
			
@@ -960,10 +979,22 @@ class AgentRunner:
 
				 
			
 
				                     if self.trace_store:
			
 
				                         await self.trace_store.add_message(tool_msg)
			
 
				+                        # 记录工具的模型使用
			
 
				+                        if tool_usage:
			
 
				+                            await self.trace_store.record_model_usage(
			
 
				+                                trace_id=trace_id,
			
 
				+                                sequence=sequence,
			
 
				+                                role="tool",
			
 
				+                                tool_name=tool_name,
			
 
				+                                model=tool_usage.get("model"),
			
 
				+                                prompt_tokens=tool_usage.get("prompt_tokens", 0),
			
 
				+                                completion_tokens=tool_usage.get("completion_tokens", 0),
			
 
				+                                cache_read_tokens=tool_usage.get("cache_read_tokens", 0),
			
 
				+                            )
			
 
				                         # 截图单独存为同名 PNG 文件
			
 
				-                        if isinstance(tool_result, dict) and tool_result.get("images"):
			
 
				+                        if tool_images:
			
 
				                             import base64 as b64mod
			
 
				-                            for img in tool_result["images"]:
			
 
				+                            for img in tool_images:
			
 
				                                 if img.get("data"):
			
 
				                                     png_path = self.trace_store._get_messages_dir(trace_id) / f"{tool_msg.message_id}.png"
			
 
				                                     png_path.write_bytes(b64mod.b64decode(img["data"]))
			
--- a/agent/tools/builtin/__init__.py
+++ b/agent/tools/builtin/__init__.py
@@ -19,6 +19,7 @@ from agent.tools.builtin.experience import get_experience
 
				 from agent.tools.builtin.search import search_posts, get_search_suggestions
			
 
				 from agent.tools.builtin.sandbox import (sandbox_create_environment, sandbox_run_shell,
			
 
				                                          sandbox_rebuild_with_ports,sandbox_destroy_environment)
			
 
				+from agent.trace.goal_tool import goal
			
 
				 
			
 
				 # 导入浏览器工具以触发注册
			
 
				 import agent.tools.builtin.browser  # noqa: F401
			
@@ -45,4 +46,6 @@ __all__ = [
 
				     "sandbox_run_shell",
			
 
				     "sandbox_rebuild_with_ports",
			
 
				     "sandbox_destroy_environment",
			
 
				+    # 计划管理
			
 
				+    "goal",
			
 
				 ]
			
--- a/agent/tools/builtin/browser/baseClass.py
+++ b/agent/tools/builtin/browser/baseClass.py
@@ -223,7 +223,11 @@ async def create_container(url: str, account_name: str = "liuwenwu") -> Dict[str
 
				 
			
 
				 async def init_browser_session(
			
 
				     browser_type: str = "local",
			
 
				-    headless: bool = False,
			
 
				+    # TEMPORARY FIX (2026-03-02): 改为 True 以解决 CDP 连接时序问题
			
 
				+    # browser-use 在非 headless 模式下有时会在 Chrome 完全启动前尝试连接 CDP，
			
 
				+    # 导致 "JSONDecodeError: Expecting value" 错误
			
 
				+    # TODO: 之后改回 headless: bool = False，或在 browser-use 修复此问题后移除此注释
			
 
				+    headless: bool = True,  # 原值: False
			
 
				     url: Optional[str] = None,
			
 
				     profile_name: str = "default",
			
 
				     user_data_dir: Optional[str] = None,
			
@@ -287,6 +291,8 @@ async def init_browser_session(
 
				 
			
 
				     # 创建会话
			
 
				     _browser_session = BrowserSession(**session_params)
			
 
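				+    # Workaround: fixed 1-second pause before start() to give the Chrome CDP endpoint time to come up (a blind delay, not a readiness check)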
			
 
				+    await asyncio.sleep(1)
			
 
				     await _browser_session.start()
			
 
				 
			
 
				     _browser_tools = Tools()
			
--- a/agent/tools/models.py
+++ b/agent/tools/models.py
@@ -41,6 +41,9 @@ class ToolResult:
 
				 	attachments: List[str] = field(default_factory=list)  # 文件路径列表
			
 
				 	images: List[Dict[str, Any]] = field(default_factory=list)  # 图片列表
			
 
				 
			
 
				+	# Token追踪（用于工具内部LLM调用）
			
 
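				+	tool_usage: Optional[Dict[str, Any]] = None  # Format: {"model": "...", "prompt_tokens": 100, "completion_tokens": 50, "cache_read_tokens": 0, "cost": 0.0}; the runner records model / prompt_tokens / completion_tokens / cache_read_tokens, "cost" is not passed to record_model_usage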
			
 
				+
			
 
				 	def to_llm_message(self, first_time: bool = True) -> str:
			
 
				 		"""
			
 
				 		转换为给 LLM 的消息
			
--- a/agent/tools/registry.py
+++ b/agent/tools/registry.py
@@ -241,10 +241,20 @@ class ToolRegistry:
 
				 			# 处理 ToolResult 对象
			
 
				 			from agent.tools.models import ToolResult
			
 
				 			if isinstance(result, ToolResult):
			
 
				-				# 有图片时返回 dict 以便 runner 构建多模态消息
			
 
				+				ret = {"text": result.to_llm_message()}
			
 
				+
			
 
				+				# 保留images
			
 
				 				if result.images:
			
 
				-					return {"text": result.to_llm_message(), "images": result.images}
			
 
				-				return result.to_llm_message()
			
 
				+					ret["images"] = result.images
			
 
				+
			
 
				+				# 保留tool_usage
			
 
				+				if result.tool_usage:
			
 
				+					ret["tool_usage"] = result.tool_usage
			
 
				+
			
 
				+				# 向后兼容：只有text时返回字符串
			
 
				+				if len(ret) == 1:
			
 
				+					return ret["text"]
			
 
				+				return ret
			
 
				 
			
 
				 			return json.dumps(result, ensure_ascii=False, indent=2)
			
 
				 
			
--- a/agent/trace/store.py
+++ b/agent/trace/store.py
@@ -61,6 +61,10 @@ class FileSystemTraceStore:
 
				         """获取 events.jsonl 文件路径"""
			
 
				         return self._get_trace_dir(trace_id) / "events.jsonl"
			
 
				 
			
 
				+    def _get_model_usage_file(self, trace_id: str) -> Path:
			
 
				+        """获取 model_usage.json 文件路径"""
			
 
				+        return self._get_trace_dir(trace_id) / "model_usage.json"
			
 
				+
			
 
				     # ===== Trace 操作 =====
			
 
				 
			
 
				     async def create_trace(self, trace: Trace) -> str:
			
@@ -566,6 +570,104 @@ class FileSystemTraceStore:
 
				 
			
 
				         return abandoned_ids
			
 
				 
			
 
				+    # ===== 模型使用追踪 =====
			
 
				+
			
 
				+    async def record_model_usage(
			
 
				+        self,
			
 
				+        trace_id: str,
			
 
				+        sequence: int,
			
 
				+        role: str,
			
 
				+        model: str,
			
 
				+        prompt_tokens: int,
			
 
				+        completion_tokens: int,
			
 
				+        cache_read_tokens: int = 0,
			
 
				+        tool_name: Optional[str] = None,
			
 
				+    ) -> None:
			
 
				+        """
			
 
				+        记录模型使用情况到 model_usage.json
			
 
				+
			
 
				+        Args:
			
 
				+            trace_id: Trace ID
			
 
				+            sequence: 消息序号
			
 
				+            role: message role; "assistant" is counted under agent_tokens, any other value is counted under tool_tokens
			
 
				+            model: 模型名称
			
 
				+            prompt_tokens: 输入tokens
			
 
				+            completion_tokens: 输出tokens
			
 
				+            cache_read_tokens: 缓存读取tokens
			
 
				+            tool_name: 工具名称（role=tool时）
			
 
				+        """
			
 
				+        usage_file = self._get_model_usage_file(trace_id)
			
 
				+
			
 
				+        # 读取现有数据
			
 
				+        if usage_file.exists():
			
 
				+            data = json.loads(usage_file.read_text())
			
 
				+        else:
			
 
				+            data = {
			
 
				+                "summary": {
			
 
				+                    "total_models": 0,
			
 
				+                    "total_tokens": 0,
			
 
				+                    "total_cache_read_tokens": 0,
			
 
				+                    "agent_tokens": 0,
			
 
				+                    "tool_tokens": 0,
			
 
				+                },
			
 
				+                "models": [],
			
 
				+                "timeline": [],
			
 
				+            }
			
 
				+
			
 
				+        # 更新summary
			
 
				+        total_tokens = prompt_tokens + completion_tokens
			
 
				+        data["summary"]["total_tokens"] += total_tokens
			
 
				+        data["summary"]["total_cache_read_tokens"] += cache_read_tokens
			
 
				+
			
 
				+        if role == "assistant":
			
 
				+            data["summary"]["agent_tokens"] += total_tokens
			
 
				+            source = "agent"
			
 
				+        else:
			
 
				+            data["summary"]["tool_tokens"] += total_tokens
			
 
				+            source = f"tool:{tool_name}" if tool_name else "tool"
			
 
				+
			
 
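				+        # Update the models list: one aggregate entry per (model, source) pair, so total_models counts pairs rather than distinct model names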
			
 
				+        model_entry = None
			
 
				+        for m in data["models"]:
			
 
				+            if m["model"] == model and m["source"] == source:
			
 
				+                model_entry = m
			
 
				+                break
			
 
				+
			
 
				+        if model_entry:
			
 
				+            model_entry["prompt_tokens"] += prompt_tokens
			
 
				+            model_entry["completion_tokens"] += completion_tokens
			
 
				+            model_entry["total_tokens"] += total_tokens
			
 
				+            model_entry["cache_read_tokens"] += cache_read_tokens
			
 
				+            model_entry["call_count"] += 1
			
 
				+        else:
			
 
				+            data["models"].append({
			
 
				+                "model": model,
			
 
				+                "source": source,
			
 
				+                "prompt_tokens": prompt_tokens,
			
 
				+                "completion_tokens": completion_tokens,
			
 
				+                "total_tokens": total_tokens,
			
 
				+                "cache_read_tokens": cache_read_tokens,
			
 
				+                "call_count": 1,
			
 
				+            })
			
 
				+            data["summary"]["total_models"] = len(data["models"])
			
 
				+
			
 
				+        # 添加到timeline
			
 
				+        timeline_entry = {
			
 
				+            "sequence": sequence,
			
 
				+            "role": role,
			
 
				+            "model": model,
			
 
				+            "prompt_tokens": prompt_tokens,
			
 
				+            "completion_tokens": completion_tokens,
			
 
				+        }
			
 
				+        if cache_read_tokens > 0:
			
 
				+            timeline_entry["cache_read_tokens"] = cache_read_tokens
			
 
				+        if tool_name:
			
 
				+            timeline_entry["tool_name"] = tool_name
			
 
				+        data["timeline"].append(timeline_entry)
			
 
				+
			
 
				+        # 写回文件
			
 
				+        usage_file.write_text(json.dumps(data, indent=2, ensure_ascii=False))
			
 
				+
			
 
				     # ===== 事件流操作（用于 WebSocket 断线续传）=====
			
 
				 
			
 
				     async def get_events(
			
--- a/examples/how/tool/nanobanana.py
+++ b/examples/how/tool/nanobanana.py
@@ -1,9 +1,8 @@
 
				 """
			
 
				-NanoBanana Tool - 图像特征提取与图像生成
			
 
				+NanoBanana Tool - 图像生成
			
 
				 
			
 
				-该工具可以提取图片中的特征，也可以根据描述生成图片。
			
 
				-支持通过 OpenRouter 调用多模态模型，提取结构化的图像特征并保存为 JSON，
			
 
				-或基于输入图像生成新的图像。
			
 
				+通用图像生成工具，可以接受自然语言描述和/或图像输入，生成新的图像。
			
 
				+支持通过 OpenRouter 调用 Gemini 2.5 Flash Image 模型。
			
 
				 """
			
 
				 
			
 
				 import base64
			
@@ -22,23 +21,10 @@ from agent.tools import tool, ToolResult
 
				 OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
			
 
				 DEFAULT_TIMEOUT = 120.0
			
 
				 
			
 
				-DEFAULT_EXTRACTION_PROMPT = (
			
 
				-    "请从这张图像中提取跨场景相对稳定、可复用的视觉不变特征。"
			
 
				-    "输出严格 JSON，字段包含：identity_features、pose_features、appearance_features、"
			
 
				-    "material_features、style_features、uncertainty、notes。"
			
 
				-    "每个字段给出简洁要点，避免臆测。"
			
 
				-)
			
 
				-
			
 
				-DEFAULT_IMAGE_PROMPT = (
			
 
				-    "基于输入图像生成一张保留主体身份与关键视觉特征的新图像。"
			
 
				-    "保持人物核心特征一致，同时提升清晰度与可用性。"
			
 
				-)
			
 
				+DEFAULT_IMAGE_PROMPT = "根据输入生成图像。"
			
 
				 
			
 
				 DEFAULT_IMAGE_MODEL_CANDIDATES = [
			
 
				     "google/gemini-2.5-flash-image",
			
 
				-    # "google/gemini-3-pro-image-preview",
			
 
				-    # "black-forest-labs/flux.2-flex",
			
 
				-    # "black-forest-labs/flux.2-pro",
			
 
				 ]
			
 
				 
			
 
				 
			
@@ -214,84 +200,76 @@ def _normalize_model_id(model_id: str) -> str:
 
				     return m
			
 
				 
			
 
				 
			
 
				-@tool(description="可以提取图片中的特征，也可以根据描述生成图片")
			
 
				+@tool(description="通用图像生成工具，可以接受自然语言描述和/或图像输入，生成新的图像")
			
 
				 async def nanobanana(
			
 
				     image_path: str = "",
			
 
				     image_paths: Optional[List[str]] = None,
			
 
				-    output_file: Optional[str] = None,
			
 
				     prompt: Optional[str] = None,
			
 
				     model: Optional[str] = None,
			
 
				     max_tokens: int = 1200,
			
 
				-    generate_image: bool = False,
			
 
				     image_output_path: Optional[str] = None,
			
 
				 ) -> ToolResult:
			
 
				     """
			
 
				-    可以提取图片中的特征，也可以根据描述生成图片。
			
 
				+    通用图像生成工具，可以接受自然语言描述和/或图像输入，生成新的图像。
			
 
				 
			
 
				     Args:
			
 
				         image_path: 输入图片路径（单图模式，可选）
			
 
				-        image_paths: 输入图片路径列表（多图整体模式，可选）
			
 
				-        output_file: 输出 JSON 文件路径（可选，用于特征提取模式）
			
 
				-        prompt: 自定义提取指令或生成描述（可选）
			
 
				-        model: OpenRouter 模型名（可选，默认读取 NANOBANANA_MODEL 或使用 Gemini 视觉模型）
			
 
				+        image_paths: 输入图片路径列表（多图模式，可选）
			
 
				+        prompt: 自定义生成描述（可选，默认使用通用prompt）
			
 
				+        model: OpenRouter model name (optional). Tried first when given, then NANOBANANA_IMAGE_MODEL if set, then the default google/gemini-2.5-flash-image
			
 
				         max_tokens: 最大输出 token
			
 
				-        generate_image: 是否生成图片（False=提取特征，True=生成图片）
			
 
				-        image_output_path: 生成图片保存路径（generate_image=True 时可选）
			
 
				+        image_output_path: 生成图片保存路径（可选）
			
 
				 
			
 
				     Returns:
			
 
				-        ToolResult: 包含结构化特征和输出文件路径，或生成的图片路径
			
 
				+        ToolResult: 包含生成的图片路径
			
 
				     """
			
 
				     raw_paths: List[str] = []
			
 
				     if image_paths:
			
 
				         raw_paths.extend(image_paths)
			
 
				     if image_path:
			
 
				         raw_paths.append(image_path)
			
 
				-    if not raw_paths:
			
 
				-        return ToolResult(
			
 
				-            title="NanoBanana 提取失败",
			
 
				-            output="",
			
 
				-            error="未提供输入图片，请传入 image_path 或 image_paths",
			
 
				-        )
			
 
				 
			
 
				-    # 去重并检查路径
			
 
				-    unique_raw: List[str] = []
			
 
				-    seen = set()
			
 
				-    for p in raw_paths:
			
 
				-        if p and p not in seen:
			
 
				-            unique_raw.append(p)
			
 
				-            seen.add(p)
			
 
				-
			
 
				-    input_paths: List[Path] = [Path(p) for p in unique_raw]
			
 
				-    invalid = [str(p) for p in input_paths if (not p.exists() or not p.is_file())]
			
 
				-    if invalid:
			
 
				-        return ToolResult(
			
 
				-            title="NanoBanana 提取失败",
			
 
				-            output="",
			
 
				-            error=f"以下图片不存在或不可读: {invalid}",
			
 
				-        )
			
 
				+    # 图像输入是可选的，但如果提供了就需要验证
			
 
				+    input_paths: List[Path] = []
			
 
				+    if raw_paths:
			
 
				+        # 去重并检查路径
			
 
				+        unique_raw: List[str] = []
			
 
				+        seen = set()
			
 
				+        for p in raw_paths:
			
 
				+            if p and p not in seen:
			
 
				+                unique_raw.append(p)
			
 
				+                seen.add(p)
			
 
				+
			
 
				+        input_paths = [Path(p) for p in unique_raw]
			
 
				+        invalid = [str(p) for p in input_paths if (not p.exists() or not p.is_file())]
			
 
				+        if invalid:
			
 
				+            return ToolResult(
			
 
				+                title="NanoBanana 生成失败",
			
 
				+                output="",
			
 
				+                error=f"以下图片不存在或不可读: {invalid}",
			
 
				+            )
			
 
				 
			
 
				     api_key = _resolve_api_key()
			
 
				     if not api_key:
			
 
				         return ToolResult(
			
 
				-            title="NanoBanana 提取失败",
			
 
				+            title="NanoBanana 生成失败",
			
 
				             output="",
			
 
				             error="未找到 OpenRouter API Key，请设置 OPENROUTER_API_KEY 或 OPEN_ROUTER_API_KEY",
			
 
				         )
			
 
				 
			
 
				-    if generate_image:
			
 
				-        user_prompt = prompt or DEFAULT_IMAGE_PROMPT
			
 
				-    else:
			
 
				-        chosen_model = model or os.getenv("NANOBANANA_MODEL") or "google/gemini-2.5-flash"
			
 
				-        user_prompt = prompt or DEFAULT_EXTRACTION_PROMPT
			
 
				+    user_prompt = prompt or DEFAULT_IMAGE_PROMPT
			
 
				 
			
 
				-    try:
			
 
				-        image_data_urls = [_image_to_data_url(p) for p in input_paths]
			
 
				-    except Exception as e:
			
 
				-        return ToolResult(
			
 
				-            title="NanoBanana 提取失败",
			
 
				-            output="",
			
 
				-            error=f"图片编码失败: {e}",
			
 
				-        )
			
 
				+    # 编码图像（如果有）
			
 
				+    image_data_urls = []
			
 
				+    if input_paths:
			
 
				+        try:
			
 
				+            image_data_urls = [_image_to_data_url(p) for p in input_paths]
			
 
				+        except Exception as e:
			
 
				+            return ToolResult(
			
 
				+                title="NanoBanana 生成失败",
			
 
				+                output="",
			
 
				+                error=f"图片编码失败: {e}",
			
 
				+            )
			
 
				 
			
 
				     user_content: List[Dict[str, Any]] = [{"type": "text", "text": user_prompt}]
			
 
				     for u in image_data_urls:
			
@@ -301,11 +279,7 @@ async def nanobanana(
 
				         "messages": [
			
 
				             {
			
 
				                 "role": "system",
			
 
				-                "content": (
			
 
				-                    "你是视觉助手。"
			
 
				-                    "当任务为特征提取时输出 JSON 对象，不要输出 markdown。"
			
 
				-                    "当任务为图像生成时请返回图像。"
			
 
				-                ),
			
 
				+                "content": "你是图像生成助手。请根据用户的描述和/或输入图像生成新的图像。",
			
 
				             },
			
 
				             {
			
 
				                 "role": "user",
			
@@ -314,9 +288,8 @@ async def nanobanana(
 
				         ],
			
 
				         "temperature": 0.2,
			
 
				         "max_tokens": max_tokens,
			
 
				+        "modalities": ["image", "text"],
			
 
				     }
			
 
				-    if generate_image:
			
 
				-        payload["modalities"] = ["image", "text"]
			
 
				 
			
 
				     headers = {
			
 
				         "Authorization": f"Bearer {api_key}",
			
@@ -327,33 +300,28 @@ async def nanobanana(
 
				 
			
 
				     endpoint = f"{OPENROUTER_BASE_URL}/chat/completions"
			
 
				 
			
 
				-    # 图像生成模式：自动尝试多个可用模型，减少 404/invalid model 影响
			
 
				-    if generate_image:
			
 
				-        candidates: List[str] = []
			
 
				-        if model:
			
 
				-            candidates.append(_normalize_model_id(model))
			
 
				-        if env_model := os.getenv("NANOBANANA_IMAGE_MODEL"):
			
 
				-            candidates.append(_normalize_model_id(env_model))
			
 
				-        candidates.extend([_normalize_model_id(x) for x in DEFAULT_IMAGE_MODEL_CANDIDATES])
			
 
				-        # 去重并保持顺序
			
 
				-        dedup: List[str] = []
			
 
				-        seen = set()
			
 
				-        for m in candidates:
			
 
				-            if m and m not in seen:
			
 
				-                dedup.append(m)
			
 
				-                seen.add(m)
			
 
				-        candidates = dedup
			
 
				-    else:
			
 
				-        candidates = [chosen_model]
			
 
				+    # 自动尝试多个可用模型，减少 404/invalid model 影响
			
 
				+    candidates: List[str] = []
			
 
				+    if model:
			
 
				+        candidates.append(_normalize_model_id(model))
			
 
				+    if env_model := os.getenv("NANOBANANA_IMAGE_MODEL"):
			
 
				+        candidates.append(_normalize_model_id(env_model))
			
 
				+    candidates.extend([_normalize_model_id(x) for x in DEFAULT_IMAGE_MODEL_CANDIDATES])
			
 
				+    # 去重并保持顺序
			
 
				+    dedup: List[str] = []
			
 
				+    seen = set()
			
 
				+    for m in candidates:
			
 
				+        if m and m not in seen:
			
 
				+            dedup.append(m)
			
 
				+            seen.add(m)
			
 
				+    candidates = dedup
			
 
				 
			
 
				     data: Optional[Dict[str, Any]] = None
			
 
				     used_model: Optional[str] = None
			
 
				     errors: List[Dict[str, Any]] = []
			
 
				 
			
 
				     for cand in candidates:
			
 
				-        modality_attempts: List[Optional[List[str]]] = [None]
			
 
				-        if generate_image:
			
 
				-            modality_attempts = [["image", "text"], ["image"], None]
			
 
				+        modality_attempts: List[Optional[List[str]]] = [["image", "text"], ["image"], None]
			
 
				 
			
 
				         for mods in modality_attempts:
			
 
				             trial_payload = dict(payload)
			
@@ -392,9 +360,8 @@ async def nanobanana(
 
				             break
			
 
				 
			
 
				     if data is None:
			
 
				-        title = "NanoBanana 生成失败" if generate_image else "NanoBanana 提取失败"
			
 
				         return ToolResult(
			
 
				-            title=title,
			
 
				+            title="NanoBanana 生成失败",
			
 
				             output=json.dumps({"attempted_models": candidates, "errors": errors}, ensure_ascii=False, indent=2),
			
 
				             long_term_memory="All candidate models failed for this request",
			
 
				             metadata={"attempted_models": candidates, "errors": errors},
			
@@ -405,168 +372,115 @@ async def nanobanana(
 
				     choices = data.get("choices") or []
			
 
				     message = choices[0].get("message", {}) if choices else {}
			
 
				 
			
 
				-    # 图像生成分支
			
 
				-    if generate_image:
			
 
				-        refs = _extract_image_refs(choices[0] if choices else {}, message)
			
 
				-        if not refs:
			
 
				-            content = message.get("content")
			
 
				-            preview = ""
			
 
				-            if isinstance(content, str):
			
 
				-                preview = content[:500]
			
 
				-            elif isinstance(content, list):
			
 
				-                preview = json.dumps(content[:3], ensure_ascii=False)[:500]
			
 
				+    # 提取生成的图像
			
 
				+    refs = _extract_image_refs(choices[0] if choices else {}, message)
			
 
				+    if not refs:
			
 
				+        content = message.get("content")
			
 
				+        preview = ""
			
 
				+        if isinstance(content, str):
			
 
				+            preview = content[:500]
			
 
				+        elif isinstance(content, list):
			
 
				+            preview = json.dumps(content[:3], ensure_ascii=False)[:500]
			
 
				 
			
 
				-            return ToolResult(
			
 
				-                title="NanoBanana 生成失败",
			
 
				-                output=json.dumps(data, ensure_ascii=False, indent=2),
			
 
				-                error="模型未返回可解析图片（未在 message.images/choice.images/content 中发现图片）",
			
 
				-                metadata={
			
 
				-                    "model": chosen_model,
			
 
				-                    "choice_keys": list((choices[0] if choices else {}).keys()),
			
 
				-                    "message_keys": list(message.keys()) if isinstance(message, dict) else [],
			
 
				-                    "content_preview": preview,
			
 
				-                },
			
 
				-            )
			
 
				+        return ToolResult(
			
 
				+            title="NanoBanana 生成失败",
			
 
				+            output=json.dumps(data, ensure_ascii=False, indent=2),
			
 
				+            error="模型未返回可解析图片（未在 message.images/choice.images/content 中发现图片）",
			
 
				+            metadata={
			
 
				+                "model": chosen_model,
			
 
				+                "choice_keys": list((choices[0] if choices else {}).keys()),
			
 
				+                "message_keys": list(message.keys()) if isinstance(message, dict) else [],
			
 
				+                "content_preview": preview,
			
 
				+            },
			
 
				+        )
			
 
				 
			
 
				-        output_paths: List[str] = []
			
 
				-        if image_output_path:
			
 
				-            base_path = Path(image_output_path)
			
 
				+    output_paths: List[str] = []
			
 
				+    if image_output_path:
			
 
				+        base_path = Path(image_output_path)
			
 
				+    else:
			
 
				+        if len(input_paths) > 1:
			
 
				+            base_path = input_paths[0].parent / "set_generated.png"
			
 
				         else:
			
 
				-            if len(input_paths) > 1:
			
 
				-                base_path = input_paths[0].parent / "set_generated.png"
			
 
				-            else:
			
 
				-                base_path = input_paths[0].parent / f"{input_paths[0].stem}_generated.png"
			
 
				-        base_path.parent.mkdir(parents=True, exist_ok=True)
			
 
				-
			
 
				-        for idx, ref in enumerate(refs):
			
 
				-            kind = ref.get("kind", "")
			
 
				-            mime_type = "image/png"
			
 
				-            raw_bytes: Optional[bytes] = None
			
 
				-
			
 
				-            if kind == "data_url":
			
 
				-                m = re.match(r"^data:([^;]+);base64,(.+)$", ref.get("value", ""), flags=re.DOTALL)
			
 
				-                if not m:
			
 
				-                    continue
			
 
				-                mime_type = m.group(1)
			
 
				-                raw_bytes = base64.b64decode(m.group(2))
			
 
				-            elif kind == "base64":
			
 
				-                mime_type = ref.get("mime_type", "image/png")
			
 
				-                raw_bytes = base64.b64decode(ref.get("value", ""))
			
 
				-            elif kind == "url":
			
 
				-                url = ref.get("value", "")
			
 
				-                try:
			
 
				-                    with httpx.Client(timeout=DEFAULT_TIMEOUT) as client:
			
 
				-                        r = client.get(url)
			
 
				-                        r.raise_for_status()
			
 
				-                        raw_bytes = r.content
			
 
				-                        mime_type = r.headers.get("content-type", "image/png").split(";")[0]
			
 
				-                except Exception:
			
 
				-                    continue
			
 
				-            else:
			
 
				-                continue
			
 
				+            base_path = input_paths[0].parent / f"{input_paths[0].stem}_generated.png"
			
 
				+    base_path.parent.mkdir(parents=True, exist_ok=True)
			
 
				 
			
 
				-            if not raw_bytes:
			
 
				-                continue
			
 
				+    for idx, ref in enumerate(refs):
			
 
				+        kind = ref.get("kind", "")
			
 
				+        mime_type = "image/png"
			
 
				+        raw_bytes: Optional[bytes] = None
			
 
				 
			
 
				-            ext = _mime_to_ext(mime_type)
			
 
				-            if len(refs) == 1:
			
 
				-                target = base_path
			
 
				-                if target.suffix.lower() not in [".png", ".jpg", ".jpeg", ".webp"]:
			
 
				-                    target = target.with_suffix(ext)
			
 
				-            else:
			
 
				-                stem = base_path.stem
			
 
				-                target = base_path.with_name(f"{stem}_{idx+1}{ext}")
			
 
				+        if kind == "data_url":
			
 
				+            m = re.match(r"^data:([^;]+);base64,(.+)$", ref.get("value", ""), flags=re.DOTALL)
			
 
				+            if not m:
			
 
				+                continue
			
 
				+            mime_type = m.group(1)
			
 
				+            raw_bytes = base64.b64decode(m.group(2))
			
 
				+        elif kind == "base64":
			
 
				+            mime_type = ref.get("mime_type", "image/png")
			
 
				+            raw_bytes = base64.b64decode(ref.get("value", ""))
			
 
				+        elif kind == "url":
			
 
				+            url = ref.get("value", "")
			
 
				             try:
			
 
				-                target.write_bytes(raw_bytes)
			
 
				-                output_paths.append(str(target))
			
 
				-            except Exception as e:
			
 
				-                return ToolResult(
			
 
				-                    title="NanoBanana 生成失败",
			
 
				-                    output="",
			
 
				-                    error=f"写入生成图片失败: {e}",
			
 
				-                    metadata={"model": chosen_model},
			
 
				-                )
			
 
				-
			
 
				-        if not output_paths:
			
 
				+                with httpx.Client(timeout=DEFAULT_TIMEOUT) as client:
			
 
				+                    r = client.get(url)
			
 
				+                    r.raise_for_status()
			
 
				+                    raw_bytes = r.content
			
 
				+                    mime_type = r.headers.get("content-type", "image/png").split(";")[0]
			
 
				+            except Exception:
			
 
				+                continue
			
 
				+        else:
			
 
				+            continue
			
 
				+
			
 
				+        if not raw_bytes:
			
 
				+            continue
			
 
				+
			
 
				+        ext = _mime_to_ext(mime_type)
			
 
				+        if len(refs) == 1:
			
 
				+            target = base_path
			
 
				+            if target.suffix.lower() not in [".png", ".jpg", ".jpeg", ".webp"]:
			
 
				+                target = target.with_suffix(ext)
			
 
				+        else:
			
 
				+            stem = base_path.stem
			
 
				+            target = base_path.with_name(f"{stem}_{idx+1}{ext}")
			
 
				+        try:
			
 
				+            target.write_bytes(raw_bytes)
			
 
				+            output_paths.append(str(target))
			
 
				+        except Exception as e:
			
 
				             return ToolResult(
			
 
				                 title="NanoBanana 生成失败",
			
 
				-                output=json.dumps(data, ensure_ascii=False, indent=2),
			
 
				-                error="检测到图片引用但写入失败（可能是无效 base64 或 URL 不可访问）",
			
 
				-                metadata={"model": chosen_model, "ref_count": len(refs)},
			
 
				+                output="",
			
 
				+                error=f"写入生成图片失败: {e}",
			
 
				+                metadata={"model": chosen_model},
			
 
				             )
			
 
				 
			
 
				-        usage = data.get("usage", {})
			
 
				-        prompt_tokens = usage.get("prompt_tokens") or usage.get("input_tokens", 0)
			
 
				-        completion_tokens = usage.get("completion_tokens") or usage.get("output_tokens", 0)
			
 
				-        summary = {
			
 
				-            "model": chosen_model,
			
 
				-            "input_images": [str(p) for p in input_paths],
			
 
				-            "input_count": len(input_paths),
			
 
				-            "generated_images": output_paths,
			
 
				-            "prompt_tokens": prompt_tokens,
			
 
				-            "completion_tokens": completion_tokens,
			
 
				-        }
			
 
				-        return ToolResult(
			
 
				-            title="NanoBanana 图片生成完成",
			
 
				-            output=json.dumps({"summary": summary}, ensure_ascii=False, indent=2),
			
 
				-            long_term_memory=f"Generated {len(output_paths)} image(s) from {len(input_paths)} input image(s) using {chosen_model}",
			
 
				-            attachments=output_paths,
			
 
				-            metadata=summary,
			
 
				-        )
			
 
				-
			
 
				-    content = message.get("content") or ""
			
 
				-    if not content:
			
 
				+    if not output_paths:
			
 
				         return ToolResult(
			
 
				-            title="NanoBanana 提取失败",
			
 
				+            title="NanoBanana 生成失败",
			
 
				             output=json.dumps(data, ensure_ascii=False, indent=2),
			
 
				-            error="模型未返回内容",
			
 
				+            error="检测到图片引用但写入失败（可能是无效 base64 或 URL 不可访问）",
			
 
				+            metadata={"model": chosen_model, "ref_count": len(refs)},
			
 
				         )
			
 
				 
			
 
				-    try:
			
 
				-        parsed = _safe_json_parse(content)
			
 
				-    except Exception as e:
			
 
				-        return ToolResult(
			
 
				-            title="NanoBanana 提取失败",
			
 
				-            output=content,
			
 
				-            error=f"模型返回非 JSON 内容，解析失败: {e}",
			
 
				-            metadata={"model": chosen_model},
			
 
				-        )
			
 
				-
			
 
				-    if output_file:
			
 
				-        out_path = Path(output_file)
			
 
				-    else:
			
 
				-        if len(input_paths) > 1:
			
 
				-            out_path = input_paths[0].parent / "set_invariant_features.json"
			
 
				-        else:
			
 
				-            out_path = input_paths[0].parent / f"{input_paths[0].stem}_invariant_features.json"
			
 
				-
			
 
				-    out_path.parent.mkdir(parents=True, exist_ok=True)
			
 
				-    out_path.write_text(json.dumps(parsed, ensure_ascii=False, indent=2), encoding="utf-8")
			
 
				-
			
 
				     usage = data.get("usage", {})
			
 
				     prompt_tokens = usage.get("prompt_tokens") or usage.get("input_tokens", 0)
			
 
				     completion_tokens = usage.get("completion_tokens") or usage.get("output_tokens", 0)
			
 
				-
			
 
				     summary = {
			
 
				         "model": chosen_model,
			
 
				         "input_images": [str(p) for p in input_paths],
			
 
				         "input_count": len(input_paths),
			
 
				-        "output_file": str(out_path),
			
 
				+        "generated_images": output_paths,
			
 
				         "prompt_tokens": prompt_tokens,
			
 
				         "completion_tokens": completion_tokens,
			
 
				     }
			
 
				-
			
 
				     return ToolResult(
			
 
				-        title="NanoBanana 不变特征提取完成",
			
 
				-        output=json.dumps(
			
 
				-            {
			
 
				-                "summary": summary,
			
 
				-                "features": parsed,
			
 
				-            },
			
 
				-            ensure_ascii=False,
			
 
				-            indent=2,
			
 
				-        ),
			
 
				-        long_term_memory=f"Extracted invariant features from {len(input_paths)} input image(s) using {chosen_model}",
			
 
				-        attachments=[str(out_path)],
			
 
				+        title="NanoBanana 图片生成完成",
			
 
				+        output=json.dumps({"summary": summary}, ensure_ascii=False, indent=2),
			
 
				+        long_term_memory=f"Generated {len(output_paths)} image(s) from {len(input_paths)} input image(s) using {chosen_model}",
			
 
				+        attachments=output_paths,
			
 
				         metadata=summary,
			
 
				+        tool_usage={
			
 
				+            "model": chosen_model,
			
 
				+            "prompt_tokens": prompt_tokens,
			
 
				+            "completion_tokens": completion_tokens,
			
 
				+        }
			
 
				     )