kevin.yang 4 giorni fa
parent
commit
e4dd2bcf6d

+ 80 - 0
data/registry.json

@@ -475,6 +475,86 @@
       "group_ids": [
         "ji_meng_task_lifecycle"
       ]
+    },
+    {
+      "tool_id": "nano_banana",
+      "name": "Nano Banana(Gemini 图模)",
+      "tool_slug_ids": [],
+      "category": "cv",
+      "description": "通过 Google Gemini 原生图模 REST generateContent 文生图/图生图。需 GEMINI_API_KEY;可选 gemini-2.5-flash-image、gemini-3.1-flash-image-preview 等。详见 https://ai.google.dev/gemini-api/docs/image-generation?hl=zh-cn#rest",
+      "input_schema": {
+        "type": "object",
+        "properties": {
+          "prompt": {
+            "type": "string",
+            "description": "提示词(文生图或与参考图配合做编辑)"
+          },
+          "model": {
+            "type": "string",
+            "description": "模型 ID;省略则使用环境变量 GEMINI_IMAGE_MODEL,默认 gemini-2.5-flash-image。示例:gemini-3.1-flash-image-preview"
+          },
+          "aspect_ratio": {
+            "type": "string",
+            "description": "输出宽高比,如 1:1、16:9(对应 generationConfig.imageConfig.aspectRatio)"
+          },
+          "image_size": {
+            "type": "string",
+            "description": "Gemini 3.x 输出规格:512、1K、2K、4K(须大写 K,见官方文档)"
+          },
+          "response_modalities": {
+            "type": "array",
+            "items": {
+              "type": "string"
+            },
+            "description": "如 [\"TEXT\",\"IMAGE\"] 或 [\"IMAGE\"];省略则由 API 默认"
+          },
+          "images": {
+            "type": "array",
+            "description": "可选参考图,每项为 {mime_type, data},data 为 Base64 或 data URL",
+            "items": {
+              "type": "object",
+              "properties": {
+                "mime_type": {
+                  "type": "string"
+                },
+                "data": {
+                  "type": "string"
+                }
+              },
+              "required": [
+                "data"
+              ]
+            }
+          }
+        },
+        "required": [
+          "prompt"
+        ]
+      },
+      "output_schema": {
+        "type": "object",
+        "properties": {
+          "images": {
+            "type": "array",
+            "items": {
+              "type": "string"
+            },
+            "description": "data:mime;base64,... 列表"
+          },
+          "model": {
+            "type": "string",
+            "description": "实际调用的模型 ID"
+          },
+          "text": {
+            "type": "string",
+            "description": "若返回文本部分则在此汇总"
+          }
+        }
+      },
+      "stream_support": false,
+      "status": "active",
+      "backend_runtime": "local",
+      "group_ids": []
     }
   ],
   "version": "2.0"

+ 14 - 0
data/sources.json

@@ -97,6 +97,20 @@
         "http_method": "POST",
         "internal_port": 0
       }
+    ],
+    "nano_banana": [
+      {
+        "type": "local",
+        "host_dir": "tools/local/nano_banana",
+        "container_id": "",
+        "image": "",
+        "hub_url": "",
+        "hub_tool_path": "",
+        "hub_api_key": "",
+        "endpoint_path": "/generate",
+        "http_method": "POST",
+        "internal_port": 0
+      }
     ]
   }
 }

+ 1 - 0
pyproject.toml

@@ -42,4 +42,5 @@ members = [
     "tools/local/task_0cd69d84",
     "tools/local/runcomfy_stop_env",
     "tools/local/ji_meng",
+    "tools/local/nano_banana",
 ]

+ 144 - 0
tests/test_nano_banana.py

@@ -0,0 +1,144 @@
+"""测试 nano_banana — Router 调用 Gemini 图模(HTTP generateContent)
+
+前提:
+    - data/registry.json + data/sources.json 已注册 tool_id=nano_banana
+    - tools/local/nano_banana 已提供 POST /generate,且 .env 中配置 GEMINI_API_KEY
+
+用法:
+    1. uv run python -m tool_agent
+    2. uv run python tests/test_nano_banana.py
+
+模型切换(任选其一):
+    - 不传 NANO_BANANA_MODEL:请求体不含 model,由工具侧默认(如 gemini-2.5-flash-image /
+      环境变量 GEMINI_IMAGE_MODEL)
+    - 显式切换预览图模:
+        NANO_BANANA_MODEL=gemini-3.1-flash-image-preview uv run python tests/test_nano_banana.py
+
+环境变量:
+    TOOL_AGENT_ROUTER_URL   默认 http://127.0.0.1:8001
+    NANO_BANANA_TOOL_ID     默认 nano_banana
+    NANO_BANANA_TEST_PROMPT 覆盖默认短提示词
+    NANO_BANANA_MODEL       非空时作为 params["model"] 传给 /run_tool
+"""
+
+import io
+import os
+import sys
+from typing import Any
+
+# On Windows, switch stdout to UTF-8 before anything is printed — presumably
+# so the non-ASCII status messages below are not garbled by the console's
+# default encoding.
+if sys.platform == "win32":
+    _out = sys.stdout
+    # reconfigure() is only called when stdout is a TextIOWrapper; any other
+    # stream object (e.g. a replaced/captured stdout) is left untouched.
+    if isinstance(_out, io.TextIOWrapper):
+        _out.reconfigure(encoding="utf-8")
+
+import httpx
+
+# Base URL of the Router under test (override with TOOL_AGENT_ROUTER_URL).
+ROUTER_URL = os.environ.get("TOOL_AGENT_ROUTER_URL", "http://127.0.0.1:8001")
+# Tool identifier sent to POST /run_tool and looked up in GET /tools.
+TOOL_ID = os.environ.get("NANO_BANANA_TOOL_ID", "nano_banana")
+# Optional model override; an empty string means "do not send a model field".
+NANO_BANANA_MODEL = os.environ.get("NANO_BANANA_MODEL", "").strip()
+# Prompt used for the single generation call made by this script.
+TEST_PROMPT = os.environ.get(
+    "NANO_BANANA_TEST_PROMPT",
+    "A minimal flat icon of a yellow banana on white background, no text",
+)
+
+
+def run_tool(params: dict[str, Any], timeout: float = 180.0) -> dict[str, Any]:
+    resp = httpx.post(
+        f"{ROUTER_URL}/run_tool",
+        json={"tool_id": TOOL_ID, "params": params},
+        timeout=timeout,
+    )
+    resp.raise_for_status()
+    body = resp.json()
+    if body.get("status") != "success":
+        raise RuntimeError(body.get("error") or str(body))
+    result = body.get("result")
+    if isinstance(result, dict) and result.get("status") == "error":
+        raise RuntimeError(result.get("error", str(result)))
+    return result if isinstance(result, dict) else {}
+
+
+def _has_image_payload(data: dict[str, Any]) -> bool:
+    if not data:
+        return False
+    if data.get("images"):
+        return True
+    if data.get("image") and isinstance(data["image"], str) and len(data["image"]) > 100:
+        return True
+    if data.get("image_base64"):
+        return True
+    cands = data.get("candidates")
+    if isinstance(cands, list) and cands:
+        parts = cands[0].get("content", {}).get("parts", [])
+        for p in parts:
+            if isinstance(p, dict) and (p.get("inlineData") or p.get("inline_data")):
+                return True
+    return False
+
+
+def main() -> None:
+    """Run the end-to-end smoke test against a running Router.
+
+    Steps: print the effective configuration, probe ``/health``, check that
+    ``TOOL_ID`` is listed by ``GET /tools``, call the tool once with
+    ``TEST_PROMPT`` and check that the result carries image data.
+
+    Exits with status 1 when the Router connection fails with
+    ``httpx.ConnectError``, the tool is not registered, the tool call raises
+    ``RuntimeError`` / ``httpx.HTTPError``, or no image field is recognised.
+    Other errors (e.g. a failing ``GET /tools``) propagate as exceptions.
+    """
+    print("=" * 50)
+    print("测试 nano_banana(Gemini 图模,可切换 model)")
+    print("=" * 50)
+    print(f"ROUTER_URL: {ROUTER_URL}")
+    print(f"tool_id:    {TOOL_ID}")
+    if NANO_BANANA_MODEL:
+        print(f"model:      {NANO_BANANA_MODEL}(经 params 传入)")
+    else:
+        print("model:      (未传,使用工具默认 / GEMINI_IMAGE_MODEL)")
+
+    # Fail fast with a readable hint when the Router is not running.
+    try:
+        r = httpx.get(f"{ROUTER_URL}/health", timeout=3)
+        print(f"Router 状态: {r.json()}")
+    except httpx.ConnectError:
+        print(f"无法连接 Router ({ROUTER_URL}),请先: uv run python -m tool_agent")
+        sys.exit(1)
+
+    # Verify that the tool is registered before spending time on a generation call.
+    print("\n--- 校验工具已注册 ---")
+    tr = httpx.get(f"{ROUTER_URL}/tools", timeout=30)
+    tr.raise_for_status()
+    tools = tr.json().get("tools", [])
+    ids = {t["tool_id"] for t in tools}
+    if TOOL_ID not in ids:
+        print(f"错误: {TOOL_ID!r} 不在 GET /tools 中。当前示例: {sorted(ids)[:15]}...")
+        sys.exit(1)
+    meta = next(t for t in tools if t["tool_id"] == TOOL_ID)
+    print(f"  {TOOL_ID}: {meta.get('name', '')} (state={meta.get('state')})")
+    # Informational only: report whether the registered schema declares `model`.
+    props = (meta.get("input_schema") or {}).get("properties") or {}
+    if "model" in props:
+        print("  input_schema 已声明 model(注册与实现应对齐)")
+    else:
+        print("  提示: input_schema 尚无 model 字段,注册表宜补充以便编排知晓可切换模型")
+
+    # Only send `model` when the override is set, so the tool's default applies otherwise.
+    params: dict[str, Any] = {"prompt": TEST_PROMPT}
+    if NANO_BANANA_MODEL:
+        params["model"] = NANO_BANANA_MODEL
+
+    print("\n--- 调用生图 ---")
+    print(f"prompt: {TEST_PROMPT[:80]}{'...' if len(TEST_PROMPT) > 80 else ''}")
+
+    try:
+        data = run_tool(params, timeout=180.0)
+    except (RuntimeError, httpx.HTTPError) as e:
+        print(f"错误: {e}")
+        sys.exit(1)
+
+    print(f"\n下游返回 keys: {list(data.keys())[:20]}")
+    # A model mismatch is only a warning; it does not fail the test.
+    if rm := data.get("model"):
+        print(f"下游报告 model: {rm}")
+        if NANO_BANANA_MODEL and rm != NANO_BANANA_MODEL:
+            print(
+                f"警告: 请求 model={NANO_BANANA_MODEL!r} 与返回 model={rm!r} 不一致(若工具会规范化 ID 可忽略)"
+            )
+
+    if _has_image_payload(data):
+        print("\n检测到图片相关字段,测试通过!")
+        return
+
+    # No recognised image field: dump a truncated result to help diagnosis and fail.
+    print("\n未识别到常见图片字段(images / image / candidates[].inlineData 等)。")
+    print(f"完整结果(截断): {str(data)[:800]}")
+    sys.exit(1)
+
+
+# Script entry point: run the smoke test only when executed directly.
+if __name__ == "__main__":
+    main()

+ 8 - 0
tools/local/nano_banana/.env.example

@@ -0,0 +1,8 @@
+# 必填:https://ai.google.dev/gemini-api/docs/image-generation
+GEMINI_API_KEY=
+
+# 未在请求体传 model 时使用(默认 gemini-2.5-flash-image)
+# GEMINI_IMAGE_MODEL=gemini-3.1-flash-image-preview
+
+# 可选,一般无需修改
+GEMINI_API_BASE=https://airouter.piaoquantv.com/v1beta

+ 4 - 0
tools/local/nano_banana/.gitignore

@@ -0,0 +1,4 @@
+.venv/
+__pycache__/
+*.pyc
+.env

+ 1 - 0
tools/local/nano_banana/.python-version

@@ -0,0 +1 @@
+3.12

+ 149 - 0
tools/local/nano_banana/gemini_image_client.py

@@ -0,0 +1,149 @@
+"""Gemini 原生图模 — REST `generateContent`(与官方文档一致,无 SDK)。
+
+参考: https://ai.google.dev/gemini-api/docs/image-generation?hl=zh-cn#rest
+"""
+
+from __future__ import annotations
+
+import os
+import re
+from typing import Any
+
+import httpx
+from dotenv import load_dotenv
+
+# Load a .env file (if present) before the os.environ reads below; the return
+# value is deliberately discarded.
+_ = load_dotenv()
+
+# Model used when neither the request nor GEMINI_IMAGE_MODEL names one.
+DEFAULT_MODEL = "gemini-2.5-flash-image"
+# Base URL of the generateContent API; read once at import time and
+# overridable through the GEMINI_API_BASE environment variable.
+GEMINI_API_BASE = os.environ.get(
+    "GEMINI_API_BASE", "https://airouter.piaoquantv.com/v1beta"
+)
+
+# Matches "data:<mime>;base64,<payload>" (case-insensitive, payload may span
+# lines) and captures the payload as group 1.
+_DATA_URL_RE = re.compile(r"^data:[^;]+;base64,(.+)$", re.I | re.S)
+
+
+def _strip_data_url(b64_or_data_url: str) -> str:
+    s = b64_or_data_url.strip()
+    m = _DATA_URL_RE.match(s)
+    return m.group(1) if m else s
+
+
+def _build_parts(
+    prompt: str,
+    images: list[dict[str, str]] | None,
+) -> list[dict[str, Any]]:
+    parts: list[dict[str, Any]] = [{"text": prompt}]
+    if not images:
+        return parts
+    for img in images:
+        mime = (img.get("mime_type") or img.get("mimeType") or "image/png").strip()
+        raw = _strip_data_url(img.get("data") or "")
+        if not raw:
+            raise ValueError("images[].data 不能为空(Base64 或 data URL)")
+        parts.append({"inline_data": {"mime_type": mime, "data": raw}})
+    return parts
+
+
+def _merge_generation_config(
+    *,
+    aspect_ratio: str | None,
+    image_size: str | None,
+    response_modalities: list[str] | None,
+) -> dict[str, Any] | None:
+    cfg: dict[str, Any] = {}
+    if response_modalities:
+        cfg["responseModalities"] = response_modalities
+    img_cfg: dict[str, str] = {}
+    if aspect_ratio:
+        img_cfg["aspectRatio"] = aspect_ratio.strip()
+    if image_size:
+        img_cfg["imageSize"] = image_size.strip()
+    if img_cfg:
+        cfg["imageConfig"] = img_cfg
+    return cfg or None
+
+
+def generate_content(
+    *,
+    prompt: str,
+    model: str | None,
+    aspect_ratio: str | None = None,
+    image_size: str | None = None,
+    response_modalities: list[str] | None = None,
+    images: list[dict[str, str]] | None = None,
+) -> dict[str, Any]:
+    api_key = os.environ.get("GEMINI_API_KEY", "").strip()
+    if not api_key:
+        raise ValueError("缺少环境变量 GEMINI_API_KEY")
+
+    resolved = (model or os.environ.get("GEMINI_IMAGE_MODEL") or DEFAULT_MODEL).strip()
+    url = f"{GEMINI_API_BASE.rstrip('/')}/models/{resolved}:generateContent"
+
+    body: dict[str, Any] = {
+        "contents": [
+            {
+                "role": "user",
+                "parts": _build_parts(prompt, images),
+            }
+        ],
+    }
+    gen_cfg = _merge_generation_config(
+        aspect_ratio=aspect_ratio,
+        image_size=image_size,
+        response_modalities=response_modalities,
+    )
+    if gen_cfg:
+        body["generationConfig"] = gen_cfg
+
+    headers = {
+        "x-goog-api-key": api_key,
+        "Content-Type": "application/json",
+    }
+
+    with httpx.Client(timeout=300.0) as client:
+        r = client.post(url, headers=headers, json=body)
+        try:
+            data = r.json()
+        except Exception:
+            r.raise_for_status()
+            raise RuntimeError(r.text[:2000]) from None
+
+    if r.status_code >= 400:
+        err = data.get("error") if isinstance(data, dict) else None
+        msg = err.get("message", str(data)) if isinstance(err, dict) else str(data)
+        raise RuntimeError(f"Gemini HTTP {r.status_code}: {msg}")
+
+    if not isinstance(data, dict):
+        raise RuntimeError("响应不是 JSON 对象")
+
+    if data.get("error"):
+        raise RuntimeError(str(data["error"]))
+
+    images_out: list[str] = []
+    texts: list[str] = []
+    for cand in data.get("candidates") or []:
+        if not isinstance(cand, dict):
+            continue
+        for part in cand.get("content", {}).get("parts") or []:
+            if not isinstance(part, dict):
+                continue
+            if part.get("text"):
+                texts.append(str(part["text"]))
+            inline = part.get("inlineData") or part.get("inline_data")
+            if isinstance(inline, dict):
+                b64 = inline.get("data")
+                if b64:
+                    mime = (
+                        inline.get("mimeType")
+                        or inline.get("mime_type")
+                        or "image/png"
+                    )
+                    images_out.append(f"data:{mime};base64,{b64}")
+
+    out: dict[str, Any] = {
+        "images": images_out,
+        "model": resolved,
+    }
+    if texts:
+        out["text"] = "\n".join(texts)
+    return out

+ 93 - 0
tools/local/nano_banana/main.py

@@ -0,0 +1,93 @@
+"""nano_banana — 本地 HTTP 封装 Gemini 原生图模(REST generateContent)。
+
+环境变量:
+  GEMINI_API_KEY      必填,对应文档中的 x-goog-api-key
+  GEMINI_IMAGE_MODEL  可选,未在请求体指定 model 时使用,默认 gemini-2.5-flash-image
+  GEMINI_API_BASE     可选,默认 https://airouter.piaoquantv.com/v1beta(官方端点为 https://generativelanguage.googleapis.com/v1beta)
+
+接口:
+  GET  /health
+  POST /generate     文生图 / 图+文生图,字段与 registry input_schema 对齐
+
+文档: https://ai.google.dev/gemini-api/docs/image-generation?hl=zh-cn#rest
+"""
+
+from __future__ import annotations
+
+import argparse
+
+import uvicorn
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel, Field
+
+from gemini_image_client import generate_content
+
+# ASGI application object; the route handlers below register themselves on it.
+app = FastAPI(title="Nano Banana — Gemini Image (REST)")
+
+
+# Request model for one reference image.
+#   mime_type: MIME type of the image; defaults to "image/png" when omitted.
+#   data:      required; raw Base64 or a full "data:image/...;base64,..." URL.
+# NOTE(review): the class docstring and the Field descriptions are left
+# untouched because Pydantic may surface them in the generated schema.
+class ImageInput(BaseModel):
+    """参考图:Base64 或 data URL;字段名与 REST inline_data 对应。"""
+
+    mime_type: str = Field(default="image/png", description="如 image/png、image/jpeg")
+    data: str = Field(..., description="图片 Base64,或 data:image/...;base64,...")
+
+
+# Request body of POST /generate.  Only `prompt` is required; every other
+# field defaults to None and is forwarded unchanged to generate_content().
+# Field descriptions are runtime strings (schema text) and are left as-is.
+class GenerateRequest(BaseModel):
+    # Main text prompt.
+    prompt: str = Field(..., description="主提示词")
+    # Model ID override; None lets the client pick GEMINI_IMAGE_MODEL / its default.
+    model: str | None = Field(
+        default=None,
+        description=(
+            "模型 ID,如 gemini-2.5-flash-image、gemini-3.1-flash-image-preview;"
+            "省略则使用 GEMINI_IMAGE_MODEL / 内置默认"
+        ),
+    )
+    # Output aspect ratio, e.g. "1:1" or "16:9".
+    aspect_ratio: str | None = Field(
+        default=None,
+        description='宽高比,如 "1:1"、"16:9"(见官方文档 imageConfig.aspectRatio)',
+    )
+    # Output resolution label (512 / 1K / 2K / 4K per the description below).
+    image_size: str | None = Field(
+        default=None,
+        description='Gemini 3.x 输出分辨率:512、1K、2K、4K(generationConfig.imageConfig.imageSize)',
+    )
+    # Requested response modalities, e.g. ["TEXT", "IMAGE"].
+    response_modalities: list[str] | None = Field(
+        default=None,
+        description='如 ["TEXT","IMAGE"] 或 ["IMAGE"];省略则由 API 默认',
+    )
+    # Optional reference images for image-to-image / editing requests.
+    images: list[ImageInput] | None = Field(
+        default=None,
+        description="可选参考图列表(图生图 / 编辑),对应 REST parts 中的 inline_data",
+    )
+
+
+# Health endpoint: always answers {"status": "ok"} without touching the
+# upstream API or checking configuration.
+@app.get("/health")
+def health() -> dict[str, str]:
+    return {"status": "ok"}
+
+
+@app.post("/generate")
+def generate(req: GenerateRequest) -> dict:
+    try:
+        imgs = None
+        if req.images:
+            imgs = [{"mime_type": i.mime_type, "data": i.data} for i in req.images]
+        return generate_content(
+            prompt=req.prompt,
+            model=req.model,
+            aspect_ratio=req.aspect_ratio,
+            image_size=req.image_size,
+            response_modalities=req.response_modalities,
+            images=imgs,
+        )
+    except ValueError as e:
+        raise HTTPException(status_code=503, detail=str(e)) from e
+    except RuntimeError as e:
+        raise HTTPException(status_code=502, detail=str(e)) from e
+    except Exception as e:
+        raise HTTPException(status_code=502, detail=str(e)) from e
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--port", type=int, default=8001)
+    args = parser.parse_args()
+    uvicorn.run(app, host="0.0.0.0", port=args.port)

+ 12 - 0
tools/local/nano_banana/pyproject.toml

@@ -0,0 +1,12 @@
+[project]
+name = "nano-banana"
+version = "0.1.0"
+description = "Gemini 原生图模 REST 封装:POST /generate(generateContent)"
+requires-python = ">=3.11"
+dependencies = [
+    "fastapi>=0.115.0",
+    "uvicorn>=0.30.0",
+    "pydantic>=2.0.0",
+    "python-dotenv>=1.0.0",
+    "httpx>=0.27.0",
+]

+ 22 - 0
uv.lock

@@ -8,6 +8,7 @@ members = [
     "ji-meng",
     "launch-comfy-env",
     "liblibai-controlnet",
+    "nano-banana",
     "runcomfy-stop-env",
     "task-0cd69d84",
     "tool-agent",
@@ -541,6 +542,27 @@ wheels = [
     { url = "https://mirrors.ustc.edu.cn/pypi/packages/fd/d9/eaa1f80170d2b7c5ba23f3b59f766f3a0bb41155fbc32a69adfa1adaaef9/mcp-1.26.0-py3-none-any.whl", hash = "sha256:904a21c33c25aa98ddbeb47273033c435e595bbacfdb177f4bd87f6dceebe1ca", size = 233615, upload-time = "2026-01-24T19:40:30.652Z" },
 ]
 
+[[package]]
+name = "nano-banana"
+version = "0.1.0"
+source = { virtual = "tools/local/nano_banana" }
+dependencies = [
+    { name = "fastapi" },
+    { name = "httpx" },
+    { name = "pydantic" },
+    { name = "python-dotenv" },
+    { name = "uvicorn" },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "fastapi", specifier = ">=0.115.0" },
+    { name = "httpx", specifier = ">=0.27.0" },
+    { name = "pydantic", specifier = ">=2.0.0" },
+    { name = "python-dotenv", specifier = ">=1.0.0" },
+    { name = "uvicorn", specifier = ">=0.30.0" },
+]
+
 [[package]]
 name = "packaging"
 version = "26.0"