import asyncio
import io
import urllib.request
from typing import List

import torch
from PIL import Image

from .clip_model import get_model
# Load the shared CLIP model and its preprocessing objects once, at import
# time.  NOTE(review): module-level side effect — get_model() runs on first
# import of this module; presumably it loads weights onto DEVICE.  Confirm
# callers expect this eager initialization.
model, image_processor, tokenizer, DEVICE, DTYPE, MAX_BATCH = get_model()
- def _normalize(x: torch.Tensor) -> torch.Tensor:
- return x / (x.norm(dim=-1, keepdim=True) + 1e-12)
- def _to_list(x: torch.Tensor):
- return x.detach().cpu().tolist()
def _fetch_image(url: str) -> Image.Image:
    """Download *url* and decode the response as an RGB PIL image (blocking)."""
    with urllib.request.urlopen(url, timeout=15) as resp:
        return Image.open(io.BytesIO(resp.read())).convert("RGB")


async def embed_image_url(img_url_list: List[str]):
    """Embed a list of image URLs with the CLIP image encoder.

    Each URL is downloaded and decoded, then images are encoded in batches
    of at most ``MAX_BATCH``.  Downloads run in worker threads via
    ``asyncio.to_thread`` so the blocking network/PIL work does not stall
    the event loop (the original called urllib directly inside this
    coroutine).

    Args:
        img_url_list: HTTP(S) URLs of the images to embed.

    Returns:
        A list of L2-normalized embedding vectors (``list[float]``), one
        per input URL, in input order.

    Raises:
        urllib.error.URLError: if a download fails.
        PIL.UnidentifiedImageError: if a response is not a decodable image.
    """
    if not img_url_list:
        return []

    # Sequential awaits keep result order aligned with the input order.
    images = [await asyncio.to_thread(_fetch_image, u) for u in img_url_list]

    outputs: List[List[float]] = []
    for start in range(0, len(images), MAX_BATCH):
        batch = images[start:start + MAX_BATCH]
        # Only pixel_values are needed — no text inputs are passed.
        inputs = image_processor(images=batch, return_tensors="pt")
        pixel_values = inputs["pixel_values"].to(DEVICE, dtype=DTYPE)
        with torch.no_grad():
            image_features = model.encode_image(pixel_values)
        outputs.extend(_to_list(_normalize(image_features)))
    return outputs