import asyncio
import io
import urllib.request
from typing import List

import torch
from PIL import Image

from .clip_model import get_model

# Initialize the model once at import time (module-level singleton).
model, image_processor, tokenizer, DEVICE, DTYPE, MAX_BATCH = get_model()


def _normalize(x: torch.Tensor) -> torch.Tensor:
    """L2-normalize *x* along the last dim; epsilon guards zero vectors."""
    return x / (x.norm(dim=-1, keepdim=True) + 1e-12)


def _to_list(x: torch.Tensor) -> list:
    """Detach *x*, move it to CPU, and convert to nested Python lists."""
    return x.detach().cpu().tolist()


def _fetch_image(url: str) -> Image.Image:
    """Download one image over HTTP(S) and decode it as an RGB PIL image.

    NOTE(review): there is no URL scheme allow-list here — ``urlopen`` also
    accepts ``file://`` etc.; validate URLs upstream if they are untrusted.
    """
    with urllib.request.urlopen(url, timeout=15) as resp:
        data = resp.read()
    return Image.open(io.BytesIO(data)).convert("RGB")


async def embed_image_url(img_url_list: List[str]) -> List[List[float]]:
    """Download each image URL and return L2-normalized image embeddings.

    Args:
        img_url_list: image URLs; an empty list yields an empty result.

    Returns:
        One embedding (list of floats) per input URL, in input order.

    Raises:
        urllib.error.URLError / OSError: on download or decode failure.
    """
    loop = asyncio.get_running_loop()
    # urllib + PIL decoding are synchronous; run them in the default
    # thread-pool executor so the event loop is not blocked. Downloads
    # stay sequential (awaited one by one), matching the original order.
    images = [
        await loop.run_in_executor(None, _fetch_image, u)
        for u in img_url_list
    ]

    outputs: List[List[float]] = []
    try:
        for start in range(0, len(images), MAX_BATCH):
            chunk = images[start:start + MAX_BATCH]
            # Only pixel_values are needed; no text input is passed.
            inputs = image_processor(images=chunk, return_tensors="pt")
            pixel_values = inputs["pixel_values"].to(DEVICE, dtype=DTYPE)
            with torch.no_grad():
                # Image-encoder forward pass.
                # NOTE(review): this forward pass also blocks the event
                # loop; offload it too if latency matters — confirm the
                # model/device is safe to call from a worker thread first.
                image_features = model.encode_image(pixel_values)
            outputs.extend(_to_list(_normalize(image_features)))
    finally:
        # Release PIL image resources deterministically.
        for img in images:
            img.close()
    return outputs