
add img embedding.py

luojunhui 4 weeks ago
Commit db0d940e9c

+ 9 - 27
applications/clip_embedding/clip_model.py

@@ -1,42 +1,24 @@
 import os
 import torch
-from transformers import AutoModel, AutoConfig, AutoProcessor, CLIPImageProcessor, AutoTokenizer
+
+from transformers import AutoModel, AutoConfig, CLIPImageProcessor, AutoTokenizer
 
 MODEL_NAME = "BAAI/EVA-CLIP-8B"
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32
 MAX_BATCH = int(os.getenv("MAX_BATCH", "32"))
 
-TRUST_REMOTE_CODE = True
-
 print(f"[model_config] Loading {MODEL_NAME} on {DEVICE} dtype={DTYPE} ...")
 
-# Load the model config
-config = AutoConfig.from_pretrained(
-    pretrained_model_name_or_path=MODEL_NAME,
-    trust_remote_code=TRUST_REMOTE_CODE
-)
-
-# Load the model
+config = AutoConfig.from_pretrained(MODEL_NAME, trust_remote_code=True)
 model = AutoModel.from_pretrained(
-    pretrained_model_name_or_path=MODEL_NAME,
-    config=config,
-    trust_remote_code=TRUST_REMOTE_CODE
+    MODEL_NAME, config=config, trust_remote_code=True
 ).to(dtype=DTYPE, device=DEVICE).eval()
 
-# Try AutoProcessor first (handles the EVA-CLIP special case)
-try:
-    processor = AutoProcessor.from_pretrained(
-        pretrained_model_name_or_path=MODEL_NAME,
-        trust_remote_code=TRUST_REMOTE_CODE
-    )
-except Exception as e:
-    print(f"[warning] AutoProcessor failed to load: {e}")
-    print("[info] Falling back to a manual ImageProcessor + Tokenizer combo ...")
-    processor = {
-        "image_processor": CLIPImageProcessor.from_pretrained(MODEL_NAME),
-        "tokenizer": AutoTokenizer.from_pretrained(MODEL_NAME),
-    }
+# ✅ Image processing only
+image_processor = CLIPImageProcessor.from_pretrained(MODEL_NAME)
+# Load a tokenizer in case text embedding is needed later
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 
 def get_model():
-    return model, processor, DEVICE, DTYPE, MAX_BATCH
+    return model, image_processor, tokenizer, DEVICE, DTYPE, MAX_BATCH
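
For context, a minimal sketch of how the new five-value return of get_model() might be consumed downstream. The embed_one helper, the example download logic, and torch.no_grad() wrapping are illustrative assumptions, not part of this commit; the preprocessing and get_image_features calls mirror what embedding.py already does.

import requests
import torch
from PIL import Image

from applications.clip_embedding.clip_model import get_model

model, image_processor, tokenizer, DEVICE, DTYPE, MAX_BATCH = get_model()

def embed_one(url: str) -> torch.Tensor:
    # Hypothetical helper: fetch one image and return its L2-normalized embedding.
    image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
    inputs = image_processor(images=[image], return_tensors="pt")
    inputs = {k: v.to(DEVICE, dtype=DTYPE) for k, v in inputs.items()}
    with torch.no_grad():
        feats = model.get_image_features(**inputs)
    return feats / feats.norm(dim=-1, keepdim=True)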

+ 3 - 6
applications/clip_embedding/embedding.py

@@ -28,13 +28,10 @@ async def embed_image_url(img_url_list: List[str]):
     for chunk_start in range(0, len(images), MAX_BATCH):
         chunk = images[chunk_start:chunk_start + MAX_BATCH]
 
-        # Handle both cases: AutoProcessor vs. dict (fallback)
-        if isinstance(processor, dict):
-            inputs = processor["image_processor"](images=chunk, return_tensors="pt")
-        else:
-            inputs = processor(images=chunk, return_tensors="pt")
-
+        # ✅ Use image_processor directly; the mixed processor is gone
+        inputs = image_processor(images=chunk, return_tensors="pt")
         inputs = {k: v.to(DEVICE, dtype=DTYPE) if hasattr(v, "to") else v for k, v in inputs.items()}
+
         feats = model.get_image_features(**inputs)
         feats = _normalize(feats)
         outputs.extend(_to_list(feats))
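
Note that this hunk calls _normalize and _to_list, which are defined elsewhere in embedding.py and not shown in the diff. A plausible sketch of what they do, assuming standard L2 normalization and a float32 cast so the float16 CUDA tensors serialize cleanly:

import torch

def _normalize(feats: torch.Tensor) -> torch.Tensor:
    # Assumed behavior: L2-normalize each row so cosine similarity
    # between embeddings reduces to a dot product.
    return feats / feats.norm(dim=-1, keepdim=True)

def _to_list(feats: torch.Tensor) -> list:
    # Assumed behavior: detach, move to CPU, and cast to float32
    # so the vectors are plain JSON-serializable lists.
    return feats.detach().cpu().float().tolist()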