|
@@ -15,8 +15,18 @@ model = AutoModel.from_pretrained(
|
|
|
MODEL_NAME, config=config, trust_remote_code=True
|
|
|
).to(dtype=DTYPE, device=DEVICE).eval()
|
|
|
|
|
|
-# ✅ 只管图像处理
|
|
|
-image_processor = CLIPImageProcessor.from_pretrained(MODEL_NAME)
|
|
|
+
|
|
|
+# Prefer the checkpoint's own preprocessing config; otherwise build a processor from CLIP defaults.
|
|
|
+from transformers import CLIPImageProcessor  # outside the try so an ImportError is not masked
|
|
|
+try:
|
|
|
+    image_processor = CLIPImageProcessor.from_pretrained(MODEL_NAME)
|
|
|
+except Exception as exc:  # deliberate best-effort fallback; the real cause is reported below
|
|
|
+    print(f"[warning] EVA-CLIP 没有预处理配置,使用默认参数构造 ImageProcessor: {exc!r}")
|
|
|
+    image_processor = CLIPImageProcessor(  # NOTE(review): assumes a 224px checkpoint - confirm
|
|
|
+        size={"shortest_edge": 224}, crop_size={"height": 224, "width": 224}, resample=3,  # 3 = PIL bicubic
|
|
|
+        image_mean=[0.48145466, 0.4578275, 0.40821073],  # per-channel mean used by OpenAI CLIP
|
|
|
+        image_std=[0.26862954, 0.26130258, 0.27577711])  # per-channel std used by OpenAI CLIP
|
|
|
+
|
|
|
# 如果后续要做 text embedding,可以加 tokenizer
|
|
|
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
|
|
|