import os

import torch
from transformers import AutoConfig, AutoModel, AutoTokenizer, CLIPImageProcessor

MODEL_NAME = "BAAI/EVA-CLIP-8B-448"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32
MAX_BATCH = int(os.getenv("MAX_BATCH", "32"))

print(f"[model_config] Loading {MODEL_NAME} on {DEVICE} dtype={DTYPE} ...")

config = AutoConfig.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = (
    AutoModel.from_pretrained(MODEL_NAME, config=config, trust_remote_code=True)
    .to(dtype=DTYPE, device=DEVICE)
    .eval()
)

try:
    # Prefer the preprocessing config shipped with the checkpoint, if any.
    image_processor = CLIPImageProcessor.from_pretrained(MODEL_NAME)
except Exception:
    print("[warning] EVA-CLIP ships no preprocessing config; building an ImageProcessor with default parameters")
    image_processor = CLIPImageProcessor(
        size={"shortest_edge": 448},              # resize shortest edge to 448
        resample=3,                               # 3 == PIL.Image.BICUBIC
        crop_size={"height": 448, "width": 448},  # center-crop to 448×448
        image_mean=[0.48145466, 0.4578275, 0.40821073],  # standard CLIP normalization stats
        image_std=[0.26862954, 0.26130258, 0.27577711],
    )

# The tokenizer is only needed if text embeddings are computed later.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


def get_model():
    return model, image_processor, tokenizer, DEVICE, DTYPE, MAX_BATCH
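

if __name__ == "__main__":
    # Smoke-test sketch: embed a single image with the objects returned by
    # get_model(). This is illustrative, not part of the module's API: it
    # assumes the EVA-CLIP remote code exposes encode_image() (as shown on
    # the BAAI/EVA-CLIP-8B model card), and "test.jpg" is a hypothetical
    # placeholder path.
    from PIL import Image

    mdl, processor, _tok, device, dtype, _max_batch = get_model()
    image = Image.open("test.jpg").convert("RGB")
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    pixel_values = pixel_values.to(device=device, dtype=dtype)
    with torch.no_grad():
        features = mdl.encode_image(pixel_values)
        # L2-normalize so embeddings are ready for cosine-similarity search.
        features = features / features.norm(dim=-1, keepdim=True)
    print(f"[smoke_test] image embedding shape: {tuple(features.shape)}")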