# clip_model.py — EVA-CLIP model/processor loading and module-level configuration.
  1. import os
  2. import torch
  3. from transformers import AutoModel, AutoConfig, AutoProcessor, CLIPImageProcessor, AutoTokenizer
  4. MODEL_NAME = "BAAI/EVA-CLIP-8B"
  5. DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
  6. DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32
  7. MAX_BATCH = int(os.getenv("MAX_BATCH", "32"))
  8. TRUST_REMOTE_CODE = True
  9. print(f"[model_config] Loading {MODEL_NAME} on {DEVICE} dtype={DTYPE} ...")
  10. # 加载模型配置
  11. config = AutoConfig.from_pretrained(
  12. pretrained_model_name_or_path=MODEL_NAME,
  13. trust_remote_code=TRUST_REMOTE_CODE
  14. )
  15. # 加载模型
  16. model = AutoModel.from_pretrained(
  17. pretrained_model_name_or_path=MODEL_NAME,
  18. config=config,
  19. trust_remote_code=TRUST_REMOTE_CODE
  20. ).to(dtype=DTYPE, device=DEVICE).eval()
  21. # 优先尝试 AutoProcessor(适配EVA-CLIP这种特殊情况)
  22. try:
  23. processor = AutoProcessor.from_pretrained(
  24. pretrained_model_name_or_path=MODEL_NAME,
  25. trust_remote_code=TRUST_REMOTE_CODE
  26. )
  27. except Exception as e:
  28. print(f"[warning] AutoProcessor 加载失败: {e}")
  29. print("[info] 尝试手动组合 ImageProcessor + Tokenizer ...")
  30. processor = {
  31. "image_processor": CLIPImageProcessor.from_pretrained(MODEL_NAME),
  32. "tokenizer": AutoTokenizer.from_pretrained(MODEL_NAME),
  33. }
  34. def get_model():
  35. return model, processor, DEVICE, DTYPE, MAX_BATCH