# clip_model.py — EVA-CLIP model/processor loading and module-level configuration.
  1. import os
  2. import torch
  3. from transformers import AutoModel, AutoConfig, AutoProcessor, CLIPImageProcessor, AutoTokenizer
  4. MODEL_NAME = "BAAI/EVA-CLIP-8B"
  5. DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
  6. DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32
  7. MAX_BATCH = int(os.getenv("MAX_BATCH", "32"))
  8. TRUST_REMOTE_CODE = True
  9. print(f"[model_config] Loading {MODEL_NAME} on {DEVICE} dtype={DTYPE} ...")
  10. # 加载模型配置
  11. config = AutoConfig.from_pretrained(
  12. pretrained_model_name_or_path=MODEL_NAME,
  13. trust_remote_code=TRUST_REMOTE_CODE
  14. )
  15. # 加载模型
  16. model = AutoModel.from_pretrained(
  17. pretrained_model_name_or_path=MODEL_NAME,
  18. config=config,
  19. trust_remote_code=TRUST_REMOTE_CODE
  20. ).to(dtype=DTYPE, device=DEVICE).eval()
  21. # 优先尝试 AutoProcessor(适配EVA-CLIP这种特殊情况)
  22. try:
  23. processor = AutoProcessor.from_pretrained(
  24. pretrained_model_name_or_path=MODEL_NAME,
  25. trust_remote_code=TRUST_REMOTE_CODE
  26. )
  27. except Exception as e:
  28. print(f"[warning] AutoProcessor 加载失败: {e}")
  29. print("[info] 尝试手动组合 ImageProcessor + Tokenizer ...")
  30. processor = {
  31. "image_processor": CLIPImageProcessor.from_pretrained(MODEL_NAME),
  32. "tokenizer": AutoTokenizer.from_pretrained(MODEL_NAME),
  33. }
  34. def get_model():
  35. return model, processor, DEVICE, DTYPE, MAX_BATCH