image.py 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
  1. import requests
  2. import imagehash
  3. from PIL import Image
  4. from io import BytesIO
  5. from typing import Optional
  6. from app.core.config import GlobalConfigSettings
  7. from app.infra.shared.oss import OssUtils
  8. from app.schemas import ImagePath
  9. class ImageUtils(OssUtils):
  10. """phash 汉明距离 0~64,越小越相似。低于此阈值视为同一张图。"""
  11. SAME_IMAGE_PHASH_THRESHOLD = 5
  12. def __init__(self, config: GlobalConfigSettings):
  13. super().__init__(config.aliyun_oss)
  14. def load_image(self, path: str, path_type: Optional[str] = None):
  15. path_type = path_type or "oss_file"
  16. match path_type:
  17. case "filepath":
  18. img = Image.open(path)
  19. case "url":
  20. img = Image.open(BytesIO(requests.get(path, timeout=5).content))
  21. case "oss_file":
  22. img = Image.open(BytesIO(self.bucket.get_object(path).read()))
  23. case _:
  24. return "file_type error"
  25. return img.convert("RGB")
  26. @staticmethod
  27. def phash_distance(img1: Image.Image, img2: Image.Image):
  28. h1 = imagehash.phash(img1)
  29. h2 = imagehash.phash(img2)
  30. return h1 - h2
  31. def image_similar(self, file1: ImagePath, file2: ImagePath):
  32. img1 = self.load_image(
  33. path=file1.path,
  34. path_type=file1.path_type,
  35. )
  36. img2 = self.load_image(
  37. path=file2.path,
  38. path_type=file2.path_type,
  39. )
  40. return self.phash_distance(img1, img2)
  41. def is_same_image(self, file1: ImagePath, file2: ImagePath) -> bool:
  42. """判断两张图是否视为同一张(phash 距离 <= SAME_IMAGE_PHASH_THRESHOLD)。"""
  43. return self.image_similar(file1, file2) <= self.SAME_IMAGE_PHASH_THRESHOLD