image.py 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475
  1. import hashlib
  2. import requests
  3. import imagehash
  4. from PIL import Image
  5. from io import BytesIO
  6. from typing import Optional
  7. from app.core.config import GlobalConfigSettings
  8. from app.infra.shared.oss import OssUtils
  9. from app.schemas import ImagePath
  10. class ImageUtils(OssUtils):
  11. """phash 汉明距离 0~64,越小越相似。低于此阈值视为同一张图。"""
  12. SAME_IMAGE_PHASH_THRESHOLD = 5
  13. def __init__(self, config: GlobalConfigSettings):
  14. super().__init__(config.aliyun_oss)
  15. def load_image(self, path: str, path_type: Optional[str] = None):
  16. path_type = path_type or "oss_file"
  17. match path_type:
  18. case "filepath":
  19. img = Image.open(path)
  20. case "url":
  21. img = Image.open(BytesIO(requests.get(path, timeout=5).content))
  22. case "oss_file":
  23. img = Image.open(BytesIO(self.fetch_oss_file(path).read()))
  24. case _:
  25. return "file_type error"
  26. return img.convert("RGB")
  27. def _read_file_bytes(self, path: str, path_type: Optional[str] = None) -> bytes:
  28. """根据 path_type 读取文件原始字节。"""
  29. path_type = path_type or "oss_file"
  30. if path_type == "filepath":
  31. with open(path, "rb") as f:
  32. return f.read()
  33. if path_type == "url":
  34. return requests.get(path, timeout=5).content
  35. if path_type == "oss_file":
  36. return self.fetch_oss_file(path).read()
  37. raise ValueError(f"unsupported path_type: {path_type}")
  38. def get_image_md5(self, file: ImagePath) -> str:
  39. """计算图片文件的 MD5(基于原始文件字节),返回 32 位十六进制字符串。"""
  40. content = self._read_file_bytes(file.path, file.path_type)
  41. return hashlib.md5(content).hexdigest()
  42. @staticmethod
  43. def phash_distance(img1: Image.Image, img2: Image.Image):
  44. h1 = imagehash.phash(img1)
  45. h2 = imagehash.phash(img2)
  46. return h1 - h2
  47. def image_similar(self, file1: ImagePath, file2: ImagePath):
  48. img1 = self.load_image(
  49. path=file1.path,
  50. path_type=file1.path_type,
  51. )
  52. img2 = self.load_image(
  53. path=file2.path,
  54. path_type=file2.path_type,
  55. )
  56. return self.phash_distance(img1, img2)
  57. def is_same_image(self, file1: ImagePath, file2: ImagePath) -> bool:
  58. """判断两张图是否视为同一张(phash 距离 <= SAME_IMAGE_PHASH_THRESHOLD)。"""
  59. return self.image_similar(file1, file2) <= self.SAME_IMAGE_PHASH_THRESHOLD