| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475 |
- import hashlib
- import requests
- import imagehash
- from PIL import Image
- from io import BytesIO
- from typing import Optional
- from app.core.config import GlobalConfigSettings
- from app.infra.shared.oss import OssUtils
- from app.schemas import ImagePath
class ImageUtils(OssUtils):
    """Image helpers: loading, MD5 fingerprinting and perceptual-hash comparison.

    Images are addressed by a path plus a ``path_type`` that selects where the
    bytes come from:

    * ``"filepath"`` -- a local file opened in binary mode,
    * ``"url"`` -- an HTTP GET via ``requests``,
    * ``"oss_file"`` -- an object read through ``self.fetch_oss_file``
      (not defined in this class; presumably inherited from ``OssUtils``).

    A missing or empty ``path_type`` falls back to ``DEFAULT_PATH_TYPE``.
    """

    # pHash Hamming distance ranges from 0 to 64; smaller means more similar.
    # Two images whose distance is at or below this value count as the same image.
    SAME_IMAGE_PHASH_THRESHOLD = 5

    # Source used when the caller gives no (or an empty) path_type.
    DEFAULT_PATH_TYPE = "oss_file"

    # Timeout, in seconds, passed to requests.get for "url" sources.
    HTTP_TIMEOUT_SECONDS = 5

    def __init__(self, config: GlobalConfigSettings):
        """Initialise the underlying OSS helper from ``config.aliyun_oss``."""
        super().__init__(config.aliyun_oss)

    def load_image(self, path: str, path_type: Optional[str] = None) -> Image.Image:
        """Load the image at ``path`` and return it converted to RGB.

        Args:
            path: Location of the image; its meaning depends on ``path_type``.
            path_type: ``"filepath"``, ``"url"`` or ``"oss_file"``. ``None`` or
                an empty string selects ``DEFAULT_PATH_TYPE``.

        Returns:
            A new ``PIL.Image.Image`` in ``"RGB"`` mode.

        Raises:
            ValueError: If ``path_type`` is not one of the supported values.
            requests.HTTPError: If a ``"url"`` source answers with an HTTP
                error status.
        """
        raw = self._read_file_bytes(path, path_type)
        # convert() returns an independent image, so the decoder opened here
        # can be released as soon as the conversion is done.
        with Image.open(BytesIO(raw)) as img:
            return img.convert("RGB")

    def _read_file_bytes(self, path: str, path_type: Optional[str] = None) -> bytes:
        """Read the raw bytes of the file selected by ``path`` and ``path_type``.

        Args:
            path: Location of the file; its meaning depends on ``path_type``.
            path_type: ``"filepath"``, ``"url"`` or ``"oss_file"``. ``None`` or
                an empty string selects ``DEFAULT_PATH_TYPE``.

        Returns:
            The file content as ``bytes``.

        Raises:
            ValueError: If ``path_type`` is not one of the supported values.
            requests.HTTPError: If a ``"url"`` source answers with an HTTP
                error status.
        """
        source = path_type or self.DEFAULT_PATH_TYPE
        if source == "filepath":
            with open(path, "rb") as f:
                return f.read()
        if source == "url":
            response = requests.get(path, timeout=self.HTTP_TIMEOUT_SECONDS)
            # Without this check the body of a 404/500 page would be treated
            # as image data (and hashed by get_image_md5).
            response.raise_for_status()
            return response.content
        if source == "oss_file":
            return self.fetch_oss_file(path).read()
        raise ValueError(f"unsupported path_type: {source}")

    def get_image_md5(self, file: ImagePath) -> str:
        """Return the MD5 of the image's raw file bytes as 32 hex characters.

        The digest is computed over the undecoded file content read via
        ``file.path`` / ``file.path_type``, not over decoded pixel data.
        """
        content = self._read_file_bytes(file.path, file.path_type)
        # MD5 serves as a content fingerprint here, not as a security
        # primitive; the flag leaves the digest unchanged.
        return hashlib.md5(content, usedforsecurity=False).hexdigest()

    @staticmethod
    def phash_distance(img1: Image.Image, img2: Image.Image) -> int:
        """Return the difference between the perceptual hashes of two images.

        Both images are hashed with ``imagehash.phash`` and the two hashes are
        subtracted; a smaller result means the images are more similar.
        """
        return imagehash.phash(img1) - imagehash.phash(img2)

    def image_similar(self, file1: ImagePath, file2: ImagePath) -> int:
        """Load both images and return their pHash distance.

        Despite the name, the result is a distance, not a boolean: use
        ``is_same_image`` for a yes/no answer.

        Raises:
            ValueError: If either ``path_type`` is unsupported.
        """
        img1 = self.load_image(path=file1.path, path_type=file1.path_type)
        img2 = self.load_image(path=file2.path, path_type=file2.path_type)
        return self.phash_distance(img1, img2)

    def is_same_image(self, file1: ImagePath, file2: ImagePath) -> bool:
        """Return True when the pHash distance is <= SAME_IMAGE_PHASH_THRESHOLD."""
        return self.image_similar(file1, file2) <= self.SAME_IMAGE_PHASH_THRESHOLD
|