face_utils.py

import cv2
import numpy as np
import torch


def compute_increased_bbox(bbox, increase_area, preserve_aspect=True):
    """Enlarge a (left, top, right, bottom) bbox by `increase_area` per side.

    With `preserve_aspect=True`, the shorter side is enlarged further, so the
    result is a square of side (1 + 2 * increase_area) * max(width, height);
    the returned coordinates may fall outside the image.
    """
    left, top, right, bot = bbox
    width = right - left
    height = bot - top

    if preserve_aspect:
        width_increase = max(increase_area, ((1 + 2 * increase_area) * height - width) / (2 * width))
        height_increase = max(increase_area, ((1 + 2 * increase_area) * width - height) / (2 * height))
    else:
        width_increase = height_increase = increase_area

    left = int(left - width_increase * width)
    top = int(top - height_increase * height)
    right = int(right + width_increase * width)
    bot = int(bot + height_increase * height)
    return (left, top, right, bot)
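

# Editor's sketch (not in the original file): a quick sanity check for
# compute_increased_bbox. With preserve_aspect=True the shorter side is
# padded further, so a 100x50 box grows into a 200x200 square; the negative
# coordinates show the result may exceed the image bounds.
def _check_compute_increased_bbox():
    assert compute_increased_bbox((10, 20, 110, 70), 0.5) == (-40, -55, 160, 145)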


def get_valid_bboxes(bboxes, h, w):
    """Clip a (left, top, right, bottom) bbox to the image bounds [0, w] x [0, h]."""
    left = max(bboxes[0], 0)
    top = max(bboxes[1], 0)
    right = min(bboxes[2], w)
    bottom = min(bboxes[3], h)
    return (left, top, right, bottom)
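

# Editor's sketch (not in the original file): the enlarged bbox above is
# typically clipped back to the image. Note the argument order (bboxes, h, w).
def _check_get_valid_bboxes():
    assert get_valid_bboxes((-40, -55, 160, 145), h=120, w=150) == (0, 0, 150, 120)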


def align_crop_face_landmarks(img,
                              landmarks,
                              output_size,
                              transform_size=None,
                              enable_padding=True,
                              return_inverse_affine=False,
                              shrink_ratio=(1, 1)):
    """Align and crop a face with landmarks.

    The output_size and transform_size are based on width. The height is
    adjusted based on shrink_ratio_h / shrink_ratio_w.

    Modified from:
    https://github.com/NVlabs/ffhq-dataset/blob/master/download_ffhq.py

    Args:
        img (Numpy array): Input image.
        landmarks (Numpy array): 5, 68 or 98 landmarks.
        output_size (int): Output face size.
        transform_size (int): Transform size. Usually four times output_size.
            Default: None, i.e., output_size * 4.
        enable_padding (bool): Whether to pad and blend the borders when the
            crop quad falls outside the image. Default: True.
        return_inverse_affine (bool): Whether to compute the inverse affine
            matrix that maps the cropped face back to the original image.
            Default: False.
        shrink_ratio (float | tuple[float] | list[float]): Shrink the whole
            face for height and width (crop a larger area). Default: (1, 1).

    Returns:
        tuple[Numpy array, Numpy array | None]: Cropped face and the inverse
            affine matrix (None if return_inverse_affine is False).
    """
    lm_type = "retinaface_5"  # Options: dlib_5, retinaface_5

    if isinstance(shrink_ratio, (float, int)):
        shrink_ratio = (shrink_ratio, shrink_ratio)
    if transform_size is None:
        transform_size = output_size * 4

    # Parse landmarks
    lm = np.array(landmarks)
    if lm.shape[0] == 5 and lm_type == "retinaface_5":
        eye_left = lm[0]
        eye_right = lm[1]
        mouth_avg = (lm[3] + lm[4]) * 0.5
    elif lm.shape[0] == 5 and lm_type == "dlib_5":
        lm_eye_left = lm[2:4]
        lm_eye_right = lm[0:2]
        eye_left = np.mean(lm_eye_left, axis=0)
        eye_right = np.mean(lm_eye_right, axis=0)
        mouth_avg = lm[4]
    elif lm.shape[0] == 68:
        lm_eye_left = lm[36:42]
        lm_eye_right = lm[42:48]
        eye_left = np.mean(lm_eye_left, axis=0)
        eye_right = np.mean(lm_eye_right, axis=0)
        mouth_avg = (lm[48] + lm[54]) * 0.5
    elif lm.shape[0] == 98:
        lm_eye_left = lm[60:68]
        lm_eye_right = lm[68:76]
        eye_left = np.mean(lm_eye_left, axis=0)
        eye_right = np.mean(lm_eye_right, axis=0)
        mouth_avg = (lm[76] + lm[82]) * 0.5
    else:
        raise ValueError(f"Unsupported number of landmarks: {lm.shape[0]}")

    eye_avg = (eye_left + eye_right) * 0.5
    eye_to_eye = eye_right - eye_left
    eye_to_mouth = mouth_avg - eye_avg

    # Get the oriented crop rectangle
    # x: half width of the oriented crop rectangle
    x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1]
    # - np.flipud(eye_to_mouth) * [-1, 1]: rotate 90 clockwise
    # norm with the hypotenuse: get the direction
    x /= np.hypot(*x)  # get the hypotenuse of a right triangle
    rect_scale = 1  # TODO: you can edit it to get larger rect
    x *= max(np.hypot(*eye_to_eye) * 2.0 * rect_scale, np.hypot(*eye_to_mouth) * 1.8 * rect_scale)
    # y: half height of the oriented crop rectangle
    y = np.flipud(x) * [-1, 1]

    x *= shrink_ratio[1]  # width
    y *= shrink_ratio[0]  # height

    # c: center
    c = eye_avg + eye_to_mouth * 0.1
    # quad: (left_top, left_bottom, right_bottom, right_top)
    quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
    # qsize: side length of the square
    qsize = np.hypot(*x) * 2

    quad_ori = np.copy(quad)

    # Shrink, for large face
    # TODO: do we really need shrink
    shrink = int(np.floor(qsize / output_size * 0.5))
    if shrink > 1:
        h, w = img.shape[0:2]
        rsize = (int(np.rint(float(w) / shrink)), int(np.rint(float(h) / shrink)))
        img = cv2.resize(img, rsize, interpolation=cv2.INTER_AREA)
        quad /= shrink
        qsize /= shrink

    # Crop
    h, w = img.shape[0:2]
    border = max(int(np.rint(qsize * 0.1)), 3)
    crop = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))),
            int(np.ceil(max(quad[:, 0]))), int(np.ceil(max(quad[:, 1]))))
    crop = (max(crop[0] - border, 0), max(crop[1] - border, 0),
            min(crop[2] + border, w), min(crop[3] + border, h))
    if crop[2] - crop[0] < w or crop[3] - crop[1] < h:
        img = img[crop[1]:crop[3], crop[0]:crop[2], :]
        quad -= crop[0:2]

    # Pad
    # pad: (width_left, height_top, width_right, height_bottom)
    h, w = img.shape[0:2]
    pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))),
           int(np.ceil(max(quad[:, 0]))), int(np.ceil(max(quad[:, 1]))))
    pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0),
           max(pad[2] - w + border, 0), max(pad[3] - h + border, 0))
    if enable_padding and max(pad) > border - 4:
        pad = np.maximum(pad, int(np.rint(qsize * 0.3)))
        img = np.pad(img, ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), "reflect")
        h, w = img.shape[0:2]
        y, x, _ = np.ogrid[:h, :w, :1]
        # mask: ~0 in the image interior, ramping up to 1 at the padded borders
        mask = np.maximum(
            1.0 - np.minimum(np.float32(x) / pad[0], np.float32(w - 1 - x) / pad[2]),
            1.0 - np.minimum(np.float32(y) / pad[1], np.float32(h - 1 - y) / pad[3]),
        )
        blur = int(qsize * 0.02)
        if blur % 2 == 0:
            blur += 1  # keep the kernel size odd
        blur_img = cv2.boxFilter(img, 0, ksize=(blur, blur))

        # blend the blurred image and the median color into the padded borders
        img = img.astype("float32")
        img += (blur_img - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0)
        img += (np.median(img, axis=(0, 1)) - img) * np.clip(mask, 0.0, 1.0)
        img = np.clip(img, 0, 255)  # float32, [0, 255]
        quad += pad[:2]

    # Transform with cv2
    h_ratio = shrink_ratio[0] / shrink_ratio[1]
    dst_h, dst_w = int(transform_size * h_ratio), transform_size
    template = np.array([[0, 0], [0, dst_h], [dst_w, dst_h], [dst_w, 0]], dtype=np.float32)
    # use cv2.LMEDS method for the equivalence to skimage transform
    # ref: https://blog.csdn.net/yichxi/article/details/115827338
    affine_matrix = cv2.estimateAffinePartial2D(quad, template, method=cv2.LMEDS)[0]
    cropped_face = cv2.warpAffine(
        img,
        affine_matrix,
        (dst_w, dst_h),
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(135, 133, 132),  # gray
    )

    if output_size < transform_size:
        cropped_face = cv2.resize(
            cropped_face,
            (output_size, int(output_size * h_ratio)),
            interpolation=cv2.INTER_LINEAR,
        )

    if return_inverse_affine:
        dst_h, dst_w = int(output_size * h_ratio), output_size
        template = np.array([[0, 0], [0, dst_h], [dst_w, dst_h], [dst_w, 0]], dtype=np.float32)
        # use cv2.LMEDS method for the equivalence to skimage transform
        # ref: https://blog.csdn.net/yichxi/article/details/115827338
        affine_matrix = cv2.estimateAffinePartial2D(quad_ori, template, method=cv2.LMEDS)[0]
        inverse_affine = cv2.invertAffineTransform(affine_matrix)
    else:
        inverse_affine = None
    return cropped_face, inverse_affine
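

# Editor's sketch (not in the original file): hypothetical usage of
# align_crop_face_landmarks with 5 RetinaFace-style landmarks in (x, y)
# order: left eye, right eye, nose, left mouth corner, right mouth corner.
# The image path and landmark coordinates below are placeholders.
def _example_align_crop_face():
    img = cv2.imread("face.jpg")  # hypothetical input
    landmarks = np.array([[182.0, 240.0], [310.0, 238.0], [246.0, 305.0],
                          [200.0, 370.0], [295.0, 368.0]])
    cropped_face, inverse_affine = align_crop_face_landmarks(
        img, landmarks, output_size=512, return_inverse_affine=True)
    return cropped_face, inverse_affine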


def paste_face_back(img, face, inverse_affine):
    """Paste a (restored) face back into the original image.

    Args:
        img (Numpy array): Original image.
        face (Numpy array): Cropped face, e.g., from align_crop_face_landmarks.
        inverse_affine (Numpy array): 2x3 affine matrix mapping the face back
            to the original image.

    Returns:
        (Numpy array): Image with the face pasted back. float32, [0, 255].
    """
    h, w = img.shape[0:2]
    face_h, face_w = face.shape[0:2]
    inv_restored = cv2.warpAffine(face, inverse_affine, (w, h))
    mask = np.ones((face_h, face_w, 3), dtype=np.float32)
    inv_mask = cv2.warpAffine(mask, inverse_affine, (w, h))
    # remove the black borders
    inv_mask_erosion = cv2.erode(inv_mask, np.ones((2, 2), np.uint8))
    inv_restored_remove_border = inv_mask_erosion * inv_restored
    total_face_area = np.sum(inv_mask_erosion) // 3
    # compute the fusion edge based on the area of the face
    w_edge = int(total_face_area**0.5) // 20
    erosion_radius = w_edge * 2
    inv_mask_center = cv2.erode(inv_mask_erosion, np.ones((erosion_radius, erosion_radius), np.uint8))
    blur_size = w_edge * 2
    inv_soft_mask = cv2.GaussianBlur(inv_mask_center, (blur_size + 1, blur_size + 1), 0)
    img = inv_soft_mask * inv_restored_remove_border + (1 - inv_soft_mask) * img
    # float32, [0, 255]
    return img
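

# Editor's sketch (not in the original file): a round trip with
# align_crop_face_landmarks. `restore` stands in for any face enhancement
# model; the blended output is float32 in [0, 255], so cast before saving.
def _example_paste_face_back(img, landmarks, restore):
    cropped_face, inverse_affine = align_crop_face_landmarks(
        img, landmarks, output_size=512, return_inverse_affine=True)
    restored_face = restore(cropped_face)  # hypothetical enhancement step
    result = paste_face_back(img, restored_face, inverse_affine)
    return result.astype(np.uint8)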