retinaface_utils.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452
  1. from itertools import product as product
  2. from math import ceil
  3. import numpy as np
  4. import torch
  5. import torchvision
  6. class PriorBox(object):
  7. def __init__(self, cfg, image_size=None, phase="train"):
  8. super(PriorBox, self).__init__()
  9. self.min_sizes = cfg["min_sizes"]
  10. self.steps = cfg["steps"]
  11. self.clip = cfg["clip"]
  12. self.image_size = image_size
  13. self.feature_maps = [
  14. [ceil(self.image_size[0] / step), ceil(self.image_size[1] / step)]
  15. for step in self.steps
  16. ]
  17. self.name = "s"
  18. def forward(self):
  19. anchors = []
  20. for k, f in enumerate(self.feature_maps):
  21. min_sizes = self.min_sizes[k]
  22. for i, j in product(range(f[0]), range(f[1])):
  23. for min_size in min_sizes:
  24. s_kx = min_size / self.image_size[1]
  25. s_ky = min_size / self.image_size[0]
  26. dense_cx = [
  27. x * self.steps[k] / self.image_size[1] for x in [j + 0.5]
  28. ]
  29. dense_cy = [
  30. y * self.steps[k] / self.image_size[0] for y in [i + 0.5]
  31. ]
  32. for cy, cx in product(dense_cy, dense_cx):
  33. anchors += [cx, cy, s_kx, s_ky]
  34. # back to torch land
  35. output = torch.Tensor(anchors).view(-1, 4)
  36. if self.clip:
  37. output.clamp_(max=1, min=0)
  38. return output
  39. def py_cpu_nms(dets, thresh):
  40. """Pure Python NMS baseline."""
  41. keep = torchvision.ops.nms(
  42. boxes=torch.Tensor(dets[:, :4]),
  43. scores=torch.Tensor(dets[:, 4]),
  44. iou_threshold=thresh,
  45. )
  46. return list(keep)
  47. def point_form(boxes):
  48. """Convert prior_boxes to (xmin, ymin, xmax, ymax)
  49. representation for comparison to point form ground truth data.
  50. Args:
  51. boxes: (tensor) center-size default boxes from priorbox layers.
  52. Return:
  53. boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes.
  54. """
  55. return torch.cat(
  56. (
  57. boxes[:, :2] - boxes[:, 2:] / 2, # xmin, ymin
  58. boxes[:, :2] + boxes[:, 2:] / 2,
  59. ),
  60. 1,
  61. ) # xmax, ymax
  62. def center_size(boxes):
  63. """Convert prior_boxes to (cx, cy, w, h)
  64. representation for comparison to center-size form ground truth data.
  65. Args:
  66. boxes: (tensor) point_form boxes
  67. Return:
  68. boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes.
  69. """
  70. return torch.cat(
  71. (boxes[:, 2:] + boxes[:, :2]) / 2, boxes[:, 2:] - boxes[:, :2], 1 # cx, cy
  72. ) # w, h
  73. def intersect(box_a, box_b):
  74. """We resize both tensors to [A,B,2] without new malloc:
  75. [A,2] -> [A,1,2] -> [A,B,2]
  76. [B,2] -> [1,B,2] -> [A,B,2]
  77. Then we compute the area of intersect between box_a and box_b.
  78. Args:
  79. box_a: (tensor) bounding boxes, Shape: [A,4].
  80. box_b: (tensor) bounding boxes, Shape: [B,4].
  81. Return:
  82. (tensor) intersection area, Shape: [A,B].
  83. """
  84. A = box_a.size(0)
  85. B = box_b.size(0)
  86. max_xy = torch.min(
  87. box_a[:, 2:].unsqueeze(1).expand(A, B, 2),
  88. box_b[:, 2:].unsqueeze(0).expand(A, B, 2),
  89. )
  90. min_xy = torch.max(
  91. box_a[:, :2].unsqueeze(1).expand(A, B, 2),
  92. box_b[:, :2].unsqueeze(0).expand(A, B, 2),
  93. )
  94. inter = torch.clamp((max_xy - min_xy), min=0)
  95. return inter[:, :, 0] * inter[:, :, 1]
  96. def jaccard(box_a, box_b):
  97. """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
  98. is simply the intersection over union of two boxes. Here we operate on
  99. ground truth boxes and default boxes.
  100. E.g.:
  101. A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
  102. Args:
  103. box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
  104. box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
  105. Return:
  106. jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
  107. """
  108. inter = intersect(box_a, box_b)
  109. area_a = (
  110. ((box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1]))
  111. .unsqueeze(1)
  112. .expand_as(inter)
  113. ) # [A,B]
  114. area_b = (
  115. ((box_b[:, 2] - box_b[:, 0]) * (box_b[:, 3] - box_b[:, 1]))
  116. .unsqueeze(0)
  117. .expand_as(inter)
  118. ) # [A,B]
  119. union = area_a + area_b - inter
  120. return inter / union # [A,B]
  121. def matrix_iou(a, b):
  122. """
  123. return iou of a and b, numpy version for data augenmentation
  124. """
  125. lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
  126. rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
  127. area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
  128. area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
  129. area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
  130. return area_i / (area_a[:, np.newaxis] + area_b - area_i)
  131. def matrix_iof(a, b):
  132. """
  133. return iof of a and b, numpy version for data augenmentation
  134. """
  135. lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
  136. rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
  137. area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
  138. area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
  139. return area_i / np.maximum(area_a[:, np.newaxis], 1)
  140. def match(
  141. threshold, truths, priors, variances, labels, landms, loc_t, conf_t, landm_t, idx
  142. ):
  143. """Match each prior box with the ground truth box of the highest jaccard
  144. overlap, encode the bounding boxes, then return the matched indices
  145. corresponding to both confidence and location preds.
  146. Args:
  147. threshold: (float) The overlap threshold used when matching boxes.
  148. truths: (tensor) Ground truth boxes, Shape: [num_obj, 4].
  149. priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4].
  150. variances: (tensor) Variances corresponding to each prior coord,
  151. Shape: [num_priors, 4].
  152. labels: (tensor) All the class labels for the image, Shape: [num_obj].
  153. landms: (tensor) Ground truth landms, Shape [num_obj, 10].
  154. loc_t: (tensor) Tensor to be filled w/ encoded location targets.
  155. conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds.
  156. landm_t: (tensor) Tensor to be filled w/ encoded landm targets.
  157. idx: (int) current batch index
  158. Return:
  159. The matched indices corresponding to 1)location 2)confidence
  160. 3)landm preds.
  161. """
  162. # jaccard index
  163. overlaps = jaccard(truths, point_form(priors))
  164. # (Bipartite Matching)
  165. # [1,num_objects] best prior for each ground truth
  166. best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True)
  167. # ignore hard gt
  168. valid_gt_idx = best_prior_overlap[:, 0] >= 0.2
  169. best_prior_idx_filter = best_prior_idx[valid_gt_idx, :]
  170. if best_prior_idx_filter.shape[0] <= 0:
  171. loc_t[idx] = 0
  172. conf_t[idx] = 0
  173. return
  174. # [1,num_priors] best ground truth for each prior
  175. best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True)
  176. best_truth_idx.squeeze_(0)
  177. best_truth_overlap.squeeze_(0)
  178. best_prior_idx.squeeze_(1)
  179. best_prior_idx_filter.squeeze_(1)
  180. best_prior_overlap.squeeze_(1)
  181. best_truth_overlap.index_fill_(0, best_prior_idx_filter, 2) # ensure best prior
  182. # TODO refactor: index best_prior_idx with long tensor
  183. # ensure every gt matches with its prior of max overlap
  184. for j in range(best_prior_idx.size(0)): # 判别此anchor是预测哪一个boxes
  185. best_truth_idx[best_prior_idx[j]] = j
  186. matches = truths[best_truth_idx] # Shape: [num_priors,4] 此处为每一个anchor对应的bbox取出来
  187. conf = labels[best_truth_idx] # Shape: [num_priors] 此处为每一个anchor对应的label取出来
  188. conf[
  189. best_truth_overlap < threshold
  190. ] = 0 # label as background overlap<0.35的全部作为负样本
  191. loc = encode(matches, priors, variances)
  192. matches_landm = landms[best_truth_idx]
  193. landm = encode_landm(matches_landm, priors, variances)
  194. loc_t[idx] = loc # [num_priors,4] encoded offsets to learn
  195. conf_t[idx] = conf # [num_priors] top class label for each prior
  196. landm_t[idx] = landm
  197. def encode(matched, priors, variances):
  198. """Encode the variances from the priorbox layers into the ground truth boxes
  199. we have matched (based on jaccard overlap) with the prior boxes.
  200. Args:
  201. matched: (tensor) Coords of ground truth for each prior in point-form
  202. Shape: [num_priors, 4].
  203. priors: (tensor) Prior boxes in center-offset form
  204. Shape: [num_priors,4].
  205. variances: (list[float]) Variances of priorboxes
  206. Return:
  207. encoded boxes (tensor), Shape: [num_priors, 4]
  208. """
  209. # dist b/t match center and prior's center
  210. g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2]
  211. # encode variance
  212. g_cxcy /= variances[0] * priors[:, 2:]
  213. # match wh / prior wh
  214. g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]
  215. g_wh = torch.log(g_wh) / variances[1]
  216. # return target for smooth_l1_loss
  217. return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4]
  218. def encode_landm(matched, priors, variances):
  219. """Encode the variances from the priorbox layers into the ground truth boxes
  220. we have matched (based on jaccard overlap) with the prior boxes.
  221. Args:
  222. matched: (tensor) Coords of ground truth for each prior in point-form
  223. Shape: [num_priors, 10].
  224. priors: (tensor) Prior boxes in center-offset form
  225. Shape: [num_priors,4].
  226. variances: (list[float]) Variances of priorboxes
  227. Return:
  228. encoded landm (tensor), Shape: [num_priors, 10]
  229. """
  230. # dist b/t match center and prior's center
  231. matched = torch.reshape(matched, (matched.size(0), 5, 2))
  232. priors_cx = priors[:, 0].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
  233. priors_cy = priors[:, 1].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
  234. priors_w = priors[:, 2].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
  235. priors_h = priors[:, 3].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
  236. priors = torch.cat([priors_cx, priors_cy, priors_w, priors_h], dim=2)
  237. g_cxcy = matched[:, :, :2] - priors[:, :, :2]
  238. # encode variance
  239. g_cxcy /= variances[0] * priors[:, :, 2:]
  240. # g_cxcy /= priors[:, :, 2:]
  241. g_cxcy = g_cxcy.reshape(g_cxcy.size(0), -1)
  242. # return target for smooth_l1_loss
  243. return g_cxcy
  244. # Adapted from https://github.com/Hakuyume/chainer-ssd
  245. def decode(loc, priors, variances):
  246. """Decode locations from predictions using priors to undo
  247. the encoding we did for offset regression at train time.
  248. Args:
  249. loc (tensor): location predictions for loc layers,
  250. Shape: [num_priors,4]
  251. priors (tensor): Prior boxes in center-offset form.
  252. Shape: [num_priors,4].
  253. variances: (list[float]) Variances of priorboxes
  254. Return:
  255. decoded bounding box predictions
  256. """
  257. boxes = torch.cat(
  258. (
  259. priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
  260. priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1]),
  261. ),
  262. 1,
  263. )
  264. boxes[:, :2] -= boxes[:, 2:] / 2
  265. boxes[:, 2:] += boxes[:, :2]
  266. return boxes
  267. def decode_landm(pre, priors, variances):
  268. """Decode landm from predictions using priors to undo
  269. the encoding we did for offset regression at train time.
  270. Args:
  271. pre (tensor): landm predictions for loc layers,
  272. Shape: [num_priors,10]
  273. priors (tensor): Prior boxes in center-offset form.
  274. Shape: [num_priors,4].
  275. variances: (list[float]) Variances of priorboxes
  276. Return:
  277. decoded landm predictions
  278. """
  279. tmp = (
  280. priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:],
  281. priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:],
  282. priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:],
  283. priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:],
  284. priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:],
  285. )
  286. landms = torch.cat(tmp, dim=1)
  287. return landms
  288. def batched_decode(b_loc, priors, variances):
  289. """Decode locations from predictions using priors to undo
  290. the encoding we did for offset regression at train time.
  291. Args:
  292. b_loc (tensor): location predictions for loc layers,
  293. Shape: [num_batches,num_priors,4]
  294. priors (tensor): Prior boxes in center-offset form.
  295. Shape: [1,num_priors,4].
  296. variances: (list[float]) Variances of priorboxes
  297. Return:
  298. decoded bounding box predictions
  299. """
  300. boxes = (
  301. priors[:, :, :2] + b_loc[:, :, :2] * variances[0] * priors[:, :, 2:],
  302. priors[:, :, 2:] * torch.exp(b_loc[:, :, 2:] * variances[1]),
  303. )
  304. boxes = torch.cat(boxes, dim=2)
  305. boxes[:, :, :2] -= boxes[:, :, 2:] / 2
  306. boxes[:, :, 2:] += boxes[:, :, :2]
  307. return boxes
  308. def batched_decode_landm(pre, priors, variances):
  309. """Decode landm from predictions using priors to undo
  310. the encoding we did for offset regression at train time.
  311. Args:
  312. pre (tensor): landm predictions for loc layers,
  313. Shape: [num_batches,num_priors,10]
  314. priors (tensor): Prior boxes in center-offset form.
  315. Shape: [1,num_priors,4].
  316. variances: (list[float]) Variances of priorboxes
  317. Return:
  318. decoded landm predictions
  319. """
  320. landms = (
  321. priors[:, :, :2] + pre[:, :, :2] * variances[0] * priors[:, :, 2:],
  322. priors[:, :, :2] + pre[:, :, 2:4] * variances[0] * priors[:, :, 2:],
  323. priors[:, :, :2] + pre[:, :, 4:6] * variances[0] * priors[:, :, 2:],
  324. priors[:, :, :2] + pre[:, :, 6:8] * variances[0] * priors[:, :, 2:],
  325. priors[:, :, :2] + pre[:, :, 8:10] * variances[0] * priors[:, :, 2:],
  326. )
  327. landms = torch.cat(landms, dim=2)
  328. return landms
  329. def log_sum_exp(x):
  330. """Utility function for computing log_sum_exp while determining
  331. This will be used to determine unaveraged confidence loss across
  332. all examples in a batch.
  333. Args:
  334. x (Variable(tensor)): conf_preds from conf layers
  335. """
  336. x_max = x.data.max()
  337. return torch.log(torch.sum(torch.exp(x - x_max), 1, keepdim=True)) + x_max
  338. # Original author: Francisco Massa:
  339. # https://github.com/fmassa/object-detection.torch
  340. # Ported to PyTorch by Max deGroot (02/01/2017)
  341. def nms(boxes, scores, overlap=0.5, top_k=200):
  342. """Apply non-maximum suppression at test time to avoid detecting too many
  343. overlapping bounding boxes for a given object.
  344. Args:
  345. boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
  346. scores: (tensor) The class predscores for the img, Shape:[num_priors].
  347. overlap: (float) The overlap thresh for suppressing unnecessary boxes.
  348. top_k: (int) The Maximum number of box preds to consider.
  349. Return:
  350. The indices of the kept boxes with respect to num_priors.
  351. """
  352. keep = torch.Tensor(scores.size(0)).fill_(0).long()
  353. if boxes.numel() == 0:
  354. return keep
  355. x1 = boxes[:, 0]
  356. y1 = boxes[:, 1]
  357. x2 = boxes[:, 2]
  358. y2 = boxes[:, 3]
  359. area = torch.mul(x2 - x1, y2 - y1)
  360. v, idx = scores.sort(0) # sort in ascending order
  361. # I = I[v >= 0.01]
  362. idx = idx[-top_k:] # indices of the top-k largest vals
  363. xx1 = boxes.new()
  364. yy1 = boxes.new()
  365. xx2 = boxes.new()
  366. yy2 = boxes.new()
  367. w = boxes.new()
  368. h = boxes.new()
  369. # keep = torch.Tensor()
  370. count = 0
  371. while idx.numel() > 0:
  372. i = idx[-1] # index of current largest val
  373. # keep.append(i)
  374. keep[count] = i
  375. count += 1
  376. if idx.size(0) == 1:
  377. break
  378. idx = idx[:-1] # remove kept element from view
  379. # load bboxes of next highest vals
  380. torch.index_select(x1, 0, idx, out=xx1)
  381. torch.index_select(y1, 0, idx, out=yy1)
  382. torch.index_select(x2, 0, idx, out=xx2)
  383. torch.index_select(y2, 0, idx, out=yy2)
  384. # store element-wise max with next highest score
  385. xx1 = torch.clamp(xx1, min=x1[i])
  386. yy1 = torch.clamp(yy1, min=y1[i])
  387. xx2 = torch.clamp(xx2, max=x2[i])
  388. yy2 = torch.clamp(yy2, max=y2[i])
  389. w.resize_as_(xx2)
  390. h.resize_as_(yy2)
  391. w = xx2 - xx1
  392. h = yy2 - yy1
  393. # check sizes of xx1 and xx2.. after each iteration
  394. w = torch.clamp(w, min=0.0)
  395. h = torch.clamp(h, min=0.0)
  396. inter = w * h
  397. # IoU = i / (area(a) + area(b) - i)
  398. rem_areas = torch.index_select(area, 0, idx) # load remaining areas)
  399. union = (rem_areas - inter) + area[i]
  400. IoU = inter / union # store result in iou
  401. # keep only elements with an IoU <= overlap
  402. idx = idx[IoU.le(overlap)]
  403. return keep, count