from itertools import product
from math import ceil

import numpy as np
import torch
import torchvision


class PriorBox(object):
    def __init__(self, cfg, image_size=None, phase="train"):
        super(PriorBox, self).__init__()
        self.min_sizes = cfg["min_sizes"]
        self.steps = cfg["steps"]
        self.clip = cfg["clip"]
        self.image_size = image_size
        # Per-level feature-map sizes implied by the input size and strides.
        self.feature_maps = [
            [ceil(self.image_size[0] / step), ceil(self.image_size[1] / step)]
            for step in self.steps
        ]
        self.name = "s"

    def forward(self):
        anchors = []
        for k, f in enumerate(self.feature_maps):
            min_sizes = self.min_sizes[k]
            # One anchor per (cell, min_size), centered on the cell and
            # normalized to [0, 1] by the input size.
            for i, j in product(range(f[0]), range(f[1])):
                for min_size in min_sizes:
                    s_kx = min_size / self.image_size[1]
                    s_ky = min_size / self.image_size[0]
                    dense_cx = [
                        x * self.steps[k] / self.image_size[1] for x in [j + 0.5]
                    ]
                    dense_cy = [
                        y * self.steps[k] / self.image_size[0] for y in [i + 0.5]
                    ]
                    for cy, cx in product(dense_cy, dense_cx):
                        anchors += [cx, cy, s_kx, s_ky]
        # back to torch land
        output = torch.Tensor(anchors).view(-1, 4)
        if self.clip:
            output.clamp_(max=1, min=0)
        return output
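

# Usage sketch (the cfg values are an assumption in RetinaFace style, not part
# of this module's API contract): for a 640x640 input, the three levels give
# (80*80 + 40*40 + 20*20) * 2 = 16800 priors in normalized (cx, cy, w, h) form.
def _demo_priorbox():
    cfg = {
        "min_sizes": [[16, 32], [64, 128], [256, 512]],
        "steps": [8, 16, 32],
        "clip": False,
    }
    return PriorBox(cfg, image_size=(640, 640)).forward()  # shape: [16800, 4]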


def py_cpu_nms(dets, thresh):
    """NMS on [x1, y1, x2, y2, score] rows.

    Kept under its historical name, but delegates to torchvision.ops.nms
    instead of the original pure-Python loop.
    """
    keep = torchvision.ops.nms(
        boxes=torch.Tensor(dets[:, :4]),
        scores=torch.Tensor(dets[:, 4]),
        iou_threshold=thresh,
    )
    return list(keep)


def point_form(boxes):
    """Convert prior_boxes to (xmin, ymin, xmax, ymax)
    representation for comparison to point form ground truth data.
    Args:
        boxes: (tensor) center-size default boxes from priorbox layers.
    Return:
        boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes.
    """
    return torch.cat(
        (
            boxes[:, :2] - boxes[:, 2:] / 2,  # xmin, ymin
            boxes[:, :2] + boxes[:, 2:] / 2,  # xmax, ymax
        ),
        1,
    )


def center_size(boxes):
    """Convert point-form boxes to (cx, cy, w, h)
    representation for comparison to center-size form ground truth data.
    Args:
        boxes: (tensor) point_form boxes
    Return:
        boxes: (tensor) Converted (cx, cy, w, h) form of boxes.
    """
    return torch.cat(
        (
            (boxes[:, 2:] + boxes[:, :2]) / 2,  # cx, cy
            boxes[:, 2:] - boxes[:, :2],  # w, h
        ),
        1,
    )


def intersect(box_a, box_b):
    """We resize both tensors to [A,B,2] without new malloc:
    [A,2] -> [A,1,2] -> [A,B,2]
    [B,2] -> [1,B,2] -> [A,B,2]
    Then we compute the area of intersect between box_a and box_b.
    Args:
        box_a: (tensor) bounding boxes, Shape: [A,4].
        box_b: (tensor) bounding boxes, Shape: [B,4].
    Return:
        (tensor) intersection area, Shape: [A,B].
    """
    A = box_a.size(0)
    B = box_b.size(0)
    max_xy = torch.min(
        box_a[:, 2:].unsqueeze(1).expand(A, B, 2),
        box_b[:, 2:].unsqueeze(0).expand(A, B, 2),
    )
    min_xy = torch.max(
        box_a[:, :2].unsqueeze(1).expand(A, B, 2),
        box_b[:, :2].unsqueeze(0).expand(A, B, 2),
    )
    inter = torch.clamp((max_xy - min_xy), min=0)
    return inter[:, :, 0] * inter[:, :, 1]


def jaccard(box_a, box_b):
    """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
    is simply the intersection over union of two boxes. Here we operate on
    ground truth boxes and default boxes.
    E.g.:
        A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
    Args:
        box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
        box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
    Return:
        jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
    """
    inter = intersect(box_a, box_b)
    area_a = (
        ((box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1]))
        .unsqueeze(1)
        .expand_as(inter)
    )  # [A,B]
    area_b = (
        ((box_b[:, 2] - box_b[:, 0]) * (box_b[:, 3] - box_b[:, 1]))
        .unsqueeze(0)
        .expand_as(inter)
    )  # [A,B]
    union = area_a + area_b - inter
    return inter / union  # [A,B]
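

# Worked example (a sketch, not part of the original module): box a and the
# first box of b share a 1x1 intersection and a 4 + 4 - 1 = 7 union, so
# jaccard() returns ~0.1429 there and 0 for the disjoint second box.
def _demo_jaccard():
    a = torch.tensor([[0.0, 0.0, 2.0, 2.0]])
    b = torch.tensor([[1.0, 1.0, 3.0, 3.0], [4.0, 4.0, 5.0, 5.0]])
    return jaccard(a, b)  # tensor([[0.1429, 0.0000]])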


def matrix_iou(a, b):
    """
    return iou of a and b, numpy version for data augmentation
    """
    lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
    rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
    area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
    area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
    area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
    return area_i / (area_a[:, np.newaxis] + area_b - area_i)


def matrix_iof(a, b):
    """
    return iof (intersection over the area of a) of a and b,
    numpy version for data augmentation
    """
    lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
    rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
    area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
    area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
    return area_i / np.maximum(area_a[:, np.newaxis], 1)
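

# Worked example (a sketch, not part of the original module) contrasting the
# two measures: a 1x1 box fully inside a 4x4 box has IoU 1/16 but IoF 1.0,
# since IoF normalizes only by the first box's own area.
def _demo_matrix_iou_iof():
    a = np.array([[1.0, 1.0, 2.0, 2.0]])  # area 1, fully contained in b
    b = np.array([[0.0, 0.0, 4.0, 4.0]])  # area 16
    return matrix_iou(a, b), matrix_iof(a, b)  # (array([[0.0625]]), array([[1.]]))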


def match(
    threshold, truths, priors, variances, labels, landms, loc_t, conf_t, landm_t, idx
):
    """Match each prior box with the ground truth box of the highest jaccard
    overlap, encode the bounding boxes, then return the matched indices
    corresponding to both confidence and location preds.
    Args:
        threshold: (float) The overlap threshold used when matching boxes.
        truths: (tensor) Ground truth boxes, Shape: [num_obj, 4].
        priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4].
        variances: (tensor) Variances corresponding to each prior coord,
            Shape: [num_priors, 4].
        labels: (tensor) All the class labels for the image, Shape: [num_obj].
        landms: (tensor) Ground truth landms, Shape: [num_obj, 10].
        loc_t: (tensor) Tensor to be filled w/ encoded location targets.
        conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds.
        landm_t: (tensor) Tensor to be filled w/ encoded landm targets.
        idx: (int) current batch index
    Return:
        The matched indices corresponding to 1) location, 2) confidence and
        3) landm preds.
    """
    # jaccard index
    overlaps = jaccard(truths, point_form(priors))
    # (Bipartite Matching)
    # [num_objects, 1] best prior for each ground truth
    best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True)
    # ignore hard gt
    valid_gt_idx = best_prior_overlap[:, 0] >= 0.2
    best_prior_idx_filter = best_prior_idx[valid_gt_idx, :]
    if best_prior_idx_filter.shape[0] <= 0:
        loc_t[idx] = 0
        conf_t[idx] = 0
        return
    # [1, num_priors] best ground truth for each prior
    best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True)
    best_truth_idx.squeeze_(0)
    best_truth_overlap.squeeze_(0)
    best_prior_idx.squeeze_(1)
    best_prior_idx_filter.squeeze_(1)
    best_prior_overlap.squeeze_(1)
    best_truth_overlap.index_fill_(0, best_prior_idx_filter, 2)  # ensure best prior
    # TODO refactor: index best_prior_idx with long tensor
    # ensure every gt matches with its prior of max overlap
    for j in range(best_prior_idx.size(0)):  # which gt box this anchor predicts
        best_truth_idx[best_prior_idx[j]] = j
    matches = truths[best_truth_idx]  # [num_priors, 4] matched gt box per anchor
    conf = labels[best_truth_idx]  # [num_priors] matched label per anchor
    # priors whose best overlap falls below the threshold become negatives
    conf[best_truth_overlap < threshold] = 0  # label as background
    loc = encode(matches, priors, variances)
    matches_landm = landms[best_truth_idx]
    landm = encode_landm(matches_landm, priors, variances)
    loc_t[idx] = loc  # [num_priors, 4] encoded offsets to learn
    conf_t[idx] = conf  # [num_priors] top class label for each prior
    landm_t[idx] = landm
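

# Usage sketch (hedged; the [box(4) | landms(10) | label(1)] target layout is
# an assumption about the dataset, not something this module enforces):
# allocate per-batch target tensors and let match() fill one row per image,
# the way a MultiBox-style loss would.
def _demo_match(priors, targets, variances=(0.1, 0.2), threshold=0.35):
    num, num_priors = len(targets), priors.size(0)
    loc_t = torch.Tensor(num, num_priors, 4)
    landm_t = torch.Tensor(num, num_priors, 10)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :4]
        landms_gt = targets[idx][:, 4:14]
        labels = targets[idx][:, 14]
        match(threshold, truths, priors, variances, labels, landms_gt,
              loc_t, conf_t, landm_t, idx)
    return loc_t, conf_t, landm_t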


def encode(matched, priors, variances):
    """Encode the variances from the priorbox layers into the ground truth boxes
    we have matched (based on jaccard overlap) with the prior boxes.
    Args:
        matched: (tensor) Coords of ground truth for each prior in point-form
            Shape: [num_priors, 4].
        priors: (tensor) Prior boxes in center-offset form
            Shape: [num_priors, 4].
        variances: (list[float]) Variances of priorboxes
    Return:
        encoded boxes (tensor), Shape: [num_priors, 4]
    """
    # dist b/t match center and prior's center
    g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2]
    # encode variance
    g_cxcy /= variances[0] * priors[:, 2:]
    # match wh / prior wh
    g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]
    g_wh = torch.log(g_wh) / variances[1]
    # return target for smooth_l1_loss
    return torch.cat([g_cxcy, g_wh], 1)  # [num_priors, 4]


def encode_landm(matched, priors, variances):
    """Encode the variances from the priorbox layers into the ground truth
    landmarks we have matched (based on jaccard overlap) with the prior boxes.
    Args:
        matched: (tensor) Coords of ground truth for each prior in point-form
            Shape: [num_priors, 10].
        priors: (tensor) Prior boxes in center-offset form
            Shape: [num_priors, 4].
        variances: (list[float]) Variances of priorboxes
    Return:
        encoded landm (tensor), Shape: [num_priors, 10]
    """
    # dist b/t match center and prior's center
    matched = torch.reshape(matched, (matched.size(0), 5, 2))
    priors_cx = priors[:, 0].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
    priors_cy = priors[:, 1].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
    priors_w = priors[:, 2].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
    priors_h = priors[:, 3].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
    priors = torch.cat([priors_cx, priors_cy, priors_w, priors_h], dim=2)
    g_cxcy = matched[:, :, :2] - priors[:, :, :2]
    # encode variance
    g_cxcy /= variances[0] * priors[:, :, 2:]
    g_cxcy = g_cxcy.reshape(g_cxcy.size(0), -1)
    # return target for smooth_l1_loss
    return g_cxcy


# Adapted from https://github.com/Hakuyume/chainer-ssd
def decode(loc, priors, variances):
    """Decode locations from predictions using priors to undo
    the encoding we did for offset regression at train time.
    Args:
        loc (tensor): location predictions for loc layers,
            Shape: [num_priors, 4]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [num_priors, 4].
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded bounding box predictions
    """
    boxes = torch.cat(
        (
            priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
            priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1]),
        ),
        1,
    )
    # convert (cx, cy, w, h) back to (xmin, ymin, xmax, ymax)
    boxes[:, :2] -= boxes[:, 2:] / 2
    boxes[:, 2:] += boxes[:, :2]
    return boxes
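

# Round-trip check (a sketch with assumed variances of (0.1, 0.2), the values
# commonly used with this codebase): decode(encode(gt)) should recover the
# original point-form box up to float error.
def _demo_encode_decode(variances=(0.1, 0.2)):
    priors = torch.tensor([[0.5, 0.5, 0.2, 0.2]])  # (cx, cy, w, h)
    gt = torch.tensor([[0.45, 0.45, 0.60, 0.62]])  # (xmin, ymin, xmax, ymax)
    loc = encode(gt, priors, variances)
    return torch.allclose(decode(loc, priors, variances), gt, atol=1e-6)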


def decode_landm(pre, priors, variances):
    """Decode landm from predictions using priors to undo
    the encoding we did for offset regression at train time.
    Args:
        pre (tensor): landm predictions for loc layers,
            Shape: [num_priors, 10]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [num_priors, 4].
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded landm predictions
    """
    tmp = (
        priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:],
        priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:],
        priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:],
        priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:],
        priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:],
    )
    landms = torch.cat(tmp, dim=1)
    return landms


def batched_decode(b_loc, priors, variances):
    """Decode locations from predictions using priors to undo
    the encoding we did for offset regression at train time.
    Args:
        b_loc (tensor): location predictions for loc layers,
            Shape: [num_batches, num_priors, 4]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [1, num_priors, 4].
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded bounding box predictions
    """
    boxes = (
        priors[:, :, :2] + b_loc[:, :, :2] * variances[0] * priors[:, :, 2:],
        priors[:, :, 2:] * torch.exp(b_loc[:, :, 2:] * variances[1]),
    )
    boxes = torch.cat(boxes, dim=2)
    boxes[:, :, :2] -= boxes[:, :, 2:] / 2
    boxes[:, :, 2:] += boxes[:, :, :2]
    return boxes


def batched_decode_landm(pre, priors, variances):
    """Decode landm from predictions using priors to undo
    the encoding we did for offset regression at train time.
    Args:
        pre (tensor): landm predictions for loc layers,
            Shape: [num_batches, num_priors, 10]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [1, num_priors, 4].
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded landm predictions
    """
    landms = (
        priors[:, :, :2] + pre[:, :, :2] * variances[0] * priors[:, :, 2:],
        priors[:, :, :2] + pre[:, :, 2:4] * variances[0] * priors[:, :, 2:],
        priors[:, :, :2] + pre[:, :, 4:6] * variances[0] * priors[:, :, 2:],
        priors[:, :, :2] + pre[:, :, 6:8] * variances[0] * priors[:, :, 2:],
        priors[:, :, :2] + pre[:, :, 8:10] * variances[0] * priors[:, :, 2:],
    )
    landms = torch.cat(landms, dim=2)
    return landms


def log_sum_exp(x):
    """Utility function for computing log_sum_exp in a numerically stable way.
    This is used to compute the unaveraged confidence loss across all
    examples in a batch.
    Args:
        x (tensor): conf_preds from conf layers
    """
    x_max = x.data.max()
    return torch.log(torch.sum(torch.exp(x - x_max), 1, keepdim=True)) + x_max
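

# Sanity check (a sketch, not part of the original module): subtracting the
# global max is only for numerical stability, so log_sum_exp() should agree
# with torch.logsumexp over dim 1.
def _demo_log_sum_exp():
    x = torch.randn(8, 3)
    return torch.allclose(log_sum_exp(x), torch.logsumexp(x, dim=1, keepdim=True))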


# Original author: Francisco Massa:
# https://github.com/fmassa/object-detection.torch
# Ported to PyTorch by Max deGroot (02/01/2017)
def nms(boxes, scores, overlap=0.5, top_k=200):
    """Apply non-maximum suppression at test time to avoid detecting too many
    overlapping bounding boxes for a given object.
    Args:
        boxes: (tensor) The location preds for the img, Shape: [num_priors, 4].
        scores: (tensor) The class pred scores for the img, Shape: [num_priors].
        overlap: (float) The overlap thresh for suppressing unnecessary boxes.
        top_k: (int) The maximum number of box preds to consider.
    Return:
        The indices of the kept boxes with respect to num_priors, plus the
        number of valid entries in that index tensor.
    """
    keep = torch.Tensor(scores.size(0)).fill_(0).long()
    if boxes.numel() == 0:
        return keep, 0
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    area = torch.mul(x2 - x1, y2 - y1)
    v, idx = scores.sort(0)  # sort in ascending order
    # I = I[v >= 0.01]
    idx = idx[-top_k:]  # indices of the top-k largest vals
    xx1 = boxes.new()
    yy1 = boxes.new()
    xx2 = boxes.new()
    yy2 = boxes.new()
    w = boxes.new()
    h = boxes.new()
    # keep = torch.Tensor()
    count = 0
    while idx.numel() > 0:
        i = idx[-1]  # index of current largest val
        # keep.append(i)
        keep[count] = i
        count += 1
        if idx.size(0) == 1:
            break
        idx = idx[:-1]  # remove kept element from view
        # load bboxes of next highest vals
        torch.index_select(x1, 0, idx, out=xx1)
        torch.index_select(y1, 0, idx, out=yy1)
        torch.index_select(x2, 0, idx, out=xx2)
        torch.index_select(y2, 0, idx, out=yy2)
        # store element-wise max with next highest score
        xx1 = torch.clamp(xx1, min=x1[i])
        yy1 = torch.clamp(yy1, min=y1[i])
        xx2 = torch.clamp(xx2, max=x2[i])
        yy2 = torch.clamp(yy2, max=y2[i])
        w.resize_as_(xx2)
        h.resize_as_(yy2)
        w = xx2 - xx1
        h = yy2 - yy1
        # check sizes of xx1 and xx2.. after each iteration
        w = torch.clamp(w, min=0.0)
        h = torch.clamp(h, min=0.0)
        inter = w * h
        # IoU = i / (area(a) + area(b) - i)
        rem_areas = torch.index_select(area, 0, idx)  # load remaining areas
        union = (rem_areas - inter) + area[i]
        IoU = inter / union  # store result in iou
        # keep only elements with an IoU <= overlap
        idx = idx[IoU.le(overlap)]
    return keep, count
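

# Usage sketch (hypothetical inputs): two heavily overlapping boxes and one
# distant box; with overlap=0.5 the lower-scoring duplicate (index 1, IoU 0.81
# against index 0) is suppressed, so keep[:count] comes back as tensor([0, 2]).
def _demo_nms():
    boxes = torch.tensor([[0.0, 0.0, 10.0, 10.0],
                          [1.0, 1.0, 10.0, 10.0],
                          [20.0, 20.0, 30.0, 30.0]])
    scores = torch.tensor([0.9, 0.8, 0.7])
    keep, count = nms(boxes, scores, overlap=0.5)
    return keep[:count]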