# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

from copy import deepcopy
from typing import Tuple

import numpy as np
import torch
from torch.nn import functional as F
from torchvision.transforms.functional import resize, to_pil_image  # type: ignore


class ResizeLongestSide:
    """
    Resizes images to the longest side 'target_length' and provides methods
    for resizing coordinates and boxes accordingly. Supports both numpy
    arrays and batched torch tensors.
    """

    def __init__(self, target_length: int) -> None:
        self.target_length = target_length

    def apply_image(self, image: np.ndarray) -> np.ndarray:
        """
        Expects a numpy array with shape HxWxC in uint8 format.
        """
        target_size = self.get_preprocess_shape(
            image.shape[0], image.shape[1], self.target_length
        )
        return np.array(resize(to_pil_image(image), target_size))
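
    # Hedged usage sketch (illustrative, not part of the original source):
    # with target_length=1024, a 480x640 HxWxC uint8 image is resized so its
    # longer side becomes 1024, i.e. to 768x1024:
    #
    #   transform = ResizeLongestSide(1024)
    #   resized = transform.apply_image(np.zeros((480, 640, 3), np.uint8))
    #   assert resized.shape == (768, 1024, 3)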

    def apply_coords(
        self, coords: np.ndarray, original_size: Tuple[int, ...]
    ) -> np.ndarray:
        """
        Expects a numpy array of length 2 in the final dimension. Requires the
        original image size in (H, W) format.
        """
        old_h, old_w = original_size
        new_h, new_w = self.get_preprocess_shape(
            original_size[0], original_size[1], self.target_length
        )
        coords = deepcopy(coords).astype(float)
        coords[..., 0] = coords[..., 0] * (new_w / old_w)
        coords[..., 1] = coords[..., 1] * (new_h / old_h)
        return coords
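
    # Worked example (illustrative, not from the original source): for a
    # 480x640 image mapped to 768x1024, the point (x=320, y=240) scales by
    # 1024/640 = 1.6 in x and 768/480 = 1.6 in y, landing at (512.0, 384.0):
    #
    #   transform.apply_coords(np.array([[320.0, 240.0]]), (480, 640))
    #   # -> array([[512., 384.]])
    #
    # Note the last dimension is (x, y): index 0 scales by width, index 1 by
    # height.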

    def apply_boxes(
        self, boxes: np.ndarray, original_size: Tuple[int, ...]
    ) -> np.ndarray:
        """
        Expects a numpy array shape Bx4. Requires the original image size
        in (H, W) format.
        """
        boxes = self.apply_coords(boxes.reshape(-1, 2, 2), original_size)
        return boxes.reshape(-1, 4)
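
    # Illustrative sketch (assumes XYXY boxes, i.e. two (x, y) corners per
    # box, which is why reshaping to (-1, 2, 2) lets apply_coords scale each
    # corner independently):
    #
    #   transform.apply_boxes(np.array([[100.0, 100.0, 320.0, 240.0]]), (480, 640))
    #   # -> array([[160., 160., 512., 384.]])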

    def apply_image_torch(self, image: torch.Tensor) -> torch.Tensor:
        """
        Expects batched images with shape BxCxHxW and float format. This
        transformation may not exactly match apply_image. apply_image is
        the transformation expected by the model.
        """
        # Expects an image in BCHW format, so H and W are the last two
        # dimensions (shape[2] and shape[3]). May not exactly match apply_image.
        target_size = self.get_preprocess_shape(
            image.shape[2], image.shape[3], self.target_length
        )
        return F.interpolate(
            image, target_size, mode="bilinear", align_corners=False, antialias=True
        )
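
    # Hedged usage sketch (not from the original source): the batched variant
    # uses antialiased bilinear interpolation, which can differ slightly from
    # the PIL-based resize in apply_image:
    #
    #   batch = torch.zeros(1, 3, 480, 640)
    #   out = transform.apply_image_torch(batch)
    #   assert out.shape == (1, 3, 768, 1024)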

    def apply_coords_torch(
        self, coords: torch.Tensor, original_size: Tuple[int, ...]
    ) -> torch.Tensor:
        """
        Expects a torch tensor with length 2 in the last dimension. Requires the
        original image size in (H, W) format.
        """
        old_h, old_w = original_size
        new_h, new_w = self.get_preprocess_shape(
            original_size[0], original_size[1], self.target_length
        )
        coords = deepcopy(coords).to(torch.float)
        coords[..., 0] = coords[..., 0] * (new_w / old_w)
        coords[..., 1] = coords[..., 1] * (new_h / old_h)
        return coords
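
    # Illustrative note (assumed usage): mirrors apply_coords for tensors; the
    # deepcopy plus float cast leaves the caller's coordinate tensor untouched:
    #
    #   transform.apply_coords_torch(torch.tensor([[320.0, 240.0]]), (480, 640))
    #   # -> tensor([[512., 384.]])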

    def apply_boxes_torch(
        self, boxes: torch.Tensor, original_size: Tuple[int, ...]
    ) -> torch.Tensor:
        """
        Expects a torch tensor with shape Bx4. Requires the original image
        size in (H, W) format.
        """
        boxes = self.apply_coords_torch(boxes.reshape(-1, 2, 2), original_size)
        return boxes.reshape(-1, 4)

    @staticmethod
    def get_preprocess_shape(
        oldh: int, oldw: int, long_side_length: int
    ) -> Tuple[int, int]:
        """
        Compute the output size given input size and target long side length.
        """
        scale = long_side_length * 1.0 / max(oldh, oldw)
        newh, neww = oldh * scale, oldw * scale
        neww = int(neww + 0.5)
        newh = int(newh + 0.5)
        return (newh, neww)
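

# Minimal self-check sketch (illustrative, not part of the original source).
# get_preprocess_shape scales the long side to exactly long_side_length and
# rounds the short side to the nearest integer: (480, 640) with target 1024
# gives scale = 1024 / 640 = 1.6, hence (768, 1024).
if __name__ == "__main__":
    t = ResizeLongestSide(1024)
    assert t.get_preprocess_shape(480, 640, 1024) == (768, 1024)
    assert t.get_preprocess_shape(640, 480, 1024) == (1024, 768)

    pts = t.apply_coords(np.array([[320.0, 240.0]]), (480, 640))
    assert np.allclose(pts, [[512.0, 384.0]])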