Source code for common.vision.transforms.keypoint_detection

"""
@author: Junguang Jiang
@contact: JiangJunguang1123@outlook.com
"""
# TODO needs better documentation
import numpy as np
from PIL import ImageFilter, Image
import torchvision.transforms.functional as F
import torchvision.transforms.transforms as T
import numbers
import random
import math
import warnings
from typing import ClassVar


def wrapper(transform: ClassVar):
    """ Wrap a transform for classification into a transform for keypoint detection.
    Note that the keypoint detection labels are passed through unchanged by the wrapped transform.

    Args:
        transform (class, callable): transform for classification

    Returns:
        transform for keypoint detection
    """
    class WrapperTransform(transform):
        def __call__(self, image, **kwargs):
            image = super().__call__(image)
            return image, kwargs

    return WrapperTransform

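
# --- Example (illustration only, not part of the original module) -----------
# A minimal usage sketch of `wrapper`: a torchvision transform is wrapped so
# that keyword labels such as `keypoint2d` come back untouched in a dict while
# only the image is transformed. The 64x64 image and the 21x2 keypoint array
# below are made-up placeholder values.
_jitter = wrapper(T.ColorJitter)(brightness=0.4)
_fake_image = Image.new('RGB', (64, 64))
_fake_keypoints = np.zeros((21, 2))
_out_image, _labels = _jitter(_fake_image, keypoint2d=_fake_keypoints)
assert np.array_equal(_labels['keypoint2d'], _fake_keypoints)
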
ToTensor = wrapper(T.ToTensor)
Normalize = wrapper(T.Normalize)
ColorJitter = wrapper(T.ColorJitter)


def resize(image: Image.Image, size: int, interpolation=Image.BILINEAR,
           keypoint2d: np.ndarray = None, intrinsic_matrix: np.ndarray = None):
    width, height = image.size
    assert width == height
    factor = float(size) / float(width)
    image = F.resize(image, size, interpolation)
    keypoint2d = np.copy(keypoint2d)
    keypoint2d *= factor
    intrinsic_matrix = np.copy(intrinsic_matrix)
    intrinsic_matrix[0][0] *= factor
    intrinsic_matrix[0][2] *= factor
    intrinsic_matrix[1][1] *= factor
    intrinsic_matrix[1][2] *= factor
    return image, keypoint2d, intrinsic_matrix


def crop(image: Image.Image, top, left, height, width, keypoint2d: np.ndarray):
    image = F.crop(image, top, left, height, width)
    keypoint2d = np.copy(keypoint2d)
    keypoint2d[:, 0] -= left
    keypoint2d[:, 1] -= top
    return image, keypoint2d

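
# --- Example (illustration only, not part of the original module) -----------
# `resize` assumes a square input and rescales the 2D keypoints and the 3x3
# pinhole intrinsic matrix by the same factor as the image. The 256px image,
# the single keypoint and the intrinsic values are made-up illustration numbers.
_square = Image.new('RGB', (256, 256))
_kp = np.array([[128., 64.]])
_K = np.array([[500.,   0., 128.],
               [  0., 500., 128.],
               [  0.,   0.,   1.]])
_small, _kp_small, _K_small = resize(_square, 128, keypoint2d=_kp, intrinsic_matrix=_K)
assert _small.size == (128, 128)
assert _kp_small[0, 0] == 64. and _K_small[0, 0] == 250.
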
def resized_crop(img, top, left, height, width, size, interpolation=Image.BILINEAR,
                 keypoint2d: np.ndarray = None, intrinsic_matrix: np.ndarray = None):
    """Crop the given PIL Image and resize it to desired size.
    Notably used in :class:`~torchvision.transforms.RandomResizedCrop`.

    Args:
        img (PIL Image): Image to be cropped. (0,0) denotes the top left corner of the image.
        top (int): Vertical component of the top left corner of the crop box.
        left (int): Horizontal component of the top left corner of the crop box.
        height (int): Height of the crop box.
        width (int): Width of the crop box.
        size (sequence or int): Desired output size. Same semantics as ``resize``.
        interpolation (int, optional): Desired interpolation. Default is ``PIL.Image.BILINEAR``.
    Returns:
        PIL Image: Cropped image.
    """
    assert isinstance(img, Image.Image), 'img should be PIL Image'
    img, keypoint2d = crop(img, top, left, height, width, keypoint2d)
    img, keypoint2d, intrinsic_matrix = resize(img, size, interpolation, keypoint2d, intrinsic_matrix)
    return img, keypoint2d, intrinsic_matrix

def center_crop(image, output_size, keypoint2d: np.ndarray):
    """Crop the given PIL Image at the center.

    Args:
        image (PIL Image): Image to be cropped. (0,0) denotes the top left corner of the image.
        output_size (sequence or int): (height, width) of the crop box. If int, it is used for both directions.
    Returns:
        PIL Image: Cropped image.
    """
    width, height = image.size
    crop_height, crop_width = output_size
    crop_top = int(round((height - crop_height) / 2.))
    crop_left = int(round((width - crop_width) / 2.))
    return crop(image, crop_top, crop_left, crop_height, crop_width, keypoint2d)

def hflip(image: Image.Image, keypoint2d: np.ndarray):
    width, height = image.size
    image = F.hflip(image)
    keypoint2d = np.copy(keypoint2d)
    keypoint2d[:, 0] = width - 1. - keypoint2d[:, 0]
    return image, keypoint2d


def rotate(image: Image.Image, angle, keypoint2d: np.ndarray):
    image = F.rotate(image, angle)

    angle = -np.deg2rad(angle)
    keypoint2d = np.copy(keypoint2d)
    rotation_matrix = np.array([
        [np.cos(angle), -np.sin(angle)],
        [np.sin(angle), np.cos(angle)]
    ])
    width, height = image.size
    keypoint2d[:, 0] = keypoint2d[:, 0] - width / 2
    keypoint2d[:, 1] = keypoint2d[:, 1] - height / 2
    keypoint2d = np.matmul(rotation_matrix, keypoint2d.T).T
    keypoint2d[:, 0] = keypoint2d[:, 0] + width / 2
    keypoint2d[:, 1] = keypoint2d[:, 1] + height / 2
    return image, keypoint2d


def resize_pad(img, keypoint2d, size, interpolation=Image.BILINEAR):
    w, h = img.size
    if w < h:
        oh = size
        ow = int(size * w / h)
        img = img.resize((ow, oh), interpolation)
        pad_top = pad_bottom = 0
        pad_left = math.floor((size - ow) / 2)
        pad_right = math.ceil((size - ow) / 2)
        keypoint2d = keypoint2d * oh / h
        keypoint2d[:, 0] += (size - ow) / 2
    else:
        ow = size
        oh = int(size * h / w)
        img = img.resize((ow, oh), interpolation)
        pad_top = math.floor((size - oh) / 2)
        pad_bottom = math.ceil((size - oh) / 2)
        pad_left = pad_right = 0
        keypoint2d = keypoint2d * ow / w
        keypoint2d[:, 1] += (size - oh) / 2
        keypoint2d[:, 0] += (size - ow) / 2

    img = np.asarray(img)
    img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0)),
                 'constant', constant_values=0)
    return Image.fromarray(img), keypoint2d

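
# --- Example (illustration only, not part of the original module) -----------
# `hflip` mirrors the x coordinate across the image width, and `rotate` spins
# keypoints about the image center (so a keypoint at the exact center stays
# put). The 100x100 canvas and the coordinates are made-up values.
_canvas = Image.new('RGB', (100, 100))
_, _flipped = hflip(_canvas, np.array([[10., 20.]]))
assert _flipped[0, 0] == 100. - 1. - 10.
_, _centered = rotate(_canvas, 90, np.array([[50., 50.]]))
assert np.allclose(_centered, [[50., 50.]])
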
class Compose(object):
    """Composes several transforms together.

    Args:
        transforms (list of ``Transform`` objects): list of transforms to compose.
    """
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, **kwargs):
        for t in self.transforms:
            image, kwargs = t(image, **kwargs)
        return image, kwargs

class GaussianBlur(object):
    """Blur the image with a Gaussian kernel whose radius is drawn uniformly from [low, high)."""
    def __init__(self, low=0, high=0.8):
        self.low = low
        self.high = high

    def __call__(self, image: Image.Image, **kwargs):
        radius = np.random.uniform(low=self.low, high=self.high)
        image = image.filter(ImageFilter.GaussianBlur(radius))
        return image, kwargs

class Resize(object):
    """Resize the input PIL Image (assumed square) to the given size.
    The 2D keypoints and the camera intrinsic matrix are rescaled by the same factor.
    """
    def __init__(self, size, interpolation=Image.BILINEAR):
        assert isinstance(size, int)
        self.size = size
        self.interpolation = interpolation

    def __call__(self, image, keypoint2d: np.ndarray, intrinsic_matrix: np.ndarray, **kwargs):
        image, keypoint2d, intrinsic_matrix = resize(image, self.size, self.interpolation,
                                                     keypoint2d, intrinsic_matrix)
        kwargs.update(keypoint2d=keypoint2d, intrinsic_matrix=intrinsic_matrix)
        if 'depth' in kwargs:
            kwargs['depth'] = F.resize(kwargs['depth'], self.size)
        return image, kwargs

class ResizePad(object):
    """Resize the input PIL Image so that its longer edge matches the given size,
    then zero-pad the shorter edge to produce a square output of that size.
    """
    def __init__(self, size, interpolation=Image.BILINEAR):
        self.size = size
        self.interpolation = interpolation

    def __call__(self, img, keypoint2d, **kwargs):
        image, keypoint2d = resize_pad(img, keypoint2d, self.size, self.interpolation)
        kwargs.update(keypoint2d=keypoint2d)
        return image, kwargs

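
# --- Example (illustration only, not part of the original module) -----------
# A 200x100 placeholder image becomes a 128x128 square: the shorter (vertical)
# edge is zero-padded on both sides and the keypoint shifts with the padding.
_wide = Image.new('RGB', (200, 100))
_out, _lbl = ResizePad(128)(_wide, keypoint2d=np.array([[100., 50.]]))
assert _out.size == (128, 128)
assert np.allclose(_lbl['keypoint2d'], [[64., 64.]])
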
class CenterCrop(object):
    """Crops the given PIL Image at the center.
    """
    def __init__(self, size):
        if isinstance(size, numbers.Number):
            self.size = (int(size), int(size))
        else:
            self.size = size

    def __call__(self, image, keypoint2d, **kwargs):
        """
        Args:
            img (PIL Image): Image to be cropped.

        Returns:
            PIL Image: Cropped image.
        """
        image, keypoint2d = center_crop(image, self.size, keypoint2d)
        kwargs.update(keypoint2d=keypoint2d)
        if 'depth' in kwargs:
            kwargs['depth'] = F.center_crop(kwargs['depth'], self.size)
        return image, kwargs

class RandomRotation(object):
    """Rotate the image by angle.

    Args:
        degrees (sequence or float or int): Range of degrees to select from.
            If degrees is a number instead of a sequence like (min, max), the range of degrees
            will be (-degrees, +degrees).
    """
    def __init__(self, degrees):
        if isinstance(degrees, numbers.Number):
            if degrees < 0:
                raise ValueError("If degrees is a single number, it must be positive.")
            self.degrees = (-degrees, degrees)
        else:
            if len(degrees) != 2:
                raise ValueError("If degrees is a sequence, it must be of len 2.")
            self.degrees = degrees

    @staticmethod
    def get_params(degrees):
        """Get parameters for ``rotate`` for a random rotation.

        Returns:
            float: angle to be passed to ``rotate`` for a random rotation.
        """
        angle = random.uniform(degrees[0], degrees[1])
        return angle

    def __call__(self, image, keypoint2d, **kwargs):
        """
        Args:
            img (PIL Image): Image to be rotated.

        Returns:
            PIL Image: Rotated image.
        """
        angle = self.get_params(self.degrees)
        image, keypoint2d = rotate(image, angle, keypoint2d)
        kwargs.update(keypoint2d=keypoint2d)
        if 'depth' in kwargs:
            kwargs['depth'] = F.rotate(kwargs['depth'], angle)
        return image, kwargs

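
# --- Example (illustration only, not part of the original module) -----------
# When an extra 'depth' label (a placeholder float PIL image here) is supplied,
# it receives exactly the same rotation as the RGB image.
_rgb = Image.new('RGB', (64, 64))
_depth_map = Image.new('F', (64, 64))
_out, _lbl = RandomRotation(15)(_rgb, keypoint2d=np.array([[32., 32.]]), depth=_depth_map)
assert _lbl['depth'].size == _out.size == (64, 64)
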
class RandomResizedCrop(object):
    """Crop the given PIL Image to a random size and resize it to the given size.

    A square crop of random area (default: 0.6 to 1.3 of the original area, with the
    aspect ratio fixed to 1) is made. This crop is finally resized to the given size.

    Args:
        size: expected output size of each edge
        scale: range of the crop area relative to the area of the original image
        interpolation: Default: PIL.Image.BILINEAR
    """
    def __init__(self, size, scale=(0.6, 1.3), interpolation=Image.BILINEAR):
        self.size = size
        if scale[0] > scale[1]:
            warnings.warn("range should be of kind (min, max)")
        self.interpolation = interpolation
        self.scale = scale

    @staticmethod
    def get_params(img, scale):
        """Get parameters for ``crop`` for a random sized crop.

        Args:
            img (PIL Image): Image to be cropped.
            scale (tuple): range of the crop area relative to the area of the original image

        Returns:
            tuple: params (i, j, h, w) to be passed to ``crop`` for a random sized crop.
        """
        width, height = img.size
        area = height * width

        for attempt in range(10):
            target_area = random.uniform(*scale) * area
            aspect_ratio = 1

            w = int(round(math.sqrt(target_area * aspect_ratio)))
            h = int(round(math.sqrt(target_area / aspect_ratio)))

            if 0 < w <= width and 0 < h <= height:
                i = random.randint(0, height - h)
                j = random.randint(0, width - w)
                return i, j, h, w

        # Fallback to whole image
        return 0, 0, height, width

    def __call__(self, image, keypoint2d: np.ndarray, intrinsic_matrix: np.ndarray, **kwargs):
        """
        Args:
            img (PIL Image): Image to be cropped and resized.

        Returns:
            PIL Image: Randomly cropped and resized image.
        """
        i, j, h, w = self.get_params(image, self.scale)
        image, keypoint2d, intrinsic_matrix = resized_crop(image, i, j, h, w, self.size, self.interpolation,
                                                           keypoint2d, intrinsic_matrix)
        kwargs.update(keypoint2d=keypoint2d, intrinsic_matrix=intrinsic_matrix)
        if 'depth' in kwargs:
            kwargs['depth'] = F.resized_crop(kwargs['depth'], i, j, h, w, self.size, self.interpolation)
        return image, kwargs

class RandomApply(T.RandomTransforms):
    """Apply a list of transformations randomly with a given probability.

    Args:
        transforms (list or tuple or torch.nn.Module): list of transformations
        p (float): probability
    """
    def __init__(self, transforms, p=0.5):
        super(RandomApply, self).__init__(transforms)
        self.p = p

    def __call__(self, image, **kwargs):
        if self.p < random.random():
            return image, kwargs
        for t in self.transforms:
            image, kwargs = t(image, **kwargs)
        return image, kwargs

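
# --- Example (illustration only, not part of the original module) -----------
# One possible way to compose these transforms for keypoint-detection training.
# All numeric values below (output size, scale range, jitter strengths, blur
# radius, normalization statistics) are assumptions for the example, not values
# prescribed by this module.
train_transform = Compose([
    RandomRotation(10),
    RandomResizedCrop(size=256, scale=(0.6, 1.3)),
    RandomApply([GaussianBlur(high=1.0)], p=0.5),
    ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
# image_t, labels = train_transform(image, keypoint2d=keypoint2d,
#                                   intrinsic_matrix=intrinsic_matrix)
# labels['keypoint2d'] and labels['intrinsic_matrix'] track every geometric
# change applied to the image; photometric transforms leave them untouched.
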
