Source code for common.vision.transforms
import math
import random
from PIL import Image
import numpy as np
import torch
from torchvision.transforms import Normalize
[docs]class ResizeImage(object):
"""Resize the input PIL Image to the given size.
Args:
size (sequence or int): Desired output size. If size is a sequence like
(h, w), output size will be matched to this. If size is an int,
output size will be (size, size)
"""
def __init__(self, size):
if isinstance(size, int):
self.size = (int(size), int(size))
else:
self.size = size
def __call__(self, img):
th, tw = self.size
return img.resize((th, tw))
def __repr__(self):
return self.__class__.__name__ + '(size={0})'.format(self.size)
[docs]class MultipleApply:
"""Apply a list of transformations to an image and get multiple transformed images.
Args:
transforms (list or tuple): list of transformations
Example:
>>> transform1 = T.Compose([
... ResizeImage(256),
... T.RandomCrop(224)
... ])
>>> transform2 = T.Compose([
... ResizeImage(256),
... T.RandomCrop(224),
... ])
>>> multiply_transform = MultipleApply([transform1, transform2])
"""
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, image):
return [t(image) for t in self.transforms]
def __repr__(self):
format_string = self.__class__.__name__ + '('
for t in self.transforms:
format_string += '\n'
format_string += ' {0}'.format(t)
format_string += '\n)'
return format_string
[docs]class Denormalize(Normalize):
"""DeNormalize a tensor image with mean and standard deviation.
Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` for ``n``
channels, this transform will denormalize each channel of the input
``torch.*Tensor`` i.e.,
``output[channel] = input[channel] * std[channel] + mean[channel]``
.. note::
This transform acts out of place, i.e., it does not mutate the input tensor.
Args:
mean (sequence): Sequence of means for each channel.
std (sequence): Sequence of standard deviations for each channel.
"""
def __init__(self, mean, std):
mean = np.array(mean)
std = np.array(std)
super().__init__((-mean / std).tolist(), (1 / std).tolist())
[docs]class NormalizeAndTranspose:
"""
First, normalize a tensor image with mean and standard deviation.
Then, convert the shape (H x W x C) to shape (C x H x W).
"""
def __init__(self, mean=(104.00698793, 116.66876762, 122.67891434)):
self.mean = np.array(mean, dtype=np.float32)
def __call__(self, image):
if isinstance(image, Image.Image):
image = np.asarray(image, np.float32)
# change to BGR
image = image[:, :, ::-1]
# normalize
image -= self.mean
image = image.transpose((2, 0, 1)).copy()
elif isinstance(image, torch.Tensor):
# change to BGR
image = image[:, :, [2, 1, 0]]
# normalize
image -= torch.from_numpy(self.mean).to(image.device)
image = image.permute((2, 0, 1))
else:
raise NotImplementedError(type(image))
return image
[docs]class DeNormalizeAndTranspose:
"""
First, convert a tensor image from the shape (C x H x W ) to shape (H x W x C).
Then, denormalize it with mean and standard deviation.
"""
def __init__(self, mean=(104.00698793, 116.66876762, 122.67891434)):
self.mean = np.array(mean, dtype=np.float32)
def __call__(self, image):
image = image.transpose((1, 2, 0))
# denormalize
image += self.mean
# change to RGB
image = image[:, :, ::-1]
return image
[docs]class RandomErasing(object):
"""Random erasing augmentation from `Random Erasing Data Augmentation (CVPR 2017)
<https://arxiv.org/pdf/1708.04896.pdf>`_. This augmentation randomly selects a rectangle region in an image
and erases its pixels.
Args:
probability (float): The probability that the Random Erasing operation will be performed.
sl (float): Minimum proportion of erased area against input image.
sh (float): Maximum proportion of erased area against input image.
r1 (float): Minimum aspect ratio of erased area.
mean (sequence): Value to fill the erased area.
"""
def __init__(self, probability=0.5, sl=0.02, sh=0.4, r1=0.3, mean=(0.4914, 0.4822, 0.4465)):
self.probability = probability
self.mean = mean
self.sl = sl
self.sh = sh
self.r1 = r1
def __call__(self, img):
if random.uniform(0, 1) >= self.probability:
return img
for attempt in range(100):
area = img.size()[1] * img.size()[2]
target_area = random.uniform(self.sl, self.sh) * area
aspect_ratio = random.uniform(self.r1, 1 / self.r1)
h = int(round(math.sqrt(target_area * aspect_ratio)))
w = int(round(math.sqrt(target_area / aspect_ratio)))
if w < img.size()[2] and h < img.size()[1]:
x1 = random.randint(0, img.size()[1] - h)
y1 = random.randint(0, img.size()[2] - w)
if img.size()[0] == 3:
img[0, x1:x1 + h, y1:y1 + w] = self.mean[0]
img[1, x1:x1 + h, y1:y1 + w] = self.mean[1]
img[2, x1:x1 + h, y1:y1 + w] = self.mean[2]
else:
img[0, x1:x1 + h, y1:y1 + w] = self.mean[0]
return img
return img
def __repr__(self):
return self.__class__.__name__ + '(p={})'.format(self.probability)