Shortcuts

Source code for common.vision.transforms

import math
import random
from PIL import Image
import numpy as np
import torch
from torchvision.transforms import Normalize


[docs]class ResizeImage(object): """Resize the input PIL Image to the given size. Args: size (sequence or int): Desired output size. If size is a sequence like (h, w), output size will be matched to this. If size is an int, output size will be (size, size) """ def __init__(self, size): if isinstance(size, int): self.size = (int(size), int(size)) else: self.size = size def __call__(self, img): th, tw = self.size return img.resize((th, tw)) def __repr__(self): return self.__class__.__name__ + '(size={0})'.format(self.size)
[docs]class MultipleApply: """Apply a list of transformations to an image and get multiple transformed images. Args: transforms (list or tuple): list of transformations Example: >>> transform1 = T.Compose([ ... ResizeImage(256), ... T.RandomCrop(224) ... ]) >>> transform2 = T.Compose([ ... ResizeImage(256), ... T.RandomCrop(224), ... ]) >>> multiply_transform = MultipleApply([transform1, transform2]) """ def __init__(self, transforms): self.transforms = transforms def __call__(self, image): return [t(image) for t in self.transforms] def __repr__(self): format_string = self.__class__.__name__ + '(' for t in self.transforms: format_string += '\n' format_string += ' {0}'.format(t) format_string += '\n)' return format_string
[docs]class Denormalize(Normalize): """DeNormalize a tensor image with mean and standard deviation. Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` for ``n`` channels, this transform will denormalize each channel of the input ``torch.*Tensor`` i.e., ``output[channel] = input[channel] * std[channel] + mean[channel]`` .. note:: This transform acts out of place, i.e., it does not mutate the input tensor. Args: mean (sequence): Sequence of means for each channel. std (sequence): Sequence of standard deviations for each channel. """ def __init__(self, mean, std): mean = np.array(mean) std = np.array(std) super().__init__((-mean / std).tolist(), (1 / std).tolist())
[docs]class NormalizeAndTranspose: """ First, normalize a tensor image with mean and standard deviation. Then, convert the shape (H x W x C) to shape (C x H x W). """ def __init__(self, mean=(104.00698793, 116.66876762, 122.67891434)): self.mean = np.array(mean, dtype=np.float32) def __call__(self, image): if isinstance(image, Image.Image): image = np.asarray(image, np.float32) # change to BGR image = image[:, :, ::-1] # normalize image -= self.mean image = image.transpose((2, 0, 1)).copy() elif isinstance(image, torch.Tensor): # change to BGR image = image[:, :, [2, 1, 0]] # normalize image -= torch.from_numpy(self.mean).to(image.device) image = image.permute((2, 0, 1)) else: raise NotImplementedError(type(image)) return image
[docs]class DeNormalizeAndTranspose: """ First, convert a tensor image from the shape (C x H x W ) to shape (H x W x C). Then, denormalize it with mean and standard deviation. """ def __init__(self, mean=(104.00698793, 116.66876762, 122.67891434)): self.mean = np.array(mean, dtype=np.float32) def __call__(self, image): image = image.transpose((1, 2, 0)) # denormalize image += self.mean # change to RGB image = image[:, :, ::-1] return image
[docs]class RandomErasing(object): """Random erasing augmentation from `Random Erasing Data Augmentation (CVPR 2017) <https://arxiv.org/pdf/1708.04896.pdf>`_. This augmentation randomly selects a rectangle region in an image and erases its pixels. Args: probability (float): The probability that the Random Erasing operation will be performed. sl (float): Minimum proportion of erased area against input image. sh (float): Maximum proportion of erased area against input image. r1 (float): Minimum aspect ratio of erased area. mean (sequence): Value to fill the erased area. """ def __init__(self, probability=0.5, sl=0.02, sh=0.4, r1=0.3, mean=(0.4914, 0.4822, 0.4465)): self.probability = probability self.mean = mean self.sl = sl self.sh = sh self.r1 = r1 def __call__(self, img): if random.uniform(0, 1) >= self.probability: return img for attempt in range(100): area = img.size()[1] * img.size()[2] target_area = random.uniform(self.sl, self.sh) * area aspect_ratio = random.uniform(self.r1, 1 / self.r1) h = int(round(math.sqrt(target_area * aspect_ratio))) w = int(round(math.sqrt(target_area / aspect_ratio))) if w < img.size()[2] and h < img.size()[1]: x1 = random.randint(0, img.size()[1] - h) y1 = random.randint(0, img.size()[2] - w) if img.size()[0] == 3: img[0, x1:x1 + h, y1:y1 + w] = self.mean[0] img[1, x1:x1 + h, y1:y1 + w] = self.mean[1] img[2, x1:x1 + h, y1:y1 + w] = self.mean[2] else: img[0, x1:x1 + h, y1:y1 + w] = self.mean[0] return img return img def __repr__(self): return self.__class__.__name__ + '(p={})'.format(self.probability)

Docs

Access comprehensive documentation for Transfer Learning Library

View Docs

Tutorials

Get started for Transfer Learning Library

Get Started