CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-torchvision

Computer vision library for PyTorch with datasets, model architectures, and image/video transforms.

Overview
Eval results
Files

docs/transforms.md

Transforms

TorchVision provides comprehensive image and video preprocessing and augmentation capabilities. The transforms module includes the v1 API (operating on PIL images and tensors), the v2 API (which can jointly transform images, videos, bounding boxes, and masks), functional implementations, and preset transform pipelines for common use cases.

Capabilities

Core Transform Classes

Container Transforms

Transforms that compose and apply multiple transformations.

class Compose:
    """
    Composes several transforms together.
    
    Args:
        transforms (list): List of transforms to compose
    """
    def __init__(self, transforms: list): ...
    def __call__(self, img): ...

class RandomApply:
    """
    Apply list of transforms randomly with probability p.
    
    Args:
        transforms (list): List of transforms to apply
        p (float): Probability of applying transforms
    """
    def __init__(self, transforms: list, p: float = 0.5): ...

class RandomChoice:
    """
    Apply single random transform from list.
    
    Args:
        transforms (list): List of transforms to choose from
    """
    def __init__(self, transforms: list): ...

class RandomOrder:
    """
    Apply transforms in random order.
    
    Args:
        transforms (list): List of transforms to apply in random order
    """
    def __init__(self, transforms: list): ...

Type Conversion Transforms

Transforms for converting between different data types and formats.

class ToTensor:
    """
    Convert PIL Image or numpy array to tensor.
    Converts PIL Image or numpy.ndarray (H x W x C) in range [0, 255]
    to torch.FloatTensor of shape (C x H x W) in range [0.0, 1.0].
    """
    def __call__(self, pic): ...

class PILToTensor:
    """
    Convert PIL Image to tensor without scaling.
    Converts PIL Image to torch.Tensor without scaling values.
    """
    def __call__(self, pic): ...

class ToPILImage:
    """
    Convert tensor or ndarray to PIL Image.
    
    Args:
        mode (str, optional): Color mode for output image
    """
    def __init__(self, mode=None): ...

class ConvertImageDtype:
    """
    Convert tensor image to given dtype.
    
    Args:
        dtype (torch.dtype): Desired data type
    """
    def __init__(self, dtype: torch.dtype): ...

Geometric Transforms

Spatial transformations for resizing, cropping, and geometric augmentation.

class Resize:
    """
    Resize input to given size.
    
    Args:
        size (int or tuple): Desired output size
        interpolation (InterpolationMode): Interpolation method
        max_size (int, optional): Maximum size for aspect ratio preservation
        antialias (bool, optional): Apply antialiasing (default True). Only affects
            tensor inputs with bilinear or bicubic interpolation; PIL images are
            always antialiased in those modes.
    """
    def __init__(self, size, interpolation=InterpolationMode.BILINEAR, max_size=None, antialias=True): ...

class CenterCrop:
    """
    Crop image at center.
    
    Args:
        size (int or tuple): Desired output size
    """
    def __init__(self, size): ...

class RandomCrop:
    """
    Crop image at random location.
    
    Args:
        size (int or tuple): Desired output size
        padding (int or tuple, optional): Padding on each border
        pad_if_needed (bool): Pad if image smaller than crop size
        fill (number or tuple): Fill value for padding
        padding_mode (str): Padding mode ('constant', 'edge', 'reflect', 'symmetric')
    """
    def __init__(self, size, padding=None, pad_if_needed: bool = False, fill: int = 0, padding_mode: str = 'constant'): ...

class RandomResizedCrop:
    """
    Random crop with resize to target size.
    
    Args:
        size (int or tuple): Expected output size
        scale (tuple): Range of size of the origin size cropped
        ratio (tuple): Range of aspect ratio of the origin aspect ratio cropped
        interpolation (InterpolationMode): Interpolation method
        antialias (bool, optional): Apply antialiasing (default True). Only affects
            tensor inputs with bilinear or bicubic interpolation.
    """
    def __init__(self, size, scale: tuple = (0.08, 1.0), ratio: tuple = (3./4., 4./3.), interpolation=InterpolationMode.BILINEAR, antialias=True): ...

class FiveCrop:
    """
    Crop image into four corners and center.
    
    Args:
        size (int or tuple): Desired output size
    """
    def __init__(self, size): ...

class TenCrop:
    """
    Create 10 crops: 5 crops + horizontally flipped versions.
    
    Args:
        size (int or tuple): Desired output size
        vertical_flip (bool): Use vertical flip instead of horizontal
    """
    def __init__(self, size, vertical_flip: bool = False): ...

class Pad:
    """
    Pad image on all sides with given pad value.
    
    Args:
        padding (int or tuple): Padding on each border
        fill (number or tuple): Fill value for constant fill
        padding_mode (str): Padding mode
    """
    def __init__(self, padding, fill: int = 0, padding_mode: str = 'constant'): ...

class RandomHorizontalFlip:
    """
    Randomly flip image horizontally with probability p.
    
    Args:
        p (float): Probability of flip
    """
    def __init__(self, p: float = 0.5): ...

class RandomVerticalFlip:
    """
    Randomly flip image vertically with probability p.
    
    Args:
        p (float): Probability of flip
    """
    def __init__(self, p: float = 0.5): ...

class RandomRotation:
    """
    Rotate image by random angle.
    
    Args:
        degrees (number or tuple): Range of degrees to select from
        interpolation (InterpolationMode): Interpolation method
        expand (bool): Expand output to fit rotated image
        center (tuple, optional): Center of rotation
        fill (number or tuple): Fill value for area outside rotated image
    """
    def __init__(self, degrees, interpolation=InterpolationMode.NEAREST, expand: bool = False, center=None, fill: int = 0): ...

class RandomAffine:
    """
    Random affine transformation.
    
    Args:
        degrees (number or tuple): Range of degrees for rotation
        translate (tuple, optional): Range of translations
        scale (tuple, optional): Range of scale factors
        shear (number or tuple, optional): Range of shear angles
        interpolation (InterpolationMode): Interpolation method
        fill (number or tuple): Fill value
        center (tuple, optional): Center point for transformations
    """
    def __init__(self, degrees, translate=None, scale=None, shear=None, interpolation=InterpolationMode.NEAREST, fill: int = 0, center=None): ...

class RandomPerspective:
    """
    Random perspective transformation.
    
    Args:
        distortion_scale (float): Argument to control degree of distortion
        p (float): Probability of applying transformation
        interpolation (InterpolationMode): Interpolation method
        fill (number or tuple): Fill value
    """
    def __init__(self, distortion_scale: float = 0.5, p: float = 0.5, interpolation=InterpolationMode.BILINEAR, fill: int = 0): ...

class ElasticTransform:
    """
    Random elastic transformation.
    
    Args:
        alpha (float or tuple): Magnitude of displacement
        sigma (float or tuple): Standard deviation of Gaussian kernel
        interpolation (InterpolationMode): Interpolation method
        fill (number or tuple): Fill value
    """
    def __init__(self, alpha: float = 50.0, sigma: float = 5.0, interpolation=InterpolationMode.BILINEAR, fill: int = 0): ...

Color Transforms

Photometric transformations for color manipulation and augmentation.

class ColorJitter:
    """
    Randomly change brightness, contrast, saturation, and hue.
    
    Args:
        brightness (float or tuple): How much to jitter brightness
        contrast (float or tuple): How much to jitter contrast
        saturation (float or tuple): How much to jitter saturation
        hue (float or tuple): How much to jitter hue
    """
    def __init__(self, brightness: float = 0, contrast: float = 0, saturation: float = 0, hue: float = 0): ...

class Grayscale:
    """
    Convert image to grayscale.
    
    Args:
        num_output_channels (int): Number of channels for output (1 or 3)
    """
    def __init__(self, num_output_channels: int = 1): ...

class RandomGrayscale:
    """
    Randomly convert image to grayscale with probability p.
    
    Args:
        p (float): Probability of conversion to grayscale
    """
    def __init__(self, p: float = 0.1): ...

class GaussianBlur:
    """
    Apply Gaussian blur to image.
    
    Args:
        kernel_size (int or tuple): Size of Gaussian kernel
        sigma (float or tuple): Standard deviation for Gaussian kernel
    """
    def __init__(self, kernel_size, sigma: tuple = (0.1, 2.0)): ...

class RandomInvert:
    """
    Randomly invert colors of image with probability p.
    
    Args:
        p (float): Probability of inversion
    """
    def __init__(self, p: float = 0.5): ...

class RandomPosterize:
    """
    Randomly posterize image with probability p.
    
    Args:
        bits (int): Number of bits to keep for each channel
        p (float): Probability of posterization
    """
    def __init__(self, bits: int, p: float = 0.5): ...

class RandomSolarize:
    """
    Randomly solarize image with probability p.
    
    Args:
        threshold (float): Threshold above which pixels are inverted
        p (float): Probability of solarization
    """
    def __init__(self, threshold: float, p: float = 0.5): ...

class RandomAdjustSharpness:
    """
    Randomly adjust sharpness with probability p.
    
    Args:
        sharpness_factor (float): Sharpness adjustment factor
        p (float): Probability of adjustment
    """
    def __init__(self, sharpness_factor: float, p: float = 0.5): ...

class RandomAutocontrast:
    """
    Randomly apply autocontrast with probability p.
    
    Args:
        p (float): Probability of applying autocontrast
    """
    def __init__(self, p: float = 0.5): ...

class RandomEqualize:
    """
    Randomly equalize histogram with probability p.
    
    Args:
        p (float): Probability of equalization
    """
    def __init__(self, p: float = 0.5): ...

Normalization and Utility Transforms

Statistical normalization and utility transformations.

class Normalize:
    """
    Normalize tensor with mean and standard deviation.
    
    Args:
        mean (sequence): Sequence of means for each channel
        std (sequence): Sequence of standard deviations for each channel
        inplace (bool): Make operation in-place
    """
    def __init__(self, mean: list, std: list, inplace: bool = False): ...

class Lambda:
    """
    Apply user-defined lambda function.
    
    Args:
        lambd (function): Lambda/function to be used for transform
    """
    def __init__(self, lambd): ...

class LinearTransformation:
    """
    Apply linear transformation using transformation matrix and mean vector.
    
    Args:
        transformation_matrix (Tensor): Transformation matrix
        mean_vector (Tensor): Mean vector
    """
    def __init__(self, transformation_matrix: torch.Tensor, mean_vector: torch.Tensor): ...

Auto-Augmentation Transforms

Automated augmentation policies for improved model robustness.

class AutoAugment:
    """
    AutoAugment data augmentation policy.
    
    Args:
        policy (AutoAugmentPolicy): AutoAugment policy to use
        interpolation (InterpolationMode): Interpolation method
        fill (sequence or number): Pixel fill value
    """
    def __init__(self, policy=AutoAugmentPolicy.IMAGENET, interpolation=InterpolationMode.NEAREST, fill=None): ...

class RandAugment:
    """
    RandAugment data augmentation.
    
    Args:
        num_ops (int): Number of augmentation transformations to apply
        magnitude (int): Magnitude for all transformations
        num_magnitude_bins (int): Number of magnitude bins
        interpolation (InterpolationMode): Interpolation method
        fill (sequence or number): Pixel fill value
    """
    def __init__(self, num_ops: int = 2, magnitude: int = 9, num_magnitude_bins: int = 31, interpolation=InterpolationMode.NEAREST, fill=None): ...

class TrivialAugmentWide:
    """
    TrivialAugment Wide augmentation policy.
    
    Args:
        num_magnitude_bins (int): Number of magnitude bins
        interpolation (InterpolationMode): Interpolation method
        fill (sequence or number): Pixel fill value
    """
    def __init__(self, num_magnitude_bins: int = 31, interpolation=InterpolationMode.NEAREST, fill=None): ...

class AugMix:
    """
    AugMix data augmentation.
    
    Args:
        severity (int): Severity level for base augmentations
        mixture_width (int): Number of augmentation chains
        chain_depth (int): Depth of augmentation chains
        alpha (float): Parameter for Beta distribution
        all_ops (bool): Use all available operations
        interpolation (InterpolationMode): Interpolation method
        fill (sequence or number): Pixel fill value
    """
    def __init__(self, severity: int = 3, mixture_width: int = 3, chain_depth: int = -1, alpha: float = 1.0, all_ops: bool = True, interpolation=InterpolationMode.BILINEAR, fill=None): ...

class AutoAugmentPolicy:
    """AutoAugment policy constants."""
    IMAGENET: str = "imagenet"
    CIFAR10: str = "cifar10" 
    SVHN: str = "svhn"

Preset Transform Pipelines

Pre-configured transform pipelines for common tasks.

class ImageClassification:
    """
    Standard preprocessing for image classification.
    
    All arguments are keyword-only.
    
    Args:
        crop_size (int): Size for center crop
        resize_size (int): Size for resize operation
        mean (tuple): Normalization mean
        std (tuple): Normalization standard deviation
        interpolation (InterpolationMode): Interpolation method
        antialias (bool, optional): Apply antialiasing when resizing tensors
    """
    def __init__(self, *, crop_size: int, resize_size: int = 256, mean: tuple = (0.485, 0.456, 0.406), std: tuple = (0.229, 0.224, 0.225), interpolation=InterpolationMode.BILINEAR, antialias=True): ...

class ObjectDetection:
    """Standard preprocessing for object detection."""
    def __init__(self): ...

class SemanticSegmentation:
    """
    Standard preprocessing for semantic segmentation.
    
    All arguments are keyword-only.
    
    Args:
        resize_size (int or None): Size for resize operation; None skips resizing
        mean (tuple): Normalization mean
        std (tuple): Normalization standard deviation
        interpolation (InterpolationMode): Interpolation method
        antialias (bool, optional): Apply antialiasing when resizing tensors
    """
    def __init__(self, *, resize_size, mean: tuple = (0.485, 0.456, 0.406), std: tuple = (0.229, 0.224, 0.225), interpolation=InterpolationMode.BILINEAR, antialias=True): ...

class VideoClassification:
    """
    Standard preprocessing for video classification.
    
    All arguments are keyword-only; crop_size and resize_size are required.
    
    Args:
        crop_size (tuple): Size for crop
        resize_size (tuple): Size for resize
        mean (tuple): Normalization mean
        std (tuple): Normalization standard deviation
        interpolation (InterpolationMode): Interpolation method
    """
    def __init__(self, *, crop_size: tuple, resize_size: tuple, mean: tuple = (0.43216, 0.394666, 0.37645), std: tuple = (0.22803, 0.22145, 0.216989), interpolation=InterpolationMode.BILINEAR): ...

class OpticalFlow:
    """Standard preprocessing for optical flow."""
    def __init__(self): ...

Functional API

Low-level functional implementations of transforms.

# Interpolation modes for transforms
class InterpolationMode:
    NEAREST = "nearest"
    NEAREST_EXACT = "nearest-exact"
    BILINEAR = "bilinear"
    BICUBIC = "bicubic"
    BOX = "box"
    HAMMING = "hamming"
    LANCZOS = "lanczos"

# Geometric functions
def resize(img, size: list, interpolation=InterpolationMode.BILINEAR, max_size=None, antialias=True):
    """Resize image to given size.

    antialias defaults to True and only affects tensor inputs with bilinear
    or bicubic interpolation.
    """

def center_crop(img, output_size: list):
    """Center crop image to output size."""

def crop(img, top: int, left: int, height: int, width: int):
    """Crop image at specified location."""

def pad(img, padding, fill: int = 0, padding_mode: str = 'constant'):
    """Pad image on all sides."""

def hflip(img):
    """Horizontally flip image."""

def vflip(img):
    """Vertically flip image."""

def rotate(img, angle: float, interpolation=InterpolationMode.NEAREST, expand: bool = False, center=None, fill: int = 0):
    """Rotate image by angle."""

def affine(img, angle: float, translate: list, scale: float, shear: list, interpolation=InterpolationMode.NEAREST, fill: int = 0, center=None):
    """Apply affine transformation."""

def perspective(img, startpoints: list, endpoints: list, interpolation=InterpolationMode.BILINEAR, fill: int = 0):
    """Apply perspective transformation."""

def five_crop(img, size: list):
    """Create five crops of image."""

def ten_crop(img, size: list, vertical_flip: bool = False):
    """Create ten crops of image."""

# Color functions
def adjust_brightness(img, brightness_factor: float):
    """Adjust brightness of image."""

def adjust_contrast(img, contrast_factor: float):
    """Adjust contrast of image."""

def adjust_saturation(img, saturation_factor: float):
    """Adjust saturation of image."""

def adjust_hue(img, hue_factor: float):
    """Adjust hue of image."""

def adjust_gamma(img, gamma: float, gain: float = 1):
    """Adjust gamma of image."""

def adjust_sharpness(img, sharpness_factor: float):
    """Adjust sharpness of image."""

def rgb_to_grayscale(img, num_output_channels: int = 1):
    """Convert RGB image to grayscale."""

def to_grayscale(img, num_output_channels: int = 1):
    """Convert image to grayscale."""

def gaussian_blur(img, kernel_size: list, sigma=None):
    """Apply Gaussian blur to image."""

def invert(img):
    """Invert colors of image."""

def posterize(img, bits: int):
    """Posterize image."""

def solarize(img, threshold: float):
    """Solarize image."""

def autocontrast(img):
    """Apply autocontrast to image."""

def equalize(img):
    """Equalize histogram of image."""

# Conversion functions
def to_tensor(pic):
    """Convert PIL Image or numpy array to tensor."""

def to_pil_image(pic, mode=None):
    """Convert tensor to PIL Image."""

def pil_to_tensor(pic):
    """Convert PIL Image to tensor without scaling."""

def convert_image_dtype(image, dtype: torch.dtype = torch.float):
    """Convert image tensor to the given dtype and scale the values accordingly.

    Args:
        image (Tensor): Image tensor to convert
        dtype (torch.dtype): Desired data type (default: torch.float)
    """

def normalize(tensor, mean: list, std: list, inplace: bool = False):
    """Normalize tensor with mean and std."""

# Utility functions
def get_image_size(img):
    """Get image size as [width, height]."""

def get_image_num_channels(img):
    """Get number of channels in image."""

v2 Transforms API

Enhanced transforms API with multi-tensor support for images, videos, bounding boxes, and masks.

class Transform:
    """Base class for all v2 transforms."""

# Type conversion v2
class ToImage:
    """Convert to image tensor."""

class ToPILImage:
    """Convert to PIL Image with v2 support."""

class PILToTensor:
    """Convert PIL to tensor with v2 support."""

class ToPureTensor:
    """Convert to pure tensor."""

class ToDtype:
    """
    Convert to specified dtype.
    
    Args:
        dtype (torch.dtype): Target dtype
        scale (bool): Scale values when converting
    """
    def __init__(self, dtype: torch.dtype, scale: bool = False): ...

# Container transforms v2
class Compose:
    """Compose transforms with multi-tensor support."""

class RandomApply:
    """Apply transforms randomly with multi-tensor support."""

class RandomChoice:
    """Choose random transform with multi-tensor support."""

class RandomOrder:
    """Apply in random order with multi-tensor support."""

# Enhanced geometric transforms
class Resize:
    """Resize with multi-tensor support including bounding boxes."""

class CenterCrop:
    """Center crop with bounding box support."""

class RandomCrop:
    """Random crop with mask and bounding box support."""

class RandomResizedCrop:
    """Random resized crop with multi-tensor support."""

class RandomHorizontalFlip:
    """Horizontal flip with bounding box support."""

class RandomVerticalFlip:
    """Vertical flip with bounding box support."""

class RandomRotation:
    """Rotation with bounding box support."""

class RandomAffine:
    """Affine transformation with bounding box support."""

class RandomPerspective:
    """Perspective transformation with v2 support."""

class ElasticTransform:
    """Elastic transformation with v2 support."""

class RandomIoUCrop:
    """
    IoU-aware random crop for object detection.
    
    Args:
        min_scale (float): Minimum scale for cropping
        max_scale (float): Maximum scale for cropping
        min_aspect_ratio (float): Minimum aspect ratio
        max_aspect_ratio (float): Maximum aspect ratio
        sampler_options (list): List of sampling options
        trials (int): Number of trials for finding valid crop
    """
    def __init__(self, min_scale: float = 0.3, max_scale: float = 1.0, min_aspect_ratio: float = 0.5, max_aspect_ratio: float = 2.0, sampler_options=None, trials: int = 40): ...

class RandomZoomOut:
    """
    Random zoom out transformation.
    
    Args:
        fill (number or tuple): Fill value for expanded area
        side_range (tuple): Range for zoom out factor
        p (float): Probability of applying zoom out
    """
    def __init__(self, fill: int = 0, side_range: tuple = (1.0, 4.0), p: float = 0.5): ...

class RandomShortestSize:
    """
    Random shortest size resize.
    
    Args:
        min_size (int or list): Minimum size for shortest edge
        max_size (int, optional): Maximum size for longest edge
        interpolation (InterpolationMode): Interpolation method
    """
    def __init__(self, min_size, max_size=None, interpolation=InterpolationMode.BILINEAR): ...

class RandomResize:
    """
    Random resize within range.
    
    Args:
        min_size (int): Minimum size
        max_size (int): Maximum size
        interpolation (InterpolationMode): Interpolation method
    """
    def __init__(self, min_size: int, max_size: int, interpolation=InterpolationMode.BILINEAR): ...

class ScaleJitter:
    """
    Scale jittering transform.
    
    Args:
        target_size (tuple): Target size
        scale_range (tuple): Range for scale jittering
        interpolation (InterpolationMode): Interpolation method
    """
    def __init__(self, target_size: tuple, scale_range: tuple = (0.1, 2.0), interpolation=InterpolationMode.BILINEAR): ...

# Enhanced color transforms v2
class ColorJitter:
    """Color jittering with v2 support."""

class RandomChannelPermutation:
    """Randomly permute image channels."""

class RandomPhotometricDistort:
    """
    Photometric distortion for data augmentation.
    
    Args:
        brightness (tuple): Range for brightness adjustment
        contrast (tuple): Range for contrast adjustment
        saturation (tuple): Range for saturation adjustment
        hue (tuple): Range for hue adjustment
        p (float): Probability of applying distortion
    """
    def __init__(self, brightness: tuple = (0.875, 1.125), contrast: tuple = (0.5, 1.5), saturation: tuple = (0.5, 1.5), hue: tuple = (-0.05, 0.05), p: float = 0.5): ...

class RGB:
    """Ensure RGB format."""

class GaussianNoise:
    """
    Add Gaussian noise to image.
    
    Args:
        mean (float): Mean of Gaussian noise
        sigma (float): Standard deviation of noise
        clip (bool): Clip the noised values to [0, 1]
    """
    def __init__(self, mean: float = 0.0, sigma: float = 0.1, clip: bool = True): ...

# Augmentation transforms v2
class MixUp:
    """
    MixUp data augmentation.
    
    All arguments are keyword-only.
    
    Args:
        alpha (float): Parameter for Beta distribution
        num_classes (int, optional): Number of classes; needed to one-hot encode
            integer labels
        labels_getter (callable or "default"): Function to get labels from the
            input; "default" uses the built-in heuristic
    """
    def __init__(self, *, alpha: float = 1.0, num_classes: int = None, labels_getter="default"): ...

class CutMix:
    """
    CutMix data augmentation.
    
    All arguments are keyword-only.
    
    Args:
        alpha (float): Parameter for Beta distribution
        num_classes (int, optional): Number of classes; needed to one-hot encode
            integer labels
        labels_getter (callable or "default"): Function to get labels from the
            input; "default" uses the built-in heuristic
    """
    def __init__(self, *, alpha: float = 1.0, num_classes: int = None, labels_getter="default"): ...

class RandomErasing:
    """
    Random erasing data augmentation.
    
    Args:
        p (float): Probability of applying random erasing
        scale (tuple): Range of proportion of erased area
        ratio (tuple): Range of aspect ratio of erased area
        value (number or str): Erasing value
        inplace (bool): Make operation in-place
    """
    def __init__(self, p: float = 0.5, scale: tuple = (0.02, 0.33), ratio: tuple = (0.3, 3.3), value: int = 0, inplace: bool = False): ...

class JPEG:
    """
    JPEG compression simulation.
    
    Args:
        quality (int or sequence): JPEG quality from 1 to 100 (lower means more
            compression). A (min, max) sequence gives the inclusive range to
            sample the quality from. Required; there is no default.
    """
    def __init__(self, quality): ...

# Metadata transforms v2
class ClampBoundingBoxes:
    """Clamp bounding boxes to image bounds."""

class ClampKeyPoints:
    """Clamp keypoints to image bounds."""

class ConvertBoundingBoxFormat:
    """
    Convert bounding box format.
    
    Args:
        format (BoundingBoxFormat): Target format
    """
    def __init__(self, format): ...

class SanitizeBoundingBoxes:
    """
    Remove invalid bounding boxes.
    
    Args:
        min_size (float): Boxes with a side smaller than this are removed
        min_area (float): Boxes with an area smaller than this are removed
        labels_getter (callable, "default", or None): How to find the labels to
            sanitize along with the boxes; "default" uses the built-in heuristic,
            None sanitizes the bounding boxes only
    """
    def __init__(self, min_size: float = 1.0, min_area: float = 1.0, labels_getter="default"): ...

# Temporal transforms v2
class UniformTemporalSubsample:
    """
    Uniform temporal subsampling for video.
    
    Args:
        num_samples (int): Number of samples to extract
    """
    def __init__(self, num_samples: int): ...

# Utility functions v2
def check_type(inpt, type_sequence):
    """Check input types."""

def get_bounding_boxes(inpt):
    """Extract bounding boxes from input."""

def has_all(flat_inputs, *types):
    """Check if the flattened inputs contain every one of the specified types."""

def has_any(flat_inputs, *types):
    """Check if the flattened inputs contain at least one of the specified types."""

def query_chw(flat_inputs):
    """Query CHW dimensions from inputs."""

def query_size(flat_inputs):
    """Query spatial size from inputs."""

Usage Examples

Basic Image Preprocessing

from torchvision import transforms
import torch

# Standard ImageNet preprocessing
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

# Apply to PIL image
from PIL import Image
image = Image.open('image.jpg')
tensor = transform(image)

Data Augmentation Pipeline

from torchvision import transforms

# Training augmentations
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(
        brightness=0.2,
        contrast=0.2, 
        saturation=0.2,
        hue=0.1
    ),
    transforms.RandomRotation(degrees=10),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    ),
    transforms.RandomErasing(p=0.1)
])

v2 Transforms for Object Detection

import torch
from torchvision.transforms import v2
from torchvision.tv_tensors import BoundingBoxes, Image

# Object detection preprocessing
transform = v2.Compose([
    v2.ToImage(),
    v2.RandomHorizontalFlip(p=0.5),
    v2.RandomIoUCrop(),
    v2.Resize(size=(640, 640)),
    # RandomIoUCrop must be followed by SanitizeBoundingBoxes so that boxes
    # falling outside the crop are actually removed. labels_getter=None
    # because this example passes no labels alongside the boxes.
    v2.SanitizeBoundingBoxes(labels_getter=None),
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Apply to image and bounding boxes
image = Image(torch.randint(0, 256, (3, 480, 640), dtype=torch.uint8))
boxes = BoundingBoxes(
    torch.tensor([[10, 10, 100, 100], [200, 200, 300, 300]]),
    format='XYXY',
    canvas_size=(480, 640)
)

# Both inputs are transformed consistently; some boxes may be dropped by the crop
transformed_image, transformed_boxes = transform(image, boxes)

Functional API Usage

from torchvision.transforms import functional as F
import torch

# Using functional API for custom transforms
def custom_transform(image):
    # Apply specific sequence of transforms
    image = F.resize(image, [256, 256])
    image = F.center_crop(image, [224, 224])
    image = F.to_tensor(image)
    
    # Conditional augmentation
    if torch.rand(1) > 0.5:
        image = F.hflip(image)
    
    image = F.normalize(image, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    return image

Video Transforms

import torch
from torchvision.transforms import v2

# Video preprocessing pipeline
video_transform = v2.Compose([
    v2.UniformTemporalSubsample(16),  # Sample 16 frames
    v2.Resize((224, 224)),
    v2.RandomHorizontalFlip(p=0.5),
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize(mean=[0.43216, 0.394666, 0.37645], 
                std=[0.22803, 0.22145, 0.216989])
])

# Apply to video tensor (T, C, H, W)
video_tensor = torch.randint(0, 256, (32, 3, 256, 256), dtype=torch.uint8)
transformed_video = video_transform(video_tensor)

AutoAugment Policies

from torchvision import transforms

# Using AutoAugment
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.AutoAugment(policy=transforms.AutoAugmentPolicy.IMAGENET),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Using RandAugment
transform_rand = transforms.Compose([
    transforms.Resize(256),
    transforms.RandAugment(num_ops=2, magnitude=15),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

Install with Tessl CLI

npx tessl i tessl/pypi-torchvision

docs

datasets.md

index.md

io.md

models.md

ops.md

transforms.md

tv_tensors.md

utils.md

tile.json