Computer vision library for PyTorch with datasets, model architectures, and image/video transforms.
TorchVision provides comprehensive image and video preprocessing and augmentation capabilities. The transforms module includes both v1 (traditional PIL/tensor) and v2 (multi-tensor) APIs, functional implementations, and preset transform pipelines for common use cases.
Transforms that compose and apply multiple transformations.
class Compose:
"""
Composes several transforms together.
Args:
transforms (list): List of transforms to compose
"""
def __init__(self, transforms: list): ...
def __call__(self, img): ...
class RandomApply:
"""
Apply list of transforms randomly with probability p.
Args:
transforms (list): List of transforms to apply
p (float): Probability of applying transforms
"""
def __init__(self, transforms: list, p: float = 0.5): ...
class RandomChoice:
"""
Apply single random transform from list.
Args:
transforms (list): List of transforms to choose from
"""
def __init__(self, transforms: list): ...
class RandomOrder:
"""
Apply transforms in random order.
Args:
transforms (list): List of transforms to apply in random order
"""
def __init__(self, transforms: list): ...
Transforms for converting between different data types and formats.
class ToTensor:
"""
Convert PIL Image or numpy array to tensor.
Converts PIL Image or numpy.ndarray (H x W x C) in range [0, 255]
to torch.FloatTensor of shape (C x H x W) in range [0.0, 1.0].
"""
def __call__(self, pic): ...
class PILToTensor:
"""
Convert PIL Image to tensor without scaling.
Converts PIL Image to torch.Tensor without scaling values.
"""
def __call__(self, pic): ...
class ToPILImage:
"""
Convert tensor or ndarray to PIL Image.
Args:
mode (str, optional): Color mode for output image
"""
def __init__(self, mode=None): ...
class ConvertImageDtype:
"""
Convert tensor image to given dtype.
Args:
dtype (torch.dtype): Desired data type
"""
def __init__(self, dtype: torch.dtype): ...
Spatial transformations for resizing, cropping, and geometric augmentation.
class Resize:
"""
Resize input to given size.
Args:
size (int or tuple): Desired output size
interpolation (InterpolationMode): Interpolation method
max_size (int, optional): Maximum size for aspect ratio preservation
antialias (bool, optional): Apply antialiasing
"""
def __init__(self, size, interpolation=InterpolationMode.BILINEAR, max_size=None, antialias=None): ...
class CenterCrop:
"""
Crop image at center.
Args:
size (int or tuple): Desired output size
"""
def __init__(self, size): ...
class RandomCrop:
"""
Crop image at random location.
Args:
size (int or tuple): Desired output size
padding (int or tuple, optional): Padding on each border
pad_if_needed (bool): Pad if image smaller than crop size
fill (number or tuple): Fill value for padding
padding_mode (str): Padding mode ('constant', 'edge', 'reflect', 'symmetric')
"""
def __init__(self, size, padding=None, pad_if_needed: bool = False, fill: int = 0, padding_mode: str = 'constant'): ...
class RandomResizedCrop:
"""
Random crop with resize to target size.
Args:
size (int or tuple): Expected output size
scale (tuple): Range of size of the origin size cropped
ratio (tuple): Range of aspect ratio of the origin aspect ratio cropped
interpolation (InterpolationMode): Interpolation method
antialias (bool, optional): Apply antialiasing
"""
def __init__(self, size, scale: tuple = (0.08, 1.0), ratio: tuple = (3./4., 4./3.), interpolation=InterpolationMode.BILINEAR, antialias=None): ...
class FiveCrop:
"""
Crop image into four corners and center.
Args:
size (int or tuple): Desired output size
"""
def __init__(self, size): ...
class TenCrop:
"""
Create 10 crops: 5 crops + horizontally flipped versions.
Args:
size (int or tuple): Desired output size
vertical_flip (bool): Use vertical flip instead of horizontal
"""
def __init__(self, size, vertical_flip: bool = False): ...
class Pad:
"""
Pad image on all sides with given pad value.
Args:
padding (int or tuple): Padding on each border
fill (number or tuple): Fill value for constant fill
padding_mode (str): Padding mode
"""
def __init__(self, padding, fill: int = 0, padding_mode: str = 'constant'): ...
class RandomHorizontalFlip:
"""
Randomly flip image horizontally with probability p.
Args:
p (float): Probability of flip
"""
def __init__(self, p: float = 0.5): ...
class RandomVerticalFlip:
"""
Randomly flip image vertically with probability p.
Args:
p (float): Probability of flip
"""
def __init__(self, p: float = 0.5): ...
class RandomRotation:
"""
Rotate image by random angle.
Args:
degrees (number or tuple): Range of degrees to select from
interpolation (InterpolationMode): Interpolation method
expand (bool): Expand output to fit rotated image
center (tuple, optional): Center of rotation
fill (number or tuple): Fill value for area outside rotated image
"""
def __init__(self, degrees, interpolation=InterpolationMode.NEAREST, expand: bool = False, center=None, fill: int = 0): ...
class RandomAffine:
"""
Random affine transformation.
Args:
degrees (number or tuple): Range of degrees for rotation
translate (tuple, optional): Range of translations
scale (tuple, optional): Range of scale factors
shear (number or tuple, optional): Range of shear angles
interpolation (InterpolationMode): Interpolation method
fill (number or tuple): Fill value
center (tuple, optional): Center point for transformations
"""
def __init__(self, degrees, translate=None, scale=None, shear=None, interpolation=InterpolationMode.NEAREST, fill: int = 0, center=None): ...
class RandomPerspective:
"""
Random perspective transformation.
Args:
distortion_scale (float): Argument to control degree of distortion
p (float): Probability of applying transformation
interpolation (InterpolationMode): Interpolation method
fill (number or tuple): Fill value
"""
def __init__(self, distortion_scale: float = 0.5, p: float = 0.5, interpolation=InterpolationMode.BILINEAR, fill: int = 0): ...
class ElasticTransform:
"""
Random elastic transformation.
Args:
alpha (float or tuple): Magnitude of displacement
sigma (float or tuple): Standard deviation of Gaussian kernel
interpolation (InterpolationMode): Interpolation method
fill (number or tuple): Fill value
"""
def __init__(self, alpha: float = 50.0, sigma: float = 5.0, interpolation=InterpolationMode.BILINEAR, fill: int = 0): ...
Photometric transformations for color manipulation and augmentation.
class ColorJitter:
"""
Randomly change brightness, contrast, saturation, and hue.
Args:
brightness (float or tuple): How much to jitter brightness
contrast (float or tuple): How much to jitter contrast
saturation (float or tuple): How much to jitter saturation
hue (float or tuple): How much to jitter hue
"""
def __init__(self, brightness: float = 0, contrast: float = 0, saturation: float = 0, hue: float = 0): ...
class Grayscale:
"""
Convert image to grayscale.
Args:
num_output_channels (int): Number of channels for output (1 or 3)
"""
def __init__(self, num_output_channels: int = 1): ...
class RandomGrayscale:
"""
Randomly convert image to grayscale with probability p.
Args:
p (float): Probability of conversion to grayscale
"""
def __init__(self, p: float = 0.1): ...
class GaussianBlur:
"""
Apply Gaussian blur to image.
Args:
kernel_size (int or tuple): Size of Gaussian kernel
sigma (float or tuple): Standard deviation for Gaussian kernel
"""
def __init__(self, kernel_size, sigma: tuple = (0.1, 2.0)): ...
class RandomInvert:
"""
Randomly invert colors of image with probability p.
Args:
p (float): Probability of inversion
"""
def __init__(self, p: float = 0.5): ...
class RandomPosterize:
"""
Randomly posterize image with probability p.
Args:
bits (int): Number of bits to keep for each channel
p (float): Probability of posterization
"""
def __init__(self, bits: int, p: float = 0.5): ...
class RandomSolarize:
"""
Randomly solarize image with probability p.
Args:
threshold (float): Threshold above which pixels are inverted
p (float): Probability of solarization
"""
def __init__(self, threshold: float, p: float = 0.5): ...
class RandomAdjustSharpness:
"""
Randomly adjust sharpness with probability p.
Args:
sharpness_factor (float): Sharpness adjustment factor
p (float): Probability of adjustment
"""
def __init__(self, sharpness_factor: float, p: float = 0.5): ...
class RandomAutocontrast:
"""
Randomly apply autocontrast with probability p.
Args:
p (float): Probability of applying autocontrast
"""
def __init__(self, p: float = 0.5): ...
class RandomEqualize:
"""
Randomly equalize histogram with probability p.
Args:
p (float): Probability of equalization
"""
def __init__(self, p: float = 0.5): ...
Statistical normalization and utility transformations.
class Normalize:
"""
Normalize tensor with mean and standard deviation.
Args:
mean (sequence): Sequence of means for each channel
std (sequence): Sequence of standard deviations for each channel
inplace (bool): Make operation in-place
"""
def __init__(self, mean: list, std: list, inplace: bool = False): ...
class Lambda:
"""
Apply user-defined lambda function.
Args:
lambd (function): Lambda/function to be used for transform
"""
def __init__(self, lambd): ...
class LinearTransformation:
"""
Apply linear transformation using transformation matrix and mean vector.
Args:
transformation_matrix (Tensor): Transformation matrix
mean_vector (Tensor): Mean vector
"""
def __init__(self, transformation_matrix: torch.Tensor, mean_vector: torch.Tensor): ...
Automated augmentation policies for improved model robustness.
class AutoAugment:
"""
AutoAugment data augmentation policy.
Args:
policy (AutoAugmentPolicy): AutoAugment policy to use
interpolation (InterpolationMode): Interpolation method
fill (sequence or number): Pixel fill value
"""
def __init__(self, policy=AutoAugmentPolicy.IMAGENET, interpolation=InterpolationMode.NEAREST, fill=None): ...
class RandAugment:
"""
RandAugment data augmentation.
Args:
num_ops (int): Number of augmentation transformations to apply
magnitude (int): Magnitude for all transformations
num_magnitude_bins (int): Number of magnitude bins
interpolation (InterpolationMode): Interpolation method
fill (sequence or number): Pixel fill value
"""
def __init__(self, num_ops: int = 2, magnitude: int = 9, num_magnitude_bins: int = 31, interpolation=InterpolationMode.NEAREST, fill=None): ...
class TrivialAugmentWide:
"""
TrivialAugment Wide augmentation policy.
Args:
num_magnitude_bins (int): Number of magnitude bins
interpolation (InterpolationMode): Interpolation method
fill (sequence or number): Pixel fill value
"""
def __init__(self, num_magnitude_bins: int = 31, interpolation=InterpolationMode.NEAREST, fill=None): ...
class AugMix:
"""
AugMix data augmentation.
Args:
severity (int): Severity level for base augmentations
mixture_width (int): Number of augmentation chains
chain_depth (int): Depth of augmentation chains
alpha (float): Parameter for Beta distribution
all_ops (bool): Use all available operations
interpolation (InterpolationMode): Interpolation method
fill (sequence or number): Pixel fill value
"""
def __init__(self, severity: int = 3, mixture_width: int = 3, chain_depth: int = -1, alpha: float = 1.0, all_ops: bool = True, interpolation=InterpolationMode.BILINEAR, fill=None): ...
class AutoAugmentPolicy:
"""AutoAugment policy constants."""
IMAGENET: str = "imagenet"
CIFAR10: str = "cifar10"
SVHN: str = "svhn"
Pre-configured transform pipelines for common tasks.
class ImageClassification:
"""
Standard preprocessing for image classification.
Args:
crop_size (int): Size for center crop
resize_size (int): Size for resize operation
mean (tuple): Normalization mean
std (tuple): Normalization standard deviation
interpolation (InterpolationMode): Interpolation method
"""
def __init__(self, crop_size: int, resize_size: int = 256, mean: tuple = (0.485, 0.456, 0.406), std: tuple = (0.229, 0.224, 0.225), interpolation=InterpolationMode.BILINEAR): ...
class ObjectDetection:
"""Standard preprocessing for object detection."""
def __init__(self): ...
class SemanticSegmentation:
"""Standard preprocessing for semantic segmentation."""
def __init__(self): ...
class VideoClassification:
"""
Standard preprocessing for video classification.
Args:
crop_size (tuple): Size for crop
resize_size (tuple): Size for resize
mean (tuple): Normalization mean
std (tuple): Normalization standard deviation
"""
def __init__(self, crop_size: tuple = (224, 224), resize_size: tuple = (256, 256), mean: tuple = (0.43216, 0.394666, 0.37645), std: tuple = (0.22803, 0.22145, 0.216989)): ...
class OpticalFlow:
"""Standard preprocessing for optical flow."""
def __init__(self): ...
Low-level functional implementations of transforms.
# Interpolation modes for transforms
class InterpolationMode:
NEAREST = "nearest"
NEAREST_EXACT = "nearest-exact"
BILINEAR = "bilinear"
BICUBIC = "bicubic"
BOX = "box"
HAMMING = "hamming"
LANCZOS = "lanczos"
# Geometric functions
def resize(img, size: list, interpolation=InterpolationMode.BILINEAR, max_size=None, antialias=None):
"""Resize image to given size."""
def center_crop(img, output_size: list):
"""Center crop image to output size."""
def crop(img, top: int, left: int, height: int, width: int):
"""Crop image at specified location."""
def pad(img, padding, fill: int = 0, padding_mode: str = 'constant'):
"""Pad image on all sides."""
def hflip(img):
"""Horizontally flip image."""
def vflip(img):
"""Vertically flip image."""
def rotate(img, angle: float, interpolation=InterpolationMode.NEAREST, expand: bool = False, center=None, fill: int = 0):
"""Rotate image by angle."""
def affine(img, angle: float, translate: list, scale: float, shear: list, interpolation=InterpolationMode.NEAREST, fill: int = 0, center=None):
"""Apply affine transformation."""
def perspective(img, startpoints: list, endpoints: list, interpolation=InterpolationMode.BILINEAR, fill: int = 0):
"""Apply perspective transformation."""
def five_crop(img, size: list):
"""Create five crops of image."""
def ten_crop(img, size: list, vertical_flip: bool = False):
"""Create ten crops of image."""
# Color functions
def adjust_brightness(img, brightness_factor: float):
"""Adjust brightness of image."""
def adjust_contrast(img, contrast_factor: float):
"""Adjust contrast of image."""
def adjust_saturation(img, saturation_factor: float):
"""Adjust saturation of image."""
def adjust_hue(img, hue_factor: float):
"""Adjust hue of image."""
def adjust_gamma(img, gamma: float, gain: float = 1):
"""Adjust gamma of image."""
def adjust_sharpness(img, sharpness_factor: float):
"""Adjust sharpness of image."""
def rgb_to_grayscale(img, num_output_channels: int = 1):
"""Convert RGB image to grayscale."""
def to_grayscale(img, num_output_channels: int = 1):
"""Convert image to grayscale."""
def gaussian_blur(img, kernel_size: list, sigma=None):
"""Apply Gaussian blur to image."""
def invert(img):
"""Invert colors of image."""
def posterize(img, bits: int):
"""Posterize image."""
def solarize(img, threshold: float):
"""Solarize image."""
def autocontrast(img):
"""Apply autocontrast to image."""
def equalize(img):
"""Equalize histogram of image."""
# Conversion functions
def to_tensor(pic):
"""Convert PIL Image or numpy array to tensor."""
def to_pil_image(pic, mode=None):
"""Convert tensor to PIL Image."""
def pil_to_tensor(pic):
"""Convert PIL Image to tensor without scaling."""
def convert_image_dtype(image, dtype: torch.dtype):
"""Convert image tensor dtype."""
def normalize(tensor, mean: list, std: list, inplace: bool = False):
"""Normalize tensor with mean and std."""
# Utility functions
def get_image_size(img):
    """Get image size as [width, height]."""
def get_image_num_channels(img):
"""Get number of channels in image."""
Enhanced transforms API with multi-tensor support for images, videos, bounding boxes, and masks.
class Transform:
"""Base class for all v2 transforms."""
# Type conversion v2
class ToImage:
"""Convert to image tensor."""
class ToPILImage:
"""Convert to PIL Image with v2 support."""
class PILToTensor:
"""Convert PIL to tensor with v2 support."""
class ToPureTensor:
"""Convert to pure tensor."""
class ToDtype:
"""
Convert to specified dtype.
Args:
dtype (torch.dtype): Target dtype
scale (bool): Scale values when converting
"""
def __init__(self, dtype: torch.dtype, scale: bool = False): ...
# Container transforms v2
class Compose:
"""Compose transforms with multi-tensor support."""
class RandomApply:
"""Apply transforms randomly with multi-tensor support."""
class RandomChoice:
"""Choose random transform with multi-tensor support."""
class RandomOrder:
"""Apply in random order with multi-tensor support."""
# Enhanced geometric transforms
class Resize:
"""Resize with multi-tensor support including bounding boxes."""
class CenterCrop:
"""Center crop with bounding box support."""
class RandomCrop:
"""Random crop with mask and bounding box support."""
class RandomResizedCrop:
"""Random resized crop with multi-tensor support."""
class RandomHorizontalFlip:
"""Horizontal flip with bounding box support."""
class RandomVerticalFlip:
"""Vertical flip with bounding box support."""
class RandomRotation:
"""Rotation with bounding box support."""
class RandomAffine:
"""Affine transformation with bounding box support."""
class RandomPerspective:
"""Perspective transformation with v2 support."""
class ElasticTransform:
"""Elastic transformation with v2 support."""
class RandomIoUCrop:
"""
IoU-aware random crop for object detection.
Args:
min_scale (float): Minimum scale for cropping
max_scale (float): Maximum scale for cropping
min_aspect_ratio (float): Minimum aspect ratio
max_aspect_ratio (float): Maximum aspect ratio
sampler_options (list): List of sampling options
trials (int): Number of trials for finding valid crop
"""
def __init__(self, min_scale: float = 0.3, max_scale: float = 1.0, min_aspect_ratio: float = 0.5, max_aspect_ratio: float = 2.0, sampler_options=None, trials: int = 40): ...
class RandomZoomOut:
"""
Random zoom out transformation.
Args:
fill (number or tuple): Fill value for expanded area
side_range (tuple): Range for zoom out factor
p (float): Probability of applying zoom out
"""
def __init__(self, fill: int = 0, side_range: tuple = (1.0, 4.0), p: float = 0.5): ...
class RandomShortestSize:
"""
Random shortest size resize.
Args:
min_size (int or list): Minimum size for shortest edge
max_size (int, optional): Maximum size for longest edge
interpolation (InterpolationMode): Interpolation method
"""
def __init__(self, min_size, max_size=None, interpolation=InterpolationMode.BILINEAR): ...
class RandomResize:
"""
Random resize within range.
Args:
min_size (int): Minimum size
max_size (int): Maximum size
interpolation (InterpolationMode): Interpolation method
"""
def __init__(self, min_size: int, max_size: int, interpolation=InterpolationMode.BILINEAR): ...
class ScaleJitter:
"""
Scale jittering transform.
Args:
target_size (tuple): Target size
scale_range (tuple): Range for scale jittering
interpolation (InterpolationMode): Interpolation method
"""
def __init__(self, target_size: tuple, scale_range: tuple = (0.1, 2.0), interpolation=InterpolationMode.BILINEAR): ...
# Enhanced color transforms v2
class ColorJitter:
"""Color jittering with v2 support."""
class RandomChannelPermutation:
"""Randomly permute image channels."""
class RandomPhotometricDistort:
"""
Photometric distortion for data augmentation.
Args:
brightness (tuple): Range for brightness adjustment
contrast (tuple): Range for contrast adjustment
saturation (tuple): Range for saturation adjustment
hue (tuple): Range for hue adjustment
p (float): Probability of applying distortion
"""
def __init__(self, brightness: tuple = (0.875, 1.125), contrast: tuple = (0.5, 1.5), saturation: tuple = (0.5, 1.5), hue: tuple = (-0.05, 0.05), p: float = 0.5): ...
class RGB:
"""Ensure RGB format."""
class GaussianNoise:
    """
    Add Gaussian noise to images or videos.

    The noise is sampled from a normal distribution; the input is expected
    to be a float tensor (PIL images are not supported by this transform).

    Args:
        mean (float): Mean of the sampled normal distribution
        sigma (float): Standard deviation of the sampled normal distribution
        clip (bool): Clip values to [0, 1] after adding the noise
    """
    def __init__(self, mean: float = 0.0, sigma: float = 0.1, clip: bool = True): ...
# Augmentation transforms v2
class MixUp:
"""
MixUp data augmentation.
Args:
alpha (float): Parameter for Beta distribution
num_classes (int): Number of classes
labels_getter (callable): Function to get labels
"""
def __init__(self, alpha: float = 1.0, num_classes: int = None, labels_getter=None): ...
class CutMix:
"""
CutMix data augmentation.
Args:
alpha (float): Parameter for Beta distribution
num_classes (int): Number of classes
labels_getter (callable): Function to get labels
"""
def __init__(self, alpha: float = 1.0, num_classes: int = None, labels_getter=None): ...
class RandomErasing:
"""
Random erasing data augmentation.
Args:
p (float): Probability of applying random erasing
scale (tuple): Range of proportion of erased area
ratio (tuple): Range of aspect ratio of erased area
value (number or str): Erasing value
inplace (bool): Make operation in-place
"""
def __init__(self, p: float = 0.5, scale: tuple = (0.02, 0.33), ratio: tuple = (0.3, 3.3), value: int = 0, inplace: bool = False): ...
class JPEG:
    """
    JPEG compression simulation.

    Applies JPEG compression and decompression to the input images.

    Args:
        quality (int or sequence): JPEG quality from 1 to 100 (lower means
            more compression). A sequence ``(min, max)`` gives the inclusive
            range from which the quality is sampled at random. This argument
            is required; there is no default.
    """
    def __init__(self, quality): ...
# Metadata transforms v2
class ClampBoundingBoxes:
"""Clamp bounding boxes to image bounds."""
class ClampKeyPoints:
"""Clamp keypoints to image bounds."""
class ConvertBoundingBoxFormat:
"""
Convert bounding box format.
Args:
format (BoundingBoxFormat): Target format
"""
def __init__(self, format): ...
class SanitizeBoundingBoxes:
"""
Remove invalid bounding boxes.
Args:
min_size (float): Minimum box size
labels_getter (callable): Function to get labels
"""
def __init__(self, min_size: float = 1.0, labels_getter=None): ...
# Temporal transforms v2
class UniformTemporalSubsample:
"""
Uniform temporal subsampling for video.
Args:
num_samples (int): Number of samples to extract
"""
def __init__(self, num_samples: int): ...
# Utility functions v2
def check_type(inpt, type_sequence):
"""Check input types."""
def get_bounding_boxes(inpt):
"""Extract bounding boxes from input."""
def has_all(*types):
"""Check if input has all specified types."""
def has_any(*types):
"""Check if input has any specified type."""
def query_chw(flat_inputs):
"""Query CHW dimensions from inputs."""
def query_size(flat_inputs):
"""Query spatial size from inputs."""
from torchvision import transforms
import torch
# Standard ImageNet preprocessing
transform = transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize(
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]
)
])
# Apply to PIL image
from PIL import Image
image = Image.open('image.jpg')
tensor = transform(image)
from torchvision import transforms
# Training augmentations
train_transform = transforms.Compose([
transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
transforms.RandomHorizontalFlip(p=0.5),
transforms.ColorJitter(
brightness=0.2,
contrast=0.2,
saturation=0.2,
hue=0.1
),
transforms.RandomRotation(degrees=10),
transforms.ToTensor(),
transforms.Normalize(
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]
),
transforms.RandomErasing(p=0.1)
])
from torchvision.transforms import v2
from torchvision.tv_tensors import BoundingBoxes, Image
import torch  # needed below for torch.float32, torch.randint and torch.tensor

# Object detection preprocessing
# NOTE(review): the torchvision documentation for RandomIoUCrop advises
# following it with SanitizeBoundingBoxes so that boxes falling below the
# IoU threshold are actually removed - consider adding it for real pipelines.
transform = v2.Compose([
    v2.ToImage(),
    v2.RandomHorizontalFlip(p=0.5),
    v2.RandomIoUCrop(),
    v2.Resize(size=(640, 640)),
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Apply to image and bounding boxes
image = Image(torch.randint(0, 256, (3, 480, 640), dtype=torch.uint8))
boxes = BoundingBoxes(
    torch.tensor([[10, 10, 100, 100], [200, 200, 300, 300]]),
    format='XYXY',
    canvas_size=(480, 640)
)
transformed_image, transformed_boxes = transform(image, boxes)
from torchvision.transforms import functional as F
import torch
# Using functional API for custom transforms
def custom_transform(image):
    """Preprocess one image with the functional API.

    Resizes to 256x256, center-crops to 224x224, converts to a tensor,
    flips horizontally when a random draw exceeds 0.5, and finally
    normalizes with the given per-channel mean and std.

    Args:
        image: Input accepted by ``F.resize`` / ``F.to_tensor``.

    Returns:
        The normalized image tensor.
    """
    # Apply specific sequence of transforms
    image = F.resize(image, [256, 256])
    image = F.center_crop(image, [224, 224])
    image = F.to_tensor(image)
    # Conditional augmentation
    if torch.rand(1) > 0.5:
        image = F.hflip(image)
    image = F.normalize(image, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    return image


from torchvision.transforms import v2
import torch  # needed below for torch.float32 and torch.randint

# Video preprocessing pipeline
video_transform = v2.Compose([
    v2.UniformTemporalSubsample(16),  # Sample 16 frames
    v2.Resize((224, 224)),
    v2.RandomHorizontalFlip(p=0.5),
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize(mean=[0.43216, 0.394666, 0.37645],
                 std=[0.22803, 0.22145, 0.216989])
])

# Apply to video tensor (T, C, H, W)
video_tensor = torch.randint(0, 256, (32, 3, 256, 256), dtype=torch.uint8)
transformed_video = video_transform(video_tensor)
from torchvision import transforms
# Using AutoAugment
transform = transforms.Compose([
transforms.Resize(256),
transforms.AutoAugment(policy=transforms.AutoAugmentPolicy.IMAGENET),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
# Using RandAugment
transform_rand = transforms.Compose([
transforms.Resize(256),
transforms.RandAugment(num_ops=2, magnitude=15),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
Install with Tessl CLI
npx tessl i tessl/pypi-torchvision