Computer vision library for PyTorch with datasets, model architectures, and image/video transforms.
TorchVision provides a comprehensive collection of computer vision datasets with automatic downloading, caching, and preprocessing. The datasets module supports image classification, object detection, segmentation, video analysis, optical flow, and stereo vision tasks.
Foundation classes for building and working with vision datasets.
class VisionDataset:
"""
Base class for all vision datasets.
Args:
root (str): Root directory of dataset
transforms (callable, optional): Function/transform to apply to data
transform (callable, optional): Function/transform to apply to PIL image
target_transform (callable, optional): Function/transform to apply to target
"""
def __init__(self, root: str, transforms=None, transform=None, target_transform=None): ...
def __getitem__(self, index: int): ...
def __len__(self) -> int: ...
class DatasetFolder(VisionDataset):
"""
Generic data loader for datasets in folder format.
Args:
root (str): Root directory path
loader (callable): Function to load a sample from path
extensions (tuple): Allowed extensions
transform (callable, optional): Transform to apply to samples
target_transform (callable, optional): Transform to apply to targets
is_valid_file (callable, optional): Function to check file validity
"""
def __init__(self, root: str, loader, extensions=None, transform=None, target_transform=None, is_valid_file=None): ...
class ImageFolder(DatasetFolder):
"""
Data loader for image classification datasets in folder format.
Expected structure: root/class_x/xxx.ext
Args:
root (str): Root directory path
transform (callable, optional): Transform to apply to PIL images
target_transform (callable, optional): Transform to apply to targets
loader (callable, optional): Function to load image from path
is_valid_file (callable, optional): Function to check file validity
"""
def __init__(self, root: str, transform=None, target_transform=None, loader=None, is_valid_file=None): ...

Standard datasets for image classification tasks with automatic download and preprocessing.
class MNIST(VisionDataset):
"""
MNIST handwritten digit dataset.
Args:
root (str): Root directory for dataset files
train (bool): If True, creates dataset from training set, else test set
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
download (bool): If True, downloads dataset if not found at root
"""
def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...
class FashionMNIST(VisionDataset):
"""Fashion-MNIST dataset of clothing images."""
def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...
class KMNIST(VisionDataset):
"""Kuzushiji-MNIST dataset of Japanese characters."""
def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...
class EMNIST(VisionDataset):
"""
Extended MNIST dataset.
Args:
root (str): Root directory for dataset files
split (str): Dataset split ('byclass', 'bymerge', 'balanced', 'letters', 'digits', 'mnist')
train (bool): If True, creates dataset from training set
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
download (bool): If True, downloads dataset if not found
"""
def __init__(self, root: str, split: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...
class CIFAR10(VisionDataset):
"""
CIFAR-10 dataset of 32x32 color images in 10 classes.
Args:
root (str): Root directory for dataset files
train (bool): If True, creates dataset from training set, else test set
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
download (bool): If True, downloads dataset if not found at root
"""
def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...
class CIFAR100(VisionDataset):
"""CIFAR-100 dataset with 100 classes grouped into 20 superclasses."""
def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...
class SVHN(VisionDataset):
"""
Street View House Numbers dataset.
Args:
root (str): Root directory for dataset files
split (str): Dataset split ('train', 'test', 'extra')
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
download (bool): If True, downloads dataset if not found
"""
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...
class ImageNet(VisionDataset):
"""
ImageNet dataset for large-scale image classification.
Args:
root (str): Root directory containing 'train' and 'val' folders
split (str): Dataset split ('train', 'val')
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
"""
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None): ...
class Caltech101(VisionDataset):
"""Caltech 101 dataset with 101 object categories."""
def __init__(self, root: str, target_type='category', transform=None, target_transform=None, download: bool = False): ...
class Caltech256(VisionDataset):
"""Caltech 256 dataset with 256 object categories."""
def __init__(self, root: str, transform=None, target_transform=None, download: bool = False): ...
class CelebA(VisionDataset):
"""
CelebA face dataset with attributes.
Args:
root (str): Root directory for dataset files
split (str): Dataset split ('train', 'valid', 'test', 'all')
target_type (str): Target type ('attr', 'identity', 'bbox', 'landmarks')
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
download (bool): If True, downloads dataset if not found
"""
def __init__(self, root: str, split: str = 'train', target_type: str = 'attr', transform=None, target_transform=None, download: bool = False): ...
class StanfordCars(VisionDataset):
"""Stanford Cars dataset with 196 car classes."""
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...
class Flowers102(VisionDataset):
"""Oxford 102 Flower dataset."""
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...
class Food101(VisionDataset):
"""Food-101 dataset with 101 food categories."""
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...
class GTSRB(VisionDataset):
"""German Traffic Sign Recognition Benchmark."""
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...
class OxfordIIITPet(VisionDataset):
"""
Oxford-IIIT Pet dataset.
Args:
root (str): Root directory for dataset files
split (str): Dataset split ('trainval', 'test')
target_types (str or list): Target types ('category', 'segmentation')
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
download (bool): If True, downloads dataset if not found
"""
def __init__(self, root: str, split: str = 'trainval', target_types='category', transform=None, target_transform=None, download: bool = False): ...
class STL10(VisionDataset):
"""
STL10 dataset of 96x96 color images in 10 classes.
Args:
root (str): Root directory for dataset files
split (str): Dataset split ('train', 'test', 'unlabeled', 'train+unlabeled')
folds (int, optional): One of {0-9} or None for training fold selection
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
download (bool): If True, downloads dataset if not found
"""
def __init__(self, root: str, split: str = 'train', folds=None, transform=None, target_transform=None, download: bool = False): ...
class SUN397(VisionDataset):
"""
SUN397 scene recognition dataset with 397 categories.
Args:
root (str): Root directory for dataset files
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
download (bool): If True, downloads dataset if not found
loader (callable, optional): Function to load image from path
"""
def __init__(self, root: str, transform=None, target_transform=None, download: bool = False, loader=None): ...
class SEMEION(VisionDataset):
"""
SEMEION handwritten digit dataset with 16x16 grayscale images.
Args:
root (str): Root directory for dataset files
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
download (bool): If True, downloads dataset if not found
"""
def __init__(self, root: str, transform=None, target_transform=None, download: bool = True): ...
class Omniglot(VisionDataset):
"""
Omniglot dataset for few-shot learning with character recognition.
Args:
root (str): Root directory for dataset files
background (bool): If True, creates dataset from background set, otherwise evaluation set
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
download (bool): If True, downloads dataset if not found
loader (callable, optional): Function to load image from path
"""
def __init__(self, root: str, background: bool = True, transform=None, target_transform=None, download: bool = False, loader=None): ...
class USPS(VisionDataset):
"""
USPS handwritten digit dataset with 16x16 grayscale images.
Args:
root (str): Root directory for dataset files
train (bool): If True, creates dataset from training set, otherwise test set
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
download (bool): If True, downloads dataset if not found
"""
def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...
class QMNIST(VisionDataset):
"""
QMNIST extended MNIST dataset with additional metadata.
Args:
root (str): Root directory for dataset files
what (str, optional): Dataset subset ('train', 'test', 'test10k', 'test50k', 'nist')
compat (bool): If True, returns class labels for MNIST compatibility
train (bool): If True, creates dataset from training set (when what is None)
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
download (bool): If True, downloads dataset if not found
"""
def __init__(self, root: str, what=None, compat: bool = True, train: bool = True, transform=None, target_transform=None, download: bool = False): ...
class Places365(VisionDataset):
"""
Places365 scene recognition dataset with 365 scene categories.
Args:
root (str): Root directory for dataset files
split (str): Dataset split ('train-standard', 'train-challenge', 'val', 'test')
small (bool): If True, uses small (256x256) images instead of high resolution
download (bool): If True, downloads dataset if not found
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
loader (callable, optional): Function to load image from path
"""
def __init__(self, root: str, split: str = 'train-standard', small: bool = False, download: bool = False, transform=None, target_transform=None, loader=None): ...
class INaturalist(VisionDataset):
"""
iNaturalist dataset for fine-grained species classification.
Args:
root (str): Root directory for dataset files
version (str): Dataset version ('2017', '2018', '2019', '2021_train', '2021_train_mini', '2021_valid')
target_type (str or list): Target type ('full', 'kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'super')
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
download (bool): If True, downloads dataset if not found
loader (callable, optional): Function to load image from path
"""
def __init__(self, root: str, version: str = '2021_train', target_type='full', transform=None, target_transform=None, download: bool = False, loader=None): ...
class DTD(VisionDataset):
"""
Describable Textures Dataset (DTD) with 47 texture categories.
Args:
root (str): Root directory for dataset files
split (str): Dataset split ('train', 'val', 'test')
partition (int): Dataset partition (1-10)
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
download (bool): If True, downloads dataset if not found
loader (callable, optional): Function to load image from path
"""
def __init__(self, root: str, split: str = 'train', partition: int = 1, transform=None, target_transform=None, download: bool = False, loader=None): ...
class FER2013(VisionDataset):
"""
FER2013 facial expression recognition dataset with 7 emotion classes.
Args:
root (str): Root directory for dataset files
split (str): Dataset split ('train', 'test')
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
"""
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None): ...
class CLEVRClassification(VisionDataset):
"""
CLEVR classification dataset for visual reasoning (object counting).
Args:
root (str): Root directory for dataset files
split (str): Dataset split ('train', 'val', 'test')
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
download (bool): If True, downloads dataset if not found
loader (callable, optional): Function to load image from path
"""
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False, loader=None): ...
class PCAM(VisionDataset):
"""
PatchCamelyon (PCAM) histopathologic cancer detection dataset.
Args:
root (str): Root directory for dataset files
split (str): Dataset split ('train', 'val', 'test')
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
download (bool): If True, downloads dataset if not found
"""
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...
class Country211(VisionDataset):
"""
Country211 dataset for country classification from images.
Args:
root (str): Root directory for dataset files
split (str): Dataset split ('train', 'valid', 'test')
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
download (bool): If True, downloads dataset if not found
loader (callable, optional): Function to load image from path
"""
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False, loader=None): ...
class FGVCAircraft(VisionDataset):
"""
FGVC Aircraft dataset for fine-grained aircraft recognition.
Args:
root (str): Root directory for dataset files
split (str): Dataset split ('train', 'val', 'trainval', 'test')
annotation_level (str): Annotation level ('variant', 'family', 'manufacturer')
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
download (bool): If True, downloads dataset if not found
loader (callable, optional): Function to load image from path
"""
def __init__(self, root: str, split: str = 'trainval', annotation_level: str = 'variant', transform=None, target_transform=None, download: bool = False, loader=None): ...
class EuroSAT(VisionDataset):
"""
EuroSAT satellite image classification dataset with 10 land use classes.
Args:
root (str): Root directory for dataset files
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
download (bool): If True, downloads dataset if not found
loader (callable, optional): Function to load image from path
"""
def __init__(self, root: str, transform=None, target_transform=None, download: bool = False, loader=None): ...
class RenderedSST2(VisionDataset):
"""
Rendered SST2 dataset for optical character recognition with sentiment.
Args:
root (str): Root directory for dataset files
split (str): Dataset split ('train', 'val', 'test')
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
download (bool): If True, downloads dataset if not found
loader (callable, optional): Function to load image from path
"""
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False, loader=None): ...
class Imagenette(VisionDataset):
"""
Imagenette dataset - subset of ImageNet with 10 classes.
Args:
root (str): Root directory for dataset files
split (str): Dataset split ('train', 'val')
size (str): Image size ('full', '320px', '160px')
download (bool): If True, downloads dataset if not found
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
loader (callable, optional): Function to load image from path
"""
def __init__(self, root: str, split: str = 'train', size: str = 'full', download: bool = False, transform=None, target_transform=None, loader=None): ...

Datasets for object detection, instance segmentation, and semantic segmentation tasks.
class CocoDetection(VisionDataset):
"""
COCO dataset for object detection.
Args:
root (str): Root directory containing images
annFile (str): Path to annotation file
transform (callable, optional): Transform to apply to image
target_transform (callable, optional): Transform to apply to target
transforms (callable, optional): Transform to apply to image and target
"""
def __init__(self, root: str, annFile: str, transform=None, target_transform=None, transforms=None): ...
class CocoCaptions(VisionDataset):
"""COCO dataset for image captioning."""
def __init__(self, root: str, annFile: str, transform=None, target_transform=None, transforms=None): ...
class VOCDetection(VisionDataset):
"""
Pascal VOC dataset for object detection.
Args:
root (str): Root directory for dataset files
year (str): Dataset year ('2007', '2008', '2009', '2010', '2011', '2012')
image_set (str): Image set ('train', 'trainval', 'val', 'test')
download (bool): If True, downloads dataset if not found
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
transforms (callable, optional): Transform to apply to image and target
"""
def __init__(self, root: str, year: str = '2012', image_set: str = 'train', download: bool = False, transform=None, target_transform=None, transforms=None): ...
class VOCSegmentation(VisionDataset):
"""Pascal VOC dataset for semantic segmentation."""
def __init__(self, root: str, year: str = '2012', image_set: str = 'train', download: bool = False, transform=None, target_transform=None, transforms=None): ...
class Cityscapes(VisionDataset):
"""
Cityscapes dataset for semantic segmentation.
Args:
root (str): Root directory for dataset files
split (str): Dataset split ('train', 'test', 'val')
mode (str): Quality mode ('fine', 'coarse')
target_type (str or list): Target type ('instance', 'semantic', 'polygon', 'color')
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
transforms (callable, optional): Transform to apply to image and target
"""
def __init__(self, root: str, split: str = 'train', mode: str = 'fine', target_type: str = 'instance', transform=None, target_transform=None, transforms=None): ...
class SBDataset(VisionDataset):
"""Semantic Boundaries Dataset."""
def __init__(self, root: str, image_set: str = 'train', mode: str = 'boundaries', download: bool = False, transform=None, target_transform=None): ...
class WIDERFace(VisionDataset):
"""
WIDER FACE dataset for face detection.
Args:
root (str): Root directory for dataset files
split (str): Dataset split ('train', 'val', 'test')
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
download (bool): If True, downloads dataset if not found
"""
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...
class Kitti(VisionDataset):
"""
KITTI dataset for object detection.
Args:
root (str): Root directory for dataset files
train (bool): If True, creates dataset from training set, otherwise test set
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
transforms (callable, optional): Transform to apply to image and target
download (bool): If True, downloads dataset if not found
"""
def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, transforms=None, download: bool = False): ...

Datasets for video analysis and action recognition tasks.
class Kinetics(VisionDataset):
"""
Kinetics dataset for action recognition.
Args:
root (str): Root directory for dataset files
frames_per_clip (int): Number of frames per video clip
num_classes (str): Number of classes ('400', '600', '700')
split (str): Dataset split ('train', 'val')
frame_rate (int, optional): Target frame rate for clips
step_between_clips (int): Number of frames between consecutive clips
transform (callable, optional): Transform to apply to video clips
download (bool): If True, downloads dataset if not found
num_download_workers (int): Number of workers for downloading
num_extract_workers (int): Number of workers for extraction
"""
def __init__(self, root: str, frames_per_clip: int, num_classes: str = '400', split: str = 'train', frame_rate=None, step_between_clips: int = 1, transform=None, download: bool = False, num_download_workers: int = 1, num_extract_workers: int = 1): ...
class HMDB51(VisionDataset):
"""
HMDB51 action recognition dataset.
Args:
root (str): Root directory for dataset files
annotation_path (str): Path to annotation files
frames_per_clip (int): Number of frames per video clip
step_between_clips (int): Number of frames between consecutive clips
fold (int): Which fold to load (1, 2, or 3)
train (bool): If True, creates dataset from training set
transform (callable, optional): Transform to apply to video clips
num_workers (int): Number of workers for video loading
"""
def __init__(self, root: str, annotation_path: str, frames_per_clip: int, step_between_clips: int = 1, fold: int = 1, train: bool = True, transform=None, num_workers: int = 1): ...
class UCF101(VisionDataset):
"""UCF101 action recognition dataset with 101 action classes."""
def __init__(self, root: str, annotation_path: str, frames_per_clip: int, step_between_clips: int = 1, fold: int = 1, train: bool = True, transform=None, num_workers: int = 1): ...
class MovingMNIST(VisionDataset):
"""
Moving MNIST dataset for video prediction.
Args:
root (str): Root directory for dataset files
split (str, optional): Dataset split ('train', 'test')
transform (callable, optional): Transform to apply to video data
download (bool): If True, downloads dataset if not found
"""
def __init__(self, root: str, split=None, transform=None, download: bool = True): ...

Datasets for optical flow estimation and stereo vision tasks.
class FlyingChairs(VisionDataset):
"""
FlyingChairs optical flow dataset.
Args:
root (str): Root directory for dataset files
split (str): Dataset split ('train', 'val')
transform (callable, optional): Transform to apply to samples
target_transform (callable, optional): Transform to apply to flow
"""
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None): ...
class FlyingThings3D(VisionDataset):
"""FlyingThings3D optical flow dataset."""
def __init__(self, root: str, split: str = 'train', pass_name: str = 'clean', camera: str = 'left', transform=None, target_transform=None): ...
class Sintel(VisionDataset):
"""
MPI Sintel optical flow dataset.
Args:
root (str): Root directory for dataset files
split (str): Dataset split ('train', 'test')
pass_name (str): Rendering pass ('clean', 'final')
transform (callable, optional): Transform to apply to samples
target_transform (callable, optional): Transform to apply to flow
"""
def __init__(self, root: str, split: str = 'train', pass_name: str = 'clean', transform=None, target_transform=None): ...
class KittiFlow(VisionDataset):
"""KITTI optical flow dataset."""
def __init__(self, root: str, split: str = 'train', transform=None): ...
class HD1K(VisionDataset):
"""HD1K optical flow dataset."""
def __init__(self, root: str, split: str = 'train', transform=None): ...
class Kitti2012Stereo(VisionDataset):
"""
KITTI 2012 stereo dataset.
Args:
root (str): Root directory for dataset files
split (str): Dataset split ('train', 'test')
transform (callable, optional): Transform to apply to samples
"""
def __init__(self, root: str, split: str = 'train', transform=None): ...
class Kitti2015Stereo(VisionDataset):
"""KITTI 2015 stereo dataset."""
def __init__(self, root: str, split: str = 'train', transform=None): ...
class CarlaStereo(VisionDataset):
"""CARLA stereo dataset."""
def __init__(self, root: str, split: str = 'train', transform=None): ...
class Middlebury2014Stereo(VisionDataset):
"""Middlebury 2014 stereo dataset."""
def __init__(self, root: str, split: str = 'train', transform=None): ...
class CREStereo(VisionDataset):
"""CREStereo dataset."""
def __init__(self, root: str, split: str = 'train', transform=None): ...
class FallingThingsStereo(VisionDataset):
"""Falling Things stereo dataset."""
def __init__(self, root: str, variant: str = 'single', split: str = 'train', transform=None): ...
class SceneFlowStereo(VisionDataset):
"""Scene Flow stereo dataset."""
def __init__(self, root: str, split: str = 'train', pass_name: str = 'clean', transform=None): ...
class SintelStereo(VisionDataset):
"""Sintel stereo dataset."""
def __init__(self, root: str, split: str = 'train', pass_name: str = 'clean', transform=None): ...
class InStereo2k(VisionDataset):
"""InStereo2k dataset."""
def __init__(self, root: str, split: str = 'train', transform=None): ...
class ETH3DStereo(VisionDataset):
"""ETH3D stereo dataset."""
def __init__(self, root: str, split: str = 'train', transform=None): ...

Datasets for image captioning, patch matching, and face recognition tasks.
class SBU(VisionDataset):
"""
SBU Captioned Photo dataset for image captioning.
Args:
root (str): Root directory for dataset files
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
download (bool): If True, downloads dataset if not found
loader (callable, optional): Function to load image from path
"""
def __init__(self, root: str, transform=None, target_transform=None, download: bool = True, loader=None): ...
class Flickr8k(VisionDataset):
"""
Flickr8k dataset for image captioning.
Args:
root (str): Root directory for dataset files
ann_file (str): Path to annotation file
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
loader (callable, optional): Function to load image from path
"""
def __init__(self, root: str, ann_file: str, transform=None, target_transform=None, loader=None): ...
class Flickr30k(VisionDataset):
"""
Flickr30k dataset for image captioning.
Args:
root (str): Root directory for dataset files
ann_file (str): Path to annotation file
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
loader (callable, optional): Function to load image from path
"""
def __init__(self, root: str, ann_file: str, transform=None, target_transform=None, loader=None): ...
class PhotoTour(VisionDataset):
"""
Multi-view Stereo Correspondence dataset for patch matching.
Args:
root (str): Root directory for dataset files
name (str): Dataset name ('notredame_harris', 'yosemite_harris', 'liberty_harris', 'notredame', 'yosemite', 'liberty')
train (bool): If True, creates dataset for training patches, otherwise for matching pairs
transform (callable, optional): Transform to apply to patches
download (bool): If True, downloads dataset if not found
"""
def __init__(self, root: str, name: str, train: bool = True, transform=None, download: bool = False): ...
class LFWPeople(VisionDataset):
"""
LFW People dataset for face recognition.
Args:
root (str): Root directory for dataset files
split (str): Dataset split ('train', 'test', '10fold')
image_set (str): Image processing type ('original', 'funneled', 'deepfunneled')
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
download (bool): NOT SUPPORTED - manual download required
loader (callable, optional): Function to load image from path
"""
def __init__(self, root: str, split: str = '10fold', image_set: str = 'funneled', transform=None, target_transform=None, download: bool = False, loader=None): ...
class LFWPairs(VisionDataset):
"""
LFW Pairs dataset for face verification.
Args:
root (str): Root directory for dataset files
split (str): Dataset split ('train', 'test', '10fold')
image_set (str): Image processing type ('original', 'funneled', 'deepfunneled')
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
download (bool): NOT SUPPORTED - manual download required
loader (callable, optional): Function to load image from path
"""
def __init__(self, root: str, split: str = '10fold', image_set: str = 'funneled', transform=None, target_transform=None, download: bool = False, loader=None): ...

Helper datasets and utilities for testing and dataset manipulation.
class FakeData(VisionDataset):
"""
Generates fake data for testing purposes.
Args:
size (int): Dataset size
image_size (tuple): Image dimensions (channels, height, width)
num_classes (int): Number of classes
transform (callable, optional): Transform to apply to PIL image
target_transform (callable, optional): Transform to apply to target
random_offset (int): Random seed offset
"""
def __init__(self, size: int = 1000, image_size: tuple = (3, 224, 224), num_classes: int = 10, transform=None, target_transform=None, random_offset: int = 0): ...
def wrap_dataset_for_transforms_v2(dataset, target_keys=None):
"""
Wraps datasets for v2 transforms compatibility.
Args:
dataset: Dataset to wrap
target_keys (sequence, optional): Keys for target extraction
Returns:
Wrapped dataset compatible with v2 transforms
"""Sampling strategies for dataset loading and batching.
# Available in torchvision.datasets.samplers
# Provides various sampling strategies for efficient dataset loading

from torchvision import datasets, transforms
from torch.utils.data import DataLoader
# Define transforms
transform = transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
])
# Load CIFAR-10
train_dataset = datasets.CIFAR10(
root='./data',
train=True,
download=True,
transform=transform
)
test_dataset = datasets.CIFAR10(
root='./data',
train=False,
download=True,
transform=transform
)
# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

from torchvision import datasets, transforms as T
# Define transforms for detection
transform = T.Compose([
T.ToTensor(),
])
# Load COCO detection dataset
dataset = datasets.CocoDetection(
root='/path/to/coco/images/train2017',
annFile='/path/to/coco/annotations/instances_train2017.json',
transform=transform
)
# Each item returns (image, target) where target is list of annotations
image, target = dataset[0]

from torchvision import datasets, transforms
# For datasets organized as: root/class_name/image_files
transform = transforms.Compose([
transforms.Resize((224, 224)),
transforms.ToTensor(),
])
dataset = datasets.ImageFolder(
root='/path/to/custom/dataset',
transform=transform
)
# Access class names
print(dataset.classes)
print(dataset.class_to_idx)

Install with Tessl CLI:
npx tessl i tessl/pypi-torchvision