CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-torchvision

Computer vision library for PyTorch with datasets, model architectures, and image/video transforms.

Overview
Eval results
Files

docs/datasets.md

Datasets

TorchVision provides a comprehensive collection of computer vision datasets with automatic downloading, caching, and preprocessing. The datasets module supports image classification, object detection, segmentation, video analysis, optical flow, and stereo vision tasks.

Capabilities

Base Dataset Classes

Foundation classes for building and working with vision datasets.

class VisionDataset:
    """
    Base class for all vision datasets.
    
    Subclasses are expected to implement __getitem__ and __len__.
    
    Args:
        root (str): Root directory of dataset
        transforms (callable, optional): Function/transform to apply to data
        transform (callable, optional): Function/transform to apply to PIL image
        target_transform (callable, optional): Function/transform to apply to target
    
    Note:
        In torchvision, ``transforms`` and the ``transform``/``target_transform``
        pair are presumably mutually exclusive — confirm against upstream docs.
    """
    def __init__(self, root: str, transforms=None, transform=None, target_transform=None): ...
    # Return the sample (and target) at the given index.
    def __getitem__(self, index: int): ...
    # Return the number of samples in the dataset.
    def __len__(self) -> int: ...

class DatasetFolder(VisionDataset):
    """
    Generic data loader for datasets in folder format.
    
    Args:
        root (str): Root directory path
        loader (callable): Function to load a sample from path
        extensions (tuple): Allowed extensions
        transform (callable, optional): Transform to apply to samples
        target_transform (callable, optional): Transform to apply to targets
        is_valid_file (callable, optional): Function to check file validity
    """
    def __init__(self, root: str, loader, extensions=None, transform=None, target_transform=None, is_valid_file=None): ...

class ImageFolder(DatasetFolder):
    """
    Data loader for image classification datasets in folder format.
    Expected structure: root/class_x/xxx.ext
    
    Args:
        root (str): Root directory path
        transform (callable, optional): Transform to apply to PIL images
        target_transform (callable, optional): Transform to apply to targets
        loader (callable, optional): Function to load image from path
        is_valid_file (callable, optional): Function to check file validity
    """
    def __init__(self, root: str, transform=None, target_transform=None, loader=None, is_valid_file=None): ...

Image Classification Datasets

Standard datasets for image classification tasks with automatic download and preprocessing.

class MNIST(VisionDataset):
    """
    MNIST handwritten digit dataset.
    
    Args:
        root (str): Root directory for dataset files
        train (bool): If True, creates dataset from training set, else test set
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found at root
    """
    def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...

class FashionMNIST(VisionDataset):
    """
    Fashion-MNIST dataset of clothing images (drop-in replacement for MNIST).
    
    Args:
        root (str): Root directory for dataset files
        train (bool): If True, creates dataset from training set, else test set
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found at root
    """
    def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...

class KMNIST(VisionDataset):
    """
    Kuzushiji-MNIST dataset of Japanese characters (drop-in replacement for MNIST).
    
    Args:
        root (str): Root directory for dataset files
        train (bool): If True, creates dataset from training set, else test set
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found at root
    """
    def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...

class EMNIST(VisionDataset):
    """
    Extended MNIST dataset.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('byclass', 'bymerge', 'balanced', 'letters', 'digits', 'mnist')
        train (bool): If True, creates dataset from training set
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
    """
    def __init__(self, root: str, split: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...

class CIFAR10(VisionDataset):
    """
    CIFAR-10 dataset of 32x32 color images in 10 classes.
    
    Args:
        root (str): Root directory for dataset files
        train (bool): If True, creates dataset from training set, else test set
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found at root
    """
    def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...

class CIFAR100(VisionDataset):
    """
    CIFAR-100 dataset with 100 classes grouped into 20 superclasses.
    
    Args:
        root (str): Root directory for dataset files
        train (bool): If True, creates dataset from training set, else test set
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found at root
    """
    def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...

class SVHN(VisionDataset):
    """
    Street View House Numbers dataset.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'test', 'extra')
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
    """
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...

class ImageNet(VisionDataset):
    """
    ImageNet dataset for large-scale image classification.
    
    Args:
        root (str): Root directory containing 'train' and 'val' folders
        split (str): Dataset split ('train', 'val')
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
    """
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None): ...

class Caltech101(VisionDataset):
    """
    Caltech 101 dataset with 101 object categories.
    
    Args:
        root (str): Root directory for dataset files
        target_type (str or list): Target type ('category', 'annotation')
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
    """
    def __init__(self, root: str, target_type='category', transform=None, target_transform=None, download: bool = False): ...

class Caltech256(VisionDataset):
    """
    Caltech 256 dataset with 256 object categories.
    
    Args:
        root (str): Root directory for dataset files
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
    """
    def __init__(self, root: str, transform=None, target_transform=None, download: bool = False): ...

class CelebA(VisionDataset):
    """
    CelebA face dataset with attributes.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'valid', 'test', 'all')
        target_type (str): Target type ('attr', 'identity', 'bbox', 'landmarks')
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
    """
    def __init__(self, root: str, split: str = 'train', target_type: str = 'attr', transform=None, target_transform=None, download: bool = False): ...

class StanfordCars(VisionDataset):
    """
    Stanford Cars dataset with 196 car classes.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'test')
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
    """
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...

class Flowers102(VisionDataset):
    """
    Oxford 102 Flower dataset.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'val', 'test')
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
    """
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...

class Food101(VisionDataset):
    """
    Food-101 dataset with 101 food categories.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'test')
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
    """
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...

class GTSRB(VisionDataset):
    """
    German Traffic Sign Recognition Benchmark.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'test')
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
    """
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...

class OxfordIIITPet(VisionDataset):
    """
    Oxford-IIIT Pet dataset.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('trainval', 'test')
        target_types (str or list): Target types ('category', 'segmentation')
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
    """
    def __init__(self, root: str, split: str = 'trainval', target_types='category', transform=None, target_transform=None, download: bool = False): ...

class STL10(VisionDataset):
    """
    STL10 dataset of 96x96 color images in 10 classes.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'test', 'unlabeled', 'train+unlabeled')
        folds (int, optional): One of {0-9} or None for training fold selection
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
    """
    def __init__(self, root: str, split: str = 'train', folds=None, transform=None, target_transform=None, download: bool = False): ...

class SUN397(VisionDataset):
    """
    SUN397 scene recognition dataset with 397 categories.
    
    Args:
        root (str): Root directory for dataset files
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
        loader (callable, optional): Function to load image from path
    """
    def __init__(self, root: str, transform=None, target_transform=None, download: bool = False, loader=None): ...

class SEMEION(VisionDataset):
    """
    SEMEION handwritten digit dataset with 16x16 grayscale images.
    
    Args:
        root (str): Root directory for dataset files
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
    """
    def __init__(self, root: str, transform=None, target_transform=None, download: bool = True): ...

class Omniglot(VisionDataset):
    """
    Omniglot dataset for few-shot learning with character recognition.
    
    Args:
        root (str): Root directory for dataset files
        background (bool): If True, creates dataset from background set, otherwise evaluation set
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
        loader (callable, optional): Function to load image from path
    """
    def __init__(self, root: str, background: bool = True, transform=None, target_transform=None, download: bool = False, loader=None): ...

class USPS(VisionDataset):
    """
    USPS handwritten digit dataset with 16x16 grayscale images.
    
    Args:
        root (str): Root directory for dataset files
        train (bool): If True, creates dataset from training set, otherwise test set
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
    """
    def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...

class QMNIST(VisionDataset):
    """
    QMNIST extended MNIST dataset with additional metadata.
    
    Args:
        root (str): Root directory for dataset files
        what (str, optional): Dataset subset ('train', 'test', 'test10k', 'test50k', 'nist')
        compat (bool): If True, returns class labels for MNIST compatibility
        train (bool): If True, creates dataset from training set (when what is None)
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
    """
    def __init__(self, root: str, what=None, compat: bool = True, train: bool = True, transform=None, target_transform=None, download: bool = False): ...

class Places365(VisionDataset):
    """
    Places365 scene recognition dataset with 365 scene categories.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train-standard', 'train-challenge', 'val', 'test')
        small (bool): If True, uses small (256x256) images instead of high resolution
        download (bool): If True, downloads dataset if not found
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        loader (callable, optional): Function to load image from path
    """
    def __init__(self, root: str, split: str = 'train-standard', small: bool = False, download: bool = False, transform=None, target_transform=None, loader=None): ...

class INaturalist(VisionDataset):
    """
    iNaturalist dataset for fine-grained species classification.
    
    Args:
        root (str): Root directory for dataset files
        version (str): Dataset version ('2017', '2018', '2019', '2021_train', '2021_train_mini', '2021_valid')
        target_type (str or list): Target type ('full', 'kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'super')
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
        loader (callable, optional): Function to load image from path
    """
    def __init__(self, root: str, version: str = '2021_train', target_type='full', transform=None, target_transform=None, download: bool = False, loader=None): ...

class DTD(VisionDataset):
    """
    Describable Textures Dataset (DTD) with 47 texture categories.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'val', 'test')
        partition (int): Dataset partition (1-10)
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
        loader (callable, optional): Function to load image from path
    """
    def __init__(self, root: str, split: str = 'train', partition: int = 1, transform=None, target_transform=None, download: bool = False, loader=None): ...

class FER2013(VisionDataset):
    """
    FER2013 facial expression recognition dataset with 7 emotion classes.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'test')
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
    """
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None): ...

class CLEVRClassification(VisionDataset):
    """
    CLEVR classification dataset for visual reasoning (object counting).
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'val', 'test')
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
        loader (callable, optional): Function to load image from path
    """
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False, loader=None): ...

class PCAM(VisionDataset):
    """
    PatchCamelyon (PCAM) histopathologic cancer detection dataset.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'val', 'test')
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
    """
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...

class Country211(VisionDataset):
    """
    Country211 dataset for country classification from images.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'valid', 'test')
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
        loader (callable, optional): Function to load image from path
    """
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False, loader=None): ...

class FGVCAircraft(VisionDataset):
    """
    FGVC Aircraft dataset for fine-grained aircraft recognition.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'val', 'trainval', 'test')
        annotation_level (str): Annotation level ('variant', 'family', 'manufacturer')
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
        loader (callable, optional): Function to load image from path
    """
    def __init__(self, root: str, split: str = 'trainval', annotation_level: str = 'variant', transform=None, target_transform=None, download: bool = False, loader=None): ...

class EuroSAT(VisionDataset):
    """
    EuroSAT satellite image classification dataset with 10 land use classes.
    
    Args:
        root (str): Root directory for dataset files
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
        loader (callable, optional): Function to load image from path
    """
    def __init__(self, root: str, transform=None, target_transform=None, download: bool = False, loader=None): ...

class RenderedSST2(VisionDataset):
    """
    Rendered SST2 dataset for optical character recognition with sentiment.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'val', 'test')
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
        loader (callable, optional): Function to load image from path
    """
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False, loader=None): ...

class Imagenette(VisionDataset):
    """
    Imagenette dataset - subset of ImageNet with 10 classes.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'val')
        size (str): Image size ('full', '320px', '160px')
        download (bool): If True, downloads dataset if not found
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        loader (callable, optional): Function to load image from path
    """
    def __init__(self, root: str, split: str = 'train', size: str = 'full', download: bool = False, transform=None, target_transform=None, loader=None): ...

Object Detection and Segmentation Datasets

Datasets for object detection, instance segmentation, and semantic segmentation tasks.

class CocoDetection(VisionDataset):
    """
    COCO dataset for object detection.
    
    Args:
        root (str): Root directory containing images
        annFile (str): Path to annotation file
        transform (callable, optional): Transform to apply to image
        target_transform (callable, optional): Transform to apply to target
        transforms (callable, optional): Transform to apply to image and target
    """
    def __init__(self, root: str, annFile: str, transform=None, target_transform=None, transforms=None): ...

class CocoCaptions(VisionDataset):
    """
    COCO dataset for image captioning.
    
    Args:
        root (str): Root directory containing images
        annFile (str): Path to annotation file
        transform (callable, optional): Transform to apply to image
        target_transform (callable, optional): Transform to apply to target
        transforms (callable, optional): Transform to apply to image and target
    """
    def __init__(self, root: str, annFile: str, transform=None, target_transform=None, transforms=None): ...

class VOCDetection(VisionDataset):
    """
    Pascal VOC dataset for object detection.
    
    Args:
        root (str): Root directory for dataset files
        year (str): Dataset year ('2007', '2008', '2009', '2010', '2011', '2012')
        image_set (str): Image set ('train', 'trainval', 'val', 'test')
        download (bool): If True, downloads dataset if not found
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        transforms (callable, optional): Transform to apply to image and target
    """
    def __init__(self, root: str, year: str = '2012', image_set: str = 'train', download: bool = False, transform=None, target_transform=None, transforms=None): ...

class VOCSegmentation(VisionDataset):
    """
    Pascal VOC dataset for semantic segmentation.
    
    Args:
        root (str): Root directory for dataset files
        year (str): Dataset year ('2007', '2008', '2009', '2010', '2011', '2012')
        image_set (str): Image set ('train', 'trainval', 'val', 'test')
        download (bool): If True, downloads dataset if not found
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        transforms (callable, optional): Transform to apply to image and target
    """
    def __init__(self, root: str, year: str = '2012', image_set: str = 'train', download: bool = False, transform=None, target_transform=None, transforms=None): ...

class Cityscapes(VisionDataset):
    """
    Cityscapes dataset for semantic segmentation.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'test', 'val')
        mode (str): Quality mode ('fine', 'coarse')
        target_type (str or list): Target type ('instance', 'semantic', 'polygon', 'color')
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        transforms (callable, optional): Transform to apply to image and target
    """
    def __init__(self, root: str, split: str = 'train', mode: str = 'fine', target_type: str = 'instance', transform=None, target_transform=None, transforms=None): ...

class SBDataset(VisionDataset):
    """
    Semantic Boundaries Dataset.
    
    Args:
        root (str): Root directory for dataset files
        image_set (str): Image set ('train', 'val'; upstream also accepts 'train_noval' -- confirm)
        mode (str): Target mode ('boundaries', 'segmentation')
        download (bool): If True, downloads dataset if not found
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
    """
    def __init__(self, root: str, image_set: str = 'train', mode: str = 'boundaries', download: bool = False, transform=None, target_transform=None): ...

class WIDERFace(VisionDataset):
    """
    WIDER FACE dataset for face detection.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'val', 'test')
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
    """
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...

class Kitti(VisionDataset):
    """
    KITTI dataset for object detection.
    
    Args:
        root (str): Root directory for dataset files
        train (bool): If True, creates dataset from training set, otherwise test set
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        transforms (callable, optional): Transform to apply to image and target
        download (bool): If True, downloads dataset if not found
    """
    def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, transforms=None, download: bool = False): ...

Video Datasets

Datasets for video analysis and action recognition tasks.

class Kinetics(VisionDataset):
    """
    Kinetics dataset for action recognition.
    
    Args:
        root (str): Root directory for dataset files
        frames_per_clip (int): Number of frames per video clip
        num_classes (str): Number of classes ('400', '600', '700')
        split (str): Dataset split ('train', 'val')
        frame_rate (int, optional): Target frame rate for clips
        step_between_clips (int): Number of frames between consecutive clips
        transform (callable, optional): Transform to apply to video clips
        download (bool): If True, downloads dataset if not found
        num_download_workers (int): Number of workers for downloading
        num_extract_workers (int): Number of workers for extraction
    """
    def __init__(self, root: str, frames_per_clip: int, num_classes: str = '400', split: str = 'train', frame_rate=None, step_between_clips: int = 1, transform=None, download: bool = False, num_download_workers: int = 1, num_extract_workers: int = 1): ...

class HMDB51(VisionDataset):
    """
    HMDB51 action recognition dataset.
    
    Args:
        root (str): Root directory for dataset files
        annotation_path (str): Path to annotation files
        frames_per_clip (int): Number of frames per video clip
        step_between_clips (int): Number of frames between consecutive clips
        fold (int): Which fold to load (1, 2, or 3)
        train (bool): If True, creates dataset from training set
        transform (callable, optional): Transform to apply to video clips
        num_workers (int): Number of workers for video loading
    """
    def __init__(self, root: str, annotation_path: str, frames_per_clip: int, step_between_clips: int = 1, fold: int = 1, train: bool = True, transform=None, num_workers: int = 1): ...

class UCF101(VisionDataset):
    """
    UCF101 action recognition dataset with 101 action classes.
    
    Args:
        root (str): Root directory for dataset files
        annotation_path (str): Path to annotation files
        frames_per_clip (int): Number of frames per video clip
        step_between_clips (int): Number of frames between consecutive clips
        fold (int): Which fold to load (1, 2, or 3)
        train (bool): If True, creates dataset from training set
        transform (callable, optional): Transform to apply to video clips
        num_workers (int): Number of workers for video loading
    """
    def __init__(self, root: str, annotation_path: str, frames_per_clip: int, step_between_clips: int = 1, fold: int = 1, train: bool = True, transform=None, num_workers: int = 1): ...

class MovingMNIST(VisionDataset):
    """
    Moving MNIST dataset for video prediction.
    
    Args:
        root (str): Root directory for dataset files
        split (str, optional): Dataset split ('train', 'test')
        transform (callable, optional): Transform to apply to video data
        download (bool): If True, downloads dataset if not found
    """
    def __init__(self, root: str, split=None, transform=None, download: bool = True): ...

Optical Flow and Stereo Datasets

Datasets for optical flow estimation and stereo vision tasks.

class FlyingChairs(VisionDataset):
    """
    FlyingChairs optical flow dataset.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'val')
        transform (callable, optional): Transform to apply to samples
        target_transform (callable, optional): Transform to apply to flow
    """
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None): ...

class FlyingThings3D(VisionDataset):
    """
    FlyingThings3D optical flow dataset.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'test')
        pass_name (str): Rendering pass ('clean', 'final')
        camera (str): Camera view ('left', 'right')
        transform (callable, optional): Transform to apply to samples
        target_transform (callable, optional): Transform to apply to flow
    """
    def __init__(self, root: str, split: str = 'train', pass_name: str = 'clean', camera: str = 'left', transform=None, target_transform=None): ...

class Sintel(VisionDataset):
    """
    MPI Sintel optical flow dataset.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'test')
        pass_name (str): Rendering pass ('clean', 'final')
        transform (callable, optional): Transform to apply to samples
        target_transform (callable, optional): Transform to apply to flow
    """
    def __init__(self, root: str, split: str = 'train', pass_name: str = 'clean', transform=None, target_transform=None): ...

class KittiFlow(VisionDataset):
    """
    KITTI optical flow dataset.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'test')
        transform (callable, optional): Transform to apply to samples
    """
    def __init__(self, root: str, split: str = 'train', transform=None): ...

class HD1K(VisionDataset):
    """
    HD1K optical flow dataset.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'test')
        transform (callable, optional): Transform to apply to samples
    """
    def __init__(self, root: str, split: str = 'train', transform=None): ...

class Kitti2012Stereo(VisionDataset):
    """
    KITTI 2012 stereo dataset.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'test')
        transform (callable, optional): Transform to apply to samples
    """
    def __init__(self, root: str, split: str = 'train', transform=None): ...

class Kitti2015Stereo(VisionDataset):
    """
    KITTI 2015 stereo dataset.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'test')
        transform (callable, optional): Transform to apply to samples
    """
    def __init__(self, root: str, split: str = 'train', transform=None): ...

class CarlaStereo(VisionDataset):
    """
    CARLA stereo dataset (synthetic scenes from the CARLA simulator).
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split
        transform (callable, optional): Transform to apply to samples
    """
    def __init__(self, root: str, split: str = 'train', transform=None): ...

class Middlebury2014Stereo(VisionDataset):
    """
    Middlebury 2014 stereo dataset.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'test'; upstream also offers 'additional' -- confirm)
        transform (callable, optional): Transform to apply to samples
    """
    def __init__(self, root: str, split: str = 'train', transform=None): ...

class CREStereo(VisionDataset):
    """
    CREStereo synthetic stereo dataset.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split
        transform (callable, optional): Transform to apply to samples
    """
    def __init__(self, root: str, split: str = 'train', transform=None): ...

class FallingThingsStereo(VisionDataset):
    """
    Falling Things stereo dataset.
    
    Args:
        root (str): Root directory for dataset files
        variant (str): Scene variant (e.g. 'single', 'mixed' -- confirm exact set upstream)
        split (str): Dataset split
        transform (callable, optional): Transform to apply to samples
    """
    def __init__(self, root: str, variant: str = 'single', split: str = 'train', transform=None): ...

class SceneFlowStereo(VisionDataset):
    """
    Scene Flow stereo dataset.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split
        pass_name (str): Rendering pass ('clean', 'final')
        transform (callable, optional): Transform to apply to samples
    """
    def __init__(self, root: str, split: str = 'train', pass_name: str = 'clean', transform=None): ...

class SintelStereo(VisionDataset):
    """
    Sintel stereo dataset.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split
        pass_name (str): Rendering pass ('clean', 'final')
        transform (callable, optional): Transform to apply to samples
    """
    def __init__(self, root: str, split: str = 'train', pass_name: str = 'clean', transform=None): ...

class InStereo2k(VisionDataset):
    """
    InStereo2k stereo dataset.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'test')
        transform (callable, optional): Transform to apply to samples
    """
    def __init__(self, root: str, split: str = 'train', transform=None): ...

class ETH3DStereo(VisionDataset):
    """
    ETH3D stereo dataset.
    
    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'test')
        transform (callable, optional): Transform to apply to samples
    """
    def __init__(self, root: str, split: str = 'train', transform=None): ...

Image Captioning and Matching Datasets

Datasets for image captioning, patch matching, and face recognition tasks.

class SBU(VisionDataset):
    """
    SBU Captioned Photo dataset for image captioning.
    
    Args:
        root (str): Root directory for dataset files
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): If True, downloads dataset if not found
        loader (callable, optional): Function to load image from path
    """
    def __init__(self, root: str, transform=None, target_transform=None, download: bool = True, loader=None): ...

class Flickr8k(VisionDataset):
    """
    Flickr8k dataset for image captioning.
    
    Args:
        root (str): Root directory for dataset files
        ann_file (str): Path to annotation file
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        loader (callable, optional): Function to load image from path
    """
    def __init__(self, root: str, ann_file: str, transform=None, target_transform=None, loader=None): ...

class Flickr30k(VisionDataset):
    """
    Flickr30k dataset for image captioning.
    
    Args:
        root (str): Root directory for dataset files
        ann_file (str): Path to annotation file
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        loader (callable, optional): Function to load image from path
    """
    def __init__(self, root: str, ann_file: str, transform=None, target_transform=None, loader=None): ...

class PhotoTour(VisionDataset):
    """
    Multi-view Stereo Correspondence dataset for patch matching.

    Args:
        root (str): Root directory for dataset files
        name (str): Dataset name ('notredame_harris', 'yosemite_harris', 'liberty_harris', 'notredame', 'yosemite', 'liberty')
        train (bool): If True, creates dataset for training patches, otherwise for matching pairs (default: True)
        transform (callable, optional): Transform to apply to patches
        download (bool): If True, downloads dataset if not found (default: False)
    """
    def __init__(self, root: str, name: str, train: bool = True, transform=None, download: bool = False): ...

class LFWPeople(VisionDataset):
    """
    LFW (Labeled Faces in the Wild) People dataset for face recognition.

    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'test', '10fold'; default: '10fold')
        image_set (str): Image processing type ('original', 'funneled', 'deepfunneled'; default: 'funneled')
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): NOT SUPPORTED - manual download required
        loader (callable, optional): Function to load image from path
    """
    def __init__(self, root: str, split: str = '10fold', image_set: str = 'funneled', transform=None, target_transform=None, download: bool = False, loader=None): ...

class LFWPairs(VisionDataset):
    """
    LFW (Labeled Faces in the Wild) Pairs dataset for face verification.

    Args:
        root (str): Root directory for dataset files
        split (str): Dataset split ('train', 'test', '10fold'; default: '10fold')
        image_set (str): Image processing type ('original', 'funneled', 'deepfunneled'; default: 'funneled')
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        download (bool): NOT SUPPORTED - manual download required
        loader (callable, optional): Function to load image from path
    """
    def __init__(self, root: str, split: str = '10fold', image_set: str = 'funneled', transform=None, target_transform=None, download: bool = False, loader=None): ...

Utility Datasets and Functions

Helper datasets and utilities for testing and dataset manipulation.

class FakeData(VisionDataset):
    """
    Generates fake image data for testing purposes; requires no
    root directory or download.

    Args:
        size (int): Dataset size (default: 1000)
        image_size (tuple): Image dimensions (channels, height, width) (default: (3, 224, 224))
        num_classes (int): Number of classes (default: 10)
        transform (callable, optional): Transform to apply to PIL image
        target_transform (callable, optional): Transform to apply to target
        random_offset (int): Random seed offset (default: 0)
    """
    def __init__(self, size: int = 1000, image_size: tuple = (3, 224, 224), num_classes: int = 10, transform=None, target_transform=None, random_offset: int = 0): ...

def wrap_dataset_for_transforms_v2(dataset, target_keys=None):
    """
    Wrap a dataset so its samples are compatible with torchvision's
    v2 transforms API.

    Args:
        dataset: Dataset to wrap
        target_keys (sequence, optional): Keys to keep in the target
            dictionary; if None, a default selection is used

    Returns:
        Wrapped dataset compatible with v2 transforms
    """

Samplers

Sampling strategies for dataset loading and batching.

# Available in torchvision.datasets.samplers
# Provides sampling strategies for efficient dataset loading — notably
# clip samplers for video datasets (uniform and random clip sampling)

Usage Examples

Basic Image Classification Dataset

from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Define transforms
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    # Commonly used ImageNet channel statistics (mean/std per RGB channel)
    transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                        std=[0.229, 0.224, 0.225])
])

# Load CIFAR-10 training split (downloads to ./data on first use)
train_dataset = datasets.CIFAR10(
    root='./data', 
    train=True,
    download=True, 
    transform=transform
)

# Load the held-out test split
test_dataset = datasets.CIFAR10(
    root='./data', 
    train=False,
    download=True, 
    transform=transform
)

# Create data loaders; shuffle only the training data
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

Object Detection Dataset

from torchvision import datasets, transforms as T

# Define transforms for detection (applied to the image only)
transform = T.Compose([
    T.ToTensor(),
])

# Load COCO detection dataset (images and annotation file must
# already be downloaded to the given paths)
dataset = datasets.CocoDetection(
    root='/path/to/coco/images/train2017',
    annFile='/path/to/coco/annotations/instances_train2017.json',
    transform=transform
)

# Each item returns (image, target) where target is a list of annotation dicts
image, target = dataset[0]

Custom Dataset with ImageFolder

from torchvision import datasets, transforms

# For datasets organized as: root/class_name/image_files
# (each subdirectory name becomes a class label)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

dataset = datasets.ImageFolder(
    root='/path/to/custom/dataset',
    transform=transform
)

# Access class names and the name -> integer label mapping
print(dataset.classes)
print(dataset.class_to_idx)

Install with Tessl CLI

npx tessl i tessl/pypi-torchvision

docs

datasets.md

index.md

io.md

models.md

ops.md

transforms.md

tv_tensors.md

utils.md

tile.json