Tessl Tile for pypi/torchvision@0.23.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

datasets.md index.md io.md models.md ops.md transforms.md tv_tensors.md utils.md

docs/datasets.md

0
# Datasets
1

2
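TorchVision provides a comprehensive collection of computer vision datasets. Most can download themselves automatically and cache their files under a `root` directory, and all accept optional transform callables for preprocessing samples and targets. The datasets module supports image classification, object detection, segmentation, video analysis, optical flow, stereo vision, and image captioning tasks.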
3

4
## Capabilities
5

6
### Base Dataset Classes
7

8
Foundation classes for building and working with vision datasets.
9

10
```python { .api }
11
class VisionDataset:
12
    """
13
    Base class for all vision datasets.
14
    
15
    Args:
16
        root (str): Root directory of dataset
17
        transforms (callable, optional): Function/transform to apply to data
18
        transform (callable, optional): Function/transform to apply to PIL image
19
        target_transform (callable, optional): Function/transform to apply to target
20
    """
21
    def __init__(self, root: str, transforms=None, transform=None, target_transform=None): ...
22
    def __getitem__(self, index: int): ...
23
    def __len__(self) -> int: ...
24

25
class DatasetFolder(VisionDataset):
26
    """
27
    Generic data loader for datasets in folder format.
28
    
29
    Args:
30
        root (str): Root directory path
31
        loader (callable): Function to load a sample from path
32
        extensions (tuple): Allowed extensions
33
        transform (callable, optional): Transform to apply to samples
34
        target_transform (callable, optional): Transform to apply to targets
35
        is_valid_file (callable, optional): Function to check file validity
36
    """
37
    def __init__(self, root: str, loader, extensions=None, transform=None, target_transform=None, is_valid_file=None): ...
38

39
class ImageFolder(DatasetFolder):
40
    """
41
    Data loader for image classification datasets in folder format.
42
    Expected structure: root/class_x/xxx.ext
43
    
44
    Args:
45
        root (str): Root directory path
46
        transform (callable, optional): Transform to apply to PIL images
47
        target_transform (callable, optional): Transform to apply to targets
48
        loader (callable, optional): Function to load image from path
49
        is_valid_file (callable, optional): Function to check file validity
50
    """
51
    def __init__(self, root: str, transform=None, target_transform=None, loader=None, is_valid_file=None): ...
52
```
53

54
### Image Classification Datasets
55

56
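Standard datasets for image classification tasks with optional `transform`/`target_transform` preprocessing. Most accept a `download` flag to fetch the data automatically; a few (e.g. `ImageNet`, `FER2013`) have no `download` parameter, so their files must already be present under `root`.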
57

58
```python { .api }
59
class MNIST(VisionDataset):
60
    """
61
    MNIST handwritten digit dataset.
62
    
63
    Args:
64
        root (str): Root directory for dataset files
65
        train (bool): If True, creates dataset from training set, else test set
66
        transform (callable, optional): Transform to apply to PIL image
67
        target_transform (callable, optional): Transform to apply to target
68
        download (bool): If True, downloads dataset if not found at root
69
    """
70
    def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...
71

72
class FashionMNIST(VisionDataset):
73
    """Fashion-MNIST dataset of clothing images."""
74
    def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...
75

76
class KMNIST(VisionDataset):
77
    """Kuzushiji-MNIST dataset of Japanese characters."""
78
    def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...
79

80
class EMNIST(VisionDataset):
81
    """
82
    Extended MNIST dataset.
83
    
84
    Args:
85
        root (str): Root directory for dataset files
86
        split (str): Dataset split ('byclass', 'bymerge', 'balanced', 'letters', 'digits', 'mnist')
87
        train (bool): If True, creates dataset from training set
88
        transform (callable, optional): Transform to apply to PIL image
89
        target_transform (callable, optional): Transform to apply to target
90
        download (bool): If True, downloads dataset if not found
91
    """
92
    def __init__(self, root: str, split: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...
93

94
class CIFAR10(VisionDataset):
95
    """
96
    CIFAR-10 dataset of 32x32 color images in 10 classes.
97
    
98
    Args:
99
        root (str): Root directory for dataset files
100
        train (bool): If True, creates dataset from training set, else test set
101
        transform (callable, optional): Transform to apply to PIL image
102
        target_transform (callable, optional): Transform to apply to target
103
        download (bool): If True, downloads dataset if not found at root
104
    """
105
    def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...
106

107
class CIFAR100(VisionDataset):
108
    """CIFAR-100 dataset with 100 classes grouped into 20 superclasses."""
109
    def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...
110

111
class SVHN(VisionDataset):
112
    """
113
    Street View House Numbers dataset.
114
    
115
    Args:
116
        root (str): Root directory for dataset files
117
        split (str): Dataset split ('train', 'test', 'extra')
118
        transform (callable, optional): Transform to apply to PIL image
119
        target_transform (callable, optional): Transform to apply to target
120
        download (bool): If True, downloads dataset if not found
121
    """
122
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...
123

124
class ImageNet(VisionDataset):
125
    """
126
    ImageNet dataset for large-scale image classification.
127
    
128
    Args:
129
        root (str): Root directory containing 'train' and 'val' folders
130
        split (str): Dataset split ('train', 'val')
131
        transform (callable, optional): Transform to apply to PIL image
132
        target_transform (callable, optional): Transform to apply to target
133
    """
134
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None): ...
135

136
class Caltech101(VisionDataset):
137
    """Caltech 101 dataset with 101 object categories."""
138
    def __init__(self, root: str, target_type='category', transform=None, target_transform=None, download: bool = False): ...
139

140
class Caltech256(VisionDataset):
141
    """Caltech 256 dataset with 256 object categories."""
142
    def __init__(self, root: str, transform=None, target_transform=None, download: bool = False): ...
143

144
class CelebA(VisionDataset):
145
    """
146
    CelebA face dataset with attributes.
147
    
148
    Args:
149
        root (str): Root directory for dataset files
150
        split (str): Dataset split ('train', 'valid', 'test', 'all')
151
        target_type (str): Target type ('attr', 'identity', 'bbox', 'landmarks')
152
        transform (callable, optional): Transform to apply to PIL image
153
        target_transform (callable, optional): Transform to apply to target
154
        download (bool): If True, downloads dataset if not found
155
    """
156
    def __init__(self, root: str, split: str = 'train', target_type: str = 'attr', transform=None, target_transform=None, download: bool = False): ...
157

158
class StanfordCars(VisionDataset):
159
    """Stanford Cars dataset with 196 car classes."""
160
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...
161

162
class Flowers102(VisionDataset):
163
    """Oxford 102 Flower dataset."""
164
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...
165

166
class Food101(VisionDataset):
167
    """Food-101 dataset with 101 food categories."""
168
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...
169

170
class GTSRB(VisionDataset):
171
    """German Traffic Sign Recognition Benchmark."""
172
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...
173

174
class OxfordIIITPet(VisionDataset):
175
    """
176
    Oxford-IIIT Pet dataset.
177
    
178
    Args:
179
        root (str): Root directory for dataset files
180
        split (str): Dataset split ('trainval', 'test')
181
        target_types (str or list): Target types ('category', 'segmentation')
182
        transform (callable, optional): Transform to apply to PIL image
183
        target_transform (callable, optional): Transform to apply to target
184
        download (bool): If True, downloads dataset if not found
185
    """
186
    def __init__(self, root: str, split: str = 'trainval', target_types='category', transform=None, target_transform=None, download: bool = False): ...
187

188
class STL10(VisionDataset):
189
    """
190
    STL10 dataset of 96x96 color images in 10 classes.
191
    
192
    Args:
193
        root (str): Root directory for dataset files
194
        split (str): Dataset split ('train', 'test', 'unlabeled', 'train+unlabeled')
195
        folds (int, optional): One of {0-9} or None for training fold selection
196
        transform (callable, optional): Transform to apply to PIL image
197
        target_transform (callable, optional): Transform to apply to target
198
        download (bool): If True, downloads dataset if not found
199
    """
200
    def __init__(self, root: str, split: str = 'train', folds=None, transform=None, target_transform=None, download: bool = False): ...
201

202
class SUN397(VisionDataset):
203
    """
204
    SUN397 scene recognition dataset with 397 categories.
205
    
206
    Args:
207
        root (str): Root directory for dataset files
208
        transform (callable, optional): Transform to apply to PIL image
209
        target_transform (callable, optional): Transform to apply to target
210
        download (bool): If True, downloads dataset if not found
211
        loader (callable, optional): Function to load image from path
212
    """
213
    def __init__(self, root: str, transform=None, target_transform=None, download: bool = False, loader=None): ...
214

215
class SEMEION(VisionDataset):
216
    """
217
    SEMEION handwritten digit dataset with 16x16 grayscale images.
218
    
219
    Args:
220
        root (str): Root directory for dataset files
221
        transform (callable, optional): Transform to apply to PIL image
222
        target_transform (callable, optional): Transform to apply to target
223
        download (bool): If True, downloads dataset if not found
224
    """
225
    def __init__(self, root: str, transform=None, target_transform=None, download: bool = True): ...
226

227
class Omniglot(VisionDataset):
228
    """
229
    Omniglot dataset for few-shot learning with character recognition.
230
    
231
    Args:
232
        root (str): Root directory for dataset files
233
        background (bool): If True, creates dataset from background set, otherwise evaluation set
234
        transform (callable, optional): Transform to apply to PIL image
235
        target_transform (callable, optional): Transform to apply to target
236
        download (bool): If True, downloads dataset if not found
237
        loader (callable, optional): Function to load image from path
238
    """
239
    def __init__(self, root: str, background: bool = True, transform=None, target_transform=None, download: bool = False, loader=None): ...
240

241
class USPS(VisionDataset):
242
    """
243
    USPS handwritten digit dataset with 16x16 grayscale images.
244
    
245
    Args:
246
        root (str): Root directory for dataset files
247
        train (bool): If True, creates dataset from training set, otherwise test set
248
        transform (callable, optional): Transform to apply to PIL image
249
        target_transform (callable, optional): Transform to apply to target
250
        download (bool): If True, downloads dataset if not found
251
    """
252
    def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...
253

254
class QMNIST(VisionDataset):
255
    """
256
    QMNIST extended MNIST dataset with additional metadata.
257
    
258
    Args:
259
        root (str): Root directory for dataset files
260
        what (str, optional): Dataset subset ('train', 'test', 'test10k', 'test50k', 'nist')
261
        compat (bool): If True, returns class labels for MNIST compatibility
262
        train (bool): If True, creates dataset from training set (when what is None)
263
        transform (callable, optional): Transform to apply to PIL image
264
        target_transform (callable, optional): Transform to apply to target
265
        download (bool): If True, downloads dataset if not found
266
    """
267
    def __init__(self, root: str, what=None, compat: bool = True, train: bool = True, transform=None, target_transform=None, download: bool = False): ...
268

269
class Places365(VisionDataset):
270
    """
271
    Places365 scene recognition dataset with 365 scene categories.
272
    
273
    Args:
274
        root (str): Root directory for dataset files
275
        split (str): Dataset split ('train-standard', 'train-challenge', 'val', 'test')
276
        small (bool): If True, uses small (256x256) images instead of high resolution
277
        download (bool): If True, downloads dataset if not found
278
        transform (callable, optional): Transform to apply to PIL image
279
        target_transform (callable, optional): Transform to apply to target
280
        loader (callable, optional): Function to load image from path
281
    """
282
    def __init__(self, root: str, split: str = 'train-standard', small: bool = False, download: bool = False, transform=None, target_transform=None, loader=None): ...
283

284
class INaturalist(VisionDataset):
285
    """
286
    iNaturalist dataset for fine-grained species classification.
287
    
288
    Args:
289
        root (str): Root directory for dataset files
290
        version (str): Dataset version ('2017', '2018', '2019', '2021_train', '2021_train_mini', '2021_valid')
291
        target_type (str or list): Target type ('full', 'kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'super')
292
        transform (callable, optional): Transform to apply to PIL image
293
        target_transform (callable, optional): Transform to apply to target
294
        download (bool): If True, downloads dataset if not found
295
        loader (callable, optional): Function to load image from path
296
    """
297
    def __init__(self, root: str, version: str = '2021_train', target_type='full', transform=None, target_transform=None, download: bool = False, loader=None): ...
298

299
class DTD(VisionDataset):
300
    """
301
    Describable Textures Dataset (DTD) with 47 texture categories.
302
    
303
    Args:
304
        root (str): Root directory for dataset files
305
        split (str): Dataset split ('train', 'val', 'test')
306
        partition (int): Dataset partition (1-10)
307
        transform (callable, optional): Transform to apply to PIL image
308
        target_transform (callable, optional): Transform to apply to target
309
        download (bool): If True, downloads dataset if not found
310
        loader (callable, optional): Function to load image from path
311
    """
312
    def __init__(self, root: str, split: str = 'train', partition: int = 1, transform=None, target_transform=None, download: bool = False, loader=None): ...
313

314
class FER2013(VisionDataset):
315
    """
316
    FER2013 facial expression recognition dataset with 7 emotion classes.
317
    
318
    Args:
319
        root (str): Root directory for dataset files
320
        split (str): Dataset split ('train', 'test')
321
        transform (callable, optional): Transform to apply to PIL image
322
        target_transform (callable, optional): Transform to apply to target
323
    """
324
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None): ...
325

326
class CLEVRClassification(VisionDataset):
327
    """
328
    CLEVR classification dataset for visual reasoning (object counting).
329
    
330
    Args:
331
        root (str): Root directory for dataset files
332
        split (str): Dataset split ('train', 'val', 'test')
333
        transform (callable, optional): Transform to apply to PIL image
334
        target_transform (callable, optional): Transform to apply to target
335
        download (bool): If True, downloads dataset if not found
336
        loader (callable, optional): Function to load image from path
337
    """
338
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False, loader=None): ...
339

340
class PCAM(VisionDataset):
341
    """
342
    PatchCamelyon (PCAM) histopathologic cancer detection dataset.
343
    
344
    Args:
345
        root (str): Root directory for dataset files
346
        split (str): Dataset split ('train', 'val', 'test')
347
        transform (callable, optional): Transform to apply to PIL image
348
        target_transform (callable, optional): Transform to apply to target
349
        download (bool): If True, downloads dataset if not found
350
    """
351
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...
352

353
class Country211(VisionDataset):
354
    """
355
    Country211 dataset for country classification from images.
356
    
357
    Args:
358
        root (str): Root directory for dataset files
359
        split (str): Dataset split ('train', 'valid', 'test')
360
        transform (callable, optional): Transform to apply to PIL image
361
        target_transform (callable, optional): Transform to apply to target
362
        download (bool): If True, downloads dataset if not found
363
        loader (callable, optional): Function to load image from path
364
    """
365
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False, loader=None): ...
366

367
class FGVCAircraft(VisionDataset):
368
    """
369
    FGVC Aircraft dataset for fine-grained aircraft recognition.
370
    
371
    Args:
372
        root (str): Root directory for dataset files
373
        split (str): Dataset split ('train', 'val', 'trainval', 'test')
374
        annotation_level (str): Annotation level ('variant', 'family', 'manufacturer')
375
        transform (callable, optional): Transform to apply to PIL image
376
        target_transform (callable, optional): Transform to apply to target
377
        download (bool): If True, downloads dataset if not found
378
        loader (callable, optional): Function to load image from path
379
    """
380
    def __init__(self, root: str, split: str = 'trainval', annotation_level: str = 'variant', transform=None, target_transform=None, download: bool = False, loader=None): ...
381

382
class EuroSAT(VisionDataset):
383
    """
384
    EuroSAT satellite image classification dataset with 10 land use classes.
385
    
386
    Args:
387
        root (str): Root directory for dataset files
388
        transform (callable, optional): Transform to apply to PIL image
389
        target_transform (callable, optional): Transform to apply to target
390
        download (bool): If True, downloads dataset if not found
391
        loader (callable, optional): Function to load image from path
392
    """
393
    def __init__(self, root: str, transform=None, target_transform=None, download: bool = False, loader=None): ...
394

395
class RenderedSST2(VisionDataset):
396
    """
397
    Rendered SST2 dataset for optical character recognition with sentiment.
398
    
399
    Args:
400
        root (str): Root directory for dataset files
401
        split (str): Dataset split ('train', 'val', 'test')
402
        transform (callable, optional): Transform to apply to PIL image
403
        target_transform (callable, optional): Transform to apply to target
404
        download (bool): If True, downloads dataset if not found
405
        loader (callable, optional): Function to load image from path
406
    """
407
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False, loader=None): ...
408

409
class Imagenette(VisionDataset):
410
    """
411
    Imagenette dataset - subset of ImageNet with 10 classes.
412
    
413
    Args:
414
        root (str): Root directory for dataset files
415
        split (str): Dataset split ('train', 'val')
416
        size (str): Image size ('full', '320px', '160px')
417
        download (bool): If True, downloads dataset if not found
418
        transform (callable, optional): Transform to apply to PIL image
419
        target_transform (callable, optional): Transform to apply to target
420
        loader (callable, optional): Function to load image from path
421
    """
422
    def __init__(self, root: str, split: str = 'train', size: str = 'full', download: bool = False, transform=None, target_transform=None, loader=None): ...
423
```
424

425
### Object Detection and Segmentation Datasets
426

427
Datasets for object detection, instance segmentation, and semantic segmentation tasks.
428

429
```python { .api }
430
class CocoDetection(VisionDataset):
431
    """
432
    COCO dataset for object detection.
433
    
434
    Args:
435
        root (str): Root directory containing images
436
        annFile (str): Path to annotation file
437
        transform (callable, optional): Transform to apply to image
438
        target_transform (callable, optional): Transform to apply to target
439
        transforms (callable, optional): Transform to apply to image and target
440
    """
441
    def __init__(self, root: str, annFile: str, transform=None, target_transform=None, transforms=None): ...
442

443
class CocoCaptions(VisionDataset):
444
    """COCO dataset for image captioning."""
445
    def __init__(self, root: str, annFile: str, transform=None, target_transform=None, transforms=None): ...
446

447
class VOCDetection(VisionDataset):
448
    """
449
    Pascal VOC dataset for object detection.
450
    
451
    Args:
452
        root (str): Root directory for dataset files
453
        year (str): Dataset year ('2007', '2008', '2009', '2010', '2011', '2012')
454
        image_set (str): Image set ('train', 'trainval', 'val', 'test')
455
        download (bool): If True, downloads dataset if not found
456
        transform (callable, optional): Transform to apply to PIL image
457
        target_transform (callable, optional): Transform to apply to target
458
        transforms (callable, optional): Transform to apply to image and target
459
    """
460
    def __init__(self, root: str, year: str = '2012', image_set: str = 'train', download: bool = False, transform=None, target_transform=None, transforms=None): ...
461

462
class VOCSegmentation(VisionDataset):
463
    """Pascal VOC dataset for semantic segmentation."""
464
    def __init__(self, root: str, year: str = '2012', image_set: str = 'train', download: bool = False, transform=None, target_transform=None, transforms=None): ...
465

466
class Cityscapes(VisionDataset):
467
    """
468
    Cityscapes dataset for semantic segmentation.
469
    
470
    Args:
471
        root (str): Root directory for dataset files
472
        split (str): Dataset split ('train', 'test', 'val')
473
        mode (str): Quality mode ('fine', 'coarse')
474
        target_type (str or list): Target type ('instance', 'semantic', 'polygon', 'color')
475
        transform (callable, optional): Transform to apply to PIL image
476
        target_transform (callable, optional): Transform to apply to target
477
        transforms (callable, optional): Transform to apply to image and target
478
    """
479
    def __init__(self, root: str, split: str = 'train', mode: str = 'fine', target_type: str = 'instance', transform=None, target_transform=None, transforms=None): ...
480

481
class SBDataset(VisionDataset):
482
    """Semantic Boundaries Dataset."""
483
    def __init__(self, root: str, image_set: str = 'train', mode: str = 'boundaries', download: bool = False, transform=None, target_transform=None): ...
484

485
class WIDERFace(VisionDataset):
486
    """
487
    WIDER FACE dataset for face detection.
488
    
489
    Args:
490
        root (str): Root directory for dataset files
491
        split (str): Dataset split ('train', 'val', 'test')
492
        transform (callable, optional): Transform to apply to PIL image
493
        target_transform (callable, optional): Transform to apply to target
494
        download (bool): If True, downloads dataset if not found
495
    """
496
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...
497

498
class Kitti(VisionDataset):
499
    """
500
    KITTI dataset for object detection.
501
    
502
    Args:
503
        root (str): Root directory for dataset files
504
        train (bool): If True, creates dataset from training set, otherwise test set
505
        transform (callable, optional): Transform to apply to PIL image
506
        target_transform (callable, optional): Transform to apply to target
507
        transforms (callable, optional): Transform to apply to image and target
508
        download (bool): If True, downloads dataset if not found
509
    """
510
    def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, transforms=None, download: bool = False): ...
511
```
512

513
### Video Datasets
514

515
Datasets for video analysis and action recognition tasks.
516

517
```python { .api }
518
class Kinetics(VisionDataset):
519
    """
520
    Kinetics dataset for action recognition.
521
    
522
    Args:
523
        root (str): Root directory for dataset files
524
        frames_per_clip (int): Number of frames per video clip
525
        num_classes (str): Number of classes ('400', '600', '700')
526
        split (str): Dataset split ('train', 'val')
527
        frame_rate (int, optional): Target frame rate for clips
528
        step_between_clips (int): Number of frames between consecutive clips
529
        transform (callable, optional): Transform to apply to video clips
530
        download (bool): If True, downloads dataset if not found
531
        num_download_workers (int): Number of workers for downloading
532
        num_extract_workers (int): Number of workers for extraction
533
    """
534
    def __init__(self, root: str, frames_per_clip: int, num_classes: str = '400', split: str = 'train', frame_rate=None, step_between_clips: int = 1, transform=None, download: bool = False, num_download_workers: int = 1, num_extract_workers: int = 1): ...
535

536
class HMDB51(VisionDataset):
537
    """
538
    HMDB51 action recognition dataset.
539
    
540
    Args:
541
        root (str): Root directory for dataset files
542
        annotation_path (str): Path to annotation files
543
        frames_per_clip (int): Number of frames per video clip
544
        step_between_clips (int): Number of frames between consecutive clips
545
        fold (int): Which fold to load (1, 2, or 3)
546
        train (bool): If True, creates dataset from training set
547
        transform (callable, optional): Transform to apply to video clips
548
        num_workers (int): Number of workers for video loading
549
    """
550
    def __init__(self, root: str, annotation_path: str, frames_per_clip: int, step_between_clips: int = 1, fold: int = 1, train: bool = True, transform=None, num_workers: int = 1): ...
551

552
class UCF101(VisionDataset):
553
    """UCF101 action recognition dataset with 101 action classes."""
554
    def __init__(self, root: str, annotation_path: str, frames_per_clip: int, step_between_clips: int = 1, fold: int = 1, train: bool = True, transform=None, num_workers: int = 1): ...
555

556
class MovingMNIST(VisionDataset):
557
    """
558
    Moving MNIST dataset for video prediction.
559
    
560
    Args:
561
        root (str): Root directory for dataset files
562
        split (str, optional): Dataset split ('train', 'test')
563
        transform (callable, optional): Transform to apply to video data
564
        download (bool): If True, downloads dataset if not found
565
    """
566
    def __init__(self, root: str, split=None, transform=None, download: bool = True): ...
567
```
568

569
### Optical Flow and Stereo Datasets
570

571
Datasets for optical flow estimation and stereo vision tasks.
572

573
```python { .api }
574
class FlyingChairs(VisionDataset):
575
    """
576
    FlyingChairs optical flow dataset.
577
    
578
    Args:
579
        root (str): Root directory for dataset files
580
        split (str): Dataset split ('train', 'val')
581
        transform (callable, optional): Transform to apply to samples
582
        target_transform (callable, optional): Transform to apply to flow
583
    """
584
    def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None): ...
585

586
class FlyingThings3D(VisionDataset):
587
    """FlyingThings3D optical flow dataset."""
588
    def __init__(self, root: str, split: str = 'train', pass_name: str = 'clean', camera: str = 'left', transform=None, target_transform=None): ...
589

590
class Sintel(VisionDataset):
591
    """
592
    MPI Sintel optical flow dataset.
593
    
594
    Args:
595
        root (str): Root directory for dataset files
596
        split (str): Dataset split ('train', 'test')
597
        pass_name (str): Rendering pass ('clean', 'final')
598
        transform (callable, optional): Transform to apply to samples
599
        target_transform (callable, optional): Transform to apply to flow
600
    """
601
    def __init__(self, root: str, split: str = 'train', pass_name: str = 'clean', transform=None, target_transform=None): ...
602

603
class KittiFlow(VisionDataset):
604
    """KITTI optical flow dataset."""
605
    def __init__(self, root: str, split: str = 'train', transform=None): ...
606

607
class HD1K(VisionDataset):
608
    """HD1K optical flow dataset."""
609
    def __init__(self, root: str, split: str = 'train', transform=None): ...
610

611
class Kitti2012Stereo(VisionDataset):
612
    """
613
    KITTI 2012 stereo dataset.
614
    
615
    Args:
616
        root (str): Root directory for dataset files
617
        split (str): Dataset split ('train', 'test')
618
        transform (callable, optional): Transform to apply to samples
619
    """
620
    def __init__(self, root: str, split: str = 'train', transform=None): ...
621

622
class Kitti2015Stereo(VisionDataset):
623
    """KITTI 2015 stereo dataset."""
624
    def __init__(self, root: str, split: str = 'train', transform=None): ...
625

626
class CarlaStereo(VisionDataset):
627
    """CARLA stereo dataset."""
628
    def __init__(self, root: str, split: str = 'train', transform=None): ...
629

630
class Middlebury2014Stereo(VisionDataset):
631
    """Middlebury 2014 stereo dataset."""
632
    def __init__(self, root: str, split: str = 'train', transform=None): ...
633

634
class CREStereo(VisionDataset):
635
    """CREStereo dataset."""
636
    def __init__(self, root: str, split: str = 'train', transform=None): ...
637

638
class FallingThingsStereo(VisionDataset):
639
    """Falling Things stereo dataset."""
640
    def __init__(self, root: str, variant: str = 'single', transforms=None): ...
641

642
class SceneFlowStereo(VisionDataset):
643
    """Scene Flow stereo dataset."""
644
    def __init__(self, root: str, variant: str = 'FlyingThings3D', pass_name: str = 'clean', transforms=None): ...
645

646
class SintelStereo(VisionDataset):
647
    """Sintel stereo dataset."""
648
    def __init__(self, root: str, pass_name: str = 'final', transforms=None): ...
649

650
class InStereo2k(VisionDataset):
651
    """InStereo2k dataset."""
652
    def __init__(self, root: str, split: str = 'train', transforms=None): ...
653

654
class ETH3DStereo(VisionDataset):
655
    """ETH3D stereo dataset."""
656
    def __init__(self, root: str, split: str = 'train', transforms=None): ...
657
```
658

659
### Image Captioning and Matching Datasets
660

661
Datasets for image captioning, patch matching, and face recognition tasks.
662

663
```python { .api }
664
class SBU(VisionDataset):
665
    """
666
    SBU Captioned Photo dataset for image captioning.
667
    
668
    Args:
669
        root (str): Root directory for dataset files
670
        transform (callable, optional): Transform to apply to PIL image
671
        target_transform (callable, optional): Transform to apply to target
672
        download (bool): If True, downloads dataset if not found
673
        loader (callable, optional): Function to load image from path
674
    """
675
    def __init__(self, root: str, transform=None, target_transform=None, download: bool = True, loader=None): ...
676

677
class Flickr8k(VisionDataset):
678
    """
679
    Flickr8k dataset for image captioning.
680
    
681
    Args:
682
        root (str): Root directory for dataset files
683
        ann_file (str): Path to annotation file
684
        transform (callable, optional): Transform to apply to PIL image
685
        target_transform (callable, optional): Transform to apply to target
686
        loader (callable, optional): Function to load image from path
687
    """
688
    def __init__(self, root: str, ann_file: str, transform=None, target_transform=None, loader=None): ...
689

690
class Flickr30k(VisionDataset):
691
    """
692
    Flickr30k dataset for image captioning.
693
    
694
    Args:
695
        root (str): Root directory for dataset files
696
        ann_file (str): Path to annotation file
697
        transform (callable, optional): Transform to apply to PIL image
698
        target_transform (callable, optional): Transform to apply to target
699
        loader (callable, optional): Function to load image from path
700
    """
701
    def __init__(self, root: str, ann_file: str, transform=None, target_transform=None, loader=None): ...
702

703
class PhotoTour(VisionDataset):
704
    """
705
    Multi-view Stereo Correspondence dataset for patch matching.
706
    
707
    Args:
708
        root (str): Root directory for dataset files
709
        name (str): Dataset name ('notredame_harris', 'yosemite_harris', 'liberty_harris', 'notredame', 'yosemite', 'liberty')
710
        train (bool): If True, creates dataset for training patches, otherwise for matching pairs
711
        transform (callable, optional): Transform to apply to patches
712
        download (bool): If True, downloads dataset if not found
713
    """
714
    def __init__(self, root: str, name: str, train: bool = True, transform=None, download: bool = False): ...
715

716
class LFWPeople(VisionDataset):
717
    """
718
    LFW People dataset for face recognition.
719
    
720
    Args:
721
        root (str): Root directory for dataset files
722
        split (str): Dataset split ('train', 'test', '10fold')
723
        image_set (str): Image processing type ('original', 'funneled', 'deepfunneled')
724
        transform (callable, optional): Transform to apply to PIL image
725
        target_transform (callable, optional): Transform to apply to target
726
        download (bool): NOT SUPPORTED - manual download required
727
        loader (callable, optional): Function to load image from path
728
    """
729
    def __init__(self, root: str, split: str = '10fold', image_set: str = 'funneled', transform=None, target_transform=None, download: bool = False, loader=None): ...
730

731
class LFWPairs(VisionDataset):
732
    """
733
    LFW Pairs dataset for face verification.
734
    
735
    Args:
736
        root (str): Root directory for dataset files
737
        split (str): Dataset split ('train', 'test', '10fold')
738
        image_set (str): Image processing type ('original', 'funneled', 'deepfunneled')
739
        transform (callable, optional): Transform to apply to PIL image
740
        target_transform (callable, optional): Transform to apply to target
741
        download (bool): NOT SUPPORTED - manual download required
742
        loader (callable, optional): Function to load image from path
743
    """
744
    def __init__(self, root: str, split: str = '10fold', image_set: str = 'funneled', transform=None, target_transform=None, download: bool = False, loader=None): ...
745
```
746

747
### Utility Datasets and Functions
748

749
Helper datasets and utilities for testing and dataset manipulation.
750

751
```python { .api }
752
class FakeData(VisionDataset):
753
    """
754
    Generates fake data for testing purposes.
755
    
756
    Args:
757
        size (int): Dataset size
758
        image_size (tuple): Image dimensions (channels, height, width)
759
        num_classes (int): Number of classes
760
        transform (callable, optional): Transform to apply to PIL image
761
        target_transform (callable, optional): Transform to apply to target
762
        random_offset (int): Offsets the index-based random seed used to generate each image
763
    """
764
    def __init__(self, size: int = 1000, image_size: tuple = (3, 224, 224), num_classes: int = 10, transform=None, target_transform=None, random_offset: int = 0): ...
765

766
def wrap_dataset_for_transforms_v2(dataset, target_keys=None):
767
    """
768
    Wraps datasets for v2 transforms compatibility.
769
    
770
    Args:
771
        dataset: Dataset to wrap
772
        target_keys (sequence of str, optional): Target keys to return when the target is a dictionary; if None, dataset-specific defaults are used
773
        
774
    Returns:
775
        Wrapped dataset compatible with v2 transforms
776
    """
777
```
778

779
### Samplers
780

781
Samplers for drawing clips from video datasets, including distributed training setups.
782

783
```python { .api }
784
# Available in torchvision.datasets.samplers:
785
# DistributedSampler, UniformClipSampler, RandomClipSampler (clip sampling for video datasets)
786
```
787

788
## Usage Examples
789

790
### Basic Image Classification Dataset
791

792
```python
793
from torchvision import datasets, transforms
794
from torch.utils.data import DataLoader
795

796
# Define transforms
797
transform = transforms.Compose([
798
    transforms.Resize(256),
799
    transforms.CenterCrop(224),
800
    transforms.ToTensor(),
801
    transforms.Normalize(mean=[0.485, 0.456, 0.406], 
802
                        std=[0.229, 0.224, 0.225])
803
])
804

805
# Load CIFAR-10
806
train_dataset = datasets.CIFAR10(
807
    root='./data', 
808
    train=True,
809
    download=True, 
810
    transform=transform
811
)
812

813
test_dataset = datasets.CIFAR10(
814
    root='./data', 
815
    train=False,
816
    download=True, 
817
    transform=transform
818
)
819

820
# Create data loaders
821
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
822
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
823
```
824

825
### Object Detection Dataset
826

827
```python
828
from torchvision import datasets, transforms as T
829

830
# Define transforms for detection
831
transform = T.Compose([
832
    T.ToTensor(),
833
])
834

835
# Load COCO detection dataset
836
dataset = datasets.CocoDetection(
837
    root='/path/to/coco/images/train2017',
838
    annFile='/path/to/coco/annotations/instances_train2017.json',
839
    transform=transform
840
)
841

842
# Each item is an (image, target) pair, where target is a list of annotation dicts
843
image, target = dataset[0]
844
```
845

846
### Custom Dataset with ImageFolder
847

848
```python
849
from torchvision import datasets, transforms
850

851
# For datasets organized as: root/class_name/image_files
852
transform = transforms.Compose([
853
    transforms.Resize((224, 224)),
854
    transforms.ToTensor(),
855
])
856

857
dataset = datasets.ImageFolder(
858
    root='/path/to/custom/dataset',
859
    transform=transform
860
)
861

862
# Access class names
863
print(dataset.classes)
864
print(dataset.class_to_idx)
865
```

Version

Tile

Files

datasets.md docs/

datasets.mddocs/