0
# Datasets
1
2
TorchVision provides a comprehensive collection of computer vision datasets with automatic downloading, caching, and preprocessing. The datasets module supports image classification, object detection, segmentation, video analysis, optical flow, and stereo vision tasks.
3
4
## Capabilities
5
6
### Base Dataset Classes
7
8
Foundation classes for building and working with vision datasets.
9
10
```python { .api }
11
class VisionDataset:
12
"""
13
Base class for all vision datasets.
14
15
Args:
16
root (str): Root directory of dataset
17
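transforms (callable, optional): Function/transform that takes both the input and its target and returns transformed versions of both; cannot be combined with transform/target_transform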
18
transform (callable, optional): Function/transform to apply to PIL image
19
target_transform (callable, optional): Function/transform to apply to target
20
"""
21
def __init__(self, root: str, transforms=None, transform=None, target_transform=None): ...
22
def __getitem__(self, index: int): ...
23
def __len__(self) -> int: ...
24
25
class DatasetFolder(VisionDataset):
26
"""
27
Generic data loader for datasets in folder format.
28
29
Args:
30
root (str): Root directory path
31
loader (callable): Function to load a sample from path
32
extensions (tuple, optional): Allowed file extensions; pass exactly one of extensions or is_valid_file
33
transform (callable, optional): Transform to apply to samples
34
target_transform (callable, optional): Transform to apply to targets
35
is_valid_file (callable, optional): Function to check file validity
36
"""
37
def __init__(self, root: str, loader, extensions=None, transform=None, target_transform=None, is_valid_file=None): ...
38
39
class ImageFolder(DatasetFolder):
40
"""
41
Data loader for image classification datasets in folder format.
42
Expected structure: root/class_x/xxx.ext
43
44
Args:
45
root (str): Root directory path
46
transform (callable, optional): Transform to apply to PIL images
47
target_transform (callable, optional): Transform to apply to targets
48
loader (callable, optional): Function to load image from path
49
is_valid_file (callable, optional): Function to check file validity
50
"""
51
def __init__(self, root: str, transform=None, target_transform=None, loader=None, is_valid_file=None): ...
52
```
53
54
### Image Classification Datasets
55
56
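Standard datasets for image classification tasks. Most can be fetched automatically by passing `download=True`; a few (e.g. ImageNet, FER2013) take no `download` argument and must be obtained manually and placed under `root`.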
57
58
```python { .api }
59
class MNIST(VisionDataset):
60
"""
61
MNIST handwritten digit dataset.
62
63
Args:
64
root (str): Root directory for dataset files
65
train (bool): If True, creates dataset from training set, else test set
66
transform (callable, optional): Transform to apply to PIL image
67
target_transform (callable, optional): Transform to apply to target
68
download (bool): If True, downloads dataset if not found at root
69
"""
70
def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...
71
72
class FashionMNIST(VisionDataset):
73
"""Fashion-MNIST dataset of clothing images."""
74
def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...
75
76
class KMNIST(VisionDataset):
77
"""Kuzushiji-MNIST dataset of Japanese characters."""
78
def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...
79
80
class EMNIST(VisionDataset):
81
"""
82
Extended MNIST dataset.
83
84
Args:
85
root (str): Root directory for dataset files
86
split (str): Dataset split ('byclass', 'bymerge', 'balanced', 'letters', 'digits', 'mnist')
87
train (bool): If True, creates dataset from training set
88
transform (callable, optional): Transform to apply to PIL image
89
target_transform (callable, optional): Transform to apply to target
90
download (bool): If True, downloads dataset if not found
91
"""
92
def __init__(self, root: str, split: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...
93
94
class CIFAR10(VisionDataset):
95
"""
96
CIFAR-10 dataset of 32x32 color images in 10 classes.
97
98
Args:
99
root (str): Root directory for dataset files
100
train (bool): If True, creates dataset from training set, else test set
101
transform (callable, optional): Transform to apply to PIL image
102
target_transform (callable, optional): Transform to apply to target
103
download (bool): If True, downloads dataset if not found at root
104
"""
105
def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...
106
107
class CIFAR100(VisionDataset):
108
"""CIFAR-100 dataset with 100 classes grouped into 20 superclasses."""
109
def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...
110
111
class SVHN(VisionDataset):
112
"""
113
Street View House Numbers dataset.
114
115
Args:
116
root (str): Root directory for dataset files
117
split (str): Dataset split ('train', 'test', 'extra')
118
transform (callable, optional): Transform to apply to PIL image
119
target_transform (callable, optional): Transform to apply to target
120
download (bool): If True, downloads dataset if not found
121
"""
122
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...
123
124
class ImageNet(VisionDataset):
125
"""
126
ImageNet dataset for large-scale image classification.
127
128
Args:
129
root (str): Root directory containing 'train' and 'val' folders
130
split (str): Dataset split ('train', 'val')
131
transform (callable, optional): Transform to apply to PIL image
132
target_transform (callable, optional): Transform to apply to target
133
"""
134
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None): ...
135
136
class Caltech101(VisionDataset):
137
"""Caltech 101 dataset with 101 object categories."""
138
def __init__(self, root: str, target_type='category', transform=None, target_transform=None, download: bool = False): ...
139
140
class Caltech256(VisionDataset):
141
"""Caltech 256 dataset with 256 object categories."""
142
def __init__(self, root: str, transform=None, target_transform=None, download: bool = False): ...
143
144
class CelebA(VisionDataset):
145
"""
146
CelebA face dataset with attributes.
147
148
Args:
149
root (str): Root directory for dataset files
150
split (str): Dataset split ('train', 'valid', 'test', 'all')
151
target_type (str or list): Target type or list of types ('attr', 'identity', 'bbox', 'landmarks'); a list returns a tuple of targets
152
transform (callable, optional): Transform to apply to PIL image
153
target_transform (callable, optional): Transform to apply to target
154
download (bool): If True, downloads dataset if not found
155
"""
156
def __init__(self, root: str, split: str = 'train', target_type: str = 'attr', transform=None, target_transform=None, download: bool = False): ...
157
158
class StanfordCars(VisionDataset):
159
"""Stanford Cars dataset with 196 car classes."""
160
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...
161
162
class Flowers102(VisionDataset):
163
"""Oxford 102 Flower dataset."""
164
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...
165
166
class Food101(VisionDataset):
167
"""Food-101 dataset with 101 food categories."""
168
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...
169
170
class GTSRB(VisionDataset):
171
"""German Traffic Sign Recognition Benchmark."""
172
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...
173
174
class OxfordIIITPet(VisionDataset):
175
"""
176
Oxford-IIIT Pet dataset.
177
178
Args:
179
root (str): Root directory for dataset files
180
split (str): Dataset split ('trainval', 'test')
181
target_types (str or list): Target types ('category', 'segmentation')
182
transform (callable, optional): Transform to apply to PIL image
183
target_transform (callable, optional): Transform to apply to target
184
download (bool): If True, downloads dataset if not found
185
"""
186
def __init__(self, root: str, split: str = 'trainval', target_types='category', transform=None, target_transform=None, download: bool = False): ...
187
188
class STL10(VisionDataset):
189
"""
190
STL10 dataset of 96x96 color images in 10 classes.
191
192
Args:
193
root (str): Root directory for dataset files
194
split (str): Dataset split ('train', 'test', 'unlabeled', 'train+unlabeled')
195
folds (int, optional): One of {0-9} or None for training fold selection
196
transform (callable, optional): Transform to apply to PIL image
197
target_transform (callable, optional): Transform to apply to target
198
download (bool): If True, downloads dataset if not found
199
"""
200
def __init__(self, root: str, split: str = 'train', folds=None, transform=None, target_transform=None, download: bool = False): ...
201
202
class SUN397(VisionDataset):
203
"""
204
SUN397 scene recognition dataset with 397 categories.
205
206
Args:
207
root (str): Root directory for dataset files
208
transform (callable, optional): Transform to apply to PIL image
209
target_transform (callable, optional): Transform to apply to target
210
download (bool): If True, downloads dataset if not found
211
loader (callable, optional): Function to load image from path
212
"""
213
def __init__(self, root: str, transform=None, target_transform=None, download: bool = False, loader=None): ...
214
215
class SEMEION(VisionDataset):
216
"""
217
SEMEION handwritten digit dataset with 16x16 grayscale images.
218
219
Args:
220
root (str): Root directory for dataset files
221
transform (callable, optional): Transform to apply to PIL image
222
target_transform (callable, optional): Transform to apply to target
223
download (bool): If True, downloads dataset if not found
224
"""
225
def __init__(self, root: str, transform=None, target_transform=None, download: bool = True): ...
226
227
class Omniglot(VisionDataset):
228
"""
229
Omniglot dataset for few-shot learning with character recognition.
230
231
Args:
232
root (str): Root directory for dataset files
233
background (bool): If True, creates dataset from background set, otherwise evaluation set
234
transform (callable, optional): Transform to apply to PIL image
235
target_transform (callable, optional): Transform to apply to target
236
download (bool): If True, downloads dataset if not found
237
loader (callable, optional): Function to load image from path
238
"""
239
def __init__(self, root: str, background: bool = True, transform=None, target_transform=None, download: bool = False, loader=None): ...
240
241
class USPS(VisionDataset):
242
"""
243
USPS handwritten digit dataset with 16x16 grayscale images.
244
245
Args:
246
root (str): Root directory for dataset files
247
train (bool): If True, creates dataset from training set, otherwise test set
248
transform (callable, optional): Transform to apply to PIL image
249
target_transform (callable, optional): Transform to apply to target
250
download (bool): If True, downloads dataset if not found
251
"""
252
def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...
253
254
class QMNIST(VisionDataset):
255
"""
256
QMNIST extended MNIST dataset with additional metadata.
257
258
Args:
259
root (str): Root directory for dataset files
260
what (str, optional): Dataset subset ('train', 'test', 'test10k', 'test50k', 'nist')
261
compat (bool): If True, returns class labels for MNIST compatibility
262
train (bool): If True, creates dataset from training set (when what is None)
263
transform (callable, optional): Transform to apply to PIL image
264
target_transform (callable, optional): Transform to apply to target
265
download (bool): If True, downloads dataset if not found
266
"""
267
def __init__(self, root: str, what=None, compat: bool = True, train: bool = True, transform=None, target_transform=None, download: bool = False): ...
268
269
class Places365(VisionDataset):
270
"""
271
Places365 scene recognition dataset with 365 scene categories.
272
273
Args:
274
root (str): Root directory for dataset files
275
split (str): Dataset split ('train-standard', 'train-challenge', 'val', 'test')
276
small (bool): If True, uses small (256x256) images instead of high resolution
277
download (bool): If True, downloads dataset if not found
278
transform (callable, optional): Transform to apply to PIL image
279
target_transform (callable, optional): Transform to apply to target
280
loader (callable, optional): Function to load image from path
281
"""
282
def __init__(self, root: str, split: str = 'train-standard', small: bool = False, download: bool = False, transform=None, target_transform=None, loader=None): ...
283
284
class INaturalist(VisionDataset):
285
"""
286
iNaturalist dataset for fine-grained species classification.
287
288
Args:
289
root (str): Root directory for dataset files
290
version (str): Dataset version ('2017', '2018', '2019', '2021_train', '2021_train_mini', '2021_valid')
291
target_type (str or list): Target type(s). 2021 versions: 'full', 'kingdom', 'phylum', 'class', 'order', 'family', 'genus'; 2017-2019 versions: 'full', 'super'
292
transform (callable, optional): Transform to apply to PIL image
293
target_transform (callable, optional): Transform to apply to target
294
download (bool): If True, downloads dataset if not found
295
loader (callable, optional): Function to load image from path
296
"""
297
def __init__(self, root: str, version: str = '2021_train', target_type='full', transform=None, target_transform=None, download: bool = False, loader=None): ...
298
299
class DTD(VisionDataset):
300
"""
301
Describable Textures Dataset (DTD) with 47 texture categories.
302
303
Args:
304
root (str): Root directory for dataset files
305
split (str): Dataset split ('train', 'val', 'test')
306
partition (int): Dataset partition (1-10)
307
transform (callable, optional): Transform to apply to PIL image
308
target_transform (callable, optional): Transform to apply to target
309
download (bool): If True, downloads dataset if not found
310
loader (callable, optional): Function to load image from path
311
"""
312
def __init__(self, root: str, split: str = 'train', partition: int = 1, transform=None, target_transform=None, download: bool = False, loader=None): ...
313
314
class FER2013(VisionDataset):
315
"""
316
FER2013 facial expression recognition dataset with 7 emotion classes.
317
318
Args:
319
root (str): Root directory for dataset files
320
split (str): Dataset split ('train', 'test')
321
transform (callable, optional): Transform to apply to PIL image
322
target_transform (callable, optional): Transform to apply to target
323
"""
324
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None): ...
325
326
class CLEVRClassification(VisionDataset):
327
"""
328
CLEVR classification dataset for visual reasoning (object counting).
329
330
Args:
331
root (str): Root directory for dataset files
332
split (str): Dataset split ('train', 'val', 'test')
333
transform (callable, optional): Transform to apply to PIL image
334
target_transform (callable, optional): Transform to apply to target
335
download (bool): If True, downloads dataset if not found
336
loader (callable, optional): Function to load image from path
337
"""
338
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False, loader=None): ...
339
340
class PCAM(VisionDataset):
341
"""
342
PatchCamelyon (PCAM) histopathologic cancer detection dataset.
343
344
Args:
345
root (str): Root directory for dataset files
346
split (str): Dataset split ('train', 'val', 'test')
347
transform (callable, optional): Transform to apply to PIL image
348
target_transform (callable, optional): Transform to apply to target
349
download (bool): If True, downloads dataset if not found
350
"""
351
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...
352
353
class Country211(VisionDataset):
354
"""
355
Country211 dataset for country classification from images.
356
357
Args:
358
root (str): Root directory for dataset files
359
split (str): Dataset split ('train', 'valid', 'test')
360
transform (callable, optional): Transform to apply to PIL image
361
target_transform (callable, optional): Transform to apply to target
362
download (bool): If True, downloads dataset if not found
363
loader (callable, optional): Function to load image from path
364
"""
365
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False, loader=None): ...
366
367
class FGVCAircraft(VisionDataset):
368
"""
369
FGVC Aircraft dataset for fine-grained aircraft recognition.
370
371
Args:
372
root (str): Root directory for dataset files
373
split (str): Dataset split ('train', 'val', 'trainval', 'test')
374
annotation_level (str): Annotation level ('variant', 'family', 'manufacturer')
375
transform (callable, optional): Transform to apply to PIL image
376
target_transform (callable, optional): Transform to apply to target
377
download (bool): If True, downloads dataset if not found
378
loader (callable, optional): Function to load image from path
379
"""
380
def __init__(self, root: str, split: str = 'trainval', annotation_level: str = 'variant', transform=None, target_transform=None, download: bool = False, loader=None): ...
381
382
class EuroSAT(VisionDataset):
383
"""
384
EuroSAT satellite image classification dataset with 10 land use classes.
385
386
Args:
387
root (str): Root directory for dataset files
388
transform (callable, optional): Transform to apply to PIL image
389
target_transform (callable, optional): Transform to apply to target
390
download (bool): If True, downloads dataset if not found
391
loader (callable, optional): Function to load image from path
392
"""
393
def __init__(self, root: str, transform=None, target_transform=None, download: bool = False, loader=None): ...
394
395
class RenderedSST2(VisionDataset):
396
"""
397
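Rendered SST2 dataset: sentences from the Stanford Sentiment Treebank v2 rendered as images, used to evaluate optical character recognition through sentiment classification.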
398
399
Args:
400
root (str): Root directory for dataset files
401
split (str): Dataset split ('train', 'val', 'test')
402
transform (callable, optional): Transform to apply to PIL image
403
target_transform (callable, optional): Transform to apply to target
404
download (bool): If True, downloads dataset if not found
405
loader (callable, optional): Function to load image from path
406
"""
407
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False, loader=None): ...
408
409
class Imagenette(VisionDataset):
410
"""
411
Imagenette dataset - subset of ImageNet with 10 classes.
412
413
Args:
414
root (str): Root directory for dataset files
415
split (str): Dataset split ('train', 'val')
416
size (str): Image size ('full', '320px', '160px')
417
download (bool): If True, downloads dataset if not found
418
transform (callable, optional): Transform to apply to PIL image
419
target_transform (callable, optional): Transform to apply to target
420
loader (callable, optional): Function to load image from path
421
"""
422
def __init__(self, root: str, split: str = 'train', size: str = 'full', download: bool = False, transform=None, target_transform=None, loader=None): ...
423
```
424
425
### Object Detection and Segmentation Datasets
426
427
Datasets for object detection, instance segmentation, and semantic segmentation tasks.
428
429
```python { .api }
430
class CocoDetection(VisionDataset):
431
"""
432
COCO dataset for object detection.
433
434
Args:
435
root (str): Root directory containing images
436
annFile (str): Path to annotation file
437
transform (callable, optional): Transform to apply to image
438
target_transform (callable, optional): Transform to apply to target
439
transforms (callable, optional): Transform to apply to image and target
440
"""
441
def __init__(self, root: str, annFile: str, transform=None, target_transform=None, transforms=None): ...
442
443
class CocoCaptions(VisionDataset):
444
"""COCO dataset for image captioning."""
445
def __init__(self, root: str, annFile: str, transform=None, target_transform=None, transforms=None): ...
446
447
class VOCDetection(VisionDataset):
448
"""
449
Pascal VOC dataset for object detection.
450
451
Args:
452
root (str): Root directory for dataset files
453
year (str): Dataset year ('2007', '2008', '2009', '2010', '2011', '2012')
454
image_set (str): Image set ('train', 'trainval', 'val'; 'test' is only available for year '2007')
455
download (bool): If True, downloads dataset if not found
456
transform (callable, optional): Transform to apply to PIL image
457
target_transform (callable, optional): Transform to apply to target
458
transforms (callable, optional): Transform to apply to image and target
459
"""
460
def __init__(self, root: str, year: str = '2012', image_set: str = 'train', download: bool = False, transform=None, target_transform=None, transforms=None): ...
461
462
class VOCSegmentation(VisionDataset):
463
"""Pascal VOC dataset for semantic segmentation."""
464
def __init__(self, root: str, year: str = '2012', image_set: str = 'train', download: bool = False, transform=None, target_transform=None, transforms=None): ...
465
466
class Cityscapes(VisionDataset):
467
"""
468
Cityscapes dataset for semantic segmentation.
469
470
Args:
471
root (str): Root directory for dataset files
472
split (str): Dataset split ('train', 'test', 'val' when mode='fine'; 'train', 'train_extra', 'val' when mode='coarse')
473
mode (str): Quality mode ('fine', 'coarse')
474
target_type (str or list): Target type ('instance', 'semantic', 'polygon', 'color')
475
transform (callable, optional): Transform to apply to PIL image
476
target_transform (callable, optional): Transform to apply to target
477
transforms (callable, optional): Transform to apply to image and target
478
"""
479
def __init__(self, root: str, split: str = 'train', mode: str = 'fine', target_type: str = 'instance', transform=None, target_transform=None, transforms=None): ...
480
481
class SBDataset(VisionDataset):
482
"""Semantic Boundaries Dataset."""
483
def __init__(self, root: str, image_set: str = 'train', mode: str = 'boundaries', download: bool = False, transform=None, target_transform=None): ...
484
485
class WIDERFace(VisionDataset):
486
"""
487
WIDER FACE dataset for face detection.
488
489
Args:
490
root (str): Root directory for dataset files
491
split (str): Dataset split ('train', 'val', 'test')
492
transform (callable, optional): Transform to apply to PIL image
493
target_transform (callable, optional): Transform to apply to target
494
download (bool): If True, downloads dataset if not found
495
"""
496
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...
497
498
class Kitti(VisionDataset):
499
"""
500
KITTI dataset for object detection.
501
502
Args:
503
root (str): Root directory for dataset files
504
train (bool): If True, creates dataset from training set, otherwise test set
505
transform (callable, optional): Transform to apply to PIL image
506
target_transform (callable, optional): Transform to apply to target
507
transforms (callable, optional): Transform to apply to image and target
508
download (bool): If True, downloads dataset if not found
509
"""
510
def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, transforms=None, download: bool = False): ...
511
```
512
513
### Video Datasets
514
515
Datasets for video analysis and action recognition tasks.
516
517
```python { .api }
518
class Kinetics(VisionDataset):
519
"""
520
Kinetics dataset for action recognition.
521
522
Args:
523
root (str): Root directory for dataset files
524
frames_per_clip (int): Number of frames per video clip
525
num_classes (str): Number of classes ('400', '600', '700')
526
split (str): Dataset split ('train', 'val')
527
frame_rate (int, optional): Target frame rate for clips
528
step_between_clips (int): Number of frames between consecutive clips
529
transform (callable, optional): Transform to apply to video clips
530
download (bool): If True, downloads dataset if not found
531
num_download_workers (int): Number of workers for downloading
532
num_extract_workers (int): Number of workers for extraction
533
"""
534
def __init__(self, root: str, frames_per_clip: int, num_classes: str = '400', split: str = 'train', frame_rate=None, step_between_clips: int = 1, transform=None, download: bool = False, num_download_workers: int = 1, num_extract_workers: int = 1): ...
535
536
class HMDB51(VisionDataset):
537
"""
538
HMDB51 action recognition dataset.
539
540
Args:
541
root (str): Root directory for dataset files
542
annotation_path (str): Path to annotation files
543
frames_per_clip (int): Number of frames per video clip
544
step_between_clips (int): Number of frames between consecutive clips
545
fold (int): Which fold to load (1, 2, or 3)
546
train (bool): If True, creates dataset from training set
547
transform (callable, optional): Transform to apply to video clips
548
num_workers (int): Number of workers for video loading
549
"""
550
def __init__(self, root: str, annotation_path: str, frames_per_clip: int, step_between_clips: int = 1, fold: int = 1, train: bool = True, transform=None, num_workers: int = 1): ...
551
552
class UCF101(VisionDataset):
553
"""UCF101 action recognition dataset with 101 action classes."""
554
def __init__(self, root: str, annotation_path: str, frames_per_clip: int, step_between_clips: int = 1, fold: int = 1, train: bool = True, transform=None, num_workers: int = 1): ...
555
556
class MovingMNIST(VisionDataset):
557
"""
558
Moving MNIST dataset for video prediction.
559
560
Args:
561
root (str): Root directory for dataset files
562
split (str, optional): Dataset split ('train', 'test'); if None (default), the full dataset is returned
563
transform (callable, optional): Transform to apply to video data
564
download (bool): If True, downloads dataset if not found
565
"""
566
def __init__(self, root: str, split=None, transform=None, download: bool = True): ...
567
```
568
569
### Optical Flow and Stereo Datasets
570
571
Datasets for optical flow estimation and stereo vision tasks.
572
573
```python { .api }
574
class FlyingChairs(VisionDataset):
575
"""
576
FlyingChairs optical flow dataset.
577
578
Args:
579
root (str): Root directory for dataset files
580
split (str): Dataset split ('train', 'val')
581
transform (callable, optional): Transform to apply to samples
582
target_transform (callable, optional): Transform to apply to flow
583
"""
584
def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None): ...
585
586
class FlyingThings3D(VisionDataset):
587
"""FlyingThings3D optical flow dataset."""
588
def __init__(self, root: str, split: str = 'train', pass_name: str = 'clean', camera: str = 'left', transform=None, target_transform=None): ...
589
590
class Sintel(VisionDataset):
591
"""
592
MPI Sintel optical flow dataset.
593
594
Args:
595
root (str): Root directory for dataset files
596
split (str): Dataset split ('train', 'test')
597
pass_name (str): Rendering pass ('clean', 'final')
598
transform (callable, optional): Transform to apply to samples
599
target_transform (callable, optional): Transform to apply to flow
600
"""
601
def __init__(self, root: str, split: str = 'train', pass_name: str = 'clean', transform=None, target_transform=None): ...
602
603
class KittiFlow(VisionDataset):
604
"""KITTI optical flow dataset."""
605
def __init__(self, root: str, split: str = 'train', transform=None): ...
606
607
class HD1K(VisionDataset):
608
"""HD1K optical flow dataset."""
609
def __init__(self, root: str, split: str = 'train', transform=None): ...
610
611
class Kitti2012Stereo(VisionDataset):
612
"""
613
KITTI 2012 stereo dataset.
614
615
Args:
616
root (str): Root directory for dataset files
617
split (str): Dataset split ('train', 'test')
618
transform (callable, optional): Transform to apply to samples
619
"""
620
def __init__(self, root: str, split: str = 'train', transform=None): ...
621
622
class Kitti2015Stereo(VisionDataset):
623
"""KITTI 2015 stereo dataset."""
624
def __init__(self, root: str, split: str = 'train', transform=None): ...
625
626
class CarlaStereo(VisionDataset):
627
"""CARLA stereo dataset."""
628
def __init__(self, root: str, split: str = 'train', transform=None): ...
629
630
class Middlebury2014Stereo(VisionDataset):
631
"""Middlebury 2014 stereo dataset."""
632
def __init__(self, root: str, split: str = 'train', transform=None): ...
633
634
class CREStereo(VisionDataset):
635
"""CREStereo dataset."""
636
def __init__(self, root: str, split: str = 'train', transform=None): ...
637
638
class FallingThingsStereo(VisionDataset):
639
"""Falling Things stereo dataset."""
640
def __init__(self, root: str, variant: str = 'single', split: str = 'train', transform=None): ...
641
642
class SceneFlowStereo(VisionDataset):
643
"""Scene Flow stereo dataset."""
644
def __init__(self, root: str, split: str = 'train', pass_name: str = 'clean', transform=None): ...
645
646
class SintelStereo(VisionDataset):
647
"""Sintel stereo dataset."""
648
def __init__(self, root: str, split: str = 'train', pass_name: str = 'clean', transform=None): ...
649
650
class InStereo2k(VisionDataset):
651
"""InStereo2k dataset."""
652
def __init__(self, root: str, split: str = 'train', transform=None): ...
653
654
class ETH3DStereo(VisionDataset):
655
"""ETH3D stereo dataset."""
656
def __init__(self, root: str, split: str = 'train', transform=None): ...
657
```
658
659
### Image Captioning and Matching Datasets
660
661
Datasets for image captioning, patch matching, and face recognition tasks.
662
663
```python { .api }
664
class SBU(VisionDataset):
665
"""
666
SBU Captioned Photo dataset for image captioning.
667
668
Args:
669
root (str): Root directory for dataset files
670
transform (callable, optional): Transform to apply to PIL image
671
target_transform (callable, optional): Transform to apply to target
672
download (bool): If True, downloads dataset if not found
673
loader (callable, optional): Function to load image from path
674
"""
675
def __init__(self, root: str, transform=None, target_transform=None, download: bool = True, loader=None): ...
676
677
class Flickr8k(VisionDataset):
678
"""
679
Flickr8k dataset for image captioning.
680
681
Args:
682
root (str): Root directory for dataset files
683
ann_file (str): Path to annotation file
684
transform (callable, optional): Transform to apply to PIL image
685
target_transform (callable, optional): Transform to apply to target
686
loader (callable, optional): Function to load image from path
687
"""
688
def __init__(self, root: str, ann_file: str, transform=None, target_transform=None, loader=None): ...
689
690
class Flickr30k(VisionDataset):
691
"""
692
Flickr30k dataset for image captioning.
693
694
Args:
695
root (str): Root directory for dataset files
696
ann_file (str): Path to annotation file
697
transform (callable, optional): Transform to apply to PIL image
698
target_transform (callable, optional): Transform to apply to target
699
loader (callable, optional): Function to load image from path
700
"""
701
def __init__(self, root: str, ann_file: str, transform=None, target_transform=None, loader=None): ...
702
703
class PhotoTour(VisionDataset):
704
"""
705
Multi-view Stereo Correspondence dataset for patch matching.
706
707
Args:
708
root (str): Root directory for dataset files
709
name (str): Dataset name ('notredame_harris', 'yosemite_harris', 'liberty_harris', 'notredame', 'yosemite', 'liberty')
710
train (bool): If True, creates dataset for training patches, otherwise for matching pairs
711
transform (callable, optional): Transform to apply to patches
712
download (bool): If True, downloads dataset if not found
713
"""
714
def __init__(self, root: str, name: str, train: bool = True, transform=None, download: bool = False): ...
715
716
class LFWPeople(VisionDataset):
717
"""
718
LFW People dataset for face recognition.
719
720
Args:
721
root (str): Root directory for dataset files
722
split (str): Dataset split ('train', 'test', '10fold')
723
image_set (str): Image processing type ('original', 'funneled', 'deepfunneled')
724
transform (callable, optional): Transform to apply to PIL image
725
target_transform (callable, optional): Transform to apply to target
726
download (bool): Not supported; leave as False and place the manually downloaded dataset files under root
727
loader (callable, optional): Function to load image from path
728
"""
729
def __init__(self, root: str, split: str = '10fold', image_set: str = 'funneled', transform=None, target_transform=None, download: bool = False, loader=None): ...
730
731
class LFWPairs(VisionDataset):
732
"""
733
LFW Pairs dataset for face verification.
734
735
Args:
736
root (str): Root directory for dataset files
737
split (str): Dataset split ('train', 'test', '10fold')
738
image_set (str): Image processing type ('original', 'funneled', 'deepfunneled')
739
transform (callable, optional): Transform to apply to PIL image
740
target_transform (callable, optional): Transform to apply to target
741
download (bool): Not supported; leave as False and place the manually downloaded dataset files under root
742
loader (callable, optional): Function to load image from path
743
"""
744
def __init__(self, root: str, split: str = '10fold', image_set: str = 'funneled', transform=None, target_transform=None, download: bool = False, loader=None): ...
745
```
746
747
### Utility Datasets and Functions
748
749
Helper datasets and utilities for testing and dataset manipulation.
750
751
```python { .api }
752
class FakeData(VisionDataset):
753
"""
754
Generates fake data for testing purposes.
755
756
Args:
757
size (int): Dataset size
758
image_size (tuple): Image dimensions (channels, height, width)
759
num_classes (int): Number of classes
760
transform (callable, optional): Transform to apply to PIL image
761
target_transform (callable, optional): Transform to apply to target
762
random_offset (int): Offset added to the index-based random seed used to generate each image
763
"""
764
def __init__(self, size: int = 1000, image_size: tuple = (3, 224, 224), num_classes: int = 10, transform=None, target_transform=None, random_offset: int = 0): ...
765
766
def wrap_dataset_for_transforms_v2(dataset, target_keys=None):
767
"""
768
Wraps datasets for v2 transforms compatibility.
769
770
Args:
771
dataset: Dataset to wrap
772
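target_keys (str or collection of str, optional): Target keys to return when the target is a dictionary; None selects dataset-specific defaults, 'all' returns the full target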
773
774
Returns:
775
Wrapped dataset compatible with v2 transforms
776
"""
777
```
778
779
### Samplers
780
781
Clip samplers for video datasets, controlling which clips of each video are drawn during loading.
782
783
```python { .api }
784
# Available in torchvision.datasets.samplers
785
# Provides DistributedSampler, UniformClipSampler, and RandomClipSampler for sampling clips from video datasets
786
```
787
788
## Usage Examples
789
790
### Basic Image Classification Dataset
791
792
```python
793
from torchvision import datasets, transforms
794
from torch.utils.data import DataLoader
795
796
# Define transforms
797
transform = transforms.Compose([
798
transforms.Resize(256),
799
transforms.CenterCrop(224),
800
transforms.ToTensor(),
801
transforms.Normalize(mean=[0.485, 0.456, 0.406],
802
std=[0.229, 0.224, 0.225])
803
])
804
805
# Load CIFAR-10
806
train_dataset = datasets.CIFAR10(
807
root='./data',
808
train=True,
809
download=True,
810
transform=transform
811
)
812
813
test_dataset = datasets.CIFAR10(
814
root='./data',
815
train=False,
816
download=True,
817
transform=transform
818
)
819
820
# Create data loaders
821
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
822
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
823
```
824
825
### Object Detection Dataset
826
827
```python
828
from torchvision import datasets, transforms as T
829
830
# Define transforms for detection
831
transform = T.Compose([
832
T.ToTensor(),
833
])
834
835
# Load COCO detection dataset
836
dataset = datasets.CocoDetection(
837
root='/path/to/coco/images/train2017',
838
annFile='/path/to/coco/annotations/instances_train2017.json',
839
transform=transform
840
)
841
842
# Each item is an (image, target) pair, where target is a list of COCO annotation dicts
843
image, target = dataset[0]
844
```
845
846
### Custom Dataset with ImageFolder
847
848
```python
849
from torchvision import datasets, transforms
850
851
# For datasets organized as: root/class_name/image_files
852
transform = transforms.Compose([
853
transforms.Resize((224, 224)),
854
transforms.ToTensor(),
855
])
856
857
dataset = datasets.ImageFolder(
858
root='/path/to/custom/dataset',
859
transform=transform
860
)
861
862
# Access class names
863
print(dataset.classes)
864
print(dataset.class_to_idx)
865
```