or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

datasets.md  index.md  io.md  models.md  ops.md  transforms.md  tv_tensors.md  utils.md

docs/datasets.md

0

# Datasets

1

2

TorchVision provides a comprehensive collection of computer vision datasets with automatic downloading, caching, and preprocessing. The datasets module supports image classification, object detection, segmentation, video analysis, optical flow, and stereo vision tasks.

3

4

## Capabilities

5

6

### Base Dataset Classes

7

8

Foundation classes for building and working with vision datasets.

9

10

```python { .api }

11

class VisionDataset:

12

"""

13

Base class for all vision datasets.

14

15

Args:

16

root (str): Root directory of dataset

17

transforms (callable, optional): Function/transform applied jointly to the image and its target, returning transformed versions of both

18

transform (callable, optional): Function/transform to apply to PIL image

19

target_transform (callable, optional): Function/transform to apply to target

20

"""

21

def __init__(self, root: str, transforms=None, transform=None, target_transform=None): ...

22

def __getitem__(self, index: int): ...

23

def __len__(self) -> int: ...

24

25

class DatasetFolder(VisionDataset):

26

"""

27

Generic data loader for datasets in folder format.

28

29

Args:

30

root (str): Root directory path

31

loader (callable): Function to load a sample from path

32

extensions (tuple): Allowed extensions

33

transform (callable, optional): Transform to apply to samples

34

target_transform (callable, optional): Transform to apply to targets

35

is_valid_file (callable, optional): Function to check file validity

36

"""

37

def __init__(self, root: str, loader, extensions=None, transform=None, target_transform=None, is_valid_file=None): ...

38

39

class ImageFolder(DatasetFolder):

40

"""

41

Data loader for image classification datasets in folder format.

42

Expected structure: root/class_x/xxx.ext

43

44

Args:

45

root (str): Root directory path

46

transform (callable, optional): Transform to apply to PIL images

47

target_transform (callable, optional): Transform to apply to targets

48

loader (callable, optional): Function to load image from path

49

is_valid_file (callable, optional): Function to check file validity

50

"""

51

def __init__(self, root: str, transform=None, target_transform=None, loader=None, is_valid_file=None): ...

52

```

53

54

### Image Classification Datasets

55

56

Standard datasets for image classification tasks with automatic download and preprocessing.

57

58

```python { .api }

59

class MNIST(VisionDataset):

60

"""

61

MNIST handwritten digit dataset.

62

63

Args:

64

root (str): Root directory for dataset files

65

train (bool): If True, creates dataset from training set, else test set

66

transform (callable, optional): Transform to apply to PIL image

67

target_transform (callable, optional): Transform to apply to target

68

download (bool): If True, downloads dataset if not found at root

69

"""

70

def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...

71

72

class FashionMNIST(VisionDataset):

73

"""Fashion-MNIST dataset of clothing images."""

74

def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...

75

76

class KMNIST(VisionDataset):

77

"""Kuzushiji-MNIST dataset of Japanese characters."""

78

def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...

79

80

class EMNIST(VisionDataset):

81

"""

82

Extended MNIST dataset.

83

84

Args:

85

root (str): Root directory for dataset files

86

split (str): Dataset split ('byclass', 'bymerge', 'balanced', 'letters', 'digits', 'mnist')

87

train (bool): If True, creates dataset from training set

88

transform (callable, optional): Transform to apply to PIL image

89

target_transform (callable, optional): Transform to apply to target

90

download (bool): If True, downloads dataset if not found

91

"""

92

def __init__(self, root: str, split: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...

93

94

class CIFAR10(VisionDataset):

95

"""

96

CIFAR-10 dataset of 32x32 color images in 10 classes.

97

98

Args:

99

root (str): Root directory for dataset files

100

train (bool): If True, creates dataset from training set, else test set

101

transform (callable, optional): Transform to apply to PIL image

102

target_transform (callable, optional): Transform to apply to target

103

download (bool): If True, downloads dataset if not found at root

104

"""

105

def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...

106

107

class CIFAR100(VisionDataset):

108

"""CIFAR-100 dataset with 100 classes grouped into 20 superclasses."""

109

def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...

110

111

class SVHN(VisionDataset):

112

"""

113

Street View House Numbers dataset.

114

115

Args:

116

root (str): Root directory for dataset files

117

split (str): Dataset split ('train', 'test', 'extra')

118

transform (callable, optional): Transform to apply to PIL image

119

target_transform (callable, optional): Transform to apply to target

120

download (bool): If True, downloads dataset if not found

121

"""

122

def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...

123

124

class ImageNet(VisionDataset):

125

"""

126

ImageNet dataset for large-scale image classification.

127

128

Args:

129

root (str): Root directory containing 'train' and 'val' folders

130

split (str): Dataset split ('train', 'val')

131

transform (callable, optional): Transform to apply to PIL image

132

target_transform (callable, optional): Transform to apply to target

133

"""

134

def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None): ...

135

136

class Caltech101(VisionDataset):

137

"""Caltech 101 dataset with 101 object categories."""

138

def __init__(self, root: str, target_type='category', transform=None, target_transform=None, download: bool = False): ...

139

140

class Caltech256(VisionDataset):

141

"""Caltech 256 dataset with 256 object categories."""

142

def __init__(self, root: str, transform=None, target_transform=None, download: bool = False): ...

143

144

class CelebA(VisionDataset):

145

"""

146

CelebA face dataset with attributes.

147

148

Args:

149

root (str): Root directory for dataset files

150

split (str): Dataset split ('train', 'valid', 'test', 'all')

151

target_type (str): Target type ('attr', 'identity', 'bbox', 'landmarks')

152

transform (callable, optional): Transform to apply to PIL image

153

target_transform (callable, optional): Transform to apply to target

154

download (bool): If True, downloads dataset if not found

155

"""

156

def __init__(self, root: str, split: str = 'train', target_type: str = 'attr', transform=None, target_transform=None, download: bool = False): ...

157

158

class StanfordCars(VisionDataset):

159

"""Stanford Cars dataset with 196 car classes."""

160

def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...

161

162

class Flowers102(VisionDataset):

163

"""Oxford 102 Flower dataset."""

164

def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...

165

166

class Food101(VisionDataset):

167

"""Food-101 dataset with 101 food categories."""

168

def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...

169

170

class GTSRB(VisionDataset):

171

"""German Traffic Sign Recognition Benchmark."""

172

def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...

173

174

class OxfordIIITPet(VisionDataset):

175

"""

176

Oxford-IIIT Pet dataset.

177

178

Args:

179

root (str): Root directory for dataset files

180

split (str): Dataset split ('trainval', 'test')

181

target_types (str or list): Target types ('category', 'segmentation')

182

transform (callable, optional): Transform to apply to PIL image

183

target_transform (callable, optional): Transform to apply to target

184

download (bool): If True, downloads dataset if not found

185

"""

186

def __init__(self, root: str, split: str = 'trainval', target_types='category', transform=None, target_transform=None, download: bool = False): ...

187

188

class STL10(VisionDataset):

189

"""

190

STL10 dataset of 96x96 color images in 10 classes.

191

192

Args:

193

root (str): Root directory for dataset files

194

split (str): Dataset split ('train', 'test', 'unlabeled', 'train+unlabeled')

195

folds (int, optional): One of {0-9} or None. For training, selects one of the 10 pre-defined folds of 1k samples; if None, the full training set is loaded

196

transform (callable, optional): Transform to apply to PIL image

197

target_transform (callable, optional): Transform to apply to target

198

download (bool): If True, downloads dataset if not found

199

"""

200

def __init__(self, root: str, split: str = 'train', folds=None, transform=None, target_transform=None, download: bool = False): ...

201

202

class SUN397(VisionDataset):

203

"""

204

SUN397 scene recognition dataset with 397 categories.

205

206

Args:

207

root (str): Root directory for dataset files

208

transform (callable, optional): Transform to apply to PIL image

209

target_transform (callable, optional): Transform to apply to target

210

download (bool): If True, downloads dataset if not found

211

loader (callable, optional): Function to load image from path

212

"""

213

def __init__(self, root: str, transform=None, target_transform=None, download: bool = False, loader=None): ...

214

215

class SEMEION(VisionDataset):

216

"""

217

SEMEION handwritten digit dataset with 16x16 grayscale images.

218

219

Args:

220

root (str): Root directory for dataset files

221

transform (callable, optional): Transform to apply to PIL image

222

target_transform (callable, optional): Transform to apply to target

223

download (bool): If True, downloads dataset if not found

224

"""

225

def __init__(self, root: str, transform=None, target_transform=None, download: bool = True): ...

226

227

class Omniglot(VisionDataset):

228

"""

229

Omniglot dataset for few-shot learning with character recognition.

230

231

Args:

232

root (str): Root directory for dataset files

233

background (bool): If True, creates dataset from background set, otherwise evaluation set

234

transform (callable, optional): Transform to apply to PIL image

235

target_transform (callable, optional): Transform to apply to target

236

download (bool): If True, downloads dataset if not found

237

loader (callable, optional): Function to load image from path

238

"""

239

def __init__(self, root: str, background: bool = True, transform=None, target_transform=None, download: bool = False, loader=None): ...

240

241

class USPS(VisionDataset):

242

"""

243

USPS handwritten digit dataset with 16x16 grayscale images.

244

245

Args:

246

root (str): Root directory for dataset files

247

train (bool): If True, creates dataset from training set, otherwise test set

248

transform (callable, optional): Transform to apply to PIL image

249

target_transform (callable, optional): Transform to apply to target

250

download (bool): If True, downloads dataset if not found

251

"""

252

def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, download: bool = False): ...

253

254

class QMNIST(VisionDataset):

255

"""

256

QMNIST extended MNIST dataset with additional metadata.

257

258

Args:

259

root (str): Root directory for dataset files

260

what (str, optional): Dataset subset ('train', 'test', 'test10k', 'test50k', 'nist')

261

compat (bool): If True, returns class labels for MNIST compatibility

262

train (bool): If True, creates dataset from training set (when what is None)

263

transform (callable, optional): Transform to apply to PIL image

264

target_transform (callable, optional): Transform to apply to target

265

download (bool): If True, downloads dataset if not found

266

"""

267

def __init__(self, root: str, what=None, compat: bool = True, train: bool = True, transform=None, target_transform=None, download: bool = False): ...

268

269

class Places365(VisionDataset):

270

"""

271

Places365 scene recognition dataset with 365 scene categories.

272

273

Args:

274

root (str): Root directory for dataset files

275

split (str): Dataset split ('train-standard', 'train-challenge', 'val', 'test')

276

small (bool): If True, uses small (256x256) images instead of high resolution

277

download (bool): If True, downloads dataset if not found

278

transform (callable, optional): Transform to apply to PIL image

279

target_transform (callable, optional): Transform to apply to target

280

loader (callable, optional): Function to load image from path

281

"""

282

def __init__(self, root: str, split: str = 'train-standard', small: bool = False, download: bool = False, transform=None, target_transform=None, loader=None): ...

283

284

class INaturalist(VisionDataset):

285

"""

286

iNaturalist dataset for fine-grained species classification.

287

288

Args:

289

root (str): Root directory for dataset files

290

version (str): Dataset version ('2017', '2018', '2019', '2021_train', '2021_train_mini', '2021_valid')

291

target_type (str or list): Target type ('full', 'kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'super')

292

transform (callable, optional): Transform to apply to PIL image

293

target_transform (callable, optional): Transform to apply to target

294

download (bool): If True, downloads dataset if not found

295

loader (callable, optional): Function to load image from path

296

"""

297

def __init__(self, root: str, version: str = '2021_train', target_type='full', transform=None, target_transform=None, download: bool = False, loader=None): ...

298

299

class DTD(VisionDataset):

300

"""

301

Describable Textures Dataset (DTD) with 47 texture categories.

302

303

Args:

304

root (str): Root directory for dataset files

305

split (str): Dataset split ('train', 'val', 'test')

306

partition (int): Dataset partition (1-10)

307

transform (callable, optional): Transform to apply to PIL image

308

target_transform (callable, optional): Transform to apply to target

309

download (bool): If True, downloads dataset if not found

310

loader (callable, optional): Function to load image from path

311

"""

312

def __init__(self, root: str, split: str = 'train', partition: int = 1, transform=None, target_transform=None, download: bool = False, loader=None): ...

313

314

class FER2013(VisionDataset):

315

"""

316

FER2013 facial expression recognition dataset with 7 emotion classes.

317

318

Args:

319

root (str): Root directory for dataset files

320

split (str): Dataset split ('train', 'test')

321

transform (callable, optional): Transform to apply to PIL image

322

target_transform (callable, optional): Transform to apply to target

323

"""

324

def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None): ...

325

326

class CLEVRClassification(VisionDataset):

327

"""

328

CLEVR classification dataset for visual reasoning (object counting).

329

330

Args:

331

root (str): Root directory for dataset files

332

split (str): Dataset split ('train', 'val', 'test')

333

transform (callable, optional): Transform to apply to PIL image

334

target_transform (callable, optional): Transform to apply to target

335

download (bool): If True, downloads dataset if not found

336

loader (callable, optional): Function to load image from path

337

"""

338

def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False, loader=None): ...

339

340

class PCAM(VisionDataset):

341

"""

342

PatchCamelyon (PCAM) histopathologic cancer detection dataset.

343

344

Args:

345

root (str): Root directory for dataset files

346

split (str): Dataset split ('train', 'val', 'test')

347

transform (callable, optional): Transform to apply to PIL image

348

target_transform (callable, optional): Transform to apply to target

349

download (bool): If True, downloads dataset if not found

350

"""

351

def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...

352

353

class Country211(VisionDataset):

354

"""

355

Country211 dataset for country classification from images.

356

357

Args:

358

root (str): Root directory for dataset files

359

split (str): Dataset split ('train', 'valid', 'test')

360

transform (callable, optional): Transform to apply to PIL image

361

target_transform (callable, optional): Transform to apply to target

362

download (bool): If True, downloads dataset if not found

363

loader (callable, optional): Function to load image from path

364

"""

365

def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False, loader=None): ...

366

367

class FGVCAircraft(VisionDataset):

368

"""

369

FGVC Aircraft dataset for fine-grained aircraft recognition.

370

371

Args:

372

root (str): Root directory for dataset files

373

split (str): Dataset split ('train', 'val', 'trainval', 'test')

374

annotation_level (str): Annotation level ('variant', 'family', 'manufacturer')

375

transform (callable, optional): Transform to apply to PIL image

376

target_transform (callable, optional): Transform to apply to target

377

download (bool): If True, downloads dataset if not found

378

loader (callable, optional): Function to load image from path

379

"""

380

def __init__(self, root: str, split: str = 'trainval', annotation_level: str = 'variant', transform=None, target_transform=None, download: bool = False, loader=None): ...

381

382

class EuroSAT(VisionDataset):

383

"""

384

EuroSAT satellite image classification dataset with 10 land use classes.

385

386

Args:

387

root (str): Root directory for dataset files

388

transform (callable, optional): Transform to apply to PIL image

389

target_transform (callable, optional): Transform to apply to target

390

download (bool): If True, downloads dataset if not found

391

loader (callable, optional): Function to load image from path

392

"""

393

def __init__(self, root: str, transform=None, target_transform=None, download: bool = False, loader=None): ...

394

395

class RenderedSST2(VisionDataset):

396

"""

397

Rendered SST2 dataset for optical character recognition with sentiment.

398

399

Args:

400

root (str): Root directory for dataset files

401

split (str): Dataset split ('train', 'val', 'test')

402

transform (callable, optional): Transform to apply to PIL image

403

target_transform (callable, optional): Transform to apply to target

404

download (bool): If True, downloads dataset if not found

405

loader (callable, optional): Function to load image from path

406

"""

407

def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False, loader=None): ...

408

409

class Imagenette(VisionDataset):

410

"""

411

Imagenette dataset - subset of ImageNet with 10 classes.

412

413

Args:

414

root (str): Root directory for dataset files

415

split (str): Dataset split ('train', 'val')

416

size (str): Image size ('full', '320px', '160px')

417

download (bool): If True, downloads dataset if not found

418

transform (callable, optional): Transform to apply to PIL image

419

target_transform (callable, optional): Transform to apply to target

420

loader (callable, optional): Function to load image from path

421

"""

422

def __init__(self, root: str, split: str = 'train', size: str = 'full', download: bool = False, transform=None, target_transform=None, loader=None): ...

423

```

424

425

### Object Detection and Segmentation Datasets

426

427

Datasets for object detection, instance segmentation, and semantic segmentation tasks.

428

429

```python { .api }

430

class CocoDetection(VisionDataset):

431

"""

432

COCO dataset for object detection.

433

434

Args:

435

root (str): Root directory containing images

436

annFile (str): Path to annotation file

437

transform (callable, optional): Transform to apply to image

438

target_transform (callable, optional): Transform to apply to target

439

transforms (callable, optional): Transform to apply to image and target

440

"""

441

def __init__(self, root: str, annFile: str, transform=None, target_transform=None, transforms=None): ...

442

443

class CocoCaptions(VisionDataset):

444

"""COCO dataset for image captioning."""

445

def __init__(self, root: str, annFile: str, transform=None, target_transform=None, transforms=None): ...

446

447

class VOCDetection(VisionDataset):

448

"""

449

Pascal VOC dataset for object detection.

450

451

Args:

452

root (str): Root directory for dataset files

453

year (str): Dataset year ('2007', '2008', '2009', '2010', '2011', '2012')

454

image_set (str): Image set ('train', 'trainval', 'val', 'test')

455

download (bool): If True, downloads dataset if not found

456

transform (callable, optional): Transform to apply to PIL image

457

target_transform (callable, optional): Transform to apply to target

458

transforms (callable, optional): Transform to apply to image and target

459

"""

460

def __init__(self, root: str, year: str = '2012', image_set: str = 'train', download: bool = False, transform=None, target_transform=None, transforms=None): ...

461

462

class VOCSegmentation(VisionDataset):

463

"""Pascal VOC dataset for semantic segmentation."""

464

def __init__(self, root: str, year: str = '2012', image_set: str = 'train', download: bool = False, transform=None, target_transform=None, transforms=None): ...

465

466

class Cityscapes(VisionDataset):

467

"""

468

Cityscapes dataset for semantic segmentation.

469

470

Args:

471

root (str): Root directory for dataset files

472

split (str): Dataset split ('train', 'test', 'val')

473

mode (str): Quality mode ('fine', 'coarse')

474

target_type (str or list): Target type ('instance', 'semantic', 'polygon', 'color')

475

transform (callable, optional): Transform to apply to PIL image

476

target_transform (callable, optional): Transform to apply to target

477

transforms (callable, optional): Transform to apply to image and target

478

"""

479

def __init__(self, root: str, split: str = 'train', mode: str = 'fine', target_type: str = 'instance', transform=None, target_transform=None, transforms=None): ...

480

481

class SBDataset(VisionDataset):

482

"""Semantic Boundaries Dataset."""

483

def __init__(self, root: str, image_set: str = 'train', mode: str = 'boundaries', download: bool = False, transform=None, target_transform=None): ...

484

485

class WIDERFace(VisionDataset):

486

"""

487

WIDER FACE dataset for face detection.

488

489

Args:

490

root (str): Root directory for dataset files

491

split (str): Dataset split ('train', 'val', 'test')

492

transform (callable, optional): Transform to apply to PIL image

493

target_transform (callable, optional): Transform to apply to target

494

download (bool): If True, downloads dataset if not found

495

"""

496

def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None, download: bool = False): ...

497

498

class Kitti(VisionDataset):

499

"""

500

KITTI dataset for object detection.

501

502

Args:

503

root (str): Root directory for dataset files

504

train (bool): If True, creates dataset from training set, otherwise test set

505

transform (callable, optional): Transform to apply to PIL image

506

target_transform (callable, optional): Transform to apply to target

507

transforms (callable, optional): Transform to apply to image and target

508

download (bool): If True, downloads dataset if not found

509

"""

510

def __init__(self, root: str, train: bool = True, transform=None, target_transform=None, transforms=None, download: bool = False): ...

511

```

512

513

### Video Datasets

514

515

Datasets for video analysis and action recognition tasks.

516

517

```python { .api }

518

class Kinetics(VisionDataset):

519

"""

520

Kinetics dataset for action recognition.

521

522

Args:

523

root (str): Root directory for dataset files

524

frames_per_clip (int): Number of frames per video clip

525

num_classes (str): Number of classes ('400', '600', '700')

526

split (str): Dataset split ('train', 'val')

527

frame_rate (int, optional): Target frame rate for clips

528

step_between_clips (int): Number of frames between consecutive clips

529

transform (callable, optional): Transform to apply to video clips

530

download (bool): If True, downloads dataset if not found

531

num_download_workers (int): Number of workers for downloading

532

num_extract_workers (int): Number of workers for extraction

533

"""

534

def __init__(self, root: str, frames_per_clip: int, num_classes: str = '400', split: str = 'train', frame_rate=None, step_between_clips: int = 1, transform=None, download: bool = False, num_download_workers: int = 1, num_extract_workers: int = 1): ...

535

536

class HMDB51(VisionDataset):

537

"""

538

HMDB51 action recognition dataset.

539

540

Args:

541

root (str): Root directory for dataset files

542

annotation_path (str): Path to annotation files

543

frames_per_clip (int): Number of frames per video clip

544

step_between_clips (int): Number of frames between consecutive clips

545

fold (int): Which fold to load (1, 2, or 3)

546

train (bool): If True, creates dataset from training set

547

transform (callable, optional): Transform to apply to video clips

548

num_workers (int): Number of workers for video loading

549

"""

550

def __init__(self, root: str, annotation_path: str, frames_per_clip: int, step_between_clips: int = 1, fold: int = 1, train: bool = True, transform=None, num_workers: int = 1): ...

551

552

class UCF101(VisionDataset):

553

"""UCF101 action recognition dataset with 101 action classes."""

554

def __init__(self, root: str, annotation_path: str, frames_per_clip: int, step_between_clips: int = 1, fold: int = 1, train: bool = True, transform=None, num_workers: int = 1): ...

555

556

class MovingMNIST(VisionDataset):

557

"""

558

Moving MNIST dataset for video prediction.

559

560

Args:

561

root (str): Root directory for dataset files

562

split (str, optional): Dataset split ('train', 'test')

563

transform (callable, optional): Transform to apply to video data

564

download (bool): If True, downloads dataset if not found

565

"""

566

def __init__(self, root: str, split=None, split_ratio: int = 10, download: bool = False, transform=None): ...

567

```

568

569

### Optical Flow and Stereo Datasets

570

571

Datasets for optical flow estimation and stereo vision tasks.

572

573

```python { .api }

574

class FlyingChairs(VisionDataset):

575

"""

576

FlyingChairs optical flow dataset.

577

578

Args:

579

root (str): Root directory for dataset files

580

split (str): Dataset split ('train', 'val')

581

transform (callable, optional): Transform to apply to samples

582

target_transform (callable, optional): Transform to apply to flow

583

"""

584

def __init__(self, root: str, split: str = 'train', transform=None, target_transform=None): ...

585

586

class FlyingThings3D(VisionDataset):

587

"""FlyingThings3D optical flow dataset."""

588

def __init__(self, root: str, split: str = 'train', pass_name: str = 'clean', camera: str = 'left', transform=None, target_transform=None): ...

589

590

class Sintel(VisionDataset):

591

"""

592

MPI Sintel optical flow dataset.

593

594

Args:

595

root (str): Root directory for dataset files

596

split (str): Dataset split ('train', 'test')

597

pass_name (str): Rendering pass ('clean', 'final')

598

transform (callable, optional): Transform to apply to samples

599

target_transform (callable, optional): Transform to apply to flow

600

"""

601

def __init__(self, root: str, split: str = 'train', pass_name: str = 'clean', transform=None, target_transform=None): ...

602

603

class KittiFlow(VisionDataset):

604

"""KITTI optical flow dataset."""

605

def __init__(self, root: str, split: str = 'train', transform=None): ...

606

607

class HD1K(VisionDataset):

608

"""HD1K optical flow dataset."""

609

def __init__(self, root: str, split: str = 'train', transform=None): ...

610

611

class Kitti2012Stereo(VisionDataset):

612

"""

613

KITTI 2012 stereo dataset.

614

615

Args:

616

root (str): Root directory for dataset files

617

split (str): Dataset split ('train', 'test')

618

transform (callable, optional): Transform to apply to samples

619

"""

620

def __init__(self, root: str, split: str = 'train', transform=None): ...

621

622

class Kitti2015Stereo(VisionDataset):

623

"""KITTI 2015 stereo dataset."""

624

def __init__(self, root: str, split: str = 'train', transform=None): ...

625

626

class CarlaStereo(VisionDataset):

627

"""CARLA stereo dataset."""

628

def __init__(self, root: str, split: str = 'train', transform=None): ...

629

630

class Middlebury2014Stereo(VisionDataset):

631

"""Middlebury 2014 stereo dataset."""

632

def __init__(self, root: str, split: str = 'train', transform=None): ...

633

634

class CREStereo(VisionDataset):

635

"""CREStereo dataset."""

636

def __init__(self, root: str, split: str = 'train', transform=None): ...

637

638

class FallingThingsStereo(VisionDataset):

639

"""Falling Things stereo dataset."""

640

def __init__(self, root: str, variant: str = 'single', split: str = 'train', transform=None): ...

641

642

class SceneFlowStereo(VisionDataset):

643

"""Scene Flow stereo dataset."""

644

def __init__(self, root: str, split: str = 'train', pass_name: str = 'clean', transform=None): ...

645

646

class SintelStereo(VisionDataset):

647

"""Sintel stereo dataset."""

648

def __init__(self, root: str, split: str = 'train', pass_name: str = 'clean', transform=None): ...

649

650

class InStereo2k(VisionDataset):

651

"""InStereo2k dataset."""

652

def __init__(self, root: str, split: str = 'train', transform=None): ...

653

654

class ETH3DStereo(VisionDataset):

655

"""ETH3D stereo dataset."""

656

def __init__(self, root: str, split: str = 'train', transform=None): ...

657

```

658

659

### Image Captioning and Matching Datasets

660

661

Datasets for image captioning, patch matching, and face recognition tasks.

662

663

```python { .api }

664

class SBU(VisionDataset):

665

"""

666

SBU Captioned Photo dataset for image captioning.

667

668

Args:

669

root (str): Root directory for dataset files

670

transform (callable, optional): Transform to apply to PIL image

671

target_transform (callable, optional): Transform to apply to target

672

download (bool): If True, downloads dataset if not found

673

loader (callable, optional): Function to load image from path

674

"""

675

def __init__(self, root: str, transform=None, target_transform=None, download: bool = True, loader=None): ...

676

677

class Flickr8k(VisionDataset):

678

"""

679

Flickr8k dataset for image captioning.

680

681

Args:

682

root (str): Root directory for dataset files

683

ann_file (str): Path to annotation file

684

transform (callable, optional): Transform to apply to PIL image

685

target_transform (callable, optional): Transform to apply to target

686

loader (callable, optional): Function to load image from path

687

"""

688

def __init__(self, root: str, ann_file: str, transform=None, target_transform=None, loader=None): ...

689

690

class Flickr30k(VisionDataset):

691

"""

692

Flickr30k dataset for image captioning.

693

694

Args:

695

root (str): Root directory for dataset files

696

ann_file (str): Path to annotation file

697

transform (callable, optional): Transform to apply to PIL image

698

target_transform (callable, optional): Transform to apply to target

699

loader (callable, optional): Function to load image from path

700

"""

701

def __init__(self, root: str, ann_file: str, transform=None, target_transform=None, loader=None): ...

702

703

class PhotoTour(VisionDataset):

704

"""

705

Multi-view Stereo Correspondence dataset for patch matching.

706

707

Args:

708

root (str): Root directory for dataset files

709

name (str): Dataset name ('notredame_harris', 'yosemite_harris', 'liberty_harris', 'notredame', 'yosemite', 'liberty')

710

train (bool): If True, creates dataset for training patches, otherwise for matching pairs

711

transform (callable, optional): Transform to apply to patches

712

download (bool): If True, downloads dataset if not found

713

"""

714

def __init__(self, root: str, name: str, train: bool = True, transform=None, download: bool = False): ...

715

716

class LFWPeople(VisionDataset):

717

"""

718

LFW People dataset for face recognition.

719

720

Args:

721

root (str): Root directory for dataset files

722

split (str): Dataset split ('train', 'test', '10fold')

723

image_set (str): Image processing type ('original', 'funneled', 'deepfunneled')

724

transform (callable, optional): Transform to apply to PIL image

725

target_transform (callable, optional): Transform to apply to target

726

download (bool): NOT SUPPORTED - manual download required

727

loader (callable, optional): Function to load image from path

728

"""

729

def __init__(self, root: str, split: str = '10fold', image_set: str = 'funneled', transform=None, target_transform=None, download: bool = False, loader=None): ...

730

731

class LFWPairs(VisionDataset):

732

"""

733

LFW Pairs dataset for face verification.

734

735

Args:

736

root (str): Root directory for dataset files

737

split (str): Dataset split ('train', 'test', '10fold')

738

image_set (str): Image processing type ('original', 'funneled', 'deepfunneled')

739

transform (callable, optional): Transform to apply to PIL image

740

target_transform (callable, optional): Transform to apply to target

741

download (bool): NOT SUPPORTED - manual download required

742

loader (callable, optional): Function to load image from path

743

"""

744

def __init__(self, root: str, split: str = '10fold', image_set: str = 'funneled', transform=None, target_transform=None, download: bool = False, loader=None): ...

745

```

746

747

### Utility Datasets and Functions

748

749

Helper datasets and utilities for testing and dataset manipulation.

750

751

```python { .api }

752

class FakeData(VisionDataset):

753

"""

754

Generates fake data for testing purposes.

755

756

Args:

757

size (int): Dataset size

758

image_size (tuple): Image dimensions (channels, height, width)

759

num_classes (int): Number of classes

760

transform (callable, optional): Transform to apply to PIL image

761

target_transform (callable, optional): Transform to apply to target

762

random_offset (int): Offset added to the index-based random seed used to generate each image

763

"""

764

def __init__(self, size: int = 1000, image_size: tuple = (3, 224, 224), num_classes: int = 10, transform=None, target_transform=None, random_offset: int = 0): ...

765

766

def wrap_dataset_for_transforms_v2(dataset, target_keys=None):

767

"""

768

Wraps a built-in torchvision dataset so that its samples are returned as TVTensors usable with torchvision.transforms.v2.

769

770

Args:

771

dataset: Dataset to wrap

772

target_keys (sequence, optional): Target keys to return when the target is a dictionary; None selects dataset-specific defaults, 'all' returns the full target

773

774

Returns:

775

Wrapped dataset compatible with v2 transforms

776

"""

777

```

778

779

### Samplers

780

781

Sampling strategies for dataset loading and batching.

782

783

```python { .api }

784

# Available in torchvision.datasets.samplers

785

# Provides clip samplers for video datasets: DistributedSampler, UniformClipSampler, RandomClipSampler

786

```

787

788

## Usage Examples

789

790

### Basic Image Classification Dataset

791

792

```python

793

from torchvision import datasets, transforms

794

from torch.utils.data import DataLoader

795

796

# Define transforms

797

transform = transforms.Compose([

798

transforms.Resize(256),

799

transforms.CenterCrop(224),

800

transforms.ToTensor(),

801

transforms.Normalize(mean=[0.485, 0.456, 0.406],

802

std=[0.229, 0.224, 0.225])

803

])

804

805

# Load CIFAR-10

806

train_dataset = datasets.CIFAR10(

807

root='./data',

808

train=True,

809

download=True,

810

transform=transform

811

)

812

813

test_dataset = datasets.CIFAR10(

814

root='./data',

815

train=False,

816

download=True,

817

transform=transform

818

)

819

820

# Create data loaders

821

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

822

test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

823

```

824

825

### Object Detection Dataset

826

827

```python

828

from torchvision import datasets, transforms as T

829

830

# Define transforms for detection

831

transform = T.Compose([

832

T.ToTensor(),

833

])

834

835

# Load COCO detection dataset

836

dataset = datasets.CocoDetection(

837

root='/path/to/coco/images/train2017',

838

annFile='/path/to/coco/annotations/instances_train2017.json',

839

transform=transform

840

)

841

842

# Each item is an (image, target) pair, where target is a list of annotation dicts

843

image, target = dataset[0]

844

```

845

846

### Custom Dataset with ImageFolder

847

848

```python

849

from torchvision import datasets, transforms

850

851

# For datasets organized as: root/class_name/image_files

852

transform = transforms.Compose([

853

transforms.Resize((224, 224)),

854

transforms.ToTensor(),

855

])

856

857

dataset = datasets.ImageFolder(

858

root='/path/to/custom/dataset',

859

transform=transform

860

)

861

862

# Access class names

863

print(dataset.classes)

864

print(dataset.class_to_idx)

865

```