or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

datasets.md index.md io.md models.md ops.md transforms.md tv_tensors.md utils.md

docs/transforms.md

0

# Transforms

1

2

TorchVision provides comprehensive image and video preprocessing and augmentation capabilities. The transforms module includes the v1 API (traditional PIL image/tensor transforms), the v2 API (which can jointly transform images, videos, bounding boxes, and masks), functional implementations, and preset transform pipelines for common use cases.

3

4

## Capabilities

5

6

### Core Transform Classes

7

8

#### Container Transforms

9

10

Transforms that compose and apply multiple transformations.

11

12

```python { .api }

13

class Compose:

14

"""

15

Composes several transforms together.

16

17

Args:

18

transforms (list): List of transforms to compose

19

"""

20

def __init__(self, transforms: list): ...

21

def __call__(self, img): ...

22

23

class RandomApply:

24

"""

25

Apply list of transforms randomly with probability p.

26

27

Args:

28

transforms (list): List of transforms to apply

29

p (float): Probability of applying transforms

30

"""

31

def __init__(self, transforms: list, p: float = 0.5): ...

32

33

class RandomChoice:

34

"""

35

Apply single random transform from list.

36

37

Args:

38

transforms (list): List of transforms to choose from

39

"""

40

def __init__(self, transforms: list): ...

41

42

class RandomOrder:

43

"""

44

Apply transforms in random order.

45

46

Args:

47

transforms (list): List of transforms to apply in random order

48

"""

49

def __init__(self, transforms: list): ...

50

```

51

52

#### Type Conversion Transforms

53

54

Transforms for converting between different data types and formats.

55

56

```python { .api }

57

class ToTensor:

58

"""

59

Convert PIL Image or numpy array to tensor.

60

Converts PIL Image or numpy.ndarray (H x W x C) in range [0, 255]

61

to torch.FloatTensor of shape (C x H x W) in range [0.0, 1.0].

62

"""

63

def __call__(self, pic): ...

64

65

class PILToTensor:

66

"""

67

Convert PIL Image to tensor without scaling.

68

Converts a PIL Image (H x W x C) to a torch.Tensor of shape (C x H x W), keeping the original value range.

69

"""

70

def __call__(self, pic): ...

71

72

class ToPILImage:

73

"""

74

Convert tensor or ndarray to PIL Image.

75

76

Args:

77

mode (str, optional): Color mode for output image

78

"""

79

def __init__(self, mode=None): ...

80

81

class ConvertImageDtype:

82

"""

83

Convert tensor image to given dtype.

84

85

Args:

86

dtype (torch.dtype): Desired data type

87

"""

88

def __init__(self, dtype: torch.dtype): ...

89

```

90

91

#### Geometric Transforms

92

93

Spatial transformations for resizing, cropping, and geometric augmentation.

94

95

```python { .api }

96

class Resize:

97

"""

98

Resize input to given size.

99

100

Args:

101

size (int or tuple): Desired output size

102

interpolation (InterpolationMode): Interpolation method

103

max_size (int, optional): Maximum allowed length of the longer edge of the resized image; only supported when size is an int

104

antialias (bool, optional): Apply antialiasing

105

"""

106

def __init__(self, size, interpolation=InterpolationMode.BILINEAR, max_size=None, antialias=None): ...

107

108

class CenterCrop:

109

"""

110

Crop image at center.

111

112

Args:

113

size (int or tuple): Desired output size

114

"""

115

def __init__(self, size): ...

116

117

class RandomCrop:

118

"""

119

Crop image at random location.

120

121

Args:

122

size (int or tuple): Desired output size

123

padding (int or tuple, optional): Padding on each border

124

pad_if_needed (bool): Pad if image smaller than crop size

125

fill (number or tuple): Fill value for padding

126

padding_mode (str): Padding mode ('constant', 'edge', 'reflect', 'symmetric')

127

"""

128

def __init__(self, size, padding=None, pad_if_needed: bool = False, fill: int = 0, padding_mode: str = 'constant'): ...

129

130

class RandomResizedCrop:

131

"""

132

Random crop with resize to target size.

133

134

Args:

135

size (int or tuple): Expected output size

136

scale (tuple): Lower and upper bounds for the crop area, as a fraction of the original image area

137

ratio (tuple): Lower and upper bounds for the aspect ratio of the crop, before resizing

138

interpolation (InterpolationMode): Interpolation method

139

antialias (bool, optional): Apply antialiasing

140

"""

141

def __init__(self, size, scale: tuple = (0.08, 1.0), ratio: tuple = (3./4., 4./3.), interpolation=InterpolationMode.BILINEAR, antialias=None): ...

142

143

class FiveCrop:

144

"""

145

Crop image into four corners and center.

146

147

Args:

148

size (int or tuple): Desired output size

149

"""

150

def __init__(self, size): ...

151

152

class TenCrop:

153

"""

154

Create 10 crops: 5 crops + horizontally flipped versions.

155

156

Args:

157

size (int or tuple): Desired output size

158

vertical_flip (bool): Use vertical flip instead of horizontal

159

"""

160

def __init__(self, size, vertical_flip: bool = False): ...

161

162

class Pad:

163

"""

164

Pad image on all sides with given pad value.

165

166

Args:

167

padding (int or tuple): Padding on each border

168

fill (number or tuple): Fill value for constant fill

169

padding_mode (str): Padding mode

170

"""

171

def __init__(self, padding, fill: int = 0, padding_mode: str = 'constant'): ...

172

173

class RandomHorizontalFlip:

174

"""

175

Randomly flip image horizontally with probability p.

176

177

Args:

178

p (float): Probability of flip

179

"""

180

def __init__(self, p: float = 0.5): ...

181

182

class RandomVerticalFlip:

183

"""

184

Randomly flip image vertically with probability p.

185

186

Args:

187

p (float): Probability of flip

188

"""

189

def __init__(self, p: float = 0.5): ...

190

191

class RandomRotation:

192

"""

193

Rotate image by random angle.

194

195

Args:

196

degrees (number or tuple): Range of degrees to select from

197

interpolation (InterpolationMode): Interpolation method

198

expand (bool): Expand output to fit rotated image

199

center (tuple, optional): Center of rotation

200

fill (number or tuple): Fill value for area outside rotated image

201

"""

202

def __init__(self, degrees, interpolation=InterpolationMode.NEAREST, expand: bool = False, center=None, fill: int = 0): ...

203

204

class RandomAffine:

205

"""

206

Random affine transformation.

207

208

Args:

209

degrees (number or tuple): Range of degrees for rotation

210

translate (tuple, optional): Range of translations

211

scale (tuple, optional): Range of scale factors

212

shear (number or tuple, optional): Range of shear angles

213

interpolation (InterpolationMode): Interpolation method

214

fill (number or tuple): Fill value

215

center (tuple, optional): Center point for transformations

216

"""

217

def __init__(self, degrees, translate=None, scale=None, shear=None, interpolation=InterpolationMode.NEAREST, fill: int = 0, center=None): ...

218

219

class RandomPerspective:

220

"""

221

Random perspective transformation.

222

223

Args:

224

distortion_scale (float): Argument to control degree of distortion

225

p (float): Probability of applying transformation

226

interpolation (InterpolationMode): Interpolation method

227

fill (number or tuple): Fill value

228

"""

229

def __init__(self, distortion_scale: float = 0.5, p: float = 0.5, interpolation=InterpolationMode.BILINEAR, fill: int = 0): ...

230

231

class ElasticTransform:

232

"""

233

Random elastic transformation.

234

235

Args:

236

alpha (float or tuple): Magnitude of displacement

237

sigma (float or tuple): Standard deviation of Gaussian kernel

238

interpolation (InterpolationMode): Interpolation method

239

fill (number or tuple): Fill value

240

"""

241

def __init__(self, alpha: float = 50.0, sigma: float = 5.0, interpolation=InterpolationMode.BILINEAR, fill: int = 0): ...

242

```

243

244

#### Color Transforms

245

246

Photometric transformations for color manipulation and augmentation.

247

248

```python { .api }

249

class ColorJitter:

250

"""

251

Randomly change brightness, contrast, saturation, and hue.

252

253

Args:

254

brightness (float or tuple): How much to jitter brightness

255

contrast (float or tuple): How much to jitter contrast

256

saturation (float or tuple): How much to jitter saturation

257

hue (float or tuple): How much to jitter hue

258

"""

259

def __init__(self, brightness: float = 0, contrast: float = 0, saturation: float = 0, hue: float = 0): ...

260

261

class Grayscale:

262

"""

263

Convert image to grayscale.

264

265

Args:

266

num_output_channels (int): Number of channels for output (1 or 3)

267

"""

268

def __init__(self, num_output_channels: int = 1): ...

269

270

class RandomGrayscale:

271

"""

272

Randomly convert image to grayscale with probability p.

273

274

Args:

275

p (float): Probability of conversion to grayscale

276

"""

277

def __init__(self, p: float = 0.1): ...

278

279

class GaussianBlur:

280

"""

281

Apply Gaussian blur to image.

282

283

Args:

284

kernel_size (int or tuple): Size of Gaussian kernel

285

sigma (float or tuple): Standard deviation for Gaussian kernel

286

"""

287

def __init__(self, kernel_size, sigma: tuple = (0.1, 2.0)): ...

288

289

class RandomInvert:

290

"""

291

Randomly invert colors of image with probability p.

292

293

Args:

294

p (float): Probability of inversion

295

"""

296

def __init__(self, p: float = 0.5): ...

297

298

class RandomPosterize:

299

"""

300

Randomly posterize image with probability p.

301

302

Args:

303

bits (int): Number of bits to keep for each channel

304

p (float): Probability of posterization

305

"""

306

def __init__(self, bits: int, p: float = 0.5): ...

307

308

class RandomSolarize:

309

"""

310

Randomly solarize image with probability p.

311

312

Args:

313

threshold (float): Pixels with values equal to or above this threshold are inverted

314

p (float): Probability of solarization

315

"""

316

def __init__(self, threshold: float, p: float = 0.5): ...

317

318

class RandomAdjustSharpness:

319

"""

320

Randomly adjust sharpness with probability p.

321

322

Args:

323

sharpness_factor (float): Sharpness adjustment factor

324

p (float): Probability of adjustment

325

"""

326

def __init__(self, sharpness_factor: float, p: float = 0.5): ...

327

328

class RandomAutocontrast:

329

"""

330

Randomly apply autocontrast with probability p.

331

332

Args:

333

p (float): Probability of applying autocontrast

334

"""

335

def __init__(self, p: float = 0.5): ...

336

337

class RandomEqualize:

338

"""

339

Randomly equalize histogram with probability p.

340

341

Args:

342

p (float): Probability of equalization

343

"""

344

def __init__(self, p: float = 0.5): ...

345

```

346

347

#### Normalization and Utility Transforms

348

349

Statistical normalization and utility transformations.

350

351

```python { .api }

352

class Normalize:

353

"""

354

Normalize tensor with mean and standard deviation.

355

356

Args:

357

mean (sequence): Sequence of means for each channel

358

std (sequence): Sequence of standard deviations for each channel

359

inplace (bool): Make operation in-place

360

"""

361

def __init__(self, mean: list, std: list, inplace: bool = False): ...

362

363

class Lambda:

364

"""

365

Apply user-defined lambda function.

366

367

Args:

368

lambd (function): Lambda/function to be used for transform

369

"""

370

def __init__(self, lambd): ...

371

372

class LinearTransformation:

373

"""

374

Apply linear transformation using transformation matrix and mean vector.

375

376

Args:

377

transformation_matrix (Tensor): Transformation matrix

378

mean_vector (Tensor): Mean vector

379

"""

380

def __init__(self, transformation_matrix: torch.Tensor, mean_vector: torch.Tensor): ...

381

```

382

383

#### Auto-Augmentation Transforms

384

385

Automated augmentation policies for improved model robustness.

386

387

```python { .api }

388

class AutoAugment:

389

"""

390

AutoAugment data augmentation policy.

391

392

Args:

393

policy (AutoAugmentPolicy): AutoAugment policy to use

394

interpolation (InterpolationMode): Interpolation method

395

fill (sequence or number): Pixel fill value

396

"""

397

def __init__(self, policy=AutoAugmentPolicy.IMAGENET, interpolation=InterpolationMode.NEAREST, fill=None): ...

398

399

class RandAugment:

400

"""

401

RandAugment data augmentation.

402

403

Args:

404

num_ops (int): Number of augmentation transformations to apply

405

magnitude (int): Magnitude for all transformations

406

num_magnitude_bins (int): Number of magnitude bins

407

interpolation (InterpolationMode): Interpolation method

408

fill (sequence or number): Pixel fill value

409

"""

410

def __init__(self, num_ops: int = 2, magnitude: int = 9, num_magnitude_bins: int = 31, interpolation=InterpolationMode.NEAREST, fill=None): ...

411

412

class TrivialAugmentWide:

413

"""

414

TrivialAugment Wide augmentation policy.

415

416

Args:

417

num_magnitude_bins (int): Number of magnitude bins

418

interpolation (InterpolationMode): Interpolation method

419

fill (sequence or number): Pixel fill value

420

"""

421

def __init__(self, num_magnitude_bins: int = 31, interpolation=InterpolationMode.NEAREST, fill=None): ...

422

423

class AugMix:

424

"""

425

AugMix data augmentation.

426

427

Args:

428

severity (int): Severity level for base augmentations

429

mixture_width (int): Number of augmentation chains

430

chain_depth (int): Depth of augmentation chains

431

alpha (float): Parameter for Beta distribution

432

all_ops (bool): Use all available operations

433

interpolation (InterpolationMode): Interpolation method

434

fill (sequence or number): Pixel fill value

435

"""

436

def __init__(self, severity: int = 3, mixture_width: int = 3, chain_depth: int = -1, alpha: float = 1.0, all_ops: bool = True, interpolation=InterpolationMode.BILINEAR, fill=None): ...

437

438

class AutoAugmentPolicy:

439

"""AutoAugment policy constants."""

440

IMAGENET: str = "imagenet"

441

CIFAR10: str = "cifar10"

442

SVHN: str = "svhn"

443

```

444

445

#### Preset Transform Pipelines

446

447

Pre-configured transform pipelines for common tasks.

448

449

```python { .api }

450

class ImageClassification:

451

"""

452

Standard preprocessing for image classification.

453

454

Args:

455

crop_size (int): Size for center crop

456

resize_size (int): Size for resize operation

457

mean (tuple): Normalization mean

458

std (tuple): Normalization standard deviation

459

interpolation (InterpolationMode): Interpolation method

460

"""

461

def __init__(self, crop_size: int, resize_size: int = 256, mean: tuple = (0.485, 0.456, 0.406), std: tuple = (0.229, 0.224, 0.225), interpolation=InterpolationMode.BILINEAR): ...

462

463

class ObjectDetection:

464

"""Standard preprocessing for object detection."""

465

def __init__(self): ...

466

467

class SemanticSegmentation:

468

"""Standard preprocessing for semantic segmentation."""

469

def __init__(self): ...

470

471

class VideoClassification:

472

"""

473

Standard preprocessing for video classification.

474

475

Args:

476

crop_size (tuple): Size for crop

477

resize_size (tuple): Size for resize

478

mean (tuple): Normalization mean

479

std (tuple): Normalization standard deviation

480

"""

481

def __init__(self, crop_size: tuple = (224, 224), resize_size: tuple = (256, 256), mean: tuple = (0.43216, 0.394666, 0.37645), std: tuple = (0.22803, 0.22145, 0.216989)): ...

482

483

class OpticalFlow:

484

"""Standard preprocessing for optical flow."""

485

def __init__(self): ...

486

```

487

488

### Functional API

489

490

Low-level functional implementations of transforms.

491

492

```python { .api }

493

# Interpolation modes for transforms

494

class InterpolationMode:

495

NEAREST = "nearest"

496

NEAREST_EXACT = "nearest-exact"

497

BILINEAR = "bilinear"

498

BICUBIC = "bicubic"

499

BOX = "box"

500

HAMMING = "hamming"

501

LANCZOS = "lanczos"

502

503

# Geometric functions

504

def resize(img, size: list, interpolation=InterpolationMode.BILINEAR, max_size=None, antialias=None):

505

"""Resize image to given size."""

506

507

def center_crop(img, output_size: list):

508

"""Center crop image to output size."""

509

510

def crop(img, top: int, left: int, height: int, width: int):

511

"""Crop image at specified location."""

512

513

def pad(img, padding, fill: int = 0, padding_mode: str = 'constant'):

514

"""Pad image on all sides."""

515

516

def hflip(img):

517

"""Horizontally flip image."""

518

519

def vflip(img):

520

"""Vertically flip image."""

521

522

def rotate(img, angle: float, interpolation=InterpolationMode.NEAREST, expand: bool = False, center=None, fill: int = 0):

523

"""Rotate image by angle."""

524

525

def affine(img, angle: float, translate: list, scale: float, shear: list, interpolation=InterpolationMode.NEAREST, fill: int = 0, center=None):

526

"""Apply affine transformation."""

527

528

def perspective(img, startpoints: list, endpoints: list, interpolation=InterpolationMode.BILINEAR, fill: int = 0):

529

"""Apply perspective transformation."""

530

531

def five_crop(img, size: list):

532

"""Create five crops of image."""

533

534

def ten_crop(img, size: list, vertical_flip: bool = False):

535

"""Create ten crops of image."""

536

537

# Color functions

538

def adjust_brightness(img, brightness_factor: float):

539

"""Adjust brightness of image."""

540

541

def adjust_contrast(img, contrast_factor: float):

542

"""Adjust contrast of image."""

543

544

def adjust_saturation(img, saturation_factor: float):

545

"""Adjust saturation of image."""

546

547

def adjust_hue(img, hue_factor: float):

548

"""Adjust hue of image."""

549

550

def adjust_gamma(img, gamma: float, gain: float = 1):

551

"""Adjust gamma of image."""

552

553

def adjust_sharpness(img, sharpness_factor: float):

554

"""Adjust sharpness of image."""

555

556

def rgb_to_grayscale(img, num_output_channels: int = 1):

557

"""Convert RGB image to grayscale."""

558

559

def to_grayscale(img, num_output_channels: int = 1):

560

"""Convert image to grayscale."""

561

562

def gaussian_blur(img, kernel_size: list, sigma=None):

563

"""Apply Gaussian blur to image."""

564

565

def invert(img):

566

"""Invert colors of image."""

567

568

def posterize(img, bits: int):

569

"""Posterize image."""

570

571

def solarize(img, threshold: float):

572

"""Solarize image."""

573

574

def autocontrast(img):

575

"""Apply autocontrast to image."""

576

577

def equalize(img):

578

"""Equalize histogram of image."""

579

580

# Conversion functions

581

def to_tensor(pic):

582

"""Convert PIL Image or numpy array to tensor."""

583

584

def to_pil_image(pic, mode=None):

585

"""Convert tensor or ndarray to PIL Image."""

586

587

def pil_to_tensor(pic):

588

"""Convert PIL Image to tensor without scaling."""

589

590

def convert_image_dtype(image, dtype: torch.dtype):

591

"""Convert image tensor dtype."""

592

593

def normalize(tensor, mean: list, std: list, inplace: bool = False):

594

"""Normalize tensor with mean and std."""

595

596

# Utility functions

597

def get_image_size(img):

598

"""Get image size as [width, height]."""

599

600

def get_image_num_channels(img):

601

"""Get number of channels in image."""

602

```

603

604

### v2 Transforms API

605

606

Enhanced transforms API with multi-tensor support for images, videos, bounding boxes, and masks.

607

608

```python { .api }

609

class Transform:

610

"""Base class for all v2 transforms."""

611

612

# Type conversion v2

613

class ToImage:

614

"""Convert a tensor, ndarray, or PIL Image to a tv_tensors.Image without scaling values."""

615

616

class ToPILImage:

617

"""Convert to PIL Image with v2 support."""

618

619

class PILToTensor:

620

"""Convert PIL to tensor with v2 support."""

621

622

class ToPureTensor:

623

"""Convert all TVTensors to pure tensors, removing any associated metadata."""

624

625

class ToDtype:

626

"""

627

Convert to specified dtype.

628

629

Args:

630

dtype (torch.dtype): Target dtype

631

scale (bool): Scale values when converting

632

"""

633

def __init__(self, dtype: torch.dtype, scale: bool = False): ...

634

635

# Container transforms v2

636

class Compose:

637

"""Compose transforms with multi-tensor support."""

638

639

class RandomApply:

640

"""Apply transforms randomly with multi-tensor support."""

641

642

class RandomChoice:

643

"""Choose random transform with multi-tensor support."""

644

645

class RandomOrder:

646

"""Apply in random order with multi-tensor support."""

647

648

# Enhanced geometric transforms

649

class Resize:

650

"""Resize with multi-tensor support including bounding boxes."""

651

652

class CenterCrop:

653

"""Center crop with bounding box support."""

654

655

class RandomCrop:

656

"""Random crop with mask and bounding box support."""

657

658

class RandomResizedCrop:

659

"""Random resized crop with multi-tensor support."""

660

661

class RandomHorizontalFlip:

662

"""Horizontal flip with bounding box support."""

663

664

class RandomVerticalFlip:

665

"""Vertical flip with bounding box support."""

666

667

class RandomRotation:

668

"""Rotation with bounding box support."""

669

670

class RandomAffine:

671

"""Affine transformation with bounding box support."""

672

673

class RandomPerspective:

674

"""Perspective transformation with v2 support."""

675

676

class ElasticTransform:

677

"""Elastic transformation with v2 support."""

678

679

class RandomIoUCrop:

680

"""

681

IoU-aware random crop for object detection.

682

683

Args:

684

min_scale (float): Minimum scale for cropping

685

max_scale (float): Maximum scale for cropping

686

min_aspect_ratio (float): Minimum aspect ratio

687

max_aspect_ratio (float): Maximum aspect ratio

688

sampler_options (list): List of sampling options

689

trials (int): Number of trials for finding valid crop

690

"""

691

def __init__(self, min_scale: float = 0.3, max_scale: float = 1.0, min_aspect_ratio: float = 0.5, max_aspect_ratio: float = 2.0, sampler_options=None, trials: int = 40): ...

692

693

class RandomZoomOut:

694

"""

695

Random zoom out transformation.

696

697

Args:

698

fill (number or tuple): Fill value for expanded area

699

side_range (tuple): Range for zoom out factor

700

p (float): Probability of applying zoom out

701

"""

702

def __init__(self, fill: int = 0, side_range: tuple = (1.0, 4.0), p: float = 0.5): ...

703

704

class RandomShortestSize:

705

"""

706

Random shortest size resize.

707

708

Args:

709

min_size (int or list): Minimum size for shortest edge

710

max_size (int, optional): Maximum size for longest edge

711

interpolation (InterpolationMode): Interpolation method

712

"""

713

def __init__(self, min_size, max_size=None, interpolation=InterpolationMode.BILINEAR): ...

714

715

class RandomResize:

716

"""

717

Random resize within range.

718

719

Args:

720

min_size (int): Minimum size

721

max_size (int): Maximum size

722

interpolation (InterpolationMode): Interpolation method

723

"""

724

def __init__(self, min_size: int, max_size: int, interpolation=InterpolationMode.BILINEAR): ...

725

726

class ScaleJitter:

727

"""

728

Scale jittering transform.

729

730

Args:

731

target_size (tuple): Target size

732

scale_range (tuple): Range for scale jittering

733

interpolation (InterpolationMode): Interpolation method

734

"""

735

def __init__(self, target_size: tuple, scale_range: tuple = (0.1, 2.0), interpolation=InterpolationMode.BILINEAR): ...

736

737

# Enhanced color transforms v2

738

class ColorJitter:

739

"""Color jittering with v2 support."""

740

741

class RandomChannelPermutation:

742

"""Randomly permute image channels."""

743

744

class RandomPhotometricDistort:

745

"""

746

Photometric distortion for data augmentation.

747

748

Args:

749

brightness (tuple): Range for brightness adjustment

750

contrast (tuple): Range for contrast adjustment

751

saturation (tuple): Range for saturation adjustment

752

hue (tuple): Range for hue adjustment

753

p (float): Probability of applying distortion

754

"""

755

def __init__(self, brightness: tuple = (0.875, 1.125), contrast: tuple = (0.5, 1.5), saturation: tuple = (0.5, 1.5), hue: tuple = (-0.05, 0.05), p: float = 0.5): ...

756

757

class RGB:

758

"""Convert images or videos to RGB (if they are not RGB already)."""

759

760

class GaussianNoise:

761

"""

762

Add Gaussian noise to image.

763

764

Args:

765

mean (float): Mean of Gaussian noise

766

sigma (float or tuple): Standard deviation of noise

767

"""

768

def __init__(self, mean: float = 0.0, sigma: tuple = (0.1, 2.0)): ...

769

770

# Augmentation transforms v2

771

class MixUp:

772

"""

773

MixUp data augmentation.

774

775

Args:

776

alpha (float): Parameter for Beta distribution

777

num_classes (int): Number of classes

778

labels_getter (callable): Function to get labels

779

"""

780

def __init__(self, alpha: float = 1.0, num_classes: int = None, labels_getter=None): ...

781

782

class CutMix:

783

"""

784

CutMix data augmentation.

785

786

Args:

787

alpha (float): Parameter for Beta distribution

788

num_classes (int): Number of classes

789

labels_getter (callable): Function to get labels

790

"""

791

def __init__(self, alpha: float = 1.0, num_classes: int = None, labels_getter=None): ...

792

793

class RandomErasing:

794

"""

795

Random erasing data augmentation.

796

797

Args:

798

p (float): Probability of applying random erasing

799

scale (tuple): Range of proportion of erased area

800

ratio (tuple): Range of aspect ratio of erased area

801

value (number or str): Erasing value

802

inplace (bool): Make operation in-place

803

"""

804

def __init__(self, p: float = 0.5, scale: tuple = (0.02, 0.33), ratio: tuple = (0.3, 3.3), value: int = 0, inplace: bool = False): ...

805

806

class JPEG:

807

"""

808

JPEG compression simulation.

809

810

Args:

811

quality (tuple or int): JPEG quality range

812

"""

813

def __init__(self, quality: tuple = (25, 100)): ...

814

815

# Metadata transforms v2

816

class ClampBoundingBoxes:

817

"""Clamp bounding boxes to image bounds."""

818

819

class ClampKeyPoints:

820

"""Clamp keypoints to image bounds."""

821

822

class ConvertBoundingBoxFormat:

823

"""

824

Convert bounding box format.

825

826

Args:

827

format (BoundingBoxFormat): Target format

828

"""

829

def __init__(self, format): ...

830

831

class SanitizeBoundingBoxes:

832

"""

833

Remove invalid bounding boxes.

834

835

Args:

836

min_size (float): Minimum box size

837

labels_getter (callable): Function to get labels

838

"""

839

def __init__(self, min_size: float = 1.0, labels_getter=None): ...

840

841

# Temporal transforms v2

842

class UniformTemporalSubsample:

843

"""

844

Uniform temporal subsampling for video.

845

846

Args:

847

num_samples (int): Number of samples to extract

848

"""

849

def __init__(self, num_samples: int): ...

850

851

# Utility functions v2

852

def check_type(inpt, type_sequence):

853

"""Check input types."""

854

855

def get_bounding_boxes(inpt):

856

"""Extract bounding boxes from input."""

857

858

def has_all(*types):

859

"""Check if input has all specified types."""

860

861

def has_any(*types):

862

"""Check if input has any specified type."""

863

864

def query_chw(flat_inputs):

865

"""Query CHW dimensions from inputs."""

866

867

def query_size(flat_inputs):

868

"""Query spatial size from inputs."""

869

```

870

871

## Usage Examples

872

873

### Basic Image Preprocessing

874

875

```python

876

from torchvision import transforms

877

import torch

878

879

# Standard ImageNet preprocessing

880

transform = transforms.Compose([

881

transforms.Resize(256),

882

transforms.CenterCrop(224),

883

transforms.ToTensor(),

884

transforms.Normalize(

885

mean=[0.485, 0.456, 0.406],

886

std=[0.229, 0.224, 0.225]

887

)

888

])

889

890

# Apply to PIL image

891

from PIL import Image

892

image = Image.open('image.jpg')

893

tensor = transform(image)

894

```

895

896

### Data Augmentation Pipeline

897

898

```python

899

from torchvision import transforms

900

901

# Training augmentations

902

train_transform = transforms.Compose([

903

transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),

904

transforms.RandomHorizontalFlip(p=0.5),

905

transforms.ColorJitter(

906

brightness=0.2,

907

contrast=0.2,

908

saturation=0.2,

909

hue=0.1

910

),

911

transforms.RandomRotation(degrees=10),

912

transforms.ToTensor(),

913

transforms.Normalize(

914

mean=[0.485, 0.456, 0.406],

915

std=[0.229, 0.224, 0.225]

916

),

917

transforms.RandomErasing(p=0.1)

918

])

919

```

920

921

### v2 Transforms for Object Detection

922

923

```python

924

from torchvision.transforms import v2

925

from torchvision.tv_tensors import BoundingBoxes, Image

926

927

# Object detection preprocessing

928

transform = v2.Compose([

929

v2.ToImage(),

930

v2.RandomHorizontalFlip(p=0.5),

931

v2.RandomIoUCrop(),

932

v2.Resize(size=(640, 640)),

933

v2.ToDtype(torch.float32, scale=True),

934

v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

935

])

936

937

# Apply to image and bounding boxes

938

image = Image(torch.randint(0, 256, (3, 480, 640), dtype=torch.uint8))

939

boxes = BoundingBoxes(

940

torch.tensor([[10, 10, 100, 100], [200, 200, 300, 300]]),

941

format='XYXY',

942

canvas_size=(480, 640)

943

)

944

945

transformed_image, transformed_boxes = transform(image, boxes)

946

```

947

948

### Functional API Usage

949

950

```python

951

from torchvision.transforms import functional as F

952

import torch

953

954

# Using functional API for custom transforms

955

def custom_transform(image):

956

# Apply specific sequence of transforms

957

image = F.resize(image, [256, 256])

958

image = F.center_crop(image, [224, 224])

959

image = F.to_tensor(image)

960

961

# Conditional augmentation

962

if torch.rand(1) > 0.5:

963

image = F.hflip(image)

964

965

image = F.normalize(image, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

966

return image

967

```

968

969

### Video Transforms

970

971

```python

972

from torchvision.transforms import v2

973

974

# Video preprocessing pipeline

975

video_transform = v2.Compose([

976

v2.UniformTemporalSubsample(16), # Sample 16 frames

977

v2.Resize((224, 224)),

978

v2.RandomHorizontalFlip(p=0.5),

979

v2.ToDtype(torch.float32, scale=True),

980

v2.Normalize(mean=[0.43216, 0.394666, 0.37645],

981

std=[0.22803, 0.22145, 0.216989])

982

])

983

984

# Apply to video tensor (T, C, H, W)

985

video_tensor = torch.randint(0, 256, (32, 3, 256, 256), dtype=torch.uint8)

986

transformed_video = video_transform(video_tensor)

987

```

988

989

### AutoAugment Policies

990

991

```python

992

from torchvision import transforms

993

994

# Using AutoAugment

995

transform = transforms.Compose([

996

transforms.Resize(256),

997

transforms.AutoAugment(policy=transforms.AutoAugmentPolicy.IMAGENET),

998

transforms.CenterCrop(224),

999

transforms.ToTensor(),

1000

transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

1001

])

1002

1003

# Using RandAugment

1004

transform_rand = transforms.Compose([

1005

transforms.Resize(256),

1006

transforms.RandAugment(num_ops=2, magnitude=15),

1007

transforms.CenterCrop(224),

1008

transforms.ToTensor(),

1009

transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

1010

])

1011

```