0
# Transforms
1
2
TorchVision provides comprehensive image and video preprocessing and augmentation capabilities. The transforms module includes both v1 (traditional PIL/tensor) and v2 (multi-tensor) APIs, functional implementations, and preset transform pipelines for common use cases.
3
4
## Capabilities
5
6
### Core Transform Classes
7
8
#### Container Transforms
9
10
Transforms that compose and apply multiple transformations.
11
12
```python { .api }
13
class Compose:
14
"""
15
Composes several transforms together.
16
17
Args:
18
transforms (list): List of transforms to compose
19
"""
20
def __init__(self, transforms: list): ...
21
def __call__(self, img): ...
22
23
class RandomApply:
24
"""
25
Apply list of transforms randomly with probability p.
26
27
Args:
28
transforms (list): List of transforms to apply
29
p (float): Probability of applying transforms
30
"""
31
def __init__(self, transforms: list, p: float = 0.5): ...
32
33
class RandomChoice:
34
"""
35
Apply single random transform from list.
36
37
Args:
38
transforms (list): List of transforms to choose from
39
"""
40
def __init__(self, transforms: list): ...
41
42
class RandomOrder:
43
"""
44
Apply transforms in random order.
45
46
Args:
47
transforms (list): List of transforms to apply in random order
48
"""
49
def __init__(self, transforms: list): ...
50
```
51
52
#### Type Conversion Transforms
53
54
Transforms for converting between different data types and formats.
55
56
```python { .api }
57
class ToTensor:
58
"""
59
Convert PIL Image or numpy array to tensor.
60
Converts PIL Image or numpy.ndarray (H x W x C) in range [0, 255]
61
to torch.FloatTensor of shape (C x H x W) in range [0.0, 1.0].
62
"""
63
def __call__(self, pic): ...
64
65
class PILToTensor:
66
"""
67
Convert PIL Image to tensor without scaling.
68
Converts PIL Image to torch.Tensor without scaling values.
69
"""
70
def __call__(self, pic): ...
71
72
class ToPILImage:
73
"""
74
Convert tensor or ndarray to PIL Image.
75
76
Args:
77
mode (str, optional): Color mode for output image
78
"""
79
def __init__(self, mode=None): ...
80
81
class ConvertImageDtype:
82
"""
83
Convert tensor image to given dtype.
84
85
Args:
86
dtype (torch.dtype): Desired data type
87
"""
88
def __init__(self, dtype: torch.dtype): ...
89
```
90
91
#### Geometric Transforms
92
93
Spatial transformations for resizing, cropping, and geometric augmentation.
94
95
```python { .api }
96
class Resize:
97
"""
98
Resize input to given size.
99
100
Args:
101
size (int or tuple): Desired output size
102
interpolation (InterpolationMode): Interpolation method
103
max_size (int, optional): Maximum allowed length of the longer edge after resizing; only supported when size is an int
104
antialias (bool, optional): Apply antialiasing
105
"""
106
def __init__(self, size, interpolation=InterpolationMode.BILINEAR, max_size=None, antialias=None): ...
107
108
class CenterCrop:
109
"""
110
Crop image at center.
111
112
Args:
113
size (int or tuple): Desired output size
114
"""
115
def __init__(self, size): ...
116
117
class RandomCrop:
118
"""
119
Crop image at random location.
120
121
Args:
122
size (int or tuple): Desired output size
123
padding (int or tuple, optional): Padding on each border
124
pad_if_needed (bool): Pad if image smaller than crop size
125
fill (number or tuple): Fill value for padding
126
padding_mode (str): Padding mode ('constant', 'edge', 'reflect', 'symmetric')
127
"""
128
def __init__(self, size, padding=None, pad_if_needed: bool = False, fill: int = 0, padding_mode: str = 'constant'): ...
129
130
class RandomResizedCrop:
131
"""
132
Random crop with resize to target size.
133
134
Args:
135
size (int or tuple): Expected output size
136
scale (tuple): Lower and upper bounds for the area of the random crop, as a fraction of the original image area
137
ratio (tuple): Lower and upper bounds for the aspect ratio of the random crop, before resizing
138
interpolation (InterpolationMode): Interpolation method
139
antialias (bool, optional): Apply antialiasing
140
"""
141
def __init__(self, size, scale: tuple = (0.08, 1.0), ratio: tuple = (3./4., 4./3.), interpolation=InterpolationMode.BILINEAR, antialias=None): ...
142
143
class FiveCrop:
144
"""
145
Crop image into four corners and center.
146
147
Args:
148
size (int or tuple): Desired output size
149
"""
150
def __init__(self, size): ...
151
152
class TenCrop:
153
"""
154
Create 10 crops: 5 crops + horizontally flipped versions.
155
156
Args:
157
size (int or tuple): Desired output size
158
vertical_flip (bool): Use vertical flip instead of horizontal
159
"""
160
def __init__(self, size, vertical_flip: bool = False): ...
161
162
class Pad:
163
"""
164
Pad image on all sides with given pad value.
165
166
Args:
167
padding (int or tuple): Padding on each border
168
fill (number or tuple): Fill value for constant fill
169
padding_mode (str): Padding mode
170
"""
171
def __init__(self, padding, fill: int = 0, padding_mode: str = 'constant'): ...
172
173
class RandomHorizontalFlip:
174
"""
175
Randomly flip image horizontally with probability p.
176
177
Args:
178
p (float): Probability of flip
179
"""
180
def __init__(self, p: float = 0.5): ...
181
182
class RandomVerticalFlip:
183
"""
184
Randomly flip image vertically with probability p.
185
186
Args:
187
p (float): Probability of flip
188
"""
189
def __init__(self, p: float = 0.5): ...
190
191
class RandomRotation:
192
"""
193
Rotate image by random angle.
194
195
Args:
196
degrees (number or tuple): Range of degrees to select from
197
interpolation (InterpolationMode): Interpolation method
198
expand (bool): Expand output to fit rotated image
199
center (tuple, optional): Center of rotation
200
fill (number or tuple): Fill value for area outside rotated image
201
"""
202
def __init__(self, degrees, interpolation=InterpolationMode.NEAREST, expand: bool = False, center=None, fill: int = 0): ...
203
204
class RandomAffine:
205
"""
206
Random affine transformation.
207
208
Args:
209
degrees (number or tuple): Range of degrees for rotation
210
translate (tuple, optional): Range of translations
211
scale (tuple, optional): Range of scale factors
212
shear (number or tuple, optional): Range of shear angles
213
interpolation (InterpolationMode): Interpolation method
214
fill (number or tuple): Fill value
215
center (tuple, optional): Center point for transformations
216
"""
217
def __init__(self, degrees, translate=None, scale=None, shear=None, interpolation=InterpolationMode.NEAREST, fill: int = 0, center=None): ...
218
219
class RandomPerspective:
220
"""
221
Random perspective transformation.
222
223
Args:
224
distortion_scale (float): Argument to control degree of distortion
225
p (float): Probability of applying transformation
226
interpolation (InterpolationMode): Interpolation method
227
fill (number or tuple): Fill value
228
"""
229
def __init__(self, distortion_scale: float = 0.5, p: float = 0.5, interpolation=InterpolationMode.BILINEAR, fill: int = 0): ...
230
231
class ElasticTransform:
232
"""
233
Random elastic transformation.
234
235
Args:
236
alpha (float or tuple): Magnitude of displacement
237
sigma (float or tuple): Standard deviation of Gaussian kernel
238
interpolation (InterpolationMode): Interpolation method
239
fill (number or tuple): Fill value
240
"""
241
def __init__(self, alpha: float = 50.0, sigma: float = 5.0, interpolation=InterpolationMode.BILINEAR, fill: int = 0): ...
242
```
243
244
#### Color Transforms
245
246
Photometric transformations for color manipulation and augmentation.
247
248
```python { .api }
249
class ColorJitter:
250
"""
251
Randomly change brightness, contrast, saturation, and hue.
252
253
Args:
254
brightness (float or tuple): How much to jitter brightness
255
contrast (float or tuple): How much to jitter contrast
256
saturation (float or tuple): How much to jitter saturation
257
hue (float or tuple): How much to jitter hue
258
"""
259
def __init__(self, brightness: float = 0, contrast: float = 0, saturation: float = 0, hue: float = 0): ...
260
261
class Grayscale:
262
"""
263
Convert image to grayscale.
264
265
Args:
266
num_output_channels (int): Number of channels for output (1 or 3)
267
"""
268
def __init__(self, num_output_channels: int = 1): ...
269
270
class RandomGrayscale:
271
"""
272
Randomly convert image to grayscale with probability p.
273
274
Args:
275
p (float): Probability of conversion to grayscale
276
"""
277
def __init__(self, p: float = 0.1): ...
278
279
class GaussianBlur:
280
"""
281
Apply Gaussian blur to image.
282
283
Args:
284
kernel_size (int or tuple): Size of Gaussian kernel
285
sigma (float or tuple): Standard deviation for Gaussian kernel
286
"""
287
def __init__(self, kernel_size, sigma: tuple = (0.1, 2.0)): ...
288
289
class RandomInvert:
290
"""
291
Randomly invert colors of image with probability p.
292
293
Args:
294
p (float): Probability of inversion
295
"""
296
def __init__(self, p: float = 0.5): ...
297
298
class RandomPosterize:
299
"""
300
Randomly posterize image with probability p.
301
302
Args:
303
bits (int): Number of bits to keep for each channel
304
p (float): Probability of posterization
305
"""
306
def __init__(self, bits: int, p: float = 0.5): ...
307
308
class RandomSolarize:
309
"""
310
Randomly solarize image with probability p.
311
312
Args:
313
threshold (float): Threshold above which pixels are inverted
314
p (float): Probability of solarization
315
"""
316
def __init__(self, threshold: float, p: float = 0.5): ...
317
318
class RandomAdjustSharpness:
319
"""
320
Randomly adjust sharpness with probability p.
321
322
Args:
323
sharpness_factor (float): Sharpness adjustment factor
324
p (float): Probability of adjustment
325
"""
326
def __init__(self, sharpness_factor: float, p: float = 0.5): ...
327
328
class RandomAutocontrast:
329
"""
330
Randomly apply autocontrast with probability p.
331
332
Args:
333
p (float): Probability of applying autocontrast
334
"""
335
def __init__(self, p: float = 0.5): ...
336
337
class RandomEqualize:
338
"""
339
Randomly equalize histogram with probability p.
340
341
Args:
342
p (float): Probability of equalization
343
"""
344
def __init__(self, p: float = 0.5): ...
345
```
346
347
#### Normalization and Utility Transforms
348
349
Statistical normalization and utility transformations.
350
351
```python { .api }
352
class Normalize:
353
"""
354
Normalize tensor with mean and standard deviation.
355
356
Args:
357
mean (sequence): Sequence of means for each channel
358
std (sequence): Sequence of standard deviations for each channel
359
inplace (bool): Make operation in-place
360
"""
361
def __init__(self, mean: list, std: list, inplace: bool = False): ...
362
363
class Lambda:
364
"""
365
Apply user-defined lambda function.
366
367
Args:
368
lambd (function): Lambda/function to be used for transform
369
"""
370
def __init__(self, lambd): ...
371
372
class LinearTransformation:
373
"""
374
Apply linear transformation using transformation matrix and mean vector.
375
376
Args:
377
transformation_matrix (Tensor): Transformation matrix
378
mean_vector (Tensor): Mean vector
379
"""
380
def __init__(self, transformation_matrix: torch.Tensor, mean_vector: torch.Tensor): ...
381
```
382
383
#### Auto-Augmentation Transforms
384
385
Automated augmentation policies for improved model robustness.
386
387
```python { .api }
388
class AutoAugment:
389
"""
390
AutoAugment data augmentation policy.
391
392
Args:
393
policy (AutoAugmentPolicy): AutoAugment policy to use
394
interpolation (InterpolationMode): Interpolation method
395
fill (sequence or number): Pixel fill value
396
"""
397
def __init__(self, policy=AutoAugmentPolicy.IMAGENET, interpolation=InterpolationMode.NEAREST, fill=None): ...
398
399
class RandAugment:
400
"""
401
RandAugment data augmentation.
402
403
Args:
404
num_ops (int): Number of augmentation transformations to apply
405
magnitude (int): Magnitude for all transformations
406
num_magnitude_bins (int): Number of magnitude bins
407
interpolation (InterpolationMode): Interpolation method
408
fill (sequence or number): Pixel fill value
409
"""
410
def __init__(self, num_ops: int = 2, magnitude: int = 9, num_magnitude_bins: int = 31, interpolation=InterpolationMode.NEAREST, fill=None): ...
411
412
class TrivialAugmentWide:
413
"""
414
TrivialAugment Wide augmentation policy.
415
416
Args:
417
num_magnitude_bins (int): Number of magnitude bins
418
interpolation (InterpolationMode): Interpolation method
419
fill (sequence or number): Pixel fill value
420
"""
421
def __init__(self, num_magnitude_bins: int = 31, interpolation=InterpolationMode.NEAREST, fill=None): ...
422
423
class AugMix:
424
"""
425
AugMix data augmentation.
426
427
Args:
428
severity (int): Severity level for base augmentations
429
mixture_width (int): Number of augmentation chains
430
chain_depth (int): Depth of augmentation chains
431
alpha (float): Parameter for Beta distribution
432
all_ops (bool): Use all available operations
433
interpolation (InterpolationMode): Interpolation method
434
fill (sequence or number): Pixel fill value
435
"""
436
def __init__(self, severity: int = 3, mixture_width: int = 3, chain_depth: int = -1, alpha: float = 1.0, all_ops: bool = True, interpolation=InterpolationMode.BILINEAR, fill=None): ...
437
438
class AutoAugmentPolicy:
439
"""AutoAugment policy constants."""
440
IMAGENET: str = "imagenet"
441
CIFAR10: str = "cifar10"
442
SVHN: str = "svhn"
443
```
444
445
#### Preset Transform Pipelines
446
447
Pre-configured transform pipelines for common tasks.
448
449
```python { .api }
450
class ImageClassification:
451
"""
452
Standard preprocessing for image classification.
453
454
Args:
455
crop_size (int): Size for center crop
456
resize_size (int): Size for resize operation
457
mean (tuple): Normalization mean
458
std (tuple): Normalization standard deviation
459
interpolation (InterpolationMode): Interpolation method
460
"""
461
def __init__(self, crop_size: int, resize_size: int = 256, mean: tuple = (0.485, 0.456, 0.406), std: tuple = (0.229, 0.224, 0.225), interpolation=InterpolationMode.BILINEAR): ...
462
463
class ObjectDetection:
464
"""Standard preprocessing for object detection."""
465
def __init__(self): ...
466
467
class SemanticSegmentation:
468
"""Standard preprocessing for semantic segmentation."""
469
def __init__(self): ...
470
471
class VideoClassification:
472
"""
473
Standard preprocessing for video classification.
474
475
Args:
476
crop_size (tuple): Size for crop
477
resize_size (tuple): Size for resize
478
mean (tuple): Normalization mean
479
std (tuple): Normalization standard deviation
480
"""
481
def __init__(self, crop_size: tuple = (224, 224), resize_size: tuple = (256, 256), mean: tuple = (0.43216, 0.394666, 0.37645), std: tuple = (0.22803, 0.22145, 0.216989)): ...
482
483
class OpticalFlow:
484
"""Standard preprocessing for optical flow."""
485
def __init__(self): ...
486
```
487
488
### Functional API
489
490
Low-level functional implementations of transforms.
491
492
```python { .api }
493
# Interpolation modes for transforms
494
class InterpolationMode:
495
NEAREST = "nearest"
496
NEAREST_EXACT = "nearest-exact"
497
BILINEAR = "bilinear"
498
BICUBIC = "bicubic"
499
BOX = "box"
500
HAMMING = "hamming"
501
LANCZOS = "lanczos"
502
503
# Geometric functions
504
def resize(img, size: list, interpolation=InterpolationMode.BILINEAR, max_size=None, antialias=None):
505
"""Resize image to given size."""
506
507
def center_crop(img, output_size: list):
508
"""Center crop image to output size."""
509
510
def crop(img, top: int, left: int, height: int, width: int):
511
"""Crop image at specified location."""
512
513
def pad(img, padding, fill: int = 0, padding_mode: str = 'constant'):
514
"""Pad image on all sides."""
515
516
def hflip(img):
517
"""Horizontally flip image."""
518
519
def vflip(img):
520
"""Vertically flip image."""
521
522
def rotate(img, angle: float, interpolation=InterpolationMode.NEAREST, expand: bool = False, center=None, fill: int = 0):
523
"""Rotate image by angle."""
524
525
def affine(img, angle: float, translate: list, scale: float, shear: list, interpolation=InterpolationMode.NEAREST, fill: int = 0, center=None):
526
"""Apply affine transformation."""
527
528
def perspective(img, startpoints: list, endpoints: list, interpolation=InterpolationMode.BILINEAR, fill: int = 0):
529
"""Apply perspective transformation."""
530
531
def five_crop(img, size: list):
532
"""Create five crops of image."""
533
534
def ten_crop(img, size: list, vertical_flip: bool = False):
535
"""Create ten crops of image."""
536
537
# Color functions
538
def adjust_brightness(img, brightness_factor: float):
539
"""Adjust brightness of image."""
540
541
def adjust_contrast(img, contrast_factor: float):
542
"""Adjust contrast of image."""
543
544
def adjust_saturation(img, saturation_factor: float):
545
"""Adjust saturation of image."""
546
547
def adjust_hue(img, hue_factor: float):
548
"""Adjust hue of image."""
549
550
def adjust_gamma(img, gamma: float, gain: float = 1):
551
"""Adjust gamma of image."""
552
553
def adjust_sharpness(img, sharpness_factor: float):
554
"""Adjust sharpness of image."""
555
556
def rgb_to_grayscale(img, num_output_channels: int = 1):
557
"""Convert RGB image to grayscale."""
558
559
def to_grayscale(img, num_output_channels: int = 1):
560
"""Convert image to grayscale."""
561
562
def gaussian_blur(img, kernel_size: list, sigma=None):
563
"""Apply Gaussian blur to image."""
564
565
def invert(img):
566
"""Invert colors of image."""
567
568
def posterize(img, bits: int):
569
"""Posterize image."""
570
571
def solarize(img, threshold: float):
572
"""Solarize image."""
573
574
def autocontrast(img):
575
"""Apply autocontrast to image."""
576
577
def equalize(img):
578
"""Equalize histogram of image."""
579
580
# Conversion functions
581
def to_tensor(pic):
582
"""Convert PIL Image or numpy array to tensor."""
583
584
def to_pil_image(pic, mode=None):
585
"""Convert tensor to PIL Image."""
586
587
def pil_to_tensor(pic):
588
"""Convert PIL Image to tensor without scaling."""
589
590
def convert_image_dtype(image, dtype: torch.dtype):
591
"""Convert image tensor dtype."""
592
593
def normalize(tensor, mean: list, std: list, inplace: bool = False):
594
"""Normalize tensor with mean and std."""
595
596
# Utility functions
597
def get_image_size(img):
598
"""Get image size as [width, height]."""
599
600
def get_image_num_channels(img):
601
"""Get number of channels in image."""
602
```
603
604
### v2 Transforms API
605
606
Enhanced transforms API with multi-tensor support for images, videos, bounding boxes, and masks.
607
608
```python { .api }
609
class Transform:
610
"""Base class for all v2 transforms."""
611
612
# Type conversion v2
613
class ToImage:
614
"""Convert a tensor, ndarray, or PIL Image to a tv_tensors.Image without scaling values."""
615
616
class ToPILImage:
617
"""Convert to PIL Image with v2 support."""
618
619
class PILToTensor:
620
"""Convert PIL to tensor with v2 support."""
621
622
class ToPureTensor:
623
"""Convert all TVTensors to plain torch.Tensor objects, dropping any associated metadata."""
624
625
class ToDtype:
626
"""
627
Convert to specified dtype.
628
629
Args:
630
dtype (torch.dtype): Target dtype
631
scale (bool): Scale values when converting
632
"""
633
def __init__(self, dtype: torch.dtype, scale: bool = False): ...
634
635
# Container transforms v2
636
class Compose:
637
"""Compose transforms with multi-tensor support."""
638
639
class RandomApply:
640
"""Apply transforms randomly with multi-tensor support."""
641
642
class RandomChoice:
643
"""Choose random transform with multi-tensor support."""
644
645
class RandomOrder:
646
"""Apply in random order with multi-tensor support."""
647
648
# Enhanced geometric transforms
649
class Resize:
650
"""Resize with multi-tensor support including bounding boxes."""
651
652
class CenterCrop:
653
"""Center crop with bounding box support."""
654
655
class RandomCrop:
656
"""Random crop with mask and bounding box support."""
657
658
class RandomResizedCrop:
659
"""Random resized crop with multi-tensor support."""
660
661
class RandomHorizontalFlip:
662
"""Horizontal flip with bounding box support."""
663
664
class RandomVerticalFlip:
665
"""Vertical flip with bounding box support."""
666
667
class RandomRotation:
668
"""Rotation with bounding box support."""
669
670
class RandomAffine:
671
"""Affine transformation with bounding box support."""
672
673
class RandomPerspective:
674
"""Perspective transformation with v2 support."""
675
676
class ElasticTransform:
677
"""Elastic transformation with v2 support."""
678
679
class RandomIoUCrop:
680
"""
681
IoU-aware random crop for object detection.
682
683
Args:
684
min_scale (float): Minimum scale for cropping
685
max_scale (float): Maximum scale for cropping
686
min_aspect_ratio (float): Minimum aspect ratio
687
max_aspect_ratio (float): Maximum aspect ratio
688
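sampler_options (list of float, optional): Minimum IoU (Jaccard) overlaps between the boxes and a candidate crop; None uses [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0]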
689
trials (int): Number of trials for finding valid crop
690
"""
691
def __init__(self, min_scale: float = 0.3, max_scale: float = 1.0, min_aspect_ratio: float = 0.5, max_aspect_ratio: float = 2.0, sampler_options=None, trials: int = 40): ...
692
693
class RandomZoomOut:
694
"""
695
Random zoom out transformation.
696
697
Args:
698
fill (number or tuple): Fill value for expanded area
699
side_range (tuple): Range for zoom out factor
700
p (float): Probability of applying zoom out
701
"""
702
def __init__(self, fill: int = 0, side_range: tuple = (1.0, 4.0), p: float = 0.5): ...
703
704
class RandomShortestSize:
705
"""
706
Random shortest size resize.
707
708
Args:
709
min_size (int or list): Minimum size for shortest edge
710
max_size (int, optional): Maximum size for longest edge
711
interpolation (InterpolationMode): Interpolation method
712
"""
713
def __init__(self, min_size, max_size=None, interpolation=InterpolationMode.BILINEAR): ...
714
715
class RandomResize:
716
"""
717
Random resize within range.
718
719
Args:
720
min_size (int): Minimum size
721
max_size (int): Maximum size
722
interpolation (InterpolationMode): Interpolation method
723
"""
724
def __init__(self, min_size: int, max_size: int, interpolation=InterpolationMode.BILINEAR): ...
725
726
class ScaleJitter:
727
"""
728
Scale jittering transform.
729
730
Args:
731
target_size (tuple): Target size
732
scale_range (tuple): Range for scale jittering
733
interpolation (InterpolationMode): Interpolation method
734
"""
735
def __init__(self, target_size: tuple, scale_range: tuple = (0.1, 2.0), interpolation=InterpolationMode.BILINEAR): ...
736
737
# Enhanced color transforms v2
738
class ColorJitter:
739
"""Color jittering with v2 support."""
740
741
class RandomChannelPermutation:
742
"""Randomly permute image channels."""
743
744
class RandomPhotometricDistort:
745
"""
746
Photometric distortion for data augmentation.
747
748
Args:
749
brightness (tuple): Range for brightness adjustment
750
contrast (tuple): Range for contrast adjustment
751
saturation (tuple): Range for saturation adjustment
752
hue (tuple): Range for hue adjustment
753
p (float): Probability of applying distortion
754
"""
755
def __init__(self, brightness: tuple = (0.875, 1.125), contrast: tuple = (0.5, 1.5), saturation: tuple = (0.5, 1.5), hue: tuple = (-0.05, 0.05), p: float = 0.5): ...
756
757
class RGB:
758
"""Convert images or videos to RGB if they are not already RGB."""
759
760
class GaussianNoise:
761
"""
762
Add Gaussian noise to image.
763
764
Args:
765
mean (float): Mean of Gaussian noise
766
sigma (float or tuple): Standard deviation of noise
767
"""
768
def __init__(self, mean: float = 0.0, sigma: tuple = (0.1, 2.0)): ...
769
770
# Augmentation transforms v2
771
class MixUp:
772
"""
773
MixUp data augmentation.
774
775
Args:
776
alpha (float): Parameter for Beta distribution
777
num_classes (int): Number of classes
778
labels_getter (callable): Function to get labels
779
"""
780
def __init__(self, alpha: float = 1.0, num_classes: int = None, labels_getter=None): ...
781
782
class CutMix:
783
"""
784
CutMix data augmentation.
785
786
Args:
787
alpha (float): Parameter for Beta distribution
788
num_classes (int): Number of classes
789
labels_getter (callable): Function to get labels
790
"""
791
def __init__(self, alpha: float = 1.0, num_classes: int = None, labels_getter=None): ...
792
793
class RandomErasing:
794
"""
795
Random erasing data augmentation.
796
797
Args:
798
p (float): Probability of applying random erasing
799
scale (tuple): Range of proportion of erased area
800
ratio (tuple): Range of aspect ratio of erased area
801
value (number or str): Erasing value
802
inplace (bool): Make operation in-place
803
"""
804
def __init__(self, p: float = 0.5, scale: tuple = (0.02, 0.33), ratio: tuple = (0.3, 3.3), value: int = 0, inplace: bool = False): ...
805
806
class JPEG:
807
"""
808
JPEG compression simulation.
809
810
Args:
811
quality (tuple or int): JPEG quality range
812
"""
813
def __init__(self, quality: tuple = (25, 100)): ...
814
815
# Metadata transforms v2
816
class ClampBoundingBoxes:
817
"""Clamp bounding boxes to image bounds."""
818
819
class ClampKeyPoints:
820
"""Clamp keypoints to image bounds."""
821
822
class ConvertBoundingBoxFormat:
823
"""
824
Convert bounding box format.
825
826
Args:
827
format (BoundingBoxFormat): Target format
828
"""
829
def __init__(self, format): ...
830
831
class SanitizeBoundingBoxes:
832
"""
833
Remove invalid bounding boxes.
834
835
Args:
836
min_size (float): Minimum box size
837
labels_getter (callable): Function to get labels
838
"""
839
def __init__(self, min_size: float = 1.0, labels_getter=None): ...
840
841
# Temporal transforms v2
842
class UniformTemporalSubsample:
843
"""
844
Uniform temporal subsampling for video.
845
846
Args:
847
num_samples (int): Number of samples to extract
848
"""
849
def __init__(self, num_samples: int): ...
850
851
# Utility functions v2
852
def check_type(inpt, type_sequence):
853
"""Check input types."""
854
855
def get_bounding_boxes(inpt):
856
"""Extract bounding boxes from input."""
857
858
def has_all(*types):
859
"""Check if input has all specified types."""
860
861
def has_any(*types):
862
"""Check if input has any specified type."""
863
864
def query_chw(flat_inputs):
865
"""Query CHW dimensions from inputs."""
866
867
def query_size(flat_inputs):
868
"""Query spatial size from inputs."""
869
```
870
871
## Usage Examples
872
873
### Basic Image Preprocessing
874
875
```python
876
from torchvision import transforms
877
import torch
878
879
# Standard ImageNet preprocessing
880
transform = transforms.Compose([
881
transforms.Resize(256),
882
transforms.CenterCrop(224),
883
transforms.ToTensor(),
884
transforms.Normalize(
885
mean=[0.485, 0.456, 0.406],
886
std=[0.229, 0.224, 0.225]
887
)
888
])
889
890
# Apply to PIL image
891
from PIL import Image
892
image = Image.open('image.jpg')
893
tensor = transform(image)
894
```
895
896
### Data Augmentation Pipeline
897
898
```python
899
from torchvision import transforms
900
901
# Training augmentations
902
train_transform = transforms.Compose([
903
transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
904
transforms.RandomHorizontalFlip(p=0.5),
905
transforms.ColorJitter(
906
brightness=0.2,
907
contrast=0.2,
908
saturation=0.2,
909
hue=0.1
910
),
911
transforms.RandomRotation(degrees=10),
912
transforms.ToTensor(),
913
transforms.Normalize(
914
mean=[0.485, 0.456, 0.406],
915
std=[0.229, 0.224, 0.225]
916
),
917
transforms.RandomErasing(p=0.1)
918
])
919
```
920
921
### v2 Transforms for Object Detection
922
923
```python
924
import torch
from torchvision.transforms import v2
925
from torchvision.tv_tensors import BoundingBoxes, Image
926
927
# Object detection preprocessing
928
transform = v2.Compose([
929
v2.ToImage(),
930
v2.RandomHorizontalFlip(p=0.5),
931
v2.RandomIoUCrop(),
932
v2.Resize(size=(640, 640)),
933
v2.ToDtype(torch.float32, scale=True),
934
v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
935
])
936
937
# Apply to image and bounding boxes
938
image = Image(torch.randint(0, 256, (3, 480, 640), dtype=torch.uint8))
939
boxes = BoundingBoxes(
940
torch.tensor([[10, 10, 100, 100], [200, 200, 300, 300]]),
941
format='XYXY',
942
canvas_size=(480, 640)
943
)
944
945
transformed_image, transformed_boxes = transform(image, boxes)
946
```
947
948
### Functional API Usage
949
950
```python
951
from torchvision.transforms import functional as F
952
import torch
953
954
# Using functional API for custom transforms
955
def custom_transform(image):
956
# Apply specific sequence of transforms
957
image = F.resize(image, [256, 256])
958
image = F.center_crop(image, [224, 224])
959
image = F.to_tensor(image)
960
961
# Conditional augmentation
962
if torch.rand(1) > 0.5:
963
image = F.hflip(image)
964
965
image = F.normalize(image, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
966
return image
967
```
968
969
### Video Transforms
970
971
```python
972
import torch
from torchvision.transforms import v2
973
974
# Video preprocessing pipeline
975
video_transform = v2.Compose([
976
v2.UniformTemporalSubsample(16), # Sample 16 frames
977
v2.Resize((224, 224)),
978
v2.RandomHorizontalFlip(p=0.5),
979
v2.ToDtype(torch.float32, scale=True),
980
v2.Normalize(mean=[0.43216, 0.394666, 0.37645],
981
std=[0.22803, 0.22145, 0.216989])
982
])
983
984
# Apply to video tensor (T, C, H, W)
985
video_tensor = torch.randint(0, 256, (32, 3, 256, 256), dtype=torch.uint8)
986
transformed_video = video_transform(video_tensor)
987
```
988
989
### AutoAugment Policies
990
991
```python
992
from torchvision import transforms
993
994
# Using AutoAugment
995
transform = transforms.Compose([
996
transforms.Resize(256),
997
transforms.AutoAugment(policy=transforms.AutoAugmentPolicy.IMAGENET),
998
transforms.CenterCrop(224),
999
transforms.ToTensor(),
1000
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
1001
])
1002
1003
# Using RandAugment
1004
transform_rand = transforms.Compose([
1005
transforms.Resize(256),
1006
transforms.RandAugment(num_ops=2, magnitude=15),
1007
transforms.CenterCrop(224),
1008
transforms.ToTensor(),
1009
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
1010
])
1011
```