0
# Operations
1
2
The TorchVision `ops` module provides low-level operations and specialized neural network layers for computer vision tasks. It includes functions for bounding box manipulation, non-maximum suppression, region-of-interest operations, and loss computation, as well as custom layers used in modern computer vision architectures.
3
4
## Capabilities
5
6
### Bounding Box Operations
7
8
Functions for manipulating and analyzing bounding boxes in various formats.
9
10
```python { .api }
11
def box_area(boxes: torch.Tensor) -> torch.Tensor:
12
"""
13
Calculate area of bounding boxes.
14
15
Args:
16
boxes (torch.Tensor): Bounding boxes in format [x1, y1, x2, y2] of shape (..., 4)
17
18
Returns:
19
torch.Tensor: Areas of boxes with shape (...,)
20
"""
21
22
def box_convert(boxes: torch.Tensor, in_fmt: str, out_fmt: str) -> torch.Tensor:
23
"""
24
Convert bounding boxes between different formats.
25
26
Args:
27
boxes (torch.Tensor): Bounding boxes tensor of shape (..., 4)
28
in_fmt (str): Input format ('xyxy', 'xywh', 'cxcywh')
29
out_fmt (str): Output format ('xyxy', 'xywh', 'cxcywh')
30
31
Returns:
32
torch.Tensor: Converted bounding boxes
33
"""
34
35
def box_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:
36
"""
37
Calculate Intersection over Union (IoU) between two sets of boxes.
38
39
Args:
40
boxes1 (torch.Tensor): Boxes of shape (N, 4) in format [x1, y1, x2, y2]
41
boxes2 (torch.Tensor): Boxes of shape (M, 4) in format [x1, y1, x2, y2]
42
43
Returns:
44
torch.Tensor: IoU matrix of shape (N, M)
45
"""
46
47
def generalized_box_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:
48
"""
49
Calculate Generalized Intersection over Union (GIoU) between boxes.
50
51
Args:
52
boxes1 (torch.Tensor): Boxes of shape (N, 4)
53
boxes2 (torch.Tensor): Boxes of shape (M, 4)
54
55
Returns:
56
torch.Tensor: GIoU matrix of shape (N, M)
57
"""
58
59
def distance_box_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:
60
"""
61
Calculate Distance Intersection over Union (DIoU) between boxes.
62
63
Args:
64
boxes1 (torch.Tensor): Boxes of shape (N, 4)
65
boxes2 (torch.Tensor): Boxes of shape (M, 4)
66
67
Returns:
68
torch.Tensor: DIoU matrix of shape (N, M)
69
"""
70
71
def complete_box_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:
72
"""
73
Calculate Complete Intersection over Union (CIoU) between boxes.
74
75
Args:
76
boxes1 (torch.Tensor): Boxes of shape (N, 4)
77
boxes2 (torch.Tensor): Boxes of shape (M, 4)
78
79
Returns:
80
torch.Tensor: CIoU matrix of shape (N, M)
81
"""
82
83
def clip_boxes_to_image(boxes: torch.Tensor, size: tuple) -> torch.Tensor:
84
"""
85
Clip bounding boxes to image boundaries.
86
87
Args:
88
boxes (torch.Tensor): Boxes of shape (..., 4) in format [x1, y1, x2, y2]
89
size (tuple): Image size as (height, width)
90
91
Returns:
92
torch.Tensor: Clipped boxes
93
"""
94
95
def remove_small_boxes(boxes: torch.Tensor, min_size: float) -> torch.Tensor:
96
"""
97
Remove bounding boxes smaller than minimum size.
98
99
Args:
100
boxes (torch.Tensor): Boxes of shape (N, 4)
101
min_size (float): Minimum side length; boxes whose width or height is smaller than this are removed
102
103
Returns:
104
torch.Tensor: Indices of boxes to keep
105
"""
106
107
def masks_to_boxes(masks: torch.Tensor) -> torch.Tensor:
108
"""
109
Convert binary masks to bounding boxes.
110
111
Args:
112
masks (torch.Tensor): Binary masks of shape (N, H, W)
113
114
Returns:
115
torch.Tensor: Bounding boxes of shape (N, 4) in format [x1, y1, x2, y2]
116
"""
117
```
118
119
### Non-Maximum Suppression
120
121
Functions for removing duplicate detections based on overlap criteria.
122
123
```python { .api }
124
def nms(boxes: torch.Tensor, scores: torch.Tensor, iou_threshold: float) -> torch.Tensor:
125
"""
126
Non-maximum suppression for object detection.
127
128
Args:
129
boxes (torch.Tensor): Bounding boxes of shape (N, 4) in format [x1, y1, x2, y2]
130
scores (torch.Tensor): Scores for each box of shape (N,)
131
iou_threshold (float): IoU threshold for suppression
132
133
Returns:
134
torch.Tensor: Indices of boxes to keep
135
"""
136
137
def batched_nms(boxes: torch.Tensor, scores: torch.Tensor, idxs: torch.Tensor, iou_threshold: float) -> torch.Tensor:
138
"""
139
Batched non-maximum suppression for multiple classes.
140
141
Args:
142
boxes (torch.Tensor): Bounding boxes of shape (N, 4)
143
scores (torch.Tensor): Scores for each box of shape (N,)
144
idxs (torch.Tensor): Class indices for each box of shape (N,)
145
iou_threshold (float): IoU threshold for suppression
146
147
Returns:
148
torch.Tensor: Indices of boxes to keep
149
"""
150
```
151
152
### Loss Functions
153
154
Specialized loss functions for computer vision tasks.
155
156
```python { .api }
157
def sigmoid_focal_loss(inputs: torch.Tensor, targets: torch.Tensor, alpha: float = 0.25, gamma: float = 2, reduction: str = 'none') -> torch.Tensor:
158
"""
159
Focal loss for addressing class imbalance in object detection.
160
161
Args:
162
inputs (torch.Tensor): Predicted logits of shape (..., num_classes)
163
targets (torch.Tensor): Ground truth labels of shape (..., num_classes)
164
alpha (float): Weighting factor in [0, 1] that balances positive vs. negative examples; -1 disables weighting (default: 0.25)
165
gamma (float): Focusing parameter to down-weight easy examples
166
reduction (str): Reduction method ('none', 'mean', 'sum')
167
168
Returns:
169
torch.Tensor: Focal loss values
170
"""
171
172
def generalized_box_iou_loss(boxes1: torch.Tensor, boxes2: torch.Tensor, reduction: str = 'none') -> torch.Tensor:
173
"""
174
Generalized IoU loss for bounding box regression.
175
176
Args:
177
boxes1 (torch.Tensor): Predicted boxes of shape (N, 4)
178
boxes2 (torch.Tensor): Target boxes of shape (N, 4)
179
reduction (str): Reduction method ('none', 'mean', 'sum')
180
181
Returns:
182
torch.Tensor: GIoU loss values
183
"""
184
185
def distance_box_iou_loss(boxes1: torch.Tensor, boxes2: torch.Tensor, reduction: str = 'none') -> torch.Tensor:
186
"""
187
Distance IoU loss for bounding box regression.
188
189
Args:
190
boxes1 (torch.Tensor): Predicted boxes of shape (N, 4)
191
boxes2 (torch.Tensor): Target boxes of shape (N, 4)
192
reduction (str): Reduction method ('none', 'mean', 'sum')
193
194
Returns:
195
torch.Tensor: DIoU loss values
196
"""
197
198
def complete_box_iou_loss(boxes1: torch.Tensor, boxes2: torch.Tensor, reduction: str = 'none') -> torch.Tensor:
199
"""
200
Complete IoU loss for bounding box regression.
201
202
Args:
203
boxes1 (torch.Tensor): Predicted boxes of shape (N, 4)
204
boxes2 (torch.Tensor): Target boxes of shape (N, 4)
205
reduction (str): Reduction method ('none', 'mean', 'sum')
206
207
Returns:
208
torch.Tensor: CIoU loss values
209
"""
210
```
211
212
### Region of Interest Operations
213
214
Operations for extracting features from regions of interest in feature maps.
215
216
```python { .api }
217
def roi_align(input: torch.Tensor, boxes: torch.Tensor, output_size: tuple, spatial_scale: float = 1.0, sampling_ratio: int = -1, aligned: bool = False) -> torch.Tensor:
218
"""
219
RoI Align operation for extracting fixed-size features from variable-size regions.
220
221
Args:
222
input (torch.Tensor): Feature map of shape (N, C, H, W)
223
boxes (torch.Tensor): RoIs of shape (K, 5) where each row is [batch_idx, x1, y1, x2, y2]
224
output_size (tuple): Output size as (height, width)
225
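spatial_scale (float): Scale factor that maps box coordinates to input (feature map) coordinates, e.g. 0.5 when boxes are given on a 224x224 image and the input is a 112x112 feature map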
226
sampling_ratio (int): Number of sampling points (-1 for adaptive)
227
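aligned (bool): If True, shift box coordinates by -0.5 pixels for better alignment with neighboring pixel indices; if False, use the legacy behavior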
228
229
Returns:
230
torch.Tensor: Extracted features of shape (K, C, output_size[0], output_size[1])
231
"""
232
233
class RoIAlign(torch.nn.Module):
234
"""
235
RoI Align layer for region-based networks.
236
237
Args:
238
output_size (tuple): Output size as (height, width)
239
spatial_scale (float): Scale factor between input and RoI coordinates
240
sampling_ratio (int): Number of sampling points per bin
241
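aligned (bool): If True, shift box coordinates by -0.5 pixels for better alignment with neighboring pixel indices; if False, use the legacy behavior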
242
"""
243
244
def __init__(self, output_size: tuple, spatial_scale: float = 1.0, sampling_ratio: int = -1, aligned: bool = False): ...
245
246
def forward(self, input: torch.Tensor, rois: torch.Tensor) -> torch.Tensor: ...
247
248
def roi_pool(input: torch.Tensor, boxes: torch.Tensor, output_size: tuple, spatial_scale: float = 1.0) -> torch.Tensor:
249
"""
250
RoI Pooling operation (legacy, prefer RoI Align).
251
252
Args:
253
input (torch.Tensor): Feature map of shape (N, C, H, W)
254
boxes (torch.Tensor): RoIs of shape (K, 5)
255
output_size (tuple): Output size as (height, width)
256
spatial_scale (float): Scale factor
257
258
Returns:
259
torch.Tensor: Pooled features
260
"""
261
262
class RoIPool(torch.nn.Module):
263
"""RoI Pooling layer."""
264
265
def __init__(self, output_size: tuple, spatial_scale: float = 1.0): ...
266
267
def ps_roi_align(input: torch.Tensor, boxes: torch.Tensor, output_size: tuple, spatial_scale: float = 1.0, sampling_ratio: int = -1) -> torch.Tensor:
268
"""
269
Position Sensitive RoI Align for position-sensitive score maps.
270
271
Args:
272
input (torch.Tensor): Position-sensitive feature map
273
boxes (torch.Tensor): RoIs of shape (K, 5)
274
output_size (tuple): Output size
275
spatial_scale (float): Scale factor
276
sampling_ratio (int): Number of sampling points
277
278
Returns:
279
torch.Tensor: Position-sensitive aligned features
280
"""
281
282
class PSRoIAlign(torch.nn.Module):
283
"""Position Sensitive RoI Align layer."""
284
285
def __init__(self, output_size: tuple, spatial_scale: float = 1.0, sampling_ratio: int = -1): ...
286
287
def ps_roi_pool(input: torch.Tensor, boxes: torch.Tensor, output_size: tuple, spatial_scale: float = 1.0) -> torch.Tensor:
288
"""Position Sensitive RoI Pooling operation."""
289
290
class PSRoIPool(torch.nn.Module):
291
"""Position Sensitive RoI Pooling layer."""
292
293
def __init__(self, output_size: tuple, spatial_scale: float = 1.0): ...
294
295
class MultiScaleRoIAlign(torch.nn.Module):
296
"""
297
Multi-scale RoI Align for Feature Pyramid Networks.
298
299
Args:
300
featmap_names (list): Names of feature maps to use
301
output_size (tuple): Output size for aligned features
302
sampling_ratio (int): Number of sampling points
303
canonical_scale (int): Canonical scale for level assignment
304
canonical_level (int): Canonical level in pyramid
305
"""
306
307
def __init__(self, featmap_names: list, output_size: tuple, sampling_ratio: int, canonical_scale: int = 224, canonical_level: int = 4): ...
308
309
def forward(self, x: dict, boxes: list) -> torch.Tensor: ...
310
```
311
312
### Specialized Convolutions
313
314
Custom convolution operations for advanced architectures.
315
316
```python { .api }
317
def deform_conv2d(input: torch.Tensor, offset: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor = None, stride: tuple = (1, 1), padding: tuple = (0, 0), dilation: tuple = (1, 1), mask: torch.Tensor = None) -> torch.Tensor:
318
"""
319
Deformable convolution operation.
320
321
Args:
322
input (torch.Tensor): Input feature map of shape (N, C_in, H_in, W_in)
323
offset (torch.Tensor): Offset field of shape (N, 2*kernel_h*kernel_w, H_out, W_out)
324
weight (torch.Tensor): Convolution weights of shape (C_out, C_in, kernel_h, kernel_w)
325
bias (torch.Tensor, optional): Bias tensor of shape (C_out,)
326
stride (tuple): Convolution stride
327
padding (tuple): Convolution padding
328
dilation (tuple): Convolution dilation
329
mask (torch.Tensor, optional): Modulation mask
330
331
Returns:
332
torch.Tensor: Output feature map of shape (N, C_out, H_out, W_out)
333
"""
334
335
class DeformConv2d(torch.nn.Module):
336
"""
337
Deformable Convolution layer.
338
339
Args:
340
in_channels (int): Number of input channels
341
out_channels (int): Number of output channels
342
kernel_size (int or tuple): Convolution kernel size
343
stride (int or tuple): Convolution stride
344
padding (int or tuple): Convolution padding
345
dilation (int or tuple): Convolution dilation
346
groups (int): Number of groups for grouped convolution
347
bias (bool): Whether to use bias
348
"""
349
350
def __init__(self, in_channels: int, out_channels: int, kernel_size: int, stride: int = 1, padding: int = 0, dilation: int = 1, groups: int = 1, bias: bool = True): ...
351
352
def forward(self, input: torch.Tensor, offset: torch.Tensor, mask: torch.Tensor = None) -> torch.Tensor: ...
353
```
354
355
### Regularization Operations
356
357
Regularization techniques for improving model robustness.
358
359
```python { .api }
360
def stochastic_depth(input: torch.Tensor, p: float, mode: str, training: bool = True) -> torch.Tensor:
361
"""
362
Stochastic depth regularization (Drop Path).
363
364
Args:
365
input (torch.Tensor): Input tensor
366
p (float): Drop probability
367
mode (str): Drop mode ('batch' or 'row')
368
training (bool): Whether in training mode
369
370
Returns:
371
torch.Tensor: Output tensor with stochastic depth applied
372
"""
373
374
class StochasticDepth(torch.nn.Module):
375
"""
376
Stochastic Depth (Drop Path) layer.
377
378
Args:
379
p (float): Drop probability
380
mode (str): Drop mode ('batch' or 'row')
381
"""
382
383
def __init__(self, p: float, mode: str): ...
384
385
def forward(self, input: torch.Tensor) -> torch.Tensor: ...
386
387
def drop_block2d(input: torch.Tensor, p: float, block_size: int, inplace: bool = False, eps: float = 1e-6, training: bool = True) -> torch.Tensor:
388
"""
389
DropBlock2D regularization for convolutional layers.
390
391
Args:
392
input (torch.Tensor): Input tensor of shape (N, C, H, W)
393
p (float): Drop probability
394
block_size (int): Size of blocks to drop
395
inplace (bool): Whether to apply in-place
396
eps (float): Small value to avoid division by zero
397
training (bool): Whether in training mode
398
399
Returns:
400
torch.Tensor: Output tensor with DropBlock applied
401
"""
402
403
class DropBlock2d(torch.nn.Module):
404
"""
405
DropBlock2D layer for spatial regularization.
406
407
Args:
408
p (float): Drop probability
409
block_size (int): Size of blocks to drop
410
eps (float): Small epsilon value
411
inplace (bool): Whether to apply in-place
412
"""
413
414
def __init__(self, p: float, block_size: int, inplace: bool = False, eps: float = 1e-6): ...
415
416
def drop_block3d(input: torch.Tensor, p: float, block_size: int, inplace: bool = False, eps: float = 1e-6, training: bool = True) -> torch.Tensor:
417
"""DropBlock3D for 3D tensors (e.g., video)."""
418
419
class DropBlock3d(torch.nn.Module):
420
"""DropBlock3D layer for 3D regularization."""
421
422
def __init__(self, p: float, block_size: int, inplace: bool = False, eps: float = 1e-6): ...
423
```
424
425
### Feature Pyramid Network
426
427
Implementation of Feature Pyramid Network for multi-scale feature extraction.
428
429
```python { .api }
430
class FeaturePyramidNetwork(torch.nn.Module):
431
"""
432
Feature Pyramid Network for multi-scale feature extraction.
433
434
Args:
435
in_channels_list (list): List of input channel numbers for each level
436
out_channels (int): Number of output channels for all levels
437
extra_blocks (nn.Module, optional): Extra blocks to append
438
norm_layer (callable, optional): Normalization layer
439
"""
440
441
def __init__(self, in_channels_list: list, out_channels: int, extra_blocks=None, norm_layer=None): ...
442
443
def forward(self, x: dict) -> dict:
444
"""
445
Forward pass through FPN.
446
447
Args:
448
x (dict): Dictionary of feature maps from different levels
449
450
Returns:
451
dict: Dictionary of FPN feature maps
452
"""
453
```
454
455
### Utility Layers
456
457
General-purpose layers commonly used in computer vision architectures.
458
459
```python { .api }
460
class FrozenBatchNorm2d(torch.nn.Module):
461
"""
462
Frozen Batch Normalization layer (parameters not updated during training).
463
464
Args:
465
num_features (int): Number of features
466
eps (float): Small value for numerical stability
467
"""
468
469
def __init__(self, num_features: int, eps: float = 1e-5): ...
470
471
class Conv2dNormActivation(torch.nn.Sequential):
472
"""
473
Convolution with normalization and activation in sequence.
474
475
Args:
476
in_channels (int): Input channels
477
out_channels (int): Output channels
478
kernel_size (int): Convolution kernel size
479
stride (int): Convolution stride
480
padding (int, optional): Convolution padding; if None, it is derived from kernel_size and dilation
481
groups (int): Number of groups for grouped convolution
482
norm_layer (callable, optional): Normalization layer (default: torch.nn.BatchNorm2d); None disables normalization
483
activation_layer (callable, optional): Activation layer (default: torch.nn.ReLU); None disables the activation
484
dilation (int): Convolution dilation
485
inplace (bool, optional): Whether activations should be in-place (default: True)
486
bias (bool, optional): Whether to use bias in convolution; if None, bias is used only when norm_layer is None
487
"""
488
489
def __init__(self, in_channels: int, out_channels: int, kernel_size: int = 3, stride: int = 1, padding: int = None, groups: int = 1, norm_layer=torch.nn.BatchNorm2d, activation_layer=torch.nn.ReLU, dilation: int = 1, inplace: bool = True, bias: bool = None): ...
490
491
class Conv3dNormActivation(torch.nn.Sequential):
492
"""3D version of Conv2dNormActivation for video/3D data."""
493
494
def __init__(self, in_channels: int, out_channels: int, kernel_size: int = 3, stride: int = 1, padding: int = None, groups: int = 1, norm_layer=torch.nn.BatchNorm3d, activation_layer=torch.nn.ReLU, dilation: int = 1, inplace: bool = True, bias: bool = None): ...
495
496
class SqueezeExcitation(torch.nn.Module):
497
"""
498
Squeeze-and-Excitation block for channel attention.
499
500
Args:
501
input_channels (int): Number of input channels
502
squeeze_channels (int): Number of channels after squeeze operation
503
activation (callable, optional): Activation function for squeeze
504
scale_activation (callable, optional): Activation function for scale
505
"""
506
507
def __init__(self, input_channels: int, squeeze_channels: int, activation=torch.nn.ReLU, scale_activation=torch.nn.Sigmoid): ...
508
509
def forward(self, input: torch.Tensor) -> torch.Tensor: ...
510
511
class MLP(torch.nn.Sequential):
512
"""
513
Multi-layer perceptron with configurable layers.
514
515
Args:
516
in_channels (int): Input dimension
517
hidden_channels (list): List of hidden layer dimensions
518
norm_layer (callable, optional): Normalization layer
519
activation_layer (callable, optional): Activation layer
520
inplace (bool, optional): Whether activations should be in-place
521
bias (bool): Whether to use bias
522
dropout (float): Dropout probability
523
"""
524
525
def __init__(self, in_channels: int, hidden_channels: list, norm_layer=None, activation_layer=torch.nn.ReLU, inplace: bool = None, bias: bool = True, dropout: float = 0.0): ...
526
527
class Permute(torch.nn.Module):
528
"""
529
Permute tensor dimensions.
530
531
Args:
532
dims (list): New order of dimensions
533
"""
534
535
def __init__(self, dims: list): ...
536
537
def forward(self, x: torch.Tensor) -> torch.Tensor: ...
538
```
539
540
## Usage Examples
541
542
### Bounding Box Operations
543
544
```python
545
import torch
546
import torchvision.ops as ops
547
548
# Two sets of example boxes in xyxy format: three boxes vs. two boxes
boxes1 = torch.tensor(
    [[10, 10, 50, 50], [30, 30, 70, 70], [60, 10, 100, 50]],
    dtype=torch.float,
)
boxes2 = torch.tensor(
    [[15, 15, 55, 55], [25, 25, 65, 65]],
    dtype=torch.float,
)

# Run every box operation first, then report the results in one place
iou_matrix = ops.box_iou(boxes1, boxes2)  # pairwise IoU: one row per box in boxes1
areas = ops.box_area(boxes1)
boxes_xywh = ops.box_convert(boxes1, 'xyxy', 'xywh')
image_size = (100, 120)  # (height, width)
clipped_boxes = ops.clip_boxes_to_image(boxes1, image_size)

print(f"IoU matrix shape: {iou_matrix.shape}")  # (3, 2)
print(f"IoU values:\n{iou_matrix}")
print(f"Box areas: {areas}")
print(f"Boxes in xywh format: {boxes_xywh}")
print(f"Clipped boxes: {clipped_boxes}")
577
```
578
579
### Non-Maximum Suppression
580
581
```python
582
import torch
583
import torchvision.ops as ops
584
585
# Five candidate detections in xyxy format, with one score and class id each
boxes = torch.tensor(
    [
        [10, 10, 50, 50],
        [12, 12, 52, 52],  # Overlapping with first box
        [60, 10, 100, 50],
        [15, 15, 45, 45],  # Overlapping with first box
        [80, 80, 120, 120],
    ],
    dtype=torch.float,
)
scores = torch.tensor([0.9, 0.8, 0.7, 0.85, 0.6])
class_ids = torch.tensor([0, 0, 1, 0, 1])

# Plain NMS: class ids are not taken into account
keep_indices = ops.nms(boxes, scores, iou_threshold=0.5)
print(f"Indices to keep after NMS: {keep_indices}")

# Batched NMS: suppression is performed separately for each class id
keep_indices_batched = ops.batched_nms(boxes, scores, class_ids, iou_threshold=0.5)
print(f"Indices to keep after batched NMS: {keep_indices_batched}")

# Select the surviving detections from every per-box tensor
final_boxes, final_scores, final_classes = (
    per_box[keep_indices_batched] for per_box in (boxes, scores, class_ids)
)

print(f"Final boxes: {final_boxes}")
print(f"Final scores: {final_scores}")
print(f"Final classes: {final_classes}")
613
```
614
615
### RoI Align Operation
616
617
```python
618
import torch
619
import torchvision.ops as ops
620
621
# Feature map for a batch of 2 images: 64 channels, 32x32 spatial size
feature_map = torch.randn(2, 64, 32, 32)

# Each RoI row is [batch_idx, x1, y1, x2, y2]
rois = torch.tensor(
    [
        [0, 5, 5, 15, 15],  # RoI in first image
        [0, 20, 10, 30, 25],  # Another RoI in first image
        [1, 8, 8, 18, 18],  # RoI in second image
    ],
    dtype=torch.float,
)

# Functional form: pool every RoI to a 7x7 grid
output_size = (7, 7)
spatial_scale = 1.0
aligned_features = ops.roi_align(
    feature_map, rois, output_size, spatial_scale=spatial_scale, sampling_ratio=2
)
print(f"Aligned features shape: {aligned_features.shape}")  # (3, 64, 7, 7)

# Module form: the same operation wrapped as a reusable layer
roi_align_layer = ops.RoIAlign(output_size=(14, 14), spatial_scale=0.5, sampling_ratio=2)
aligned_features_layer = roi_align_layer(feature_map, rois)
print(f"Layer output shape: {aligned_features_layer.shape}")
648
```
649
650
### Feature Pyramid Network
651
652
```python
653
import torch
654
import torchvision.ops as ops
655
656
# FPN over four ResNet-style feature levels
in_channels_list = [256, 512, 1024, 2048]  # ResNet feature channels
out_channels = 256
fpn = ops.FeaturePyramidNetwork(in_channels_list, out_channels)

# Simulated backbone outputs keyed '0'..'3': channel count grows per level
# while the spatial size halves (64 -> 32 -> 16 -> 8)
spatial_sizes = (64, 32, 16, 8)
backbone_features = {
    str(level): torch.randn(2, channels, size, size)
    for level, (channels, size) in enumerate(zip(in_channels_list, spatial_sizes))
}

# Run the pyramid and report the shape produced for each level
fpn_features = fpn(backbone_features)

print("FPN output shapes:")
for key, feature in fpn_features.items():
    print(f"Level {key}: {feature.shape}")
676
```
677
678
### Custom Detection Pipeline
679
680
```python
681
import torch
682
import torchvision.ops as ops
683
684
def post_process_detections(boxes, scores, class_logits, score_threshold=0.5, nms_threshold=0.5):
    """
    Post-process detection outputs with score filtering and per-class NMS.

    Args:
        boxes: Predicted boxes (N, 4)
        scores: Objectness scores (N,)
        class_logits: Class predictions (N, num_classes)
        score_threshold: Minimum combined score a detection needs to be kept
        nms_threshold: NMS IoU threshold

    Returns:
        dict: Filtered detections under the keys 'boxes', 'scores' and 'labels'
    """
    # A single max-reduction yields both the best class probability and its
    # index, instead of running argmax and max separately over the same tensor
    class_scores, class_ids = torch.softmax(class_logits, dim=1).max(dim=1)

    # Combine objectness and classification scores
    final_scores = scores * class_scores

    # Filter by score threshold
    keep_mask = final_scores >= score_threshold
    boxes = boxes[keep_mask]
    final_scores = final_scores[keep_mask]
    class_ids = class_ids[keep_mask]

    # Apply NMS per class
    keep_indices = ops.batched_nms(boxes, final_scores, class_ids, nms_threshold)

    return {
        'boxes': boxes[keep_indices],
        'scores': final_scores[keep_indices],
        'labels': class_ids[keep_indices],
    }
720
721
# Example usage
num_detections = 1000
num_classes = 80

# Build valid xyxy boxes (x1 < x2, y1 < y2) from a random top-left corner plus
# a strictly positive width/height; NMS expects boxes in this form
top_left = torch.rand(num_detections, 2) * 100
box_sizes = torch.rand(num_detections, 2) * 50 + 1
boxes = torch.cat([top_left, top_left + box_sizes], dim=1)
scores = torch.rand(num_detections)  # Random objectness scores
class_logits = torch.randn(num_detections, num_classes)  # Random class logits

# Post-process detections. Random logits spread over 80 classes typically give
# small softmax confidences, so a low score threshold is used to keep some boxes
results = post_process_detections(boxes, scores, class_logits, score_threshold=0.05)
print(f"Final detections: {len(results['boxes'])}")

# min()/max() raise on an empty tensor, so only report a range when something survived
if results['scores'].numel() > 0:
    print(f"Score range: {results['scores'].min():.3f} - {results['scores'].max():.3f}")
else:
    print("No detections above the score threshold")
733
```
734
735
### Loss Functions for Training
736
737
```python
738
import torch
739
import torchvision.ops as ops
740
741
# Focal Loss for object classification
742
def train_step_focal_loss():
    """Compute a mean-reduced sigmoid focal loss on simulated classification data and return it."""
    batch_size, num_classes = 32, 80

    # Simulated logits and an initially all-zero target matrix
    predictions = torch.randn(batch_size, num_classes)
    targets = torch.zeros(batch_size, num_classes)

    # Mark one randomly chosen class per sample as positive
    sample_rows = torch.arange(batch_size)
    positive_cols = torch.randint(0, num_classes, (batch_size,))
    targets[sample_rows, positive_cols] = 1

    focal_loss = ops.sigmoid_focal_loss(
        predictions, targets, alpha=0.25, gamma=2.0, reduction='mean'
    )

    print(f"Focal loss: {focal_loss.item():.4f}")
    return focal_loss
762
763
# Box regression losses
764
def train_step_box_loss():
    """
    Compute GIoU, DIoU and CIoU regression losses on simulated boxes.

    Returns:
        torch.Tensor: Sum of the three mean-reduced losses
    """

    def random_xyxy_boxes(count):
        # The IoU-based losses expect x1 < x2 and y1 < y2, so each box is a
        # random top-left corner plus a strictly positive width/height
        top_left = torch.rand(count, 2) * 100
        size = torch.rand(count, 2) * 50 + 1
        return torch.cat([top_left, top_left + size], dim=1)

    batch_size = 64
    pred_boxes = random_xyxy_boxes(batch_size)
    target_boxes = random_xyxy_boxes(batch_size)

    # Different IoU-based losses
    giou_loss = ops.generalized_box_iou_loss(pred_boxes, target_boxes, reduction='mean')
    diou_loss = ops.distance_box_iou_loss(pred_boxes, target_boxes, reduction='mean')
    ciou_loss = ops.complete_box_iou_loss(pred_boxes, target_boxes, reduction='mean')

    print(f"GIoU loss: {giou_loss.item():.4f}")
    print(f"DIoU loss: {diou_loss.item():.4f}")
    print(f"CIoU loss: {ciou_loss.item():.4f}")

    return giou_loss + diou_loss + ciou_loss
779
780
# Execute each example step once (focal loss first), then combine the two losses
focal_loss, box_loss = train_step_focal_loss(), train_step_box_loss()
total_loss = torch.add(focal_loss, box_loss)
print(f"Total loss: {total_loss.item():.4f}")
785
```
786
787
### Regularization Techniques
788
789
```python
790
import torch
791
import torch.nn as nn
792
import torchvision.ops as ops
793
794
class ResidualBlock(nn.Module):
    """Residual block of two 3x3 convolutions whose branch passes through stochastic depth."""

    def __init__(self, channels, drop_prob=0.1):
        super().__init__()
        # Both convolutions keep the channel count unchanged
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.stochastic_depth = ops.StochasticDepth(drop_prob, mode='row')

    def forward(self, x):
        # Residual branch: conv -> ReLU -> conv, then stochastic depth
        branch = self.conv2(self.relu(self.conv1(x)))
        branch = self.stochastic_depth(branch)

        # Add the skip connection in place and apply the final activation
        branch += x
        return self.relu(branch)
813
814
# Example with DropBlock for convolutional regularization
815
class ConvBlockWithDropBlock(nn.Module):
    """Conv -> BatchNorm -> ReLU block followed by DropBlock regularization."""

    def __init__(self, in_channels, out_channels, drop_prob=0.1, block_size=7):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.dropblock = ops.DropBlock2d(drop_prob, block_size)

    def forward(self, x):
        # Feed the input through each stage in order
        for stage in (self.conv, self.bn, self.relu, self.dropblock):
            x = stage(x)
        return x
831
832
# Shared random input used by both regularization demos
batch_size, channels, height, width = 4, 64, 32, 32
input_shape = (batch_size, channels, height, width)
input_tensor = torch.randn(input_shape)

# Residual block with stochastic depth
residual_block = ResidualBlock(channels, drop_prob=0.2)
output = residual_block(input_tensor)
print(f"Residual block output shape: {output.shape}")

# Convolutional block with DropBlock
dropblock_conv = ConvBlockWithDropBlock(channels, channels, drop_prob=0.1, block_size=5)
output = dropblock_conv(input_tensor)
print(f"DropBlock conv output shape: {output.shape}")
845
```