Tessl Tile for pypi/torchvision@0.23.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

datasets.md index.md io.md models.md ops.md transforms.md tv_tensors.md utils.md

ops.mddocs/

0
# Operations
1

2
The TorchVision ops module provides low-level operations and specialized neural network layers for computer vision tasks. It includes functions for bounding box operations, non-maximum suppression, region of interest operations, loss functions, and custom layers used in modern computer vision architectures.
3

4
## Capabilities
5

6
### Bounding Box Operations
7

8
Functions for manipulating and analyzing bounding boxes in various formats.
9

10
```python { .api }
11
def box_area(boxes: torch.Tensor) -> torch.Tensor:
12
    """
13
    Calculate area of bounding boxes.
14
    
15
    Args:
16
        boxes (torch.Tensor): Bounding boxes in format [x1, y1, x2, y2] of shape (..., 4)
17
    
18
    Returns:
19
        torch.Tensor: Areas of boxes with shape (...,)
20
    """
21

22
def box_convert(boxes: torch.Tensor, in_fmt: str, out_fmt: str) -> torch.Tensor:
23
    """
24
    Convert bounding boxes between different formats.
25
    
26
    Args:
27
        boxes (torch.Tensor): Bounding boxes tensor of shape (..., 4)
28
        in_fmt (str): Input format ('xyxy', 'xywh', 'cxcywh')
29
        out_fmt (str): Output format ('xyxy', 'xywh', 'cxcywh')
30
    
31
    Returns:
32
        torch.Tensor: Converted bounding boxes
33
    """
34

35
def box_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:
36
    """
37
    Calculate Intersection over Union (IoU) between two sets of boxes.
38
    
39
    Args:
40
        boxes1 (torch.Tensor): Boxes of shape (N, 4) in format [x1, y1, x2, y2]
41
        boxes2 (torch.Tensor): Boxes of shape (M, 4) in format [x1, y1, x2, y2]
42
    
43
    Returns:
44
        torch.Tensor: IoU matrix of shape (N, M)
45
    """
46

47
def generalized_box_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:
48
    """
49
    Calculate Generalized Intersection over Union (GIoU) between boxes.
50
    
51
    Args:
52
        boxes1 (torch.Tensor): Boxes of shape (N, 4)
53
        boxes2 (torch.Tensor): Boxes of shape (M, 4)
54
    
55
    Returns:
56
        torch.Tensor: GIoU matrix of shape (N, M)
57
    """
58

59
def distance_box_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:
60
    """
61
    Calculate Distance Intersection over Union (DIoU) between boxes.
62
    
63
    Args:
64
        boxes1 (torch.Tensor): Boxes of shape (N, 4)
65
        boxes2 (torch.Tensor): Boxes of shape (M, 4)
66
    
67
    Returns:
68
        torch.Tensor: DIoU matrix of shape (N, M)
69
    """
70

71
def complete_box_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:
72
    """
73
    Calculate Complete Intersection over Union (CIoU) between boxes.
74
    
75
    Args:
76
        boxes1 (torch.Tensor): Boxes of shape (N, 4)
77
        boxes2 (torch.Tensor): Boxes of shape (M, 4)
78
    
79
    Returns:
80
        torch.Tensor: CIoU matrix of shape (N, M)
81
    """
82

83
def clip_boxes_to_image(boxes: torch.Tensor, size: tuple) -> torch.Tensor:
84
    """
85
    Clip bounding boxes to image boundaries.
86
    
87
    Args:
88
        boxes (torch.Tensor): Boxes of shape (..., 4) in format [x1, y1, x2, y2]
89
        size (tuple): Image size as (height, width)
90
    
91
    Returns:
92
        torch.Tensor: Clipped boxes
93
    """
94

95
def remove_small_boxes(boxes: torch.Tensor, min_size: float) -> torch.Tensor:
96
    """
97
    Remove bounding boxes smaller than minimum size.
98
    
99
    Args:
100
        boxes (torch.Tensor): Boxes of shape (N, 4)
101
        min_size (float): Minimum box size threshold
102
    
103
    Returns:
104
        torch.Tensor: Indices of boxes to keep
105
    """
106

107
def masks_to_boxes(masks: torch.Tensor) -> torch.Tensor:
108
    """
109
    Convert binary masks to bounding boxes.
110
    
111
    Args:
112
        masks (torch.Tensor): Binary masks of shape (N, H, W)
113
    
114
    Returns:
115
        torch.Tensor: Bounding boxes of shape (N, 4) in format [x1, y1, x2, y2]
116
    """
117
```
118

119
### Non-Maximum Suppression
120

121
Functions for removing duplicate detections based on overlap criteria.
122

123
```python { .api }
124
def nms(boxes: torch.Tensor, scores: torch.Tensor, iou_threshold: float) -> torch.Tensor:
125
    """
126
    Non-maximum suppression for object detection.
127
    
128
    Args:
129
        boxes (torch.Tensor): Bounding boxes of shape (N, 4) in format [x1, y1, x2, y2]
130
        scores (torch.Tensor): Scores for each box of shape (N,)
131
        iou_threshold (float): IoU threshold for suppression
132
    
133
    Returns:
134
        torch.Tensor: Indices of boxes to keep
135
    """
136

137
def batched_nms(boxes: torch.Tensor, scores: torch.Tensor, idxs: torch.Tensor, iou_threshold: float) -> torch.Tensor:
138
    """
139
    Batched non-maximum suppression for multiple classes.
140
    
141
    Args:
142
        boxes (torch.Tensor): Bounding boxes of shape (N, 4)
143
        scores (torch.Tensor): Scores for each box of shape (N,)
144
        idxs (torch.Tensor): Class indices for each box of shape (N,)
145
        iou_threshold (float): IoU threshold for suppression
146
    
147
    Returns:
148
        torch.Tensor: Indices of boxes to keep
149
    """
150
```
151

152
### Loss Functions
153

154
Specialized loss functions for computer vision tasks.
155

156
```python { .api }
157
def sigmoid_focal_loss(inputs: torch.Tensor, targets: torch.Tensor, alpha: float = 0.25, gamma: float = 2, reduction: str = 'none') -> torch.Tensor:
158
    """
159
    Focal loss for addressing class imbalance in object detection.
160
    
161
    Args:
162
        inputs (torch.Tensor): Predicted logits of shape (..., num_classes)
163
        targets (torch.Tensor): Ground truth labels of shape (..., num_classes)
164
        alpha (float): Weighting factor in range (0, 1) that balances positive vs. negative examples; -1 disables the weighting (default: 0.25)
165
        gamma (float): Focusing parameter to down-weight easy examples
166
        reduction (str): Reduction method ('none', 'mean', 'sum')
167
    
168
    Returns:
169
        torch.Tensor: Focal loss values
170
    """
171

172
def generalized_box_iou_loss(boxes1: torch.Tensor, boxes2: torch.Tensor, reduction: str = 'none') -> torch.Tensor:
173
    """
174
    Generalized IoU loss for bounding box regression.
175
    
176
    Args:
177
        boxes1 (torch.Tensor): Predicted boxes of shape (N, 4)
178
        boxes2 (torch.Tensor): Target boxes of shape (N, 4)
179
        reduction (str): Reduction method ('none', 'mean', 'sum')
180
    
181
    Returns:
182
        torch.Tensor: GIoU loss values
183
    """
184

185
def distance_box_iou_loss(boxes1: torch.Tensor, boxes2: torch.Tensor, reduction: str = 'none') -> torch.Tensor:
186
    """
187
    Distance IoU loss for bounding box regression.
188
    
189
    Args:
190
        boxes1 (torch.Tensor): Predicted boxes of shape (N, 4)
191
        boxes2 (torch.Tensor): Target boxes of shape (N, 4)
192
        reduction (str): Reduction method ('none', 'mean', 'sum')
193
    
194
    Returns:
195
        torch.Tensor: DIoU loss values
196
    """
197

198
def complete_box_iou_loss(boxes1: torch.Tensor, boxes2: torch.Tensor, reduction: str = 'none') -> torch.Tensor:
199
    """
200
    Complete IoU loss for bounding box regression.
201
    
202
    Args:
203
        boxes1 (torch.Tensor): Predicted boxes of shape (N, 4)
204
        boxes2 (torch.Tensor): Target boxes of shape (N, 4)
205
        reduction (str): Reduction method ('none', 'mean', 'sum')
206
    
207
    Returns:
208
        torch.Tensor: CIoU loss values
209
    """
210
```
211

212
### Region of Interest Operations
213

214
Operations for extracting features from regions of interest in feature maps.
215

216
```python { .api }
217
def roi_align(input: torch.Tensor, boxes: torch.Tensor, output_size: tuple, spatial_scale: float = 1.0, sampling_ratio: int = -1, aligned: bool = False) -> torch.Tensor:
218
    """
219
    RoI Align operation for extracting fixed-size features from variable-size regions.
220
    
221
    Args:
222
        input (torch.Tensor): Feature map of shape (N, C, H, W)
223
        boxes (torch.Tensor): RoIs of shape (K, 5) where each row is [batch_idx, x1, y1, x2, y2]
224
        output_size (tuple): Output size as (height, width)
225
        spatial_scale (float): Scale factor that maps box coordinates to input (feature map) coordinates, e.g. 0.5 when boxes are given in 224x224 image space and the feature map is 112x112
226
        sampling_ratio (int): Number of sampling points (-1 for adaptive)
227
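        aligned (bool): If True, shift box coordinates by -0.5 pixels for better alignment with neighboring pixel indices; if False, keep the legacy behavior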
228
    
229
    Returns:
230
        torch.Tensor: Extracted features of shape (K, C, output_size[0], output_size[1])
231
    """
232

233
class RoIAlign(torch.nn.Module):
234
    """
235
    RoI Align layer for region-based networks.
236
    
237
    Args:
238
        output_size (tuple): Output size as (height, width)
239
        spatial_scale (float): Scale factor between input and RoI coordinates
240
        sampling_ratio (int): Number of sampling points per bin
241
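        aligned (bool): If True, shift box coordinates by -0.5 pixels for better alignment with neighboring pixel indices; if False, keep the legacy behavior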
242
    """
243
    
244
    def __init__(self, output_size: tuple, spatial_scale: float, sampling_ratio: int, aligned: bool = False): ...
245
    
246
    def forward(self, input: torch.Tensor, rois: torch.Tensor) -> torch.Tensor: ...
247

248
def roi_pool(input: torch.Tensor, boxes: torch.Tensor, output_size: tuple, spatial_scale: float = 1.0) -> torch.Tensor:
249
    """
250
    RoI Pooling operation (legacy, prefer RoI Align).
251
    
252
    Args:
253
        input (torch.Tensor): Feature map of shape (N, C, H, W)
254
        boxes (torch.Tensor): RoIs of shape (K, 5)
255
        output_size (tuple): Output size as (height, width)
256
        spatial_scale (float): Scale factor
257
    
258
    Returns:
259
        torch.Tensor: Pooled features
260
    """
261

262
class RoIPool(torch.nn.Module):
263
    """RoI Pooling layer."""
264
    
265
    def __init__(self, output_size: tuple, spatial_scale: float): ...
266

267
def ps_roi_align(input: torch.Tensor, boxes: torch.Tensor, output_size: tuple, spatial_scale: float = 1.0, sampling_ratio: int = -1) -> torch.Tensor:
268
    """
269
    Position Sensitive RoI Align for position-sensitive score maps.
270
    
271
    Args:
272
        input (torch.Tensor): Position-sensitive feature map
273
        boxes (torch.Tensor): RoIs of shape (K, 5)
274
        output_size (tuple): Output size
275
        spatial_scale (float): Scale factor
276
        sampling_ratio (int): Number of sampling points
277
    
278
    Returns:
279
        torch.Tensor: Position-sensitive aligned features
280
    """
281

282
class PSRoIAlign(torch.nn.Module):
283
    """Position Sensitive RoI Align layer."""
284
    
285
    def __init__(self, output_size: tuple, spatial_scale: float, sampling_ratio: int): ...
286

287
def ps_roi_pool(input: torch.Tensor, boxes: torch.Tensor, output_size: tuple, spatial_scale: float = 1.0) -> torch.Tensor:
288
    """Position Sensitive RoI Pooling operation."""
289

290
class PSRoIPool(torch.nn.Module):
291
    """Position Sensitive RoI Pooling layer."""
292
    
293
    def __init__(self, output_size: tuple, spatial_scale: float): ...
294

295
class MultiScaleRoIAlign(torch.nn.Module):
296
    """
297
    Multi-scale RoI Align for Feature Pyramid Networks.
298
    
299
    Args:
300
        featmap_names (list): Names of feature maps to use
301
        output_size (tuple): Output size for aligned features
302
        sampling_ratio (int): Number of sampling points
303
        canonical_scale (int): Canonical scale for level assignment
304
        canonical_level (int): Canonical level in pyramid
305
    """
306
    
307
    def __init__(self, featmap_names: list, output_size: tuple, sampling_ratio: int, *, canonical_scale: int = 224, canonical_level: int = 4): ...
308
    
309
    def forward(self, x: dict, boxes: list, image_shapes: list) -> torch.Tensor: ...
310
```
311

312
### Specialized Convolutions
313

314
Custom convolution operations for advanced architectures.
315

316
```python { .api }
317
def deform_conv2d(input: torch.Tensor, offset: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor = None, stride: tuple = (1, 1), padding: tuple = (0, 0), dilation: tuple = (1, 1), mask: torch.Tensor = None) -> torch.Tensor:
318
    """
319
    Deformable convolution operation.
320
    
321
    Args:
322
        input (torch.Tensor): Input feature map of shape (N, C_in, H_in, W_in)
323
        offset (torch.Tensor): Offset field of shape (N, 2*kernel_h*kernel_w, H_out, W_out)
324
        weight (torch.Tensor): Convolution weights of shape (C_out, C_in, kernel_h, kernel_w)
325
        bias (torch.Tensor, optional): Bias tensor of shape (C_out,)
326
        stride (tuple): Convolution stride
327
        padding (tuple): Convolution padding
328
        dilation (tuple): Convolution dilation
329
        mask (torch.Tensor, optional): Modulation mask
330
    
331
    Returns:
332
        torch.Tensor: Output feature map of shape (N, C_out, H_out, W_out)
333
    """
334

335
class DeformConv2d(torch.nn.Module):
336
    """
337
    Deformable Convolution layer.
338
    
339
    Args:
340
        in_channels (int): Number of input channels
341
        out_channels (int): Number of output channels
342
        kernel_size (int or tuple): Convolution kernel size
343
        stride (int or tuple): Convolution stride
344
        padding (int or tuple): Convolution padding
345
        dilation (int or tuple): Convolution dilation
346
        groups (int): Number of groups for grouped convolution
347
        bias (bool): Whether to use bias
348
    """
349
    
350
    def __init__(self, in_channels: int, out_channels: int, kernel_size: int, stride: int = 1, padding: int = 0, dilation: int = 1, groups: int = 1, bias: bool = True): ...
351
    
352
    def forward(self, input: torch.Tensor, offset: torch.Tensor, mask: torch.Tensor = None) -> torch.Tensor: ...
353
```
354

355
### Regularization Operations
356

357
Regularization techniques for improving model robustness.
358

359
```python { .api }
360
def stochastic_depth(input: torch.Tensor, p: float, mode: str, training: bool = True) -> torch.Tensor:
361
    """
362
    Stochastic depth regularization (Drop Path).
363
    
364
    Args:
365
        input (torch.Tensor): Input tensor
366
        p (float): Drop probability
367
        mode (str): Drop mode ('batch' or 'row')
368
        training (bool): Whether in training mode
369
    
370
    Returns:
371
        torch.Tensor: Output tensor with stochastic depth applied
372
    """
373

374
class StochasticDepth(torch.nn.Module):
375
    """
376
    Stochastic Depth (Drop Path) layer.
377
    
378
    Args:
379
        p (float): Drop probability
380
        mode (str): Drop mode ('batch' or 'row')
381
    """
382
    
383
    def __init__(self, p: float, mode: str): ...
384
    
385
    def forward(self, input: torch.Tensor) -> torch.Tensor: ...
386

387
def drop_block2d(input: torch.Tensor, p: float, block_size: int, inplace: bool = False, eps: float = 1e-6, training: bool = True) -> torch.Tensor:
388
    """
389
    DropBlock2D regularization for convolutional layers.
390
    
391
    Args:
392
        input (torch.Tensor): Input tensor of shape (N, C, H, W)
393
        p (float): Drop probability
394
        block_size (int): Size of blocks to drop
395
        inplace (bool): Whether to apply in-place
396
        eps (float): Small value to avoid division by zero
397
        training (bool): Whether in training mode
398
    
399
    Returns:
400
        torch.Tensor: Output tensor with DropBlock applied
401
    """
402

403
class DropBlock2d(torch.nn.Module):
404
    """
405
    DropBlock2D layer for spatial regularization.
406
    
407
    Args:
408
        p (float): Drop probability
409
        block_size (int): Size of blocks to drop
410
        eps (float): Small epsilon value
411
        inplace (bool): Whether to apply in-place
412
    """
413
    
414
    def __init__(self, p: float, block_size: int, inplace: bool = False, eps: float = 1e-6): ...
415

416
def drop_block3d(input: torch.Tensor, p: float, block_size: int, inplace: bool = False, eps: float = 1e-6, training: bool = True) -> torch.Tensor:
417
    """DropBlock3D for 3D tensors (e.g., video)."""
418

419
class DropBlock3d(torch.nn.Module):
420
    """DropBlock3D layer for 3D regularization."""
421
    
422
    def __init__(self, p: float, block_size: int, inplace: bool = False, eps: float = 1e-6): ...
423
```
424

425
### Feature Pyramid Network
426

427
Implementation of Feature Pyramid Network for multi-scale feature extraction.
428

429
```python { .api }
430
class FeaturePyramidNetwork(torch.nn.Module):
431
    """
432
    Feature Pyramid Network for multi-scale feature extraction.
433
    
434
    Args:
435
        in_channels_list (list): List of input channel numbers for each level
436
        out_channels (int): Number of output channels for all levels
437
        extra_blocks (nn.Module, optional): Extra blocks to append
438
        norm_layer (callable, optional): Normalization layer
439
    """
440
    
441
    def __init__(self, in_channels_list: list, out_channels: int, extra_blocks=None, norm_layer=None): ...
442
    
443
    def forward(self, x: dict) -> dict:
444
        """
445
        Forward pass through FPN.
446
        
447
        Args:
448
            x (dict): Dictionary of feature maps from different levels
449
        
450
        Returns:
451
            dict: Dictionary of FPN feature maps
452
        """
453
```
454

455
### Utility Layers
456

457
General-purpose layers commonly used in computer vision architectures.
458

459
```python { .api }
460
class FrozenBatchNorm2d(torch.nn.Module):
461
    """
462
    Frozen Batch Normalization layer (parameters not updated during training).
463
    
464
    Args:
465
        num_features (int): Number of features
466
        eps (float): Small value for numerical stability
467
    """
468
    
469
    def __init__(self, num_features: int, eps: float = 1e-5): ...
470

471
class Conv2dNormActivation(torch.nn.Sequential):
472
    """
473
    Convolution with normalization and activation in sequence.
474
    
475
    Args:
476
        in_channels (int): Input channels
477
        out_channels (int): Output channels
478
        kernel_size (int): Convolution kernel size
479
        stride (int): Convolution stride
480
        padding (int, optional): Convolution padding
481
        groups (int): Number of groups for grouped convolution
482
        norm_layer (callable, optional): Normalization layer (default: torch.nn.BatchNorm2d; None disables normalization)
483
        activation_layer (callable, optional): Activation layer (default: torch.nn.ReLU; None disables activation)
484
        dilation (int): Convolution dilation
485
        inplace (bool, optional): Whether activations should be in-place (default: True)
486
        bias (bool, optional): Whether to use bias in convolution
487
    """
488
    
489
    def __init__(self, in_channels: int, out_channels: int, kernel_size: int = 3, stride: int = 1, padding: int = None, groups: int = 1, norm_layer=torch.nn.BatchNorm2d, activation_layer=torch.nn.ReLU, dilation: int = 1, inplace: bool = True, bias: bool = None): ...
490

491
class Conv3dNormActivation(torch.nn.Sequential):
492
    """3D version of Conv2dNormActivation for video/3D data."""
493
    
494
    def __init__(self, in_channels: int, out_channels: int, kernel_size: int = 3, stride: int = 1, padding: int = None, groups: int = 1, norm_layer=torch.nn.BatchNorm3d, activation_layer=torch.nn.ReLU, dilation: int = 1, inplace: bool = True, bias: bool = None): ...
495

496
class SqueezeExcitation(torch.nn.Module):
497
    """
498
    Squeeze-and-Excitation block for channel attention.
499
    
500
    Args:
501
        input_channels (int): Number of input channels
502
        squeeze_channels (int): Number of channels after squeeze operation
503
        activation (callable, optional): Activation function for squeeze
504
        scale_activation (callable, optional): Activation function for scale
505
    """
506
    
507
    def __init__(self, input_channels: int, squeeze_channels: int, activation=torch.nn.ReLU, scale_activation=torch.nn.Sigmoid): ...
508
    
509
    def forward(self, input: torch.Tensor) -> torch.Tensor: ...
510

511
class MLP(torch.nn.Sequential):
512
    """
513
    Multi-layer perceptron with configurable layers.
514
    
515
    Args:
516
        in_channels (int): Input dimension
517
        hidden_channels (list): List of hidden layer dimensions
518
        norm_layer (callable, optional): Normalization layer
519
        activation_layer (callable, optional): Activation layer
520
        inplace (bool, optional): Whether activations should be in-place
521
        bias (bool): Whether to use bias
522
        dropout (float): Dropout probability
523
    """
524
    
525
    def __init__(self, in_channels: int, hidden_channels: list, norm_layer=None, activation_layer=torch.nn.ReLU, inplace: bool = None, bias: bool = True, dropout: float = 0.0): ...
526

527
class Permute(torch.nn.Module):
528
    """
529
    Permute tensor dimensions.
530
    
531
    Args:
532
        dims (list): New order of dimensions
533
    """
534
    
535
    def __init__(self, dims: list): ...
536
    
537
    def forward(self, x: torch.Tensor) -> torch.Tensor: ...
538
```
539

540
## Usage Examples
541

542
### Bounding Box Operations
543

544
```python
545
import torch
546
import torchvision.ops as ops
547

548
# Create example bounding boxes (N=3 boxes in xyxy format)
549
boxes1 = torch.tensor([
550
    [10, 10, 50, 50],
551
    [30, 30, 70, 70], 
552
    [60, 10, 100, 50]
553
], dtype=torch.float)
554

555
boxes2 = torch.tensor([
556
    [15, 15, 55, 55],
557
    [25, 25, 65, 65]
558
], dtype=torch.float)
559

560
# Calculate IoU matrix
561
iou_matrix = ops.box_iou(boxes1, boxes2)
562
print(f"IoU matrix shape: {iou_matrix.shape}")  # (3, 2)
563
print(f"IoU values:\n{iou_matrix}")
564

565
# Calculate box areas
566
areas = ops.box_area(boxes1)
567
print(f"Box areas: {areas}")
568

569
# Convert box formats
570
boxes_xywh = ops.box_convert(boxes1, 'xyxy', 'xywh')
571
print(f"Boxes in xywh format: {boxes_xywh}")
572

573
# Clip boxes to image boundaries
574
image_size = (100, 120)  # (height, width)
575
clipped_boxes = ops.clip_boxes_to_image(boxes1, image_size)
576
print(f"Clipped boxes: {clipped_boxes}")
577
```
578

579
### Non-Maximum Suppression
580

581
```python
582
import torch
583
import torchvision.ops as ops
584

585
# Example detection results
586
boxes = torch.tensor([
587
    [10, 10, 50, 50],
588
    [12, 12, 52, 52],  # Overlapping with first box
589
    [60, 10, 100, 50],
590
    [15, 15, 45, 45],  # Overlapping with first box
591
    [80, 80, 120, 120]
592
], dtype=torch.float)
593

594
scores = torch.tensor([0.9, 0.8, 0.7, 0.85, 0.6])
595
class_ids = torch.tensor([0, 0, 1, 0, 1])
596

597
# Apply NMS
598
keep_indices = ops.nms(boxes, scores, iou_threshold=0.5)
599
print(f"Indices to keep after NMS: {keep_indices}")
600

601
# Apply batched NMS (per-class NMS)
602
keep_indices_batched = ops.batched_nms(boxes, scores, class_ids, iou_threshold=0.5)
603
print(f"Indices to keep after batched NMS: {keep_indices_batched}")
604

605
# Filter results
606
final_boxes = boxes[keep_indices_batched]
607
final_scores = scores[keep_indices_batched]
608
final_classes = class_ids[keep_indices_batched]
609

610
print(f"Final boxes: {final_boxes}")
611
print(f"Final scores: {final_scores}")
612
print(f"Final classes: {final_classes}")
613
```
614

615
### RoI Align Operation
616

617
```python
618
import torch
619
import torchvision.ops as ops
620

621
# Create feature map (batch_size=2, channels=64, height=32, width=32)
622
feature_map = torch.randn(2, 64, 32, 32)
623

624
# Define RoIs: [batch_idx, x1, y1, x2, y2]
625
rois = torch.tensor([
626
    [0, 5, 5, 15, 15],    # RoI in first image
627
    [0, 20, 10, 30, 25],  # Another RoI in first image
628
    [1, 8, 8, 18, 18],    # RoI in second image
629
], dtype=torch.float)
630

631
# Apply RoI Align
632
output_size = (7, 7)
633
spatial_scale = 1.0
634
aligned_features = ops.roi_align(
635
    feature_map, 
636
    rois, 
637
    output_size, 
638
    spatial_scale=spatial_scale,
639
    sampling_ratio=2
640
)
641

642
print(f"Aligned features shape: {aligned_features.shape}")  # (3, 64, 7, 7)
643

644
# Using RoI Align as a layer
645
roi_align_layer = ops.RoIAlign(output_size=(14, 14), spatial_scale=0.5, sampling_ratio=2)
646
aligned_features_layer = roi_align_layer(feature_map, rois)
647
print(f"Layer output shape: {aligned_features_layer.shape}")
648
```
649

650
### Feature Pyramid Network
651

652
```python
653
import torch
654
import torchvision.ops as ops
655

656
# Create FPN for ResNet-like backbone
657
in_channels_list = [256, 512, 1024, 2048]  # ResNet feature channels
658
out_channels = 256
659

660
fpn = ops.FeaturePyramidNetwork(in_channels_list, out_channels)
661

662
# Simulate backbone features
663
backbone_features = {
664
    '0': torch.randn(2, 256, 64, 64),   # Early layer
665
    '1': torch.randn(2, 512, 32, 32),   # Mid layer  
666
    '2': torch.randn(2, 1024, 16, 16),  # Late layer
667
    '3': torch.randn(2, 2048, 8, 8),    # Final layer
668
}
669

670
# Apply FPN
671
fpn_features = fpn(backbone_features)
672

673
print("FPN output shapes:")
674
for key, feature in fpn_features.items():
675
    print(f"Level {key}: {feature.shape}")
676
```
677

678
### Custom Detection Pipeline
679

680
```python
681
import torch
682
import torchvision.ops as ops
683

684
def post_process_detections(boxes, scores, class_logits, score_threshold=0.5, nms_threshold=0.5):
685
    """
686
    Post-process detection outputs with NMS and filtering.
687
    
688
    Args:
689
        boxes: Predicted boxes (N, 4)
690
        scores: Objectness scores (N,)  
691
        class_logits: Class predictions (N, num_classes)
692
        score_threshold: Minimum score threshold
693
        nms_threshold: NMS IoU threshold
694
    
695
    Returns:
696
        dict: Filtered detections
697
    """
698
    # Get class predictions
699
    class_probs = torch.softmax(class_logits, dim=1)
700
    class_ids = torch.argmax(class_probs, dim=1)
701
    class_scores = torch.max(class_probs, dim=1)[0]
702
    
703
    # Combine objectness and classification scores
704
    final_scores = scores * class_scores
705
    
706
    # Filter by score threshold
707
    keep_mask = final_scores >= score_threshold
708
    boxes = boxes[keep_mask]
709
    final_scores = final_scores[keep_mask]
710
    class_ids = class_ids[keep_mask]
711
    
712
    # Apply NMS per class
713
    keep_indices = ops.batched_nms(boxes, final_scores, class_ids, nms_threshold)
714
    
715
    return {
716
        'boxes': boxes[keep_indices],
717
        'scores': final_scores[keep_indices],
718
        'labels': class_ids[keep_indices]
719
    }
720

721
# Example usage
722
num_detections = 1000
723
num_classes = 80
724

725
boxes = ops.box_convert(torch.rand(num_detections, 4) * 100, 'xywh', 'xyxy')  # Random valid xyxy boxes (x2 >= x1, y2 >= y1)
726
scores = torch.rand(num_detections)           # Random objectness scores
727
class_logits = torch.randn(num_detections, num_classes)  # Random class logits
728

729
# Post-process detections
730
results = post_process_detections(boxes, scores, class_logits, score_threshold=0.01)  # random scores almost never reach the 0.5 default
731
print(f"Final detections: {len(results['boxes'])}")
732
print(f"Score range: {results['scores'].min():.3f} - {results['scores'].max():.3f}")
733
```
734

735
### Loss Functions for Training
736

737
```python
738
import torch
739
import torchvision.ops as ops
740

741
# Focal Loss for object classification
742
def train_step_focal_loss():
743
    # Simulated predictions and targets
744
    batch_size, num_classes = 32, 80
745
    predictions = torch.randn(batch_size, num_classes)
746
    targets = torch.zeros(batch_size, num_classes)
747
    
748
    # Create some positive examples
749
    targets[torch.arange(batch_size), torch.randint(0, num_classes, (batch_size,))] = 1
750
    
751
    # Calculate focal loss
752
    focal_loss = ops.sigmoid_focal_loss(
753
        predictions, 
754
        targets, 
755
        alpha=0.25, 
756
        gamma=2.0, 
757
        reduction='mean'
758
    )
759
    
760
    print(f"Focal loss: {focal_loss.item():.4f}")
761
    return focal_loss
762

763
# Box regression losses
764
def train_step_box_loss():
765
    batch_size = 64
766
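    pred_boxes = ops.box_convert(torch.rand(batch_size, 4) * 100 + 1, 'xywh', 'xyxy')  # valid xyxy boxes (x1 < x2, y1 < y2)
767
    target_boxes = ops.box_convert(torch.rand(batch_size, 4) * 100 + 1, 'xywh', 'xyxy')  # valid xyxy boxes (x1 < x2, y1 < y2)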
768
    
769
    # Different IoU-based losses
770
    giou_loss = ops.generalized_box_iou_loss(pred_boxes, target_boxes, reduction='mean')
771
    diou_loss = ops.distance_box_iou_loss(pred_boxes, target_boxes, reduction='mean')
772
    ciou_loss = ops.complete_box_iou_loss(pred_boxes, target_boxes, reduction='mean')
773
    
774
    print(f"GIoU loss: {giou_loss.item():.4f}")
775
    print(f"DIoU loss: {diou_loss.item():.4f}")  
776
    print(f"CIoU loss: {ciou_loss.item():.4f}")
777
    
778
    return giou_loss + diou_loss + ciou_loss
779

780
# Run example training steps
781
focal_loss = train_step_focal_loss()
782
box_loss = train_step_box_loss()
783
total_loss = focal_loss + box_loss
784
print(f"Total loss: {total_loss.item():.4f}")
785
```
786

787
### Regularization Techniques
788

789
```python
790
import torch
791
import torch.nn as nn
792
import torchvision.ops as ops
793

794
class ResidualBlock(nn.Module):
795
    """Example residual block with stochastic depth."""
796
    
797
    def __init__(self, channels, drop_prob=0.1):
798
        super().__init__()
799
        self.conv1 = nn.Conv2d(channels, channels, 3, padding=1)
800
        self.conv2 = nn.Conv2d(channels, channels, 3, padding=1)
801
        self.relu = nn.ReLU()
802
        self.stochastic_depth = ops.StochasticDepth(drop_prob, mode='row')
803
        
804
    def forward(self, x):
805
        identity = x
806
        out = self.relu(self.conv1(x))
807
        out = self.conv2(out)
808
        
809
        # Apply stochastic depth to residual connection
810
        out = self.stochastic_depth(out)
811
        out += identity
812
        return self.relu(out)
813

814
# Example with DropBlock for convolutional regularization
815
class ConvBlockWithDropBlock(nn.Module):
816
    """Convolutional block with DropBlock regularization."""
817
    
818
    def __init__(self, in_channels, out_channels, drop_prob=0.1, block_size=7):
819
        super().__init__()
820
        self.conv = nn.Conv2d(in_channels, out_channels, 3, padding=1)
821
        self.bn = nn.BatchNorm2d(out_channels)
822
        self.relu = nn.ReLU()
823
        self.dropblock = ops.DropBlock2d(drop_prob, block_size)
824
        
825
    def forward(self, x):
826
        x = self.conv(x)
827
        x = self.bn(x)
828
        x = self.relu(x)
829
        x = self.dropblock(x)
830
        return x
831

832
# Test regularization
833
batch_size, channels, height, width = 4, 64, 32, 32
834
input_tensor = torch.randn(batch_size, channels, height, width)
835

836
# Test stochastic depth block
837
residual_block = ResidualBlock(channels, drop_prob=0.2)
838
output = residual_block(input_tensor)
839
print(f"Residual block output shape: {output.shape}")
840

841
# Test DropBlock
842
dropblock_conv = ConvBlockWithDropBlock(channels, channels, drop_prob=0.1, block_size=5)
843
output = dropblock_conv(input_tensor)
844
print(f"DropBlock conv output shape: {output.shape}")
845
```

Version

Tile

Files

docs/ops.md

ops.mddocs/