TorchVision is a computer vision library for PyTorch that provides datasets, model architectures, and image/video transforms.
TorchVision ops module provides low-level operations and specialized neural network layers for computer vision tasks. It includes functions for bounding box operations, non-maximum suppression, region of interest operations, loss functions, and custom layers used in modern computer vision architectures.
Functions for manipulating and analyzing bounding boxes in various formats.
def box_area(boxes: torch.Tensor) -> torch.Tensor:
    """
    Calculate area of bounding boxes.

    Args:
        boxes (torch.Tensor): Bounding boxes in format [x1, y1, x2, y2] of shape (..., 4).
            Assumes x2 >= x1 and y2 >= y1; degenerate boxes yield zero/negative area.

    Returns:
        torch.Tensor: Areas of boxes with shape (...,)
    """
    # Width * height along the last dimension.
    return (boxes[..., 2] - boxes[..., 0]) * (boxes[..., 3] - boxes[..., 1])
def box_convert(boxes: torch.Tensor, in_fmt: str, out_fmt: str) -> torch.Tensor:
    """
    Convert bounding boxes between different formats.

    Args:
        boxes (torch.Tensor): Bounding boxes tensor of shape (..., 4)
        in_fmt (str): Input format ('xyxy', 'xywh', 'cxcywh')
        out_fmt (str): Output format ('xyxy', 'xywh', 'cxcywh')

    Returns:
        torch.Tensor: Converted bounding boxes (a new tensor; input is not modified)

    Raises:
        ValueError: If either format string is not one of the supported formats.
    """
    allowed = ("xyxy", "xywh", "cxcywh")
    if in_fmt not in allowed or out_fmt not in allowed:
        raise ValueError(f"Unsupported box format: in_fmt={in_fmt!r}, out_fmt={out_fmt!r}")
    if in_fmt == out_fmt:
        return boxes.clone()
    # Normalize to xyxy first, then convert to the requested output format.
    if in_fmt == "xywh":
        x, y, w, h = boxes.unbind(-1)
        boxes = torch.stack((x, y, x + w, y + h), dim=-1)
    elif in_fmt == "cxcywh":
        cx, cy, w, h = boxes.unbind(-1)
        boxes = torch.stack((cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2), dim=-1)
    if out_fmt == "xywh":
        x1, y1, x2, y2 = boxes.unbind(-1)
        boxes = torch.stack((x1, y1, x2 - x1, y2 - y1), dim=-1)
    elif out_fmt == "cxcywh":
        x1, y1, x2, y2 = boxes.unbind(-1)
        boxes = torch.stack(((x1 + x2) / 2, (y1 + y2) / 2, x2 - x1, y2 - y1), dim=-1)
    return boxes
def box_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:
    """
    Calculate Intersection over Union (IoU) between two sets of boxes.

    Args:
        boxes1 (torch.Tensor): Boxes of shape (N, 4) in format [x1, y1, x2, y2]
        boxes2 (torch.Tensor): Boxes of shape (M, 4) in format [x1, y1, x2, y2]

    Returns:
        torch.Tensor: IoU matrix of shape (N, M)
    """
    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    # Pairwise intersection rectangle via broadcasting: (N, 1, 2) vs (1, M, 2).
    lt = torch.max(boxes1[:, None, :2], boxes2[None, :, :2])
    rb = torch.min(boxes1[:, None, 2:], boxes2[None, :, 2:])
    wh = (rb - lt).clamp(min=0)  # zero width/height when boxes do not overlap
    inter = wh[..., 0] * wh[..., 1]
    union = area1[:, None] + area2[None, :] - inter
    return inter / union
def generalized_box_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:
    """
    Calculate Generalized Intersection over Union (GIoU) between boxes.

    GIoU = IoU - (enclosure - union) / enclosure, ranging over [-1, 1].

    Args:
        boxes1 (torch.Tensor): Boxes of shape (N, 4) in format [x1, y1, x2, y2]
        boxes2 (torch.Tensor): Boxes of shape (M, 4) in format [x1, y1, x2, y2]

    Returns:
        torch.Tensor: GIoU matrix of shape (N, M)
    """
    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    # Pairwise intersection.
    lt = torch.max(boxes1[:, None, :2], boxes2[None, :, :2])
    rb = torch.min(boxes1[:, None, 2:], boxes2[None, :, 2:])
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    union = area1[:, None] + area2[None, :] - inter
    iou = inter / union
    # Smallest axis-aligned box enclosing each pair.
    lt_c = torch.min(boxes1[:, None, :2], boxes2[None, :, :2])
    rb_c = torch.max(boxes1[:, None, 2:], boxes2[None, :, 2:])
    wh_c = (rb_c - lt_c).clamp(min=0)
    enclose = wh_c[..., 0] * wh_c[..., 1]
    return iou - (enclose - union) / enclose
def distance_box_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:
    """
    Calculate Distance Intersection over Union (DIoU) between boxes.

    DIoU = IoU - d^2 / c^2, where d is the distance between box centers and
    c is the diagonal length of the smallest box enclosing both.

    Args:
        boxes1 (torch.Tensor): Boxes of shape (N, 4)
        boxes2 (torch.Tensor): Boxes of shape (M, 4)

    Returns:
        torch.Tensor: DIoU matrix of shape (N, M)
    """
    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    lt = torch.max(boxes1[:, None, :2], boxes2[None, :, :2])
    rb = torch.min(boxes1[:, None, 2:], boxes2[None, :, 2:])
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    union = area1[:, None] + area2[None, :] - inter
    iou = inter / union
    # Squared distance between pairwise box centers.
    cx1 = (boxes1[:, 0] + boxes1[:, 2]) / 2
    cy1 = (boxes1[:, 1] + boxes1[:, 3]) / 2
    cx2 = (boxes2[:, 0] + boxes2[:, 2]) / 2
    cy2 = (boxes2[:, 1] + boxes2[:, 3]) / 2
    center_dist = (cx1[:, None] - cx2[None, :]) ** 2 + (cy1[:, None] - cy2[None, :]) ** 2
    # Squared diagonal of the smallest enclosing box.
    lt_c = torch.min(boxes1[:, None, :2], boxes2[None, :, :2])
    rb_c = torch.max(boxes1[:, None, 2:], boxes2[None, :, 2:])
    wh_c = (rb_c - lt_c).clamp(min=0)
    diag = wh_c[..., 0] ** 2 + wh_c[..., 1] ** 2
    # clamp guards the degenerate case of two identical point-boxes.
    return iou - center_dist / diag.clamp(min=1e-7)
def complete_box_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:
    """
    Pairwise Complete IoU (CIoU) between two sets of boxes.

    CIoU extends DIoU with an aspect-ratio consistency term.

    Args:
        boxes1 (torch.Tensor): First box set, shape (N, 4)
        boxes2 (torch.Tensor): Second box set, shape (M, 4)

    Returns:
        torch.Tensor: CIoU matrix of shape (N, M)
    """
    ...
def clip_boxes_to_image(boxes: torch.Tensor, size: tuple) -> torch.Tensor:
    """
    Clip bounding boxes to image boundaries.

    Args:
        boxes (torch.Tensor): Boxes of shape (..., 4) in format [x1, y1, x2, y2]
        size (tuple): Image size as (height, width)

    Returns:
        torch.Tensor: Clipped boxes (a new tensor; input is not modified)
    """
    height, width = size
    clipped = boxes.clone()
    # x coordinates (indices 0 and 2) are bounded by width, y by height.
    clipped[..., 0::2] = clipped[..., 0::2].clamp(min=0, max=width)
    clipped[..., 1::2] = clipped[..., 1::2].clamp(min=0, max=height)
    return clipped
def remove_small_boxes(boxes: torch.Tensor, min_size: float) -> torch.Tensor:
    """
    Remove bounding boxes smaller than minimum size.

    A box is kept only when BOTH its width and height are >= min_size.

    Args:
        boxes (torch.Tensor): Boxes of shape (N, 4) in format [x1, y1, x2, y2]
        min_size (float): Minimum box size threshold

    Returns:
        torch.Tensor: Indices of boxes to keep (int64, ascending)
    """
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    keep = (widths >= min_size) & (heights >= min_size)
    return torch.where(keep)[0]
def masks_to_boxes(masks: torch.Tensor) -> torch.Tensor:
    """
    Convert binary masks to bounding boxes.

    Args:
        masks (torch.Tensor): Binary masks of shape (N, H, W). Each mask is
            assumed to contain at least one nonzero pixel; an all-zero mask
            leaves a [0, 0, 0, 0] row.

    Returns:
        torch.Tensor: Bounding boxes of shape (N, 4) in format [x1, y1, x2, y2],
            where x2/y2 are the maximum foreground indices (inclusive).
    """
    n = masks.shape[0]
    if n == 0:
        return torch.zeros((0, 4), dtype=torch.float)
    boxes = torch.zeros((n, 4), dtype=torch.float)
    for idx in range(n):
        ys, xs = torch.where(masks[idx] != 0)
        if xs.numel() == 0:
            continue  # all-background mask: leave the zero row
        boxes[idx] = torch.stack([xs.min(), ys.min(), xs.max(), ys.max()]).to(torch.float)
    return boxes

# Functions for removing duplicate detections based on overlap criteria.
def nms(boxes: torch.Tensor, scores: torch.Tensor, iou_threshold: float) -> torch.Tensor:
    """
    Non-maximum suppression for object detection.

    Greedily keeps the highest-scoring box and removes remaining boxes whose
    IoU with it exceeds the threshold, repeating until no candidates remain.

    Args:
        boxes (torch.Tensor): Bounding boxes of shape (N, 4) in format [x1, y1, x2, y2]
        scores (torch.Tensor): Scores for each box of shape (N,)
        iou_threshold (float): IoU threshold for suppression (strictly greater suppresses)

    Returns:
        torch.Tensor: Indices of boxes to keep (int64, decreasing score order)
    """
    order = scores.argsort(descending=True)
    keep = []
    while order.numel() > 0:
        top = order[0].item()
        keep.append(top)
        rest = order[1:]
        if rest.numel() == 0:
            break
        # IoU of the current top box against every remaining candidate.
        lt = torch.max(boxes[top, :2], boxes[rest, :2])
        rb = torch.min(boxes[top, 2:], boxes[rest, 2:])
        wh = (rb - lt).clamp(min=0)
        inter = wh[:, 0] * wh[:, 1]
        area_top = (boxes[top, 2] - boxes[top, 0]) * (boxes[top, 3] - boxes[top, 1])
        areas = (boxes[rest, 2] - boxes[rest, 0]) * (boxes[rest, 3] - boxes[rest, 1])
        iou = inter / (area_top + areas - inter)
        order = rest[iou <= iou_threshold]
    return torch.tensor(keep, dtype=torch.int64)
def batched_nms(boxes: torch.Tensor, scores: torch.Tensor, idxs: torch.Tensor, iou_threshold: float) -> torch.Tensor:
    """
    Batched non-maximum suppression for multiple classes.

    Boxes only suppress other boxes with the same class index.

    Args:
        boxes (torch.Tensor): Bounding boxes of shape (N, 4) in format [x1, y1, x2, y2]
        scores (torch.Tensor): Scores for each box of shape (N,)
        idxs (torch.Tensor): Class indices for each box of shape (N,)
        iou_threshold (float): IoU threshold for suppression

    Returns:
        torch.Tensor: Indices of boxes to keep (int64, decreasing score order)
    """
    if boxes.numel() == 0:
        return torch.empty((0,), dtype=torch.int64)
    # Shift each class into its own disjoint coordinate range so a single
    # greedy NMS pass can never suppress across classes.
    offsets = idxs.to(boxes) * (boxes.max() + 1)
    shifted = boxes + offsets[:, None]
    order = scores.argsort(descending=True)
    keep = []
    while order.numel() > 0:
        top = order[0].item()
        keep.append(top)
        rest = order[1:]
        if rest.numel() == 0:
            break
        lt = torch.max(shifted[top, :2], shifted[rest, :2])
        rb = torch.min(shifted[top, 2:], shifted[rest, 2:])
        wh = (rb - lt).clamp(min=0)
        inter = wh[:, 0] * wh[:, 1]
        area_top = (shifted[top, 2] - shifted[top, 0]) * (shifted[top, 3] - shifted[top, 1])
        areas = (shifted[rest, 2] - shifted[rest, 0]) * (shifted[rest, 3] - shifted[rest, 1])
        iou = inter / (area_top + areas - inter)
        order = rest[iou <= iou_threshold]
    return torch.tensor(keep, dtype=torch.int64)

# Specialized loss functions for computer vision tasks.
def sigmoid_focal_loss(inputs: torch.Tensor, targets: torch.Tensor, alpha: float = -1, gamma: float = 2, reduction: str = 'none') -> torch.Tensor:
    """
    Focal loss for addressing class imbalance in object detection.

    loss = BCE(inputs, targets) * (1 - p_t)^gamma, optionally alpha-weighted.

    Args:
        inputs (torch.Tensor): Predicted logits of shape (..., num_classes)
        targets (torch.Tensor): Ground truth labels (0/1 floats) of shape (..., num_classes)
        alpha (float): Weighting factor for rare class (default: -1 means no weighting)
        gamma (float): Focusing parameter to down-weight easy examples
        reduction (str): Reduction method ('none', 'mean', 'sum')

    Returns:
        torch.Tensor: Focal loss values

    Raises:
        ValueError: If ``reduction`` is not one of 'none', 'mean', 'sum'.
    """
    p = torch.sigmoid(inputs)
    ce_loss = torch.nn.functional.binary_cross_entropy_with_logits(inputs, targets, reduction="none")
    # p_t is the model's probability assigned to the true class.
    p_t = p * targets + (1 - p) * (1 - targets)
    loss = ce_loss * ((1 - p_t) ** gamma)
    if alpha >= 0:
        alpha_t = alpha * targets + (1 - alpha) * (1 - targets)
        loss = alpha_t * loss
    if reduction == 'none':
        return loss
    if reduction == 'mean':
        return loss.mean()
    if reduction == 'sum':
        return loss.sum()
    raise ValueError(f"Invalid reduction: {reduction!r}")
def generalized_box_iou_loss(boxes1: torch.Tensor, boxes2: torch.Tensor, reduction: str = 'none') -> torch.Tensor:
    """
    Generalized IoU loss for bounding box regression.

    Elementwise loss: 1 - GIoU(boxes1[i], boxes2[i]); range [0, 2].

    Args:
        boxes1 (torch.Tensor): Predicted boxes of shape (N, 4) in [x1, y1, x2, y2]
        boxes2 (torch.Tensor): Target boxes of shape (N, 4) in [x1, y1, x2, y2]
        reduction (str): Reduction method ('none', 'mean', 'sum')

    Returns:
        torch.Tensor: GIoU loss values
    """
    eps = 1e-7  # guards divisions for degenerate boxes
    x1, y1, x2, y2 = boxes1.unbind(dim=-1)
    x1g, y1g, x2g, y2g = boxes2.unbind(dim=-1)
    area1 = (x2 - x1) * (y2 - y1)
    area2 = (x2g - x1g) * (y2g - y1g)
    inter_w = (torch.min(x2, x2g) - torch.max(x1, x1g)).clamp(min=0)
    inter_h = (torch.min(y2, y2g) - torch.max(y1, y1g)).clamp(min=0)
    inter = inter_w * inter_h
    union = area1 + area2 - inter
    iou = inter / (union + eps)
    # Area of the smallest box enclosing each pair.
    enclose = (torch.max(x2, x2g) - torch.min(x1, x1g)) * (torch.max(y2, y2g) - torch.min(y1, y1g))
    giou = iou - (enclose - union) / (enclose + eps)
    loss = 1 - giou
    if reduction == 'mean':
        return loss.mean()
    if reduction == 'sum':
        return loss.sum()
    return loss
def distance_box_iou_loss(boxes1: torch.Tensor, boxes2: torch.Tensor, reduction: str = 'none') -> torch.Tensor:
    """
    Distance IoU (DIoU) loss for bounding box regression.

    Args:
        boxes1 (torch.Tensor): Predicted boxes, shape (N, 4)
        boxes2 (torch.Tensor): Target boxes, shape (N, 4)
        reduction (str): One of 'none', 'mean', 'sum'

    Returns:
        torch.Tensor: DIoU loss values
    """
    ...
def complete_box_iou_loss(boxes1: torch.Tensor, boxes2: torch.Tensor, reduction: str = 'none') -> torch.Tensor:
    """
    Complete IoU loss for bounding box regression.

    Args:
        boxes1 (torch.Tensor): Predicted boxes of shape (N, 4)
        boxes2 (torch.Tensor): Target boxes of shape (N, 4)
        reduction (str): Reduction method ('none', 'mean', 'sum')

    Returns:
        torch.Tensor: CIoU loss values
    """
    ...

# Operations for extracting features from regions of interest in feature maps.
def roi_align(input: torch.Tensor, boxes: torch.Tensor, output_size: tuple, spatial_scale: float = 1.0, sampling_ratio: int = -1, aligned: bool = False) -> torch.Tensor:
    """
    RoI Align: extract fixed-size features from variable-size regions.

    Args:
        input (torch.Tensor): Feature map, shape (N, C, H, W)
        boxes (torch.Tensor): RoIs, shape (K, 5); each row is [batch_idx, x1, y1, x2, y2]
        output_size (tuple): (height, width) of each pooled region
        spatial_scale (float): Maps input coordinates to box coordinates
        sampling_ratio (int): Sampling points per bin; -1 selects adaptively
        aligned (bool): Whether to align corners

    Returns:
        torch.Tensor: Features of shape (K, C, output_size[0], output_size[1])
    """
    ...
class RoIAlign(torch.nn.Module):
    """
    RoI Align layer for region-based networks.

    Layer counterpart of the functional ``roi_align`` above: the constructor
    parameters mirror its keyword arguments and are applied on every call.

    Args:
        output_size (tuple): Output size as (height, width)
        spatial_scale (float): Scale factor between input and RoI coordinates
        sampling_ratio (int): Number of sampling points per bin
        aligned (bool): Whether to align corners
    """
    # Stub signatures only — the numeric kernel ships with torchvision.
    def __init__(self, output_size: tuple, spatial_scale: float = 1.0, sampling_ratio: int = -1, aligned: bool = False): ...
    def forward(self, input: torch.Tensor, rois: torch.Tensor) -> torch.Tensor: ...
def roi_pool(input: torch.Tensor, boxes: torch.Tensor, output_size: tuple, spatial_scale: float = 1.0) -> torch.Tensor:
    """
    RoI Pooling (legacy operation; RoI Align is the preferred alternative).

    Args:
        input (torch.Tensor): Feature map, shape (N, C, H, W)
        boxes (torch.Tensor): RoIs, shape (K, 5)
        output_size (tuple): (height, width) of each pooled region
        spatial_scale (float): Coordinate scale factor

    Returns:
        torch.Tensor: Pooled features
    """
    ...
class RoIPool(torch.nn.Module):
    """RoI Pooling layer. Layer counterpart of the functional ``roi_pool`` above."""
    # Stub signature only — implementation ships with torchvision.
    def __init__(self, output_size: tuple, spatial_scale: float = 1.0): ...

def ps_roi_align(input: torch.Tensor, boxes: torch.Tensor, output_size: tuple, spatial_scale: float = 1.0, sampling_ratio: int = -1) -> torch.Tensor:
    """
    Position Sensitive RoI Align for position-sensitive score maps.

    Args:
        input (torch.Tensor): Position-sensitive feature map
        boxes (torch.Tensor): RoIs of shape (K, 5)
        output_size (tuple): Output size
        spatial_scale (float): Scale factor
        sampling_ratio (int): Number of sampling points

    Returns:
        torch.Tensor: Position-sensitive aligned features
    """

class PSRoIAlign(torch.nn.Module):
    """Position Sensitive RoI Align layer; layer counterpart of ``ps_roi_align``."""
    def __init__(self, output_size: tuple, spatial_scale: float = 1.0, sampling_ratio: int = -1): ...

def ps_roi_pool(input: torch.Tensor, boxes: torch.Tensor, output_size: tuple, spatial_scale: float = 1.0) -> torch.Tensor:
    """
    Position Sensitive RoI Pooling operation.

    Args:
        input (torch.Tensor): Position-sensitive feature map
        boxes (torch.Tensor): RoIs of shape (K, 5)
        output_size (tuple): Output size
        spatial_scale (float): Scale factor

    Returns:
        torch.Tensor: Position-sensitive pooled features
    """

class PSRoIPool(torch.nn.Module):
    """Position Sensitive RoI Pooling layer; layer counterpart of ``ps_roi_pool``."""
    def __init__(self, output_size: tuple, spatial_scale: float = 1.0): ...
class MultiScaleRoIAlign(torch.nn.Module):
    """
    Multi-scale RoI Align for Feature Pyramid Networks.

    Args:
        featmap_names (list): Names of feature maps to use
        output_size (tuple): Output size for aligned features
        sampling_ratio (int): Number of sampling points
        canonical_scale (int): Canonical scale for level assignment
        canonical_level (int): Canonical level in pyramid
    """
    # Stub signatures only — implementation ships with torchvision.
    def __init__(self, featmap_names: list, output_size: tuple, sampling_ratio: int, canonical_scale: int = 224, canonical_level: int = 4): ...

    def forward(self, x: dict, boxes: list) -> torch.Tensor: ...

# Custom convolution operations for advanced architectures.
def deform_conv2d(input: torch.Tensor, offset: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor = None, stride: tuple = (1, 1), padding: tuple = (0, 0), dilation: tuple = (1, 1), mask: torch.Tensor = None) -> torch.Tensor:
    """
    Deformable 2D convolution.

    Args:
        input (torch.Tensor): Feature map, shape (N, C_in, H_in, W_in)
        offset (torch.Tensor): Offset field, shape (N, 2*kernel_h*kernel_w, H_out, W_out)
        weight (torch.Tensor): Kernel weights, shape (C_out, C_in, kernel_h, kernel_w)
        bias (torch.Tensor, optional): Bias, shape (C_out,)
        stride (tuple): Convolution stride
        padding (tuple): Convolution padding
        dilation (tuple): Convolution dilation
        mask (torch.Tensor, optional): Modulation mask

    Returns:
        torch.Tensor: Output feature map, shape (N, C_out, H_out, W_out)
    """
    ...
class DeformConv2d(torch.nn.Module):
    """
    Deformable Convolution layer; layer counterpart of ``deform_conv2d``.

    Args:
        in_channels (int): Number of input channels
        out_channels (int): Number of output channels
        kernel_size (int or tuple): Convolution kernel size
        stride (int or tuple): Convolution stride
        padding (int or tuple): Convolution padding
        dilation (int or tuple): Convolution dilation
        groups (int): Number of groups for grouped convolution
        bias (bool): Whether to use bias
    """
    # Stub signatures only — implementation ships with torchvision.
    def __init__(self, in_channels: int, out_channels: int, kernel_size: int, stride: int = 1, padding: int = 0, dilation: int = 1, groups: int = 1, bias: bool = True): ...

    def forward(self, input: torch.Tensor, offset: torch.Tensor, mask: torch.Tensor = None) -> torch.Tensor: ...

# Regularization techniques for improving model robustness.
def stochastic_depth(input: torch.Tensor, p: float, mode: str, training: bool = True) -> torch.Tensor:
    """
    Stochastic depth regularization (Drop Path).

    Randomly zeroes whole residual branches with probability ``p``; kept
    branches are rescaled by 1/(1-p) so the expectation is unchanged.

    Args:
        input (torch.Tensor): Input tensor
        p (float): Drop probability, in [0, 1]
        mode (str): 'row' drops each sample independently; 'batch' drops the whole batch
        training (bool): Whether in training mode (identity when False)

    Returns:
        torch.Tensor: Output tensor with stochastic depth applied

    Raises:
        ValueError: If p is outside [0, 1] or mode is unknown.
    """
    if p < 0.0 or p > 1.0:
        raise ValueError(f"drop probability has to be between 0 and 1, but got {p}")
    if mode not in ("batch", "row"):
        raise ValueError(f"mode has to be either 'batch' or 'row', but got {mode}")
    if not training or p == 0.0:
        return input
    survival_rate = 1.0 - p
    if mode == "row":
        # One Bernoulli draw per sample, broadcast over remaining dims.
        size = [input.shape[0]] + [1] * (input.ndim - 1)
    else:
        size = [1] * input.ndim
    noise = torch.empty(size, dtype=input.dtype, device=input.device).bernoulli_(survival_rate)
    if survival_rate > 0.0:
        noise.div_(survival_rate)
    return input * noise
class StochasticDepth(torch.nn.Module):
    """
    Stochastic Depth (Drop Path) layer.

    Layer counterpart of the functional ``stochastic_depth`` above.

    Args:
        p (float): Drop probability
        mode (str): Drop mode ('batch' or 'row')
    """
    # Stub signatures only — implementation ships with torchvision.
    def __init__(self, p: float, mode: str): ...
    def forward(self, input: torch.Tensor) -> torch.Tensor: ...
def drop_block2d(input: torch.Tensor, p: float, block_size: int, inplace: bool = False, eps: float = 1e-6, training: bool = True) -> torch.Tensor:
    """
    DropBlock2D regularization for convolutional layers.

    Args:
        input (torch.Tensor): Input tensor, shape (N, C, H, W)
        p (float): Drop probability
        block_size (int): Edge length of the square blocks to drop
        inplace (bool): Whether to modify ``input`` in place
        eps (float): Small value to avoid division by zero
        training (bool): Whether in training mode

    Returns:
        torch.Tensor: Output tensor with DropBlock applied
    """
    ...
class DropBlock2d(torch.nn.Module):
    """
    DropBlock2D layer for spatial regularization.

    Layer counterpart of the functional ``drop_block2d`` above.

    Args:
        p (float): Drop probability
        block_size (int): Size of blocks to drop
        eps (float): Small epsilon value
        inplace (bool): Whether to apply in-place
    """
    # Stub signature only — implementation ships with torchvision.
    def __init__(self, p: float, block_size: int, eps: float = 1e-6, inplace: bool = False): ...

def drop_block3d(input: torch.Tensor, p: float, block_size: int, inplace: bool = False, eps: float = 1e-6, training: bool = True) -> torch.Tensor:
    """
    DropBlock3D for 3D tensors (e.g., video).

    Parameters mirror ``drop_block2d``; input presumably carries an extra
    spatial dimension (likely (N, C, D, H, W) — confirm against upstream docs).
    """
class DropBlock3d(torch.nn.Module):
    """DropBlock3D layer for 3D regularization; layer counterpart of ``drop_block3d``."""
    # Stub signature only — implementation ships with torchvision.
    def __init__(self, p: float, block_size: int, eps: float = 1e-6, inplace: bool = False): ...

# Implementation of Feature Pyramid Network for multi-scale feature extraction.
class FeaturePyramidNetwork(torch.nn.Module):
    """
    Feature Pyramid Network for multi-scale feature extraction.

    Args:
        in_channels_list (list): List of input channel numbers for each level
        out_channels (int): Number of output channels for all levels
        extra_blocks (nn.Module, optional): Extra blocks to append
        norm_layer (callable, optional): Normalization layer
    """
    # Stub signatures only — implementation ships with torchvision.
    def __init__(self, in_channels_list: list, out_channels: int, extra_blocks=None, norm_layer=None): ...

    def forward(self, x: dict) -> dict:
        """
        Forward pass through FPN.

        Args:
            x (dict): Dictionary of feature maps from different levels

        Returns:
            dict: Dictionary of FPN feature maps
        """

# General-purpose layers commonly used in computer vision architectures.
class FrozenBatchNorm2d(torch.nn.Module):
    """
    Frozen Batch Normalization layer (parameters not updated during training).

    Args:
        num_features (int): Number of features
        eps (float): Small value for numerical stability
    """
    # Stub signature only — implementation ships with torchvision.
    def __init__(self, num_features: int, eps: float = 1e-5): ...

class Conv2dNormActivation(torch.nn.Sequential):
    """
    Convolution with normalization and activation in sequence.

    Args:
        in_planes (int): Input channels
        out_planes (int): Output channels
        kernel_size (int): Convolution kernel size
        stride (int): Convolution stride
        padding (int, optional): Convolution padding
        groups (int): Number of groups for grouped convolution
        norm_layer (callable, optional): Normalization layer
        activation_layer (callable, optional): Activation layer
        dilation (int): Convolution dilation
        inplace (bool, optional): Whether activations should be in-place
        bias (bool, optional): Whether to use bias in convolution
    """
    def __init__(self, in_planes: int, out_planes: int, kernel_size: int = 3, stride: int = 1, padding: int = None, groups: int = 1, norm_layer=None, activation_layer=None, dilation: int = 1, inplace: bool = None, bias: bool = None): ...

class Conv3dNormActivation(torch.nn.Sequential):
    """3D version of Conv2dNormActivation for video/3D data (same parameters)."""
    def __init__(self, in_planes: int, out_planes: int, kernel_size: int = 3, stride: int = 1, padding: int = None, groups: int = 1, norm_layer=None, activation_layer=None, dilation: int = 1, inplace: bool = None, bias: bool = None): ...
class SqueezeExcitation(torch.nn.Module):
    """
    Squeeze-and-Excitation block for channel attention.

    Args:
        input_channels (int): Number of input channels
        squeeze_channels (int): Number of channels after squeeze operation
        activation (callable, optional): Activation function for squeeze
        scale_activation (callable, optional): Activation function for scale
    """
    # Stub signatures only — implementation ships with torchvision.
    def __init__(self, input_channels: int, squeeze_channels: int, activation=None, scale_activation=None): ...
    def forward(self, input: torch.Tensor) -> torch.Tensor: ...

class MLP(torch.nn.Sequential):
    """
    Multi-layer perceptron with configurable layers.

    Args:
        in_channels (int): Input dimension
        hidden_channels (list): List of hidden layer dimensions
        norm_layer (callable, optional): Normalization layer
        activation_layer (callable, optional): Activation layer
        inplace (bool, optional): Whether activations should be in-place
        bias (bool): Whether to use bias
        dropout (float): Dropout probability
    """
    def __init__(self, in_channels: int, hidden_channels: list, norm_layer=None, activation_layer=None, inplace: bool = None, bias: bool = True, dropout: float = 0.0): ...
class Permute(torch.nn.Module):
    """
    Permute tensor dimensions.

    Args:
        dims (list): New order of dimensions
    """
    # Stub signatures only — implementation ships with torchvision.
    def __init__(self, dims: list): ...

    def forward(self, x: torch.Tensor) -> torch.Tensor: ...

# --- Usage example: bounding-box utilities ---
import torch
import torchvision.ops as ops

# Create example bounding boxes (N=3 boxes in xyxy format)
boxes1 = torch.tensor([
    [10, 10, 50, 50],
    [30, 30, 70, 70],
    [60, 10, 100, 50],
], dtype=torch.float)
boxes2 = torch.tensor([
    [15, 15, 55, 55],
    [25, 25, 65, 65],
], dtype=torch.float)

# Calculate IoU matrix
iou_matrix = ops.box_iou(boxes1, boxes2)
print(f"IoU matrix shape: {iou_matrix.shape}")  # (3, 2)
print(f"IoU values:\n{iou_matrix}")

# Calculate box areas
areas = ops.box_area(boxes1)
print(f"Box areas: {areas}")

# Convert box formats
boxes_xywh = ops.box_convert(boxes1, 'xyxy', 'xywh')
print(f"Boxes in xywh format: {boxes_xywh}")

# Clip boxes to image boundaries
image_size = (100, 120)  # (height, width)
clipped_boxes = ops.clip_boxes_to_image(boxes1, image_size)
print(f"Clipped boxes: {clipped_boxes}")

# --- Usage example: non-maximum suppression ---
import torch
import torchvision.ops as ops

# Example detection results
boxes = torch.tensor([
    [10, 10, 50, 50],
    [12, 12, 52, 52],    # Overlapping with first box
    [60, 10, 100, 50],
    [15, 15, 45, 45],    # Overlapping with first box
    [80, 80, 120, 120],
], dtype=torch.float)
scores = torch.tensor([0.9, 0.8, 0.7, 0.85, 0.6])
class_ids = torch.tensor([0, 0, 1, 0, 1])

# Apply NMS
keep_indices = ops.nms(boxes, scores, iou_threshold=0.5)
print(f"Indices to keep after NMS: {keep_indices}")

# Apply batched NMS (per-class NMS)
keep_indices_batched = ops.batched_nms(boxes, scores, class_ids, iou_threshold=0.5)
print(f"Indices to keep after batched NMS: {keep_indices_batched}")

# Filter results
final_boxes = boxes[keep_indices_batched]
final_scores = scores[keep_indices_batched]
final_classes = class_ids[keep_indices_batched]
print(f"Final boxes: {final_boxes}")
print(f"Final scores: {final_scores}")
print(f"Final classes: {final_classes}")

# --- Usage example: RoI Align ---
import torch
import torchvision.ops as ops

# Create feature map (batch_size=2, channels=64, height=32, width=32)
feature_map = torch.randn(2, 64, 32, 32)

# Define RoIs: [batch_idx, x1, y1, x2, y2]
rois = torch.tensor([
    [0, 5, 5, 15, 15],    # RoI in first image
    [0, 20, 10, 30, 25],  # Another RoI in first image
    [1, 8, 8, 18, 18],    # RoI in second image
], dtype=torch.float)

# Apply RoI Align
output_size = (7, 7)
spatial_scale = 1.0
aligned_features = ops.roi_align(
    feature_map,
    rois,
    output_size,
    spatial_scale=spatial_scale,
    sampling_ratio=2,
)
print(f"Aligned features shape: {aligned_features.shape}")  # (3, 64, 7, 7)

# Using RoI Align as a layer
roi_align_layer = ops.RoIAlign(output_size=(14, 14), spatial_scale=0.5, sampling_ratio=2)
aligned_features_layer = roi_align_layer(feature_map, rois)
print(f"Layer output shape: {aligned_features_layer.shape}")

# --- Usage example: Feature Pyramid Network ---
import torch
import torchvision.ops as ops

# Create FPN for ResNet-like backbone
in_channels_list = [256, 512, 1024, 2048]  # ResNet feature channels
out_channels = 256
fpn = ops.FeaturePyramidNetwork(in_channels_list, out_channels)

# Simulate backbone features
backbone_features = {
    '0': torch.randn(2, 256, 64, 64),   # Early layer
    '1': torch.randn(2, 512, 32, 32),   # Mid layer
    '2': torch.randn(2, 1024, 16, 16),  # Late layer
    '3': torch.randn(2, 2048, 8, 8),    # Final layer
}

# Apply FPN
fpn_features = fpn(backbone_features)
print("FPN output shapes:")
for key, feature in fpn_features.items():
    print(f"Level {key}: {feature.shape}")

# --- Usage example: detection post-processing ---
import torch
import torchvision.ops as ops


def post_process_detections(boxes, scores, class_logits, score_threshold=0.5, nms_threshold=0.5):
    """
    Post-process detection outputs with NMS and filtering.

    Args:
        boxes: Predicted boxes (N, 4)
        scores: Objectness scores (N,)
        class_logits: Class predictions (N, num_classes)
        score_threshold: Minimum score threshold
        nms_threshold: NMS IoU threshold

    Returns:
        dict: Filtered detections with 'boxes', 'scores' and 'labels' keys
    """
    # Get class predictions (max returns both the best score and its index).
    class_probs = torch.softmax(class_logits, dim=1)
    class_scores, class_ids = torch.max(class_probs, dim=1)

    # Combine objectness and classification scores
    final_scores = scores * class_scores

    # Filter by score threshold
    keep_mask = final_scores >= score_threshold
    boxes = boxes[keep_mask]
    final_scores = final_scores[keep_mask]
    class_ids = class_ids[keep_mask]

    # Apply NMS per class
    keep_indices = ops.batched_nms(boxes, final_scores, class_ids, nms_threshold)
    return {
        'boxes': boxes[keep_indices],
        'scores': final_scores[keep_indices],
        'labels': class_ids[keep_indices],
    }


# Example usage
num_detections = 1000
num_classes = 80
# Build well-formed boxes (x2 > x1, y2 > y1); plain randn would produce
# degenerate boxes and make the IoU computation in NMS meaningless.
top_left = torch.rand(num_detections, 2) * 100
extent = torch.rand(num_detections, 2) * 50 + 1
boxes = torch.cat([top_left, top_left + extent], dim=1)
scores = torch.rand(num_detections)  # Random objectness scores
class_logits = torch.randn(num_detections, num_classes)  # Random class logits

# Post-process detections
results = post_process_detections(boxes, scores, class_logits)
print(f"Final detections: {len(results['boxes'])}")
if len(results['scores']) > 0:  # guard: everything may be filtered out
    print(f"Score range: {results['scores'].min():.3f} - {results['scores'].max():.3f}")

# --- Usage example: loss functions ---
import torch
import torchvision.ops as ops


def _random_valid_boxes(n):
    """Well-formed [x1, y1, x2, y2] boxes with x2 > x1 and y2 > y1."""
    top_left = torch.rand(n, 2) * 100
    extent = torch.rand(n, 2) * 50 + 1
    return torch.cat([top_left, top_left + extent], dim=1)


# Focal Loss for object classification
def train_step_focal_loss():
    # Simulated predictions and targets
    batch_size, num_classes = 32, 80
    predictions = torch.randn(batch_size, num_classes)
    targets = torch.zeros(batch_size, num_classes)
    # Create some positive examples
    targets[torch.arange(batch_size), torch.randint(0, num_classes, (batch_size,))] = 1

    # Calculate focal loss
    focal_loss = ops.sigmoid_focal_loss(
        predictions,
        targets,
        alpha=0.25,
        gamma=2.0,
        reduction='mean',
    )
    print(f"Focal loss: {focal_loss.item():.4f}")
    return focal_loss


# Box regression losses
def train_step_box_loss():
    batch_size = 64
    # IoU-based losses expect valid boxes; raw randn output routinely
    # violates x2 > x1 / y2 > y1 and yields meaningless loss values.
    pred_boxes = _random_valid_boxes(batch_size)
    target_boxes = _random_valid_boxes(batch_size)

    # Different IoU-based losses
    giou_loss = ops.generalized_box_iou_loss(pred_boxes, target_boxes, reduction='mean')
    diou_loss = ops.distance_box_iou_loss(pred_boxes, target_boxes, reduction='mean')
    ciou_loss = ops.complete_box_iou_loss(pred_boxes, target_boxes, reduction='mean')
    print(f"GIoU loss: {giou_loss.item():.4f}")
    print(f"DIoU loss: {diou_loss.item():.4f}")
    print(f"CIoU loss: {ciou_loss.item():.4f}")
    return giou_loss + diou_loss + ciou_loss


# Run example training steps
focal_loss = train_step_focal_loss()
box_loss = train_step_box_loss()
total_loss = focal_loss + box_loss
print(f"Total loss: {total_loss.item():.4f}")

# --- Usage example: regularization layers ---
import torch
import torch.nn as nn
import torchvision.ops as ops


class ResidualBlock(nn.Module):
    """Example residual block with stochastic depth."""

    def __init__(self, channels, drop_prob=0.1):
        super().__init__()
        self.conv1 = nn.Conv2d(channels, channels, 3, padding=1)
        self.conv2 = nn.Conv2d(channels, channels, 3, padding=1)
        self.relu = nn.ReLU()
        self.stochastic_depth = ops.StochasticDepth(drop_prob, mode='row')

    def forward(self, x):
        identity = x
        out = self.relu(self.conv1(x))
        out = self.conv2(out)
        # Apply stochastic depth to residual connection
        out = self.stochastic_depth(out)
        out += identity
        return self.relu(out)


# Example with DropBlock for convolutional regularization
class ConvBlockWithDropBlock(nn.Module):
    """Convolutional block with DropBlock regularization."""

    def __init__(self, in_channels, out_channels, drop_prob=0.1, block_size=7):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, 3, padding=1)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.dropblock = ops.DropBlock2d(drop_prob, block_size)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.dropblock(x)
        return x


# Test regularization
batch_size, channels, height, width = 4, 64, 32, 32
input_tensor = torch.randn(batch_size, channels, height, width)

# Test stochastic depth block
residual_block = ResidualBlock(channels, drop_prob=0.2)
output = residual_block(input_tensor)
print(f"Residual block output shape: {output.shape}")

# Test DropBlock
dropblock_conv = ConvBlockWithDropBlock(channels, channels, drop_prob=0.1, block_size=5)
output = dropblock_conv(input_tensor)
print(f"DropBlock conv output shape: {output.shape}")

# Install with Tessl CLI:
#   npx tessl i tessl/pypi-torchvision