TorchVision is a computer vision library for PyTorch that provides datasets, model architectures, and image/video transforms.
TorchVision ops module provides low-level operations and specialized neural network layers for computer vision tasks. It includes functions for bounding box operations, non-maximum suppression, region of interest operations, loss functions, and custom layers used in modern computer vision architectures.
Functions for manipulating and analyzing bounding boxes in various formats.
def box_area(boxes: torch.Tensor) -> torch.Tensor:
    """
    Calculate area of bounding boxes.

    Args:
        boxes (torch.Tensor): Bounding boxes in format [x1, y1, x2, y2] of shape (..., 4).
            Assumes x2 >= x1 and y2 >= y1; degenerate boxes yield zero/negative area.

    Returns:
        torch.Tensor: Areas of boxes with shape (...,)
    """
    # Width * height along the last dimension.
    return (boxes[..., 2] - boxes[..., 0]) * (boxes[..., 3] - boxes[..., 1])
def box_convert(boxes: torch.Tensor, in_fmt: str, out_fmt: str) -> torch.Tensor:
    """
    Convert bounding boxes between different formats.

    Args:
        boxes (torch.Tensor): Bounding boxes tensor of shape (..., 4)
        in_fmt (str): Input format ('xyxy', 'xywh', 'cxcywh')
        out_fmt (str): Output format ('xyxy', 'xywh', 'cxcywh')

    Returns:
        torch.Tensor: Converted bounding boxes (a new tensor; input is not modified)

    Raises:
        ValueError: If either format string is not one of the supported formats.
    """
    allowed = ("xyxy", "xywh", "cxcywh")
    if in_fmt not in allowed or out_fmt not in allowed:
        raise ValueError(f"Unsupported box format: in_fmt={in_fmt!r}, out_fmt={out_fmt!r}")
    if in_fmt == out_fmt:
        return boxes.clone()
    # Normalize to xyxy first, then convert to the requested output format.
    if in_fmt == "xywh":
        x, y, w, h = boxes.unbind(-1)
        boxes = torch.stack((x, y, x + w, y + h), dim=-1)
    elif in_fmt == "cxcywh":
        cx, cy, w, h = boxes.unbind(-1)
        boxes = torch.stack((cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2), dim=-1)
    if out_fmt == "xywh":
        x1, y1, x2, y2 = boxes.unbind(-1)
        boxes = torch.stack((x1, y1, x2 - x1, y2 - y1), dim=-1)
    elif out_fmt == "cxcywh":
        x1, y1, x2, y2 = boxes.unbind(-1)
        boxes = torch.stack(((x1 + x2) / 2, (y1 + y2) / 2, x2 - x1, y2 - y1), dim=-1)
    return boxes
def box_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:
    """
    Calculate Intersection over Union (IoU) between two sets of boxes.

    Args:
        boxes1 (torch.Tensor): Boxes of shape (N, 4) in format [x1, y1, x2, y2]
        boxes2 (torch.Tensor): Boxes of shape (M, 4) in format [x1, y1, x2, y2]

    Returns:
        torch.Tensor: IoU matrix of shape (N, M)
    """
    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    # Pairwise intersection rectangle via broadcasting: (N, 1, 2) vs (1, M, 2).
    lt = torch.max(boxes1[:, None, :2], boxes2[None, :, :2])
    rb = torch.min(boxes1[:, None, 2:], boxes2[None, :, 2:])
    wh = (rb - lt).clamp(min=0)  # zero width/height when boxes do not overlap
    inter = wh[..., 0] * wh[..., 1]
    union = area1[:, None] + area2[None, :] - inter
    return inter / union
def generalized_box_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:
    """
    Calculate Generalized Intersection over Union (GIoU) between boxes.

    GIoU = IoU - (enclosure - union) / enclosure, ranging over [-1, 1].

    Args:
        boxes1 (torch.Tensor): Boxes of shape (N, 4) in format [x1, y1, x2, y2]
        boxes2 (torch.Tensor): Boxes of shape (M, 4) in format [x1, y1, x2, y2]

    Returns:
        torch.Tensor: GIoU matrix of shape (N, M)
    """
    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    # Pairwise intersection.
    lt = torch.max(boxes1[:, None, :2], boxes2[None, :, :2])
    rb = torch.min(boxes1[:, None, 2:], boxes2[None, :, 2:])
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    union = area1[:, None] + area2[None, :] - inter
    iou = inter / union
    # Smallest axis-aligned box enclosing each pair.
    lt_c = torch.min(boxes1[:, None, :2], boxes2[None, :, :2])
    rb_c = torch.max(boxes1[:, None, 2:], boxes2[None, :, 2:])
    wh_c = (rb_c - lt_c).clamp(min=0)
    enclose = wh_c[..., 0] * wh_c[..., 1]
    return iou - (enclose - union) / enclose
def distance_box_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:
    """
    Calculate Distance Intersection over Union (DIoU) between boxes.

    DIoU = IoU - d^2 / c^2, where d is the distance between box centers and
    c is the diagonal length of the smallest box enclosing both.

    Args:
        boxes1 (torch.Tensor): Boxes of shape (N, 4)
        boxes2 (torch.Tensor): Boxes of shape (M, 4)

    Returns:
        torch.Tensor: DIoU matrix of shape (N, M)
    """
    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    lt = torch.max(boxes1[:, None, :2], boxes2[None, :, :2])
    rb = torch.min(boxes1[:, None, 2:], boxes2[None, :, 2:])
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    union = area1[:, None] + area2[None, :] - inter
    iou = inter / union
    # Squared distance between pairwise box centers.
    cx1 = (boxes1[:, 0] + boxes1[:, 2]) / 2
    cy1 = (boxes1[:, 1] + boxes1[:, 3]) / 2
    cx2 = (boxes2[:, 0] + boxes2[:, 2]) / 2
    cy2 = (boxes2[:, 1] + boxes2[:, 3]) / 2
    center_dist = (cx1[:, None] - cx2[None, :]) ** 2 + (cy1[:, None] - cy2[None, :]) ** 2
    # Squared diagonal of the smallest enclosing box.
    lt_c = torch.min(boxes1[:, None, :2], boxes2[None, :, :2])
    rb_c = torch.max(boxes1[:, None, 2:], boxes2[None, :, 2:])
    wh_c = (rb_c - lt_c).clamp(min=0)
    diag = wh_c[..., 0] ** 2 + wh_c[..., 1] ** 2
    # clamp guards the degenerate case of two identical point-boxes.
    return iou - center_dist / diag.clamp(min=1e-7)
def complete_box_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:
    """
    Pairwise Complete IoU (CIoU) between two sets of boxes.

    CIoU extends DIoU with an aspect-ratio consistency term.

    Args:
        boxes1 (torch.Tensor): First box set, shape (N, 4)
        boxes2 (torch.Tensor): Second box set, shape (M, 4)

    Returns:
        torch.Tensor: CIoU matrix of shape (N, M)
    """
    ...
def clip_boxes_to_image(boxes: torch.Tensor, size: tuple) -> torch.Tensor:
    """
    Clip bounding boxes to image boundaries.

    Args:
        boxes (torch.Tensor): Boxes of shape (..., 4) in format [x1, y1, x2, y2]
        size (tuple): Image size as (height, width)

    Returns:
        torch.Tensor: Clipped boxes (a new tensor; input is not modified)
    """
    height, width = size
    clipped = boxes.clone()
    # x coordinates (indices 0 and 2) are bounded by width, y by height.
    clipped[..., 0::2] = clipped[..., 0::2].clamp(min=0, max=width)
    clipped[..., 1::2] = clipped[..., 1::2].clamp(min=0, max=height)
    return clipped
def remove_small_boxes(boxes: torch.Tensor, min_size: float) -> torch.Tensor:
    """
    Remove bounding boxes smaller than minimum size.

    A box is kept only when BOTH its width and height are >= min_size.

    Args:
        boxes (torch.Tensor): Boxes of shape (N, 4) in format [x1, y1, x2, y2]
        min_size (float): Minimum box size threshold

    Returns:
        torch.Tensor: Indices of boxes to keep (int64, ascending)
    """
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    keep = (widths >= min_size) & (heights >= min_size)
    return torch.where(keep)[0]
def masks_to_boxes(masks: torch.Tensor) -> torch.Tensor:
    """
    Convert binary masks to bounding boxes.

    Args:
        masks (torch.Tensor): Binary masks of shape (N, H, W). Each mask is
            assumed to contain at least one nonzero pixel; an all-zero mask
            leaves a [0, 0, 0, 0] row.

    Returns:
        torch.Tensor: Bounding boxes of shape (N, 4) in format [x1, y1, x2, y2],
            where x2/y2 are the maximum foreground indices (inclusive).
    """
    n = masks.shape[0]
    if n == 0:
        return torch.zeros((0, 4), dtype=torch.float)
    boxes = torch.zeros((n, 4), dtype=torch.float)
    for idx in range(n):
        ys, xs = torch.where(masks[idx] != 0)
        if xs.numel() == 0:
            continue  # all-background mask: leave the zero row
        boxes[idx] = torch.stack([xs.min(), ys.min(), xs.max(), ys.max()]).to(torch.float)
    return boxes

# Functions for removing duplicate detections based on overlap criteria.
def nms(boxes: torch.Tensor, scores: torch.Tensor, iou_threshold: float) -> torch.Tensor:
    """
    Non-maximum suppression for object detection.

    Greedily keeps the highest-scoring box and removes remaining boxes whose
    IoU with it exceeds the threshold, repeating until no candidates remain.

    Args:
        boxes (torch.Tensor): Bounding boxes of shape (N, 4) in format [x1, y1, x2, y2]
        scores (torch.Tensor): Scores for each box of shape (N,)
        iou_threshold (float): IoU threshold for suppression (strictly greater suppresses)

    Returns:
        torch.Tensor: Indices of boxes to keep (int64, decreasing score order)
    """
    order = scores.argsort(descending=True)
    keep = []
    while order.numel() > 0:
        top = order[0].item()
        keep.append(top)
        rest = order[1:]
        if rest.numel() == 0:
            break
        # IoU of the current top box against every remaining candidate.
        lt = torch.max(boxes[top, :2], boxes[rest, :2])
        rb = torch.min(boxes[top, 2:], boxes[rest, 2:])
        wh = (rb - lt).clamp(min=0)
        inter = wh[:, 0] * wh[:, 1]
        area_top = (boxes[top, 2] - boxes[top, 0]) * (boxes[top, 3] - boxes[top, 1])
        areas = (boxes[rest, 2] - boxes[rest, 0]) * (boxes[rest, 3] - boxes[rest, 1])
        iou = inter / (area_top + areas - inter)
        order = rest[iou <= iou_threshold]
    return torch.tensor(keep, dtype=torch.int64)
def batched_nms(boxes: torch.Tensor, scores: torch.Tensor, idxs: torch.Tensor, iou_threshold: float) -> torch.Tensor:
    """
    Batched non-maximum suppression for multiple classes.

    Boxes only suppress other boxes with the same class index.

    Args:
        boxes (torch.Tensor): Bounding boxes of shape (N, 4) in format [x1, y1, x2, y2]
        scores (torch.Tensor): Scores for each box of shape (N,)
        idxs (torch.Tensor): Class indices for each box of shape (N,)
        iou_threshold (float): IoU threshold for suppression

    Returns:
        torch.Tensor: Indices of boxes to keep (int64, decreasing score order)
    """
    if boxes.numel() == 0:
        return torch.empty((0,), dtype=torch.int64)
    # Shift each class into its own disjoint coordinate range so a single
    # greedy NMS pass can never suppress across classes.
    offsets = idxs.to(boxes) * (boxes.max() + 1)
    shifted = boxes + offsets[:, None]
    order = scores.argsort(descending=True)
    keep = []
    while order.numel() > 0:
        top = order[0].item()
        keep.append(top)
        rest = order[1:]
        if rest.numel() == 0:
            break
        lt = torch.max(shifted[top, :2], shifted[rest, :2])
        rb = torch.min(shifted[top, 2:], shifted[rest, 2:])
        wh = (rb - lt).clamp(min=0)
        inter = wh[:, 0] * wh[:, 1]
        area_top = (shifted[top, 2] - shifted[top, 0]) * (shifted[top, 3] - shifted[top, 1])
        areas = (shifted[rest, 2] - shifted[rest, 0]) * (shifted[rest, 3] - shifted[rest, 1])
        iou = inter / (area_top + areas - inter)
        order = rest[iou <= iou_threshold]
    return torch.tensor(keep, dtype=torch.int64)

# Specialized loss functions for computer vision tasks.
def sigmoid_focal_loss(inputs: torch.Tensor, targets: torch.Tensor, alpha: float = -1, gamma: float = 2, reduction: str = 'none') -> torch.Tensor:
    """
    Focal loss for addressing class imbalance in object detection.

    loss = BCE(inputs, targets) * (1 - p_t)^gamma, optionally alpha-weighted.

    Args:
        inputs (torch.Tensor): Predicted logits of shape (..., num_classes)
        targets (torch.Tensor): Ground truth labels (0/1 floats) of shape (..., num_classes)
        alpha (float): Weighting factor for rare class (default: -1 means no weighting)
        gamma (float): Focusing parameter to down-weight easy examples
        reduction (str): Reduction method ('none', 'mean', 'sum')

    Returns:
        torch.Tensor: Focal loss values

    Raises:
        ValueError: If ``reduction`` is not one of 'none', 'mean', 'sum'.
    """
    p = torch.sigmoid(inputs)
    ce_loss = torch.nn.functional.binary_cross_entropy_with_logits(inputs, targets, reduction="none")
    # p_t is the model's probability assigned to the true class.
    p_t = p * targets + (1 - p) * (1 - targets)
    loss = ce_loss * ((1 - p_t) ** gamma)
    if alpha >= 0:
        alpha_t = alpha * targets + (1 - alpha) * (1 - targets)
        loss = alpha_t * loss
    if reduction == 'none':
        return loss
    if reduction == 'mean':
        return loss.mean()
    if reduction == 'sum':
        return loss.sum()
    raise ValueError(f"Invalid reduction: {reduction!r}")
def generalized_box_iou_loss(boxes1: torch.Tensor, boxes2: torch.Tensor, reduction: str = 'none') -> torch.Tensor:
    """
    Generalized IoU loss for bounding box regression.

    Elementwise loss: 1 - GIoU(boxes1[i], boxes2[i]); range [0, 2].

    Args:
        boxes1 (torch.Tensor): Predicted boxes of shape (N, 4) in [x1, y1, x2, y2]
        boxes2 (torch.Tensor): Target boxes of shape (N, 4) in [x1, y1, x2, y2]
        reduction (str): Reduction method ('none', 'mean', 'sum')

    Returns:
        torch.Tensor: GIoU loss values
    """
    eps = 1e-7  # guards divisions for degenerate boxes
    x1, y1, x2, y2 = boxes1.unbind(dim=-1)
    x1g, y1g, x2g, y2g = boxes2.unbind(dim=-1)
    area1 = (x2 - x1) * (y2 - y1)
    area2 = (x2g - x1g) * (y2g - y1g)
    inter_w = (torch.min(x2, x2g) - torch.max(x1, x1g)).clamp(min=0)
    inter_h = (torch.min(y2, y2g) - torch.max(y1, y1g)).clamp(min=0)
    inter = inter_w * inter_h
    union = area1 + area2 - inter
    iou = inter / (union + eps)
    # Area of the smallest box enclosing each pair.
    enclose = (torch.max(x2, x2g) - torch.min(x1, x1g)) * (torch.max(y2, y2g) - torch.min(y1, y1g))
    giou = iou - (enclose - union) / (enclose + eps)
    loss = 1 - giou
    if reduction == 'mean':
        return loss.mean()
    if reduction == 'sum':
        return loss.sum()
    return loss
def distance_box_iou_loss(boxes1: torch.Tensor, boxes2: torch.Tensor, reduction: str = 'none') -> torch.Tensor:
    """
    Distance IoU (DIoU) loss for bounding box regression.

    Args:
        boxes1 (torch.Tensor): Predicted boxes, shape (N, 4)
        boxes2 (torch.Tensor): Target boxes, shape (N, 4)
        reduction (str): One of 'none', 'mean', 'sum'

    Returns:
        torch.Tensor: DIoU loss values
    """
    ...
def complete_box_iou_loss(boxes1: torch.Tensor, boxes2: torch.Tensor, reduction: str = 'none') -> torch.Tensor:
    """
    Complete IoU loss for bounding box regression.

    Args:
        boxes1 (torch.Tensor): Predicted boxes of shape (N, 4)
        boxes2 (torch.Tensor): Target boxes of shape (N, 4)
        reduction (str): Reduction method ('none', 'mean', 'sum')

    Returns:
        torch.Tensor: CIoU loss values
    """
    ...

# Operations for extracting features from regions of interest in feature maps.
def roi_align(input: torch.Tensor, boxes: torch.Tensor, output_size: tuple, spatial_scale: float = 1.0, sampling_ratio: int = -1, aligned: bool = False) -> torch.Tensor:
    """
    RoI Align: extract fixed-size features from variable-size regions.

    Args:
        input (torch.Tensor): Feature map, shape (N, C, H, W)
        boxes (torch.Tensor): RoIs, shape (K, 5); each row is [batch_idx, x1, y1, x2, y2]
        output_size (tuple): (height, width) of each pooled region
        spatial_scale (float): Maps input coordinates to box coordinates
        sampling_ratio (int): Sampling points per bin; -1 selects adaptively
        aligned (bool): Whether to align corners

    Returns:
        torch.Tensor: Features of shape (K, C, output_size[0], output_size[1])
    """
    ...
class RoIAlign(torch.nn.Module):
    """
    RoI Align layer for region-based networks.

    Layer counterpart of the functional ``roi_align`` above: the constructor
    parameters mirror its keyword arguments and are applied on every call.

    Args:
        output_size (tuple): Output size as (height, width)
        spatial_scale (float): Scale factor between input and RoI coordinates
        sampling_ratio (int): Number of sampling points per bin
        aligned (bool): Whether to align corners
    """
    # Stub signatures only — the numeric kernel ships with torchvision.
    def __init__(self, output_size: tuple, spatial_scale: float = 1.0, sampling_ratio: int = -1, aligned: bool = False): ...
    def forward(self, input: torch.Tensor, rois: torch.Tensor) -> torch.Tensor: ...
def roi_pool(input: torch.Tensor, boxes: torch.Tensor, output_size: tuple, spatial_scale: float = 1.0) -> torch.Tensor:
    """
    RoI Pooling (legacy operation; RoI Align is the preferred alternative).

    Args:
        input (torch.Tensor): Feature map, shape (N, C, H, W)
        boxes (torch.Tensor): RoIs, shape (K, 5)
        output_size (tuple): (height, width) of each pooled region
        spatial_scale (float): Coordinate scale factor

    Returns:
        torch.Tensor: Pooled features
    """
    ...
class RoIPool(torch.nn.Module):
    """RoI Pooling layer. Layer counterpart of the functional ``roi_pool`` above."""
    # Stub signature only — implementation ships with torchvision.
    def __init__(self, output_size: tuple, spatial_scale: float = 1.0): ...

def ps_roi_align(input: torch.Tensor, boxes: torch.Tensor, output_size: tuple, spatial_scale: float = 1.0, sampling_ratio: int = -1) -> torch.Tensor:
    """
    Position Sensitive RoI Align for position-sensitive score maps.

    Args:
        input (torch.Tensor): Position-sensitive feature map
        boxes (torch.Tensor): RoIs of shape (K, 5)
        output_size (tuple): Output size
        spatial_scale (float): Scale factor
        sampling_ratio (int): Number of sampling points

    Returns:
        torch.Tensor: Position-sensitive aligned features
    """

class PSRoIAlign(torch.nn.Module):
    """Position Sensitive RoI Align layer; layer counterpart of ``ps_roi_align``."""
    def __init__(self, output_size: tuple, spatial_scale: float = 1.0, sampling_ratio: int = -1): ...

def ps_roi_pool(input: torch.Tensor, boxes: torch.Tensor, output_size: tuple, spatial_scale: float = 1.0) -> torch.Tensor:
    """
    Position Sensitive RoI Pooling operation.

    Args:
        input (torch.Tensor): Position-sensitive feature map
        boxes (torch.Tensor): RoIs of shape (K, 5)
        output_size (tuple): Output size
        spatial_scale (float): Scale factor

    Returns:
        torch.Tensor: Position-sensitive pooled features
    """

class PSRoIPool(torch.nn.Module):
    """Position Sensitive RoI Pooling layer; layer counterpart of ``ps_roi_pool``."""
    def __init__(self, output_size: tuple, spatial_scale: float = 1.0): ...
class MultiScaleRoIAlign(torch.nn.Module):
    """
    Multi-scale RoI Align for Feature Pyramid Networks.

    Args:
        featmap_names (list): Names of feature maps to use
        output_size (tuple): Output size for aligned features
        sampling_ratio (int): Number of sampling points
        canonical_scale (int): Canonical scale for level assignment
        canonical_level (int): Canonical level in pyramid
    """
    # Stub signatures only — implementation ships with torchvision.
    def __init__(self, featmap_names: list, output_size: tuple, sampling_ratio: int, canonical_scale: int = 224, canonical_level: int = 4): ...

    def forward(self, x: dict, boxes: list) -> torch.Tensor: ...

# Custom convolution operations for advanced architectures.
def deform_conv2d(input: torch.Tensor, offset: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor = None, stride: tuple = (1, 1), padding: tuple = (0, 0), dilation: tuple = (1, 1), mask: torch.Tensor = None) -> torch.Tensor:
    """
    Deformable 2D convolution.

    Args:
        input (torch.Tensor): Feature map, shape (N, C_in, H_in, W_in)
        offset (torch.Tensor): Offset field, shape (N, 2*kernel_h*kernel_w, H_out, W_out)
        weight (torch.Tensor): Kernel weights, shape (C_out, C_in, kernel_h, kernel_w)
        bias (torch.Tensor, optional): Bias, shape (C_out,)
        stride (tuple): Convolution stride
        padding (tuple): Convolution padding
        dilation (tuple): Convolution dilation
        mask (torch.Tensor, optional): Modulation mask

    Returns:
        torch.Tensor: Output feature map, shape (N, C_out, H_out, W_out)
    """
    ...
class DeformConv2d(torch.nn.Module):
    """
    Deformable Convolution layer; layer counterpart of ``deform_conv2d``.

    Args:
        in_channels (int): Number of input channels
        out_channels (int): Number of output channels
        kernel_size (int or tuple): Convolution kernel size
        stride (int or tuple): Convolution stride
        padding (int or tuple): Convolution padding
        dilation (int or tuple): Convolution dilation
        groups (int): Number of groups for grouped convolution
        bias (bool): Whether to use bias
    """
    # Stub signatures only — implementation ships with torchvision.
    def __init__(self, in_channels: int, out_channels: int, kernel_size: int, stride: int = 1, padding: int = 0, dilation: int = 1, groups: int = 1, bias: bool = True): ...

    def forward(self, input: torch.Tensor, offset: torch.Tensor, mask: torch.Tensor = None) -> torch.Tensor: ...

# Regularization techniques for improving model robustness.
def stochastic_depth(input: torch.Tensor, p: float, mode: str, training: bool = True) -> torch.Tensor:
    """
    Stochastic depth regularization (Drop Path).

    Randomly zeroes whole residual branches with probability ``p``; kept
    branches are rescaled by 1/(1-p) so the expectation is unchanged.

    Args:
        input (torch.Tensor): Input tensor
        p (float): Drop probability, in [0, 1]
        mode (str): 'row' drops each sample independently; 'batch' drops the whole batch
        training (bool): Whether in training mode (identity when False)

    Returns:
        torch.Tensor: Output tensor with stochastic depth applied

    Raises:
        ValueError: If p is outside [0, 1] or mode is unknown.
    """
    if p < 0.0 or p > 1.0:
        raise ValueError(f"drop probability has to be between 0 and 1, but got {p}")
    if mode not in ("batch", "row"):
        raise ValueError(f"mode has to be either 'batch' or 'row', but got {mode}")
    if not training or p == 0.0:
        return input
    survival_rate = 1.0 - p
    if mode == "row":
        # One Bernoulli draw per sample, broadcast over remaining dims.
        size = [input.shape[0]] + [1] * (input.ndim - 1)
    else:
        size = [1] * input.ndim
    noise = torch.empty(size, dtype=input.dtype, device=input.device).bernoulli_(survival_rate)
    if survival_rate > 0.0:
        noise.div_(survival_rate)
    return input * noise
class StochasticDepth(torch.nn.Module):
    """
    Stochastic Depth (Drop Path) layer.

    Layer counterpart of the functional ``stochastic_depth`` above.

    Args:
        p (float): Drop probability
        mode (str): Drop mode ('batch' or 'row')
    """
    # Stub signatures only — implementation ships with torchvision.
    def __init__(self, p: float, mode: str): ...
    def forward(self, input: torch.Tensor) -> torch.Tensor: ...
def drop_block2d(input: torch.Tensor, p: float, block_size: int, inplace: bool = False, eps: float = 1e-6, training: bool = True) -> torch.Tensor:
    """
    DropBlock2D regularization for convolutional layers.

    Args:
        input (torch.Tensor): Input tensor, shape (N, C, H, W)
        p (float): Drop probability
        block_size (int): Edge length of the square blocks to drop
        inplace (bool): Whether to modify ``input`` in place
        eps (float): Small value to avoid division by zero
        training (bool): Whether in training mode

    Returns:
        torch.Tensor: Output tensor with DropBlock applied
    """
    ...
class DropBlock2d(torch.nn.Module):
    """
    DropBlock2D layer for spatial regularization.

    Layer counterpart of the functional ``drop_block2d`` above.

    Args:
        p (float): Drop probability
        block_size (int): Size of blocks to drop
        eps (float): Small epsilon value
        inplace (bool): Whether to apply in-place
    """
    # Stub signature only — implementation ships with torchvision.
    def __init__(self, p: float, block_size: int, eps: float = 1e-6, inplace: bool = False): ...

def drop_block3d(input: torch.Tensor, p: float, block_size: int, inplace: bool = False, eps: float = 1e-6, training: bool = True) -> torch.Tensor:
    """
    DropBlock3D for 3D tensors (e.g., video).

    Parameters mirror ``drop_block2d``; input presumably carries an extra
    spatial dimension (likely (N, C, D, H, W) — confirm against upstream docs).
    """
class DropBlock3d(torch.nn.Module):
    """DropBlock3D layer for 3D regularization; layer counterpart of ``drop_block3d``."""
    # Stub signature only — implementation ships with torchvision.
    def __init__(self, p: float, block_size: int, eps: float = 1e-6, inplace: bool = False): ...

# Implementation of Feature Pyramid Network for multi-scale feature extraction.
class FeaturePyramidNetwork(torch.nn.Module):
    """
    Feature Pyramid Network for multi-scale feature extraction.

    Args:
        in_channels_list (list): List of input channel numbers for each level
        out_channels (int): Number of output channels for all levels
        extra_blocks (nn.Module, optional): Extra blocks to append
        norm_layer (callable, optional): Normalization layer
    """
    # Stub signatures only — implementation ships with torchvision.
    def __init__(self, in_channels_list: list, out_channels: int, extra_blocks=None, norm_layer=None): ...

    def forward(self, x: dict) -> dict:
        """
        Forward pass through FPN.

        Args:
            x (dict): Dictionary of feature maps from different levels

        Returns:
            dict: Dictionary of FPN feature maps
        """

# General-purpose layers commonly used in computer vision architectures.
class FrozenBatchNorm2d(torch.nn.Module):
    """
    Frozen Batch Normalization layer (parameters not updated during training).

    Args:
        num_features (int): Number of features
        eps (float): Small value for numerical stability
    """
    # Stub signature only — implementation ships with torchvision.
    def __init__(self, num_features: int, eps: float = 1e-5): ...

class Conv2dNormActivation(torch.nn.Sequential):
    """
    Convolution with normalization and activation in sequence.

    Args:
        in_planes (int): Input channels
        out_planes (int): Output channels
        kernel_size (int): Convolution kernel size
        stride (int): Convolution stride
        padding (int, optional): Convolution padding
        groups (int): Number of groups for grouped convolution
        norm_layer (callable, optional): Normalization layer
        activation_layer (callable, optional): Activation layer
        dilation (int): Convolution dilation
        inplace (bool, optional): Whether activations should be in-place
        bias (bool, optional): Whether to use bias in convolution
    """
    def __init__(self, in_planes: int, out_planes: int, kernel_size: int = 3, stride: int = 1, padding: int = None, groups: int = 1, norm_layer=None, activation_layer=None, dilation: int = 1, inplace: bool = None, bias: bool = None): ...

class Conv3dNormActivation(torch.nn.Sequential):
    """3D version of Conv2dNormActivation for video/3D data (same parameters)."""
    def __init__(self, in_planes: int, out_planes: int, kernel_size: int = 3, stride: int = 1, padding: int = None, groups: int = 1, norm_layer=None, activation_layer=None, dilation: int = 1, inplace: bool = None, bias: bool = None): ...
class SqueezeExcitation(torch.nn.Module):
    """
    Squeeze-and-Excitation block for channel attention.

    Args:
        input_channels (int): Number of input channels
        squeeze_channels (int): Number of channels after squeeze operation
        activation (callable, optional): Activation function for squeeze
        scale_activation (callable, optional): Activation function for scale
    """
    # Stub signatures only — implementation ships with torchvision.
    def __init__(self, input_channels: int, squeeze_channels: int, activation=None, scale_activation=None): ...
    def forward(self, input: torch.Tensor) -> torch.Tensor: ...

class MLP(torch.nn.Sequential):
    """
    Multi-layer perceptron with configurable layers.

    Args:
        in_channels (int): Input dimension
        hidden_channels (list): List of hidden layer dimensions
        norm_layer (callable, optional): Normalization layer
        activation_layer (callable, optional): Activation layer
        inplace (bool, optional): Whether activations should be in-place
        bias (bool): Whether to use bias
        dropout (float): Dropout probability
    """
    def __init__(self, in_channels: int, hidden_channels: list, norm_layer=None, activation_layer=None, inplace: bool = None, bias: bool = True, dropout: float = 0.0): ...
class Permute(torch.nn.Module):
    """
    Permute tensor dimensions.

    Args:
        dims (list): New order of dimensions
    """
    # Stub signatures only — implementation ships with torchvision.
    def __init__(self, dims: list): ...

    def forward(self, x: torch.Tensor) -> torch.Tensor: ...

# --- Usage example: bounding-box utilities ---
import torch
import torchvision.ops as ops

# Create example bounding boxes (N=3 boxes in xyxy format)
boxes1 = torch.tensor([
    [10, 10, 50, 50],
    [30, 30, 70, 70],
    [60, 10, 100, 50],
], dtype=torch.float)
boxes2 = torch.tensor([
    [15, 15, 55, 55],
    [25, 25, 65, 65],
], dtype=torch.float)

# Calculate IoU matrix
iou_matrix = ops.box_iou(boxes1, boxes2)
print(f"IoU matrix shape: {iou_matrix.shape}")  # (3, 2)
print(f"IoU values:\n{iou_matrix}")

# Calculate box areas
areas = ops.box_area(boxes1)
print(f"Box areas: {areas}")

# Convert box formats
boxes_xywh = ops.box_convert(boxes1, 'xyxy', 'xywh')
print(f"Boxes in xywh format: {boxes_xywh}")

# Clip boxes to image boundaries
image_size = (100, 120)  # (height, width)
clipped_boxes = ops.clip_boxes_to_image(boxes1, image_size)
print(f"Clipped boxes: {clipped_boxes}")

# --- Usage example: non-maximum suppression ---
import torch
import torchvision.ops as ops

# Example detection results
boxes = torch.tensor([
    [10, 10, 50, 50],
    [12, 12, 52, 52],    # Overlapping with first box
    [60, 10, 100, 50],
    [15, 15, 45, 45],    # Overlapping with first box
    [80, 80, 120, 120],
], dtype=torch.float)
scores = torch.tensor([0.9, 0.8, 0.7, 0.85, 0.6])
class_ids = torch.tensor([0, 0, 1, 0, 1])

# Apply NMS
keep_indices = ops.nms(boxes, scores, iou_threshold=0.5)
print(f"Indices to keep after NMS: {keep_indices}")

# Apply batched NMS (per-class NMS)
keep_indices_batched = ops.batched_nms(boxes, scores, class_ids, iou_threshold=0.5)
print(f"Indices to keep after batched NMS: {keep_indices_batched}")

# Filter results
final_boxes = boxes[keep_indices_batched]
final_scores = scores[keep_indices_batched]
final_classes = class_ids[keep_indices_batched]
print(f"Final boxes: {final_boxes}")
print(f"Final scores: {final_scores}")
print(f"Final classes: {final_classes}")

# --- Usage example: RoI Align ---
import torch
import torchvision.ops as ops

# Create feature map (batch_size=2, channels=64, height=32, width=32)
feature_map = torch.randn(2, 64, 32, 32)

# Define RoIs: [batch_idx, x1, y1, x2, y2]
rois = torch.tensor([
    [0, 5, 5, 15, 15],    # RoI in first image
    [0, 20, 10, 30, 25],  # Another RoI in first image
    [1, 8, 8, 18, 18],    # RoI in second image
], dtype=torch.float)

# Apply RoI Align
output_size = (7, 7)
spatial_scale = 1.0
aligned_features = ops.roi_align(
    feature_map,
    rois,
    output_size,
    spatial_scale=spatial_scale,
    sampling_ratio=2,
)
print(f"Aligned features shape: {aligned_features.shape}")  # (3, 64, 7, 7)

# Using RoI Align as a layer
roi_align_layer = ops.RoIAlign(output_size=(14, 14), spatial_scale=0.5, sampling_ratio=2)
aligned_features_layer = roi_align_layer(feature_map, rois)
print(f"Layer output shape: {aligned_features_layer.shape}")

# --- Usage example: Feature Pyramid Network ---
import torch
import torchvision.ops as ops

# Create FPN for ResNet-like backbone
in_channels_list = [256, 512, 1024, 2048]  # ResNet feature channels
out_channels = 256
fpn = ops.FeaturePyramidNetwork(in_channels_list, out_channels)

# Simulate backbone features
backbone_features = {
    '0': torch.randn(2, 256, 64, 64),   # Early layer
    '1': torch.randn(2, 512, 32, 32),   # Mid layer
    '2': torch.randn(2, 1024, 16, 16),  # Late layer
    '3': torch.randn(2, 2048, 8, 8),    # Final layer
}

# Apply FPN
fpn_features = fpn(backbone_features)
print("FPN output shapes:")
for key, feature in fpn_features.items():
    print(f"Level {key}: {feature.shape}")

# --- Usage example: detection post-processing ---
import torch
import torchvision.ops as ops


def post_process_detections(boxes, scores, class_logits, score_threshold=0.5, nms_threshold=0.5):
    """
    Post-process detection outputs with NMS and filtering.

    Args:
        boxes: Predicted boxes (N, 4)
        scores: Objectness scores (N,)
        class_logits: Class predictions (N, num_classes)
        score_threshold: Minimum score threshold
        nms_threshold: NMS IoU threshold

    Returns:
        dict: Filtered detections with 'boxes', 'scores' and 'labels' keys
    """
    # Get class predictions (max returns both the best score and its index).
    class_probs = torch.softmax(class_logits, dim=1)
    class_scores, class_ids = torch.max(class_probs, dim=1)

    # Combine objectness and classification scores
    final_scores = scores * class_scores

    # Filter by score threshold
    keep_mask = final_scores >= score_threshold
    boxes = boxes[keep_mask]
    final_scores = final_scores[keep_mask]
    class_ids = class_ids[keep_mask]

    # Apply NMS per class
    keep_indices = ops.batched_nms(boxes, final_scores, class_ids, nms_threshold)
    return {
        'boxes': boxes[keep_indices],
        'scores': final_scores[keep_indices],
        'labels': class_ids[keep_indices],
    }


# Example usage
num_detections = 1000
num_classes = 80
# Build well-formed boxes (x2 > x1, y2 > y1); plain randn would produce
# degenerate boxes and make the IoU computation in NMS meaningless.
top_left = torch.rand(num_detections, 2) * 100
extent = torch.rand(num_detections, 2) * 50 + 1
boxes = torch.cat([top_left, top_left + extent], dim=1)
scores = torch.rand(num_detections)  # Random objectness scores
class_logits = torch.randn(num_detections, num_classes)  # Random class logits

# Post-process detections
results = post_process_detections(boxes, scores, class_logits)
print(f"Final detections: {len(results['boxes'])}")
if len(results['scores']) > 0:  # guard: everything may be filtered out
    print(f"Score range: {results['scores'].min():.3f} - {results['scores'].max():.3f}")

# --- Usage example: loss functions ---
import torch
import torchvision.ops as ops


def _random_valid_boxes(n):
    """Well-formed [x1, y1, x2, y2] boxes with x2 > x1 and y2 > y1."""
    top_left = torch.rand(n, 2) * 100
    extent = torch.rand(n, 2) * 50 + 1
    return torch.cat([top_left, top_left + extent], dim=1)


# Focal Loss for object classification
def train_step_focal_loss():
    # Simulated predictions and targets
    batch_size, num_classes = 32, 80
    predictions = torch.randn(batch_size, num_classes)
    targets = torch.zeros(batch_size, num_classes)
    # Create some positive examples
    targets[torch.arange(batch_size), torch.randint(0, num_classes, (batch_size,))] = 1

    # Calculate focal loss
    focal_loss = ops.sigmoid_focal_loss(
        predictions,
        targets,
        alpha=0.25,
        gamma=2.0,
        reduction='mean',
    )
    print(f"Focal loss: {focal_loss.item():.4f}")
    return focal_loss


# Box regression losses
def train_step_box_loss():
    batch_size = 64
    # IoU-based losses expect valid boxes; raw randn output routinely
    # violates x2 > x1 / y2 > y1 and yields meaningless loss values.
    pred_boxes = _random_valid_boxes(batch_size)
    target_boxes = _random_valid_boxes(batch_size)

    # Different IoU-based losses
    giou_loss = ops.generalized_box_iou_loss(pred_boxes, target_boxes, reduction='mean')
    diou_loss = ops.distance_box_iou_loss(pred_boxes, target_boxes, reduction='mean')
    ciou_loss = ops.complete_box_iou_loss(pred_boxes, target_boxes, reduction='mean')
    print(f"GIoU loss: {giou_loss.item():.4f}")
    print(f"DIoU loss: {diou_loss.item():.4f}")
    print(f"CIoU loss: {ciou_loss.item():.4f}")
    return giou_loss + diou_loss + ciou_loss


# Run example training steps
focal_loss = train_step_focal_loss()
box_loss = train_step_box_loss()
total_loss = focal_loss + box_loss
print(f"Total loss: {total_loss.item():.4f}")

# --- Usage example: regularization layers ---
import torch
import torch.nn as nn
import torchvision.ops as ops


class ResidualBlock(nn.Module):
    """Example residual block with stochastic depth."""

    def __init__(self, channels, drop_prob=0.1):
        super().__init__()
        self.conv1 = nn.Conv2d(channels, channels, 3, padding=1)
        self.conv2 = nn.Conv2d(channels, channels, 3, padding=1)
        self.relu = nn.ReLU()
        self.stochastic_depth = ops.StochasticDepth(drop_prob, mode='row')

    def forward(self, x):
        identity = x
        out = self.relu(self.conv1(x))
        out = self.conv2(out)
        # Apply stochastic depth to residual connection
        out = self.stochastic_depth(out)
        out += identity
        return self.relu(out)


# Example with DropBlock for convolutional regularization
class ConvBlockWithDropBlock(nn.Module):
    """Convolutional block with DropBlock regularization."""

    def __init__(self, in_channels, out_channels, drop_prob=0.1, block_size=7):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, 3, padding=1)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.dropblock = ops.DropBlock2d(drop_prob, block_size)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.dropblock(x)
        return x


# Test regularization
batch_size, channels, height, width = 4, 64, 32, 32
input_tensor = torch.randn(batch_size, channels, height, width)

# Test stochastic depth block
residual_block = ResidualBlock(channels, drop_prob=0.2)
output = residual_block(input_tensor)
print(f"Residual block output shape: {output.shape}")

# Test DropBlock
dropblock_conv = ConvBlockWithDropBlock(channels, channels, drop_prob=0.1, block_size=5)
output = dropblock_conv(input_tensor)
print(f"DropBlock conv output shape: {output.shape}")

# Install with Tessl CLI:
#   npx tessl i tessl/pypi-torchvision