or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

datasets.md index.md io.md models.md ops.md transforms.md tv_tensors.md utils.md

ops.mddocs/

0

# Operations

1

2

TorchVision ops module provides low-level operations and specialized neural network layers for computer vision tasks. It includes functions for bounding box operations, non-maximum suppression, region of interest operations, loss functions, and custom layers used in modern computer vision architectures.

3

4

## Capabilities

5

6

### Bounding Box Operations

7

8

Functions for manipulating and analyzing bounding boxes in various formats.

9

10

```python { .api }

11

def box_area(boxes: torch.Tensor) -> torch.Tensor:

12

"""

13

Calculate area of bounding boxes.

14

15

Args:

16

boxes (torch.Tensor): Bounding boxes in format [x1, y1, x2, y2] of shape (..., 4)

17

18

Returns:

19

torch.Tensor: Areas of boxes with shape (...,)

20

"""

21

22

def box_convert(boxes: torch.Tensor, in_fmt: str, out_fmt: str) -> torch.Tensor:

23

"""

24

Convert bounding boxes between different formats.

25

26

Args:

27

boxes (torch.Tensor): Bounding boxes tensor of shape (..., 4)

28

in_fmt (str): Input format ('xyxy', 'xywh', 'cxcywh')

29

out_fmt (str): Output format ('xyxy', 'xywh', 'cxcywh')

30

31

Returns:

32

torch.Tensor: Converted bounding boxes

33

"""

34

35

def box_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:

36

"""

37

Calculate Intersection over Union (IoU) between two sets of boxes.

38

39

Args:

40

boxes1 (torch.Tensor): Boxes of shape (N, 4) in format [x1, y1, x2, y2]

41

boxes2 (torch.Tensor): Boxes of shape (M, 4) in format [x1, y1, x2, y2]

42

43

Returns:

44

torch.Tensor: IoU matrix of shape (N, M)

45

"""

46

47

def generalized_box_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:

48

"""

49

Calculate Generalized Intersection over Union (GIoU) between boxes.

50

51

Args:

52

boxes1 (torch.Tensor): Boxes of shape (N, 4)

53

boxes2 (torch.Tensor): Boxes of shape (M, 4)

54

55

Returns:

56

torch.Tensor: GIoU matrix of shape (N, M)

57

"""

58

59

def distance_box_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:

60

"""

61

Calculate Distance Intersection over Union (DIoU) between boxes.

62

63

Args:

64

boxes1 (torch.Tensor): Boxes of shape (N, 4)

65

boxes2 (torch.Tensor): Boxes of shape (M, 4)

66

67

Returns:

68

torch.Tensor: DIoU matrix of shape (N, M)

69

"""

70

71

def complete_box_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:

72

"""

73

Calculate Complete Intersection over Union (CIoU) between boxes.

74

75

Args:

76

boxes1 (torch.Tensor): Boxes of shape (N, 4)

77

boxes2 (torch.Tensor): Boxes of shape (M, 4)

78

79

Returns:

80

torch.Tensor: CIoU matrix of shape (N, M)

81

"""

82

83

def clip_boxes_to_image(boxes: torch.Tensor, size: tuple) -> torch.Tensor:

84

"""

85

Clip bounding boxes to image boundaries.

86

87

Args:

88

boxes (torch.Tensor): Boxes of shape (..., 4) in format [x1, y1, x2, y2]

89

size (tuple): Image size as (height, width)

90

91

Returns:

92

torch.Tensor: Clipped boxes

93

"""

94

95

def remove_small_boxes(boxes: torch.Tensor, min_size: float) -> torch.Tensor:

96

"""

97

Find the boxes whose width and height are both at least min_size; boxes with any side smaller than min_size are excluded from the returned indices.

98

99

Args:

100

boxes (torch.Tensor): Boxes of shape (N, 4)

101

min_size (float): Minimum box size threshold

102

103

Returns:

104

torch.Tensor: Indices of boxes to keep

105

"""

106

107

def masks_to_boxes(masks: torch.Tensor) -> torch.Tensor:

108

"""

109

Convert binary masks to bounding boxes.

110

111

Args:

112

masks (torch.Tensor): Binary masks of shape (N, H, W)

113

114

Returns:

115

torch.Tensor: Bounding boxes of shape (N, 4) in format [x1, y1, x2, y2]

116

"""

117

```

118

119

### Non-Maximum Suppression

120

121

Functions for removing duplicate detections based on overlap criteria.

122

123

```python { .api }

124

def nms(boxes: torch.Tensor, scores: torch.Tensor, iou_threshold: float) -> torch.Tensor:

125

"""

126

Non-maximum suppression for object detection.

127

128

Args:

129

boxes (torch.Tensor): Bounding boxes of shape (N, 4) in format [x1, y1, x2, y2]

130

scores (torch.Tensor): Scores for each box of shape (N,)

131

iou_threshold (float): IoU threshold for suppression

132

133

Returns:

134

torch.Tensor: Indices of boxes to keep

135

"""

136

137

def batched_nms(boxes: torch.Tensor, scores: torch.Tensor, idxs: torch.Tensor, iou_threshold: float) -> torch.Tensor:

138

"""

139

Batched non-maximum suppression for multiple classes.

140

141

Args:

142

boxes (torch.Tensor): Bounding boxes of shape (N, 4)

143

scores (torch.Tensor): Scores for each box of shape (N,)

144

idxs (torch.Tensor): Class indices for each box of shape (N,)

145

iou_threshold (float): IoU threshold for suppression

146

147

Returns:

148

torch.Tensor: Indices of boxes to keep

149

"""

150

```

151

152

### Loss Functions

153

154

Specialized loss functions for computer vision tasks.

155

156

```python { .api }

157

def sigmoid_focal_loss(
    inputs: torch.Tensor,
    targets: torch.Tensor,
    alpha: float = 0.25,
    gamma: float = 2,
    reduction: str = 'none',
) -> torch.Tensor:
    """
    Focal loss for addressing class imbalance in object detection.

    Args:
        inputs (torch.Tensor): Predicted logits of shape (..., num_classes)
        targets (torch.Tensor): Ground truth labels with the same shape as inputs
            (0 for the negative class, 1 for the positive class)
        alpha (float): Weighting factor in [0, 1] that balances positive vs. negative
            examples (default: 0.25; pass -1 to disable the weighting)
        gamma (float): Focusing parameter to down-weight easy examples (default: 2)
        reduction (str): Reduction method ('none', 'mean', 'sum')

    Returns:
        torch.Tensor: Focal loss values
    """

171

172

def generalized_box_iou_loss(boxes1: torch.Tensor, boxes2: torch.Tensor, reduction: str = 'none') -> torch.Tensor:

173

"""

174

Generalized IoU loss for bounding box regression.

175

176

Args:

177

boxes1 (torch.Tensor): Predicted boxes of shape (N, 4)

178

boxes2 (torch.Tensor): Target boxes of shape (N, 4)

179

reduction (str): Reduction method ('none', 'mean', 'sum')

180

181

Returns:

182

torch.Tensor: GIoU loss values

183

"""

184

185

def distance_box_iou_loss(boxes1: torch.Tensor, boxes2: torch.Tensor, reduction: str = 'none') -> torch.Tensor:

186

"""

187

Distance IoU loss for bounding box regression.

188

189

Args:

190

boxes1 (torch.Tensor): Predicted boxes of shape (N, 4)

191

boxes2 (torch.Tensor): Target boxes of shape (N, 4)

192

reduction (str): Reduction method ('none', 'mean', 'sum')

193

194

Returns:

195

torch.Tensor: DIoU loss values

196

"""

197

198

def complete_box_iou_loss(boxes1: torch.Tensor, boxes2: torch.Tensor, reduction: str = 'none') -> torch.Tensor:

199

"""

200

Complete IoU loss for bounding box regression.

201

202

Args:

203

boxes1 (torch.Tensor): Predicted boxes of shape (N, 4)

204

boxes2 (torch.Tensor): Target boxes of shape (N, 4)

205

reduction (str): Reduction method ('none', 'mean', 'sum')

206

207

Returns:

208

torch.Tensor: CIoU loss values

209

"""

210

```

211

212

### Region of Interest Operations

213

214

Operations for extracting features from regions of interest in feature maps.

215

216

```python { .api }

217

def roi_align(input: torch.Tensor, boxes: torch.Tensor, output_size: tuple, spatial_scale: float = 1.0, sampling_ratio: int = -1, aligned: bool = False) -> torch.Tensor:

218

"""

219

RoI Align operation for extracting fixed-size features from variable-size regions.

220

221

Args:

222

input (torch.Tensor): Feature map of shape (N, C, H, W)

223

boxes (torch.Tensor): RoIs of shape (K, 5) where each row is [batch_idx, x1, y1, x2, y2]

224

output_size (tuple): Output size as (height, width)

225

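spatial_scale (float): Scale factor that maps the box coordinates to the input (feature map) coordinates, e.g. 0.5 when boxes are given in image pixels and the feature map has half the image resolution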

226

sampling_ratio (int): Number of sampling points (-1 for adaptive)

227

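aligned (bool): If True, shift the box coordinates by -0.5 pixels for better alignment with the neighboring pixel indices; if False, use the legacy behavior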

228

229

Returns:

230

torch.Tensor: Extracted features of shape (K, C, output_size[0], output_size[1])

231

"""

232

233

class RoIAlign(torch.nn.Module):

234

"""

235

RoI Align layer for region-based networks.

236

237

Args:

238

output_size (tuple): Output size as (height, width)

239

spatial_scale (float): Scale factor between input and RoI coordinates

240

sampling_ratio (int): Number of sampling points per bin

241

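aligned (bool): If True, shift the box coordinates by -0.5 pixels for better alignment with the neighboring pixel indices; if False, use the legacy behavior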

242

"""

243

244

def __init__(self, output_size: tuple, spatial_scale: float = 1.0, sampling_ratio: int = -1, aligned: bool = False): ...

245

246

def forward(self, input: torch.Tensor, rois: torch.Tensor) -> torch.Tensor: ...

247

248

def roi_pool(input: torch.Tensor, boxes: torch.Tensor, output_size: tuple, spatial_scale: float = 1.0) -> torch.Tensor:

249

"""

250

RoI Pooling operation (legacy, prefer RoI Align).

251

252

Args:

253

input (torch.Tensor): Feature map of shape (N, C, H, W)

254

boxes (torch.Tensor): RoIs of shape (K, 5)

255

output_size (tuple): Output size as (height, width)

256

spatial_scale (float): Scale factor

257

258

Returns:

259

torch.Tensor: Pooled features

260

"""

261

262

class RoIPool(torch.nn.Module):

263

"""RoI Pooling layer."""

264

265

def __init__(self, output_size: tuple, spatial_scale: float = 1.0): ...

266

267

def ps_roi_align(input: torch.Tensor, boxes: torch.Tensor, output_size: tuple, spatial_scale: float = 1.0, sampling_ratio: int = -1) -> torch.Tensor:

268

"""

269

Position Sensitive RoI Align for position-sensitive score maps.

270

271

Args:

272

input (torch.Tensor): Position-sensitive feature map

273

boxes (torch.Tensor): RoIs of shape (K, 5)

274

output_size (tuple): Output size

275

spatial_scale (float): Scale factor

276

sampling_ratio (int): Number of sampling points

277

278

Returns:

279

torch.Tensor: Position-sensitive aligned features

280

"""

281

282

class PSRoIAlign(torch.nn.Module):

283

"""Position Sensitive RoI Align layer."""

284

285

def __init__(self, output_size: tuple, spatial_scale: float = 1.0, sampling_ratio: int = -1): ...

286

287

def ps_roi_pool(input: torch.Tensor, boxes: torch.Tensor, output_size: tuple, spatial_scale: float = 1.0) -> torch.Tensor:

288

"""Position Sensitive RoI Pooling operation."""

289

290

class PSRoIPool(torch.nn.Module):

291

"""Position Sensitive RoI Pooling layer."""

292

293

def __init__(self, output_size: tuple, spatial_scale: float = 1.0): ...

294

295

class MultiScaleRoIAlign(torch.nn.Module):
    """
    Multi-scale RoI Align for Feature Pyramid Networks.

    Args:
        featmap_names (list): Names of feature maps to use
        output_size (tuple): Output size for aligned features
        sampling_ratio (int): Number of sampling points
        canonical_scale (int): Canonical scale for level assignment (keyword-only)
        canonical_level (int): Canonical level in pyramid (keyword-only)
    """

    def __init__(
        self,
        featmap_names: list,
        output_size: tuple,
        sampling_ratio: int,
        *,
        canonical_scale: int = 224,
        canonical_level: int = 4,
    ): ...

    def forward(self, x: dict, boxes: list, image_shapes: list) -> torch.Tensor:
        """
        Pool every box from the pyramid level that matches its scale.

        Args:
            x (dict): Feature maps keyed by name, one entry per pyramid level
            boxes (list): One tensor of shape (N_i, 4) per image, in format [x1, y1, x2, y2]
            image_shapes (list): (height, width) of each image before it was fed to the
                backbone, used to infer the scale of every feature map

        Returns:
            torch.Tensor: Pooled features of shape (K, C, output_size[0], output_size[1])
        """

310

```

311

312

### Specialized Convolutions

313

314

Custom convolution operations for advanced architectures.

315

316

```python { .api }

317

def deform_conv2d(input: torch.Tensor, offset: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor = None, stride: tuple = (1, 1), padding: tuple = (0, 0), dilation: tuple = (1, 1), mask: torch.Tensor = None) -> torch.Tensor:

318

"""

319

Deformable convolution operation.

320

321

Args:

322

input (torch.Tensor): Input feature map of shape (N, C_in, H_in, W_in)

323

offset (torch.Tensor): Offset field of shape (N, 2*kernel_h*kernel_w, H_out, W_out)

324

weight (torch.Tensor): Convolution weights of shape (C_out, C_in, kernel_h, kernel_w)

325

bias (torch.Tensor, optional): Bias tensor of shape (C_out,)

326

stride (tuple): Convolution stride

327

padding (tuple): Convolution padding

328

dilation (tuple): Convolution dilation

329

mask (torch.Tensor, optional): Modulation mask

330

331

Returns:

332

torch.Tensor: Output feature map of shape (N, C_out, H_out, W_out)

333

"""

334

335

class DeformConv2d(torch.nn.Module):

336

"""

337

Deformable Convolution layer.

338

339

Args:

340

in_channels (int): Number of input channels

341

out_channels (int): Number of output channels

342

kernel_size (int or tuple): Convolution kernel size

343

stride (int or tuple): Convolution stride

344

padding (int or tuple): Convolution padding

345

dilation (int or tuple): Convolution dilation

346

groups (int): Number of groups for grouped convolution

347

bias (bool): Whether to use bias

348

"""

349

350

def __init__(self, in_channels: int, out_channels: int, kernel_size: int, stride: int = 1, padding: int = 0, dilation: int = 1, groups: int = 1, bias: bool = True): ...

351

352

def forward(self, input: torch.Tensor, offset: torch.Tensor, mask: torch.Tensor = None) -> torch.Tensor: ...

353

```

354

355

### Regularization Operations

356

357

Regularization techniques for improving model robustness.

358

359

```python { .api }

360

def stochastic_depth(input: torch.Tensor, p: float, mode: str, training: bool = True) -> torch.Tensor:

361

"""

362

Stochastic depth regularization (Drop Path).

363

364

Args:

365

input (torch.Tensor): Input tensor

366

p (float): Drop probability

367

mode (str): Drop mode ('batch' or 'row')

368

training (bool): Whether in training mode

369

370

Returns:

371

torch.Tensor: Output tensor with stochastic depth applied

372

"""

373

374

class StochasticDepth(torch.nn.Module):

375

"""

376

Stochastic Depth (Drop Path) layer.

377

378

Args:

379

p (float): Drop probability

380

mode (str): Drop mode ('batch' or 'row')

381

"""

382

383

def __init__(self, p: float, mode: str): ...

384

385

def forward(self, input: torch.Tensor) -> torch.Tensor: ...

386

387

def drop_block2d(input: torch.Tensor, p: float, block_size: int, inplace: bool = False, eps: float = 1e-6, training: bool = True) -> torch.Tensor:

388

"""

389

DropBlock2D regularization for convolutional layers.

390

391

Args:

392

input (torch.Tensor): Input tensor of shape (N, C, H, W)

393

p (float): Drop probability

394

block_size (int): Size of blocks to drop

395

inplace (bool): Whether to apply in-place

396

eps (float): Small value to avoid division by zero

397

training (bool): Whether in training mode

398

399

Returns:

400

torch.Tensor: Output tensor with DropBlock applied

401

"""

402

403

class DropBlock2d(torch.nn.Module):

404

"""

405

DropBlock2D layer for spatial regularization.

406

407

Args:

408

p (float): Drop probability

409

block_size (int): Size of blocks to drop

410

eps (float): Small epsilon value

411

inplace (bool): Whether to apply in-place

412

"""

413

414

def __init__(self, p: float, block_size: int, eps: float = 1e-6, inplace: bool = False): ...

415

416

def drop_block3d(input: torch.Tensor, p: float, block_size: int, inplace: bool = False, eps: float = 1e-6, training: bool = True) -> torch.Tensor:

417

"""DropBlock3D for 3D tensors (e.g., video)."""

418

419

class DropBlock3d(torch.nn.Module):

420

"""DropBlock3D layer for 3D regularization."""

421

422

def __init__(self, p: float, block_size: int, eps: float = 1e-6, inplace: bool = False): ...

423

```

424

425

### Feature Pyramid Network

426

427

Implementation of Feature Pyramid Network for multi-scale feature extraction.

428

429

```python { .api }

430

class FeaturePyramidNetwork(torch.nn.Module):

431

"""

432

Feature Pyramid Network for multi-scale feature extraction.

433

434

Args:

435

in_channels_list (list): List of input channel numbers for each level

436

out_channels (int): Number of output channels for all levels

437

extra_blocks (nn.Module, optional): Extra blocks to append

438

norm_layer (callable, optional): Normalization layer

439

"""

440

441

def __init__(self, in_channels_list: list, out_channels: int, extra_blocks=None, norm_layer=None): ...

442

443

def forward(self, x: dict) -> dict:

444

"""

445

Forward pass through FPN.

446

447

Args:

448

x (dict): Dictionary of feature maps from different levels

449

450

Returns:

451

dict: Dictionary of FPN feature maps

452

"""

453

```

454

455

### Utility Layers

456

457

General-purpose layers commonly used in computer vision architectures.

458

459

```python { .api }

460

class FrozenBatchNorm2d(torch.nn.Module):

461

"""

462

Frozen Batch Normalization layer (parameters not updated during training).

463

464

Args:

465

num_features (int): Number of features

466

eps (float): Small value for numerical stability

467

"""

468

469

def __init__(self, num_features: int, eps: float = 1e-5): ...

470

471

class Conv2dNormActivation(torch.nn.Sequential):
    """
    Convolution with normalization and activation in sequence.

    Args:
        in_channels (int): Input channels
        out_channels (int): Output channels
        kernel_size (int): Convolution kernel size (default: 3)
        stride (int): Convolution stride (default: 1)
        padding (int, optional): Convolution padding; when None it is computed as
            (kernel_size - 1) // 2 * dilation
        groups (int): Number of groups for grouped convolution (default: 1)
        norm_layer (callable, optional): Normalization layer stacked on the convolution
            (default: torch.nn.BatchNorm2d; pass None to omit it)
        activation_layer (callable, optional): Activation layer stacked after the
            normalization (default: torch.nn.ReLU; pass None to omit it)
        dilation (int): Convolution dilation (default: 1)
        inplace (bool, optional): Whether activations should be in-place (default: True)
        bias (bool, optional): Whether to use bias in convolution; when None, a bias is
            used only if norm_layer is None
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int = 3,
        stride: int = 1,
        padding: int = None,
        groups: int = 1,
        norm_layer=torch.nn.BatchNorm2d,
        activation_layer=torch.nn.ReLU,
        dilation: int = 1,
        inplace: bool = True,
        bias: bool = None,
    ): ...

490

491

class Conv3dNormActivation(torch.nn.Sequential):
    """
    3D version of Conv2dNormActivation for video/3D data.

    Defaults to torch.nn.BatchNorm3d for normalization and torch.nn.ReLU (in-place) for
    activation; pass None for either to omit that layer. When padding is None it is
    computed as (kernel_size - 1) // 2 * dilation, and when bias is None a bias is used
    only if norm_layer is None.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int = 3,
        stride: int = 1,
        padding: int = None,
        groups: int = 1,
        norm_layer=torch.nn.BatchNorm3d,
        activation_layer=torch.nn.ReLU,
        dilation: int = 1,
        inplace: bool = True,
        bias: bool = None,
    ): ...

495

496

class SqueezeExcitation(torch.nn.Module):
    """
    Squeeze-and-Excitation block for channel attention.

    Args:
        input_channels (int): Number of input channels
        squeeze_channels (int): Number of channels after squeeze operation
        activation (callable, optional): Activation applied after the squeeze step
            (default: torch.nn.ReLU)
        scale_activation (callable, optional): Activation that produces the per-channel
            scale (default: torch.nn.Sigmoid)
    """

    def __init__(
        self,
        input_channels: int,
        squeeze_channels: int,
        activation=torch.nn.ReLU,
        scale_activation=torch.nn.Sigmoid,
    ): ...

    def forward(self, input: torch.Tensor) -> torch.Tensor: ...

510

511

class MLP(torch.nn.Sequential):
    """
    Multi-layer perceptron with configurable layers.

    Args:
        in_channels (int): Input dimension
        hidden_channels (list): List of hidden layer dimensions
        norm_layer (callable, optional): Normalization layer stacked after each linear
            layer (default: None, no normalization)
        activation_layer (callable, optional): Activation layer stacked after each linear
            layer (default: torch.nn.ReLU; pass None to omit it)
        inplace (bool, optional): Whether activations and dropout should be in-place
            (default: None, which keeps each layer's own default)
        bias (bool): Whether to use bias in the linear layers (default: True)
        dropout (float): Dropout probability (default: 0.0)
    """

    def __init__(
        self,
        in_channels: int,
        hidden_channels: list,
        norm_layer=None,
        activation_layer=torch.nn.ReLU,
        inplace: bool = None,
        bias: bool = True,
        dropout: float = 0.0,
    ): ...

526

527

class Permute(torch.nn.Module):

528

"""

529

Permute tensor dimensions.

530

531

Args:

532

dims (list): New order of dimensions

533

"""

534

535

def __init__(self, dims: list): ...

536

537

def forward(self, x: torch.Tensor) -> torch.Tensor: ...

538

```

539

540

## Usage Examples

541

542

### Bounding Box Operations

543

544

```python

545

import torch

546

import torchvision.ops as ops

547

548

# Create example bounding boxes (N=3 boxes in xyxy format)

549

boxes1 = torch.tensor([

550

[10, 10, 50, 50],

551

[30, 30, 70, 70],

552

[60, 10, 100, 50]

553

], dtype=torch.float)

554

555

boxes2 = torch.tensor([

556

[15, 15, 55, 55],

557

[25, 25, 65, 65]

558

], dtype=torch.float)

559

560

# Calculate IoU matrix

561

iou_matrix = ops.box_iou(boxes1, boxes2)

562

print(f"IoU matrix shape: {iou_matrix.shape}") # (3, 2)

563

print(f"IoU values:\n{iou_matrix}")

564

565

# Calculate box areas

566

areas = ops.box_area(boxes1)

567

print(f"Box areas: {areas}")

568

569

# Convert box formats

570

boxes_xywh = ops.box_convert(boxes1, 'xyxy', 'xywh')

571

print(f"Boxes in xywh format: {boxes_xywh}")

572

573

# Clip boxes to image boundaries

574

image_size = (100, 120) # (height, width)

575

clipped_boxes = ops.clip_boxes_to_image(boxes1, image_size)

576

print(f"Clipped boxes: {clipped_boxes}")

577

```

578

579

### Non-Maximum Suppression

580

581

```python

582

import torch

583

import torchvision.ops as ops

584

585

# Example detection results

586

boxes = torch.tensor([

587

[10, 10, 50, 50],

588

[12, 12, 52, 52], # Overlapping with first box

589

[60, 10, 100, 50],

590

[15, 15, 45, 45], # Overlapping with first box

591

[80, 80, 120, 120]

592

], dtype=torch.float)

593

594

scores = torch.tensor([0.9, 0.8, 0.7, 0.85, 0.6])

595

class_ids = torch.tensor([0, 0, 1, 0, 1])

596

597

# Apply NMS

598

keep_indices = ops.nms(boxes, scores, iou_threshold=0.5)

599

print(f"Indices to keep after NMS: {keep_indices}")

600

601

# Apply batched NMS (per-class NMS)

602

keep_indices_batched = ops.batched_nms(boxes, scores, class_ids, iou_threshold=0.5)

603

print(f"Indices to keep after batched NMS: {keep_indices_batched}")

604

605

# Filter results

606

final_boxes = boxes[keep_indices_batched]

607

final_scores = scores[keep_indices_batched]

608

final_classes = class_ids[keep_indices_batched]

609

610

print(f"Final boxes: {final_boxes}")

611

print(f"Final scores: {final_scores}")

612

print(f"Final classes: {final_classes}")

613

```

614

615

### RoI Align Operation

616

617

```python

618

import torch

619

import torchvision.ops as ops

620

621

# Create feature map (batch_size=2, channels=64, height=32, width=32)

622

feature_map = torch.randn(2, 64, 32, 32)

623

624

# Define RoIs: [batch_idx, x1, y1, x2, y2]

625

rois = torch.tensor([

626

[0, 5, 5, 15, 15], # RoI in first image

627

[0, 20, 10, 30, 25], # Another RoI in first image

628

[1, 8, 8, 18, 18], # RoI in second image

629

], dtype=torch.float)

630

631

# Apply RoI Align

632

output_size = (7, 7)

633

spatial_scale = 1.0

634

aligned_features = ops.roi_align(

635

feature_map,

636

rois,

637

output_size,

638

spatial_scale=spatial_scale,

639

sampling_ratio=2

640

)

641

642

print(f"Aligned features shape: {aligned_features.shape}") # (3, 64, 7, 7)

643

644

# Using RoI Align as a layer

645

roi_align_layer = ops.RoIAlign(output_size=(14, 14), spatial_scale=0.5, sampling_ratio=2)

646

aligned_features_layer = roi_align_layer(feature_map, rois)

647

print(f"Layer output shape: {aligned_features_layer.shape}")

648

```

649

650

### Feature Pyramid Network

651

652

```python

653

import torch

654

import torchvision.ops as ops

655

656

# Create FPN for ResNet-like backbone

657

in_channels_list = [256, 512, 1024, 2048] # ResNet feature channels

658

out_channels = 256

659

660

fpn = ops.FeaturePyramidNetwork(in_channels_list, out_channels)

661

662

# Simulate backbone features

663

backbone_features = {

664

'0': torch.randn(2, 256, 64, 64), # Early layer

665

'1': torch.randn(2, 512, 32, 32), # Mid layer

666

'2': torch.randn(2, 1024, 16, 16), # Late layer

667

'3': torch.randn(2, 2048, 8, 8), # Final layer

668

}

669

670

# Apply FPN

671

fpn_features = fpn(backbone_features)

672

673

print("FPN output shapes:")

674

for key, feature in fpn_features.items():

675

print(f"Level {key}: {feature.shape}")

676

```

677

678

### Custom Detection Pipeline

679

680

```python

681

import torch

682

import torchvision.ops as ops

683

684

def post_process_detections(boxes, scores, class_logits, score_threshold=0.5, nms_threshold=0.5):
    """
    Turn raw detector outputs into a final set of detections.

    Each box is assigned its most probable class, scored by objectness times
    class probability, dropped if that score is below the threshold, and
    finally de-duplicated with per-class NMS.

    Args:
        boxes: Predicted boxes (N, 4)
        scores: Objectness scores (N,)
        class_logits: Class predictions (N, num_classes)
        score_threshold: Minimum score threshold
        nms_threshold: NMS IoU threshold

    Returns:
        dict: Filtered detections under the keys 'boxes', 'scores' and 'labels'
    """
    # Most probable class of every box and the probability assigned to it
    probabilities = torch.softmax(class_logits, dim=1)
    best_class = probabilities.argmax(dim=1)
    best_probability = probabilities.max(dim=1).values

    # Detection confidence = objectness * class probability
    confidence = scores * best_probability

    # Discard low-confidence candidates before running NMS
    confident = confidence >= score_threshold
    kept_boxes = boxes[confident]
    kept_scores = confidence[confident]
    kept_labels = best_class[confident]

    # Boxes only suppress other boxes that carry the same label
    survivors = ops.batched_nms(kept_boxes, kept_scores, kept_labels, nms_threshold)

    return {
        'boxes': kept_boxes[survivors],
        'scores': kept_scores[survivors],
        'labels': kept_labels[survivors],
    }

720

721

# Example usage
num_detections = 1000
num_classes = 80

# Random but well-formed boxes: NMS expects (x1, y1, x2, y2) with x1 < x2 and y1 < y2,
# so each box is built from a non-negative corner plus a strictly positive size.
top_left = torch.rand(num_detections, 2) * 100
box_sizes = torch.rand(num_detections, 2) * 50 + 1
boxes = torch.cat([top_left, top_left + box_sizes], dim=1)
scores = torch.rand(num_detections)  # Random objectness scores
class_logits = torch.randn(num_detections, num_classes)  # Random class logits

# Post-process detections.
# With 80 random classes the best softmax probability is small, so the default
# threshold of 0.5 would discard practically every box; use a lower one here.
results = post_process_detections(boxes, scores, class_logits, score_threshold=0.05)
print(f"Final detections: {len(results['boxes'])}")

# min()/max() raise on an empty tensor, so only report the range if something survived
if len(results['scores']) > 0:
    print(f"Score range: {results['scores'].min():.3f} - {results['scores'].max():.3f}")
else:
    print("Score range: n/a (no detections above the score threshold)")

733

```

734

735

### Loss Functions for Training

736

737

```python

738

import torch

739

import torchvision.ops as ops

740

741

# Focal Loss for object classification

742

def train_step_focal_loss():
    """Run one simulated classification step and return its mean focal loss."""
    batch_size, num_classes = 32, 80

    # Random logits stand in for the model output
    logits = torch.randn(batch_size, num_classes)

    # One-hot targets: a single randomly chosen positive class per sample
    labels = torch.zeros(batch_size, num_classes)
    positive_class = torch.randint(0, num_classes, (batch_size,))
    sample_index = torch.arange(batch_size)
    labels[sample_index, positive_class] = 1

    loss = ops.sigmoid_focal_loss(logits, labels, alpha=0.25, gamma=2.0, reduction='mean')

    print(f"Focal loss: {loss.item():.4f}")
    return loss

762

763

# Box regression losses

764

def train_step_box_loss():
    """
    Run one simulated box-regression step.

    Computes the GIoU, DIoU and CIoU losses (mean-reduced) between random
    predicted and target boxes, prints each of them, and returns their sum.
    """
    batch_size = 64

    def random_boxes(count):
        # The IoU losses expect (x1, y1, x2, y2) with 0 <= x1 < x2 and 0 <= y1 < y2.
        # Build each box from a non-negative corner plus a strictly positive size,
        # because raw random coordinates would give negative areas and meaningless losses.
        corner = torch.rand(count, 2) * 100
        size = torch.rand(count, 2) * 50 + 1
        return torch.cat([corner, corner + size], dim=1)

    pred_boxes = random_boxes(batch_size)
    target_boxes = random_boxes(batch_size)

    # Different IoU-based losses
    giou_loss = ops.generalized_box_iou_loss(pred_boxes, target_boxes, reduction='mean')
    diou_loss = ops.distance_box_iou_loss(pred_boxes, target_boxes, reduction='mean')
    ciou_loss = ops.complete_box_iou_loss(pred_boxes, target_boxes, reduction='mean')

    print(f"GIoU loss: {giou_loss.item():.4f}")
    print(f"DIoU loss: {diou_loss.item():.4f}")
    print(f"CIoU loss: {ciou_loss.item():.4f}")

    return giou_loss + diou_loss + ciou_loss

779

780

# Run example training steps

781

focal_loss = train_step_focal_loss()

782

box_loss = train_step_box_loss()

783

total_loss = focal_loss + box_loss

784

print(f"Total loss: {total_loss.item():.4f}")

785

```

786

787

### Regularization Techniques

788

789

```python

790

import torch

791

import torch.nn as nn

792

import torchvision.ops as ops

793

794

class ResidualBlock(nn.Module):
    """Two-convolution residual block with stochastic depth on the residual branch."""

    def __init__(self, channels, drop_prob=0.1):
        super().__init__()
        # Both convolutions keep the channel count (3x3 kernel, padding 1)
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.stochastic_depth = ops.StochasticDepth(drop_prob, mode='row')

    def forward(self, x):
        # Residual branch: conv -> ReLU -> conv
        residual = self.conv2(self.relu(self.conv1(x)))

        # Stochastic depth is applied to the residual branch only; the skip
        # connection (the unmodified input) is always added back afterwards.
        residual = self.stochastic_depth(residual)
        residual += x
        return self.relu(residual)

813

814

# Example with DropBlock for convolutional regularization

815

class ConvBlockWithDropBlock(nn.Module):
    """Conv -> BatchNorm -> ReLU stage followed by DropBlock regularization."""

    def __init__(self, in_channels, out_channels, drop_prob=0.1, block_size=7):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.dropblock = ops.DropBlock2d(drop_prob, block_size)

    def forward(self, x):
        # DropBlock runs last, on the activated feature map
        activated = self.relu(self.bn(self.conv(x)))
        return self.dropblock(activated)

831

832

# Test regularization

833

batch_size, channels, height, width = 4, 64, 32, 32

834

input_tensor = torch.randn(batch_size, channels, height, width)

835

836

# Test stochastic depth block

837

residual_block = ResidualBlock(channels, drop_prob=0.2)

838

output = residual_block(input_tensor)

839

print(f"Residual block output shape: {output.shape}")

840

841

# Test DropBlock

842

dropblock_conv = ConvBlockWithDropBlock(channels, channels, drop_prob=0.1, block_size=5)

843

output = dropblock_conv(input_tensor)

844

print(f"DropBlock conv output shape: {output.shape}")

845

```