or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

data.md features.md index.md layers.md models.md training.md utils.md

layers.mddocs/

0

# Layers and Components

1

2

Extensive collection of neural network building blocks including activations, attention mechanisms, convolutions, normalization layers, and specialized components for vision architectures.

3

4

## Capabilities

5

6

### Layer Factory Functions

7

8

Factory functions for creating various neural network components with consistent interfaces.

9

10

```python { .api }

11

def create_conv2d(

12

in_channels: int,

13

out_channels: int,

14

kernel_size: int,

15

stride: int = 1,

16

padding: str = '',

17

dilation: int = 1,

18

groups: int = 1,

19

bias: bool = True,

20

**kwargs

21

) -> torch.nn.Module:

22

"""

23

Create 2D convolution layer with advanced padding options.

24

25

Args:

26

in_channels: Number of input channels

27

out_channels: Number of output channels

28

kernel_size: Convolution kernel size

29

stride: Convolution stride

30

padding: Padding mode ('', 'same', 'valid', or integer)

31

dilation: Convolution dilation

32

groups: Number of convolution groups

33

bias: Include bias parameter

34

**kwargs: Additional convolution arguments

35

36

Returns:

37

Configured convolution layer

38

"""

39

40

def create_norm_layer(

41

layer_name: str,

42

num_features: int,

43

eps: float = 1e-5,

44

**kwargs

45

) -> torch.nn.Module:

46

"""

47

Create normalization layer by name.

48

49

Args:

50

layer_name: Normalization type ('batchnorm', 'layernorm', 'groupnorm', etc.)

51

num_features: Number of features to normalize

52

eps: Epsilon for numerical stability

53

**kwargs: Layer-specific arguments

54

55

Returns:

56

Configured normalization layer

57

"""

58

59

def create_act_layer(

60

layer_name: str,

61

inplace: bool = True,

62

**kwargs

63

) -> torch.nn.Module:

64

"""

65

Create activation layer by name.

66

67

Args:

68

layer_name: Activation type ('relu', 'gelu', 'swish', etc.)

69

inplace: Use inplace operations when possible

70

**kwargs: Activation-specific arguments

71

72

Returns:

73

Configured activation layer

74

"""

75

76

def create_pool2d(

77

pool_type: str,

78

kernel_size: int,

79

stride: int = None,

80

**kwargs

81

) -> torch.nn.Module:

82

"""

83

Create 2D pooling layer.

84

85

Args:

86

pool_type: Pooling type ('avg', 'max', 'adaptiveavg', etc.)

87

kernel_size: Pooling kernel size

88

stride: Pooling stride

89

**kwargs: Pool-specific arguments

90

91

Returns:

92

Configured pooling layer

93

"""

94

```

95

96

### Global Configuration Functions

97

98

Functions to control global layer behavior for scriptability, exportability, and fused attention.

99

100

```python { .api }

101

def set_scriptable(enable: bool = True) -> None:

102

"""

103

Set global scriptable mode for layers.

104

105

Args:

106

enable: Enable scriptable mode for TorchScript compatibility

107

"""

108

109

def set_exportable(enable: bool = True) -> None:

110

"""

111

Set global exportable mode for layers.

112

113

Args:

114

enable: Enable exportable mode for ONNX/TensorRT compatibility

115

"""

116

117

def set_fused_attn(enable: bool = True) -> None:

118

"""

119

Set fused attention mode globally.

120

121

Args:

122

enable: Enable fused attention implementations

123

"""

124

125

def is_scriptable() -> bool:

126

"""

127

Check if layers are in scriptable mode.

128

129

Returns:

130

True if scriptable mode is enabled

131

"""

132

133

def is_exportable() -> bool:

134

"""

135

Check if layers are in exportable mode.

136

137

Returns:

138

True if exportable mode is enabled

139

"""

140

```

141

142

## Activation Functions

143

144

### Core Activation Classes

145

146

```python { .api }

147

class Swish(torch.nn.Module):

148

"""

149

Swish activation function (x * sigmoid(x)).

150

151

Args:

152

inplace: Use inplace operations

153

"""

154

155

def __init__(self, inplace: bool = False): ...

156

157

class Mish(torch.nn.Module):

158

"""

159

Mish activation function (x * tanh(softplus(x))).

160

161

Args:

162

inplace: Use inplace operations

163

"""

164

165

def __init__(self, inplace: bool = False): ...

166

167

class GELU(torch.nn.Module):

168

"""

169

Gaussian Error Linear Unit activation.

170

171

Args:

172

approximate: Approximation mode ('none' for exact GELU, 'tanh' for the tanh approximation)

173

"""

174

175

def __init__(self, approximate: str = 'none'): ...

176

177

class HardSwish(torch.nn.Module):

178

"""Hard Swish activation function."""

179

180

def __init__(self, inplace: bool = False): ...

181

182

class HardSigmoid(torch.nn.Module):

183

"""Hard Sigmoid activation function."""

184

185

def __init__(self, inplace: bool = False): ...

186

187

class PReLU(torch.nn.Module):

188

"""

189

Parametric ReLU activation.

190

191

Args:

192

num_parameters: Number of learnable parameters

193

init: Initial value for parameters

194

"""

195

196

def __init__(self, num_parameters: int = 1, init: float = 0.25): ...

197

```

198

199

### Functional Activations

200

201

```python { .api }

202

def swish(x: torch.Tensor, inplace: bool = False) -> torch.Tensor:

203

"""Functional Swish activation."""

204

205

def mish(x: torch.Tensor, inplace: bool = False) -> torch.Tensor:

206

"""Functional Mish activation."""

207

208

def hard_swish(x: torch.Tensor, inplace: bool = False) -> torch.Tensor:

209

"""Functional Hard Swish activation."""

210

211

def hard_sigmoid(x: torch.Tensor, inplace: bool = False) -> torch.Tensor:

212

"""Functional Hard Sigmoid activation."""

213

```

214

215

## Attention Mechanisms

216

217

### Attention Classes

218

219

```python { .api }

220

class Attention(torch.nn.Module):

221

"""

222

Multi-head self-attention layer.

223

224

Args:

225

dim: Input dimension

226

num_heads: Number of attention heads

227

qkv_bias: Include bias in QKV projection

228

qk_norm: Apply normalization to Q and K

229

attn_drop: Attention dropout rate

230

proj_drop: Projection dropout rate

231

norm_layer: Normalization layer type

232

"""

233

234

def __init__(

235

self,

236

dim: int,

237

num_heads: int = 8,

238

qkv_bias: bool = False,

239

qk_norm: bool = False,

240

attn_drop: float = 0.0,

241

proj_drop: float = 0.0,

242

norm_layer: torch.nn.Module = torch.nn.LayerNorm

243

): ...

244

245

class AttentionPool2d(torch.nn.Module):

246

"""

247

Attention-based 2D pooling layer.

248

249

Args:

250

in_features: Input feature dimension

251

out_features: Output feature dimension

252

embed_dim: Embedding dimension

253

num_heads: Number of attention heads

254

qkv_bias: Include bias in QKV projection

255

"""

256

257

def __init__(

258

self,

259

in_features: int,

260

out_features: int = None,

261

embed_dim: int = None,

262

num_heads: int = 8,

263

qkv_bias: bool = True

264

): ...

265

266

class SEModule(torch.nn.Module):

267

"""

268

Squeeze-and-Excitation module.

269

270

Args:

271

channels: Number of input channels

272

rd_ratio: Reduction ratio for squeeze operation

273

rd_channels: Explicit reduction channels

274

rd_divisor: Divisor for reduction channels

275

bias: Include bias in FC layers

276

act_layer: Activation layer type

277

gate_layer: Gate activation layer type

278

"""

279

280

def __init__(

281

self,

282

channels: int,

283

rd_ratio: float = 1./16,

284

rd_channels: int = None,

285

rd_divisor: int = 8,

286

bias: bool = True,

287

act_layer: torch.nn.Module = torch.nn.ReLU,

288

gate_layer: torch.nn.Module = torch.nn.Sigmoid

289

): ...

290

291

class EcaModule(torch.nn.Module):

292

"""

293

Efficient Channel Attention module.

294

295

Args:

296

channels: Number of input channels

297

kernel_size: Convolution kernel size for attention

298

gamma: Gamma parameter for kernel size calculation

299

beta: Beta parameter for kernel size calculation

300

"""

301

302

def __init__(

303

self,

304

channels: int = None,

305

kernel_size: int = 3,

306

gamma: int = 2,

307

beta: int = 1

308

): ...

309

```

310

311

## Convolution Layers

312

313

### Advanced Convolution Classes

314

315

```python { .api }

316

class Conv2dSame(torch.nn.Conv2d):

317

"""

318

2D convolution with SAME padding mode.

319

320

Args:

321

in_channels: Number of input channels

322

out_channels: Number of output channels

323

kernel_size: Convolution kernel size

324

stride: Convolution stride

325

padding: Padding (ignored, computed for SAME)

326

dilation: Convolution dilation

327

groups: Number of groups

328

bias: Include bias parameter

329

"""

330

331

def __init__(

332

self,

333

in_channels: int,

334

out_channels: int,

335

kernel_size: int,

336

stride: int = 1,

337

padding: str = 'SAME',

338

dilation: int = 1,

339

groups: int = 1,

340

bias: bool = True

341

): ...

342

343

class ConvNormAct(torch.nn.Module):

344

"""

345

Convolution + Normalization + Activation block.

346

347

Args:

348

in_channels: Number of input channels

349

out_channels: Number of output channels

350

kernel_size: Convolution kernel size

351

stride: Convolution stride

352

padding: Padding specification

353

dilation: Convolution dilation

354

groups: Number of groups

355

bias: Include convolution bias

356

norm_layer: Normalization layer

357

act_layer: Activation layer

358

drop_layer: Dropout layer

359

"""

360

361

def __init__(

362

self,

363

in_channels: int,

364

out_channels: int,

365

kernel_size: int = 1,

366

stride: int = 1,

367

padding: str = '',

368

dilation: int = 1,

369

groups: int = 1,

370

bias: bool = False,

371

norm_layer: torch.nn.Module = torch.nn.BatchNorm2d,

372

act_layer: torch.nn.Module = torch.nn.ReLU,

373

drop_layer: torch.nn.Module = None

374

): ...

375

376

class MixedConv2d(torch.nn.Module):

377

"""

378

Mixed depthwise convolution with multiple kernel sizes.

379

380

Args:

381

in_channels: Number of input channels

382

out_channels: Number of output channels

383

kernel_size: List of kernel sizes or single size

384

stride: Convolution stride

385

padding: Padding specification

386

dilation: Convolution dilation

387

depthwise: Use depthwise convolution

388

**kwargs: Additional convolution arguments

389

"""

390

391

def __init__(

392

self,

393

in_channels: int,

394

out_channels: int,

395

kernel_size: Union[int, List[int]] = 3,

396

stride: int = 1,

397

padding: str = '',

398

dilation: int = 1,

399

depthwise: bool = False,

400

**kwargs

401

): ...

402

```

403

404

## Normalization Layers

405

406

### Normalization Classes

407

408

```python { .api }

409

class LayerNorm(torch.nn.Module):

410

"""

411

Layer normalization with optional 2D support.

412

413

Args:

414

normalized_shape: Input shape for normalization

415

eps: Epsilon for numerical stability

416

elementwise_affine: Learn affine parameters

417

bias: Include bias parameter

418

"""

419

420

def __init__(

421

self,

422

normalized_shape: Union[int, List[int], torch.Size],

423

eps: float = 1e-5,

424

elementwise_affine: bool = True,

425

bias: bool = True

426

): ...

427

428

class LayerNorm2d(torch.nn.Module):

429

"""

430

2D Layer normalization (channel-wise).

431

432

Args:

433

num_channels: Number of channels

434

eps: Epsilon for numerical stability

435

affine: Learn affine parameters

436

"""

437

438

def __init__(

439

self,

440

num_channels: int,

441

eps: float = 1e-6,

442

affine: bool = True

443

): ...

444

445

class RmsNorm(torch.nn.Module):

446

"""

447

Root Mean Square normalization.

448

449

Args:

450

dim: Normalization dimension

451

eps: Epsilon for numerical stability

452

bias: Include bias parameter

453

"""

454

455

def __init__(

456

self,

457

dim: int,

458

eps: float = 1e-8,

459

bias: bool = False

460

): ...

461

462

class BatchNormAct2d(torch.nn.Module):

463

"""

464

BatchNorm + Activation in single layer.

465

466

Args:

467

num_features: Number of features

468

eps: Epsilon for numerical stability

469

momentum: Momentum for running statistics

470

affine: Learn affine parameters

471

track_running_stats: Track running statistics

472

act_layer: Activation layer

473

inplace: Use inplace activation

474

drop_layer: Dropout layer

475

"""

476

477

def __init__(

478

self,

479

num_features: int,

480

eps: float = 1e-5,

481

momentum: float = 0.1,

482

affine: bool = True,

483

track_running_stats: bool = True,

484

act_layer: torch.nn.Module = torch.nn.ReLU,

485

inplace: bool = True,

486

drop_layer: torch.nn.Module = None

487

): ...

488

```

489

490

## Pooling Layers

491

492

### Advanced Pooling Classes

493

494

```python { .api }

495

class AdaptiveAvgMaxPool2d(torch.nn.Module):

496

"""

497

Adaptive average + max pooling combination.

498

499

Args:

500

output_size: Target output size

501

"""

502

503

def __init__(self, output_size: int = 1): ...

504

505

class SelectAdaptivePool2d(torch.nn.Module):

506

"""

507

Selectable adaptive pooling (avg, max, avgmax, catavgmax).

508

509

Args:

510

output_size: Target output size

511

pool_type: Pooling type ('avg', 'max', 'avgmax', 'catavgmax')

512

flatten: Flatten output

513

"""

514

515

def __init__(

516

self,

517

output_size: int = 1,

518

pool_type: str = 'avg',

519

flatten: bool = False

520

): ...

521

522

class BlurPool2d(torch.nn.Module):

523

"""

524

Blur pooling for anti-aliasing.

525

526

Args:

527

channels: Number of input channels

528

filt_size: Filter size

529

stride: Pooling stride

530

"""

531

532

def __init__(

533

self,

534

channels: int,

535

filt_size: int = 4,

536

stride: int = 2

537

): ...

538

```

539

540

## Embedding Layers

541

542

### Vision Transformer Embeddings

543

544

```python { .api }

545

class PatchEmbed(torch.nn.Module):

546

"""

547

2D image to patch embedding.

548

549

Args:

550

img_size: Input image size

551

patch_size: Patch size

552

in_chans: Number of input channels

553

embed_dim: Embedding dimension

554

norm_layer: Normalization layer

555

flatten: Flatten spatial dimensions

556

bias: Include bias in projection

557

"""

558

559

def __init__(

560

self,

561

img_size: int = 224,

562

patch_size: int = 16,

563

in_chans: int = 3,

564

embed_dim: int = 768,

565

norm_layer: torch.nn.Module = None,

566

flatten: bool = True,

567

bias: bool = True

568

): ...

569

570

class HybridEmbed(torch.nn.Module):

571

"""

572

CNN feature map to patch embedding.

573

574

Args:

575

backbone: CNN backbone model

576

img_size: Input image size

577

patch_size: Patch size for embedding

578

feature_size: Feature map size from backbone

579

in_chans: Number of input channels

580

embed_dim: Embedding dimension

581

"""

582

583

def __init__(

584

self,

585

backbone: torch.nn.Module,

586

img_size: int = 224,

587

patch_size: int = 1,

588

feature_size: int = None,

589

in_chans: int = 3,

590

embed_dim: int = 768

591

): ...

592

```

593

594

## Regularization Layers

595

596

### Dropout Variants

597

598

```python { .api }

599

class DropPath(torch.nn.Module):

600

"""

601

Stochastic depth (drop path) regularization.

602

603

Args:

604

drop_prob: Drop probability

605

scale_by_keep: Scale by keep probability

606

"""

607

608

def __init__(

609

self,

610

drop_prob: float = 0.0,

611

scale_by_keep: bool = True

612

): ...

613

614

class DropBlock2d(torch.nn.Module):

615

"""

616

DropBlock regularization for 2D feature maps.

617

618

Args:

619

drop_rate: Drop rate

620

block_size: Size of dropped blocks

621

"""

622

623

def __init__(

624

self,

625

drop_rate: float = 0.1,

626

block_size: int = 7

627

): ...

628

629

class PatchDropout(torch.nn.Module):

630

"""

631

Patch dropout for vision transformers.

632

633

Args:

634

prob: Dropout probability

635

num_prefix_tokens: Number of prefix tokens to preserve

636

ordered: Keep the retained patches in their original order

637

return_indices: Also return the indices of the kept patches

638

"""

639

640

def __init__(

641

self,

642

prob: float = 0.5,

643

num_prefix_tokens: int = 1,

644

ordered: bool = False,

645

return_indices: bool = False

646

): ...

647

```

648

649

## MLP and Feed-Forward Layers

650

651

### MLP Variants

652

653

```python { .api }

654

class Mlp(torch.nn.Module):

655

"""

656

Multi-layer perceptron.

657

658

Args:

659

in_features: Input feature dimension

660

hidden_features: Hidden layer dimension

661

out_features: Output feature dimension

662

act_layer: Activation layer

663

norm_layer: Normalization layer

664

bias: Include bias parameters

665

drop: Dropout rate

666

use_conv: Use 1x1 convolution instead of linear

667

"""

668

669

def __init__(

670

self,

671

in_features: int,

672

hidden_features: int = None,

673

out_features: int = None,

674

act_layer: torch.nn.Module = torch.nn.GELU,

675

norm_layer: torch.nn.Module = None,

676

bias: bool = True,

677

drop: float = 0.0,

678

use_conv: bool = False

679

): ...

680

681

class GluMlp(torch.nn.Module):

682

"""

683

MLP with Gated Linear Unit (GLU) activation.

684

685

Args:

686

in_features: Input feature dimension

687

hidden_features: Hidden layer dimension

688

out_features: Output feature dimension

689

act_layer: Activation layer for gate

690

norm_layer: Normalization layer

691

bias: Include bias parameters

692

drop: Dropout rate

693

gate_last: Use the last half of the projection as the gate (first half if False)

694

"""

695

696

def __init__(

697

self,

698

in_features: int,

699

hidden_features: int = None,

700

out_features: int = None,

701

act_layer: torch.nn.Module = torch.nn.Sigmoid,

702

norm_layer: torch.nn.Module = None,

703

bias: bool = True,

704

drop: float = 0.0,

705

gate_last: bool = True

706

): ...

707

```

708

709

## Utility Functions

710

711

### Helper Functions

712

713

```python { .api }

714

def to_ntuple(n: int) -> Callable:

715

"""

716

Create function to convert input to n-tuple.

717

718

Args:

719

n: Tuple length

720

721

Returns:

722

Function that converts input to n-tuple

723

"""

724

725

def make_divisible(

726

v: int,

727

divisor: int = 8,

728

min_value: int = None,

729

round_limit: float = 0.9

730

) -> int:

731

"""

732

Make value divisible by divisor.

733

734

Args:

735

v: Input value

736

divisor: Divisor value

737

min_value: Minimum allowed value

738

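round_limit: Minimum allowed ratio of the rounded value to v; if rounding down falls below it, the result is increased by one divisor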

739

740

Returns:

741

Value divisible by divisor

742

"""

743

744

def get_padding(

745

kernel_size: int,

746

stride: int = 1,

747

dilation: int = 1

748

) -> int:

749

"""

750

Calculate padding for convolution.

751

752

Args:

753

kernel_size: Convolution kernel size

754

stride: Convolution stride

755

dilation: Convolution dilation

756

757

Returns:

758

Required padding

759

"""

760

```

761

762

### Weight Initialization

763

764

```python { .api }

765

def trunc_normal_(

766

tensor: torch.Tensor,

767

mean: float = 0.0,

768

std: float = 1.0,

769

a: float = -2.0,

770

b: float = 2.0

771

) -> torch.Tensor:

772

"""

773

Initialize tensor with truncated normal distribution.

774

775

Args:

776

tensor: Tensor to initialize

777

mean: Mean of distribution

778

std: Standard deviation

779

a: Lower truncation bound

780

b: Upper truncation bound

781

782

Returns:

783

Initialized tensor

784

"""

785

786

def variance_scaling_(

787

tensor: torch.Tensor,

788

scale: float = 1.0,

789

mode: str = 'fan_in',

790

distribution: str = 'normal'

791

) -> torch.Tensor:

792

"""

793

Initialize tensor with variance scaling.

794

795

Args:

796

tensor: Tensor to initialize

797

scale: Scaling factor

798

mode: Computation mode ('fan_in', 'fan_out', 'fan_avg')

799

distribution: Distribution type ('normal', 'uniform')

800

801

Returns:

802

Initialized tensor

803

"""

804

805

def lecun_normal_(tensor: torch.Tensor) -> torch.Tensor:

806

"""

807

Initialize tensor with LeCun normal initialization.

808

809

Args:

810

tensor: Tensor to initialize

811

812

Returns:

813

Initialized tensor

814

"""

815

```

816

817

## Types

818

819

```python { .api }

820

from typing import Optional, Union, List, Dict, Callable, Any, Tuple

821

import torch

822

823

# Layer types

824

LayerType = torch.nn.Module

825

ActivationType = torch.nn.Module

826

NormType = torch.nn.Module

827

828

# Padding types

829

PadType = Union[str, int, Tuple[int, ...]]

830

831

# Dimension types

832

DimType = Union[int, Tuple[int, ...]]

833

834

# Format enumeration

835

class Format:

836

NCHW = 'NCHW'

837

NHWC = 'NHWC'

838

NCL = 'NCL'

839

NLC = 'NLC'

840

```