Deep learning framework providing tensor computation with GPU acceleration and dynamic neural networks with automatic differentiation
Complete neural network building blocks including layers, activation functions, loss functions, and containers for building deep learning models. The torch.nn module provides high-level abstractions for neural network construction.
Core classes that form the foundation of all neural network components.
class Module:
"""Base class for all neural network modules."""
def __init__(self): ...
def forward(self, *input):
"""Define forward computation."""
def parameters(self, recurse=True):
"""Return iterator over module parameters."""
def named_parameters(self, prefix='', recurse=True):
"""Return iterator over (name, parameter) pairs."""
def modules(self):
"""Return iterator over all modules."""
def named_modules(self, memo=None, prefix=''):
"""Return iterator over (name, module) pairs."""
def children(self):
"""Return iterator over immediate children modules."""
def named_children(self):
"""Return iterator over (name, child) pairs."""
def train(self, mode=True):
"""Set module in training mode."""
def eval(self):
"""Set module in evaluation mode."""
def zero_grad(self, set_to_none=False):
"""Set gradients to zero."""
def to(self, *args, **kwargs):
"""Move module to device/dtype."""
def cuda(self, device=None):
"""Move module to CUDA device."""
def cpu(self):
"""Move module to CPU."""
def state_dict(self, destination=None, prefix='', keep_vars=False):
"""Return dictionary of module state."""
def load_state_dict(self, state_dict, strict=True):
"""Load parameters and buffers."""
class Parameter(Tensor):
"""Trainable parameter tensor."""
def __init__(self, data=None, requires_grad=True): ...
class UninitializedParameter(Parameter):
"""Parameter that is not yet initialized."""
def __init__(self, requires_grad=True): ...

Dense layers that perform linear transformations.
class Linear(Module):
"""Linear transformation: y = xA^T + b."""
def __init__(self, in_features: int, out_features: int, bias: bool = True, device=None, dtype=None): ...
def forward(self, input: Tensor) -> Tensor: ...
class Bilinear(Module):
"""Bilinear transformation: y = x1^T A x2 + b."""
def __init__(self, in1_features: int, in2_features: int, out_features: int, bias: bool = True): ...
def forward(self, input1: Tensor, input2: Tensor) -> Tensor: ...
class LazyLinear(Module):
"""Linear layer with lazy weight initialization."""
def __init__(self, out_features: int, bias: bool = True): ...
def forward(self, input: Tensor) -> Tensor: ...
class Identity(Module):
"""Identity transformation."""
def __init__(self, *args, **kwargs): ...
def forward(self, input: Tensor) -> Tensor: ...

Convolutional layers for spatial feature extraction.
class Conv1d(Module):
"""1D convolution layer."""
def __init__(self, in_channels: int, out_channels: int, kernel_size, stride=1,
padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'): ...
def forward(self, input: Tensor) -> Tensor: ...
class Conv2d(Module):
"""2D convolution layer."""
def __init__(self, in_channels: int, out_channels: int, kernel_size, stride=1,
padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'): ...
def forward(self, input: Tensor) -> Tensor: ...
class Conv3d(Module):
"""3D convolution layer."""
def __init__(self, in_channels: int, out_channels: int, kernel_size, stride=1,
padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'): ...
def forward(self, input: Tensor) -> Tensor: ...
class ConvTranspose1d(Module):
"""1D transposed convolution layer."""
def __init__(self, in_channels: int, out_channels: int, kernel_size, stride=1,
padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros'): ...
def forward(self, input: Tensor, output_size=None) -> Tensor: ...
class ConvTranspose2d(Module):
"""2D transposed convolution layer."""
def __init__(self, in_channels: int, out_channels: int, kernel_size, stride=1,
padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros'): ...
def forward(self, input: Tensor, output_size=None) -> Tensor: ...
class ConvTranspose3d(Module):
"""3D transposed convolution layer."""
def __init__(self, in_channels: int, out_channels: int, kernel_size, stride=1,
padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros'): ...
def forward(self, input: Tensor, output_size=None) -> Tensor: ...

Non-linear activation functions for introducing non-linearity.
class ReLU(Module):
"""Rectified Linear Unit: max(0, x)."""
def __init__(self, inplace: bool = False): ...
def forward(self, input: Tensor) -> Tensor: ...
class ReLU6(Module):
"""ReLU clamped to maximum value of 6."""
def __init__(self, inplace: bool = False): ...
def forward(self, input: Tensor) -> Tensor: ...
class LeakyReLU(Module):
"""Leaky ReLU: max(negative_slope * x, x)."""
def __init__(self, negative_slope: float = 0.01, inplace: bool = False): ...
def forward(self, input: Tensor) -> Tensor: ...
class PReLU(Module):
"""Parametric ReLU with learnable negative slope."""
def __init__(self, num_parameters: int = 1, init: float = 0.25): ...
def forward(self, input: Tensor) -> Tensor: ...
class ELU(Module):
"""Exponential Linear Unit."""
def __init__(self, alpha: float = 1.0, inplace: bool = False): ...
def forward(self, input: Tensor) -> Tensor: ...
class SELU(Module):
"""Scaled Exponential Linear Unit."""
def __init__(self, inplace: bool = False): ...
def forward(self, input: Tensor) -> Tensor: ...
class GELU(Module):
"""Gaussian Error Linear Unit."""
def __init__(self, approximate: str = 'none'): ...
def forward(self, input: Tensor) -> Tensor: ...
class SiLU(Module):
"""Sigmoid Linear Unit (Swish): x * sigmoid(x)."""
def __init__(self, inplace: bool = False): ...
def forward(self, input: Tensor) -> Tensor: ...
class Mish(Module):
"""Mish activation: x * tanh(softplus(x))."""
def __init__(self, inplace: bool = False): ...
def forward(self, input: Tensor) -> Tensor: ...
class Sigmoid(Module):
"""Sigmoid activation: 1 / (1 + exp(-x))."""
def __init__(self): ...
def forward(self, input: Tensor) -> Tensor: ...
class Tanh(Module):
"""Hyperbolic tangent activation."""
def __init__(self): ...
def forward(self, input: Tensor) -> Tensor: ...
class Softmax(Module):
"""Softmax activation along specified dimension."""
def __init__(self, dim=None): ...
def forward(self, input: Tensor) -> Tensor: ...
class LogSoftmax(Module):
"""Log-Softmax activation."""
def __init__(self, dim=None): ...
def forward(self, input: Tensor) -> Tensor: ...

Normalization techniques for training stability and performance.
class BatchNorm1d(Module):
"""Batch normalization for 2D or 3D inputs."""
def __init__(self, num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True): ...
def forward(self, input: Tensor) -> Tensor: ...
class BatchNorm2d(Module):
"""Batch normalization for 4D inputs."""
def __init__(self, num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True): ...
def forward(self, input: Tensor) -> Tensor: ...
class BatchNorm3d(Module):
"""Batch normalization for 5D inputs."""
def __init__(self, num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True): ...
def forward(self, input: Tensor) -> Tensor: ...
class LayerNorm(Module):
"""Layer normalization."""
def __init__(self, normalized_shape, eps=1e-05, elementwise_affine=True, bias=True): ...
def forward(self, input: Tensor) -> Tensor: ...
class GroupNorm(Module):
"""Group normalization."""
def __init__(self, num_groups: int, num_channels: int, eps=1e-05, affine=True): ...
def forward(self, input: Tensor) -> Tensor: ...
class InstanceNorm1d(Module):
"""Instance normalization for 3D inputs."""
def __init__(self, num_features, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False): ...
def forward(self, input: Tensor) -> Tensor: ...
class InstanceNorm2d(Module):
"""Instance normalization for 4D inputs."""
def __init__(self, num_features, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False): ...
def forward(self, input: Tensor) -> Tensor: ...
class InstanceNorm3d(Module):
"""Instance normalization for 5D inputs."""
def __init__(self, num_features, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False): ...
def forward(self, input: Tensor) -> Tensor: ...

Pooling operations for spatial dimension reduction.
class MaxPool1d(Module):
"""1D max pooling."""
def __init__(self, kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False): ...
def forward(self, input: Tensor) -> Tensor: ...
class MaxPool2d(Module):
"""2D max pooling."""
def __init__(self, kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False): ...
def forward(self, input: Tensor) -> Tensor: ...
class MaxPool3d(Module):
"""3D max pooling."""
def __init__(self, kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False): ...
def forward(self, input: Tensor) -> Tensor: ...
class AvgPool1d(Module):
"""1D average pooling."""
def __init__(self, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True): ...
def forward(self, input: Tensor) -> Tensor: ...
class AvgPool2d(Module):
"""2D average pooling."""
def __init__(self, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True, divisor_override=None): ...
def forward(self, input: Tensor) -> Tensor: ...
class AvgPool3d(Module):
"""3D average pooling."""
def __init__(self, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True, divisor_override=None): ...
def forward(self, input: Tensor) -> Tensor: ...
class AdaptiveMaxPool1d(Module):
"""1D adaptive max pooling."""
def __init__(self, output_size, return_indices=False): ...
def forward(self, input: Tensor) -> Tensor: ...
class AdaptiveMaxPool2d(Module):
"""2D adaptive max pooling."""
def __init__(self, output_size, return_indices=False): ...
def forward(self, input: Tensor) -> Tensor: ...
class AdaptiveAvgPool1d(Module):
"""1D adaptive average pooling."""
def __init__(self, output_size): ...
def forward(self, input: Tensor) -> Tensor: ...
class AdaptiveAvgPool2d(Module):
"""2D adaptive average pooling."""
def __init__(self, output_size): ...
def forward(self, input: Tensor) -> Tensor: ...

Loss functions for training neural networks.
class MSELoss(Module):
"""Mean Squared Error loss."""
def __init__(self, size_average=None, reduce=None, reduction='mean'): ...
def forward(self, input: Tensor, target: Tensor) -> Tensor: ...
class L1Loss(Module):
"""Mean Absolute Error loss."""
def __init__(self, size_average=None, reduce=None, reduction='mean'): ...
def forward(self, input: Tensor, target: Tensor) -> Tensor: ...
class CrossEntropyLoss(Module):
"""Cross entropy loss for classification."""
def __init__(self, weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean', label_smoothing=0.0): ...
def forward(self, input: Tensor, target: Tensor) -> Tensor: ...
class NLLLoss(Module):
"""Negative log likelihood loss."""
def __init__(self, weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean'): ...
def forward(self, input: Tensor, target: Tensor) -> Tensor: ...
class BCELoss(Module):
"""Binary cross entropy loss."""
def __init__(self, weight=None, size_average=None, reduce=None, reduction='mean'): ...
def forward(self, input: Tensor, target: Tensor) -> Tensor: ...
class BCEWithLogitsLoss(Module):
"""Binary cross entropy with sigmoid."""
def __init__(self, weight=None, size_average=None, reduce=None, reduction='mean', pos_weight=None): ...
def forward(self, input: Tensor, target: Tensor) -> Tensor: ...
class KLDivLoss(Module):
"""Kullback-Leibler divergence loss."""
def __init__(self, size_average=None, reduce=None, reduction='mean', log_target=False): ...
def forward(self, input: Tensor, target: Tensor) -> Tensor: ...
class SmoothL1Loss(Module):
"""Smooth L1 loss (Huber loss)."""
def __init__(self, size_average=None, reduce=None, reduction='mean', beta=1.0): ...
def forward(self, input: Tensor, target: Tensor) -> Tensor: ...
class HuberLoss(Module):
"""Huber loss."""
def __init__(self, reduction='mean', delta=1.0): ...
def forward(self, input: Tensor, target: Tensor) -> Tensor: ...

RNN, LSTM, and GRU layers for sequential data processing.
class RNN(Module):
"""Multi-layer RNN with tanh or ReLU non-linearity."""
def __init__(self, input_size, hidden_size, num_layers=1, nonlinearity='tanh', bias=True,
batch_first=False, dropout=0.0, bidirectional=False): ...
def forward(self, input, h_0=None) -> Tuple[Tensor, Tensor]: ...
class LSTM(Module):
"""Multi-layer Long Short-Term Memory network."""
def __init__(self, input_size, hidden_size, num_layers=1, bias=True, batch_first=False,
dropout=0.0, bidirectional=False, proj_size=0): ...
def forward(self, input, hx=None) -> Tuple[Tensor, Tuple[Tensor, Tensor]]: ...
class GRU(Module):
"""Multi-layer Gated Recurrent Unit network."""
def __init__(self, input_size, hidden_size, num_layers=1, bias=True, batch_first=False,
dropout=0.0, bidirectional=False): ...
def forward(self, input, h_0=None) -> Tuple[Tensor, Tensor]: ...
class RNNCell(Module):
"""RNN cell."""
def __init__(self, input_size, hidden_size, bias=True, nonlinearity='tanh'): ...
def forward(self, input, hidden) -> Tensor: ...
class LSTMCell(Module):
"""LSTM cell."""
def __init__(self, input_size, hidden_size, bias=True): ...
def forward(self, input, hx=None) -> Tuple[Tensor, Tensor]: ...
class GRUCell(Module):
"""GRU cell."""
def __init__(self, input_size, hidden_size, bias=True): ...
def forward(self, input, hidden) -> Tensor: ...

Transformer architecture components for attention-based models.
class Transformer(Module):
"""Complete transformer model."""
def __init__(self, d_model=512, nhead=8, num_encoder_layers=6, num_decoder_layers=6,
dim_feedforward=2048, dropout=0.1, activation='relu', custom_encoder=None, custom_decoder=None): ...
def forward(self, src, tgt, src_mask=None, tgt_mask=None, memory_mask=None) -> Tensor: ...
class TransformerEncoder(Module):
"""Stack of transformer encoder layers."""
def __init__(self, encoder_layer, num_layers, norm=None): ...
def forward(self, src, mask=None, src_key_padding_mask=None) -> Tensor: ...
class TransformerEncoderLayer(Module):
"""Single transformer encoder layer."""
def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation='relu', batch_first=False): ...
def forward(self, src, src_mask=None, src_key_padding_mask=None) -> Tensor: ...
class TransformerDecoder(Module):
"""Stack of transformer decoder layers."""
def __init__(self, decoder_layer, num_layers, norm=None): ...
def forward(self, tgt, memory, tgt_mask=None, memory_mask=None) -> Tensor: ...
class TransformerDecoderLayer(Module):
"""Single transformer decoder layer."""
def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation='relu', batch_first=False): ...
def forward(self, tgt, memory, tgt_mask=None, memory_mask=None) -> Tensor: ...
class MultiheadAttention(Module):
"""Multi-head attention mechanism."""
def __init__(self, embed_dim, num_heads, dropout=0.0, bias=True, add_bias_kv=False, add_zero_attn=False, kdim=None, vdim=None, batch_first=False): ...
def forward(self, query, key, value, key_padding_mask=None, need_weights=True, attn_mask=None) -> Tuple[Tensor, Tensor]: ...

Containers for organizing and combining multiple modules.
class Sequential(Module):
"""Sequential container of modules."""
def __init__(self, *args): ...
def forward(self, input): ...
class ModuleList(Module):
"""List container for modules."""
def __init__(self, modules=None): ...
def append(self, module): ...
def extend(self, modules): ...
def insert(self, index, module): ...
class ModuleDict(Module):
"""Dictionary container for modules."""
def __init__(self, modules=None): ...
def __getitem__(self, key): ...
def __setitem__(self, key, module): ...
def keys(self): ...
def items(self): ...
def values(self): ...
class ParameterList(Module):
"""List container for parameters."""
def __init__(self, parameters=None): ...
def append(self, parameter): ...
def extend(self, parameters): ...
class ParameterDict(Module):
"""Dictionary container for parameters."""
def __init__(self, parameters=None): ...
def __getitem__(self, key): ...
def __setitem__(self, key, parameter): ...

Regularization techniques to prevent overfitting.
class Dropout(Module):
"""Randomly zeros elements with probability p."""
def __init__(self, p=0.5, inplace=False): ...
def forward(self, input: Tensor) -> Tensor: ...
class Dropout1d(Module):
"""1D channel-wise dropout."""
def __init__(self, p=0.5, inplace=False): ...
def forward(self, input: Tensor) -> Tensor: ...
class Dropout2d(Module):
"""2D channel-wise dropout."""
def __init__(self, p=0.5, inplace=False): ...
def forward(self, input: Tensor) -> Tensor: ...
class Dropout3d(Module):
"""3D channel-wise dropout."""
def __init__(self, p=0.5, inplace=False): ...
def forward(self, input: Tensor) -> Tensor: ...
class AlphaDropout(Module):
"""Alpha dropout for SELU networks."""
def __init__(self, p=0.5, inplace=False): ...
def forward(self, input: Tensor) -> Tensor: ...

Embedding layers for discrete inputs like words or tokens.
class Embedding(Module):
"""Lookup table for embeddings."""
def __init__(self, num_embeddings: int, embedding_dim: int, padding_idx=None, max_norm=None,
norm_type=2.0, scale_grad_by_freq=False, sparse=False): ...
def forward(self, input: Tensor) -> Tensor: ...
class EmbeddingBag(Module):
"""Embedding bag for variable length sequences."""
def __init__(self, num_embeddings: int, embedding_dim: int, max_norm=None, norm_type=2.0,
scale_grad_by_freq=False, mode='mean', sparse=False, include_last_offset=False): ...
def forward(self, input: Tensor, offsets=None, per_sample_weights=None) -> Tensor: ...

import torch
import torch.nn as nn
import torch.optim as optim


class SimpleNet(nn.Module):
    """Three-layer fully connected classifier with ReLU activations."""

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, output_size),
        )

    def forward(self, x):
        """Return class logits for a batch of flattened inputs."""
        return self.layers(x)


# Initialize model, loss function, and optimizer
model = SimpleNet(784, 128, 10)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Example forward pass
x = torch.randn(32, 784)  # Batch of 32 samples
y = torch.randint(0, 10, (32,))  # Labels
output = model(x)
loss = criterion(output, y)

# Backward pass: clear stale gradients, backprop, then apply the update
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(f"Loss: {loss.item()}")

import torch
import torch.nn as nn


class CNN(nn.Module):
    """Small convolutional classifier for 3-channel images."""

    def __init__(self, num_classes=10):
        super().__init__()
        # Feature extractor: three conv blocks; spatial size is halved twice
        # by max pooling, then reduced to 1x1 by global average pooling.
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.AdaptiveAvgPool2d((1, 1)),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        """Map (N, 3, H, W) images to (N, num_classes) logits."""
        x = self.features(x)
        x = torch.flatten(x, 1)  # collapse (N, 128, 1, 1) -> (N, 128)
        x = self.classifier(x)
        return x


# Initialize model
model = CNN(num_classes=10)

# Example forward pass
x = torch.randn(8, 3, 32, 32)  # Batch of images
output = model(x)
print(f"Output shape: {output.shape}")

import torch
import torch.nn as nn


class LSTMModel(nn.Module):
    """Sequence classifier: embedding -> multi-layer LSTM -> linear head."""

    def __init__(self, vocab_size, embed_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Map (N, seq_len) token ids to (N, num_classes) logits."""
        embedded = self.embedding(x)
        lstm_out, (hidden, cell) = self.lstm(embedded)
        # Classify from the output at the final time step
        output = self.fc(self.dropout(lstm_out[:, -1, :]))
        return output


# Initialize model
model = LSTMModel(vocab_size=10000, embed_size=128, hidden_size=256, num_layers=2, num_classes=5)

# Example forward pass
x = torch.randint(0, 10000, (16, 50))  # Batch of sequences
output = model(x)
print(f"Output shape: {output.shape}")

Install with Tessl CLI
npx tessl i tessl/pypi-torch