"""Multi-backend deep learning framework providing a unified API for building
and training neural networks across JAX, TensorFlow, PyTorch, and OpenVINO
backends.

Complete collection of neural network layer types for building deep learning
models, including core layers, convolutional layers, recurrent layers,
normalization, attention mechanisms, and preprocessing layers.

Fundamental layers for neural network construction including fully connected
layers, embeddings, and basic transformations.
"""
class Dense:
    """
    Fully connected (dense) layer.

    Args:
        units (int): Number of output units
        activation (str or callable, optional): Activation function
        use_bias (bool): Whether to use bias terms
        kernel_initializer (str or callable): Weight initialization
        bias_initializer (str or callable): Bias initialization
        kernel_regularizer: Weight regularization
        bias_regularizer: Bias regularization
        kernel_constraint: Weight constraints
        bias_constraint: Bias constraints
    """

    def __init__(self, units, activation=None, use_bias=True, **kwargs): ...
class Embedding:
    """
    Turns positive integers into dense vectors of fixed size.

    Args:
        input_dim (int): Size of vocabulary
        output_dim (int): Size of dense vectors
        embeddings_initializer (str or callable): Embedding initialization
        embeddings_regularizer: Embedding regularization
        embeddings_constraint: Embedding constraints
        mask_zero (bool): Whether to mask zero values
        input_length (int, optional): Input sequence length
    """

    def __init__(self, input_dim, output_dim, **kwargs): ...
class Identity:
    """Identity layer (pass-through)."""

    def __init__(self, **kwargs): ...
class Lambda:
    """
    Wraps arbitrary expressions as a layer.

    Args:
        function: Function to be wrapped
        output_shape: Output shape of the function
        mask: Mask value to be passed to the function
        arguments (dict): Optional keyword arguments to pass to function
    """

    def __init__(self, function, output_shape=None, mask=None, arguments=None, **kwargs): ...


# Dedicated activation layers that can be inserted between other layers.
class Activation:
    """
    Applies activation function to input.

    Args:
        activation (str or callable): Activation function to use
    """

    def __init__(self, activation, **kwargs): ...
class ReLU:
    """
    Rectified Linear Unit activation.

    Args:
        max_value (float, optional): Maximum value to clip at
        negative_slope (float): Slope for negative values
        threshold (float): Threshold value for activation
    """

    def __init__(self, max_value=None, negative_slope=0.0, threshold=0.0, **kwargs): ...
class LeakyReLU:
    """
    Leaky Rectified Linear Unit.

    Args:
        negative_slope (float): Slope for negative values
    """

    def __init__(self, negative_slope=0.3, **kwargs): ...
class Softmax:
    """
    Softmax activation layer.

    Args:
        axis (int): Axis along which to apply softmax
    """

    def __init__(self, axis=-1, **kwargs): ...


# Convolutional layers for processing grid-like data such as images,
# supporting 1D, 2D, and 3D convolutions.
class Conv2D:
    """
    2D convolution layer.

    Args:
        filters (int): Number of output filters
        kernel_size (int or tuple): Size of convolution kernel
        strides (int or tuple): Convolution stride
        padding (str): Padding mode ('valid' or 'same')
        data_format (str): Data format ('channels_last' or 'channels_first')
        dilation_rate (int or tuple): Dilation rate
        groups (int): Number of groups for grouped convolution
        activation (str or callable, optional): Activation function
        use_bias (bool): Whether to use bias
        kernel_initializer: Kernel initialization
        bias_initializer: Bias initialization
    """

    def __init__(self, filters, kernel_size, strides=(1, 1), padding='valid', **kwargs): ...
class Conv1D:
    """1D convolution layer."""

    def __init__(self, filters, kernel_size, strides=1, padding='valid', **kwargs): ...
class Conv3D:
    """3D convolution layer."""

    def __init__(self, filters, kernel_size, strides=(1, 1, 1), padding='valid', **kwargs): ...
class Conv2DTranspose:
    """
    Transposed 2D convolution layer (deconvolution).

    Args:
        filters (int): Number of output filters
        kernel_size (int or tuple): Size of convolution kernel
        strides (int or tuple): Convolution stride
        padding (str): Padding mode
        output_padding (int or tuple, optional): Output padding
    """

    def __init__(self, filters, kernel_size, strides=(1, 1), padding='valid', **kwargs): ...
class DepthwiseConv2D:
    """
    Depthwise 2D convolution.

    Args:
        kernel_size (int or tuple): Size of convolution kernel
        strides (int or tuple): Convolution stride
        padding (str): Padding mode
        depth_multiplier (int): Number of depthwise convolution output channels
            per input channel
    """

    def __init__(self, kernel_size, strides=(1, 1), padding='valid', depth_multiplier=1, **kwargs): ...
class SeparableConv2D:
    """
    Separable 2D convolution (depthwise followed by pointwise).

    Args:
        filters (int): Number of output filters
        kernel_size (int or tuple): Size of convolution kernel
        strides (int or tuple): Convolution stride
        padding (str): Padding mode
        depth_multiplier (int): Depthwise multiplier
    """

    def __init__(self, filters, kernel_size, strides=(1, 1), padding='valid', **kwargs): ...


# Pooling layers for downsampling and feature extraction including average
# pooling, max pooling, and global pooling.
class MaxPooling2D:
    """
    Max pooling for 2D spatial data.

    Args:
        pool_size (int or tuple): Pooling window size
        strides (int or tuple, optional): Pooling stride
        padding (str): Padding mode
        data_format (str): Data format
    """

    def __init__(self, pool_size=(2, 2), strides=None, padding='valid', **kwargs): ...
class AveragePooling2D:
    """Average pooling for 2D spatial data."""

    def __init__(self, pool_size=(2, 2), strides=None, padding='valid', **kwargs): ...
class GlobalMaxPooling2D:
    """Global max pooling for 2D spatial data."""

    def __init__(self, data_format=None, keepdims=False, **kwargs): ...
class GlobalAveragePooling2D:
    """Global average pooling for 2D spatial data."""

    def __init__(self, data_format=None, keepdims=False, **kwargs): ...
# Similar 1D and 3D variants
class MaxPooling1D:
    """Max pooling for 1D temporal data."""

    def __init__(self, pool_size=2, strides=None, padding='valid', **kwargs): ...
class MaxPooling3D:
    """Max pooling for 3D spatial/volumetric data."""

    def __init__(self, pool_size=(2, 2, 2), strides=None, padding='valid', **kwargs): ...


# Recurrent neural network layers for sequence processing including LSTM, GRU,
# and simple RNN variants.
class LSTM:
    """
    Long Short-Term Memory layer.

    Args:
        units (int): Number of recurrent units
        activation (str or callable): Activation function
        recurrent_activation (str or callable): Recurrent activation
        use_bias (bool): Whether to use bias
        kernel_initializer: Input weight initialization
        recurrent_initializer: Recurrent weight initialization
        bias_initializer: Bias initialization
        dropout (float): Dropout rate for inputs
        recurrent_dropout (float): Dropout rate for recurrent connections
        return_sequences (bool): Whether to return full sequences
        return_state (bool): Whether to return final state
        go_backwards (bool): Whether to process sequences backwards
        stateful (bool): Whether to maintain state between batches
        unroll (bool): Whether to unroll the network
    """

    def __init__(self, units, activation='tanh', recurrent_activation='sigmoid',
                 use_bias=True, dropout=0.0, recurrent_dropout=0.0,
                 return_sequences=False, return_state=False, **kwargs): ...
class GRU:
    """
    Gated Recurrent Unit layer.

    Args:
        units (int): Number of recurrent units
        activation (str or callable): Activation function
        recurrent_activation (str or callable): Recurrent activation
        use_bias (bool): Whether to use bias
        dropout (float): Dropout rate for inputs
        recurrent_dropout (float): Dropout rate for recurrent connections
        return_sequences (bool): Whether to return full sequences
        return_state (bool): Whether to return final state
    """

    def __init__(self, units, activation='tanh', recurrent_activation='sigmoid',
                 use_bias=True, dropout=0.0, recurrent_dropout=0.0,
                 return_sequences=False, return_state=False, **kwargs): ...
class SimpleRNN:
    """Simple recurrent neural network layer."""

    def __init__(self, units, activation='tanh', use_bias=True, dropout=0.0,
                 recurrent_dropout=0.0, return_sequences=False, return_state=False, **kwargs): ...
class Bidirectional:
    """
    Bidirectional wrapper for RNNs.

    Args:
        layer: RNN layer to wrap
        merge_mode (str): How to combine forward and backward outputs
        backward_layer: Optional separate backward layer
    """

    def __init__(self, layer, merge_mode='concat', backward_layer=None, **kwargs): ...
class TimeDistributed:
    """
    Applies a layer to every temporal slice of an input.

    Args:
        layer: Layer to be applied to each temporal slice
    """

    def __init__(self, layer, **kwargs): ...


# Normalization layers for stabilizing and accelerating training including
# batch normalization, layer normalization, and other variants.
class BatchNormalization:
    """
    Batch normalization layer.

    Args:
        axis (int): Axis to normalize along
        momentum (float): Momentum for moving average
        epsilon (float): Small constant for numerical stability
        center (bool): Whether to add offset parameter
        scale (bool): Whether to add scale parameter
        beta_initializer: Offset parameter initialization
        gamma_initializer: Scale parameter initialization
    """

    def __init__(self, axis=-1, momentum=0.99, epsilon=1e-3, center=True, scale=True, **kwargs): ...
class LayerNormalization:
    """
    Layer normalization layer.

    Args:
        axis (int or list): Axis/axes to normalize
        epsilon (float): Small constant for numerical stability
        center (bool): Whether to add offset parameter
        scale (bool): Whether to add scale parameter
    """

    def __init__(self, axis=-1, epsilon=1e-3, center=True, scale=True, **kwargs): ...
class GroupNormalization:
    """
    Group normalization layer.

    Args:
        groups (int): Number of groups for GroupNorm
        axis (int): Axis to normalize along
        epsilon (float): Small constant for numerical stability
        center (bool): Whether to add offset parameter
        scale (bool): Whether to add scale parameter
    """

    def __init__(self, groups=32, axis=-1, epsilon=1e-3, center=True, scale=True, **kwargs): ...


# Attention mechanisms for handling sequential and structured data with focus
# on relevant information.
class MultiHeadAttention:
    """
    Multi-head attention layer.

    Args:
        num_heads (int): Number of attention heads
        key_dim (int): Size of each attention head for query and key
        value_dim (int, optional): Size of each attention head for value
        dropout (float): Dropout rate
        use_bias (bool): Whether to use bias in linear transformations
        output_shape (int, optional): Output dimensionality
        attention_axes (int or tuple, optional): Axes over which attention is applied
    """

    def __init__(self, num_heads, key_dim, value_dim=None, dropout=0.0, **kwargs): ...
class Attention:
    """
    Basic attention layer.

    Args:
        use_scale (bool): Whether to scale attention scores
        score_mode (str): How to compute attention scores
        dropout (float): Dropout rate for attention weights
    """

    def __init__(self, use_scale=False, score_mode='dot', dropout=0.0, **kwargs): ...
class AdditiveAttention:
    """
    Additive attention mechanism.

    Args:
        use_scale (bool): Whether to use learnable scale parameter
        dropout (float): Dropout rate for attention weights
    """

    def __init__(self, use_scale=True, dropout=0.0, **kwargs): ...


# Regularization layers for preventing overfitting including dropout variants
# and noise injection.
class Dropout:
    """
    Applies dropout to input.

    Args:
        rate (float): Fraction of input units to drop
        noise_shape (tuple, optional): Shape of binary dropout mask
        seed (int, optional): Random seed
    """

    def __init__(self, rate, noise_shape=None, seed=None, **kwargs): ...
class SpatialDropout2D:
    """
    Spatial dropout for 2D inputs.

    Args:
        rate (float): Fraction of input channels to drop
        data_format (str): Data format
    """

    def __init__(self, rate, data_format=None, **kwargs): ...
class GaussianNoise:
    """
    Applies Gaussian noise to input.

    Args:
        stddev (float): Standard deviation of noise
    """

    def __init__(self, stddev, **kwargs): ...
class GaussianDropout:
    """
    Applies multiplicative Gaussian noise.

    Args:
        rate (float): Drop probability
    """

    def __init__(self, rate, **kwargs): ...


# Layers for manipulating tensor shapes and dimensions including flattening,
# reshaping, and padding.
class Flatten:
    """
    Flattens input tensor.

    Args:
        data_format (str, optional): Data format
    """

    def __init__(self, data_format=None, **kwargs): ...
class Reshape:
    """
    Reshapes input to target shape.

    Args:
        target_shape (tuple): Target shape (excluding batch dimension)
    """

    def __init__(self, target_shape, **kwargs): ...
class Permute:
    """
    Permutes dimensions of input.

    Args:
        dims (tuple): Permutation pattern
    """

    def __init__(self, dims, **kwargs): ...
class RepeatVector:
    """
    Repeats input n times.

    Args:
        n (int): Number of repetitions
    """

    def __init__(self, n, **kwargs): ...
class ZeroPadding2D:
    """
    Zero-padding for 2D data.

    Args:
        padding (int or tuple): Padding specification
        data_format (str): Data format
    """

    def __init__(self, padding=(1, 1), data_format=None, **kwargs): ...
class Cropping2D:
    """
    Cropping for 2D data.

    Args:
        cropping (int or tuple): Cropping specification
        data_format (str): Data format
    """

    def __init__(self, cropping=((0, 0), (0, 0)), data_format=None, **kwargs): ...


# Layers for combining multiple input tensors through various operations.
class Add:
    """Element-wise addition of inputs."""

    def __init__(self, **kwargs): ...
class Subtract:
    """Element-wise subtraction of inputs."""

    def __init__(self, **kwargs): ...
class Multiply:
    """Element-wise multiplication of inputs."""

    def __init__(self, **kwargs): ...
class Average:
    """Element-wise average of inputs."""

    def __init__(self, **kwargs): ...
class Maximum:
    """Element-wise maximum of inputs."""

    def __init__(self, **kwargs): ...
class Minimum:
    """Element-wise minimum of inputs."""

    def __init__(self, **kwargs): ...
class Concatenate:
    """
    Concatenates inputs along specified axis.

    Args:
        axis (int): Concatenation axis
    """

    def __init__(self, axis=-1, **kwargs): ...
class Dot:
    """
    Computes dot product between inputs.

    Args:
        axes (int or tuple): Axes to compute dot product over
        normalize (bool): Whether to normalize inputs
    """

    def __init__(self, axes, normalize=False, **kwargs): ...
# Functional equivalents of the element-wise merge layers above.
def add(inputs): ...
def subtract(inputs): ...
def multiply(inputs): ...
def concatenate(inputs, axis=-1): ...


import keras
from keras import layers

# Example: simple convolutional classifier for 28x28 grayscale images
# (e.g. MNIST), ending in a 10-way softmax.
model = keras.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.GlobalAveragePooling2D(),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax')
])

import keras
from keras import layers

# Example: stacked-LSTM text classifier over embedded token sequences.
# NOTE(review): `vocab_size`, `max_length`, and `num_classes` must be defined
# by the caller before this snippet runs.
model = keras.Sequential([
    layers.Embedding(vocab_size, 128, input_length=max_length),
    layers.LSTM(64, return_sequences=True, dropout=0.2),
    layers.LSTM(32, dropout=0.2),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='softmax')
])

import keras
from keras import layers

# Example: single transformer encoder block plus classification head,
# built with the functional API.
# NOTE(review): `seq_length`, `embedding_dim`, and `num_classes` must be
# defined by the caller before this snippet runs.

# Input
inputs = keras.Input(shape=(seq_length, embedding_dim))

# Multi-head attention (self-attention: query and value are both `inputs`)
attention_output = layers.MultiHeadAttention(
    num_heads=8,
    key_dim=64
)(inputs, inputs)

# Add & Norm (residual connection around the attention sub-layer)
attention_output = layers.Add()([inputs, attention_output])
attention_output = layers.LayerNormalization()(attention_output)

# Feed Forward
ffn_output = layers.Dense(128, activation='relu')(attention_output)
ffn_output = layers.Dense(embedding_dim)(ffn_output)

# Add & Norm (residual connection around the feed-forward sub-layer)
outputs = layers.Add()([attention_output, ffn_output])
outputs = layers.LayerNormalization()(outputs)

# Classification head
outputs = layers.GlobalAveragePooling1D()(outputs)
outputs = layers.Dense(num_classes, activation='softmax')(outputs)

model = keras.Model(inputs=inputs, outputs=outputs)

# Install with Tessl CLI
#   npx tessl i tessl/pypi-keras-nightly