Keras is a multi-backend deep learning framework that provides a unified, high-level API for building and training neural networks across the JAX, TensorFlow, PyTorch, and OpenVINO backends.
—
Weight initializers determine how layer weights are initialized before training. Proper initialization is crucial for effective training and convergence of neural networks.
The abstract base class for all weight initializers, providing the interface for weight initialization.
class Initializer:
    """
    Base class for weight initializers.

    All initializers should inherit from this class and implement the
    ``__call__`` method, which produces the initial weight values.
    """

    def __call__(self, shape, dtype=None, **kwargs):
        """
        Generate initial weights.

        Parameters:
        - shape: Shape of the weight tensor to initialize
        - dtype: Data type of the weights (default: None)
        - **kwargs: Additional initializer-specific arguments

        Returns:
        Tensor of initialized weights
        """


# Initializers that set weights to constant values or specific patterns.
class Zeros(Initializer):
    """
    Initializer that sets every weight to zero.

    Usage:
    ```python
    layer = Dense(10, kernel_initializer='zeros')
    # or
    layer = Dense(10, kernel_initializer=Zeros())
    ```
    """

    def __init__(self):
        """Create a Zeros initializer; it takes no arguments."""
class Ones(Initializer):
    """
    Initializer that sets every weight to one.

    Usage:
    ```python
    layer = Dense(10, kernel_initializer='ones')
    # or
    layer = Dense(10, kernel_initializer=Ones())
    ```
    """

    def __init__(self):
        """Create a Ones initializer; it takes no arguments."""
class Constant(Initializer):
    """
    Initializer that fills weights with a single fixed value.

    Usage:
    ```python
    layer = Dense(10, kernel_initializer=Constant(value=0.5))
    ```
    """

    def __init__(self, value=0.0):
        """
        Create a Constant initializer.

        Parameters:
        - value: Constant value to initialize weights to (default: 0.0)
        """
class Identity(Initializer):
    """
    Initializes weights to the identity matrix (for square matrices).

    For non-square matrices, initializes with identity matrix in the center.

    Usage:
    ```python
    layer = Dense(10, kernel_initializer='identity')
    # or
    layer = Dense(10, kernel_initializer=Identity(gain=1.0))
    ```
    """

    def __init__(self, gain=1.0):
        """
        Initialize the Identity initializer.

        Parameters:
        - gain: Scaling factor for the identity matrix (default: 1.0)
        """


# Initializers that generate random weights from various probability
# distributions.
class RandomNormal(Initializer):
    """
    Initializer drawing weights at random from a normal distribution.

    Usage:
    ```python
    layer = Dense(10, kernel_initializer=RandomNormal(mean=0.0, stddev=0.05))
    ```
    """

    def __init__(self, mean=0.0, stddev=0.05, seed=None):
        """
        Create a RandomNormal initializer.

        Parameters:
        - mean: Mean of the normal distribution (default: 0.0)
        - stddev: Standard deviation of the normal distribution (default: 0.05)
        - seed: Random seed for reproducibility (default: None)
        """
class RandomUniform(Initializer):
    """
    Initializer drawing weights at random from a uniform distribution.

    Usage:
    ```python
    layer = Dense(10, kernel_initializer=RandomUniform(minval=-0.1, maxval=0.1))
    ```
    """

    def __init__(self, minval=-0.05, maxval=0.05, seed=None):
        """
        Create a RandomUniform initializer.

        Parameters:
        - minval: Lower bound of the uniform distribution (default: -0.05)
        - maxval: Upper bound of the uniform distribution (default: 0.05)
        - seed: Random seed for reproducibility (default: None)
        """
class TruncatedNormal(Initializer):
    """
    Initializes weights with truncated normal distribution.

    Values more than 2 standard deviations from mean are discarded and redrawn.

    Usage:
    ```python
    layer = Dense(10, kernel_initializer=TruncatedNormal(stddev=0.1))
    ```
    """

    def __init__(self, mean=0.0, stddev=0.05, seed=None):
        """
        Initialize the TruncatedNormal initializer.

        Parameters:
        - mean: Mean of the truncated normal distribution (default: 0.0)
        - stddev: Standard deviation before truncation (default: 0.05)
        - seed: Random seed for reproducibility (default: None)
        """


# Initializers that scale the variance based on the number of input and
# output units.
class VarianceScaling(Initializer):
    """
    Base class for variance-scaling initializers.

    The variance of the generated weights is scaled according to fan-in,
    fan-out, or their average.

    Usage:
    ```python
    layer = Dense(10, kernel_initializer=VarianceScaling(
        scale=2.0, mode='fan_in', distribution='truncated_normal'
    ))
    ```
    """

    def __init__(self, scale=1.0, mode='fan_in', distribution='truncated_normal', seed=None):
        """
        Create a VarianceScaling initializer.

        Parameters:
        - scale: Scaling factor for the variance (default: 1.0)
        - mode: 'fan_in', 'fan_out', or 'fan_avg' (default: 'fan_in')
        - distribution: 'normal', 'uniform', or 'truncated_normal'
          (default: 'truncated_normal')
        - seed: Random seed for reproducibility (default: None)
        """
class GlorotNormal(VarianceScaling):
    """
    Glorot normal initializer (also known as Xavier normal).

    Samples are drawn from a truncated normal distribution with
    stddev = sqrt(2 / (fan_in + fan_out)).

    Usage:
    ```python
    layer = Dense(10, kernel_initializer='glorot_normal')
    # or
    layer = Dense(10, kernel_initializer=GlorotNormal())
    ```
    """

    def __init__(self, seed=None):
        """
        Create a GlorotNormal initializer.

        Parameters:
        - seed: Random seed for reproducibility (default: None)
        """
class GlorotUniform(VarianceScaling):
    """
    Glorot uniform initializer (also known as Xavier uniform).

    Samples are drawn from a uniform distribution within [-limit, limit],
    where limit = sqrt(6 / (fan_in + fan_out)).

    Usage:
    ```python
    layer = Dense(10, kernel_initializer='glorot_uniform')
    # or
    layer = Dense(10, kernel_initializer=GlorotUniform())
    ```
    """

    def __init__(self, seed=None):
        """
        Create a GlorotUniform initializer.

        Parameters:
        - seed: Random seed for reproducibility (default: None)
        """
class HeNormal(VarianceScaling):
    """
    He normal initializer (also known as Kaiming normal).

    Samples are drawn from a truncated normal distribution with
    stddev = sqrt(2 / fan_in). Recommended for ReLU activations.

    Usage:
    ```python
    layer = Dense(10, kernel_initializer='he_normal')
    # or
    layer = Dense(10, kernel_initializer=HeNormal())
    ```
    """

    def __init__(self, seed=None):
        """
        Create a HeNormal initializer.

        Parameters:
        - seed: Random seed for reproducibility (default: None)
        """
class HeUniform(VarianceScaling):
    """
    He uniform initializer (also known as Kaiming uniform).

    Samples are drawn from a uniform distribution within [-limit, limit],
    where limit = sqrt(6 / fan_in). Recommended for ReLU activations.

    Usage:
    ```python
    layer = Dense(10, kernel_initializer='he_uniform')
    # or
    layer = Dense(10, kernel_initializer=HeUniform())
    ```
    """

    def __init__(self, seed=None):
        """
        Create a HeUniform initializer.

        Parameters:
        - seed: Random seed for reproducibility (default: None)
        """
class LecunNormal(VarianceScaling):
    """
    Lecun normal initializer.

    Samples are drawn from a truncated normal distribution with
    stddev = sqrt(1 / fan_in). Recommended for SELU activations.

    Usage:
    ```python
    layer = Dense(10, kernel_initializer='lecun_normal')
    # or
    layer = Dense(10, kernel_initializer=LecunNormal())
    ```
    """

    def __init__(self, seed=None):
        """
        Create a LecunNormal initializer.

        Parameters:
        - seed: Random seed for reproducibility (default: None)
        """
class LecunUniform(VarianceScaling):
    """
    Lecun uniform initializer.

    Draws samples from uniform distribution within [-limit, limit] where
    limit = sqrt(3 / fan_in). Recommended for SELU activations.

    Usage:
    ```python
    layer = Dense(10, kernel_initializer='lecun_uniform')
    # or
    layer = Dense(10, kernel_initializer=LecunUniform())
    ```
    """

    def __init__(self, seed=None):
        """
        Initialize the LecunUniform initializer.

        Parameters:
        - seed: Random seed for reproducibility (default: None)
        """


# Specialized initializers for specific architectures and use cases.
class Orthogonal(Initializer):
    """
    Initializer producing random orthogonal weight matrices.

    Orthogonal matrices are generated via SVD decomposition. Useful for
    RNNs to avoid vanishing/exploding gradients.

    Usage:
    ```python
    layer = Dense(10, kernel_initializer=Orthogonal(gain=1.0))
    ```
    """

    def __init__(self, gain=1.0, seed=None):
        """
        Create an Orthogonal initializer.

        Parameters:
        - gain: Scaling factor for the orthogonal matrix (default: 1.0)
        - seed: Random seed for reproducibility (default: None)
        """
class STFT(Initializer):
    """
    STFT initializer for specific signal processing applications.

    Usage:
    ```python
    layer = Dense(10, kernel_initializer=STFT())
    ```
    """

    def __init__(self, **kwargs):
        """
        Initialize the STFT initializer.

        Parameters:
        - **kwargs: Additional STFT-specific parameters
        """


# Helper functions for initializer management and serialization.
def serialize(initializer):
    """
    Serialize an initializer into a string or config dict.

    Parameters:
    - initializer: Initializer to serialize

    Returns:
    String identifier or config dictionary
    """
def deserialize(config, custom_objects=None):
    """
    Reconstruct an initializer from a string or config dict.

    Parameters:
    - config: String identifier or config dictionary
    - custom_objects: Optional dict mapping names to custom objects

    Returns:
    Initializer instance
    """
def get(identifier):
    """
    Retrieve an initializer by string identifier.

    Parameters:
    - identifier: String name or initializer instance

    Returns:
    Initializer instance
    """


import keras
from keras import initializers

# Using string identifiers
model = keras.Sequential([
    keras.layers.Dense(64, kernel_initializer='he_normal', activation='relu'),
    keras.layers.Dense(32, kernel_initializer='glorot_uniform', activation='tanh'),
    keras.layers.Dense(10, kernel_initializer='zeros', activation='softmax')
])

# Using initializer classes directly
model = keras.Sequential([
    keras.layers.Dense(64,
                       kernel_initializer=initializers.HeNormal(),
                       bias_initializer=initializers.Zeros(),
                       activation='relu'),
    keras.layers.Dense(32,
                       kernel_initializer=initializers.GlorotUniform(seed=42),
                       activation='tanh'),
    keras.layers.Dense(10,
                       kernel_initializer=initializers.Constant(0.1),
                       activation='softmax')
])

# Custom variance scaling
custom_init = initializers.VarianceScaling(
    scale=2.0,
    mode='fan_out',
    distribution='uniform'
)
layer = keras.layers.Dense(128, kernel_initializer=custom_init)

# For RNNs - orthogonal initialization
rnn_layer = keras.layers.LSTM(
    64,
    kernel_initializer='orthogonal',
    recurrent_initializer='orthogonal'
)

# Common choices:
# - he_normal or he_uniform
# - glorot_normal or glorot_uniform
# - lecun_normal or lecun_uniform
# - orthogonal for recurrent weights
# - glorot_uniform is a good default choice
#
# Install with Tessl CLI:
#   npx tessl i tessl/pypi-keras