Multi-backend deep learning framework providing a unified API for building and training neural networks across JAX, TensorFlow, PyTorch, and OpenVINO backends
—
Comprehensive collection of weight initialization strategies for neural network layers. Proper weight initialization is crucial for training stability and convergence speed. Keras provides various initializers from simple constant values to sophisticated variance-scaling methods based on layer characteristics.
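Every initializer is callable: given a shape (and an optional dtype) it returns a tensor of initial values. A minimal sketch:

from keras import initializers

# Build a 4x4 weight tensor of Glorot-uniform values
init = initializers.GlorotUniform(seed=42)
weights = init(shape=(4, 4))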
Initializers that set weights to constant values or specific patterns.
class Zeros:
    """Initialize weights to zero."""
    def __init__(self): ...

class Ones:
    """Initialize weights to one."""
    def __init__(self): ...

class Constant:
    """Initialize weights to a constant value."""
    def __init__(self, value=0.0): ...

class Identity:
    """Initialize weights as an identity matrix (2D shapes only)."""
    def __init__(self, gain=1.0): ...

class STFT:
    """Short-Time Fourier Transform initializer."""
    def __init__(self, fft_length=128, window_length=128, window_step=32): ...
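For instance, a minimal sketch of the constant-pattern initializers in action:

from keras import initializers

# Bias vector filled with 0.1
bias = initializers.Constant(0.1)(shape=(10,))

# 4x4 identity matrix, scaled down by the gain factor
eye = initializers.Identity(gain=0.5)(shape=(4, 4))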
Random initialization strategies with different distributions and scaling approaches.

class RandomNormal:
    """Initialize weights with a normal distribution."""
    def __init__(self, mean=0.0, stddev=0.05, seed=None): ...

class RandomUniform:
    """Initialize weights with a uniform distribution."""
    def __init__(self, minval=-0.05, maxval=0.05, seed=None): ...

class TruncatedNormal:
    """Initialize weights with a truncated normal distribution."""
    def __init__(self, mean=0.0, stddev=0.05, seed=None): ...

class Orthogonal:
    """Initialize weights as an orthogonal matrix."""
    def __init__(self, gain=1.0, seed=None): ...

class VarianceScaling:
    """Initialize weights with variance scaled to the weight tensor's shape."""
    def __init__(self, scale=1.0, mode='fan_in', distribution='truncated_normal', seed=None): ...
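The Glorot, He, and LeCun initializers below are special cases of VarianceScaling; a sketch of the standard correspondence:

from keras import initializers

# He normal: scale=2.0, fan_in
he = initializers.VarianceScaling(scale=2.0, mode='fan_in', distribution='truncated_normal')

# Glorot normal: scale=1.0, fan_avg (average of fan_in and fan_out)
glorot = initializers.VarianceScaling(scale=1.0, mode='fan_avg', distribution='truncated_normal')

# LeCun normal: scale=1.0, fan_in
lecun = initializers.VarianceScaling(scale=1.0, mode='fan_in', distribution='truncated_normal')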
Xavier (Glorot) initialization methods that scale weights based on input and output dimensions.

class GlorotUniform:
    """Glorot uniform initializer (Xavier uniform)."""
    def __init__(self, seed=None): ...

class GlorotNormal:
    """Glorot normal initializer (Xavier normal)."""
    def __init__(self, seed=None): ...
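GlorotUniform draws from U(-limit, limit) with limit = sqrt(6 / (fan_in + fan_out)); GlorotNormal uses a truncated normal with stddev = sqrt(2 / (fan_in + fan_out)).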
He initialization methods optimized for ReLU activations.

class HeUniform:
    """He uniform initializer."""
    def __init__(self, seed=None): ...

class HeNormal:
    """He normal initializer."""
    def __init__(self, seed=None): ...
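HeUniform draws from U(-limit, limit) with limit = sqrt(6 / fan_in); HeNormal uses a truncated normal with stddev = sqrt(2 / fan_in), compensating for ReLU zeroing out roughly half of its inputs.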
LeCun initialization methods for SELU activations.

class LecunUniform:
    """LeCun uniform initializer."""
    def __init__(self, seed=None): ...

class LecunNormal:
    """LeCun normal initializer."""
    def __init__(self, seed=None): ...
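LecunUniform draws from U(-limit, limit) with limit = sqrt(3 / fan_in); LecunNormal uses a truncated normal with stddev = sqrt(1 / fan_in), the scaling that SELU's self-normalizing property assumes.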
Base classes and utility functions for working with initializers.

class Initializer:
    """Base class for all initializers."""
    def __call__(self, shape, dtype=None, **kwargs): ...
    def get_config(self): ...

def get(identifier):
    """Retrieve an initializer by name or instance."""

def serialize(initializer):
    """Serialize an initializer to configuration."""

def deserialize(config, custom_objects=None):
    """Deserialize an initializer from configuration."""
from keras import layers, initializers

# Using string identifiers
dense_layer = layers.Dense(64, kernel_initializer='he_normal')
# Using initializer classes
dense_layer = layers.Dense(
    64,
    kernel_initializer=initializers.HeNormal(),
    bias_initializer=initializers.Zeros())
# Custom parameters
dense_layer = layers.Dense(
    64,
    kernel_initializer=initializers.RandomNormal(mean=0.0, stddev=0.01),
    bias_initializer=initializers.Constant(value=0.1))
from keras import layers, initializers

# Convolutional layer with He initialization
conv_layer = layers.Conv2D(
    32, (3, 3),
    kernel_initializer='he_uniform',
    bias_initializer='zeros')
# With custom variance scaling
conv_layer = layers.Conv2D(
    32, (3, 3),
    kernel_initializer=initializers.VarianceScaling(
        scale=2.0, mode='fan_out', distribution='uniform'))
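Here mode='fan_out' scales the variance by the number of output connections rather than inputs, which preserves variance in the backward pass (the fan-out variant of He initialization).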
from keras import layers, initializers

# LSTM with orthogonal recurrent weights
lstm_layer = layers.LSTM(
    128,
    kernel_initializer='glorot_uniform',
    recurrent_initializer='orthogonal',
    bias_initializer='zeros')
# GRU with custom initialization
gru_layer = layers.GRU(
    64,
    kernel_initializer=initializers.GlorotNormal(),
    recurrent_initializer=initializers.Orthogonal(gain=1.0))
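Orthogonal recurrent weights preserve the norm of the hidden state as it is multiplied repeatedly across timesteps, which helps mitigate vanishing and exploding gradients in RNNs.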
import keras
from keras import layers, initializers
class CustomInitializer(initializers.Initializer):
    def __init__(self, scale=1.0):
        self.scale = scale

    def __call__(self, shape, dtype=None, **kwargs):
        # Custom initialization logic: standard-normal values, rescaled
        values = keras.random.normal(shape, dtype=dtype) * self.scale
        return values

    def get_config(self):
        return {'scale': self.scale}
# Use custom initializer
dense_layer = layers.Dense(64, kernel_initializer=CustomInitializer(scale=0.5))
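Because get_config is defined, the custom initializer can round-trip through the serialization utilities; a sketch (the custom_objects mapping reflects typical Keras usage):

# Round-trip the custom initializer through its config
config = initializers.serialize(CustomInitializer(scale=0.5))
restored = initializers.deserialize(
    config, custom_objects={'CustomInitializer': CustomInitializer})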
import keras
from keras import initializers
# Compare different initializers for the same shape
shape = (100, 50)

# Glorot (Xavier) initialization
glorot_weights = initializers.GlorotNormal()(shape)
print(f"Glorot std: {float(keras.ops.std(glorot_weights)):.4f}")

# He initialization
he_weights = initializers.HeNormal()(shape)
print(f"He std: {float(keras.ops.std(he_weights)):.4f}")

# LeCun initialization
lecun_weights = initializers.LecunNormal()(shape)
print(f"LeCun std: {float(keras.ops.std(lecun_weights)):.4f}")
from keras import layers, initializers, models

# Identity initialization for residual connections
inputs = layers.Input(shape=(64,))
x = layers.Dense(64, kernel_initializer='he_normal')(inputs)
x = layers.ReLU()(x)
# Skip connection with identity initialization
skip = layers.Dense(64, kernel_initializer=initializers.Identity(gain=0.1))(inputs)
outputs = layers.Add()([x, skip])
model = models.Model(inputs, outputs)

Recommended initializers by activation function:
- ReLU and variants: HeNormal or HeUniform
- SELU: LecunNormal or LecunUniform
- tanh or sigmoid: GlorotNormal or GlorotUniform
- Other or no activation: GlorotNormal or custom variance scaling

Install with Tessl CLI
npx tessl i tessl/pypi-keras-nightly