CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-keras

Multi-backend deep learning framework that provides a unified, high-level API for building and training neural networks across JAX, TensorFlow, PyTorch, and OpenVINO backends.

Pending
Overview
Eval results
Files

docs/training.md

Training and Optimization

Optimizers, loss functions, metrics, and callbacks for training neural networks effectively. These components control how models learn from data and how training progress is monitored and controlled.

Capabilities

Optimizers

Optimization algorithms that update model parameters during training to minimize the loss function.

class Optimizer:
    def __init__(self, learning_rate=0.001, name=None, **kwargs):
        """
        Base class for all optimizers.
        
        Parameters:
        - learning_rate: Initial learning rate. Default 0.001.
        - name: Optional name of the optimizer instance.
        - **kwargs: Additional options forwarded to the concrete
          optimizer implementation (e.g. gradient-clipping settings).
        """
    
    def apply_gradients(self, grads_and_vars):
        """
        Apply gradients to variables.
        
        Performs a single optimization step, updating each variable
        in place according to the optimizer's update rule.
        
        Parameters:
        - grads_and_vars: List of (gradient, variable) pairs
        """

class SGD(Optimizer):
    def __init__(self, learning_rate=0.01, momentum=0.0, nesterov=False, **kwargs):
        """
        Stochastic Gradient Descent optimizer.
        
        Parameters:
        - learning_rate: Learning rate. Default 0.01 (larger than the
          0.001 used by the adaptive optimizers in this module).
        - momentum: Momentum factor; 0.0 (default) disables momentum.
        - nesterov: Whether to apply Nesterov momentum (only meaningful
          when momentum > 0).
        """

class Adam(Optimizer):
    def __init__(self, learning_rate=0.001, beta_1=0.9, beta_2=0.999, 
                 epsilon=1e-7, amsgrad=False, weight_decay=None, clipnorm=None,
                 clipvalue=None, global_clipnorm=None, use_ema=False,
                 ema_momentum=0.99, ema_overwrite_frequency=None,
                 loss_scale_factor=None, gradient_accumulation_steps=None, **kwargs):
        """
        Adam optimizer (adaptive moment estimation).
        
        Parameters:
        - learning_rate: Learning rate. Default 0.001.
        - beta_1: Exponential decay rate for first moment estimates. Default 0.9.
        - beta_2: Exponential decay rate for second moment estimates. Default 0.999.
        - epsilon: Small constant for numerical stability. Default 1e-7.
        - amsgrad: Whether to apply the AMSGrad variant.
        - weight_decay: Weight decay coefficient; None (default) disables it.
        - clipnorm: Per-gradient norm clipping value; None disables.
        - clipvalue: Element-wise value clipping threshold; None disables.
        - global_clipnorm: Clip by the global norm of all gradients; None disables.
        - use_ema: Whether to maintain an exponential moving average of weights.
        - ema_momentum: EMA momentum coefficient (used when use_ema=True).
        - ema_overwrite_frequency: How often (in steps) to overwrite weights
          with their EMA; None means never during training.
        - loss_scale_factor: Loss scaling factor (for mixed precision); None disables.
        - gradient_accumulation_steps: Number of steps to accumulate gradients
          over before applying an update; None applies every step.
        """

class AdamW(Optimizer):
    def __init__(self, learning_rate=0.001, weight_decay=0.004, beta_1=0.9,
                 beta_2=0.999, epsilon=1e-7, amsgrad=False, **kwargs):
        """
        AdamW optimizer with decoupled weight decay.
        
        Unlike plain Adam with L2 regularization, the decay is applied
        directly to the weights, decoupled from the gradient update.
        
        Parameters:
        - learning_rate: Learning rate. Default 0.001.
        - weight_decay: Decoupled weight decay coefficient. Default 0.004
          (note: non-None by default, unlike Adam's weight_decay).
        - beta_1: Exponential decay rate for first moment estimates.
        - beta_2: Exponential decay rate for second moment estimates.
        - epsilon: Small constant for numerical stability.
        - amsgrad: Whether to apply the AMSGrad variant.
        """

class RMSprop(Optimizer):
    def __init__(self, learning_rate=0.001, rho=0.9, momentum=0.0, 
                 epsilon=1e-7, centered=False, **kwargs):
        """
        RMSprop optimizer.
        
        Parameters:
        - learning_rate: Learning rate. Default 0.001.
        - rho: Discounting factor for the moving average of squared
          gradients (higher = longer gradient history). Default 0.9.
        - momentum: Momentum factor; 0.0 (default) disables momentum.
        - epsilon: Small constant for numerical stability.
        - centered: If True, normalize gradients by an estimate of their
          variance (centered second moment) instead of the raw second moment.
        """

class Adagrad(Optimizer):
    def __init__(self, learning_rate=0.001, initial_accumulator_value=0.1,
                 epsilon=1e-7, **kwargs):
        """
        Adagrad optimizer (per-parameter adaptive learning rates that
        shrink as squared gradients accumulate).
        
        Parameters:
        - learning_rate: Learning rate. Default 0.001.
        - initial_accumulator_value: Starting value for the per-parameter
          squared-gradient accumulators. Default 0.1.
        - epsilon: Small constant for numerical stability.
        """

class Adadelta(Optimizer):
    def __init__(self, learning_rate=0.001, rho=0.95, epsilon=1e-7, **kwargs):
        """
        Adadelta optimizer (adapts learning rates using a moving window
        of gradient updates rather than accumulating all past gradients).
        
        Parameters:
        - learning_rate: Learning rate. Default 0.001.
        - rho: Decay factor for the moving averages. Default 0.95.
        - epsilon: Small constant for numerical stability.
        """

Loss Functions

Functions that measure the difference between predicted and actual values, guiding the optimization process.

class Loss:
    def __init__(self, reduction='sum_over_batch_size', name=None, **kwargs):
        """
        Base class for all loss functions.
        
        Parameters:
        - reduction: Type of reduction to apply across the batch.
          Default 'sum_over_batch_size' (mean over samples); other
          accepted values presumably include 'sum' and 'none' — confirm
          against the keras.losses reference.
        - name: Optional name of the loss function.
        """
    
    def __call__(self, y_true, y_pred, sample_weight=None):
        """
        Compute the (reduced) loss value.
        
        Parameters:
        - y_true: Ground truth values
        - y_pred: Predicted values
        - sample_weight: Optional per-sample weights applied before reduction
        
        Returns:
        Loss value (scalar after reduction, unless reduction='none')
        """

class SparseCategoricalCrossentropy(Loss):
    def __init__(self, from_logits=False, ignore_class=None, **kwargs):
        """
        Sparse categorical crossentropy loss.
        
        Expects y_true as integer class indices (not one-hot vectors);
        use CategoricalCrossentropy for one-hot labels.
        
        Parameters:
        - from_logits: Whether predictions are raw logits (True) or
          probabilities (False, default).
        - ignore_class: Optional class index excluded from the loss
          (e.g. a padding/void label).
        """

class CategoricalCrossentropy(Loss):
    def __init__(self, from_logits=False, label_smoothing=0.0, **kwargs):
        """
        Categorical crossentropy loss.
        
        Expects y_true as one-hot vectors; use
        SparseCategoricalCrossentropy for integer class indices.
        
        Parameters:
        - from_logits: Whether predictions are raw logits (True) or
          probabilities (False, default).
        - label_smoothing: Float in [0, 1]; 0.0 (default) disables
          smoothing of the one-hot targets.
        """

class BinaryCrossentropy(Loss):
    def __init__(self, from_logits=False, label_smoothing=0.0, **kwargs):
        """
        Binary crossentropy loss, for binary (0/1) or multi-label targets.
        
        Parameters:
        - from_logits: Whether predictions are raw logits (True) or
          probabilities (False, default).
        - label_smoothing: Float in [0, 1]; 0.0 (default) disables
          smoothing of the targets toward 0.5.
        """

class MeanSquaredError(Loss):
    def __init__(self, **kwargs):
        """Mean squared error loss: mean((y_true - y_pred) ** 2).

        NOTE: distinct from the streaming metric of the same name
        (this one lives in the losses namespace).
        """

class MeanAbsoluteError(Loss):
    def __init__(self, **kwargs):
        """Mean absolute error loss: mean(|y_true - y_pred|).

        NOTE: distinct from the streaming metric of the same name
        (this one lives in the losses namespace).
        """

class Huber(Loss):
    def __init__(self, delta=1.0, **kwargs):
        """
        Huber loss: quadratic for small errors, linear for large ones,
        making it less sensitive to outliers than MSE.
        
        Parameters:
        - delta: Error magnitude at which the loss switches from
          quadratic to linear. Default 1.0.
        """

class KLDivergence(Loss):
    def __init__(self, **kwargs):
        """Kullback-Leibler divergence loss between y_true and y_pred
        probability distributions."""

class CosineSimilarity(Loss):
    def __init__(self, axis=-1, **kwargs):
        """
        Cosine similarity loss.
        
        NOTE(review): in Keras this loss is the *negative* cosine
        similarity, so minimizing it maximizes similarity — confirm
        against the keras.losses reference.
        
        Parameters:
        - axis: Axis along which to compute cosine similarity. Default -1.
        """

Metrics

Functions for monitoring training and evaluation performance without affecting the optimization process.

class Metric:
    def __init__(self, name=None, dtype=None, **kwargs):
        """
        Base class for all metrics.
        
        Metrics are stateful: call update_state() with batches of
        observations, result() to read the aggregated value, and
        reset_state() to start over (e.g. at the start of each epoch).
        
        Parameters:
        - name: Name of the metric (used in logs/history keys)
        - dtype: Data type for metric computations
        """
    
    def update_state(self, y_true, y_pred, sample_weight=None):
        """
        Accumulate new observations into the metric's internal state.
        
        Parameters:
        - y_true: Ground truth values
        - y_pred: Predicted values
        - sample_weight: Optional sample weights
        """
    
    def result(self):
        """
        Compute and return the metric value aggregated over all
        observations since the last reset_state().
        
        Returns:
        Metric value as tensor
        """
    
    def reset_state(self):
        """Reset all metric state variables to their initial values."""

class Accuracy(Metric):
    def __init__(self, name='accuracy', dtype=None, **kwargs):
        """Accuracy metric: fraction of predictions exactly equal to labels.

        Compares y_pred and y_true element-wise; for probability outputs
        prefer CategoricalAccuracy / SparseCategoricalAccuracy.
        """

class SparseCategoricalAccuracy(Metric):
    def __init__(self, name='sparse_categorical_accuracy', dtype=None, **kwargs):
        """Categorical accuracy for integer (sparse) labels: fraction of
        samples where argmax(y_pred) equals the integer label."""

class CategoricalAccuracy(Metric):
    def __init__(self, name='categorical_accuracy', dtype=None, **kwargs):
        """Categorical accuracy for one-hot labels: fraction of samples
        where argmax(y_pred) equals argmax(y_true)."""

class TopKCategoricalAccuracy(Metric):
    def __init__(self, k=5, name='top_k_categorical_accuracy', dtype=None, **kwargs):
        """
        Top-k categorical accuracy metric.
        
        Expects one-hot y_true; for integer labels use
        SparseTopKCategoricalAccuracy instead.
        
        Parameters:
        - k: Number of top predictions to consider a hit. Default 5.
        """

class Precision(Metric):
    def __init__(self, thresholds=None, top_k=None, class_id=None, 
                 name=None, dtype=None, **kwargs):
        """
        Precision metric: true positives / (true positives + false positives).
        
        Parameters:
        - thresholds: Optional threshold(s) for binarizing predictions;
          None uses a default of 0.5.
        - top_k: If set, only the top-k predictions per sample count as positive.
        - class_id: If set, compute precision for this class only.
        """

class Recall(Metric):
    def __init__(self, thresholds=None, top_k=None, class_id=None,
                 name=None, dtype=None, **kwargs):
        """Recall metric: true positives / (true positives + false negatives).

        Parameters:
        - thresholds: Optional threshold(s) for binarizing predictions;
          None uses a default of 0.5.
        - top_k: If set, only the top-k predictions per sample count as positive.
        - class_id: If set, compute recall for this class only.
        """

class AUC(Metric):
    def __init__(self, num_thresholds=200, curve='ROC', summation_method='interpolation',
                 name=None, dtype=None, **kwargs):
        """
        Area under the curve metric, approximated by discretizing the
        prediction range into thresholds.
        
        Parameters:
        - num_thresholds: Number of thresholds used for the approximation
          (more = finer but costlier). Default 200.
        - curve: Type of curve: 'ROC' (default) or 'PR' (precision-recall).
        - summation_method: Riemann-sum method for approximating the area.
          Default 'interpolation'.
        """

class F1Score(Metric):
    def __init__(self, average=None, threshold=None, name='f1_score', dtype=None, **kwargs):
        """
        F1 score metric: harmonic mean of precision and recall.
        
        Parameters:
        - average: Type of averaging across classes: 'micro', 'macro',
          'weighted', or None (default: per-class scores).
        - threshold: Decision threshold for binarizing predictions;
          None presumably uses argmax — confirm against the keras.metrics
          reference.
        """

class MeanSquaredError(Metric):
    def __init__(self, name='mean_squared_error', dtype=None, **kwargs):
        """Streaming mean squared error metric.

        NOTE: distinct from the loss of the same name — this one lives in
        the metrics namespace and aggregates across update_state() calls.
        """

class MeanAbsoluteError(Metric):
    def __init__(self, name='mean_absolute_error', dtype=None, **kwargs):
        """Streaming mean absolute error metric.

        NOTE: distinct from the loss of the same name — this one lives in
        the metrics namespace and aggregates across update_state() calls.
        """

class RootMeanSquaredError(Metric):
    def __init__(self, name='root_mean_squared_error', dtype=None, **kwargs):
        """Streaming root mean squared error metric: sqrt of the
        aggregated mean squared error."""

Callbacks

Utilities that can perform actions at various stages of training, such as saving models, adjusting learning rates, or early stopping.

class Callback:
    def __init__(self):
        """Base class for all callbacks.

        Subclasses override any of the on_* hooks below; each receives a
        `logs` dict of current metric values (may be None/empty for the
        *_begin hooks).
        """
    
    def on_epoch_begin(self, epoch, logs=None):
        """Called at the beginning of an epoch. `epoch` is the 0-based index."""
    
    def on_epoch_end(self, epoch, logs=None):
        """Called at the end of an epoch; `logs` contains the epoch's
        training (and validation, if any) metric results."""
    
    def on_batch_begin(self, batch, logs=None):
        """Called at the beginning of a batch. `batch` is the 0-based index."""
    
    def on_batch_end(self, batch, logs=None):
        """Called at the end of a batch; `logs` contains batch-level metrics."""
    
    def on_train_begin(self, logs=None):
        """Called once at the beginning of training."""
    
    def on_train_end(self, logs=None):
        """Called once at the end of training."""

class ModelCheckpoint(Callback):
    def __init__(self, filepath, monitor='val_loss', verbose=0, save_best_only=False,
                 save_weights_only=False, mode='auto', save_freq='epoch', **kwargs):
        """
        Save the model or its weights at some frequency during training.
        
        Parameters:
        - filepath: Path to save model/weights; may contain formatting
          placeholders such as epoch or metric names.
        - monitor: Metric to monitor when save_best_only=True. Default 'val_loss'.
        - verbose: Verbosity mode (0 = silent, 1 = log each save).
        - save_best_only: Save only when the monitored metric improves.
        - save_weights_only: Save only weights instead of the full model.
        - mode: One of {'auto', 'min', 'max'}; 'auto' infers the direction
          of improvement from the monitored metric's name.
        - save_freq: 'epoch' (default) or an integer number of batches.
        """

class EarlyStopping(Callback):
    def __init__(self, monitor='val_loss', min_delta=0, patience=0, verbose=0,
                 mode='auto', baseline=None, restore_best_weights=False, **kwargs):
        """
        Stop training when the monitored metric has stopped improving.
        
        Parameters:
        - monitor: Metric to monitor. Default 'val_loss'.
        - min_delta: Minimum change to qualify as an improvement;
          smaller changes count as "no improvement".
        - patience: Number of epochs with no improvement to wait before
          stopping. Default 0 (stop immediately on first non-improvement).
        - verbose: Verbosity mode.
        - mode: One of {'auto', 'min', 'max'}; 'auto' infers the direction
          of improvement from the monitored metric's name.
        - baseline: If set, training stops when the metric fails to beat
          this baseline within `patience` epochs.
        - restore_best_weights: Restore model weights from the best epoch
          when training stops (otherwise the last weights are kept).
        """

class ReduceLROnPlateau(Callback):
    def __init__(self, monitor='val_loss', factor=0.1, patience=10, verbose=0,
                 mode='auto', min_delta=1e-4, cooldown=0, min_lr=0, **kwargs):
        """
        Reduce the learning rate when the monitored metric has plateaued.
        
        Parameters:
        - monitor: Metric to monitor. Default 'val_loss'.
        - factor: Multiplier applied to the learning rate on reduction
          (new_lr = lr * factor). Default 0.1.
        - patience: Number of epochs with no improvement before reducing.
        - verbose: Verbosity mode.
        - mode: One of {'auto', 'min', 'max'}; 'auto' infers the direction
          of improvement from the monitored metric's name.
        - min_delta: Threshold for measuring a new optimum.
        - cooldown: Epochs to wait after a reduction before resuming
          normal plateau monitoring.
        - min_lr: Lower bound on the learning rate; reductions never go below it.
        """

class LearningRateScheduler(Callback):
    def __init__(self, schedule, verbose=0, **kwargs):
        """
        Set the learning rate at the start of each epoch from a schedule function.
        
        Parameters:
        - schedule: Callable taking the epoch index (and, per the usage
          example below in this document, the current learning rate) and
          returning the new learning rate.
        - verbose: Verbosity mode (1 = log each learning-rate update).
        """

class TensorBoard(Callback):
    def __init__(self, log_dir='logs', histogram_freq=0, write_graph=True,
                 write_images=False, write_steps_per_second=False, 
                 update_freq='epoch', **kwargs):
        """
        TensorBoard logging callback.
        
        Parameters:
        - log_dir: Directory to write TensorBoard logs. Default 'logs'.
        - histogram_freq: Frequency (in epochs) for writing weight
          histograms; 0 (default) disables them.
        - write_graph: Whether to write the computation graph.
        - write_images: Whether to write model weights as images.
        - write_steps_per_second: Whether to log training steps/second.
        - update_freq: 'batch', 'epoch' (default), or an integer number
          of batches between metric writes.
        """

class CSVLogger(Callback):
    def __init__(self, filename, separator=',', append=False, **kwargs):
        """
        Stream per-epoch results (loss and metrics) to a CSV file.
        
        Parameters:
        - filename: Path to the CSV file to write.
        - separator: Field separator string. Default ','.
        - append: Append to an existing file (True) or overwrite (False, default).
        """

Usage Examples

Basic Training Setup

import keras
from keras import layers, optimizers, losses, metrics

# Build model
model = keras.Sequential([
    layers.Dense(128, activation='relu', input_shape=(784,)),
    layers.Dropout(0.2),
    layers.Dense(10, activation='softmax')
])

# Compile with custom optimizer and metrics.
# The loss is *sparse* categorical crossentropy, so labels are integer
# class indices — the metrics must use the sparse variants too.
# (TopKCategoricalAccuracy expects one-hot labels and would be wrong here.)
model.compile(
    optimizer=optimizers.Adam(learning_rate=0.001),
    loss=losses.SparseCategoricalCrossentropy(),
    metrics=[
        metrics.SparseCategoricalAccuracy(),
        metrics.SparseTopKCategoricalAccuracy(k=3)
    ]
)

Training with Callbacks

from keras import callbacks

# Define callbacks

# Save the full model to 'best_model.keras' whenever validation accuracy
# improves (save_best_only=True); verbose=1 logs each save.
checkpoint = callbacks.ModelCheckpoint(
    'best_model.keras',
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1
)

# Stop after 5 consecutive epochs without val_loss improvement and roll
# back to the best-performing weights.
early_stop = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

# Multiply the learning rate by 0.2 after 3 stagnant epochs, but never
# drop below 1e-7.
reduce_lr = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    min_lr=1e-7
)

# Train with callbacks; `history.history` records per-epoch metrics.
history = model.fit(
    x_train, y_train,
    batch_size=32,
    epochs=100,
    validation_data=(x_val, y_val),
    callbacks=[checkpoint, early_stop, reduce_lr]
)

Custom Learning Rate Schedule

from keras import callbacks
import math

def lr_schedule(epoch, lr):
    """Hold the learning rate steady for the first 10 epochs, then decay
    it exponentially (multiply by e^-0.1 each epoch)."""
    if epoch >= 10:
        lr = lr * math.exp(-0.1)
    return lr

# Apply the schedule at the start of each epoch; verbose=1 logs each change.
lr_scheduler = callbacks.LearningRateScheduler(lr_schedule, verbose=1)

model.fit(
    x_train, y_train,
    epochs=50,
    callbacks=[lr_scheduler]
)

Multi-GPU Training

import keras
from keras import layers
import tensorflow as tf

# Create distributed strategy.
# NOTE: Keras itself has no `keras.distribute` module; with the
# TensorFlow backend, multi-GPU data parallelism comes from
# tf.distribute.MirroredStrategy.
strategy = tf.distribute.MirroredStrategy()

with strategy.scope():
    # Variables created inside the strategy scope are mirrored across GPUs.
    model = keras.Sequential([
        layers.Dense(128, activation='relu', input_shape=(784,)),
        layers.Dense(10, activation='softmax')
    ])
    
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

# Train on multiple GPUs
model.fit(x_train, y_train, epochs=10)

Install with Tessl CLI

npx tessl i tessl/pypi-keras

docs

activations.md

applications.md

data-utils.md

index.md

initializers.md

layers.md

models.md

operations.md

random.md

regularizers.md

saving.md

training.md

tile.json