tessl/pypi-keras

Multi-backend deep learning framework that provides a unified, high-level API for building and training neural networks across JAX, TensorFlow, PyTorch, and OpenVINO backends.

docs/layers.md

Layers and Building Blocks

Comprehensive layer types for building neural networks in Keras. Layers are the fundamental building blocks that transform inputs through learnable parameters and mathematical operations.

Capabilities

Base Layer Class

The foundational Layer class that all Keras layers inherit from, providing core functionality for parameter management, computation, and serialization.

class Layer:
    def __init__(self, trainable=True, name=None, dtype=None, **kwargs):
        """
        Base class for all neural network layers.
        
        Parameters:
        - trainable: Whether layer weights should be trainable
        - name: Name of the layer
        - dtype: Data type for layer computations
        """
    
    def call(self, inputs, **kwargs):
        """
        Forward pass computation logic.
        
        Parameters:
        - inputs: Input tensor(s)
        
        Returns:
        Output tensor(s)
        """
    
    def build(self, input_shape):
        """
        Create layer weights based on input shape.
        
        Parameters:
        - input_shape: Shape of input tensor
        """
    
    def get_config(self):
        """
        Get layer configuration for serialization.
        
        Returns:
        Dict containing layer configuration
        """

Core Layers

Fundamental layers for basic neural network operations including dense connections, embeddings, and utility layers.

class Dense(Layer):
    def __init__(self, units, activation=None, use_bias=True, 
                 kernel_initializer='glorot_uniform', bias_initializer='zeros',
                 kernel_regularizer=None, bias_regularizer=None,
                 activity_regularizer=None, kernel_constraint=None,
                 bias_constraint=None, lora_rank=None, lora_alpha=None, **kwargs):
        """
        Fully connected layer.
        
        Parameters:
        - units: Number of output units
        - activation: Activation function to use
        - use_bias: Whether to use bias vector
        - kernel_initializer: Initializer for weight matrix
        - bias_initializer: Initializer for bias vector
        - kernel_regularizer: Regularizer for weight matrix
        - bias_regularizer: Regularizer for bias vector
        - activity_regularizer: Regularizer for layer output
        - kernel_constraint: Constraint for weight matrix
        - bias_constraint: Constraint for bias vector
        - lora_rank: Rank for LoRA (Low-Rank Adaptation)
        - lora_alpha: Alpha parameter for LoRA scaling
        """

class Embedding(Layer):
    def __init__(self, input_dim, output_dim, embeddings_initializer='uniform',
                 embeddings_regularizer=None, mask_zero=False, **kwargs):
        """
        Embedding layer for discrete tokens.
        
        Parameters:
        - input_dim: Size of vocabulary
        - output_dim: Size of dense vector embeddings
        - embeddings_initializer: Initializer for embedding matrix
        - embeddings_regularizer: Regularizer for embedding matrix
        - mask_zero: Whether input value 0 is a special "padding" value that should be masked out
        """

class Flatten(Layer):
    def __init__(self, data_format=None, **kwargs):
        """
        Flatten input tensor to 1D (except batch dimension).
        
        Parameters:
        - data_format: Data format for input tensor
        """

class Reshape(Layer):
    def __init__(self, target_shape, **kwargs):
        """
        Reshape input tensor to target shape.
        
        Parameters:
        - target_shape: Target shape tuple (not including batch dimension)
        """

class Lambda(Layer):
    def __init__(self, function, output_shape=None, mask=None, **kwargs):
        """
        Wrap an arbitrary expression as a layer.
        
        Parameters:
        - function: Function to be evaluated
        - output_shape: Expected output shape from function
        - mask: Mask to be applied to output
        """

Convolutional Layers

Layers for convolutional operations in 1D, 2D, and 3D, including standard, transposed, depthwise, and separable convolutions.

class Conv2D(Layer):
    def __init__(self, filters, kernel_size, strides=(1, 1), padding='valid',
                 data_format=None, dilation_rate=(1, 1), groups=1,
                 activation=None, use_bias=True, **kwargs):
        """
        2D convolution layer.
        
        Parameters:
        - filters: Number of output filters
        - kernel_size: Size of convolution window
        - strides: Stride of convolution
        - padding: Padding mode ('valid' or 'same')
        - data_format: Data format ('channels_last' or 'channels_first')
        - dilation_rate: Dilation rate for dilated convolution
        - groups: Number of groups for grouped convolution
        - activation: Activation function
        - use_bias: Whether to use bias
        """

class Conv1D(Layer):
    def __init__(self, filters, kernel_size, strides=1, padding='valid',
                 data_format=None, dilation_rate=1, groups=1,
                 activation=None, use_bias=True, **kwargs):
        """1D convolution layer."""

class Conv3D(Layer):
    def __init__(self, filters, kernel_size, strides=(1, 1, 1), padding='valid',
                 data_format=None, dilation_rate=(1, 1, 1), groups=1,
                 activation=None, use_bias=True, **kwargs):
        """3D convolution layer."""

class Conv2DTranspose(Layer):
    def __init__(self, filters, kernel_size, strides=(1, 1), padding='valid',
                 output_padding=None, data_format=None, dilation_rate=(1, 1),
                 activation=None, use_bias=True, **kwargs):
        """2D transposed convolution layer."""

class DepthwiseConv2D(Layer):
    def __init__(self, kernel_size, strides=(1, 1), padding='valid',
                 depth_multiplier=1, data_format=None, dilation_rate=(1, 1),
                 activation=None, use_bias=True, **kwargs):
        """2D depthwise convolution layer."""

class SeparableConv2D(Layer):
    def __init__(self, filters, kernel_size, strides=(1, 1), padding='valid',
                 data_format=None, dilation_rate=(1, 1), depth_multiplier=1,
                 activation=None, use_bias=True, **kwargs):
        """2D separable convolution layer."""

Pooling Layers

Pooling operations for downsampling feature maps using max pooling, average pooling, and global pooling variants.

class MaxPooling2D(Layer):
    def __init__(self, pool_size=(2, 2), strides=None, padding='valid',
                 data_format=None, **kwargs):
        """
        2D max pooling layer.
        
        Parameters:
        - pool_size: Size of pooling window
        - strides: Stride of pooling operation
        - padding: Padding mode
        - data_format: Data format
        """

class AveragePooling2D(Layer):
    def __init__(self, pool_size=(2, 2), strides=None, padding='valid',
                 data_format=None, **kwargs):
        """2D average pooling layer."""

class GlobalMaxPooling2D(Layer):
    def __init__(self, data_format=None, keepdims=False, **kwargs):
        """
        Global max pooling for 2D data.
        
        Parameters:
        - data_format: Data format
        - keepdims: Whether to keep spatial dimensions
        """

class GlobalAveragePooling2D(Layer):
    def __init__(self, data_format=None, keepdims=False, **kwargs):
        """Global average pooling for 2D data."""

Recurrent Layers

Recurrent neural network layers including LSTM, GRU, and simple RNN variants for sequence processing.

class LSTM(Layer):
    def __init__(self, units, activation='tanh', recurrent_activation='sigmoid',
                 use_bias=True, kernel_initializer='glorot_uniform',
                 recurrent_initializer='orthogonal', bias_initializer='zeros',
                 dropout=0.0, recurrent_dropout=0.0, return_sequences=False,
                 return_state=False, go_backwards=False, stateful=False,
                 unroll=False, **kwargs):
        """
        Long Short-Term Memory layer.
        
        Parameters:
        - units: Dimensionality of output space
        - activation: Activation function to use
        - recurrent_activation: Activation function for the recurrent step (the gates)
        - use_bias: Whether to use bias vectors
        - kernel_initializer: Initializer for input weights
        - recurrent_initializer: Initializer for recurrent weights
        - bias_initializer: Initializer for bias vectors
        - dropout: Dropout rate for input connections
        - recurrent_dropout: Dropout rate for recurrent connections
        - return_sequences: Whether to return full sequence or last output
        - return_state: Whether to return last state in addition to output
        - go_backwards: Whether to process sequence backwards
        - stateful: Whether the last state of each batch is reused as the initial state of the next batch
        - unroll: Whether to unroll the recurrent loop
        """

class GRU(Layer):
    def __init__(self, units, activation='tanh', recurrent_activation='sigmoid',
                 use_bias=True, dropout=0.0, recurrent_dropout=0.0,
                 return_sequences=False, return_state=False, **kwargs):
        """Gated Recurrent Unit layer."""

class SimpleRNN(Layer):
    def __init__(self, units, activation='tanh', use_bias=True, dropout=0.0,
                 recurrent_dropout=0.0, return_sequences=False, 
                 return_state=False, **kwargs):
        """Simple RNN layer."""

class Bidirectional(Layer):
    def __init__(self, layer, merge_mode='concat', weights=None, **kwargs):
        """
        Bidirectional wrapper for RNNs.
        
        Parameters:
        - layer: RNN layer to wrap
        - merge_mode: How to combine forward and backward outputs
        - weights: Initial weights
        """

Attention Layers

Attention mechanisms for focusing on relevant parts of input sequences and implementing transformer-style architectures.

class MultiHeadAttention(Layer):
    def __init__(self, num_heads, key_dim, value_dim=None, dropout=0.0,
                 use_bias=True, output_shape=None, **kwargs):
        """
        Multi-head attention layer.
        
        Parameters:
        - num_heads: Number of attention heads
        - key_dim: Size of each attention head for query and key
        - value_dim: Size of each attention head for value
        - dropout: Dropout probability for attention weights
        - use_bias: Whether to use bias in linear projections
        - output_shape: Expected shape of output tensor
        """

class Attention(Layer):
    def __init__(self, use_scale=False, score_mode='dot', **kwargs):
        """
        Attention layer for computing attention weights.
        
        Parameters:
        - use_scale: Whether to scale attention scores
        - score_mode: Type of attention score computation
        """

Normalization Layers

Normalization techniques for stabilizing and accelerating training, including batch normalization, layer normalization, and group normalization.

class BatchNormalization(Layer):
    def __init__(self, axis=-1, momentum=0.99, epsilon=1e-3, center=True,
                 scale=True, beta_initializer='zeros', gamma_initializer='ones',
                 **kwargs):
        """
        Batch normalization layer.
        
        Parameters:
        - axis: Axis to normalize along
        - momentum: Momentum for moving statistics
        - epsilon: Small constant for numerical stability
        - center: Whether to add learned offset parameter
        - scale: Whether to add learned scaling parameter
        - beta_initializer: Initializer for beta parameter
        - gamma_initializer: Initializer for gamma parameter
        """

class LayerNormalization(Layer):
    def __init__(self, axis=-1, epsilon=1e-3, center=True, scale=True,
                 beta_initializer='zeros', gamma_initializer='ones', **kwargs):
        """Layer normalization layer."""

class GroupNormalization(Layer):
    def __init__(self, groups=32, axis=-1, epsilon=1e-3, center=True, 
                 scale=True, **kwargs):
        """
        Group normalization layer.
        
        Parameters:
        - groups: Number of groups for normalization
        - axis: Axis to normalize along
        - epsilon: Small constant for numerical stability
        - center: Whether to add learned offset parameter
        - scale: Whether to add learned scaling parameter
        """

Regularization Layers

Layers for regularization including various dropout techniques and noise injection to prevent overfitting.

class Dropout(Layer):
    def __init__(self, rate, noise_shape=None, seed=None, **kwargs):
        """
        Dropout layer for regularization.
        
        Parameters:
        - rate: Fraction of input units to drop
        - noise_shape: Shape of binary dropout mask
        - seed: Random seed for dropout
        """

class SpatialDropout2D(Layer):
    def __init__(self, rate, data_format=None, **kwargs):
        """
        2D spatial dropout layer.
        
        Parameters:
        - rate: Fraction of input units to drop
        - data_format: Data format
        """

class GaussianNoise(Layer):
    def __init__(self, stddev, **kwargs):
        """
        Gaussian noise regularization layer.
        
        Parameters:
        - stddev: Standard deviation of noise distribution
        """

class GaussianDropout(Layer):
    def __init__(self, rate, **kwargs):
        """
        Multiplicative Gaussian noise layer.
        
        Parameters:
        - rate: Drop probability as in Dropout
        """

Activation Layers

Activation functions implemented as layers for explicit control and custom activation patterns.

class Activation(Layer):
    def __init__(self, activation, **kwargs):
        """
        Activation layer.
        
        Parameters:
        - activation: Name of activation function or callable
        """

class ReLU(Layer):
    def __init__(self, max_value=None, negative_slope=0.0, threshold=0.0, **kwargs):
        """
        ReLU activation layer.
        
        Parameters:
        - max_value: Maximum activation value
        - negative_slope: Slope for negative values
        - threshold: Threshold value for activation
        """

class LeakyReLU(Layer):
    def __init__(self, negative_slope=0.3, **kwargs):
        """
        Leaky ReLU activation layer.
        
        Parameters:
        - negative_slope: Slope for negative values
        """

class ELU(Layer):
    def __init__(self, alpha=1.0, **kwargs):
        """
        ELU activation layer.
        
        Parameters:
        - alpha: Scale for negative values
        """

class Softmax(Layer):
    def __init__(self, axis=-1, **kwargs):
        """
        Softmax activation layer.
        
        Parameters:
        - axis: Axis along which to apply softmax
        """

Merging Layers

Layers for combining multiple input tensors through various operations like addition, concatenation, and element-wise operations.

class Add(Layer):
    def __init__(self, **kwargs):
        """Element-wise addition layer."""

class Concatenate(Layer):
    def __init__(self, axis=-1, **kwargs):
        """
        Concatenation layer.
        
        Parameters:
        - axis: Axis along which to concatenate
        """

class Multiply(Layer):
    def __init__(self, **kwargs):
        """Element-wise multiplication layer."""

class Average(Layer):
    def __init__(self, **kwargs):
        """Element-wise averaging layer."""

class Maximum(Layer):
    def __init__(self, **kwargs):
        """Element-wise maximum layer."""

class Minimum(Layer):
    def __init__(self, **kwargs):
        """Element-wise minimum layer."""

class Dot(Layer):
    def __init__(self, axes, normalize=False, **kwargs):
        """
        Dot product layer.
        
        Parameters:
        - axes: Axes to compute dot product over
        - normalize: Whether to normalize inputs
        """

Usage Examples

Building a CNN

import keras
from keras import layers

model = keras.Sequential([
    keras.Input(shape=(28, 28, 1)),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])

Building an LSTM for Sequence Processing

import keras
from keras import layers

model = keras.Sequential([
    layers.Embedding(10000, 128),
    layers.LSTM(64, dropout=0.2, recurrent_dropout=0.2),
    layers.Dense(1, activation='sigmoid')
])

Using Functional API for Complex Architecture

import keras
from keras import layers

inputs = keras.Input(shape=(784,))
x = layers.Dense(128, activation='relu')(inputs)
x = layers.Dropout(0.2)(x)
branch1 = layers.Dense(64, activation='relu', name='branch1')(x)
branch2 = layers.Dense(64, activation='relu', name='branch2')(x)
merged = layers.Add()([branch1, branch2])
outputs = layers.Dense(10, activation='softmax')(merged)

model = keras.Model(inputs=inputs, outputs=outputs)

Install with Tessl CLI

npx tessl i tessl/pypi-keras
