An end-to-end open source platform for machine learning
—
High-level neural network building blocks including models, layers, optimizers, losses, and metrics for rapid prototyping and production. Keras provides an intuitive interface for building and training deep learning models.
High-level model classes for building neural networks.
class Sequential(Model):
"""
Sequential groups a linear stack of layers into a tf.keras.Model.
Methods:
- add(layer): Adds a layer instance on top of the layer stack
- pop(): Removes the last layer in the model
- compile(optimizer, loss, metrics): Configures the model for training
- fit(x, y, **kwargs): Trains the model for a fixed number of epochs
- evaluate(x, y, **kwargs): Returns the loss value & metrics values for the model
- predict(x, **kwargs): Generates output predictions for the input samples
"""
class Model:
"""
Model groups layers into an object with training and inference features.
Methods:
- compile(optimizer, loss, metrics): Configures the model for training
- fit(x, y, **kwargs): Trains the model for a fixed number of epochs
- evaluate(x, y, **kwargs): Returns the loss value & metrics values for the model
- predict(x, **kwargs): Generates output predictions for the input samples
- save(filepath, **kwargs): Saves the model to TensorFlow SavedModel or a single HDF5 file
- load_model(filepath, **kwargs): Loads a model saved via save() (exposed as the module-level function keras.models.load_model, not an instance method)
- summary(): Prints a string summary of the network
- get_weights(): Retrieves the weights of the model
- set_weights(weights): Sets the weights of the model
"""
def load_model(filepath, custom_objects=None, compile=True, options=None):
"""
Loads a model saved via model.save().
Parameters:
- filepath: One of the following: String or pathlib.Path object, path to the saved model
- custom_objects: Optional dictionary mapping names to custom classes or functions
- compile: Boolean, whether to compile the model after loading
- options: Optional tf.saved_model.LoadOptions object that specifies options for loading from SavedModel
Returns:
A Keras model instance
"""
def save_model(model, filepath, overwrite=True, include_optimizer=True, save_format=None,
signatures=None, options=None, save_traces=True):
"""
Saves a model as a TensorFlow SavedModel or HDF5 file.
Parameters:
- model: Keras model instance to be saved
- filepath: One of the following: String or pathlib.Path object, path where to save the model
- overwrite: Whether we should overwrite any existing model at the target location
- include_optimizer: If True, save optimizer's state together
- save_format: Either 'tf' or 'h5', indicating whether to save the model to TensorFlow SavedModel or HDF5
- signatures: Signatures to save with the SavedModel
- options: Optional tf.saved_model.SaveOptions object that specifies options for saving to SavedModel
- save_traces: When enabled, the SavedModel will store the function traces for each layer
"""Essential layer types for building neural networks.
class Dense(Layer):
"""
Just your regular densely-connected NN layer.
Parameters:
- units: Positive integer, dimensionality of the output space
- activation: Activation function to use
- use_bias: Boolean, whether the layer uses a bias vector
- kernel_initializer: Initializer for the kernel weights matrix
- bias_initializer: Initializer for the bias vector
- kernel_regularizer: Regularizer function applied to the kernel weights matrix
- bias_regularizer: Regularizer function applied to the bias vector
- activity_regularizer: Regularizer function applied to the output of the layer
- kernel_constraint: Constraint function applied to the kernel weights matrix
- bias_constraint: Constraint function applied to the bias vector
"""
class Dropout(Layer):
"""
Applies Dropout to the input.
Parameters:
- rate: Float between 0 and 1. Fraction of the input units to drop
- noise_shape: 1D integer tensor representing the shape of the binary dropout mask
- seed: A Python integer to use as random seed
"""
class Flatten(Layer):
"""
Flattens the input. Does not affect the batch size.
Parameters:
- data_format: A string, one of channels_last (default) or channels_first
"""
class Reshape(Layer):
"""
Reshapes an output to a certain shape.
Parameters:
- target_shape: Target shape. Tuple of integers, does not include the samples dimension (batch size)
"""
class Input:
"""
Input() is used to instantiate a Keras tensor.
Parameters:
- shape: A shape tuple (integers), not including the batch size
- batch_size: optional static batch size (integer)
- name: An optional name string for the layer
- dtype: The data type expected by the input, as a string
- sparse: A boolean specifying whether the placeholder to be created is sparse
- tensor: Optional existing tensor to wrap into the Input layer
- ragged: A boolean specifying whether the placeholder to be created is ragged
"""
class Lambda(Layer):
"""
Wraps arbitrary expressions as a Layer object.
Parameters:
- function: The function to be evaluated. Takes input tensor as first argument
- output_shape: Expected output shape from function
- mask: Either None (no masking) or a callable with the same signature as the compute_mask layer method
- arguments: Optional dictionary of keyword arguments to be passed to the function
"""Layers for processing spatial data such as images.
class Conv2D(Layer):
"""
2D convolution layer (e.g. spatial convolution over images).
Parameters:
- filters: Integer, the dimensionality of the output space
- kernel_size: An integer or tuple/list of 2 integers, specifying the height and width of the 2D convolution window
- strides: An integer or tuple/list of 2 integers, specifying the strides of the convolution
- padding: one of "valid" or "same" (case-insensitive)
- data_format: A string, one of channels_last (default) or channels_first
- dilation_rate: an integer or tuple/list of 2 integers, specifying the dilation rate to use for dilated convolution
- groups: A positive integer specifying the number of groups in which the input is split
- activation: Activation function to use
- use_bias: Boolean, whether the layer uses a bias vector
- kernel_initializer: Initializer for the kernel weights matrix
- bias_initializer: Initializer for the bias vector
"""
class Conv2DTranspose(Layer):
"""
Transposed convolution layer (sometimes called Deconvolution).
Parameters:
- filters: Integer, the dimensionality of the output space
- kernel_size: An integer or tuple/list of 2 integers, specifying the height and width of the 2D convolution window
- strides: An integer or tuple/list of 2 integers, specifying the strides of the convolution
- padding: one of "valid" or "same" (case-insensitive)
- output_padding: An integer or tuple/list of 2 integers, specifying the amount of padding along the height and width
- data_format: A string, one of channels_last (default) or channels_first
- dilation_rate: an integer or tuple/list of 2 integers, specifying the dilation rate to use for dilated convolution
- activation: Activation function to use
- use_bias: Boolean, whether the layer uses a bias vector
"""
class MaxPooling2D(Layer):
"""
Max pooling operation for 2D spatial data.
Parameters:
- pool_size: integer or tuple of 2 integers, window size over which to take the maximum
- strides: Integer, tuple of 2 integers, or None. Strides values
- padding: One of "valid" or "same" (case-insensitive)
- data_format: A string, one of channels_last (default) or channels_first
"""
class AveragePooling2D(Layer):
"""
Average pooling operation for 2D spatial data.
Parameters:
- pool_size: integer or tuple of 2 integers, factors by which to downscale (vertical, horizontal)
- strides: Integer, tuple of 2 integers, or None
- padding: One of "valid" or "same" (case-insensitive)
- data_format: A string, one of channels_last (default) or channels_first
"""Layers for processing sequential data.
class LSTM(Layer):
"""
Long Short-Term Memory layer - Hochreiter 1997.
Parameters:
- units: Positive integer, dimensionality of the output space
- activation: Activation function to use
- recurrent_activation: Activation function to use for the recurrent step
- use_bias: Boolean (default True), whether the layer uses a bias vector
- kernel_initializer: Initializer for the kernel weights matrix
- recurrent_initializer: Initializer for the recurrent_kernel weights matrix
- bias_initializer: Initializer for the bias vector
- unit_forget_bias: Boolean (default True). If True, add 1 to the bias of the forget gate at initialization
- kernel_regularizer: Regularizer function applied to the kernel weights matrix
- recurrent_regularizer: Regularizer function applied to the recurrent_kernel weights matrix
- bias_regularizer: Regularizer function applied to the bias vector
- activity_regularizer: Regularizer function applied to the output of the layer
- kernel_constraint: Constraint function applied to the kernel weights matrix
- recurrent_constraint: Constraint function applied to the recurrent_kernel weights matrix
- bias_constraint: Constraint function applied to the bias vector
- dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs
- recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the recurrent state
- return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence
- return_state: Boolean. Whether to return the last state in addition to the output
- go_backwards: Boolean (default False). If True, process the input sequence backwards and return the reversed sequence
- stateful: Boolean (default False). If True, the last state for each sample at index i in a batch will be used as initial state for the sample of index i in the following batch
- time_major: The shape format of the inputs and outputs. If True, the inputs and outputs will be in shape (timesteps, batch, ...), whereas in the False case, it will be (batch, timesteps, ...)
- unroll: Boolean (default False). If True, the network will be unrolled, else a symbolic loop will be used
"""
class GRU(Layer):
"""
Gated Recurrent Unit - Cho et al. 2014.
Parameters:
- units: Positive integer, dimensionality of the output space
- activation: Activation function to use
- recurrent_activation: Activation function to use for the recurrent step
- use_bias: Boolean, (default True), whether the layer uses a bias vector
- kernel_initializer: Initializer for the kernel weights matrix
- recurrent_initializer: Initializer for the recurrent_kernel weights matrix
- bias_initializer: Initializer for the bias vector
- kernel_regularizer: Regularizer function applied to the kernel weights matrix
- recurrent_regularizer: Regularizer function applied to the recurrent_kernel weights matrix
- bias_regularizer: Regularizer function applied to the bias vector
- activity_regularizer: Regularizer function applied to the output of the layer
- kernel_constraint: Constraint function applied to the kernel weights matrix
- recurrent_constraint: Constraint function applied to the recurrent_kernel weights matrix
- bias_constraint: Constraint function applied to the bias vector
- dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs
- recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the recurrent state
- return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence
- return_state: Boolean. Whether to return the last state in addition to the output
- go_backwards: Boolean (default False). If True, process the input sequence backwards and return the reversed sequence
- stateful: Boolean (default False). If True, the last state for each sample at index i in a batch will be used as initial state for the sample of index i in the following batch
- unroll: Boolean (default False). If True, the network will be unrolled, else a symbolic loop will be used
- time_major: The shape format of the inputs and outputs
- reset_after: GRU convention (whether to apply reset gate after or before matrix multiplication)
"""
class SimpleRNN(Layer):
"""
Fully-connected RNN where the output is to be fed back to input.
Parameters:
- units: Positive integer, dimensionality of the output space
- activation: Activation function to use
- use_bias: Boolean, (default True), whether the layer uses a bias vector
- kernel_initializer: Initializer for the kernel weights matrix
- recurrent_initializer: Initializer for the recurrent_kernel weights matrix
- bias_initializer: Initializer for the bias vector
- kernel_regularizer: Regularizer function applied to the kernel weights matrix
- recurrent_regularizer: Regularizer function applied to the recurrent_kernel weights matrix
- bias_regularizer: Regularizer function applied to the bias vector
- activity_regularizer: Regularizer function applied to the output of the layer
- kernel_constraint: Constraint function applied to the kernel weights matrix
- recurrent_constraint: Constraint function applied to the recurrent_kernel weights matrix
- bias_constraint: Constraint function applied to the bias vector
- dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs
- recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the recurrent state
- return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence
- return_state: Boolean. Whether to return the last state in addition to the output
- go_backwards: Boolean (default False). If True, process the input sequence backwards and return the reversed sequence
- stateful: Boolean (default False). If True, the last state for each sample at index i in a batch will be used as initial state for the sample of index i in the following batch
- unroll: Boolean (default False). If True, the network will be unrolled, else a symbolic loop will be used
"""Optimization algorithms for training neural networks.
class Adam(Optimizer):
"""
Optimizer that implements the Adam algorithm.
Parameters:
- learning_rate: A Tensor, floating point value, or a schedule that is a tf.keras.optimizers.schedules.LearningRateSchedule
- beta_1: A float value or a constant float tensor, or a callable that takes no arguments and returns the actual value to use
- beta_2: A float value or a constant float tensor, or a callable that takes no arguments and returns the actual value to use
- epsilon: A small constant for numerical stability
- amsgrad: Boolean. Whether to apply AMSGrad variant of this algorithm from the paper "On the Convergence of Adam and beyond"
- name: Optional name prefix for the operations created when applying gradients
"""
class SGD(Optimizer):
"""
Gradient descent (with momentum) optimizer.
Parameters:
- learning_rate: A Tensor, floating point value, or a schedule that is a tf.keras.optimizers.schedules.LearningRateSchedule
- momentum: float hyperparameter >= 0 that accelerates gradient descent in the relevant direction and dampens oscillations
- nesterov: boolean. Whether to apply Nesterov momentum
- name: Optional name prefix for the operations created when applying gradients
"""
class RMSprop(Optimizer):
"""
Optimizer that implements the RMSprop algorithm.
Parameters:
- learning_rate: A Tensor, floating point value, or a schedule that is a tf.keras.optimizers.schedules.LearningRateSchedule
- rho: Discounting factor for the history/coming gradient
- momentum: A scalar or a scalar Tensor
- epsilon: A small constant for numerical stability
- centered: Boolean. If True, gradients are normalized by the estimated variance of the gradient
- name: Optional name prefix for the operations created when applying gradients
"""import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# --- Sequential API: a dense classifier built by stacking layers ---
model = keras.Sequential([
    layers.Dense(128, activation='relu', input_shape=(784,)),
    layers.Dropout(0.2),
    layers.Dense(10, activation='softmax'),
])

# --- Functional API: the same architecture expressed as a graph of layers ---
inp = keras.Input(shape=(784,))
h = layers.Dense(128, activation='relu')(inp)
h = layers.Dropout(0.2)(h)
out = layers.Dense(10, activation='softmax')(h)
model = keras.Model(inputs=inp, outputs=out)

# Configure training: optimizer, loss, and the metrics to report.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on synthetic data, holding out 20% of samples for validation.
import numpy as np

x_train = np.random.random((1000, 784))
y_train = np.random.randint(10, size=(1000,))
model.fit(x_train, y_train, epochs=5, batch_size=32, validation_split=0.2)

# Score on held-out data, then generate class-probability predictions.
x_test = np.random.random((100, 784))
y_test = np.random.randint(10, size=(100,))
loss, accuracy = model.evaluate(x_test, y_test)
predictions = model.predict(x_test)

# Round-trip the trained model through a single HDF5 file.
model.save('my_model.h5')
loaded_model = keras.models.load_model('my_model.h5')

# --- Convolutional network for 28x28 single-channel images ---
cnn_model = keras.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax'),
])
# LSTM model example
lstm_model = keras.Sequential([
layers.LSTM(50, return_sequences=True, input_shape=(10, 1)),
layers.LSTM(50, return_sequences=False),
layers.Dense(25),
layers.Dense(1)
])

Install with Tessl CLI
npx tessl i tessl/pypi-tensorflow