Multi-backend deep learning framework that provides a unified, high-level API for building and training neural networks across JAX, TensorFlow, PyTorch, and OpenVINO backends.
—
Built-in datasets, data preprocessing utilities, image processing functions, and various helper utilities for machine learning workflows. These tools simplify data preparation and provide ready-to-use datasets for experimentation.
Standard datasets commonly used for machine learning research and experimentation, pre-loaded and ready to use.
# MNIST handwritten digits dataset
def load_data():
    """Fetch the MNIST handwritten-digit dataset.

    Returns:
        A pair of pairs ``((x_train, y_train), (x_test, y_test))`` where
        ``x_train`` / ``x_test`` are ``uint8`` grayscale image arrays of
        shape ``(num_samples, 28, 28)`` and ``y_train`` / ``y_test`` are
        ``uint8`` label arrays (digits 0-9) of shape ``(num_samples,)``.
    """
# Fashion-MNIST dataset (available as keras.datasets.fashion_mnist.load_data())
# CIFAR-10 dataset (available as keras.datasets.cifar10.load_data())
# CIFAR-100 dataset (available as keras.datasets.cifar100.load_data())
# IMDB movie reviews dataset (available as keras.datasets.imdb.load_data())
# Reuters newswire dataset (available as keras.datasets.reuters.load_data())
# Boston housing dataset (available as keras.datasets.boston_housing.load_data())
# California housing dataset (available as keras.datasets.california_housing.load_data())

# Functions for loading, saving, and manipulating images for machine learning workflows.
def load_img(path, color_mode='rgb', target_size=None, interpolation='nearest',
             keep_aspect_ratio=False):
    """Read an image file from disk as a PIL Image.

    Args:
        path: Filesystem path of the image to read.
        color_mode: One of ``'grayscale'``, ``'rgb'`` or ``'rgba'``.
        target_size: Optional ``(height, width)`` to resize to; ``None``
            keeps the native size.
        interpolation: Resampling method used when resizing.
        keep_aspect_ratio: If True, preserve the source aspect ratio when
            resizing instead of distorting the image.

    Returns:
        A ``PIL.Image.Image`` instance.
    """
def save_img(path, x, data_format=None, file_format=None, scale=True, **kwargs):
    """Write an image array to disk.

    Args:
        path: Destination file path.
        x: Array holding the image data.
        data_format: Image data format of ``x``.
        file_format: Explicit format such as ``'png'`` or ``'jpeg'``.
        scale: If True, rescale pixel values into the [0, 255] range.
        **kwargs: Extra options forwarded to the underlying writer.
    """
def img_to_array(img, data_format=None, dtype=None):
    """Turn a PIL Image into a numpy array.

    Args:
        img: A ``PIL.Image.Image`` instance.
        data_format: ``'channels_first'`` or ``'channels_last'``.
        dtype: Desired dtype of the resulting array.

    Returns:
        A numpy array holding the image pixels.
    """
def array_to_img(x, data_format=None, scale=True, dtype=None):
    """Turn a numpy array into a PIL Image.

    Args:
        x: Source array.
        data_format: Image data format of ``x``.
        scale: If True, rescale values into the [0, 255] range first.
        dtype: Dtype used during conversion.

    Returns:
        A ``PIL.Image.Image`` instance.
    """

# Functions for common data preprocessing tasks including categorical
# encoding, normalization, and sequence processing.
def to_categorical(y, num_classes=None, dtype='float32'):
    """One-hot encode a vector of integer class labels.

    Args:
        y: Class labels to encode.
        num_classes: Total class count; inferred from ``y`` when omitted.
        dtype: Dtype of the returned matrix.

    Returns:
        A binary numpy matrix with one row per input label.
    """
def normalize(x, axis=-1, order=2):
    """Scale an array so it has unit norm along the given axis.

    Args:
        x: Array to rescale.
        axis: Axis along which the norm is computed.
        order: Norm order (1 gives L1, 2 gives L2).

    Returns:
        The normalized array.
    """
def pad_sequences(sequences, maxlen=None, dtype='int32', padding='pre',
                  truncating='pre', value=0.0):
    """Bring a list of variable-length sequences to one common length.

    Args:
        sequences: List of sequences to pad or truncate.
        maxlen: Target length; defaults to the longest sequence.
        dtype: Dtype of the returned array.
        padding: Add padding at the ``'pre'`` (start) or ``'post'`` (end).
        truncating: Remove excess from the ``'pre'`` or ``'post'`` side.
        value: Fill value used for padding.

    Returns:
        A 2D numpy array of shape ``(len(sequences), maxlen)``.
    """

# Functions for creating tf.data.Dataset objects from directories and
# arrays for efficient data loading.
def image_dataset_from_directory(directory, labels='inferred', label_mode='int',
                                 class_names=None, color_mode='rgb', batch_size=32,
                                 image_size=(256, 256), shuffle=True, seed=None,
                                 validation_split=None, subset=None, interpolation='bilinear',
                                 follow_links=False, crop_to_aspect_ratio=False):
    """Build a ``tf.data.Dataset`` from a directory tree of images.

    Args:
        directory: Root directory whose subdirectories hold the images.
        labels: ``'inferred'`` to derive labels from subdirectory names,
            or an explicit list of labels.
        label_mode: ``'int'``, ``'categorical'``, ``'binary'`` or ``None``.
        class_names: Explicit class-name ordering, overriding inference.
        color_mode: ``'grayscale'``, ``'rgb'`` or ``'rgba'``.
        batch_size: Number of samples per batch.
        image_size: ``(height, width)`` every image is resized to.
        shuffle: Whether to shuffle the samples.
        seed: Random seed used for shuffling.
        validation_split: Fraction of the data reserved for validation.
        subset: ``'training'`` or ``'validation'`` when a split is set.
        interpolation: Resampling method used when resizing.
        follow_links: Whether to follow symbolic links.
        crop_to_aspect_ratio: Crop instead of distorting when resizing.

    Returns:
        A ``tf.data.Dataset`` object.
    """
def text_dataset_from_directory(directory, labels='inferred', label_mode='int',
                                class_names=None, batch_size=32, max_length=None,
                                shuffle=True, seed=None, validation_split=None,
                                subset=None, follow_links=False):
    """Build a ``tf.data.Dataset`` from a directory tree of text files.

    Args:
        directory: Root directory containing the text files.
        labels: ``'inferred'`` or an explicit list of labels.
        label_mode: ``'int'``, ``'categorical'``, ``'binary'`` or ``None``.
        class_names: Explicit class-name ordering.
        batch_size: Number of samples per batch.
        max_length: Maximum sequence length to keep.
        shuffle: Whether to shuffle the samples.
        seed: Random seed used for shuffling.
        validation_split: Fraction of the data reserved for validation.
        subset: ``'training'`` or ``'validation'`` when a split is set.
        follow_links: Whether to follow symbolic links.

    Returns:
        A ``tf.data.Dataset`` object.
    """
def timeseries_dataset_from_array(data, targets, sequence_length, sequence_stride=1,
                                  sampling_rate=1, batch_size=128, shuffle=False,
                                  seed=None, start_index=None, end_index=None):
    """Slice an ordered series into a windowed ``tf.data.Dataset``.

    Args:
        data: Ordered array of data points.
        targets: Targets aligned with ``data``.
        sequence_length: Number of points in each output window.
        sequence_stride: Offset between the starts of consecutive windows.
        sampling_rate: Step between points sampled inside a window.
        batch_size: Number of windows per batch.
        shuffle: Whether to shuffle the windows.
        seed: Random seed for shuffling.
        start_index: First index of ``data`` to use.
        end_index: Last index of ``data`` to use.

    Returns:
        A ``tf.data.Dataset`` yielding ``(inputs, targets)`` tuples.
    """

# Utility classes and functions for advanced data handling including
# custom datasets, feature engineering, and data packing.
class Sequence:
    """Base class for a finite, indexable source of data batches."""

    def __init__(self):
        """Set up the sequence."""

    def __getitem__(self, index):
        """Return the batch stored at position ``index``.

        Args:
            index: Zero-based batch index.

        Returns:
            The data for that batch.
        """

    def __len__(self):
        """Return how many batches the sequence provides."""

    def on_epoch_end(self):
        """Hook invoked once at the end of every epoch."""
class FeatureSpace:
    """Declarative helper for per-feature preprocessing and engineering."""

    def __init__(self, features, output_mode='concat'):
        """Configure the feature space.

        Args:
            features: Mapping of feature name to preprocessing layer.
            output_mode: ``'concat'`` for one combined output, or
                ``'dict'`` for per-feature outputs.
        """

    def adapt(self, dataset):
        """Fit the preprocessing state on a dataset.

        Args:
            dataset: Dataset to learn the preprocessing state from.
        """

    def __call__(self, data):
        """Run the configured preprocessing on ``data``.

        Args:
            data: Raw input features.

        Returns:
            The preprocessed feature representation.
        """
def pack_x_y_sample_weight(x, y=None, sample_weight=None):
    """Bundle inputs, targets and weights into the canonical tuple form.

    Args:
        x: Model inputs.
        y: Optional targets.
        sample_weight: Optional per-sample weights.

    Returns:
        The packed data tuple.
    """
def unpack_x_y_sample_weight(data):
    """Split a packed data tuple back into its components.

    Args:
        data: Packed data tuple.

    Returns:
        A ``(x, y, sample_weight)`` tuple.
    """
def split_dataset(dataset, left_size=None, right_size=None, shuffle=False, seed=None):
    """Partition a dataset into two complementary datasets.

    Args:
        dataset: Dataset to partition.
        left_size: Size of the first partition.
        right_size: Size of the second partition.
        shuffle: Whether to shuffle before partitioning.
        seed: Random seed used when shuffling.

    Returns:
        A ``(left_dataset, right_dataset)`` pair.
    """

# Functions for downloading files and managing data assets.
def get_file(fname=None, origin=None, untar=False, md5_hash=None, file_hash=None,
             cache_subdir='datasets', hash_algorithm='auto', extract=False,
             archive_format='auto', cache_dir=None):
    """Fetch a file from a URL, reusing a local cache when possible.

    Args:
        fname: Local name for the file; useful when it differs from the
            name in ``origin``.
        origin: URL to download from.
        untar: Whether to untar the file after download.
        md5_hash: MD5 checksum for verification (deprecated).
        file_hash: Expected hash of the downloaded file.
        cache_subdir: Subdirectory of the cache directory to store into.
        hash_algorithm: ``'md5'``, ``'sha256'`` or ``'auto'``.
        extract: Whether to unpack an archive after download.
        archive_format: ``'auto'``, ``'tar'`` or ``'zip'``.
        cache_dir: Root directory for cached files.

    Returns:
        The local path of the downloaded file.
    """

# Utilities for setting random seeds and managing global configuration.
def set_random_seed(seed=None):
    """Set the global random seed so results are reproducible.

    Args:
        seed: Random seed value.
    """
class Config:
    """On/off registry for global configuration features."""

    def enable(self, feature):
        """Turn the given configuration feature on."""

    def disable(self, feature):
        """Turn the given configuration feature off."""

    def is_enabled(self, feature):
        """Report whether the given feature is currently on."""
class Progbar:
    """Console progress bar for iterative work such as training loops."""

    def __init__(self, target, width=30, verbose=1, interval=0.05,
                 stateful_metrics=None, unit_name='step'):
        """Configure the progress bar.

        Args:
            target: Total number of steps expected.
            width: Character width of the drawn bar.
            verbose: Verbosity mode.
            interval: Minimum interval between visual updates.
            stateful_metrics: Names of metrics that shouldn't be averaged.
            unit_name: Display label for one step.
        """

    def update(self, current, values=None, finalize=None):
        """Advance the bar to step ``current``.

        Args:
            current: Current step index.
            values: List of ``(name, value)`` metric pairs to display.
            finalize: Whether to draw the bar in its finished state.
        """

import keras
from keras.utils import load_img, img_to_array, to_categorical
import numpy as np

# Load a single image and prepare it as a 1-image batch.
image_path = 'cat.jpg'
pil_image = load_img(image_path, target_size=(224, 224))
batch = img_to_array(pil_image)
batch = np.expand_dims(batch, axis=0)
batch /= 255.0  # scale pixel values into [0, 1]

# One-hot encode integer class labels.
class_indices = [0, 1, 2, 1, 0]
one_hot = to_categorical(class_indices, num_classes=3)
print(one_hot)
# [[1. 0. 0.]
#  [0. 1. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]
#  [1. 0. 0.]]
import keras
# Build training and validation image datasets from one directory tree.
training_ds = keras.utils.image_dataset_from_directory(
    'path/to/train_data/',
    labels='inferred',
    label_mode='categorical',
    color_mode='rgb',
    batch_size=32,
    image_size=(224, 224),
    shuffle=True,
    validation_split=0.2,
    subset='training',
    seed=123,
)
validation_ds = keras.utils.image_dataset_from_directory(
    'path/to/train_data/',
    labels='inferred',
    label_mode='categorical',
    color_mode='rgb',
    batch_size=32,
    image_size=(224, 224),
    shuffle=True,
    validation_split=0.2,
    subset='validation',
    seed=123,
)
# Feed both splits to training:
# model.fit(training_ds, validation_data=validation_ds, epochs=10)
import keras
from keras.datasets import mnist, cifar10
from keras.utils import to_categorical

# Fetch MNIST and scale pixel values into [0, 1].
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images.astype('float32') / 255.0
test_images = test_images.astype('float32') / 255.0
# Add the trailing channel axis expected by convolutional layers.
train_images = train_images.reshape(train_images.shape[0], 28, 28, 1)
test_images = test_images.reshape(test_images.shape[0], 28, 28, 1)
# One-hot encode the digit labels.
train_labels = to_categorical(train_labels, 10)
test_labels = to_categorical(test_labels, 10)
print(f"Training data shape: {train_images.shape}")
print(f"Training labels shape: {train_labels.shape}")
import keras
import numpy as np

class CustomDataSequence(keras.utils.Sequence):
    """Mini-batch provider that reshuffles the sample order every epoch."""

    def __init__(self, x_data, y_data, batch_size):
        self.x_data = x_data
        self.y_data = y_data
        self.batch_size = batch_size
        self.indices = np.arange(len(self.x_data))

    def __len__(self):
        # Integer division drops any final partial batch.
        return len(self.x_data) // self.batch_size

    def __getitem__(self, index):
        start = index * self.batch_size
        picked = self.indices[start:start + self.batch_size]
        return self.x_data[picked], self.y_data[picked]

    def on_epoch_end(self):
        np.random.shuffle(self.indices)

# Use custom sequence
# train_sequence = CustomDataSequence(x_train, y_train, batch_size=32)
# model.fit(train_sequence, epochs=10)
import keras
from keras import layers
from keras.utils import FeatureSpace

# Declare per-feature preprocessing, concatenated into one output vector.
fs = FeatureSpace(
    features={
        'age': layers.Normalization(),
        'category': layers.StringLookup(output_mode='one_hot'),
        'price': layers.Discretization(num_bins=10),
    },
    output_mode='concat',
)
# Fit the preprocessing state on training data:
# fs.adapt(train_dataset)
# Then transform raw inputs:
# processed_features = fs(raw_features)
import keras
import numpy as np

# Synthesize a sine wave and a slightly phase-shifted target signal.
signal = np.sin(np.arange(1000) * 0.1)
shifted = np.sin(np.arange(1000) * 0.1 + 0.1)
# Window the series into (sequence, target) pairs for sequence models.
dataset = keras.utils.timeseries_dataset_from_array(
    data=signal,
    targets=shifted,
    sequence_length=10,
    batch_size=32,
    shuffle=True,
)
# Train an RNN on the windowed data:
# model.fit(dataset, epochs=10)
# Install with Tessl CLI
npx tessl i tessl/pypi-keras