Multi-backend deep learning framework providing a unified API for building and training neural networks across JAX, TensorFlow, PyTorch, and OpenVINO backends.

Comprehensive data preprocessing utilities for images, text, audio, and numerical data, with built-in augmentation capabilities, dataset-creation functions, and feature preprocessing layers.

Functions for creating datasets from various data sources and formats.
def image_dataset_from_directory(directory, labels='inferred', label_mode='int',
                                 class_names=None, color_mode='rgb', batch_size=32,
                                 image_size=(256, 256), shuffle=True, seed=None,
                                 validation_split=None, subset=None, **kwargs):
    """
    Create image dataset from directory structure.

    API stub: only the signature and contract are declared here; this file
    contains no implementation.

    Args:
        directory (str): Path to directory containing subdirectories of images
        labels (str): How to generate labels ('inferred' or None)
        label_mode (str): Type of labels ('int', 'categorical', 'binary', None)
        class_names (list, optional): Explicit list of class names
        color_mode (str): Image color mode ('grayscale', 'rgb', 'rgba')
        batch_size (int): Batch size
        image_size (tuple): Target image size
        shuffle (bool): Whether to shuffle data
        seed (int, optional): Random seed
        validation_split (float, optional): Fraction for validation
        subset (str, optional): Subset to return ('training' or 'validation')

    Returns:
        Dataset: Configured image dataset
    """
def text_dataset_from_directory(directory, labels='inferred', label_mode='int',
                                class_names=None, batch_size=32, max_length=None,
                                shuffle=True, seed=None, validation_split=None,
                                subset=None, **kwargs):
    """
    Create a text dataset from a directory of text files (API stub).

    Args:
        directory (str): Path to a directory of text files, organized by class
        labels (str): How to generate labels ('inferred' or None)
        label_mode (str): Type of labels ('int', 'categorical', 'binary', None)
        class_names (list, optional): Explicit list of class names
        batch_size (int): Batch size
        max_length (int, optional): Maximum text length — presumably a
            truncation bound; confirm against the keras documentation
        shuffle (bool): Whether to shuffle data
        seed (int, optional): Random seed
        validation_split (float, optional): Fraction for validation
        subset (str, optional): Subset to return ('training' or 'validation')

    Returns:
        Dataset: Configured text dataset
    """
    ...
def timeseries_dataset_from_array(data, targets, sequence_length, sequence_stride=1,
                                  sampling_rate=1, batch_size=128, shuffle=False,
                                  seed=None, start_index=None, end_index=None):
    """
    Create a dataset of sliding windows over a timeseries array (API stub).

    Args:
        data (array): Source timeseries data
        targets (array): Targets aligned with ``data`` — None-handling is not
            visible in this stub; confirm against the keras documentation
        sequence_length (int): Length of each output window
        sequence_stride (int): Stride between consecutive windows
        sampling_rate (int): Period between timesteps within a window
        batch_size (int): Batch size
        shuffle (bool): Whether to shuffle windows
        seed (int, optional): Random seed
        start_index (int, optional): First index of ``data`` to use
        end_index (int, optional): Last index of ``data`` to use

    Returns:
        Dataset: Configured timeseries dataset
    """
    ...
def audio_dataset_from_directory(directory, labels='inferred', label_mode='int',
                                 class_names=None, batch_size=32, sampling_rate=16000,
                                 output_sequence_length=16000, **kwargs):
    """
    Create an audio dataset from a directory of audio files (API stub).

    Args:
        directory (str): Path to a directory of audio files, organized by class
        labels (str): How to generate labels ('inferred' or None)
        label_mode (str): Type of labels ('int', 'categorical', 'binary', None)
        class_names (list, optional): Explicit list of class names
        batch_size (int): Batch size
        sampling_rate (int): Audio sampling rate in Hz
        output_sequence_length (int): Target clip length — presumably clips are
            padded/truncated to this length; confirm against the keras docs

    Returns:
        Dataset: Configured audio dataset
    """
    ...


# Preprocessing layers for text and sequence data including vectorization and encoding.
class TextVectorization:
    """
    Text vectorization layer for converting text to sequences.

    Args:
        max_tokens (int, optional): Maximum vocabulary size
        standardize (str or callable): Text standardization ('lower_and_strip_punctuation', 'lower', 'strip_punctuation', or callable)
        split (str or callable): Text splitting strategy ('whitespace' or callable)
        ngrams (int, optional): N-gram size
        output_mode (str): Output format ('int', 'multi_hot', 'count', 'tf_idf')
        output_sequence_length (int, optional): Output sequence length
        pad_to_max_tokens (bool): Whether to pad to max_tokens
        vocabulary (list, optional): Pre-existing vocabulary
        idf_weights (array, optional): IDF weights for tf-idf mode
        sparse (bool): Whether to return sparse tensors
        ragged (bool): Whether to return ragged tensors

    Note:
        Parameters documented after ``output_sequence_length`` are accepted
        via ``**kwargs`` in this stub signature.
    """

    def __init__(self, max_tokens=None, standardize='lower_and_strip_punctuation',
                 split='whitespace', ngrams=None, output_mode='int',
                 output_sequence_length=None, **kwargs): ...

    # Build the layer's vocabulary from a data source (stub).
    def adapt(self, data, batch_size=None, steps=None): ...

    # Return the current vocabulary (stub).
    def get_vocabulary(self): ...

    # Replace the vocabulary; idf_weights presumably applies to 'tf_idf'
    # output mode — confirm against the keras documentation (stub).
    def set_vocabulary(self, vocabulary, idf_weights=None): ...
class StringLookup:
    """
    String to integer lookup layer.

    Args:
        max_tokens (int, optional): Maximum vocabulary size
        num_oov_indices (int): Number of out-of-vocabulary indices
        mask_token (str, optional): Token to use for masking
        oov_token (str): Token to use for out-of-vocabulary
        vocabulary (list, optional): Pre-existing vocabulary
        idf_weights (array, optional): IDF weights
        invert (bool): Whether to invert the lookup
        output_mode (str): Output format ('int', 'multi_hot', 'count', 'one_hot', 'tf_idf')
        sparse (bool): Whether to return sparse tensors
        pad_to_max_tokens (bool): Whether to pad to max_tokens

    Note:
        Parameters documented after ``vocabulary`` are accepted via
        ``**kwargs`` in this stub signature.
    """

    def __init__(self, max_tokens=None, num_oov_indices=1, mask_token=None,
                 oov_token='[UNK]', vocabulary=None, **kwargs): ...
class IntegerLookup:
    """
    Integer to integer lookup layer (API stub).

    Args:
        max_tokens (int, optional): Maximum vocabulary size
        num_oov_indices (int): Number of out-of-vocabulary indices
        mask_token (int, optional): Token to use for masking
        oov_token (int): Token used for out-of-vocabulary values
        vocabulary (list, optional): Pre-existing vocabulary
    """

    def __init__(self, max_tokens=None, num_oov_indices=1, mask_token=None,
                 oov_token=-1, vocabulary=None, **kwargs): ...
class CategoryEncoding:
    """
    Categorical encoding layer (API stub).

    Args:
        num_tokens (int, optional): Total number of tokens
        output_mode (str): Output format ('multi_hot', 'one_hot', 'count')
        sparse (bool): Whether to return sparse tensors
    """

    def __init__(self, num_tokens=None, output_mode='multi_hot', sparse=False, **kwargs): ...


# Preprocessing layers for image data including resizing, augmentation, and transformations.
class Resizing:
    """
    Resize images to target size (API stub).

    Args:
        height (int): Target height
        width (int): Target width
        interpolation (str): Interpolation method ('bilinear', 'nearest', 'bicubic', 'area', 'lanczos3', 'lanczos5', 'gaussian', 'mitchellcubic')
        crop_to_aspect_ratio (bool): Whether to crop to maintain aspect ratio
    """

    def __init__(self, height, width, interpolation='bilinear', crop_to_aspect_ratio=False, **kwargs): ...
class CenterCrop:
    """
    Crop images to specified size from center (API stub).

    Args:
        height (int): Target height
        width (int): Target width
    """

    def __init__(self, height, width, **kwargs): ...
class Rescaling:
    """
    Rescale pixel values (API stub; presumably computes
    ``output = input * scale + offset`` — confirm against the keras docs).

    Args:
        scale (float): Scaling factor
        offset (float): Offset value
    """

    def __init__(self, scale, offset=0.0, **kwargs): ...
# Data augmentation layers
class RandomFlip:
    """
    Random image flipping (API stub).

    Args:
        mode (str): Flip mode ('horizontal', 'vertical', 'horizontal_and_vertical')
        seed (int, optional): Random seed
    """

    def __init__(self, mode='horizontal_and_vertical', seed=None, **kwargs): ...
class RandomRotation:
    """
    Random image rotation (API stub).

    Args:
        factor (float or tuple): Rotation factor as fraction of 2π
        fill_mode (str): Fill mode for transformed pixels
        interpolation (str): Interpolation method
        seed (int, optional): Random seed
        fill_value (float): Fill value for constant fill mode
    """

    def __init__(self, factor, fill_mode='reflect', interpolation='bilinear',
                 seed=None, fill_value=0.0, **kwargs): ...
class RandomZoom:
    """
    Random image zooming (API stub).

    Args:
        height_factor (float or tuple): Vertical zoom factor range
        width_factor (float or tuple, optional): Horizontal zoom factor range;
            behavior when None is not visible here — confirm against the docs
        fill_mode (str): Fill mode for transformed pixels
        interpolation (str): Interpolation method
        seed (int, optional): Random seed
        fill_value (float): Fill value for constant fill mode
    """

    def __init__(self, height_factor, width_factor=None, fill_mode='reflect',
                 interpolation='bilinear', seed=None, fill_value=0.0, **kwargs): ...
class RandomTranslation:
    """
    Random image translation (API stub).

    Args:
        height_factor (float or tuple): Vertical shift factor range
        width_factor (float or tuple): Horizontal shift factor range
        fill_mode (str): Fill mode for transformed pixels
        interpolation (str): Interpolation method
        seed (int, optional): Random seed
        fill_value (float): Fill value for constant fill mode
    """

    def __init__(self, height_factor, width_factor, fill_mode='reflect',
                 interpolation='bilinear', seed=None, fill_value=0.0, **kwargs): ...
class RandomCrop:
    """
    Random image cropping (API stub).

    Args:
        height (int): Crop height
        width (int): Crop width
        seed (int, optional): Random seed
    """

    def __init__(self, height, width, seed=None, **kwargs): ...
class RandomBrightness:
    """
    Random brightness adjustment (API stub).

    Args:
        factor (float or tuple): Brightness adjustment factor
        value_range (tuple): Range of the input pixel values
        seed (int, optional): Random seed
    """

    def __init__(self, factor, value_range=(0, 255), seed=None, **kwargs): ...
class RandomContrast:
    """
    Random contrast adjustment (API stub).

    Args:
        factor (float or tuple): Contrast adjustment factor
        seed (int, optional): Random seed
    """

    def __init__(self, factor, seed=None, **kwargs): ...


# Preprocessing layers for numerical data including normalization and discretization.
class Normalization:
    """
    Feature normalization layer (API stub).

    Args:
        axis (int): Axis to normalize along
        mean (array, optional): Pre-computed mean
        variance (array, optional): Pre-computed variance
        invert (bool): Whether to invert normalization
    """

    def __init__(self, axis=-1, mean=None, variance=None, invert=False, **kwargs): ...

    # Presumably computes mean/variance from sample data when not supplied
    # at construction — confirm against the keras documentation (stub).
    def adapt(self, data, batch_size=None, steps=None): ...
class Discretization:
    """
    Value discretization layer (API stub).

    Args:
        bin_boundaries (array, optional): Bin boundary values
        num_bins (int, optional): Number of bins
        epsilon (float): Small value for bin boundary adjustment
        output_mode (str): Output format ('int', 'one_hot', 'multi_hot', 'count')
        sparse (bool): Whether to return sparse tensors
    """

    def __init__(self, bin_boundaries=None, num_bins=None, epsilon=0.01,
                 output_mode='int', sparse=False, **kwargs): ...

    # Presumably derives bin boundaries from data when only ``num_bins`` is
    # given — confirm against the keras documentation (stub).
    def adapt(self, data, batch_size=None, steps=None): ...


# Specialized layers for audio signal processing.
class MelSpectrogram:
    """
    Mel-frequency spectrogram layer (API stub).

    Args:
        fft_length (int): FFT length
        sequence_stride (int): Hop length between frames
        sequence_length (int): Window length
        window (str): Window function
        sampling_rate (int): Audio sampling rate
        num_mel_bins (int): Number of mel frequency bins
        min_freq (float): Minimum frequency
        max_freq (float): Maximum frequency
        power_to_db (bool): Whether to convert power to decibels
        top_db (float): Dynamic range for dB conversion
        mag_exp (float): Magnitude exponent

    Note:
        ``min_freq``, ``max_freq``, ``power_to_db``, ``top_db`` and
        ``mag_exp`` are accepted via ``**kwargs`` in this stub signature.
    """

    def __init__(self, fft_length=2048, sequence_stride=512, sequence_length=None,
                 window='hann', sampling_rate=16000, num_mel_bins=128, **kwargs): ...
class STFTSpectrogram:
    """
    Short-time Fourier transform spectrogram layer (API stub).

    Args:
        fft_length (int): FFT length
        sequence_stride (int): Hop length between frames
        sequence_length (int, optional): Window length
        window (str): Window function name
    """

    def __init__(self, fft_length=2048, sequence_stride=512, sequence_length=None,
                 window='hann', **kwargs): ...


# Additional preprocessing utilities and helper functions.
def split_dataset(dataset, left_size=None, right_size=None, shuffle=False, seed=None):
    """
    Split dataset into two parts (API stub).

    Args:
        dataset: Dataset to split
        left_size (float or int, optional): Size of left split — presumably a
            fraction when float, an absolute count when int; confirm
        right_size (float or int, optional): Size of right split
        shuffle (bool): Whether to shuffle before splitting
        seed (int, optional): Random seed

    Returns:
        tuple: (left_dataset, right_dataset)
    """
def to_categorical(y, num_classes=None, dtype='float32'):
    """
    Convert integer labels to one-hot categorical encoding.

    The original stub had no body (always returned None) despite documenting
    a return value; this is the standard pure-numpy implementation.

    Args:
        y (array): Integer labels (any shape)
        num_classes (int, optional): Total number of classes; inferred as
            ``max(y) + 1`` when not given
        dtype (str): Output data type

    Returns:
        array: One-hot array of shape ``y.shape + (num_classes,)``
    """
    import numpy as np

    y = np.asarray(y, dtype='int64')
    input_shape = y.shape
    flat = y.ravel()
    if num_classes is None:
        # Empty input with no explicit class count yields zero classes
        # rather than raising on max() of an empty array.
        num_classes = int(flat.max()) + 1 if flat.size else 0
    out = np.zeros((flat.size, num_classes), dtype=dtype)
    if flat.size:
        out[np.arange(flat.size), flat] = 1
    return out.reshape(input_shape + (num_classes,))
def normalize(x, axis=-1, order=2):
    """
    Normalize an array so each slice along ``axis`` has unit ``order``-norm.

    The original stub had no body (always returned None) despite documenting
    a return value; this is the standard pure-numpy implementation.

    Args:
        x (array): Input array
        axis (int): Normalization axis
        order (int): Norm order (2 = Euclidean)

    Returns:
        array: Normalized array of the same shape; all-zero slices are left
        unchanged (divided by 1 to avoid division by zero)
    """
    import numpy as np

    x = np.asarray(x, dtype=float)
    norms = np.atleast_1d(np.linalg.norm(x, order, axis))
    norms[norms == 0] = 1  # keep zero vectors as zeros instead of NaN
    return x / np.expand_dims(norms, axis)
def pad_sequences(sequences, maxlen=None, dtype='int32', padding='pre',
                  truncating='pre', value=0.0):
    """
    Pad a list of sequences to the same length.

    The original stub had no body (always returned None) despite documenting
    a return value; this is the standard pure-numpy implementation. Also
    repairs the extraction artifact that fused ``import keras`` (belonging to
    the usage example below) onto the closing docstring line.

    Args:
        sequences (list): List of sequences (lists/tuples of scalars)
        maxlen (int, optional): Target length; defaults to the longest sequence
        dtype (str): Output data type
        padding (str): Where to add padding ('pre' or 'post')
        truncating (str): Which end to drop when too long ('pre' or 'post')
        value (float): Padding value

    Returns:
        array: Array of shape ``(len(sequences), maxlen)``

    Raises:
        ValueError: If ``padding`` or ``truncating`` is not 'pre' or 'post'.
    """
    import numpy as np

    if padding not in ('pre', 'post'):
        raise ValueError(f"`padding` should be 'pre' or 'post', got {padding!r}")
    if truncating not in ('pre', 'post'):
        raise ValueError(f"`truncating` should be 'pre' or 'post', got {truncating!r}")

    if maxlen is None:
        maxlen = max((len(s) for s in sequences), default=0)

    out = np.full((len(sequences), maxlen), value, dtype=dtype)
    for i, seq in enumerate(sequences):
        if maxlen == 0 or not len(seq):
            continue  # nothing to place; row stays all-padding
        trunc = seq[-maxlen:] if truncating == 'pre' else seq[:maxlen]
        if padding == 'pre':
            out[i, maxlen - len(trunc):] = trunc
        else:
            out[i, :len(trunc)] = trunc
    return out
# --- Usage example: image preprocessing pipeline ---------------------------
# (Restores the `import keras` line that extraction fused onto the preceding
# docstring, and detaches the `import keras` of the next example that was
# fused onto the final line here.)
import keras
from keras import layers

# Create dataset from directory
train_dataset = keras.utils.image_dataset_from_directory(
    'path/to/train',
    validation_split=0.2,
    subset='training',
    seed=123,
    image_size=(224, 224),
    batch_size=32
)
val_dataset = keras.utils.image_dataset_from_directory(
    'path/to/train',
    validation_split=0.2,
    subset='validation',
    seed=123,
    image_size=(224, 224),
    batch_size=32
)

# Build preprocessing pipeline
data_augmentation = keras.Sequential([
    layers.RandomFlip('horizontal'),
    layers.RandomRotation(0.2),
    layers.RandomZoom(0.2),
    layers.RandomBrightness(0.2),
    layers.RandomContrast(0.2)
])

# Apply augmentation to the training set (training=True enables the random ops)
train_dataset = train_dataset.map(lambda x, y: (data_augmentation(x, training=True), y))

# Normalize pixel values
normalization = layers.Rescaling(1./255)
train_dataset = train_dataset.map(lambda x, y: (normalization(x), y))
val_dataset = val_dataset.map(lambda x, y: (normalization(x), y))
# --- Usage example: text preprocessing pipeline ----------------------------
import keras
from keras import layers

# Create text dataset
train_dataset = keras.utils.text_dataset_from_directory(
    'path/to/text_data',
    batch_size=32,
    validation_split=0.2,
    subset='training',
    seed=123
)

# Text vectorization
vectorize_layer = layers.TextVectorization(
    max_tokens=10000,
    output_sequence_length=100,
    standardize='lower_and_strip_punctuation'
)

# Adapt to training data (labels stripped: adapt() only needs the text)
text_only_dataset = train_dataset.map(lambda x, y: x)
vectorize_layer.adapt(text_only_dataset)

# Apply vectorization
train_dataset = train_dataset.map(lambda x, y: (vectorize_layer(x), y))

# Install with Tessl CLI:
#   npx tessl i tessl/pypi-keras-nightly