Easy data preprocessing and data augmentation for deep learning models
npx @tessl/cli install tessl/pypi-keras-preprocessing@1.1.0
Easy data preprocessing and data augmentation for deep learning models. Keras-Preprocessing provides comprehensive utilities for text tokenization, sequence padding, and image augmentation specifically designed for training deep neural networks.
pip install keras-preprocessing
import keras_preprocessing
Specific modules:
from keras_preprocessing.text import Tokenizer, text_to_word_sequence
from keras_preprocessing.sequence import pad_sequences, TimeseriesGenerator
from keras_preprocessing.image import ImageDataGenerator, load_img, img_to_array
Legacy compatibility imports:
from keras_preprocessing import image, text, sequence
from keras_preprocessing.text import Tokenizer
from keras_preprocessing.sequence import pad_sequences
from keras_preprocessing.image import ImageDataGenerator
# Text preprocessing
tokenizer = Tokenizer(num_words=1000)
texts = ['hello world', 'deep learning', 'neural networks']
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
# Sequence padding
padded = pad_sequences(sequences, maxlen=10, padding='post')
# Image data augmentation
datagen = ImageDataGenerator(
rotation_range=20,
width_shift_range=0.2,
height_shift_range=0.2,
horizontal_flip=True
)
# Load data from directory
train_generator = datagen.flow_from_directory(
'train_data/',
target_size=(224, 224),
batch_size=32,
class_mode='categorical'
)
Keras-Preprocessing is organized into three main functional modules: text, sequence, and image.
Each module provides both low-level utilities and high-level generators that integrate seamlessly with Keras training workflows.
Text tokenization, vocabulary management, and text-to-sequence conversion utilities for natural language processing. Includes hashing tricks, one-hot encoding, and comprehensive tokenization with configurable filtering and preprocessing.
class Tokenizer:
def __init__(self, num_words=None, filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
lower=True, split=' ', char_level=False, oov_token=None, **kwargs): ...
def fit_on_texts(self, texts): ...
def texts_to_sequences(self, texts): ...
def texts_to_matrix(self, texts, mode='binary'): ...
def text_to_word_sequence(text, filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
lower=True, split=" "): ...
def one_hot(text, n, filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
lower=True, split=' '): ...
Sequence padding, temporal data generation, and utilities for preparing sequential data for neural networks. Includes padding sequences to uniform length, generating skipgrams for word2vec, and creating time series batches.
def pad_sequences(sequences, maxlen=None, dtype='int32', padding='pre',
truncating='pre', value=0.): ...
class TimeseriesGenerator:
def __init__(self, data, targets, length, sampling_rate=1, stride=1,
start_index=0, end_index=None, shuffle=False, reverse=False,
batch_size=128): ...
def __getitem__(self, index): ...
def skipgrams(sequence, vocabulary_size, window_size=4, negative_samples=1.,
shuffle=True, categorical=False, sampling_table=None, seed=None): ...
Comprehensive image data augmentation, loading, and preprocessing utilities for computer vision models. Includes data generators, transformation functions, file utilities, and multiple data source iterators.
class ImageDataGenerator:
def __init__(self, rotation_range=0., width_shift_range=0.,
height_shift_range=0., horizontal_flip=False, **kwargs): ...
def flow(self, x, y=None, batch_size=32, shuffle=True, **kwargs): ...
def flow_from_directory(self, directory, target_size=(256, 256),
color_mode='rgb', batch_size=32, **kwargs): ...
def flow_from_dataframe(self, dataframe, x_col="filename", y_col="class",
target_size=(256, 256), **kwargs): ...
def load_img(path, color_mode='rgb', target_size=None, interpolation='nearest'): ...
def img_to_array(img, data_format='channels_last', dtype='float32'): ...
def array_to_img(x, data_format='channels_last', scale=True, dtype='float32'): ...
# Common types used across modules
NDArray = numpy.ndarray
PILImage = PIL.Image.Image
Generator = typing.Generator
Iterator = typing.Iterator
def set_keras_submodules(backend, utils):
"""Set Keras backend and utils submodules (deprecated)."""
def get_keras_submodule(name):
"""Retrieve Keras submodule by name (deprecated)."""
__version__ = '1.1.2'