0
# Image Processing
1
2
Comprehensive image data augmentation, loading, and preprocessing utilities for computer vision models. These tools provide data generators, transformation functions, file utilities, and multiple data source iterators for efficient batch processing of image data.
3
4
## Capabilities
5
6
### Image Data Generator
7
8
The main class for image data augmentation and batch generation with real-time transformations.
9
10
```python { .api }
11
class ImageDataGenerator:
12
"""
13
Generate batches of tensor image data with real-time data augmentation.
14
15
Provides comprehensive image augmentation pipeline with rotation, shifting,
16
scaling, flipping, and other transformations applied on-the-fly during training.
17
"""
18
19
def __init__(self, featurewise_center=False, samplewise_center=False,
20
featurewise_std_normalization=False, samplewise_std_normalization=False,
21
zca_whitening=False, rotation_range=0., width_shift_range=0.,
22
height_shift_range=0., brightness_range=None, shear_range=0.,
23
zoom_range=0., channel_shift_range=0., fill_mode='nearest',
24
cval=0., horizontal_flip=False, vertical_flip=False,
25
rescale=None, preprocessing_function=None, data_format='channels_last',
26
validation_split=0., **kwargs):
27
"""
28
Initialize ImageDataGenerator with augmentation parameters.
29
30
Parameters:
31
- featurewise_center (bool): Set input mean to 0 over dataset
32
- samplewise_center (bool): Set each sample mean to 0
33
- featurewise_std_normalization (bool): Divide inputs by std of dataset
34
- samplewise_std_normalization (bool): Divide each input by its std
35
- zca_whitening (bool): Apply ZCA whitening
36
- rotation_range (float): Degree range for random rotations
37
- width_shift_range (float): Range for random horizontal shifts (fraction of total width if < 1, pixels if >= 1)
38
- height_shift_range (float): Range for random vertical shifts (fraction of total height if < 1, pixels if >= 1)
39
- brightness_range (tuple, optional): Range for brightness adjustment
40
- shear_range (float): Shear intensity
41
- zoom_range (float or tuple): Range for random zoom
42
- channel_shift_range (float): Range for random channel shifts
43
- fill_mode (str): Points outside boundaries filled according to mode
44
- cval (float): Value used for points outside boundaries when fill_mode='constant'
45
- horizontal_flip (bool): Randomly flip inputs horizontally
46
- vertical_flip (bool): Randomly flip inputs vertically
47
- rescale (float, optional): Rescaling factor
48
- preprocessing_function (callable, optional): Function applied to each input
49
- data_format (str): 'channels_first' or 'channels_last'
50
- validation_split (float): Fraction of data reserved for validation
51
"""
52
53
def flow(self, x, y=None, batch_size=32, shuffle=True, sample_weight=None,
54
seed=None, save_to_dir=None, save_prefix='', save_format='png',
55
subset=None):
56
"""
57
Create NumpyArrayIterator from numpy arrays.
58
59
Parameters:
60
- x (numpy.ndarray): Input data array
61
- y (numpy.ndarray, optional): Target data array
62
- batch_size (int): Size of batches
63
- shuffle (bool): Whether to shuffle data
64
- sample_weight (numpy.ndarray, optional): Sample weights
65
- seed (int, optional): Random seed
66
- save_to_dir (str, optional): Directory to save augmented images
67
- save_prefix (str): Prefix for saved images
68
- save_format (str): Format for saved images
69
- subset (str, optional): 'training' or 'validation'
70
71
Returns:
72
- NumpyArrayIterator: Iterator yielding batches
73
"""
74
75
def flow_from_directory(self, directory, target_size=(256, 256),
76
color_mode='rgb', classes=None, class_mode='categorical',
77
batch_size=32, shuffle=True, seed=None, save_to_dir=None,
78
save_prefix='', save_format='png', follow_links=False,
79
subset=None, interpolation='nearest'):
80
"""
81
Create DirectoryIterator from directory structure.
82
83
Parameters:
84
- directory (str): Path to target directory
85
- target_size (tuple): Size to resize images
86
- color_mode (str): 'grayscale', 'rgb', or 'rgba'
87
- classes (list, optional): List of class subdirectories
88
- class_mode (str): 'categorical', 'binary', 'sparse', 'input', or None
89
- batch_size (int): Size of batches
90
- shuffle (bool): Whether to shuffle data
91
- seed (int, optional): Random seed
92
- save_to_dir (str, optional): Directory to save augmented images
93
- save_prefix (str): Prefix for saved images
94
- save_format (str): Format for saved images
95
- follow_links (bool): Whether to follow symlinks
96
- subset (str, optional): 'training' or 'validation'
97
- interpolation (str): Interpolation method for resizing
98
99
Returns:
100
- DirectoryIterator: Iterator yielding batches from directory
101
"""
102
103
def flow_from_dataframe(self, dataframe, directory=None, x_col="filename",
104
y_col="class", weight_col=None, target_size=(256, 256),
105
color_mode='rgb', classes=None, class_mode='categorical',
106
batch_size=32, shuffle=True, seed=None, save_to_dir=None,
107
save_prefix='', save_format='png', subset=None,
108
interpolation='nearest', validate_filenames=True):
109
"""
110
Create DataFrameIterator from pandas DataFrame.
111
112
Parameters:
113
- dataframe (pandas.DataFrame): DataFrame containing image paths and labels
114
- directory (str, optional): Directory containing images
115
- x_col (str): Column containing image filenames
116
- y_col (str): Column containing class labels
117
- weight_col (str, optional): Column containing sample weights
118
- target_size (tuple): Size to resize images
119
- color_mode (str): 'grayscale', 'rgb', or 'rgba'
120
- classes (list, optional): List of class names
121
- class_mode (str): 'categorical', 'binary', 'sparse', 'input', 'multi_output', 'raw', or None
122
- batch_size (int): Size of batches
123
- shuffle (bool): Whether to shuffle data
124
- seed (int, optional): Random seed
125
- save_to_dir (str, optional): Directory to save augmented images
126
- save_prefix (str): Prefix for saved images
127
- save_format (str): Format for saved images
128
- subset (str, optional): 'training' or 'validation'
129
- interpolation (str): Interpolation method for resizing
130
- validate_filenames (bool): Whether to validate image files exist
131
132
Returns:
133
- DataFrameIterator: Iterator yielding batches from DataFrame
134
"""
135
136
def standardize(self, x):
137
"""
138
Apply normalization configuration to batch of inputs.
139
140
Parameters:
141
- x (numpy.ndarray): Batch of inputs to standardize
142
143
Returns:
144
- numpy.ndarray: Standardized batch
145
"""
146
147
def get_random_transform(self, img_shape, seed=None):
148
"""
149
Generate random transform parameters for single image.
150
151
Parameters:
152
- img_shape (tuple): Shape of image
153
- seed (int, optional): Random seed
154
155
Returns:
156
- dict: Transform parameters
157
"""
158
159
def apply_transform(self, x, transform_parameters):
160
"""
161
Apply transformation to image with given parameters.
162
163
Parameters:
164
- x (numpy.ndarray): Image to transform
165
- transform_parameters (dict): Transform parameters
166
167
Returns:
168
- numpy.ndarray: Transformed image
169
"""
170
171
def random_transform(self, x, seed=None):
172
"""
173
Apply random transformation to single image.
174
175
Parameters:
176
- x (numpy.ndarray): Image to transform
177
- seed (int, optional): Random seed
178
179
Returns:
180
- numpy.ndarray: Randomly transformed image
181
"""
182
183
def fit(self, x, augment=False, rounds=1, seed=None):
184
"""
185
Fit data generator to sample data for normalization.
186
187
Parameters:
188
- x (numpy.ndarray): Sample data
189
- augment (bool): Whether to use augmentation for fitting
190
- rounds (int): Number of rounds for augmentation
191
- seed (int, optional): Random seed
192
"""
193
```
194
195
### Iterator Classes
196
197
Base and specialized iterator classes for different data sources.
198
199
```python { .api }
200
class Iterator:
201
"""
202
Base class for image data iterators.
203
204
Provides common functionality for batch generation, shuffling, and indexing.
205
"""
206
207
def __init__(self, n, batch_size, shuffle, seed):
208
"""
209
Initialize iterator.
210
211
Parameters:
212
- n (int): Total number of samples
213
- batch_size (int): Size of batches
214
- shuffle (bool): Whether to shuffle data
215
- seed (int, optional): Random seed
216
"""
217
218
class DirectoryIterator(Iterator):
219
"""
220
Iterator capable of reading images from directory on disk.
221
222
Automatically infers class labels from subdirectory structure. Supports
223
various image formats and provides file path access.
224
"""
225
226
def __init__(self, directory, image_data_generator, target_size=(256, 256),
227
color_mode='rgb', classes=None, class_mode='categorical',
228
batch_size=32, shuffle=True, seed=None, data_format='channels_last',
229
save_to_dir=None, save_prefix='', save_format='png',
230
follow_links=False, subset=None, interpolation='nearest',
231
dtype='float32'):
232
"""Initialize DirectoryIterator with directory path and parameters."""
233
234
@property
235
def filepaths(self):
236
"""Get list of absolute file paths."""
237
238
@property
239
def labels(self):
240
"""Get array of class labels."""
241
242
@property
243
def sample_weight(self):
244
"""Get array of sample weights."""
245
246
class DataFrameIterator(Iterator):
247
"""
248
Iterator capable of reading images from a directory on disk through a dataframe.
249
250
Uses pandas DataFrame to specify image paths and labels, providing
251
flexible data organization beyond directory structure.
252
"""
253
254
@property
255
def filepaths(self):
256
"""Get list of absolute file paths."""
257
258
@property
259
def labels(self):
260
"""Get array of class labels."""
261
262
@property
263
def sample_weight(self):
264
"""Get array of sample weights."""
265
266
class NumpyArrayIterator(Iterator):
267
"""
268
Iterator yielding data from numpy array.
269
270
Provides batching and augmentation for in-memory numpy arrays,
271
supporting both image data and labels.
272
"""
273
274
def __init__(self, x, y, image_data_generator, batch_size=32, shuffle=False,
275
sample_weight=None, seed=None, data_format='channels_last',
276
save_to_dir=None, save_prefix='', save_format='png',
277
subset=None, dtype='float32'):
278
"""Initialize NumpyArrayIterator with numpy arrays and parameters."""
279
```
280
281
### Image Utility Functions
282
283
Core utilities for image loading, saving, and format conversion.
284
285
```python { .api }
286
def load_img(path, grayscale=False, color_mode='rgb', target_size=None,
287
interpolation='nearest'):
288
"""
289
Load image into PIL format.
290
291
Parameters:
292
- path (str): Path to image file
293
- grayscale (bool): Whether to load as grayscale (deprecated, use color_mode)
294
- color_mode (str): 'grayscale', 'rgb', or 'rgba'
295
- target_size (tuple, optional): Size to resize to (height, width)
296
- interpolation (str): Interpolation method ('nearest', 'bilinear', etc.)
297
298
Returns:
299
- PIL.Image: Loaded image
300
"""
301
302
def save_img(path, x, data_format='channels_last', file_format=None,
303
scale=True, **kwargs):
304
"""
305
Save numpy array as image to path or file object.
306
307
Parameters:
308
- path (str): Path to save image
309
- x (numpy.ndarray): Image array to save
310
- data_format (str): 'channels_first' or 'channels_last'
311
- file_format (str, optional): Format to save as
312
- scale (bool): Whether to scale values to 0-255 range
313
- **kwargs: Additional arguments passed to PIL.Image.save()
314
"""
315
316
def img_to_array(img, data_format='channels_last', dtype='float32'):
317
"""
318
Convert PIL Image instance to numpy array.
319
320
Parameters:
321
- img (PIL.Image): PIL Image to convert
322
- data_format (str): 'channels_first' or 'channels_last'
323
- dtype (str): Data type for output array
324
325
Returns:
326
- numpy.ndarray: Image as numpy array
327
"""
328
329
def array_to_img(x, data_format='channels_last', scale=True, dtype='float32'):
330
"""
331
Convert 3D numpy array to PIL Image instance.
332
333
Parameters:
334
- x (numpy.ndarray): Array to convert
335
- data_format (str): 'channels_first' or 'channels_last'
336
- scale (bool): Whether to rescale image values to the [0, 255] range
337
- dtype (str): Data type of input array
338
339
Returns:
340
- PIL.Image: Converted image
341
"""
342
343
def list_pictures(directory, ext=('jpg', 'jpeg', 'bmp', 'png', 'ppm', 'tif', 'tiff')):
344
"""
345
List all pictures in directory including subdirectories.
346
347
Parameters:
348
- directory (str): Directory path to search
349
- ext (tuple): Tuple of valid image extensions
350
351
Returns:
352
- list: List of image file paths
353
"""
354
355
def validate_filename(filename, white_list_formats):
356
"""
357
Check if filename refers to valid image file.
358
359
Parameters:
360
- filename (str): Filename to validate
361
- white_list_formats (set): Set of allowed file extensions
362
363
Returns:
364
- bool: True if filename is valid image file
365
"""
366
```
367
368
### Image Transformation Functions
369
370
Low-level transformation functions for image augmentation.
371
372
```python { .api }
373
def flip_axis(x, axis):
374
"""
375
Flip array along specified axis.
376
377
Parameters:
378
- x (numpy.ndarray): Array to flip
379
- axis (int): Axis along which to flip
380
381
Returns:
382
- numpy.ndarray: Flipped array
383
"""
384
385
def random_rotation(x, rg, row_axis=1, col_axis=2, channel_axis=0,
386
fill_mode='nearest', cval=0., interpolation_order=1):
387
"""
388
Perform random rotation of numpy image tensor.
389
390
Parameters:
391
- x (numpy.ndarray): Image tensor
392
- rg (float): Rotation range in degrees
393
- row_axis (int): Index of axis for rows
394
- col_axis (int): Index of axis for columns
395
- channel_axis (int): Index of axis for channels
396
- fill_mode (str): Points outside boundaries filled according to mode
397
- cval (float): Value for points outside boundaries when fill_mode='constant'
398
- interpolation_order (int): Order of spline interpolation
399
400
Returns:
401
- numpy.ndarray: Rotated image tensor
402
"""
403
404
def random_shift(x, wrg, hrg, row_axis=1, col_axis=2, channel_axis=0,
405
fill_mode='nearest', cval=0., interpolation_order=1):
406
"""
407
Perform random spatial shift of numpy image tensor.
408
409
Parameters:
410
- x (numpy.ndarray): Image tensor
411
- wrg (float): Width shift range (fraction of total width)
412
- hrg (float): Height shift range (fraction of total height)
413
- row_axis (int): Index of axis for rows
414
- col_axis (int): Index of axis for columns
415
- channel_axis (int): Index of axis for channels
416
- fill_mode (str): Points outside boundaries filled according to mode
417
- cval (float): Value for points outside boundaries when fill_mode='constant'
418
- interpolation_order (int): Order of spline interpolation
419
420
Returns:
421
- numpy.ndarray: Shifted image tensor
422
"""
423
424
def random_shear(x, intensity, row_axis=1, col_axis=2, channel_axis=0,
425
fill_mode='nearest', cval=0., interpolation_order=1):
426
"""
427
Perform random spatial shear of numpy image tensor.
428
429
Parameters:
430
- x (numpy.ndarray): Image tensor
431
- intensity (float): Shear intensity
432
- row_axis (int): Index of axis for rows
433
- col_axis (int): Index of axis for columns
434
- channel_axis (int): Index of axis for channels
435
- fill_mode (str): Points outside boundaries filled according to mode
436
- cval (float): Value for points outside boundaries when fill_mode='constant'
437
- interpolation_order (int): Order of spline interpolation
438
439
Returns:
440
- numpy.ndarray: Sheared image tensor
441
"""
442
443
def random_zoom(x, zoom_range, row_axis=1, col_axis=2, channel_axis=0,
444
fill_mode='nearest', cval=0., interpolation_order=1):
445
"""
446
Perform random spatial zoom of numpy image tensor.
447
448
Parameters:
449
- x (numpy.ndarray): Image tensor
450
- zoom_range (tuple): Range for random zoom (zoom_min, zoom_max)
451
- row_axis (int): Index of axis for rows
452
- col_axis (int): Index of axis for columns
453
- channel_axis (int): Index of axis for channels
454
- fill_mode (str): Points outside boundaries filled according to mode
455
- cval (float): Value for points outside boundaries when fill_mode='constant'
456
- interpolation_order (int): Order of spline interpolation
457
458
Returns:
459
- numpy.ndarray: Zoomed image tensor
460
"""
461
462
def apply_channel_shift(x, intensity, channel_axis=0):
463
"""
464
Perform channel shift.
465
466
Parameters:
467
- x (numpy.ndarray): Image tensor
468
- intensity (float): Shift intensity
469
- channel_axis (int): Index of axis for channels
470
471
Returns:
472
- numpy.ndarray: Channel-shifted image tensor
473
"""
474
475
def random_channel_shift(x, intensity_range, channel_axis=0):
476
"""
477
Perform random channel shift.
478
479
Parameters:
480
- x (numpy.ndarray): Image tensor
481
- intensity_range (float): Range for random channel shift
482
- channel_axis (int): Index of axis for channels
483
484
Returns:
485
- numpy.ndarray: Channel-shifted image tensor
486
"""
487
488
def apply_brightness_shift(x, brightness):
489
"""
490
Perform brightness shift.
491
492
Parameters:
493
- x (numpy.ndarray): Image tensor
494
- brightness (float): Brightness factor; 1.0 leaves the image unchanged, values below 1.0 darken it and values above 1.0 brighten it
495
496
Returns:
497
- numpy.ndarray: Brightness-adjusted image tensor
498
"""
499
500
def random_brightness(x, brightness_range):
501
"""
502
Perform random brightness shift.
503
504
Parameters:
505
- x (numpy.ndarray): Image tensor
506
- brightness_range (tuple): Range for brightness adjustment (min, max)
507
508
Returns:
509
- numpy.ndarray: Brightness-adjusted image tensor
510
"""
511
512
def apply_affine_transform(x, theta=0, tx=0, ty=0, shear=0, zx=1, zy=1,
513
row_axis=0, col_axis=1, channel_axis=2,
514
fill_mode='nearest', cval=0., order=1):
515
"""
516
Apply affine transformation specified by parameters.
517
518
Parameters:
519
- x (numpy.ndarray): Image tensor
520
- theta (float): Rotation angle in degrees
521
- tx (float): Translation in x direction
522
- ty (float): Translation in y direction
523
- shear (float): Shear angle in degrees
524
- zx (float): Zoom factor along x axis
525
- zy (float): Zoom factor along y axis
526
- row_axis (int): Index of axis for rows
527
- col_axis (int): Index of axis for columns
528
- channel_axis (int): Index of axis for channels
529
- fill_mode (str): Points outside boundaries filled according to mode
530
- cval (float): Value for points outside boundaries when fill_mode='constant'
531
- order (int): Order of spline interpolation
532
533
Returns:
534
- numpy.ndarray: Transformed image tensor
535
"""
536
537
def transform_matrix_offset_center(matrix, x, y):
538
"""
539
Offset transformation matrix to center.
540
541
Parameters:
542
- matrix (numpy.ndarray): Transformation matrix
543
- x (float): Center x coordinate
544
- y (float): Center y coordinate
545
546
Returns:
547
- numpy.ndarray: Offset transformation matrix
548
"""
549
```
550
551
## Usage Examples
552
553
### Basic Image Data Generation
554
555
```python
556
from keras_preprocessing.image import ImageDataGenerator
557
558
# Create generator with augmentation
559
datagen = ImageDataGenerator(
560
rotation_range=20,
561
width_shift_range=0.2,
562
height_shift_range=0.2,
563
shear_range=0.15,
564
zoom_range=0.15,
565
horizontal_flip=True,
566
brightness_range=[0.8, 1.2],
567
fill_mode='nearest'
568
)
569
570
# Load data from directory
571
train_generator = datagen.flow_from_directory(
572
'data/train/',
573
target_size=(224, 224),
574
batch_size=32,
575
class_mode='categorical',
576
shuffle=True
577
)
578
579
print(f"Found {train_generator.samples} images belonging to {train_generator.num_classes} classes")
580
```
581
582
### Working with Numpy Arrays
583
584
```python
585
import numpy as np
586
from keras_preprocessing.image import ImageDataGenerator
587
588
# Sample image data (1000 images, 64x64 pixels, 3 channels)
589
x_train = np.random.randint(0, 256, (1000, 64, 64, 3))
590
y_train = np.random.randint(0, 10, (1000,))
591
592
# Create generator
593
datagen = ImageDataGenerator(
594
rescale=1./255,
595
rotation_range=10,
596
horizontal_flip=True
597
)
598
599
# Fit to data (only required when featurewise_center, featurewise_std_normalization, or zca_whitening is enabled)
600
datagen.fit(x_train)
601
602
# Create iterator
603
train_iterator = datagen.flow(
604
x_train, y_train,
605
batch_size=32,
606
shuffle=True
607
)
608
609
# Get batch
610
batch_x, batch_y = next(train_iterator)
611
print(f"Batch shape: {batch_x.shape}") # (32, 64, 64, 3)
612
```
613
614
### Using DataFrames
615
616
```python
617
import pandas as pd
618
from keras_preprocessing.image import ImageDataGenerator
619
620
# Create DataFrame with image paths and labels
621
df = pd.DataFrame({
622
'filename': ['img1.jpg', 'img2.jpg', 'img3.jpg'],
623
'class': ['cat', 'dog', 'cat']
624
})
625
626
# Create generator
627
datagen = ImageDataGenerator(rescale=1./255)
628
629
# Flow from DataFrame
630
generator = datagen.flow_from_dataframe(
631
dataframe=df,
632
directory='images/',
633
x_col='filename',
634
y_col='class',
635
target_size=(150, 150),
636
batch_size=20,
637
class_mode='categorical'
638
)
639
```
640
641
### Manual Image Processing
642
643
```python
644
from keras_preprocessing.image import load_img, img_to_array, array_to_img
645
import numpy as np
646
647
# Load and preprocess single image
648
img = load_img('path/to/image.jpg', target_size=(224, 224))
649
img_array = img_to_array(img)
650
img_array = np.expand_dims(img_array, axis=0) # Add batch dimension
651
652
print(f"Image shape: {img_array.shape}") # (1, 224, 224, 3)
653
654
# Apply transformations manually
655
from keras_preprocessing.image import random_rotation
656
augmented = random_rotation(img_array[0], 15, row_axis=0, col_axis=1, channel_axis=2)
657
658
# Convert back to PIL Image
659
result_img = array_to_img(augmented)
660
result_img.save('augmented_image.jpg')
661
```
662
663
### Validation Split
664
665
```python
666
# Use validation split for train/val separation
667
datagen = ImageDataGenerator(
668
rescale=1./255,
669
validation_split=0.2 # 20% for validation
670
)
671
672
# Training generator
673
train_generator = datagen.flow_from_directory(
674
'data/',
675
target_size=(150, 150),
676
batch_size=32,
677
class_mode='categorical',
678
subset='training' # Use training subset
679
)
680
681
# Validation generator
682
validation_generator = datagen.flow_from_directory(
683
'data/',
684
target_size=(150, 150),
685
batch_size=32,
686
class_mode='categorical',
687
subset='validation' # Use validation subset
688
)
689
```