0
# Image Processing
1
2
DataVec provides comprehensive image processing capabilities for computer vision and deep learning workflows. It includes native image loading, format conversion, dataset loaders, and specialized record readers for image data.
3
4
## Capabilities
5
6
### Native Image Loading
7
8
High-performance image loading with support for multiple formats and automatic preprocessing for machine learning pipelines.
9
10
```java { .api }
11
public class NativeImageLoader {
12
public NativeImageLoader();
13
public NativeImageLoader(long height, long width);
14
public NativeImageLoader(long height, long width, long channels);
15
public NativeImageLoader(long height, long width, long channels, boolean centerCropIfNeeded);
16
17
public INDArray asMatrix(File file) throws IOException;
18
public INDArray asMatrix(InputStream inputStream) throws IOException;
19
public INDArray asMatrix(String filename) throws IOException;
20
public INDArray asRowVector(File file) throws IOException;
21
public INDArray asRowVector(InputStream inputStream) throws IOException;
22
}
23
```
24
25
**Constructor Parameters:**
26
- `height`, `width` - Target dimensions for image resizing
27
- `channels` - Number of color channels (1 for grayscale, 3 for RGB, 4 for RGBA)
28
- `centerCropIfNeeded` - Whether to center-crop images whose aspect ratio does not match the target aspect ratio
29
30
**Usage Examples:**
31
32
```java
33
// Load image with specific dimensions
34
NativeImageLoader loader = new NativeImageLoader(224, 224, 3); // 224x224 RGB
35
INDArray imageMatrix = loader.asMatrix(new File("image.jpg"));
36
37
// Get image dimensions
38
long[] shape = imageMatrix.shape(); // [channels, height, width]
39
System.out.println("Image shape: " + Arrays.toString(shape));
40
41
// Load as row vector (flattened)
42
INDArray rowVector = loader.asRowVector(new File("image.jpg"));
43
// Shape: [1, channels * height * width]
44
45
// Load from input stream
46
InputStream imageStream = new FileInputStream("image.png");
47
INDArray streamMatrix = loader.asMatrix(imageStream);
48
```
49
50
### Image Record Reader
51
52
Specialized record reader for processing directories of images with automatic label generation from file paths.
53
54
```java { .api }
55
public class ImageRecordReader implements RecordReader {
56
public ImageRecordReader(long height, long width, long channels, PathLabelGenerator labelGenerator);
57
public ImageRecordReader(long height, long width, long channels, PathLabelGenerator labelGenerator, boolean appendLabel);
58
public ImageRecordReader(long height, long width, long channels, List<String> labels);
59
}
60
```
61
62
**Usage Examples:**
63
64
```java
65
// Read images with parent directory as label
66
PathLabelGenerator labelGenerator = new ParentPathLabelGenerator();
67
ImageRecordReader imageReader = new ImageRecordReader(64, 64, 3, labelGenerator);
68
69
// Directory structure:
70
// /data/cats/cat1.jpg, cat2.jpg, ...
71
// /data/dogs/dog1.jpg, dog2.jpg, ...
72
FileSplit fileSplit = new FileSplit(new File("/data"), new String[]{"jpg", "png"}, true);
73
imageReader.initialize(fileSplit);
74
75
while (imageReader.hasNext()) {
76
List<Writable> record = imageReader.next();
77
// record[0] = NDArrayWritable containing image data
78
// record[1] = IntWritable containing label index
79
80
NDArrayWritable imageData = (NDArrayWritable) record.get(0);
81
IntWritable label = (IntWritable) record.get(1);
82
83
INDArray image = imageData.get(); // [1, 3, 64, 64] tensor (leading minibatch dimension of 1)
84
int classLabel = label.get(); // 0 for cats, 1 for dogs
85
}
86
```
87
88
### Dataset Loaders
89
90
Specialized loaders for common computer vision datasets.
91
92
#### CIFAR Loader
93
94
```java { .api }
95
public class CifarLoader {
96
public static final int NUM_LABELS = 10;
97
public static final int HEIGHT = 32;
98
public static final int WIDTH = 32;
99
public static final int CHANNELS = 3;
100
101
public CifarLoader();
102
public CifarLoader(String dataUrl);
103
public CifarLoader(String dataUrl, String localCachePath);
104
105
public DataSetIterator getDataSetIterator(int batchSize) throws IOException;
106
public DataSetIterator getDataSetIterator(int batchSize, boolean train) throws IOException;
107
}
108
```
109
110
**Usage Example:**
111
112
```java
113
// Load CIFAR-10 dataset
114
CifarLoader cifarLoader = new CifarLoader();
115
116
// Get training data iterator
117
DataSetIterator trainIterator = cifarLoader.getDataSetIterator(32, true); // batch size 32
118
DataSetIterator testIterator = cifarLoader.getDataSetIterator(32, false);
119
120
// Process batches
121
while (trainIterator.hasNext()) {
122
DataSet batch = trainIterator.next();
123
INDArray features = batch.getFeatures(); // [32, 3, 32, 32]
124
INDArray labels = batch.getLabels(); // [32, 10] one-hot encoded
125
}
126
```
127
128
#### LFW (Labeled Faces in the Wild) Loader
129
130
```java { .api }
131
public class LFWLoader {
132
public static final int NUM_LABELS = 5749;
133
public static final int HEIGHT = 250;
134
public static final int WIDTH = 250;
135
public static final int CHANNELS = 3;
136
137
public LFWLoader();
138
public LFWLoader(String dataUrl);
139
public LFWLoader(String dataUrl, String localCachePath);
140
141
public DataSetIterator getDataSetIterator(int batchSize, int numExamples) throws IOException;
142
public DataSetIterator getDataSetIterator(int batchSize, int numExamples, int[] imgDim) throws IOException;
143
}
144
```
145
146
**Usage Example:**
147
148
```java
149
// Load LFW dataset for face recognition
150
LFWLoader lfwLoader = new LFWLoader();
151
152
// Custom image dimensions
153
int[] imageDimensions = {128, 128, 3}; // Resize to 128x128 RGB
154
DataSetIterator iterator = lfwLoader.getDataSetIterator(16, 1000, imageDimensions);
155
156
while (iterator.hasNext()) {
157
DataSet batch = iterator.next();
158
INDArray faceImages = batch.getFeatures(); // [16, 3, 128, 128]
159
INDArray identityLabels = batch.getLabels(); // [16, 5749] one-hot
160
}
161
```
162
163
### Image Transformations
164
165
Support for image preprocessing and augmentation through the ImageTransform interface.
166
167
```java { .api }
168
public interface ImageTransform {
169
INDArray transform(INDArray image, Random random);
170
INDArray transform(INDArray image);
171
}
172
```
173
174
**Common Transform Implementations:**
175
176
```java
177
// Example usage with transformations (implementations vary)
178
ImageTransform[] transforms = {
179
new FlipImageTransform(0.5), // 50% chance horizontal flip
180
new ScaleImageTransform(0.1), // Scale by ±10%
181
new RotateImageTransform(15) // Rotate by ±15 degrees
182
};
183
184
// Apply transformations in ImageRecordReader
185
ImageRecordReader reader = new ImageRecordReader(224, 224, 3, labelGenerator);
186
// Configure transforms (implementation-specific)
187
```
188
189
### Path Label Generators
190
191
Automatic label extraction from file paths for supervised learning.
192
193
```java { .api }
194
public interface PathLabelGenerator {
195
Writable getLabelForPath(String path);
196
Writable getLabelForPath(URI uri);
197
}
198
199
public class ParentPathLabelGenerator implements PathLabelGenerator {
200
public ParentPathLabelGenerator();
201
}
202
203
public class PatternPathLabelGenerator implements PathLabelGenerator {
204
public PatternPathLabelGenerator(String pattern, int groupIndex);
205
}
206
```
207
208
**Usage Examples:**
209
210
```java
211
// Use parent directory name as label
212
PathLabelGenerator parentLabels = new ParentPathLabelGenerator();
213
// /data/cats/image.jpg -> label: "cats"
214
// /data/dogs/image.jpg -> label: "dogs"
215
216
// Use regex pattern to extract labels
217
PathLabelGenerator patternLabels = new PatternPathLabelGenerator("class_(\\d+)_", 1);
218
// class_0_image.jpg -> label: "0"
219
// class_1_image.jpg -> label: "1"
220
221
Writable label = parentLabels.getLabelForPath("/data/cats/cat001.jpg");
222
String labelString = label.toString(); // "cats"
223
```
224
225
## Integration Patterns
226
227
### With DataSetIterator
228
229
```java
230
// Create image dataset iterator
231
PathLabelGenerator labelGen = new ParentPathLabelGenerator();
232
ImageRecordReader imageReader = new ImageRecordReader(224, 224, 3, labelGen);
233
FileSplit imageSplit = new FileSplit(new File("/path/to/images"), new String[]{"jpg", "png"}, true);
234
imageReader.initialize(imageSplit);
235
236
// Convert to DataSetIterator for DL4J
237
RecordReaderDataSetIterator dataSetIterator = new RecordReaderDataSetIterator(
238
imageReader,
239
32, // batch size
240
1, // label index: position of the label in each record (record[0] = image, record[1] = label)
241
10 // number of classes
242
);
243
244
// Train neural network
245
MultiLayerNetwork model = new MultiLayerNetwork(config);
246
model.fit(dataSetIterator);
247
```
248
249
### Preprocessing Pipeline
250
251
```java
252
// Complete image preprocessing pipeline
253
public class ImagePreprocessor {
254
private final NativeImageLoader loader;
255
private final ImageTransform[] transforms;
256
257
public ImagePreprocessor(int height, int width, int channels, ImageTransform... transforms) {
258
this.loader = new NativeImageLoader(height, width, channels);
259
this.transforms = transforms;
260
}
261
262
public INDArray preprocess(File imageFile) throws IOException {
263
INDArray image = loader.asMatrix(imageFile);
264
265
// Apply transformations
266
for (ImageTransform transform : transforms) {
267
image = transform.transform(image);
268
}
269
270
// Normalize pixel values to [0, 1]
271
image.divi(255.0);
272
273
return image;
274
}
275
}
276
```
277
278
### Batch Processing
279
280
```java
281
// Process images in batches
282
List<File> imageFiles = Arrays.asList(/* image files */);
283
int batchSize = 32;
284
NativeImageLoader loader = new NativeImageLoader(224, 224, 3);
285
286
for (int i = 0; i < imageFiles.size(); i += batchSize) {
287
List<File> batch = imageFiles.subList(i, Math.min(i + batchSize, imageFiles.size()));
288
289
List<INDArray> batchImages = new ArrayList<>();
290
for (File imageFile : batch) {
291
INDArray image = loader.asMatrix(imageFile);
292
batchImages.add(image);
293
}
294
295
// Stack images into batch tensor
296
INDArray batchTensor = Nd4j.stack(0, batchImages.toArray(new INDArray[0]));
297
// Shape: [batchSize, channels, height, width]
298
299
// Process batch...
300
}
301
```
302
303
## Error Handling
304
305
```java
306
try {
307
NativeImageLoader loader = new NativeImageLoader(224, 224, 3);
308
INDArray image = loader.asMatrix(new File("image.jpg"));
309
} catch (IOException e) {
310
// Handle file read errors
311
System.err.println("Error loading image: " + e.getMessage());
312
} catch (IllegalArgumentException e) {
313
// Handle invalid image dimensions or unsupported format
314
System.err.println("Invalid image: " + e.getMessage());
315
} catch (OutOfMemoryError e) {
316
// Handle memory issues with large images
317
System.err.println("Out of memory loading image, try smaller dimensions");
318
}
319
```
320
321
## Performance Considerations
322
323
### Memory Management
324
325
```java
326
// For large datasets, consider streaming
327
ImageRecordReader reader = new ImageRecordReader(224, 224, 3, labelGenerator);
328
reader.initialize(imageSplit);
329
330
// Process one image at a time to reduce memory usage
331
while (reader.hasNext()) {
332
List<Writable> record = reader.next();
333
INDArray image = ((NDArrayWritable) record.get(0)).get();
334
335
// Process immediately and allow GC
336
processImage(image);
337
338
// Explicit cleanup for large datasets
339
if (needsCleanup()) {
340
System.gc();
341
}
342
}
343
```
344
345
### Parallel Processing
346
347
```java
348
// Use parallel streams for CPU-intensive preprocessing
349
List<File> imageFiles = getImageFiles();
350
351
List<INDArray> processedImages = imageFiles.parallelStream()
352
.map(file -> {
353
try {
354
return loader.asMatrix(file);
355
} catch (IOException e) {
356
throw new RuntimeException(e);
357
}
358
})
359
.collect(Collectors.toList());
360
```
361
362
## Types
363
364
### Core Interfaces
365
366
```java { .api }
367
public interface ImageTransform {
368
INDArray transform(INDArray image, Random random);
369
INDArray transform(INDArray image);
370
}
371
372
public interface PathLabelGenerator {
373
Writable getLabelForPath(String path);
374
Writable getLabelForPath(URI uri);
375
}
376
```
377
378
### Image Processing Classes
379
380
```java { .api }
381
// Core image loading
382
public class NativeImageLoader;
383
public class ImageRecordReader implements RecordReader;
384
385
// Dataset loaders
386
public class CifarLoader;
387
public class LFWLoader;
388
389
// Label generators
390
public class ParentPathLabelGenerator implements PathLabelGenerator;
391
public class PatternPathLabelGenerator implements PathLabelGenerator;
392
```