ETL library for machine learning data preprocessing across diverse formats including HDFS, Spark, Images, Video, Audio, CSV, and Excel
—
DataVec provides comprehensive image processing capabilities for computer vision and deep learning workflows. It includes native image loading, format conversion, dataset loaders, and specialized record readers for image data.
High-performance image loading with support for multiple formats and automatic preprocessing for machine learning pipelines.
public class NativeImageLoader {
public NativeImageLoader();
public NativeImageLoader(long height, long width);
public NativeImageLoader(long height, long width, long channels);
public NativeImageLoader(long height, long width, long channels, boolean centerCropIfNeeded);
public INDArray asMatrix(File file) throws IOException;
public INDArray asMatrix(InputStream inputStream) throws IOException;
public INDArray asMatrix(String filename) throws IOException;
public INDArray asRowVector(File file) throws IOException;
public INDArray asRowVector(InputStream inputStream) throws IOException;
}
Constructor Parameters:
height, width - Target dimensions for image resizing
channels - Number of color channels (1 for grayscale, 3 for RGB, 4 for RGBA)
centerCropIfNeeded - Whether to center crop images that don't match the target aspect ratio
Usage Examples:
// Load image with specific dimensions
NativeImageLoader loader = new NativeImageLoader(224, 224, 3); // 224x224 RGB
INDArray imageMatrix = loader.asMatrix(new File("image.jpg"));
// Get image dimensions
long[] shape = imageMatrix.shape(); // [1, channels, height, width] — asMatrix returns a rank-4 tensor with a leading batch dimension of 1
System.out.println("Image shape: " + Arrays.toString(shape));
// Load as row vector (flattened)
INDArray rowVector = loader.asRowVector(new File("image.jpg"));
// Shape: [1, channels * height * width]
// Load from input stream
InputStream imageStream = new FileInputStream("image.png");
INDArray streamMatrix = loader.asMatrix(imageStream);
Specialized record reader for processing directories of images with automatic label generation from file paths.
public class ImageRecordReader implements RecordReader {
public ImageRecordReader(long height, long width, long channels, PathLabelGenerator labelGenerator);
public ImageRecordReader(long height, long width, long channels, PathLabelGenerator labelGenerator, boolean appendLabel);
public ImageRecordReader(long height, long width, long channels, List<String> labels);
}
Usage Examples:
// Read images with parent directory as label
PathLabelGenerator labelGenerator = new ParentPathLabelGenerator();
ImageRecordReader imageReader = new ImageRecordReader(64, 64, 3, labelGenerator);
// Directory structure:
// /data/cats/cat1.jpg, cat2.jpg, ...
// /data/dogs/dog1.jpg, dog2.jpg, ...
FileSplit fileSplit = new FileSplit(new File("/data"), new String[]{"jpg", "png"}, true);
imageReader.initialize(fileSplit);
while (imageReader.hasNext()) {
List<Writable> record = imageReader.next();
// record[0] = NDArrayWritable containing image data
// record[1] = IntWritable containing label index
NDArrayWritable imageData = (NDArrayWritable) record.get(0);
IntWritable label = (IntWritable) record.get(1);
INDArray image = imageData.get(); // [3, 64, 64] tensor
int classLabel = label.get(); // 0 for cats, 1 for dogs
}
Specialized loaders for common computer vision datasets.
public class CifarLoader {
public static final int NUM_LABELS = 10;
public static final int HEIGHT = 32;
public static final int WIDTH = 32;
public static final int CHANNELS = 3;
public CifarLoader();
public CifarLoader(String dataUrl);
public CifarLoader(String dataUrl, String localCachePath);
public DataSetIterator getDataSetIterator(int batchSize) throws IOException;
public DataSetIterator getDataSetIterator(int batchSize, boolean train) throws IOException;
}
Usage Example:
// Load CIFAR-10 dataset
CifarLoader cifarLoader = new CifarLoader();
// Get training data iterator
DataSetIterator trainIterator = cifarLoader.getDataSetIterator(32, true); // batch size 32
DataSetIterator testIterator = cifarLoader.getDataSetIterator(32, false);
// Process batches
while (trainIterator.hasNext()) {
DataSet batch = trainIterator.next();
INDArray features = batch.getFeatures(); // [32, 3, 32, 32]
INDArray labels = batch.getLabels(); // [32, 10] one-hot encoded
}
public class LFWLoader {
public static final int NUM_LABELS = 5749;
public static final int HEIGHT = 250;
public static final int WIDTH = 250;
public static final int CHANNELS = 3;
public LFWLoader();
public LFWLoader(String dataUrl);
public LFWLoader(String dataUrl, String localCachePath);
public DataSetIterator getDataSetIterator(int batchSize, int numExamples) throws IOException;
public DataSetIterator getDataSetIterator(int batchSize, int numExamples, int[] imgDim) throws IOException;
}
Usage Example:
// Load LFW dataset for face recognition
LFWLoader lfwLoader = new LFWLoader();
// Custom image dimensions
int[] imageDimensions = {128, 128, 3}; // Resize to 128x128 RGB
DataSetIterator iterator = lfwLoader.getDataSetIterator(16, 1000, imageDimensions);
while (iterator.hasNext()) {
DataSet batch = iterator.next();
INDArray faceImages = batch.getFeatures(); // [16, 3, 128, 128]
INDArray identityLabels = batch.getLabels(); // [16, 5749] one-hot
}
Support for image preprocessing and augmentation through the ImageTransform interface.
public interface ImageTransform {
INDArray transform(INDArray image, Random random);
INDArray transform(INDArray image);
}
Common Transform Implementations:
// Example usage with transformations (implementations vary)
ImageTransform[] transforms = {
new FlipImageTransform(0.5), // 50% chance horizontal flip
new ScaleImageTransform(0.1), // Scale by ±10%
new RotateImageTransform(15) // Rotate by ±15 degrees
};
// Apply transformations in ImageRecordReader
ImageRecordReader reader = new ImageRecordReader(224, 224, 3, labelGenerator);
// Configure transforms (implementation-specific)
Automatic label extraction from file paths for supervised learning.
public interface PathLabelGenerator {
Writable getLabelForPath(String path);
Writable getLabelForPath(URI uri);
}
public class ParentPathLabelGenerator implements PathLabelGenerator {
public ParentPathLabelGenerator();
}
public class PatternPathLabelGenerator implements PathLabelGenerator {
public PatternPathLabelGenerator(String pattern, int groupIndex);
}
Usage Examples:
// Use parent directory name as label
PathLabelGenerator parentLabels = new ParentPathLabelGenerator();
// /data/cats/image.jpg -> label: "cats"
// /data/dogs/image.jpg -> label: "dogs"
// Use regex pattern to extract labels
PathLabelGenerator patternLabels = new PatternPathLabelGenerator("class_(\\d+)_", 1);
// class_0_image.jpg -> label: "0"
// class_1_image.jpg -> label: "1"
Writable label = parentLabels.getLabelForPath("/data/cats/cat001.jpg");
String labelString = label.toString(); // "cats"
// Create image dataset iterator
PathLabelGenerator labelGen = new ParentPathLabelGenerator();
ImageRecordReader imageReader = new ImageRecordReader(224, 224, 3, labelGen);
FileSplit imageSplit = new FileSplit(new File("/path/to/images"), new String[]{"jpg", "png"}, true);
imageReader.initialize(imageSplit);
// Convert to DataSetIterator for DL4J
RecordReaderDataSetIterator dataSetIterator = new RecordReaderDataSetIterator(
imageReader,
32, // batch size
1, // label index: position of the label Writable within each record (image data at index 0, label at index 1)
10 // number of classes
);
// Train neural network
MultiLayerNetwork model = new MultiLayerNetwork(config);
model.fit(dataSetIterator);
// Complete image preprocessing pipeline
/**
 * Complete image preprocessing pipeline: loads an image at a fixed target size,
 * applies an ordered sequence of transforms, and scales pixel values into [0, 1].
 */
public class ImagePreprocessor {
    private final NativeImageLoader loader;
    private final ImageTransform[] transforms;

    /**
     * @param height     target image height in pixels
     * @param width      target image width in pixels
     * @param channels   number of color channels (1 = grayscale, 3 = RGB, 4 = RGBA)
     * @param transforms optional transforms applied in the given order after loading
     */
    public ImagePreprocessor(int height, int width, int channels, ImageTransform... transforms) {
        this.loader = new NativeImageLoader(height, width, channels);
        // Defensive copy: storing the varargs array directly would let the caller
        // mutate our internal state after construction; also guard against a null array.
        this.transforms = (transforms == null) ? new ImageTransform[0] : transforms.clone();
    }

    /**
     * Loads and preprocesses a single image file.
     *
     * @param imageFile image file to load and preprocess
     * @return image tensor with pixel values normalized to [0, 1]
     * @throws IOException if the file cannot be read or decoded
     */
    public INDArray preprocess(File imageFile) throws IOException {
        INDArray image = loader.asMatrix(imageFile);
        // Apply transformations in the order supplied to the constructor
        for (ImageTransform transform : transforms) {
            image = transform.transform(image);
        }
        // Normalize raw 8-bit pixel values (0-255) to [0, 1] in place
        image.divi(255.0);
        return image;
    }
}
// Process images in batches
List<File> imageFiles = Arrays.asList(/* image files */);
int batchSize = 32;
NativeImageLoader loader = new NativeImageLoader(224, 224, 3);
for (int i = 0; i < imageFiles.size(); i += batchSize) {
List<File> batch = imageFiles.subList(i, Math.min(i + batchSize, imageFiles.size()));
List<INDArray> batchImages = new ArrayList<>();
for (File imageFile : batch) {
INDArray image = loader.asMatrix(imageFile);
batchImages.add(image);
}
// Stack images into batch tensor
INDArray batchTensor = Nd4j.stack(0, batchImages.toArray(new INDArray[0]));
// Shape: [batchSize, channels, height, width]
// Process batch...
}
try {
NativeImageLoader loader = new NativeImageLoader(224, 224, 3);
INDArray image = loader.asMatrix(new File("image.jpg"));
} catch (IOException e) {
// Handle file read errors
System.err.println("Error loading image: " + e.getMessage());
} catch (IllegalArgumentException e) {
// Handle invalid image dimensions or unsupported format
System.err.println("Invalid image: " + e.getMessage());
} catch (OutOfMemoryError e) {
// Handle memory issues with large images
System.err.println("Out of memory loading image, try smaller dimensions");
}
// For large datasets, consider streaming
ImageRecordReader reader = new ImageRecordReader(224, 224, 3, labelGenerator);
reader.initialize(imageSplit);
// Process one image at a time to reduce memory usage
while (reader.hasNext()) {
List<Writable> record = reader.next();
INDArray image = ((NDArrayWritable) record.get(0)).get();
// Process immediately and allow GC
processImage(image);
// Explicit cleanup for large datasets
if (needsCleanup()) {
System.gc();
}
}
// Use parallel streams for CPU-intensive preprocessing
List<File> imageFiles = getImageFiles();
List<INDArray> processedImages = imageFiles.parallelStream()
.map(file -> {
try {
return loader.asMatrix(file);
} catch (IOException e) {
throw new RuntimeException(e);
}
})
.collect(Collectors.toList());
public interface ImageTransform {
INDArray transform(INDArray image, Random random);
INDArray transform(INDArray image);
}
public interface PathLabelGenerator {
Writable getLabelForPath(String path);
Writable getLabelForPath(URI uri);
}
// Core image loading
public class NativeImageLoader;
public class ImageRecordReader implements RecordReader;
// Dataset loaders
public class CifarLoader;
public class LFWLoader;
// Label generators
public class ParentPathLabelGenerator implements PathLabelGenerator;
public class PatternPathLabelGenerator implements PathLabelGenerator;
Install with Tessl CLI
npx tessl i tessl/maven-org-datavec--datavec-api