CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/maven-org-datavec--datavec-api

ETL library for machine learning data preprocessing across diverse formats including HDFS, Spark, Images, Video, Audio, CSV, and Excel

Pending
Overview
Eval results
Files

docs/image-processing.md

Image Processing

DataVec provides comprehensive image processing capabilities for computer vision and deep learning workflows. It includes native image loading, format conversion, dataset loaders, and specialized record readers for image data.

Capabilities

Native Image Loading

High-performance image loading with support for multiple formats and automatic preprocessing for machine learning pipelines.

public class NativeImageLoader {
    public NativeImageLoader();
    public NativeImageLoader(long height, long width);
    public NativeImageLoader(long height, long width, long channels);
    public NativeImageLoader(long height, long width, long channels, boolean centerCropIfNeeded);
    
    public INDArray asMatrix(File file) throws IOException;
    public INDArray asMatrix(InputStream inputStream) throws IOException;
    public INDArray asMatrix(String filename) throws IOException;
    public INDArray asRowVector(File file) throws IOException;
    public INDArray asRowVector(InputStream inputStream) throws IOException;
}

Constructor Parameters:

  • height, width - Target dimensions for image resizing
  • channels - Number of color channels (1 for grayscale, 3 for RGB, 4 for RGBA)
  • centerCropIfNeeded - Whether to center crop images that don't match aspect ratio

Usage Examples:

// Load image with specific dimensions
NativeImageLoader loader = new NativeImageLoader(224, 224, 3); // 224x224 RGB
INDArray imageMatrix = loader.asMatrix(new File("image.jpg"));

// Get image dimensions
long[] shape = imageMatrix.shape(); // [1, channels, height, width] — asMatrix returns a rank-4 array with a leading minibatch dimension of 1
System.out.println("Image shape: " + Arrays.toString(shape));

// Load as row vector (flattened)
INDArray rowVector = loader.asRowVector(new File("image.jpg"));
// Shape: [1, channels * height * width]

// Load from input stream
InputStream imageStream = new FileInputStream("image.png");
INDArray streamMatrix = loader.asMatrix(imageStream);

Image Record Reader

Specialized record reader for processing directories of images with automatic label generation from file paths.

public class ImageRecordReader implements RecordReader {
    public ImageRecordReader(long height, long width, long channels, PathLabelGenerator labelGenerator);
    public ImageRecordReader(long height, long width, long channels, PathLabelGenerator labelGenerator, boolean appendLabel);
    public ImageRecordReader(long height, long width, long channels, List<String> labels);
}

Usage Examples:

// Read images with parent directory as label
PathLabelGenerator labelGenerator = new ParentPathLabelGenerator();
ImageRecordReader imageReader = new ImageRecordReader(64, 64, 3, labelGenerator);

// Directory structure:
// /data/cats/cat1.jpg, cat2.jpg, ...
// /data/dogs/dog1.jpg, dog2.jpg, ...
FileSplit fileSplit = new FileSplit(new File("/data"), new String[]{"jpg", "png"}, true);
imageReader.initialize(fileSplit);

while (imageReader.hasNext()) {
    List<Writable> record = imageReader.next();
    // record[0] = NDArrayWritable containing image data
    // record[1] = IntWritable containing label index
    
    NDArrayWritable imageData = (NDArrayWritable) record.get(0);
    IntWritable label = (IntWritable) record.get(1);
    
    INDArray image = imageData.get(); // [1, 3, 64, 64] tensor (minibatch, channels, height, width)
    int classLabel = label.get();     // 0 for cats, 1 for dogs
}

Dataset Loaders

Specialized loaders for common computer vision datasets.

CIFAR Loader

public class CifarLoader {
    public static final int NUM_LABELS = 10;
    public static final int HEIGHT = 32;
    public static final int WIDTH = 32;
    public static final int CHANNELS = 3;
    
    public CifarLoader();
    public CifarLoader(String dataUrl);
    public CifarLoader(String dataUrl, String localCachePath);
    
    public DataSetIterator getDataSetIterator(int batchSize) throws IOException;
    public DataSetIterator getDataSetIterator(int batchSize, boolean train) throws IOException;
}

Usage Example:

// Load CIFAR-10 dataset
CifarLoader cifarLoader = new CifarLoader();

// Get training data iterator
DataSetIterator trainIterator = cifarLoader.getDataSetIterator(32, true);  // batch size 32
DataSetIterator testIterator = cifarLoader.getDataSetIterator(32, false);

// Process batches
while (trainIterator.hasNext()) {
    DataSet batch = trainIterator.next();
    INDArray features = batch.getFeatures(); // [32, 3, 32, 32]
    INDArray labels = batch.getLabels();     // [32, 10] one-hot encoded
}

LFW (Labeled Faces in the Wild) Loader

public class LFWLoader {
    public static final int NUM_LABELS = 5749;
    public static final int HEIGHT = 250;
    public static final int WIDTH = 250;
    public static final int CHANNELS = 3;
    
    public LFWLoader();
    public LFWLoader(String dataUrl);
    public LFWLoader(String dataUrl, String localCachePath);
    
    public DataSetIterator getDataSetIterator(int batchSize, int numExamples) throws IOException;
    public DataSetIterator getDataSetIterator(int batchSize, int numExamples, int[] imgDim) throws IOException;
}

Usage Example:

// Load LFW dataset for face recognition
LFWLoader lfwLoader = new LFWLoader();

// Custom image dimensions
int[] imageDimensions = {128, 128, 3}; // Resize to 128x128 RGB
DataSetIterator iterator = lfwLoader.getDataSetIterator(16, 1000, imageDimensions);

while (iterator.hasNext()) {
    DataSet batch = iterator.next();
    INDArray faceImages = batch.getFeatures(); // [16, 3, 128, 128]
    INDArray identityLabels = batch.getLabels(); // [16, 5749] one-hot
}

Image Transformations

Support for image preprocessing and augmentation through the ImageTransform interface.

public interface ImageTransform {
    INDArray transform(INDArray image, Random random);
    INDArray transform(INDArray image);
}

Common Transform Implementations:

// Example usage with transformations (implementations vary)
ImageTransform[] transforms = {
    new FlipImageTransform(0.5), // 50% chance horizontal flip
    new ScaleImageTransform(0.1), // Scale by ±10%
    new RotateImageTransform(15)  // Rotate by ±15 degrees
};

// Apply transformations in ImageRecordReader
ImageRecordReader reader = new ImageRecordReader(224, 224, 3, labelGenerator);
// Configure transforms (implementation-specific)

Path Label Generators

Automatic label extraction from file paths for supervised learning.

public interface PathLabelGenerator {
    Writable getLabelForPath(String path);
    Writable getLabelForPath(URI uri);
}

public class ParentPathLabelGenerator implements PathLabelGenerator {
    public ParentPathLabelGenerator();
}

public class PatternPathLabelGenerator implements PathLabelGenerator {
    public PatternPathLabelGenerator(String pattern, int groupIndex);
}

Usage Examples:

// Use parent directory name as label
PathLabelGenerator parentLabels = new ParentPathLabelGenerator();
// /data/cats/image.jpg -> label: "cats"
// /data/dogs/image.jpg -> label: "dogs"

// Use regex pattern to extract labels
PathLabelGenerator patternLabels = new PatternPathLabelGenerator("class_(\\d+)_", 1);
// class_0_image.jpg -> label: "0"
// class_1_image.jpg -> label: "1"

Writable label = parentLabels.getLabelForPath("/data/cats/cat001.jpg");
String labelString = label.toString(); // "cats"

Integration Patterns

With DataSetIterator

// Create image dataset iterator
PathLabelGenerator labelGen = new ParentPathLabelGenerator();
ImageRecordReader imageReader = new ImageRecordReader(224, 224, 3, labelGen);
FileSplit imageSplit = new FileSplit(new File("/path/to/images"), new String[]{"jpg", "png"}, true);
imageReader.initialize(imageSplit);

// Convert to DataSetIterator for DL4J
RecordReaderDataSetIterator dataSetIterator = new RecordReaderDataSetIterator(
    imageReader,
    32,        // batch size
    1,         // label index: the label's position within each record (image at index 0, label at index 1)
    10         // number of classes
);

// Train neural network
MultiLayerNetwork model = new MultiLayerNetwork(config);
model.fit(dataSetIterator);

Preprocessing Pipeline

// Complete image preprocessing pipeline
public class ImagePreprocessor {
    // Loader that decodes and resizes images to the configured dimensions.
    private final NativeImageLoader loader;
    // Augmentation steps, applied in the order they were supplied.
    private final ImageTransform[] transforms;

    /**
     * Creates a preprocessor that loads images at the given target size and
     * then runs each supplied transform in sequence.
     *
     * @param height     target image height in pixels
     * @param width      target image width in pixels
     * @param channels   number of color channels (e.g. 3 for RGB)
     * @param transforms optional augmentation steps, applied in argument order
     */
    public ImagePreprocessor(int height, int width, int channels, ImageTransform... transforms) {
        this.loader = new NativeImageLoader(height, width, channels);
        this.transforms = transforms;
    }

    /**
     * Loads an image file, applies every configured transform, and scales the
     * pixel values from the [0, 255] byte range into [0, 1].
     *
     * @param imageFile image file to read
     * @return the preprocessed image tensor
     * @throws IOException if the file cannot be read or decoded
     */
    public INDArray preprocess(File imageFile) throws IOException {
        INDArray result = loader.asMatrix(imageFile);

        // Run the augmentation chain; each step may replace the tensor.
        for (int i = 0; i < transforms.length; i++) {
            result = transforms[i].transform(result);
        }

        // In-place division normalizes pixel intensities to [0, 1].
        result.divi(255.0);
        return result;
    }
}

Batch Processing

// Process images in batches
List<File> imageFiles = Arrays.asList(/* image files */);
int batchSize = 32;
NativeImageLoader loader = new NativeImageLoader(224, 224, 3);

for (int i = 0; i < imageFiles.size(); i += batchSize) {
    List<File> batch = imageFiles.subList(i, Math.min(i + batchSize, imageFiles.size()));
    
    List<INDArray> batchImages = new ArrayList<>();
    for (File imageFile : batch) {
        INDArray image = loader.asMatrix(imageFile);
        batchImages.add(image);
    }
    
    // Stack images into batch tensor
    INDArray batchTensor = Nd4j.stack(0, batchImages.toArray(new INDArray[0]));
    // Shape: [batchSize, channels, height, width]
    
    // Process batch...
}

Error Handling

try {
    NativeImageLoader loader = new NativeImageLoader(224, 224, 3);
    INDArray image = loader.asMatrix(new File("image.jpg"));
} catch (IOException e) {
    // Handle file read errors
    System.err.println("Error loading image: " + e.getMessage());
} catch (IllegalArgumentException e) {
    // Handle invalid image dimensions or unsupported format
    System.err.println("Invalid image: " + e.getMessage());
} catch (OutOfMemoryError e) {
    // Handle memory issues with large images
    System.err.println("Out of memory loading image, try smaller dimensions");
}

Performance Considerations

Memory Management

// For large datasets, consider streaming
ImageRecordReader reader = new ImageRecordReader(224, 224, 3, labelGenerator);
reader.initialize(imageSplit);

// Process one image at a time to reduce memory usage
while (reader.hasNext()) {
    List<Writable> record = reader.next();
    INDArray image = ((NDArrayWritable) record.get(0)).get();
    
    // Process immediately and allow GC
    processImage(image);
    
    // Explicit cleanup for large datasets
    if (needsCleanup()) {
        System.gc();
    }
}

Parallel Processing

// Use parallel streams for CPU-intensive preprocessing
List<File> imageFiles = getImageFiles();

List<INDArray> processedImages = imageFiles.parallelStream()
    .map(file -> {
        try {
            return loader.asMatrix(file);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    })
    .collect(Collectors.toList());

Types

Core Interfaces

public interface ImageTransform {
    INDArray transform(INDArray image, Random random);
    INDArray transform(INDArray image);
}

public interface PathLabelGenerator {
    Writable getLabelForPath(String path);
    Writable getLabelForPath(URI uri);
}

Image Processing Classes

// Core image loading
public class NativeImageLoader;
public class ImageRecordReader implements RecordReader;

// Dataset loaders
public class CifarLoader;
public class LFWLoader;

// Label generators
public class ParentPathLabelGenerator implements PathLabelGenerator;
public class PatternPathLabelGenerator implements PathLabelGenerator;

Install with Tessl CLI

npx tessl i tessl/maven-org-datavec--datavec-api

docs

data-types.md

image-processing.md

index.md

input-sources.md

record-readers.md

transforms.md

tile.json