or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

data-types.md · image-processing.md · index.md · input-sources.md · record-readers.md · transforms.md

docs/image-processing.md

0

# Image Processing

1

2

DataVec provides comprehensive image processing capabilities for computer vision and deep learning workflows. It includes native image loading, format conversion, dataset loaders, and specialized record readers for image data.

3

4

## Capabilities

5

6

### Native Image Loading

7

8

High-performance image loading with support for multiple formats and automatic preprocessing for machine learning pipelines.

9

10

```java { .api }

11

public class NativeImageLoader {

12

public NativeImageLoader();

13

public NativeImageLoader(long height, long width);

14

public NativeImageLoader(long height, long width, long channels);

15

public NativeImageLoader(long height, long width, long channels, boolean centerCropIfNeeded);

16

17

public INDArray asMatrix(File file) throws IOException;

18

public INDArray asMatrix(InputStream inputStream) throws IOException;

19

public INDArray asMatrix(String filename) throws IOException;

20

public INDArray asRowVector(File file) throws IOException;

21

public INDArray asRowVector(InputStream inputStream) throws IOException;

22

}

23

```

24

25

**Constructor Parameters:**

26

- `height`, `width` - Target dimensions for image resizing

27

- `channels` - Number of color channels (1 for grayscale, 3 for RGB, 4 for RGBA)

28

- `centerCropIfNeeded` - Whether to center-crop images whose aspect ratio does not match the target `height` × `width` before resizing

29

30

**Usage Examples:**

31

32

```java

33

// Load image with specific dimensions

34

NativeImageLoader loader = new NativeImageLoader(224, 224, 3); // 224x224 RGB

35

INDArray imageMatrix = loader.asMatrix(new File("image.jpg"));

36

37

// Get image dimensions

38

long[] shape = imageMatrix.shape(); // [channels, height, width]

39

System.out.println("Image shape: " + Arrays.toString(shape));

40

41

// Load as row vector (flattened)

42

INDArray rowVector = loader.asRowVector(new File("image.jpg"));

43

// Shape: [1, channels * height * width]

44

45

// Load from input stream

46

InputStream imageStream = new FileInputStream("image.png");

47

INDArray streamMatrix = loader.asMatrix(imageStream);

48

```

49

50

### Image Record Reader

51

52

Specialized record reader for processing directories of images with automatic label generation from file paths.

53

54

```java { .api }

55

public class ImageRecordReader implements RecordReader {

56

public ImageRecordReader(long height, long width, long channels, PathLabelGenerator labelGenerator);

57

public ImageRecordReader(long height, long width, long channels, PathLabelGenerator labelGenerator, boolean appendLabel);

58

public ImageRecordReader(long height, long width, long channels, List<String> labels);

59

}

60

```

61

62

**Usage Examples:**

63

64

```java

65

// Read images with parent directory as label

66

PathLabelGenerator labelGenerator = new ParentPathLabelGenerator();

67

ImageRecordReader imageReader = new ImageRecordReader(64, 64, 3, labelGenerator);

68

69

// Directory structure:

70

// /data/cats/cat1.jpg, cat2.jpg, ...

71

// /data/dogs/dog1.jpg, dog2.jpg, ...

72

FileSplit fileSplit = new FileSplit(new File("/data"), new String[]{"jpg", "png"}, true);

73

imageReader.initialize(fileSplit);

74

75

while (imageReader.hasNext()) {

76

List<Writable> record = imageReader.next();

77

// record[0] = NDArrayWritable containing image data

78

// record[1] = IntWritable containing label index

79

80

NDArrayWritable imageData = (NDArrayWritable) record.get(0);

81

IntWritable label = (IntWritable) record.get(1);

82

83

INDArray image = imageData.get(); // [3, 64, 64] tensor

84

int classLabel = label.get(); // 0 for cats, 1 for dogs

85

}

86

```

87

88

### Dataset Loaders

89

90

Specialized loaders for common computer vision datasets.

91

92

#### CIFAR Loader

93

94

```java { .api }

95

public class CifarLoader {

96

public static final int NUM_LABELS = 10;

97

public static final int HEIGHT = 32;

98

public static final int WIDTH = 32;

99

public static final int CHANNELS = 3;

100

101

public CifarLoader();

102

public CifarLoader(String dataUrl);

103

public CifarLoader(String dataUrl, String localCachePath);

104

105

public DataSetIterator getDataSetIterator(int batchSize) throws IOException;

106

public DataSetIterator getDataSetIterator(int batchSize, boolean train) throws IOException;

107

}

108

```

109

110

**Usage Example:**

111

112

```java

113

// Load CIFAR-10 dataset

114

CifarLoader cifarLoader = new CifarLoader();

115

116

// Get training data iterator

117

DataSetIterator trainIterator = cifarLoader.getDataSetIterator(32, true); // batch size 32

118

DataSetIterator testIterator = cifarLoader.getDataSetIterator(32, false);

119

120

// Process batches

121

while (trainIterator.hasNext()) {

122

DataSet batch = trainIterator.next();

123

INDArray features = batch.getFeatures(); // [32, 3, 32, 32]

124

INDArray labels = batch.getLabels(); // [32, 10] one-hot encoded

125

}

126

```

127

128

#### LFW (Labeled Faces in the Wild) Loader

129

130

```java { .api }

131

public class LFWLoader {

132

public static final int NUM_LABELS = 5749;

133

public static final int HEIGHT = 250;

134

public static final int WIDTH = 250;

135

public static final int CHANNELS = 3;

136

137

public LFWLoader();

138

public LFWLoader(String dataUrl);

139

public LFWLoader(String dataUrl, String localCachePath);

140

141

public DataSetIterator getDataSetIterator(int batchSize, int numExamples) throws IOException;

142

public DataSetIterator getDataSetIterator(int batchSize, int numExamples, int[] imgDim) throws IOException;

143

}

144

```

145

146

**Usage Example:**

147

148

```java

149

// Load LFW dataset for face recognition

150

LFWLoader lfwLoader = new LFWLoader();

151

152

// Custom image dimensions

153

int[] imageDimensions = {128, 128, 3}; // Resize to 128x128 RGB

154

DataSetIterator iterator = lfwLoader.getDataSetIterator(16, 1000, imageDimensions);

155

156

while (iterator.hasNext()) {

157

DataSet batch = iterator.next();

158

INDArray faceImages = batch.getFeatures(); // [16, 3, 128, 128]

159

INDArray identityLabels = batch.getLabels(); // [16, 5749] one-hot

160

}

161

```

162

163

### Image Transformations

164

165

Support for image preprocessing and augmentation through the ImageTransform interface.

166

167

```java { .api }

168

public interface ImageTransform {

169

INDArray transform(INDArray image, Random random);

170

INDArray transform(INDArray image);

171

}

172

```

173

174

**Common Transform Implementations:**

175

176

```java

177

// Example usage with transformations (implementations vary)

178

ImageTransform[] transforms = {

179

new FlipImageTransform(0.5), // 50% chance horizontal flip

180

new ScaleImageTransform(0.1), // Scale by ±10%

181

new RotateImageTransform(15) // Rotate by ±15 degrees

182

};

183

184

// Apply transformations in ImageRecordReader

185

ImageRecordReader reader = new ImageRecordReader(224, 224, 3, labelGenerator);

186

// Configure transforms (implementation-specific)

187

```

188

189

### Path Label Generators

190

191

Automatic label extraction from file paths for supervised learning.

192

193

```java { .api }

194

public interface PathLabelGenerator {

195

Writable getLabelForPath(String path);

196

Writable getLabelForPath(URI uri);

197

}

198

199

public class ParentPathLabelGenerator implements PathLabelGenerator {

200

public ParentPathLabelGenerator();

201

}

202

203

public class PatternPathLabelGenerator implements PathLabelGenerator {

204

public PatternPathLabelGenerator(String pattern, int groupIndex);

205

}

206

```

207

208

**Usage Examples:**

209

210

```java

211

// Use parent directory name as label

212

PathLabelGenerator parentLabels = new ParentPathLabelGenerator();

213

// /data/cats/image.jpg -> label: "cats"

214

// /data/dogs/image.jpg -> label: "dogs"

215

216

// Use regex pattern to extract labels

217

PathLabelGenerator patternLabels = new PatternPathLabelGenerator("class_(\\d+)_", 1);

218

// class_0_image.jpg -> label: "0"

219

// class_1_image.jpg -> label: "1"

220

221

Writable label = parentLabels.getLabelForPath("/data/cats/cat001.jpg");

222

String labelString = label.toString(); // "cats"

223

```

224

225

## Integration Patterns

226

227

### With DataSetIterator

228

229

```java

230

// Create image dataset iterator

231

PathLabelGenerator labelGen = new ParentPathLabelGenerator();

232

ImageRecordReader imageReader = new ImageRecordReader(224, 224, 3, labelGen);

233

FileSplit imageSplit = new FileSplit(new File("/path/to/images"), new String[]{"jpg", "png"}, true);

234

imageReader.initialize(imageSplit);

235

236

// Convert to DataSetIterator for DL4J

237

RecordReaderDataSetIterator dataSetIterator = new RecordReaderDataSetIterator(

238

imageReader,

239

32, // batch size

240

1, // label index (record[0] is the image, record[1] is the label)

241

10 // number of classes

242

);

243

244

// Train neural network

245

MultiLayerNetwork model = new MultiLayerNetwork(config);

246

model.fit(dataSetIterator);

247

```

248

249

### Preprocessing Pipeline

250

251

```java

252

// Complete image preprocessing pipeline

253

public class ImagePreprocessor {

254

private final NativeImageLoader loader;

255

private final ImageTransform[] transforms;

256

257

public ImagePreprocessor(int height, int width, int channels, ImageTransform... transforms) {

258

this.loader = new NativeImageLoader(height, width, channels);

259

this.transforms = transforms;

260

}

261

262

public INDArray preprocess(File imageFile) throws IOException {

263

INDArray image = loader.asMatrix(imageFile);

264

265

// Apply transformations

266

for (ImageTransform transform : transforms) {

267

image = transform.transform(image);

268

}

269

270

// Normalize pixel values to [0, 1]

271

image.divi(255.0);

272

273

return image;

274

}

275

}

276

```

277

278

### Batch Processing

279

280

```java

281

// Process images in batches

282

List<File> imageFiles = Arrays.asList(/* image files */);

283

int batchSize = 32;

284

NativeImageLoader loader = new NativeImageLoader(224, 224, 3);

285

286

for (int i = 0; i < imageFiles.size(); i += batchSize) {

287

List<File> batch = imageFiles.subList(i, Math.min(i + batchSize, imageFiles.size()));

288

289

List<INDArray> batchImages = new ArrayList<>();

290

for (File imageFile : batch) {

291

INDArray image = loader.asMatrix(imageFile);

292

batchImages.add(image);

293

}

294

295

// Stack images into batch tensor

296

INDArray batchTensor = Nd4j.stack(0, batchImages.toArray(new INDArray[0]));

297

// Shape: [batchSize, channels, height, width]

298

299

// Process batch...

300

}

301

```

302

303

## Error Handling

304

305

```java

306

try {

307

NativeImageLoader loader = new NativeImageLoader(224, 224, 3);

308

INDArray image = loader.asMatrix(new File("image.jpg"));

309

} catch (IOException e) {

310

// Handle file read errors

311

System.err.println("Error loading image: " + e.getMessage());

312

} catch (IllegalArgumentException e) {

313

// Handle invalid image dimensions or unsupported format

314

System.err.println("Invalid image: " + e.getMessage());

315

} catch (OutOfMemoryError e) {

316

// Handle memory issues with large images

317

System.err.println("Out of memory loading image, try smaller dimensions");

318

}

319

```

320

321

## Performance Considerations

322

323

### Memory Management

324

325

```java

326

// For large datasets, consider streaming

327

ImageRecordReader reader = new ImageRecordReader(224, 224, 3, labelGenerator);

328

reader.initialize(imageSplit);

329

330

// Process one image at a time to reduce memory usage

331

while (reader.hasNext()) {

332

List<Writable> record = reader.next();

333

INDArray image = ((NDArrayWritable) record.get(0)).get();

334

335

// Process immediately and allow GC

336

processImage(image);

337

338

// Explicit cleanup for large datasets

339

if (needsCleanup()) {

340

System.gc();

341

}

342

}

343

```

344

345

### Parallel Processing

346

347

```java

348

// Use parallel streams for CPU-intensive preprocessing

349

List<File> imageFiles = getImageFiles();

350

351

List<INDArray> processedImages = imageFiles.parallelStream()

352

.map(file -> {

353

try {

354

return loader.asMatrix(file);

355

} catch (IOException e) {

356

throw new RuntimeException(e);

357

}

358

})

359

.collect(Collectors.toList());

360

```

361

362

## Types

363

364

### Core Interfaces

365

366

```java { .api }

367

public interface ImageTransform {

368

INDArray transform(INDArray image, Random random);

369

INDArray transform(INDArray image);

370

}

371

372

public interface PathLabelGenerator {

373

Writable getLabelForPath(String path);

374

Writable getLabelForPath(URI uri);

375

}

376

```

377

378

### Image Processing Classes

379

380

```java { .api }

381

// Core image loading

382

public class NativeImageLoader;

383

public class ImageRecordReader implements RecordReader;

384

385

// Dataset loaders

386

public class CifarLoader;

387

public class LFWLoader;

388

389

// Label generators

390

public class ParentPathLabelGenerator implements PathLabelGenerator;

391

public class PatternPathLabelGenerator implements PathLabelGenerator;

392

```