0
# Data Types and Writables
1
2
DataVec uses a type-safe system of `Writable` objects to represent data values. Writables provide serialization, type safety, and seamless integration with Hadoop's I/O framework, while also supporting machine-learning-specific data types such as ND4J arrays.
3
4
## Capabilities
5
6
### Core Writable Interface
7
8
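`Writable` is the base interface that all DataVec data types implement. It provides serialization methods (`write`, `readFields`), numeric conversions (`toDouble`, `toFloat`, `toInt`, `toLong`), and string conversion (`toString`) for debugging and display.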
9
10
```java { .api }
11
public interface Writable {
12
void write(DataOutput out) throws IOException;
13
void readFields(DataInput in) throws IOException;
14
String toString();
15
double toDouble();
16
float toFloat();
17
int toInt();
18
long toLong();
19
}
20
```
21
22
**Usage Example:**
23
24
```java
25
Writable writable = new DoubleWritable(3.14);
26
double value = writable.toDouble(); // 3.14
27
String text = writable.toString(); // "3.14"
28
```
29
30
### Primitive Type Wrappers
31
32
These classes are type-safe wrappers for the Java primitive types `int`, `long`, `float`, `double`, and `byte`. They are commonly used to represent structured data such as CSV fields and database records.
33
34
```java { .api }
35
public class IntWritable implements Writable {
36
public IntWritable();
37
public IntWritable(int value);
38
public void set(int value);
39
public int get();
40
}
41
42
public class LongWritable implements Writable {
43
public LongWritable();
44
public LongWritable(long value);
45
public void set(long value);
46
public long get();
47
}
48
49
public class FloatWritable implements Writable {
50
public FloatWritable();
51
public FloatWritable(float value);
52
public void set(float value);
53
public float get();
54
}
55
56
public class DoubleWritable implements Writable {
57
public DoubleWritable();
58
public DoubleWritable(double value);
59
public void set(double value);
60
public double get();
61
}
62
63
public class ByteWritable implements Writable {
64
public ByteWritable();
65
public ByteWritable(byte value);
66
public void set(byte value);
67
public byte get();
68
}
69
```
70
71
**Usage Examples:**
72
73
```java
74
// Create and use integer values
75
IntWritable intVal = new IntWritable(42);
76
int primitive = intVal.get(); // 42
77
intVal.set(100); // Update value
78
79
// Create and use floating point values
80
DoubleWritable doubleVal = new DoubleWritable(3.14159);
81
double pi = doubleVal.get(); // 3.14159
82
83
// Type conversion
84
int piAsInt = doubleVal.toInt(); // 3 (truncated)
85
String piAsString = doubleVal.toString(); // "3.14159"
86
```
87
88
### Text and String Data
89
90
`Text` holds string data and exposes it both as a `String` and as UTF-8 encoded bytes. Note that `getLength()` reports the length in bytes, not in characters.
91
92
```java { .api }
93
public class Text implements Writable {
94
public Text();
95
public Text(String string);
96
public Text(byte[] utf8);
97
public void set(String string);
98
public void set(byte[] utf8);
99
public String toString();
100
public byte[] getBytes();
101
public int getLength();
102
}
103
```
104
105
**Usage Example:**
106
107
```java
108
Text textData = new Text("Hello, DataVec!");
109
String value = textData.toString(); // "Hello, DataVec!"
110
byte[] bytes = textData.getBytes(); // UTF-8 encoded bytes
111
int length = textData.getLength(); // Length in bytes
112
113
// Update text value
114
textData.set("New text content");
115
```
116
117
### Binary Data
118
119
Handles raw binary data and byte arrays.
120
121
```java { .api }
122
public class BytesWritable implements Writable {
123
public BytesWritable();
124
public BytesWritable(byte[] bytes);
125
public void set(byte[] bytes);
126
public byte[] getBytes();
127
public int getLength();
128
public void setCapacity(int capacity);
129
}
130
```
131
132
**Usage Example:**
133
134
```java
135
// Create with byte array
136
byte[] data = {0x48, 0x65, 0x6C, 0x6C, 0x6F}; // "Hello" in bytes
137
BytesWritable bytesData = new BytesWritable(data);
138
139
byte[] retrieved = bytesData.getBytes(); // Original byte array
140
int length = bytesData.getLength(); // 5
141
142
// Update with new data
143
byte[] newData = "World".getBytes("UTF-8");
144
bytesData.set(newData);
145
```
146
147
### Boolean Data
148
149
Represents boolean values in the Writable system.
150
151
```java { .api }
152
public class BooleanWritable implements Writable {
153
public BooleanWritable();
154
public BooleanWritable(boolean value);
155
public void set(boolean value);
156
public boolean get();
157
}
158
```
159
160
**Usage Example:**
161
162
```java
163
BooleanWritable boolVal = new BooleanWritable(true);
164
boolean flag = boolVal.get(); // true
165
boolVal.set(false); // Update to false
166
```
167
168
### NDArray Integration
169
170
`NDArrayWritable` wraps an ND4J `INDArray` so that tensors can be carried through DataVec alongside other Writable values.
171
172
```java { .api }
173
public class NDArrayWritable implements Writable {
174
public NDArrayWritable();
175
public NDArrayWritable(INDArray array);
176
public void set(INDArray array);
177
public INDArray get();
178
}
179
```
180
181
**Usage Example:**
182
183
```java
184
import org.nd4j.linalg.factory.Nd4j;
185
import org.nd4j.linalg.api.ndarray.INDArray;
186
187
// Create a tensor
188
INDArray tensor = Nd4j.create(new double[]{1.0, 2.0, 3.0, 4.0});
189
NDArrayWritable ndArrayWritable = new NDArrayWritable(tensor);
190
191
// Retrieve tensor
192
INDArray retrieved = ndArrayWritable.get();
193
double[] values = retrieved.toDoubleVector(); // [1.0, 2.0, 3.0, 4.0]
194
```
195
196
### Null Value Handling
197
198
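`NullWritable` represents a null or missing value in a dataset. It is a singleton: the constructor is private, so always use `NullWritable.INSTANCE` rather than creating a new instance.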
199
200
```java { .api }
201
public class NullWritable implements Writable {
202
public static final NullWritable INSTANCE = new NullWritable();
203
private NullWritable();
204
}
205
```
206
207
**Usage Example:**
208
209
```java
210
Writable nullValue = NullWritable.INSTANCE;
211
boolean isNull = (nullValue instanceof NullWritable); // true
212
```
213
214
### Collection Types
215
216
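`ArrayWritable` holds an array of Writable values together with their value class, for representing composite data. It can also be built from, and converted back to, a `String[]`.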
217
218
```java { .api }
219
public class ArrayWritable implements Writable {
220
public ArrayWritable(Class<? extends Writable> valueClass);
221
public ArrayWritable(Class<? extends Writable> valueClass, Writable[] values);
222
public ArrayWritable(String[] strings);
223
public void set(Writable[] values);
224
public Writable[] get();
225
public String[] toStrings();
226
}
227
```
228
229
**Usage Example:**
230
231
```java
232
// Create array of doubles
233
Writable[] doubles = {
234
new DoubleWritable(1.1),
235
new DoubleWritable(2.2),
236
new DoubleWritable(3.3)
237
};
238
239
ArrayWritable arrayWritable = new ArrayWritable(DoubleWritable.class, doubles);
240
Writable[] retrieved = arrayWritable.get();
241
242
// Convert to string array
243
String[] strings = arrayWritable.toStrings(); // ["1.1", "2.2", "3.3"]
244
```
245
246
## Data Conversion Patterns
247
248
### Type Conversion
249
250
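Every Writable exposes the conversion methods declared on the `Writable` interface (`toDouble()`, `toFloat()`, `toInt()`, `toLong()`, and `toString()`). Converting a floating-point value to `int` or `long` truncates the fractional part: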
251
252
```java
253
Writable writable = new DoubleWritable(42.7);
254
255
double asDouble = writable.toDouble(); // 42.7
256
float asFloat = writable.toFloat(); // 42.7f
257
int asInt = writable.toInt(); // 42 (truncated)
258
long asLong = writable.toLong(); // 42L (truncated)
259
String asString = writable.toString(); // "42.7"
260
```
261
262
### WritableConverter Interface
263
264
The `WritableConverter` interface enables custom conversion logic for transforming values during record reading:
265
266
```java { .api }
267
public interface WritableConverter {
268
Writable convert(Writable writable) throws WritableConverterException;
269
}
270
271
public class SelfWritableConverter implements WritableConverter {
272
public Writable convert(Writable writable) throws WritableConverterException;
273
}
274
```
275
276
**Usage Example:**
277
278
```java
279
// Custom converter that squares numeric values
280
WritableConverter squareConverter = new WritableConverter() {
281
@Override
282
public Writable convert(Writable writable) throws WritableConverterException {
283
if (writable instanceof DoubleWritable) {
284
double value = ((DoubleWritable) writable).get();
285
return new DoubleWritable(value * value);
286
}
287
return writable; // Pass through non-numeric values
288
}
289
};
290
291
// Use with RecordReaderDataSetIterator
292
RecordReaderDataSetIterator iterator = new RecordReaderDataSetIterator(
293
recordReader, squareConverter, batchSize, labelIndex, numClasses
294
);
295
```
296
297
## Serialization and I/O
298
299
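All Writable objects support Hadoop-compatible serialization: `write(DataOutput)` serializes a value, and `readFields(DataInput)` populates an existing (typically empty) instance from serialized bytes: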
300
301
```java
302
// Serialize to output stream
303
Writable writable = new IntWritable(123);
304
ByteArrayOutputStream baos = new ByteArrayOutputStream();
305
DataOutputStream dos = new DataOutputStream(baos);
306
writable.write(dos);
307
byte[] serialized = baos.toByteArray();
308
309
// Deserialize from input stream
310
ByteArrayInputStream bais = new ByteArrayInputStream(serialized);
311
DataInputStream dis = new DataInputStream(bais);
312
IntWritable deserialized = new IntWritable();
313
deserialized.readFields(dis);
314
int value = deserialized.get(); // 123
315
```
316
317
## Common Usage Patterns
318
319
### Record Processing
320
321
```java
322
List<Writable> record = recordReader.next();
323
324
// Access by index with type conversion
325
int id = record.get(0).toInt();
326
String name = record.get(1).toString();
327
double score = record.get(2).toDouble();
328
boolean active = record.get(3).toInt() == 1; // Convert int to boolean
329
```
330
331
### Data Validation
332
333
```java
334
for (Writable writable : record) {
335
if (writable instanceof NullWritable) {
336
// Handle missing value
337
continue;
338
}
339
340
if (writable instanceof DoubleWritable) {
341
double value = writable.toDouble();
342
if (Double.isNaN(value) || Double.isInfinite(value)) {
343
// Handle invalid numeric values
344
}
345
}
346
}
347
```
348
349
## Types
350
351
### Core Interfaces and Classes
352
353
```java { .api }
354
public interface Writable {
355
void write(DataOutput out) throws IOException;
356
void readFields(DataInput in) throws IOException;
357
String toString();
358
double toDouble();
359
float toFloat();
360
int toInt();
361
long toLong();
362
}
363
364
// Primitive Writable Types
365
public class IntWritable implements Writable;
366
public class LongWritable implements Writable;
367
public class FloatWritable implements Writable;
368
public class DoubleWritable implements Writable;
369
public class ByteWritable implements Writable;
370
public class BooleanWritable implements Writable;
371
372
// Complex Data Types
373
public class Text implements Writable;
374
public class BytesWritable implements Writable;
375
public class NDArrayWritable implements Writable;
376
public class ArrayWritable implements Writable;
377
public class NullWritable implements Writable;
378
379
public interface WritableConverter {
380
Writable convert(Writable writable) throws WritableConverterException;
381
}
382
383
public class WritableConverterException extends Exception {
384
public WritableConverterException(String message);
385
public WritableConverterException(String message, Throwable cause);
386
}
387
```