or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

data-types.md · image-processing.md · index.md · input-sources.md · record-readers.md · transforms.md

docs/data-types.md

0

# Data Types and Writables

1

2

DataVec uses a type-safe system of Writable objects to represent data values. These objects provide serialization capabilities, type safety, and seamless integration with Hadoop's I/O framework, while also supporting machine-learning-specific data types.

3

4

## Capabilities

5

6

### Core Writable Interface

7

8

The base interface that all DataVec data types implement. Provides serialization methods and string conversion for debugging and display.

9

10

```java { .api }

11

public interface Writable {

12

void write(DataOutput out) throws IOException;

13

void readFields(DataInput in) throws IOException;

14

String toString();

15

double toDouble();

16

float toFloat();

17

int toInt();

18

long toLong();

19

}

20

```

21

22

**Usage Example:**

23

24

```java

25

Writable writable = new DoubleWritable(3.14);

26

double value = writable.toDouble(); // 3.14

27

String text = writable.toString(); // "3.14"

28

```

29

30

### Primitive Type Wrappers

31

32

Type-safe wrappers for Java primitive types, commonly used for structured data representation in CSV files and database records.

33

34

```java { .api }

35

public class IntWritable implements Writable {

36

public IntWritable();

37

public IntWritable(int value);

38

public void set(int value);

39

public int get();

40

}

41

42

public class LongWritable implements Writable {

43

public LongWritable();

44

public LongWritable(long value);

45

public void set(long value);

46

public long get();

47

}

48

49

public class FloatWritable implements Writable {

50

public FloatWritable();

51

public FloatWritable(float value);

52

public void set(float value);

53

public float get();

54

}

55

56

public class DoubleWritable implements Writable {

57

public DoubleWritable();

58

public DoubleWritable(double value);

59

public void set(double value);

60

public double get();

61

}

62

63

public class ByteWritable implements Writable {

64

public ByteWritable();

65

public ByteWritable(byte value);

66

public void set(byte value);

67

public byte get();

68

}

69

```

70

71

**Usage Examples:**

72

73

```java

74

// Create and use integer values

75

IntWritable intVal = new IntWritable(42);

76

int primitive = intVal.get(); // 42

77

intVal.set(100); // Update value

78

79

// Create and use floating point values

80

DoubleWritable doubleVal = new DoubleWritable(3.14159);

81

double pi = doubleVal.get(); // 3.14159

82

83

// Type conversion

84

int piAsInt = doubleVal.toInt(); // 3 (truncated)

85

String piAsString = doubleVal.toString(); // "3.14159"

86

```

87

88

### Text and String Data

89

90

Handles text data with efficient string operations and encoding support.

91

92

```java { .api }

93

public class Text implements Writable {

94

public Text();

95

public Text(String string);

96

public Text(byte[] utf8);

97

public void set(String string);

98

public void set(byte[] utf8);

99

public String toString();

100

public byte[] getBytes();

101

public int getLength();

102

}

103

```

104

105

**Usage Example:**

106

107

```java

108

Text textData = new Text("Hello, DataVec!");

109

String value = textData.toString(); // "Hello, DataVec!"

110

byte[] bytes = textData.getBytes(); // UTF-8 encoded bytes

111

int length = textData.getLength(); // Length in bytes

112

113

// Update text value

114

textData.set("New text content");

115

```

116

117

### Binary Data

118

119

Handles raw binary data and byte arrays.

120

121

```java { .api }

122

public class BytesWritable implements Writable {

123

public BytesWritable();

124

public BytesWritable(byte[] bytes);

125

public void set(byte[] bytes);

126

public byte[] getBytes();

127

public int getLength();

128

public void setCapacity(int capacity);

129

}

130

```

131

132

**Usage Example:**

133

134

```java

135

// Create with byte array

136

byte[] data = {0x48, 0x65, 0x6C, 0x6C, 0x6F}; // "Hello" in bytes

137

BytesWritable bytesData = new BytesWritable(data);

138

139

byte[] retrieved = bytesData.getBytes(); // Original byte array

140

int length = bytesData.getLength(); // 5

141

142

// Update with new data

143

byte[] newData = "World".getBytes("UTF-8");

144

bytesData.set(newData);

145

```

146

147

### Boolean Data

148

149

Represents boolean values in the Writable system.

150

151

```java { .api }

152

public class BooleanWritable implements Writable {

153

public BooleanWritable();

154

public BooleanWritable(boolean value);

155

public void set(boolean value);

156

public boolean get();

157

}

158

```

159

160

**Usage Example:**

161

162

```java

163

BooleanWritable boolVal = new BooleanWritable(true);

164

boolean flag = boolVal.get(); // true

165

boolVal.set(false); // Update to false

166

```

167

168

### NDArray Integration

169

170

Wraps ND4J INDArray objects for machine learning tensor operations within the DataVec ecosystem.

171

172

```java { .api }

173

public class NDArrayWritable implements Writable {

174

public NDArrayWritable();

175

public NDArrayWritable(INDArray array);

176

public void set(INDArray array);

177

public INDArray get();

178

}

179

```

180

181

**Usage Example:**

182

183

```java

184

import org.nd4j.linalg.factory.Nd4j;

185

import org.nd4j.linalg.api.ndarray.INDArray;

186

187

// Create a tensor

188

INDArray tensor = Nd4j.create(new double[]{1.0, 2.0, 3.0, 4.0});

189

NDArrayWritable ndArrayWritable = new NDArrayWritable(tensor);

190

191

// Retrieve tensor

192

INDArray retrieved = ndArrayWritable.get();

193

double[] values = retrieved.toDoubleVector(); // [1.0, 2.0, 3.0, 4.0]

194

```

195

196

### Null Value Handling

197

198

Represents null or missing values in datasets.

199

200

```java { .api }

201

public class NullWritable implements Writable {

202

public static final NullWritable INSTANCE = new NullWritable();

203

private NullWritable();

204

}

205

```

206

207

**Usage Example:**

208

209

```java

210

Writable nullValue = NullWritable.INSTANCE;

211

boolean isNull = (nullValue instanceof NullWritable); // true

212

```

213

214

### Collection Types

215

216

Handles collections of Writable objects for complex data structures.

217

218

```java { .api }

219

public class ArrayWritable implements Writable {

220

public ArrayWritable(Class<? extends Writable> valueClass);

221

public ArrayWritable(Class<? extends Writable> valueClass, Writable[] values);

222

public ArrayWritable(String[] strings);

223

public void set(Writable[] values);

224

public Writable[] get();

225

public String[] toStrings();

226

}

227

```

228

229

**Usage Example:**

230

231

```java

232

// Create array of doubles

233

Writable[] doubles = {

234

new DoubleWritable(1.1),

235

new DoubleWritable(2.2),

236

new DoubleWritable(3.3)

237

};

238

239

ArrayWritable arrayWritable = new ArrayWritable(DoubleWritable.class, doubles);

240

Writable[] retrieved = arrayWritable.get();

241

242

// Convert to string array

243

String[] strings = arrayWritable.toStrings(); // ["1.1", "2.2", "3.3"]

244

```

245

246

## Data Conversion Patterns

247

248

### Type Conversion

249

250

All Writable objects support conversion to common Java types:

251

252

```java

253

Writable writable = new DoubleWritable(42.7);

254

255

double asDouble = writable.toDouble(); // 42.7

256

float asFloat = writable.toFloat(); // 42.7f

257

int asInt = writable.toInt(); // 42 (truncated)

258

long asLong = writable.toLong(); // 42L (truncated)

259

String asString = writable.toString(); // "42.7"

260

```

261

262

### WritableConverter Interface

263

264

Enables custom conversion logic for transforming data during record reading:

265

266

```java { .api }

267

public interface WritableConverter {

268

Writable convert(Writable writable) throws WritableConverterException;

269

}

270

271

public class SelfWritableConverter implements WritableConverter {

272

public Writable convert(Writable writable) throws WritableConverterException;

273

}

274

```

275

276

**Usage Example:**

277

278

```java

279

// Custom converter that squares DoubleWritable values

280

WritableConverter squareConverter = new WritableConverter() {

281

@Override

282

public Writable convert(Writable writable) throws WritableConverterException {

283

if (writable instanceof DoubleWritable) {

284

double value = ((DoubleWritable) writable).get();

285

return new DoubleWritable(value * value);

286

}

287

return writable; // Pass through any value that is not a DoubleWritable

288

}

289

};

290

291

// Use with RecordReaderDataSetIterator

292

RecordReaderDataSetIterator iterator = new RecordReaderDataSetIterator(

293

recordReader, squareConverter, batchSize, labelIndex, numClasses

294

);

295

```

296

297

## Serialization and I/O

298

299

All Writable objects support Hadoop-compatible serialization:

300

301

```java

302

// Serialize to output stream

303

Writable writable = new IntWritable(123);

304

ByteArrayOutputStream baos = new ByteArrayOutputStream();

305

DataOutputStream dos = new DataOutputStream(baos);

306

writable.write(dos);

307

byte[] serialized = baos.toByteArray();

308

309

// Deserialize from input stream

310

ByteArrayInputStream bais = new ByteArrayInputStream(serialized);

311

DataInputStream dis = new DataInputStream(bais);

312

IntWritable deserialized = new IntWritable();

313

deserialized.readFields(dis);

314

int value = deserialized.get(); // 123

315

```

316

317

## Common Usage Patterns

318

319

### Record Processing

320

321

```java

322

List<Writable> record = recordReader.next();

323

324

// Access by index with type conversion

325

int id = record.get(0).toInt();

326

String name = record.get(1).toString();

327

double score = record.get(2).toDouble();

328

boolean active = record.get(3).toInt() == 1; // Convert int to boolean

329

```

330

331

### Data Validation

332

333

```java

334

for (Writable writable : record) {

335

if (writable instanceof NullWritable) {

336

// Handle missing value

337

continue;

338

}

339

340

if (writable instanceof DoubleWritable) {

341

double value = writable.toDouble();

342

if (Double.isNaN(value) || Double.isInfinite(value)) {

343

// Handle invalid numeric values

344

}

345

}

346

}

347

```

348

349

## Types

350

351

### Core Interfaces and Classes

352

353

```java { .api }

354

public interface Writable {

355

void write(DataOutput out) throws IOException;

356

void readFields(DataInput in) throws IOException;

357

String toString();

358

double toDouble();

359

float toFloat();

360

int toInt();

361

long toLong();

362

}

363

364

// Primitive Writable Types

365

public class IntWritable implements Writable;

366

public class LongWritable implements Writable;

367

public class FloatWritable implements Writable;

368

public class DoubleWritable implements Writable;

369

public class ByteWritable implements Writable;

370

public class BooleanWritable implements Writable;

371

372

// Complex Data Types

373

public class Text implements Writable;

374

public class BytesWritable implements Writable;

375

public class NDArrayWritable implements Writable;

376

public class ArrayWritable implements Writable;

377

public class NullWritable implements Writable;

378

379

public interface WritableConverter {

380

Writable convert(Writable writable) throws WritableConverterException;

381

}

382

383

public class WritableConverterException extends Exception {

384

public WritableConverterException(String message);

385

public WritableConverterException(String message, Throwable cause);

386

}

387

```