0
# Encoding and Decoding
1
2
Avro's encoding and decoding framework provides low-level serialization support for multiple formats, including binary and JSON, along with validation and schema evolution capabilities. Encoders and decoders form the foundation for all Avro serialization operations.
3
4
## Capabilities
5
6
### Encoder Factory
7
8
Factory for creating various types of encoders for serializing data.
9
10
```java { .api }
11
public class EncoderFactory {
12
public static EncoderFactory get();
13
14
// Binary encoders
15
public BinaryEncoder binaryEncoder(OutputStream out, BinaryEncoder reuse);
16
public BinaryEncoder directBinaryEncoder(OutputStream out, BinaryEncoder reuse);
17
public BinaryEncoder blockingBinaryEncoder(OutputStream out, BinaryEncoder reuse);
18
19
// JSON encoders
20
public JsonEncoder jsonEncoder(Schema schema, OutputStream out) throws IOException;
21
public JsonEncoder jsonEncoder(Schema schema, OutputStream out, boolean pretty) throws IOException;
22
public JsonEncoder jsonEncoder(Schema schema, JsonGenerator generator) throws IOException;
23
24
// Validating encoder
25
public ValidatingEncoder validatingEncoder(Schema schema, Encoder encoder) throws IOException;
26
27
// Configuration
28
public EncoderFactory configureBlockSize(int size);
29
public EncoderFactory configureBufferSize(int size);
30
}
31
```
32
33
**Usage Examples:**
34
35
```java
36
// Create binary encoder
37
OutputStream outputStream = new ByteArrayOutputStream();
38
BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(outputStream, null);
39
40
// Reuse encoder for performance
41
BinaryEncoder reusedEncoder = null;
42
for (GenericRecord record : records) {
43
ByteArrayOutputStream out = new ByteArrayOutputStream();
44
reusedEncoder = EncoderFactory.get().binaryEncoder(out, reusedEncoder);
45
datumWriter.write(record, reusedEncoder);
46
reusedEncoder.flush();
47
}
48
49
// Create JSON encoder with pretty printing
50
OutputStream jsonOutput = new FileOutputStream("output.json");
51
JsonEncoder jsonEncoder = EncoderFactory.get().jsonEncoder(schema, jsonOutput, true);
52
53
// Create validating encoder
54
BinaryEncoder baseEncoder = EncoderFactory.get().binaryEncoder(outputStream, null);
55
ValidatingEncoder validatingEncoder = EncoderFactory.get().validatingEncoder(schema, baseEncoder);
56
57
// Configure factory settings
58
EncoderFactory factory = EncoderFactory.get()
59
.configureBlockSize(8192)
60
.configureBufferSize(4096);
61
BinaryEncoder configuredEncoder = factory.binaryEncoder(outputStream, null);
62
```
63
64
### Decoder Factory
65
66
Factory for creating various types of decoders for deserializing data.
67
68
```java { .api }
69
public class DecoderFactory {
70
public static DecoderFactory get();
71
72
// Binary decoders
73
public BinaryDecoder binaryDecoder(InputStream in, BinaryDecoder reuse);
74
public BinaryDecoder binaryDecoder(byte[] bytes, int offset, int length, BinaryDecoder reuse);
75
public BinaryDecoder binaryDecoder(byte[] bytes, BinaryDecoder reuse);
76
public BinaryDecoder directBinaryDecoder(InputStream in, BinaryDecoder reuse);
77
78
// JSON decoders
79
public JsonDecoder jsonDecoder(Schema schema, InputStream input) throws IOException;
80
public JsonDecoder jsonDecoder(Schema schema, String input) throws IOException;
81
82
// Validating decoder
83
public ValidatingDecoder validatingDecoder(Schema schema, Decoder decoder) throws IOException;
84
85
// Resolving decoder for schema evolution
86
public ResolvingDecoder resolvingDecoder(Schema writer, Schema reader, Decoder decoder) throws IOException;
87
88
// Configuration
89
public DecoderFactory configureDecoderBufferSize(int size);
90
}
91
```
92
93
**Usage Examples:**
94
95
```java
96
// Create binary decoder from input stream
97
InputStream inputStream = new FileInputStream("data.avro");
98
BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
99
100
// Reuse decoder for performance
101
BinaryDecoder reusedDecoder = null;
102
for (byte[] data : dataBlocks) {
103
reusedDecoder = DecoderFactory.get().binaryDecoder(data, reusedDecoder);
104
GenericRecord record = datumReader.read(null, reusedDecoder);
105
processRecord(record);
106
}
107
108
// Create JSON decoder
109
String jsonData = """{"name": "John", "age": 30}""";
110
JsonDecoder jsonDecoder = DecoderFactory.get().jsonDecoder(schema, jsonData);
111
112
// Create validating decoder
113
BinaryDecoder baseDecoder = DecoderFactory.get().binaryDecoder(inputStream, null);
114
ValidatingDecoder validatingDecoder = DecoderFactory.get().validatingDecoder(schema, baseDecoder);
115
116
// Create resolving decoder for schema evolution
117
Schema writerSchema = parseSchema(writerSchemaJson);
118
Schema readerSchema = parseSchema(readerSchemaJson);
119
ResolvingDecoder resolvingDecoder = DecoderFactory.get()
120
.resolvingDecoder(writerSchema, readerSchema, baseDecoder);
121
```
122
123
### Datum Writer Interface
124
125
Core interface for writing datum objects to encoders.
126
127
```java { .api }
128
public interface DatumWriter<D> {
129
void setSchema(Schema schema);
130
void write(D datum, Encoder out) throws IOException;
131
}
132
```
133
134
**Usage Examples:**
135
136
```java
137
// Implement custom datum writer
138
public class CustomUserWriter implements DatumWriter<User> {
139
private Schema schema;
140
141
@Override
142
public void setSchema(Schema schema) {
143
this.schema = schema;
144
}
145
146
@Override
147
public void write(User user, Encoder out) throws IOException {
148
out.writeString(user.getName());
149
out.writeInt(user.getAge());
150
if (user.getEmail() != null) {
151
out.writeIndex(1); // Union index for non-null
152
out.writeString(user.getEmail());
153
} else {
154
out.writeIndex(0); // Union index for null
155
out.writeNull();
156
}
157
}
158
}
159
160
// Use custom writer
161
CustomUserWriter writer = new CustomUserWriter();
162
writer.setSchema(userSchema);
163
164
BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(outputStream, null);
165
User user = new User("Alice", 25, "alice@example.com");
166
writer.write(user, encoder);
167
encoder.flush();
168
```
169
170
### Datum Reader Interface
171
172
Core interface for reading datum objects from decoders.
173
174
```java { .api }
175
public interface DatumReader<D> {
176
void setSchema(Schema schema);
177
void setExpected(Schema reader);
178
D read(D reuse, Decoder in) throws IOException;
179
}
180
```
181
182
**Usage Examples:**
183
184
```java
185
// Implement custom datum reader
186
public class CustomUserReader implements DatumReader<User> {
187
private Schema writerSchema;
188
private Schema readerSchema;
189
190
@Override
191
public void setSchema(Schema schema) {
192
this.writerSchema = schema;
193
}
194
195
@Override
196
public void setExpected(Schema reader) {
197
this.readerSchema = reader;
198
}
199
200
@Override
201
public User read(User reuse, Decoder in) throws IOException {
202
String name = in.readString();
203
int age = in.readInt();
204
205
// Handle union for email field
206
int unionIndex = in.readIndex();
207
String email = null;
208
if (unionIndex == 1) {
209
email = in.readString();
210
} else {
211
in.readNull();
212
}
213
214
return new User(name, age, email);
215
}
216
}
217
218
// Use custom reader
219
CustomUserReader reader = new CustomUserReader();
220
reader.setSchema(userSchema);
221
222
BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
223
User user = reader.read(null, decoder);
224
```
225
226
### Binary Encoding
227
228
High-performance binary format encoding and decoding.
229
230
```java { .api }
231
public abstract class BinaryEncoder extends Encoder {
232
// Primitive type writing
233
public abstract void writeNull() throws IOException;
234
public abstract void writeBoolean(boolean b) throws IOException;
235
public abstract void writeInt(int n) throws IOException;
236
public abstract void writeLong(long n) throws IOException;
237
public abstract void writeFloat(float f) throws IOException;
238
public abstract void writeDouble(double d) throws IOException;
239
public abstract void writeString(String str) throws IOException;
240
public abstract void writeBytes(ByteBuffer bytes) throws IOException;
241
public abstract void writeBytes(byte[] bytes, int start, int len) throws IOException;
242
243
// Complex type writing
244
public abstract void writeFixed(byte[] bytes, int start, int len) throws IOException;
245
public abstract void writeEnum(int e) throws IOException;
246
public abstract void writeArrayStart() throws IOException;
247
public abstract void writeArrayEnd() throws IOException;
248
public abstract void writeMapStart() throws IOException;
249
public abstract void writeMapEnd() throws IOException;
250
public abstract void startItem() throws IOException;
251
public abstract void writeIndex(int unionIndex) throws IOException;
252
}
253
254
public class BinaryDecoder extends Decoder {
255
// Primitive type reading
256
public boolean readBoolean() throws IOException;
257
public int readInt() throws IOException;
258
public long readLong() throws IOException;
259
public float readFloat() throws IOException;
260
public double readDouble() throws IOException;
261
public String readString() throws IOException;
262
public ByteBuffer readBytes(ByteBuffer old) throws IOException;
263
264
// Complex type reading
265
public void readFixed(byte[] bytes, int start, int len) throws IOException;
266
public int readEnum() throws IOException;
267
public long readArrayStart() throws IOException;
268
public long readMapStart() throws IOException;
269
public long arrayNext() throws IOException;
270
public long mapNext() throws IOException;
271
public int readIndex() throws IOException;
272
}
273
```
274
275
**Usage Examples:**
276
277
```java
278
// Manual binary encoding
279
BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(outputStream, null);
280
281
// Write record fields manually
282
encoder.writeString("John Doe"); // name field
283
encoder.writeInt(30); // age field
284
encoder.writeIndex(1); // union index for non-null email
285
encoder.writeString("john@example.com"); // email value
286
287
encoder.flush();
288
289
// Manual binary decoding
290
BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
291
292
// Read record fields manually
293
String name = decoder.readString();
294
int age = decoder.readInt();
295
int emailUnionIndex = decoder.readIndex();
296
String email = null;
297
if (emailUnionIndex == 1) {
298
email = decoder.readString();
299
}
300
301
System.out.println("Name: " + name + ", Age: " + age + ", Email: " + email);
302
```
303
304
### JSON Encoding
305
306
Human-readable JSON format encoding and decoding.
307
308
```java { .api }
309
public class JsonEncoder extends ParsingEncoder {
310
// Configured through EncoderFactory
311
// Supports pretty printing and custom JsonGenerator
312
}
313
314
public class JsonDecoder extends ParsingDecoder {
315
// Configured through DecoderFactory
316
// Parses JSON according to schema
317
}
318
```
319
320
**Usage Examples:**
321
322
```java
323
// JSON encoding with pretty printing
324
OutputStream jsonOutput = new ByteArrayOutputStream();
325
JsonEncoder jsonEncoder = EncoderFactory.get().jsonEncoder(schema, jsonOutput, true);
326
327
GenericRecord record = new GenericData.Record(schema);
328
record.put("name", "Alice");
329
record.put("age", 25);
330
record.put("email", "alice@example.com");
331
332
GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
333
writer.write(record, jsonEncoder);
334
jsonEncoder.flush();
335
336
String jsonString = jsonOutput.toString();
337
System.out.println("JSON output:\n" + jsonString);
338
339
// JSON decoding
340
String jsonInput = """
341
{
342
"name": "Bob",
343
"age": 35,
344
"email": "bob@example.com"
345
}
346
""";
347
348
JsonDecoder jsonDecoder = DecoderFactory.get().jsonDecoder(schema, jsonInput);
349
GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
350
GenericRecord decodedRecord = reader.read(null, jsonDecoder);
351
352
System.out.println("Decoded name: " + decodedRecord.get("name"));
353
```
354
355
### Validating Encoder/Decoder
356
357
Validation layer that ensures data conforms to schema during encoding/decoding.
358
359
```java { .api }
360
public class ValidatingEncoder extends ParsingEncoder {
361
// Validates data against schema during encoding
362
// Throws AvroTypeException for schema violations
363
}
364
365
public class ValidatingDecoder extends ParsingDecoder {
366
// Validates data against schema during decoding
367
// Throws AvroTypeException for schema violations
368
}
369
```
370
371
**Usage Examples:**
372
373
```java
374
// Validating encoder
375
BinaryEncoder baseEncoder = EncoderFactory.get().binaryEncoder(outputStream, null);
376
ValidatingEncoder validatingEncoder = EncoderFactory.get().validatingEncoder(schema, baseEncoder);
377
378
GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
379
380
try {
381
// This will validate the record against the schema
382
writer.write(record, validatingEncoder);
383
validatingEncoder.flush();
384
} catch (AvroTypeException e) {
385
System.err.println("Schema validation failed: " + e.getMessage());
386
}
387
388
// Validating decoder
389
BinaryDecoder baseDecoder = DecoderFactory.get().binaryDecoder(inputStream, null);
390
ValidatingDecoder validatingDecoder = DecoderFactory.get().validatingDecoder(schema, baseDecoder);
391
392
GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
393
394
try {
395
GenericRecord validatedRecord = reader.read(null, validatingDecoder);
396
System.out.println("Record passed validation");
397
} catch (AvroTypeException e) {
398
System.err.println("Validation failed during decoding: " + e.getMessage());
399
}
400
```
401
402
### Schema Evolution Support
403
404
ResolvingDecoder enables reading data written with different but compatible schemas.
405
406
```java { .api }
407
public class ResolvingDecoder extends ValidatingDecoder {
408
// Handles schema evolution during decoding
409
// Automatically resolves differences between writer and reader schemas
410
// Applies default values for missing fields
411
// Skips fields not present in reader schema
412
}
413
```
414
415
**Usage Examples:**
416
417
```java
418
// Schema evolution example
419
String writerSchemaJson = """
420
{
421
"type": "record",
422
"name": "User",
423
"fields": [
424
{"name": "name", "type": "string"},
425
{"name": "age", "type": "int"}
426
]
427
}
428
""";
429
430
String readerSchemaJson = """
431
{
432
"type": "record",
433
"name": "User",
434
"fields": [
435
{"name": "name", "type": "string"},
436
{"name": "age", "type": "int"},
437
{"name": "email", "type": ["null", "string"], "default": null},
438
{"name": "active", "type": "boolean", "default": true}
439
]
440
}
441
""";
442
443
Schema writerSchema = new Schema.Parser().parse(writerSchemaJson);
444
Schema readerSchema = new Schema.Parser().parse(readerSchemaJson);
445
446
// Create resolving decoder
447
BinaryDecoder baseDecoder = DecoderFactory.get().binaryDecoder(inputStream, null);
448
ResolvingDecoder resolvingDecoder = DecoderFactory.get()
449
.resolvingDecoder(writerSchema, readerSchema, baseDecoder);
450
451
// Read with schema evolution - missing fields get default values
452
GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(readerSchema);
453
GenericRecord evolvedRecord = reader.read(null, resolvingDecoder);
454
455
// New fields will have default values
456
System.out.println("Email: " + evolvedRecord.get("email")); // null
457
System.out.println("Active: " + evolvedRecord.get("active")); // true
458
```
459
460
## Types
461
462
```java { .api }
463
public class EncoderFactory {
464
// Factory for creating encoders
465
}
466
467
public class DecoderFactory {
468
// Factory for creating decoders
469
}
470
471
public interface DatumWriter<D> {
472
void setSchema(Schema schema);
473
void write(D datum, Encoder out) throws IOException;
474
}
475
476
public interface DatumReader<D> {
477
void setSchema(Schema schema);
478
void setExpected(Schema reader);
479
D read(D reuse, Decoder in) throws IOException;
480
}
481
482
public abstract class Encoder {
483
// Base class for all encoders
484
}
485
486
public abstract class Decoder {
487
// Base class for all decoders
488
}
489
490
public abstract class BinaryEncoder extends Encoder {
491
// High-performance binary encoding
492
}
493
494
public class BinaryDecoder extends Decoder {
495
// Binary format decoding
496
}
497
498
public class JsonEncoder extends ParsingEncoder {
499
// JSON format encoding
500
}
501
502
public class JsonDecoder extends ParsingDecoder {
503
// JSON format decoding
504
}
505
506
public class ValidatingEncoder extends ParsingEncoder {
507
// Schema validation during encoding
508
}
509
510
public class ValidatingDecoder extends ParsingDecoder {
511
// Schema validation during decoding
512
}
513
514
public class ResolvingDecoder extends ValidatingDecoder {
515
// Schema evolution support
516
}
517
```