0
# Generic Data Operations
1
2
Generic data operations provide runtime data handling without code generation, offering maximum flexibility for dynamic data processing. Generic operations work directly with Avro's data model using runtime schema information, making them ideal for applications that process schemas dynamically.
3
4
## Capabilities
5
6
### Generic Data Utilities
7
8
Core utilities for working with generic data representations, validation, and comparison operations.
9
10
```java { .api }
11
public class GenericData {
12
public static GenericData get();
13
14
// Data creation
15
public Object createDatum(Schema schema);
16
public Object newRecord(Object old, Schema schema);
17
public Object newInstance(Class<?> c, Schema schema);
18
19
// Validation
20
public boolean validate(Schema schema, Object datum);
21
22
// Comparison and hashing
23
public int compare(Object o1, Object o2, Schema s);
24
protected int compare(Object o1, Object o2, Schema s, boolean equals);
25
public int hashCode(Object o, Schema s);
26
27
// String type handling
28
public static void setStringType(Schema s, StringType stringType);
29
public StringType getStringType();
30
31
public enum StringType { CharSequence, String, Utf8 }
32
}
33
```
34
35
**Usage Examples:**
36
37
```java
38
// Create generic data instance
39
GenericData genericData = GenericData.get();
40
41
// Create datum for schema
42
Schema userSchema = new Schema.Parser().parse(userSchemaJson);
43
Object userDatum = genericData.createDatum(userSchema);
44
45
// Validate data against schema
46
GenericRecord user = new GenericData.Record(userSchema);
47
user.put("name", "John Doe");
48
user.put("age", 30);
49
50
boolean isValid = genericData.validate(userSchema, user);
51
System.out.println("User record is valid: " + isValid);
52
53
// Compare two records
54
GenericRecord user1 = createUser("Alice", 25);
55
GenericRecord user2 = createUser("Bob", 30);
56
int comparison = genericData.compare(user1, user2, userSchema);
57
58
// Calculate hash code
59
int hashCode = genericData.hashCode(user1, userSchema);
60
```
61
62
### Generic Record Interface
63
64
Interface for generic record implementations providing field access by name and index.
65
66
```java { .api }
67
public interface GenericRecord extends IndexedRecord {
68
void put(String key, Object v);
69
Object get(String key);
70
}
71
72
public interface IndexedRecord extends GenericContainer {
73
void put(int i, Object v);
74
Object get(int i);
75
}
76
77
public interface GenericContainer {
78
Schema getSchema();
79
}
80
```
81
82
**Usage Examples:**
83
84
```java
85
// Create and populate generic record
86
Schema schema = new Schema.Parser().parse(schemaJson);
87
GenericRecord record = new GenericData.Record(schema);
88
89
// Set values by field name
90
record.put("name", "John Doe");
91
record.put("age", 30);
92
record.put("email", "john@example.com");
93
94
// Get values by field name
95
String name = (String) record.get("name");
96
Integer age = (Integer) record.get("age");
97
98
// Access by index (IndexedRecord interface)
99
record.put(0, "Jane Doe"); // Set first field
100
Object firstField = record.get(0); // Get first field
101
102
// Get schema
103
Schema recordSchema = record.getSchema();
104
System.out.println("Record type: " + recordSchema.getName());
105
```
106
107
### Generic Record Builder
108
109
Builder pattern implementation for constructing generic records with validation and default value handling.
110
111
```java { .api }
112
public class GenericRecordBuilder implements RecordBuilder<GenericRecord> {
113
public GenericRecordBuilder(Schema schema);
114
public GenericRecordBuilder(GenericRecordBuilder other);
115
public GenericRecordBuilder(GenericRecord other);
116
117
// Field setting
118
public GenericRecordBuilder set(String fieldName, Object value);
119
public GenericRecordBuilder set(Schema.Field field, Object value);
120
public GenericRecordBuilder set(int pos, Object value);
121
122
// Field checking
123
public boolean has(String fieldName);
124
public boolean has(Schema.Field field);
125
public boolean has(int pos);
126
127
// Field getting
128
public Object get(String fieldName);
129
public Object get(Schema.Field field);
130
public Object get(int pos);
131
132
// Field clearing
133
public GenericRecordBuilder clear(String fieldName);
134
public GenericRecordBuilder clear(Schema.Field field);
135
public GenericRecordBuilder clear(int pos);
136
137
// Build record
138
public GenericRecord build();
139
}
140
```
141
142
**Usage Examples:**
143
144
```java
145
// Build record using builder pattern
146
Schema userSchema = new Schema.Parser().parse(userSchemaJson);
147
148
GenericRecord user = new GenericRecordBuilder(userSchema)
149
.set("id", 123L)
150
.set("name", "John Doe")
151
.set("email", "john@example.com")
152
.set("age", 30)
153
.build();
154
155
// Check if fields are set
156
GenericRecordBuilder builder = new GenericRecordBuilder(userSchema);
157
builder.set("name", "Alice");
158
159
if (builder.has("name")) {
160
System.out.println("Name is set to: " + builder.get("name"));
161
}
162
163
// Build with partial data (uses defaults where available)
164
GenericRecord partialUser = new GenericRecordBuilder(userSchema)
165
.set("name", "Bob")
166
.build(); // Unset fields take their schema defaults; build() throws AvroRuntimeException if an unset field has no default
167
168
// Copy and modify existing record
169
GenericRecord modifiedUser = new GenericRecordBuilder(user)
170
.set("age", 31)
171
.set("email", "newemail@example.com")
172
.build();
173
```
174
175
### Generic Array Interface
176
177
Interface for generic array implementations supporting list operations and generic container functionality.
178
179
```java { .api }
180
public interface GenericArray<T> extends List<T>, GenericContainer {
181
void reverse();
182
T peek();
183
184
// Inherited from List<T>
185
boolean add(T element);
186
void add(int index, T element);
187
T get(int index);
188
T set(int index, T element);
189
T remove(int index);
190
int size();
191
boolean isEmpty();
192
void clear();
193
}
194
```
195
196
**Usage Examples:**
197
198
```java
199
// Create generic array
200
Schema arraySchema = Schema.createArray(Schema.create(Schema.Type.STRING));
201
GenericArray<String> genericArray = new GenericData.Array<>(arraySchema, new ArrayList<>());
202
203
// Add elements
204
genericArray.add("first");
205
genericArray.add("second");
206
genericArray.add("third");
207
208
// Access elements
209
String first = genericArray.get(0);
210
System.out.println("First element: " + first);
211
212
// Use list operations
213
genericArray.set(1, "modified second");
214
genericArray.remove(2);
215
216
// Reverse array
217
genericArray.reverse();
218
219
// Peek at the reusable object in the slot the next add() would fill (null if none)
220
String reusable = genericArray.peek();
221
222
// Get schema
223
Schema schema = genericArray.getSchema();
224
Schema elementType = schema.getElementType();
225
```
226
227
### Generic Fixed Interface
228
229
Interface for generic fixed-length data implementations.
230
231
```java { .api }
232
public interface GenericFixed extends GenericContainer {
233
byte[] bytes();
234
}
235
```
236
237
**Usage Examples:**
238
239
```java
240
// Create fixed schema
241
Schema fixedSchema = Schema.createFixed("MD5Hash", "MD5 hash value", null, 16);
242
243
// Create generic fixed
244
GenericFixed hash = new GenericData.Fixed(fixedSchema);
245
byte[] hashBytes = "1234567890123456".getBytes();
246
System.arraycopy(hashBytes, 0, hash.bytes(), 0, 16);
247
248
// Access bytes
249
byte[] bytes = hash.bytes();
250
System.out.println("Hash length: " + bytes.length);
251
252
// Get schema
253
Schema schema = hash.getSchema();
254
int expectedSize = schema.getFixedSize();
255
```
256
257
### Generic Datum Reader
258
259
DatumReader implementation for deserializing data into generic representations.
260
261
```java { .api }
262
public class GenericDatumReader<D> implements DatumReader<D> {
263
public GenericDatumReader();
264
public GenericDatumReader(Schema schema);
265
public GenericDatumReader(Schema writer, Schema reader);
266
267
// Schema management
268
public void setSchema(Schema schema);
269
public void setExpected(Schema reader);
270
public Schema getSchema();
271
public Schema getExpected();
272
273
// Reading operations
274
public D read(D reuse, Decoder in) throws IOException;
275
276
// Data model access
277
public GenericData getData();
278
public void setData(GenericData data);
279
}
280
```
281
282
**Usage Examples:**
283
284
```java
285
// Create reader for schema
286
Schema schema = new Schema.Parser().parse(schemaJson);
287
GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
288
289
// Read from decoder
290
InputStream inputStream = new FileInputStream("data.avro");
291
BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
292
GenericRecord record = reader.read(null, decoder);
293
294
// Schema evolution - different writer and reader schemas
295
Schema writerSchema = new Schema.Parser().parse(writerSchemaJson);
296
Schema readerSchema = new Schema.Parser().parse(readerSchemaJson);
297
GenericDatumReader<GenericRecord> evolvingReader =
298
new GenericDatumReader<>(writerSchema, readerSchema);
299
300
// Reuse objects for performance
301
GenericRecord reusedRecord = null;
302
while (!decoder.isEnd()) {
303
reusedRecord = reader.read(reusedRecord, decoder);
304
processRecord(reusedRecord);
305
}
306
```
307
308
### Generic Datum Writer
309
310
DatumWriter implementation for serializing generic data representations.
311
312
```java { .api }
313
public class GenericDatumWriter<D> implements DatumWriter<D> {
314
public GenericDatumWriter();
315
public GenericDatumWriter(Schema schema);
316
317
// Schema management
318
public void setSchema(Schema schema);
319
public Schema getSchema();
320
321
// Writing operations
322
public void write(D datum, Encoder out) throws IOException;
323
324
// Data model access
325
public GenericData getData();
326
public void setData(GenericData data);
327
}
328
```
329
330
**Usage Examples:**
331
332
```java
333
// Create writer for schema
334
Schema schema = new Schema.Parser().parse(schemaJson);
335
GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
336
337
// Write to encoder
338
OutputStream outputStream = new FileOutputStream("output.avro");
339
BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(outputStream, null);
340
341
GenericRecord record = new GenericData.Record(schema);
342
record.put("name", "John Doe");
343
record.put("age", 30);
344
345
writer.write(record, encoder);
346
encoder.flush();
347
348
// Write multiple records
349
List<GenericRecord> records = createRecords();
350
for (GenericRecord item : records) {
351
writer.write(item, encoder);
352
}
353
encoder.flush();
354
outputStream.close();
355
```
356
357
## Types
358
359
```java { .api }
360
public class GenericData {
361
public enum StringType { CharSequence, String, Utf8 }
362
363
public static class Record implements GenericRecord, Comparable<Record> {
364
public Record(Schema schema);
365
public Record(Record other, boolean deepCopy);
366
}
367
368
public static class Array<T> extends AbstractList<T> implements GenericArray<T> {
369
public Array(Schema schema, Collection<T> c);
370
public Array(int capacity, Schema schema);
371
}
372
373
public static class Fixed implements GenericFixed {
374
public Fixed(Schema schema);
375
public Fixed(Schema schema, byte[] bytes);
376
}
377
378
public static class EnumSymbol implements GenericEnumSymbol {
379
public EnumSymbol(Schema schema, String symbol);
380
}
381
}
382
383
public interface GenericContainer {
384
Schema getSchema();
385
}
386
387
public interface GenericRecord extends IndexedRecord {
388
void put(String key, Object v);
389
Object get(String key);
390
}
391
392
public interface IndexedRecord extends GenericContainer {
393
void put(int i, Object v);
394
Object get(int i);
395
}
396
397
public interface GenericArray<T> extends List<T>, GenericContainer {
398
void reverse();
399
T peek();
400
}
401
402
public interface GenericFixed extends GenericContainer {
403
byte[] bytes();
404
}
405
406
public interface GenericEnumSymbol extends GenericContainer {
407
String toString();
408
}
409
410
public class GenericRecordBuilder implements RecordBuilder<GenericRecord> {
411
// Builder implementation for GenericRecord
412
}
413
```