Apache Avro core components for data serialization with rich data structures, compact binary format, and schema evolution support
npx @tessl/cli install tessl/maven-org-apache-avro--avro@1.12.0

Apache Avro is a comprehensive data serialization system that provides rich data structures, a compact and fast binary data format, and a container file format for storing persistent data. It enables developers to define schemas using JSON, serialize data to a compact binary format, and deserialize it back to objects, supporting schema evolution and cross-language interoperability.
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
<version>1.12.0</version>
</dependency>

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;

Common imports for specific use cases:
// Generic data operations
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecordBuilder;
// Generated class operations
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.avro.specific.SpecificData;
// Reflection-based operations
import org.apache.avro.reflect.ReflectData;
import org.apache.avro.reflect.ReflectDatumReader;
import org.apache.avro.reflect.ReflectDatumWriter;
// Encoding/Decoding
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.io.DecoderFactory;

// Define a schema
String schemaJson = """
{
"type": "record",
"name": "User",
"fields": [
{"name": "name", "type": "string"},
{"name": "age", "type": "int"},
{"name": "email", "type": ["null", "string"], "default": null}
]
}
""";
Schema schema = new Schema.Parser().parse(schemaJson);
// Create a generic record
GenericRecord user = new GenericRecordBuilder(schema)
.set("name", "John Doe")
.set("age", 30)
.set("email", "john@example.com")
.build();
// Write to file
File file = new File("users.avro");
DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
// DataFileWriter is Closeable: try-with-resources closes it even if create() or append() throws.
try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
  dataFileWriter.create(schema, file);
  dataFileWriter.append(user);
}
// Read from file
DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);
// DataFileReader is Closeable: try-with-resources closes it even if hasNext() or next() throws.
try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(file, datumReader)) {
  while (dataFileReader.hasNext()) {
    GenericRecord readUser = dataFileReader.next();
    System.out.println("Name: " + readUser.get("name"));
    System.out.println("Age: " + readUser.get("age"));
  }
}

Apache Avro's architecture centers around several key components:
This design enables Avro to serve as the foundation for data pipelines, messaging systems, storage systems, and any application requiring efficient data serialization with strong schema management.
Core functionality for defining, parsing, and working with Avro schemas. Schemas serve as the contract for data structure, evolution, and validation across the entire Avro ecosystem.
// Schema creation and parsing
public static Schema create(Schema.Type type);
public static Schema createRecord(String name, String doc, String namespace, boolean isError, List<Schema.Field> fields);
public static Schema createEnum(String name, String doc, String namespace, List<String> values);
public static Schema createArray(Schema elementType);
public static Schema createMap(Schema valueType);
public static Schema createUnion(List<Schema> types);
// Schema.Parser for JSON parsing
public class Parser {
public Schema parse(String jsonSchema);
public Schema parse(File file) throws IOException;
public Schema parse(InputStream in) throws IOException;
}
// SchemaBuilder for fluent construction
public static SchemaBuilder.TypeBuilder<Schema> builder();
public static SchemaBuilder.TypeBuilder<Schema> builder(String namespace);

Runtime data handling without code generation, providing maximum flexibility for dynamic data processing. Generic operations work directly with Avro's data model using runtime schema information.
// Core generic data utilities
public static GenericData get();
public Object createDatum(Schema schema);
public boolean validate(Schema schema, Object datum);
// Generic record interface
public interface GenericRecord extends IndexedRecord {
void put(String key, Object v);
Object get(String key);
}
// Generic record builder
public class GenericRecordBuilder implements RecordBuilder<GenericRecord> {
public GenericRecordBuilder(Schema schema);
public GenericRecordBuilder set(String fieldName, Object value);
public GenericRecord build();
}

Comprehensive file-based data storage and retrieval with embedded schemas, metadata, and compression support. Avro data files provide self-describing, splittable, and compressible data storage.
// File reading
public class DataFileReader<D> extends DataFileStream<D> implements FileReader<D> {
public DataFileReader(File file, DatumReader<D> reader) throws IOException;
public DataFileReader(SeekableInput sin, DatumReader<D> reader) throws IOException;
public void seek(long position) throws IOException;
}
// File writing
public class DataFileWriter<D> implements Closeable, Flushable {
public DataFileWriter<D> create(Schema schema, OutputStream outs) throws IOException;
public DataFileWriter<D> create(Schema schema, File file) throws IOException;
public void append(D datum) throws IOException;
public long sync() throws IOException;
}
// Stream reading
public class DataFileStream<D> implements Iterator<D>, Iterable<D>, Closeable {
public DataFileStream(InputStream in, DatumReader<D> reader) throws IOException;
public Schema getSchema();
}

Low-level serialization framework supporting multiple formats (binary, JSON) with validation and schema evolution capabilities. Encoders and decoders provide the foundation for all Avro serialization operations.
// Encoder factory
public static EncoderFactory get();
public BinaryEncoder binaryEncoder(OutputStream out, BinaryEncoder reuse);
public JsonEncoder jsonEncoder(Schema schema, OutputStream out) throws IOException;
public ValidatingEncoder validatingEncoder(Schema schema, Encoder encoder) throws IOException;
// Decoder factory
public static DecoderFactory get();
public BinaryDecoder binaryDecoder(InputStream in, BinaryDecoder reuse);
public JsonDecoder jsonDecoder(Schema schema, InputStream input) throws IOException;
public ResolvingDecoder resolvingDecoder(Schema writer, Schema reader, Decoder decoder) throws IOException;
// Core interfaces
public interface DatumWriter<D> {
void setSchema(Schema schema);
void write(D datum, Encoder out) throws IOException;
}
public interface DatumReader<D> {
void setSchema(Schema schema);
D read(D reuse, Decoder in) throws IOException;
}

Integration with generated Avro classes for type-safe, high-performance data operations. Specific operations provide compile-time type checking and optimized serialization for generated classes.
// Specific data utilities
public static SpecificData get();
public static String getClassName(Schema schema);
public Schema getSchema(Class<?> c);
// Generated class interface
public interface SpecificRecord extends IndexedRecord {
Schema getSchema();
}
// Specific I/O operations
public class SpecificDatumReader<T> extends GenericDatumReader<T>;
public class SpecificDatumWriter<T> extends GenericDatumWriter<T>;
// Annotations for generated classes
@Target(ElementType.TYPE)
@Retention(RetentionPolicy.RUNTIME)
public @interface AvroGenerated;

Automatic schema generation and data handling for existing Java classes using reflection and annotations. Reflection support enables seamless integration with existing POJOs and domain objects.
// Reflection data utilities
public static ReflectData get();
public Schema getSchema(Type type);
public Schema getSchema(Class<?> type);
// Reflection I/O operations
public class ReflectDatumReader<T> extends SpecificDatumReader<T>;
public class ReflectDatumWriter<T> extends SpecificDatumWriter<T>;
// Reflection annotations
@Target({ElementType.FIELD, ElementType.TYPE})
@Retention(RetentionPolicy.RUNTIME)
public @interface AvroName {
String value();
}
@Target({ElementType.FIELD, ElementType.TYPE})
@Retention(RetentionPolicy.RUNTIME)
public @interface Nullable;

Efficient encoding and decoding of individual objects for messaging systems and data exchange. Message operations provide header-based schema identification and raw data support.
// Message encoder interface
public interface MessageEncoder<D> {
byte[] encode(D datum) throws IOException;
void encode(D datum, OutputStream stream) throws IOException;
}
// Message decoder interface
public interface MessageDecoder<D> {
D decode(InputStream stream) throws IOException;
D decode(byte[] encoded) throws IOException;
}
// Binary message implementations
public class BinaryMessageEncoder<D> implements MessageEncoder<D>;
public class BinaryMessageDecoder<D> extends MessageDecoder.BaseDecoder<D>;
// Schema store for message decoding
public interface SchemaStore {
Schema findByFingerprint(long fingerprint);
}

Tools and utilities for managing schema changes over time while maintaining backward and forward compatibility. Evolution support enables long-term data management and system integration.
// Compatibility checking
public class SchemaCompatibility {
public static SchemaPairCompatibility checkReaderWriterCompatibility(Schema reader, Schema writer);
public static SchemaCompatibilityResult checkReaderWriterCompatibility(Schema reader, List<Schema> writers);
}
// Logical types for schema evolution
public class LogicalTypes {
public static LogicalType decimal(int precision);
public static LogicalType decimal(int precision, int scale);
public static LogicalType uuid();
public static LogicalType date();
public static LogicalType timestampMillis();
}
// Schema resolution
public class ResolvingDecoder extends ValidatingDecoder {
// Automatically handles schema evolution during decoding
}

// Core schema type
public abstract class Schema extends JsonProperties implements Serializable {
public enum Type {
RECORD, ENUM, ARRAY, MAP, UNION, FIXED, STRING, BYTES, INT, LONG, FLOAT, DOUBLE, BOOLEAN, NULL
}
public static class Field extends JsonProperties {
public Field(String name, Schema schema, String doc, Object defaultVal);
public String name();
public Schema schema();
public Object defaultVal();
}
}
// Exception hierarchy
public class AvroRuntimeException extends RuntimeException;
public class AvroTypeException extends AvroRuntimeException;
public class SchemaParseException extends AvroRuntimeException;
public class AvroRemoteException extends Exception;
// Utility types
public class Utf8 implements Comparable<Utf8>, CharSequence, Externalizable {
public Utf8(String string);
public Utf8(byte[] bytes);
}