CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/maven-org-apache-avro--avro

Apache Avro core components for data serialization with rich data structures, compact binary format, and schema evolution support

Pending
Overview
Eval results
Files

docs/encoding-decoding.md

Encoding and Decoding

Avro's encoding and decoding framework provides low-level serialization support for multiple formats, including binary and JSON, along with validation and schema evolution capabilities. Encoders and decoders form the foundation for all Avro serialization operations.

Capabilities

Encoder Factory

Factory for creating various types of encoders for serializing data.

// API summary (signatures only): factory that creates binary, JSON and validating encoders.
public class EncoderFactory {
    // Returns a factory instance; the usage examples obtain every encoder through it.
    public static EncoderFactory get();
    
    // Binary encoders
    // `reuse` may be null (as in the usage examples) or a previously returned encoder.
    public BinaryEncoder binaryEncoder(OutputStream out, BinaryEncoder reuse);
    public BinaryEncoder directBinaryEncoder(OutputStream out, BinaryEncoder reuse);
    public BinaryEncoder blockingBinaryEncoder(OutputStream out, BinaryEncoder reuse);
    
    // JSON encoders
    // The `pretty` flag selects pretty-printed output; a caller-supplied JsonGenerator is also accepted.
    public JsonEncoder jsonEncoder(Schema schema, OutputStream out) throws IOException;
    public JsonEncoder jsonEncoder(Schema schema, OutputStream out, boolean pretty) throws IOException;
    public JsonEncoder jsonEncoder(Schema schema, JsonGenerator generator) throws IOException;
    
    // Validating encoder
    // Wraps another encoder and checks writes against `schema`.
    public ValidatingEncoder validatingEncoder(Schema schema, Encoder encoder) throws IOException;
    
    // Configuration
    // Both methods return an EncoderFactory so calls can be chained.
    // NOTE(review): upstream Avro is understood to reject configure* calls on the shared
    // instance returned by get() — confirm before chaining these on get().
    public EncoderFactory configureBlockSize(int size);
    public EncoderFactory configureBufferSize(int size);
}

Usage Examples:

// Create binary encoder (passing null for `reuse` allocates a fresh encoder)
OutputStream outputStream = new ByteArrayOutputStream();
BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(outputStream, null);

// Reuse encoder for performance: hand the previous encoder back to the factory
// so it can be re-targeted at the new stream instead of allocating a new one.
BinaryEncoder reusedEncoder = null;
for (GenericRecord record : records) {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    reusedEncoder = EncoderFactory.get().binaryEncoder(out, reusedEncoder);
    datumWriter.write(record, reusedEncoder);
    reusedEncoder.flush(); // push buffered bytes into `out` before it is used
}

// Create JSON encoder with pretty printing
OutputStream jsonOutput = new FileOutputStream("output.json");
JsonEncoder jsonEncoder = EncoderFactory.get().jsonEncoder(schema, jsonOutput, true);

// Create validating encoder (wraps a binary encoder and checks writes against the schema)
BinaryEncoder baseEncoder = EncoderFactory.get().binaryEncoder(outputStream, null);
ValidatingEncoder validatingEncoder = EncoderFactory.get().validatingEncoder(schema, baseEncoder);

// Configure factory settings.
// The shared factory returned by EncoderFactory.get() is not configurable: its
// configure* methods throw AvroRuntimeException. Configure a dedicated instance instead.
EncoderFactory factory = new EncoderFactory()
    .configureBlockSize(8192)
    .configureBufferSize(4096);
BinaryEncoder configuredEncoder = factory.binaryEncoder(outputStream, null);

Decoder Factory

Factory for creating various types of decoders for deserializing data.

// API summary (signatures only): factory that creates binary, JSON, validating and resolving decoders.
public class DecoderFactory {
    // Returns a factory instance; the usage examples obtain every decoder through it.
    public static DecoderFactory get();
    
    // Binary decoders
    // `reuse` may be null (as in the usage examples) or a previously returned decoder.
    public BinaryDecoder binaryDecoder(InputStream in, BinaryDecoder reuse);
    public BinaryDecoder binaryDecoder(byte[] bytes, int offset, int length, BinaryDecoder reuse);
    public BinaryDecoder binaryDecoder(byte[] bytes, BinaryDecoder reuse);
    public BinaryDecoder directBinaryDecoder(InputStream in, BinaryDecoder reuse);
    
    // JSON decoders
    // The JSON text is supplied either as a stream or as a String and is parsed according to `schema`.
    public JsonDecoder jsonDecoder(Schema schema, InputStream input) throws IOException;
    public JsonDecoder jsonDecoder(Schema schema, String input) throws IOException;
    
    // Validating decoder
    // Wraps another decoder and checks reads against `schema`.
    public ValidatingDecoder validatingDecoder(Schema schema, Decoder decoder) throws IOException;
    
    // Resolving decoder for schema evolution
    // Takes the schema the data was written with and the schema the reader expects.
    public ResolvingDecoder resolvingDecoder(Schema writer, Schema reader, Decoder decoder) throws IOException;
    
    // Configuration
    // Returns a DecoderFactory so the call can be chained.
    public DecoderFactory configureDecoderBufferSize(int size);
}

Usage Examples:

// Create binary decoder from input stream (passing null for `reuse` allocates a fresh decoder)
InputStream inputStream = new FileInputStream("data.avro");
BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(inputStream, null);

// Reuse decoder for performance: hand the previous decoder back to the factory
// so it can be re-pointed at the next byte array.
BinaryDecoder reusedDecoder = null;
for (byte[] data : dataBlocks) {
    reusedDecoder = DecoderFactory.get().binaryDecoder(data, reusedDecoder);
    GenericRecord record = datumReader.read(null, reusedDecoder);
    processRecord(record);
}

// Create JSON decoder.
// A Java text block must have a line break right after the opening delimiter;
// closing it on the content line keeps the value free of a trailing newline.
String jsonData = """
    {"name": "John", "age": 30}""";
JsonDecoder jsonDecoder = DecoderFactory.get().jsonDecoder(schema, jsonData);

// Create validating decoder (wraps a binary decoder and checks reads against the schema)
BinaryDecoder baseDecoder = DecoderFactory.get().binaryDecoder(inputStream, null);
ValidatingDecoder validatingDecoder = DecoderFactory.get().validatingDecoder(schema, baseDecoder);

// Create resolving decoder for schema evolution.
// Schemas are parsed with Avro's Schema.Parser; use a fresh parser per schema so that
// two definitions of the same record name do not clash.
Schema writerSchema = new Schema.Parser().parse(writerSchemaJson);
Schema readerSchema = new Schema.Parser().parse(readerSchemaJson);
ResolvingDecoder resolvingDecoder = DecoderFactory.get()
    .resolvingDecoder(writerSchema, readerSchema, baseDecoder);

Datum Writer Interface

Core interface for writing datum objects to encoders.

// Writes datum objects of type D to an Encoder.
public interface DatumWriter<D> {
    // Sets the schema the writer should use.
    void setSchema(Schema schema);
    // Serializes `datum` to `out`; I/O failures surface as IOException.
    void write(D datum, Encoder out) throws IOException;
}

Usage Examples:

// Hand-written DatumWriter for User: emits name, age, then the optional email as a union.
public class CustomUserWriter implements DatumWriter<User> {
    private Schema schema;

    /** Remembers the schema supplied by the caller. */
    @Override
    public void setSchema(Schema schema) {
        this.schema = schema;
    }

    /**
     * Writes the user's fields in order: name (string), age (int), email (union).
     * The email is written as union branch 1 followed by the string when present,
     * and as union branch 0 followed by a null otherwise.
     */
    @Override
    public void write(User user, Encoder out) throws IOException {
        out.writeString(user.getName());
        out.writeInt(user.getAge());

        final boolean emailMissing = user.getEmail() == null;
        if (emailMissing) {
            out.writeIndex(0); // Union index for null
            out.writeNull();
            return;
        }
        out.writeIndex(1); // Union index for non-null
        out.writeString(user.getEmail());
    }
}

// Use custom writer: build it, give it the schema, then serialize one user
CustomUserWriter writer = new CustomUserWriter();
writer.setSchema(userSchema);

User user = new User("Alice", 25, "alice@example.com");
BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(outputStream, null);
writer.write(user, encoder);
// Flush so the buffered bytes actually reach outputStream
encoder.flush();

Datum Reader Interface

Core interface for reading datum objects from decoders.

// Reads datum objects of type D from a Decoder.
public interface DatumReader<D> {
    // Sets the schema for the reader (the custom reader example stores it as the writer's schema).
    void setSchema(Schema schema);
    // Sets the schema the reader expects.
    // NOTE(review): in upstream Avro this method appears to be declared on GenericDatumReader
    // rather than on the DatumReader interface — confirm against the targeted Avro version.
    void setExpected(Schema reader);
    // Deserializes one datum from `in`; `reuse` is an optional instance to populate (examples pass null).
    D read(D reuse, Decoder in) throws IOException;
}

Usage Examples:

// Implement custom datum reader
// Hand-written DatumReader for User: reads name, age, then the optional email union,
// mirroring the field order a matching writer would produce.
public class CustomUserReader implements DatumReader<User> {
    private Schema writerSchema;
    private Schema readerSchema;

    /** Stores the schema the data was written with. */
    @Override
    public void setSchema(Schema schema) {
        this.writerSchema = schema;
    }

    /**
     * Stores the schema the reader expects.
     *
     * <p>Deliberately not annotated with {@code @Override}: depending on the Avro version,
     * {@code DatumReader} may not declare this method, and the annotation would then be a
     * compile error. Without it the class compiles either way.
     */
    public void setExpected(Schema reader) {
        this.readerSchema = reader;
    }

    /**
     * Reads one User from the decoder.
     *
     * @param reuse ignored; a new User is always created
     * @param in    decoder positioned at the start of a serialized User
     * @return the decoded User, with a null email when the union's null branch was written
     * @throws IOException if the underlying decoder fails
     */
    @Override
    public User read(User reuse, Decoder in) throws IOException {
        String name = in.readString();
        int age = in.readInt();

        // Handle union for email field: branch 1 carries a string, any other branch is null
        int unionIndex = in.readIndex();
        String email = null;
        if (unionIndex == 1) {
            email = in.readString();
        } else {
            in.readNull();
        }

        return new User(name, age, email);
    }
}

// Use custom reader: obtain a decoder over the input, then configure the reader and decode
BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(inputStream, null);

CustomUserReader reader = new CustomUserReader();
reader.setSchema(userSchema);
// No instance to reuse, so pass null
User user = reader.read(null, decoder);

Binary Encoding

High-performance binary format encoding and decoding.

// API summary (signatures only): encoder for Avro's binary format.
// Instances are obtained from EncoderFactory rather than constructed directly.
public abstract class BinaryEncoder extends Encoder {
    // Primitive type writing
    public abstract void writeNull() throws IOException;
    public abstract void writeBoolean(boolean b) throws IOException;
    public abstract void writeInt(int n) throws IOException;
    public abstract void writeLong(long n) throws IOException;
    public abstract void writeFloat(float f) throws IOException;
    public abstract void writeDouble(double d) throws IOException;
    public abstract void writeString(String str) throws IOException;
    public abstract void writeBytes(ByteBuffer bytes) throws IOException;
    public abstract void writeBytes(byte[] bytes, int start, int len) throws IOException;
    
    // Complex type writing
    public abstract void writeFixed(byte[] bytes, int start, int len) throws IOException;
    public abstract void writeEnum(int e) throws IOException;
    public abstract void writeArrayStart() throws IOException;
    public abstract void writeArrayEnd() throws IOException;
    public abstract void writeMapStart() throws IOException;
    public abstract void writeMapEnd() throws IOException;
    public abstract void startItem() throws IOException;
    // Selects the branch of a union; the examples use 0 for null and 1 for the non-null branch.
    public abstract void writeIndex(int unionIndex) throws IOException;
}

// API summary (signatures only): decoder for Avro's binary format.
// Instances are obtained from DecoderFactory rather than constructed directly.
public class BinaryDecoder extends Decoder {
    // Primitive type reading
    // readNull() is the counterpart of BinaryEncoder.writeNull() and is what the
    // custom reader example calls for the null branch of a union.
    public void readNull() throws IOException;
    public boolean readBoolean() throws IOException;
    public int readInt() throws IOException;
    public long readLong() throws IOException;
    public float readFloat() throws IOException;
    public double readDouble() throws IOException;
    public String readString() throws IOException;
    public ByteBuffer readBytes(ByteBuffer old) throws IOException;
    
    // Complex type reading
    public void readFixed(byte[] bytes, int start, int len) throws IOException;
    public int readEnum() throws IOException;
    public long readArrayStart() throws IOException;
    public long readMapStart() throws IOException;
    public long arrayNext() throws IOException;
    public long mapNext() throws IOException;
    // Returns the branch index of a union; the examples use 0 for null and 1 for the non-null branch.
    public int readIndex() throws IOException;
}

Usage Examples:

// Manual binary encoding
BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(outputStream, null);

// Write record fields manually, in schema order
encoder.writeString("John Doe");     // name field
encoder.writeInt(30);                // age field
encoder.writeIndex(1);               // union index for non-null email
encoder.writeString("john@example.com"); // email value

// Flush so the buffered bytes reach outputStream
encoder.flush();

// Manual binary decoding
BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(inputStream, null);

// Read record fields manually, in the same order they were written
String name = decoder.readString();
int age = decoder.readInt();
int emailUnionIndex = decoder.readIndex();
String email = null;
if (emailUnionIndex == 1) {
    email = decoder.readString();
} else {
    // Consume the null branch explicitly so every union branch is read symmetrically,
    // as the custom DatumReader example does.
    decoder.readNull();
}

System.out.println("Name: " + name + ", Age: " + age + ", Email: " + email);

JSON Encoding

Human-readable JSON format encoding and decoding.

// Encoder that writes data as JSON; created via the EncoderFactory.jsonEncoder(...) overloads.
public class JsonEncoder extends ParsingEncoder {
    // Configured through EncoderFactory
    // Supports pretty printing and custom JsonGenerator
}

// Decoder that reads data from JSON; created via the DecoderFactory.jsonDecoder(...) overloads.
public class JsonDecoder extends ParsingDecoder {
    // Configured through DecoderFactory
    // Parses JSON according to schema
}

Usage Examples:

// JSON encoding with pretty printing.
// Keep the concrete ByteArrayOutputStream type so the charset-aware toString overload is available.
ByteArrayOutputStream jsonOutput = new ByteArrayOutputStream();
JsonEncoder jsonEncoder = EncoderFactory.get().jsonEncoder(schema, jsonOutput, true);

GenericRecord record = new GenericData.Record(schema);
record.put("name", "Alice");
record.put("age", 25);
record.put("email", "alice@example.com");

GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
writer.write(record, jsonEncoder);
jsonEncoder.flush(); // push buffered JSON into jsonOutput before reading it back

// Decode the bytes explicitly as UTF-8 rather than with the platform default charset,
// so non-ASCII field values print correctly on every machine.
String jsonString = jsonOutput.toString(java.nio.charset.StandardCharsets.UTF_8);
System.out.println("JSON output:\n" + jsonString);

// JSON decoding
// NOTE(review): this input assumes "email" is a plain string field. If the schema declares
// it as a union such as ["null", "string"], Avro's JSON encoding expects the value wrapped
// with its branch name, e.g. "email": {"string": "bob@example.com"} — confirm against `schema`.
String jsonInput = """
{
  "name": "Bob",
  "age": 35,
  "email": "bob@example.com"
}
""";

JsonDecoder jsonDecoder = DecoderFactory.get().jsonDecoder(schema, jsonInput);
GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
GenericRecord decodedRecord = reader.read(null, jsonDecoder);

System.out.println("Decoded name: " + decodedRecord.get("name"));

Validating Encoder/Decoder

Validation layer that ensures data conforms to schema during encoding/decoding.

// Encoder wrapper created via EncoderFactory.validatingEncoder(schema, encoder).
public class ValidatingEncoder extends ParsingEncoder {
    // Validates data against schema during encoding
    // Throws AvroTypeException for schema violations
}

// Decoder wrapper created via DecoderFactory.validatingDecoder(schema, decoder).
public class ValidatingDecoder extends ParsingDecoder {
    // Validates data against schema during decoding
    // Throws AvroTypeException for schema violations
}

Usage Examples:

// Validating encoder: layer schema checks on top of a plain binary encoder
EncoderFactory encoders = EncoderFactory.get();
BinaryEncoder baseEncoder = encoders.binaryEncoder(outputStream, null);
ValidatingEncoder validatingEncoder = EncoderFactory.get().validatingEncoder(schema, baseEncoder);

GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);

try {
    // Every write is checked against the schema as it passes through the validating encoder
    writer.write(record, validatingEncoder);
    validatingEncoder.flush();
} catch (AvroTypeException typeError) {
    System.err.println("Schema validation failed: " + typeError.getMessage());
}

// Validating decoder: layer schema checks on top of a plain binary decoder
DecoderFactory decoders = DecoderFactory.get();
BinaryDecoder baseDecoder = decoders.binaryDecoder(inputStream, null);
ValidatingDecoder validatingDecoder = DecoderFactory.get().validatingDecoder(schema, baseDecoder);

GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);

try {
    // Reaching the println means the read completed without a schema violation
    GenericRecord validatedRecord = reader.read(null, validatingDecoder);
    System.out.println("Record passed validation");
} catch (AvroTypeException typeError) {
    System.err.println("Validation failed during decoding: " + typeError.getMessage());
}

Schema Evolution Support

ResolvingDecoder enables reading data written with different but compatible schemas.

// Decoder created via DecoderFactory.resolvingDecoder(writer, reader, decoder).
public class ResolvingDecoder extends ValidatingDecoder {
    // Handles schema evolution during decoding
    // Automatically resolves differences between writer and reader schemas
    // Applies default values for missing fields
    // Skips fields not present in reader schema
}

Usage Examples:

// Schema evolution example
// Writer schema: the shape the data was serialized with.
String writerSchemaJson = """
{
  "type": "record",
  "name": "User",
  "fields": [
    {"name": "name", "type": "string"},
    {"name": "age", "type": "int"}
  ]
}
""";

// Reader schema: adds two fields, each with a default so older data stays readable.
String readerSchemaJson = """
{
  "type": "record",
  "name": "User",
  "fields": [
    {"name": "name", "type": "string"},
    {"name": "age", "type": "int"},
    {"name": "email", "type": ["null", "string"], "default": null},
    {"name": "active", "type": "boolean", "default": true}
  ]
}
""";

// Separate parsers, because both schemas define a record named "User"
Schema writerSchema = new Schema.Parser().parse(writerSchemaJson);
Schema readerSchema = new Schema.Parser().parse(readerSchemaJson);

// Decode the raw bytes with a plain binary decoder
BinaryDecoder baseDecoder = DecoderFactory.get().binaryDecoder(inputStream, null);

// Read with schema evolution - missing fields get default values.
// Give the datum reader BOTH schemas and the raw decoder: GenericDatumReader builds and
// drives its own ResolvingDecoder internally. Passing it an already-resolving decoder
// (with only the reader schema) resolves twice and is fragile when the writer's and
// reader's field order differ.
GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(writerSchema, readerSchema);
GenericRecord evolvedRecord = reader.read(null, baseDecoder);

// New fields will have default values
System.out.println("Email: " + evolvedRecord.get("email"));    // null
System.out.println("Active: " + evolvedRecord.get("active"));  // true

Types

// Type summary: abbreviated stubs of the types described in the sections above.

public class EncoderFactory {
    // Factory for creating encoders (binary, JSON, validating)
}

public class DecoderFactory {
    // Factory for creating decoders (binary, JSON, validating, resolving)
}

// Writes datum objects of type D to an Encoder.
public interface DatumWriter<D> {
    void setSchema(Schema schema);
    void write(D datum, Encoder out) throws IOException;
}

// Reads datum objects of type D from a Decoder.
// NOTE(review): upstream Avro appears to declare setExpected on GenericDatumReader rather
// than on this interface — confirm against the targeted Avro version.
public interface DatumReader<D> {
    void setSchema(Schema schema);
    void setExpected(Schema reader);
    D read(D reuse, Decoder in) throws IOException;
}

public abstract class Encoder {
    // Base class for all encoders
}

public abstract class Decoder {
    // Base class for all decoders
}

public abstract class BinaryEncoder extends Encoder {
    // High-performance binary encoding
}

public class BinaryDecoder extends Decoder {
    // Binary format decoding
}

public class JsonEncoder extends ParsingEncoder {
    // JSON format encoding
}

public class JsonDecoder extends ParsingDecoder {
    // JSON format decoding
}

public class ValidatingEncoder extends ParsingEncoder {
    // Schema validation during encoding
}

public class ValidatingDecoder extends ParsingDecoder {
    // Schema validation during decoding
}

public class ResolvingDecoder extends ValidatingDecoder {
    // Schema evolution support (resolves writer/reader schema differences)
}

Install with Tessl CLI

npx tessl i tessl/maven-org-apache-avro--avro

docs

code-generation.md

encoding-decoding.md

file-operations.md

generic-data.md

index.md

message-operations.md

reflection-operations.md

schema-evolution.md

schema-system.md

tile.json