CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/maven-org-apache-avro--avro

Apache Avro core components for data serialization with rich data structures, compact binary format, and schema evolution support

Pending
Overview
Eval results
Files

file-operations.mddocs/

File Operations

Avro file operations provide comprehensive file-based data storage and retrieval with embedded schemas, metadata, and compression support. Avro data files are self-describing, splittable, and compressible, making them ideal for big data processing and long-term storage.

Capabilities

File Reading

Read Avro data files with random access and streaming capabilities.

public class DataFileReader<D> extends DataFileStream<D> implements FileReader<D> {
    public DataFileReader(File file, DatumReader<D> reader) throws IOException;
    public DataFileReader(SeekableInput sin, DatumReader<D> reader) throws IOException;
    
    // Random access operations
    public void seek(long position) throws IOException;
    public void sync(long position) throws IOException;
    public long tell() throws IOException;
    public long previousSync() throws IOException;
    
    // Metadata access
    public String getMetaString(String key);
    public byte[] getMeta(String key);
    public long getBlockCount();
    public long getBlockSize();
}

public interface FileReader<D> extends Iterator<D>, Iterable<D>, Closeable {
    public Schema getSchema();
    public void sync(long position) throws IOException;
    public boolean pastSync(long position) throws IOException;
    public long tell() throws IOException;
}

Usage Examples:

// Read from file with generic records
File avroFile = new File("users.avro");
DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
DataFileReader<GenericRecord> fileReader = new DataFileReader<>(avroFile, datumReader);

// Read all records
while (fileReader.hasNext()) {
    GenericRecord user = fileReader.next();
    System.out.println("Name: " + user.get("name"));
    System.out.println("Age: " + user.get("age"));
}
fileReader.close();

// Random access reading
DataFileReader<GenericRecord> randomReader = new DataFileReader<>(avroFile, datumReader);
Schema schema = randomReader.getSchema();

// Jump to the first sync point at or after a byte position.
// Note: seek(p) requires a position previously returned by tell(); for an
// arbitrary offset, use sync(position) to advance to the next sync marker.
long position = 1024;
randomReader.sync(position);
if (randomReader.hasNext()) {
    GenericRecord record = randomReader.next();
    processRecord(record);
}

// Access file metadata
String codec = randomReader.getMetaString("avro.codec");
byte[] schemaBytes = randomReader.getMeta("avro.schema");
System.out.println("Compression codec: " + codec);
randomReader.close();

File Writing

Write Avro data files with compression and metadata support.

public class DataFileWriter<D> implements Closeable, Flushable {
    public DataFileWriter(DatumWriter<D> dout);
    
    // File creation
    public DataFileWriter<D> create(Schema schema, OutputStream outs) throws IOException;
    public DataFileWriter<D> create(Schema schema, File file) throws IOException;
    public DataFileWriter<D> create(Schema schema, OutputStream outs, byte[] sync) throws IOException;
    
    // Compression and metadata operations (must be called before create)
    public DataFileWriter<D> setCodec(CodecFactory c);
    public DataFileWriter<D> setMeta(String key, byte[] value);
    public DataFileWriter<D> setMeta(String key, String value);
    public DataFileWriter<D> setMeta(String key, long value);
    public DataFileWriter<D> setSyncInterval(int syncInterval);
    
    // Writing operations
    public void append(D datum) throws IOException;
    public void appendTo(File file) throws IOException;
    
    // Synchronization and flushing
    public long sync() throws IOException;
    public void flush() throws IOException;
    public void fSync() throws IOException;
    
    // Closing
    public void close() throws IOException;
}

Usage Examples:

// Write generic records to file
Schema schema = new Schema.Parser().parse(schemaJson);
File outputFile = new File("output.avro");

DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<>(datumWriter);

// Create file with compression
fileWriter.setCodec(CodecFactory.deflateCodec(6));
fileWriter.setMeta("created.by", "MyApplication");
fileWriter.create(schema, outputFile);

// Write multiple records
List<GenericRecord> users = createUsers();
for (GenericRecord user : users) {
    fileWriter.append(user);
}

// Force sync periodically for large files
fileWriter.sync();

fileWriter.close();

// Write to output stream
OutputStream outputStream = new BufferedOutputStream(new FileOutputStream("stream.avro"));
DataFileWriter<GenericRecord> streamWriter = new DataFileWriter<>(datumWriter);
streamWriter.create(schema, outputStream);

for (GenericRecord user : users) {
    streamWriter.append(user);
}
streamWriter.close();

Stream Reading

Read Avro data from input streams without random access.

public class DataFileStream<D> implements Iterator<D>, Iterable<D>, Closeable {
    public DataFileStream(InputStream in, DatumReader<D> reader) throws IOException;
    
    // Schema and metadata access
    public Schema getSchema();
    public String getMetaString(String key);
    public byte[] getMeta(String key);
    public List<String> getMetaKeys();
    
    // Iterator operations
    public boolean hasNext();
    public D next();
    public D next(D reuse);
    
    // Stream operations
    public void close() throws IOException;
}

Usage Examples:

// Read from input stream
InputStream inputStream = new FileInputStream("data.avro");
DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
DataFileStream<GenericRecord> stream = new DataFileStream<>(inputStream, datumReader);

// Get schema from stream
Schema schema = stream.getSchema();
System.out.println("Schema: " + schema.toString(true));

// Read all records
GenericRecord reusedRecord = null;
while (stream.hasNext()) {
    reusedRecord = stream.next(reusedRecord); // Reuse object for performance
    processRecord(reusedRecord);
}
stream.close();

// Read from compressed stream
InputStream gzipStream = new GZIPInputStream(new FileInputStream("compressed.avro.gz"));
DataFileStream<GenericRecord> compressedStream = new DataFileStream<>(gzipStream, datumReader);

// Process compressed data
for (GenericRecord record : compressedStream) {
    System.out.println("Record: " + record);
}
compressedStream.close();

Seekable Input Interface

Interface for random access input operations.

public interface SeekableInput extends Closeable {
    void seek(long p) throws IOException;
    long tell() throws IOException;
    long length() throws IOException;
    int read(byte[] b, int off, int len) throws IOException;
}

Usage Examples:

// Implement custom seekable input
public class CustomSeekableInput implements SeekableInput {
    private final RandomAccessFile file;
    
    public CustomSeekableInput(File file) throws IOException {
        this.file = new RandomAccessFile(file, "r");
    }
    
    @Override
    public void seek(long p) throws IOException {
        file.seek(p);
    }
    
    @Override
    public long tell() throws IOException {
        return file.getFilePointer();
    }
    
    @Override
    public long length() throws IOException {
        return file.length();
    }
    
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        return file.read(b, off, len);
    }
    
    @Override
    public void close() throws IOException {
        file.close();
    }
}

// Use custom seekable input
SeekableInput seekableInput = new CustomSeekableInput(new File("data.avro"));
DatumReader<GenericRecord> reader = new GenericDatumReader<>();
DataFileReader<GenericRecord> fileReader = new DataFileReader<>(seekableInput, reader);

// Use random access capabilities: sync() advances to the next sync point
// at or after the given byte offset (seek() is only valid for positions
// previously returned by tell())
fileReader.sync(1000);
GenericRecord record = fileReader.next();
fileReader.close();

Compression Codecs

Support for various compression algorithms to reduce file size.

public abstract class Codec {
    public abstract String getName();
    public abstract ByteBuffer compress(ByteBuffer uncompressedData) throws IOException;
    public abstract ByteBuffer decompress(ByteBuffer compressedData) throws IOException;
    public abstract int hashCode();
    public abstract boolean equals(Object obj);
}

public class CodecFactory {
    public static CodecFactory nullCodec();
    public static CodecFactory deflateCodec(int level);
    public static CodecFactory snappyCodec();
    public static CodecFactory bzip2Codec();
    public static CodecFactory xzCodec(int level);
    public static CodecFactory zstandardCodec(int level);
    public static CodecFactory zstandardCodec(int level, boolean useChecksum);
    
    public static CodecFactory fromString(String codecName);
}

Usage Examples:

// Use different compression codecs
DataFileWriter<GenericRecord> writer = new DataFileWriter<>(datumWriter);

// Deflate compression (good balance of speed and compression)
writer.setCodec(CodecFactory.deflateCodec(6));

// Snappy compression (very fast, moderate compression)
writer.setCodec(CodecFactory.snappyCodec());

// BZip2 compression (slower, better compression)
writer.setCodec(CodecFactory.bzip2Codec());

// XZ compression (slowest, best compression)
writer.setCodec(CodecFactory.xzCodec(6));

// Zstandard compression (good speed and compression)
writer.setCodec(CodecFactory.zstandardCodec(3, true));

// No compression
writer.setCodec(CodecFactory.nullCodec());

writer.create(schema, outputFile);

// Check codec used in file
DataFileReader<GenericRecord> reader = new DataFileReader<>(file, datumReader);
String codecName = reader.getMetaString("avro.codec");
System.out.println("File uses codec: " + codecName);
reader.close();

File Metadata

Access and manipulate file-level metadata stored in Avro data files.

// Writer metadata operations
public DataFileWriter<D> setMeta(String key, byte[] value);
public DataFileWriter<D> setMeta(String key, String value);
public DataFileWriter<D> setMeta(String key, long value);

// Reader metadata operations  
public String getMetaString(String key);
public byte[] getMeta(String key);
public long getMetaLong(String key);
public List<String> getMetaKeys();

Usage Examples:

// Set metadata when writing
DataFileWriter<GenericRecord> writer = new DataFileWriter<>(datumWriter);
writer.setMeta("created.by", "MyApplication v1.2.3");
writer.setMeta("created.time", System.currentTimeMillis());
writer.setMeta("source.system", "production");
writer.setMeta("record.count.estimate", 1000000L);

byte[] customData = "custom metadata".getBytes();
writer.setMeta("custom.data", customData);

writer.create(schema, outputFile);

// Read metadata from file
DataFileReader<GenericRecord> reader = new DataFileReader<>(file, datumReader);

// Get all metadata keys
List<String> metaKeys = reader.getMetaKeys();
System.out.println("Metadata keys: " + metaKeys);

// Read specific metadata
String createdBy = reader.getMetaString("created.by");
long createdTime = reader.getMetaLong("created.time");
byte[] customData = reader.getMeta("custom.data");

System.out.println("Created by: " + createdBy);
System.out.println("Created time: " + new Date(createdTime));
System.out.println("Custom data: " + new String(customData));

// Standard Avro metadata
String codecName = reader.getMetaString("avro.codec");
String schemaJson = reader.getMetaString("avro.schema");
System.out.println("Codec: " + codecName);

reader.close();

Types

public class DataFileReader<D> extends DataFileStream<D> implements FileReader<D> {
    // File-based reader with random access
}

public class DataFileWriter<D> implements Closeable, Flushable, Syncable {
    // File writer with compression and metadata support
}

public class DataFileStream<D> implements Iterator<D>, Iterable<D>, Closeable {
    // Stream-based reader for sequential access
}

public interface FileReader<D> extends Iterator<D>, Iterable<D>, Closeable {
    Schema getSchema();
    void sync(long position) throws IOException;
    boolean pastSync(long position) throws IOException;
    long tell() throws IOException;
}

public interface SeekableInput extends Closeable {
    void seek(long p) throws IOException;
    long tell() throws IOException;
    long length() throws IOException;
    int read(byte[] b, int off, int len) throws IOException;
}

public interface Syncable {
    void sync() throws IOException;
}

public abstract class Codec {
    public abstract String getName();
    public abstract ByteBuffer compress(ByteBuffer data) throws IOException;
    public abstract ByteBuffer decompress(ByteBuffer data) throws IOException;
}

public class CodecFactory {
    // Factory for creating compression codecs
}

// Specific codec implementations
public class DeflateCodec extends Codec;
public class SnappyCodec extends Codec;
public class BZip2Codec extends Codec;
public class XZCodec extends Codec;
public class ZstandardCodec extends Codec;
public class NullCodec extends Codec;

Install with Tessl CLI

npx tessl i tessl/maven-org-apache-avro--avro

docs

code-generation.md

encoding-decoding.md

file-operations.md

generic-data.md

index.md

message-operations.md

reflection-operations.md

schema-evolution.md

schema-system.md

tile.json