or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

code-generation.md encoding-decoding.md file-operations.md generic-data.md index.md message-operations.md reflection-operations.md schema-evolution.md schema-system.md

docs/index.md

0

# Apache Avro

1

2

Apache Avro is a comprehensive data serialization system that provides rich data structures, a compact and fast binary data format, and a container file to store persistent data. It enables developers to define schemas using JSON, serialize data to a compact binary format, and deserialize it back to objects, supporting schema evolution and cross-language interoperability.

3

4

## Package Information

5

6

- **Package Name**: org.apache.avro:avro

7

- **Package Type**: maven

8

- **Language**: Java

9

- **Installation**: Add to your Maven pom.xml:

10

```xml

11

<dependency>

12

<groupId>org.apache.avro</groupId>

13

<artifactId>avro</artifactId>

14

<version>1.12.0</version>

15

</dependency>

16

```

17

18

## Core Imports

19

20

```java

21

import org.apache.avro.Schema;

22

import org.apache.avro.generic.GenericData;

23

import org.apache.avro.generic.GenericRecord;

24

import org.apache.avro.file.DataFileReader;

25

import org.apache.avro.file.DataFileWriter;

26

import org.apache.avro.io.DatumReader;

27

import org.apache.avro.io.DatumWriter;

28

```

29

30

Common imports for specific use cases:

31

```java

32

// Generic data operations

33

import org.apache.avro.generic.GenericDatumReader;

34

import org.apache.avro.generic.GenericDatumWriter;

35

import org.apache.avro.generic.GenericRecordBuilder;

36

37

// Generated class operations

38

import org.apache.avro.specific.SpecificDatumReader;

39

import org.apache.avro.specific.SpecificDatumWriter;

40

import org.apache.avro.specific.SpecificData;

41

42

// Reflection-based operations

43

import org.apache.avro.reflect.ReflectData;

44

import org.apache.avro.reflect.ReflectDatumReader;

45

import org.apache.avro.reflect.ReflectDatumWriter;

46

47

// Encoding/Decoding

48

import org.apache.avro.io.EncoderFactory;

49

import org.apache.avro.io.DecoderFactory;

50

```

51

52

## Basic Usage

53

54

```java

55

// Define a schema

56

String schemaJson = """

57

{

58

"type": "record",

59

"name": "User",

60

"fields": [

61

{"name": "name", "type": "string"},

62

{"name": "age", "type": "int"},

63

{"name": "email", "type": ["null", "string"], "default": null}

64

]

65

}

66

""";

67

Schema schema = new Schema.Parser().parse(schemaJson);

68

69

// Create a generic record

70

GenericRecord user = new GenericRecordBuilder(schema)

71

.set("name", "John Doe")

72

.set("age", 30)

73

.set("email", "john@example.com")

74

.build();

75

76

// Write to file

77

File file = new File("users.avro");

78

DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);

79

DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter);

80

dataFileWriter.create(schema, file);

81

dataFileWriter.append(user);

82

dataFileWriter.close();

83

84

// Read from file

85

DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);

86

DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(file, datumReader);

87

while (dataFileReader.hasNext()) {

88

GenericRecord readUser = dataFileReader.next();

89

System.out.println("Name: " + readUser.get("name"));

90

System.out.println("Age: " + readUser.get("age"));

91

}

92

dataFileReader.close();

93

```

94

95

## Architecture

96

97

Apache Avro's architecture centers around several key components:

98

99

- **Schema System**: JSON-based schema definitions that serve as the contract for data structure and evolution

100

- **Data Models**: Three approaches for working with data - Generic (runtime), Specific (code generation), and Reflect (reflection-based, with optional annotations)

101

- **Serialization Framework**: Efficient binary encoding/decoding with multiple format support (binary, JSON)

102

- **File Storage**: Container files with embedded schemas, metadata, and compression support

103

- **RPC Framework**: Protocol definitions for remote procedure calls with schema-based contracts

104

- **Schema Evolution**: Forward and backward compatibility through reader/writer schema resolution

105

106

This design enables Avro to serve as the foundation for data pipelines, messaging systems, storage systems, and any application requiring efficient data serialization with strong schema management.

107

108

## Capabilities

109

110

### Schema Definition and Management

111

112

Core functionality for defining, parsing, and working with Avro schemas. Schemas serve as the contract for data structure, evolution, and validation across the entire Avro ecosystem.

113

114

```java { .api }

115

// Schema creation and parsing

116

public static Schema create(Schema.Type type);

117

public static Schema createRecord(String name, String doc, String namespace, boolean isError, List<Schema.Field> fields);

118

public static Schema createEnum(String name, String doc, String namespace, List<String> values);

119

public static Schema createArray(Schema elementType);

120

public static Schema createMap(Schema valueType);

121

public static Schema createUnion(List<Schema> types);

122

123

// Schema.Parser for JSON parsing

124

public class Parser {

125

public Schema parse(String jsonSchema);

126

public Schema parse(File file) throws IOException;

127

public Schema parse(InputStream in) throws IOException;

128

}

129

130

// SchemaBuilder for fluent construction

131

public static SchemaBuilder.TypeBuilder<Schema> builder();

132

public static SchemaBuilder.TypeBuilder<Schema> builder(String namespace);

133

```

134

135

[Schema System](./schema-system.md)

136

137

### Generic Data Operations

138

139

Runtime data handling without code generation, providing maximum flexibility for dynamic data processing. Generic operations work directly with Avro's data model using runtime schema information.

140

141

```java { .api }

142

// Core generic data utilities

143

public static GenericData get();

144

public Object createDatum(Schema schema);

145

public boolean validate(Schema schema, Object datum);

146

147

// Generic record interface

148

public interface GenericRecord extends IndexedRecord {

149

void put(String key, Object v);

150

Object get(String key);

151

}

152

153

// Generic record builder

154

public class GenericRecordBuilder implements RecordBuilder<GenericRecord> {

155

public GenericRecordBuilder(Schema schema);

156

public GenericRecordBuilder set(String fieldName, Object value);

157

public GenericRecord build();

158

}

159

```

160

161

[Generic Data](./generic-data.md)

162

163

### File I/O Operations

164

165

Comprehensive file-based data storage and retrieval with embedded schemas, metadata, and compression support. Avro data files provide self-describing, splittable, and compressible data storage.

166

167

```java { .api }

168

// File reading

169

public class DataFileReader<D> extends DataFileStream<D> implements FileReader<D> {

170

public DataFileReader(File file, DatumReader<D> reader) throws IOException;

171

public DataFileReader(SeekableInput sin, DatumReader<D> reader) throws IOException;

172

public void seek(long position) throws IOException;

173

}

174

175

// File writing

176

public class DataFileWriter<D> implements Closeable, Flushable {

177

public DataFileWriter<D> create(Schema schema, OutputStream outs) throws IOException;

178

public DataFileWriter<D> create(Schema schema, File file) throws IOException;

179

public void append(D datum) throws IOException;

180

public long sync() throws IOException;

181

}

182

183

// Stream reading

184

public class DataFileStream<D> implements Iterator<D>, Iterable<D>, Closeable {

185

public DataFileStream(InputStream in, DatumReader<D> reader) throws IOException;

186

public Schema getSchema();

187

}

188

```

189

190

[File Operations](./file-operations.md)

191

192

### Encoding and Decoding

193

194

Low-level serialization framework supporting multiple formats (binary, JSON) with validation and schema evolution capabilities. Encoders and decoders provide the foundation for all Avro serialization operations.

195

196

```java { .api }

197

// Encoder factory

198

public static EncoderFactory get();

199

public BinaryEncoder binaryEncoder(OutputStream out, BinaryEncoder reuse);

200

public JsonEncoder jsonEncoder(Schema schema, OutputStream out) throws IOException;

201

public ValidatingEncoder validatingEncoder(Schema schema, Encoder encoder) throws IOException;

202

203

// Decoder factory

204

public static DecoderFactory get();

205

public BinaryDecoder binaryDecoder(InputStream in, BinaryDecoder reuse);

206

public JsonDecoder jsonDecoder(Schema schema, InputStream input) throws IOException;

207

public ResolvingDecoder resolvingDecoder(Schema writer, Schema reader, Decoder decoder) throws IOException;

208

209

// Core interfaces

210

public interface DatumWriter<D> {

211

void setSchema(Schema schema);

212

void write(D datum, Encoder out) throws IOException;

213

}

214

215

public interface DatumReader<D> {

216

void setSchema(Schema schema);

217

D read(D reuse, Decoder in) throws IOException;

218

}

219

```

220

221

[Encoding and Decoding](./encoding-decoding.md)

222

223

### Code Generation Support

224

225

Integration with generated Avro classes for type-safe, high-performance data operations. Specific operations provide compile-time type checking and optimized serialization for generated POJOs.

226

227

```java { .api }

228

// Specific data utilities

229

public static SpecificData get();

230

public static String getClassName(Schema schema);

231

public Schema getSchema(Class<?> c);

232

233

// Generated class interface

234

public interface SpecificRecord extends IndexedRecord {

235

Schema getSchema();

236

}

237

238

// Specific I/O operations

239

public class SpecificDatumReader<T> extends GenericDatumReader<T>;

240

public class SpecificDatumWriter<T> extends GenericDatumWriter<T>;

241

242

// Annotations for generated classes

243

@Target(ElementType.TYPE)

244

@Retention(RetentionPolicy.RUNTIME)

245

public @interface AvroGenerated;

246

```

247

248

[Code Generation](./code-generation.md)

249

250

### Reflection-Based Operations

251

252

Automatic schema generation and data handling for existing Java classes using reflection and annotations. Reflection support enables seamless integration with existing POJOs and domain objects.

253

254

```java { .api }

255

// Reflection data utilities

256

public static ReflectData get();

257

public Schema getSchema(Type type);

258

public Schema getSchema(Class<?> type);

259

260

// Reflection I/O operations

261

public class ReflectDatumReader<T> extends SpecificDatumReader<T>;

262

public class ReflectDatumWriter<T> extends SpecificDatumWriter<T>;

263

264

// Reflection annotations

265

@Target({ElementType.FIELD, ElementType.TYPE})

266

@Retention(RetentionPolicy.RUNTIME)

267

public @interface AvroName {

268

String value();

269

}

270

271

@Target({ElementType.FIELD, ElementType.TYPE})

272

@Retention(RetentionPolicy.RUNTIME)

273

public @interface Nullable;

274

```

275

276

[Reflection Operations](./reflection-operations.md)

277

278

### Single-Object Messaging

279

280

Efficient encoding and decoding of individual objects for messaging systems and data exchange. Message operations provide header-based schema identification and raw data support.

281

282

```java { .api }

283

// Message encoder interface

284

public interface MessageEncoder<D> {

285

ByteBuffer encode(D datum) throws IOException;

286

void encode(D datum, OutputStream stream) throws IOException;

287

}

288

289

// Message decoder interface

290

public interface MessageDecoder<D> {

291

D decode(InputStream stream) throws IOException;

292

D decode(byte[] encoded) throws IOException;

293

}

294

295

// Binary message implementations

296

public class BinaryMessageEncoder<D> implements MessageEncoder<D>;

297

public class BinaryMessageDecoder<D> extends MessageDecoder.BaseDecoder<D>;

298

299

// Schema store for message decoding

300

public interface SchemaStore {

301

Schema findByFingerprint(long fingerprint);

302

}

303

```

304

305

[Message Operations](./message-operations.md)

306

307

### Schema Evolution and Compatibility

308

309

Tools and utilities for managing schema changes over time while maintaining backward and forward compatibility. Evolution support enables long-term data management and system integration.

310

311

```java { .api }

312

// Compatibility checking

313

public class SchemaCompatibility {

314

public static SchemaPairCompatibility checkReaderWriterCompatibility(Schema reader, Schema writer);

315

public static SchemaCompatibilityResult checkReaderWriterCompatibility(Schema reader, List<Schema> writers);

316

}

317

318

// Logical types for schema evolution

319

public class LogicalTypes {

320

public static LogicalType decimal(int precision);

321

public static LogicalType decimal(int precision, int scale);

322

public static LogicalType uuid();

323

public static LogicalType date();

324

public static LogicalType timestampMillis();

325

}

326

327

// Schema resolution

328

public class ResolvingDecoder extends ValidatingDecoder {

329

// Automatically handles schema evolution during decoding

330

}

331

```

332

333

[Schema Evolution](./schema-evolution.md)

334

335

## Common Types

336

337

```java { .api }

338

// Core schema type

339

public abstract class Schema extends JsonProperties implements Serializable {

340

public enum Type {

341

RECORD, ENUM, ARRAY, MAP, UNION, FIXED, STRING, BYTES, INT, LONG, FLOAT, DOUBLE, BOOLEAN, NULL

342

}

343

344

public static class Field extends JsonProperties {

345

public Field(String name, Schema schema, String doc, Object defaultVal);

346

public String name();

347

public Schema schema();

348

public Object defaultVal();

349

}

350

}

351

352

// Exception hierarchy

353

public class AvroRuntimeException extends RuntimeException;

354

public class AvroTypeException extends AvroRuntimeException;

355

public class SchemaParseException extends AvroRuntimeException;

356

public class AvroRemoteException extends Exception;

357

358

// Utility types

359

public class Utf8 implements Comparable<Utf8>, CharSequence, Externalizable {

360

public Utf8(String string);

361

public Utf8(byte[] bytes);

362

}

363

```