Apache Avro meta-framework that coordinates data serialization implementations across multiple programming languages
—
Core Avro schemas and protocol definitions that are shared across all language implementations, providing standardized data formats for serialization, RPC communication, and system integration.
Schemas that define the structure for Remote Procedure Call (RPC) communication between Avro-enabled systems.
// HandshakeRequest schema
{
  "type": "record",
  "name": "HandshakeRequest",
  "namespace": "org.apache.avro.ipc",
  "doc": "A handshake request sent by a client to a server when establishing an RPC connection",
  "fields": [
    {
      "name": "clientHash",
      "type": {
        "type": "fixed",
        "name": "MD5",
        "size": 16
      },
      "doc": "MD5 hash of the client's protocol schema"
    },
    {
      "name": "clientProtocol",
      "type": ["null", "string"],
      "default": null,
      "doc": "JSON representation of the client's protocol schema"
    },
    {
      "name": "serverHash",
      "type": "MD5",
      "doc": "MD5 hash of the server's protocol schema"
    },
    {
      "name": "meta",
      "type": [
        "null",
        {
          "type": "map",
          "values": "bytes"
        }
      ],
      "default": null,
      "doc": "Additional metadata for the handshake"
    }
  ]
}
// HandshakeResponse schema
{
  "type": "record",
  "name": "HandshakeResponse",
  "namespace": "org.apache.avro.ipc",
  "doc": "A handshake response sent by a server to a client during RPC connection establishment",
  "fields": [
    {
      "name": "match",
      "type": {
        "type": "enum",
        "name": "HandshakeMatch",
        "symbols": ["BOTH", "CLIENT", "NONE"]
      },
      "doc": "Indicates which schemas match between client and server"
    },
    {
      "name": "serverProtocol",
      "type": ["null", "string"],
      "default": null,
      "doc": "JSON representation of the server's protocol schema"
    },
    {
      "name": "serverHash",
      "type": [
        "null",
        {
          "type": "fixed",
          "name": "MD5",
          "size": 16
        }
      ],
      "default": null,
      "doc": "MD5 hash of the server's protocol schema"
    },
    {
      "name": "meta",
      "type": [
        "null",
        {
          "type": "map",
          "values": "bytes"
        }
      ],
      "default": null,
      "doc": "Additional metadata in the response"
    }
  ]
}
Location: share/schemas/org/apache/avro/ipc/
Usage Examples:
// Java usage
// Only the two required hashes are set here; clientProtocol and meta are
// left unset and rely on the null defaults declared in the schema.
HandshakeRequest request = HandshakeRequest.newBuilder()
    .setClientHash(clientHash)
    .setServerHash(serverHash)
    .build();
// Python usage
handshake_request = {
'clientHash': client_hash,
'serverHash': server_hash,
'clientProtocol': None,
'meta': None
}Schemas that define standardized data representation formats for cross-language compatibility.
// JSON data representation schema
{
  "type": "record",
  "name": "Json",
  "namespace": "org.apache.avro.data",
  "fields": [
    {
      "name": "value",
      "type": [
        "long",
        "double",
        "string",
        "boolean",
        "null",
        {
          "type": "array",
          "items": "Json"
        },
        {
          "type": "map",
          "values": "Json"
        }
      ]
    }
  ]
}
Location: share/schemas/org/apache/avro/data/
Usage Examples:
// Serialize JSON to Avro
Json jsonRecord = Json.newBuilder()
    .setValue(jsonValue)
    .build();
// Deserialize Avro to JSON
// (named decodedValue so it does not clash with the jsonValue passed to setValue above)
Object decodedValue = jsonRecord.getValue();
Protocol definitions for integrating Avro with Apache Hadoop MapReduce framework, enabling distributed data processing.
// InputProtocol for MapReduce input processing
{
  "namespace": "org.apache.avro.mapred.tether",
  "protocol": "InputProtocol",
  "doc": "Transmit inputs to a map or reduce task sub-process.",
  "types": [
    {
      "name": "TaskType",
      "type": "enum",
      "symbols": ["MAP", "REDUCE"]
    }
  ],
  "messages": {
    "configure": {
      "doc": "Configure the task. Sent before any other message.",
      "request": [
        {
          "name": "taskType",
          "type": "TaskType"
        },
        {
          "name": "inSchema",
          "type": "string"
        },
        {
          "name": "outSchema",
          "type": "string"
        }
      ],
      "response": "null",
      "one-way": true
    },
    "input": {
      "doc": "Send a block of input data to a task.",
      "request": [
        {
          "name": "data",
          "type": "bytes"
        },
        {
          "name": "count",
          "type": "long",
          "default": 1
        }
      ],
      "response": "null",
      "one-way": true
    }
  }
}
// OutputProtocol for MapReduce output processing
{
  "namespace": "org.apache.avro.mapred.tether",
  "protocol": "OutputProtocol",
  "doc": "Transmit outputs from a map or reduce task to parent.",
  "messages": {
    "output": {
      "doc": "Send an output datum.",
      "request": [
        {
          "name": "datum",
          "type": "bytes"
        }
      ],
      "response": "null",
      "one-way": true
    },
    "outputPartitioned": {
      "doc": "Send map output datum explicitly naming its partition.",
      "request": [
        {
          "name": "partition",
          "type": "int"
        },
        {
          "name": "datum",
          "type": "bytes"
        }
      ],
      "response": "null",
      "one-way": true
    },
    "count": {
      "doc": "Increment a task/job counter.",
      "request": [
        {
          "name": "group",
          "type": "string"
        },
        {
          "name": "name",
          "type": "string"
        },
        {
          "name": "amount",
          "type": "long"
        }
      ],
      "response": "null",
      "one-way": true
    }
  }
}
Location: share/schemas/org/apache/avro/mapred/tether/
Usage Examples:
// Implement MapReduce input processor
/**
 * Skeleton receiver for the InputProtocol messages ("configure" and "input").
 * Both handlers are intentionally empty placeholders.
 */
public class AvroInputProcessor implements InputProtocol {

    /**
     * Handles the "configure" message, which the protocol sends before any other message.
     *
     * @param taskType  whether this task is a MAP or a REDUCE task
     * @param inSchema  input schema, passed as a string
     * @param outSchema output schema, passed as a string
     */
    @Override
    public void configure(TaskType taskType, String inSchema, String outSchema) {
        // Configure processor with task type and schemas
    }

    /**
     * Handles the "input" message carrying a block of input data.
     *
     * @param data  the block of input bytes
     * @param count protocol default is 1; presumably the number of items in the block (confirm)
     */
    @Override
    public void input(ByteBuffer data, long count) {
        // Process input data block
    }
}
// Implement MapReduce output processor
/**
 * Skeleton receiver for the OutputProtocol messages ("output",
 * "outputPartitioned" and "count"). All handlers are empty placeholders.
 */
public class AvroOutputProcessor implements OutputProtocol {

    /**
     * Handles the "output" message carrying a single output datum.
     *
     * @param datum the output datum as bytes
     */
    @Override
    public void output(ByteBuffer datum) {
        // Send output datum
    }

    /**
     * Handles the "outputPartitioned" message: a map output datum that names its partition explicitly.
     *
     * @param partition the partition named for the datum
     * @param datum     the output datum as bytes
     */
    @Override
    public void outputPartitioned(int partition, ByteBuffer datum) {
        // Send partitioned output datum
    }

    /**
     * Handles the "count" message, which increments a task/job counter.
     *
     * @param group  counter group
     * @param name   counter name
     * @param amount amount to add to the counter
     */
    @Override
    public void count(String group, String name, long amount) {
        // Increment counter
    }
}
Methods for accessing, loading, and managing the shared schema registry across different programming environments.
# Schema file locations and naming conventions
# SCHEMA_BASE ends with a slash so the per-package paths below can be
# appended to it directly; it must be assigned before they are expanded.
SCHEMA_BASE="share/schemas/"
# Each directory path mirrors an Avro namespace with dots replaced by slashes
# (e.g. org.apache.avro.ipc -> org/apache/avro/ipc/).
IPC_SCHEMAS="${SCHEMA_BASE}org/apache/avro/ipc/"
DATA_SCHEMAS="${SCHEMA_BASE}org/apache/avro/data/"
MAPRED_SCHEMAS="${SCHEMA_BASE}org/apache/avro/mapred/tether/"
# Schema file extensions
# .avsc = Avro Schema (JSON format)
# .avpr = Avro Protocol (JSON format with RPC definitions)
Usage Examples:
# List available schemas
find share/schemas -name "*.avsc" -o -name "*.avpr"
# Load schema in shell scripts
HANDSHAKE_REQUEST_SCHEMA=$(cat share/schemas/org/apache/avro/ipc/HandshakeRequest.avsc)
# Validate schema files
# NOTE(review): confirm the installed avro-tools actually provides a "validate" subcommand.
avro-tools validate share/schemas/org/apache/avro/ipc/HandshakeRequest.avsc
// Load schemas in Java
// A fresh Schema.Parser only knows the types defined in the file it parses.
Schema handshakeRequestSchema = new Schema.Parser()
    .parse(new File("share/schemas/org/apache/avro/ipc/HandshakeRequest.avsc"));
// Load protocols in Java
Protocol inputProtocol = Protocol.parse(
    new File("share/schemas/org/apache/avro/mapred/tether/InputProtocol.avpr"));
# Load schemas in Python
import avro.schema
import json
with open('share/schemas/org/apache/avro/ipc/HandshakeRequest.avsc') as f:
schema_json = json.load(f)
handshake_schema = avro.schema.parse(json.dumps(schema_json))Install with Tessl CLI
npx tessl i tessl/maven-org-apache-avro--avro-toplevel