0
# Schema Registry
1
2
Core Avro schemas and protocol definitions that are shared across all language implementations, providing standardized data formats for serialization, RPC communication, and system integration.
3
4
## Capabilities
5
6
### IPC Communication Schemas
7
8
Schemas that define the structure for Remote Procedure Call (RPC) communication between Avro-enabled systems.
9
10
```json { .api }
11
// HandshakeRequest schema
12
{
13
"type": "record",
14
"name": "HandshakeRequest",
15
"namespace": "org.apache.avro.ipc",
16
"doc": "A handshake request sent by a client to a server when establishing an RPC connection",
17
"fields": [
18
{
19
"name": "clientHash",
20
"type": {"type": "fixed", "name": "MD5", "size": 16},
21
"doc": "MD5 hash of the client's protocol schema"
22
},
23
{
24
"name": "clientProtocol",
25
"type": ["null", "string"],
26
"default": null,
27
"doc": "JSON representation of the client's protocol schema"
28
},
29
{
30
"name": "serverHash",
31
"type": "MD5",
32
"doc": "MD5 hash of the server's protocol schema"
33
},
34
{
35
"name": "meta",
36
"type": ["null", {"type": "map", "values": "bytes"}],
37
"default": null,
38
"doc": "Additional metadata for the handshake"
39
}
40
]
41
}
42
43
// HandshakeResponse schema
44
{
  "type": "record",
  "name": "HandshakeResponse",
  "namespace": "org.apache.avro.ipc",
  "doc": "A handshake response sent by a server to a client during RPC connection establishment",
  "fields": [
    {
      "name": "match",
      "type": {
        "type": "enum",
        "name": "HandshakeMatch",
        "symbols": ["BOTH", "CLIENT", "NONE"]
      },
      "doc": "Indicates which schemas match between client and server"
    },
    {
      "name": "serverProtocol",
      "type": ["null", "string"],
      "default": null,
      "doc": "JSON representation of the server's protocol schema"
    },
    {
      "name": "serverHash",
      "type": ["null", {"type": "fixed", "name": "MD5", "size": 16}],
      "default": null,
      "doc": "MD5 hash of the server's protocol schema"
    },
    {
      "name": "meta",
      "type": ["null", {"type": "map", "values": "bytes"}],
      "default": null,
      "doc": "Additional metadata in the response"
    }
  ]
}
79
```
80
81
**Location**: `share/schemas/org/apache/avro/ipc/`
82
83
**Usage Examples:**
84
85
```java
86
// Java usage
87
HandshakeRequest request = HandshakeRequest.newBuilder()
88
.setClientHash(clientHash)
89
.setServerHash(serverHash)
90
.build();
91
92
```

```python
# Python usage
93
handshake_request = {
94
'clientHash': client_hash,
95
'serverHash': server_hash,
96
'clientProtocol': None,
97
'meta': None
98
}
99
```
100
101
### Data Format Schemas
102
103
Schemas that define standardized data representation formats for cross-language compatibility.
104
105
```json { .api }
106
// JSON data representation schema
107
{
108
"type": "record",
109
"name": "Json",
110
"namespace": "org.apache.avro.data",
111
"fields": [
112
{
113
"name": "value",
114
"type": [
115
"long",
116
"double",
117
"string",
118
"boolean",
119
"null",
120
{"type": "array", "items": "Json"},
121
{"type": "map", "values": "Json"}
122
]
123
}
124
]
125
}
126
```
127
128
**Location**: `share/schemas/org/apache/avro/data/`
129
130
**Usage Examples:**
131
132
```java
133
// Serialize JSON to Avro
134
Json jsonRecord = Json.newBuilder()
135
.setValue(jsonValue)
136
.build();
137
138
// Deserialize Avro to JSON
139
Object jsonValue = jsonRecord.getValue();
140
```
141
142
### MapReduce Integration Protocols
143
144
Protocol definitions for integrating Avro with the Apache Hadoop MapReduce framework, enabling distributed data processing.
145
146
```json { .api }
147
// InputProtocol for MapReduce input processing
148
{
149
"namespace": "org.apache.avro.mapred.tether",
150
"protocol": "InputProtocol",
151
"doc": "Transmit inputs to a map or reduce task sub-process.",
152
"types": [
153
{"name": "TaskType", "type": "enum", "symbols": ["MAP","REDUCE"]}
154
],
155
"messages": {
156
"configure": {
157
"doc": "Configure the task. Sent before any other message.",
158
"request": [
159
{"name": "taskType", "type": "TaskType"},
160
{"name": "inSchema", "type": "string"},
161
{"name": "outSchema", "type": "string"}
162
],
163
"response": "null",
164
"one-way": true
165
},
166
"input": {
167
"doc": "Send a block of input data to a task.",
168
"request": [
169
{"name": "data", "type": "bytes"},
170
{"name": "count", "type": "long", "default": 1}
171
],
172
"response": "null",
173
"one-way": true
174
}
175
}
176
}
177
178
// OutputProtocol for MapReduce output processing
179
{
180
"namespace": "org.apache.avro.mapred.tether",
181
"protocol": "OutputProtocol",
182
"doc": "Transmit outputs from a map or reduce task to parent.",
183
"messages": {
184
"output": {
185
"doc": "Send an output datum.",
186
"request": [
187
{"name": "datum", "type": "bytes"}
188
],
189
"response": "null",
190
"one-way": true
191
},
192
"outputPartitioned": {
193
"doc": "Send map output datum explicitly naming its partition.",
194
"request": [
195
{"name": "partition", "type": "int"},
196
{"name": "datum", "type": "bytes"}
197
],
198
"response": "null",
199
"one-way": true
200
},
201
"count": {
202
"doc": "Increment a task/job counter.",
203
"request": [
204
{"name": "group", "type": "string"},
205
{"name": "name", "type": "string"},
206
{"name": "amount", "type": "long"}
207
],
208
"response": "null",
209
"one-way": true
210
}
211
}
212
}
213
```
214
215
**Location**: `share/schemas/org/apache/avro/mapred/tether/`
216
217
**Usage Examples:**
218
219
```java
220
// Implement MapReduce input processor
221
public class AvroInputProcessor implements InputProtocol {
222
@Override
223
public void configure(TaskType taskType, String inSchema, String outSchema) {
224
// Configure processor with task type and schemas
225
}
226
227
@Override
228
public void input(ByteBuffer data, long count) {
229
// Process input data block
230
}
231
}
232
233
// Implement MapReduce output processor
234
public class AvroOutputProcessor implements OutputProtocol {
235
@Override
236
public void output(ByteBuffer datum) {
237
// Send output datum
238
}
239
240
@Override
241
public void outputPartitioned(int partition, ByteBuffer datum) {
242
// Send partitioned output datum
243
}
244
245
@Override
246
public void count(String group, String name, long amount) {
247
// Increment counter
248
}
249
}
250
```
251
252
### Schema Access and Management
253
254
Methods for accessing, loading, and managing the shared schema registry across different programming environments.
255
256
```bash { .api }
257
# Schema file locations and naming conventions
258
SCHEMA_BASE="share/schemas/"
259
IPC_SCHEMAS="${SCHEMA_BASE}org/apache/avro/ipc/"
260
DATA_SCHEMAS="${SCHEMA_BASE}org/apache/avro/data/"
261
MAPRED_SCHEMAS="${SCHEMA_BASE}org/apache/avro/mapred/tether/"
262
263
# Schema file extensions
264
# .avsc = Avro Schema (JSON format)
265
# .avpr = Avro Protocol (JSON format with RPC definitions)
266
```
267
268
**Usage Examples:**
269
270
```bash
271
# List available schemas
find share/schemas -name "*.avsc" -o -name "*.avpr"

# Load schema in shell scripts
HANDSHAKE_REQUEST_SCHEMA=$(cat share/schemas/org/apache/avro/ipc/HandshakeRequest.avsc)

# Validate schema files
# avro-tools has no dedicated `validate` subcommand; running the schema through
# the compiler parses it and exits non-zero if the schema is invalid.
avro-tools compile schema share/schemas/org/apache/avro/ipc/HandshakeRequest.avsc "$(mktemp -d)"
279
```
280
281
```java
282
// Load schemas in Java
283
Schema handshakeRequestSchema = new Schema.Parser()
284
.parse(new File("share/schemas/org/apache/avro/ipc/HandshakeRequest.avsc"));
285
286
// Load protocols in Java
287
Protocol inputProtocol = Protocol.parse(
288
new File("share/schemas/org/apache/avro/mapred/tether/InputProtocol.avpr"));
289
```
290
291
```python
292
# Load schemas in Python
import avro.schema

# avro.schema.parse takes the schema as JSON text, so the file contents can be
# passed straight through without a json.load / json.dumps round trip.
with open('share/schemas/org/apache/avro/ipc/HandshakeRequest.avsc') as f:
    handshake_schema = avro.schema.parse(f.read())
299
```