or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

build-tools.md framework-coordination.md index.md language-implementations.md schema-registry.md

docs/schema-registry.md

0

# Schema Registry

1

2

Core Avro schemas and protocol definitions that are shared across all language implementations, providing standardized data formats for serialization, RPC communication, and system integration.

3

4

## Capabilities

5

6

### IPC Communication Schemas

7

8

Schemas that define the structure for Remote Procedure Call (RPC) communication between Avro-enabled systems.

9

10

```json { .api }

11

// HandshakeRequest schema

12

{

13

"type": "record",

14

"name": "HandshakeRequest",

15

"namespace": "org.apache.avro.ipc",

16

"doc": "A handshake request sent by a client to a server when establishing an RPC connection",

17

"fields": [

18

{

19

"name": "clientHash",

20

"type": {"type": "fixed", "name": "MD5", "size": 16},

21

"doc": "MD5 hash of the client's protocol schema"

22

},

23

{

24

"name": "clientProtocol",

25

"type": ["null", "string"],

26

"default": null,

27

"doc": "JSON representation of the client's protocol schema"

28

},

29

{

30

"name": "serverHash",

31

"type": "MD5",

32

"doc": "MD5 hash of the server's protocol schema"

33

},

34

{

35

"name": "meta",

36

"type": ["null", {"type": "map", "values": "bytes"}],

37

"default": null,

38

"doc": "Additional metadata for the handshake"

39

}

40

]

41

}

42

43

// HandshakeResponse schema

44

{

45

"type": "record",

46

"name": "HandshakeResponse",

47

"namespace": "org.apache.avro.ipc",

48

"doc": "A handshake response sent by a server to a client during RPC connection establishment",

49

"fields": [

50

{

51

"name": "match",

52

"type": {

53

"type": "enum",

54

"name": "HandshakeMatch",

55

"symbols": ["BOTH", "CLIENT", "NONE"]

56

},

57

"doc": "Indicates which schemas match between client and server"

58

},

59

{

60

"name": "serverProtocol",

61

"type": ["null", "string"],

62

"default": null,

63

"doc": "JSON representation of the server's protocol schema"

64

},

65

{

66

"name": "serverHash",

67

"type": ["null", {"type": "fixed", "name": "MD5", "size": 16}],

68

"default": null,

69

"doc": "MD5 hash of the server's protocol schema"

70

},

71

{

72

"name": "meta",

73

"type": ["null", {"type": "map", "values": "bytes"}],

74

"default": null,

75

"doc": "Additional metadata in the response"

76

}

77

]

78

}

79

```

80

81

**Location**: `share/schemas/org/apache/avro/ipc/`

82

83

**Usage Examples:**

84

85

```java

86

// Java usage

87

HandshakeRequest request = HandshakeRequest.newBuilder()

88

.setClientHash(clientHash)

89

.setServerHash(serverHash)

90

.build();

91

92

```

```python
# Python usage

93

handshake_request = {

94

'clientHash': client_hash,

95

'serverHash': server_hash,

96

'clientProtocol': None,

97

'meta': None

98

}

99

```

100

101

### Data Format Schemas

102

103

Schemas that define standardized data representation formats for cross-language compatibility.

104

105

```json { .api }

106

// JSON data representation schema

107

{

108

"type": "record",

109

"name": "Json",

110

"namespace": "org.apache.avro.data",

111

"fields": [

112

{

113

"name": "value",

114

"type": [

115

"long",

116

"double",

117

"string",

118

"boolean",

119

"null",

120

{"type": "array", "items": "Json"},

121

{"type": "map", "values": "Json"}

122

]

123

}

124

]

125

}

126

```

127

128

**Location**: `share/schemas/org/apache/avro/data/`

129

130

**Usage Examples:**

131

132

```java

133

// Serialize JSON to Avro

134

Json jsonRecord = Json.newBuilder()

135

.setValue(jsonValue)

136

.build();

137

138

// Deserialize Avro to JSON

139

Object jsonValue = jsonRecord.getValue();

140

```

141

142

### MapReduce Integration Protocols

143

144

Protocol definitions for integrating Avro with the Apache Hadoop MapReduce framework, enabling distributed data processing.

145

146

```json { .api }

147

// InputProtocol for MapReduce input processing

148

{

149

"namespace": "org.apache.avro.mapred.tether",

150

"protocol": "InputProtocol",

151

"doc": "Transmit inputs to a map or reduce task sub-process.",

152

"types": [

153

{"name": "TaskType", "type": "enum", "symbols": ["MAP","REDUCE"]}

154

],

155

"messages": {

156

"configure": {

157

"doc": "Configure the task. Sent before any other message.",

158

"request": [

159

{"name": "taskType", "type": "TaskType"},

160

{"name": "inSchema", "type": "string"},

161

{"name": "outSchema", "type": "string"}

162

],

163

"response": "null",

164

"one-way": true

165

},

166

"input": {

167

"doc": "Send a block of input data to a task.",

168

"request": [

169

{"name": "data", "type": "bytes"},

170

{"name": "count", "type": "long", "default": 1}

171

],

172

"response": "null",

173

"one-way": true

174

}

175

}

176

}

177

178

// OutputProtocol for MapReduce output processing

179

{

180

"namespace": "org.apache.avro.mapred.tether",

181

"protocol": "OutputProtocol",

182

"doc": "Transmit outputs from a map or reduce task to parent.",

183

"messages": {

184

"output": {

185

"doc": "Send an output datum.",

186

"request": [

187

{"name": "datum", "type": "bytes"}

188

],

189

"response": "null",

190

"one-way": true

191

},

192

"outputPartitioned": {

193

"doc": "Send map output datum explicitly naming its partition.",

194

"request": [

195

{"name": "partition", "type": "int"},

196

{"name": "datum", "type": "bytes"}

197

],

198

"response": "null",

199

"one-way": true

200

},

201

"count": {

202

"doc": "Increment a task/job counter.",

203

"request": [

204

{"name": "group", "type": "string"},

205

{"name": "name", "type": "string"},

206

{"name": "amount", "type": "long"}

207

],

208

"response": "null",

209

"one-way": true

210

}

211

}

212

}

213

```

214

215

**Location**: `share/schemas/org/apache/avro/mapred/tether/`

216

217

**Usage Examples:**

218

219

```java

220

// Implement MapReduce input processor

221

public class AvroInputProcessor implements InputProtocol {

222

@Override

223

public void configure(TaskType taskType, String inSchema, String outSchema) {

224

// Configure processor with task type and schemas

225

}

226

227

@Override

228

public void input(ByteBuffer data, long count) {

229

// Process input data block

230

}

231

}

232

233

// Implement MapReduce output processor

234

public class AvroOutputProcessor implements OutputProtocol {

235

@Override

236

public void output(ByteBuffer datum) {

237

// Send output datum

238

}

239

240

@Override

241

public void outputPartitioned(int partition, ByteBuffer datum) {

242

// Send partitioned output datum

243

}

244

245

@Override

246

public void count(String group, String name, long amount) {

247

// Increment counter

248

}

249

}

250

```

251

252

### Schema Access and Management

253

254

Methods for accessing, loading, and managing the shared schema registry across different programming environments.

255

256

```bash { .api }

257

# Schema file locations and naming conventions

258

SCHEMA_BASE="share/schemas/"

259

IPC_SCHEMAS="${SCHEMA_BASE}org/apache/avro/ipc/"

260

DATA_SCHEMAS="${SCHEMA_BASE}org/apache/avro/data/"

261

MAPRED_SCHEMAS="${SCHEMA_BASE}org/apache/avro/mapred/tether/"

262

263

# Schema file extensions

264

# .avsc = Avro Schema (JSON format)

265

# .avpr = Avro Protocol (JSON format with RPC definitions)

266

```

267

268

**Usage Examples:**

269

270

```bash

271

# List available schemas

272

find share/schemas -name "*.avsc" -o -name "*.avpr"

273

274

# Load schema in shell scripts

275

HANDSHAKE_REQUEST_SCHEMA=$(cat share/schemas/org/apache/avro/ipc/HandshakeRequest.avsc)

276

277

# Validate schema files

278

avro-tools validate share/schemas/org/apache/avro/ipc/HandshakeRequest.avsc

279

```

280

281

```java

282

// Load schemas in Java

283

Schema handshakeRequestSchema = new Schema.Parser()

284

.parse(new File("share/schemas/org/apache/avro/ipc/HandshakeRequest.avsc"));

285

286

// Load protocols in Java

287

Protocol inputProtocol = Protocol.parse(

288

new File("share/schemas/org/apache/avro/mapred/tether/InputProtocol.avpr"));

289

```

290

291

```python

292

# Load schemas in Python

293

import avro.schema

294

import json

295

296

with open('share/schemas/org/apache/avro/ipc/HandshakeRequest.avsc') as f:

297

    schema_json = json.load(f)

298

handshake_schema = avro.schema.parse(json.dumps(schema_json))

299

```