or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

byte-utilities.md, data-format-system.md, index.md, schema-system.md, stream-processing.md, structured-records.md

docs/schema-system.md

0

# Schema System

1

2

Comprehensive data schema definition system supporting primitive types, complex nested structures, logical types for dates and timestamps, and schema compatibility checking. The schema system provides type safety and validation for data processing pipelines, serving as the foundation for structured data handling in CDAP applications.

3

4

## Capabilities

5

6

### Schema Creation

7

8

Create schemas for various data types including primitives, complex structures, and logical types.

9

10

```java { .api }

11

/**

12

* Create schema for simple/primitive types

13

* @param type The primitive type (NULL, BOOLEAN, INT, LONG, FLOAT, DOUBLE, BYTES, STRING)

14

* @return Schema for the specified type

15

* @throws IllegalArgumentException if type is not a simple type

16

*/

17

public static Schema of(Schema.Type type);

18

19

/**

20

* Create schema for logical types (dates, timestamps)

21

* @param logicalType The logical type (DATE, TIMESTAMP_MILLIS, etc.)

22

* @return Schema for the specified logical type

23

*/

24

public static Schema of(Schema.LogicalType logicalType);

25

26

/**

27

* Create nullable schema (union with null)

28

* @param schema Schema to make nullable

29

* @return Union schema of given schema and null

30

* @throws IllegalArgumentException if schema is already null type

31

*/

32

public static Schema nullableOf(Schema schema);

33

```

34

35

**Usage Examples:**

36

37

```java

38

// Simple types

39

Schema stringSchema = Schema.of(Schema.Type.STRING);

40

Schema intSchema = Schema.of(Schema.Type.INT);

41

Schema nullableInt = Schema.nullableOf(intSchema);

42

43

// Logical types

44

Schema dateSchema = Schema.of(Schema.LogicalType.DATE);

45

Schema timestampSchema = Schema.of(Schema.LogicalType.TIMESTAMP_MILLIS);

46

```

47

48

### Complex Type Creation

49

50

Create schemas for complex data structures including arrays, maps, records, unions, and enums.

51

52

```java { .api }

53

/**

54

* Create array schema

55

* @param componentSchema Schema of array elements

56

* @return Array schema

57

*/

58

public static Schema arrayOf(Schema componentSchema);

59

60

/**

61

* Create map schema

62

* @param keySchema Schema for map keys

63

* @param valueSchema Schema for map values

64

* @return Map schema

65

*/

66

public static Schema mapOf(Schema keySchema, Schema valueSchema);

67

68

/**

69

* Create record schema with fields

70

* @param name Record name

71

* @param fields Record fields

72

* @return Record schema

73

* @throws IllegalArgumentException if name is null or no fields provided

74

*/

75

public static Schema recordOf(String name, Schema.Field... fields);

76

public static Schema recordOf(String name, Iterable<Schema.Field> fields);

77

78

/**

79

* Create empty record schema (for forward references)

80

* @param name Record name

81

* @return Empty record schema

82

*/

83

public static Schema recordOf(String name);

84

85

/**

86

* Create union schema

87

* @param schemas Schemas to union

88

* @return Union schema

89

* @throws IllegalArgumentException if no schemas provided

90

*/

91

public static Schema unionOf(Schema... schemas);

92

public static Schema unionOf(Iterable<Schema> schemas);

93

94

/**

95

* Create enum schema

96

* @param values Enum values

97

* @return Enum schema

98

* @throws IllegalArgumentException if values are not unique or empty

99

*/

100

public static Schema enumWith(String... values);

101

public static Schema enumWith(Iterable<String> values);

102

public static Schema enumWith(Class<Enum<?>> enumClass);

103

```

104

105

**Usage Examples:**

106

107

```java

108

// Array of strings

109

Schema stringArraySchema = Schema.arrayOf(Schema.of(Schema.Type.STRING));

110

111

// Map from string to int

112

Schema mapSchema = Schema.mapOf(

113

Schema.of(Schema.Type.STRING),

114

Schema.of(Schema.Type.INT)

115

);

116

117

// Record schema

118

Schema personSchema = Schema.recordOf("Person",

119

Schema.Field.of("name", Schema.of(Schema.Type.STRING)),

120

Schema.Field.of("age", Schema.of(Schema.Type.INT)),

121

Schema.Field.of("emails", Schema.arrayOf(Schema.of(Schema.Type.STRING)))

122

);

123

124

// Union schema

125

Schema stringOrInt = Schema.unionOf(

126

Schema.of(Schema.Type.STRING),

127

Schema.of(Schema.Type.INT)

128

);

129

130

// Enum schema

131

Schema statusSchema = Schema.enumWith("ACTIVE", "INACTIVE", "PENDING");

132

```

133

134

### Schema Parsing

135

136

Parse schemas from JSON and SQL-like string representations.

137

138

```java { .api }

139

/**

140

* Parse schema from JSON representation

141

* @param schemaJson JSON string representation

142

* @return Parsed schema

143

* @throws IOException if parsing fails

144

*/

145

public static Schema parseJson(String schemaJson) throws IOException;

146

147

/**

148

* Parse schema from reader containing JSON

149

* @param reader Reader for JSON schema

150

* @return Parsed schema

151

* @throws IOException if parsing fails

152

*/

153

public static Schema parseJson(Reader reader) throws IOException;

154

155

/**

156

* Parse schema from SQL-like representation

157

* @param schemaString SQL-like schema string

158

* @return Parsed schema

159

* @throws IOException if parsing fails

160

*/

161

public static Schema parseSQL(String schemaString) throws IOException;

162

```

163

164

**Usage Examples:**

165

166

```java

167

// Parse from JSON

168

String jsonSchema = "{\"type\":\"record\",\"name\":\"User\"," +

169

"\"fields\":[{\"name\":\"id\",\"type\":\"long\"}," +

170

"{\"name\":\"name\",\"type\":\"string\"}]}";

171

Schema schema = Schema.parseJson(jsonSchema);

172

173

// Parse from SQL-like syntax

174

String sqlSchema = "id long, name string, active boolean";

175

Schema recordSchema = Schema.parseSQL(sqlSchema);

176

```

177

178

### Schema Information Access

179

180

Access schema type information, structure details, and metadata.

181

182

```java { .api }

183

/**

184

* Get schema type

185

* @return Schema type enum value

186

*/

187

public Schema.Type getType();

188

189

/**

190

* Get logical type (if applicable)

191

* @return Logical type or null

192

*/

193

public Schema.LogicalType getLogicalType();

194

195

// For ENUM schemas

196

public Set<String> getEnumValues();

197

public int getEnumIndex(String value);

198

public String getEnumValue(int idx);

199

200

// For ARRAY schemas

201

public Schema getComponentSchema();

202

203

// For MAP schemas

204

public Map.Entry<Schema, Schema> getMapSchema();

205

206

// For RECORD schemas

207

public String getRecordName();

208

public List<Schema.Field> getFields();

209

public Schema.Field getField(String name);

210

public Schema.Field getField(String name, boolean ignoreCase);

211

212

// For UNION schemas

213

public List<Schema> getUnionSchemas();

214

public Schema getUnionSchema(int idx);

215

```

216

217

**Usage Examples:**

218

219

```java

220

Schema recordSchema = Schema.recordOf("Person",

221

Schema.Field.of("name", Schema.of(Schema.Type.STRING)),

222

Schema.Field.of("age", Schema.of(Schema.Type.INT))

223

);

224

225

// Access record information

226

String recordName = recordSchema.getRecordName(); // "Person"

227

List<Schema.Field> fields = recordSchema.getFields();

228

Schema.Field nameField = recordSchema.getField("name");

229

Schema nameFieldSchema = nameField.getSchema(); // STRING schema

230

231

// Check schema properties

232

boolean isSimple = nameFieldSchema.getType().isSimpleType(); // true

233

```

234

235

### Schema Validation and Compatibility

236

237

Validate schema compatibility and check for nullable types.

238

239

```java { .api }

240

/**

241

* Check if this schema is compatible with target schema

242

* @param target Target schema to check compatibility against

243

* @return true if compatible, false otherwise

244

*/

245

public boolean isCompatible(Schema target);

246

247

/**

248

* Check if schema is nullable (union of null and one other type)

249

* @return true if nullable union, false otherwise

250

*/

251

public boolean isNullable();

252

253

/**

254

* Check if schema is nullable simple type

255

* @return true if nullable simple type, false otherwise

256

*/

257

public boolean isNullableSimple();

258

259

/**

260

* Check if schema is simple or nullable simple type

261

* @return true if simple or nullable simple, false otherwise

262

*/

263

public boolean isSimpleOrNullableSimple();

264

265

/**

266

* Get non-null schema from nullable union

267

* @return Non-null schema from union

268

* @throws IllegalStateException if not a nullable union

269

*/

270

public Schema getNonNullable();

271

```

272

273

**Usage Examples:**

274

275

```java

276

Schema intSchema = Schema.of(Schema.Type.INT);

277

Schema nullableInt = Schema.nullableOf(intSchema);

278

Schema longSchema = Schema.of(Schema.Type.LONG);

279

280

// Compatibility checking

281

boolean compatible = intSchema.isCompatible(longSchema); // true (int -> long)

282

283

// Nullable checking

284

boolean isNullable = nullableInt.isNullable(); // true

285

Schema nonNull = nullableInt.getNonNullable(); // returns intSchema

286

```

287

288

### Schema Utilities

289

290

Generate schema hashes and string representations.

291

292

```java { .api }

293

/**

294

* Get MD5 hash of schema

295

* @return Schema hash object

296

*/

297

public SchemaHash getSchemaHash();

298

299

/**

300

* Get JSON string representation of schema

301

* @return JSON representation

302

*/

303

public String toString();

304

```

305

306

**Usage Examples:**

307

308

```java

309

Schema schema = Schema.of(Schema.Type.STRING);

310

SchemaHash hash = schema.getSchemaHash();

311

String hashString = hash.toString(); // Hex representation

312

313

String jsonRepresentation = schema.toString(); // "\"string\""

314

```

315

316

## Types

317

318

### Schema.Type Enum

319

320

```java { .api }

321

public enum Schema.Type {

322

// Simple/primitive types

323

NULL, BOOLEAN, INT, LONG, FLOAT, DOUBLE, BYTES, STRING,

324

325

// Complex types

326

ENUM, ARRAY, MAP, RECORD, UNION;

327

328

/**

329

* Check if this type is a simple/primitive type

330

* @return true if simple type, false if complex type

331

*/

332

public boolean isSimpleType();

333

}

334

```

335

336

### Schema.LogicalType Enum

337

338

```java { .api }

339

public enum Schema.LogicalType {

340

DATE, // Based on INT (days since epoch)

341

TIMESTAMP_MILLIS, // Based on LONG (milliseconds since epoch)

342

TIMESTAMP_MICROS, // Based on LONG (microseconds since epoch)

343

TIME_MILLIS, // Based on INT (milliseconds since midnight)

344

TIME_MICROS; // Based on LONG (microseconds since midnight)

345

346

/**

347

* Get string token for logical type

348

* @return Token string

349

*/

350

public String getToken();

351

352

/**

353

* Get logical type from token string

354

* @param token Token string

355

* @return Logical type

356

* @throws IllegalArgumentException if unknown token

357

*/

358

public static LogicalType fromToken(String token);

359

}

360

```

361

362

### Schema.Field Class

363

364

```java { .api }

365

public static final class Schema.Field {

366

/**

367

* Create field with name and schema

368

* @param name Field name

369

* @param schema Field schema

370

* @return Field instance

371

*/

372

public static Field of(String name, Schema schema);

373

374

/**

375

* Get field name

376

* @return Field name

377

*/

378

public String getName();

379

380

/**

381

* Get field schema

382

* @return Field schema

383

*/

384

public Schema getSchema();

385

}

386

```

387

388

### SchemaHash Class

389

390

```java { .api }

391

public final class SchemaHash {

392

/**

393

* Create hash from schema

394

* @param schema Schema to hash

395

*/

396

public SchemaHash(Schema schema);

397

398

/**

399

* Create hash from byte buffer

400

* @param bytes Byte buffer containing hash

401

*/

402

public SchemaHash(ByteBuffer bytes);

403

404

/**

405

* Get raw hash bytes

406

* @return Hash as byte array

407

*/

408

public byte[] toByteArray();

409

410

/**

411

* Get hex string representation

412

* @return Hex string of hash

413

*/

414

public String toString();

415

}

416

```

417

418

## Schema Compatibility Rules

419

420

### Primitive Type Compatibility

421

422

- `int` → `long`, `float`, `double`, `string`

423

- `long` → `float`, `double`, `string`

424

- `float` → `double`, `string`

425

- `double` → `string`

426

- `boolean` → `string`

427

- `null` → `null` only

428

- `bytes` → `bytes` only

429

- `string` → `string` only

430

431

### Complex Type Compatibility

432

433

- **Arrays**: Compatible if component schemas are compatible

434

- **Maps**: Compatible if both key and value schemas are compatible

435

- **Records**: Compatible if all common fields (by name) have compatible schemas

436

- **Unions**: Compatible if at least one schema pair between unions is compatible

437

- **Enums**: Target enum must contain all values from source enum

438

439

### Union Compatibility

440

441

- **Union → Union**: At least one pair of schemas must be compatible

442

- **Union → Non-union**: At least one union schema must be compatible with target

443

- **Non-union → Union**: Source must be compatible with at least one union schema