or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

data-reading.md index.md query-operations.md schema-management.md table-maintenance.md table-operations.md transaction-management.md writing-modification.md

docs/schema-management.md

# Schema Management

Schema definition, evolution, and type system for Delta Lake tables including field definitions, data types, and schema operations for maintaining table structure over time.

## Capabilities

### Schema Definition

```python { .api }
class Schema:
    def __init__(self, fields: list[Field]): ...

    @property
    def fields(self) -> list[Field]: ...

    def to_pyarrow(self) -> pyarrow.Schema: ...

    def to_json(self) -> str: ...

    @classmethod
    def from_pyarrow(cls, schema: pyarrow.Schema) -> Schema: ...

    @classmethod
    def from_json(cls, json_str: str) -> Schema: ...
```

Main schema class for defining table structure.

### Field Definition

```python { .api }
class Field:
    def __init__(
        self,
        name: str,
        data_type: DataType,
        nullable: bool = True,
        metadata: dict[str, Any] | None = None
    ): ...

    @property
    def name(self) -> str: ...

    @property
    def data_type(self) -> DataType: ...

    @property
    def nullable(self) -> bool: ...

    @property
    def metadata(self) -> dict[str, Any]: ...

    def to_json(self) -> str: ...

    @classmethod
    def from_json(cls, json_str: str) -> Field: ...
```

Individual field definition within a schema.

### Data Types

```python { .api }
# Union type for all data types
DataType = Union[PrimitiveType, ArrayType, MapType, StructType]

class PrimitiveType:
    def __init__(self, data_type: str): ...

    @property
    def data_type(self) -> str: ...

class ArrayType:
    def __init__(self, element_type: DataType, contains_null: bool = True): ...

    @property
    def element_type(self) -> DataType: ...

    @property
    def contains_null(self) -> bool: ...

class MapType:
    def __init__(
        self,
        key_type: DataType,
        value_type: DataType,
        value_contains_null: bool = True
    ): ...

    @property
    def key_type(self) -> DataType: ...

    @property
    def value_type(self) -> DataType: ...

    @property
    def value_contains_null(self) -> bool: ...

class StructType:
    def __init__(self, fields: list[Field]): ...

    @property
    def fields(self) -> list[Field]: ...
```

Type system supporting primitive types, collections, and nested structures.

## Usage Examples

### Basic Schema Creation

```python
from deltalake import Schema, Field
from deltalake.schema import PrimitiveType, ArrayType, MapType, StructType

# Simple schema with primitive types
schema = Schema([
    Field("id", PrimitiveType("integer"), nullable=False),
    Field("name", PrimitiveType("string"), nullable=True),
    Field("age", PrimitiveType("integer"), nullable=True),
    Field("salary", PrimitiveType("double"), nullable=True),
    Field("is_active", PrimitiveType("boolean"), nullable=False),
    Field("created_at", PrimitiveType("timestamp"), nullable=False)
])

# Print schema information
for field in schema.fields:
    print(f"{field.name}: {field.data_type} (nullable: {field.nullable})")
```

### Complex Data Types

```python
# Array type
tags_field = Field(
    "tags",
    ArrayType(PrimitiveType("string"), contains_null=False),
    nullable=True
)

# Map type for key-value pairs
metadata_field = Field(
    "metadata",
    MapType(
        key_type=PrimitiveType("string"),
        value_type=PrimitiveType("string"),
        value_contains_null=True
    ),
    nullable=True
)

# Nested struct type
address_struct = StructType([
    Field("street", PrimitiveType("string")),
    Field("city", PrimitiveType("string")),
    Field("zipcode", PrimitiveType("string")),
    Field("country", PrimitiveType("string"))
])

address_field = Field("address", address_struct, nullable=True)

# Combined complex schema
complex_schema = Schema([
    Field("id", PrimitiveType("integer"), nullable=False),
    Field("name", PrimitiveType("string"), nullable=False),
    tags_field,
    metadata_field,
    address_field
])
```

### Schema from PyArrow

```python
import pyarrow as pa

# Create PyArrow schema
arrow_schema = pa.schema([
    pa.field("id", pa.int64(), nullable=False),
    pa.field("name", pa.string()),
    pa.field("scores", pa.list_(pa.float64())),
    pa.field("created_at", pa.timestamp('us'))
])

# Convert to Delta schema
delta_schema = Schema.from_pyarrow(arrow_schema)

# Convert back to PyArrow
converted_arrow = delta_schema.to_pyarrow()
```

### Schema Serialization

```python
# Convert schema to JSON
schema_json = schema.to_json()
print("Schema as JSON:")
print(schema_json)

# Recreate schema from JSON
recreated_schema = Schema.from_json(schema_json)

# Verify fields match
assert len(schema.fields) == len(recreated_schema.fields)
for original, recreated in zip(schema.fields, recreated_schema.fields):
    assert original.name == recreated.name
    assert original.nullable == recreated.nullable
```

### Working with Field Metadata

```python
# Field with metadata
documented_field = Field(
    "user_id",
    PrimitiveType("integer"),
    nullable=False,
    metadata={
        "description": "Unique identifier for user",
        "source_system": "user_management",
        "pii": False,
        "format": "int64"
    }
)

# Access metadata
print(f"Field metadata: {documented_field.metadata}")
print(f"Description: {documented_field.metadata.get('description')}")
print(f"Contains PII: {documented_field.metadata.get('pii')}")
```

### Schema Evolution Examples

```python
from deltalake import DeltaTable, write_deltalake

# Original schema
original_schema = Schema([
    Field("id", PrimitiveType("integer"), nullable=False),
    Field("name", PrimitiveType("string"), nullable=True),
    Field("age", PrimitiveType("integer"), nullable=True)
])

# Create table with original schema
dt = DeltaTable.create("path/to/evolving-table", schema=original_schema)

# Add data with additional column (schema evolution)
import pandas as pd

evolved_data = pd.DataFrame({
    'id': [4, 5, 6],
    'name': ['New Person 1', 'New Person 2', 'New Person 3'],
    'age': [25, 30, 35],
    'department': ['Engineering', 'Sales', 'Marketing']  # New column
})

# Write with schema merge mode
write_deltalake(
    "path/to/evolving-table",
    evolved_data,
    mode="append",
    schema_mode="merge"  # Allow schema evolution
)

# Check evolved schema
dt = DeltaTable("path/to/evolving-table")
evolved_schema = dt.schema()
print("Evolved schema:")
for field in evolved_schema.fields:
    print(f"  {field.name}: {field.data_type}")
```

### Primitive Data Types

Available primitive types:

- `"boolean"` - Boolean values
- `"byte"` - 8-bit signed integer
- `"short"` - 16-bit signed integer
- `"integer"` - 32-bit signed integer
- `"long"` - 64-bit signed integer
- `"float"` - 32-bit floating point
- `"double"` - 64-bit floating point
- `"decimal"` - Arbitrary precision decimal
- `"string"` - UTF-8 string
- `"binary"` - Binary data
- `"date"` - Date (year, month, day)
- `"timestamp"` - Timestamp with microsecond precision

```python
# Examples of all primitive types
all_types_schema = Schema([
    Field("bool_col", PrimitiveType("boolean")),
    Field("byte_col", PrimitiveType("byte")),
    Field("short_col", PrimitiveType("short")),
    Field("int_col", PrimitiveType("integer")),
    Field("long_col", PrimitiveType("long")),
    Field("float_col", PrimitiveType("float")),
    Field("double_col", PrimitiveType("double")),
    Field("decimal_col", PrimitiveType("decimal")),
    Field("string_col", PrimitiveType("string")),
    Field("binary_col", PrimitiveType("binary")),
    Field("date_col", PrimitiveType("date")),
    Field("timestamp_col", PrimitiveType("timestamp"))
])
```

### Validation and Constraints

```python
# Schema validation when creating tables
try:
    # This will validate the schema structure
    dt = DeltaTable.create(
        "path/to/validated-table",
        schema=complex_schema,
        mode="error"
    )
    print("Schema validation passed")
except Exception as e:
    print(f"Schema validation failed: {e}")

# Check schema compatibility
def schemas_compatible(schema1: Schema, schema2: Schema) -> bool:
    """Check if two schemas are compatible for merging"""
    schema1_fields = {f.name: f for f in schema1.fields}
    schema2_fields = {f.name: f for f in schema2.fields}

    # Check common fields have compatible types
    for name in schema1_fields.keys() & schema2_fields.keys():
        field1 = schema1_fields[name]
        field2 = schema2_fields[name]

        # Simple type comparison (real implementation would be more complex)
        if field1.data_type != field2.data_type:
            return False

        # Nullable compatibility: can't make nullable field non-nullable
        if field1.nullable and not field2.nullable:
            return False

    return True

# Test compatibility
compatible = schemas_compatible(original_schema, evolved_schema)
print(f"Schemas are compatible: {compatible}")
```

## TableAlterer Class

The TableAlterer class provides advanced schema and table modification capabilities accessed through `DeltaTable.alter`.

```python { .api }
class TableAlterer:
    def add_feature(
        self,
        feature: TableFeatures | list[TableFeatures],
        allow_protocol_versions_increase: bool = False,
        commit_properties: CommitProperties | None = None,
        post_commithook_properties: PostCommitHookProperties | None = None,
    ) -> None: ...

    def add_columns(
        self,
        fields: Field | list[Field],
        commit_properties: CommitProperties | None = None,
        post_commithook_properties: PostCommitHookProperties | None = None,
    ) -> None: ...

    def add_constraint(
        self,
        constraints: dict[str, str],
        post_commithook_properties: PostCommitHookProperties | None = None,
        commit_properties: CommitProperties | None = None,
    ) -> None: ...

    def drop_constraint(
        self,
        name: str,
        raise_if_not_exists: bool = True,
        post_commithook_properties: PostCommitHookProperties | None = None,
        commit_properties: CommitProperties | None = None,
    ) -> None: ...

    def set_table_properties(
        self,
        properties: dict[str, str],
        raise_if_not_exists: bool = True,
        commit_properties: CommitProperties | None = None,
    ) -> None: ...

    def set_table_name(
        self,
        name: str,
        commit_properties: CommitProperties | None = None,
    ) -> None: ...

    def set_table_description(
        self,
        description: str,
        commit_properties: CommitProperties | None = None,
    ) -> None: ...

    def set_column_metadata(
        self,
        column: str,
        metadata: dict[str, str],
        commit_properties: CommitProperties | None = None,
        post_commithook_properties: PostCommitHookProperties | None = None,
    ) -> None: ...
```

Provides comprehensive table and schema alteration capabilities including feature management, column operations, constraints, and metadata modifications.