# Table Management

Programmatic table creation, schema management, and configuration for Delta Lake tables. Provides fluent builder APIs for creating tables with custom schemas, partitioning, clustering, and properties.

## Capabilities

### Table Builders

Create table builders for different creation patterns.

```python { .api }
class DeltaTable:
    @classmethod
    def create(cls, spark: SparkSession = None) -> DeltaTableBuilder:
        """
        Create a new table (equivalent to CREATE TABLE).

        Parameters:
        - spark: Optional SparkSession (uses active session if None)

        Returns:
        DeltaTableBuilder for table configuration
        """

    @classmethod
    def createIfNotExists(cls, spark: SparkSession = None) -> DeltaTableBuilder:
        """
        Create table if it doesn't exist (CREATE TABLE IF NOT EXISTS).

        Parameters:
        - spark: Optional SparkSession

        Returns:
        DeltaTableBuilder for table configuration
        """

    @classmethod
    def replace(cls, spark: SparkSession = None) -> DeltaTableBuilder:
        """
        Replace existing table (REPLACE TABLE).

        Parameters:
        - spark: Optional SparkSession

        Returns:
        DeltaTableBuilder for table configuration
        """

    @classmethod
    def createOrReplace(cls, spark: SparkSession = None) -> DeltaTableBuilder:
        """
        Create or replace table (CREATE OR REPLACE TABLE).

        Parameters:
        - spark: Optional SparkSession

        Returns:
        DeltaTableBuilder for table configuration
        """
```

```scala { .api }
object DeltaTable {
  def create(): DeltaTableBuilder
  def create(spark: SparkSession): DeltaTableBuilder
  def createIfNotExists(): DeltaTableBuilder
  def createIfNotExists(spark: SparkSession): DeltaTableBuilder
  def replace(): DeltaTableBuilder
  def replace(spark: SparkSession): DeltaTableBuilder
  def createOrReplace(): DeltaTableBuilder
  def createOrReplace(spark: SparkSession): DeltaTableBuilder
}
```

### Table Configuration

Configure table name, location, and metadata.

```python { .api }
class DeltaTableBuilder:
    def tableName(self, identifier: str) -> DeltaTableBuilder:
        """
        Set table name, optionally qualified with database.

        Parameters:
        - identifier: Table name (e.g., "my_table" or "db.my_table")

        Returns:
        DeltaTableBuilder for method chaining
        """

    def location(self, location: str) -> DeltaTableBuilder:
        """
        Set table data location path.

        Parameters:
        - location: Path where table data will be stored

        Returns:
        DeltaTableBuilder for method chaining
        """

    def comment(self, comment: str) -> DeltaTableBuilder:
        """
        Add table comment/description.

        Parameters:
        - comment: Table description

        Returns:
        DeltaTableBuilder for method chaining
        """
```

```scala { .api }
class DeltaTableBuilder {
  def tableName(identifier: String): DeltaTableBuilder
  def location(location: String): DeltaTableBuilder
  def comment(comment: String): DeltaTableBuilder
}
```

### Column Definitions

Define table schema with columns, data types, and constraints.

```python { .api }
class DeltaTableBuilder:
    def addColumn(
        self,
        col_name: str,
        data_type: Union[str, DataType],
        nullable: bool = True,
        generated_always_as: Optional[Union[str, IdentityGenerator]] = None,
        generated_by_default_as: Optional[IdentityGenerator] = None,
        comment: Optional[str] = None
    ) -> DeltaTableBuilder:
        """
        Add column to table schema.

        Parameters:
        - col_name: Column name
        - data_type: Data type as string or DataType object
        - nullable: Whether column accepts null values
        - generated_always_as: SQL expression or IdentityGenerator for computed column
        - generated_by_default_as: IdentityGenerator for identity column with defaults
        - comment: Column description

        Returns:
        DeltaTableBuilder for method chaining
        """

    def addColumns(
        self,
        cols: Union[StructType, List[StructField]]
    ) -> DeltaTableBuilder:
        """
        Add multiple columns from existing schema.

        Parameters:
        - cols: StructType schema or list of StructField objects

        Returns:
        DeltaTableBuilder for method chaining
        """
```

```scala { .api }
class DeltaTableBuilder {
  def addColumn(colName: String, dataType: DataType): DeltaTableBuilder
  def addColumn(
      colName: String,
      dataType: DataType,
      nullable: Boolean,
      generatedAlwaysAs: String,
      comment: String
  ): DeltaTableBuilder
  def addColumns(cols: StructType): DeltaTableBuilder
}
```

### Partitioning and Clustering

Configure table partitioning and clustering for performance optimization.

```python { .api }
class DeltaTableBuilder:
    def partitionedBy(self, *cols: str) -> DeltaTableBuilder:
        """
        Specify partitioning columns.

        Parameters:
        - cols: Column names for partitioning

        Returns:
        DeltaTableBuilder for method chaining
        """

    def clusterBy(self, *cols: str) -> DeltaTableBuilder:
        """
        Specify clustering columns for data layout optimization.

        Parameters:
        - cols: Column names for clustering

        Returns:
        DeltaTableBuilder for method chaining
        """
```

```scala { .api }
class DeltaTableBuilder {
  def partitionedBy(cols: String*): DeltaTableBuilder
  def clusterBy(cols: String*): DeltaTableBuilder
}
```

### Table Properties

Set custom table properties and configuration.

```python { .api }
class DeltaTableBuilder:
    def property(self, key: str, value: str) -> DeltaTableBuilder:
        """
        Set table property.

        Parameters:
        - key: Property name
        - value: Property value

        Returns:
        DeltaTableBuilder for method chaining
        """
```

```scala { .api }
class DeltaTableBuilder {
  def property(key: String, value: String): DeltaTableBuilder
}
```

### Table Creation

Execute table creation with configured settings.

```python { .api }
class DeltaTableBuilder:
    def execute(self) -> DeltaTable:
        """
        Execute table creation.

        Returns:
        DeltaTable instance for the created table
        """
```

```scala { .api }
class DeltaTableBuilder {
  def execute(): DeltaTable
}
```

### Identity Columns

Configure identity columns for auto-incrementing values.

```python { .api }
@dataclass
class IdentityGenerator:
    """Identity column configuration for auto-incrementing values."""
    start: int = 1  # Starting value for identity sequence
    step: int = 1   # Increment step for identity sequence
```

### Column Builders

Create detailed column specifications.

```python { .api }
class DeltaTable:
    @classmethod
    def columnBuilder(cls, col_name: str, spark: Optional[SparkSession] = None) -> DeltaColumnBuilder:
        """
        Create column builder for detailed column configuration.

        Parameters:
        - col_name: Column name
        - spark: Optional SparkSession

        Returns:
        DeltaColumnBuilder for column configuration
        """

class DeltaColumnBuilder:
    def dataType(self, data_type: Union[str, DataType]) -> DeltaColumnBuilder:
        """
        Set column data type.

        Parameters:
        - data_type: Data type as string or DataType object

        Returns:
        DeltaColumnBuilder for method chaining
        """

    def nullable(self, nullable: bool) -> DeltaColumnBuilder:
        """
        Set column nullability.

        Parameters:
        - nullable: Whether column accepts null values

        Returns:
        DeltaColumnBuilder for method chaining
        """

    def generatedAlwaysAs(self, expression: str) -> DeltaColumnBuilder:
        """
        Set column as generated/computed column.

        Parameters:
        - expression: SQL expression for computed column

        Returns:
        DeltaColumnBuilder for method chaining
        """

    def generatedAlwaysAsIdentity(self, start: int, step: int) -> DeltaColumnBuilder:
        """
        Set column as identity column with GENERATED ALWAYS.

        Parameters:
        - start: Starting value for identity sequence
        - step: Increment step for identity sequence

        Returns:
        DeltaColumnBuilder for method chaining
        """

    def generatedByDefaultAsIdentity(self, start: int, step: int) -> DeltaColumnBuilder:
        """
        Set column as identity column with GENERATED BY DEFAULT.

        Parameters:
        - start: Starting value for identity sequence
        - step: Increment step for identity sequence

        Returns:
        DeltaColumnBuilder for method chaining
        """

    def comment(self, comment: str) -> DeltaColumnBuilder:
        """
        Add column comment/description.

        Parameters:
        - comment: Column description

        Returns:
        DeltaColumnBuilder for method chaining
        """

    def build(self) -> StructField:
        """
        Build and return the StructField for this column.

        Returns:
        StructField representing the configured column
        """
```

```scala { .api }
object DeltaTable {
  def columnBuilder(colName: String): DeltaColumnBuilder
  def columnBuilder(spark: SparkSession, colName: String): DeltaColumnBuilder
}

// DeltaColumnBuilder for detailed column specification
class DeltaColumnBuilder {
  def dataType(dataType: String): DeltaColumnBuilder
  def dataType(dataType: DataType): DeltaColumnBuilder
  def nullable(nullable: Boolean): DeltaColumnBuilder
  def generatedAlwaysAs(expression: String): DeltaColumnBuilder
  def generatedAlwaysAsIdentity(start: Long, step: Long): DeltaColumnBuilder
  def generatedByDefaultAsIdentity(start: Long, step: Long): DeltaColumnBuilder
  def comment(comment: String): DeltaColumnBuilder
  def build(): StructField
}
```

## Usage Examples

### Basic Table Creation

```python
# Create simple table with schema
delta_table = (DeltaTable.create(spark)
    .tableName("employees")
    .addColumn("id", "INT", nullable=False)
    .addColumn("name", "STRING")
    .addColumn("department", "STRING")
    .addColumn("salary", "DOUBLE")
    .addColumn("created_at", "TIMESTAMP")
    .execute())
```

### Table with Partitioning and Properties

```python
from pyspark.sql.types import *

# Create partitioned table with properties
schema = StructType([
    StructField("transaction_id", StringType(), False),
    StructField("customer_id", LongType(), False),
    StructField("amount", DoubleType(), False),
    StructField("transaction_date", DateType(), False),
    StructField("region", StringType(), False)
])

delta_table = (DeltaTable.create(spark)
    .tableName("transactions")
    .location("/path/to/transactions")
    .addColumns(schema)
    .partitionedBy("transaction_date", "region")
    .property("delta.logRetentionDuration", "interval 30 days")
    .property("delta.deletedFileRetentionDuration", "interval 7 days")
    .comment("Customer transaction data partitioned by date and region")
    .execute())
```

### Table with Generated Columns

```python
from delta.tables import IdentityGenerator

# Create table with identity and computed columns
delta_table = (DeltaTable.create(spark)
    .tableName("audit_log")
    .addColumn("id", "BIGINT", nullable=False,
               generated_always_as=IdentityGenerator(start=1, step=1))
    .addColumn("event_type", "STRING", nullable=False)
    .addColumn("event_data", "STRING")
    .addColumn("created_at", "TIMESTAMP", nullable=False)
    .addColumn("date_partition", "DATE", nullable=False,
               generated_always_as="CAST(created_at AS DATE)")
    .partitionedBy("date_partition")
    .execute())
```

### Conditional Table Creation

```python
# Create table only if it doesn't exist
delta_table = (DeltaTable.createIfNotExists(spark)
    .tableName("user_preferences")
    .addColumn("user_id", "BIGINT", nullable=False)
    .addColumn("preferences", "MAP<STRING, STRING>")
    .addColumn("updated_at", "TIMESTAMP")
    .execute())

# Replace existing table
delta_table = (DeltaTable.replace(spark)
    .tableName("temp_results")
    .addColumn("result_id", "STRING")
    .addColumn("value", "DOUBLE")
    .execute())
```

## Table Properties

Common Delta table properties:

- `delta.logRetentionDuration`: How long to keep transaction logs
- `delta.deletedFileRetentionDuration`: Retention for deleted files (vacuum)
- `delta.autoOptimize.optimizeWrite`: Enable write optimization
- `delta.autoOptimize.autoCompact`: Enable auto-compaction
- `delta.enableChangeDataFeed`: Enable change data capture
- `delta.columnMapping.mode`: Column mapping mode for schema evolution