or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

configuration.md · index.md · merge-operations.md · optimization.md · table-management.md · table-operations.md · time-travel.md

docs/table-management.md

# Table Management

Programmatic table creation, schema management, and configuration for Delta Lake tables. Provides fluent builder APIs for creating tables with custom schemas, partitioning, clustering, and properties.

3

4

## Capabilities

5

6

### Table Builders

7

8

Create table builders for different creation patterns.

9

10

```python { .api }

11

class DeltaTable:

12

@classmethod

13

def create(cls, spark: SparkSession = None) -> DeltaTableBuilder:

14

"""

15

Create a new table (equivalent to CREATE TABLE).

16

17

Parameters:

18

- spark: Optional SparkSession (uses active session if None)

19

20

Returns:

21

DeltaTableBuilder for table configuration

22

"""

23

24

@classmethod

25

def createIfNotExists(cls, spark: SparkSession = None) -> DeltaTableBuilder:

26

"""

27

Create table if it doesn't exist (CREATE TABLE IF NOT EXISTS).

28

29

Parameters:

30

- spark: Optional SparkSession

31

32

Returns:

33

DeltaTableBuilder for table configuration

34

"""

35

36

@classmethod

37

def replace(cls, spark: SparkSession = None) -> DeltaTableBuilder:

38

"""

39

Replace existing table (REPLACE TABLE).

40

41

Parameters:

42

- spark: Optional SparkSession

43

44

Returns:

45

DeltaTableBuilder for table configuration

46

"""

47

48

@classmethod

49

def createOrReplace(cls, spark: SparkSession = None) -> DeltaTableBuilder:

50

"""

51

Create or replace table (CREATE OR REPLACE TABLE).

52

53

Parameters:

54

- spark: Optional SparkSession

55

56

Returns:

57

DeltaTableBuilder for table configuration

58

"""

59

```

60

61

```scala { .api }
object DeltaTable {
  def create(): DeltaTableBuilder
  def create(spark: SparkSession): DeltaTableBuilder
  def createIfNotExists(): DeltaTableBuilder
  def createIfNotExists(spark: SparkSession): DeltaTableBuilder
  def replace(): DeltaTableBuilder
  def replace(spark: SparkSession): DeltaTableBuilder
  def createOrReplace(): DeltaTableBuilder
  def createOrReplace(spark: SparkSession): DeltaTableBuilder
}
```

73

74

### Table Configuration

75

76

Configure table name, location, and metadata.

77

78

```python { .api }

79

class DeltaTableBuilder:

80

def tableName(self, identifier: str) -> DeltaTableBuilder:

81

"""

82

Set table name, optionally qualified with database.

83

84

Parameters:

85

- identifier: Table name (e.g., "my_table" or "db.my_table")

86

87

Returns:

88

DeltaTableBuilder for method chaining

89

"""

90

91

def location(self, location: str) -> DeltaTableBuilder:

92

"""

93

Set table data location path.

94

95

Parameters:

96

- location: Path where table data will be stored

97

98

Returns:

99

DeltaTableBuilder for method chaining

100

"""

101

102

def comment(self, comment: str) -> DeltaTableBuilder:

103

"""

104

Add table comment/description.

105

106

Parameters:

107

- comment: Table description

108

109

Returns:

110

DeltaTableBuilder for method chaining

111

"""

112

```

113

114

```scala { .api }

115

class DeltaTableBuilder {

116

def tableName(identifier: String): DeltaTableBuilder

117

def location(location: String): DeltaTableBuilder

118

def comment(comment: String): DeltaTableBuilder

119

}

120

```

121

122

### Column Definitions

123

124

Define table schema with columns, data types, and constraints.

125

126

```python { .api }

127

class DeltaTableBuilder:

128

def addColumn(

129

self,

130

col_name: str,

131

data_type: Union[str, DataType],

132

nullable: bool = True,

133

generated_always_as: Optional[Union[str, IdentityGenerator]] = None,

134

generated_by_default_as: Optional[IdentityGenerator] = None,

135

comment: Optional[str] = None

136

) -> DeltaTableBuilder:

137

"""

138

Add column to table schema.

139

140

Parameters:

141

- col_name: Column name

142

- data_type: Data type as string or DataType object

143

- nullable: Whether column accepts null values

144

- generated_always_as: SQL expression or IdentityGenerator for computed column

145

- generated_by_default_as: IdentityGenerator for identity column with defaults

146

- comment: Column description

147

148

Returns:

149

DeltaTableBuilder for method chaining

150

"""

151

152

def addColumns(

153

self,

154

cols: Union[StructType, List[StructField]]

155

) -> DeltaTableBuilder:

156

"""

157

Add multiple columns from existing schema.

158

159

Parameters:

160

- cols: StructType schema or list of StructField objects

161

162

Returns:

163

DeltaTableBuilder for method chaining

164

"""

165

```

166

167

```scala { .api }

168

class DeltaTableBuilder {

169

def addColumn(colName: String, dataType: DataType): DeltaTableBuilder

170

def addColumn(

171

colName: String,

172

dataType: DataType,

173

nullable: Boolean,

174

generatedAlwaysAs: String,

175

comment: String

176

): DeltaTableBuilder

177

def addColumns(cols: StructType): DeltaTableBuilder

178

}

179

```

180

181

### Partitioning and Clustering

182

183

Configure table partitioning and clustering for performance optimization.

184

185

```python { .api }

186

class DeltaTableBuilder:

187

def partitionedBy(self, *cols: str) -> DeltaTableBuilder:

188

"""

189

Specify partitioning columns.

190

191

Parameters:

192

- cols: Column names for partitioning

193

194

Returns:

195

DeltaTableBuilder for method chaining

196

"""

197

198

def clusterBy(self, *cols: str) -> DeltaTableBuilder:

199

"""

200

Specify clustering columns for data layout optimization.

201

202

Parameters:

203

- cols: Column names for clustering

204

205

Returns:

206

DeltaTableBuilder for method chaining

207

"""

208

```

209

210

```scala { .api }

211

class DeltaTableBuilder {

212

def partitionedBy(cols: String*): DeltaTableBuilder

213

def clusterBy(cols: String*): DeltaTableBuilder

214

}

215

```

216

217

### Table Properties

218

219

Set custom table properties and configuration.

220

221

```python { .api }

222

class DeltaTableBuilder:

223

def property(self, key: str, value: str) -> DeltaTableBuilder:

224

"""

225

Set table property.

226

227

Parameters:

228

- key: Property name

229

- value: Property value

230

231

Returns:

232

DeltaTableBuilder for method chaining

233

"""

234

```

235

236

```scala { .api }

237

class DeltaTableBuilder {

238

def property(key: String, value: String): DeltaTableBuilder

239

}

240

```

241

242

### Table Creation

243

244

Execute table creation with configured settings.

245

246

```python { .api }

247

class DeltaTableBuilder:

248

def execute(self) -> DeltaTable:

249

"""

250

Execute table creation.

251

252

Returns:

253

DeltaTable instance for the created table

254

"""

255

```

256

257

```scala { .api }

258

class DeltaTableBuilder {

259

def execute(): DeltaTable

260

}

261

```

262

263

### Identity Columns

Configure identity columns for auto-incrementing values.

```python { .api }
@dataclass
class IdentityGenerator:
    """Identity column configuration for auto-incrementing values."""
    start: int = 1  # Starting value for identity sequence
    step: int = 1   # Increment step for identity sequence
```

274

275

### Column Builders

276

277

Create detailed column specifications.

278

279

```python { .api }

280

class DeltaTable:

281

@classmethod

282

def columnBuilder(cls, col_name: str, spark: Optional[SparkSession] = None) -> DeltaColumnBuilder:

283

"""

284

Create column builder for detailed column configuration.

285

286

Parameters:

287

- col_name: Column name

288

- spark: Optional SparkSession

289

290

Returns:

291

DeltaColumnBuilder for column configuration

292

"""

293

294

class DeltaColumnBuilder:

295

def dataType(self, data_type: Union[str, DataType]) -> DeltaColumnBuilder:

296

"""

297

Set column data type.

298

299

Parameters:

300

- data_type: Data type as string or DataType object

301

302

Returns:

303

DeltaColumnBuilder for method chaining

304

"""

305

306

def nullable(self, nullable: bool) -> DeltaColumnBuilder:

307

"""

308

Set column nullability.

309

310

Parameters:

311

- nullable: Whether column accepts null values

312

313

Returns:

314

DeltaColumnBuilder for method chaining

315

"""

316

317

def generatedAlwaysAs(self, expression: str) -> DeltaColumnBuilder:

318

"""

319

Set column as generated/computed column.

320

321

Parameters:

322

- expression: SQL expression for computed column

323

324

Returns:

325

DeltaColumnBuilder for method chaining

326

"""

327

328

def generatedAlwaysAsIdentity(self, start: int, step: int) -> DeltaColumnBuilder:

329

"""

330

Set column as identity column with GENERATED ALWAYS.

331

332

Parameters:

333

- start: Starting value for identity sequence

334

- step: Increment step for identity sequence

335

336

Returns:

337

DeltaColumnBuilder for method chaining

338

"""

339

340

def generatedByDefaultAsIdentity(self, start: int, step: int) -> DeltaColumnBuilder:

341

"""

342

Set column as identity column with GENERATED BY DEFAULT.

343

344

Parameters:

345

- start: Starting value for identity sequence

346

- step: Increment step for identity sequence

347

348

Returns:

349

DeltaColumnBuilder for method chaining

350

"""

351

352

def comment(self, comment: str) -> DeltaColumnBuilder:

353

"""

354

Add column comment/description.

355

356

Parameters:

357

- comment: Column description

358

359

Returns:

360

DeltaColumnBuilder for method chaining

361

"""

362

363

def build(self) -> StructField:

364

"""

365

Build and return the StructField for this column.

366

367

Returns:

368

StructField representing the configured column

369

"""

370

```

371

372

```scala { .api }

373

object DeltaTable {

374

def columnBuilder(colName: String): DeltaColumnBuilder

375

def columnBuilder(spark: SparkSession, colName: String): DeltaColumnBuilder

376

}

377

378

// DeltaColumnBuilder for detailed column specification

379

class DeltaColumnBuilder {

380

def dataType(dataType: String): DeltaColumnBuilder

381

def dataType(dataType: DataType): DeltaColumnBuilder

382

def nullable(nullable: Boolean): DeltaColumnBuilder

383

def generatedAlwaysAs(expression: String): DeltaColumnBuilder

384

def generatedAlwaysAsIdentity(start: Long, step: Long): DeltaColumnBuilder

385

def generatedByDefaultAsIdentity(start: Long, step: Long): DeltaColumnBuilder

386

def comment(comment: String): DeltaColumnBuilder

387

def build(): StructField

388

}

389

```

390

391

## Usage Examples

### Basic Table Creation

```python
# Create simple table with schema
delta_table = (DeltaTable.create(spark)
    .tableName("employees")
    .addColumn("id", "INT", nullable=False)
    .addColumn("name", "STRING")
    .addColumn("department", "STRING")
    .addColumn("salary", "DOUBLE")
    .addColumn("created_at", "TIMESTAMP")
    .execute())
```

406

407

### Table with Partitioning and Properties

408

409

```python

410

from pyspark.sql.types import *

411

412

# Create partitioned table with properties

413

schema = StructType([

414

StructField("transaction_id", StringType(), False),

415

StructField("customer_id", LongType(), False),

416

StructField("amount", DoubleType(), False),

417

StructField("transaction_date", DateType(), False),

418

StructField("region", StringType(), False)

419

])

420

421

delta_table = (DeltaTable.create(spark)

422

.tableName("transactions")

423

.location("/path/to/transactions")

424

.addColumns(schema)

425

.partitionedBy("transaction_date", "region")

426

.property("delta.logRetentionDuration", "interval 30 days")

427

.property("delta.deletedFileRetentionDuration", "interval 7 days")

428

.comment("Customer transaction data partitioned by date and region")

429

.execute())

430

```

431

432

### Table with Generated Columns

433

434

```python

435

from delta.tables import IdentityGenerator

436

437

# Create table with identity and computed columns

438

delta_table = (DeltaTable.create(spark)

439

.tableName("audit_log")

440

.addColumn("id", "BIGINT", nullable=False,

441

generated_always_as=IdentityGenerator(start=1, step=1))

442

.addColumn("event_type", "STRING", nullable=False)

443

.addColumn("event_data", "STRING")

444

.addColumn("created_at", "TIMESTAMP", nullable=False)

445

.addColumn("date_partition", "DATE", nullable=False,

446

generated_always_as="CAST(created_at AS DATE)")

447

.partitionedBy("date_partition")

448

.execute())

449

```

450

451

### Conditional Table Creation

452

453

```python

454

# Create table only if it doesn't exist

455

delta_table = (DeltaTable.createIfNotExists(spark)

456

.tableName("user_preferences")

457

.addColumn("user_id", "BIGINT", nullable=False)

458

.addColumn("preferences", "MAP<STRING, STRING>")

459

.addColumn("updated_at", "TIMESTAMP")

460

.execute())

461

462

# Replace existing table

463

delta_table = (DeltaTable.replace(spark)

464

.tableName("temp_results")

465

.addColumn("result_id", "STRING")

466

.addColumn("value", "DOUBLE")

467

.execute())

468

```

469

470

## Table Properties

Common Delta table properties:

- `delta.logRetentionDuration`: How long to keep transaction logs
- `delta.deletedFileRetentionDuration`: Retention for deleted files (vacuum)
- `delta.autoOptimize.optimizeWrite`: Enable write optimization
- `delta.autoOptimize.autoCompact`: Enable auto-compaction
- `delta.enableChangeDataFeed`: Enable change data capture
- `delta.columnMapping.mode`: Column mapping mode for schema evolution