# Table Operations

Core table management functionality for Delta Lake: creating, reading, updating, and deleting tables, plus table access patterns. Provides both path-based and catalog-based table operations with comprehensive CRUD support.

## Capabilities

### Table Access

Load existing Delta tables from filesystem paths or catalog names.

```python { .api }
class DeltaTable:
    @classmethod
    def forPath(
        cls,
        spark: SparkSession,
        path: str,
        hadoop_conf: Optional[Dict[str, str]] = None
    ) -> DeltaTable:
        """
        Load a Delta table from a filesystem path.

        Parameters:
        - spark: SparkSession instance
        - path: Path to the Delta table directory
        - hadoop_conf: Optional Hadoop configuration for file system access

        Returns:
        DeltaTable instance
        """

    @classmethod
    def forName(cls, spark: SparkSession, table_name: str) -> DeltaTable:
        """
        Load a Delta table by catalog name.

        Parameters:
        - spark: SparkSession instance
        - table_name: Table name in the catalog (can be qualified: catalog.db.table)

        Returns:
        DeltaTable instance
        """

    @classmethod
    def isDeltaTable(cls, spark: SparkSession, identifier: str) -> bool:
        """
        Check whether a path or table identifier refers to a Delta table.

        Parameters:
        - spark: SparkSession instance
        - identifier: Path or table name to check

        Returns:
        True if the identifier refers to a Delta table
        """
```

```scala { .api }
object DeltaTable {
  def forPath(spark: SparkSession, path: String): DeltaTable
  def forPath(
      spark: SparkSession,
      path: String,
      hadoopConf: java.util.Map[String, String]
  ): DeltaTable
  def forName(spark: SparkSession, tableName: String): DeltaTable
  def isDeltaTable(spark: SparkSession, identifier: String): Boolean
}
```
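
Usage examples (a minimal sketch; the `delta.tables` import follows the delta-spark Python package, `spark` is an active SparkSession, and the paths and table names are illustrative):

```python
from delta.tables import DeltaTable

# Load a table by filesystem path
delta_table = DeltaTable.forPath(spark, "/data/events")

# Load a table by (optionally qualified) catalog name
delta_table = DeltaTable.forName(spark, "analytics.events")

# Guard against non-Delta locations before loading
if DeltaTable.isDeltaTable(spark, "/data/events"):
    delta_table = DeltaTable.forPath(spark, "/data/events")
```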

### Data Reading

Convert Delta tables to DataFrames and apply aliases for query operations.

```python { .api }
class DeltaTable:
    def toDF(self) -> DataFrame:
        """Get DataFrame representation of Delta table."""

    def alias(self, alias_name: str) -> DeltaTable:
        """
        Apply alias to Delta table for use in queries.

        Parameters:
        - alias_name: Alias name for the table

        Returns:
        DeltaTable with applied alias
        """
```

```scala { .api }
class DeltaTable {
  def toDF: Dataset[Row]
  def as(alias: String): DeltaTable
  def alias(alias: String): DeltaTable
}
```
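
Usage examples (a minimal sketch; `delta_table` is assumed to have been loaded as shown under Table Access):

```python
# Work with the table contents as a regular DataFrame
df = delta_table.toDF()
df.filter("age >= 18").show()

# Alias the table, e.g. to qualify column references in later operations
people = delta_table.alias("people")
```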

### Data Deletion

Delete rows from Delta tables with optional filtering conditions.

```python { .api }
class DeltaTable:
    def delete(self, condition: Optional[Union[str, Column]] = None) -> None:
        """
        Delete data matching condition.

        Parameters:
        - condition: Optional SQL condition string or Column expression for filtering rows to delete
        """
```

```scala { .api }
class DeltaTable {
  def delete(): Unit
  def delete(condition: String): Unit
  def delete(condition: Column): Unit
}
```

Usage examples:

```python
from pyspark.sql.functions import col

# Delete all rows
delta_table.delete()

# Delete with a condition, as a SQL string or a Column expression
delta_table.delete("age < 18")
delta_table.delete(col("age") < 18)
```

### Data Updates

Update existing rows in Delta tables with conditional logic and column mappings.

```python { .api }
class DeltaTable:
    def update(
        self,
        condition: Optional[Union[str, Column]] = None,
        set: Optional[Dict[str, Union[str, Column]]] = None
    ) -> None:
        """
        Update rows based on condition and column mappings.

        Parameters:
        - condition: Optional SQL condition string or Column expression for filtering rows to update
        - set: Dictionary mapping column names to new values (SQL expression strings or Column objects)

        Note: `set` is required; call as update(set=...) to update all rows,
        or update(condition, set) to update only matching rows.
        """
```

```scala { .api }
class DeltaTable {
  def update(set: Map[String, Column]): Unit
  def update(condition: Column, set: Map[String, Column]): Unit
  def updateExpr(set: Map[String, String]): Unit
  def updateExpr(condition: String, set: Map[String, String]): Unit
}
```

Usage examples:

```python
# Update all rows
delta_table.update(set={"status": "'active'"})

# Conditional update with SQL expressions
delta_table.update(
    condition="department = 'engineering'",
    set={
        "salary": "salary * 1.1",
        "updated_at": "current_timestamp()"
    }
)

# Update with Column objects
from pyspark.sql.functions import col, current_timestamp
delta_table.update(
    condition=col("department") == "engineering",
    set={
        "salary": col("salary") * 1.1,
        "updated_at": current_timestamp()
    }
)
```

### Table Conversion

Convert existing Parquet tables to Delta format.

```python { .api }
class DeltaTable:
    @classmethod
    def convertToDelta(
        cls,
        spark: SparkSession,
        identifier: str,
        partition_schema: Optional[Union[str, StructType]] = None
    ) -> DeltaTable:
        """
        Convert existing Parquet table to Delta format.

        Parameters:
        - spark: SparkSession instance
        - identifier: Parquet table identifier (e.g., "parquet.`/path/to/table`")
        - partition_schema: Optional partition schema as DDL string or StructType

        Returns:
        DeltaTable instance for converted table
        """
```

```scala { .api }
object DeltaTable {
  def convertToDelta(spark: SparkSession, identifier: String): DeltaTable
  def convertToDelta(
      spark: SparkSession,
      identifier: String,
      partitionSchema: String
  ): DeltaTable
  def convertToDelta(
      spark: SparkSession,
      identifier: String,
      partitionSchema: StructType
  ): DeltaTable
}
```

Usage examples:

```python
# Convert an unpartitioned table
delta_table = DeltaTable.convertToDelta(spark, "parquet.`/path/to/parquet/table`")

# Convert a partitioned table, supplying the partition schema as a DDL string
delta_table = DeltaTable.convertToDelta(
    spark,
    "parquet.`/path/to/partitioned/table`",
    "year int, month int"
)
```
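
The partition schema can also be built programmatically as a StructType (a sketch using standard PySpark types; the path is illustrative):

```python
from pyspark.sql.types import IntegerType, StructField, StructType

# Equivalent to the DDL string "year int, month int"
partition_schema = StructType([
    StructField("year", IntegerType()),
    StructField("month", IntegerType()),
])

delta_table = DeltaTable.convertToDelta(
    spark,
    "parquet.`/path/to/partitioned/table`",
    partition_schema
)
```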

### Table Details

Get comprehensive information about Delta table structure and metadata.

```python { .api }
class DeltaTable:
    def detail(self) -> DataFrame:
        """
        Get detailed information about the Delta table.

        Returns:
        DataFrame with table details including format, location, size, etc.
        """
```

```scala { .api }
class DeltaTable {
  def detail(): DataFrame
}
```

The detail operation returns information including:

- Table format and protocol versions
- Location and size
- Number of files
- Partition columns
- Table properties
- Created/modified timestamps
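
For example, to inspect a few of the returned columns (`numFiles` and `sizeInBytes` follow the standard DESCRIBE DETAIL output):

```python
# One-row DataFrame describing the table
details = delta_table.detail()
details.select("format", "location", "numFiles", "sizeInBytes").show(truncate=False)
```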