
# Table Operations

Core table management functionality for Delta Lake, covering table creation, reading, updating, deleting, and table access patterns. Provides both path-based and catalog-based table operations with comprehensive CRUD support.

## Capabilities

### Table Access

Load existing Delta tables from filesystem paths or catalog names.

```python { .api }
class DeltaTable:
    @classmethod
    def forPath(
        cls,
        spark: SparkSession,
        path: str,
        hadoop_conf: Dict[str, str] = None
    ) -> DeltaTable:
        """
        Load Delta table from filesystem path.

        Parameters:
        - spark: SparkSession instance
        - path: Path to Delta table directory
        - hadoop_conf: Optional Hadoop configuration for file system access

        Returns:
        DeltaTable instance
        """

    @classmethod
    def forName(cls, spark: SparkSession, table_name: str) -> DeltaTable:
        """
        Load Delta table by catalog name.

        Parameters:
        - spark: SparkSession instance
        - table_name: Table name in catalog (can be qualified: catalog.db.table)

        Returns:
        DeltaTable instance
        """

    @classmethod
    def isDeltaTable(cls, spark: SparkSession, identifier: str) -> bool:
        """
        Check if path or table identifier is a Delta table.

        Parameters:
        - spark: SparkSession instance
        - identifier: Path or table name to check

        Returns:
        True if identifier refers to a Delta table
        """
```

```scala { .api }
object DeltaTable {
  def forPath(spark: SparkSession, path: String): DeltaTable
  def forPath(
      spark: SparkSession,
      path: String,
      hadoopConf: java.util.Map[String, String]
  ): DeltaTable
  def forName(spark: SparkSession, tableName: String): DeltaTable
  def isDeltaTable(spark: SparkSession, identifier: String): Boolean
}
```
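
A minimal usage sketch, assuming an active `spark` session; the path `/data/events` and the name `analytics.events` are placeholders:

```python
from delta.tables import DeltaTable

# Load by filesystem path (placeholder path)
dt = DeltaTable.forPath(spark, "/data/events")

# Load by catalog name (placeholder name)
dt = DeltaTable.forName(spark, "analytics.events")

# Guard against non-Delta locations before loading
if DeltaTable.isDeltaTable(spark, "/data/events"):
    dt = DeltaTable.forPath(spark, "/data/events")
```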

### Data Reading

Convert Delta tables to DataFrames and apply aliases for query operations.

```python { .api }
class DeltaTable:
    def toDF(self) -> DataFrame:
        """Get DataFrame representation of Delta table."""

    def alias(self, alias_name: str) -> DeltaTable:
        """
        Apply alias to Delta table for use in queries.

        Parameters:
        - alias_name: Alias name for the table

        Returns:
        DeltaTable with applied alias
        """
```

```scala { .api }
class DeltaTable {
  def toDF: Dataset[Row]
  def as(alias: String): DeltaTable
  def alias(alias: String): DeltaTable
}
```
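
A minimal sketch of reading and aliasing, assuming `delta_table` was loaded as in the Table Access examples and has an `age` column:

```python
# Read the table contents as a DataFrame and query it
df = delta_table.toDF()
df.filter("age >= 18").show()

# Apply an alias so the table can be referenced in join or merge conditions
people = delta_table.alias("people")
```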

### Data Deletion

Delete rows from Delta tables with optional filtering conditions.

```python { .api }
class DeltaTable:
    def delete(self, condition: Optional[Union[str, Column]] = None) -> None:
        """
        Delete data matching condition.

        Parameters:
        - condition: Optional SQL condition string or Column expression for filtering rows to delete
        """
```

```scala { .api }
class DeltaTable {
  def delete(): Unit
  def delete(condition: String): Unit
  def delete(condition: Column): Unit
}
```

Usage examples:

```python
from pyspark.sql.functions import col

# Delete all rows
delta_table.delete()

# Delete with condition
delta_table.delete("age < 18")
delta_table.delete(col("age") < 18)
```

### Data Updates

Update existing rows in Delta tables with conditional logic and column mappings.

```python { .api }
class DeltaTable:
    def update(
        self,
        condition: Optional[Union[str, Column]] = None,
        set: Optional[Dict[str, Union[str, Column]]] = None
    ) -> None:
        """
        Update rows based on condition and column mappings.

        Parameters:
        - condition: Optional SQL condition string or Column expression for filtering rows to update
        - set: Dictionary mapping column names to new values (SQL expressions or Column objects)

        Note: Can be called as update(set=...) or update(condition, set)
        """
```

```scala { .api }
class DeltaTable {
  def update(set: Map[String, Column]): Unit
  def update(condition: Column, set: Map[String, Column]): Unit
  def updateExpr(set: Map[String, String]): Unit
  def updateExpr(condition: String, set: Map[String, String]): Unit
}
```

Usage examples:

```python
# Update all rows
delta_table.update(set={"status": "'active'"})

# Conditional update with SQL expressions
delta_table.update(
    condition="department = 'engineering'",
    set={
        "salary": "salary * 1.1",
        "updated_at": "current_timestamp()"
    }
)

# Update with Column objects
from pyspark.sql.functions import col, current_timestamp
delta_table.update(
    condition=col("department") == "engineering",
    set={
        "salary": col("salary") * 1.1,
        "updated_at": current_timestamp()
    }
)
```

### Table Conversion

Convert existing Parquet tables to Delta format.

```python { .api }
class DeltaTable:
    @classmethod
    def convertToDelta(
        cls,
        spark: SparkSession,
        identifier: str,
        partition_schema: Optional[Union[str, StructType]] = None
    ) -> DeltaTable:
        """
        Convert existing Parquet table to Delta format.

        Parameters:
        - spark: SparkSession instance
        - identifier: Parquet table identifier (e.g., "parquet.`/path/to/table`")
        - partition_schema: Optional partition schema as DDL string or StructType

        Returns:
        DeltaTable instance for converted table
        """
```

```scala { .api }
object DeltaTable {
  def convertToDelta(spark: SparkSession, identifier: String): DeltaTable
  def convertToDelta(
      spark: SparkSession,
      identifier: String,
      partitionSchema: String
  ): DeltaTable
  def convertToDelta(
      spark: SparkSession,
      identifier: String,
      partitionSchema: StructType
  ): DeltaTable
}
```

Usage examples:

```python
# Convert unpartitioned table
delta_table = DeltaTable.convertToDelta(spark, "parquet.`/path/to/parquet/table`")

# Convert partitioned table
delta_table = DeltaTable.convertToDelta(
    spark,
    "parquet.`/path/to/partitioned/table`",
    "year int, month int"
)
```

### Table Details

Get comprehensive information about Delta table structure and metadata.

```python { .api }
class DeltaTable:
    def detail(self) -> DataFrame:
        """
        Get detailed information about the Delta table.

        Returns:
        DataFrame with table details including format, location, size, etc.
        """
```

```scala { .api }
class DeltaTable {
  def detail(): DataFrame
}
```

The detail operation returns information including:

- Table format and version
- Location and size
- Number of files
- Partition columns
- Table properties
- Created/modified timestamps
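
A minimal sketch of inspecting table metadata, assuming `delta_table` was loaded as in the Table Access examples; the selected column names (`format`, `location`, `numFiles`, `sizeInBytes`) follow the usual DESCRIBE DETAIL output:

```python
# Inspect table metadata as a DataFrame
details = delta_table.detail()
details.select("format", "location", "numFiles", "sizeInBytes").show(truncate=False)
```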