Python APIs for using Delta Lake with Apache Spark
—
Core table management functionality for Delta Lake including creation, reading, updating, deleting, and table access patterns. Provides both path-based and catalog-based table operations with comprehensive CRUD support.
Load existing Delta tables from filesystem paths or catalog names.
class DeltaTable:
@classmethod
def forPath(
cls,
spark: SparkSession,
path: str,
hadoop_conf: Dict[str, str] = None
) -> DeltaTable:
"""
Load Delta table from filesystem path.
Parameters:
- spark: SparkSession instance
- path: Path to Delta table directory
- hadoop_conf: Optional Hadoop configuration for file system access
Returns:
DeltaTable instance
"""
@classmethod
def forName(cls, spark: SparkSession, table_name: str) -> DeltaTable:
"""
Load Delta table by catalog name.
Parameters:
- spark: SparkSession instance
- table_name: Table name in catalog (can be qualified: catalog.db.table)
Returns:
DeltaTable instance
"""
@classmethod
def isDeltaTable(cls, spark: SparkSession, identifier: str) -> bool:
"""
Check if path or table identifier is a Delta table.
Parameters:
- spark: SparkSession instance
- identifier: Path or table name to check
Returns:
True if identifier refers to a Delta table
"""object DeltaTable {
def forPath(spark: SparkSession, path: String): DeltaTable
def forPath(
spark: SparkSession,
path: String,
hadoopConf: java.util.Map[String, String]
): DeltaTable
def forName(spark: SparkSession, tableName: String): DeltaTable
def isDeltaTable(spark: SparkSession, identifier: String): Boolean
}

Convert Delta tables to DataFrames and apply aliases for query operations.
class DeltaTable:
def toDF(self) -> DataFrame:
"""Get DataFrame representation of Delta table."""
def alias(self, alias_name: str) -> DeltaTable:
"""
Apply alias to Delta table for use in queries.
Parameters:
- alias_name: Alias name for the table
Returns:
DeltaTable with applied alias
"""class DeltaTable {
def toDF: Dataset[Row]
def as(alias: String): DeltaTable
def alias(alias: String): DeltaTable
}

Delete rows from Delta tables with optional filtering conditions.
class DeltaTable:
def delete(self, condition: Optional[Union[str, Column]] = None) -> None:
"""
Delete data matching condition.
Parameters:
- condition: Optional SQL condition string or Column expression for filtering rows to delete
"""class DeltaTable {
def delete(): Unit
def delete(condition: String): Unit
def delete(condition: Column): Unit
}

Usage examples:
# Delete all rows
delta_table.delete()
# Delete with condition
delta_table.delete("age < 18")
delta_table.delete(col("age") < 18)

Update existing rows in Delta tables with conditional logic and column mappings.
class DeltaTable:
def update(
self,
condition: Optional[Union[str, Column]] = None,
set: Optional[Dict[str, Union[str, Column]]] = None
) -> None:
"""
Update rows based on condition and column mappings.
Parameters:
- condition: Optional SQL condition string or Column expression for filtering rows to update
- set: Dictionary mapping column names to new values (SQL expressions or Column objects)
Note: Uses method overloading - can be called as update(set=...) or update(condition, set)
"""class DeltaTable {
def update(set: Map[String, Column]): Unit
def update(condition: Column, set: Map[String, Column]): Unit
def updateExpr(set: Map[String, String]): Unit
def updateExpr(condition: String, set: Map[String, String]): Unit
}

Usage examples:
# Update all rows
delta_table.update(set={"status": "'active'"})
# Conditional update with SQL expressions
delta_table.update(
condition="department = 'engineering'",
set={
"salary": "salary * 1.1",
"updated_at": "current_timestamp()"
}
)
# Update with Column objects
from pyspark.sql.functions import col, current_timestamp
delta_table.update(
condition=col("department") == "engineering",
set={
"salary": col("salary") * 1.1,
"updated_at": current_timestamp()
}
)

Convert existing Parquet tables to Delta format.
class DeltaTable:
@classmethod
def convertToDelta(
cls,
spark: SparkSession,
identifier: str,
partition_schema: Optional[Union[str, StructType]] = None
) -> DeltaTable:
"""
Convert existing Parquet table to Delta format.
Parameters:
- spark: SparkSession instance
- identifier: Parquet table identifier (e.g., "parquet.`/path/to/table`")
- partition_schema: Optional partition schema as DDL string or StructType
Returns:
DeltaTable instance for converted table
"""object DeltaTable {
def convertToDelta(spark: SparkSession, identifier: String): DeltaTable
def convertToDelta(
spark: SparkSession,
identifier: String,
partitionSchema: String
): DeltaTable
def convertToDelta(
spark: SparkSession,
identifier: String,
partitionSchema: StructType
): DeltaTable
}

Usage examples:
# Convert unpartitioned table
delta_table = DeltaTable.convertToDelta(spark, "parquet.`/path/to/parquet/table`")
# Convert partitioned table
delta_table = DeltaTable.convertToDelta(
spark,
"parquet.`/path/to/partitioned/table`",
"year int, month int"
)

Get comprehensive information about Delta table structure and metadata.
class DeltaTable:
def detail(self) -> DataFrame:
"""
Get detailed information about the Delta table.
Returns:
DataFrame with table details including format, location, size, etc.
"""class DeltaTable {
def detail(): DataFrame
}

The detail operation returns table metadata such as the table's format, location, and size.
Install with Tessl CLI
npx tessl i tessl/pypi-delta-spark