Native Delta Lake Python binding based on delta-rs with Pandas integration
```bash
npx @tessl/cli install tessl/pypi-deltalake@1.1.0
```

Native Delta Lake Python binding based on delta-rs with Pandas integration. Provides high-performance operations on Delta Lake tables and seamless integration with the Python data ecosystem, enabling ACID transactions, time travel queries, schema evolution, and efficient storage across multiple storage backends.
```bash
pip install deltalake
pip install deltalake[pandas,pyarrow]
```

```python
__version__: str        # Python package version
rust_core_version: str  # Underlying Rust core version
```

Access package version information to check compatibility and track installed versions.
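A quick compatibility check; a minimal sketch, assuming both attributes behave as the string declarations above:

```python
import deltalake

# Report the Python package version and the underlying delta-rs core version,
# assuming both are exposed as plain string attributes as declared above.
print(f"deltalake {deltalake.__version__}")
print(f"delta-rs core {deltalake.rust_core_version}")
```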
Basic import:

```python
from deltalake import DeltaTable
```

Complete import for all functionality:

```python
from deltalake import (
    DeltaTable,
    Metadata,
    write_deltalake,
    convert_to_deltalake,
    QueryBuilder,
    Schema,
    Field,
    DataType,
    WriterProperties,
    BloomFilterProperties,
    ColumnProperties,
    CommitProperties,
    PostCommitHookProperties,
    TableFeatures,
    Transaction,
    __version__,
    rust_core_version
)
```

Basic usage combining reads, writes, updates, and time travel:

```python
from deltalake import DeltaTable, write_deltalake
import pandas as pd
# Reading a Delta table
dt = DeltaTable("path/to/delta-table")
df = dt.to_pandas()
print(f"Table has {dt.version()} versions with {len(dt.files())} files")
# Writing data to Delta table
data = pd.DataFrame({
    'id': [1, 2, 3],
    'name': ['Alice', 'Bob', 'Charlie'],
    'age': [25, 30, 35]
})
write_deltalake("path/to/new-table", data)

# Updating records
dt = DeltaTable("path/to/new-table")
dt.update(
    predicate="age < 30",
    new_values={"name": "Updated Name"}
)

# Time travel
dt.load_as_version(0)  # Load first version
older_df = dt.to_pandas()
```

Delta Lake provides ACID transactions on top of object storage through a transaction log that tracks all changes to table metadata and data files. The deltalake package exposes this functionality through several key components.
The Rust-based core (delta-rs) provides high-performance operations while the Python binding offers seamless integration with pandas, PyArrow, and the broader Python data ecosystem.
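The transaction log itself can be inspected through a table's commit history. A minimal sketch, assuming an existing table at an illustrative path:

```python
from deltalake import DeltaTable

dt = DeltaTable("path/to/delta-table")  # illustrative path

# Each entry is one commit in the transaction log: timestamp,
# operation (WRITE, UPDATE, DELETE, ...), and operation parameters.
for commit in dt.history(limit=5):
    print(commit["timestamp"], commit["operation"])
```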
Core table management including creation, reading, and metadata access. The DeltaTable class provides the primary interface for interacting with Delta Lake tables.

```python
class DeltaTable:
    def __init__(
        self,
        table_uri: str | Path,
        version: int | None = None,
        storage_options: dict[str, str] | None = None,
        without_files: bool = False,
        log_buffer_size: int | None = None
    ): ...

    @classmethod
    def create(
        cls,
        table_uri: str | Path,
        schema: Schema,
        mode: Literal["error", "append", "overwrite", "ignore"] = "error",
        partition_by: list[str] | str | None = None,
        storage_options: dict[str, str] | None = None
    ) -> DeltaTable: ...

    @staticmethod
    def is_deltatable(table_uri: str, storage_options: dict[str, str] | None = None) -> bool: ...
```
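A sketch of table creation and detection based on the declarations above; the deltalake.schema import path for PrimitiveType is an assumption of this example:

```python
from deltalake import DeltaTable, Field, Schema
from deltalake.schema import PrimitiveType  # assumed import path

# Create an empty two-column table at an illustrative local path.
schema = Schema([
    Field("id", PrimitiveType("long"), nullable=False),
    Field("name", PrimitiveType("string")),
])
dt = DeltaTable.create("path/to/events-table", schema, mode="error")

print(DeltaTable.is_deltatable("path/to/events-table"))  # True
```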
Converting Delta tables to various formats including pandas DataFrames, PyArrow tables, and streaming readers for efficient data processing.

```python
def to_pandas(
    self,
    columns: list[str] | None = None,
    filesystem: Any | None = None
) -> pd.DataFrame: ...

def to_pyarrow_table(
    self,
    columns: list[str] | None = None,
    filesystem: Any | None = None
) -> pyarrow.Table: ...

def to_pyarrow_dataset(
    self,
    partitions: list[tuple[str, str, Any]] | None = None,
    filesystem: Any | None = None
) -> pyarrow.dataset.Dataset: ...
```
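A sketch of the two main read paths, assuming an existing table: to_pandas materializes eagerly, while the dataset form streams record batches.

```python
from deltalake import DeltaTable

dt = DeltaTable("path/to/delta-table")  # illustrative path

# Column pruning keeps the eager read narrow.
names = dt.to_pandas(columns=["name"])

# The dataset form yields batches instead of materializing everything at once.
for batch in dt.to_pyarrow_dataset().to_batches():
    ...  # process each pyarrow.RecordBatch incrementally
```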
Functions for writing data to Delta tables and modifying existing records through update, delete, and merge operations.

```python
def write_deltalake(
    table_or_uri: str | Path | DeltaTable,
    data: Any,
    *,
    partition_by: list[str] | str | None = None,
    mode: Literal["error", "append", "overwrite", "ignore"] = "error",
    schema_mode: Literal["merge", "overwrite"] | None = None,
    storage_options: dict[str, str] | None = None,
    writer_properties: WriterProperties | None = None
) -> None: ...

def update(
    self,
    updates: dict[str, str] | None = None,
    new_values: dict[str, Any] | None = None,
    predicate: str | None = None,
    writer_properties: WriterProperties | None = None
) -> dict[str, Any]: ...

def delete(self, predicate: str | None = None) -> dict[str, Any]: ...
```
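A sketch combining these operations on an illustrative table; note that `updates` takes SQL expressions as strings while `new_values` takes Python literals:

```python
import pandas as pd
from deltalake import DeltaTable, write_deltalake

# Append a row to an existing table (illustrative path).
write_deltalake("path/to/people", pd.DataFrame({"id": [4], "age": [41]}), mode="append")

dt = DeltaTable("path/to/people")
dt.update(updates={"age": "age + 1"})      # SQL-expression form: increment every age
metrics = dt.delete(predicate="age > 60")  # returned dict reports operation metrics
```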
Schema definition, evolution, and type system for Delta Lake tables including field definitions and data types.

```python
class Schema:
    def __init__(self, fields: list[Field]): ...

    @property
    def fields(self) -> list[Field]: ...

class Field:
    def __init__(self, name: str, data_type: DataType, nullable: bool = True, metadata: dict | None = None): ...

    @property
    def name(self) -> str: ...

    @property
    def data_type(self) -> DataType: ...

# Data types: PrimitiveType, ArrayType, MapType, StructType
DataType = Union[PrimitiveType, ArrayType, MapType, StructType]
```
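A sketch of a nested schema built from the composite types; the deltalake.schema import path and the exact constructor keywords are assumptions of this example:

```python
from deltalake import Field, Schema
from deltalake.schema import ArrayType, PrimitiveType, StructType  # assumed import path

# A row id plus a list of key/value tag structs.
tag = StructType([
    Field("key", PrimitiveType("string")),
    Field("value", PrimitiveType("string")),
])
schema = Schema([
    Field("id", PrimitiveType("long"), nullable=False),
    Field("tags", ArrayType(tag, contains_null=False)),
])
print([f.name for f in schema.fields])  # ['id', 'tags']
```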
Transaction properties, commit configurations, and ACID transaction control for ensuring data consistency.

```python
class CommitProperties:
    def __init__(
        self,
        max_retry_commit_attempts: int | None = None,
        app_metadata: dict[str, Any] | None = None
    ): ...

class PostCommitHookProperties:
    def __init__(
        self,
        create_checkpoint: bool = True,
        cleanup_expired_logs: bool | None = None
    ): ...

class Transaction:
    def commit(
        self,
        actions: list[Any],
        commit_properties: CommitProperties | None = None,
        post_commit_hook_properties: PostCommitHookProperties | None = None
    ) -> int: ...
```
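A sketch of attaching commit metadata to a table operation; that mutating operations accept a commit_properties argument (beyond the signatures shown above) is an assumption based on current delta-rs releases:

```python
from deltalake import CommitProperties, DeltaTable

dt = DeltaTable("path/to/people")  # illustrative path

props = CommitProperties(
    max_retry_commit_attempts=3,
    app_metadata={"job": "nightly-cleanup"},  # recorded in the commit info
)
# Assumption: delete(), like other mutating operations, accepts commit_properties.
dt.delete(predicate="age > 60", commit_properties=props)
```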
SQL querying capabilities using Apache DataFusion integration for running analytical queries on Delta tables.

```python
class QueryBuilder:
    def __init__(self): ...
    def register(self, table_name: str, delta_table: DeltaTable) -> QueryBuilder: ...
    def execute(self, sql: str) -> RecordBatchReader: ...
```
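A sketch of a DataFusion query over a registered table; reading the result with read_all() follows from the PyArrow RecordBatchReader return type declared above:

```python
from deltalake import DeltaTable, QueryBuilder

dt = DeltaTable("path/to/people")  # illustrative path

# register() returns the builder, so registration and execution chain.
reader = QueryBuilder().register("people", dt).execute(
    "SELECT name, age FROM people WHERE age >= 30"
)
result = reader.read_all()  # materialize the stream as a pyarrow.Table
```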
Operations for table optimization, vacuum cleanup, and checkpoint management to maintain table performance and storage efficiency.

```python
def vacuum(
    self,
    retention_hours: int | None = None,
    dry_run: bool = True,
    enforce_retention_duration: bool = True
) -> list[str]: ...

def create_checkpoint(self) -> None: ...
def cleanup_metadata(self) -> None: ...
def optimize(self) -> TableOptimizer: ...
```
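A maintenance sketch on an existing table; vacuum defaults to a dry run, so destructive cleanup must be requested explicitly:

```python
from deltalake import DeltaTable

dt = DeltaTable("path/to/people")  # illustrative path

# Dry run: list files that would be deleted without removing anything.
candidates = dt.vacuum(retention_hours=168, dry_run=True)
print(f"{len(candidates)} files eligible for deletion")

# Delete for real, then checkpoint the log so readers load a compact snapshot.
dt.vacuum(retention_hours=168, dry_run=False)
dt.create_checkpoint()
```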