SQLAlchemy driver for DuckDB enabling seamless integration between Python applications and DuckDB's analytical database engine
—
Configuration management for DuckDB settings, extension loading, and filesystem registration with support for MotherDuck and custom configuration options. This module provides utilities for configuring DuckDB connections and managing DuckDB-specific features.
Functions for managing DuckDB configuration parameters and settings.
def get_core_config():
"""
Get set of core DuckDB configuration parameters.
Retrieves all available DuckDB settings from the database plus
MotherDuck-specific configuration keys for cloud connectivity.
Returns:
Set[str]: Set of configuration parameter names including:
- All DuckDB settings from duckdb_settings()
- MotherDuck parameters: motherduck_token, attach_mode, saas_mode
"""
def apply_config(dialect, conn, ext):
"""
Apply configuration settings to DuckDB connection.
Processes configuration parameters and applies them as DuckDB SET
statements with proper type handling and SQL escaping.
Parameters:
- dialect (Dialect): SQLAlchemy dialect instance for processing
- conn (DuckDBPyConnection): DuckDB connection to configure
- ext (Dict[str, Union[str, int, bool]]): Configuration parameters
Raises:
AssertionError: If configuration value type is not supported
"""Type mapping and processing utilities for configuration values.
TYPES: Dict[Type, TypeEngine]
"""
Maps Python types to SQLAlchemy type processors for configuration.
Supported types:
- int -> Integer()
- str -> String()
- bool -> Boolean()
"""Utilities for detecting DuckDB version and available features.
def has_comment_support():
"""
Check if current DuckDB version supports table comments.
Tests COMMENT ON TABLE functionality by attempting to create
a test table and add a comment.
Returns:
bool: True if comments supported, False otherwise
"""
has_uhugeint_support: bool
"""Boolean flag indicating if DuckDB version supports UHUGEINT type (v0.10.0+)."""
duckdb_version: Version
"""Parsed version object for current DuckDB installation."""All DuckDB configuration parameters are supported through the config parameter:
engine = create_engine('duckdb:///:memory:', connect_args={
'config': {
'threads': 4,
'memory_limit': '2GB',
'max_memory': '4GB',
'default_order': 'ASC',
'enable_progress_bar': True,
'enable_profiling': 'json',
'profile_output': '/tmp/profile.json'
}
})Load DuckDB extensions at connection time:
engine = create_engine('duckdb:///:memory:', connect_args={
'preload_extensions': ['spatial', 'json', 'httpfs', 'parquet']
})Register custom filesystems for remote data access:
import fsspec
# Create custom filesystem
fs = fsspec.filesystem('s3', key='access_key', secret='secret_key')
engine = create_engine('duckdb:///:memory:', connect_args={
'register_filesystems': [fs]
})Connect to MotherDuck cloud service:
engine = create_engine('duckdb:///', connect_args={
'config': {
'motherduck_token': 'your_token_here',
'attach_mode': 'auto',
'saas_mode': True
}
})Custom user agent strings for connection tracking:
engine = create_engine('duckdb:///:memory:', connect_args={
'config': {
'custom_user_agent': 'MyApp/1.0'
}
})from sqlalchemy import create_engine
# Configure DuckDB with performance settings
engine = create_engine('duckdb:///data.db', connect_args={
'config': {
'threads': 8,
'memory_limit': '8GB',
'enable_progress_bar': True
}
})import fsspec
from sqlalchemy import create_engine
# Setup S3 filesystem
s3_fs = fsspec.filesystem('s3',
key='your_access_key',
secret='your_secret_key'
)
engine = create_engine('duckdb:///analytics.db', connect_args={
'preload_extensions': ['httpfs', 'spatial', 'json'],
'register_filesystems': [s3_fs],
'config': {
'threads': 4,
'memory_limit': '4GB',
's3_region': 'us-west-2'
}
})engine = create_engine('duckdb:///', connect_args={
'config': {
'motherduck_token': 'your_motherduck_token',
'attach_mode': 'auto',
'saas_mode': True,
'custom_user_agent': 'MyDataApp/2.1'
}
})from duckdb_engine.config import get_core_config, apply_config
from duckdb_engine import Dialect
import duckdb
# Get available configuration options
core_configs = get_core_config()
print("Available configs:", core_configs)
# Apply configuration programmatically
dialect = Dialect()
conn = duckdb.connect(':memory:')
config_settings = {
'threads': 2,
'memory_limit': '1GB',
'enable_profiling': 'json'
}
apply_config(dialect, conn, config_settings)from duckdb_engine._supports import (
has_comment_support,
has_uhugeint_support,
duckdb_version
)
print(f"DuckDB version: {duckdb_version}")
print(f"Supports comments: {has_comment_support()}")
print(f"Supports UHUGEINT: {has_uhugeint_support}")
# Conditional feature usage
if has_comment_support():
engine.execute("COMMENT ON TABLE users IS 'User data table'")Configuration can also be passed via connection string query parameters:
# Basic configuration via URL
engine = create_engine(
'duckdb:///data.db?threads=4&memory_limit=2GB&enable_progress_bar=true'
)
# MotherDuck connection via URL
engine = create_engine(
'duckdb:///?motherduck_token=your_token&saas_mode=true'
)from sqlalchemy import create_engine, event
def configure_connection(dbapi_connection, connection_record):
"""Custom connection configuration function."""
with dbapi_connection.cursor() as cursor:
cursor.execute("SET enable_profiling = 'json'")
cursor.execute("SET profile_output = '/tmp/queries.json'")
engine = create_engine('duckdb:///data.db')
# Apply configuration after connection
event.listen(engine, 'connect', configure_connection)Install with Tessl CLI
npx tessl i tessl/pypi-duckdb-engine