tessl/pypi-duckdb-engine

SQLAlchemy driver for DuckDB enabling seamless integration between Python applications and DuckDB's analytical database engine

—

Pending

Overview

Eval results

Files

Core SQLAlchemy Integration

Name: tessl/pypi-duckdb-engine
Author: tessl

Main dialect class and connection management providing SQLAlchemy compatibility for DuckDB databases. This module implements the core SQLAlchemy dialect interface with DuckDB-specific optimizations and features.

Capabilities

Main Dialect Class

The primary SQLAlchemy dialect class that extends PostgreSQL dialect functionality for DuckDB compatibility.

class Dialect(PGDialect_psycopg2):
    """
    Main SQLAlchemy dialect for DuckDB.
    
    Extends PostgreSQL dialect to provide DuckDB-specific functionality
    while maintaining SQLAlchemy compatibility.
    """
    name = "duckdb"
    driver = "duckdb_engine"
    supports_statement_cache = False
    supports_comments: bool
    supports_sane_rowcount = False
    supports_server_side_cursors = False
    div_is_floordiv = False
    
    def __init__(self, *args, **kwargs): ...

Connection Management

Methods for creating and managing DuckDB connections through SQLAlchemy.

def connect(self, *cargs, **cparams):
    """
    Create a DuckDB connection with configuration.
    
    Parameters:
    - *cargs: Positional arguments for DuckDB connection
    - **cparams: Keyword arguments including:
        - preload_extensions: List of extensions to load
        - register_filesystems: List of filesystems to register
        - config: Dictionary of DuckDB configuration settings
        - url_config: Configuration from connection URL query parameters
    
    Returns:
    ConnectionWrapper: Wrapped DuckDB connection
    """

def create_connect_args(self, url):
    """
    Parse SQLAlchemy URL and create DuckDB connection arguments.
    
    Parameters:
    - url (URL): SQLAlchemy connection URL
    
    Returns:
    tuple: (args, kwargs) for DuckDB connection
    """

@classmethod
def get_pool_class(cls, url):
    """
    Return appropriate connection pool class based on database type.
    
    Parameters:
    - url (URL): SQLAlchemy connection URL
    
    Returns:
    Type[Pool]: SingletonThreadPool for :memory:, QueuePool for files
    """

Database API Compatibility

DBAPI compatibility layer providing standard database interface.

@staticmethod
def dbapi(**kwargs):
    """
    Return DBAPI compatibility class.
    
    Returns:
    Type[DBAPI]: Database API compatibility class
    """

@classmethod  
def import_dbapi(cls):
    """
    Import and return DBAPI class.
    
    Returns:
    Type[DBAPI]: Database API class
    """

Schema Operations

Methods for database schema inspection and table operations.

def get_table_names(self, connection, schema=None, **kw):
    """
    Get list of table names in schema.
    
    Parameters:
    - connection: Database connection
    - schema (str, optional): Schema name, supports database.schema format
    
    Returns:
    List[str]: Table names
    """

def get_view_names(self, connection, schema=None, include=None, **kw):
    """
    Get list of view names in schema.
    
    Parameters:
    - connection: Database connection  
    - schema (str, optional): Schema name
    - include: Filter for views to include
    
    Returns:
    List[str]: View names
    """

def get_schema_names(self, connection, **kw):
    """
    Get list of schema names with database prefixes.
    
    Returns unquoted database_name.schema_name unless either contains 
    spaces or double quotes, in which case they are properly quoted.
    
    Parameters:
    - connection: Database connection
    
    Returns:
    List[str]: Schema names in database.schema format
    """

def has_table(self, connection, table_name, schema=None, **kw):
    """
    Check if table exists in schema.
    
    Parameters:
    - connection: Database connection
    - table_name (str): Name of table to check
    - schema (str, optional): Schema name
    
    Returns:
    bool: True if table exists, False otherwise
    """

def get_table_oid(self, connection, table_name, schema=None, **kw):
    """
    Get table OID for schema operations.
    
    Parameters:
    - connection: Database connection
    - table_name (str): Table name
    - schema (str, optional): Schema name
    
    Returns:
    int: Table OID
    
    Raises:
    NoSuchTableError: If table does not exist
    """

Index Operations

Index-related operations (currently limited in DuckDB).

def get_indexes(self, connection, table_name, schema=None, **kw):
    """
    Get table indexes (returns empty list, indexes not yet supported).
    
    Parameters:
    - connection: Database connection
    - table_name (str): Table name
    - schema (str, optional): Schema name
    
    Returns:
    List: Empty list (indexes not supported)
    """

def get_multi_indexes(self, connection, schema=None, filter_names=None, **kw):
    """
    Get multiple table indexes (returns empty iterator).
    
    Parameters:
    - connection: Database connection
    - schema (str, optional): Schema name
    - filter_names (Collection[str], optional): Filter for index names
    
    Returns:
    Iterable[Tuple]: Empty iterator
    """

Transaction Management

Transaction handling with DuckDB-specific error handling.

def do_begin(self, connection):
    """
    Begin a transaction.
    
    Parameters:
    - connection: Database connection
    """

def do_rollback(self, connection):
    """
    Rollback transaction with DuckDB-specific error handling.
    
    Parameters:
    - connection: Database connection
    """

Column Operations

Multi-column reflection for complex schema operations.

def get_multi_columns(self, connection, schema=None, filter_names=None, scope=None, kind=None, **kw):
    """
    Get column information for multiple tables.
    
    Advanced method for retrieving column metadata across multiple tables,
    with optimizations for DuckDB's cursor handling.
    
    Parameters:
    - connection: Database connection
    - schema (str, optional): Schema name to filter by
    - filter_names (Set[str], optional): Set of table names to include
    - scope (str, optional): Scope for column retrieval
    - kind (Tuple[str, ...], optional): Table kinds to include
    - **kw: Additional keyword arguments
    
    Returns:
    List: Column information for matching tables
    """

Type System Integration

Type descriptor and column specification methods.

def type_descriptor(self, typeobj):
    """
    Convert SQLAlchemy types to DuckDB-compatible types.
    
    Parameters:
    - typeobj (Type[TypeEngine]): SQLAlchemy type class
    
    Returns:
    TypeEngine: DuckDB-compatible type instance
    """

# Class attributes
colspecs: Dict[Type[TypeEngine], TypeEngine]
ischema_names: Dict[str, Type[TypeEngine]]

Identifier Preparation

SQL identifier quoting and preparation for DuckDB.

class DuckDBIdentifierPreparer(PGIdentifierPreparer):
    """
    Handles SQL identifier quoting and preparation for DuckDB.
    
    Extends PostgreSQL identifier preparation with DuckDB-specific
    reserved word handling and database.schema name separation.
    """
    
    def __init__(self, dialect, **kwargs): ...
    
    def format_schema(self, name):
        """
        Prepare a quoted schema name with database prefix support.
        
        Parameters:
        - name (str): Schema name, optionally with database prefix
        
        Returns:
        str: Properly quoted schema identifier
        """
    
    def quote_schema(self, schema, force=None):
        """
        Conditionally quote a schema name.
        
        Parameters:
        - schema (str): Schema name to quote
        - force: Unused parameter for compatibility
        
        Returns:
        str: Quoted schema name
        """

Database Inspection

Schema inspection capabilities for DuckDB databases.

class DuckDBInspector(PGInspector):
    """
    Database schema inspection for DuckDB.
    
    Extends PostgreSQL inspector with DuckDB-specific adaptations
    for constraint and metadata handling.
    """
    
    def get_check_constraints(self, table_name, schema=None, **kw):
        """
        Get check constraints for table (limited DuckDB support).
        
        Parameters:
        - table_name (str): Table name
        - schema (str, optional): Schema name
        - **kw: Additional arguments
        
        Returns:
        List[Dict[str, Any]]: Check constraint information
        
        Raises:
        NotImplementedError: If DuckDB doesn't support the operation
        """

Custom Type Handling

DuckDB-specific type handling for null types and JSON processing.

class DuckDBNullType(sqltypes.NullType):
    """
    Custom null type with JSON processing support.
    
    Extends SQLAlchemy's NullType to handle DuckDB-specific
    type processing, particularly for JSON columns.
    """
    
    def result_processor(self, dialect, coltype):
        """
        Process null type results with JSON support.
        
        Parameters:
        - dialect (RootDialect): SQLAlchemy dialect
        - coltype (TypeEngine): Column type
        
        Returns:
        Optional[Callable]: Result processor function
        """

Usage Examples

Basic Connection

from sqlalchemy import create_engine
from duckdb_engine import Dialect

# Create engine
engine = create_engine('duckdb:///mydb.db')

# With configuration
engine = create_engine(
    'duckdb:///mydb.db',
    connect_args={
        'config': {'threads': 4, 'memory_limit': '2GB'},
        'preload_extensions': ['spatial', 'json'],
        'register_filesystems': [my_filesystem]
    }
)

Schema Operations

# Get table names
with engine.connect() as conn:
    tables = engine.dialect.get_table_names(conn)
    views = engine.dialect.get_view_names(conn)
    schemas = engine.dialect.get_schema_names(conn)
    
    # Check if table exists
    exists = engine.dialect.has_table(conn, 'my_table')

Install with Tessl CLI