CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-shillelagh

Making it easy to query APIs via SQL

Pending
Overview
Eval results
Files

type-system.mddocs/

Type System

Comprehensive type system for data conversion, validation, and SQL integration with automatic type inference and custom field definitions. Handles conversion between Python types and SQL types with support for filtering and ordering operations.

Capabilities

Base Field Class

Foundation for all field types providing parsing, formatting, and SQL integration capabilities.

class Field:
    """Base field class for data type handling."""
    
    def parse(self, value):
        """
        Parse value from external format to Python type.
        
        Parameters:
        - value: Raw value from data source
        
        Returns:
        Parsed Python value
        """
    
    def format(self, value):
        """
        Format Python value for external representation.
        
        Parameters:
        - value: Python value to format
        
        Returns:
        Formatted value for output
        """
    
    def quote(self, value):
        """
        Quote value for SQL query generation.
        
        Parameters:
        - value: Value to quote for SQL
        
        Returns:
        str: SQL-quoted value
        """
    
    # Field properties
    type: str           # SQL type name
    db_api_type: str    # DB API 2.0 type constant
    filters: list       # Supported filter classes

Basic Field Types

Standard field types for common data representations with automatic parsing and validation.

class Boolean(Field):
    """Boolean field type."""
    type = "BOOLEAN"
    db_api_type = "NUMBER"

class StringBoolean(Field):
    """Boolean field stored as string (TRUE/FALSE)."""
    type = "BOOLEAN"
    db_api_type = "NUMBER"

class IntBoolean(Field):
    """Boolean field stored as integer (1/0)."""
    type = "BOOLEAN"
    db_api_type = "NUMBER"

class Integer(Field):
    """Integer field type."""
    type = "INTEGER"
    db_api_type = "NUMBER"

class StringInteger(Field):
    """Integer field stored as string (for large integers)."""
    type = "INTEGER"
    db_api_type = "NUMBER"

class Float(Field):
    """Floating point field type."""
    type = "REAL"
    db_api_type = "NUMBER"

class String(Field):
    """String field type."""
    type = "TEXT"
    db_api_type = "STRING"

class Decimal(Field):
    """Decimal field type for precise numeric values."""
    type = "DECIMAL"
    db_api_type = "NUMBER"

class StringDecimal(Field):
    """Decimal field stored as string."""
    type = "DECIMAL"
    db_api_type = "NUMBER"

Date and Time Fields

Specialized field types for temporal data with timezone and format support.

class DateTime(Field):
    """DateTime field using datetime.datetime objects."""
    type = "TIMESTAMP"
    db_api_type = "DATETIME"

class ISODateTime(Field):
    """DateTime field stored as ISO string."""
    type = "TIMESTAMP"
    db_api_type = "DATETIME"

class Date(Field):
    """Date field using datetime.date objects."""
    type = "DATE"
    db_api_type = "DATETIME"

class ISODate(Field):
    """Date field stored as ISO string."""
    type = "DATE"
    db_api_type = "DATETIME"

class Time(Field):
    """Time field using datetime.time objects."""
    type = "TIME"
    db_api_type = "DATETIME"

class ISOTime(Field):
    """Time field stored as ISO string."""
    type = "TIME"
    db_api_type = "DATETIME"

class Duration(Field):
    """Duration field using datetime.timedelta objects."""
    type = "DURATION"
    db_api_type = "DATETIME"

class StringDuration(Field):
    """Duration field stored as formatted string."""
    type = "DURATION"
    db_api_type = "DATETIME"

Binary and Special Fields

Field types for binary data and special use cases.

class Blob(Field):
    """Binary field using bytes objects."""
    type = "BLOB"
    db_api_type = "BINARY"

class StringBlob(Field):
    """Binary field stored as hexadecimal string."""
    type = "BLOB"
    db_api_type = "BINARY"

class RowID(Integer):
    """Special field for row identifiers."""
    type = "INTEGER"
    db_api_type = "NUMBER"

class Unknown(Field):
    """Field for unknown or dynamic types."""
    type = "TEXT"
    db_api_type = "STRING"

Column Ordering

Enumeration for specifying column ordering capabilities and sort directions.

class Order:
    """Enumeration for column ordering."""
    
    NONE = "none"           # No ordering
    ASCENDING = "asc"       # Ascending order
    DESCENDING = "desc"     # Descending order

Type Analysis

Utilities for automatic type inference and data analysis from streams of data.

def analyze(data):
    """
    Analyze data stream to infer types, ordering, and row count.
    
    Parameters:
    - data (Iterator[dict]): Stream of row dictionaries
    
    Returns:
    tuple: (num_rows, order_info, type_info)
        - num_rows (int): Total number of rows
        - order_info (dict): Column ordering information  
        - type_info (dict): Column type mappings
    """

def update_order(current_order, previous, current, num_rows):
    """
    Update ordering information for a column.
    
    Parameters:
    - current_order (Order): Current ordering state
    - previous: Previous value in sequence
    - current: Current value in sequence  
    - num_rows (int): Number of rows processed
    
    Returns:
    Order: Updated ordering state 
    """

Field Registry

System for registering and accessing field types dynamically.

# Available field types
FIELD_TYPES = {
    'boolean': Boolean,
    'integer': Integer, 
    'float': Float,
    'string': String,
    'datetime': DateTime,
    'date': Date,
    'time': Time,
    'duration': Duration
}

Usage Examples

Custom Field Definition

from shillelagh.fields import Field, String

class EmailField(Field):
    """Custom field for email addresses."""
    
    def parse(self, value):
        """Parse and validate email format."""
        if value and '@' not in str(value):
            raise ValueError(f"Invalid email format: {value}")
        return str(value) if value else None
    
    def quote(self, value):
        """Quote email for SQL."""
        return f"'{value}'" if value else 'NULL'

# Use in adapter
columns = {
    'user_email': EmailField(),
    'user_name': String(),
    'created_at': DateTime()
}

Automatic Type Inference

from shillelagh.lib import analyze

# Sample data stream
data = [
    {'id': 1, 'name': 'Alice', 'score': 95.5, 'active': True},
    {'id': 2, 'name': 'Bob', 'score': 87.2, 'active': False},
    {'id': 3, 'name': 'Charlie', 'score': 92.0, 'active': True}
]

# Analyze data for types and ordering
num_rows, order_info, type_info = analyze(iter(data))

print(f"Rows: {num_rows}")
print(f"Types: {type_info}")
print(f"Order: {order_info}")

# Output:
# Rows: 3
# Types: {'id': Integer, 'name': String, 'score': Float, 'active': Boolean}
# Order: {'id': Order.ASCENDING, 'name': Order.NONE, 'score': Order.NONE, 'active': Order.NONE}

Field Configuration in Adapters

from shillelagh.adapters.base import Adapter
from shillelagh.fields import Integer, String, DateTime, Boolean

class CustomAdapter(Adapter):
    """Custom adapter with explicit field definitions."""
    
    def get_columns(self):
        """Define column types explicitly."""
        return {
            'id': Integer(),
            'title': String(), 
            'created_at': DateTime(timezone='UTC'),
            'is_published': Boolean(),
            'view_count': Integer()
        }
    
    def get_data(self, bounds=None, order=None, **kwargs):
        """Return typed data matching field definitions."""
        for row in self._fetch_raw_data():
            yield {
                'id': int(row['id']),
                'title': str(row['title']),
                'created_at': self._parse_datetime(row['created_at']),
                'is_published': bool(row['is_published']),
                'view_count': int(row['view_count'])
            }

Working with Temporal Data

from shillelagh.fields import DateTime, Date, Time
from shillelagh.backends.apsw.db import connect
from datetime import datetime, timezone

# Configure datetime fields with timezone
datetime_field = DateTime(timezone='America/New_York')
date_field = Date()
time_field = Time()

# Parse various datetime formats
dt1 = datetime_field.parse('2023-12-25 15:30:00')
dt2 = datetime_field.parse('2023-12-25T15:30:00Z')
date1 = date_field.parse('2023-12-25')
time1 = time_field.parse('15:30:00')

# Use in SQL queries with proper quoting
connection = connect(":memory:")
cursor = connection.cursor()

cursor.execute(f"""
    SELECT * FROM events 
    WHERE event_date >= {date_field.quote('2023-01-01')}
    AND event_time BETWEEN {time_field.quote('09:00:00')} 
                       AND {time_field.quote('17:00:00')}
""")

Data Validation and Conversion

from shillelagh.fields import Integer, Float, Boolean, String

# Create field instances
int_field = Integer()
float_field = Float()
bool_field = Boolean()
string_field = String()

# Parse various input formats
test_data = [
    ('123', int_field),      # String to int
    ('45.67', float_field),  # String to float  
    ('true', bool_field),    # String to bool
    (123, string_field),     # Int to string
]

for value, field in test_data:
    parsed = field.parse(value)
    formatted = field.format(parsed)
    quoted = field.quote(parsed)
    
    print(f"Input: {value} -> Parsed: {parsed} -> Formatted: {formatted} -> Quoted: {quoted}")

# Output:
# Input: 123 -> Parsed: 123 -> Formatted: 123 -> Quoted: 123
# Input: 45.67 -> Parsed: 45.67 -> Formatted: 45.67 -> Quoted: 45.67
# Input: true -> Parsed: True -> Formatted: True -> Quoted: 1
# Input: 123 -> Parsed: '123' -> Formatted: '123' -> Quoted: '123'

Install with Tessl CLI

npx tessl i tessl/pypi-shillelagh

docs

adapters.md

cli-interface.md

database-api.md

index.md

sqlalchemy-integration.md

type-system.md

tile.json