tessl/pypi-shillelagh

Making it easy to query APIs via SQL


SQLAlchemy Integration

Shillelagh ships multiple SQLAlchemy dialects for seamless integration with ORMs and SQL frameworks, each targeting a different execution mode and safety level. They enable standard SQLAlchemy operations against diverse data sources through shillelagh adapters.

Capabilities

Available Dialects

Multiple SQLAlchemy dialects supporting different use cases and execution environments.

# Available SQLAlchemy dialects:
# - shillelagh:            Default APSW dialect
# - shillelagh.apsw:       Explicit APSW dialect
# - shillelagh.safe:       Safe mode dialect (restricted SQL)
# - gsheets:               Google Sheets specific dialect
# - metricflow:            dbt MetricFlow integration
# - shillelagh.multicorn2: PostgreSQL Multicorn integration
# - shillelagh.sqlglot:    SQLGlot backend integration

Engine Creation

Create SQLAlchemy engines with shillelagh dialects for ORM and Core operations.

from sqlalchemy import create_engine

# Default shillelagh engine
engine = create_engine("shillelagh://")

# Safe mode engine (restricted SQL operations)
safe_engine = create_engine("shillelagh.safe://")

# Google Sheets specific engine
gsheets_engine = create_engine("gsheets://")

# dbt MetricFlow engine
metricflow_engine = create_engine("metricflow://")

Dialect Configuration

Configure dialects with adapter-specific settings and connection parameters.

from sqlalchemy import create_engine

# Engine with adapter configuration
engine = create_engine(
    "shillelagh://",
    connect_args={
        'adapters': ['gsheetsapi', 'csvfile', 'githubapi'],
        'adapter_kwargs': {
            'gsheetsapi': {
                'service_account_file': '/path/to/credentials.json',
                'subject': 'user@example.com'
            },
            'githubapi': {
                'token': 'ghp_your_token_here'
            }
        }
    }
)

ORM Integration

Use shillelagh with SQLAlchemy ORM for object-relational mapping against data sources.

from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()

class DataModel(Base):
    """SQLAlchemy model for external data source."""
    
    __tablename__ = 'https://example.com/data.csv'
    
    id = Column(Integer, primary_key=True)
    name = Column(String)
    value = Column(Integer)

# Create engine and session
engine = create_engine("shillelagh://")
Session = sessionmaker(bind=engine)
session = Session()

# Query using ORM
results = session.query(DataModel).filter(DataModel.value > 100).all()

Core SQL Expression

Use SQLAlchemy Core for programmatic SQL construction with shillelagh backends.

from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String, select

# Create engine
engine = create_engine("shillelagh://")

# Define table schema
metadata = MetaData()
data_table = Table(
    'https://api.example.com/data.json',
    metadata,
    Column('id', Integer),
    Column('name', String),
    Column('category', String),
    Column('score', Integer)
)

# Build and execute queries
with engine.connect() as conn:
    # Select with filtering
    stmt = select(data_table).where(data_table.c.score > 80)
    results = conn.execute(stmt)
    
    for row in results:
        print(f"{row.name}: {row.score}")

Dialect-Specific Features

Default Shillelagh Dialect

Full-featured dialect supporting all shillelagh adapters and SQL operations.

from sqlalchemy import create_engine

# Default dialect with all features
engine = create_engine("shillelagh://")

# Supports:
# - All SQL operations (SELECT, INSERT, UPDATE, DELETE)
# - All adapter types (API, file, memory)
# - Custom functions and expressions
# - Transaction support

Safe Mode Dialect

Restricted dialect for secure environments with limited SQL operations.

from sqlalchemy import create_engine

# Safe mode dialect
safe_engine = create_engine("shillelagh.safe://")

# Restrictions:
# - Read-only operations (SELECT only)
# - No DDL operations (CREATE, DROP, ALTER)
# - Limited function support
# - Enhanced security validations

Usage example:

from sqlalchemy import create_engine, text

safe_engine = create_engine("shillelagh.safe://")

with safe_engine.connect() as conn:
    # Allowed: SELECT operations
    result = conn.execute(text("SELECT * FROM 'data.csv' WHERE id < 100"))
    
    # Blocked: INSERT operations (would raise exception)
    # conn.execute(text("INSERT INTO 'data.csv' VALUES (1, 'test')"))

Google Sheets Dialect

Specialized dialect optimized for Google Sheets integration with enhanced features.

from sqlalchemy import create_engine

# Google Sheets specific dialect
gsheets_engine = create_engine("gsheets://")

# Features:
# - Optimized for Google Sheets API
# - Built-in authentication handling
# - Sheet-specific SQL extensions
# - Range-based queries

dbt MetricFlow Dialect

Integration with dbt MetricFlow semantic layer for metrics and dimensions.

from sqlalchemy import create_engine

# dbt MetricFlow dialect
metricflow_engine = create_engine("metricflow://")

# Features:
# - Semantic layer integration
# - Metric definitions and calculations
# - Dimension queries
# - Time-based aggregations

Usage Examples

Basic SQLAlchemy Core Usage

from sqlalchemy import create_engine, text

# Create shillelagh engine
engine = create_engine("shillelagh://")

# Execute raw SQL
with engine.connect() as conn:
    result = conn.execute(text("""
        SELECT country, AVG(population) as avg_pop
        FROM 'https://example.com/countries.csv'
        GROUP BY country
        HAVING avg_pop > 1000000
        ORDER BY avg_pop DESC
    """))
    
    for row in result:
        print(f"{row.country}: {row.avg_pop:,.0f}")

ORM with External Data Sources

from sqlalchemy import Column, Integer, String, Float, create_engine
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()

class WeatherData(Base):
    """Model for weather API data."""
    
    __tablename__ = 'https://api.weather.com/current'
    
    city = Column(String, primary_key=True)
    temperature = Column(Float)
    humidity = Column(Integer)
    pressure = Column(Float)

# Configure engine with API credentials
engine = create_engine(
    "shillelagh://",
    connect_args={
        'adapter_kwargs': {
            'weatherapi': {'api_key': 'your_weather_api_key'}
        }
    }
)

# Create session and query
Session = sessionmaker(bind=engine)
session = Session()

# Find hot, humid cities
hot_humid = session.query(WeatherData)\
    .filter(WeatherData.temperature > 30)\
    .filter(WeatherData.humidity > 80)\
    .order_by(WeatherData.temperature.desc())\
    .all()

for city in hot_humid:
    print(f"{city.city}: {city.temperature}°C, {city.humidity}% humidity")

Multi-Source Joins

from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String, select

engine = create_engine("shillelagh://")
metadata = MetaData()

# Define tables from different sources
users = Table(
    'https://api.example.com/users',
    metadata,
    Column('id', Integer),
    Column('name', String),
    Column('department_id', Integer)
)

departments = Table(
    './data/departments.csv',
    metadata,
    Column('id', Integer),
    Column('name', String),
    Column('budget', Integer)
)

# Join data from API and CSV file
with engine.connect() as conn:
    stmt = select(
        users.c.name.label('user_name'),
        departments.c.name.label('dept_name'),
        departments.c.budget
    ).select_from(
        users.join(departments, users.c.department_id == departments.c.id)
    ).where(departments.c.budget > 100000)
    
    results = conn.execute(stmt)
    
    for row in results:
        print(f"{row.user_name} works in {row.dept_name} (Budget: ${row.budget:,})")

Pandas Integration

import pandas as pd
from sqlalchemy import create_engine

# Create engine
engine = create_engine("shillelagh://")

# Query directly to pandas DataFrame
df = pd.read_sql_query("""
    SELECT 
        DATE(timestamp) as date,
        COUNT(*) as events,
        AVG(value) as avg_value
    FROM 'https://api.metrics.com/events'
    WHERE timestamp >= '2023-01-01'
    GROUP BY DATE(timestamp)
    ORDER BY date
""", engine)

print(df.head())

# Use DataFrame for further analysis
daily_stats = df.groupby('date').agg({
    'events': 'sum',
    'avg_value': 'mean'
}).reset_index()

Install with Tessl CLI

npx tessl i tessl/pypi-shillelagh
