tessl/pypi-shillelagh

Making it easy to query APIs via SQL

—

Pending

Overview

Eval results

Files

Adapters

Name: tessl/pypi-shillelagh
Author: tessl

Extensible plugin system for connecting to various data sources through SQL queries. Adapters translate SQL operations to source-specific requests, enabling unified querying across APIs, files, databases, and in-memory data.

Capabilities

Base Adapter Framework

Foundation classes for implementing custom data source adapters with standardized interface and lifecycle management.

class Adapter:
    """Base adapter class for data sources."""
    
    @classmethod
    def supports(cls, uri, fast=True, **kwargs):
        """
        Check if adapter supports the given URI.
        
        Parameters:
        - uri (str): Data source URI to check
        - fast (bool): If True, avoid network requests for quick check
        - **kwargs: Adapter-specific configuration
        
        Returns:
        bool or None: True if supported, False if not, None if uncertain
        """
    
    @classmethod
    def parse_uri(cls, uri):
        """
        Parse URI into adapter initialization arguments.
        
        Parameters:
        - uri (str): Data source URI to parse
        
        Returns:
        tuple: Arguments for adapter initialization
        """
    
    def get_columns(self):
        """
        Get column definitions for the data source.
        
        Returns:
        dict: Column name to Field mapping
        """
    
    def get_data(self, bounds=None, order=None, **kwargs):
        """
        Get data from the source with optional filtering and ordering.
        
        Parameters:
        - bounds (dict, optional): Column filters
        - order (list, optional): Ordering specifications
        - **kwargs: Additional query parameters
        
        Returns:
        Iterator[dict]: Stream of row dictionaries
        """
    
    def get_cost(self, filtered_columns, order):
        """
        Estimate query cost for optimization.
        
        Parameters:
        - filtered_columns (list): Columns with filters applied
        - order (list): Ordering specifications
        
        Returns:
        int: Estimated query cost
        """

File Adapters

Adapters for file-based data sources supporting local and remote files.

class CSVFile:
    """CSV file adapter."""
    
    def __init__(self, filepath, **kwargs):
        """
        Initialize CSV adapter.
        
        Parameters:
        - filepath (str): Path to CSV file (local or URL)
        - encoding (str, optional): File encoding
        - delimiter (str, optional): CSV delimiter
        - quotechar (str, optional): Quote character
        - has_header (bool, optional): Whether file has header row
        """

Usage example:

# Query local CSV file
cursor.execute("SELECT * FROM './data/sales.csv' WHERE amount > 1000")

# Query remote CSV file
cursor.execute("SELECT * FROM 'https://example.com/data.csv'")

API Adapters

Adapters for various web APIs and services with authentication and caching support.

class GSheetsAPI:
    """Google Sheets API adapter."""
    
    def __init__(self, url, service_account_file=None, **kwargs):
        """
        Initialize Google Sheets adapter.
        
        Parameters:
        - url (str): Google Sheets URL
        - service_account_file (str, optional): Path to service account JSON
        - subject (str, optional): Email for impersonation
        """

class GitHubAPI:
    """GitHub API adapter."""
    
    def __init__(self, url, token=None, **kwargs):
        """
        Initialize GitHub API adapter.
        
        Parameters:
        - url (str): GitHub API URL
        - token (str, optional): GitHub access token
        """

class GenericJSONAPI:
    """Generic JSON API adapter."""
    
    def __init__(self, url, path=None, **kwargs):
        """
        Initialize generic JSON API adapter.
        
        Parameters:
        - url (str): JSON API endpoint URL
        - path (str, optional): JSONPath expression for data extraction
        - headers (dict, optional): HTTP headers
        """

class HTMLTableAPI:
    """HTML table scraper adapter."""
    
    def __init__(self, url, table_index=0, **kwargs):
        """
        Initialize HTML table adapter.
        
        Parameters:
        - url (str): URL containing HTML tables
        - table_index (int): Index of table to extract (default: 0)
        """

class S3SelectAPI:
    """AWS S3 Select adapter."""
    
    def __init__(self, url, aws_access_key_id=None, **kwargs):
        """
        Initialize S3 Select adapter.
        
        Parameters:
        - url (str): S3 object URL (s3://bucket/key)
        - aws_access_key_id (str, optional): AWS access key
        - aws_secret_access_key (str, optional): AWS secret key
        """

Memory Adapters

Adapters for in-memory data sources and virtual tables.

class PandasMemory:
    """Pandas DataFrame adapter."""
    
    def __init__(self, df, **kwargs):
        """
        Initialize pandas adapter.
        
        Parameters:
        - df (pandas.DataFrame): DataFrame to query
        - table_name (str, optional): Virtual table name
        """

class VirtualMemory:
    """Virtual/generated data adapter."""
    
    def __init__(self, columns, rows=None, **kwargs):
        """
        Initialize virtual memory adapter.
        
        Parameters:
        - columns (dict): Column definitions
        - rows (list, optional): Static row data
        - generator (callable, optional): Dynamic row generator
        """

class HolidaysMemory:
    """Holiday data adapter."""
    
    def __init__(self, country='US', years=None, **kwargs):
        """
        Initialize holidays adapter.
        
        Parameters:
        - country (str): Country code for holidays
        - years (list, optional): Years to include
        """

Adapter Registry

System for loading and managing available adapters dynamically.

class AdapterLoader:
    """Loads and manages data source adapters."""
    
    def load_adapters(self):
        """
        Load all available adapters from entry points.
        
        Returns:
        list: List of adapter classes
        """
    
    def find_adapter(self, uri, adapter_kwargs=None):
        """
        Find appropriate adapter for URI.
        
        Parameters:
        - uri (str): Data source URI
        - adapter_kwargs (dict, optional): Adapter configurations
        
        Returns:
        tuple: (adapter_class, args, kwargs)
        """

Built-in Adapters

API Data Sources

gsheetsapi (GSheetsAPI): Google Sheets integration with authentication and caching
githubapi (GitHubAPI): GitHub repositories, issues, pull requests, and user data
genericjsonapi (GenericJSONAPI): Generic JSON-based REST APIs with configurable parsing
genericxmlapi (GenericXMLAPI): XML-based APIs and RSS/Atom feeds
htmltableapi (HTMLTableAPI): HTML table extraction from web pages with CSS selectors
datasetteapi (DatasetteAPI): Datasette database publishing platform integration
dbtmetricflowapi (DbtMetricFlowAPI): dbt MetricFlow semantic layer queries
presetapi (PresetAPI): Apache Superset dashboard and chart data
presetworkspaceapi (PresetWorkspaceAPI): Preset.io workspace integration
s3selectapi (S3SelectAPI): AWS S3 object querying with S3 Select
socrataapi (SocrataAPI): Socrata Open Data platform datasets
systemapi (SystemAPI): System metrics, processes, and monitoring data
weatherapi (WeatherAPI): Weather service APIs and meteorological data

File Data Sources

csvfile (CSVFile): CSV files from local filesystem or HTTP(S) URLs

Memory Data Sources

pandasmemory (PandasMemory): Pandas DataFrames as virtual SQL tables
holidaysmemory (HolidaysMemory): Holiday and calendar data for various countries
virtualmemory (VirtualMemory): Generated or computed data with custom logic

Usage Examples

Querying Google Sheets

from shillelagh.backends.apsw.db import connect

# Configure Google Sheets access
adapter_kwargs = {
    'gsheetsapi': {
        'service_account_file': '/path/to/credentials.json',
        'subject': 'user@example.com'
    }
}

connection = connect(":memory:", adapter_kwargs=adapter_kwargs)
cursor = connection.cursor()

# Query Google Sheet
sheet_url = "https://docs.google.com/spreadsheets/d/1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms/edit"
cursor.execute(f"""
    SELECT Name, Major, "Number of Years" 
    FROM "{sheet_url}" 
    WHERE Major = 'English'
""")

for row in cursor.fetchall():
    print(row)

Custom Adapter Configuration

from shillelagh.backends.apsw.db import connect

# Configure multiple adapters
adapter_kwargs = {
    'githubapi': {'token': 'ghp_your_token_here'},
    'genericjsonapi': {'headers': {'User-Agent': 'MyApp/1.0'}},
    's3selectapi': {
        'aws_access_key_id': 'your_key',
        'aws_secret_access_key': 'your_secret'
    }
}

connection = connect(":memory:", adapter_kwargs=adapter_kwargs)
cursor = connection.cursor()

# Query GitHub API
cursor.execute("""
    SELECT name, stargazers_count, language
    FROM "https://api.github.com/users/octocat/repos"
    WHERE language = 'Python'
    ORDER BY stargazers_count DESC
""")

Using Pandas DataFrames

import pandas as pd
from shillelagh.backends.apsw.db import connect

# Create DataFrame
df = pd.DataFrame({
    'name': ['Alice', 'Bob', 'Charlie'],
    'age': [25, 30, 35],
    'city': ['NYC', 'LA', 'Chicago']
})

# Register DataFrame as virtual table
connection = connect(":memory:")
connection.execute("CREATE VIRTUAL TABLE people USING pandasmemory", df)

# Query DataFrame with SQL
cursor = connection.cursor()
cursor.execute("SELECT name, age FROM people WHERE age > 25 ORDER BY age")
results = cursor.fetchall()

Install with Tessl CLI