Making it easy to query APIs via SQL
—
Extensible plugin system for connecting to various data sources through SQL queries. Adapters translate SQL operations to source-specific requests, enabling unified querying across APIs, files, databases, and in-memory data.
Foundation classes for implementing custom data source adapters with standardized interface and lifecycle management.
class Adapter:
"""Base adapter class for data sources."""
@classmethod
def supports(cls, uri, fast=True, **kwargs):
"""
Check if adapter supports the given URI.
Parameters:
- uri (str): Data source URI to check
- fast (bool): If True, avoid network requests for quick check
- **kwargs: Adapter-specific configuration
Returns:
bool or None: True if supported, False if not, None if uncertain
"""
@classmethod
def parse_uri(cls, uri):
"""
Parse URI into adapter initialization arguments.
Parameters:
- uri (str): Data source URI to parse
Returns:
tuple: Arguments for adapter initialization
"""
def get_columns(self):
"""
Get column definitions for the data source.
Returns:
dict: Column name to Field mapping
"""
def get_data(self, bounds=None, order=None, **kwargs):
"""
Get data from the source with optional filtering and ordering.
Parameters:
- bounds (dict, optional): Column filters
- order (list, optional): Ordering specifications
- **kwargs: Additional query parameters
Returns:
Iterator[dict]: Stream of row dictionaries
"""
def get_cost(self, filtered_columns, order):
"""
Estimate query cost for optimization.
Parameters:
- filtered_columns (list): Columns with filters applied
- order (list): Ordering specifications
Returns:
int: Estimated query cost
"""Adapters for file-based data sources supporting local and remote files.
class CSVFile:
"""CSV file adapter."""
def __init__(self, filepath, **kwargs):
"""
Initialize CSV adapter.
Parameters:
- filepath (str): Path to CSV file (local or URL)
- encoding (str, optional): File encoding
- delimiter (str, optional): CSV delimiter
- quotechar (str, optional): Quote character
- has_header (bool, optional): Whether file has header row
"""Usage example:
# Query local CSV file
cursor.execute("SELECT * FROM './data/sales.csv' WHERE amount > 1000")
# Query remote CSV file
cursor.execute("SELECT * FROM 'https://example.com/data.csv'")Adapters for various web APIs and services with authentication and caching support.
class GSheetsAPI:
"""Google Sheets API adapter."""
def __init__(self, url, service_account_file=None, **kwargs):
"""
Initialize Google Sheets adapter.
Parameters:
- url (str): Google Sheets URL
- service_account_file (str, optional): Path to service account JSON
- subject (str, optional): Email for impersonation
"""
class GitHubAPI:
"""GitHub API adapter."""
def __init__(self, url, token=None, **kwargs):
"""
Initialize GitHub API adapter.
Parameters:
- url (str): GitHub API URL
- token (str, optional): GitHub access token
"""
class GenericJSONAPI:
"""Generic JSON API adapter."""
def __init__(self, url, path=None, **kwargs):
"""
Initialize generic JSON API adapter.
Parameters:
- url (str): JSON API endpoint URL
- path (str, optional): JSONPath expression for data extraction
- headers (dict, optional): HTTP headers
"""
class HTMLTableAPI:
"""HTML table scraper adapter."""
def __init__(self, url, table_index=0, **kwargs):
"""
Initialize HTML table adapter.
Parameters:
- url (str): URL containing HTML tables
- table_index (int): Index of table to extract (default: 0)
"""
class S3SelectAPI:
"""AWS S3 Select adapter."""
def __init__(self, url, aws_access_key_id=None, **kwargs):
"""
Initialize S3 Select adapter.
Parameters:
- url (str): S3 object URL (s3://bucket/key)
- aws_access_key_id (str, optional): AWS access key
- aws_secret_access_key (str, optional): AWS secret key
"""Adapters for in-memory data sources and virtual tables.
class PandasMemory:
"""Pandas DataFrame adapter."""
def __init__(self, df, **kwargs):
"""
Initialize pandas adapter.
Parameters:
- df (pandas.DataFrame): DataFrame to query
- table_name (str, optional): Virtual table name
"""
class VirtualMemory:
"""Virtual/generated data adapter."""
def __init__(self, columns, rows=None, **kwargs):
"""
Initialize virtual memory adapter.
Parameters:
- columns (dict): Column definitions
- rows (list, optional): Static row data
- generator (callable, optional): Dynamic row generator
"""
class HolidaysMemory:
"""Holiday data adapter."""
def __init__(self, country='US', years=None, **kwargs):
"""
Initialize holidays adapter.
Parameters:
- country (str): Country code for holidays
- years (list, optional): Years to include
"""System for loading and managing available adapters dynamically.
class AdapterLoader:
"""Loads and manages data source adapters."""
def load_adapters(self):
"""
Load all available adapters from entry points.
Returns:
list: List of adapter classes
"""
def find_adapter(self, uri, adapter_kwargs=None):
"""
Find appropriate adapter for URI.
Parameters:
- uri (str): Data source URI
- adapter_kwargs (dict, optional): Adapter configurations
Returns:
tuple: (adapter_class, args, kwargs)
"""GSheetsAPI): Google Sheets integration with authentication and cachingGitHubAPI): GitHub repositories, issues, pull requests, and user dataGenericJSONAPI): Generic JSON-based REST APIs with configurable parsingGenericXMLAPI): XML-based APIs and RSS/Atom feedsHTMLTableAPI): HTML table extraction from web pages with CSS selectorsDatasetteAPI): Datasette database publishing platform integrationDbtMetricFlowAPI): dbt MetricFlow semantic layer queriesPresetAPI): Apache Superset dashboard and chart dataPresetWorkspaceAPI): Preset.io workspace integrationS3SelectAPI): AWS S3 object querying with S3 SelectSocrataAPI): Socrata Open Data platform datasetsSystemAPI): System metrics, processes, and monitoring dataWeatherAPI): Weather service APIs and meteorological dataCSVFile): CSV files from local filesystem or HTTP(S) URLsPandasMemory): Pandas DataFrames as virtual SQL tablesHolidaysMemory): Holiday and calendar data for various countriesVirtualMemory): Generated or computed data with custom logicfrom shillelagh.backends.apsw.db import connect
# Configure Google Sheets access
adapter_kwargs = {
'gsheetsapi': {
'service_account_file': '/path/to/credentials.json',
'subject': 'user@example.com'
}
}
connection = connect(":memory:", adapter_kwargs=adapter_kwargs)
cursor = connection.cursor()
# Query Google Sheet
sheet_url = "https://docs.google.com/spreadsheets/d/1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms/edit"
cursor.execute(f"""
SELECT Name, Major, "Number of Years"
FROM "{sheet_url}"
WHERE Major = 'English'
""")
for row in cursor.fetchall():
print(row)from shillelagh.backends.apsw.db import connect
# Configure multiple adapters
adapter_kwargs = {
'githubapi': {'token': 'ghp_your_token_here'},
'genericjsonapi': {'headers': {'User-Agent': 'MyApp/1.0'}},
's3selectapi': {
'aws_access_key_id': 'your_key',
'aws_secret_access_key': 'your_secret'
}
}
connection = connect(":memory:", adapter_kwargs=adapter_kwargs)
cursor = connection.cursor()
# Query GitHub API
cursor.execute("""
SELECT name, stargazers_count, language
FROM "https://api.github.com/users/octocat/repos"
WHERE language = 'Python'
ORDER BY stargazers_count DESC
""")import pandas as pd
from shillelagh.backends.apsw.db import connect
# Create DataFrame
df = pd.DataFrame({
'name': ['Alice', 'Bob', 'Charlie'],
'age': [25, 30, 35],
'city': ['NYC', 'LA', 'Chicago']
})
# Register DataFrame as virtual table
connection = connect(":memory:")
connection.execute("CREATE VIRTUAL TABLE people USING pandasmemory", df)
# Query DataFrame with SQL
cursor = connection.cursor()
cursor.execute("SELECT name, age FROM people WHERE age > 25 ORDER BY age")
results = cursor.fetchall()Install with Tessl CLI
npx tessl i tessl/pypi-shillelagh