A comprehensive SQL parser, transpiler, optimizer, and engine supporting 30+ dialects
Built-in SQL execution engine for running queries against in-memory data structures. Execute SQL against Python data, perform joins and aggregations, and integrate with pandas DataFrames and other data sources.
Execute SQL queries against in-memory tables and data structures.
def execute(
sql: str | Expression,
schema: Optional[Schema] = None,
read: str = None,
dialect: str = None,
tables: Optional[Dict] = None
) -> Table:
"""
Execute SQL query against in-memory data.
Args:
sql: SQL statement or Expression to execute
schema (Schema): Database schema for type information and validation
read (str): SQL dialect for parsing
dialect (str): SQL dialect (alias for read)
tables (Dict): Additional tables to register for execution
Returns:
Table: Result table with columnar data
"""Create and manage in-memory tables for SQL execution.
class Table:
"""In-memory table representation for SQL execution."""
def __init__(self, columns: List[str], rows: List[List]):
"""
Create table with column names and row data.
Args:
columns (List[str]): Column names
rows (List[List]): Row data as list of lists
"""
@property
def columns(self) -> List[str]:
"""Get column names."""
@property
def rows(self) -> List[List]:
"""Get row data."""
def __getitem__(self, index: int) -> List:
"""Get row by index."""
def __len__(self) -> int:
"""Get number of rows."""
def to_dict(self) -> Dict[str, List]:
"""Convert table to dictionary format."""
def to_df(self):
"""Convert table to pandas DataFrame (requires pandas)."""
def ensure_tables(tables: Optional[Dict], dialect: str = None) -> Tables:
"""
Ensure tables are in proper format for execution.
Args:
tables (Dict): Tables to validate and convert
dialect (str): SQL dialect for parsing table definitions
Returns:
Tables: Validated tables container
"""Core execution engine that runs SQL operations in Python.
class PythonExecutor:
"""Python-based SQL execution engine."""
def __init__(self, tables: Optional[Tables] = None):
"""
Initialize executor with optional table registry.
Args:
tables (Tables): Pre-registered tables for execution
"""
def execute(self, plan: Plan) -> Table:
"""
Execute query plan and return results.
Args:
plan (Plan): Query execution plan
Returns:
Table: Execution results
"""Manage execution context and variable environments.
class Context:
"""Execution context with variables and state."""
def __init__(self, tables: Optional[Tables] = None):
"""Initialize context with optional tables."""
def eval(self, expression: Expression) -> Any:
"""Evaluate expression in current context."""
def set_variable(self, name: str, value: Any) -> None:
"""Set variable in context."""
def get_variable(self, name: str) -> Any:
"""Get variable from context."""
class Environment:
"""Execution environment managing scope and variables."""
def __init__(self, context: Optional[Context] = None):
"""Initialize environment with optional context."""import sqlglot
from sqlglot.executor import execute, Table
# Create sample data
users_data = Table(
columns=["id", "name", "age", "email"],
rows=[
[1, "Alice", 25, "alice@example.com"],
[2, "Bob", 30, "bob@example.com"],
[3, "Charlie", 35, "charlie@example.com"]
]
)
orders_data = Table(
columns=["id", "user_id", "amount", "date"],
rows=[
[1, 1, 100.50, "2023-01-15"],
[2, 2, 250.00, "2023-01-16"],
[3, 1, 75.25, "2023-01-17"],
[4, 3, 500.00, "2023-01-18"]
]
)
# Execute SQL query
tables = {
"users": users_data,
"orders": orders_data
}
result = execute(
"SELECT name, COUNT(*) as order_count FROM users JOIN orders ON users.id = orders.user_id GROUP BY name",
tables=tables
)
print(result.columns) # ['name', 'order_count']
print(result.rows) # [['Alice', 2], ['Bob', 1], ['Charlie', 1]]import pandas as pd
from sqlglot.executor import execute, Table
# Convert pandas DataFrame to SQLGlot Table
df = pd.DataFrame({
'product': ['A', 'B', 'C', 'A', 'B'],
'sales': [100, 200, 150, 120, 180],
'region': ['North', 'South', 'East', 'West', 'North']
})
# Convert to SQLGlot Table format
table = Table(
columns=list(df.columns),
rows=df.values.tolist()
)
# Execute aggregation query
result = execute(
"SELECT product, SUM(sales) as total_sales FROM products GROUP BY product ORDER BY total_sales DESC",
tables={"products": table}
)
# Convert result back to DataFrame
result_df = pd.DataFrame(result.to_dict())
print(result_df)from sqlglot.executor import execute, Table
from sqlglot.schema import MappingSchema
# Create related tables
customers = Table(
columns=["customer_id", "name", "city"],
rows=[
[1, "John Doe", "New York"],
[2, "Jane Smith", "Los Angeles"],
[3, "Bob Johnson", "Chicago"]
]
)
orders = Table(
columns=["order_id", "customer_id", "product", "quantity", "price"],
rows=[
[101, 1, "Widget A", 2, 25.00],
[102, 2, "Widget B", 1, 50.00],
[103, 1, "Widget C", 3, 15.00],
[104, 3, "Widget A", 1, 25.00]
]
)
# Define schema for type validation
schema = MappingSchema({
"customers": {
"customer_id": "INT",
"name": "VARCHAR",
"city": "VARCHAR"
},
"orders": {
"order_id": "INT",
"customer_id": "INT",
"product": "VARCHAR",
"quantity": "INT",
"price": "DECIMAL"
}
})
# Execute complex analytical query
sql = """
SELECT
c.name,
c.city,
COUNT(*) as order_count,
SUM(o.quantity * o.price) as total_spent,
AVG(o.quantity * o.price) as avg_order_value
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.name, c.city
HAVING SUM(o.quantity * o.price) > 50
ORDER BY total_spent DESC
"""
result = execute(
sql,
schema=schema,
tables={"customers": customers, "orders": orders}
)
# Display results
for row in result.rows:
print(dict(zip(result.columns, row)))from sqlglot.executor import execute, Table
# Sales data for window function analysis
sales = Table(
columns=["salesperson", "month", "sales", "region"],
rows=[
["Alice", "2023-01", 1000, "North"],
["Alice", "2023-02", 1200, "North"],
["Alice", "2023-03", 1100, "North"],
["Bob", "2023-01", 800, "South"],
["Bob", "2023-02", 900, "South"],
["Bob", "2023-03", 1050, "South"],
["Charlie", "2023-01", 1300, "East"],
["Charlie", "2023-02", 1400, "East"],
["Charlie", "2023-03", 1250, "East"]
]
)
# Window function query
sql = """
SELECT
salesperson,
month,
sales,
region,
SUM(sales) OVER (PARTITION BY salesperson ORDER BY month) as running_total,
LAG(sales, 1) OVER (PARTITION BY salesperson ORDER BY month) as prev_month_sales,
sales - LAG(sales, 1) OVER (PARTITION BY salesperson ORDER BY month) as month_over_month_change,
RANK() OVER (ORDER BY sales DESC) as sales_rank,
DENSE_RANK() OVER (PARTITION BY region ORDER BY sales DESC) as region_rank
FROM sales_data
ORDER BY salesperson, month
"""
result = execute(sql, tables={"sales_data": sales})
# Print results with headers
print("\t".join(result.columns))
for row in result.rows:
print("\t".join(str(x) for x in row))from sqlglot.executor import execute, Table
from sqlglot import expressions as exp
# Create table with custom data
data = Table(
columns=["text_col", "number_col", "date_col"],
rows=[
["Hello World", 42, "2023-01-15"],
["SQLGlot Rocks", 100, "2023-02-20"],
["Data Processing", 75, "2023-03-10"]
]
)
# Execute queries with various functions
queries = [
"SELECT UPPER(text_col) as upper_text FROM test_data",
"SELECT LENGTH(text_col) as text_length FROM test_data",
"SELECT number_col * 2 as doubled FROM test_data",
"SELECT SUBSTR(text_col, 1, 5) as first_five FROM test_data"
]
tables = {"test_data": data}
for sql in queries:
result = execute(sql, tables=tables)
print(f"Query: {sql}")
print(f"Results: {result.rows}")
print()class Table:
"""In-memory table for SQL execution."""
columns: List[str] # Column names
rows: List[List] # Row data
def __init__(self, columns: List[str], rows: List[List]): ...
def __getitem__(self, index: int) -> List: ...
def __len__(self) -> int: ...
def to_dict(self) -> Dict[str, List]: ...
class Tables:
"""Container for multiple tables with metadata."""
mapping: Dict[str, Table] # Table name to Table mapping
supported_table_args: Any # Supported table arguments
def __getitem__(self, name: str) -> Table: ...
def __contains__(self, name: str) -> bool: ...
class PythonExecutor:
"""Python-based SQL execution engine."""
tables: Tables # Available tables
def __init__(self, tables: Optional[Tables] = None): ...
def execute(self, plan: Plan) -> Table: ...
class Context:
"""Execution context with variables and state."""
tables: Optional[Tables] # Available tables
variables: Dict[str, Any] # Context variables
def eval(self, expression: Expression) -> Any: ...
def set_variable(self, name: str, value: Any) -> None: ...
def get_variable(self, name: str) -> Any: ...
class Environment:
"""Execution environment managing scope."""
context: Context # Execution context
def __init__(self, context: Optional[Context] = None): ...Install with Tessl CLI
npx tessl i tessl/pypi-sqlglot