A comprehensive SQL parser, transpiler, optimizer, and engine supporting 30+ dialects
Comprehensive query optimization engine with 15+ optimization rules for improving query performance, simplifying expressions, and analyzing query structure. The optimizer transforms SQL expressions to equivalent but more efficient forms.
Core optimization functionality that applies multiple optimization rules to SQL expressions.
def optimize(
sql: str | Expression,
schema: Optional[Schema] = None,
dialect: str = None,
**opts
) -> Expression:
"""
Applies optimization rules to SQL expressions for improved performance.
Args:
sql: SQL string or Expression to optimize
schema (Schema): Database schema for column/table resolution
dialect (str): SQL dialect for parsing and optimization
**opts: Additional optimization options
Returns:
Expression: Optimized expression tree
"""
RULES: List[Callable] = [
# List of available optimization rules
# Applied in sequence for maximum effectiveness
]Add type information to expressions for better optimization and validation.
def annotate_types(
expression: Expression,
schema: Optional[Schema] = None,
**opts
) -> Expression:
"""
Annotates expressions with type information from schema.
Args:
expression (Expression): Expression to annotate
schema (Schema): Schema providing type information
**opts: Type annotation options
Returns:
Expression: Expression with type annotations
"""Resolve column and table references using schema information.
def qualify_columns(
expression: Expression,
schema: Schema,
**opts
) -> Expression:
"""
Qualifies column references with table names using schema.
Args:
expression (Expression): Expression to qualify
schema (Schema): Schema for column resolution
**opts: Qualification options
Returns:
Expression: Expression with qualified column references
"""
def qualify_tables(
expression: Expression,
schema: Optional[Schema] = None,
**opts
) -> Expression:
"""
Qualifies table references with database/schema names.
Args:
expression (Expression): Expression to qualify
schema (Schema): Schema for table resolution
**opts: Qualification options
Returns:
Expression: Expression with qualified table references
"""Optimize WHERE clause conditions for better performance.
def pushdown_predicates(expression: Expression, **opts) -> Expression:
"""
Pushes WHERE predicates down to lowest possible level in query tree.
Args:
expression (Expression): Expression to optimize
**opts: Pushdown options
Returns:
Expression: Expression with optimized predicate placement
"""
def pushdown_projections(expression: Expression, **opts) -> Expression:
"""
Pushes SELECT projections down to reduce data movement.
Args:
expression (Expression): Expression to optimize
**opts: Projection pushdown options
Returns:
Expression: Expression with optimized projections
"""Eliminate and optimize subqueries for better performance.
def eliminate_subqueries(expression: Expression, **opts) -> Expression:
"""
Eliminates unnecessary subqueries by flattening or merging.
Args:
expression (Expression): Expression to optimize
**opts: Subquery elimination options
Returns:
Expression: Expression with eliminated subqueries
"""
def unnest_subqueries(expression: Expression, **opts) -> Expression:
"""
Unnests subqueries where possible for performance.
Args:
expression (Expression): Expression to optimize
**opts: Unnesting options
Returns:
Expression: Expression with unnested subqueries
"""
def merge_subqueries(expression: Expression, **opts) -> Expression:
"""
Merges compatible subqueries to reduce query complexity.
Args:
expression (Expression): Expression to optimize
**opts: Merge options
Returns:
Expression: Expression with merged subqueries
"""Optimize JOIN operations and eliminate unnecessary joins.
def optimize_joins(expression: Expression, **opts) -> Expression:
"""
Optimizes JOIN conditions and order for better performance.
Args:
expression (Expression): Expression to optimize
**opts: Join optimization options
Returns:
Expression: Expression with optimized joins
"""
def eliminate_joins(expression: Expression, **opts) -> Expression:
"""
Eliminates unnecessary JOINs that don't affect results.
Args:
expression (Expression): Expression to optimize
**opts: Join elimination options
Returns:
Expression: Expression with eliminated joins
"""Optimize Common Table Expressions and simplify complex expressions.
def eliminate_ctes(expression: Expression, **opts) -> Expression:
"""
Eliminates unnecessary Common Table Expressions.
Args:
expression (Expression): Expression to optimize
**opts: CTE elimination options
Returns:
Expression: Expression with eliminated CTEs
"""
def simplify(expression: Expression, **opts) -> Expression:
"""
Simplifies expressions by applying algebraic rules.
Args:
expression (Expression): Expression to simplify
**opts: Simplification options
Returns:
Expression: Simplified expression
"""
def canonicalize(expression: Expression, **opts) -> Expression:
"""
Canonicalizes expressions to standard form.
Args:
expression (Expression): Expression to canonicalize
**opts: Canonicalization options
Returns:
Expression: Canonicalized expression
"""Normalize identifiers and schema references for consistency.
def normalize(expression: Expression, **opts) -> Expression:
"""
Normalizes expressions to consistent format.
Args:
expression (Expression): Expression to normalize
**opts: Normalization options
Returns:
Expression: Normalized expression
"""
def normalize_identifiers(expression: Expression, **opts) -> Expression:
"""
Normalizes identifier casing and quoting.
Args:
expression (Expression): Expression to normalize
**opts: Identifier normalization options
Returns:
Expression: Expression with normalized identifiers
"""
def isolate_table_selects(expression: Expression, **opts) -> Expression:
"""
Isolates table SELECT expressions for optimization.
Args:
expression (Expression): Expression to isolate
**opts: Isolation options
Returns:
Expression: Expression with isolated table selects
"""Analyze and traverse expression scopes for column and table resolution.
class Scope:
"""Represents a scope in SQL expression for variable resolution."""
def __init__(self, expression: Expression, sources: Dict = None): ...
@property
def columns(self) -> List[Column]:
"""Available columns in this scope."""
@property
def tables(self) -> List[Table]:
"""Available tables in this scope."""
def build_scope(expression: Expression, **opts) -> Scope:
"""
Builds scope tree for expression with column/table resolution.
Args:
expression (Expression): Expression to analyze
**opts: Scope building options
Returns:
Scope: Root scope of expression tree
"""
def traverse_scope(scope: Scope, **opts) -> Iterator[Scope]:
"""
Traverses scope hierarchy depth-first.
Args:
scope (Scope): Root scope to traverse
**opts: Traversal options
Yields:
Scope: Each scope in traversal order
"""
def find_in_scope(scope: Scope, expression_type: Type, **opts) -> Optional[Expression]:
"""
Finds first expression of type in scope.
Args:
scope (Scope): Scope to search
expression_type (Type): Expression type to find
**opts: Search options
Returns:
Optional[Expression]: Found expression or None
"""
def find_all_in_scope(scope: Scope, expression_type: Type, **opts) -> List[Expression]:
"""
Finds all expressions of type in scope.
Args:
scope (Scope): Scope to search
expression_type (Type): Expression type to find
**opts: Search options
Returns:
List[Expression]: List of found expressions
"""
def walk_in_scope(scope: Scope, **opts) -> Iterator[Expression]:
"""
Walks all expressions within scope.
Args:
scope (Scope): Scope to walk
**opts: Walk options
Yields:
Expression: Each expression in scope
"""import sqlglot
from sqlglot.optimizer import optimize
from sqlglot.schema import MappingSchema
# Define schema for optimization
schema = MappingSchema({
"users": {
"id": "INT",
"name": "VARCHAR",
"email": "VARCHAR",
"created_date": "DATE"
},
"orders": {
"id": "INT",
"user_id": "INT",
"amount": "DECIMAL",
"order_date": "DATE"
}
})
# Optimize complex query
sql = """
SELECT u.name, COUNT(o.id) as order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.created_date > '2023-01-01'
AND (SELECT COUNT(*) FROM orders WHERE user_id = u.id) > 5
GROUP BY u.name
"""
optimized = optimize(sql, schema=schema, dialect="postgres")
print(optimized.sql(pretty=True))import sqlglot
from sqlglot.optimizer import (
annotate_types, qualify_columns, pushdown_predicates,
eliminate_subqueries, simplify
)
from sqlglot.schema import MappingSchema
# Apply specific optimization rules
expression = sqlglot.parse_one("""
SELECT name FROM users
WHERE age > (SELECT AVG(age) FROM users) + 10 - 5
""")
schema = MappingSchema({"users": {"name": "VARCHAR", "age": "INT"}})
# Step-by-step optimization
expression = annotate_types(expression, schema=schema)
expression = qualify_columns(expression, schema=schema)
expression = pushdown_predicates(expression)
expression = eliminate_subqueries(expression)
expression = simplify(expression)
print(expression.sql(pretty=True))import sqlglot
from sqlglot.optimizer import build_scope, traverse_scope
# Analyze query scope
sql = """
SELECT u.name, o.amount
FROM users u
JOIN (
SELECT user_id, amount
FROM orders
WHERE amount > 100
) o ON u.id = o.user_id
"""
expression = sqlglot.parse_one(sql)
root_scope = build_scope(expression)
# Traverse scopes
for scope in traverse_scope(root_scope):
print(f"Scope columns: {[c.name for c in scope.columns]}")
print(f"Scope tables: {[t.name for t in scope.tables]}")import sqlglot
from sqlglot.optimizer import (
annotate_types, qualify_columns, qualify_tables,
pushdown_predicates, eliminate_subqueries, optimize_joins,
simplify, canonicalize
)
def custom_optimize(expression, schema=None):
"""Custom optimization pipeline with specific rules."""
# Core qualification and type annotation
if schema:
expression = annotate_types(expression, schema=schema)
expression = qualify_tables(expression, schema=schema)
expression = qualify_columns(expression, schema=schema)
# Predicate and projection optimization
expression = pushdown_predicates(expression)
# Subquery optimization
expression = eliminate_subqueries(expression)
# Join optimization
expression = optimize_joins(expression)
# Expression simplification
expression = simplify(expression)
expression = canonicalize(expression)
return expression
# Use custom optimizer
sql = "SELECT * FROM users WHERE age > 25 AND status = 'active'"
expression = sqlglot.parse_one(sql)
optimized = custom_optimize(expression)
print(optimized.sql())class Scope:
"""Query scope for column and table resolution."""
expression: Expression # Root expression of scope
sources: Dict # Available data sources
@property
def columns(self) -> List[Column]:
"""Columns available in this scope."""
@property
def tables(self) -> List[Table]:
"""Tables available in this scope."""
def find(self, expression_type: Type) -> Optional[Expression]:
"""Find expression of specific type in scope."""
def find_all(self, expression_type: Type) -> List[Expression]:
"""Find all expressions of specific type in scope."""
# Optimization rule type
OptimizationRule = Callable[[Expression], Expression]
# Available optimization rules
RULES: List[OptimizationRule] = [
annotate_types,
qualify_columns,
qualify_tables,
pushdown_predicates,
pushdown_projections,
eliminate_subqueries,
unnest_subqueries,
merge_subqueries,
optimize_joins,
eliminate_joins,
eliminate_ctes,
simplify,
canonicalize,
normalize,
normalize_identifiers,
isolate_table_selects
]Install with Tessl CLI
npx tessl i tessl/pypi-sqlglot