CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-sqlglot

A comprehensive SQL parser, transpiler, optimizer, and engine supporting 30+ dialects

Overview
Eval results
Files

optimization.mddocs/

SQL Optimization

Comprehensive query optimization engine with 15+ optimization rules for improving query performance, simplifying expressions, and analyzing query structure. The optimizer transforms SQL expressions to equivalent but more efficient forms.

Capabilities

Main Optimization Engine

Core optimization functionality that applies multiple optimization rules to SQL expressions.

def optimize(
    sql: str | Expression, 
    schema: Optional[Schema] = None, 
    dialect: str = None,
    **opts
) -> Expression:
    """
    Applies optimization rules to SQL expressions for improved performance.
    
    Args:
        sql: SQL string or Expression to optimize
        schema (Schema): Database schema for column/table resolution
        dialect (str): SQL dialect for parsing and optimization
        **opts: Additional optimization options
        
    Returns:
        Expression: Optimized expression tree
    """

RULES: List[Callable] = [
    # List of available optimization rules
    # Applied in sequence for maximum effectiveness
]

Type Annotation and Analysis

Add type information to expressions for better optimization and validation.

def annotate_types(
    expression: Expression, 
    schema: Optional[Schema] = None,
    **opts
) -> Expression:
    """
    Annotates expressions with type information from schema.
    
    Args:
        expression (Expression): Expression to annotate
        schema (Schema): Schema providing type information
        **opts: Type annotation options
        
    Returns:
        Expression: Expression with type annotations
    """

Column and Table Qualification

Resolve column and table references using schema information.

def qualify_columns(
    expression: Expression, 
    schema: Schema,
    **opts
) -> Expression:
    """
    Qualifies column references with table names using schema.
    
    Args:
        expression (Expression): Expression to qualify
        schema (Schema): Schema for column resolution
        **opts: Qualification options
        
    Returns:
        Expression: Expression with qualified column references
    """

def qualify_tables(
    expression: Expression,
    schema: Optional[Schema] = None,
    **opts  
) -> Expression:
    """
    Qualifies table references with database/schema names.
    
    Args:
        expression (Expression): Expression to qualify
        schema (Schema): Schema for table resolution
        **opts: Qualification options
        
    Returns:
        Expression: Expression with qualified table references
    """

Predicate Optimization

Optimize WHERE clause conditions for better performance.

def pushdown_predicates(expression: Expression, **opts) -> Expression:
    """
    Pushes WHERE predicates down to lowest possible level in query tree.
    
    Args:
        expression (Expression): Expression to optimize
        **opts: Pushdown options
        
    Returns:
        Expression: Expression with optimized predicate placement
    """

def pushdown_projections(expression: Expression, **opts) -> Expression:
    """
    Pushes SELECT projections down to reduce data movement.
    
    Args:
        expression (Expression): Expression to optimize
        **opts: Projection pushdown options
        
    Returns:
        Expression: Expression with optimized projections
    """

Subquery Optimization

Eliminate and optimize subqueries for better performance.

def eliminate_subqueries(expression: Expression, **opts) -> Expression:
    """
    Eliminates unnecessary subqueries by flattening or merging.
    
    Args:
        expression (Expression): Expression to optimize
        **opts: Subquery elimination options
        
    Returns:
        Expression: Expression with eliminated subqueries
    """

def unnest_subqueries(expression: Expression, **opts) -> Expression:
    """
    Unnests subqueries where possible for performance.
    
    Args:
        expression (Expression): Expression to optimize
        **opts: Unnesting options
        
    Returns:
        Expression: Expression with unnested subqueries
    """

def merge_subqueries(expression: Expression, **opts) -> Expression:
    """
    Merges compatible subqueries to reduce query complexity.
    
    Args:
        expression (Expression): Expression to optimize
        **opts: Merge options
        
    Returns:
        Expression: Expression with merged subqueries
    """

Join Optimization

Optimize JOIN operations and eliminate unnecessary joins.

def optimize_joins(expression: Expression, **opts) -> Expression:
    """
    Optimizes JOIN conditions and order for better performance.
    
    Args:
        expression (Expression): Expression to optimize
        **opts: Join optimization options
        
    Returns:
        Expression: Expression with optimized joins
    """

def eliminate_joins(expression: Expression, **opts) -> Expression:
    """
    Eliminates unnecessary JOINs that don't affect results.
    
    Args:
        expression (Expression): Expression to optimize
        **opts: Join elimination options
        
    Returns:
        Expression: Expression with eliminated joins
    """

CTE and Expression Simplification

Optimize Common Table Expressions and simplify complex expressions.

def eliminate_ctes(expression: Expression, **opts) -> Expression:
    """
    Eliminates unnecessary Common Table Expressions.
    
    Args:
        expression (Expression): Expression to optimize
        **opts: CTE elimination options
        
    Returns:
        Expression: Expression with eliminated CTEs
    """

def simplify(expression: Expression, **opts) -> Expression:
    """
    Simplifies expressions by applying algebraic rules.
    
    Args:
        expression (Expression): Expression to simplify
        **opts: Simplification options
        
    Returns:
        Expression: Simplified expression
    """

def canonicalize(expression: Expression, **opts) -> Expression:
    """
    Canonicalizes expressions to standard form.
    
    Args:
        expression (Expression): Expression to canonicalize
        **opts: Canonicalization options
        
    Returns:
        Expression: Canonicalized expression
    """

Schema and Identifier Normalization

Normalize identifiers and schema references for consistency.

def normalize(expression: Expression, **opts) -> Expression:
    """
    Normalizes expressions to consistent format.
    
    Args:
        expression (Expression): Expression to normalize
        **opts: Normalization options
        
    Returns:
        Expression: Normalized expression
    """

def normalize_identifiers(expression: Expression, **opts) -> Expression:
    """
    Normalizes identifier casing and quoting.
    
    Args:
        expression (Expression): Expression to normalize
        **opts: Identifier normalization options
        
    Returns:
        Expression: Expression with normalized identifiers
    """

def isolate_table_selects(expression: Expression, **opts) -> Expression:
    """
    Isolates table SELECT expressions for optimization.
    
    Args:
        expression (Expression): Expression to isolate
        **opts: Isolation options
        
    Returns:
        Expression: Expression with isolated table selects
    """

Scope Analysis

Analyze and traverse expression scopes for column and table resolution.

class Scope:
    """Represents a scope in SQL expression for variable resolution."""
    
    def __init__(self, expression: Expression, sources: Dict = None): ...
    
    @property
    def columns(self) -> List[Column]:
        """Available columns in this scope."""
    
    @property  
    def tables(self) -> List[Table]:
        """Available tables in this scope."""

def build_scope(expression: Expression, **opts) -> Scope:
    """
    Builds scope tree for expression with column/table resolution.
    
    Args:
        expression (Expression): Expression to analyze
        **opts: Scope building options
        
    Returns:
        Scope: Root scope of expression tree
    """

def traverse_scope(scope: Scope, **opts) -> Iterator[Scope]:
    """
    Traverses scope hierarchy depth-first.
    
    Args:
        scope (Scope): Root scope to traverse
        **opts: Traversal options
        
    Yields:
        Scope: Each scope in traversal order
    """

def find_in_scope(scope: Scope, expression_type: Type, **opts) -> Optional[Expression]:
    """
    Finds first expression of type in scope.
    
    Args:
        scope (Scope): Scope to search
        expression_type (Type): Expression type to find
        **opts: Search options
        
    Returns:
        Optional[Expression]: Found expression or None
    """

def find_all_in_scope(scope: Scope, expression_type: Type, **opts) -> List[Expression]:
    """
    Finds all expressions of type in scope.
    
    Args:
        scope (Scope): Scope to search  
        expression_type (Type): Expression type to find
        **opts: Search options
        
    Returns:
        List[Expression]: List of found expressions
    """

def walk_in_scope(scope: Scope, **opts) -> Iterator[Expression]:
    """
    Walks all expressions within scope.
    
    Args:
        scope (Scope): Scope to walk
        **opts: Walk options
        
    Yields:
        Expression: Each expression in scope
    """

Usage Examples

Basic Optimization

import sqlglot
from sqlglot.optimizer import optimize
from sqlglot.schema import MappingSchema

# Define schema for optimization
schema = MappingSchema({
    "users": {
        "id": "INT",
        "name": "VARCHAR",
        "email": "VARCHAR",
        "created_date": "DATE"
    },
    "orders": {
        "id": "INT", 
        "user_id": "INT",
        "amount": "DECIMAL",
        "order_date": "DATE"
    }
})

# Optimize complex query
sql = """
SELECT u.name, COUNT(o.id) as order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id  
WHERE u.created_date > '2023-01-01'
  AND (SELECT COUNT(*) FROM orders WHERE user_id = u.id) > 5
GROUP BY u.name
"""

optimized = optimize(sql, schema=schema, dialect="postgres")
print(optimized.sql(pretty=True))

Individual Optimization Rules

import sqlglot
from sqlglot.optimizer import (
    annotate_types, qualify_columns, pushdown_predicates,
    eliminate_subqueries, simplify
)
from sqlglot.schema import MappingSchema

# Apply specific optimization rules
expression = sqlglot.parse_one("""
SELECT name FROM users 
WHERE age > (SELECT AVG(age) FROM users) + 10 - 5
""")

schema = MappingSchema({"users": {"name": "VARCHAR", "age": "INT"}})

# Step-by-step optimization
expression = annotate_types(expression, schema=schema)
expression = qualify_columns(expression, schema=schema)  
expression = pushdown_predicates(expression)
expression = eliminate_subqueries(expression)
expression = simplify(expression)

print(expression.sql(pretty=True))

Scope Analysis

import sqlglot
from sqlglot.optimizer import build_scope, traverse_scope

# Analyze query scope
sql = """
SELECT u.name, o.amount
FROM users u
JOIN (
    SELECT user_id, amount 
    FROM orders 
    WHERE amount > 100
) o ON u.id = o.user_id
"""

expression = sqlglot.parse_one(sql)
root_scope = build_scope(expression)

# Traverse scopes
for scope in traverse_scope(root_scope):
    print(f"Scope columns: {[c.name for c in scope.columns]}")
    print(f"Scope tables: {[t.name for t in scope.tables]}")

Custom Optimization Pipeline

import sqlglot
from sqlglot.optimizer import (
    annotate_types, qualify_columns, qualify_tables,
    pushdown_predicates, eliminate_subqueries, optimize_joins,
    simplify, canonicalize
)

def custom_optimize(expression, schema=None):
    """Custom optimization pipeline with specific rules."""
    
    # Core qualification and type annotation
    if schema:
        expression = annotate_types(expression, schema=schema)
        expression = qualify_tables(expression, schema=schema)
        expression = qualify_columns(expression, schema=schema)
    
    # Predicate and projection optimization
    expression = pushdown_predicates(expression)
    
    # Subquery optimization  
    expression = eliminate_subqueries(expression)
    
    # Join optimization
    expression = optimize_joins(expression)
    
    # Expression simplification
    expression = simplify(expression)
    expression = canonicalize(expression)
    
    return expression

# Use custom optimizer
sql = "SELECT * FROM users WHERE age > 25 AND status = 'active'"
expression = sqlglot.parse_one(sql)
optimized = custom_optimize(expression)
print(optimized.sql())

Types

class Scope:
    """Query scope for column and table resolution."""
    
    expression: Expression  # Root expression of scope
    sources: Dict          # Available data sources
    
    @property
    def columns(self) -> List[Column]:
        """Columns available in this scope."""
    
    @property
    def tables(self) -> List[Table]:
        """Tables available in this scope."""
    
    def find(self, expression_type: Type) -> Optional[Expression]:
        """Find expression of specific type in scope."""
    
    def find_all(self, expression_type: Type) -> List[Expression]:
        """Find all expressions of specific type in scope."""

# Optimization rule type
OptimizationRule = Callable[[Expression], Expression]

# Available optimization rules
RULES: List[OptimizationRule] = [
    annotate_types,
    qualify_columns,
    qualify_tables, 
    pushdown_predicates,
    pushdown_projections,
    eliminate_subqueries,
    unnest_subqueries,
    merge_subqueries,
    optimize_joins,
    eliminate_joins,
    eliminate_ctes,
    simplify,
    canonicalize,
    normalize,
    normalize_identifiers,
    isolate_table_selects
]

Install with Tessl CLI

npx tessl i tessl/pypi-sqlglot

docs

core-parsing.md

dialects.md

execution.md

expression-building.md

index.md

optimization.md

schema.md

utilities.md

tile.json