A comprehensive SQL parser, transpiler, optimizer, and engine supporting 30+ dialects
npx @tessl/cli install tessl/pypi-sqlglot@27.12.0

A comprehensive no-dependency SQL parser, transpiler, optimizer, and engine written in Python. SQLGlot supports translation between 30+ SQL dialects, including DuckDB, Presto/Trino, Spark/Databricks, Snowflake, and BigQuery. It provides robust SQL parsing with syntax error detection, formatting and optimization features, AST manipulation tools for programmatic SQL building, and a built-in SQL execution engine.
pip install sqlglot or pip install "sqlglot[rs]" (with the Rust tokenizer for better performance)

import sqlglot

Common imports for parsing and transpilation:
from sqlglot import parse, parse_one, transpile, tokenize

Expression building:
from sqlglot import select, column, table_, func, and_, or_, case

Advanced features:
from sqlglot import Expression, Dialect, Parser, Generator
from sqlglot.optimizer import optimize
from sqlglot.executor import execute
from sqlglot.schema import MappingSchema

import sqlglot
# Parse SQL into abstract syntax tree
sql = "SELECT user_id, COUNT(*) FROM users WHERE age > 21 GROUP BY user_id"
parsed = sqlglot.parse_one(sql)
# Transpile between SQL dialects
spark_sql = "SELECT DATE_ADD(CURRENT_DATE(), 7)"
postgres_sql = sqlglot.transpile(spark_sql, read="spark", write="postgres")[0]
# Result: "SELECT (CURRENT_DATE + INTERVAL '7' DAY)"
# Build SQL programmatically
query = (
    sqlglot.select("user_id", sqlglot.func("COUNT", "*").as_("total"))
    .from_("users")
    .where(sqlglot.column("age") > 21)
    .group_by("user_id")
)
print(query.sql()) # SELECT user_id, COUNT(*) AS total FROM users WHERE age > 21 GROUP BY user_id
# Format SQL with pretty printing
formatted = sqlglot.transpile(sql, pretty=True)[0]

SQLGlot's architecture is built around four core components: a tokenizer that performs lexical analysis, a parser that builds an abstract syntax tree of Expression nodes, a generator that renders an AST back into SQL text, and a dialect layer that adapts tokenization, parsing, and generation to each database's syntax.
This design enables SQLGlot to serve as both a high-level transpilation tool and a low-level SQL manipulation library, supporting everything from simple format conversion to complex query optimization and analysis.
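As a minimal sketch of that pipeline through the public helpers, assuming only the top-level API shown above (the rendered strings may vary by SQLGlot version):

```python
import sqlglot

sql = "SELECT id FROM users WHERE age > 21"

# Tokenizer: lexical analysis into a list of Token objects
tokens = sqlglot.tokenize(sql)

# Parser: build an Expression AST from the SQL string
ast = sqlglot.parse_one(sql)

# Generator: render the AST back into SQL, optionally for another dialect
print(ast.sql(dialect="duckdb"))
print(ast.sql(pretty=True))
```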
Essential SQL parsing and dialect translation functionality. Parse SQL strings into abstract syntax trees, transpile between 30+ SQL dialects, and tokenize SQL for lexical analysis.
def parse(sql: str, read: str = None, dialect: str = None, **opts) -> List[Optional[Expression]]: ...
def parse_one(sql: str, read: str = None, dialect: str = None, into: Optional[Type] = None, **opts) -> Expression: ...
def transpile(sql: str, read: str = None, write: str = None, identity: bool = True, error_level: Optional[ErrorLevel] = None, **opts) -> List[str]: ...
def tokenize(sql: str, read: str = None, dialect: str = None) -> List[Token]: ...

Core Parsing and Transpilation
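For example, a small sketch of these entry points (the exact transpiled output depends on the SQLGlot version):

```python
import sqlglot

# parse handles multi-statement scripts and returns one Expression per statement
statements = sqlglot.parse("SELECT 1; SELECT 2", read="mysql")
print(len(statements))  # 2

# transpile reads in one dialect and writes in another
print(sqlglot.transpile("SELECT IFNULL(a, 0) FROM t", read="mysql", write="duckdb")[0])
```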
Programmatic SQL construction using builder functions and direct AST manipulation. Create complex SQL queries through code, modify existing parsed queries, and traverse expression trees.
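A short sketch using the builder helpers listed below (and_, or_, case, cast); the table and column names are illustrative and the rendered SQL in comments is indicative:

```python
from sqlglot import select, column, and_, or_, case, cast

# CASE expression built fluently, then aliased
bracket = case().when("age >= 65", "'senior'").else_("'adult'").as_("bracket")

query = (
    select("user_id", bracket)
    .from_("users")
    .where(and_(column("age") > 21, or_("active = 1", "verified = TRUE")))
)
print(query.sql())

# cast wraps an expression in a CAST node
print(cast(column("price"), "DECIMAL(10, 2)").sql())  # CAST(price AS DECIMAL(10, 2))
```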
def select(*expressions) -> Select: ...
def column(col: str) -> Column: ...
def table_(name: str) -> Table: ...
def func(name: str, *args) -> Function: ...
def and_(*conditions) -> And: ...
def or_(*conditions) -> Or: ...
def case() -> Case: ...
def cast(expression, to: str) -> Cast: ...

Support for 30+ SQL dialects with dialect-specific parsing, generation, and transformation rules. Each dialect handles unique syntax, functions, and data types.
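A brief sketch of moving a query between dialects via the read/write arguments (the exact output strings depend on the SQLGlot version):

```python
import sqlglot

# Parse with DuckDB semantics, then generate for Hive
ast = sqlglot.parse_one("SELECT STRFTIME(x, '%y-%-m-%S')", read="duckdb")
print(ast.sql(dialect="hive"))

# Equivalent one-shot form
print(sqlglot.transpile("SELECT STRFTIME(x, '%y-%-m-%S')", read="duckdb", write="hive")[0])
```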
class Dialect:
    def parse(self, sql: str, **opts) -> List[Optional[Expression]]: ...
    def generate(self, expression: Expression, **opts) -> str: ...
    def tokenize(self, sql: str) -> List[Token]: ...
# Available dialects
class BigQuery(Dialect): ...
class Snowflake(Dialect): ...
class Spark(Dialect): ...
class DuckDB(Dialect): ...
# ... and 26 more

Comprehensive query optimization engine with 15+ optimization rules. Optimize SQL for performance, simplify expressions, and analyze query structure.
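A small sketch of optimize with an inline schema mapping (the table, column names, and types here are illustrative):

```python
from sqlglot import parse_one
from sqlglot.optimizer import optimize

expression = parse_one(
    "SELECT A OR (B OR (C AND D)) FROM x WHERE Z = DATE '2021-01-01' + INTERVAL '1' MONTH OR 1 = 0"
)
optimized = optimize(
    expression,
    schema={"x": {"A": "INT", "B": "INT", "C": "INT", "D": "INT", "Z": "STRING"}},
)
# Qualified, normalized, and simplified SQL
print(optimized.sql(pretty=True))
```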
def optimize(sql: str | Expression, schema: Optional[Schema] = None, **opts) -> Expression: ...
# Optimization rules and components
def annotate_types(expression: Expression, **opts) -> Expression: ...
def qualify_columns(expression: Expression, schema: Schema, **opts) -> Expression: ...
def pushdown_predicates(expression: Expression, **opts) -> Expression: ...
def eliminate_subqueries(expression: Expression, **opts) -> Expression: ...

Built-in SQL execution engine for running queries against in-memory data structures. Execute SQL against Python data, perform joins and aggregations, and integrate with pandas DataFrames.
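A minimal sketch of execute against plain Python dictionaries (the table and column names are made up for the example):

```python
from sqlglot.executor import execute

tables = {
    "sushi": [
        {"id": 1, "price": 1.0},
        {"id": 2, "price": 2.0},
    ],
    "order_items": [
        {"sushi_id": 1, "order_id": 1},
        {"sushi_id": 2, "order_id": 1},
    ],
}

result = execute(
    """
    SELECT oi.order_id, SUM(s.price) AS total
    FROM order_items AS oi
    JOIN sushi AS s ON oi.sushi_id = s.id
    GROUP BY oi.order_id
    """,
    tables=tables,
)
print(result)  # in-memory Table with the joined, aggregated rows
```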
def execute(sql: str | Expression, schema: Optional[Schema] = None, read: str = None, dialect: str = None, tables: Optional[Dict] = None) -> Table: ...
class Table:
    def __init__(self, columns: List[str], rows: List[List]): ...
    def to_dict(self) -> Dict: ...

Database schema representation and validation for accurate parsing and optimization. Define table structures, column types, and relationships.
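For instance, a MappingSchema built from a nested dict of table, column, and type names (the names here are illustrative):

```python
from sqlglot.schema import MappingSchema

schema = MappingSchema({"users": {"id": "INT", "name": "TEXT", "age": "INT"}})

print(schema.column_names("users"))            # ['id', 'name', 'age']
print(schema.get_column_type("users", "age"))  # type of the age column
```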
class Schema:
    def add_table(self, table: str, column_mapping: Dict = None, **opts) -> None: ...
    def column_names(self, table: str, **opts) -> List[str]: ...
    def get_column_type(self, table: str, column: str, **opts) -> Optional[str]: ...
class MappingSchema(Schema):
    def __init__(self, schema: Dict = None, **opts): ...

Additional tools for SQL analysis, comparison, and manipulation, including AST diffing, column lineage analysis, and time parsing utilities.
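A sketch of the diff and lineage helpers, assuming the signatures listed below; diff produces a node-level edit script, and lineage (from sqlglot.lineage) traces a column back to its sources:

```python
from sqlglot import diff, parse_one
from sqlglot.lineage import lineage

# Edit script (insert/remove/move/update/keep operations) between two ASTs
edits = diff(parse_one("SELECT a + b, c, d"), parse_one("SELECT c, a - b, d"))
print(edits)

# Lineage node describing where the "total" column comes from
node = lineage(
    "total",
    "SELECT price AS total FROM sushi",
    schema={"sushi": {"id": "INT", "price": "FLOAT"}},
)
```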
def diff(source: Expression, target: Expression, **opts) -> List[Edit]: ...  # edit script of insert/remove/move/update/keep operations
# Lineage analysis
def lineage(column: str, sql: str, schema: Optional[Schema] = None, **opts) -> Node: ...  # lineage graph node for the given column
# Error classes
class ParseError(Exception): ...
class UnsupportedError(Exception): ...
class ErrorLevel: ...

class Expression:
"""Base class for all SQL expressions with AST manipulation methods."""
def sql(self, dialect: str = None, **opts) -> str: ...
def transform(self, fun: Callable, **opts) -> Expression: ...
def find(self, expression_type: Type, **opts) -> Optional[Expression]: ...
def find_all(self, expression_type: Type, **opts) -> List[Expression]: ...
def replace(self, expression: Expression, **opts) -> Expression: ...
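For example, finding nodes and rewriting them with the methods above (exp is the sqlglot.expressions module; the output comments are indicative):

```python
from sqlglot import exp, parse_one

ast = parse_one("SELECT a, b + 1 AS c FROM d")

# find_all walks the tree and yields every node of the given type
for col in ast.find_all(exp.Column):
    print(col.alias_or_name)  # a, b

# transform applies a function bottom-up and returns a new tree
def upper_columns(node):
    if isinstance(node, exp.Column):
        return parse_one(f"UPPER({node.sql()})")
    return node

print(ast.transform(upper_columns).sql())  # e.g. SELECT UPPER(a), UPPER(b) + 1 AS c FROM d
```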
class Token:
"""Represents a lexical token from SQL tokenization."""
def __init__(self, token_type: TokenType, text: str, line: int = 1, col: int = 1): ...
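For instance, inspecting the token stream produced by tokenize:

```python
import sqlglot

for token in sqlglot.tokenize("SELECT id FROM users WHERE age > 21"):
    print(token.token_type, token.text, token.line, token.col)
```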
class ErrorLevel:
"""Error handling levels for parsing and validation."""
IGNORE: str
WARN: str
RAISE: str
IMMEDIATE: str
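A sketch of error handling with ParseError and the error_level option (the exact error payload varies by version):

```python
import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    sqlglot.transpile("SELECT foo FROM (SELECT baz FROM t")
except ParseError as e:
    print(e.errors)  # structured details such as message, line, and column

# With ErrorLevel.IGNORE the parser skips problems instead of raising
sqlglot.transpile("SELECT foo FROM (SELECT baz FROM t", error_level=ErrorLevel.IGNORE)
```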