A comprehensive SQL parser, transpiler, optimizer, and engine supporting 30+ dialects
Support for 30+ SQL dialects with dialect-specific parsing, generation, and transformation rules. Each dialect handles unique syntax, functions, data types, and SQL features specific to different database systems.
Core dialect functionality that provides the foundation for all SQL dialect implementations.
class Dialect:
"""Base dialect class providing core SQL processing functionality."""
def parse(self, sql: str, **opts) -> List[Optional[Expression]]:
"""Parse SQL string using dialect-specific grammar rules."""
def generate(self, expression: Expression, **opts) -> str:
"""Generate SQL string from expression using dialect-specific syntax."""
def tokenize(self, sql: str) -> List[Token]:
"""Tokenize SQL string using dialect-specific keywords and operators."""
@classmethod
def get_or_raise(cls, dialect: str) -> Dialect:
"""Get dialect instance by name or raise error if not found."""
@classmethod
def get(cls, dialect: str) -> Optional[Dialect]:
"""Get dialect by name, returns None if not found."""Modern cloud-based data warehouse platforms with advanced analytics features.
class BigQuery(Dialect):
"""Google BigQuery dialect with standard SQL and legacy SQL support."""
class Snowflake(Dialect):
"""Snowflake dialect with cloud data warehouse features."""
class Redshift(Dialect):
"""Amazon Redshift dialect with PostgreSQL-based syntax."""
class Databricks(Dialect):
"""Databricks dialect based on Apache Spark SQL."""
class Fabric(Dialect):
"""Microsoft Fabric dialect for data analytics."""Popular open-source SQL engines for data processing and analytics.
class Spark(Dialect):
"""Apache Spark SQL dialect for distributed data processing."""
class Spark2(Dialect):
"""Apache Spark 2.x dialect with legacy compatibility."""
class DuckDB(Dialect):
"""DuckDB dialect for embedded analytical processing."""
class ClickHouse(Dialect):
"""ClickHouse dialect for real-time analytics."""
class Presto(Dialect):
"""Presto dialect for distributed query processing."""
class Trino(Dialect):
"""Trino dialect (formerly PrestoSQL) for federated queries."""
class Hive(Dialect):
"""Apache Hive dialect for Hadoop data warehouse."""
class Drill(Dialect):
"""Apache Drill dialect for schema-free SQL queries."""
class Doris(Dialect):
"""Apache Doris dialect for modern data warehouse."""
class StarRocks(Dialect):
"""StarRocks dialect for real-time analytics."""Established relational database management systems.
class MySQL(Dialect):
"""MySQL dialect with MySQL-specific syntax and functions."""
class Postgres(Dialect):
"""PostgreSQL dialect with advanced SQL features."""
class Oracle(Dialect):
"""Oracle Database dialect with enterprise features."""
class SQLite(Dialect):
"""SQLite dialect for embedded databases."""
class TSQL(Dialect):
"""Microsoft SQL Server T-SQL dialect."""
class Teradata(Dialect):
"""Teradata dialect for enterprise data warehousing."""Newer and specialized SQL platforms for specific use cases.
class Athena(Dialect):
"""Amazon Athena dialect for serverless queries."""
class Dremio(Dialect):
"""Dremio dialect for data lake analytics."""
class Exasol(Dialect):
"""Exasol dialect for high-performance analytics."""
class Materialize(Dialect):
"""Materialize dialect for streaming SQL."""
class RisingWave(Dialect):
"""RisingWave dialect for stream processing."""
class SingleStore(Dialect):
"""SingleStore dialect for distributed SQL."""
class Tableau(Dialect):
"""Tableau dialect for business intelligence."""
class Druid(Dialect):
"""Apache Druid dialect for real-time analytics."""
class Dune(Dialect):
"""Dune Analytics dialect for blockchain data."""
class PRQL(Dialect):
"""PRQL (Pipelined Relational Query Language) dialect."""import sqlglot
from sqlglot.dialects import BigQuery, Snowflake, Spark
# Parse using specific dialect
bigquery = BigQuery()
expressions = bigquery.parse("SELECT EXTRACT(YEAR FROM date_col) FROM table")
# Generate SQL for specific dialect
snowflake = Snowflake()
sql = snowflake.generate(expressions[0])
# Use dialect names with main functions
spark_sql = "SELECT explode(split('a,b,c', ',')) as item"
postgres_sql = sqlglot.transpile(spark_sql, read="spark", write="postgres")[0]

Example — handling dialect-specific syntax features:

import sqlglot
# BigQuery array and struct syntax
bq_query = """
SELECT
arr[OFFSET(0)] as first_element,
struct_col.field_name,
PARSE_DATE('%Y-%m-%d', date_string)
FROM table_name
"""
standard_sql = sqlglot.transpile(bq_query, read="bigquery", write="postgres")[0]
# Snowflake variant data type
sf_query = """
SELECT
variant_col:field::string as extracted_field,
FLATTEN(array_col) as flattened
FROM table_name
"""
# Spark SQL window functions with specific syntax
spark_query = """
SELECT
col1,
LAG(col1, 1) OVER (PARTITION BY col2 ORDER BY col3
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as prev_val
FROM table
"""from sqlglot import exp
from sqlglot.dialects.dialect import Dialect
from sqlglot.generator import Generator
from sqlglot.tokens import Tokenizer, TokenType
class CustomDialect(Dialect):
class Tokenizer(Tokenizer):
QUOTES = ["'", '"'] # String delimiters
IDENTIFIERS = ["`"] # Identifier delimiters
KEYWORDS = {
**Tokenizer.KEYWORDS,
"INT64": TokenType.BIGINT,
"FLOAT64": TokenType.DOUBLE,
}
class Generator(Generator):
# Custom SQL generation rules
TRANSFORMS = {
exp.Array: lambda self, e: f"[{self.expressions(e)}]",
}
# Data type mappings
TYPE_MAPPING = {
exp.DataType.Type.TINYINT: "INT64",
exp.DataType.Type.SMALLINT: "INT64",
exp.DataType.Type.INT: "INT64",
exp.DataType.Type.BIGINT: "INT64",
exp.DataType.Type.FLOAT: "FLOAT64",
exp.DataType.Type.DOUBLE: "FLOAT64",
}
# Use custom dialect
custom = CustomDialect()
parsed = custom.parse("SELECT col FROM table WHERE int_col > 5")
generated = custom.generate(parsed[0])

Example — converting one source query into multiple target dialects:

import sqlglot
from sqlglot.dialects import Dialect
# Get available dialects
available_dialects = [
"bigquery", "snowflake", "spark", "postgres", "mysql",
"oracle", "duckdb", "clickhouse", "presto", "trino"
]
# Convert between multiple dialects
source_sql = "SELECT EXTRACT(YEAR FROM date_col) FROM table"
for target_dialect in ["postgres", "mysql", "snowflake"]:
try:
converted = sqlglot.transpile(
source_sql,
read="bigquery",
write=target_dialect
)[0]
print(f"{target_dialect}: {converted}")
except Exception as e:
print(f"Error converting to {target_dialect}: {e}")import sqlglot
from sqlglot.optimizer import optimize
# Optimize for specific target dialect
sql = """
SELECT user_id, COUNT(*) as cnt
FROM users
WHERE created_date >= '2023-01-01'
GROUP BY user_id
HAVING COUNT(*) > 5
"""
# Optimize for BigQuery
optimized_bq = optimize(sql, dialect="bigquery")
bq_sql = optimized_bq.sql(dialect="bigquery", pretty=True)
# Optimize for Snowflake
optimized_sf = optimize(sql, dialect="snowflake")
sf_sql = optimized_sf.sql(dialect="snowflake", pretty=True)
print("BigQuery:", bq_sql)
print("Snowflake:", sf_sql)class Dialect:
"""Base dialect class with extensible components."""
class Tokenizer:
"""Dialect-specific tokenization rules."""
QUOTES: List[str] # String quote characters
IDENTIFIERS: List[str] # Identifier quote characters
KEYWORDS: Dict[str, TokenType] # Keyword mappings
class Parser:
"""Dialect-specific parsing rules."""
FUNCTIONS: Dict[str, Callable] # Function parsers
class Generator:
"""Dialect-specific SQL generation rules."""
TRANSFORMS: Dict[Type, Callable] # Expression transformers
TYPE_MAPPING: Dict[Type, str] # Data type mappings
# Core dialect methods
def parse(self, sql: str, **opts) -> List[Optional[Expression]]: ...
def generate(self, expression: Expression, **opts) -> str: ...
def tokenize(self, sql: str) -> List[Token]: ...
# Type alias for dialect specification
DialectType = Union[str, Dialect, Type[Dialect]]

Install with Tessl CLI
npx tessl i tessl/pypi-sqlglot