A comprehensive SQL parser, transpiler, optimizer, and engine supporting 30+ dialects
Essential SQL parsing and dialect translation functionality that forms the foundation of SQLGlot. These functions handle tokenization, parsing SQL into abstract syntax trees, and transpiling between different SQL dialects.
Parse SQL strings into abstract syntax trees (ASTs) for analysis and manipulation. Supports parsing multiple statements and handling various SQL dialects.
def parse(sql: str, read: str = None, dialect: str = None, **opts) -> List[Optional[Expression]]:
"""
Parses SQL string into collection of syntax trees, one per statement.
Args:
sql (str): SQL code string to parse
read (str): SQL dialect for parsing (e.g., "spark", "hive", "presto", "mysql")
dialect (str): SQL dialect (alias for read)
**opts: Additional parser options
Returns:
List[Optional[Expression]]: Collection of parsed expression trees
"""Parse a single SQL statement into an expression tree. Most commonly used parsing function for single queries.
def parse_one(sql: str, read: str = None, dialect: str = None, into: Optional[Type] = None, **opts) -> Expression:
"""
Parses SQL string and returns syntax tree for the first statement.
Args:
sql (str): SQL code string to parse
read (str): SQL dialect for parsing
dialect (str): SQL dialect (alias for read)
into (Type): Specific SQLGlot Expression type to parse into
**opts: Additional parser options
Returns:
Expression: Syntax tree for the first parsed statement
Raises:
ParseError: If no valid expression could be parsed
"""Convert SQL between different dialects while preserving semantic meaning. Handles dialect-specific syntax, functions, and data types.
def transpile(
sql: str,
read: str = None,
write: str = None,
identity: bool = True,
error_level: Optional[ErrorLevel] = None,
**opts
) -> List[str]:
"""
Transpiles SQL from source dialect to target dialect.
Args:
sql (str): SQL code string to transpile
read (str): Source dialect (e.g., "spark", "hive", "presto", "mysql")
write (str): Target dialect (e.g., "postgres", "bigquery", "snowflake")
identity (bool): Use source dialect as target if write not specified
error_level (ErrorLevel): Desired error handling level
**opts: Additional generator options for output formatting
Returns:
List[str]: List of transpiled SQL statements
"""Break SQL strings into lexical tokens for low-level analysis and custom processing.
def tokenize(sql: str, read: str = None, dialect: str = None) -> List[Token]:
"""
Tokenizes SQL string into list of lexical tokens.
Args:
sql (str): SQL code string to tokenize
read (str): SQL dialect for tokenization
dialect (str): SQL dialect (alias for read)
Returns:
List[Token]: List of tokens representing the SQL input
"""Additional parsing utilities for expression handling and analysis.
def maybe_parse(sql: str | Expression, **opts) -> Expression:
"""
Parses SQL string or returns Expression if already parsed.
Args:
sql: SQL string or Expression object
**opts: Parse options if parsing needed
Returns:
Expression: Parsed or existing expression
"""
def diff(source: Expression, target: Expression, **opts) -> str:
"""
Compares two SQL expressions and returns a diff string.
Args:
source (Expression): Source expression to compare
target (Expression): Target expression to compare against
**opts: Additional diff options
Returns:
str: String representation of differences between expressions
"""import sqlglot
# Parse a simple SELECT statement
sql = "SELECT name, age FROM users WHERE age > 25"
expression = sqlglot.parse_one(sql)
# Parse with specific dialect
spark_sql = "SELECT explode(array_col) FROM table"
expression = sqlglot.parse_one(spark_sql, dialect="spark")
# Parse multiple statements
multi_sql = "SELECT 1; SELECT 2; SELECT 3;"
expressions = sqlglot.parse(multi_sql)

import sqlglot
# Convert Spark SQL to PostgreSQL
spark_query = "SELECT DATE_ADD(current_date(), 7) as future_date"
postgres_query = sqlglot.transpile(spark_query, read="spark", write="postgres")[0]
# Result: "SELECT (CURRENT_DATE + INTERVAL '7' DAY) AS future_date"
# Convert BigQuery to Snowflake
bq_query = "SELECT EXTRACT(YEAR FROM date_col) FROM table"
sf_query = sqlglot.transpile(bq_query, read="bigquery", write="snowflake")[0]
# Format SQL with pretty printing
formatted = sqlglot.transpile(
"SELECT a,b,c FROM table WHERE x=1 AND y=2",
pretty=True
)[0]

import sqlglot
sql = "SELECT * FROM users"
tokens = sqlglot.tokenize(sql)
for token in tokens:
print(f"{token.token_type}: {token.text}")
# Output:
# TokenType.SELECT: SELECT
# TokenType.STAR: *
# TokenType.FROM: FROM
# TokenType.IDENTIFIER: users

import sqlglot
from sqlglot import ParseError, ErrorLevel
# Handle parsing errors
try:
expression = sqlglot.parse_one("SELECT FROM") # Invalid SQL
except ParseError as e:
print(f"Parse error: {e}")
# Control error level
expressions = sqlglot.parse(
"SELECT 1; INVALID SQL; SELECT 2",
error_level=ErrorLevel.WARN # Log errors but continue
)

class Token:
"""Represents a lexical token from SQL tokenization."""
token_type: TokenType
text: str
line: int
col: int
def __init__(self, token_type: TokenType, text: str, line: int = 1, col: int = 1): ...
class TokenType:
"""Enumeration of all possible token types in SQL."""
# Keywords
SELECT: str
FROM: str
WHERE: str
# Operators
PLUS: str
MINUS: str
STAR: str
# Literals
STRING: str
NUMBER: str
# ... and many more
class ErrorLevel:
"""Error handling levels for parsing operations."""
IGNORE: str # Ignore all errors
WARN: str # Log errors but continue
RAISE: str # Collect errors and raise single exception
IMMEDIATE: str # Raise exception on first error

Install with Tessl CLI:

npx tessl i tessl/pypi-sqlglot