A comprehensive SQL parser, transpiler, optimizer, and engine supporting 30+ dialects
Additional tools for SQL analysis, comparison, and manipulation including AST diffing, column lineage analysis, time parsing utilities, and error handling for comprehensive SQL processing workflows.
Compare SQL expressions and identify differences between query structures.
def diff(
source: Expression,
target: Expression,
**opts
) -> str:
"""
Compare two SQL expressions and generate diff output.
Args:
source (Expression): Source expression for comparison
target (Expression): Target expression for comparison
**opts: Diff formatting options
Returns:
str: Human-readable diff showing changes between expressions
"""Trace column dependencies and data flow through complex SQL queries.
def lineage(
sql: str,
schema: Optional[Schema] = None,
**opts
) -> Dict:
"""
Analyze column lineage and dependencies in SQL query.
Args:
sql (str): SQL query to analyze
schema (Schema): Schema for column resolution
**opts: Lineage analysis options
Returns:
Dict: Column lineage mapping showing data flow
"""Comprehensive error handling for SQL processing operations.
class SqlglotError(Exception):
"""Base exception class for all SQLGlot errors."""
class ParseError(SqlglotError):
"""Exception raised when SQL parsing fails."""
def __init__(
self,
message: str,
errors: Optional[List[Dict[str, Any]]] = None
):
"""
Initialize parse error with message and detailed error information.
Args:
message (str): Error message
errors (List[Dict]): List of detailed error dictionaries
"""
@classmethod
def new(
cls,
message: str,
description: Optional[str] = None,
line: Optional[int] = None,
col: Optional[int] = None,
start_context: Optional[str] = None,
highlight: Optional[str] = None,
end_context: Optional[str] = None,
into_expression: Optional[str] = None
) -> ParseError:
"""Create new ParseError with detailed location information."""
class TokenError(SqlglotError):
"""Exception raised when tokenization fails."""
class UnsupportedError(SqlglotError):
"""Exception raised when encountering unsupported SQL features."""
class OptimizeError(SqlglotError):
"""Exception raised during query optimization."""
class SchemaError(SqlglotError):
"""Exception raised for schema-related issues."""
class ExecuteError(SqlglotError):
"""Exception raised during SQL execution."""
class ErrorLevel:
"""Error handling levels for parser and validator."""
IGNORE: str # Ignore all errors
WARN: str # Log errors but continue processing
RAISE: str # Collect all errors and raise single exception
IMMEDIATE: str # Raise exception immediately on first errorGeneral utility functions and classes for SQL processing.
class AutoName:
"""Base class for auto-naming enumerations."""
def dict_depth(mapping: Dict) -> int:
"""
Calculate maximum nesting depth of dictionary.
Args:
mapping (Dict): Dictionary to analyze
Returns:
int: Maximum nesting depth
"""
def first(iterable) -> Any:
"""
Return first element from iterable.
Args:
iterable: Iterable to get first element from
Returns:
Any: First element or None if empty
"""
def concat_messages(errors: List[Any], maximum: int) -> str:
"""
Concatenate error messages with truncation.
Args:
errors (List): List of error objects
maximum (int): Maximum number of errors to include
Returns:
str: Concatenated error message string
"""
def merge_errors(errors: List[ParseError]) -> List[Dict[str, Any]]:
"""
Merge multiple ParseError instances into unified error list.
Args:
errors (List[ParseError]): Parse errors to merge
Returns:
List[Dict]: Merged error information dictionaries
"""Specialized utilities for handling SQL time and date operations.
# Time parsing and formatting utilities for SQL time literals
# Located in sqlglot.time module - provides functions for:
# - Parsing time strings in various formats
# - Converting between time representations
# - Formatting time values for different SQL dialectsVarious SQL transformation and manipulation utilities.
# SQL transformation utilities for advanced query manipulation
# Located in sqlglot.transforms module - provides functions for:
# - Custom expression transformations
# - Dialect-specific syntax conversions
# - Advanced AST manipulation patternsJSON path expression support for SQL queries with JSON data.
# JSON path expression utilities for SQL JSON operations
# Located in sqlglot.jsonpath module - provides:
# - JSON path parsing and evaluation
# - Integration with SQL JSON functions
# - Cross-dialect JSON path supportTrie and other data structure utilities for efficient processing.
def new_trie() -> Dict:
"""
Create new trie data structure.
Returns:
Dict: Empty trie structure
"""
def in_trie(trie: Dict, key: str) -> TrieResult:
"""
Check if key exists in trie structure.
Args:
trie (Dict): Trie to search
key (str): Key to search for
Returns:
TrieResult: Result indicating presence and type of match
"""
class TrieResult:
"""Result of trie lookup operation."""
def __init__(self, exists: bool, prefix: bool = False):
"""
Initialize trie result.
Args:
exists (bool): Whether key exists in trie
prefix (bool): Whether key is a prefix of existing keys
"""Serialization and deserialization utilities for expressions.
# Serialization utilities for SQLGlot expressions
# Located in sqlglot.serde module - provides:
# - Expression serialization to JSON/dict format
# - Deserialization from stored representations
# - Cross-process expression transfer supportimport sqlglot
from sqlglot.diff import diff
# Compare two similar queries
query1 = sqlglot.parse_one("SELECT name, age FROM users WHERE age > 25")
query2 = sqlglot.parse_one("SELECT name, age, email FROM users WHERE age >= 25")
# Generate diff
diff_output = diff(query1, query2)
print(diff_output)
# Compare more complex queries
original = sqlglot.parse_one("""
SELECT u.name, COUNT(o.id) as order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.active = 1
GROUP BY u.name
""")
modified = sqlglot.parse_one("""
SELECT u.name, u.email, COUNT(o.id) as total_orders
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.active = 1 AND u.verified = 1
GROUP BY u.name, u.email
""")
print(diff(original, modified))import sqlglot
from sqlglot.lineage import lineage
from sqlglot.schema import MappingSchema
# Define schema for lineage analysis
schema = MappingSchema({
"raw_data": {
"user_id": "INT",
"first_name": "VARCHAR",
"last_name": "VARCHAR",
"email": "VARCHAR",
"signup_date": "DATE"
},
"orders": {
"id": "INT",
"user_id": "INT",
"amount": "DECIMAL",
"order_date": "DATE"
}
})
# Analyze complex query lineage
sql = """
SELECT
CONCAT(u.first_name, ' ', u.last_name) as full_name,
u.email,
COUNT(o.id) as total_orders,
SUM(o.amount) as total_spent,
AVG(o.amount) as avg_order_value,
MAX(o.order_date) as last_order_date
FROM raw_data u
LEFT JOIN orders o ON u.user_id = o.user_id
WHERE u.signup_date >= '2023-01-01'
GROUP BY u.user_id, u.first_name, u.last_name, u.email
"""
# Get lineage information
lineage_info = lineage(sql, schema=schema)
print("Column lineage:")
for output_col, source_cols in lineage_info.items():
print(f"{output_col} <- {source_cols}")import sqlglot
from sqlglot import ParseError, UnsupportedError, ErrorLevel
# Handle parsing errors gracefully
def safe_parse(sql_queries):
results = []
errors = []
for sql in sql_queries:
try:
parsed = sqlglot.parse_one(sql)
results.append(parsed)
except ParseError as e:
errors.append(f"Parse error in '{sql}': {e}")
except UnsupportedError as e:
errors.append(f"Unsupported feature in '{sql}': {e}")
return results, errors
# Test with mixed valid/invalid SQL
test_queries = [
"SELECT * FROM users",
"SELECT FROM", # Invalid
"SELECT name FROM users WHERE age > 25",
"INVALID SQL SYNTAX", # Invalid
]
parsed_queries, parse_errors = safe_parse(test_queries)
print(f"Successfully parsed: {len(parsed_queries)} queries")
print(f"Errors: {len(parse_errors)}")
for error in parse_errors:
print(f" {error}")
# Use different error levels
try:
# Parse with warning level - continues on errors
expressions = sqlglot.parse(
"SELECT 1; INVALID; SELECT 2;",
error_level=ErrorLevel.WARN
)
print(f"Parsed {len([e for e in expressions if e])} valid expressions")
except Exception as e:
print(f"Error: {e}")import sqlglot
from sqlglot.helper import dict_depth, first
# Calculate nested dictionary depth
schema_dict = {
"database1": {
"schema1": {
"table1": {"col1": "INT", "col2": "VARCHAR"}
}
},
"database2": {
"schema1": {
"table1": {"col1": "INT"},
"table2": {"col1": "INT", "col2": "VARCHAR", "col3": "DATE"}
}
}
}
depth = dict_depth(schema_dict)
print(f"Schema depth: {depth}")
# Get first valid expression from parse results
sql_statements = "INVALID; SELECT 1; SELECT 2;"
expressions = sqlglot.parse(sql_statements, error_level=ErrorLevel.WARN)
first_valid = first(expr for expr in expressions if expr is not None)
if first_valid:
print(f"First valid expression: {first_valid.sql()}")from sqlglot.trie import new_trie, in_trie
# Create trie for keyword matching
keywords_trie = new_trie()
# Add keywords (this would typically be done internally)
keywords = ["SELECT", "FROM", "WHERE", "GROUP", "ORDER", "HAVING"]
# Note: Actual trie population would use internal SQLGlot methods
# Check for keyword presence
for word in ["SELECT", "SELEC", "SELECTED", "FROM", "TABLE"]:
result = in_trie(keywords_trie, word.upper())
if result.exists:
print(f"'{word}' is a complete keyword")
elif result.prefix:
print(f"'{word}' is a keyword prefix")
else:
print(f"'{word}' is not a keyword or prefix")import sqlglot
from sqlglot import ParseError
# Parse with detailed error information
invalid_sql = """
SELECT name, age
FROM users
WHERE age > 25 AND
status = 'active' AND
created_date >= '2023-01-01'
missing_clause
"""
try:
parsed = sqlglot.parse_one(invalid_sql)
except ParseError as e:
print(f"Parse error: {e}")
# Access detailed error information
for error_detail in e.errors:
print(f" Line: {error_detail.get('line')}")
print(f" Column: {error_detail.get('col')}")
print(f" Description: {error_detail.get('description')}")
print(f" Context: {error_detail.get('start_context')}")
print(f" Highlight: {error_detail.get('highlight')}")class SqlglotError(Exception):
"""Base exception for all SQLGlot errors."""
class ParseError(SqlglotError):
"""SQL parsing error with detailed information."""
errors: List[Dict[str, Any]] # Detailed error information
def __init__(self, message: str, errors: Optional[List[Dict]] = None): ...
class TokenError(SqlglotError):
"""Tokenization error."""
class UnsupportedError(SqlglotError):
"""Unsupported feature error."""
class OptimizeError(SqlglotError):
"""Query optimization error."""
class SchemaError(SqlglotError):
"""Schema-related error."""
class ExecuteError(SqlglotError):
"""SQL execution error."""
class ErrorLevel:
"""Error handling level enumeration."""
IGNORE: str # Ignore all errors
WARN: str # Log errors but continue
RAISE: str # Collect errors and raise exception
IMMEDIATE: str # Raise immediately on first error
class TrieResult:
"""Result of trie lookup operation."""
exists: bool # Whether key exists completely in trie
prefix: bool # Whether key is prefix of existing keys
def __init__(self, exists: bool, prefix: bool = False): ...
class AutoName:
"""Base class for auto-naming enumerations."""Install with Tessl CLI
npx tessl i tessl/pypi-sqlglot