# SQL Dialects

Support for 30+ SQL dialects with dialect-specific parsing, generation, and transformation rules. Each dialect handles unique syntax, functions, data types, and SQL features specific to different database systems.

## Capabilities

### Base Dialect System

Core dialect functionality that provides the foundation for all SQL dialect implementations.

```python { .api }
class Dialect:
    """Base dialect class providing core SQL processing functionality."""

    def parse(self, sql: str, **opts) -> List[Optional[Expression]]:
        """Parse SQL string using dialect-specific grammar rules."""

    def generate(self, expression: Expression, **opts) -> str:
        """Generate SQL string from expression using dialect-specific syntax."""

    def tokenize(self, sql: str) -> List[Token]:
        """Tokenize SQL string using dialect-specific keywords and operators."""

    @classmethod
    def get_or_raise(cls, dialect: str) -> Dialect:
        """Get dialect instance by name or raise an error if not found."""

    @classmethod
    def get(cls, dialect: str) -> Optional[Dialect]:
        """Get dialect by name; returns None if not found."""
```

### Cloud Data Warehouse Dialects

Modern cloud-based data warehouse platforms with advanced analytics features.

```python { .api }
class BigQuery(Dialect):
    """Google BigQuery dialect with standard SQL and legacy SQL support."""

class Snowflake(Dialect):
    """Snowflake dialect with cloud data warehouse features."""

class Redshift(Dialect):
    """Amazon Redshift dialect with PostgreSQL-based syntax."""

class Databricks(Dialect):
    """Databricks dialect based on Apache Spark SQL."""

class Fabric(Dialect):
    """Microsoft Fabric dialect for data analytics."""
```

### Open Source Analytics Engines

Popular open-source SQL engines for data processing and analytics.

```python { .api }
class Spark(Dialect):
    """Apache Spark SQL dialect for distributed data processing."""

class Spark2(Dialect):
    """Apache Spark 2.x dialect with legacy compatibility."""

class DuckDB(Dialect):
    """DuckDB dialect for embedded analytical processing."""

class ClickHouse(Dialect):
    """ClickHouse dialect for real-time analytics."""

class Presto(Dialect):
    """Presto dialect for distributed query processing."""

class Trino(Dialect):
    """Trino dialect (formerly PrestoSQL) for federated queries."""

class Hive(Dialect):
    """Apache Hive dialect for Hadoop data warehouse."""

class Drill(Dialect):
    """Apache Drill dialect for schema-free SQL queries."""

class Doris(Dialect):
    """Apache Doris dialect for modern data warehouse."""

class StarRocks(Dialect):
    """StarRocks dialect for real-time analytics."""
```

### Traditional Database Systems

Established relational database management systems.

```python { .api }
class MySQL(Dialect):
    """MySQL dialect with MySQL-specific syntax and functions."""

class Postgres(Dialect):
    """PostgreSQL dialect with advanced SQL features."""

class Oracle(Dialect):
    """Oracle Database dialect with enterprise features."""

class SQLite(Dialect):
    """SQLite dialect for embedded databases."""

class TSQL(Dialect):
    """Microsoft SQL Server T-SQL dialect."""

class Teradata(Dialect):
    """Teradata dialect for enterprise data warehousing."""
```

### Specialized and Emerging Platforms

Newer and specialized SQL platforms for specific use cases.

```python { .api }
class Athena(Dialect):
    """Amazon Athena dialect for serverless queries."""

class Dremio(Dialect):
    """Dremio dialect for data lake analytics."""

class Exasol(Dialect):
    """Exasol dialect for high-performance analytics."""

class Materialize(Dialect):
    """Materialize dialect for streaming SQL."""

class RisingWave(Dialect):
    """RisingWave dialect for stream processing."""

class SingleStore(Dialect):
    """SingleStore dialect for distributed SQL."""

class Tableau(Dialect):
    """Tableau dialect for business intelligence."""

class Druid(Dialect):
    """Apache Druid dialect for real-time analytics."""

class Dune(Dialect):
    """Dune Analytics dialect for blockchain data."""

class PRQL(Dialect):
    """PRQL (Pipelined Relational Query Language) dialect."""
```

## Usage Examples

### Working with Specific Dialects

```python
import sqlglot
from sqlglot.dialects import BigQuery, Snowflake, Spark

# Parse using a specific dialect
bigquery = BigQuery()
expressions = bigquery.parse("SELECT EXTRACT(YEAR FROM date_col) FROM table")

# Generate SQL for a specific dialect
snowflake = Snowflake()
sql = snowflake.generate(expressions[0])

# Use dialect names with the main functions
spark_sql = "SELECT explode(split('a,b,c', ',')) as item"
postgres_sql = sqlglot.transpile(spark_sql, read="spark", write="postgres")[0]
```

### Dialect-Specific Features

```python
import sqlglot

# BigQuery array and struct syntax
bq_query = """
SELECT
    arr[OFFSET(0)] as first_element,
    struct_col.field_name,
    PARSE_DATE('%Y-%m-%d', date_string)
FROM table_name
"""
standard_sql = sqlglot.transpile(bq_query, read="bigquery", write="postgres")[0]

# Snowflake variant data type
sf_query = """
SELECT
    variant_col:field::string as extracted_field,
    FLATTEN(array_col) as flattened
FROM table_name
"""

# Spark SQL window functions with specific syntax
spark_query = """
SELECT
    col1,
    LAG(col1, 1) OVER (PARTITION BY col2 ORDER BY col3
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as prev_val
FROM table
"""
```

### Custom Dialect Creation

```python
from sqlglot import exp
from sqlglot.dialects.dialect import Dialect
from sqlglot.generator import Generator
from sqlglot.tokens import Tokenizer, TokenType

class CustomDialect(Dialect):
    class Tokenizer(Tokenizer):
        QUOTES = ["'", '"']  # String delimiters
        IDENTIFIERS = ["`"]  # Identifier delimiters

        KEYWORDS = {
            **Tokenizer.KEYWORDS,
            "INT64": TokenType.BIGINT,
            "FLOAT64": TokenType.DOUBLE,
        }

    class Generator(Generator):
        # Custom SQL generation rules
        TRANSFORMS = {
            exp.Array: lambda self, e: f"[{self.expressions(e)}]",
        }

        # Data type mappings
        TYPE_MAPPING = {
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.DOUBLE: "FLOAT64",
        }

# Use the custom dialect
custom = CustomDialect()
parsed = custom.parse("SELECT col FROM table WHERE int_col > 5")
generated = custom.generate(parsed[0])
```

### Dialect Detection and Conversion

```python
import sqlglot
from sqlglot.dialects import Dialect

# Get available dialects
available_dialects = [
    "bigquery", "snowflake", "spark", "postgres", "mysql",
    "oracle", "duckdb", "clickhouse", "presto", "trino",
]

# Convert between multiple dialects
source_sql = "SELECT EXTRACT(YEAR FROM date_col) FROM table"

for target_dialect in ["postgres", "mysql", "snowflake"]:
    try:
        converted = sqlglot.transpile(
            source_sql,
            read="bigquery",
            write=target_dialect,
        )[0]
        print(f"{target_dialect}: {converted}")
    except Exception as e:
        print(f"Error converting to {target_dialect}: {e}")
```

### Dialect-Specific Optimization

```python
import sqlglot
from sqlglot.optimizer import optimize

# Optimize for a specific target dialect
sql = """
SELECT user_id, COUNT(*) as cnt
FROM users
WHERE created_date >= '2023-01-01'
GROUP BY user_id
HAVING COUNT(*) > 5
"""

# Optimize for BigQuery
optimized_bq = optimize(sql, dialect="bigquery")
bq_sql = optimized_bq.sql(dialect="bigquery", pretty=True)

# Optimize for Snowflake
optimized_sf = optimize(sql, dialect="snowflake")
sf_sql = optimized_sf.sql(dialect="snowflake", pretty=True)

print("BigQuery:", bq_sql)
print("Snowflake:", sf_sql)
```

## Types

```python { .api }
class Dialect:
    """Base dialect class with extensible components."""

    class Tokenizer:
        """Dialect-specific tokenization rules."""
        QUOTES: List[str]               # String quote characters
        IDENTIFIERS: List[str]          # Identifier quote characters
        KEYWORDS: Dict[str, TokenType]  # Keyword mappings

    class Parser:
        """Dialect-specific parsing rules."""
        FUNCTIONS: Dict[str, Callable]  # Function parsers

    class Generator:
        """Dialect-specific SQL generation rules."""
        TRANSFORMS: Dict[Type, Callable]  # Expression transformers
        TYPE_MAPPING: Dict[Type, str]     # Data type mappings

    # Core dialect methods
    def parse(self, sql: str, **opts) -> List[Optional[Expression]]: ...
    def generate(self, expression: Expression, **opts) -> str: ...
    def tokenize(self, sql: str) -> List[Token]: ...

# Type alias for dialect specification
DialectType = Union[str, Dialect, Type[Dialect]]
```