docs/dialects.md

# SQL Dialects

Support for 30+ SQL dialects with dialect-specific parsing, generation, and transformation rules. Each dialect handles unique syntax, functions, data types, and SQL features specific to different database systems.

## Capabilities

### Base Dialect System

Core dialect functionality that provides the foundation for all SQL dialect implementations.

```python { .api }
class Dialect:
    """Base dialect class providing core SQL processing functionality."""

    def parse(self, sql: str, **opts) -> List[Optional[Expression]]:
        """Parse SQL string using dialect-specific grammar rules."""

    def generate(self, expression: Expression, **opts) -> str:
        """Generate SQL string from expression using dialect-specific syntax."""

    def tokenize(self, sql: str) -> List[Token]:
        """Tokenize SQL string using dialect-specific keywords and operators."""

    @classmethod
    def get_or_raise(cls, dialect: str) -> Dialect:
        """Get dialect instance by name or raise error if not found."""

    @classmethod
    def get(cls, dialect: str) -> Optional[Dialect]:
        """Get dialect by name; returns None if not found."""
```

### Cloud Data Warehouse Dialects

Modern cloud-based data warehouse platforms with advanced analytics features.

```python { .api }
class BigQuery(Dialect):
    """Google BigQuery dialect with standard SQL and legacy SQL support."""

class Snowflake(Dialect):
    """Snowflake dialect with cloud data warehouse features."""

class Redshift(Dialect):
    """Amazon Redshift dialect with PostgreSQL-based syntax."""

class Databricks(Dialect):
    """Databricks dialect based on Apache Spark SQL."""

class Fabric(Dialect):
    """Microsoft Fabric dialect for data analytics."""
```

### Open Source Analytics Engines

Popular open-source SQL engines for data processing and analytics.

```python { .api }
class Spark(Dialect):
    """Apache Spark SQL dialect for distributed data processing."""

class Spark2(Dialect):
    """Apache Spark 2.x dialect with legacy compatibility."""

class DuckDB(Dialect):
    """DuckDB dialect for embedded analytical processing."""

class ClickHouse(Dialect):
    """ClickHouse dialect for real-time analytics."""

class Presto(Dialect):
    """Presto dialect for distributed query processing."""

class Trino(Dialect):
    """Trino dialect (formerly PrestoSQL) for federated queries."""

class Hive(Dialect):
    """Apache Hive dialect for Hadoop data warehouse."""

class Drill(Dialect):
    """Apache Drill dialect for schema-free SQL queries."""

class Doris(Dialect):
    """Apache Doris dialect for modern data warehouse."""

class StarRocks(Dialect):
    """StarRocks dialect for real-time analytics."""
```

### Traditional Database Systems

Established relational database management systems.

```python { .api }
class MySQL(Dialect):
    """MySQL dialect with MySQL-specific syntax and functions."""

class Postgres(Dialect):
    """PostgreSQL dialect with advanced SQL features."""

class Oracle(Dialect):
    """Oracle Database dialect with enterprise features."""

class SQLite(Dialect):
    """SQLite dialect for embedded databases."""

class TSQL(Dialect):
    """Microsoft SQL Server T-SQL dialect."""

class Teradata(Dialect):
    """Teradata dialect for enterprise data warehousing."""
```

### Specialized and Emerging Platforms

Newer and specialized SQL platforms for specific use cases.

```python { .api }
class Athena(Dialect):
    """Amazon Athena dialect for serverless queries."""

class Dremio(Dialect):
    """Dremio dialect for data lake analytics."""

class Exasol(Dialect):
    """Exasol dialect for high-performance analytics."""

class Materialize(Dialect):
    """Materialize dialect for streaming SQL."""

class RisingWave(Dialect):
    """RisingWave dialect for stream processing."""

class SingleStore(Dialect):
    """SingleStore dialect for distributed SQL."""

class Tableau(Dialect):
    """Tableau dialect for business intelligence."""

class Druid(Dialect):
    """Apache Druid dialect for real-time analytics."""

class Dune(Dialect):
    """Dune Analytics dialect for blockchain data."""

class PRQL(Dialect):
    """PRQL (Pipelined Relational Query Language) dialect."""
```

## Usage Examples

### Working with Specific Dialects

```python
import sqlglot
from sqlglot.dialects import BigQuery, Snowflake, Spark

# Parse using specific dialect
bigquery = BigQuery()
expressions = bigquery.parse("SELECT EXTRACT(YEAR FROM date_col) FROM table")

# Generate SQL for specific dialect
snowflake = Snowflake()
sql = snowflake.generate(expressions[0])

# Use dialect names with main functions
spark_sql = "SELECT explode(split('a,b,c', ',')) as item"
postgres_sql = sqlglot.transpile(spark_sql, read="spark", write="postgres")[0]
```

### Dialect-Specific Features

```python
import sqlglot

# BigQuery array and struct syntax
bq_query = """
SELECT
    arr[OFFSET(0)] as first_element,
    struct_col.field_name,
    PARSE_DATE('%Y-%m-%d', date_string)
FROM table_name
"""
standard_sql = sqlglot.transpile(bq_query, read="bigquery", write="postgres")[0]

# Snowflake variant data type
sf_query = """
SELECT
    variant_col:field::string as extracted_field,
    FLATTEN(array_col) as flattened
FROM table_name
"""

# Spark SQL window functions with specific syntax
spark_query = """
SELECT
    col1,
    LAG(col1, 1) OVER (PARTITION BY col2 ORDER BY col3
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as prev_val
FROM table
"""
```

### Custom Dialect Creation

```python
from sqlglot import exp
from sqlglot.dialects.dialect import Dialect
from sqlglot.generator import Generator
from sqlglot.tokens import Tokenizer, TokenType

class CustomDialect(Dialect):
    class Tokenizer(Tokenizer):
        QUOTES = ["'", '"']  # String delimiters
        IDENTIFIERS = ["`"]  # Identifier delimiters

        KEYWORDS = {
            **Tokenizer.KEYWORDS,
            "INT64": TokenType.BIGINT,
            "FLOAT64": TokenType.DOUBLE,
        }

    class Generator(Generator):
        # Custom SQL generation rules
        TRANSFORMS = {
            exp.Array: lambda self, e: f"[{self.expressions(e)}]",
        }

        # Data type mappings
        TYPE_MAPPING = {
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.DOUBLE: "FLOAT64",
        }

# Use custom dialect
custom = CustomDialect()
parsed = custom.parse("SELECT col FROM table WHERE int_col > 5")
generated = custom.generate(parsed[0])
```

### Dialect Detection and Conversion

```python
import sqlglot
from sqlglot.dialects import Dialect

# Get available dialects
available_dialects = [
    "bigquery", "snowflake", "spark", "postgres", "mysql",
    "oracle", "duckdb", "clickhouse", "presto", "trino"
]

# Convert between multiple dialects
source_sql = "SELECT EXTRACT(YEAR FROM date_col) FROM table"

for target_dialect in ["postgres", "mysql", "snowflake"]:
    try:
        converted = sqlglot.transpile(
            source_sql,
            read="bigquery",
            write=target_dialect
        )[0]
        print(f"{target_dialect}: {converted}")
    except Exception as e:
        print(f"Error converting to {target_dialect}: {e}")
```

### Dialect-Specific Optimization

```python
import sqlglot
from sqlglot.optimizer import optimize

# Optimize for specific target dialect
sql = """
SELECT user_id, COUNT(*) as cnt
FROM users
WHERE created_date >= '2023-01-01'
GROUP BY user_id
HAVING COUNT(*) > 5
"""

# Optimize for BigQuery
optimized_bq = optimize(sql, dialect="bigquery")
bq_sql = optimized_bq.sql(dialect="bigquery", pretty=True)

# Optimize for Snowflake
optimized_sf = optimize(sql, dialect="snowflake")
sf_sql = optimized_sf.sql(dialect="snowflake", pretty=True)

print("BigQuery:", bq_sql)
print("Snowflake:", sf_sql)
```

## Types

```python { .api }
class Dialect:
    """Base dialect class with extensible components."""

    class Tokenizer:
        """Dialect-specific tokenization rules."""
        QUOTES: List[str]               # String quote characters
        IDENTIFIERS: List[str]          # Identifier quote characters
        KEYWORDS: Dict[str, TokenType]  # Keyword mappings

    class Parser:
        """Dialect-specific parsing rules."""
        FUNCTIONS: Dict[str, Callable]  # Function parsers

    class Generator:
        """Dialect-specific SQL generation rules."""
        TRANSFORMS: Dict[Type, Callable]  # Expression transformers
        TYPE_MAPPING: Dict[Type, str]     # Data type mappings

    # Core dialect methods
    def parse(self, sql: str, **opts) -> List[Optional[Expression]]: ...
    def generate(self, expression: Expression, **opts) -> str: ...
    def tokenize(self, sql: str) -> List[Token]: ...

# Type alias for dialect specification
DialectType = Union[str, Dialect, Type[Dialect]]
```