or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

analysis.md, code-generation.md, data-types.md, expressions.md, index.md, optimization.md, parsing.md, query-plans.md, utilities.md

docs/parsing.md

0

# SQL Parsing

1

2

This section covers the SQL parsing interfaces and abstract syntax tree representations in Spark Catalyst. The parser converts SQL text into logical plan trees.

3

4

## Core Imports

5

6

```scala

7

import org.apache.spark.sql.catalyst.parser._

8

import org.apache.spark.sql.catalyst.plans.logical._

9

import org.apache.spark.sql.catalyst.expressions._

10

import org.apache.spark.sql.types._

11

```

12

13

## Parser Interface

14

15

The main interface for parsing SQL statements, expressions, and data types.

16

17

```scala { .api }

18

abstract class ParserInterface {

19

def parsePlan(sqlText: String): LogicalPlan

20

def parseExpression(sqlText: String): Expression

21

def parseDataType(sqlText: String): DataType

22

def parseTableIdentifier(sqlText: String): TableIdentifier

23

def parseFunctionIdentifier(sqlText: String): FunctionIdentifier

24

def parseTableSchema(sqlText: String): StructType

25

}

26

```

27

28

### Usage Example

29

30

```scala

31

import org.apache.spark.sql.catalyst.parser._

32

33

// Create parser instance

34

val parser = CatalystSqlParser

35

36

// Parse SQL statement

37

val logicalPlan = parser.parsePlan("SELECT name, age FROM users WHERE age > 18")

38

39

// Parse expression

40

val expression = parser.parseExpression("age + 1")

41

42

// Parse data type

43

val dataType = parser.parseDataType("STRUCT<name: STRING, age: INT>")

44

```

45

46

## Catalyst SQL Parser

47

48

The default SQL parser implementation for Spark SQL.

49

50

```scala { .api }

51

object CatalystSqlParser extends AbstractSqlParser {

52

override def astBuilder: AstBuilder = new AstBuilder()

53

54

def parseExpression(sqlText: String): Expression

55

def parsePlan(sqlText: String): LogicalPlan

56

def parseDataType(sqlText: String): DataType

57

def parseTableIdentifier(sqlText: String): TableIdentifier

58

def parseMultipartIdentifier(sqlText: String): Seq[String]

59

}

60

```

61

62

## AST Builder

63

64

Converts ANTLR parse trees into Catalyst logical plans and expressions.

65

66

```scala { .api }

67

trait AstBuilder extends SqlBaseBaseVisitor[AnyRef] {

68

// Plan visitors

69

def visitSingleStatement(ctx: SingleStatementContext): LogicalPlan

70

def visitQuery(ctx: QueryContext): LogicalPlan

71

def visitQuerySpecification(ctx: QuerySpecificationContext): LogicalPlan

72

def visitFromClause(ctx: FromClauseContext): LogicalPlan

73

def visitJoinRelation(ctx: JoinRelationContext): LogicalPlan

74

75

// Expression visitors

76

def visitSingleExpression(ctx: SingleExpressionContext): Expression

77

def visitArithmeticBinary(ctx: ArithmeticBinaryContext): Expression

78

def visitComparison(ctx: ComparisonContext): Expression

79

def visitLogicalBinary(ctx: LogicalBinaryContext): Expression

80

def visitPredicated(ctx: PredicatedContext): Expression

81

82

// Data type visitors

83

def visitSingleDataType(ctx: SingleDataTypeContext): DataType

84

def visitPrimitiveDataType(ctx: PrimitiveDataTypeContext): DataType

85

def visitComplexDataType(ctx: ComplexDataTypeContext): DataType

86

}

87

```

88

89

## Parse Exceptions

90

91

### ParseException

92

93

```scala { .api }

94

class ParseException(

95

message: String,

96

val line: Int,

97

val startPosition: Int,

98

cause: Throwable = null

99

) extends Exception(message, cause) {

100

101

def withCommand(command: String): ParseException

102

override def getMessage: String

103

}

104

105

case class TemplateSqlParseException(

106

message: String,

107

errorClass: String,

108

messageParameters: Map[String, String],

109

origin: Option[Origin],

110

cause: Option[Throwable]

111

) extends ParseException(message, 0, 0, cause.orNull)

112

```

113

114

## Data Type Parsing

115

116

### Type String Conversion

117

118

```scala { .api }

119

object DataType {

120

def fromDDL(ddl: String): DataType = CatalystSqlParser.parseDataType(ddl)

121

def fromJson(json: String): DataType

122

}

123

124

// Examples of DDL type strings

125

val intType = DataType.fromDDL("INT")

126

val arrayType = DataType.fromDDL("ARRAY<STRING>")

127

val structType = DataType.fromDDL("STRUCT<name: STRING, age: INT>")

128

val mapType = DataType.fromDDL("MAP<STRING, DOUBLE>")

129

```

130

131

### Schema Parsing

132

133

```scala { .api }

134

object StructType {

135

def fromDDL(ddl: String): StructType = CatalystSqlParser.parseTableSchema(ddl)

136

}

137

138

// Example schema parsing

139

val schema = StructType.fromDDL("name STRING, age INT, scores ARRAY<DOUBLE>")

140

```

141

142

## Expression Parsing

143

144

### Expression String Conversion

145

146

```scala

147

// Parse various expression types

148

val literalExpr = parser.parseExpression("42")

149

val columnExpr = parser.parseExpression("users.name")

150

val arithmeticExpr = parser.parseExpression("age + 1")

151

val functionExpr = parser.parseExpression("UPPER(name)")

152

val caseExpr = parser.parseExpression("CASE WHEN age > 18 THEN 'adult' ELSE 'minor' END")

153

```

154

155

### Complex Expression Examples

156

157

```scala

158

import org.apache.spark.sql.catalyst.parser.CatalystSqlParser._

159

160

// Array operations

161

val arrayExpr = parseExpression("array(1, 2, 3)[0]")

162

163

// Map operations

164

val mapExpr = parseExpression("map('key1', 'value1')['key1']")

165

166

// Struct operations

167

val structExpr = parseExpression("named_struct('name', 'Alice', 'age', 25).name")

168

169

// Window functions

170

val windowExpr = parseExpression("ROW_NUMBER() OVER (PARTITION BY dept ORDER BY salary DESC)")

171

```

172

173

## SQL Statement Parsing

174

175

### DDL Statements

176

177

```scala { .api }

178

// Table creation

179

case class CreateTable(

180

tableIdentifier: TableIdentifier,

181

tableSchema: StructType,

182

partitionColumnNames: Seq[String],

183

bucketSpec: Option[BucketSpec],

184

properties: Map[String, String],

185

provider: Option[String],

186

options: Map[String, String],

187

location: Option[String],

188

comment: Option[String],

189

ifNotExists: Boolean

190

) extends LogicalPlan

191

192

// View creation

193

case class CreateView(

194

name: TableIdentifier,

195

userSpecifiedColumns: Seq[(String, Option[String])],

196

comment: Option[String],

197

properties: Map[String, String],

198

originalText: Option[String],

199

child: LogicalPlan,

200

allowExisting: Boolean,

201

replace: Boolean,

202

isTemporary: Boolean

203

) extends LogicalPlan

204

```

205

206

### DML Statements

207

208

```scala { .api }

209

// Insert statements

210

case class InsertIntoTable(

211

table: LogicalPlan,

212

partition: Map[String, Option[String]],

213

child: LogicalPlan,

214

overwrite: Boolean,

215

ifPartitionNotExists: Boolean

216

) extends LogicalPlan

217

218

// Update statements (Catalyst representation)

219

case class UpdateTable(

220

table: LogicalPlan,

221

assignments: Seq[Assignment],

222

condition: Option[Expression]

223

) extends LogicalPlan

224

```

225

226

## Function Parsing

227

228

### Function Identifier Parsing

229

230

```scala { .api }

231

case class FunctionIdentifier(funcName: String, database: Option[String] = None) {

232

def identifier: String = database.map(_ + ".").getOrElse("") + funcName

233

def unquotedString: String = identifier

234

def quotedString: String = database.map(quote).map(_ + ".").getOrElse("") + quote(funcName)

235

}

236

237

// Parse function names

238

val simpleFunc = parser.parseFunctionIdentifier("upper")

239

val qualifiedFunc = parser.parseFunctionIdentifier("my_db.custom_func")

240

```

241

242

## Parser Configuration

243

244

### SQL Configuration Impact

245

246

```scala

247

import org.apache.spark.sql.internal.SQLConf

248

249

// Parser behavior affected by configuration

250

val conf = SQLConf.get

251

val caseSensitive = conf.caseSensitiveAnalysis

252

val ansiMode = conf.ansiEnabled

253

val parser = new SparkSqlParser(conf)

254

```

255

256

## Custom Parser Extensions

257

258

### Extending the Parser

259

260

```scala

261

import org.apache.spark.sql.catalyst.parser._

262

263

// Custom AST builder with additional rules

264

class CustomAstBuilder extends AstBuilder {

265

override def visitCustomFunction(ctx: CustomFunctionContext): Expression = {

266

// Custom parsing logic for domain-specific functions

267

super.visitCustomFunction(ctx)

268

}

269

}

270

271

// Custom parser with extended functionality

272

class CustomSqlParser extends AbstractSqlParser {

273

override def astBuilder: AstBuilder = new CustomAstBuilder()

274

}

275

```

276

277

## Usage Examples

278

279

### Complete Parsing Workflow

280

281

```scala

282

import org.apache.spark.sql.catalyst.parser._

283

import org.apache.spark.sql.catalyst.plans.logical._

284

285

// Parse complex SQL query

286

val sqlText = """

287

SELECT

288

u.name,

289

COUNT(o.id) as order_count,

290

AVG(o.amount) as avg_amount

291

FROM users u

292

LEFT JOIN orders o ON u.id = o.user_id

293

WHERE u.age > 18

294

GROUP BY u.id, u.name

295

HAVING COUNT(o.id) > 5

296

ORDER BY avg_amount DESC

297

LIMIT 10

298

"""

299

300

val parser = CatalystSqlParser

301

val logicalPlan = parser.parsePlan(sqlText)

302

303

// Extract components

304

logicalPlan match {

305

case Limit(limitExpr,

306

Sort(order, global,

307

Filter(havingCondition,

308

Aggregate(groupingExprs, aggregateExprs,

309

Join(left, right, joinType, joinCondition))))) =>

310

println(s"Parsed complex query with joins, aggregation, and ordering")

311

}

312

```

313

314

### Error Handling

315

316

```scala

317

import org.apache.spark.sql.catalyst.parser.ParseException

318

319

try {

320

val plan = parser.parsePlan("INVALID SQL SYNTAX")

321

} catch {

322

case e: ParseException =>

323

println(s"Parse error at line ${e.line}, position ${e.startPosition}: ${e.getMessage}")

324

}

325

```

326

327

The parsing framework provides a complete SQL-to-AST transformation pipeline that handles the full spectrum of SQL constructs and converts them into Catalyst's internal representation for further processing.