or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

connectors.md, data-types.md, expressions.md, index.md, query-plans.md

docs/expressions.md

0

# Expressions

1

2

Catalyst's expression system provides a tree-based representation for all SQL operations, functions, and computations. The expression framework supports type-safe evaluation, code generation, and optimization transformations essential for query processing.

3

4

## Core Imports

5

6

```scala

7

import org.apache.spark.sql.catalyst.expressions._

8

import org.apache.spark.sql.catalyst.InternalRow

9

import org.apache.spark.sql.types._

10

```

11

12

## Expression Hierarchy

13

14

### Base Expression Class

15

16

```scala { .api }

17

abstract class Expression extends TreeNode[Expression] {

18

def dataType: DataType

19

def nullable: Boolean

20

def eval(input: InternalRow): Any

21

def genCode(ctx: CodegenContext): ExprCode

22

def children: Seq[Expression]

23

def references: AttributeSet

24

def prettyName: String

25

def sql: String

26

def semanticEquals(other: Expression): Boolean

27

def deterministic: Boolean

28

def foldable: Boolean

29

def semanticHash(): Int

30

}

31

```

32

33

The base `Expression` class provides evaluation capabilities, type information, and code generation support for all SQL expressions.

34

35

### Expression Categories by Arity

36

37

#### LeafExpression

38

39

```scala { .api }

40

abstract class LeafExpression extends Expression {

41

final override def children: Seq[Expression] = Nil

42

}

43

```

44

45

Expressions with no child expressions, such as literals and column references.

46

47

#### UnaryExpression

48

49

```scala { .api }

50

abstract class UnaryExpression extends Expression {

51

def child: Expression

52

final override def children: Seq[Expression] = child :: Nil

53

}

54

```

55

56

Expressions with one child expression, such as mathematical functions and type casts.

57

58

#### BinaryExpression

59

60

```scala { .api }

61

abstract class BinaryExpression extends Expression {

62

def left: Expression

63

def right: Expression

64

final override def children: Seq[Expression] = Seq(left, right)

65

}

66

```

67

68

Expressions with two child expressions, such as arithmetic and comparison operators.

69

70

#### TernaryExpression

71

72

```scala { .api }

73

abstract class TernaryExpression extends Expression {

74

def first: Expression

75

def second: Expression

76

def third: Expression

77

final override def children: Seq[Expression] = Seq(first, second, third)

78

}

79

```

80

81

Expressions with three child expressions, such as conditional expressions and substring operations.

82

83

## Core Expression Types

84

85

### Literals and References

86

87

```scala { .api }

88

case class Literal(value: Any, dataType: DataType) extends LeafExpression {

89

def this(v: Any) = this(v, Literal.inferType(v))

90

}

91

92

object Literal {

93

def apply(v: Any): Literal

94

def create(v: Any, dataType: DataType): Literal

95

def default(dataType: DataType): Literal

96

def fromObject(obj: Any): Literal

97

val TrueLiteral: Literal

98

val FalseLiteral: Literal

99

}

100

101

case class AttributeReference(

102

name: String,

103

dataType: DataType,

104

nullable: Boolean = true,

105

metadata: Metadata = Metadata.empty

106

)(val exprId: ExprId = NamedExpression.newExprId,

107

val qualifier: Seq[String] = Seq.empty) extends Attribute

108

```

109

110

**Usage Example:**

111

```scala

112

// Create literals

113

val intLit = Literal(42)

114

val stringLit = Literal("hello")

115

val nullLit = Literal.create(null, StringType)

116

117

// Create attribute references

118

val nameAttr = AttributeReference("name", StringType, nullable = false)()

119

val ageAttr = AttributeReference("age", IntegerType, nullable = true)()

120

```

121

122

### Arithmetic Expressions

123

124

```scala { .api }

125

case class Add(left: Expression, right: Expression) extends BinaryArithmetic {

126

override def inputType: AbstractDataType = NumericType

127

protected override def nullSafeEval(input1: Any, input2: Any): Any

128

}

129

130

case class Subtract(left: Expression, right: Expression) extends BinaryArithmetic

131

case class Multiply(left: Expression, right: Expression) extends BinaryArithmetic

132

case class Divide(left: Expression, right: Expression) extends BinaryArithmetic

133

case class Remainder(left: Expression, right: Expression) extends BinaryArithmetic

134

135

case class UnaryMinus(child: Expression) extends UnaryArithmetic

136

case class UnaryPlus(child: Expression) extends UnaryArithmetic

137

case class Abs(child: Expression) extends UnaryMathExpression

138

```

139

140

**Usage Example:**

141

```scala

142

val col1 = AttributeReference("a", IntegerType, false)()

143

val col2 = AttributeReference("b", IntegerType, false)()

144

145

val addExpr = Add(col1, col2)

146

val subtractExpr = Subtract(col1, Literal(10))

147

val multiplyExpr = Multiply(col1, col2)

148

val absExpr = Abs(UnaryMinus(col1))

149

```

150

151

### Comparison Expressions

152

153

```scala { .api }

154

case class EqualTo(left: Expression, right: Expression) extends BinaryComparison

155

case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComparison

156

case class LessThan(left: Expression, right: Expression) extends BinaryComparison

157

case class LessThanOrEqual(left: Expression, right: Expression) extends BinaryComparison

158

case class GreaterThan(left: Expression, right: Expression) extends BinaryComparison

159

case class GreaterThanOrEqual(left: Expression, right: Expression) extends BinaryComparison

160

161

case class In(value: Expression, list: Seq[Expression]) extends Predicate

162

case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with Predicate

163

```

164

165

**Usage Example:**

166

```scala

167

val nameCol = AttributeReference("name", StringType, false)()

168

val ageCol = AttributeReference("age", IntegerType, true)()

169

170

val equalExpr = EqualTo(nameCol, Literal("Alice"))

171

val rangeExpr = And(GreaterThan(ageCol, Literal(18)), LessThan(ageCol, Literal(65)))

172

val inExpr = In(nameCol, Seq(Literal("Alice"), Literal("Bob"), Literal("Charlie")))

173

```

174

175

### Logical Expressions

176

177

```scala { .api }

178

case class And(left: Expression, right: Expression) extends BinaryExpression with Predicate {

179

override def dataType: DataType = BooleanType

180

}

181

182

case class Or(left: Expression, right: Expression) extends BinaryExpression with Predicate {

183

override def dataType: DataType = BooleanType

184

}

185

186

case class Not(child: Expression) extends UnaryExpression with Predicate {

187

override def dataType: DataType = BooleanType

188

}

189

190

case class IsNull(child: Expression) extends UnaryExpression with Predicate

191

case class IsNotNull(child: Expression) extends UnaryExpression with Predicate

192

```

193

194

**Usage Example:**

195

```scala

196

val nameCol = AttributeReference("name", StringType, true)()

197

val ageCol = AttributeReference("age", IntegerType, true)()

198

199

val notNullName = IsNotNull(nameCol)

200

val validAge = And(GreaterThan(ageCol, Literal(0)), LessThan(ageCol, Literal(150)))

201

val condition = And(notNullName, validAge)

202

```

203

204

### String Expressions

205

206

```scala { .api }

207

case class Upper(child: Expression) extends UnaryExpression with ImplicitCastInputTypes

208

case class Lower(child: Expression) extends UnaryExpression with ImplicitCastInputTypes

209

case class Length(child: Expression) extends UnaryExpression with ImplicitCastInputTypes

210

211

case class Substring(str: Expression, pos: Expression, len: Expression) extends TernaryExpression with ImplicitCastInputTypes

212

213

case class Concat(children: Seq[Expression]) extends Expression with ImplicitCastInputTypes

214

215

case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expression) extends TernaryExpression with ImplicitCastInputTypes

216

217

case class StartsWith(left: Expression, right: Expression) extends BinaryExpression with Predicate

218

case class EndsWith(left: Expression, right: Expression) extends BinaryExpression with Predicate

219

case class Contains(left: Expression, right: Expression) extends BinaryExpression with Predicate

220

```

221

222

**Usage Example:**

223

```scala

224

val nameCol = AttributeReference("name", StringType, false)()

225

val descCol = AttributeReference("description", StringType, true)()

226

227

val upperName = Upper(nameCol)

228

val nameLength = Length(nameCol)

229

val substring = Substring(nameCol, Literal(1), Literal(3))

230

val concat = Concat(Seq(nameCol, Literal(" - "), descCol))

231

val startsWithA = StartsWith(nameCol, Literal("A"))

232

```

233

234

### Date and Time Expressions

235

236

```scala { .api }

237

case class CurrentDate(timeZoneId: Option[String] = None) extends LeafExpression with ImplicitCastInputTypes

238

case class CurrentTimestamp() extends LeafExpression with ImplicitCastInputTypes

239

240

case class Year(child: Expression) extends UnaryExpression with ImplicitCastInputTypes

241

case class Month(child: Expression) extends UnaryExpression with ImplicitCastInputTypes

242

case class DayOfMonth(child: Expression) extends UnaryExpression with ImplicitCastInputTypes

243

case class Hour(child: Expression) extends UnaryExpression with ImplicitCastInputTypes

244

case class Minute(child: Expression) extends UnaryExpression with ImplicitCastInputTypes

245

case class Second(child: Expression) extends UnaryExpression with ImplicitCastInputTypes

246

247

case class DateAdd(startDate: Expression, days: Expression) extends BinaryExpression with ImplicitCastInputTypes

248

case class DateSub(startDate: Expression, days: Expression) extends BinaryExpression with ImplicitCastInputTypes

249

```

250

251

**Usage Example:**

252

```scala

253

val dateCol = AttributeReference("created_at", DateType, false)()

254

val timestampCol = AttributeReference("updated_at", TimestampType, true)()

255

256

val currentDate = CurrentDate()

257

val extractYear = Year(dateCol)

258

val extractMonth = Month(dateCol)

259

val addDays = DateAdd(dateCol, Literal(30))

260

val hourFromTimestamp = Hour(timestampCol)

261

```

262

263

### Conditional Expressions

264

265

```scala { .api }

266

case class If(predicate: Expression, trueValue: Expression, falseValue: Expression) extends TernaryExpression

267

268

case class CaseWhen(branches: Seq[(Expression, Expression)], elseValue: Option[Expression] = None) extends Expression

269

270

case class Coalesce(children: Seq[Expression]) extends Expression

271

272

case class Greatest(children: Seq[Expression]) extends Expression

273

case class Least(children: Seq[Expression]) extends Expression

274

```

275

276

**Usage Example:**

277

```scala

278

val ageCol = AttributeReference("age", IntegerType, true)()

279

val statusCol = AttributeReference("status", StringType, true)()

280

281

val ifExpr = If(GreaterThan(ageCol, Literal(18)), Literal("Adult"), Literal("Minor"))

282

283

val caseWhen = CaseWhen(Seq(

284

(EqualTo(statusCol, Literal("A")), Literal("Active")),

285

(EqualTo(statusCol, Literal("I")), Literal("Inactive"))

286

), Some(Literal("Unknown")))

287

288

val coalesce = Coalesce(Seq(statusCol, Literal("Default")))

289

```

290

291

### Cast and Type Conversion

292

293

```scala { .api }

294

case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String] = None) extends UnaryExpression with TimeZoneAwareExpression

295

296

case class CheckOverflow(child: Expression, dataType: DecimalType, nullOnOverflow: Boolean) extends UnaryExpression

297

298

case class PromotePrecision(child: Expression) extends UnaryExpression

299

```

300

301

**Usage Example:**

302

```scala

303

val stringCol = AttributeReference("value", StringType, false)()

304

val intCol = AttributeReference("count", IntegerType, false)()

305

306

val castToInt = Cast(stringCol, IntegerType)

307

val castToDecimal = Cast(intCol, DecimalType(10, 2))

308

val castToString = Cast(intCol, StringType)

309

```

310

311

### Aggregate Expressions

312

313

```scala { .api }

314

case class Sum(child: Expression) extends DeclarativeAggregate

315

case class Count(children: Seq[Expression]) extends DeclarativeAggregate

316

case class Average(child: Expression) extends DeclarativeAggregate

317

case class Min(child: Expression) extends DeclarativeAggregate

318

case class Max(child: Expression) extends DeclarativeAggregate

319

320

case class First(child: Expression, ignoreNulls: Expression) extends DeclarativeAggregate

321

case class Last(child: Expression, ignoreNulls: Expression) extends DeclarativeAggregate

322

323

case class CollectList(child: Expression) extends TypedImperativeAggregate[mutable.ArrayBuffer[Any]]

324

case class CollectSet(child: Expression) extends TypedImperativeAggregate[mutable.Set[Any]]

325

```

326

327

**Usage Example:**

328

```scala

329

val salaryCol = AttributeReference("salary", DecimalType(10, 2), true)()

330

val nameCol = AttributeReference("name", StringType, false)()

331

332

val sumSalary = Sum(salaryCol)

333

val countNames = Count(Seq(nameCol))

334

val avgSalary = Average(salaryCol)

335

val minSalary = Min(salaryCol)

336

val collectNames = CollectList(nameCol)

337

```

338

339

### Window Functions

340

341

```scala { .api }

342

case class RowNumber() extends RowNumberLike

343

case class Rank(children: Seq[Expression]) extends RankLike

344

case class DenseRank(children: Seq[Expression]) extends RankLike

345

346

case class Lead(input: Expression, offset: Expression, default: Expression) extends OffsetWindowFunction

347

case class Lag(input: Expression, offset: Expression, default: Expression) extends OffsetWindowFunction

348

349

case class FirstValue(child: Expression, ignoreNulls: Expression) extends AggregateWindowFunction

350

case class LastValue(child: Expression, ignoreNulls: Expression) extends AggregateWindowFunction

351

```

352

353

**Usage Example:**

354

```scala

355

val salaryCol = AttributeReference("salary", DecimalType(10, 2), false)()

356

val dateCol = AttributeReference("hire_date", DateType, false)()

357

358

val rowNum = RowNumber()

359

val rankBySalary = Rank(Seq(salaryCol))

360

val denseRankBySalary = DenseRank(Seq(salaryCol))

361

val prevSalary = Lag(salaryCol, Literal(1), Literal(0))

362

val nextSalary = Lead(salaryCol, Literal(1), salaryCol)

363

```

364

365

## Collection Expressions

366

367

### Array Operations

368

369

```scala { .api }

370

case class CreateArray(children: Seq[Expression]) extends Expression

371

372

case class ArrayContains(left: Expression, right: Expression) extends BinaryExpression with Predicate

373

374

case class GetArrayItem(child: Expression, ordinal: Expression) extends BinaryExpression

375

376

case class Size(child: Expression) extends UnaryExpression

377

378

case class ArraySort(child: Expression) extends UnaryExpression

379

case class ArrayMin(child: Expression) extends UnaryExpression

380

case class ArrayMax(child: Expression) extends UnaryExpression

381

```

382

383

**Usage Example:**

384

```scala

385

val arrayCol = AttributeReference("tags", ArrayType(StringType), true)()

386

val indexCol = AttributeReference("index", IntegerType, false)()

387

388

val createArray = CreateArray(Seq(Literal("tag1"), Literal("tag2"), Literal("tag3")))

389

val arrayContains = ArrayContains(arrayCol, Literal("important"))

390

val getItem = GetArrayItem(arrayCol, indexCol)

391

val arraySize = Size(arrayCol)

392

val sortedArray = ArraySort(arrayCol)

393

```

394

395

### Map Operations

396

397

```scala { .api }

398

case class CreateMap(children: Seq[Expression]) extends Expression

399

400

case class GetMapValue(child: Expression, key: Expression) extends BinaryExpression

401

402

case class MapKeys(child: Expression) extends UnaryExpression

403

case class MapValues(child: Expression) extends UnaryExpression

404

```

405

406

**Usage Example:**

407

```scala

408

val mapCol = AttributeReference("properties", MapType(StringType, StringType), true)()

409

val keyCol = AttributeReference("key", StringType, false)()

410

411

val createMap = CreateMap(Seq(

412

Literal("name"), Literal("John"),

413

Literal("age"), Literal("30")

414

))

415

val getValue = GetMapValue(mapCol, keyCol)

416

val getKeys = MapKeys(mapCol)

417

val getValues = MapValues(mapCol)

418

```

419

420

## Expression Utilities

421

422

### Expression Analysis

423

424

```scala { .api }

425

object ExpressionSet {

426

def apply(expressions: Seq[Expression]): ExpressionSet

427

}

428

429

case class AttributeSet(baseSet: Set[Attribute]) extends Traversable[Attribute] {

430

def +(attribute: Attribute): AttributeSet

431

def ++(other: AttributeSet): AttributeSet

432

def -(attribute: Attribute): AttributeSet

433

def --(other: AttributeSet): AttributeSet

434

def contains(attribute: Attribute): Boolean

435

def intersect(other: AttributeSet): AttributeSet

436

def subsetOf(other: AttributeSet): Boolean

437

}

438

```

439

440

### Code Generation

441

442

```scala { .api }

443

case class ExprCode(code: String, isNull: String, value: String)

444

445

class CodegenContext {

446

def freshName(name: String): String

447

def addReferenceObj(objName: String, obj: Any, className: String = null): String

448

def addNewFunction(funcName: String, funcCode: String): String

449

}

450

```

451

452

## Common Expression Patterns

453

454

### Building Complex Expressions

455

```scala

456

val nameCol = AttributeReference("name", StringType, false)()

457

val ageCol = AttributeReference("age", IntegerType, true)()

458

val salaryCol = AttributeReference("salary", DecimalType(10, 2), true)()

459

460

// Complex filtering condition

461

val complexFilter = And(

462

IsNotNull(nameCol),

463

And(

464

GreaterThan(ageCol, Literal(21)),

465

Or(

466

GreaterThan(salaryCol, Literal(50000)),

467

StartsWith(nameCol, Literal("Senior"))

468

)

469

)

470

)

471

472

// Computed column

473

val computedSalary = If(

474

IsNull(salaryCol),

475

Literal(0),

476

Add(salaryCol, Multiply(salaryCol, Literal(0.1)))

477

)

478

```

479

480

### Expression Transformation

481

```scala

482

// Transform expressions using pattern matching

483

def removeRedundantCasts(expr: Expression): Expression = {

484

expr.transform {

485

case Cast(child, dataType, _) if child.dataType == dataType => child

486

case other => other

487

}

488

}

489

490

// Fold constant expressions

491

def foldConstants(expr: Expression): Expression = {

492

expr.transform {

493

case e if e.foldable => Literal.create(e.eval(EmptyRow), e.dataType)

494

case other => other

495

}

496

}

497

```

498

499

The expression system in Catalyst provides a comprehensive framework for representing and evaluating all SQL operations with support for optimization, code generation, and type safety.