0
# Expressions
1
2
Extensible expression evaluation framework supporting complex expression trees, type checking, code generation, and high-performance evaluation for SQL operations.
3
4
## Capabilities
5
6
### Expression Base Class
7
8
The abstract base class for all expressions in Catalyst.
9
10
```scala { .api }
11
/**
12
* An expression in Catalyst.
13
* If an expression wants to be exposed in the function registry, the concrete implementation
14
* must be a case class whose constructor arguments are all Expressions types.
15
*/
16
abstract class Expression extends TreeNode[Expression] {
17
/**
18
* Returns true when an expression is a candidate for static evaluation before the query is executed.
19
* The following conditions are used to determine suitability for constant folding:
20
* - A Coalesce is foldable if all of its children are foldable
21
* - A BinaryExpression is foldable if its both left and right child are foldable
22
* - A Not, IsNull, or IsNotNull is foldable if its child is foldable
23
* - A Literal is foldable
24
* - A Cast or UnaryMinus is foldable if its child is foldable
25
*/
26
def foldable: Boolean = false
27
28
/**
29
 * Returns true when the current expression always returns the same result for fixed inputs from children.
30
 * Note that this means that an expression should be considered as non-deterministic if:
 * - it relies on some mutable internal state, or
 * - it relies on some implicit input that is not part of the children expression list, or
 * - it has a non-deterministic child or children.
34
*/
35
def deterministic: Boolean = children.forall(_.deterministic)
36
37
/** Whether this expression can return null */
38
def nullable: Boolean
39
40
/** Set of attributes referenced by this expression */
41
def references: AttributeSet = AttributeSet(children.flatMap(_.references.iterator))
42
43
/** Returns the result of evaluating this expression on a given input Row */
44
def eval(input: InternalRow = null): Any
45
46
/** Data type of the result of evaluating this expression */
47
def dataType: DataType
48
49
/**
50
* Returns Java source code that can be used to generate the result of evaluating the expression.
51
* @param ctx a CodeGenContext
52
* @return GeneratedExpressionCode
53
*/
54
def gen(ctx: CodeGenContext): GeneratedExpressionCode
55
56
/**
57
* Returns Java source code that can be compiled to evaluate this expression.
58
* The default behavior is to call the eval method of the expression.
59
* @param ctx a CodeGenContext
60
 * @param ev a GeneratedExpressionCode with unique terms
61
* @return Java source code
62
*/
63
protected def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String
64
65
/**
66
* Returns true if this expression and all its children have been resolved to a specific schema
67
 * and input data type checks have passed, and false if it still contains any unresolved placeholders.
68
*/
69
lazy val resolved: Boolean = childrenResolved && checkInputDataTypes().isSuccess
70
71
/**
72
* Returns true if all the children of this expression have been resolved to a specific schema
73
 * and false if any of them still contains unresolved placeholders.
74
*/
75
def childrenResolved: Boolean = children.forall(_.resolved)
76
77
/**
78
* Returns true when two expressions will always compute the same result, even if they differ
79
* cosmetically (i.e. capitalization of names in attributes may be different).
80
*/
81
def semanticEquals(other: Expression): Boolean
82
83
/**
84
* Returns the hash for this expression. Expressions that compute the same result, even if
85
 * they differ cosmetically, should return the same hash.
86
*/
87
def semanticHash(): Int
88
89
/**
90
* Returns a user-facing string representation of this expression's name.
91
* This should usually match the name of the function in SQL.
92
*/
93
def prettyName: String = getClass.getSimpleName.toLowerCase
94
95
/**
96
* Returns a user-facing string representation of this expression, i.e. does not have developer
97
* centric debugging information like the expression id.
98
*/
99
def prettyString: String
100
101
/** Validate input data types and return type check result */
102
def checkInputDataTypes(): TypeCheckResult = TypeCheckResult.TypeCheckSuccess
103
}
104
```
105
106
**Usage Examples:**
107
108
```scala
109
import org.apache.spark.sql.catalyst.expressions._
110
import org.apache.spark.sql.catalyst.InternalRow
111
import org.apache.spark.sql.types._
112
113
// Create expressions
114
val literal = Literal(42, IntegerType)
115
val attr = AttributeReference("x", IntegerType, nullable = false)()
116
val add = Add(attr, literal)
117
118
// Check expression properties
119
val foldable = literal.foldable // true (literals are foldable)
120
val addFoldable = add.foldable // false (contains non-literal)
121
val deterministic = add.deterministic // true (always same result for same input)
122
val nullable = add.nullable // false (int + int is never null)
123
val dataType = add.dataType // IntegerType
124
125
// Evaluate expression
126
val row = InternalRow(10) // x = 10
127
val result = add.eval(row) // 52 (10 + 42)
128
129
// Get referenced attributes
130
val refs = add.references // AttributeSet containing "x" attribute
131
```
132
133
### Expression Type Hierarchy
134
135
Base traits defining the structure of expression trees.
136
137
```scala { .api }
138
/**
139
* An expression that has no child expressions.
140
*/
141
trait LeafExpression extends Expression {
142
def children: Seq[Expression] = Nil
143
}
144
145
/**
146
* An expression that has one child expression.
147
*/
148
trait UnaryExpression extends Expression {
149
def child: Expression
150
def children: Seq[Expression] = child :: Nil
151
}
152
153
/**
154
* An expression that has two child expressions.
155
*/
156
trait BinaryExpression extends Expression {
157
def left: Expression
158
def right: Expression
159
def children: Seq[Expression] = Seq(left, right)
160
}
161
162
/**
163
* A special case of BinaryExpression that requires two children to have the same output data type.
164
*/
165
trait BinaryOperator extends BinaryExpression {
166
/** Expected input type from both left and right child expressions */
167
def inputType: AbstractDataType
168
169
override def checkInputDataTypes(): TypeCheckResult = {
170
// Validates that both children have compatible types
171
}
172
}
173
174
/**
175
* An expression that has three child expressions.
176
*/
177
abstract class TernaryExpression extends Expression {
178
override def foldable: Boolean = children.forall(_.foldable)
179
override def nullable: Boolean = children.exists(_.nullable)
180
181
/**
182
 * Called by the default eval implementation. If subclasses of TernaryExpression keep the default
 * nullability, they can override this method to save null-check code.
184
*/
185
protected def nullSafeEval(input1: Any, input2: Any, input3: Any): Any =
186
sys.error("TernaryExpressions must override either eval or nullSafeEval")
187
}
188
```
189
190
**Usage Examples:**
191
192
```scala
193
import org.apache.spark.sql.catalyst.expressions._
194
import org.apache.spark.sql.types._
195
196
// Leaf expression (no children)
197
val constant = Literal(100, IntegerType)
198
val children1 = constant.children // Nil
199
200
// Unary expression (one child)
201
val negation = UnaryMinus(constant)
202
val children2 = negation.children // Seq(constant)
203
val child = negation.child // constant
204
205
// Binary expression (two children)
206
val attr = AttributeReference("y", IntegerType, nullable = false)()
207
val addition = Add(attr, constant)
208
val children3 = addition.children // Seq(attr, constant)
209
val left = addition.left // attr
210
val right = addition.right // constant
211
212
// Binary operator with type constraints
213
val comparison = EqualTo(attr, constant) // Both sides must be same type
214
val typeCheck = comparison.checkInputDataTypes() // Validates type compatibility
215
```
216
217
### Expression Traits and Mixins
218
219
Special traits that modify expression behavior and evaluation.
220
221
```scala { .api }
222
/**
223
* An expression that is not deterministic - returns different results for the same input.
224
*/
225
trait Nondeterministic extends Expression {
226
final override def deterministic: Boolean = false
227
final override def foldable: Boolean = false
228
229
/**
230
* Sets the initial values for this nondeterministic expression, called before evaluation.
231
*/
232
final def setInitialValues(): Unit = {
233
initInternal()
234
initialized = true
235
}
236
237
/** Initialize any internal state before evaluation */
238
protected def initInternal(): Unit
239
240
/** Internal evaluation after initialization */
241
protected def evalInternal(input: InternalRow): Any
242
243
private[this] var initialized = false
244
245
final override def eval(input: InternalRow = null): Any = {
246
require(initialized, "nondeterministic expression should be initialized before evaluate")
247
evalInternal(input)
248
}
249
}
250
251
/**
252
* An expression that is not supposed to be evaluated.
253
* Used for expressions that are only meaningful during analysis.
254
*/
255
trait Unevaluable extends Expression {
256
override def eval(input: InternalRow): Any = throw new UnsupportedOperationException
257
}
258
259
/**
260
* An expression that does not have code gen implemented and falls back to interpreted mode.
261
*/
262
trait CodegenFallback extends Expression {
263
// Falls back to interpreted evaluation when code generation is not available
264
}
265
266
/**
267
* Expressions that expect specific input types and can provide helpful error messages.
268
*/
269
trait ExpectsInputTypes extends Expression {
270
/** Expected input types for child expressions */
271
def inputTypes: Seq[AbstractDataType]
272
273
override def checkInputDataTypes(): TypeCheckResult = {
274
// Validates children match expected input types
275
}
276
}
277
```
278
279
**Usage Examples:**
280
281
```scala
282
import org.apache.spark.sql.catalyst.expressions._
283
import org.apache.spark.sql.types._
284
285
// Nondeterministic expression
// (The Nondeterministic trait shown above declares eval, deterministic, and
// foldable as final, so subclasses implement initInternal() and evalInternal()
// instead of overriding eval directly.)
case class RandomExpression(seed: Long) extends LeafExpression with Nondeterministic {
  private var random: scala.util.Random = _

  protected def initInternal(): Unit = {
    random = new scala.util.Random(seed)
  }

  protected def evalInternal(input: InternalRow): Any = random.nextDouble()
  def dataType: DataType = DoubleType
  def nullable: Boolean = false
}
297
298
// Unevaluable expression (used during analysis only)
299
case class UnresolvedAttribute(name: String) extends LeafExpression with Unevaluable {
300
def dataType: DataType = throw new UnresolvedException(this, "dataType")
301
def nullable: Boolean = throw new UnresolvedException(this, "nullable")
302
}
303
304
// Expression with expected input types
305
case class Substring(str: Expression, pos: Expression, len: Expression)
306
extends Expression with ExpectsInputTypes {
307
def inputTypes: Seq[AbstractDataType] = Seq(StringType, IntegerType, IntegerType)
308
def children: Seq[Expression] = Seq(str, pos, len)
309
def dataType: DataType = StringType
310
def nullable: Boolean = children.exists(_.nullable)
311
312
def eval(input: InternalRow): Any = {
313
val string = str.eval(input).asInstanceOf[UTF8String]
314
val position = pos.eval(input).asInstanceOf[Int]
315
val length = len.eval(input).asInstanceOf[Int]
316
string.substring(position - 1, position - 1 + length)
317
}
318
}
319
```
320
321
### Core Expression Types
322
323
Fundamental expression implementations for common operations.
324
325
```scala { .api }
326
/**
327
* Represents a constant value.
328
*/
329
case class Literal(value: Any, dataType: DataType) extends LeafExpression {
330
override def foldable: Boolean = true
331
override def nullable: Boolean = value == null
332
333
override def eval(input: InternalRow): Any = value
334
}
335
336
object Literal {
337
/** Create literal from Scala value with automatic type inference */
338
def apply(v: Any): Literal
339
340
/** Create null literal of specified type */
341
def create(v: Any, dataType: DataType): Literal
342
343
/** Default literals for primitive types */
344
val TrueLiteral: Literal = Literal(true, BooleanType)
345
val FalseLiteral: Literal = Literal(false, BooleanType)
346
}
347
348
/**
349
* Reference to an attribute/column in a relation.
350
*/
351
case class AttributeReference(
352
name: String,
353
dataType: DataType,
354
nullable: Boolean,
355
metadata: Metadata = Metadata.empty)(
356
val exprId: ExprId = NamedExpression.newExprId,
357
val qualifiers: Seq[String] = Nil) extends Attribute {
358
359
override def eval(input: InternalRow): Any =
360
throw new UnsupportedOperationException("AttributeReference.eval() not supported")
361
362
/** Create a new copy with different nullability */
363
def withNullability(newNullability: Boolean): AttributeReference
364
365
/** Create a new copy with different name */
366
def withName(newName: String): AttributeReference
367
368
/** Create a new copy with different data type */
369
def withDataType(newType: DataType): AttributeReference
370
}
371
372
/**
373
* Reference bound to a specific input position.
374
*/
375
case class BoundReference(ordinal: Int, dataType: DataType, nullable: Boolean)
376
extends LeafExpression {
377
378
override def eval(input: InternalRow): Any = input.get(ordinal, dataType)
379
}
380
381
/**
382
* Type conversion expression.
383
*/
384
case class Cast(child: Expression, dataType: DataType) extends UnaryExpression {
385
override def nullable: Boolean = child.nullable
386
387
override def eval(input: InternalRow): Any = {
388
val value = child.eval(input)
389
if (value == null) null else cast(value)
390
}
391
392
private def cast(value: Any): Any = {
393
// Type conversion logic based on source and target types
394
}
395
}
396
```
397
398
**Usage Examples:**
399
400
```scala
401
import org.apache.spark.sql.catalyst.expressions._
402
import org.apache.spark.sql.types._
403
404
// Create literals
405
val intLit = Literal(42, IntegerType)
406
val stringLit = Literal("hello", StringType)
407
val nullLit = Literal(null, StringType)
408
val autoLit = Literal("world") // Type inferred as StringType
409
410
// Evaluate literals
411
val value1 = intLit.eval() // 42
412
val value2 = stringLit.eval() // "hello"
413
val value3 = nullLit.eval() // null
414
415
// Create attribute references
416
val userId = AttributeReference("user_id", IntegerType, nullable = false)()
417
val userName = AttributeReference("user_name", StringType, nullable = true)()
418
419
// Create bound references (for compiled expressions)
420
val boundId = BoundReference(0, IntegerType, nullable = false) // First column
421
val boundName = BoundReference(1, StringType, nullable = true) // Second column
422
423
// Cast expressions
424
val stringToInt = Cast(stringLit, IntegerType)
425
val intToString = Cast(intLit, StringType)
426
427
// Build complex expressions
428
val userIdPlusOne = Add(userId, Literal(1, IntegerType))
429
val userGreeting = Concat(Seq(Literal("Hello "), userName))
430
```
431
432
### Arithmetic and Comparison Expressions
433
434
Mathematical and comparison operations.
435
436
```scala { .api }
437
// Arithmetic operations
438
case class Add(left: Expression, right: Expression) extends BinaryOperator {
439
def inputType: AbstractDataType = NumericType
440
override def dataType: DataType = left.dataType
441
442
override def eval(input: InternalRow): Any = {
443
val leftValue = left.eval(input)
444
val rightValue = right.eval(input)
445
if (leftValue == null || rightValue == null) null
446
else numeric.plus(leftValue, rightValue)
447
}
448
}
449
450
case class Subtract(left: Expression, right: Expression) extends BinaryOperator {
451
def inputType: AbstractDataType = NumericType
452
override def dataType: DataType = left.dataType
453
}
454
455
case class Multiply(left: Expression, right: Expression) extends BinaryOperator {
456
def inputType: AbstractDataType = NumericType
457
override def dataType: DataType = left.dataType
458
}
459
460
case class Divide(left: Expression, right: Expression) extends BinaryOperator {
461
def inputType: AbstractDataType = NumericType
462
override def dataType: DataType = left.dataType
463
}
464
465
case class UnaryMinus(child: Expression) extends UnaryExpression {
466
override def dataType: DataType = child.dataType
467
override def nullable: Boolean = child.nullable
468
}
469
470
// Comparison operations
471
case class EqualTo(left: Expression, right: Expression) extends BinaryOperator {
472
def inputType: AbstractDataType = AnyDataType
473
override def dataType: DataType = BooleanType
474
}
475
476
case class LessThan(left: Expression, right: Expression) extends BinaryOperator {
477
def inputType: AbstractDataType = TypeCollection.Ordered
478
override def dataType: DataType = BooleanType
479
}
480
481
case class GreaterThan(left: Expression, right: Expression) extends BinaryOperator {
482
def inputType: AbstractDataType = TypeCollection.Ordered
483
override def dataType: DataType = BooleanType
484
}
485
486
case class LessThanOrEqual(left: Expression, right: Expression) extends BinaryOperator {
487
def inputType: AbstractDataType = TypeCollection.Ordered
488
override def dataType: DataType = BooleanType
489
}
490
491
case class GreaterThanOrEqual(left: Expression, right: Expression) extends BinaryOperator {
492
def inputType: AbstractDataType = TypeCollection.Ordered
493
override def dataType: DataType = BooleanType
494
}
495
```
496
497
**Usage Examples:**
498
499
```scala
500
import org.apache.spark.sql.catalyst.expressions._
501
import org.apache.spark.sql.catalyst.InternalRow
502
import org.apache.spark.sql.types._
503
504
// Arithmetic expressions
505
val a = AttributeReference("a", IntegerType, nullable = false)()
506
val b = AttributeReference("b", IntegerType, nullable = false)()
507
508
val sum = Add(a, b)
509
val diff = Subtract(a, b)
510
val product = Multiply(a, b)
511
val quotient = Divide(a, b)
512
val negation = UnaryMinus(a)
513
514
// Evaluate arithmetic
515
val row = InternalRow(10, 3)
516
val sumResult = sum.eval(row) // 13
517
val diffResult = diff.eval(row) // 7
518
val productResult = product.eval(row) // 30
519
val quotientResult = quotient.eval(row) // 3 (integral division for IntegerType operands)
520
val negResult = negation.eval(row) // -10
521
522
// Comparison expressions
523
val equal = EqualTo(a, b)
524
val less = LessThan(a, b)
525
val greater = GreaterThan(a, b)
526
val lessEqual = LessThanOrEqual(a, b)
527
val greaterEqual = GreaterThanOrEqual(a, b)
528
529
// Evaluate comparisons
530
val equalResult = equal.eval(row) // false (10 != 3)
531
val lessResult = less.eval(row) // false (10 > 3)
532
val greaterResult = greater.eval(row) // true (10 > 3)
533
534
// Complex arithmetic expressions
535
val formula = Add(Multiply(a, Literal(2)), Subtract(b, Literal(1)))
536
// Equivalent to: (a * 2) + (b - 1) = (10 * 2) + (3 - 1) = 20 + 2 = 22
537
val formulaResult = formula.eval(row) // 22
538
```
539
540
### Logical and String Expressions
541
542
Boolean logic and string manipulation operations.
543
544
```scala { .api }
545
// Logical operations
546
case class And(left: Expression, right: Expression) extends BinaryOperator {
547
def inputType: AbstractDataType = BooleanType
548
override def dataType: DataType = BooleanType
549
550
override def eval(input: InternalRow): Any = {
551
val leftValue = left.eval(input)
552
if (leftValue == false) false
553
else {
554
val rightValue = right.eval(input)
555
if (rightValue == false) false
556
else if (leftValue == null || rightValue == null) null
557
else true
558
}
559
}
560
}
561
562
case class Or(left: Expression, right: Expression) extends BinaryOperator {
563
def inputType: AbstractDataType = BooleanType
564
override def dataType: DataType = BooleanType
565
}
566
567
case class Not(child: Expression) extends UnaryExpression {
568
override def dataType: DataType = BooleanType
569
override def nullable: Boolean = child.nullable
570
571
override def eval(input: InternalRow): Any = {
572
val value = child.eval(input)
573
if (value == null) null else !value.asInstanceOf[Boolean]
574
}
575
}
576
577
// Null checking
578
case class IsNull(child: Expression) extends UnaryExpression {
579
override def dataType: DataType = BooleanType
580
override def nullable: Boolean = false
581
582
override def eval(input: InternalRow): Any = child.eval(input) == null
583
}
584
585
case class IsNotNull(child: Expression) extends UnaryExpression {
586
override def dataType: DataType = BooleanType
587
override def nullable: Boolean = false
588
589
override def eval(input: InternalRow): Any = child.eval(input) != null
590
}
591
592
// String operations
593
case class Concat(children: Seq[Expression]) extends Expression {
594
override def dataType: DataType = StringType
595
override def nullable: Boolean = children.exists(_.nullable)
596
597
override def eval(input: InternalRow): Any = {
598
val values = children.map(_.eval(input))
599
if (values.contains(null)) null
600
else UTF8String.concat(values.map(_.asInstanceOf[UTF8String]): _*)
601
}
602
}
603
604
case class Length(child: Expression) extends UnaryExpression {
605
override def dataType: DataType = IntegerType
606
override def nullable: Boolean = child.nullable
607
608
override def eval(input: InternalRow): Any = {
609
val value = child.eval(input)
610
if (value == null) null else value.asInstanceOf[UTF8String].numChars()
611
}
612
}
613
614
case class Upper(child: Expression) extends UnaryExpression {
615
override def dataType: DataType = StringType
616
override def nullable: Boolean = child.nullable
617
}
618
619
case class Lower(child: Expression) extends UnaryExpression {
620
override def dataType: DataType = StringType
621
override def nullable: Boolean = child.nullable
622
}
623
```
624
625
**Usage Examples:**
626
627
```scala
628
import org.apache.spark.sql.catalyst.expressions._
629
import org.apache.spark.sql.catalyst.InternalRow
630
import org.apache.spark.sql.types._
631
import org.apache.spark.unsafe.types.UTF8String
632
633
// Logical expressions
634
val active = AttributeReference("active", BooleanType, nullable = false)()
635
val verified = AttributeReference("verified", BooleanType, nullable = false)()
636
637
val bothTrue = And(active, verified)
638
val eitherTrue = Or(active, verified)
639
val notActive = Not(active)
640
641
// Evaluate logical expressions
642
val row = InternalRow(true, false)
643
val andResult = bothTrue.eval(row) // false (true AND false)
644
val orResult = eitherTrue.eval(row) // true (true OR false)
645
val notResult = notActive.eval(row) // false (NOT true)
646
647
// Null checking
648
val name = AttributeReference("name", StringType, nullable = true)()
649
val nameIsNull = IsNull(name)
650
val nameIsNotNull = IsNotNull(name)
651
652
val nullRow = InternalRow(null) // name column is null
653
val nullCheck = nameIsNull.eval(nullRow) // true
654
val notNullCheck = nameIsNotNull.eval(nullRow) // false
655
656
// String operations
657
val firstName = AttributeReference("first_name", StringType, nullable = false)()
658
val lastName = AttributeReference("last_name", StringType, nullable = false)()
659
660
val fullName = Concat(Seq(firstName, Literal(" "), lastName))
661
val nameLength = Length(firstName)
662
val upperName = Upper(firstName)
663
val lowerName = Lower(firstName)
664
665
val nameRow = InternalRow(UTF8String.fromString("John"), UTF8String.fromString("Doe"))
666
val concatResult = fullName.eval(nameRow) // "John Doe"
667
val lengthResult = nameLength.eval(nameRow) // 4
668
val upperResult = upperName.eval(nameRow) // "JOHN"
669
val lowerResult = lowerName.eval(nameRow) // "john"
670
```
671
672
### Advanced Expression Features
673
674
Complex expression capabilities including user-defined functions and sort ordering.
675
676
```scala { .api }
677
/**
678
* User-defined function wrapper.
679
*/
680
case class ScalaUDF(
681
function: AnyRef,
682
dataType: DataType,
683
children: Seq[Expression],
684
inputTypes: Seq[DataType] = Nil,
685
udfName: Option[String] = None) extends Expression {
686
687
override def nullable: Boolean = true
688
689
override def eval(input: InternalRow): Any = {
690
val evaluatedChildren = children.map(_.eval(input))
691
// Invoke user function with evaluated arguments
692
}
693
}
694
695
/**
696
* Sort ordering specification for ORDER BY and sorting operations.
697
*/
698
case class SortOrder(
699
child: Expression,
700
direction: SortDirection,
701
nullOrdering: NullOrdering = NullOrdering(direction)) extends Expression with Unevaluable {
702
703
override def dataType: DataType = child.dataType
704
override def nullable: Boolean = child.nullable
705
override val children: Seq[Expression] = Seq(child)
706
}
707
708
sealed abstract class SortDirection
709
case object Ascending extends SortDirection
710
case object Descending extends SortDirection
711
712
case class NullOrdering(direction: SortDirection) {
  // NOTE(review): Spark SQL's actual defaults are the inverse of this simplified
  // model — Ascending defaults to nulls-first and Descending to nulls-last.
  // Verify against the real SortOrder before relying on these values.
  val nullsFirst: Boolean = direction == Descending
  val nullsLast: Boolean = direction == Ascending
}
716
717
/**
718
* Attribute set for efficient attribute operations.
719
*/
720
class AttributeSet private (private val baseSet: Set[ExprId]) {
721
/** Check if an attribute is contained in this set */
722
def contains(a: Attribute): Boolean
723
724
/** Add an attribute to this set */
725
def +(a: Attribute): AttributeSet
726
727
/** Remove an attribute from this set */
728
def -(a: Attribute): AttributeSet
729
730
/** Union with another AttributeSet */
731
def ++(other: AttributeSet): AttributeSet
732
733
/** Difference with another AttributeSet */
734
def --(other: AttributeSet): AttributeSet
735
736
/** Intersection with another AttributeSet */
737
def intersect(other: AttributeSet): AttributeSet
738
739
/** Check if this set is a subset of another */
740
def subsetOf(other: AttributeSet): Boolean
741
}
742
743
object AttributeSet {
744
/** Create AttributeSet from sequence of attributes */
745
def apply(attrs: Seq[Attribute]): AttributeSet
746
747
/** Create AttributeSet from individual attributes */
748
def apply(attrs: Attribute*): AttributeSet
749
750
/** Empty AttributeSet */
751
val empty: AttributeSet
752
}
753
```
754
755
**Usage Examples:**
756
757
```scala
758
import org.apache.spark.sql.catalyst.expressions._
759
import org.apache.spark.sql.types._
760
761
// User-defined function
762
val upperFunc = (s: String) => if (s == null) null else s.toUpperCase
763
val name = AttributeReference("name", StringType, nullable = true)()
764
val upperUDF = ScalaUDF(upperFunc, StringType, Seq(name), Seq(StringType), Some("upper"))
765
766
// Sort ordering
767
val id = AttributeReference("id", IntegerType, nullable = false)()
768
val score = AttributeReference("score", DoubleType, nullable = true)()
769
770
val idAsc = SortOrder(id, Ascending) // id ASC (nulls last)
771
val scoreDesc = SortOrder(score, Descending) // score DESC (nulls first)
772
773
// Custom null ordering
774
val scoreDescNullsLast = SortOrder(score, Descending, NullOrdering(Ascending))
775
776
// Attribute sets
777
val attr1 = AttributeReference("a", IntegerType, false)()
778
val attr2 = AttributeReference("b", StringType, true)()
779
val attr3 = AttributeReference("c", DoubleType, false)()
780
781
val set1 = AttributeSet(attr1, attr2)
782
val set2 = AttributeSet(attr2, attr3)
783
784
val contains = set1.contains(attr1) // true
785
val union = set1 ++ set2 // {attr1, attr2, attr3}
786
val intersection = set1.intersect(set2) // {attr2}
787
val difference = set1 -- set2 // {attr1}
788
val subset = set1.subsetOf(union) // true
789
790
// Complex expression with multiple operations
791
val complexExpr = And(
792
GreaterThan(score, Literal(80.0)),
793
IsNotNull(name)
794
)
795
796
// Expression referencing multiple attributes
797
val referencedAttrs = complexExpr.references // AttributeSet containing score and name
798
```