or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

analysis.md expressions.md index.md optimization.md query-planning.md row-operations.md tree-operations.md types.md

docs/expressions.md

0

# Expressions

1

2

Extensible expression evaluation framework supporting complex expression trees, type checking, code generation, and high-performance evaluation for SQL operations.

3

4

## Capabilities

5

6

### Expression Base Class

7

8

The abstract base class for all expressions in Catalyst.

9

10

```scala { .api }

11

/**

12

* An expression in Catalyst.

13

* If an expression wants to be exposed in the function registry, the concrete implementation

14

* must be a case class whose constructor arguments are all Expressions types.

15

*/

16

abstract class Expression extends TreeNode[Expression] {

17

/**

18

* Returns true when an expression is a candidate for static evaluation before the query is executed.

19

* The following conditions are used to determine suitability for constant folding:

20

* - A Coalesce is foldable if all of its children are foldable

21

* - A BinaryExpression is foldable if both its left and right children are foldable

22

* - A Not, IsNull, or IsNotNull is foldable if its child is foldable

23

* - A Literal is foldable

24

* - A Cast or UnaryMinus is foldable if its child is foldable

25

*/

26

def foldable: Boolean = false

27

28

/**

29

* Returns true when the current expression always returns the same result for fixed inputs from children.

30

* Note that this means that an expression should be considered as non-deterministic if:

31

* - if it relies on some mutable internal state, or

32

* - if it relies on some implicit input that is not part of the children expression list.

33

* - if it has non-deterministic child or children.

34

*/

35

def deterministic: Boolean = children.forall(_.deterministic)

36

37

/** Whether this expression can return null */

38

def nullable: Boolean

39

40

/** Set of attributes referenced by this expression */

41

def references: AttributeSet = AttributeSet(children.flatMap(_.references.iterator))

42

43

/** Returns the result of evaluating this expression on a given input Row */

44

def eval(input: InternalRow = null): Any

45

46

/** Data type of the result of evaluating this expression */

47

def dataType: DataType

48

49

/**

50

* Returns Java source code that can be used to generate the result of evaluating the expression.

51

* @param ctx a CodeGenContext

52

* @return GeneratedExpressionCode

53

*/

54

def gen(ctx: CodeGenContext): GeneratedExpressionCode

55

56

/**

57

* Returns Java source code that can be compiled to evaluate this expression.

58

* The default behavior is to call the eval method of the expression.

59

* @param ctx a CodeGenContext

60

* @param ev a GeneratedExpressionCode with unique terms

61

* @return Java source code

62

*/

63

protected def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String

64

65

/**

66

* Returns true if this expression and all its children have been resolved to a specific schema

67

* and input data types checking passed, and false if it still contains any unresolved placeholders.

68

*/

69

lazy val resolved: Boolean = childrenResolved && checkInputDataTypes().isSuccess

70

71

/**

72

* Returns true if all the children of this expression have been resolved to a specific schema

73

* and false if any still contains any unresolved placeholders.

74

*/

75

def childrenResolved: Boolean = children.forall(_.resolved)

76

77

/**

78

* Returns true when two expressions will always compute the same result, even if they differ

79

* cosmetically (i.e. capitalization of names in attributes may be different).

80

*/

81

def semanticEquals(other: Expression): Boolean

82

83

/**

84

* Returns the hash for this expression. Expressions that compute the same result, even if

85

* they differ cosmetically should return the same hash.

86

*/

87

def semanticHash(): Int

88

89

/**

90

* Returns a user-facing string representation of this expression's name.

91

* This should usually match the name of the function in SQL.

92

*/

93

def prettyName: String = getClass.getSimpleName.toLowerCase

94

95

/**

96

* Returns a user-facing string representation of this expression, i.e. does not have developer

97

* centric debugging information like the expression id.

98

*/

99

def prettyString: String

100

101

/** Validate input data types and return type check result */

102

def checkInputDataTypes(): TypeCheckResult = TypeCheckResult.TypeCheckSuccess

103

}

104

```

105

106

**Usage Examples:**

107

108

```scala

109

import org.apache.spark.sql.catalyst.expressions._

110

import org.apache.spark.sql.catalyst.InternalRow

111

import org.apache.spark.sql.types._

112

113

// Create expressions

114

val literal = Literal(42, IntegerType)

115

val attr = AttributeReference("x", IntegerType, nullable = false)()

116

val add = Add(attr, literal)

117

118

// Check expression properties

119

val foldable = literal.foldable // true (literals are foldable)

120

val addFoldable = add.foldable // false (contains non-literal)

121

val deterministic = add.deterministic // true (always same result for same input)

122

val nullable = add.nullable // false (neither child is nullable)

123

val dataType = add.dataType // IntegerType

124

125

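// Evaluate expression (attribute references must be bound to input ordinals before eval)

126

val row = InternalRow(10) // x = 10

127

val result = BindReferences.bindReference(add, Seq(attr)).eval(row) // 52 (10 + 42)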

128

129

// Get referenced attributes

130

val refs = add.references // AttributeSet containing "x" attribute

131

```

132

133

### Expression Type Hierarchy

134

135

Base traits defining the structure of expression trees.

136

137

```scala { .api }

138

/**

139

* An expression that has no child expressions.

140

*/

141

trait LeafExpression extends Expression {

142

def children: Seq[Expression] = Nil

143

}

144

145

/**

146

* An expression that has one child expression.

147

*/

148

trait UnaryExpression extends Expression {

149

def child: Expression

150

def children: Seq[Expression] = child :: Nil

151

}

152

153

/**

154

* An expression that has two child expressions.

155

*/

156

trait BinaryExpression extends Expression {

157

def left: Expression

158

def right: Expression

159

def children: Seq[Expression] = Seq(left, right)

160

}

161

162

/**

163

* A special case of BinaryExpression that requires two children to have the same output data type.

164

*/

165

trait BinaryOperator extends BinaryExpression {

166

/** Expected input type from both left and right child expressions */

167

def inputType: AbstractDataType

168

169

override def checkInputDataTypes(): TypeCheckResult = {

170

// Validates that both children have compatible types

171

}

172

}

173

174

/**

175

* An expression that has three child expressions.

176

*/

177

abstract class TernaryExpression extends Expression {

178

override def foldable: Boolean = children.forall(_.foldable)

179

override def nullable: Boolean = children.exists(_.nullable)

180

181

/**

182

* Called by the default eval implementation. If subclasses of TernaryExpression keep the default

183

* nullability, they can override this method to save null-check code.

184

*/

185

protected def nullSafeEval(input1: Any, input2: Any, input3: Any): Any =

186

sys.error(s"TernaryExpressions must override either eval or nullSafeEval")

187

}

188

```

189

190

**Usage Examples:**

191

192

```scala

193

import org.apache.spark.sql.catalyst.expressions._

194

import org.apache.spark.sql.types._

195

196

// Leaf expression (no children)

197

val constant = Literal(100, IntegerType)

198

val children1 = constant.children // Nil

199

200

// Unary expression (one child)

201

val negation = UnaryMinus(constant)

202

val children2 = negation.children // Seq(constant)

203

val child = negation.child // constant

204

205

// Binary expression (two children)

206

val attr = AttributeReference("y", IntegerType, nullable = false)()

207

val addition = Add(attr, constant)

208

val children3 = addition.children // Seq(attr, constant)

209

val left = addition.left // attr

210

val right = addition.right // constant

211

212

// Binary operator with type constraints

213

val comparison = EqualTo(attr, constant) // Both sides must be same type

214

val typeCheck = comparison.checkInputDataTypes() // Validates type compatibility

215

```

216

217

### Expression Traits and Mixins

218

219

Special traits that modify expression behavior and evaluation.

220

221

```scala { .api }

222

/**

223

* An expression that is not deterministic, i.e. it may return different results for the same input.

224

*/

225

trait Nondeterministic extends Expression {

226

final override def deterministic: Boolean = false

227

final override def foldable: Boolean = false

228

229

/**

230

* Sets the initial values for this nondeterministic expression, called before evaluation.

231

*/

232

final def setInitialValues(): Unit = {

233

initInternal()

234

initialized = true

235

}

236

237

/** Initialize any internal state before evaluation */

238

protected def initInternal(): Unit

239

240

/** Internal evaluation after initialization */

241

protected def evalInternal(input: InternalRow): Any

242

243

private[this] var initialized = false

244

245

final override def eval(input: InternalRow = null): Any = {

246

require(initialized, "nondeterministic expression should be initialized before evaluate")

247

evalInternal(input)

248

}

249

}

250

251

/**

252

* An expression that is not supposed to be evaluated.

253

* Used for expressions that are only meaningful during analysis.

254

*/

255

trait Unevaluable extends Expression {

256

override def eval(input: InternalRow): Any = throw new UnsupportedOperationException

257

}

258

259

/**

260

* An expression that does not have code gen implemented and falls back to interpreted mode.

261

*/

262

trait CodegenFallback extends Expression {

263

// Falls back to interpreted evaluation when code generation is not available

264

}

265

266

/**

267

* Expressions that expect specific input types and can provide helpful error messages.

268

*/

269

trait ExpectsInputTypes extends Expression {

270

/** Expected input types for child expressions */

271

def inputTypes: Seq[AbstractDataType]

272

273

override def checkInputDataTypes(): TypeCheckResult = {

274

// Validates children match expected input types

275

}

276

}

277

```

278

279

**Usage Examples:**

280

281

```scala

282

import org.apache.spark.sql.catalyst.expressions._

283

import org.apache.spark.util.random.RandomSeed

284

285

// Nondeterministic expression

286

case class RandomExpression(seed: Long) extends LeafExpression with Nondeterministic {

287

private var random: scala.util.Random = _

288

289

protected def initInternal(): Unit = {

290

random = new scala.util.Random(seed)

291

}

292

293

protected def evalInternal(input: InternalRow): Any = random.nextDouble()

294

def dataType: DataType = DoubleType

295

def nullable: Boolean = false

296

}

297

298

// Unevaluable expression (used during analysis only)

299

case class UnresolvedAttribute(name: String) extends LeafExpression with Unevaluable {

300

def dataType: DataType = throw new UnresolvedException(this, "dataType")

301

def nullable: Boolean = throw new UnresolvedException(this, "nullable")

302

}

303

304

// Expression with expected input types

305

case class Substring(str: Expression, pos: Expression, len: Expression)

306

extends Expression with ExpectsInputTypes {

307

def inputTypes: Seq[AbstractDataType] = Seq(StringType, IntegerType, IntegerType)

308

def children: Seq[Expression] = Seq(str, pos, len)

309

def dataType: DataType = StringType

310

def nullable: Boolean = children.exists(_.nullable)

311

312

def eval(input: InternalRow): Any = {

313

val string = str.eval(input).asInstanceOf[UTF8String]

314

val position = pos.eval(input).asInstanceOf[Int]

315

val length = len.eval(input).asInstanceOf[Int]

316

string.substring(position - 1, position - 1 + length)

317

}

318

}

319

```

320

321

### Core Expression Types

322

323

Fundamental expression implementations for common operations.

324

325

```scala { .api }

326

/**

327

* Represents a constant value.

328

*/

329

case class Literal(value: Any, dataType: DataType) extends LeafExpression {

330

override def foldable: Boolean = true

331

override def nullable: Boolean = value == null

332

333

override def eval(input: InternalRow): Any = value

334

}

335

336

object Literal {

337

/** Create literal from Scala value with automatic type inference */

338

def apply(v: Any): Literal

339

340

/** Create a literal from a value (which may be null) with an explicitly specified data type */

341

def create(v: Any, dataType: DataType): Literal

342

343

/** Predefined boolean literals */

344

val TrueLiteral: Literal = Literal(true, BooleanType)

345

val FalseLiteral: Literal = Literal(false, BooleanType)

346

}

347

348

/**

349

* Reference to an attribute/column in a relation.

350

*/

351

case class AttributeReference(

352

name: String,

353

dataType: DataType,

354

nullable: Boolean,

355

metadata: Metadata = Metadata.empty)(

356

val exprId: ExprId = NamedExpression.newExprId,

357

val qualifiers: Seq[String] = Nil) extends Attribute {

358

359

override def eval(input: InternalRow): Any =

360

throw new UnsupportedOperationException("AttributeReference.eval() not supported")

361

362

/** Create a new copy with different nullability */

363

def withNullability(newNullability: Boolean): AttributeReference

364

365

/** Create a new copy with different name */

366

def withName(newName: String): AttributeReference

367

368

/** Create a new copy with different data type */

369

def withDataType(newType: DataType): AttributeReference

370

}

371

372

/**

373

* Reference bound to a specific input position.

374

*/

375

case class BoundReference(ordinal: Int, dataType: DataType, nullable: Boolean)

376

extends LeafExpression {

377

378

override def eval(input: InternalRow): Any = input.get(ordinal, dataType)

379

}

380

381

/**

382

* Type conversion expression.

383

*/

384

case class Cast(child: Expression, dataType: DataType) extends UnaryExpression {

385

override def nullable: Boolean = child.nullable

386

387

override def eval(input: InternalRow): Any = {

388

val value = child.eval(input)

389

if (value == null) null else cast(value)

390

}

391

392

private def cast(value: Any): Any = {

393

// Type conversion logic based on source and target types

394

}

395

}

396

```

397

398

**Usage Examples:**

399

400

```scala

401

import org.apache.spark.sql.catalyst.expressions._

402

import org.apache.spark.sql.types._

403

404

// Create literals

405

val intLit = Literal(42, IntegerType)

406

val stringLit = Literal.create("hello", StringType)

407

val nullLit = Literal(null, StringType)

408

val autoLit = Literal("world") // Type inferred as StringType

409

410

// Evaluate literals

411

val value1 = intLit.eval() // 42

412

val value2 = stringLit.eval() // "hello"

413

val value3 = nullLit.eval() // null

414

415

// Create attribute references

416

val userId = AttributeReference("user_id", IntegerType, nullable = false)()

417

val userName = AttributeReference("user_name", StringType, nullable = true)()

418

419

// Create bound references (for compiled expressions)

420

val boundId = BoundReference(0, IntegerType, nullable = false) // First column

421

val boundName = BoundReference(1, StringType, nullable = true) // Second column

422

423

// Cast expressions

424

val stringToInt = Cast(stringLit, IntegerType)

425

val intToString = Cast(intLit, StringType)

426

427

// Build complex expressions

428

val userIdPlusOne = Add(userId, Literal(1, IntegerType))

429

val userGreeting = Concat(Seq(Literal("Hello "), userName))

430

```

431

432

### Arithmetic and Comparison Expressions

433

434

Mathematical and comparison operations.

435

436

```scala { .api }

437

// Arithmetic operations

438

case class Add(left: Expression, right: Expression) extends BinaryOperator {

439

def inputType: AbstractDataType = NumericType

440

override def dataType: DataType = left.dataType

441

442

override def eval(input: InternalRow): Any = {

443

val leftValue = left.eval(input)

444

val rightValue = right.eval(input)

445

if (leftValue == null || rightValue == null) null

446

else numeric.plus(leftValue, rightValue)

447

}

448

}

449

450

case class Subtract(left: Expression, right: Expression) extends BinaryOperator {

451

def inputType: AbstractDataType = NumericType

452

override def dataType: DataType = left.dataType

453

}

454

455

case class Multiply(left: Expression, right: Expression) extends BinaryOperator {

456

def inputType: AbstractDataType = NumericType

457

override def dataType: DataType = left.dataType

458

}

459

460

case class Divide(left: Expression, right: Expression) extends BinaryOperator {

461

def inputType: AbstractDataType = NumericType

462

override def dataType: DataType = left.dataType

463

}

464

465

case class UnaryMinus(child: Expression) extends UnaryExpression {

466

override def dataType: DataType = child.dataType

467

override def nullable: Boolean = child.nullable

468

}

469

470

// Comparison operations

471

case class EqualTo(left: Expression, right: Expression) extends BinaryOperator {

472

def inputType: AbstractDataType = AnyDataType

473

override def dataType: DataType = BooleanType

474

}

475

476

case class LessThan(left: Expression, right: Expression) extends BinaryOperator {

477

def inputType: AbstractDataType = TypeCollection.Ordered

478

override def dataType: DataType = BooleanType

479

}

480

481

case class GreaterThan(left: Expression, right: Expression) extends BinaryOperator {

482

def inputType: AbstractDataType = TypeCollection.Ordered

483

override def dataType: DataType = BooleanType

484

}

485

486

case class LessThanOrEqual(left: Expression, right: Expression) extends BinaryOperator {

487

def inputType: AbstractDataType = TypeCollection.Ordered

488

override def dataType: DataType = BooleanType

489

}

490

491

case class GreaterThanOrEqual(left: Expression, right: Expression) extends BinaryOperator {

492

def inputType: AbstractDataType = TypeCollection.Ordered

493

override def dataType: DataType = BooleanType

494

}

495

```

496

497

**Usage Examples:**

498

499

```scala

500

import org.apache.spark.sql.catalyst.expressions._

501

import org.apache.spark.sql.catalyst.InternalRow

502

import org.apache.spark.sql.types._

503

504

// Arithmetic expressions

505

val a = AttributeReference("a", IntegerType, nullable = false)()

506

val b = AttributeReference("b", IntegerType, nullable = false)()

507

508

val sum = Add(a, b)

509

val diff = Subtract(a, b)

510

val product = Multiply(a, b)

511

val quotient = Divide(a, b)

512

val negation = UnaryMinus(a)

513

514

// Evaluate arithmetic (a and b must first be bound to ordinals 0 and 1, e.g. via BindReferences.bindReference)

515

val row = InternalRow(10, 3)

516

val sumResult = sum.eval(row) // 13

517

val diffResult = diff.eval(row) // 7

518

val productResult = product.eval(row) // 30

519

val quotientResult = quotient.eval(row) // 3 (integer division: the result keeps the left operand's IntegerType)

520

val negResult = negation.eval(row) // -10

521

522

// Comparison expressions

523

val equal = EqualTo(a, b)

524

val less = LessThan(a, b)

525

val greater = GreaterThan(a, b)

526

val lessEqual = LessThanOrEqual(a, b)

527

val greaterEqual = GreaterThanOrEqual(a, b)

528

529

// Evaluate comparisons

530

val equalResult = equal.eval(row) // false (10 != 3)

531

val lessResult = less.eval(row) // false (10 > 3)

532

val greaterResult = greater.eval(row) // true (10 > 3)

533

534

// Complex arithmetic expressions

535

val formula = Add(Multiply(a, Literal(2)), Subtract(b, Literal(1)))

536

// Equivalent to: (a * 2) + (b - 1) = (10 * 2) + (3 - 1) = 20 + 2 = 22

537

val formulaResult = formula.eval(row) // 22

538

```

539

540

### Logical and String Expressions

541

542

Boolean logic and string manipulation operations.

543

544

```scala { .api }

545

// Logical operations

546

case class And(left: Expression, right: Expression) extends BinaryOperator {

547

def inputType: AbstractDataType = BooleanType

548

override def dataType: DataType = BooleanType

549

550

override def eval(input: InternalRow): Any = {

551

val leftValue = left.eval(input)

552

if (leftValue == false) false

553

else {

554

val rightValue = right.eval(input)

555

if (rightValue == false) false

556

else if (leftValue == null || rightValue == null) null

557

else true

558

}

559

}

560

}

561

562

case class Or(left: Expression, right: Expression) extends BinaryOperator {

563

def inputType: AbstractDataType = BooleanType

564

override def dataType: DataType = BooleanType

565

}

566

567

case class Not(child: Expression) extends UnaryExpression {

568

override def dataType: DataType = BooleanType

569

override def nullable: Boolean = child.nullable

570

571

override def eval(input: InternalRow): Any = {

572

val value = child.eval(input)

573

if (value == null) null else !value.asInstanceOf[Boolean]

574

}

575

}

576

577

// Null checking

578

case class IsNull(child: Expression) extends UnaryExpression {

579

override def dataType: DataType = BooleanType

580

override def nullable: Boolean = false

581

582

override def eval(input: InternalRow): Any = child.eval(input) == null

583

}

584

585

case class IsNotNull(child: Expression) extends UnaryExpression {

586

override def dataType: DataType = BooleanType

587

override def nullable: Boolean = false

588

589

override def eval(input: InternalRow): Any = child.eval(input) != null

590

}

591

592

// String operations

593

case class Concat(children: Seq[Expression]) extends Expression {

594

override def dataType: DataType = StringType

595

override def nullable: Boolean = children.exists(_.nullable)

596

597

override def eval(input: InternalRow): Any = {

598

val values = children.map(_.eval(input))

599

if (values.contains(null)) null

600

else UTF8String.concat(values.map(_.asInstanceOf[UTF8String]): _*)

601

}

602

}

603

604

case class Length(child: Expression) extends UnaryExpression {

605

override def dataType: DataType = IntegerType

606

override def nullable: Boolean = child.nullable

607

608

override def eval(input: InternalRow): Any = {

609

val value = child.eval(input)

610

if (value == null) null else value.asInstanceOf[UTF8String].numChars()

611

}

612

}

613

614

case class Upper(child: Expression) extends UnaryExpression {

615

override def dataType: DataType = StringType

616

override def nullable: Boolean = child.nullable

617

}

618

619

case class Lower(child: Expression) extends UnaryExpression {

620

override def dataType: DataType = StringType

621

override def nullable: Boolean = child.nullable

622

}

623

```

624

625

**Usage Examples:**

626

627

```scala

628

import org.apache.spark.sql.catalyst.expressions._

629

import org.apache.spark.sql.catalyst.InternalRow

630

import org.apache.spark.sql.types._

631

import org.apache.spark.unsafe.types.UTF8String

632

633

// Logical expressions

634

val active = AttributeReference("active", BooleanType, nullable = false)()

635

val verified = AttributeReference("verified", BooleanType, nullable = false)()

636

637

val bothTrue = And(active, verified)

638

val eitherTrue = Or(active, verified)

639

val notActive = Not(active)

640

641

// Evaluate logical expressions (attributes must first be bound to row ordinals, e.g. via BindReferences.bindReference)

642

val row = InternalRow(true, false)

643

val andResult = bothTrue.eval(row) // false (true AND false)

644

val orResult = eitherTrue.eval(row) // true (true OR false)

645

val notResult = notActive.eval(row) // false (NOT true)

646

647

// Null checking

648

val name = AttributeReference("name", StringType, nullable = true)()

649

val nameIsNull = IsNull(name)

650

val nameIsNotNull = IsNotNull(name)

651

652

val nullRow = InternalRow(UTF8String.fromString(null))

653

val nullCheck = nameIsNull.eval(nullRow) // true

654

val notNullCheck = nameIsNotNull.eval(nullRow) // false

655

656

// String operations

657

val firstName = AttributeReference("first_name", StringType, nullable = false)()

658

val lastName = AttributeReference("last_name", StringType, nullable = false)()

659

660

val fullName = Concat(Seq(firstName, Literal(" "), lastName))

661

val nameLength = Length(firstName)

662

val upperName = Upper(firstName)

663

val lowerName = Lower(firstName)

664

665

val nameRow = InternalRow(UTF8String.fromString("John"), UTF8String.fromString("Doe"))

666

val concatResult = fullName.eval(nameRow) // "John Doe"

667

val lengthResult = nameLength.eval(nameRow) // 4

668

val upperResult = upperName.eval(nameRow) // "JOHN"

669

val lowerResult = lowerName.eval(nameRow) // "john"

670

```

671

672

### Advanced Expression Features

673

674

Complex expression capabilities including user-defined functions and sort ordering.

675

676

```scala { .api }

677

/**

678

* User-defined function wrapper.

679

*/

680

case class ScalaUDF(

681

function: AnyRef,

682

dataType: DataType,

683

children: Seq[Expression],

684

inputTypes: Seq[DataType] = Nil,

685

udfName: Option[String] = None) extends Expression {

686

687

override def nullable: Boolean = true

688

689

override def eval(input: InternalRow): Any = {

690

val evaluatedChildren = children.map(_.eval(input))

691

// Invoke user function with evaluated arguments

692

}

693

}

694

695

/**

696

* Sort ordering specification for ORDER BY and sorting operations.

697

*/

698

case class SortOrder(

699

child: Expression,

700

direction: SortDirection,

701

nullOrdering: NullOrdering = NullOrdering(direction)) extends Expression with Unevaluable {

702

703

override def dataType: DataType = child.dataType

704

override def nullable: Boolean = child.nullable

705

override val children: Seq[Expression] = Seq(child)

706

}

707

708

sealed abstract class SortDirection

709

case object Ascending extends SortDirection

710

case object Descending extends SortDirection

711

712

case class NullOrdering(direction: SortDirection) {

713

val nullsFirst: Boolean = direction == Descending

714

val nullsLast: Boolean = direction == Ascending

715

}

716

717

/**

718

* Attribute set for efficient attribute operations.

719

*/

720

class AttributeSet private (private val baseSet: Set[ExprId]) {

721

/** Check if an attribute is contained in this set */

722

def contains(a: Attribute): Boolean

723

724

/** Add an attribute to this set */

725

def +(a: Attribute): AttributeSet

726

727

/** Remove an attribute from this set */

728

def -(a: Attribute): AttributeSet

729

730

/** Union with another AttributeSet */

731

def ++(other: AttributeSet): AttributeSet

732

733

/** Difference with another AttributeSet */

734

def --(other: AttributeSet): AttributeSet

735

736

/** Intersection with another AttributeSet */

737

def intersect(other: AttributeSet): AttributeSet

738

739

/** Check if this set is a subset of another */

740

def subsetOf(other: AttributeSet): Boolean

741

}

742

743

object AttributeSet {

744

/** Create AttributeSet from sequence of attributes */

745

def apply(attrs: Seq[Attribute]): AttributeSet

746

747

/** Create AttributeSet from individual attributes */

748

def apply(attrs: Attribute*): AttributeSet

749

750

/** Empty AttributeSet */

751

val empty: AttributeSet

752

}

753

```

754

755

**Usage Examples:**

756

757

```scala

758

import org.apache.spark.sql.catalyst.expressions._

759

import org.apache.spark.sql.types._

760

761

// User-defined function

762

val upperFunc = (s: String) => if (s == null) null else s.toUpperCase

763

val name = AttributeReference("name", StringType, nullable = true)()

764

val upperUDF = ScalaUDF(upperFunc, StringType, Seq(name), Seq(StringType), Some("upper"))

765

766

// Sort ordering

767

val id = AttributeReference("id", IntegerType, nullable = false)()

768

val score = AttributeReference("score", DoubleType, nullable = true)()

769

770

val idAsc = SortOrder(id, Ascending) // id ASC (nulls last)

771

val scoreDesc = SortOrder(score, Descending) // score DESC (nulls first)

772

773

// Custom null ordering

774

val scoreDescNullsLast = SortOrder(score, Descending, NullOrdering(Ascending))

775

776

// Attribute sets

777

val attr1 = AttributeReference("a", IntegerType, false)()

778

val attr2 = AttributeReference("b", StringType, true)()

779

val attr3 = AttributeReference("c", DoubleType, false)()

780

781

val set1 = AttributeSet(attr1, attr2)

782

val set2 = AttributeSet(attr2, attr3)

783

784

val contains = set1.contains(attr1) // true

785

val union = set1 ++ set2 // {attr1, attr2, attr3}

786

val intersection = set1.intersect(set2) // {attr2}

787

val difference = set1 -- set2 // {attr1}

788

val subset = set1.subsetOf(union) // true

789

790

// Complex expression with multiple operations

791

val complexExpr = And(

792

GreaterThan(score, Literal(80.0)),

793

IsNotNull(name)

794

)

795

796

// Expression referencing multiple attributes

797

val referencedAttrs = complexExpr.references // AttributeSet containing score and name

798

```