or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

ai-ml.md catalog.md data-io.md dataframe-operations.md expressions.md index.md session.md sql.md udf.md

docs/expressions.md

0

# Expressions and Functions

1

2

Column operations and computational expressions for data transformation. Expressions provide type-safe, optimizable operations that can be applied to DataFrame columns with support for complex nested operations and function composition.

3

4

## Capabilities

5

6

### Core Expression Functions

7

8

Create and manipulate column expressions for DataFrame operations.

9

10

```python { .api }

11

def col(name: str) -> Expression:

12

"""

13

Reference a DataFrame column by name.

14

15

Parameters:

16

- name: Column name to reference

17

18

Returns:

19

Expression: Column expression

20

"""

21

22

def lit(value: Any) -> Expression:

23

"""

24

Create literal value expression.

25

26

Parameters:

27

- value: Literal value (number, string, boolean, etc.)

28

29

Returns:

30

Expression: Literal expression

31

"""

32

33

def coalesce(*exprs: Expression) -> Expression:

34

"""

35

Return first non-null value from expressions.

36

37

Parameters:

38

- exprs: Expressions to evaluate in order

39

40

Returns:

41

Expression: Coalesced expression

42

"""

43

```

44

45

### Conditional Expressions

46

47

Create conditional logic with when/otherwise patterns.

48

49

```python { .api }

50

class Expression:

51

def when(self, predicate: Expression) -> Expression:

52

"""

53

Create conditional expression.

54

55

Parameters:

56

- predicate: Boolean condition

57

58

Returns:

59

Expression: Conditional expression

60

"""

61

62

def otherwise(self, expr: Expression) -> Expression:

63

"""

64

Provide else clause for conditional expression.

65

66

Parameters:

67

- expr: Expression to use when condition is false

68

69

Returns:

70

Expression: Complete conditional expression

71

"""

72

73

def when(predicate: Expression) -> Expression:

74

"""

75

Start conditional expression chain.

76

77

Parameters:

78

- predicate: Boolean condition

79

80

Returns:

81

Expression: Conditional expression builder

82

"""

83

```

84

85

### Complex Data Types

86

87

Work with arrays, lists, and nested structures.

88

89

```python { .api }

90

def list_(*exprs: Expression) -> Expression:

91

"""

92

Create list expression from multiple expressions.

93

94

Parameters:

95

- exprs: Expressions to combine into list

96

97

Returns:

98

Expression: List expression

99

"""

100

101

def struct(**kwargs: Expression) -> Expression:

102

"""

103

Create struct expression from named expressions.

104

105

Parameters:

106

- kwargs: Named expressions for struct fields

107

108

Returns:

109

Expression: Struct expression

110

"""

111

112

def element(n: int) -> Expression:

113

"""

114

Extract element from array/list by index.

115

116

Parameters:

117

- n: Index to extract (0-based)

118

119

Returns:

120

Expression: Element extraction expression

121

"""

122

```

123

124

### String Operations

125

126

String manipulation and text processing functions.

127

128

```python { .api }

129

class Expression:

130

def str_contains(self, pattern: str, regex: bool = False) -> Expression:

131

"""

132

Check if string contains pattern.

133

134

Parameters:

135

- pattern: Pattern to search for

136

- regex: Whether pattern is regular expression

137

138

Returns:

139

Expression: Boolean expression

140

"""

141

142

def str_length(self) -> Expression:

143

"""

144

Get string length.

145

146

Returns:

147

Expression: String length expression

148

"""

149

150

def str_upper(self) -> Expression:

151

"""

152

Convert string to uppercase.

153

154

Returns:

155

Expression: Uppercase string expression

156

"""

157

158

def str_lower(self) -> Expression:

159

"""

160

Convert string to lowercase.

161

162

Returns:

163

Expression: Lowercase string expression

164

"""

165

166

def str_slice(self, start: int, end: Optional[int] = None) -> Expression:

167

"""

168

Extract substring.

169

170

Parameters:

171

- start: Start index

172

- end: End index (end of string if None)

173

174

Returns:

175

Expression: Substring expression

176

"""

177

```

178

179

### Mathematical Operations

180

181

Arithmetic and mathematical functions.

182

183

```python { .api }

184

class Expression:

185

def __add__(self, other: Union[Expression, Any]) -> Expression:

186

"""Addition operation."""

187

188

def __sub__(self, other: Union[Expression, Any]) -> Expression:

189

"""Subtraction operation."""

190

191

def __mul__(self, other: Union[Expression, Any]) -> Expression:

192

"""Multiplication operation."""

193

194

def __truediv__(self, other: Union[Expression, Any]) -> Expression:

195

"""Division operation."""

196

197

def __mod__(self, other: Union[Expression, Any]) -> Expression:

198

"""Modulo operation."""

199

200

def abs(self) -> Expression:

201

"""Absolute value."""

202

203

def ceil(self) -> Expression:

204

"""Ceiling function."""

205

206

def floor(self) -> Expression:

207

"""Floor function."""

208

209

def round(self, decimals: int = 0) -> Expression:

210

"""Round to specified decimal places."""

211

212

def sqrt(self) -> Expression:

213

"""Square root."""

214

215

def sin(self) -> Expression:

216

"""Sine function."""

217

218

def cos(self) -> Expression:

219

"""Cosine function."""

220

221

def tan(self) -> Expression:

222

"""Tangent function."""

223

```

224

225

### Comparison Operations

226

227

Comparison and logical operations.

228

229

```python { .api }

230

class Expression:

231

def __eq__(self, other: Union[Expression, Any]) -> Expression:

232

"""Equality comparison."""

233

234

def __ne__(self, other: Union[Expression, Any]) -> Expression:

235

"""Inequality comparison."""

236

237

def __lt__(self, other: Union[Expression, Any]) -> Expression:

238

"""Less than comparison."""

239

240

def __le__(self, other: Union[Expression, Any]) -> Expression:

241

"""Less than or equal comparison."""

242

243

def __gt__(self, other: Union[Expression, Any]) -> Expression:

244

"""Greater than comparison."""

245

246

def __ge__(self, other: Union[Expression, Any]) -> Expression:

247

"""Greater than or equal comparison."""

248

249

def __and__(self, other: Expression) -> Expression:

250

"""Logical AND operation."""

251

252

def __or__(self, other: Expression) -> Expression:

253

"""Logical OR operation."""

254

255

def __invert__(self) -> Expression:

256

"""Logical NOT operation."""

257

258

def isin(self, values: List[Any]) -> Expression:

259

"""Check if value is in list."""

260

261

def is_null(self) -> Expression:

262

"""Check if value is null."""

263

264

def is_not_null(self) -> Expression:

265

"""Check if value is not null."""

266

```

267

268

### Type Operations

269

270

Type casting and validation.

271

272

```python { .api }

273

class Expression:

274

def cast(self, dtype: DataType) -> Expression:

275

"""

276

Cast expression to different data type.

277

278

Parameters:

279

- dtype: Target data type

280

281

Returns:

282

Expression: Cast expression

283

"""

284

285

def try_cast(self, dtype: DataType) -> Expression:

286

"""

287

Attempt to cast, returning null on failure.

288

289

Parameters:

290

- dtype: Target data type

291

292

Returns:

293

Expression: Safe cast expression

294

"""

295

```

296

297

### Aggregation Expressions

298

299

Create aggregation expressions for group operations.

300

301

```python { .api }

302

class Expression:

303

def sum(self) -> Expression:

304

"""Sum aggregation."""

305

306

def mean(self) -> Expression:

307

"""Mean aggregation."""

308

309

def min(self) -> Expression:

310

"""Minimum aggregation."""

311

312

def max(self) -> Expression:

313

"""Maximum aggregation."""

314

315

def count(self) -> Expression:

316

"""Count aggregation."""

317

318

def std(self) -> Expression:

319

"""Standard deviation aggregation."""

320

321

def first(self) -> Expression:

322

"""First value aggregation."""

323

324

def last(self) -> Expression:

325

"""Last value aggregation."""

326

327

def list_agg(self) -> Expression:

328

"""Aggregate into list."""

329

```

330

331

### DateTime Operations

332

333

Date and time manipulation functions.

334

335

```python { .api }

336

def interval(value: int, unit: str) -> Expression:

337

"""

338

Create time interval expression.

339

340

Parameters:

341

- value: Interval value

342

- unit: Time unit ('days', 'hours', 'minutes', 'seconds')

343

344

Returns:

345

Expression: Interval expression

346

"""

347

348

class Expression:

349

def dt_year(self) -> Expression:

350

"""Extract year from datetime."""

351

352

def dt_month(self) -> Expression:

353

"""Extract month from datetime."""

354

355

def dt_day(self) -> Expression:

356

"""Extract day from datetime."""

357

358

def dt_hour(self) -> Expression:

359

"""Extract hour from datetime."""

360

361

def dt_minute(self) -> Expression:

362

"""Extract minute from datetime."""

363

364

def dt_second(self) -> Expression:

365

"""Extract second from datetime."""

366

367

def dt_date(self) -> Expression:

368

"""Extract date part from datetime."""

369

```

370

371

### Window Functions

372

373

Window-based operations and rankings.

374

375

```python { .api }

376

class Expression:

377

def over(self, window: Window) -> Expression:

378

"""

379

Apply expression over window.

380

381

Parameters:

382

- window: Window specification

383

384

Returns:

385

Expression: Windowed expression

386

"""

387

388

def row_number() -> Expression:

389

"""Row number within partition."""

390

391

def rank() -> Expression:

392

"""Rank within partition."""

393

394

def dense_rank() -> Expression:

395

"""Dense rank within partition."""

396

397

class Window:

398

def __init__(

399

self,

400

partition_by: Optional[List[Expression]] = None,

401

order_by: Optional[List[Expression]] = None

402

):

403

"""

404

Create window specification.

405

406

Parameters:

407

- partition_by: Columns to partition by

408

- order_by: Columns to order by within partition

409

"""

410

411

def rows_between(self, start: int, end: int) -> "Window":

412

"""

413

Define row-based frame boundaries.

414

415

Parameters:

416

- start: Start row offset (negative for preceding rows)

417

- end: End row offset (positive for following rows)

418

419

Returns:

420

Window: Window with row frame specification

421

"""

422

423

def range_between(self, start: Expression, end: Expression) -> "Window":

424

"""

425

Define range-based frame boundaries.

426

427

Parameters:

428

- start: Start range value

429

- end: End range value

430

431

Returns:

432

Window: Window with range frame specification

433

"""

434

435

# Window frame constants

436

unbounded_preceding: Expression # Unbounded preceding boundary

437

unbounded_following: Expression # Unbounded following boundary

438

current_row: Expression # Current row boundary

439

```

440

441

### Built-in Functions

442

443

Additional utility functions for data processing.

444

445

```python { .api }

446

def columns_sum(*cols: ColumnInputType) -> Expression:

447

"""Sum across multiple columns."""

448

449

def columns_mean(*cols: ColumnInputType) -> Expression:

450

"""Mean across multiple columns."""

451

452

def columns_min(*cols: ColumnInputType) -> Expression:

453

"""Minimum across multiple columns."""

454

455

def columns_max(*cols: ColumnInputType) -> Expression:

456

"""Maximum across multiple columns."""

457

458

def monotonically_increasing_id() -> Expression:

459

"""Generate monotonically increasing IDs."""

460

461

def format(template: str, *args: Expression) -> Expression:

462

"""

463

Format string with expression arguments.

464

465

Parameters:

466

- template: Format string template

467

- args: Expressions to format into template

468

469

Returns:

470

Expression: Formatted string expression

471

"""

472

```

473

474

### Embedding Operations

475

476

Vector embedding operations for similarity calculations.

477

478

```python { .api }

479

class Expression:

480

@property

481

def embedding(self) -> "ExpressionEmbeddingNamespace":

482

"""Access embedding operations namespace."""

483

484

class ExpressionEmbeddingNamespace:

485

def cosine_distance(self, other: Expression) -> Expression:

486

"""

487

Calculate cosine distance between embedding vectors.

488

489

Parameters:

490

- other: Another embedding expression to compare against

491

492

Returns:

493

Expression: Cosine distance (0.0 = identical, 2.0 = opposite)

494

"""

495

```

496

497

## Usage Examples

498

499

### Basic Expression Operations

500

```python

501

from daft import col, lit, when

502

503

# Arithmetic operations

504

df.select(

505

(col("price") * col("quantity")).alias("total"),

506

(col("price") * 1.1).alias("price_with_tax"),

507

(col("amount") + lit(100)).alias("adjusted_amount")

508

)

509

510

# String operations

511

df.select(

512

col("name").str_upper().alias("name_upper"),

513

col("email").str_contains("@gmail.com").alias("is_gmail"),

514

col("description").str_length().alias("desc_length")

515

)

516

```

517

518

### Conditional Logic

519

```python

520

# Conditional expressions

521

df.select(

522

when(col("age") >= 18)

523

.then(lit("Adult"))

524

.otherwise(lit("Minor"))

525

.alias("age_group"),

526

527

when(col("score") >= 90).then(lit("A"))

528

.when(col("score") >= 80).then(lit("B"))

529

.when(col("score") >= 70).then(lit("C"))

530

.otherwise(lit("F"))

531

.alias("grade")

532

)

533

```

534

535

### Complex Data Operations

536

```python

537

from daft import list_, struct, element

538

539

# Working with arrays and structs

540

df.select(

541

list_(col("item1"), col("item2"), col("item3")).alias("items"),

542

struct(

543

name=col("name"),

544

age=col("age"),

545

active=col("is_active")

546

).alias("person"),

547

element(0).alias("first_item") # Extract first element from array

548

)

549

```

550

551

### Aggregation with Expressions

552

```python

553

# Complex aggregations

554

df.groupby("category").agg(

555

col("price").mean().alias("avg_price"),

556

col("quantity").sum().alias("total_quantity"),

557

(col("price") * col("quantity")).sum().alias("total_revenue"),

558

col("name").count().alias("item_count")

559

)

560

```

561

562

### DateTime Processing

563

```python

564

from daft import interval

565

566

# Date/time operations

567

df.select(

568

col("created_at").dt_year().alias("year"),

569

col("created_at").dt_month().alias("month"),

570

(col("created_at") + interval(30, "days")).alias("future_date"),

571

col("timestamp").dt_date().alias("date_only")

572

)

573

```

574

575

### Window Functions

576

```python

577

from daft.window import Window

578

from daft.functions import row_number, rank

579

580

# Window operations

581

window = Window(

582

partition_by=[col("department")],

583

order_by=[col("salary").desc()]

584

)

585

586

df.select(

587

col("name"),

588

col("department"),

589

col("salary"),

590

row_number().over(window).alias("rank_in_dept"),

591

col("salary").sum().over(window).alias("dept_total_salary")

592

)

593

```

594

595

## Expression Visitor Pattern

596

597

```python { .api }

598

class ExpressionVisitor:

599

"""Visitor pattern for traversing expression trees."""

600

601

def visit(self, expr: Expression) -> Any:

602

"""Visit expression node."""

603

604

def visit_column(self, expr: Expression) -> Any:

605

"""Visit column reference."""

606

607

def visit_literal(self, expr: Expression) -> Any:

608

"""Visit literal value."""

609

610

def visit_function(self, expr: Expression) -> Any:

611

"""Visit function call."""

612

613

class ExpressionsProjection:

614

"""Collection of expressions for projection operations."""

615

616

def __init__(self, exprs: List[Expression]): ...

617

618

def to_list(self) -> List[Expression]:

619

"""Convert to list of expressions."""

620

```

621

622

## Types

623

624

```python { .api }

625

ColumnInputType = Union[str, Expression]

626

```