or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

config-utilities.md core-data-structures.md data-types.md expressions.md functions.md index.md io-operations.md selectors.md sql-interface.md

docs/expressions.md

0

# Expressions and Column Operations

1

2

Powerful expression system for column transformations, aggregations, and complex operations that work across DataFrame and LazyFrame. Expressions are composable and lazy, enabling complex column operations and transformations.

3

4

## Capabilities

5

6

### Expression Construction

7

8

Core functions for creating expressions that operate on columns and values.

9

10

```python { .api }

11

def col(name: str | DataType) -> Expr:

12

"""

13

Create column expression.

14

15

Parameters:

16

- name: Column name or data type selector

17

18

Returns:

19

Column expression

20

"""

21

22

def lit(value: Any, dtype: DataType | None = None) -> Expr:

23

"""

24

Create literal value expression.

25

26

Parameters:

27

- value: Literal value

28

- dtype: Optional data type

29

30

Returns:

31

Literal expression

32

"""

33

34

def when(predicate: Expr) -> When:

35

"""

36

Create conditional expression.

37

38

Parameters:

39

- predicate: Boolean expression condition

40

41

Returns:

42

When object for then/otherwise chaining

43

"""

44

45

class When:

46

def then(self, statement: Expr) -> Then:

47

"""Value when condition is true."""

48

49

class Then:

50

def otherwise(self, statement: Expr) -> Expr:

51

"""Value when condition is false."""

52

```

53

54

### Expression Class

55

56

The main Expression class with methods for column operations, transformations, and aggregations.

57

58

```python { .api }

59

class Expr:

60

def alias(self, name: str) -> Expr:

61

"""

62

Assign a name to the expression.

63

64

Parameters:

65

- name: New column name

66

67

Returns:

68

Aliased expression

69

"""

70

71

def cast(self, dtype: DataType | type[Any], *, strict: bool = True) -> Expr:

72

"""

73

Cast expression to different data type.

74

75

Parameters:

76

- dtype: Target data type

77

- strict: Whether to raise on cast failure

78

79

Returns:

80

Cast expression

81

"""

82

83

def filter(self, predicate: Expr) -> Expr:

84

"""

85

Filter expression based on predicate.

86

87

Parameters:

88

- predicate: Boolean expression for filtering

89

90

Returns:

91

Filtered expression

92

"""

93

94

def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Expr:

95

"""

96

Sort expression values.

97

98

Parameters:

99

- descending: Sort in descending order

100

- nulls_last: Place nulls at end

101

102

Returns:

103

Sorted expression

104

"""

105

106

def reverse(self) -> Expr:

107

"""Reverse expression values."""

108

109

def unique(self, *, maintain_order: bool = False) -> Expr:

110

"""

111

Get unique values.

112

113

Parameters:

114

- maintain_order: Maintain original order

115

116

Returns:

117

Expression with unique values

118

"""

119

120

def drop_nulls(self) -> Expr:

121

"""Drop null values from expression."""

122

123

def fill_null(self, value: Any | Expr, *, strategy: FillNullStrategy | None = None) -> Expr:

124

"""

125

Fill null values.

126

127

Parameters:

128

- value: Fill value or expression

129

- strategy: Fill strategy ("forward", "backward", "min", "max", "mean", "zero", "one")

130

131

Returns:

132

Expression with nulls filled

133

"""

134

135

def fill_nan(self, value: Any | Expr) -> Expr:

136

"""Fill NaN values."""

137

138

def is_null(self) -> Expr:

139

"""Check for null values."""

140

141

def is_not_null(self) -> Expr:

142

"""Check for non-null values."""

143

144

def is_nan(self) -> Expr:

145

"""Check for NaN values."""

146

147

def is_not_nan(self) -> Expr:

148

"""Check for non-NaN values."""

149

150

def is_finite(self) -> Expr:

151

"""Check for finite values."""

152

153

def is_infinite(self) -> Expr:

154

"""Check for infinite values."""

155

```

156

157

### Aggregation Methods

158

159

Statistical and aggregation operations on expressions.

160

161

```python { .api }

162

class Expr:

163

def sum(self) -> Expr:

164

"""Sum all values."""

165

166

def mean(self) -> Expr:

167

"""Calculate mean."""

168

169

def median(self) -> Expr:

170

"""Calculate median."""

171

172

def max(self) -> Expr:

173

"""Get maximum value."""

174

175

def min(self) -> Expr:

176

"""Get minimum value."""

177

178

def std(self, ddof: int = 1) -> Expr:

179

"""

180

Calculate standard deviation.

181

182

Parameters:

183

- ddof: Delta degrees of freedom

184

185

Returns:

186

Standard deviation expression

187

"""

188

189

def var(self, ddof: int = 1) -> Expr:

190

"""

191

Calculate variance.

192

193

Parameters:

194

- ddof: Delta degrees of freedom

195

196

Returns:

197

Variance expression

198

"""

199

200

def quantile(self, quantile: float | Expr, *, interpolation: RollingInterpolationMethod = "nearest") -> Expr:

201

"""

202

Calculate quantile.

203

204

Parameters:

205

- quantile: Quantile value (0.0 to 1.0)

206

- interpolation: Interpolation method

207

208

Returns:

209

Quantile expression

210

"""

211

212

def count(self) -> Expr:

213

"""Count non-null values."""

214

215

def n_unique(self) -> Expr:

216

"""Count unique values."""

217

218

def null_count(self) -> Expr:

219

"""Count null values."""

220

221

def first(self) -> Expr:

222

"""Get first value."""

223

224

def last(self) -> Expr:

225

"""Get last value."""

226

227

def head(self, n: int | Expr = 10) -> Expr:

228

"""Get first n values."""

229

230

def tail(self, n: int | Expr = 10) -> Expr:

231

"""Get last n values."""

232

```

233

234

### Arithmetic Operations

235

236

Mathematical operations on expressions.

237

238

```python { .api }

239

class Expr:

240

def __add__(self, other: Any) -> Expr:

241

"""Addition operator (+)."""

242

243

def __sub__(self, other: Any) -> Expr:

244

"""Subtraction operator (-)."""

245

246

def __mul__(self, other: Any) -> Expr:

247

"""Multiplication operator (*)."""

248

249

def __truediv__(self, other: Any) -> Expr:

250

"""Division operator (/)."""

251

252

def __floordiv__(self, other: Any) -> Expr:

253

"""Floor division operator (//)."""

254

255

def __mod__(self, other: Any) -> Expr:

256

"""Modulo operator (%)."""

257

258

def __pow__(self, other: Any) -> Expr:

259

"""Power operator (**)."""

260

261

def abs(self) -> Expr:

262

"""Absolute value."""

263

264

def sqrt(self) -> Expr:

265

"""Square root."""

266

267

def ceil(self) -> Expr:

268

"""Ceiling function."""

269

270

def floor(self) -> Expr:

271

"""Floor function."""

272

273

def round(self, decimals: int | Expr = 0) -> Expr:

274

"""

275

Round to specified decimal places.

276

277

Parameters:

278

- decimals: Number of decimal places

279

280

Returns:

281

Rounded expression

282

"""

283

284

def clip(self, lower_bound: Any | Expr | None = None, upper_bound: Any | Expr | None = None) -> Expr:

285

"""

286

Clip values to specified bounds.

287

288

Parameters:

289

- lower_bound: Lower bound

290

- upper_bound: Upper bound

291

292

Returns:

293

Clipped expression

294

"""

295

```

296

297

### Comparison Operations

298

299

Comparison and logical operations on expressions.

300

301

```python { .api }

302

class Expr:

303

def __eq__(self, other: Any) -> Expr:

304

"""Equality operator (==)."""

305

306

def __ne__(self, other: Any) -> Expr:

307

"""Not equal operator (!=)."""

308

309

def __lt__(self, other: Any) -> Expr:

310

"""Less than operator (<)."""

311

312

def __le__(self, other: Any) -> Expr:

313

"""Less than or equal operator (<=)."""

314

315

def __gt__(self, other: Any) -> Expr:

316

"""Greater than operator (>)."""

317

318

def __ge__(self, other: Any) -> Expr:

319

"""Greater than or equal operator (>=)."""

320

321

def __and__(self, other: Any) -> Expr:

322

"""Logical AND operator (&)."""

323

324

def __or__(self, other: Any) -> Expr:

325

"""Logical OR operator (|)."""

326

327

def __xor__(self, other: Any) -> Expr:

328

"""Logical XOR operator (^)."""

329

330

def __invert__(self) -> Expr:

331

"""Logical NOT operator (~)."""

332

333

def is_in(self, other: Any) -> Expr:

334

"""Check if values are in collection."""

335

336

def is_between(self, lower_bound: Any | Expr, upper_bound: Any | Expr, closed: ClosedInterval = "both") -> Expr:

337

"""

338

Check if values are between bounds.

339

340

Parameters:

341

- lower_bound: Lower bound

342

- upper_bound: Upper bound

343

- closed: Include bounds ("both", "left", "right", "none")

344

345

Returns:

346

Boolean expression

347

"""

348

```

349

350

### String Operations

351

352

String manipulation methods available on string expressions.

353

354

```python { .api }

355

class Expr:

356

@property

357

def str(self) -> ExprStringNameSpace:

358

"""Access string methods."""

359

360

class ExprStringNameSpace:

361

def len_bytes(self) -> Expr:

362

"""Get byte length of strings."""

363

364

def len_chars(self) -> Expr:

365

"""Get character length of strings."""

366

367

def contains(self, pattern: str | Expr, *, literal: bool = False, strict: bool = True) -> Expr:

368

"""

369

Check if string contains pattern.

370

371

Parameters:

372

- pattern: Pattern to search for

373

- literal: Treat pattern as literal string

374

- strict: Raise on invalid regex

375

376

Returns:

377

Boolean expression

378

"""

379

380

def starts_with(self, prefix: str | Expr) -> Expr:

381

"""Check if string starts with prefix."""

382

383

def ends_with(self, suffix: str | Expr) -> Expr:

384

"""Check if string ends with suffix."""

385

386

def to_lowercase(self) -> Expr:

387

"""Convert to lowercase."""

388

389

def to_uppercase(self) -> Expr:

390

"""Convert to uppercase."""

391

392

def strip_chars(self, characters: str | None = None) -> Expr:

393

"""Strip characters from both ends."""

394

395

def split(self, by: str | Expr, *, inclusive: bool = False) -> Expr:

396

"""

397

Split string by delimiter.

398

399

Parameters:

400

- by: Delimiter

401

- inclusive: Include delimiter in result

402

403

Returns:

404

List expression

405

"""

406

407

def slice(self, offset: int | Expr, length: int | Expr | None = None) -> Expr:

408

"""

409

Slice string.

410

411

Parameters:

412

- offset: Start position

413

- length: Slice length

414

415

Returns:

416

Sliced string expression

417

"""

418

419

def replace(self, pattern: str | Expr, value: str | Expr, *, literal: bool = False, n: int = 1) -> Expr:

420

"""

421

Replace pattern in string.

422

423

Parameters:

424

- pattern: Pattern to replace

425

- value: Replacement value

426

- literal: Treat pattern as literal

427

- n: Maximum number of replacements

428

429

Returns:

430

String expression with replacements

431

"""

432

```

433

434

### Temporal Operations

435

436

Date and time operations on temporal expressions.

437

438

```python { .api }

439

class Expr:

440

@property

441

def dt(self) -> ExprDateTimeNameSpace:

442

"""Access datetime methods."""

443

444

class ExprDateTimeNameSpace:

445

def year(self) -> Expr:

446

"""Extract year."""

447

448

def month(self) -> Expr:

449

"""Extract month."""

450

451

def day(self) -> Expr:

452

"""Extract day."""

453

454

def hour(self) -> Expr:

455

"""Extract hour."""

456

457

def minute(self) -> Expr:

458

"""Extract minute."""

459

460

def second(self) -> Expr:

461

"""Extract second."""

462

463

def weekday(self) -> Expr:

463

"""Get ISO weekday (1=Monday, 7=Sunday)."""

465

466

def week(self) -> Expr:

467

"""Get week number."""

468

469

def strftime(self, format: str) -> Expr:

470

"""

471

Format datetime as string.

472

473

Parameters:

474

- format: Format string

475

476

Returns:

477

Formatted string expression

478

"""

479

480

def truncate(self, every: str | timedelta) -> Expr:

481

"""

482

Truncate to specified time unit.

483

484

Parameters:

485

- every: Time unit ("1d", "1h", "1m", "1s", etc.)

486

487

Returns:

488

Truncated datetime expression

489

"""

490

491

def with_time_unit(self, time_unit: TimeUnit) -> Expr:

492

"""

493

Change time unit.

494

495

Parameters:

496

- time_unit: New time unit ("ns", "us", "ms")

497

498

Returns:

499

Expression with new time unit

500

"""

501

```

502

503

### List Operations

504

505

Operations on list/array expressions.

506

507

```python { .api }

508

class Expr:

509

@property

510

def list(self) -> ExprListNameSpace:

511

"""Access list methods."""

512

513

class ExprListNameSpace:

514

def len(self) -> Expr:

515

"""Get list length."""

516

517

def sum(self) -> Expr:

518

"""Sum list elements."""

519

520

def max(self) -> Expr:

521

"""Get maximum element."""

522

523

def min(self) -> Expr:

524

"""Get minimum element."""

525

526

def mean(self) -> Expr:

527

"""Calculate mean of elements."""

528

529

def first(self) -> Expr:

530

"""Get first element."""

531

532

def last(self) -> Expr:

533

"""Get last element."""

534

535

def get(self, index: int | Expr, *, null_on_oob: bool = True) -> Expr:

536

"""

537

Get element at index.

538

539

Parameters:

540

- index: Element index

541

- null_on_oob: Return null if out of bounds

542

543

Returns:

544

Element expression

545

"""

546

547

def slice(self, offset: int | Expr, length: int | Expr | None = None) -> Expr:

548

"""Slice list."""

549

550

def head(self, n: int | Expr = 5) -> Expr:

551

"""Get first n elements."""

552

553

def tail(self, n: int | Expr = 5) -> Expr:

554

"""Get last n elements."""

555

556

def contains(self, item: Any | Expr) -> Expr:

557

"""Check if list contains item."""

558

559

def explode(self) -> Expr:

560

"""Explode list elements to separate rows."""

561

```

562

563

## Usage Examples

564

565

### Basic Expression Usage

566

567

```python

568

import polars as pl

569

570

df = pl.DataFrame({

571

"name": ["Alice", "Bob", "Charlie"],

572

"age": [25, 30, 35],

573

"salary": [50000, 60000, 70000]

574

})

575

576

# Column selection and transformation

577

result = df.select([

578

pl.col("name"),

579

pl.col("age").alias("years"),

580

(pl.col("salary") / 1000).alias("salary_k"),

581

pl.col("salary").cast(pl.Float64)

582

])

583

584

# Conditional expressions

585

result = df.with_columns([

586

pl.when(pl.col("age") > 30)

587

.then(pl.lit("Senior"))

588

.otherwise(pl.lit("Junior"))

589

.alias("level")

590

])

591

```

592

593

### String Operations

594

595

```python

596

df = pl.DataFrame({

597

"text": ["Hello World", "PYTHON programming", "Data Science"]

598

})

599

600

result = df.select([

601

pl.col("text"),

602

pl.col("text").str.to_lowercase().alias("lower"),

603

pl.col("text").str.len_chars().alias("length"),

604

pl.col("text").str.contains("o").alias("has_o"),

605

pl.col("text").str.split(" ").alias("words")

606

])

607

```

608

609

### Temporal Operations

610

611

```python

612

df = pl.DataFrame({

613

"timestamp": ["2023-01-15 10:30:00", "2023-02-20 14:45:00", "2023-03-10 09:15:00"]

614

}).with_columns([

615

pl.col("timestamp").str.strptime(pl.Datetime, "%Y-%m-%d %H:%M:%S")

616

])

617

618

result = df.select([

619

pl.col("timestamp"),

620

pl.col("timestamp").dt.year().alias("year"),

621

pl.col("timestamp").dt.month().alias("month"),

622

pl.col("timestamp").dt.weekday().alias("weekday"),

623

pl.col("timestamp").dt.strftime("%Y-%m").alias("year_month")

624

])

625

```

626

627

### Aggregations and Window Functions

628

629

```python

630

df = pl.DataFrame({

631

"group": ["A", "A", "B", "B", "C"],

632

"value": [10, 20, 15, 25, 30]

633

})

634

635

# Group aggregations

636

result = df.group_by("group").agg([

637

pl.col("value").sum().alias("total"),

638

pl.col("value").mean().alias("average"),

639

(pl.col("value").max() - pl.col("value").min()).alias("range")

640

])

641

642

# Window functions

643

result = df.with_columns([

644

pl.col("value").sum().over("group").alias("group_total"),

645

pl.col("value").rank().over("group").alias("rank_in_group")

646

])

647

```

648

649

### Complex Expressions

650

651

```python

652

# Chaining multiple operations

653

result = df.select([

654

pl.col("name"),

655

pl.col("age")

656

.cast(pl.Float64)

657

.round(0)

658

.clip(0, 100)

659

.alias("age_clipped"),

660

661

# Complex conditional logic

662

pl.when((pl.col("age") >= 18) & (pl.col("salary") > 55000))

663

.then(pl.lit("Eligible"))

664

.when(pl.col("age") >= 18)

665

.then(pl.lit("Age OK"))

666

.otherwise(pl.lit("Not Eligible"))

667

.alias("status"),

668

669

# Mathematical operations

670

((pl.col("salary") * 1.1).round(2)).alias("salary_with_raise")

671

])

672

```