0
# Expressions and Column Operations
1
2
Polars provides a powerful expression system for column transformations, aggregations, and other complex operations that works across both DataFrame and LazyFrame. Expressions are composable and lazily evaluated, so complex column operations can be built up from simple parts.
3
4
## Capabilities
5
6
### Expression Construction
7
8
Core functions for creating expressions that operate on columns and values.
9
10
```python { .api }
11
def col(name: str | DataType) -> Expr:
12
"""
13
Create column expression.
14
15
Parameters:
16
- name: Column name or data type selector
17
18
Returns:
19
Column expression
20
"""
21
22
def lit(value: Any, dtype: DataType | None = None) -> Expr:
23
"""
24
Create literal value expression.
25
26
Parameters:
27
- value: Literal value
28
- dtype: Optional data type
29
30
Returns:
31
Literal expression
32
"""
33
34
def when(predicate: Expr) -> When:
35
"""
36
Create conditional expression.
37
38
Parameters:
39
- predicate: Boolean expression condition
40
41
Returns:
42
When object for then/otherwise chaining
43
"""
44
45
class When:
46
def then(self, statement: Expr) -> Then:
47
"""Value when condition is true."""
48
49
class Then:
50
def otherwise(self, statement: Expr) -> Expr:
51
"""Value when condition is false."""
52
```
53
54
### Expression Class
55
56
The main Expression class with methods for column operations, transformations, and aggregations.
57
58
```python { .api }
59
class Expr:
60
def alias(self, name: str) -> Expr:
61
"""
62
Assign a name to the expression.
63
64
Parameters:
65
- name: New column name
66
67
Returns:
68
Aliased expression
69
"""
70
71
def cast(self, dtype: DataType | type[Any], *, strict: bool = True) -> Expr:
72
"""
73
Cast expression to different data type.
74
75
Parameters:
76
- dtype: Target data type
77
- strict: Whether to raise on cast failure
78
79
Returns:
80
Cast expression
81
"""
82
83
def filter(self, predicate: Expr) -> Expr:
84
"""
85
Filter expression based on predicate.
86
87
Parameters:
88
- predicate: Boolean expression for filtering
89
90
Returns:
91
Filtered expression
92
"""
93
94
def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Expr:
95
"""
96
Sort expression values.
97
98
Parameters:
99
- descending: Sort in descending order
100
- nulls_last: Place nulls at end
101
102
Returns:
103
Sorted expression
104
"""
105
106
def reverse(self) -> Expr:
107
"""Reverse expression values."""
108
109
def unique(self, *, maintain_order: bool = False) -> Expr:
110
"""
111
Get unique values.
112
113
Parameters:
114
- maintain_order: Maintain original order
115
116
Returns:
117
Expression with unique values
118
"""
119
120
def drop_nulls(self) -> Expr:
121
"""Drop null values from expression."""
122
123
def fill_null(self, value: Any | Expr | None = None, *, strategy: FillNullStrategy | None = None) -> Expr:
124
"""
125
Fill null values.
126
127
Parameters:
128
- value: Fill value or expression; omit when `strategy` is given
129
- strategy: Fill strategy ("forward", "backward", "min", "max", "mean", "zero", "one"); omit when `value` is given
130
131
Returns:
132
Expression with nulls filled
133
"""
134
135
def fill_nan(self, value: Any | Expr) -> Expr:
136
"""Fill NaN values."""
137
138
def is_null(self) -> Expr:
139
"""Check for null values."""
140
141
def is_not_null(self) -> Expr:
142
"""Check for non-null values."""
143
144
def is_nan(self) -> Expr:
145
"""Check for NaN values."""
146
147
def is_not_nan(self) -> Expr:
148
"""Check for non-NaN values."""
149
150
def is_finite(self) -> Expr:
151
"""Check for finite values."""
152
153
def is_infinite(self) -> Expr:
154
"""Check for infinite values."""
155
```
156
157
### Aggregation Methods
158
159
Statistical and aggregation operations on expressions.
160
161
```python { .api }
162
class Expr:
163
def sum(self) -> Expr:
164
"""Sum all values."""
165
166
def mean(self) -> Expr:
167
"""Calculate mean."""
168
169
def median(self) -> Expr:
170
"""Calculate median."""
171
172
def max(self) -> Expr:
173
"""Get maximum value."""
174
175
def min(self) -> Expr:
176
"""Get minimum value."""
177
178
def std(self, ddof: int = 1) -> Expr:
179
"""
180
Calculate standard deviation.
181
182
Parameters:
183
- ddof: Delta degrees of freedom
184
185
Returns:
186
Standard deviation expression
187
"""
188
189
def var(self, ddof: int = 1) -> Expr:
190
"""
191
Calculate variance.
192
193
Parameters:
194
- ddof: Delta degrees of freedom
195
196
Returns:
197
Variance expression
198
"""
199
200
def quantile(self, quantile: float | Expr, *, interpolation: RollingInterpolationMethod = "nearest") -> Expr:
201
"""
202
Calculate quantile.
203
204
Parameters:
205
- quantile: Quantile value (0.0 to 1.0)
206
- interpolation: Interpolation method
207
208
Returns:
209
Quantile expression
210
"""
211
212
def count(self) -> Expr:
213
"""Count non-null values."""
214
215
def n_unique(self) -> Expr:
216
"""Count unique values."""
217
218
def null_count(self) -> Expr:
219
"""Count null values."""
220
221
def first(self) -> Expr:
222
"""Get first value."""
223
224
def last(self) -> Expr:
225
"""Get last value."""
226
227
def head(self, n: int | Expr = 10) -> Expr:
228
"""Get first n values."""
229
230
def tail(self, n: int | Expr = 10) -> Expr:
231
"""Get last n values."""
232
```
233
234
### Arithmetic Operations
235
236
Mathematical operations on expressions.
237
238
```python { .api }
239
class Expr:
240
def __add__(self, other: Any) -> Expr:
241
"""Addition operator (+)."""
242
243
def __sub__(self, other: Any) -> Expr:
244
"""Subtraction operator (-)."""
245
246
def __mul__(self, other: Any) -> Expr:
247
"""Multiplication operator (*)."""
248
249
def __truediv__(self, other: Any) -> Expr:
250
"""Division operator (/)."""
251
252
def __floordiv__(self, other: Any) -> Expr:
253
"""Floor division operator (//)."""
254
255
def __mod__(self, other: Any) -> Expr:
256
"""Modulo operator (%)."""
257
258
def __pow__(self, other: Any) -> Expr:
259
"""Power operator (**)."""
260
261
def abs(self) -> Expr:
262
"""Absolute value."""
263
264
def sqrt(self) -> Expr:
265
"""Square root."""
266
267
def ceil(self) -> Expr:
268
"""Ceiling function."""
269
270
def floor(self) -> Expr:
271
"""Floor function."""
272
273
def round(self, decimals: int | Expr = 0) -> Expr:
274
"""
275
Round to specified decimal places.
276
277
Parameters:
278
- decimals: Number of decimal places
279
280
Returns:
281
Rounded expression
282
"""
283
284
def clip(self, lower_bound: Any | Expr | None = None, upper_bound: Any | Expr | None = None) -> Expr:
285
"""
286
Clip values to specified bounds.
287
288
Parameters:
289
- lower_bound: Lower bound
290
- upper_bound: Upper bound
291
292
Returns:
293
Clipped expression
294
"""
295
```
296
297
### Comparison Operations
298
299
Comparison and logical operations on expressions.
300
301
```python { .api }
302
class Expr:
303
def __eq__(self, other: Any) -> Expr:
304
"""Equality operator (==)."""
305
306
def __ne__(self, other: Any) -> Expr:
307
"""Not equal operator (!=)."""
308
309
def __lt__(self, other: Any) -> Expr:
310
"""Less than operator (<)."""
311
312
def __le__(self, other: Any) -> Expr:
313
"""Less than or equal operator (<=)."""
314
315
def __gt__(self, other: Any) -> Expr:
316
"""Greater than operator (>)."""
317
318
def __ge__(self, other: Any) -> Expr:
319
"""Greater than or equal operator (>=)."""
320
321
def __and__(self, other: Any) -> Expr:
322
"""Logical AND operator (&)."""
323
324
def __or__(self, other: Any) -> Expr:
325
"""Logical OR operator (|)."""
326
327
def __xor__(self, other: Any) -> Expr:
328
"""Logical XOR operator (^)."""
329
330
def __invert__(self) -> Expr:
331
"""Logical NOT operator (~)."""
332
333
def is_in(self, other: Any) -> Expr:
334
"""Check if values are in collection."""
335
336
def is_between(self, lower_bound: Any | Expr, upper_bound: Any | Expr, closed: ClosedInterval = "both") -> Expr:
337
"""
338
Check if values are between bounds.
339
340
Parameters:
341
- lower_bound: Lower bound
342
- upper_bound: Upper bound
343
- closed: Include bounds ("both", "left", "right", "none")
344
345
Returns:
346
Boolean expression
347
"""
348
```
349
350
### String Operations
351
352
String manipulation methods available on string expressions.
353
354
```python { .api }
355
class Expr:
356
@property
357
def str(self) -> ExprStringNameSpace:
358
"""Access string methods."""
359
360
class ExprStringNameSpace:
361
def len_bytes(self) -> Expr:
362
"""Get byte length of strings."""
363
364
def len_chars(self) -> Expr:
365
"""Get character length of strings."""
366
367
def contains(self, pattern: str | Expr, *, literal: bool = False, strict: bool = True) -> Expr:
368
"""
369
Check if string contains pattern.
370
371
Parameters:
372
- pattern: Pattern to search for
373
- literal: Treat pattern as literal string
374
- strict: Raise on invalid regex
375
376
Returns:
377
Boolean expression
378
"""
379
380
def starts_with(self, prefix: str | Expr) -> Expr:
381
"""Check if string starts with prefix."""
382
383
def ends_with(self, suffix: str | Expr) -> Expr:
384
"""Check if string ends with suffix."""
385
386
def to_lowercase(self) -> Expr:
387
"""Convert to lowercase."""
388
389
def to_uppercase(self) -> Expr:
390
"""Convert to uppercase."""
391
392
def strip_chars(self, characters: str | None = None) -> Expr:
393
"""Strip characters from both ends."""
394
395
def split(self, by: str | Expr, *, inclusive: bool = False) -> Expr:
396
"""
397
Split string by delimiter.
398
399
Parameters:
400
- by: Delimiter
401
- inclusive: Include delimiter in result
402
403
Returns:
404
List expression
405
"""
406
407
def slice(self, offset: int | Expr, length: int | Expr | None = None) -> Expr:
408
"""
409
Slice string.
410
411
Parameters:
412
- offset: Start position
413
- length: Slice length
414
415
Returns:
416
Sliced string expression
417
"""
418
419
def replace(self, pattern: str | Expr, value: str | Expr, *, literal: bool = False, n: int = 1) -> Expr:
420
"""
421
Replace pattern in string.
422
423
Parameters:
424
- pattern: Pattern to replace
425
- value: Replacement value
426
- literal: Treat pattern as literal
427
- n: Maximum number of replacements
428
429
Returns:
430
String expression with replacements
431
"""
432
```
433
434
### Temporal Operations
435
436
Date and time operations on temporal expressions.
437
438
```python { .api }
439
class Expr:
440
@property
441
def dt(self) -> ExprDateTimeNameSpace:
442
"""Access datetime methods."""
443
444
class ExprDateTimeNameSpace:
445
def year(self) -> Expr:
446
"""Extract year."""
447
448
def month(self) -> Expr:
449
"""Extract month."""
450
451
def day(self) -> Expr:
452
"""Extract day."""
453
454
def hour(self) -> Expr:
455
"""Extract hour."""
456
457
def minute(self) -> Expr:
458
"""Extract minute."""
459
460
def second(self) -> Expr:
461
"""Extract second."""
462
463
def weekday(self) -> Expr:
464
"""Get ISO weekday (1=Monday, 7=Sunday)."""
465
466
def week(self) -> Expr:
467
"""Get week number."""
468
469
def strftime(self, format: str) -> Expr:
470
"""
471
Format datetime as string.
472
473
Parameters:
474
- format: Format string
475
476
Returns:
477
Formatted string expression
478
"""
479
480
def truncate(self, every: str | timedelta) -> Expr:
481
"""
482
Truncate to specified time unit.
483
484
Parameters:
485
- every: Time unit ("1d", "1h", "1m", "1s", etc.)
486
487
Returns:
488
Truncated datetime expression
489
"""
490
491
def with_time_unit(self, time_unit: TimeUnit) -> Expr:
492
"""
493
Change time unit.
494
495
Parameters:
496
- time_unit: New time unit ("ns", "us", "ms")
497
498
Returns:
499
Expression with new time unit
500
"""
501
```
502
503
### List Operations
504
505
Operations on list/array expressions.
506
507
```python { .api }
508
class Expr:
509
@property
510
def list(self) -> ExprListNameSpace:
511
"""Access list methods."""
512
513
class ExprListNameSpace:
514
def len(self) -> Expr:
515
"""Get list length."""
516
517
def sum(self) -> Expr:
518
"""Sum list elements."""
519
520
def max(self) -> Expr:
521
"""Get maximum element."""
522
523
def min(self) -> Expr:
524
"""Get minimum element."""
525
526
def mean(self) -> Expr:
527
"""Calculate mean of elements."""
528
529
def first(self) -> Expr:
530
"""Get first element."""
531
532
def last(self) -> Expr:
533
"""Get last element."""
534
535
def get(self, index: int | Expr, *, null_on_oob: bool = True) -> Expr:
536
"""
537
Get element at index.
538
539
Parameters:
540
- index: Element index
541
- null_on_oob: Return null if out of bounds
542
543
Returns:
544
Element expression
545
"""
546
547
def slice(self, offset: int | Expr, length: int | Expr | None = None) -> Expr:
548
"""Slice list."""
549
550
def head(self, n: int | Expr = 5) -> Expr:
551
"""Get first n elements."""
552
553
def tail(self, n: int | Expr = 5) -> Expr:
554
"""Get last n elements."""
555
556
def contains(self, item: Any | Expr) -> Expr:
557
"""Check if list contains item."""
558
559
def explode(self) -> Expr:
560
"""Explode list elements to separate rows."""
561
```
562
563
## Usage Examples
564
565
### Basic Expression Usage
566
567
```python
568
import polars as pl
569
570
df = pl.DataFrame({
571
"name": ["Alice", "Bob", "Charlie"],
572
"age": [25, 30, 35],
573
"salary": [50000, 60000, 70000]
574
})
575
576
# Column selection and transformation
577
result = df.select([
578
pl.col("name"),
579
pl.col("age").alias("years"),
580
(pl.col("salary") / 1000).alias("salary_k"),
581
pl.col("salary").cast(pl.Float64)
582
])
583
584
# Conditional expressions
585
result = df.with_columns([
586
pl.when(pl.col("age") > 30)
587
.then(pl.lit("Senior"))
588
.otherwise(pl.lit("Junior"))
589
.alias("level")
590
])
591
```
592
593
### String Operations
594
595
```python
596
df = pl.DataFrame({
597
"text": ["Hello World", "PYTHON programming", "Data Science"]
598
})
599
600
result = df.select([
601
pl.col("text"),
602
pl.col("text").str.to_lowercase().alias("lower"),
603
pl.col("text").str.len_chars().alias("length"),
604
pl.col("text").str.contains("o").alias("has_o"),
605
pl.col("text").str.split(" ").alias("words")
606
])
607
```
608
609
### Temporal Operations
610
611
```python
612
df = pl.DataFrame({
613
"timestamp": ["2023-01-15 10:30:00", "2023-02-20 14:45:00", "2023-03-10 09:15:00"]
614
}).with_columns([
615
pl.col("timestamp").str.strptime(pl.Datetime, "%Y-%m-%d %H:%M:%S")
616
])
617
618
result = df.select([
619
pl.col("timestamp"),
620
pl.col("timestamp").dt.year().alias("year"),
621
pl.col("timestamp").dt.month().alias("month"),
622
pl.col("timestamp").dt.weekday().alias("weekday"),
623
pl.col("timestamp").dt.strftime("%Y-%m").alias("year_month")
624
])
625
```
626
627
### Aggregations and Window Functions
628
629
```python
630
df = pl.DataFrame({
631
"group": ["A", "A", "B", "B", "C"],
632
"value": [10, 20, 15, 25, 30]
633
})
634
635
# Group aggregations
636
result = df.group_by("group").agg([
637
pl.col("value").sum().alias("total"),
638
pl.col("value").mean().alias("average"),
639
(pl.col("value").max() - pl.col("value").min()).alias("range")
640
])
641
642
# Window functions
643
result = df.with_columns([
644
pl.col("value").sum().over("group").alias("group_total"),
645
pl.col("value").rank().over("group").alias("rank_in_group")
646
])
647
```
648
649
### Complex Expressions
650
651
```python
652
# Chaining multiple operations (df is the name/age/salary DataFrame from "Basic Expression Usage")
653
result = df.select([
654
pl.col("name"),
655
pl.col("age")
656
.cast(pl.Float64)
657
.round(0)
658
.clip(0, 100)
659
.alias("age_clipped"),
660
661
# Complex conditional logic
662
pl.when((pl.col("age") >= 18) & (pl.col("salary") > 55000))
663
.then(pl.lit("Eligible"))
664
.when(pl.col("age") >= 18)
665
.then(pl.lit("Age OK"))
666
.otherwise(pl.lit("Not Eligible"))
667
.alias("status"),
668
669
# Mathematical operations
670
((pl.col("salary") * 1.1).round(2)).alias("salary_with_raise")
671
])
672
```