0
# Functions and Utilities
1
2
Built-in functions for aggregation, transformation, date/time handling, string manipulation, and general-purpose utilities. These functions build expressions that can be used in DataFrame operations, LazyFrame queries, and standalone computations.
3
4
## Capabilities
5
6
### Aggregation Functions
7
8
Statistical aggregation functions that operate on columns or expressions.
9
10
```python { .api }
11
def sum(*exprs) -> Expr:
12
"""
13
Sum all values in each named column (vertical aggregation). For row-wise sums across columns, use sum_horizontal.
14
15
Parameters:
16
- exprs: Expressions to sum
17
18
Returns:
19
Sum expression
20
"""
21
22
def mean(*exprs) -> Expr:
23
"""
24
Calculate the mean of each named column (vertical aggregation). For row-wise means across columns, use mean_horizontal.
25
26
Parameters:
27
- exprs: Expressions to average
28
29
Returns:
30
Mean expression
31
"""
32
33
def max(*exprs) -> Expr:
34
"""
35
Get the maximum value of each named column (vertical aggregation). For row-wise maxima across columns, use max_horizontal.
36
37
Parameters:
38
- exprs: Expressions to compare
39
40
Returns:
41
Maximum expression
42
"""
43
44
def min(*exprs) -> Expr:
45
"""
46
Get the minimum value of each named column (vertical aggregation). For row-wise minima across columns, use min_horizontal.
47
48
Parameters:
49
- exprs: Expressions to compare
50
51
Returns:
52
Minimum expression
53
"""
54
55
def count(*exprs) -> Expr:
56
"""
57
Count non-null values.
58
59
Parameters:
60
- exprs: Expressions to count (optional)
61
62
Returns:
63
Count expression
64
"""
65
66
def all(*exprs) -> Expr:
67
"""
68
Select all columns when called without arguments; with column names, check whether all values in each named column are true.
69
70
Parameters:
71
- exprs: Boolean expressions
72
73
Returns:
74
Boolean expression (all true)
75
"""
76
77
def any(*exprs) -> Expr:
78
"""
79
Check if any values are true.
80
81
Parameters:
82
- exprs: Boolean expressions
83
84
Returns:
85
Boolean expression (any true)
86
"""
87
88
# Horizontal operations
89
def sum_horizontal(*exprs) -> Expr:
90
"""Sum across columns horizontally."""
91
92
def mean_horizontal(*exprs) -> Expr:
93
"""Mean across columns horizontally."""
94
95
def max_horizontal(*exprs) -> Expr:
96
"""Maximum across columns horizontally."""
97
98
def min_horizontal(*exprs) -> Expr:
99
"""Minimum across columns horizontally."""
100
101
def all_horizontal(*exprs) -> Expr:
102
"""All true across columns horizontally."""
103
104
def any_horizontal(*exprs) -> Expr:
105
"""Any true across columns horizontally."""
106
107
# Cumulative functions
108
def cum_sum(*exprs) -> Expr:
109
"""Cumulative sum."""
110
111
def cum_sum_horizontal(*exprs) -> Expr:
112
"""Cumulative sum horizontally."""
113
114
def cum_count(*exprs) -> Expr:
115
"""Cumulative count."""
116
117
def cum_fold(acc: Expr, function: Callable[[Expr, Expr], Expr], *exprs: Expr, include_init: bool = False) -> Expr:
118
"""
119
Cumulative fold operation.
120
121
Parameters:
122
- acc: Initial accumulator value
123
- function: Fold function
124
- exprs: Expressions to fold
125
- include_init: Include initial value
126
127
Returns:
128
Cumulative fold expression
129
"""
130
131
def cum_reduce(function: Callable[[Expr, Expr], Expr], *exprs: Expr) -> Expr:
132
"""
133
Cumulative reduce operation.
134
135
Parameters:
136
- function: Reduce function
137
- exprs: Expressions to reduce
138
139
Returns:
140
Cumulative reduce expression
141
"""
142
```
143
144
### Date and Time Functions
145
146
Functions for creating and manipulating temporal data.
147
148
```python { .api }
149
def date(year: int | Expr, month: int | Expr, day: int | Expr) -> Expr:
150
"""
151
Create date from year, month, day.
152
153
Parameters:
154
- year: Year value
155
- month: Month value (1-12)
156
- day: Day value (1-31)
157
158
Returns:
159
Date expression
160
"""
161
162
def datetime(
163
year: int | Expr,
164
month: int | Expr,
165
day: int | Expr,
166
hour: int | Expr = 0,
167
minute: int | Expr = 0,
168
second: int | Expr = 0,
169
microsecond: int | Expr = 0,
170
*,
171
time_unit: TimeUnit = "us",
172
time_zone: str | None = None
173
) -> Expr:
174
"""
175
Create datetime from components.
176
177
Parameters:
178
- year: Year value
179
- month: Month value (1-12)
180
- day: Day value (1-31)
181
- hour: Hour value (0-23)
182
- minute: Minute value (0-59)
183
- second: Second value (0-59)
184
- microsecond: Microsecond value
185
- time_unit: Time precision
186
- time_zone: Timezone
187
188
Returns:
189
Datetime expression
190
"""
191
192
def time(hour: int | Expr, minute: int | Expr, second: int | Expr, microsecond: int | Expr = 0) -> Expr:
193
"""
194
Create time from components.
195
196
Parameters:
197
- hour: Hour value (0-23)
198
- minute: Minute value (0-59)
199
- second: Second value (0-59)
200
- microsecond: Microsecond value
201
202
Returns:
203
Time expression
204
"""
205
206
def duration(
207
*,
208
weeks: int | Expr | None = None,
209
days: int | Expr | None = None,
210
hours: int | Expr | None = None,
211
minutes: int | Expr | None = None,
212
seconds: int | Expr | None = None,
213
milliseconds: int | Expr | None = None,
214
microseconds: int | Expr | None = None,
215
nanoseconds: int | Expr | None = None,
216
time_unit: TimeUnit = "us"
217
) -> Expr:
218
"""
219
Create duration from components.
220
221
Parameters:
222
- weeks: Number of weeks
223
- days: Number of days
224
- hours: Number of hours
225
- minutes: Number of minutes
226
- seconds: Number of seconds
227
- milliseconds: Number of milliseconds
228
- microseconds: Number of microseconds
229
- nanoseconds: Number of nanoseconds
230
- time_unit: Time unit for result
231
232
Returns:
233
Duration expression
234
"""
235
236
def from_epoch(column: str | Expr, time_unit: TimeUnit = "s") -> Expr:
237
"""
238
Convert epoch timestamp to datetime.
239
240
Parameters:
241
- column: Column with epoch values
242
- time_unit: Unit of epoch values
243
244
Returns:
245
Datetime expression
246
"""
247
```
248
249
### Range Functions
250
251
Functions for generating sequences and ranges of values.
252
253
```python { .api }
254
def arange(start: int | Expr, end: int | Expr, step: int = 1, *, eager: bool = False) -> Expr | Series:
255
"""
256
Generate range of integers.
257
258
Parameters:
259
- start: Start value (inclusive)
260
- end: End value (exclusive)
261
- step: Step size
262
- eager: Return Series instead of Expr
263
264
Returns:
265
Range expression or Series
266
"""
267
268
def date_range(
269
start: date | datetime | IntoExpr,
270
end: date | datetime | IntoExpr,
271
interval: str | timedelta = "1d",
272
*,
273
closed: ClosedInterval = "both",
274
time_unit: TimeUnit | None = None,
275
time_zone: str | None = None,
276
eager: bool = False
277
) -> Expr | Series:
278
"""
279
Generate date range.
280
281
Parameters:
282
- start: Start date
283
- end: End date
284
- interval: Time interval ("1d", "1h", etc.)
285
- closed: Include endpoints ("both", "left", "right", "none")
286
- time_unit: Time precision
287
- time_zone: Timezone
288
- eager: Return Series instead of Expr
289
290
Returns:
291
Date range expression or Series
292
"""
293
294
def date_ranges(
295
start: IntoExpr,
296
end: IntoExpr,
297
interval: str | timedelta = "1d",
298
*,
299
closed: ClosedInterval = "both",
300
time_unit: TimeUnit | None = None,
301
time_zone: str | None = None,
302
eager: bool = False
303
) -> Expr | Series:
304
"""Generate multiple date ranges."""
305
306
def datetime_range(
307
start: datetime | IntoExpr,
308
end: datetime | IntoExpr,
309
interval: str | timedelta = "1d",
310
*,
311
closed: ClosedInterval = "both",
312
time_unit: TimeUnit = "us",
313
time_zone: str | None = None,
314
eager: bool = False
315
) -> Expr | Series:
316
"""Generate datetime range."""
317
318
def datetime_ranges(
319
start: IntoExpr,
320
end: IntoExpr,
321
interval: str | timedelta = "1d",
322
**kwargs
323
) -> Expr | Series:
324
"""Generate multiple datetime ranges."""
325
326
def time_range(
327
start: time | IntoExpr | None = None,
328
end: time | IntoExpr | None = None,
329
interval: str | timedelta = "1h",
330
*,
331
closed: ClosedInterval = "both",
332
eager: bool = False
333
) -> Expr | Series:
334
"""Generate time range."""
335
336
def time_ranges(
337
start: IntoExpr,
338
end: IntoExpr,
339
interval: str | timedelta = "1h",
340
**kwargs
341
) -> Expr | Series:
342
"""Generate multiple time ranges."""
343
344
def int_range(start: int | Expr, end: int | Expr, step: int = 1, *, eager: bool = False) -> Expr | Series:
345
"""Generate integer range."""
346
347
def int_ranges(start: IntoExpr, end: IntoExpr, step: int | IntoExpr = 1, *, eager: bool = False) -> Expr | Series:
348
"""Generate multiple integer ranges."""
349
350
def linear_space(start: float | Expr, end: float | Expr, n: int, *, endpoint: bool = True, eager: bool = False) -> Expr | Series:
351
"""
352
Generate linearly spaced values.
353
354
Parameters:
355
- start: Start value
356
- end: End value
357
- n: Number of values
358
- endpoint: Include endpoint
359
- eager: Return Series instead of Expr
360
361
Returns:
362
Linear space expression or Series
363
"""
364
365
def linear_spaces(start: IntoExpr, end: IntoExpr, n: int | IntoExpr, **kwargs) -> Expr | Series:
366
"""Generate multiple linear spaces."""
367
```
368
369
### String Functions
370
371
Functions for string manipulation and processing.
372
373
```python { .api }
374
def concat_str(exprs: IntoExpr, *, separator: str = "", ignore_nulls: bool = False) -> Expr:
375
"""
376
Concatenate strings horizontally.
377
378
Parameters:
379
- exprs: String expressions to concatenate
380
- separator: Separator between strings
381
- ignore_nulls: Skip null values
382
383
Returns:
384
Concatenated string expression
385
"""
386
387
def format(format_str: str, *args: IntoExpr) -> Expr:
388
"""
389
Format string with placeholders.
390
391
Parameters:
392
- format_str: Format string with {} placeholders
393
- args: Values to substitute
394
395
Returns:
396
Formatted string expression
397
"""
398
399
def escape_regex(pattern: str | Expr) -> Expr:
400
"""
401
Escape regex special characters.
402
403
Parameters:
404
- pattern: Pattern to escape
405
406
Returns:
407
Escaped pattern expression
408
"""
409
```
410
411
### List and Array Functions
412
413
Functions for working with list and array data types.
414
415
```python { .api }
416
def concat_list(exprs: IntoExpr, *, ignore_nulls: bool = False) -> Expr:
417
"""
418
Concatenate lists horizontally.
419
420
Parameters:
421
- exprs: List expressions to concatenate
422
- ignore_nulls: Skip null values
423
424
Returns:
425
Concatenated list expression
426
"""
427
428
def concat_arr(exprs: IntoExpr, *, ignore_nulls: bool = False) -> Expr:
429
"""
430
Concatenate arrays horizontally.
431
432
Parameters:
433
- exprs: Array expressions to concatenate
434
- ignore_nulls: Skip null values
435
436
Returns:
437
Concatenated array expression
438
"""
439
```
440
441
### Statistical Functions
442
443
Advanced statistical and mathematical functions.
444
445
```python { .api }
446
def std(*exprs) -> Expr:
447
"""Calculate standard deviation."""
448
449
def var(*exprs) -> Expr:
450
"""Calculate variance."""
451
452
def median(*exprs) -> Expr:
453
"""Calculate median."""
454
455
def quantile(*exprs, quantile: float, interpolation: str = "nearest") -> Expr:
456
"""
457
Calculate quantile.
458
459
Parameters:
460
- exprs: Expressions to analyze
461
- quantile: Quantile value (0.0 to 1.0)
462
- interpolation: Interpolation method
463
464
Returns:
465
Quantile expression
466
"""
467
468
def n_unique(*exprs) -> Expr:
469
"""Count unique values."""
470
471
def approx_n_unique(*exprs) -> Expr:
472
"""Approximate unique count (faster for large data)."""
473
474
def corr(a: IntoExpr, b: IntoExpr, *, method: CorrelationMethod = "pearson", ddof: int = 1) -> Expr:
475
"""
476
Calculate correlation coefficient.
477
478
Parameters:
479
- a: First expression
480
- b: Second expression
481
- method: Correlation method ("pearson", "spearman")
482
- ddof: Delta degrees of freedom
483
484
Returns:
485
Correlation expression
486
"""
487
488
def cov(a: IntoExpr, b: IntoExpr) -> Expr:
489
"""
490
Calculate covariance.
491
492
Parameters:
493
- a: First expression
494
- b: Second expression
495
496
Returns:
497
Covariance expression
498
"""
499
500
def rolling_corr(a: IntoExpr, b: IntoExpr, window_size: int, *, min_periods: int | None = None) -> Expr:
501
"""
502
Calculate rolling correlation.
503
504
Parameters:
505
- a: First expression
506
- b: Second expression
507
- window_size: Rolling window size
508
- min_periods: Minimum periods for calculation
509
510
Returns:
511
Rolling correlation expression
512
"""
513
514
def rolling_cov(a: IntoExpr, b: IntoExpr, window_size: int, *, min_periods: int | None = None) -> Expr:
515
"""
516
Calculate rolling covariance.
517
518
Parameters:
519
- a: First expression
520
- b: Second expression
521
- window_size: Rolling window size
522
- min_periods: Minimum periods for calculation
523
524
Returns:
525
Rolling covariance expression
526
"""
527
```
528
529
### Trigonometric Functions
530
531
Mathematical trigonometric operations.
532
533
```python { .api }
534
def arctan2(y: IntoExpr, x: IntoExpr) -> Expr:
535
"""
536
Calculate arctangent of y/x in radians.
537
538
Parameters:
539
- y: Y coordinate expression
540
- x: X coordinate expression
541
542
Returns:
543
Arctangent expression
544
"""
545
546
def arctan2d(y: IntoExpr, x: IntoExpr) -> Expr:
547
"""
548
Calculate arctangent of y/x in degrees.
549
550
Parameters:
551
- y: Y coordinate expression
552
- x: X coordinate expression
553
554
Returns:
555
Arctangent expression in degrees
556
"""
557
```
558
559
### Transform and Utility Functions
560
561
General utility and transformation functions.
562
563
```python { .api }
564
def map_batches(exprs: IntoExpr, function: Callable[[Sequence[Series]], Series], return_dtype: DataType | None = None, *, inference_size: int = 256) -> Expr:
565
"""
566
Apply a custom function to the evaluated input expressions, producing a single Series.
567
568
Parameters:
569
- exprs: Input expressions
570
- function: Function that receives the evaluated inputs as a list of Series and returns a Series
571
- return_dtype: Expected return data type
572
- inference_size: Size for type inference
573
574
Returns:
575
Mapped expression
576
"""
577
578
def map_groups(exprs: IntoExpr, function: Callable[[Sequence[Series]], Series], return_dtype: DataType | None = None) -> Expr:
579
"""
580
Apply function to groups.
581
582
Parameters:
583
- exprs: Input expressions
584
- function: Function to apply to each group
585
- return_dtype: Expected return data type
586
587
Returns:
588
Mapped expression
589
"""
590
591
def fold(acc: IntoExpr, function: Callable[[Expr, Expr], Expr], exprs: Sequence[IntoExpr] | Expr) -> Expr:
592
"""
593
Fold operation with accumulator.
594
595
Parameters:
596
- acc: Initial accumulator value
597
- function: Fold function
598
- exprs: Expressions to fold
599
600
Returns:
601
Folded expression
602
"""
603
604
def reduce(function: Callable[[Expr, Expr], Expr], exprs: Sequence[IntoExpr] | Expr) -> Expr:
605
"""
606
Reduce operation.
607
608
Parameters:
609
- function: Reduce function
610
- exprs: Expressions to reduce
611
612
Returns:
613
Reduced expression
614
"""
615
616
def coalesce(*exprs: IntoExpr) -> Expr:
617
"""
618
Return first non-null value.
619
620
Parameters:
621
- exprs: Expressions to check
622
623
Returns:
624
Coalesced expression
625
"""
626
627
def element() -> Expr:
628
"""Alias for the element being evaluated inside an eval-style expression (e.g. list.eval)."""
629
630
def first(*exprs: IntoExpr) -> Expr:
631
"""Get first value."""
632
633
def last(*exprs: IntoExpr) -> Expr:
634
"""Get last value."""
635
636
def head(*exprs: IntoExpr, n: int = 10) -> Expr:
637
"""Get first n values."""
638
639
def tail(*exprs: IntoExpr, n: int = 10) -> Expr:
640
"""Get last n values."""
641
642
def nth(n: int, *exprs: IntoExpr) -> Expr:
643
"""
644
Get nth value.
645
646
Parameters:
647
- n: Index to retrieve
648
- exprs: Input expressions
649
650
Returns:
651
Nth value expression
652
"""
653
654
def len() -> Expr:
655
"""Return the number of rows in the current context (similar to COUNT(*) in SQL)."""
656
657
def implode(*exprs: IntoExpr) -> Expr:
658
"""Combine values into list."""
659
660
def explode(*exprs: IntoExpr) -> Expr:
661
"""Explode list elements to separate rows."""
662
663
def repeat(value: IntoExpr, n: int | IntoExpr, *, eager: bool = False) -> Expr | Series:
664
"""
665
Repeat value n times.
666
667
Parameters:
668
- value: Value to repeat
669
- n: Number of repetitions
670
- eager: Return Series instead of Expr
671
672
Returns:
673
Repeated values expression or Series
674
"""
675
676
def ones(n: int | IntoExpr, *, eager: bool = False) -> Expr | Series:
677
"""Create array of ones."""
678
679
def zeros(n: int | IntoExpr, *, eager: bool = False) -> Expr | Series:
680
"""Create array of zeros."""
681
682
def exclude(*columns: str | DataType) -> Expr:
683
"""
684
Exclude columns from selection.
685
686
Parameters:
687
- columns: Column names or types to exclude
688
689
Returns:
690
Exclusion expression
691
"""
692
693
def groups() -> Expr:
694
"""Get group indices in group-by context."""
695
696
def field(*names: str) -> Expr:
697
"""
698
Access struct field(s).
699
700
Parameters:
701
- names: Field name(s) to access
702
703
Returns:
704
Field access expression
705
"""
706
707
def arg_sort_by(*exprs: IntoExpr, descending: bool | Sequence[bool] = False, nulls_last: bool = False) -> Expr:
708
"""
709
Get indices that would sort by given expressions.
710
711
Parameters:
712
- exprs: Sort key expressions
713
- descending: Sort in descending order
714
- nulls_last: Place nulls at end
715
716
Returns:
717
Sorting indices expression
718
"""
719
720
def arg_where(condition: IntoExpr) -> Expr:
721
"""
722
Get indices where condition is true.
723
724
Parameters:
725
- condition: Boolean condition
726
727
Returns:
728
Indices expression
729
"""
730
731
def row_index(name: str = "row_nr", offset: int = 0) -> Expr:
732
"""
733
Add row index column.
734
735
Parameters:
736
- name: Column name for row index
737
- offset: Starting value
738
739
Returns:
740
Row index expression
741
"""
742
743
def business_day_count(start: IntoExpr, end: IntoExpr) -> Expr:
744
"""
745
Count business days between dates.
746
747
Parameters:
748
- start: Start date expression
749
- end: End date expression
750
751
Returns:
752
Business day count expression
753
"""
754
755
def dtype_of(*exprs: IntoExpr) -> Expr:
756
"""Get data type of expression."""
757
758
def self_dtype() -> Expr:
759
"""Get data type of current context."""
760
761
def set_random_seed(seed: int) -> None:
762
"""
763
Set random seed for reproducible results.
764
765
Parameters:
766
- seed: Random seed value
767
"""
768
```
769
770
### DataFrame Operations
771
772
Functions that operate on entire DataFrames.
773
774
```python { .api }
775
def concat(items: Iterable[DataFrame | LazyFrame], *, how: UnionStrategy = "vertical", rechunk: bool = False, parallel: bool = True) -> DataFrame | LazyFrame:
776
"""
777
Concatenate DataFrames or LazyFrames.
778
779
Parameters:
780
- items: DataFrames/LazyFrames to concatenate
781
- how: Concatenation strategy ("vertical", "horizontal", "diagonal")
782
- rechunk: Rechunk after concatenation
783
- parallel: Use parallel processing
784
785
Returns:
786
Concatenated DataFrame or LazyFrame
787
"""
788
789
def align_frames(*frames: DataFrame, on: str | Expr | None = None, select: str | Expr | list[str | Expr] | None = None, reverse: bool | list[bool] = False) -> list[DataFrame]:
790
"""
791
Align DataFrames by common column values.
792
793
Parameters:
794
- frames: DataFrames to align
795
- on: Column(s) to align on
796
- select: Columns to select after alignment
797
- reverse: Reverse sort order
798
799
Returns:
800
List of aligned DataFrames
801
"""
802
803
def collect_all(lazy_frames: Sequence[LazyFrame], *, type_coercion: bool = True, predicate_pushdown: bool = True, projection_pushdown: bool = True, simplify_expression: bool = True, slice_pushdown: bool = True, comm_subplan_elim: bool = True, comm_subexpr_elim: bool = True, cluster_with_columns: bool = True, no_optimization: bool = False, streaming: bool = False) -> list[DataFrame]:
804
"""
805
Collect multiple LazyFrames in parallel.
806
807
Parameters:
808
- lazy_frames: LazyFrames to collect
809
- Various optimization flags: Same as LazyFrame.collect()
810
811
Returns:
812
List of collected DataFrames
813
"""
814
815
def collect_all_async(lazy_frames: Sequence[LazyFrame], **kwargs) -> Awaitable[list[DataFrame]]:
816
"""Collect multiple LazyFrames asynchronously."""
817
818
def explain_all(lazy_frames: Sequence[LazyFrame], **kwargs) -> str:
819
"""Get execution plans for multiple LazyFrames."""
820
821
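def select(*exprs: IntoExpr, **named_exprs: IntoExpr) -> DataFrame:
822
"""Evaluate expressions without a frame context and return the result as a DataFrame (shorthand for pl.DataFrame().select(...))."""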
823
824
def struct(*exprs: IntoExpr, **named_exprs: IntoExpr) -> Expr:
825
"""Create struct from expressions."""
826
827
def struct_with_fields(fields: Sequence[str], *exprs: IntoExpr) -> Expr:
828
"""
829
Create struct with named fields.
830
831
Parameters:
832
- fields: Field names
833
- exprs: Field value expressions
834
835
Returns:
836
Struct expression
837
"""
838
```
839
840
## Usage Examples
841
842
### Aggregation Operations
843
844
```python
845
import polars as pl
846
847
df = pl.DataFrame({
848
"group": ["A", "A", "B", "B", "C"],
849
"value1": [1, 2, 3, 4, 5],
850
"value2": [10, 20, 30, 40, 50]
851
})
852
853
# Basic aggregations
854
result = df.group_by("group").agg([
855
pl.sum("value1"),
856
pl.mean("value2"),
857
pl.max_horizontal("value1", "value2").max().alias("max_of_both")
858
])
859
860
# Horizontal aggregations
861
result = df.with_columns([
862
pl.sum_horizontal("value1", "value2").alias("total"),
863
pl.mean_horizontal("value1", "value2").alias("average")
864
])
865
```
866
867
### Date and Time Operations
868
869
```python
870
from datetime import date

# Create date ranges
871
dates = pl.date_range(
872
start=date(2023, 1, 1),
873
end=date(2023, 12, 31),
874
interval="1d",
875
eager=True
876
)
877
878
# Create datetime with components
879
df = pl.DataFrame({
880
"year": [2023, 2023, 2023],
881
"month": [1, 2, 3],
882
"day": [15, 20, 25]
883
}).with_columns([
884
pl.date("year", "month", "day").alias("date"),
885
pl.datetime("year", "month", "day", 12, 30, 0).alias("datetime")
886
])
887
888
# Duration calculations
889
df = df.with_columns([
890
pl.duration(days=30).alias("thirty_days"),
891
pl.duration(hours=2, minutes=30).alias("two_thirty")
892
])
893
```
894
895
### String Operations
896
897
```python
898
df = pl.DataFrame({
899
"first": ["John", "Jane", "Bob"],
900
"last": ["Doe", "Smith", "Johnson"],
901
"title": ["Mr", "Ms", "Dr"]
902
})
903
904
# String concatenation
905
result = df.with_columns([
906
pl.concat_str([
907
pl.col("title"),
908
pl.lit(" "),
909
pl.col("first"),
910
pl.lit(" "),
911
pl.col("last")
912
]).alias("full_name"),
913
914
# Format strings
915
pl.format("Hello, {} {}!", pl.col("first"), pl.col("last")).alias("greeting")
916
])
917
```
918
919
### Mathematical Operations
920
921
```python
922
df = pl.DataFrame({
923
"x": [1.0, 2.0, 3.0, 4.0],
924
"y": [2.0, 3.0, 4.0, 5.0],
925
"values": [10, 20, 30, 40]
926
})
927
928
# Trigonometric functions
929
result = df.with_columns([
930
pl.arctan2("y", "x").alias("angle_rad"),
931
pl.arctan2d("y", "x").alias("angle_deg")
932
])
933
934
# Statistical functions
935
result = df.select([
936
pl.std("values").alias("std_dev"),
937
pl.var("values").alias("variance"),
938
pl.median("values").alias("median"),
939
pl.quantile("values", quantile=0.75).alias("q75")
940
])
941
```
942
943
### Advanced Transformations
944
945
```python
946
# Fold operation (row-wise sum across columns, built from a custom accumulator function)
947
result = df.with_columns([
948
pl.fold(
949
acc=pl.lit(0),
950
function=lambda acc, x: acc + x,
951
exprs=["value1", "value2"]
952
).alias("horizontal_sum")
953
])
954
955
# Coalesce (first non-null value)
956
df_with_nulls = pl.DataFrame({
957
"a": [1, None, 3],
958
"b": [None, 2, None],
959
"c": [10, 20, 30]
960
})
961
962
result = df_with_nulls.with_columns([
963
pl.coalesce("a", "b", "c").alias("first_non_null")
964
])
965
966
# Map operations for complex transformations
967
def custom_transform(series_list: list[pl.Series]) -> pl.Series:
    # map_batches passes the evaluated input expressions as a list of Series
    return (series_list[0] * 2 + 1).alias("transformed")
971
972
result = df.with_columns([
973
pl.map_batches("value1", custom_transform, return_dtype=pl.Int64)
974
])
975
```
976
977
### Window Functions and Rankings
978
979
```python
980
df = pl.DataFrame({
981
"group": ["A", "A", "A", "B", "B", "B"],
982
"value": [10, 20, 30, 15, 25, 35]
983
})
984
985
# Window functions with partitioning
986
result = df.with_columns([
987
pl.col("value").sum().over("group").alias("group_total"),
988
pl.col("value").rank().over("group").alias("rank_in_group"),
989
pl.col("value").shift(1).over("group").alias("previous_value")
990
])
991
```
992
993
### Working with Lists and Arrays
994
995
```python
996
df = pl.DataFrame({
997
"lists": [[1, 2, 3], [4, 5], [6, 7, 8]]
998
})
999
1000
# List operations
1001
result = df.with_columns([
1002
pl.col("lists").list.len().alias("list_length"),
1003
pl.col("lists").list.sum().alias("list_sum"),
1004
pl.col("lists").list.get(0).alias("first_element")
1005
])
1006
1007
# Concatenate lists
1008
df2 = pl.DataFrame({
1009
"list1": [[1, 2], [3, 4]],
1010
"list2": [[5, 6], [7, 8]]
1011
})
1012
1013
result = df2.with_columns([
1014
pl.concat_list(["list1", "list2"]).alias("combined")
1015
])
1016
```