0
# Expression Namespaces
1
2
Specialized namespaces for working with different data types including string operations (.str), datetime operations (.dt), list operations (.list), array operations (.arr), struct operations (.struct), categorical operations (.cat), binary operations (.bin), name operations (.name), and metadata operations (.meta).
3
4
## Capabilities
5
6
### String Namespace (.str)
7
8
String operations available on both Expr and Series for text processing and manipulation.
9
10
```python { .api }
11
# Available as expr.str.method() and series.str.method()
12
13
def contains(
14
pattern: str | Expr,
15
*,
16
literal: bool = False,
17
strict: bool = True
18
) -> Expr:
19
"""Check if string contains pattern."""
20
21
def ends_with(suffix: str | Expr) -> Expr:
22
"""Check if string ends with suffix."""
23
24
def starts_with(prefix: str | Expr) -> Expr:
25
"""Check if string starts with prefix."""
26
27
def extract(pattern: str, group_index: int = 1) -> Expr:
28
"""Extract regex capture group."""
29
30
def extract_all(pattern: str) -> Expr:
31
"""Extract all regex matches."""
32
33
def find(pattern: str, *, literal: bool = False) -> Expr:
34
"""Find first occurrence of pattern."""
35
36
def replace(pattern: str, value: str, *, literal: bool = False, n: int = 1) -> Expr:
37
"""Replace pattern with value."""
38
39
def replace_all(pattern: str, value: str, *, literal: bool = False) -> Expr:
40
"""Replace all occurrences of pattern."""
41
42
def slice(offset: int, length: int | None = None) -> Expr:
43
"""Extract substring by position."""
44
45
def head(n: int = 5) -> Expr:
46
"""Get first n characters."""
47
48
def tail(n: int = 5) -> Expr:
49
"""Get last n characters."""
50
51
def to_lowercase() -> Expr:
52
"""Convert to lowercase."""
53
54
def to_uppercase() -> Expr:
55
"""Convert to uppercase."""
56
57
def to_titlecase() -> Expr:
58
"""Convert to title case."""
59
60
def strip_chars(characters: str | None = None) -> Expr:
61
"""Remove characters from both ends."""
62
63
def strip_chars_start(characters: str | None = None) -> Expr:
64
"""Remove characters from start."""
65
66
def strip_chars_end(characters: str | None = None) -> Expr:
67
"""Remove characters from end."""
68
69
def zfill(width: int) -> Expr:
70
"""Pad with zeros to specified width."""
71
72
def pad_start(length: int, fill_char: str = " ") -> Expr:
73
"""Pad string to width from start."""
74
75
def pad_end(length: int, fill_char: str = " ") -> Expr:
76
"""Pad string to width from end."""
77
78
def len_bytes() -> Expr:
79
"""Get byte length of strings."""
80
81
def len_chars() -> Expr:
82
"""Get character length of strings."""
83
84
def n_chars() -> Expr:
85
"""Alias for len_chars."""
86
87
def concat(delimiter: str = "") -> Expr:
88
"""Vertically concatenate all string values in the column into a single string."""
89
90
def explode() -> Expr:
91
"""Split string into characters."""
92
93
def split(by: str, *, inclusive: bool = False) -> Expr:
94
"""Split string by delimiter."""
95
96
def split_exact(by: str, n: int, *, inclusive: bool = False) -> Expr:
97
"""Split string by delimiter exactly n times, returning a struct of n+1 fields."""
98
99
def splitn(by: str, n: int) -> Expr:
100
"""Split string into at most n parts."""
101
102
def json_decode(dtype: type | None = None, *, infer_schema_length: int | None = None) -> Expr:
103
"""Parse JSON strings."""
104
105
def json_path_match(json_path: str) -> Expr:
106
"""Extract JSON values using JSONPath."""
107
108
def encode(encoding: str) -> Expr:
109
"""Encode strings using the given encoding ("hex" or "base64")."""
110
111
def decode(encoding: str, *, strict: bool = True) -> Expr:
112
"""Decode "hex"- or "base64"-encoded strings to binary values."""
113
114
def to_integer(*, base: int = 10, strict: bool = True) -> Expr:
115
"""Parse strings as integers."""
116
117
def to_decimal(*, infer_length: int = 10) -> Expr:
118
"""Parse strings as decimals."""
119
120
def strftime(format: str) -> Expr:
121
"""Format datetime as string."""
122
123
def strptime(
124
dtype: type,
125
format: str | None = None,
126
*,
127
strict: bool = True,
128
exact: bool = True,
129
cache: bool = True
130
) -> Expr:
131
"""Parse strings as datetime."""
132
```
133
134
### DateTime Namespace (.dt)
135
136
DateTime operations for temporal data manipulation and extraction.
137
138
```python { .api }
139
# Available as expr.dt.method() and series.dt.method()
140
141
def year() -> Expr:
142
"""Extract year."""
143
144
def month() -> Expr:
145
"""Extract month."""
146
147
def day() -> Expr:
148
"""Extract day."""
149
150
def hour() -> Expr:
151
"""Extract hour."""
152
153
def minute() -> Expr:
154
"""Extract minute."""
155
156
def second() -> Expr:
157
"""Extract second."""
158
159
def microsecond() -> Expr:
160
"""Extract microsecond."""
161
162
def nanosecond() -> Expr:
163
"""Extract nanosecond."""
164
165
def weekday() -> Expr:
166
"""Get weekday (Monday=1, Sunday=7)."""
167
168
def week() -> Expr:
169
"""Get ISO week number."""
170
171
def ordinal_day() -> Expr:
172
"""Get day of year (1-366)."""
173
174
def quarter() -> Expr:
175
"""Get quarter (1-4)."""
176
177
def date() -> Expr:
178
"""Extract date part."""
179
180
def time() -> Expr:
181
"""Extract time part."""
182
183
def epoch(time_unit: str = "us") -> Expr:
184
"""Convert to epoch timestamp."""
185
186
def timestamp(time_unit: str = "us") -> Expr:
187
"""Get timestamp."""
188
189
def with_time_unit(time_unit: str) -> Expr:
190
"""Change time unit."""
191
192
def cast_time_unit(time_unit: str) -> Expr:
193
"""Cast to different time unit."""
194
195
def convert_time_zone(time_zone: str) -> Expr:
196
"""Convert to different timezone."""
197
198
def replace_time_zone(
199
time_zone: str | None,
200
*,
201
ambiguous: str = "raise",
202
non_existent: str = "raise"
203
) -> Expr:
204
"""Replace timezone without conversion."""
205
206
def truncate(every: str) -> Expr:
207
"""Truncate to time interval."""
208
209
def round(every: str) -> Expr:
210
"""Round to nearest time interval."""
211
212
def strftime(format: str) -> Expr:
213
"""Format as string."""
214
215
def to_string(format: str) -> Expr:
216
"""Convert to string with format."""
217
218
def days() -> Expr:
219
"""Extract days from duration."""
220
221
def hours() -> Expr:
222
"""Extract hours from duration."""
223
224
def minutes() -> Expr:
225
"""Extract minutes from duration."""
226
227
def seconds() -> Expr:
228
"""Extract seconds from duration."""
229
230
def milliseconds() -> Expr:
231
"""Extract milliseconds from duration."""
232
233
def microseconds() -> Expr:
234
"""Extract microseconds from duration."""
235
236
def nanoseconds() -> Expr:
237
"""Extract nanoseconds from duration."""
238
239
def total_days() -> Expr:
240
"""Get total days in duration."""
241
242
def total_hours() -> Expr:
243
"""Get total hours in duration."""
244
245
def total_minutes() -> Expr:
246
"""Get total minutes in duration."""
247
248
def total_seconds() -> Expr:
249
"""Get total seconds in duration."""
250
251
def total_milliseconds() -> Expr:
252
"""Get total milliseconds in duration."""
253
254
def total_microseconds() -> Expr:
255
"""Get total microseconds in duration."""
256
257
def total_nanoseconds() -> Expr:
258
"""Get total nanoseconds in duration."""
259
260
def offset_by(by: str) -> Expr:
261
"""Offset datetime by interval."""
262
263
def is_between(
264
start: datetime | date | str | Expr,
265
end: datetime | date | str | Expr,
266
closed: str = "both"
267
) -> Expr:
268
"""Check if datetime is in range."""
269
```
270
271
### List Namespace (.list)
272
273
Operations for working with list-type columns.
274
275
```python { .api }
276
# Available as expr.list.method() and series.list.method()
277
278
def len() -> Expr:
279
"""Get length of lists."""
280
281
def sum() -> Expr:
282
"""Sum elements in each list."""
283
284
def max() -> Expr:
285
"""Get maximum element in each list."""
286
287
def min() -> Expr:
288
"""Get minimum element in each list."""
289
290
def mean() -> Expr:
291
"""Get mean of elements in each list."""
292
293
def sort(*, descending: bool = False, nulls_last: bool = False) -> Expr:
294
"""Sort elements in each list."""
295
296
def reverse() -> Expr:
297
"""Reverse order of elements in each list."""
298
299
def unique(*, maintain_order: bool = False) -> Expr:
300
"""Get unique elements in each list."""
301
302
def n_unique() -> Expr:
303
"""Count unique elements in each list."""
304
305
def get(index: int | Expr, *, null_on_oob: bool = True) -> Expr:
306
"""Get element at index."""
307
308
def first() -> Expr:
309
"""Get first element."""
310
311
def last() -> Expr:
312
"""Get last element."""
313
314
def head(n: int = 5) -> Expr:
315
"""Get first n elements."""
316
317
def tail(n: int = 5) -> Expr:
318
"""Get last n elements."""
319
320
def slice(offset: int, length: int | None = None) -> Expr:
321
"""Slice lists."""
322
323
def explode() -> Expr:
324
"""Explode list elements to separate rows."""
325
326
def contains(item: Any) -> Expr:
327
"""Check if lists contain item."""
328
329
def join(separator: str, *, ignore_nulls: bool = True) -> Expr:
330
"""Join list elements into string."""
331
332
def arg_min() -> Expr:
333
"""Get index of minimum element."""
334
335
def arg_max() -> Expr:
336
"""Get index of maximum element."""
337
338
def diff(n: int = 1, null_behavior: str = "ignore") -> Expr:
339
"""Calculate differences between consecutive elements."""
340
341
def shift(n: int = 1, *, fill_value: Any = None) -> Expr:
342
"""Shift elements by n positions."""
343
344
def drop_nulls() -> Expr:
345
"""Remove null values from lists."""
346
347
def sample(
348
n: int | None = None,
349
*,
350
fraction: float | None = None,
351
with_replacement: bool = False,
352
shuffle: bool = False,
353
seed: int | None = None
354
) -> Expr:
355
"""Sample elements from lists."""
356
357
def count_matches(element: Any, *, parallel: bool = False) -> Expr:
358
"""Count occurrences of element."""
359
360
def to_array(width: int) -> Expr:
361
"""Convert to array with fixed width."""
362
363
def to_struct(
364
n_field_strategy: str = "first_non_null",
365
fields: Callable[[int], str] | Sequence[str] | None = None
366
) -> Expr:
367
"""Convert to struct."""
368
369
def eval(expr: Expr, *, parallel: bool = False) -> Expr:
370
"""Evaluate expression on list elements."""
371
372
def all() -> Expr:
373
"""Check if all elements are true."""
374
375
def any() -> Expr:
376
"""Check if any elements are true."""
377
```
378
379
### Array Namespace (.arr)
380
381
Operations for working with fixed-size array columns.
382
383
```python { .api }
384
# Available as expr.arr.method() and series.arr.method()
385
386
def min() -> Expr:
387
"""Get minimum element in each array."""
388
389
def max() -> Expr:
390
"""Get maximum element in each array."""
391
392
def sum() -> Expr:
393
"""Sum elements in each array."""
394
395
def unique(*, maintain_order: bool = False) -> Expr:
396
"""Get unique elements in each array."""
397
398
def to_list() -> Expr:
399
"""Convert to list type."""
400
401
def get(index: int | Expr, *, null_on_oob: bool = True) -> Expr:
402
"""Get element at index."""
403
404
def first() -> Expr:
405
"""Get first element."""
406
407
def last() -> Expr:
408
"""Get last element."""
409
410
def join(separator: str, *, ignore_nulls: bool = True) -> Expr:
411
"""Join array elements into string."""
412
413
def contains(item: Any) -> Expr:
414
"""Check if arrays contain item."""
415
416
def count_matches(element: Any) -> Expr:
417
"""Count occurrences of element."""
418
419
def reverse() -> Expr:
420
"""Reverse order of elements."""
421
422
def shift(n: int = 1, *, fill_value: Any = None) -> Expr:
423
"""Shift elements by n positions."""
424
425
def slice(offset: int, length: int | None = None) -> Expr:
426
"""Slice arrays."""
427
428
def explode() -> Expr:
429
"""Explode array elements to separate rows."""
430
431
def all() -> Expr:
432
"""Check if all elements are true."""
433
434
def any() -> Expr:
435
"""Check if any elements are true."""
436
437
def sort(*, descending: bool = False, nulls_last: bool = False) -> Expr:
438
"""Sort elements in each array."""
439
440
def arg_min() -> Expr:
441
"""Get index of minimum element."""
442
443
def arg_max() -> Expr:
444
"""Get index of maximum element."""
445
446
def eval(expr: Expr, *, parallel: bool = False) -> Expr:
447
"""Evaluate expression on array elements."""
448
```
449
450
### Struct Namespace (.struct)
451
452
Operations for working with structured/nested data.
453
454
```python { .api }
455
# Available as expr.struct.method() and series.struct.method()
456
457
def field(name: str) -> Expr:
458
"""Extract field by name."""
459
460
def rename_fields(names: list[str]) -> Expr:
461
"""Rename struct fields."""
462
463
def json_encode() -> Expr:
464
"""Encode struct as JSON string."""
465
466
def with_fields(*exprs: Expr) -> Expr:
467
"""Add or update struct fields."""
468
469
def n_fields() -> int:
470
"""Get number of fields."""
471
472
def fields() -> list[str]:
473
"""Get field names."""
474
475
def schema() -> dict[str, type]:
476
"""Get struct schema."""
477
478
def to_frame() -> DataFrame:
479
"""Convert struct Series to DataFrame."""
480
```
481
482
### Categorical Namespace (.cat)
483
484
Operations for categorical data types.
485
486
```python { .api }
487
# Available as expr.cat.method() and series.cat.method()
488
489
def get_categories() -> Expr:
490
"""Get categorical categories."""
491
492
def len_bytes() -> Expr:
493
"""Get byte length of category strings."""
494
495
def len_chars() -> Expr:
496
"""Get character length of category strings."""
497
498
def set_ordering(ordering: str) -> Expr:
499
"""Set categorical ordering ('physical' or 'lexical')."""
500
501
def get_ordering() -> str:
502
"""Get current categorical ordering."""
503
504
def to_local() -> Expr:
505
"""Convert to local categorical."""
506
```
507
508
### Binary Namespace (.bin)
509
510
Operations for binary data types.
511
512
```python { .api }
513
# Available as expr.bin.method() and series.bin.method()
514
515
def contains(literal: bytes) -> Expr:
516
"""Check if binary contains literal bytes."""
517
518
def ends_with(suffix: bytes) -> Expr:
519
"""Check if binary ends with suffix."""
520
521
def starts_with(prefix: bytes) -> Expr:
522
"""Check if binary starts with prefix."""
523
524
def decode(encoding: str, *, strict: bool = True) -> Expr:
525
"""Decode "hex"- or "base64"-encoded binary values."""
526
527
def encode(encoding: str) -> Expr:
528
"""Encode binary values as "hex" or "base64" strings."""
529
530
def size() -> Expr:
531
"""Get size of binary data in bytes."""
532
```
533
534
### Name Namespace (.name)
535
536
Operations for working with expression and column names.
537
538
```python { .api }
539
# Available as expr.name.method()
540
541
def keep() -> Expr:
542
"""Keep original column name."""
543
544
def map(function: Callable[[str], str]) -> Expr:
545
"""Apply function to column name."""
546
547
def prefix(prefix: str) -> Expr:
548
"""Add prefix to column name."""
549
550
def suffix(suffix: str) -> Expr:
551
"""Add suffix to column name."""
552
553
def to_lowercase() -> Expr:
554
"""Convert column name to lowercase."""
555
556
def to_uppercase() -> Expr:
557
"""Convert column name to uppercase."""
558
```
559
560
### Meta Namespace (.meta)
561
562
Metadata operations for expressions.
563
564
```python { .api }
565
# Available as expr.meta.method()
566
567
def eq(other: Expr) -> bool:
568
"""Check expression equality."""
569
570
def ne(other: Expr) -> bool:
571
"""Check expression inequality."""
572
573
def has_multiple_outputs() -> bool:
574
"""Check if expression produces multiple columns."""
575
576
def is_column() -> bool:
577
"""Check if expression is a column reference."""
578
579
def is_regex_projection() -> bool:
580
"""Check if expression is a regex column selection."""
581
582
def output_name() -> str | None:
583
"""Get output column name if determinable."""
584
585
def pop() -> list[Expr]:
586
"""Pop and return child expressions."""
587
588
def root_names() -> list[str]:
589
"""Get root column names used by expression."""
590
591
def tree_format(*, return_as_string: bool = False) -> str | None:
592
"""Display expression tree structure."""
593
594
def undo_aliases() -> Expr:
595
"""Remove aliases from expression."""
596
597
def write_json(file: IOBase) -> None:
598
"""Write expression as JSON."""
599
```
600
601
## Usage Examples
602
603
### String Operations
604
605
```python
import polars as pl

# Sample frame: free-form text plus e-mail addresses.
df = pl.DataFrame({
    "text": ["Hello World", "POLARS rocks", " data science "],
    "emails": ["user@example.com", "admin@test.org", "info@company.net"],
})

# Basic string transformations; each expression yields one output column.
result = df.select([
    pl.col("text").str.to_lowercase().alias("lower"),
    pl.col("text").str.len_chars().alias("length"),
    pl.col("text").str.strip_chars().alias("stripped"),
    # extract() returns capture group 1 by default: everything after the "@".
    pl.col("emails").str.extract(r"@(.+)").alias("domain"),
    pl.col("text").str.contains("data").alias("has_data"),
])

# Advanced string operations
processed = df.select([
    pl.col("text").str.split(" ").alias("words"),
    pl.col("text").str.replace("World", "Universe").alias("replaced"),
    pl.col("emails").str.starts_with("admin").alias("is_admin"),
])
```
628
629
### DateTime Operations
630
631
```python
from datetime import datetime

import polars as pl

# Monthly timestamps spanning 2023.
df_dates = pl.DataFrame({
    "timestamp": pl.datetime_range(
        datetime(2023, 1, 1),
        datetime(2023, 12, 31),
        "1mo",
        eager=True,
    )
})

# Extract calendar components and derived values from each timestamp.
result = df_dates.select([
    pl.col("timestamp"),
    pl.col("timestamp").dt.year().alias("year"),
    pl.col("timestamp").dt.month().alias("month"),
    pl.col("timestamp").dt.quarter().alias("quarter"),
    pl.col("timestamp").dt.weekday().alias("weekday"),
    pl.col("timestamp").dt.strftime("%Y-%m-%d").alias("formatted"),
    pl.col("timestamp").dt.truncate("1w").alias("week_start"),
])

# Duration operations
# Column data must be Python datetime values: pl.datetime() builds an
# expression, not a value, so it cannot be used as list data here.
df_duration = pl.DataFrame({
    "start": [datetime(2023, 1, 1), datetime(2023, 6, 1)],
    "end": [datetime(2023, 1, 15), datetime(2023, 6, 30)],
})

# Subtracting two datetime columns yields a Duration column.
duration_result = df_duration.select([
    (pl.col("end") - pl.col("start")).alias("duration"),
    (pl.col("end") - pl.col("start")).dt.total_days().alias("total_days"),
])
```
662
663
### List Operations
664
665
```python
# Two list-typed columns: integers and strings.
df_lists = pl.DataFrame({
    "numbers": [[1, 2, 3], [4, 5], [6, 7, 8, 9]],
    "words": [["hello", "world"], ["polars", "rocks"], ["data", "science"]],
})

# Per-row aggregations and lookups over each list.
result = df_lists.select([
    pl.col("numbers").list.len().alias("count"),
    pl.col("numbers").list.sum().alias("sum"),
    pl.col("numbers").list.max().alias("max"),
    pl.col("numbers").list.get(0).alias("first"),
    pl.col("words").list.join(" ").alias("joined"),
    pl.col("numbers").list.contains(5).alias("has_five"),
])

# List transformations
transformed = df_lists.select([
    pl.col("numbers").list.sort().alias("sorted"),
    pl.col("numbers").list.reverse().alias("reversed"),
    pl.col("numbers").list.unique().alias("unique"),
    # slice(offset, length): up to two elements starting at index 1.
    pl.col("numbers").list.slice(1, 2).alias("middle"),
])
```
688
689
### Struct Operations
690
691
```python
# A list of dicts is stored as a single struct column.
df_struct = pl.DataFrame({
    "person": [
        {"name": "Alice", "age": 25, "city": "NYC"},
        {"name": "Bob", "age": 30, "city": "LA"},
        {"name": "Charlie", "age": 35, "city": "Chicago"},
    ]
})

# Pull individual struct fields out into their own columns.
result = df_struct.select([
    pl.col("person").struct.field("name").alias("name"),
    pl.col("person").struct.field("age").alias("age"),
    pl.col("person").struct.field("city").alias("city"),
])

# Struct modifications
modified = df_struct.select([
    # The alias "age" matches an existing field name, so that field is overwritten.
    pl.col("person").struct.with_fields([
        pl.col("person").struct.field("age").add(1).alias("age")
    ]).alias("person_older")
])
```
713
714
### Categorical Operations
715
716
```python
# Build a string column, then cast it to Categorical.
df_cat = pl.DataFrame({
    "category": ["A", "B", "A", "C", "B", "A"],
}).with_columns(
    pl.col("category").cast(pl.Categorical).alias("category")
)

# Inspect the categorical column through the .cat namespace.
result = df_cat.select([
    pl.col("category"),
    pl.col("category").cat.get_categories().alias("categories"),
    pl.col("category").cat.len_chars().alias("category_length"),
])
```
729
730
### Binary Operations
731
732
```python
# Python bytes values produce a Binary column.
df_binary = pl.DataFrame({
    "data": [b"hello", b"world", b"polars"],
})

result = df_binary.select([
    pl.col("data"),
    pl.col("data").bin.size().alias("size"),
    # bin.decode() only handles "hex"/"base64" payloads and requires an
    # explicit encoding; cast to String to read the bytes as UTF-8 text.
    pl.col("data").cast(pl.String).alias("decoded"),
    pl.col("data").bin.starts_with(b"hel").alias("starts_with_hel"),
])
```
744
745
### Expression Metadata
746
747
```python
# Create complex expression
expr = pl.col("value").filter(pl.col("category") == "A").sum().over("group")

# Examine expression metadata (no DataFrame is needed; .meta inspects the
# expression itself)
print(f"Output name: {expr.meta.output_name()}")
print(f"Root names: {expr.meta.root_names()}")
print(f"Has multiple outputs: {expr.meta.has_multiple_outputs()}")
print(f"Is column: {expr.meta.is_column()}")

# Display expression tree
print(expr.meta.tree_format(return_as_string=True))
```
760
761
### Advanced Namespace Combinations
762
763
```python
# Complex text processing with multiple namespaces
text_df = pl.DataFrame({
    "logs": [
        '{"timestamp": "2023-01-01T10:00:00", "level": "INFO", "message": "System started"}',
        '{"timestamp": "2023-01-01T10:05:00", "level": "ERROR", "message": "Connection failed"}',
        '{"timestamp": "2023-01-01T10:10:00", "level": "INFO", "message": "System recovered"}',
    ]
})

# Step 1: pull fields out of the JSON text (.str).
# Step 2: parse the timestamp string into a Datetime (.str.strptime).
# Step 3: derive values from the parsed timestamp and message (.dt / .str).
processed_logs = text_df.select([
    pl.col("logs").str.json_path_match("$.timestamp").alias("timestamp_str"),
    pl.col("logs").str.json_path_match("$.level").alias("level"),
    pl.col("logs").str.json_path_match("$.message").alias("message"),
]).with_columns([
    pl.col("timestamp_str").str.strptime(pl.Datetime, "%Y-%m-%dT%H:%M:%S").alias("timestamp")
]).select([
    pl.col("timestamp").dt.hour().alias("hour"),
    pl.col("level"),
    pl.col("message"),
    pl.col("message").str.len_chars().alias("message_length"),
])

# Multi-level list and struct operations
nested_df = pl.DataFrame({
    "data": [
        [{"values": [1, 2, 3], "label": "A"}, {"values": [4, 5], "label": "B"}],
        [{"values": [6, 7, 8, 9], "label": "C"}],
    ]
})

# list.eval() runs the inner expression on every element of each list;
# pl.element() refers to the current element (here, a struct).
result = nested_df.select([
    pl.col("data").list.len().alias("num_items"),
    pl.col("data").list.eval(
        pl.element().struct.field("values").list.sum()
    ).alias("sums_per_item"),
    pl.col("data").list.eval(
        pl.element().struct.field("label")
    ).alias("labels"),
])
```