0
# Column Selectors
1
2
Powerful column selection system for efficiently selecting columns by name patterns, data types, or column positions. Selectors provide a concise and type-safe way to work with subsets of DataFrame columns without hard-coding column names.
3
4
## Capabilities
5
6
### Basic Selectors
7
8
Core selectors for fundamental column selection operations.
9
10
```python { .api }
11
def all() -> Selector:
12
"""
13
Select all columns.
14
15
Returns:
16
Selector for all columns
17
"""
18
19
def first(*, strict: bool = True) -> Selector:
20
"""
21
Select the first column.
22
23
Parameters:
24
- strict: Raise error if no columns match
25
26
Returns:
27
Selector for first column
28
"""
29
30
def last(*, strict: bool = True) -> Selector:
31
"""
32
Select the last column.
33
34
Parameters:
35
- strict: Raise error if no columns match
36
37
Returns:
38
Selector for last column
39
"""
40
41
def by_name(*names: str | Collection[str], require_all: bool = True) -> Selector:
42
"""
43
Select columns by exact name match.
44
45
Parameters:
46
- names: Column names to select
47
- require_all: If True (default), raise an error when any of the given names is missing from the frame; if False, select whichever of the names exist
48
49
Returns:
50
Selector for named columns
51
"""
52
53
def by_index(*indices: int | Collection[int]) -> Selector:
54
"""
55
Select columns by index position.
56
57
Parameters:
58
- indices: Column indices to select (supports negative indexing)
59
60
Returns:
61
Selector for columns at specified indices
62
"""
63
64
def by_dtype(*dtypes: PolarsDataType | Collection[PolarsDataType]) -> Selector:
65
"""
66
Select columns by data type.
67
68
Parameters:
69
- dtypes: Data types to select
70
71
Returns:
72
Selector for columns with matching data types
73
"""
74
```
75
76
### Pattern Selectors
77
78
Selectors for matching columns by name patterns.
79
80
```python { .api }
81
def contains(*substring: str) -> Selector:
82
"""
83
Select columns containing substring in name.
84
85
Parameters:
86
- substring: Substrings to match
87
88
Returns:
89
Selector for columns containing any substring
90
"""
91
92
def starts_with(*prefix: str) -> Selector:
93
"""
94
Select columns starting with prefix.
95
96
Parameters:
97
- prefix: Prefixes to match
98
99
Returns:
100
Selector for columns starting with any prefix
101
"""
102
103
def ends_with(*suffix: str) -> Selector:
104
"""
105
Select columns ending with suffix.
106
107
Parameters:
108
- suffix: Suffixes to match
109
110
Returns:
111
Selector for columns ending with any suffix
112
"""
113
114
def matches(pattern: str) -> Selector:
115
"""
116
Select columns matching regex pattern.
117
118
Parameters:
119
- pattern: Regular expression pattern
120
121
Returns:
122
Selector for columns matching pattern
123
"""
124
```
125
126
### Numeric Type Selectors
127
128
Selectors for numeric column types.
129
130
```python { .api }
131
def numeric() -> Selector:
132
"""
133
Select all numeric columns (integers, floats, decimals).
134
135
Returns:
136
Selector for numeric columns
137
"""
138
139
def integer() -> Selector:
140
"""
141
Select integer columns (signed and unsigned).
142
143
Returns:
144
Selector for integer columns
145
"""
146
147
def signed_integer() -> Selector:
148
"""
149
Select signed integer columns (Int8, Int16, Int32, Int64, Int128).
150
151
Returns:
152
Selector for signed integer columns
153
"""
154
155
def unsigned_integer() -> Selector:
156
"""
157
Select unsigned integer columns (UInt8, UInt16, UInt32, UInt64).
158
159
Returns:
160
Selector for unsigned integer columns
161
"""
162
163
def float() -> Selector:
164
"""
165
Select floating-point columns (Float32, Float64).
166
167
Returns:
168
Selector for float columns
169
"""
170
171
def decimal() -> Selector:
172
"""
173
Select decimal columns.
174
175
Returns:
176
Selector for decimal columns
177
"""
178
```
179
180
### Text Type Selectors
181
182
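Selectors for text and string-based column types. Note that `alpha`, `alphanumeric`, and `digit` are listed here but match on the characters in the column *name*, not on the column's data type.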
183
184
```python { .api }
185
def string(*, include_categorical: bool = False) -> Selector:
186
"""
187
Select string columns.
188
189
Parameters:
190
- include_categorical: Include categorical columns
191
192
Returns:
193
Selector for string columns
194
"""
195
196
def binary() -> Selector:
197
"""
198
Select binary columns.
199
200
Returns:
201
Selector for binary columns
202
"""
203
204
def categorical() -> Selector:
205
"""
206
Select categorical columns.
207
208
Returns:
209
Selector for categorical columns
210
"""
211
212
def enum() -> Selector:
213
"""
214
Select enum columns.
215
216
Returns:
217
Selector for enum columns
218
"""
219
220
def alpha(ascii_only: bool = False, *, ignore_spaces: bool = False) -> Selector:
221
"""
222
Select columns whose names contain only alphabetic characters.
223
224
Parameters:
225
- ascii_only: Consider ASCII alphabetic characters only
226
- ignore_spaces: Ignore whitespace characters in column names
227
228
Returns:
229
Selector for columns with alphabetic names
230
"""
231
232
def alphanumeric(ascii_only: bool = False, *, ignore_spaces: bool = False) -> Selector:
233
"""
234
Select columns whose names contain only alphanumeric characters.
235
236
Parameters:
237
- ascii_only: Consider ASCII characters only
238
- ignore_spaces: Ignore whitespace characters in column names
239
240
Returns:
241
Selector for columns with alphanumeric names
242
"""
243
244
def digit(ascii_only: bool = False) -> Selector:
245
"""
246
Select columns whose names contain only numeric digit characters.
247
248
Parameters:
249
- ascii_only: Consider ASCII digit characters only
250
251
Returns:
252
Selector for columns with all-digit names
253
"""
254
```
255
256
### Temporal Type Selectors
257
258
Selectors for date, time, and duration column types.
259
260
```python { .api }
261
def temporal() -> Selector:
262
"""
263
Select all temporal columns (Date, Datetime, Time, Duration).
264
265
Returns:
266
Selector for temporal columns
267
"""
268
269
def date() -> Selector:
270
"""
271
Select date columns.
272
273
Returns:
274
Selector for date columns
275
"""
276
277
def datetime(time_unit: TimeUnit | None = None, time_zone: str | None = None) -> Selector:
278
"""
279
Select datetime columns, optionally filtering by time unit or timezone.
280
281
Parameters:
282
- time_unit: Filter by time unit ("ns", "us", "ms", "s")
283
- time_zone: Filter by timezone string
284
285
Returns:
286
Selector for datetime columns
287
"""
288
289
def time() -> Selector:
290
"""
291
Select time columns.
292
293
Returns:
294
Selector for time columns
295
"""
296
297
def duration(time_unit: TimeUnit | None = None) -> Selector:
298
"""
299
Select duration columns, optionally filtering by time unit.
300
301
Parameters:
302
- time_unit: Filter by time unit ("ns", "us", "ms", "s")
303
304
Returns:
305
Selector for duration columns
306
"""
307
```
308
309
### Complex Type Selectors
310
311
Selectors for nested column types (List, Array, Struct), together with the boolean selector.
312
313
```python { .api }
314
def nested() -> Selector:
315
"""
316
Select all nested columns (List, Array, Struct).
317
318
Returns:
319
Selector for nested columns
320
"""
321
322
def list(inner: Selector | None = None) -> Selector:
323
"""
324
Select list columns, optionally filtering by inner type.
325
326
Parameters:
327
- inner: Selector for inner list element type
328
329
Returns:
330
Selector for list columns
331
"""
332
333
def array(inner: Selector | None = None, *, width: int | None = None) -> Selector:
334
"""
335
Select array columns, optionally filtering by inner type or width.
336
337
Parameters:
338
- inner: Selector for inner array element type
339
- width: Filter by array width
340
341
Returns:
342
Selector for array columns
343
"""
344
345
def struct() -> Selector:
346
"""
347
Select struct columns.
348
349
Returns:
350
Selector for struct columns
351
"""
352
353
def boolean() -> Selector:
354
"""
355
Select boolean columns.
356
357
Returns:
358
Selector for boolean columns
359
"""
360
```
361
362
### Selector Operations
363
364
Utility functions for working with selectors.
365
366
```python { .api }
367
def exclude(*selectors: Selector) -> Selector:
368
"""
369
Select all columns except those matching the given selectors.
370
371
Parameters:
372
- selectors: Selectors to exclude
373
374
Returns:
375
Inverted selector
376
"""
377
378
def is_selector(obj: Any) -> bool:
379
"""
380
Check if object is a selector.
381
382
Parameters:
383
- obj: Object to check
384
385
Returns:
386
True if object is a selector
387
"""
388
389
def expand_selector(frame: DataFrame | LazyFrame, *selectors: Selector) -> list[str]:
390
"""
391
Expand selectors to concrete column names for a frame.
392
393
Parameters:
394
- frame: DataFrame or LazyFrame to evaluate selectors against
395
- selectors: Selectors to expand
396
397
Returns:
398
List of column names matching selectors
399
"""
400
401
class Selector:
402
"""
403
Column selector for advanced column selection operations.
404
405
Supports logical operations (|, &, ~) for combining selectors.
406
"""
407
408
def __or__(self, other: Selector) -> Selector:
409
"""Combine selectors with OR logic (union)."""
410
411
def __and__(self, other: Selector) -> Selector:
412
"""Combine selectors with AND logic (intersection)."""
413
414
def __invert__(self) -> Selector:
415
"""Invert selector to exclude matching columns."""
416
417
def exclude(self, *selectors: Selector) -> Selector:
418
"""Exclude other selectors from this selector."""
419
```
420
421
## Usage Examples
422
423
### Basic Column Selection
424
425
```python
426
import polars as pl
427
import polars.selectors as cs
428
429
df = pl.DataFrame({
430
"name": ["Alice", "Bob"],
431
"age": [25, 30],
432
"height": [5.5, 6.0],
433
"is_student": [True, False],
434
"graduation_date": pl.Series(["2020-05-15", "2018-12-10"]).str.to_date()
435
})
436
437
# Select numeric columns
438
numeric_cols = df.select(cs.numeric())
439
440
# Select columns by pattern
441
name_cols = df.select(cs.contains("name"))
442
443
# Combine selectors
444
text_and_bool = df.select(cs.string() | cs.boolean())
445
446
# Exclude columns
447
non_temporal = df.select(cs.all() & ~cs.temporal())
448
```
449
450
### Advanced Selector Combinations
451
452
```python
453
# Complex selector logic
454
result = df.select(
455
cs.numeric() & ~cs.float(), # Non-float numeric columns (integers and decimals)
456
cs.starts_with("is_") | cs.ends_with("_date"), # Boolean flags or dates
457
cs.by_index(0, -1) # First and last columns
458
)
459
```