0
# Column Selection
1
2
The `polars.selectors` module (conventionally imported as `cs`) is an advanced column selection system with 30+ selector functions. It supports pattern matching, data type filtering, and logical (set) operations for complex column manipulation and DataFrame querying.
3
4
## Capabilities
5
6
### Data Type Selectors
7
8
Select columns based on their data types for type-specific operations.
9
10
```python { .api }
11
import polars.selectors as cs
12
13
def by_dtype(dtypes) -> Selector:
14
"""Select columns by data type(s)."""
15
16
def numeric() -> Selector:
17
"""Select numeric columns (integers and floats)."""
18
19
def integer() -> Selector:
20
"""Select integer columns."""
21
22
def float() -> Selector:
23
"""Select floating point columns."""
24
25
def string() -> Selector:
26
"""Select string/text columns."""
27
28
def boolean() -> Selector:
29
"""Select boolean columns."""
30
31
def binary() -> Selector:
32
"""Select binary columns."""
33
34
def temporal() -> Selector:
35
"""Select temporal columns (date, datetime, time, duration)."""
36
37
def date() -> Selector:
38
"""Select date columns."""
39
40
def datetime() -> Selector:
41
"""Select datetime columns."""
42
43
def time() -> Selector:
44
"""Select time columns."""
45
46
def duration() -> Selector:
47
"""Select duration columns."""
48
```
49
50
### Pattern Selectors
51
52
Select columns based on name patterns and string matching.
53
54
```python { .api }
55
def contains(pattern: str) -> Selector:
56
"""Select columns containing pattern in name."""
57
58
def starts_with(prefix: str) -> Selector:
59
"""Select columns starting with prefix."""
60
61
def ends_with(suffix: str) -> Selector:
62
"""Select columns ending with suffix."""
63
64
def matches(pattern: str) -> Selector:
65
"""Select columns matching regex pattern."""
66
67
def by_name(names) -> Selector:
68
"""Select columns by exact names."""
69
```
70
71
### Index Selectors
72
73
Select columns based on their position in the DataFrame, or select every column at once with `all()`.
74
75
```python { .api }
76
def by_index(indices) -> Selector:
77
"""Select columns by index positions."""
78
79
def first(n: int = 1) -> Selector:
80
"""Select first n columns."""
81
82
def last(n: int = 1) -> Selector:
83
"""Select last n columns."""
84
85
def all() -> Selector:
86
"""Select all columns."""
87
```
88
89
### Logical Operations
90
91
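Combine selectors with set operators for complex selection patterns: `&` (intersection), `|` (union), `~` (complement), and `-` (difference). The utility functions below expand a selector into concrete column names and check whether an object is a selector.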
92
93
```python { .api }
94
def expand_selector(selector, *more_selectors) -> list[str]:
95
"""Expand selector to column names."""
96
97
def is_selector(obj) -> bool:
98
"""Check if object is a selector."""
99
```
100
101
## Usage Examples
102
103
### Basic Type Selection
104
105
```python
106
import polars as pl
107
import polars.selectors as cs
108
109
df = pl.DataFrame({
110
"id": [1, 2, 3],
111
"name": ["Alice", "Bob", "Charlie"],
112
"age": [25, 30, 35],
113
"salary": [50000.0, 60000.0, 70000.0],
114
"is_active": [True, False, True],
115
"created_date": ["2023-01-01", "2023-01-02", "2023-01-03"]
116
}).with_columns([
117
pl.col("created_date").str.to_date().alias("created_date")
118
])
119
120
# Select numeric columns
121
numeric_cols = df.select(cs.numeric())
122
123
# Select string columns
124
string_cols = df.select(cs.string())
125
126
# Select temporal columns
127
date_cols = df.select(cs.temporal())
128
```
129
130
### Pattern-Based Selection
131
132
```python
133
df = pl.DataFrame({
134
"user_id": [1, 2, 3],
135
"user_name": ["Alice", "Bob", "Charlie"],
136
"user_email": ["alice@example.com", "bob@example.com", "charlie@example.com"],
137
"order_total": [100.0, 200.0, 150.0],
138
"order_date": ["2023-01-01", "2023-01-02", "2023-01-03"],
139
"order_status": ["completed", "pending", "completed"]
140
})
141
142
# Select columns starting with 'user'
143
user_cols = df.select(cs.starts_with("user"))
144
145
# Select columns ending with 'date'
146
date_cols = df.select(cs.ends_with("date"))
147
148
# Select columns containing 'order'
149
order_cols = df.select(cs.contains("order"))
150
151
# Select by regex pattern
152
email_cols = df.select(cs.matches(r".*email.*"))
153
```
154
155
### Index-Based Selection
156
157
```python
158
# Select first 3 columns
159
first_cols = df.select(cs.first(3))
160
161
# Select last 2 columns
162
last_cols = df.select(cs.last(2))
163
164
# Select specific indices
165
middle_cols = df.select(cs.by_index([1, 3, 5]))
166
167
# Select by column names
168
specific_cols = df.select(cs.by_name(["user_id", "user_name"]))
169
```
170
171
### Complex Selector Combinations
172
173
```python
174
# Combine selectors with logical operations
175
# Select numeric columns that don't start with 'user'
176
result = df.select(cs.numeric() & ~cs.starts_with("user"))
177
178
# Select string or temporal columns
179
result = df.select(cs.string() | cs.temporal())
180
181
# Select columns by multiple patterns
182
result = df.select(cs.starts_with("user") | cs.ends_with("date"))
183
184
# Complex filtering: numeric columns containing 'order' or 'total'
185
result = df.select(cs.numeric() & (cs.contains("order") | cs.contains("total")))
186
```
187
188
### Practical Usage in Operations
189
190
```python
191
# Apply operations to selected column types
192
result = df.with_columns([
193
# Normalize all numeric columns
194
(cs.numeric() / cs.numeric().max()).name.suffix("_normalized"),
195
196
# Convert all string columns to uppercase
197
cs.string().str.to_uppercase().name.suffix("_upper"),
198
199
# Extract year from all date columns
200
cs.temporal().dt.year().name.suffix("_year")
201
])
202
203
# Group by operations with selectors
204
grouped = df.group_by("order_status").agg([
205
cs.numeric().mean().name.suffix("_avg"),
206
cs.string().count().name.suffix("_count")
207
])
208
209
# Select and rename columns with patterns
210
result = df.select([
211
cs.starts_with("user").name.map(lambda name: name.replace("user_", "customer_")),
212
cs.numeric()
213
])
214
```