or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

column-selection.md, configuration.md, core-data-structures.md, data-conversion.md, data-types.md, error-handling.md, functions-expressions.md, index.md, io-operations.md, sql-interface.md

column-selection.md (docs/)

0

# Column Selection

1

2

Advanced column selection system with 30+ selector functions supporting pattern matching, data type filtering, and logical operations for complex column manipulation and DataFrame querying.

3

4

## Capabilities

5

6

### Data Type Selectors

7

8

Select columns based on their data types for type-specific operations.

9

10

```python { .api }

11

import polars.selectors as cs

12

13

def by_dtype(dtypes) -> Selector:

14

"""Select columns by data type(s)."""

15

16

def numeric() -> Selector:

17

"""Select numeric columns (integers and floats)."""

18

19

def integer() -> Selector:

20

"""Select integer columns."""

21

22

def float() -> Selector:

23

"""Select floating point columns."""

24

25

def string() -> Selector:

26

"""Select string/text columns."""

27

28

def boolean() -> Selector:

29

"""Select boolean columns."""

30

31

def binary() -> Selector:

32

"""Select binary columns."""

33

34

def temporal() -> Selector:

35

"""Select temporal columns (date, datetime, time, duration)."""

36

37

def date() -> Selector:

38

"""Select date columns."""

39

40

def datetime() -> Selector:

41

"""Select datetime columns."""

42

43

def time() -> Selector:

44

"""Select time columns."""

45

46

def duration() -> Selector:

47

"""Select duration columns."""

48

```

49

50

### Pattern Selectors

51

52

Select columns based on name patterns and string matching.

53

54

```python { .api }

55

def contains(pattern: str) -> Selector:

56

"""Select columns containing pattern in name."""

57

58

def starts_with(prefix: str) -> Selector:

59

"""Select columns starting with prefix."""

60

61

def ends_with(suffix: str) -> Selector:

62

"""Select columns ending with suffix."""

63

64

def matches(pattern: str) -> Selector:

65

"""Select columns matching regex pattern."""

66

67

def by_name(names) -> Selector:

68

"""Select columns by exact names."""

69

```

70

71

### Index Selectors

72

73

Select columns based on their position in the DataFrame.

74

75

```python { .api }

76

def by_index(indices) -> Selector:

77

"""Select columns by index positions."""

78

79

def first(n: int = 1) -> Selector:

80

"""Select first n columns."""

81

82

def last(n: int = 1) -> Selector:

83

"""Select last n columns."""

84

85

def all() -> Selector:

86

"""Select all columns."""

87

```

88

89

### Logical Operations

90

91

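Combine selectors with the operators `&` (and), `|` (or), and `~` (not) to build complex selection patterns, as shown under "Complex Selector Combinations" below. The helper functions listed here expand a selector into concrete column names and check whether an object is a selector.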

92

93

```python { .api }

94

def expand_selector(selector, *more_selectors) -> list[str]:

95

"""Expand selector to column names."""

96

97

def is_selector(obj) -> bool:

98

"""Check if object is a selector."""

99

```

100

101

## Usage Examples

102

103

### Basic Type Selection

104

105

```python

106

import polars as pl

107

import polars.selectors as cs

108

109

df = pl.DataFrame({

110

"id": [1, 2, 3],

111

"name": ["Alice", "Bob", "Charlie"],

112

"age": [25, 30, 35],

113

"salary": [50000.0, 60000.0, 70000.0],

114

"is_active": [True, False, True],

115

"created_date": ["2023-01-01", "2023-01-02", "2023-01-03"]

116

}).with_columns([

117

pl.col("created_date").str.to_date().alias("created_date")

118

])

119

120

# Select numeric columns

121

numeric_cols = df.select(cs.numeric())

122

123

# Select string columns

124

string_cols = df.select(cs.string())

125

126

# Select temporal columns

127

date_cols = df.select(cs.temporal())

128

```

129

130

### Pattern-Based Selection

131

132

```python

133

df = pl.DataFrame({

134

"user_id": [1, 2, 3],

135

"user_name": ["Alice", "Bob", "Charlie"],

136

"user_email": ["alice@example.com", "bob@example.com", "charlie@example.com"],

137

"order_total": [100.0, 200.0, 150.0],

138

"order_date": ["2023-01-01", "2023-01-02", "2023-01-03"],

139

"order_status": ["completed", "pending", "completed"]

140

})

141

142

# Select columns starting with 'user'

143

user_cols = df.select(cs.starts_with("user"))

144

145

# Select columns ending with 'date'

146

date_cols = df.select(cs.ends_with("date"))

147

148

# Select columns containing 'order'

149

order_cols = df.select(cs.contains("order"))

150

151

# Select by regex pattern

152

email_cols = df.select(cs.matches(r".*email.*"))

153

```

154

155

### Index-Based Selection

156

157

```python

158

# Select first 3 columns

159

first_cols = df.select(cs.first(3))

160

161

# Select last 2 columns

162

last_cols = df.select(cs.last(2))

163

164

# Select specific indices

165

middle_cols = df.select(cs.by_index([1, 3, 5]))

166

167

# Select by column names

168

specific_cols = df.select(cs.by_name(["user_id", "user_name"]))

169

```

170

171

### Complex Selector Combinations

172

173

```python

174

# Combine selectors with logical operations

175

# Select numeric columns that don't start with 'user'

176

result = df.select(cs.numeric() & ~cs.starts_with("user"))

177

178

# Select string or temporal columns

179

result = df.select(cs.string() | cs.temporal())

180

181

# Select columns by multiple patterns

182

result = df.select(cs.starts_with("user") | cs.ends_with("date"))

183

184

# Complex filtering: numeric columns containing 'order' or 'total'

185

result = df.select(cs.numeric() & (cs.contains("order") | cs.contains("total")))

186

```

187

188

### Practical Usage in Operations

189

190

```python

191

# Apply operations to selected column types

192

result = df.with_columns([

193

# Normalize all numeric columns

194

(cs.numeric() / cs.numeric().max()).name.suffix("_normalized"),

195

196

# Convert all string columns to uppercase

197

cs.string().str.to_uppercase().name.suffix("_upper"),

198

199

# Extract year from all date columns

200

cs.temporal().dt.year().name.suffix("_year")

201

])

202

203

# Group by operations with selectors

204

grouped = df.group_by("order_status").agg([

205

cs.numeric().mean().name.suffix("_avg"),

206

cs.string().count().name.suffix("_count")

207

])

208

209

# Select and rename columns with patterns

210

result = df.select([

211

cs.starts_with("user").name.map(lambda name: name.replace("user_", "customer_")),

212

cs.numeric()

213

])

214

```