Blazingly fast DataFrame library for Python with lazy and eager evaluation modes
—
Advanced column selection system with 30+ selector functions supporting pattern matching, data type filtering, and logical operations for complex column manipulation and DataFrame querying.
Select columns based on their data types for type-specific operations.
import polars.selectors as cs
def by_dtype(dtypes) -> Selector:
"""Select columns by data type(s)."""
def numeric() -> Selector:
"""Select numeric columns (integers and floats)."""
def integer() -> Selector:
"""Select integer columns."""
def float() -> Selector:
"""Select floating point columns."""
def string() -> Selector:
"""Select string/text columns."""
def boolean() -> Selector:
"""Select boolean columns."""
def binary() -> Selector:
"""Select binary columns."""
def temporal() -> Selector:
"""Select temporal columns (date, datetime, time, duration)."""
def date() -> Selector:
"""Select date columns."""
def datetime() -> Selector:
"""Select datetime columns."""
def time() -> Selector:
"""Select time columns."""
def duration() -> Selector:
"""Select duration columns."""

Select columns based on name patterns and string matching.
def contains(pattern: str) -> Selector:
"""Select columns containing pattern in name."""
def starts_with(prefix: str) -> Selector:
"""Select columns starting with prefix."""
def ends_with(suffix: str) -> Selector:
"""Select columns ending with suffix."""
def matches(pattern: str) -> Selector:
"""Select columns matching regex pattern."""
def by_name(names) -> Selector:
"""Select columns by exact names."""

Select columns based on their position in the DataFrame.
def by_index(indices) -> Selector:
"""Select columns by index positions."""
def first() -> Selector:
"""Select the first column."""
def last() -> Selector:
"""Select the last column."""
def all() -> Selector:
"""Select all columns."""

Combine selectors with logical operations for complex selection patterns.
def expand_selector(target, selector, *, strict: bool = True) -> tuple[str, ...]:
"""Expand a selector to the column names it matches in the target frame or schema."""
def is_selector(obj) -> bool:
"""Check if object is a selector."""

import polars as pl
import polars.selectors as cs
df = pl.DataFrame({
"id": [1, 2, 3],
"name": ["Alice", "Bob", "Charlie"],
"age": [25, 30, 35],
"salary": [50000.0, 60000.0, 70000.0],
"is_active": [True, False, True],
"created_date": ["2023-01-01", "2023-01-02", "2023-01-03"]
}).with_columns([
pl.col("created_date").str.to_date().alias("created_date")
])
# Select numeric columns
numeric_cols = df.select(cs.numeric())
# Select string columns
string_cols = df.select(cs.string())
# Select temporal columns
date_cols = df.select(cs.temporal())

df = pl.DataFrame({
"user_id": [1, 2, 3],
"user_name": ["Alice", "Bob", "Charlie"],
"user_email": ["alice@example.com", "bob@example.com", "charlie@example.com"],
"order_total": [100.0, 200.0, 150.0],
"order_date": ["2023-01-01", "2023-01-02", "2023-01-03"],
"order_status": ["completed", "pending", "completed"]
})
# Select columns starting with 'user'
user_cols = df.select(cs.starts_with("user"))
# Select columns ending with 'date'
date_cols = df.select(cs.ends_with("date"))
# Select columns containing 'order'
order_cols = df.select(cs.contains("order"))
# Select by regex pattern
email_cols = df.select(cs.matches(r".*email.*"))

# Select first 3 columns
first_cols = df.select(cs.by_index([0, 1, 2]))
# Select last 2 columns
last_cols = df.select(cs.by_index([-2, -1]))
# Select specific indices
middle_cols = df.select(cs.by_index([1, 3, 5]))
# Select by column names
specific_cols = df.select(cs.by_name(["user_id", "user_name"]))

# Combine selectors with logical operations
# Select numeric columns that don't start with 'user'
result = df.select(cs.numeric() & ~cs.starts_with("user"))
# Select string or temporal columns
result = df.select(cs.string() | cs.temporal())
# Select columns by multiple patterns
result = df.select(cs.starts_with("user") | cs.ends_with("date"))
# Complex filtering: numeric columns containing 'order' or 'total'
result = df.select(cs.numeric() & (cs.contains("order") | cs.contains("total")))

# Apply operations to selected column types
result = df.with_columns([
# Normalize all numeric columns
(cs.numeric() / cs.numeric().max()).name.suffix("_normalized"),
# Convert all string columns to uppercase
cs.string().str.to_uppercase().name.suffix("_upper"),
# Extract year from all date columns
cs.temporal().dt.year().name.suffix("_year")
])
# Group by operations with selectors
grouped = df.group_by("order_status").agg([
cs.numeric().mean().name.suffix("_avg"),
cs.string().count().name.suffix("_count")
])
# Select and rename columns with patterns
result = df.select([
cs.starts_with("user").name.map(lambda name: name.replace("user_", "customer_")),
cs.numeric()
])

Install with Tessl CLI
npx tessl i tessl/pypi-polars