CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-polars-u64-idx

Blazingly fast DataFrame library with 64-bit index support for handling datasets with more than 4.2 billion rows

Pending
Overview
Eval results
Files

docs/selectors.md

Column Selectors

Powerful column selection system for efficiently selecting columns by name patterns, data types, or column positions. Selectors provide a concise and type-safe way to work with subsets of DataFrame columns without hard-coding column names.

Capabilities

Basic Selectors

Core selectors for fundamental column selection operations.

def all() -> Selector:
    """
    Select every column in the frame.

    Note: this name shadows the Python builtin `all`; access it through the
    selectors module (e.g. `cs.all()`) rather than star-importing it.

    Returns:
    Selector matching all columns
    """

def first(*, strict: bool = True) -> Selector:
    """
    Select the first column of the frame.

    Parameters:
    - strict: If True (the default), raise an error if no columns match

    Returns:
    Selector for the first column
    """

def last(*, strict: bool = True) -> Selector:
    """
    Select the last column of the frame.

    Parameters:
    - strict: If True (the default), raise an error if no columns match

    Returns:
    Selector for the last column
    """

def by_name(*names: str | Collection[str], require_all: bool = True) -> Selector:
    """
    Select columns whose names exactly match the given names.

    Parameters:
    - names: One or more column names, passed individually or as collections of names
    - require_all: If True (the default), every given name must exist in the frame;
      if False, select whichever of the given names are present

    Returns:
    Selector for the named columns
    """

def by_index(*indices: int | Collection[int]) -> Selector:
    """
    Select columns by their position in the frame.

    Parameters:
    - indices: One or more column positions, passed individually or as
      collections; negative values count back from the last column

    Returns:
    Selector for the columns at the specified positions
    """

def by_dtype(*dtypes: PolarsDataType | Collection[PolarsDataType]) -> Selector:
    """
    Select columns whose data type is one of the given types.

    Parameters:
    - dtypes: One or more Polars data types, passed individually or as collections

    Returns:
    Selector for columns with a matching data type
    """

Pattern Selectors

Selectors for matching columns by name patterns.

def contains(*substring: str) -> Selector:
    """
    Select columns whose names contain any of the given substrings.

    Parameters:
    - substring: One or more substrings to look for in column names
      (matched literally; use `matches` for regular expressions)

    Returns:
    Selector for columns whose name contains at least one substring
    """

def starts_with(*prefix: str) -> Selector:
    """
    Select columns whose names start with any of the given prefixes.

    Parameters:
    - prefix: One or more prefixes to match against column names

    Returns:
    Selector for columns whose name starts with at least one prefix
    """

def ends_with(*suffix: str) -> Selector:
    """
    Select columns whose names end with any of the given suffixes.

    Parameters:
    - suffix: One or more suffixes to match against column names

    Returns:
    Selector for columns whose name ends with at least one suffix
    """

def matches(pattern: str) -> Selector:
    """
    Select columns whose names match a regular expression.

    Parameters:
    - pattern: Regular expression applied to column names

    Returns:
    Selector for columns whose name matches the pattern
    """

Numeric Type Selectors

Selectors for numeric column types.

def numeric() -> Selector:
    """
    Select all numeric columns: integer, floating-point, and decimal types.

    Returns:
    Selector for numeric columns
    """

def integer() -> Selector:
    """
    Select all integer columns, both signed and unsigned.

    Returns:
    Selector for integer columns
    """

def signed_integer() -> Selector:
    """
    Select signed integer columns (Int8, Int16, Int32, Int64, Int128).

    Unsigned integer columns are not matched; use `unsigned_integer` or
    `integer` for those.

    Returns:
    Selector for signed integer columns
    """

def unsigned_integer() -> Selector:
    """
    Select unsigned integer columns (UInt8, UInt16, UInt32, UInt64).

    Signed integer columns are not matched; use `signed_integer` or
    `integer` for those.

    Returns:
    Selector for unsigned integer columns
    """

def float() -> Selector:
    """
    Select floating-point columns (Float32, Float64).

    Note: this name shadows the Python builtin `float`; access it through the
    selectors module (e.g. `cs.float()`) rather than star-importing it.

    Returns:
    Selector for float columns
    """

def decimal() -> Selector:
    """
    Select all Decimal columns.

    Returns:
    Selector for decimal columns
    """

Text Type Selectors

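Selectors for string-like column types (String, Binary, Categorical, Enum), plus selectors that match on the characters in column names (alpha, alphanumeric, digit).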

def string(*, include_categorical: bool = False) -> Selector:
    """
    Select String columns, optionally together with Categorical columns.

    Parameters:
    - include_categorical: If True, also select Categorical columns
      (default: False)

    Returns:
    Selector for string columns
    """

def binary() -> Selector:
    """
    Select all Binary columns.

    Returns:
    Selector for binary columns
    """

def categorical() -> Selector:
    """
    Select all Categorical columns.

    Returns:
    Selector for categorical columns
    """

def enum() -> Selector:
    """
    Select all Enum columns.

    Returns:
    Selector for enum columns
    """

def alpha(ascii_only: bool = False, *, ignore_spaces: bool = False) -> Selector:
    """
    Select columns whose names consist only of alphabetic characters.

    This selector matches on column names, not on column data types or on
    the values stored in the columns.

    Parameters:
    - ascii_only: If True, only ASCII letters count as alphabetic; otherwise
      any Unicode letter is accepted
    - ignore_spaces: If True, spaces in a column name are ignored and only
      the remaining characters are checked

    Returns:
    Selector for columns with alphabetic names
    """

def alphanumeric(ascii_only: bool = False, *, ignore_spaces: bool = False) -> Selector:
    """
    Select columns whose names consist only of letters and digits.

    This selector matches on column names, not on column data types or on
    the values stored in the columns.

    Parameters:
    - ascii_only: If True, only ASCII letters and digits are accepted;
      otherwise any Unicode letter or digit is accepted
    - ignore_spaces: If True, spaces in a column name are ignored and only
      the remaining characters are checked

    Returns:
    Selector for columns with alphanumeric names
    """

def digit(ascii_only: bool = False) -> Selector:
    """
    Select columns whose names consist only of digit characters.

    This selector matches on column names, not on column data types or on
    the values stored in the columns.

    Parameters:
    - ascii_only: If True, only the ASCII digits 0-9 are accepted; otherwise
      any Unicode digit is accepted

    Returns:
    Selector for columns with all-digit names
    """

Temporal Type Selectors

Selectors for date, time, and duration column types.

def temporal() -> Selector:
    """
    Select all temporal columns: Date, Datetime, Time, and Duration.

    Returns:
    Selector for temporal columns
    """

def date() -> Selector:
    """
    Select all Date columns (Datetime columns are not included; see `datetime`).

    Returns:
    Selector for date columns
    """

def datetime(time_unit: TimeUnit | None = None, time_zone: str | None = None) -> Selector:
    """
    Select Datetime columns, optionally filtering by time unit or time zone.

    Parameters:
    - time_unit: Restrict to Datetime columns with this time unit
      ("ns", "us", or "ms"); None places no restriction on the unit
    - time_zone: Restrict to Datetime columns with this time zone string

    Returns:
    Selector for datetime columns

    NOTE(review): upstream Polars documents the time_zone default as
    ("*", None) (any time zone or none), and treats None as "time-zone-naive
    columns only" — confirm the default shown here against the installed version.
    """

def time() -> Selector:
    """
    Select all Time (time-of-day) columns.

    Returns:
    Selector for time columns
    """

def duration(time_unit: TimeUnit | None = None) -> Selector:
    """
    Select Duration columns, optionally filtering by time unit.

    Parameters:
    - time_unit: Restrict to Duration columns with this time unit
      ("ns", "us", or "ms"); None places no restriction on the unit

    Returns:
    Selector for duration columns
    """

Complex Type Selectors

Selectors for nested column types (List, Array, Struct); the boolean selector is also listed here.

def nested() -> Selector:
    """
    Select all nested columns: List, Array, and Struct.

    Returns:
    Selector for nested columns
    """

def list(inner: Selector | None = None) -> Selector:
    """
    Select List columns, optionally filtering by the type of their elements.

    Note: this name shadows the Python builtin `list`; access it through the
    selectors module (e.g. `cs.list()`) rather than star-importing it.

    Parameters:
    - inner: Selector that the list's element type must match; None (the
      default) applies no element-type filter

    Returns:
    Selector for list columns
    """

def array(inner: Selector | None = None, *, width: int | None = None) -> Selector:
    """
    Select Array columns, optionally filtering by element type or width.

    Parameters:
    - inner: Selector that the array's element type must match; None (the
      default) applies no element-type filter
    - width: Restrict to arrays of exactly this width; None (the default)
      applies no width filter

    Returns:
    Selector for array columns
    """

def struct() -> Selector:
    """
    Select all Struct columns.

    Returns:
    Selector for struct columns
    """

def boolean() -> Selector:
    """
    Select all Boolean columns.

    Returns:
    Selector for boolean columns
    """

Selector Operations

Utility functions for working with selectors, and the Selector class with its combining operators.

def exclude(*selectors: Selector) -> Selector:
    """
    Select every column except those matched by the given selectors.

    Parameters:
    - selectors: Selectors whose matching columns are left out

    Returns:
    Selector for all columns not matched by the given selectors

    NOTE(review): upstream Polars documents exclude as also accepting column
    names and data types — confirm against the installed version.
    """

def is_selector(obj: Any) -> bool:
    """
    Report whether an object is a selector.

    Parameters:
    - obj: Object to check

    Returns:
    True if obj is a selector, False otherwise
    """

def expand_selector(frame: DataFrame | LazyFrame, *selectors: Selector) -> list[str]:
    """
    Resolve selectors to the concrete column names they match in a frame.

    Parameters:
    - frame: DataFrame or LazyFrame whose columns the selectors are evaluated against
    - selectors: Selectors to expand

    Returns:
    Names of the columns matched by the selectors

    NOTE(review): upstream Polars documents this function as
    expand_selector(target, selector, *, strict=True) returning a tuple of
    names — confirm this signature and return type against the installed version.
    """

class Selector:
    """
    Column selector that can be combined with other selectors.

    Supports the operators | (union), & (intersection), and ~ (complement),
    plus an `exclude` method for removing other selectors' matches.
    """
    
    def __or__(self, other: Selector) -> Selector:
        """Return the union: columns matched by this selector or by other."""
    
    def __and__(self, other: Selector) -> Selector:
        """Return the intersection: columns matched by both this selector and other."""
    
    def __invert__(self) -> Selector:
        """Return the complement: columns that this selector does not match."""
    
    def exclude(self, *selectors: Selector) -> Selector:
        """Return this selector's columns minus those matched by the given selectors."""

Usage Examples

Basic Column Selection

from datetime import date

import polars as pl
import polars.selectors as cs

# Small example frame with one column of each common kind.
df = pl.DataFrame({
    "name": ["Alice", "Bob"],
    "age": [25, 30],
    "height": [5.5, 6.0],
    "is_student": [True, False],
    # Use Python date objects here: pl.date(...) builds an expression, not a
    # date value, so it cannot be used as literal column data.
    "graduation_date": [date(2020, 5, 15), date(2018, 12, 10)],
})

# Select numeric columns ("age", "height")
numeric_cols = df.select(cs.numeric())

# Select columns by name pattern ("name")
name_cols = df.select(cs.contains("name"))

# Combine selectors with | (union): "name" and "is_student"
text_and_bool = df.select(cs.string() | cs.boolean())

# Exclude columns: everything except the temporal "graduation_date"
non_temporal = df.select(cs.all() & ~cs.temporal())

Advanced Selector Combinations

# Complex selector logic.
# The selectors below overlap: "graduation_date" ends with "_date" and is also
# the last column. Passing overlapping selectors as separate arguments to
# select() requests the same column more than once, which raises a
# duplicate-column error, so they are combined into one selector with | (union).
result = df.select(
    (cs.numeric() & ~cs.float())  # Numeric but not float (here: "age")
    | cs.starts_with("is_")  # Boolean flags
    | cs.ends_with("_date")  # Dates
    | cs.by_index(0, -1)  # First and last columns
)

Install with Tessl CLI

npx tessl i tessl/pypi-polars-u64-idx@1.33.1

docs

config-utilities.md

core-data-structures.md

data-types.md

expressions.md

functions.md

index.md

io-operations.md

selectors.md

sql-interface.md

tile.json