CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-polars-u64-idx

Blazingly fast DataFrame library with 64-bit index support for handling datasets with more than 4.2 billion rows

Pending
Overview
Eval results
Files

docs/expressions.md

Expressions and Column Operations

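Polars provides a powerful expression system for column transformations and aggregations that works the same way on both DataFrame and LazyFrame. Expressions are composable and lazily evaluated: constructing one performs no computation until it is executed in a context such as `select`, `with_columns`, `filter`, or `group_by(...).agg(...)`.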

Capabilities

Expression Construction

Core functions for creating expressions that operate on columns and values.

def col(name: str | DataType) -> Expr:
    """
    Create column expression.
    
    Parameters:
    - name: Column name or data type selector
    
    Returns:
    Column expression
    """

def lit(value: Any, dtype: DataType | None = None) -> Expr:
    """
    Create literal value expression.
    
    Parameters:
    - value: Literal value
    - dtype: Optional data type
    
    Returns:
    Literal expression
    """

def when(predicate: Expr) -> When:
    """
    Create conditional expression.
    
    Parameters:
    - predicate: Boolean expression condition
    
    Returns:
    When object for then/otherwise chaining
    """

class When:
    def then(self, statement: Expr) -> Then:
        """Value when condition is true."""
    
class Then:
    """
    Result of `When.then()`.

    Close the conditional with `otherwise()`, or add another branch with
    `when()`. If `otherwise()` is never called, rows that match no condition
    evaluate to null.
    """

    def when(self, predicate: Expr) -> ChainedWhen:
        """
        Add another condition to the chain.

        The new condition is only consulted for rows where every earlier
        condition was false.

        Parameters:
        - predicate: Boolean expression condition

        Returns:
        ChainedWhen object for then chaining
        """

    def otherwise(self, statement: Expr) -> Expr:
        """Value when no preceding condition is true."""

class ChainedWhen:
    """Result of calling `when()` on a `Then` or `ChainedThen`."""

    def then(self, statement: Expr) -> ChainedThen:
        """Value when this chained condition is true."""

class ChainedThen:
    """Result of `ChainedWhen.then()`; supports further `when()` branches."""

    def when(self, predicate: Expr) -> ChainedWhen:
        """Add another condition to the chain."""

    def otherwise(self, statement: Expr) -> Expr:
        """Value when no preceding condition is true."""

Expression Class

The main Expression class with methods for column operations, transformations, and aggregations.

class Expr:
    def alias(self, name: str) -> Expr:
        """
        Assign a name to the expression.
        
        Parameters:
        - name: New column name
        
        Returns:
        Aliased expression
        """
    
    def cast(self, dtype: DataType | type[Any], *, strict: bool = True) -> Expr:
        """
        Cast expression to different data type.
        
        Parameters:
        - dtype: Target data type
        - strict: Whether to raise on cast failure
        
        Returns:
        Cast expression
        """
    
    def filter(self, predicate: Expr) -> Expr:
        """
        Filter expression based on predicate.
        
        Parameters:
        - predicate: Boolean expression for filtering
        
        Returns:
        Filtered expression
        """
    
    def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Expr:
        """
        Sort expression values.
        
        Parameters:
        - descending: Sort in descending order
        - nulls_last: Place nulls at end
        
        Returns:
        Sorted expression
        """
    
    def reverse(self) -> Expr:
        """Reverse expression values."""
    
    def unique(self, *, maintain_order: bool = False) -> Expr:
        """
        Get unique values.
        
        Parameters:
        - maintain_order: Maintain original order
        
        Returns:
        Expression with unique values
        """
    
    def drop_nulls(self) -> Expr:
        """Drop null values from expression."""
    
    def fill_null(self, value: Any | Expr, *, strategy: FillNullStrategy | None = None) -> Expr:
        """
        Fill null values.
        
        Parameters:
        - value: Fill value or expression
        - strategy: Fill strategy ("forward", "backward", "min", "max", "mean", "zero", "one")
        
        Returns:
        Expression with nulls filled
        """
    
    def fill_nan(self, value: Any | Expr) -> Expr:
        """Fill NaN values."""
    
    def is_null(self) -> Expr:
        """Check for null values."""
    
    def is_not_null(self) -> Expr:
        """Check for non-null values."""
    
    def is_nan(self) -> Expr:
        """Check for NaN values."""
    
    def is_not_nan(self) -> Expr:
        """Check for non-NaN values."""
    
    def is_finite(self) -> Expr:
        """Check for finite values."""
    
    def is_infinite(self) -> Expr:
        """Check for infinite values."""

Aggregation Methods

Statistical and aggregation operations on expressions.

class Expr:
    def sum(self) -> Expr:
        """Sum all values."""
    
    def mean(self) -> Expr:
        """Calculate mean."""
    
    def median(self) -> Expr:
        """Calculate median."""
    
    def max(self) -> Expr:
        """Get maximum value."""
    
    def min(self) -> Expr:
        """Get minimum value."""
    
    def std(self, ddof: int = 1) -> Expr:
        """
        Calculate standard deviation.
        
        Parameters:
        - ddof: Delta degrees of freedom
        
        Returns:
        Standard deviation expression
        """
    
    def var(self, ddof: int = 1) -> Expr:
        """
        Calculate variance.
        
        Parameters:
        - ddof: Delta degrees of freedom
        
        Returns:
        Variance expression
        """
    
    def quantile(self, quantile: float | Expr, *, interpolation: RollingInterpolationMethod = "nearest") -> Expr:
        """
        Calculate quantile.
        
        Parameters:
        - quantile: Quantile value (0.0 to 1.0)
        - interpolation: Interpolation method
        
        Returns:
        Quantile expression
        """
    
    def count(self) -> Expr:
        """Count non-null values."""
    
    def n_unique(self) -> Expr:
        """Count unique values."""
    
    def null_count(self) -> Expr:
        """Count null values."""
    
    def first(self) -> Expr:
        """Get first value."""
    
    def last(self) -> Expr:
        """Get last value."""
    
    def head(self, n: int | Expr = 10) -> Expr:
        """Get first n values."""
    
    def tail(self, n: int | Expr = 10) -> Expr:
        """Get last n values."""

Arithmetic Operations

Mathematical operations on expressions.

class Expr:
    def __add__(self, other: Any) -> Expr:
        """Addition operator (+)."""
    
    def __sub__(self, other: Any) -> Expr:
        """Subtraction operator (-)."""
    
    def __mul__(self, other: Any) -> Expr:
        """Multiplication operator (*)."""
    
    def __truediv__(self, other: Any) -> Expr:
        """Division operator (/)."""
    
    def __floordiv__(self, other: Any) -> Expr:
        """Floor division operator (//)."""
    
    def __mod__(self, other: Any) -> Expr:
        """Modulo operator (%)."""
    
    def __pow__(self, other: Any) -> Expr:
        """Power operator (**)."""
    
    def abs(self) -> Expr:
        """Absolute value."""
    
    def sqrt(self) -> Expr:
        """Square root."""
    
    def ceil(self) -> Expr:
        """Ceiling function."""
    
    def floor(self) -> Expr:
        """Floor function."""
    
    def round(self, decimals: int | Expr = 0) -> Expr:
        """
        Round to specified decimal places.
        
        Parameters:
        - decimals: Number of decimal places
        
        Returns:
        Rounded expression
        """
    
    def clip(self, lower_bound: Any | Expr | None = None, upper_bound: Any | Expr | None = None) -> Expr:
        """
        Clip values to specified bounds.
        
        Parameters:
        - lower_bound: Lower bound
        - upper_bound: Upper bound
        
        Returns:
        Clipped expression
        """

Comparison Operations

Comparison and logical operations on expressions.

class Expr:
    def __eq__(self, other: Any) -> Expr:
        """Equality operator (==)."""
    
    def __ne__(self, other: Any) -> Expr:
        """Not equal operator (!=)."""
    
    def __lt__(self, other: Any) -> Expr:
        """Less than operator (<)."""
    
    def __le__(self, other: Any) -> Expr:
        """Less than or equal operator (<=)."""
    
    def __gt__(self, other: Any) -> Expr:
        """Greater than operator (>)."""
    
    def __ge__(self, other: Any) -> Expr:
        """Greater than or equal operator (>=)."""
    
    def __and__(self, other: Any) -> Expr:
        """Logical AND operator (&)."""
    
    def __or__(self, other: Any) -> Expr:
        """Logical OR operator (|)."""
    
    def __xor__(self, other: Any) -> Expr:
        """Logical XOR operator (^)."""
    
    def __invert__(self) -> Expr:
        """Logical NOT operator (~)."""
    
    def is_in(self, other: Any) -> Expr:
        """Check if values are in collection."""
    
    def is_between(self, lower_bound: Any | Expr, upper_bound: Any | Expr, closed: ClosedInterval = "both") -> Expr:
        """
        Check if values are between bounds.
        
        Parameters:
        - lower_bound: Lower bound
        - upper_bound: Upper bound  
        - closed: Include bounds ("both", "left", "right", "none")
        
        Returns:
        Boolean expression
        """

String Operations

String manipulation methods available on string expressions.

class Expr:
    @property
    def str(self) -> ExprStringNameSpace:
        """Access string methods."""

class ExprStringNameSpace:
    def len_bytes(self) -> Expr:
        """Get byte length of strings."""
    
    def len_chars(self) -> Expr:
        """Get character length of strings."""
    
    def contains(self, pattern: str | Expr, *, literal: bool = False, strict: bool = True) -> Expr:
        """
        Check if string contains pattern.
        
        Parameters:
        - pattern: Pattern to search for
        - literal: Treat pattern as literal string
        - strict: Raise on invalid regex
        
        Returns:
        Boolean expression
        """
    
    def starts_with(self, prefix: str | Expr) -> Expr:
        """Check if string starts with prefix."""
    
    def ends_with(self, suffix: str | Expr) -> Expr:
        """Check if string ends with suffix."""
    
    def to_lowercase(self) -> Expr:
        """Convert to lowercase."""
    
    def to_uppercase(self) -> Expr:
        """Convert to uppercase."""
    
    def strip_chars(self, characters: str | None = None) -> Expr:
        """Strip characters from both ends."""
    
    def split(self, by: str | Expr, *, inclusive: bool = False) -> Expr:
        """
        Split string by delimiter.
        
        Parameters:
        - by: Delimiter
        - inclusive: Include delimiter in result
        
        Returns:
        List expression
        """
    
    def slice(self, offset: int | Expr, length: int | Expr | None = None) -> Expr:
        """
        Slice string.
        
        Parameters:
        - offset: Start position
        - length: Slice length
        
        Returns:
        Sliced string expression
        """
    
    def replace(self, pattern: str | Expr, value: str | Expr, *, literal: bool = False, n: int = 1) -> Expr:
        """
        Replace pattern in string.
        
        Parameters:
        - pattern: Pattern to replace
        - value: Replacement value
        - literal: Treat pattern as literal
        - n: Maximum number of replacements
        
        Returns:
        String expression with replacements
        """

Temporal Operations

Date and time operations on temporal expressions.

class Expr:
    @property
    def dt(self) -> ExprDateTimeNameSpace:
        """Access datetime methods."""

class ExprDateTimeNameSpace:
    def year(self) -> Expr:
        """Extract year."""
    
    def month(self) -> Expr:
        """Extract month."""
    
    def day(self) -> Expr:
        """Extract day."""
    
    def hour(self) -> Expr:
        """Extract hour."""
    
    def minute(self) -> Expr:
        """Extract minute."""
    
    def second(self) -> Expr:
        """Extract second."""
    
    def weekday(self) -> Expr:
        """Get weekday (0=Monday, 6=Sunday)."""
    
    def week(self) -> Expr:
        """Get week number."""
    
    def strftime(self, format: str) -> Expr:
        """
        Format datetime as string.
        
        Parameters:
        - format: Format string
        
        Returns:
        Formatted string expression
        """
    
    def truncate(self, every: str | timedelta) -> Expr:
        """
        Truncate to specified time unit.
        
        Parameters:
        - every: Time unit ("1d", "1h", "1m", "1s", etc.)
        
        Returns:
        Truncated datetime expression
        """
    
    def with_time_unit(self, time_unit: TimeUnit) -> Expr:
        """
        Change time unit.
        
        Parameters:
        - time_unit: New time unit ("ns", "us", "ms", "s")
        
        Returns:
        Expression with new time unit
        """

List Operations

Operations on list/array expressions.

class Expr:
    @property
    def list(self) -> ExprListNameSpace:
        """Access list methods."""

class ExprListNameSpace:
    def len(self) -> Expr:
        """Get list length."""
    
    def sum(self) -> Expr:
        """Sum list elements."""
    
    def max(self) -> Expr:
        """Get maximum element."""
    
    def min(self) -> Expr:
        """Get minimum element."""
    
    def mean(self) -> Expr:
        """Calculate mean of elements."""
    
    def first(self) -> Expr:
        """Get first element."""
    
    def last(self) -> Expr:
        """Get last element."""
    
    def get(self, index: int | Expr, *, null_on_oob: bool = True) -> Expr:
        """
        Get element at index.
        
        Parameters:
        - index: Element index
        - null_on_oob: Return null if out of bounds
        
        Returns:
        Element expression
        """
    
    def slice(self, offset: int | Expr, length: int | Expr | None = None) -> Expr:
        """Slice list."""
    
    def head(self, n: int | Expr = 5) -> Expr:
        """Get first n elements."""
    
    def tail(self, n: int | Expr = 5) -> Expr:
        """Get last n elements."""
    
    def contains(self, item: Any | Expr) -> Expr:
        """Check if list contains item."""
    
    def explode(self) -> Expr:
        """Explode list elements to separate rows."""

Usage Examples

Basic Expression Usage

import polars as pl

df = pl.DataFrame({
    "name": ["Alice", "Bob", "Charlie"],
    "age": [25, 30, 35],
    "salary": [50000, 60000, 70000]
})

# Column selection and transformation
result = df.select([
    pl.col("name"),
    pl.col("age").alias("years"),
    (pl.col("salary") / 1000).alias("salary_k"),
    pl.col("salary").cast(pl.Float64)
])

# Conditional expressions
result = df.with_columns([
    pl.when(pl.col("age") > 30)
      .then(pl.lit("Senior"))
      .otherwise(pl.lit("Junior"))
      .alias("level")
])

String Operations

df = pl.DataFrame({
    "text": ["Hello World", "PYTHON programming", "Data Science"]
})

result = df.select([
    pl.col("text"),
    pl.col("text").str.to_lowercase().alias("lower"),
    pl.col("text").str.len_chars().alias("length"),
    pl.col("text").str.contains("o").alias("has_o"),
    pl.col("text").str.split(" ").alias("words")
])

Temporal Operations

df = pl.DataFrame({
    "timestamp": ["2023-01-15 10:30:00", "2023-02-20 14:45:00", "2023-03-10 09:15:00"]
}).with_columns([
    pl.col("timestamp").str.strptime(pl.Datetime, "%Y-%m-%d %H:%M:%S")
])

result = df.select([
    pl.col("timestamp"),
    pl.col("timestamp").dt.year().alias("year"),
    pl.col("timestamp").dt.month().alias("month"),
    pl.col("timestamp").dt.weekday().alias("weekday"),
    pl.col("timestamp").dt.strftime("%Y-%m").alias("year_month")
])

Aggregations and Window Functions

df = pl.DataFrame({
    "group": ["A", "A", "B", "B", "C"],
    "value": [10, 20, 15, 25, 30]
})

# Group aggregations
result = df.group_by("group").agg([
    pl.col("value").sum().alias("total"),
    pl.col("value").mean().alias("average"),
    # The subtraction must be parenthesized: a method call binds tighter than
    # `-`, so without the parentheses .alias("range") would rename only the
    # min() operand and the output column would not be called "range".
    (pl.col("value").max() - pl.col("value").min()).alias("range")
])

# Window functions
result = df.with_columns([
    pl.col("value").sum().over("group").alias("group_total"),
    pl.col("value").rank().over("group").alias("rank_in_group")
])

Complex Expressions

# Same name/age/salary frame as in "Basic Expression Usage". It is rebuilt here
# because the preceding examples rebind `df` to frames without these columns.
df = pl.DataFrame({
    "name": ["Alice", "Bob", "Charlie"],
    "age": [25, 30, 35],
    "salary": [50000, 60000, 70000]
})

# Chaining multiple operations
result = df.select([
    pl.col("name"),
    pl.col("age")
      .cast(pl.Float64)
      .round(0)
      .clip(0, 100)
      .alias("age_clipped"),
    
    # Complex conditional logic: branches are tested in order, first match wins
    pl.when((pl.col("age") >= 18) & (pl.col("salary") > 55000))
      .then(pl.lit("Eligible"))
      .when(pl.col("age") >= 18)
      .then(pl.lit("Age OK"))
      .otherwise(pl.lit("Not Eligible"))
      .alias("status"),
    
    # Mathematical operations
    ((pl.col("salary") * 1.1).round(2)).alias("salary_with_raise")
])

Install with Tessl CLI

npx tessl i tessl/pypi-polars-u64-idx@1.33.1

docs

config-utilities.md

core-data-structures.md

data-types.md

expressions.md

functions.md

index.md

io-operations.md

selectors.md

sql-interface.md

tile.json