CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-polars

Blazingly fast DataFrame library for Python with lazy and eager evaluation modes

Pending
Overview
Eval results
Files

docs/functions-expressions.md

Functions and Expressions

90+ utility functions for data construction, aggregation, statistical operations, and expression building with support for vectorized computations, window functions, and complex data transformations.

Capabilities

Construction Functions

Functions for creating expressions, literals, and data structures from various inputs.

def col(name: str) -> Expr:
    """
    Create a column expression for referencing DataFrame columns.
    
    Parameters:
    - name: Column name or pattern (* for all columns)
    
    Returns:
    Column expression
    """

def lit(value, dtype=None, *, allow_object=False) -> Expr:
    """
    Create a literal expression from a value.
    
    Parameters:
    - value: Literal value (scalar, list, etc.)
    - dtype: Explicit data type
    - allow_object: Allow Python objects
    
    Returns:
    Literal expression
    """

def struct(*exprs, **named_exprs) -> Expr:
    """
    Create a struct expression from multiple expressions.
    
    Parameters:
    - exprs: Expressions to include as fields
    - named_exprs: Named expressions as keyword arguments
    
    Returns:
    Struct expression
    """

def field(name: str) -> Expr:
    """
    Access a field from a struct expression.
    
    Parameters:
    - name: Field name
    
    Returns:
    Field access expression
    """

def element() -> Expr:
    """
    Access elements from a list expression context.
    
    Returns:
    Element access expression
    """

Aggregation Functions

Functions for computing summary statistics and aggregations across data.

def sum(*names: str) -> Expr:
    """
    Sum all values in the named columns.

    Parameters:
    - names: Names of the columns to sum

    Returns:
    Sum expression
    """

def mean(*columns: str) -> Expr:
    """
    Calculate the mean of the named columns.

    Parameters:
    - columns: Names of the columns to average

    Returns:
    Mean expression
    """

def median(*exprs) -> Expr:
    """
    Calculate median across expressions.
    
    Parameters:
    - exprs: Expressions to find median
    
    Returns:
    Median expression
    """

def max(*names: str) -> Expr:
    """
    Find the maximum value in the named columns.

    Parameters:
    - names: Names of the columns to take the maximum of

    Returns:
    Maximum expression
    """

def min(*names: str) -> Expr:
    """
    Find the minimum value in the named columns.

    Parameters:
    - names: Names of the columns to take the minimum of

    Returns:
    Minimum expression
    """

def count(*columns: str) -> Expr:
    """
    Count the non-null values in the named columns.

    Parameters:
    - columns: Names of the columns whose non-null values are counted

    Returns:
    Count expression
    """

def len() -> Expr:
    """
    Return the number of rows in the current context.

    Takes no arguments: rows are counted rather than the values of a
    particular column, so nulls are included (similar to COUNT(*) in SQL).
    Use count() to count only the non-null values of specific columns.

    Returns:
    Length expression
    """

def std(column: str, ddof: int = 1) -> Expr:
    """
    Calculate standard deviation of a column.
    
    Parameters:
    - column: Column name to calculate std
    - ddof: Delta degrees of freedom
    
    Returns:
    Standard deviation expression
    """

def var(column: str, ddof: int = 1) -> Expr:
    """
    Calculate variance of a column.
    
    Parameters:
    - column: Column name to calculate variance
    - ddof: Delta degrees of freedom
    
    Returns:
    Variance expression
    """

def quantile(expr, quantile, interpolation="nearest") -> Expr:
    """
    Calculate quantile of expression.
    
    Parameters:
    - expr: Expression to calculate quantile
    - quantile: Quantile value (0-1)
    - interpolation: Interpolation method
    
    Returns:
    Quantile expression
    """

Horizontal Aggregation Functions

Functions that aggregate horizontally — across several columns within each row — rather than vertically down a single column.

def all_horizontal(*exprs) -> Expr:
    """
    Check if all values are true across columns.
    
    Parameters:
    - exprs: Boolean expressions
    
    Returns:
    All-true expression
    """

def any_horizontal(*exprs) -> Expr:
    """
    Check if any value is true across columns.
    
    Parameters:
    - exprs: Boolean expressions
    
    Returns:
    Any-true expression
    """

def sum_horizontal(*exprs) -> Expr:
    """
    Sum values horizontally across columns.
    
    Parameters:
    - exprs: Numeric expressions
    
    Returns:
    Horizontal sum expression
    """

def mean_horizontal(*exprs) -> Expr:
    """
    Calculate mean horizontally across columns.
    
    Parameters:
    - exprs: Numeric expressions
    
    Returns:
    Horizontal mean expression
    """

def max_horizontal(*exprs) -> Expr:
    """
    Find maximum value horizontally across columns.
    
    Parameters:
    - exprs: Numeric expressions
    
    Returns:
    Horizontal max expression
    """

def min_horizontal(*exprs) -> Expr:
    """
    Find minimum value horizontally across columns.
    
    Parameters:
    - exprs: Numeric expressions
    
    Returns:
    Horizontal min expression
    """

Range Generation Functions

Functions for creating sequences and ranges of values.

def arange(start, end, step=1, *, dtype=None, eager=False) -> Expr | Series:
    """
    Create arithmetic range of values.
    
    Parameters:
    - start: Start value
    - end: End value (exclusive)
    - step: Step size
    - dtype: Data type
    - eager: Return Series if True, Expr if False
    
    Returns:
    Range expression or Series
    """

def int_range(start, end, step=1, *, dtype=None, eager=False) -> Expr | Series:
    """
    Create integer range of values.
    
    Parameters:
    - start: Start value
    - end: End value (exclusive)  
    - step: Step size
    - dtype: Integer data type
    - eager: Return Series if True, Expr if False
    
    Returns:
    Integer range expression or Series
    """

def int_ranges(start, end, step=1, *, dtype=None, eager=False) -> Expr | Series:
    """
    Create multiple integer ranges.
    
    Parameters:
    - start: Start values (can be expression)
    - end: End values (can be expression)
    - step: Step size
    - dtype: Integer data type
    - eager: Return Series if True, Expr if False
    
    Returns:
    Multiple ranges expression or Series
    """

def linear_space(start, end, num_samples, *, closed="both", eager=False) -> Expr | Series:
    """
    Create a sequence of evenly spaced values between start and end.

    Parameters:
    - start: Lower bound of the interval
    - end: Upper bound of the interval
    - num_samples: Number of values to generate
    - closed: Which bounds are included ("both", "left", "right", "none")
    - eager: Return Series if True, Expr if False

    Returns:
    Linear space expression or Series
    """

def linear_spaces(start, end, num_samples, *, closed="both", as_array=False, eager=False) -> Expr | Series:
    """
    Create one sequence of evenly spaced values per row.

    Parameters:
    - start: Lower bounds (can be expression)
    - end: Upper bounds (can be expression)
    - num_samples: Number of values per sequence
    - closed: Which bounds are included ("both", "left", "right", "none")
    - as_array: Return fixed-length Array values instead of List values
    - eager: Return Series if True, Expr if False

    Returns:
    Multiple linear spaces expression or Series
    """

Temporal Range Functions

Functions for creating date, datetime, and time ranges.

def date_range(start, end, interval="1d", *, closed="both", eager=False) -> Expr | Series:
    """
    Create date range.
    
    Parameters:
    - start: Start date
    - end: End date
    - interval: Date interval (e.g., "1d", "1w", "1mo")
    - closed: Range bounds ("both", "left", "right", "none")
    - eager: Return Series if True, Expr if False
    
    Returns:
    Date range expression or Series
    """

def date_ranges(start, end, interval="1d", *, closed="both", eager=False) -> Expr | Series:
    """
    Create multiple date ranges.
    
    Parameters:
    - start: Start dates (can be expression)
    - end: End dates (can be expression)
    - interval: Date interval
    - closed: Range bounds
    - eager: Return Series if True, Expr if False
    
    Returns:
    Multiple date ranges expression or Series
    """

def datetime_range(start, end, interval="1d", *, closed="both", time_unit=None, time_zone=None, eager=False) -> Expr | Series:
    """
    Create datetime range.

    Parameters:
    - start: Start datetime
    - end: End datetime
    - interval: Datetime interval (e.g., "1h", "30m", "1d"); defaults to one day
    - closed: Range bounds ("both", "left", "right", "none")
    - time_unit: Time precision of the result ("ns", "us", "ms")
    - time_zone: Timezone
    - eager: Return Series if True, Expr if False

    Returns:
    Datetime range expression or Series
    """

def datetime_ranges(start, end, interval="1d", *, closed="both", time_unit=None, time_zone=None, eager=False) -> Expr | Series:
    """
    Create multiple datetime ranges, one per row.

    Parameters:
    - start: Start datetimes (can be expression)
    - end: End datetimes (can be expression)
    - interval: Datetime interval; defaults to one day
    - closed: Range bounds ("both", "left", "right", "none")
    - time_unit: Time precision of the result ("ns", "us", "ms")
    - time_zone: Timezone
    - eager: Return Series if True, Expr if False

    Returns:
    Multiple datetime ranges expression or Series
    """

def time_range(start, end, interval="1h", *, closed="both", eager=False) -> Expr | Series:
    """
    Create time range.
    
    Parameters:
    - start: Start time
    - end: End time
    - interval: Time interval
    - closed: Range bounds
    - eager: Return Series if True, Expr if False
    
    Returns:
    Time range expression or Series
    """

def time_ranges(start, end, interval="1h", *, closed="both", eager=False) -> Expr | Series:
    """
    Create multiple time ranges.
    
    Parameters:
    - start: Start times (can be expression)
    - end: End times (can be expression)
    - interval: Time interval
    - closed: Range bounds
    - eager: Return Series if True, Expr if False
    
    Returns:
    Multiple time ranges expression or Series
    """

Datetime Construction Functions

Functions for creating date, datetime, and time values from components.

def date(year, month, day) -> Expr:
    """
    Create date from year, month, day components.
    
    Parameters:
    - year: Year expression or value
    - month: Month expression or value (1-12)
    - day: Day expression or value (1-31)
    
    Returns:
    Date expression
    """

def datetime(year, month, day, hour=0, minute=0, second=0, microsecond=0, *, time_unit="us", time_zone=None) -> Expr:
    """
    Create datetime from components.
    
    Parameters:
    - year: Year expression or value
    - month: Month expression or value (1-12)
    - day: Day expression or value (1-31)
    - hour: Hour expression or value (0-23)
    - minute: Minute expression or value (0-59)
    - second: Second expression or value (0-59)
    - microsecond: Microsecond expression or value
    - time_unit: Time precision ("ns", "us", "ms")
    - time_zone: Timezone
    
    Returns:
    Datetime expression
    """

def time(hour=0, minute=0, second=0, microsecond=0) -> Expr:
    """
    Create time from components.
    
    Parameters:
    - hour: Hour expression or value (0-23)
    - minute: Minute expression or value (0-59)
    - second: Second expression or value (0-59)
    - microsecond: Microsecond expression or value
    
    Returns:
    Time expression
    """

def duration(*, weeks=None, days=None, hours=None, minutes=None, seconds=None, milliseconds=None, microseconds=None, nanoseconds=None, time_unit="us") -> Expr:
    """
    Create duration from time components.
    
    Parameters:
    - weeks: Weeks expression or value
    - days: Days expression or value
    - hours: Hours expression or value
    - minutes: Minutes expression or value
    - seconds: Seconds expression or value
    - milliseconds: Milliseconds expression or value
    - microseconds: Microseconds expression or value
    - nanoseconds: Nanoseconds expression or value
    - time_unit: Time precision
    
    Returns:
    Duration expression
    """

def from_epoch(column, time_unit="s") -> Expr:
    """
    Convert epoch timestamp to datetime.
    
    Parameters:
    - column: Column with epoch timestamps
    - time_unit: Time unit of epoch ("s", "ms", "us", "ns")
    
    Returns:
    Datetime expression
    """

Statistical Functions

Functions for statistical analysis and correlation calculations.

def corr(a, b, *, method="pearson", ddof=1, propagate_nans=False) -> Expr:
    """
    Calculate correlation coefficient between two expressions.
    
    Parameters:
    - a: First expression
    - b: Second expression
    - method: Correlation method ("pearson", "spearman")
    - ddof: Delta degrees of freedom
    - propagate_nans: Propagate NaN values
    
    Returns:
    Correlation expression
    """

def cov(a, b, *, ddof=1) -> Expr:
    """
    Calculate covariance between two expressions.
    
    Parameters:
    - a: First expression
    - b: Second expression
    - ddof: Delta degrees of freedom
    
    Returns:
    Covariance expression
    """

def rolling_corr(a, b, window_size, *, min_periods=None, ddof=1) -> Expr:
    """
    Calculate rolling correlation.
    
    Parameters:
    - a: First expression
    - b: Second expression
    - window_size: Rolling window size
    - min_periods: Minimum periods for calculation
    - ddof: Delta degrees of freedom
    
    Returns:
    Rolling correlation expression
    """

def rolling_cov(a, b, window_size, *, min_periods=None, ddof=1) -> Expr:
    """
    Calculate rolling covariance.
    
    Parameters:
    - a: First expression
    - b: Second expression
    - window_size: Rolling window size
    - min_periods: Minimum periods for calculation
    - ddof: Delta degrees of freedom
    
    Returns:
    Rolling covariance expression
    """

def n_unique(expr) -> Expr:
    """
    Count unique values in expression.
    
    Parameters:
    - expr: Expression to count unique values
    
    Returns:
    Unique count expression
    """

def approx_n_unique(expr) -> Expr:
    """
    Approximate count of unique values using HyperLogLog.
    
    Parameters:
    - expr: Expression to count unique values
    
    Returns:
    Approximate unique count expression
    """

Cumulative Functions

Functions for cumulative operations and running calculations.

def cum_sum(*names: str) -> Expr:
    """
    Calculate the cumulative sum of the named columns.

    Shorthand for col(names).cum_sum(); call the expression method directly
    when options such as reverse order are needed.

    Parameters:
    - names: Names of the columns to sum cumulatively

    Returns:
    Cumulative sum expression
    """

def cum_sum_horizontal(*exprs) -> Expr:
    """
    Calculate cumulative sum horizontally across columns.
    
    Parameters:
    - exprs: Expressions to sum cumulatively
    
    Returns:
    Horizontal cumulative sum expression
    """

def cum_count(*columns: str, reverse: bool = False) -> Expr:
    """
    Calculate the cumulative count of non-null values in the named columns.

    Parameters:
    - columns: Names of the columns to count cumulatively
    - reverse: Calculate in reverse order

    Returns:
    Cumulative count expression
    """

def cum_fold(acc, function, exprs, *, include_init=False) -> Expr:
    """
    Cumulative fold operation with custom function.

    Every intermediate accumulator value is kept, not only the final one.

    Parameters:
    - acc: Initial accumulator value
    - function: Fold function (takes acc, value)
    - exprs: Expressions to fold
    - include_init: Include initial value in result

    Returns:
    Cumulative fold expression
    """

def cum_reduce(function, exprs) -> Expr:
    """
    Cumulative reduce operation.

    Like cum_fold, but without an explicit initial accumulator value.

    Parameters:
    - function: Reduce function (takes two values)
    - exprs: Expressions to reduce

    Returns:
    Cumulative reduce expression
    """

Higher-Order Functions

Functions for advanced operations with custom lambda functions.

def fold(acc, function, exprs) -> Expr:
    """
    Fold operation with custom function.

    The accumulator starts at acc and is combined with each expression in
    turn, working horizontally across the given expressions.

    Parameters:
    - acc: Initial accumulator value
    - function: Fold function (takes acc, value)
    - exprs: Expressions to fold

    Returns:
    Fold expression
    """

def reduce(function, exprs) -> Expr:
    """
    Reduce operation with custom function.

    Like fold, but without an explicit initial accumulator value.

    Parameters:
    - function: Reduce function (takes two values)
    - exprs: Expressions to reduce

    Returns:
    Reduce expression
    """

def map_batches(expr, function, return_dtype=None, *, agg_list=False) -> Expr:
    """
    Apply function to batches of data.
    
    Parameters:
    - expr: Expression to map over
    - function: Function to apply to each batch
    - return_dtype: Return data type
    - agg_list: Aggregate results into list
    
    Returns:
    Map batches expression
    """

def map_groups(expr, function, return_dtype=None) -> Expr:
    """
    Apply function to groups of data.
    
    Parameters:
    - expr: Expression to map over
    - function: Function to apply to each group
    - return_dtype: Return data type
    
    Returns:
    Map groups expression
    """

Conditional Functions

Functions for conditional logic and branching.

def when(condition) -> When:
    """
    Start conditional expression chain.
    
    Parameters:
    - condition: Boolean condition expression
    
    Returns:
    When object for chaining
    """

def coalesce(*exprs) -> Expr:
    """
    Return first non-null value from expressions.
    
    Parameters:
    - exprs: Expressions to coalesce
    
    Returns:
    Coalesced expression
    """

class When:
    def then(self, expr) -> Then:
        """
        Specify value when condition is true.
        
        Parameters:
        - expr: Expression or value to return
        
        Returns:
        Then object for chaining
        """

class Then:
    def when(self, condition) -> When:
        """
        Add another condition.
        
        Parameters:
        - condition: Boolean condition expression
        
        Returns:
        When object for chaining
        """
    
    def otherwise(self, expr) -> Expr:
        """
        Specify default value.
        
        Parameters:
        - expr: Default expression or value
        
        Returns:
        Complete conditional expression
        """

Utility Functions

Miscellaneous utility functions for various operations.

def concat(items, *, how="vertical", rechunk=False, parallel=True) -> DataFrame | LazyFrame:
    """
    Concatenate DataFrames or LazyFrames.

    Parameters:
    - items: Iterable of DataFrames or LazyFrames to concatenate
    - how: Concatenation method ("vertical", "vertical_relaxed", "diagonal",
      "diagonal_relaxed", "horizontal", "align")
    - rechunk: Make sure the result is stored in contiguous memory
    - parallel: Allow the inputs to be computed in parallel (LazyFrames only)

    Returns:
    Concatenated DataFrame or LazyFrame
    """

def concat_str(exprs, *, separator="", ignore_nulls=False) -> Expr:
    """
    Concatenate string expressions.
    
    Parameters:
    - exprs: String expressions to concatenate
    - separator: String separator
    - ignore_nulls: Skip null values
    
    Returns:
    Concatenated string expression
    """

def concat_list(exprs) -> Expr:
    """
    Concatenate expressions into list.
    
    Parameters:
    - exprs: Expressions to concatenate into list
    
    Returns:
    List expression
    """

def concat_arr(exprs) -> Expr:
    """
    Concatenate expressions into array.
    
    Parameters:
    - exprs: Expressions to concatenate into array
    
    Returns:
    Array expression
    """

def format(format_str, *args) -> Expr:
    """
    Format string with expressions.
    
    Parameters:
    - format_str: Format string with {} placeholders
    - args: Expressions to substitute
    
    Returns:
    Formatted string expression
    """

def escape_regex(s: str) -> str:
    """
    Escape regex meta characters in a plain Python string.

    This operates on a Python string, not on an expression or column.

    Parameters:
    - s: String whose regex meta characters should be escaped

    Returns:
    Escaped string
    """

def dtype_of(expr) -> DataType:
    """
    Get data type of expression.
    
    Parameters:
    - expr: Expression to inspect
    
    Returns:
    Data type
    """

def set_random_seed(seed) -> None:
    """
    Set global random seed.
    
    Parameters:
    - seed: Random seed value
    """

Usage Examples

Basic Function Usage

import polars as pl

# Construction functions: start from a small three-column DataFrame
df = pl.DataFrame({
    "a": [1, 2, 3],
    "b": [4, 5, 6],
    "c": ["x", "y", "z"]
})

# Select column "a", the literal 10 aliased as "constant",
# and a struct built from columns "a" and "b"
result = df.select([
    pl.col("a"),
    pl.lit(10).alias("constant"),
    pl.struct(["a", "b"]).alias("struct_col")
])

Aggregation Functions

# Various aggregations on the a/b/c DataFrame from the basic example:
# sum_a and mean_b aggregate a single column, while row_max and row_sum
# combine columns "a" and "b" horizontally
result = df.select([
    pl.col("a").sum().alias("sum_a"),
    pl.col("b").mean().alias("mean_b"),
    pl.max_horizontal("a", "b").alias("row_max"),
    pl.sum_horizontal("a", "b").alias("row_sum")
])

# Group by aggregations, one group per distinct value of "c"
grouped = df.group_by("c").agg([
    pl.col("a").sum(),
    pl.col("b").mean(),
    pl.col("a").std().alias("a_std")
])

Range Generation

from datetime import date

# Create ranges. Both columns must have the same length (10 rows here),
# and date_range needs date objects: plain strings are read as column names.
ranges_df = pl.DataFrame({
    "int_range": pl.int_range(0, 20, 2, eager=True),
    "date_range": pl.date_range(date(2023, 1, 1), date(2023, 1, 10), "1d", eager=True)
})

# Multiple ranges from DataFrame: one list of integers per row,
# running from "start" up to (but excluding) "end"
df = pl.DataFrame({
    "start": [1, 5, 10],
    "end": [5, 10, 15]
})

result = df.with_columns([
    pl.int_ranges("start", "end").alias("ranges")
])

Statistical Operations

# Sample data: every y value is exactly twice the matching x value
df = pl.DataFrame({
    "x": [1, 2, 3, 4, 5],
    "y": [2, 4, 6, 8, 10]
})

# Statistical functions: correlation and covariance between x and y,
# plus the spread of each column on its own
stats = df.select([
    pl.corr("x", "y").alias("correlation"),
    pl.cov("x", "y").alias("covariance"),
    pl.col("x").std().alias("x_std"),
    pl.col("y").var().alias("y_var")
])

# Rolling statistics over a window of 3, added as new columns
windowed = df.with_columns([
    pl.rolling_corr("x", "y", window_size=3).alias("rolling_corr"),
    pl.col("x").rolling_mean(3).alias("rolling_mean")
])

Conditional Logic

# Complex conditional expressions (df is the x/y DataFrame defined above)
result = df.with_columns([
    # Branches are tested in order; the first matching condition wins
    pl.when(pl.col("x") > 3)
    .then(pl.lit("high"))
    .when(pl.col("x") > 1)
    .then(pl.lit("medium"))
    .otherwise(pl.lit("low"))
    .alias("category"),

    # First non-null value per row among x and y, falling back to 0
    pl.coalesce([pl.col("x"), pl.col("y"), pl.lit(0)]).alias("first_non_null")
])

Higher-Order Functions

# Custom fold operation (df is the x/y DataFrame defined above).
# Arguments are passed positionally: accumulator, fold function, expressions.
result = df.select([
    pl.fold(
        pl.lit(0),
        lambda acc, value: acc + value,
        [pl.col("x"), pl.col("y")]
    ).alias("custom_sum")
])

# Map batches with custom function
def custom_transform(series):
    return series * 2 + 1

result = df.with_columns([
    pl.col("x").map_batches(custom_transform).alias("transformed")
])

String and Array Operations

# String concatenation and formatting
df = pl.DataFrame({
    "first": ["John", "Jane"],
    "last": ["Doe", "Smith"],
    "age": [30, 25]
})

result = df.with_columns([
    pl.concat_str([pl.col("first"), pl.col("last")], separator=" ").alias("full_name"),
    pl.format("Name: {}, Age: {}", pl.col("first"), pl.col("age")).alias("formatted")
])

# List concatenation: concat_list combines the two list columns
df = pl.DataFrame({
    "list1": [[1, 2], [3, 4]],
    "list2": [[5, 6], [7, 8]]
})

result = df.with_columns([
    pl.concat_list([pl.col("list1"), pl.col("list2")]).alias("combined")
])

Install with Tessl CLI

npx tessl i tessl/pypi-polars

docs

column-selection.md

configuration.md

core-data-structures.md

data-conversion.md

data-types.md

error-handling.md

functions-expressions.md

index.md

io-operations.md

sql-interface.md

tile.json