CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-polars-u64-idx

Blazingly fast DataFrame library with 64-bit index support for handling datasets with more than 4.2 billion rows

Pending
Overview
Eval results
Files

docs/functions.md

Functions and Utilities

Built-in functions for aggregation, transformations, date/time operations, string manipulation, and utility functions. These functions work with expressions and can be used in DataFrame operations, LazyFrame queries, and standalone computations.

Capabilities

Aggregation Functions

Statistical aggregation functions that operate on columns or expressions.

def sum(*exprs: str) -> Expr:
    """
    Sum all values in each named column (a vertical, per-column aggregation).

    This is not a row-wise operation; to add values across several
    columns within each row, use sum_horizontal.
    
    Parameters:
    - exprs: Names of the columns to sum
    
    Returns:
    Sum expression
    """

def mean(*exprs: str) -> Expr:
    """
    Calculate the mean of each named column (a vertical, per-column aggregation).

    This is not a row-wise operation; to average values across several
    columns within each row, use mean_horizontal.
    
    Parameters:
    - exprs: Names of the columns to average
    
    Returns:
    Mean expression
    """

def max(*exprs: str) -> Expr:
    """
    Get the maximum value of each named column (a vertical, per-column aggregation).

    Passing several names yields one output column per name. To take the
    largest value across several columns within each row, use max_horizontal.
    
    Parameters:
    - exprs: Names of the columns to aggregate
    
    Returns:
    Maximum expression
    """

def min(*exprs: str) -> Expr:
    """
    Get the minimum value of each named column (a vertical, per-column aggregation).

    Passing several names yields one output column per name. To take the
    smallest value across several columns within each row, use min_horizontal.
    
    Parameters:
    - exprs: Names of the columns to aggregate
    
    Returns:
    Minimum expression
    """

def count(*exprs) -> Expr:
    """
    Count non-null values.
    
    Parameters:
    - exprs: Expressions to count (optional)
    
    Returns:
    Count expression
    """

def all(*exprs: str) -> Expr:
    """
    Select all columns, or check if all values in the named columns are true.

    Called with no arguments, this selects every column of the frame.
    Called with column names, it evaluates whether all values in each
    named column are true. For a row-wise check across columns, use
    all_horizontal.
    
    Parameters:
    - exprs: Names of Boolean columns to check (omit to select all columns)
    
    Returns:
    Expression selecting all columns, or Boolean expression (all true)
    """

def any(*exprs) -> Expr:
    """
    Check if any values are true.
    
    Parameters:
    - exprs: Boolean expressions
    
    Returns:
    Boolean expression (any true)
    """

# Horizontal operations
def sum_horizontal(*exprs) -> Expr:
    """Sum across columns horizontally."""

def mean_horizontal(*exprs) -> Expr:
    """Mean across columns horizontally."""

def max_horizontal(*exprs) -> Expr:
    """Maximum across columns horizontally."""

def min_horizontal(*exprs) -> Expr:
    """Minimum across columns horizontally."""

def all_horizontal(*exprs) -> Expr:
    """All true across columns horizontally."""

def any_horizontal(*exprs) -> Expr:
    """Any true across columns horizontally."""

# Cumulative functions
def cum_sum(*exprs) -> Expr:
    """Cumulative sum."""

def cum_sum_horizontal(*exprs) -> Expr:
    """Cumulative sum horizontally."""

def cum_count(*exprs) -> Expr:
    """Cumulative count."""

def cum_fold(acc: Expr, function: Callable[[Expr, Expr], Expr], *exprs: Expr, include_init: bool = False) -> Expr:
    """
    Cumulative fold operation.
    
    Parameters:
    - acc: Initial accumulator value
    - function: Fold function
    - exprs: Expressions to fold
    - include_init: Include initial value
    
    Returns:
    Cumulative fold expression
    """

def cum_reduce(function: Callable[[Expr, Expr], Expr], *exprs: Expr) -> Expr:
    """
    Cumulative reduce operation.
    
    Parameters:
    - function: Reduce function
    - exprs: Expressions to reduce
    
    Returns:
    Cumulative reduce expression
    """

Date and Time Functions

Functions for creating and manipulating temporal data.

def date(year: int | Expr, month: int | Expr, day: int | Expr) -> Expr:
    """
    Create date from year, month, day.
    
    Parameters:
    - year: Year value
    - month: Month value (1-12)
    - day: Day value (1-31)
    
    Returns:
    Date expression
    """

def datetime(
    year: int | Expr,
    month: int | Expr,
    day: int | Expr,
    hour: int | Expr = 0,
    minute: int | Expr = 0,
    second: int | Expr = 0,
    microsecond: int | Expr = 0,
    *,
    time_unit: TimeUnit = "us",
    time_zone: str | None = None
) -> Expr:
    """
    Create datetime from components.
    
    Parameters:
    - year: Year value
    - month: Month value (1-12)
    - day: Day value (1-31)
    - hour: Hour value (0-23)
    - minute: Minute value (0-59)
    - second: Second value (0-59)
    - microsecond: Microsecond value
    - time_unit: Time precision
    - time_zone: Timezone
    
    Returns:
    Datetime expression
    """

def time(hour: int | Expr, minute: int | Expr, second: int | Expr, microsecond: int | Expr = 0) -> Expr:
    """
    Create time from components.
    
    Parameters:
    - hour: Hour value (0-23)
    - minute: Minute value (0-59)
    - second: Second value (0-59)
    - microsecond: Microsecond value
    
    Returns:
    Time expression
    """

def duration(
    *,
    weeks: int | Expr | None = None,
    days: int | Expr | None = None,
    hours: int | Expr | None = None,
    minutes: int | Expr | None = None,
    seconds: int | Expr | None = None,
    milliseconds: int | Expr | None = None,
    microseconds: int | Expr | None = None,
    nanoseconds: int | Expr | None = None,
    time_unit: TimeUnit = "us"
) -> Expr:
    """
    Create duration from components.
    
    Parameters:
    - weeks: Number of weeks
    - days: Number of days
    - hours: Number of hours
    - minutes: Number of minutes
    - seconds: Number of seconds
    - milliseconds: Number of milliseconds
    - microseconds: Number of microseconds
    - nanoseconds: Number of nanoseconds
    - time_unit: Time unit for result
    
    Returns:
    Duration expression
    """

def from_epoch(column: str | Expr, time_unit: TimeUnit = "s") -> Expr:
    """
    Convert epoch timestamp to datetime.
    
    Parameters:
    - column: Column with epoch values
    - time_unit: Unit of epoch values
    
    Returns:
    Datetime expression
    """

Range Functions

Functions for generating sequences and ranges of values.

def arange(start: int | Expr, end: int | Expr, step: int = 1, *, eager: bool = False) -> Expr | Series:
    """
    Generate range of integers.
    
    Parameters:
    - start: Start value (inclusive)
    - end: End value (exclusive)
    - step: Step size
    - eager: Return Series instead of Expr
    
    Returns:
    Range expression or Series
    """

def date_range(
    start: date | datetime | IntoExpr,
    end: date | datetime | IntoExpr,
    interval: str | timedelta = "1d",
    *,
    closed: ClosedInterval = "both",
    time_unit: TimeUnit | None = None,
    time_zone: str | None = None,
    eager: bool = False
) -> Expr | Series:
    """
    Generate date range.
    
    Parameters:
    - start: Start date
    - end: End date
    - interval: Time interval ("1d", "1h", etc.)
    - closed: Include endpoints ("both", "left", "right", "none")
    - time_unit: Time precision
    - time_zone: Timezone
    - eager: Return Series instead of Expr
    
    Returns:
    Date range expression or Series
    """

def date_ranges(
    start: IntoExpr,
    end: IntoExpr,
    interval: str | timedelta = "1d",
    *,
    closed: ClosedInterval = "both",
    time_unit: TimeUnit | None = None,
    time_zone: str | None = None,
    eager: bool = False
) -> Expr | Series:
    """Generate multiple date ranges."""

def datetime_range(
    start: datetime | IntoExpr,
    end: datetime | IntoExpr,
    interval: str | timedelta = "1d",
    *,
    closed: ClosedInterval = "both",
    time_unit: TimeUnit = "us",
    time_zone: str | None = None,
    eager: bool = False
) -> Expr | Series:
    """Generate datetime range."""

def datetime_ranges(
    start: IntoExpr,
    end: IntoExpr,
    interval: str | timedelta = "1d",
    **kwargs
) -> Expr | Series:
    """Generate multiple datetime ranges."""

def time_range(
    start: time | IntoExpr | None = None,
    end: time | IntoExpr | None = None,
    interval: str | timedelta = "1h",
    *,
    closed: ClosedInterval = "both",
    eager: bool = False
) -> Expr | Series:
    """Generate time range."""

def time_ranges(
    start: IntoExpr,
    end: IntoExpr,
    interval: str | timedelta = "1h",
    **kwargs
) -> Expr | Series:
    """Generate multiple time ranges."""

def int_range(start: int | Expr, end: int | Expr, step: int = 1, *, eager: bool = False) -> Expr | Series:
    """Generate integer range."""

def int_ranges(start: IntoExpr, end: IntoExpr, step: int | IntoExpr = 1, *, eager: bool = False) -> Expr | Series:
    """Generate multiple integer ranges."""

def linear_space(start: float | Expr, end: float | Expr, n: int, *, endpoint: bool = True, eager: bool = False) -> Expr | Series:
    """
    Generate linearly spaced values.
    
    Parameters:
    - start: Start value
    - end: End value
    - n: Number of values
    - endpoint: Include endpoint
    - eager: Return Series instead of Expr
    
    Returns:
    Linear space expression or Series
    """

def linear_spaces(start: IntoExpr, end: IntoExpr, n: int | IntoExpr, **kwargs) -> Expr | Series:
    """Generate multiple linear spaces."""

String Functions

Functions for string manipulation and processing.

def concat_str(exprs: IntoExpr, *, separator: str = "", ignore_nulls: bool = False) -> Expr:
    """
    Concatenate strings horizontally.
    
    Parameters:
    - exprs: String expressions to concatenate
    - separator: Separator between strings
    - ignore_nulls: Skip null values
    
    Returns:
    Concatenated string expression
    """

def format(format_str: str, *args: IntoExpr) -> Expr:
    """
    Format string with placeholders.
    
    Parameters:
    - format_str: Format string with {} placeholders
    - args: Values to substitute
    
    Returns:
    Formatted string expression
    """

def escape_regex(pattern: str | Expr) -> Expr:
    """
    Escape regex special characters.
    
    Parameters:
    - pattern: Pattern to escape
    
    Returns:
    Escaped pattern expression
    """

List and Array Functions

Functions for working with list and array data types.

def concat_list(exprs: IntoExpr, *, ignore_nulls: bool = False) -> Expr:
    """
    Concatenate lists horizontally.
    
    Parameters:
    - exprs: List expressions to concatenate
    - ignore_nulls: Skip null values
    
    Returns:
    Concatenated list expression
    """

def concat_arr(exprs: IntoExpr, *, ignore_nulls: bool = False) -> Expr:
    """
    Concatenate arrays horizontally.
    
    Parameters:
    - exprs: Array expressions to concatenate
    - ignore_nulls: Skip null values
    
    Returns:
    Concatenated array expression
    """

Statistical Functions

Advanced statistical and mathematical functions.

def std(*exprs) -> Expr:
    """Calculate standard deviation."""

def var(*exprs) -> Expr:
    """Calculate variance."""

def median(*exprs) -> Expr:
    """Calculate median."""

def quantile(*exprs, quantile: float, interpolation: str = "nearest") -> Expr:
    """
    Calculate quantile.
    
    Parameters:
    - exprs: Expressions to analyze
    - quantile: Quantile value (0.0 to 1.0)
    - interpolation: Interpolation method
    
    Returns:
    Quantile expression
    """

def n_unique(*exprs) -> Expr:
    """Count unique values."""

def approx_n_unique(*exprs) -> Expr:
    """Approximate unique count (faster for large data)."""

def corr(a: IntoExpr, b: IntoExpr, *, method: CorrelationMethod = "pearson", ddof: int = 1) -> Expr:
    """
    Calculate correlation coefficient.
    
    Parameters:
    - a: First expression
    - b: Second expression
    - method: Correlation method ("pearson", "spearman")
    - ddof: Delta degrees of freedom
    
    Returns:
    Correlation expression
    """

def cov(a: IntoExpr, b: IntoExpr) -> Expr:
    """
    Calculate covariance.
    
    Parameters:
    - a: First expression
    - b: Second expression
    
    Returns:
    Covariance expression
    """

def rolling_corr(a: IntoExpr, b: IntoExpr, window_size: int, *, min_periods: int | None = None) -> Expr:
    """
    Calculate rolling correlation.
    
    Parameters:
    - a: First expression
    - b: Second expression
    - window_size: Rolling window size
    - min_periods: Minimum periods for calculation
    
    Returns:
    Rolling correlation expression
    """

def rolling_cov(a: IntoExpr, b: IntoExpr, window_size: int, *, min_periods: int | None = None) -> Expr:
    """
    Calculate rolling covariance.
    
    Parameters:
    - a: First expression
    - b: Second expression
    - window_size: Rolling window size
    - min_periods: Minimum periods for calculation
    
    Returns:
    Rolling covariance expression
    """

Trigonometric Functions

Mathematical trigonometric operations.

def arctan2(y: IntoExpr, x: IntoExpr) -> Expr:
    """
    Calculate arctangent of y/x in radians.
    
    Parameters:
    - y: Y coordinate expression
    - x: X coordinate expression
    
    Returns:
    Arctangent expression
    """

def arctan2d(y: IntoExpr, x: IntoExpr) -> Expr:
    """
    Calculate arctangent of y/x in degrees.
    
    Parameters:
    - y: Y coordinate expression
    - x: X coordinate expression
    
    Returns:
    Arctangent expression in degrees
    """

Transform and Utility Functions

General utility and transformation functions.

def map_batches(exprs: IntoExpr, function: Callable[[Sequence[Series]], Series], return_dtype: DataType | None = None, *, inference_size: int = 256) -> Expr:
    """
    Apply a custom function to one or more whole columns/expressions.
    
    Parameters:
    - exprs: Input expressions
    - function: Function that receives a list of Series (one per input
      expression, in order) and returns a single Series
    - return_dtype: Expected return data type
    - inference_size: Size for type inference
    
    Returns:
    Mapped expression
    """

def map_groups(exprs: IntoExpr, function: Callable[[Sequence[Series]], Series | Any], return_dtype: DataType | None = None) -> Expr:
    """
    Apply a custom function to each group in a group-by context.
    
    Parameters:
    - exprs: Input expressions
    - function: Function called once per group with a list of Series
      (one per input expression, in order); returns the group's result
    - return_dtype: Expected return data type
    
    Returns:
    Mapped expression
    """

def fold(acc: IntoExpr, function: Callable[[Expr, Expr], Expr], exprs: Sequence[IntoExpr] | Expr) -> Expr:
    """
    Fold operation with accumulator.
    
    Parameters:
    - acc: Initial accumulator value
    - function: Fold function
    - exprs: Expressions to fold
    
    Returns:
    Folded expression
    """

def reduce(function: Callable[[Expr, Expr], Expr], exprs: Sequence[IntoExpr] | Expr) -> Expr:
    """
    Reduce operation.
    
    Parameters:
    - function: Reduce function
    - exprs: Expressions to reduce
    
    Returns:
    Reduced expression
    """

def coalesce(*exprs: IntoExpr) -> Expr:
    """
    Return first non-null value.
    
    Parameters:
    - exprs: Expressions to check
    
    Returns:
    Coalesced expression
    """

def element() -> Expr:
    """Get element at current index in context."""

def first(*exprs: IntoExpr) -> Expr:
    """Get first value."""

def last(*exprs: IntoExpr) -> Expr:
    """Get last value."""

def head(*exprs: IntoExpr, n: int = 10) -> Expr:
    """Get first n values."""

def tail(*exprs: IntoExpr, n: int = 10) -> Expr:
    """Get last n values."""

def nth(n: int, *exprs: IntoExpr) -> Expr:
    """
    Get nth value.
    
    Parameters:
    - n: Index to retrieve
    - exprs: Input expressions
    
    Returns:
    Nth value expression
    """

def len() -> Expr:
    """Get length/count."""

def implode(*exprs: IntoExpr) -> Expr:
    """Combine values into list."""

def explode(*exprs: IntoExpr) -> Expr:
    """Explode list elements to separate rows."""

def repeat(value: IntoExpr, n: int | IntoExpr, *, eager: bool = False) -> Expr | Series:
    """
    Repeat value n times.
    
    Parameters:
    - value: Value to repeat
    - n: Number of repetitions
    - eager: Return Series instead of Expr
    
    Returns:
    Repeated values expression or Series
    """

def ones(n: int | IntoExpr, *, eager: bool = False) -> Expr | Series:
    """Create array of ones."""

def zeros(n: int | IntoExpr, *, eager: bool = False) -> Expr | Series:
    """Create array of zeros."""

def exclude(*columns: str | DataType) -> Expr:
    """
    Exclude columns from selection.
    
    Parameters:
    - columns: Column names or types to exclude
    
    Returns:
    Exclusion expression
    """

def groups() -> Expr:
    """Get group indices in group-by context."""

def field(*names: str) -> Expr:
    """
    Access struct field(s).
    
    Parameters:
    - names: Field name(s) to access
    
    Returns:
    Field access expression
    """

def arg_sort_by(*exprs: IntoExpr, descending: bool | Sequence[bool] = False, nulls_last: bool = False) -> Expr:
    """
    Get indices that would sort by given expressions.
    
    Parameters:
    - exprs: Sort key expressions
    - descending: Sort in descending order
    - nulls_last: Place nulls at end
    
    Returns:
    Sorting indices expression
    """

def arg_where(condition: IntoExpr) -> Expr:
    """
    Get indices where condition is true.
    
    Parameters:
    - condition: Boolean condition
    
    Returns:
    Indices expression
    """

def row_index(name: str = "row_nr", offset: int = 0) -> Expr:
    """
    Add row index column.
    
    Parameters:
    - name: Column name for row index
    - offset: Starting value
    
    Returns:
    Row index expression
    """

def business_day_count(start: IntoExpr, end: IntoExpr) -> Expr:
    """
    Count business days between dates.
    
    Parameters:
    - start: Start date expression
    - end: End date expression
    
    Returns:
    Business day count expression
    """

def dtype_of(*exprs: IntoExpr) -> Expr:
    """Get data type of expression."""

def self_dtype() -> Expr:
    """Get data type of current context."""

def set_random_seed(seed: int) -> None:
    """
    Set random seed for reproducible results.
    
    Parameters:
    - seed: Random seed value
    """

DataFrame Operations

Functions that operate on entire DataFrames.

def concat(items: Iterable[DataFrame | LazyFrame], *, how: UnionStrategy = "vertical", rechunk: bool = False, parallel: bool = True) -> DataFrame | LazyFrame:
    """
    Concatenate DataFrames or LazyFrames.
    
    Parameters:
    - items: DataFrames/LazyFrames to concatenate
    - how: Concatenation strategy ("vertical", "horizontal", "diagonal")
    - rechunk: Rechunk after concatenation
    - parallel: Use parallel processing
    
    Returns:
    Concatenated DataFrame or LazyFrame
    """

def align_frames(*frames: DataFrame, on: str | Expr | None = None, select: str | Expr | list[str | Expr] | None = None, reverse: bool | list[bool] = False) -> list[DataFrame]:
    """
    Align DataFrames by common column values.
    
    Parameters:
    - frames: DataFrames to align
    - on: Column(s) to align on
    - select: Columns to select after alignment
    - reverse: Reverse sort order
    
    Returns:
    List of aligned DataFrames
    """

def collect_all(lazy_frames: Sequence[LazyFrame], *, type_coercion: bool = True, predicate_pushdown: bool = True, projection_pushdown: bool = True, simplify_expression: bool = True, slice_pushdown: bool = True, comm_subplan_elim: bool = True, comm_subexpr_elim: bool = True, cluster_with_columns: bool = True, no_optimization: bool = False, streaming: bool = False) -> list[DataFrame]:
    """
    Collect multiple LazyFrames in parallel.
    
    Parameters:
    - lazy_frames: LazyFrames to collect
    - Various optimization flags: Same as LazyFrame.collect()
    
    Returns:
    List of collected DataFrames
    """

def collect_all_async(lazy_frames: Sequence[LazyFrame], **kwargs) -> Awaitable[list[DataFrame]]:
    """Collect multiple LazyFrames asynchronously."""

def explain_all(lazy_frames: Sequence[LazyFrame], **kwargs) -> str:
    """Get execution plans for multiple LazyFrames."""

def select(*exprs: IntoExpr, **named_exprs: IntoExpr) -> Expr:
    """Create selection expression."""

def struct(*exprs: IntoExpr, **named_exprs: IntoExpr) -> Expr:
    """Create struct from expressions."""

def struct_with_fields(fields: Sequence[str], *exprs: IntoExpr) -> Expr:
    """
    Create struct with named fields.
    
    Parameters:
    - fields: Field names
    - exprs: Field value expressions
    
    Returns:
    Struct expression
    """

Usage Examples

Aggregation Operations

import polars as pl

df = pl.DataFrame({
    "group": ["A", "A", "B", "B", "C"],
    "value1": [1, 2, 3, 4, 5],
    "value2": [10, 20, 30, 40, 50]
})

# Basic aggregations (one result row per group)
result = df.group_by("group").agg([
    pl.sum("value1"),
    pl.mean("value2"),
    # pl.max("value1", "value2") would expand to two columns sharing one
    # alias; take the row-wise maximum first, then reduce it per group.
    pl.max_horizontal("value1", "value2").max().alias("max_of_both")
])

# Horizontal aggregations (computed across columns within each row)
result = df.with_columns([
    pl.sum_horizontal("value1", "value2").alias("total"),
    pl.mean_horizontal("value1", "value2").alias("average")
])

Date and Time Operations

# `date` here is the standard-library datetime.date, not pl.date
from datetime import date

# Create date ranges (eager=True returns a Series instead of an expression)
dates = pl.date_range(
    start=date(2023, 1, 1),
    end=date(2023, 12, 31),
    interval="1d",
    eager=True
)

# Create date and datetime columns from component columns
df = pl.DataFrame({
    "year": [2023, 2023, 2023],
    "month": [1, 2, 3],
    "day": [15, 20, 25]
}).with_columns([
    pl.date("year", "month", "day").alias("date"),
    pl.datetime("year", "month", "day", 12, 30, 0).alias("datetime")
])

# Duration calculations
df = df.with_columns([
    pl.duration(days=30).alias("thirty_days"),
    pl.duration(hours=2, minutes=30).alias("two_thirty")
])

String Operations

df = pl.DataFrame({
    "first": ["John", "Jane", "Bob"],
    "last": ["Doe", "Smith", "Johnson"],
    "title": ["Mr", "Ms", "Dr"]
})

result = df.with_columns([
    # String concatenation: `separator` inserts the space between parts,
    # so no pl.lit(" ") entries are needed in the list.
    pl.concat_str(
        [pl.col("title"), pl.col("first"), pl.col("last")],
        separator=" "
    ).alias("full_name"),

    # Format strings: each {} is replaced by the next argument, in order
    pl.format("Hello, {} {}!", pl.col("first"), pl.col("last")).alias("greeting")
])

Mathematical Operations

df = pl.DataFrame({
    "x": [1.0, 2.0, 3.0, 4.0],
    "y": [2.0, 3.0, 4.0, 5.0],
    "values": [10, 20, 30, 40]
})

# Trigonometric functions (note the argument order: y first, then x)
result = df.with_columns([
    pl.arctan2("y", "x").alias("angle_rad"),
    pl.arctan2d("y", "x").alias("angle_deg")
])

# Statistical functions
result = df.select([
    pl.std("values").alias("std_dev"),
    pl.var("values").alias("variance"),
    pl.median("values").alias("median"),
    # Pass the quantile by keyword so it is not mistaken for another column
    pl.quantile("values", quantile=0.75).alias("q75")
])

Advanced Transformations

# Sample frame used by the fold and map_batches examples below
df = pl.DataFrame({
    "value1": [1, 2, 3, 4, 5],
    "value2": [10, 20, 30, 40, 50]
})

# Fold operation: combine columns left to right, row by row, starting from
# the accumulator. Here it yields value1 + value2 for each row.
result = df.with_columns([
    pl.fold(
        acc=pl.lit(0),
        function=lambda acc, x: acc + x,
        exprs=["value1", "value2"]
    ).alias("row_total")
])

# Coalesce (first non-null value)
df_with_nulls = pl.DataFrame({
    "a": [1, None, 3],
    "b": [None, 2, None],
    "c": [10, 20, 30]
})

result = df_with_nulls.with_columns([
    pl.coalesce("a", "b", "c").alias("first_non_null")
])

# Map operations for complex transformations.
# The function receives a list holding one Series per input expression
# and must return a Series.
def custom_transform(batch: list[pl.Series]) -> pl.Series:
    return batch[0] * 2 + 1

result = df.with_columns([
    pl.map_batches(["value1"], custom_transform, return_dtype=pl.Int64).alias("transformed")
])

Window Functions and Rankings

df = pl.DataFrame({
    "group": ["A", "A", "A", "B", "B", "B"],
    "value": [10, 20, 30, 15, 25, 35]
})

# Window functions with partitioning
result = df.with_columns([
    pl.col("value").sum().over("group").alias("group_total"),
    pl.col("value").rank().over("group").alias("rank_in_group"),
    pl.col("value").shift(1).over("group").alias("previous_value")
])

Working with Lists and Arrays

df = pl.DataFrame({
    "lists": [[1, 2, 3], [4, 5], [6, 7, 8]]
})

# List operations
result = df.with_columns([
    pl.col("lists").list.len().alias("list_length"),
    pl.col("lists").list.sum().alias("list_sum"),
    pl.col("lists").list.get(0).alias("first_element")
])

# Concatenate lists
df2 = pl.DataFrame({
    "list1": [[1, 2], [3, 4]],
    "list2": [[5, 6], [7, 8]]
})

result = df2.with_columns([
    pl.concat_list("list1", "list2").alias("combined")
])

Install with Tessl CLI

npx tessl i tessl/pypi-polars-u64-idx

docs

config-utilities.md

core-data-structures.md

data-types.md

expressions.md

functions.md

index.md

io-operations.md

selectors.md

sql-interface.md

tile.json