Blazingly fast DataFrame library for Python with lazy and eager evaluation modes
—
90+ utility functions for data construction, aggregation, statistical operations, and expression building with support for vectorized computations, window functions, and complex data transformations.
Functions for creating expressions, literals, and data structures from various inputs.
def col(name: str) -> Expr:
"""
Create a column expression for referencing DataFrame columns.
Parameters:
- name: Column name or pattern (* for all columns)
Returns:
Column expression
"""
def lit(value, dtype=None, *, allow_object=False) -> Expr:
"""
Create a literal expression from a value.
Parameters:
- value: Literal value (scalar, list, etc.)
- dtype: Explicit data type
- allow_object: Allow Python objects
Returns:
Literal expression
"""
def struct(*exprs, **named_exprs) -> Expr:
"""
Create a struct expression from multiple expressions.
Parameters:
- exprs: Expressions to include as fields
- named_exprs: Named expressions as keyword arguments
Returns:
Struct expression
"""
def field(name: str) -> Expr:
"""
Access a field from a struct expression.
Parameters:
- name: Field name
Returns:
Field access expression
"""
def element() -> Expr:
"""
Access elements from a list expression context.
Returns:
Element access expression
"""

Functions for computing summary statistics and aggregations across data.
def sum(*names: str) -> Expr:
"""
Sum values across expressions.
Parameters:
- names: Column names to sum
Returns:
Sum expression
"""
def mean(*columns: str) -> Expr:
"""
Calculate mean across expressions.
Parameters:
- columns: Column names to average
Returns:
Mean expression
"""
def median(*exprs) -> Expr:
"""
Calculate median across expressions.
Parameters:
- exprs: Expressions to find median
Returns:
Median expression
"""
def max(*names: str) -> Expr:
"""
Find maximum value across expressions.
Parameters:
- names: Column names to find the maximum of
Returns:
Maximum expression
"""
def min(*names: str) -> Expr:
"""
Find minimum value across expressions.
Parameters:
- names: Column names to find the minimum of
Returns:
Minimum expression
"""
def count(*columns: str) -> Expr:
"""
Count non-null values across expressions.
Parameters:
- columns: Column names whose non-null values are counted
Returns:
Count expression
"""
def len() -> Expr:
"""
Return the number of rows in the current context (nulls included), similar to COUNT(*) in SQL.
Takes no arguments; to count the non-null values of specific columns use count().
Returns:
Length expression
"""
def std(column: str, ddof: int = 1) -> Expr:
"""
Calculate standard deviation of a column.
Parameters:
- column: Column name to calculate std
- ddof: Delta degrees of freedom
Returns:
Standard deviation expression
"""
def var(column: str, ddof: int = 1) -> Expr:
"""
Calculate variance of a column.
Parameters:
- column: Column name to calculate variance
- ddof: Delta degrees of freedom
Returns:
Variance expression
"""
def quantile(expr, quantile, interpolation="nearest") -> Expr:
"""
Calculate quantile of expression.
Parameters:
- expr: Expression to calculate quantile
- quantile: Quantile value (0-1)
- interpolation: Interpolation method
Returns:
Quantile expression
"""

Functions for aggregating across columns (horizontally) rather than rows.
def all_horizontal(*exprs) -> Expr:
"""
Check if all values are true across columns.
Parameters:
- exprs: Boolean expressions
Returns:
All-true expression
"""
def any_horizontal(*exprs) -> Expr:
"""
Check if any value is true across columns.
Parameters:
- exprs: Boolean expressions
Returns:
Any-true expression
"""
def sum_horizontal(*exprs) -> Expr:
"""
Sum values horizontally across columns.
Parameters:
- exprs: Numeric expressions
Returns:
Horizontal sum expression
"""
def mean_horizontal(*exprs) -> Expr:
"""
Calculate mean horizontally across columns.
Parameters:
- exprs: Numeric expressions
Returns:
Horizontal mean expression
"""
def max_horizontal(*exprs) -> Expr:
"""
Find maximum value horizontally across columns.
Parameters:
- exprs: Numeric expressions
Returns:
Horizontal max expression
"""
def min_horizontal(*exprs) -> Expr:
"""
Find minimum value horizontally across columns.
Parameters:
- exprs: Numeric expressions
Returns:
Horizontal min expression
"""

Functions for creating sequences and ranges of values.
def arange(start, end, step=1, *, dtype=None, eager=False) -> Expr | Series:
"""
Create arithmetic range of values.
Parameters:
- start: Start value
- end: End value (exclusive)
- step: Step size
- dtype: Data type
- eager: Return Series if True, Expr if False
Returns:
Range expression or Series
"""
def int_range(start, end, step=1, *, dtype=None, eager=False) -> Expr | Series:
"""
Create integer range of values.
Parameters:
- start: Start value
- end: End value (exclusive)
- step: Step size
- dtype: Integer data type
- eager: Return Series if True, Expr if False
Returns:
Integer range expression or Series
"""
def int_ranges(start, end, step=1, *, dtype=None, eager=False) -> Expr | Series:
"""
Create multiple integer ranges.
Parameters:
- start: Start values (can be expression)
- end: End values (can be expression)
- step: Step size
- dtype: Integer data type
- eager: Return Series if True, Expr if False
Returns:
Multiple ranges expression or Series
"""
def linear_space(start, end, n, *, dtype=None, eager=False) -> Expr | Series:
"""
Create linearly spaced values.
Parameters:
- start: Start value
- end: End value (inclusive)
- n: Number of values
- dtype: Data type
- eager: Return Series if True, Expr if False
Returns:
Linear space expression or Series
"""
def linear_spaces(start, end, n, *, dtype=None, eager=False) -> Expr | Series:
"""
Create multiple linearly spaced ranges.
Parameters:
- start: Start values (can be expression)
- end: End values (can be expression)
- n: Number of values per range
- dtype: Data type
- eager: Return Series if True, Expr if False
Returns:
Multiple linear spaces expression or Series
"""

Functions for creating date, datetime, and time ranges.
def date_range(start, end, interval="1d", *, closed="both", eager=False) -> Expr | Series:
"""
Create date range.
Parameters:
- start: Start date
- end: End date
- interval: Date interval (e.g., "1d", "1w", "1mo")
- closed: Range bounds ("both", "left", "right", "none")
- eager: Return Series if True, Expr if False
Returns:
Date range expression or Series
"""
def date_ranges(start, end, interval="1d", *, closed="both", eager=False) -> Expr | Series:
"""
Create multiple date ranges.
Parameters:
- start: Start dates (can be expression)
- end: End dates (can be expression)
- interval: Date interval
- closed: Range bounds
- eager: Return Series if True, Expr if False
Returns:
Multiple date ranges expression or Series
"""
def datetime_range(start, end, interval="1h", *, closed="both", time_zone=None, eager=False) -> Expr | Series:
"""
Create datetime range.
Parameters:
- start: Start datetime
- end: End datetime
- interval: Datetime interval (e.g., "1h", "30m", "1d")
- closed: Range bounds
- time_zone: Timezone
- eager: Return Series if True, Expr if False
Returns:
Datetime range expression or Series
"""
def datetime_ranges(start, end, interval="1h", *, closed="both", time_zone=None, eager=False) -> Expr | Series:
"""
Create multiple datetime ranges.
Parameters:
- start: Start datetimes (can be expression)
- end: End datetimes (can be expression)
- interval: Datetime interval
- closed: Range bounds
- time_zone: Timezone
- eager: Return Series if True, Expr if False
Returns:
Multiple datetime ranges expression or Series
"""
def time_range(start, end, interval="1h", *, closed="both", eager=False) -> Expr | Series:
"""
Create time range.
Parameters:
- start: Start time
- end: End time
- interval: Time interval
- closed: Range bounds
- eager: Return Series if True, Expr if False
Returns:
Time range expression or Series
"""
def time_ranges(start, end, interval="1h", *, closed="both", eager=False) -> Expr | Series:
"""
Create multiple time ranges.
Parameters:
- start: Start times (can be expression)
- end: End times (can be expression)
- interval: Time interval
- closed: Range bounds
- eager: Return Series if True, Expr if False
Returns:
Multiple time ranges expression or Series
"""

Functions for creating date, datetime, and time values from components.
def date(year, month, day) -> Expr:
"""
Create date from year, month, day components.
Parameters:
- year: Year expression or value
- month: Month expression or value (1-12)
- day: Day expression or value (1-31)
Returns:
Date expression
"""
def datetime(year, month, day, hour=0, minute=0, second=0, microsecond=0, *, time_unit="us", time_zone=None) -> Expr:
"""
Create datetime from components.
Parameters:
- year: Year expression or value
- month: Month expression or value (1-12)
- day: Day expression or value (1-31)
- hour: Hour expression or value (0-23)
- minute: Minute expression or value (0-59)
- second: Second expression or value (0-59)
- microsecond: Microsecond expression or value
- time_unit: Time precision ("ns", "us", "ms")
- time_zone: Timezone
Returns:
Datetime expression
"""
def time(hour=0, minute=0, second=0, microsecond=0) -> Expr:
"""
Create time from components.
Parameters:
- hour: Hour expression or value (0-23)
- minute: Minute expression or value (0-59)
- second: Second expression or value (0-59)
- microsecond: Microsecond expression or value
Returns:
Time expression
"""
def duration(*, weeks=None, days=None, hours=None, minutes=None, seconds=None, milliseconds=None, microseconds=None, nanoseconds=None, time_unit="us") -> Expr:
"""
Create duration from time components.
Parameters:
- weeks: Weeks expression or value
- days: Days expression or value
- hours: Hours expression or value
- minutes: Minutes expression or value
- seconds: Seconds expression or value
- milliseconds: Milliseconds expression or value
- microseconds: Microseconds expression or value
- nanoseconds: Nanoseconds expression or value
- time_unit: Time precision
Returns:
Duration expression
"""
def from_epoch(column, time_unit="s") -> Expr:
"""
Convert epoch timestamp to datetime.
Parameters:
- column: Column with epoch timestamps
- time_unit: Time unit of epoch ("s", "ms", "us", "ns")
Returns:
Datetime expression
"""

Functions for statistical analysis and correlation calculations.
def corr(a, b, *, method="pearson", ddof=1, propagate_nans=False) -> Expr:
"""
Calculate correlation coefficient between two expressions.
Parameters:
- a: First expression
- b: Second expression
- method: Correlation method ("pearson", "spearman")
- ddof: Delta degrees of freedom
- propagate_nans: Propagate NaN values
Returns:
Correlation expression
"""
def cov(a, b, *, ddof=1) -> Expr:
"""
Calculate covariance between two expressions.
Parameters:
- a: First expression
- b: Second expression
- ddof: Delta degrees of freedom
Returns:
Covariance expression
"""
def rolling_corr(a, b, window_size, *, min_periods=None, ddof=1) -> Expr:
"""
Calculate rolling correlation.
Parameters:
- a: First expression
- b: Second expression
- window_size: Rolling window size
- min_periods: Minimum periods for calculation
- ddof: Delta degrees of freedom
Returns:
Rolling correlation expression
"""
def rolling_cov(a, b, window_size, *, min_periods=None, ddof=1) -> Expr:
"""
Calculate rolling covariance.
Parameters:
- a: First expression
- b: Second expression
- window_size: Rolling window size
- min_periods: Minimum periods for calculation
- ddof: Delta degrees of freedom
Returns:
Rolling covariance expression
"""
def n_unique(expr) -> Expr:
"""
Count unique values in expression.
Parameters:
- expr: Expression to count unique values
Returns:
Unique count expression
"""
def approx_n_unique(expr) -> Expr:
"""
Approximate count of unique values using HyperLogLog.
Parameters:
- expr: Expression to count unique values
Returns:
Approximate unique count expression
"""

Functions for cumulative operations and running calculations.
def cum_sum(expr, *, reverse=False) -> Expr:
"""
Calculate cumulative sum.
Parameters:
- expr: Expression to sum cumulatively
- reverse: Calculate in reverse order
Returns:
Cumulative sum expression
"""
def cum_sum_horizontal(*exprs) -> Expr:
"""
Calculate cumulative sum horizontally across columns.
Parameters:
- exprs: Expressions to sum cumulatively
Returns:
Horizontal cumulative sum expression
"""
def cum_count(expr, *, reverse=False) -> Expr:
"""
Calculate cumulative count.
Parameters:
- expr: Expression to count cumulatively
- reverse: Calculate in reverse order
Returns:
Cumulative count expression
"""
def cum_fold(acc, function, exprs, *, include_init=False) -> Expr:
"""
Cumulative fold operation with custom function.
Parameters:
- acc: Initial accumulator value
- function: Fold function (takes acc, value)
- exprs: Expressions to fold
- include_init: Include initial value in result
Returns:
Cumulative fold expression
"""
def cum_reduce(function, exprs) -> Expr:
"""
Cumulative reduce operation.
Parameters:
- function: Reduce function (takes two values)
- exprs: Expressions to reduce
Returns:
Cumulative reduce expression
"""

Functions for advanced operations with custom lambda functions.
def fold(acc, function, exprs) -> Expr:
"""
Fold operation with custom function.
Parameters:
- acc: Initial accumulator value
- function: Fold function (takes acc, value)
- exprs: Expressions to fold
Returns:
Fold expression
"""
def reduce(function, exprs) -> Expr:
"""
Reduce operation with custom function.
Parameters:
- function: Reduce function (takes two values)
- exprs: Expressions to reduce
Returns:
Reduce expression
"""
def map_batches(expr, function, return_dtype=None, *, agg_list=False) -> Expr:
"""
Apply function to batches of data.
Parameters:
- expr: Expression to map over
- function: Function to apply to each batch
- return_dtype: Return data type
- agg_list: Aggregate results into list
Returns:
Map batches expression
"""
def map_groups(expr, function, return_dtype=None) -> Expr:
"""
Apply function to groups of data.
Parameters:
- expr: Expression to map over
- function: Function to apply to each group
- return_dtype: Return data type
Returns:
Map groups expression
"""

Functions for conditional logic and branching.
def when(condition) -> When:
"""
Start conditional expression chain.
Parameters:
- condition: Boolean condition expression
Returns:
When object for chaining
"""
def coalesce(*exprs) -> Expr:
"""
Return first non-null value from expressions.
Parameters:
- exprs: Expressions to coalesce
Returns:
Coalesced expression
"""
class When:
def then(self, expr) -> Then:
"""
Specify value when condition is true.
Parameters:
- expr: Expression or value to return
Returns:
Then object for chaining
"""
class Then:
def when(self, condition) -> When:
"""
Add another condition.
Parameters:
- condition: Boolean condition expression
Returns:
When object for chaining
"""
def otherwise(self, expr) -> Expr:
"""
Specify default value.
Parameters:
- expr: Default expression or value
Returns:
Complete conditional expression
"""

Miscellaneous utility functions for various operations.
def concat(items, *, how="vertical", parallel=True) -> DataFrame | LazyFrame:
"""
Concatenate DataFrames or LazyFrames.
Parameters:
- items: Iterable (e.g. a list) of DataFrames or LazyFrames to concatenate
- how: Concatenation method ("vertical", "horizontal", "diagonal")
- parallel: Use parallel execution
Returns:
Concatenated DataFrame or LazyFrame
"""
def concat_str(exprs, *, separator="", ignore_nulls=False) -> Expr:
"""
Concatenate string expressions.
Parameters:
- exprs: String expressions to concatenate
- separator: String separator
- ignore_nulls: Skip null values
Returns:
Concatenated string expression
"""
def concat_list(exprs) -> Expr:
"""
Concatenate expressions into list.
Parameters:
- exprs: Expressions to concatenate into list
Returns:
List expression
"""
def concat_arr(exprs) -> Expr:
"""
Concatenate expressions into array.
Parameters:
- exprs: Expressions to concatenate into array
Returns:
Array expression
"""
def format(format_str, *args) -> Expr:
"""
Format string with expressions.
Parameters:
- format_str: Format string with {} placeholders
- args: Expressions to substitute
Returns:
Formatted string expression
"""
def escape_regex(expr) -> Expr:
"""
Escape regex special characters in string.
Parameters:
- expr: String expression to escape
Returns:
Escaped string expression
"""
def dtype_of(expr) -> DataType:
"""
Get data type of expression.
Parameters:
- expr: Expression to inspect
Returns:
Data type
"""
def set_random_seed(seed) -> None:
"""
Set global random seed.
Parameters:
- seed: Random seed value
"""

import polars as pl
# Construction functions
df = pl.DataFrame({
"a": [1, 2, 3],
"b": [4, 5, 6],
"c": ["x", "y", "z"]
})
result = df.select([
pl.col("a"),
pl.lit(10).alias("constant"),
pl.struct(["a", "b"]).alias("struct_col")
])

# Various aggregations
result = df.select([
pl.col("a").sum().alias("sum_a"),
pl.col("b").mean().alias("mean_b"),
pl.max_horizontal("a", "b").alias("row_max"),
pl.sum_horizontal("a", "b").alias("row_sum")
])
# Group by aggregations
grouped = df.group_by("c").agg([
pl.col("a").sum(),
pl.col("b").mean(),
pl.col("a").std().alias("a_std")
])

# Create ranges
# Both columns must have the same length: 0, 2, ..., 18 gives 10 values to match the 10 dates
ranges_df = pl.DataFrame({
"int_range": pl.int_range(0, 20, 2, eager=True),
"date_range": pl.date_range("2023-01-01", "2023-01-10", "1d", eager=True)
})
# Multiple ranges from DataFrame
df = pl.DataFrame({
"start": [1, 5, 10],
"end": [5, 10, 15]
})
result = df.with_columns([
pl.int_ranges("start", "end").alias("ranges")
])

df = pl.DataFrame({
"x": [1, 2, 3, 4, 5],
"y": [2, 4, 6, 8, 10]
})
# Statistical functions
stats = df.select([
pl.corr("x", "y").alias("correlation"),
pl.cov("x", "y").alias("covariance"),
pl.col("x").std().alias("x_std"),
pl.col("y").var().alias("y_var")
])
# Rolling statistics
windowed = df.with_columns([
pl.rolling_corr("x", "y", window_size=3).alias("rolling_corr"),
pl.col("x").rolling_mean(3).alias("rolling_mean")
])

# Complex conditional expressions
result = df.with_columns([
pl.when(pl.col("x") > 3)
.then(pl.lit("high"))
.when(pl.col("x") > 1)
.then(pl.lit("medium"))
.otherwise(pl.lit("low"))
.alias("category"),
pl.coalesce([pl.col("x"), pl.col("y"), pl.lit(0)]).alias("first_non_null")
])

# Custom fold operation
result = df.select([
pl.fold(
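pl.lit(0),  # initial accumulator
lambda acc, x: acc + x,  # fold function, applied as (accumulator, next column)
[pl.col("x"), pl.col("y")]  # expressions to fold (columns of the frame defined above)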
).alias("custom_sum")
])
# Map batches with custom function
def custom_transform(series):
return series * 2 + 1
result = df.with_columns([
pl.col("x").map_batches(custom_transform).alias("transformed")
])

# String concatenation and formatting
df = pl.DataFrame({
"first": ["John", "Jane"],
"last": ["Doe", "Smith"],
"age": [30, 25]
})
result = df.with_columns([
pl.concat_str([pl.col("first"), pl.col("last")], separator=" ").alias("full_name"),
pl.format("Name: {}, Age: {}", pl.col("first"), pl.col("age")).alias("formatted")
])
# Array concatenation
df = pl.DataFrame({
"list1": [[1, 2], [3, 4]],
"list2": [[5, 6], [7, 8]]
})
result = df.with_columns([
pl.concat_list([pl.col("list1"), pl.col("list2")]).alias("combined")
])

Install with Tessl CLI
npx tessl i tessl/pypi-polars