Blazingly fast DataFrame library with 64-bit index support for handling datasets with more than 4.2 billion rows
—
Powerful expression system for column transformations, aggregations, and complex operations that work across DataFrame and LazyFrame. Expressions are composable and lazy, enabling complex column operations and transformations.
Core functions for creating expressions that operate on columns and values.
def col(name: str | DataType) -> Expr:
"""
Create column expression.
Parameters:
- name: Column name or data type selector
Returns:
Column expression
"""
def lit(value: Any, dtype: DataType | None = None) -> Expr:
"""
Create literal value expression.
Parameters:
- value: Literal value
- dtype: Optional data type
Returns:
Literal expression
"""
def when(predicate: Expr) -> When:
"""
Create conditional expression.
Parameters:
- predicate: Boolean expression condition
Returns:
When object for then/otherwise chaining
"""
class When:
def then(self, statement: Expr) -> Then:
"""Value when condition is true."""
class Then:
def otherwise(self, statement: Expr) -> Expr:
"""Value when condition is false."""

The main Expression class with methods for column operations, transformations, and aggregations.
class Expr:
def alias(self, name: str) -> Expr:
"""
Assign a name to the expression.
Parameters:
- name: New column name
Returns:
Aliased expression
"""
def cast(self, dtype: DataType | type[Any], *, strict: bool = True) -> Expr:
"""
Cast expression to different data type.
Parameters:
- dtype: Target data type
- strict: Whether to raise on cast failure
Returns:
Cast expression
"""
def filter(self, predicate: Expr) -> Expr:
"""
Filter expression based on predicate.
Parameters:
- predicate: Boolean expression for filtering
Returns:
Filtered expression
"""
def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Expr:
"""
Sort expression values.
Parameters:
- descending: Sort in descending order
- nulls_last: Place nulls at end
Returns:
Sorted expression
"""
def reverse(self) -> Expr:
"""Reverse expression values."""
def unique(self, *, maintain_order: bool = False) -> Expr:
"""
Get unique values.
Parameters:
- maintain_order: Maintain original order
Returns:
Expression with unique values
"""
def drop_nulls(self) -> Expr:
"""Drop null values from expression."""
def fill_null(self, value: Any | Expr | None = None, *, strategy: FillNullStrategy | None = None) -> Expr:
    """
    Fill null values.

    Supply either a fill value or a fill strategy, not both. `value`
    defaults to None so that a strategy can be used on its own, e.g.
    `fill_null(strategy="forward")`.

    Parameters:
    - value: Fill value or expression; omit when using a strategy
    - strategy: Fill strategy ("forward", "backward", "min", "max", "mean", "zero", "one")

    Returns:
    Expression with nulls filled
    """
def fill_nan(self, value: Any | Expr) -> Expr:
"""Fill NaN values."""
def is_null(self) -> Expr:
"""Check for null values."""
def is_not_null(self) -> Expr:
"""Check for non-null values."""
def is_nan(self) -> Expr:
"""Check for NaN values."""
def is_not_nan(self) -> Expr:
"""Check for non-NaN values."""
def is_finite(self) -> Expr:
"""Check for finite values."""
def is_infinite(self) -> Expr:
"""Check for infinite values."""

Statistical and aggregation operations on expressions.
class Expr:
def sum(self) -> Expr:
"""Sum all values."""
def mean(self) -> Expr:
"""Calculate mean."""
def median(self) -> Expr:
"""Calculate median."""
def max(self) -> Expr:
"""Get maximum value."""
def min(self) -> Expr:
"""Get minimum value."""
def std(self, ddof: int = 1) -> Expr:
"""
Calculate standard deviation.
Parameters:
- ddof: Delta degrees of freedom
Returns:
Standard deviation expression
"""
def var(self, ddof: int = 1) -> Expr:
"""
Calculate variance.
Parameters:
- ddof: Delta degrees of freedom
Returns:
Variance expression
"""
def quantile(self, quantile: float | Expr, *, interpolation: RollingInterpolationMethod = "nearest") -> Expr:
"""
Calculate quantile.
Parameters:
- quantile: Quantile value (0.0 to 1.0)
- interpolation: Interpolation method
Returns:
Quantile expression
"""
def count(self) -> Expr:
"""Count non-null values."""
def n_unique(self) -> Expr:
"""Count unique values."""
def null_count(self) -> Expr:
"""Count null values."""
def first(self) -> Expr:
"""Get first value."""
def last(self) -> Expr:
"""Get last value."""
def head(self, n: int | Expr = 10) -> Expr:
"""Get first n values."""
def tail(self, n: int | Expr = 10) -> Expr:
"""Get last n values."""

Mathematical operations on expressions.
class Expr:
def __add__(self, other: Any) -> Expr:
"""Addition operator (+)."""
def __sub__(self, other: Any) -> Expr:
"""Subtraction operator (-)."""
def __mul__(self, other: Any) -> Expr:
"""Multiplication operator (*)."""
def __truediv__(self, other: Any) -> Expr:
"""Division operator (/)."""
def __floordiv__(self, other: Any) -> Expr:
"""Floor division operator (//)."""
def __mod__(self, other: Any) -> Expr:
"""Modulo operator (%)."""
def __pow__(self, other: Any) -> Expr:
"""Power operator (**)."""
def abs(self) -> Expr:
"""Absolute value."""
def sqrt(self) -> Expr:
"""Square root."""
def ceil(self) -> Expr:
"""Ceiling function."""
def floor(self) -> Expr:
"""Floor function."""
def round(self, decimals: int | Expr = 0) -> Expr:
"""
Round to specified decimal places.
Parameters:
- decimals: Number of decimal places
Returns:
Rounded expression
"""
def clip(self, lower_bound: Any | Expr | None = None, upper_bound: Any | Expr | None = None) -> Expr:
"""
Clip values to specified bounds.
Parameters:
- lower_bound: Lower bound
- upper_bound: Upper bound
Returns:
Clipped expression
"""

Comparison and logical operations on expressions.
class Expr:
def __eq__(self, other: Any) -> Expr:
"""Equality operator (==)."""
def __ne__(self, other: Any) -> Expr:
"""Not equal operator (!=)."""
def __lt__(self, other: Any) -> Expr:
"""Less than operator (<)."""
def __le__(self, other: Any) -> Expr:
"""Less than or equal operator (<=)."""
def __gt__(self, other: Any) -> Expr:
"""Greater than operator (>)."""
def __ge__(self, other: Any) -> Expr:
"""Greater than or equal operator (>=)."""
def __and__(self, other: Any) -> Expr:
"""Logical AND operator (&)."""
def __or__(self, other: Any) -> Expr:
"""Logical OR operator (|)."""
def __xor__(self, other: Any) -> Expr:
"""Logical XOR operator (^)."""
def __invert__(self) -> Expr:
"""Logical NOT operator (~)."""
def is_in(self, other: Any) -> Expr:
"""Check if values are in collection."""
def is_between(self, lower_bound: Any | Expr, upper_bound: Any | Expr, closed: ClosedInterval = "both") -> Expr:
"""
Check if values are between bounds.
Parameters:
- lower_bound: Lower bound
- upper_bound: Upper bound
- closed: Include bounds ("both", "left", "right", "none")
Returns:
Boolean expression
"""

String manipulation methods available on string expressions.
class Expr:
@property
def str(self) -> ExprStringNameSpace:
"""Access string methods."""
class ExprStringNameSpace:
def len_bytes(self) -> Expr:
"""Get byte length of strings."""
def len_chars(self) -> Expr:
"""Get character length of strings."""
def contains(self, pattern: str | Expr, *, literal: bool = False, strict: bool = True) -> Expr:
"""
Check if string contains pattern.
Parameters:
- pattern: Pattern to search for
- literal: Treat pattern as literal string
- strict: Raise on invalid regex
Returns:
Boolean expression
"""
def starts_with(self, prefix: str | Expr) -> Expr:
"""Check if string starts with prefix."""
def ends_with(self, suffix: str | Expr) -> Expr:
"""Check if string ends with suffix."""
def to_lowercase(self) -> Expr:
"""Convert to lowercase."""
def to_uppercase(self) -> Expr:
"""Convert to uppercase."""
def strip_chars(self, characters: str | None = None) -> Expr:
"""Strip characters from both ends."""
def split(self, by: str | Expr, *, inclusive: bool = False) -> Expr:
"""
Split string by delimiter.
Parameters:
- by: Delimiter
- inclusive: Include delimiter in result
Returns:
List expression
"""
def slice(self, offset: int | Expr, length: int | Expr | None = None) -> Expr:
"""
Slice string.
Parameters:
- offset: Start position
- length: Slice length
Returns:
Sliced string expression
"""
def replace(self, pattern: str | Expr, value: str | Expr, *, literal: bool = False, n: int = 1) -> Expr:
"""
Replace pattern in string.
Parameters:
- pattern: Pattern to replace
- value: Replacement value
- literal: Treat pattern as literal
- n: Maximum number of replacements
Returns:
String expression with replacements
"""

Date and time operations on temporal expressions.
class Expr:
@property
def dt(self) -> ExprDateTimeNameSpace:
"""Access datetime methods."""
class ExprDateTimeNameSpace:
def year(self) -> Expr:
"""Extract year."""
def month(self) -> Expr:
"""Extract month."""
def day(self) -> Expr:
"""Extract day."""
def hour(self) -> Expr:
"""Extract hour."""
def minute(self) -> Expr:
"""Extract minute."""
def second(self) -> Expr:
"""Extract second."""
def weekday(self) -> Expr:
    """Get ISO weekday number (1=Monday, 7=Sunday)."""
def week(self) -> Expr:
"""Get week number."""
def strftime(self, format: str) -> Expr:
"""
Format datetime as string.
Parameters:
- format: Format string
Returns:
Formatted string expression
"""
def truncate(self, every: str | timedelta) -> Expr:
"""
Truncate to specified time unit.
Parameters:
- every: Time unit ("1d", "1h", "1m", "1s", etc.)
Returns:
Truncated datetime expression
"""
def with_time_unit(self, time_unit: TimeUnit) -> Expr:
"""
Change time unit.
Parameters:
- time_unit: New time unit ("ns", "us", "ms")
Returns:
Expression with new time unit
"""

Operations on list/array expressions.
class Expr:
@property
def list(self) -> ExprListNameSpace:
"""Access list methods."""
class ExprListNameSpace:
def len(self) -> Expr:
"""Get list length."""
def sum(self) -> Expr:
"""Sum list elements."""
def max(self) -> Expr:
"""Get maximum element."""
def min(self) -> Expr:
"""Get minimum element."""
def mean(self) -> Expr:
"""Calculate mean of elements."""
def first(self) -> Expr:
"""Get first element."""
def last(self) -> Expr:
"""Get last element."""
def get(self, index: int | Expr, *, null_on_oob: bool = True) -> Expr:
"""
Get element at index.
Parameters:
- index: Element index
- null_on_oob: Return null if out of bounds
Returns:
Element expression
"""
def slice(self, offset: int | Expr, length: int | Expr | None = None) -> Expr:
"""Slice list."""
def head(self, n: int | Expr = 5) -> Expr:
"""Get first n elements."""
def tail(self, n: int | Expr = 5) -> Expr:
"""Get last n elements."""
def contains(self, item: Any | Expr) -> Expr:
"""Check if list contains item."""
def explode(self) -> Expr:
"""Explode list elements to separate rows."""

import polars as pl
df = pl.DataFrame({
"name": ["Alice", "Bob", "Charlie"],
"age": [25, 30, 35],
"salary": [50000, 60000, 70000]
})
# Column selection and transformation
result = df.select([
pl.col("name"),
pl.col("age").alias("years"),
(pl.col("salary") / 1000).alias("salary_k"),
pl.col("salary").cast(pl.Float64)
])
# Conditional expressions
result = df.with_columns([
pl.when(pl.col("age") > 30)
.then(pl.lit("Senior"))
.otherwise(pl.lit("Junior"))
.alias("level")
])

df = pl.DataFrame({
"text": ["Hello World", "PYTHON programming", "Data Science"]
})
result = df.select([
pl.col("text"),
pl.col("text").str.to_lowercase().alias("lower"),
pl.col("text").str.len_chars().alias("length"),
pl.col("text").str.contains("o").alias("has_o"),
pl.col("text").str.split(" ").alias("words")
])

df = pl.DataFrame({
"timestamp": ["2023-01-15 10:30:00", "2023-02-20 14:45:00", "2023-03-10 09:15:00"]
}).with_columns([
pl.col("timestamp").str.strptime(pl.Datetime, "%Y-%m-%d %H:%M:%S")
])
result = df.select([
pl.col("timestamp"),
pl.col("timestamp").dt.year().alias("year"),
pl.col("timestamp").dt.month().alias("month"),
pl.col("timestamp").dt.weekday().alias("weekday"),
pl.col("timestamp").dt.strftime("%Y-%m").alias("year_month")
])

df = pl.DataFrame({
"group": ["A", "A", "B", "B", "C"],
"value": [10, 20, 15, 25, 30]
})
# Group aggregations
result = df.group_by("group").agg([
    pl.col("value").sum().alias("total"),
    pl.col("value").mean().alias("average"),
    # Parenthesize the subtraction so alias() names the whole difference.
    # Method calls bind tighter than the binary minus, so without the
    # parentheses alias("range") would apply only to the min() operand and
    # the resulting column would not be called "range".
    (pl.col("value").max() - pl.col("value").min()).alias("range"),
])
# Window functions
result = df.with_columns([
pl.col("value").sum().over("group").alias("group_total"),
pl.col("value").rank().over("group").alias("rank_in_group")
])

# Chaining multiple operations
result = df.select([
pl.col("name"),
pl.col("age")
.cast(pl.Float64)
.round(0)
.clip(0, 100)
.alias("age_clipped"),
# Complex conditional logic
pl.when((pl.col("age") >= 18) & (pl.col("salary") > 55000))
.then(pl.lit("Eligible"))
.when(pl.col("age") >= 18)
.then(pl.lit("Age OK"))
.otherwise(pl.lit("Not Eligible"))
.alias("status"),
# Mathematical operations
((pl.col("salary") * 1.1).round(2)).alias("salary_with_raise")
])

Install with Tessl CLI
npx tessl i tessl/pypi-polars-u64-idx