The portable Python dataframe library that provides a unified API for data analysis across 20+ different backends
—
Statistical aggregation functions and window operations including ranking, cumulative calculations, and frame-based computations for advanced analytics.
Core statistical aggregation functions.
def sum(arg):
"""Sum of values."""
def mean(arg):
"""Average of values."""
def count(arg=None):
"""
Count of values.
Parameters:
- arg: expression to count, or None to count rows
Returns:
Count expression
"""
def min(arg):
"""Minimum value."""
def max(arg):
"""Maximum value."""
def std(arg):
"""Standard deviation."""
def var(arg):
"""Variance."""Usage Examples:
# Basic aggregations
result = table.aggregate(
total_sales=table.sales.sum(),
avg_price=table.price.mean(),
num_orders=table.count(),
min_date=table.order_date.min(),
max_date=table.order_date.max()
)
# Grouped aggregations
by_category = (
table
.group_by('category')
.aggregate(
revenue=table.sales.sum(),
avg_rating=table.rating.mean(),
order_count=table.count()
)
)Define window frames for analytical functions.
def window(
group_by=None,
order_by=None,
preceding=None,
following=None,
rows=None,
range=None
):
"""
Create window specification.
Parameters:
- group_by: columns to partition by
- order_by: columns to order by
- preceding: number of preceding rows/range
- following: number of following rows/range
- rows: tuple of (preceding, following) for row frame
- range: tuple of (preceding, following) for range frame
Returns:
Window specification
"""
def rows_window(preceding=None, following=None):
"""Create row-based window frame."""
def range_window(preceding=None, following=None):
"""Create range-based window frame."""
def cumulative_window():
"""Create cumulative window (unbounded preceding to current row)."""
def trailing_window(n):
"""Create trailing window of n rows."""
def trailing_range_window(interval):
"""Create trailing range window."""Usage Examples:
import ibis
# Define window specifications
win = ibis.window(
group_by='department',
order_by='salary',
rows=(2, 2) # 2 preceding, 2 following
)
# Row-based window
row_win = ibis.rows_window(preceding=5, following=0)
# Cumulative window
cum_win = ibis.cumulative_window()
# Trailing window
trail_win = ibis.trailing_window(10)Window functions for ranking and ordering.
def row_number():
"""
Assign unique row numbers within window.
Returns:
Row number expression (1, 2, 3, ...)
"""
def rank():
"""
Assign rank with gaps for ties.
Returns:
Rank expression (1, 2, 2, 4, ...)
"""
def dense_rank():
"""
Assign rank without gaps for ties.
Returns:
Dense rank expression (1, 2, 2, 3, ...)
"""
def percent_rank():
"""
Percentile rank (0 to 1).
Returns:
Percent rank expression
"""
def cume_dist():
"""
Cumulative distribution (0 to 1).
Returns:
Cumulative distribution expression
"""
def ntile(n):
"""
Divide rows into n buckets.
Parameters:
- n: int, number of buckets
Returns:
Bucket number expression (1 to n)
"""Usage Examples:
# Ranking within groups
result = table.select(
'name', 'department', 'salary',
rank=ibis.rank().over(
group_by='department',
order_by=ibis.desc('salary')
),
row_num=ibis.row_number().over(
group_by='department',
order_by=ibis.desc('salary')
),
percentile=ibis.percent_rank().over(
order_by='salary'
),
quartile=ibis.ntile(4).over(
order_by='salary'
)
)Apply aggregations over window frames.
Usage Examples:
# Moving averages and sums
result = table.select(
'date', 'sales',
moving_avg=table.sales.mean().over(
order_by='date',
rows=(6, 0) # 7-day moving average
),
running_total=table.sales.sum().over(
order_by='date',
rows=(None, 0) # Cumulative sum
),
pct_of_total=table.sales / table.sales.sum().over()
)
# Comparisons with previous/next values
result = table.select(
'date', 'value',
prev_value=table.value.lag(1).over(order_by='date'),
next_value=table.value.lead(1).over(order_by='date'),
change=table.value - table.value.lag(1).over(order_by='date')
)Access previous and next row values.
expr.lag(offset=1, default=None):
"""
Get value from previous row.
Parameters:
- offset: int, number of rows back
- default: value to use when no previous row exists
Returns:
Lagged value expression
"""
expr.lead(offset=1, default=None):
"""
Get value from next row.
Parameters:
- offset: int, number of rows forward
- default: value to use when no next row exists
Returns:
Lead value expression
"""
expr.first():
"""Get first value in window frame."""
expr.last():
"""Get last value in window frame."""Usage Examples:
# Time series calculations
result = table.select(
'date', 'price',
prev_price=table.price.lag(1).over(order_by='date'),
price_change=table.price - table.price.lag(1).over(order_by='date'),
next_price=table.price.lead(1).over(order_by='date'),
first_price=table.price.first().over(
group_by='symbol',
order_by='date'
),
last_price=table.price.last().over(
group_by='symbol',
order_by='date'
)
)Advanced statistical calculations over windows.
Usage Examples:
# Rolling statistics
result = table.select(
'date', 'value',
rolling_std=table.value.std().over(
order_by='date',
rows=(29, 0) # 30-day rolling standard deviation
),
rolling_var=table.value.var().over(
order_by='date',
rows=(29, 0)
),
z_score=(
(table.value - table.value.mean().over(rows=(29, 0))) /
table.value.std().over(rows=(29, 0))
).over(order_by='date')
)Aggregations with filtering conditions.
Usage Examples:
# Conditional aggregations
result = table.aggregate(
total_sales=table.sales.sum(),
high_value_sales=table.sales.sum(where=table.sales > 1000),
avg_rating=table.rating.mean(),
avg_high_rating=table.rating.mean(where=table.rating >= 4.0),
premium_count=table.count(where=table.tier == 'premium')
)
# Window conditional aggregations
result = table.select(
'product', 'date', 'sales',
high_sales_pct=(
table.sales.sum(where=table.sales > 1000) /
table.sales.sum()
).over(group_by='product')
)Control window frame boundaries precisely.
def preceding(n):
"""
Specify preceding boundary.
Parameters:
- n: int or None (unbounded)
Returns:
Preceding boundary specification
"""
def following(n):
"""
Specify following boundary.
Parameters:
- n: int or None (unbounded)
Returns:
Following boundary specification
"""
def range(start, end):
"""
Create range-based frame.
Parameters:
- start: boundary specification
- end: boundary specification
Returns:
Range frame specification
"""Usage Examples:
# Precise frame control
win1 = ibis.window(
order_by='date',
rows=(ibis.preceding(5), ibis.following(2))
)
win2 = ibis.window(
order_by='timestamp',
range=(
ibis.preceding(ibis.interval(hours=1)),
ibis.following(ibis.interval(minutes=30))
)
)
# Use with aggregations
result = table.select(
'timestamp', 'value',
hour_avg=table.value.mean().over(win2)
)Combine multiple window operations efficiently.
Usage Examples:
# Multiple windows in single query
dept_win = ibis.window(group_by='department', order_by='salary')
company_win = ibis.window(order_by='salary')
result = table.select(
'name', 'department', 'salary',
dept_rank=ibis.rank().over(dept_win),
dept_pct=ibis.percent_rank().over(dept_win),
company_rank=ibis.rank().over(company_win),
dept_avg=table.salary.mean().over(group_by='department'),
company_avg=table.salary.mean().over()
)Install with Tessl CLI
npx tessl i tessl/pypi-ibis-framework