tessl/pypi-ibis-framework

The portable Python dataframe library that provides a unified API for data analysis across 20+ different backends

—

Pending

Overview

Eval results

Files

Aggregation and Window Functions

Name: tessl/pypi-ibis-framework
Author: tessl

Statistical aggregation functions and window operations including ranking, cumulative calculations, and frame-based computations for advanced analytics.

Capabilities

Basic Aggregations

Core statistical aggregation functions.

def sum(arg):
    """Sum of values."""

def mean(arg):
    """Average of values."""

def count(arg=None):
    """
    Count of values.
    
    Parameters:
    - arg: expression to count, or None to count rows
    
    Returns:
    Count expression
    """

def min(arg):
    """Minimum value."""

def max(arg):
    """Maximum value."""

def std(arg):
    """Standard deviation."""

def var(arg):
    """Variance."""

Usage Examples:

# Basic aggregations
result = table.aggregate(
    total_sales=table.sales.sum(),
    avg_price=table.price.mean(),
    num_orders=table.count(),
    min_date=table.order_date.min(),
    max_date=table.order_date.max()
)

# Grouped aggregations
by_category = (
    table
    .group_by('category')
    .aggregate(
        revenue=table.sales.sum(),
        avg_rating=table.rating.mean(),
        order_count=table.count()
    )
)

Window Specifications

Define window frames for analytical functions.

def window(
    group_by=None,
    order_by=None, 
    preceding=None,
    following=None,
    rows=None,
    range=None
):
    """
    Create window specification.
    
    Parameters:
    - group_by: columns to partition by
    - order_by: columns to order by
    - preceding: number of preceding rows/range
    - following: number of following rows/range
    - rows: tuple of (preceding, following) for row frame
    - range: tuple of (preceding, following) for range frame
    
    Returns:
    Window specification
    """

def rows_window(preceding=None, following=None):
    """Create row-based window frame."""

def range_window(preceding=None, following=None): 
    """Create range-based window frame."""

def cumulative_window():
    """Create cumulative window (unbounded preceding to current row)."""

def trailing_window(n):
    """Create trailing window of n rows."""

def trailing_range_window(interval):
    """Create trailing range window."""

Usage Examples:

import ibis

# Define window specifications
win = ibis.window(
    group_by='department',
    order_by='salary',
    rows=(2, 2)  # 2 preceding, 2 following
)

# Row-based window
row_win = ibis.rows_window(preceding=5, following=0)

# Cumulative window
cum_win = ibis.cumulative_window()

# Trailing window
trail_win = ibis.trailing_window(10)

Ranking Functions

Window functions for ranking and ordering.

def row_number():
    """
    Assign unique row numbers within window.
    
    Returns:
    Row number expression (1, 2, 3, ...)
    """

def rank():
    """
    Assign rank with gaps for ties.
    
    Returns:
    Rank expression (1, 2, 2, 4, ...)
    """

def dense_rank():
    """
    Assign rank without gaps for ties.
    
    Returns:
    Dense rank expression (1, 2, 2, 3, ...)
    """

def percent_rank():
    """
    Percentile rank (0 to 1).
    
    Returns:
    Percent rank expression
    """

def cume_dist():
    """
    Cumulative distribution (0 to 1).
    
    Returns:
    Cumulative distribution expression
    """

def ntile(n):
    """
    Divide rows into n buckets.
    
    Parameters:
    - n: int, number of buckets
    
    Returns:
    Bucket number expression (1 to n)
    """

Usage Examples:

# Ranking within groups
result = table.select(
    'name', 'department', 'salary',
    rank=ibis.rank().over(
        group_by='department',
        order_by=ibis.desc('salary')
    ),
    row_num=ibis.row_number().over(
        group_by='department', 
        order_by=ibis.desc('salary')
    ),
    percentile=ibis.percent_rank().over(
        order_by='salary'
    ),
    quartile=ibis.ntile(4).over(
        order_by='salary'
    )
)

Aggregate Window Functions

Apply aggregations over window frames.

Usage Examples:

# Moving averages and sums
result = table.select(
    'date', 'sales',
    moving_avg=table.sales.mean().over(
        order_by='date',
        rows=(6, 0)  # 7-day moving average
    ),
    running_total=table.sales.sum().over(
        order_by='date',
        rows=(None, 0)  # Cumulative sum
    ),
    pct_of_total=table.sales / table.sales.sum().over()
)

# Comparisons with previous/next values
result = table.select(
    'date', 'value',
    prev_value=table.value.lag(1).over(order_by='date'),
    next_value=table.value.lead(1).over(order_by='date'),
    change=table.value - table.value.lag(1).over(order_by='date')
)

Lag and Lead Functions

Access previous and next row values.

expr.lag(offset=1, default=None):
    """
    Get value from previous row.
    
    Parameters:
    - offset: int, number of rows back
    - default: value to use when no previous row exists
    
    Returns:
    Lagged value expression
    """

expr.lead(offset=1, default=None):
    """
    Get value from next row.
    
    Parameters:
    - offset: int, number of rows forward
    - default: value to use when no next row exists
    
    Returns:
    Lead value expression
    """

expr.first():
    """Get first value in window frame."""

expr.last():
    """Get last value in window frame."""

Usage Examples:

# Time series calculations
result = table.select(
    'date', 'price',
    prev_price=table.price.lag(1).over(order_by='date'),
    price_change=table.price - table.price.lag(1).over(order_by='date'),
    next_price=table.price.lead(1).over(order_by='date'),
    first_price=table.price.first().over(
        group_by='symbol',
        order_by='date'
    ),
    last_price=table.price.last().over(
        group_by='symbol', 
        order_by='date'
    )
)

Statistical Window Functions

Advanced statistical calculations over windows.

Usage Examples:

# Rolling statistics
result = table.select(
    'date', 'value',
    rolling_std=table.value.std().over(
        order_by='date',
        rows=(29, 0)  # 30-day rolling standard deviation
    ),
    rolling_var=table.value.var().over(
        order_by='date', 
        rows=(29, 0)
    ),
    z_score=(
        (table.value - table.value.mean().over(rows=(29, 0))) /
        table.value.std().over(rows=(29, 0))
    ).over(order_by='date')
)

Conditional Aggregations

Aggregations with filtering conditions.

Usage Examples:

# Conditional aggregations
result = table.aggregate(
    total_sales=table.sales.sum(),
    high_value_sales=table.sales.sum(where=table.sales > 1000),
    avg_rating=table.rating.mean(),
    avg_high_rating=table.rating.mean(where=table.rating >= 4.0),
    premium_count=table.count(where=table.tier == 'premium')
)

# Window conditional aggregations
result = table.select(
    'product', 'date', 'sales',
    high_sales_pct=(
        table.sales.sum(where=table.sales > 1000) / 
        table.sales.sum()
    ).over(group_by='product')
)

Frame Boundary Specifications

Control window frame boundaries precisely.

def preceding(n):
    """
    Specify preceding boundary.
    
    Parameters:
    - n: int or None (unbounded)
    
    Returns:
    Preceding boundary specification
    """

def following(n):
    """
    Specify following boundary.
    
    Parameters:
    - n: int or None (unbounded)
    
    Returns:
    Following boundary specification
    """

def range(start, end):
    """
    Create range-based frame.
    
    Parameters:
    - start: boundary specification
    - end: boundary specification
    
    Returns:
    Range frame specification
    """

Usage Examples:

# Precise frame control
win1 = ibis.window(
    order_by='date',
    rows=(ibis.preceding(5), ibis.following(2))
)

win2 = ibis.window(
    order_by='timestamp',
    range=(
        ibis.preceding(ibis.interval(hours=1)),
        ibis.following(ibis.interval(minutes=30))
    )
)

# Use with aggregations
result = table.select(
    'timestamp', 'value',
    hour_avg=table.value.mean().over(win2)
)

Multiple Window Operations

Combine multiple window operations efficiently.

Usage Examples:

# Multiple windows in single query
dept_win = ibis.window(group_by='department', order_by='salary')
company_win = ibis.window(order_by='salary')

result = table.select(
    'name', 'department', 'salary',
    dept_rank=ibis.rank().over(dept_win),
    dept_pct=ibis.percent_rank().over(dept_win), 
    company_rank=ibis.rank().over(company_win),
    dept_avg=table.salary.mean().over(group_by='department'),
    company_avg=table.salary.mean().over()
)

Install with Tessl CLI