CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-pypika

A SQL query builder API for Python that provides programmatic interface for constructing SQL queries.

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

analytics.mddocs/

Analytics and Window Functions

Advanced analytical functions including window functions (ROW_NUMBER, RANK), frame specifications, and statistical functions for complex data analysis queries. These functions enable sophisticated analytical processing with support for partitioning, ordering, and frame boundaries.

Capabilities

Window Frame Components

Boundary specifications for window functions defining the range of rows to include in calculations.

class Preceding:
    def __init__(self, value):
        """
        Specify preceding boundary for window frames.
        
        Parameters:
        - value: Number of preceding rows or 'UNBOUNDED'
        """

class Following:
    def __init__(self, value):
        """
        Specify following boundary for window frames.
        
        Parameters:
        - value: Number of following rows or 'UNBOUNDED'
        """

CURRENT_ROW: object  # Current row constant for window frames

Usage Examples:

from pypika import Query, Table
from pypika.analytics import Preceding, Following, CURRENT_ROW, Sum, Avg

sales = Table('sales')

# Window frame specifications
query = (Query.from_(sales)
         .select(
             sales.date,
             sales.amount,
             # Running total (unbounded preceding to current row)
             Sum(sales.amount).over()
             .orderby(sales.date)
             .rows_between(Preceding('UNBOUNDED'), CURRENT_ROW)
             .as_('running_total'),
             
             # 7-day moving average
             Avg(sales.amount).over()
             .orderby(sales.date)
             .rows_between(Preceding(6), CURRENT_ROW)
             .as_('7_day_avg'),
             
             # Next 3 days total
             Sum(sales.amount).over()
             .orderby(sales.date)
             .rows_between(CURRENT_ROW, Following(3))
             .as_('next_3_days_total')
         ))

Ranking Functions

Window functions for ranking and ordering data within partitions.

class Rank(AnalyticFunction):
    def __init__(self):
        """RANK() window function - rank with gaps."""

class DenseRank(AnalyticFunction):
    def __init__(self):
        """DENSE_RANK() window function - rank without gaps."""

class RowNumber(AnalyticFunction):
    def __init__(self):
        """ROW_NUMBER() window function - sequential numbering."""

class NTile(AnalyticFunction):
    def __init__(self, n):
        """
        NTILE() window function - divide into n buckets.
        
        Parameters:
        - n: Number of buckets
        """

Usage Examples:

from pypika import Query, Table
from pypika.analytics import Rank, DenseRank, RowNumber, NTile

employees = Table('employees')
sales = Table('sales')

# Employee ranking by salary
query = (Query.from_(employees)
         .select(
             employees.name,
             employees.department,
             employees.salary,
             RowNumber().over(employees.department).orderby(employees.salary.desc()).as_('row_num'),
             Rank().over(employees.department).orderby(employees.salary.desc()).as_('salary_rank'),
             DenseRank().over(employees.department).orderby(employees.salary.desc()).as_('dense_rank'),
             NTile(4).over(employees.department).orderby(employees.salary.desc()).as_('quartile')
         ))

# Sales performance ranking
query = (Query.from_(sales)
         .select(
             sales.salesperson_id,
             sales.quarter,
             sales.total_sales,
             Rank().over().orderby(sales.total_sales.desc()).as_('overall_rank'),
             Rank().over(sales.quarter).orderby(sales.total_sales.desc()).as_('quarterly_rank'),
             NTile(5).over(sales.quarter).orderby(sales.total_sales.desc()).as_('performance_quintile')
         ))

# Top performers per department
top_performers = (Query.from_(employees)
                  .select(
                      employees.name,
                      employees.department,
                      employees.salary,
                      Rank().over(employees.department).orderby(employees.salary.desc()).as_('dept_rank')
                  ))

query = (Query.from_(top_performers)
         .select('*')
         .where(top_performers.dept_rank <= 3))

Value Functions

Window functions for accessing values from other rows within the partition.

class FirstValue(AnalyticFunction):
    def __init__(self, term):
        """
        FIRST_VALUE() window function - first value in partition.
        
        Parameters:
        - term: Column or expression to get first value of
        """

class LastValue(AnalyticFunction):
    def __init__(self, term):
        """
        LAST_VALUE() window function - last value in partition.
        
        Parameters:  
        - term: Column or expression to get last value of
        """

class Lag(AnalyticFunction):
    def __init__(self, term, offset=1, default=None):
        """
        LAG() window function - value from previous row.
        
        Parameters:
        - term: Column or expression
        - offset: Number of rows back (default 1)
        - default: Default value if no previous row
        """

class Lead(AnalyticFunction):
    def __init__(self, term, offset=1, default=None):
        """
        LEAD() window function - value from next row.
        
        Parameters:
        - term: Column or expression
        - offset: Number of rows forward (default 1)
        - default: Default value if no next row
        """

Usage Examples:

from pypika import Query, Table
from pypika.analytics import FirstValue, LastValue, Lag, Lead

stock_prices = Table('stock_prices')
sales = Table('sales')

# Stock price analysis
query = (Query.from_(stock_prices)
         .select(
             stock_prices.symbol,
             stock_prices.date,
             stock_prices.close_price,
             FirstValue(stock_prices.close_price).over(stock_prices.symbol)
             .orderby(stock_prices.date).as_('first_price'),
             LastValue(stock_prices.close_price).over(stock_prices.symbol)
             .orderby(stock_prices.date)
             .rows_between(Preceding('UNBOUNDED'), Following('UNBOUNDED'))
             .as_('last_price'),
             Lag(stock_prices.close_price, 1).over(stock_prices.symbol)
             .orderby(stock_prices.date).as_('prev_day_price'),
             Lead(stock_prices.close_price, 1).over(stock_prices.symbol)
             .orderby(stock_prices.date).as_('next_day_price')
         ))

# Sales trend analysis
query = (Query.from_(sales)
         .select(
             sales.month,
             sales.revenue,
             Lag(sales.revenue, 1, 0).over().orderby(sales.month).as_('prev_month_revenue'),
             Lead(sales.revenue, 1, 0).over().orderby(sales.month).as_('next_month_revenue'),
             (sales.revenue - Lag(sales.revenue, 1, 0).over().orderby(sales.month)).as_('month_over_month_change')
         ))

# Year-over-year comparison
query = (Query.from_(sales)
         .select(
             sales.year,
             sales.quarter,
             sales.revenue,
             Lag(sales.revenue, 4).over().orderby(sales.year, sales.quarter).as_('yoy_revenue'),
             ((sales.revenue - Lag(sales.revenue, 4).over().orderby(sales.year, sales.quarter)) / 
              Lag(sales.revenue, 4).over().orderby(sales.year, sales.quarter) * 100).as_('yoy_growth_pct')
         ))

Aggregate Window Functions

Window versions of aggregate functions for running calculations and statistical analysis.

class Sum(AnalyticFunction):
    def __init__(self, term):
        """SUM() window function."""

class Count(AnalyticFunction):
    def __init__(self, term):
        """COUNT() window function."""

class Avg(AnalyticFunction):
    def __init__(self, term):
        """AVG() window function."""

class Min(AnalyticFunction):
    def __init__(self, term):
        """MIN() window function."""

class Max(AnalyticFunction):
    def __init__(self, term):
        """MAX() window function."""

class Median(AnalyticFunction):
    def __init__(self, term):
        """MEDIAN() window function."""

Usage Examples:

from pypika import Query, Table
from pypika.analytics import Sum, Count, Avg, Min, Max, Median

sales = Table('sales')
orders = Table('orders')

# Running totals and averages
query = (Query.from_(sales)
         .select(
             sales.date,
             sales.amount,
             Sum(sales.amount).over().orderby(sales.date).as_('running_total'),
             Avg(sales.amount).over().orderby(sales.date)
             .rows_between(Preceding(6), CURRENT_ROW).as_('7_day_avg'),
             Count(sales.id).over().orderby(sales.date).as_('cumulative_count')
         ))

# Department statistics
query = (Query.from_(sales)
         .select(
             sales.department,
             sales.salesperson,
             sales.amount,
             Sum(sales.amount).over(sales.department).as_('dept_total'),
             Avg(sales.amount).over(sales.department).as_('dept_avg'),
             Max(sales.amount).over(sales.department).as_('dept_max'),
             Min(sales.amount).over(sales.department).as_('dept_min'),
             Median(sales.amount).over(sales.department).as_('dept_median')
         ))

# Performance metrics
query = (Query.from_(orders)
         .select(
             orders.customer_id,
             orders.order_date,
             orders.amount,
             Count('*').over(orders.customer_id)
             .orderby(orders.order_date)
             .rows_between(Preceding('UNBOUNDED'), CURRENT_ROW)
             .as_('order_sequence'),
             Sum(orders.amount).over(orders.customer_id)
             .orderby(orders.order_date)
             .rows_between(Preceding('UNBOUNDED'), CURRENT_ROW)
             .as_('lifetime_value')
         ))

Statistical Functions

Advanced statistical functions for variance, standard deviation, and distribution analysis.

class StdDev(AnalyticFunction):
    def __init__(self, term):
        """Standard deviation window function."""

class StdDevPop(AnalyticFunction):
    def __init__(self, term):
        """Population standard deviation window function."""

class StdDevSamp(AnalyticFunction):
    def __init__(self, term):
        """Sample standard deviation window function."""

class Variance(AnalyticFunction):
    def __init__(self, term):
        """Variance window function."""

class VarPop(AnalyticFunction):
    def __init__(self, term):
        """Population variance window function."""

class VarSamp(AnalyticFunction):
    def __init__(self, term):
        """Sample variance window function."""

Usage Examples:

from pypika import Query, Table
from pypika.analytics import StdDev, StdDevPop, Variance, VarPop

performance = Table('performance_metrics')
sales = Table('sales')

# Statistical analysis
query = (Query.from_(performance)
         .select(
             performance.department,
             performance.employee_id,
             performance.score,
             Avg(performance.score).over(performance.department).as_('dept_avg_score'),
             StdDev(performance.score).over(performance.department).as_('dept_score_stddev'),
             Variance(performance.score).over(performance.department).as_('dept_score_variance'),
             ((performance.score - Avg(performance.score).over(performance.department)) / 
              StdDev(performance.score).over(performance.department)).as_('z_score')
         ))

# Rolling statistics
query = (Query.from_(sales)
         .select(
             sales.date,
             sales.amount,
             Avg(sales.amount).over()
             .orderby(sales.date)
             .rows_between(Preceding(29), CURRENT_ROW)
             .as_('30_day_avg'),
             StdDev(sales.amount).over()
             .orderby(sales.date)
             .rows_between(Preceding(29), CURRENT_ROW)
             .as_('30_day_stddev')
         ))

Value Window Functions

Window functions for accessing values at specific positions within the window frame.

class FirstValue(AnalyticFunction):
    def __init__(self, term):
        """
        FIRST_VALUE() window function - first value in the window frame.
        
        Parameters:
        - term: Expression to get the first value of
        """

class LastValue(AnalyticFunction):
    def __init__(self, term):
        """
        LAST_VALUE() window function - last value in the window frame.
        
        Parameters:
        - term: Expression to get the last value of
        """

class Median(AnalyticFunction):
    def __init__(self, term):
        """
        MEDIAN() window function - median value in the window frame.
        
        Parameters:
        - term: Expression to calculate the median of
        """

class Lag(AnalyticFunction):
    def __init__(self, term, offset=1, default=None):
        """
        LAG() window function - value from a previous row.
        
        Parameters:
        - term: Expression to get value from
        - offset: Number of rows back (default: 1)
        - default: Default value if no row exists
        """

class Lead(AnalyticFunction):
    def __init__(self, term, offset=1, default=None):
        """
        LEAD() window function - value from a following row.
        
        Parameters:
        - term: Expression to get value from
        - offset: Number of rows forward (default: 1)
        - default: Default value if no row exists
        """

Usage Examples:

from pypika import Query, Table
from pypika.analytics import FirstValue, LastValue, Median, Lag, Lead

sales = Table('sales')
stock_prices = Table('stock_prices')

# First and last values in a window
query = (Query.from_(sales)
         .select(
             sales.date,
             sales.amount,
             FirstValue(sales.amount).over()
             .orderby(sales.date)
             .rows_between(Preceding('UNBOUNDED'), Following('UNBOUNDED'))
             .as_('first_sale_amount'),
             LastValue(sales.amount).over()
             .orderby(sales.date)
             .rows_between(Preceding('UNBOUNDED'), Following('UNBOUNDED'))
             .as_('last_sale_amount')
         ))

# Median calculation
query = (Query.from_(sales)
         .select(
             sales.product_id,
             sales.amount,
             Median(sales.amount).over(sales.product_id).as_('median_price')
         ))

# Lag and Lead for time series analysis
query = (Query.from_(stock_prices)
         .select(
             stock_prices.date,
             stock_prices.price,
             Lag(stock_prices.price, 1).over()
             .orderby(stock_prices.date).as_('previous_price'),
             Lead(stock_prices.price, 1).over()
             .orderby(stock_prices.date).as_('next_price'),
             # Calculate price change
             (stock_prices.price - Lag(stock_prices.price, 1).over()
              .orderby(stock_prices.date)).as_('price_change')
         ))

Window Function Syntax

Complete window function syntax with OVER clause, partitioning, ordering, and frame specifications.

Usage Examples:

from pypika import Query, Table
from pypika.analytics import RowNumber, Sum, Avg, Rank

employees = Table('employees')
sales = Table('sales')

# Complete window function syntax patterns

# Basic OVER clause
basic_window = RowNumber().over()

# PARTITION BY
partitioned = RowNumber().over(employees.department)

# ORDER BY
ordered = RowNumber().over().orderby(employees.salary.desc())

# PARTITION BY and ORDER BY
full_window = RowNumber().over(employees.department).orderby(employees.salary.desc())

# Window frames with ROWS
rows_frame = (Sum(sales.amount).over()
              .orderby(sales.date)
              .rows_between(Preceding(2), Following(2)))

# Window frames with RANGE (if supported)
# range_frame = Sum(sales.amount).over().orderby(sales.date).range_between(...)

# Complex analytical query
query = (Query.from_(employees)
         .select(
             employees.name,
             employees.department,
             employees.salary,
             employees.hire_date,
             
             # Row numbering within department by salary
             RowNumber().over(employees.department)
             .orderby(employees.salary.desc()).as_('dept_salary_rank'),
             
             # Running total of salaries by hire date
             Sum(employees.salary).over()
             .orderby(employees.hire_date)
             .rows_between(Preceding('UNBOUNDED'), CURRENT_ROW)
             .as_('cumulative_salary_cost'),
             
             # Moving average salary over last 3 hires
             Avg(employees.salary).over()
             .orderby(employees.hire_date)
             .rows_between(Preceding(2), CURRENT_ROW)
             .as_('recent_avg_salary'),
             
             # Salary percentile within department
             Rank().over(employees.department)
             .orderby(employees.salary).as_('salary_percentile_rank')
         ))

# Analytical reporting query
quarterly_sales = (Query.from_(sales)
                   .select(
                       sales.quarter,
                       sales.region,
                       sales.revenue,
                       
                       # Quarter-over-quarter growth
                       Lag(sales.revenue, 1).over(sales.region)
                       .orderby(sales.quarter).as_('prev_quarter_revenue'),
                       
                       ((sales.revenue - Lag(sales.revenue, 1).over(sales.region)
                         .orderby(sales.quarter)) / 
                        Lag(sales.revenue, 1).over(sales.region)
                        .orderby(sales.quarter) * 100).as_('qoq_growth_pct'),
                       
                       # Year-over-year growth
                       Lag(sales.revenue, 4).over(sales.region)
                       .orderby(sales.quarter).as_('yoy_revenue'),
                       
                       # Regional ranking
                       Rank().over(sales.quarter)
                       .orderby(sales.revenue.desc()).as_('regional_rank'),
                       
                       # Running total for the year
                       Sum(sales.revenue).over(sales.region)
                       .orderby(sales.quarter)
                       .rows_between(Preceding(3), CURRENT_ROW)
                       .as_('ytd_revenue')
                   ))

Frame Specification Methods

Methods for specifying window frame boundaries in analytical functions.

class AnalyticFunction:
    def rows_between(self, start, end):
        """
        Specify ROWS frame between boundaries.
        
        Parameters:
        - start: Starting boundary (Preceding, CURRENT_ROW, Following)
        - end: Ending boundary (Preceding, CURRENT_ROW, Following)
        """
        
    def range_between(self, start, end):
        """
        Specify RANGE frame between boundaries.
        
        Parameters:
        - start: Starting boundary (Preceding, CURRENT_ROW, Following)
        - end: Ending boundary (Preceding, CURRENT_ROW, Following)
        """
        
    def over(self, *partition_by):
        """
        Specify OVER clause with optional partitioning.
        
        Parameters:
        - partition_by: Columns to partition by
        """
        
    def orderby(self, *terms):
        """
        Specify ORDER BY within window.
        
        Parameters:
        - terms: Columns or expressions to order by
        """

Usage Examples:

from pypika import Query, Table
from pypika.analytics import Sum, Avg, RowNumber, Preceding, Following, CURRENT_ROW

transactions = Table('transactions')

# Various frame specifications
query = (Query.from_(transactions)
         .select(
             transactions.date,
             transactions.amount,
             
             # Unbounded preceding to current row
             Sum(transactions.amount).over()
             .orderby(transactions.date)
             .rows_between(Preceding('UNBOUNDED'), CURRENT_ROW)
             .as_('running_total'),
             
             # Fixed window: 3 rows before to 1 row after
             Avg(transactions.amount).over()
             .orderby(transactions.date)
             .rows_between(Preceding(3), Following(1))
             .as_('5_row_avg'),
             
             # Current row to unbounded following
             Sum(transactions.amount).over()
             .orderby(transactions.date)
             .rows_between(CURRENT_ROW, Following('UNBOUNDED'))
             .as_('remaining_total'),
             
             # Centered window: 2 rows before and after
             Avg(transactions.amount).over()
             .orderby(transactions.date)
             .rows_between(Preceding(2), Following(2))
             .as_('centered_5_avg')
         ))

Install with Tessl CLI

npx tessl i tessl/pypi-pypika

docs

analytics.md

core-queries.md

dialects.md

functions.md

index.md

parameters-types.md

tables-schema.md

terms-expressions.md

tile.json