A SQL query builder API for Python that provides programmatic interface for constructing SQL queries.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Advanced analytical functions including window functions (ROW_NUMBER, RANK), frame specifications, and statistical functions for complex data analysis queries. These functions enable sophisticated analytical processing with support for partitioning, ordering, and frame boundaries.
Boundary specifications for window functions defining the range of rows to include in calculations.
class Preceding:
def __init__(self, value):
"""
Specify preceding boundary for window frames.
Parameters:
- value: Number of preceding rows or 'UNBOUNDED'
"""
class Following:
def __init__(self, value):
"""
Specify following boundary for window frames.
Parameters:
- value: Number of following rows or 'UNBOUNDED'
"""
CURRENT_ROW: object # Current row constant for window framesUsage Examples:
from pypika import Query, Table
from pypika.analytics import Preceding, Following, CURRENT_ROW, Sum, Avg
sales = Table('sales')
# Window frame specifications
query = (Query.from_(sales)
.select(
sales.date,
sales.amount,
# Running total (unbounded preceding to current row)
Sum(sales.amount).over()
.orderby(sales.date)
.rows_between(Preceding('UNBOUNDED'), CURRENT_ROW)
.as_('running_total'),
# 7-day moving average
Avg(sales.amount).over()
.orderby(sales.date)
.rows_between(Preceding(6), CURRENT_ROW)
.as_('7_day_avg'),
# Next 3 days total
Sum(sales.amount).over()
.orderby(sales.date)
.rows_between(CURRENT_ROW, Following(3))
.as_('next_3_days_total')
))Window functions for ranking and ordering data within partitions.
class Rank(AnalyticFunction):
def __init__(self):
"""RANK() window function - rank with gaps."""
class DenseRank(AnalyticFunction):
def __init__(self):
"""DENSE_RANK() window function - rank without gaps."""
class RowNumber(AnalyticFunction):
def __init__(self):
"""ROW_NUMBER() window function - sequential numbering."""
class NTile(AnalyticFunction):
def __init__(self, n):
"""
NTILE() window function - divide into n buckets.
Parameters:
- n: Number of buckets
"""Usage Examples:
from pypika import Query, Table
from pypika.analytics import Rank, DenseRank, RowNumber, NTile
employees = Table('employees')
sales = Table('sales')
# Employee ranking by salary
query = (Query.from_(employees)
.select(
employees.name,
employees.department,
employees.salary,
RowNumber().over(employees.department).orderby(employees.salary.desc()).as_('row_num'),
Rank().over(employees.department).orderby(employees.salary.desc()).as_('salary_rank'),
DenseRank().over(employees.department).orderby(employees.salary.desc()).as_('dense_rank'),
NTile(4).over(employees.department).orderby(employees.salary.desc()).as_('quartile')
))
# Sales performance ranking
query = (Query.from_(sales)
.select(
sales.salesperson_id,
sales.quarter,
sales.total_sales,
Rank().over().orderby(sales.total_sales.desc()).as_('overall_rank'),
Rank().over(sales.quarter).orderby(sales.total_sales.desc()).as_('quarterly_rank'),
NTile(5).over(sales.quarter).orderby(sales.total_sales.desc()).as_('performance_quintile')
))
# Top performers per department
top_performers = (Query.from_(employees)
.select(
employees.name,
employees.department,
employees.salary,
Rank().over(employees.department).orderby(employees.salary.desc()).as_('dept_rank')
))
query = (Query.from_(top_performers)
.select('*')
.where(top_performers.dept_rank <= 3))Window functions for accessing values from other rows within the partition.
class FirstValue(AnalyticFunction):
def __init__(self, term):
"""
FIRST_VALUE() window function - first value in partition.
Parameters:
- term: Column or expression to get first value of
"""
class LastValue(AnalyticFunction):
def __init__(self, term):
"""
LAST_VALUE() window function - last value in partition.
Parameters:
- term: Column or expression to get last value of
"""
class Lag(AnalyticFunction):
def __init__(self, term, offset=1, default=None):
"""
LAG() window function - value from previous row.
Parameters:
- term: Column or expression
- offset: Number of rows back (default 1)
- default: Default value if no previous row
"""
class Lead(AnalyticFunction):
def __init__(self, term, offset=1, default=None):
"""
LEAD() window function - value from next row.
Parameters:
- term: Column or expression
- offset: Number of rows forward (default 1)
- default: Default value if no next row
"""Usage Examples:
from pypika import Query, Table
from pypika.analytics import FirstValue, LastValue, Lag, Lead
stock_prices = Table('stock_prices')
sales = Table('sales')
# Stock price analysis
query = (Query.from_(stock_prices)
.select(
stock_prices.symbol,
stock_prices.date,
stock_prices.close_price,
FirstValue(stock_prices.close_price).over(stock_prices.symbol)
.orderby(stock_prices.date).as_('first_price'),
LastValue(stock_prices.close_price).over(stock_prices.symbol)
.orderby(stock_prices.date)
.rows_between(Preceding('UNBOUNDED'), Following('UNBOUNDED'))
.as_('last_price'),
Lag(stock_prices.close_price, 1).over(stock_prices.symbol)
.orderby(stock_prices.date).as_('prev_day_price'),
Lead(stock_prices.close_price, 1).over(stock_prices.symbol)
.orderby(stock_prices.date).as_('next_day_price')
))
# Sales trend analysis
query = (Query.from_(sales)
.select(
sales.month,
sales.revenue,
Lag(sales.revenue, 1, 0).over().orderby(sales.month).as_('prev_month_revenue'),
Lead(sales.revenue, 1, 0).over().orderby(sales.month).as_('next_month_revenue'),
(sales.revenue - Lag(sales.revenue, 1, 0).over().orderby(sales.month)).as_('month_over_month_change')
))
# Year-over-year comparison
query = (Query.from_(sales)
.select(
sales.year,
sales.quarter,
sales.revenue,
Lag(sales.revenue, 4).over().orderby(sales.year, sales.quarter).as_('yoy_revenue'),
((sales.revenue - Lag(sales.revenue, 4).over().orderby(sales.year, sales.quarter)) /
Lag(sales.revenue, 4).over().orderby(sales.year, sales.quarter) * 100).as_('yoy_growth_pct')
))Window versions of aggregate functions for running calculations and statistical analysis.
class Sum(AnalyticFunction):
def __init__(self, term):
"""SUM() window function."""
class Count(AnalyticFunction):
def __init__(self, term):
"""COUNT() window function."""
class Avg(AnalyticFunction):
def __init__(self, term):
"""AVG() window function."""
class Min(AnalyticFunction):
def __init__(self, term):
"""MIN() window function."""
class Max(AnalyticFunction):
def __init__(self, term):
"""MAX() window function."""
class Median(AnalyticFunction):
def __init__(self, term):
"""MEDIAN() window function."""Usage Examples:
from pypika import Query, Table
from pypika.analytics import Sum, Count, Avg, Min, Max, Median
sales = Table('sales')
orders = Table('orders')
# Running totals and averages
query = (Query.from_(sales)
.select(
sales.date,
sales.amount,
Sum(sales.amount).over().orderby(sales.date).as_('running_total'),
Avg(sales.amount).over().orderby(sales.date)
.rows_between(Preceding(6), CURRENT_ROW).as_('7_day_avg'),
Count(sales.id).over().orderby(sales.date).as_('cumulative_count')
))
# Department statistics
query = (Query.from_(sales)
.select(
sales.department,
sales.salesperson,
sales.amount,
Sum(sales.amount).over(sales.department).as_('dept_total'),
Avg(sales.amount).over(sales.department).as_('dept_avg'),
Max(sales.amount).over(sales.department).as_('dept_max'),
Min(sales.amount).over(sales.department).as_('dept_min'),
Median(sales.amount).over(sales.department).as_('dept_median')
))
# Performance metrics
query = (Query.from_(orders)
.select(
orders.customer_id,
orders.order_date,
orders.amount,
Count('*').over(orders.customer_id)
.orderby(orders.order_date)
.rows_between(Preceding('UNBOUNDED'), CURRENT_ROW)
.as_('order_sequence'),
Sum(orders.amount).over(orders.customer_id)
.orderby(orders.order_date)
.rows_between(Preceding('UNBOUNDED'), CURRENT_ROW)
.as_('lifetime_value')
))Advanced statistical functions for variance, standard deviation, and distribution analysis.
class StdDev(AnalyticFunction):
def __init__(self, term):
"""Standard deviation window function."""
class StdDevPop(AnalyticFunction):
def __init__(self, term):
"""Population standard deviation window function."""
class StdDevSamp(AnalyticFunction):
def __init__(self, term):
"""Sample standard deviation window function."""
class Variance(AnalyticFunction):
def __init__(self, term):
"""Variance window function."""
class VarPop(AnalyticFunction):
def __init__(self, term):
"""Population variance window function."""
class VarSamp(AnalyticFunction):
def __init__(self, term):
"""Sample variance window function."""Usage Examples:
from pypika import Query, Table
from pypika.analytics import StdDev, StdDevPop, Variance, VarPop
performance = Table('performance_metrics')
sales = Table('sales')
# Statistical analysis
query = (Query.from_(performance)
.select(
performance.department,
performance.employee_id,
performance.score,
Avg(performance.score).over(performance.department).as_('dept_avg_score'),
StdDev(performance.score).over(performance.department).as_('dept_score_stddev'),
Variance(performance.score).over(performance.department).as_('dept_score_variance'),
((performance.score - Avg(performance.score).over(performance.department)) /
StdDev(performance.score).over(performance.department)).as_('z_score')
))
# Rolling statistics
query = (Query.from_(sales)
.select(
sales.date,
sales.amount,
Avg(sales.amount).over()
.orderby(sales.date)
.rows_between(Preceding(29), CURRENT_ROW)
.as_('30_day_avg'),
StdDev(sales.amount).over()
.orderby(sales.date)
.rows_between(Preceding(29), CURRENT_ROW)
.as_('30_day_stddev')
))Window functions for accessing values at specific positions within the window frame.
class FirstValue(AnalyticFunction):
def __init__(self, term):
"""
FIRST_VALUE() window function - first value in the window frame.
Parameters:
- term: Expression to get the first value of
"""
class LastValue(AnalyticFunction):
def __init__(self, term):
"""
LAST_VALUE() window function - last value in the window frame.
Parameters:
- term: Expression to get the last value of
"""
class Median(AnalyticFunction):
def __init__(self, term):
"""
MEDIAN() window function - median value in the window frame.
Parameters:
- term: Expression to calculate the median of
"""
class Lag(AnalyticFunction):
def __init__(self, term, offset=1, default=None):
"""
LAG() window function - value from a previous row.
Parameters:
- term: Expression to get value from
- offset: Number of rows back (default: 1)
- default: Default value if no row exists
"""
class Lead(AnalyticFunction):
def __init__(self, term, offset=1, default=None):
"""
LEAD() window function - value from a following row.
Parameters:
- term: Expression to get value from
- offset: Number of rows forward (default: 1)
- default: Default value if no row exists
"""Usage Examples:
from pypika import Query, Table
from pypika.analytics import FirstValue, LastValue, Median, Lag, Lead
sales = Table('sales')
stock_prices = Table('stock_prices')
# First and last values in a window
query = (Query.from_(sales)
.select(
sales.date,
sales.amount,
FirstValue(sales.amount).over()
.orderby(sales.date)
.rows_between(Preceding('UNBOUNDED'), Following('UNBOUNDED'))
.as_('first_sale_amount'),
LastValue(sales.amount).over()
.orderby(sales.date)
.rows_between(Preceding('UNBOUNDED'), Following('UNBOUNDED'))
.as_('last_sale_amount')
))
# Median calculation
query = (Query.from_(sales)
.select(
sales.product_id,
sales.amount,
Median(sales.amount).over(sales.product_id).as_('median_price')
))
# Lag and Lead for time series analysis
query = (Query.from_(stock_prices)
.select(
stock_prices.date,
stock_prices.price,
Lag(stock_prices.price, 1).over()
.orderby(stock_prices.date).as_('previous_price'),
Lead(stock_prices.price, 1).over()
.orderby(stock_prices.date).as_('next_price'),
# Calculate price change
(stock_prices.price - Lag(stock_prices.price, 1).over()
.orderby(stock_prices.date)).as_('price_change')
))Complete window function syntax with OVER clause, partitioning, ordering, and frame specifications.
Usage Examples:
from pypika import Query, Table
from pypika.analytics import RowNumber, Sum, Avg, Rank
employees = Table('employees')
sales = Table('sales')
# Complete window function syntax patterns
# Basic OVER clause
basic_window = RowNumber().over()
# PARTITION BY
partitioned = RowNumber().over(employees.department)
# ORDER BY
ordered = RowNumber().over().orderby(employees.salary.desc())
# PARTITION BY and ORDER BY
full_window = RowNumber().over(employees.department).orderby(employees.salary.desc())
# Window frames with ROWS
rows_frame = (Sum(sales.amount).over()
.orderby(sales.date)
.rows_between(Preceding(2), Following(2)))
# Window frames with RANGE (if supported)
# range_frame = Sum(sales.amount).over().orderby(sales.date).range_between(...)
# Complex analytical query
query = (Query.from_(employees)
.select(
employees.name,
employees.department,
employees.salary,
employees.hire_date,
# Row numbering within department by salary
RowNumber().over(employees.department)
.orderby(employees.salary.desc()).as_('dept_salary_rank'),
# Running total of salaries by hire date
Sum(employees.salary).over()
.orderby(employees.hire_date)
.rows_between(Preceding('UNBOUNDED'), CURRENT_ROW)
.as_('cumulative_salary_cost'),
# Moving average salary over last 3 hires
Avg(employees.salary).over()
.orderby(employees.hire_date)
.rows_between(Preceding(2), CURRENT_ROW)
.as_('recent_avg_salary'),
# Salary percentile within department
Rank().over(employees.department)
.orderby(employees.salary).as_('salary_percentile_rank')
))
# Analytical reporting query
quarterly_sales = (Query.from_(sales)
.select(
sales.quarter,
sales.region,
sales.revenue,
# Quarter-over-quarter growth
Lag(sales.revenue, 1).over(sales.region)
.orderby(sales.quarter).as_('prev_quarter_revenue'),
((sales.revenue - Lag(sales.revenue, 1).over(sales.region)
.orderby(sales.quarter)) /
Lag(sales.revenue, 1).over(sales.region)
.orderby(sales.quarter) * 100).as_('qoq_growth_pct'),
# Year-over-year growth
Lag(sales.revenue, 4).over(sales.region)
.orderby(sales.quarter).as_('yoy_revenue'),
# Regional ranking
Rank().over(sales.quarter)
.orderby(sales.revenue.desc()).as_('regional_rank'),
# Running total for the year
Sum(sales.revenue).over(sales.region)
.orderby(sales.quarter)
.rows_between(Preceding(3), CURRENT_ROW)
.as_('ytd_revenue')
))Methods for specifying window frame boundaries in analytical functions.
class AnalyticFunction:
def rows_between(self, start, end):
"""
Specify ROWS frame between boundaries.
Parameters:
- start: Starting boundary (Preceding, CURRENT_ROW, Following)
- end: Ending boundary (Preceding, CURRENT_ROW, Following)
"""
def range_between(self, start, end):
"""
Specify RANGE frame between boundaries.
Parameters:
- start: Starting boundary (Preceding, CURRENT_ROW, Following)
- end: Ending boundary (Preceding, CURRENT_ROW, Following)
"""
def over(self, *partition_by):
"""
Specify OVER clause with optional partitioning.
Parameters:
- partition_by: Columns to partition by
"""
def orderby(self, *terms):
"""
Specify ORDER BY within window.
Parameters:
- terms: Columns or expressions to order by
"""Usage Examples:
from pypika import Query, Table
from pypika.analytics import Sum, Avg, RowNumber, Preceding, Following, CURRENT_ROW
transactions = Table('transactions')
# Various frame specifications
query = (Query.from_(transactions)
.select(
transactions.date,
transactions.amount,
# Unbounded preceding to current row
Sum(transactions.amount).over()
.orderby(transactions.date)
.rows_between(Preceding('UNBOUNDED'), CURRENT_ROW)
.as_('running_total'),
# Fixed window: 3 rows before to 1 row after
Avg(transactions.amount).over()
.orderby(transactions.date)
.rows_between(Preceding(3), Following(1))
.as_('5_row_avg'),
# Current row to unbounded following
Sum(transactions.amount).over()
.orderby(transactions.date)
.rows_between(CURRENT_ROW, Following('UNBOUNDED'))
.as_('remaining_total'),
# Centered window: 2 rows before and after
Avg(transactions.amount).over()
.orderby(transactions.date)
.rows_between(Preceding(2), Following(2))
.as_('centered_5_avg')
))Install with Tessl CLI
npx tessl i tessl/pypi-pypika