0
# Aggregation and Window Functions
1
2
Statistical aggregation functions and window operations including ranking, cumulative calculations, and frame-based computations for advanced analytics.
3
4
## Capabilities
5
6
### Basic Aggregations
7
8
Core statistical aggregation functions.
9
10
```python { .api }
11
def sum(arg):
12
"""Sum of values."""
13
14
def mean(arg):
15
"""Average of values."""
16
17
def count(arg=None):
18
"""
19
Count of values.
20
21
Parameters:
22
- arg: expression to count, or None to count rows
23
24
Returns:
25
Count expression
26
"""
27
28
def min(arg):
29
"""Minimum value."""
30
31
def max(arg):
32
"""Maximum value."""
33
34
def std(arg):
35
"""Standard deviation."""
36
37
def var(arg):
38
"""Variance."""
39
```
40
41
**Usage Examples:**
42
```python
43
# Basic aggregations
44
result = table.aggregate(
45
total_sales=table.sales.sum(),
46
avg_price=table.price.mean(),
47
num_orders=table.count(),
48
min_date=table.order_date.min(),
49
max_date=table.order_date.max()
50
)
51
52
# Grouped aggregations
53
by_category = (
54
table
55
.group_by('category')
56
.aggregate(
57
revenue=table.sales.sum(),
58
avg_rating=table.rating.mean(),
59
order_count=table.count()
60
)
61
)
62
```
63
64
### Window Specifications
65
66
Define window frames for analytical functions.
67
68
```python { .api }
69
def window(
70
group_by=None,
71
order_by=None,
72
preceding=None,
73
following=None,
74
rows=None,
75
range=None
76
):
77
"""
78
Create window specification.
79
80
Parameters:
81
- group_by: columns to partition by
82
- order_by: columns to order by
83
- preceding: number of preceding rows/range
84
- following: number of following rows/range
85
- rows: tuple of (preceding, following) for row frame
86
- range: tuple of (preceding, following) for range frame
87
88
Returns:
89
Window specification
90
"""
91
92
def rows_window(preceding=None, following=None):
93
"""Create row-based window frame."""
94
95
def range_window(preceding=None, following=None):
96
"""Create range-based window frame."""
97
98
def cumulative_window():
99
"""Create cumulative window (unbounded preceding to current row)."""
100
101
def trailing_window(n):
102
"""Create trailing window of n rows."""
103
104
def trailing_range_window(interval):
105
"""Create trailing range window."""
106
```
107
108
**Usage Examples:**
109
```python
110
import ibis
111
112
# Define window specifications
113
win = ibis.window(
114
group_by='department',
115
order_by='salary',
116
rows=(2, 2) # 2 preceding, 2 following
117
)
118
119
# Row-based window
120
row_win = ibis.rows_window(preceding=5, following=0)
121
122
# Cumulative window
123
cum_win = ibis.cumulative_window()
124
125
# Trailing window
126
trail_win = ibis.trailing_window(10)
127
```
128
129
### Ranking Functions
130
131
Window functions for ranking and ordering.
132
133
```python { .api }
134
def row_number():
135
"""
136
Assign unique row numbers within window.
137
138
Returns:
139
Row number expression (1, 2, 3, ...)
140
"""
141
142
def rank():
143
"""
144
Assign rank with gaps for ties.
145
146
Returns:
147
Rank expression (1, 2, 2, 4, ...)
148
"""
149
150
def dense_rank():
151
"""
152
Assign rank without gaps for ties.
153
154
Returns:
155
Dense rank expression (1, 2, 2, 3, ...)
156
"""
157
158
def percent_rank():
159
"""
160
Percentile rank (0 to 1).
161
162
Returns:
163
Percent rank expression
164
"""
165
166
def cume_dist():
167
"""
168
Cumulative distribution (0 to 1).
169
170
Returns:
171
Cumulative distribution expression
172
"""
173
174
def ntile(n):
175
"""
176
Divide rows into n buckets.
177
178
Parameters:
179
- n: int, number of buckets
180
181
Returns:
182
Bucket number expression (1 to n)
183
"""
184
```
185
186
**Usage Examples:**
187
```python
188
# Ranking within groups
189
result = table.select(
190
'name', 'department', 'salary',
191
rank=ibis.rank().over(
192
group_by='department',
193
order_by=ibis.desc('salary')
194
),
195
row_num=ibis.row_number().over(
196
group_by='department',
197
order_by=ibis.desc('salary')
198
),
199
percentile=ibis.percent_rank().over(
200
order_by='salary'
201
),
202
quartile=ibis.ntile(4).over(
203
order_by='salary'
204
)
205
)
206
```
207
208
### Aggregate Window Functions
209
210
Apply aggregations over window frames.
211
212
**Usage Examples:**
213
```python
214
# Moving averages and sums
215
result = table.select(
216
'date', 'sales',
217
moving_avg=table.sales.mean().over(
218
order_by='date',
219
rows=(6, 0) # 7-day moving average
220
),
221
running_total=table.sales.sum().over(
222
order_by='date',
223
rows=(None, 0) # Cumulative sum
224
),
225
pct_of_total=table.sales / table.sales.sum().over()
226
)
227
228
# Comparisons with previous/next values
229
result = table.select(
230
'date', 'value',
231
prev_value=table.value.lag(1).over(order_by='date'),
232
next_value=table.value.lead(1).over(order_by='date'),
233
change=table.value - table.value.lag(1).over(order_by='date')
234
)
235
```
236
237
### Lag and Lead Functions
238
239
Access previous and next row values.
240
241
```python { .api }
242
expr.lag(offset=1, default=None):
243
"""
244
Get value from previous row.
245
246
Parameters:
247
- offset: int, number of rows back
248
- default: value to use when no previous row exists
249
250
Returns:
251
Lagged value expression
252
"""
253
254
expr.lead(offset=1, default=None):
255
"""
256
Get value from next row.
257
258
Parameters:
259
- offset: int, number of rows forward
260
- default: value to use when no next row exists
261
262
Returns:
263
Lead value expression
264
"""
265
266
expr.first():
267
"""Get first value in window frame."""
268
269
expr.last():
270
"""Get last value in window frame."""
271
```
272
273
**Usage Examples:**
274
```python
275
# Time series calculations
276
result = table.select(
277
'date', 'price',
278
prev_price=table.price.lag(1).over(order_by='date'),
279
price_change=table.price - table.price.lag(1).over(order_by='date'),
280
next_price=table.price.lead(1).over(order_by='date'),
281
first_price=table.price.first().over(
282
group_by='symbol',
283
order_by='date'
284
),
285
last_price=table.price.last().over(
286
group_by='symbol',
287
order_by='date'
288
)
289
)
290
```
291
292
### Statistical Window Functions
293
294
Advanced statistical calculations over windows.
295
296
**Usage Examples:**
297
```python
298
# Rolling statistics
299
result = table.select(
300
'date', 'value',
301
rolling_std=table.value.std().over(
302
order_by='date',
303
rows=(29, 0) # 30-day rolling standard deviation
304
),
305
rolling_var=table.value.var().over(
306
order_by='date',
307
rows=(29, 0)
308
),
309
z_score=(
310
(table.value - table.value.mean().over(rows=(29, 0))) /
311
table.value.std().over(rows=(29, 0))
312
).over(order_by='date')
313
)
314
```
315
316
### Conditional Aggregations
317
318
Aggregations with filtering conditions.
319
320
**Usage Examples:**
321
```python
322
# Conditional aggregations
323
result = table.aggregate(
324
total_sales=table.sales.sum(),
325
high_value_sales=table.sales.sum(where=table.sales > 1000),
326
avg_rating=table.rating.mean(),
327
avg_high_rating=table.rating.mean(where=table.rating >= 4.0),
328
premium_count=table.count(where=table.tier == 'premium')
329
)
330
331
# Window conditional aggregations
332
result = table.select(
333
'product', 'date', 'sales',
334
high_sales_pct=(
335
table.sales.sum(where=table.sales > 1000) /
336
table.sales.sum()
337
).over(group_by='product')
338
)
339
```
340
341
### Frame Boundary Specifications
342
343
Control window frame boundaries precisely.
344
345
```python { .api }
346
def preceding(n):
347
"""
348
Specify preceding boundary.
349
350
Parameters:
351
- n: int or None (unbounded)
352
353
Returns:
354
Preceding boundary specification
355
"""
356
357
def following(n):
358
"""
359
Specify following boundary.
360
361
Parameters:
362
- n: int or None (unbounded)
363
364
Returns:
365
Following boundary specification
366
"""
367
368
def range(start, end):
369
"""
370
Create range-based frame.
371
372
Parameters:
373
- start: boundary specification
374
- end: boundary specification
375
376
Returns:
377
Range frame specification
378
"""
379
```
380
381
**Usage Examples:**
382
```python
383
# Precise frame control
384
win1 = ibis.window(
385
order_by='date',
386
rows=(ibis.preceding(5), ibis.following(2))
387
)
388
389
win2 = ibis.window(
390
order_by='timestamp',
391
range=(
392
ibis.preceding(ibis.interval(hours=1)),
393
ibis.following(ibis.interval(minutes=30))
394
)
395
)
396
397
# Use with aggregations
398
result = table.select(
399
'timestamp', 'value',
400
hour_avg=table.value.mean().over(win2)
401
)
402
```
403
404
### Multiple Window Operations
405
406
Combine multiple window operations efficiently.
407
408
**Usage Examples:**
409
```python
410
# Multiple windows in single query
411
dept_win = ibis.window(group_by='department', order_by='salary')
412
company_win = ibis.window(order_by='salary')
413
414
result = table.select(
415
'name', 'department', 'salary',
416
dept_rank=ibis.rank().over(dept_win),
417
dept_pct=ibis.percent_rank().over(dept_win),
418
company_rank=ibis.rank().over(company_win),
419
dept_avg=table.salary.mean().over(group_by='department'),
420
company_avg=table.salary.mean().over()
421
)
422
```