0
# Aggregations and Query Helpers
1
2
Aggregation functions and helper utilities for complex queries including mathematical operations, sorting, and SQL function access. These functions provide powerful capabilities for data analysis and complex query construction.
3
4
## Capabilities
5
6
### Aggregation Functions
7
8
Mathematical aggregation functions for statistical operations and data analysis.
9
10
```python { .api }
11
def count(gen=None):
12
"""Count entities or values in query results.
13
14
Args:
15
gen: Generator expression or None for counting all results
16
17
Returns:
18
int: Count of matching entities/values
19
20
Usage:
21
count(p for p in Person if p.age > 18)
22
count() # Count all in current query context
23
"""
24
25
def sum(gen):
26
"""Calculate sum of numeric values.
27
28
Args:
29
gen: Generator expression yielding numeric values
30
31
Returns:
32
Numeric sum of values
33
34
Usage:
35
sum(o.total for o in Order if o.date >= start_date)
36
"""
37
38
def min(gen):
39
"""Find minimum value.
40
41
Args:
42
gen: Generator expression yielding comparable values
43
44
Returns:
45
Minimum value from results
46
47
Usage:
48
min(p.age for p in Person)
49
"""
50
51
def max(gen):
52
"""Find maximum value.
53
54
Args:
55
gen: Generator expression yielding comparable values
56
57
Returns:
58
Maximum value from results
59
60
Usage:
61
max(o.total for o in Order)
62
"""
63
64
def avg(gen):
65
"""Calculate average of numeric values.
66
67
Args:
68
gen: Generator expression yielding numeric values
69
70
Returns:
71
float: Average of values
72
73
Usage:
74
avg(p.age for p in Person if p.city == "NYC")
75
"""
76
77
def group_concat(gen, sep=','):
78
"""Concatenate grouped values into strings.
79
80
Args:
81
gen: Generator expression yielding string values
82
sep: Separator string (default: ',')
83
84
Returns:
85
str: Concatenated string values
86
87
Usage:
88
group_concat(p.name for p in team.members, sep=', ')
89
"""
90
91
def distinct(gen):
92
"""Eliminate duplicate values from query results.
93
94
Args:
95
gen: Generator expression
96
97
Returns:
98
Query with distinct results only
99
100
Usage:
101
distinct(p.city for p in Person)
102
"""
103
```
104
105
### Query Helper Functions
106
107
Utility functions for enhancing queries with sorting, joins, and SQL operations.
108
109
```python { .api }
110
def desc(attr):
111
"""Specify descending sort order for ORDER BY clauses.
112
113
Args:
114
attr: Entity attribute to sort by
115
116
Returns:
117
Descending sort specification
118
119
Usage:
120
select(p for p in Person).order_by(desc(Person.age))
121
"""
122
123
def JOIN(condition):
124
"""Explicit JOIN specification for complex queries.
125
126
Args:
127
condition: Join condition expression
128
129
Returns:
130
JOIN specification for query
131
132
Usage:
133
select((p, c) for p in Person
134
for c in Company if JOIN(p.company == c))
135
"""
136
137
def between(x, a, b):
138
"""BETWEEN operator for range queries.
139
140
Args:
141
x: Value to test
142
a: Lower bound (inclusive)
143
b: Upper bound (inclusive)
144
145
Returns:
146
bool: True if a <= x <= b
147
148
Usage:
149
select(p for p in Person if between(p.age, 18, 65))
150
"""
151
152
def concat(*args):
153
"""String concatenation function for queries.
154
155
Args:
156
*args: String expressions to concatenate
157
158
Returns:
159
str: Concatenated string
160
161
Usage:
162
select(concat(p.first_name, ' ', p.last_name) for p in Person)
163
"""
164
165
def coalesce(*args):
166
"""COALESCE function for NULL value handling.
167
168
Args:
169
*args: Expressions to evaluate (returns first non-NULL)
170
171
Returns:
172
First non-NULL argument value
173
174
Usage:
175
select(coalesce(p.nickname, p.first_name) for p in Person)
176
"""
177
```
178
179
## Usage Examples
180
181
### Basic Aggregations
182
183
```python
184
from pony.orm import *
185
186
with db_session:
187
# Count operations
188
total_users = count(p for p in Person)
189
adult_count = count(p for p in Person if p.age >= 18)
190
191
# Sum operations
192
total_revenue = sum(o.total for o in Order)
193
monthly_sales = sum(o.total for o in Order
194
if o.date.month == datetime.now().month)
195
196
# Min/Max operations
197
youngest_age = min(p.age for p in Person)
198
oldest_age = max(p.age for p in Person)
199
highest_order = max(o.total for o in Order)
200
201
# Average calculations
202
average_age = avg(p.age for p in Person)
203
average_order_value = avg(o.total for o in Order if o.total > 0)
204
205
print(f"Users: {total_users}, Adults: {adult_count}")
206
print(f"Age range: {youngest_age} - {oldest_age}, Average: {average_age}")
207
print(f"Revenue: ${total_revenue}, Average order: ${average_order_value}")
208
```
209
210
### Advanced Aggregations with Grouping
211
212
```python
213
with db_session:
214
# Group by with aggregations
215
city_populations = select((p.city, count()) for p in Person).group_by(Person.city)
216
217
# Multiple aggregations per group
218
order_stats = select(
219
(o.customer.name, count(), sum(o.total), avg(o.total))
220
for o in Order
221
).group_by(Order.customer)
222
223
# Group concat for string aggregation
224
team_members = select(
225
(t.name, group_concat(p.name, sep=', '))
226
for t in Team for p in t.members
227
).group_by(Team.name)
228
229
# Conditional aggregations
230
sales_by_region = select(
231
(s.region,
232
sum(s.amount for s in Sale if s.type == 'online'),
233
sum(s.amount for s in Sale if s.type == 'retail'))
234
for s in Sale
235
).group_by(Sale.region)
236
```
237
238
### Query Helpers and Sorting
239
240
```python
241
with db_session:
242
# Descending sort
243
top_earners = select(e for e in Employee).order_by(desc(Employee.salary))
244
latest_orders = select(o for o in Order).order_by(desc(Order.date))
245
246
# Multiple sort criteria
247
sorted_people = select(p for p in Person).order_by(
248
Person.last_name, Person.first_name, desc(Person.age)
249
)
250
251
# Range queries with between
252
millennials = select(p for p in Person
253
if between(p.birth_year, 1981, 1996))
254
255
moderate_orders = select(o for o in Order
256
if between(o.total, 50.0, 500.0))
257
258
# String concatenation in queries
259
full_names = select(concat(p.first_name, ' ', p.last_name)
260
for p in Person)
261
262
formatted_addresses = select(
263
concat(a.street, ', ', a.city, ', ', a.state, ' ', a.zip_code)
264
for a in Address
265
)
266
267
# NULL handling with coalesce
268
display_names = select(coalesce(p.nickname, p.first_name, 'Anonymous')
269
for p in Person)
270
271
contact_info = select(coalesce(p.mobile_phone, p.home_phone, p.work_phone)
272
for p in Person)
273
```
274
275
### Complex Analytical Queries
276
277
```python
278
with db_session:
279
# Statistical analysis
280
age_stats = {
281
'count': count(p for p in Person),
282
'min': min(p.age for p in Person),
283
'max': max(p.age for p in Person),
284
'avg': avg(p.age for p in Person),
285
'adults': count(p for p in Person if p.age >= 18)
286
}
287
288
# Sales analysis by period
289
monthly_metrics = select(
290
(o.date.month,
291
count(), # Order count
292
sum(o.total), # Total revenue
293
avg(o.total), # Average order value
294
min(o.total), # Smallest order
295
max(o.total)) # Largest order
296
for o in Order
297
if o.date.year == 2023
298
).group_by(o.date.month)
299
300
# Customer segmentation
301
customer_segments = select(
302
(c.id, c.name,
303
count(c.orders), # Order frequency
304
sum(o.total for o in c.orders), # Total spent
305
avg(o.total for o in c.orders), # Average order value
306
max(o.date for o in c.orders)) # Last order date
307
for c in Customer
308
)
309
310
# Product performance analysis
311
product_performance = select(
312
(p.category,
313
count(distinct(oi.order_id for oi in p.order_items)), # Orders with product
314
sum(oi.quantity for oi in p.order_items), # Total quantity sold
315
sum(oi.price * oi.quantity for oi in p.order_items)) # Total revenue
316
for p in Product
317
).group_by(Product.category)
318
```
319
320
### Distinct Operations and Deduplication
321
322
```python
323
with db_session:
324
# Distinct values
325
unique_cities = distinct(p.city for p in Person if p.city)
326
unique_domains = distinct(p.email.split('@')[1] for p in Person)
327
328
# Distinct with aggregations
329
cities_with_counts = select(
330
(city, count()) for city in distinct(p.city for p in Person)
331
).group_by(city)
332
333
# Complex distinct operations
334
active_customer_cities = distinct(
335
c.address.city for c in Customer
336
if exists(o for o in c.orders if o.date >= recent_date)
337
)
338
339
# Distinct combinations
340
name_age_combinations = distinct(
341
(p.name, p.age) for p in Person
342
)
343
```
344
345
### Window Functions and Advanced Analytics
346
347
```python
348
with db_session:
349
# Ranking within groups (using subqueries)
350
top_salespeople_per_region = select(
351
(s.name, s.region, s.total_sales)
352
for s in Salesperson
353
if s.total_sales == max(sp.total_sales for sp in Salesperson
354
if sp.region == s.region)
355
)
356
357
# Running totals (simplified approach)
358
cumulative_sales = []
359
running_total = 0
360
for order in select(o for o in Order).order_by(Order.date):
361
running_total += order.total
362
cumulative_sales.append((order.date, order.total, running_total))
363
364
# Percentile calculations
365
order_totals = list(select(o.total for o in Order).order_by(Order.total))
366
median_order = order_totals[len(order_totals) // 2]
367
q1_order = order_totals[len(order_totals) // 4]
368
q3_order = order_totals[3 * len(order_totals) // 4]
369
370
# Growth rate calculations
371
monthly_growth = []
372
monthly_revenues = list(select(
373
(o.date.month, sum(o.total))
374
for o in Order if o.date.year == 2023
375
).group_by(o.date.month).order_by(1))
376
377
for i in range(1, len(monthly_revenues)):
378
prev_month = monthly_revenues[i-1][1]
379
curr_month = monthly_revenues[i][1]
380
growth_rate = ((curr_month - prev_month) / prev_month) * 100
381
monthly_growth.append((monthly_revenues[i][0], growth_rate))
382
```
383
384
### Performance Optimization with Aggregations
385
386
```python
387
with db_session:
388
# Efficient counting vs fetching all
389
# Good: Count without fetching
390
user_count = count(p for p in Person if p.active)
391
392
# Bad: Fetch all then count in Python
393
# all_users = list(select(p for p in Person if p.active))
394
# user_count = len(all_users)
395
396
# Efficient aggregation queries
397
summary_stats = select(
398
(count(), sum(o.total), avg(o.total), min(o.total), max(o.total))
399
for o in Order if o.date >= start_date
400
).get() # Single query for multiple aggregations
401
402
# Batch aggregations for different groups
403
regional_stats = select(
404
(c.region, count(), sum(o.total for o in c.orders))
405
for c in Customer
406
).group_by(Customer.region)
407
408
# Use EXISTS for existence checks instead of counting
409
has_recent_orders = exists(o for o in Order
410
if o.date >= recent_date)
411
412
# Efficient distinct counting
413
unique_customer_count = count(distinct(o.customer_id for o in Order))
414
```