0
# Aggregations
1
2
The aggregations API provides statistical analysis and data grouping, with comprehensive support for metric, bucket, and pipeline aggregations. It enables complex analytical queries, data summarization, and business intelligence operations on Elasticsearch data.
3
4
## Capabilities
5
6
### Aggregation Factory Function
7
8
Main aggregation factory function for creating aggregation objects.
9
10
```python { .api }
11
def A(name, **params):
12
"""
13
Create aggregation object.
14
15
Args:
16
name (str): Aggregation type name
17
**params: Aggregation parameters
18
19
Returns:
20
Aggregation: Aggregation object
21
22
Examples:
23
A('terms', field='category')
24
A('avg', field='price')
25
A('date_histogram', field='timestamp', calendar_interval='1d')
26
"""
27
```
28
29
### Aggregation Container
30
31
Container for managing multiple aggregations and sub-aggregations.
32
33
```python { .api }
34
class Aggs:
35
"""
36
Aggregation container for organizing multiple aggregations.
37
"""
38
39
def bucket(self, name, agg_type, **params):
40
"""
41
Add bucket aggregation.
42
43
Args:
44
name (str): Aggregation name
45
agg_type (str): Bucket aggregation type
46
**params: Aggregation parameters
47
48
Returns:
49
Aggs: Sub-aggregation container
50
"""
51
52
def metric(self, name, agg_type, **params):
53
"""
54
Add metric aggregation.
55
56
Args:
57
name (str): Aggregation name
58
agg_type (str): Metric aggregation type
59
**params: Aggregation parameters
60
61
Returns:
62
Aggs: Current aggregation container
63
"""
64
65
def pipeline(self, name, agg_type, **params):
66
"""
67
Add pipeline aggregation.
68
69
Args:
70
name (str): Aggregation name
71
agg_type (str): Pipeline aggregation type
72
**params: Aggregation parameters
73
74
Returns:
75
Aggs: Current aggregation container
76
"""
77
78
def to_dict(self):
79
"""
80
Convert aggregations to dictionary.
81
82
Returns:
83
dict: Aggregations as dictionary
84
"""
85
```
86
87
### Metric Aggregations
88
89
Aggregations that compute metrics over a set of documents.
90
91
```python { .api }
92
class Avg:
93
"""
94
Average aggregation.
95
"""
96
def __init__(self, field=None, script=None, **kwargs):
97
"""
98
Args:
99
field (str, optional): Field to compute average for
100
script (dict, optional): Script to compute values
101
**kwargs: Additional parameters
102
103
Parameters:
104
missing (float): Value for documents missing the field
105
format (str): Output format for the value
106
"""
107
108
class Max:
109
"""
110
Maximum value aggregation.
111
"""
112
def __init__(self, field=None, script=None, **kwargs):
113
"""Args and parameters same as Avg."""
114
115
class Min:
116
"""
117
Minimum value aggregation.
118
"""
119
def __init__(self, field=None, script=None, **kwargs):
120
"""Args and parameters same as Avg."""
121
122
class Sum:
123
"""
124
Sum aggregation.
125
"""
126
def __init__(self, field=None, script=None, **kwargs):
127
"""Args and parameters same as Avg."""
128
129
class Stats:
130
"""
131
Basic statistics aggregation (count, min, max, avg, sum).
132
"""
133
def __init__(self, field=None, script=None, **kwargs):
134
"""Args and parameters same as Avg."""
135
136
class ExtendedStats:
137
"""
138
Extended statistics aggregation (includes variance, std deviation, etc.).
139
"""
140
def __init__(self, field=None, script=None, **kwargs):
141
"""
142
Args and parameters same as Avg, plus:
143
sigma (float): Standard deviations for bounds calculation
144
"""
145
146
class ValueCount:
147
"""
148
Count of values aggregation.
149
"""
150
def __init__(self, field=None, script=None, **kwargs):
151
"""Args and parameters same as Avg."""
152
153
class Cardinality:
154
"""
155
Cardinality (unique count) approximation aggregation.
156
"""
157
def __init__(self, field=None, script=None, **kwargs):
158
"""
159
Args and parameters same as Avg, plus:
160
precision_threshold (int): Precision threshold for accuracy/memory tradeoff
161
"""
162
163
class Percentiles:
164
"""
165
Percentiles aggregation.
166
"""
167
def __init__(self, field=None, script=None, **kwargs):
168
"""
169
Args and parameters same as Avg, plus:
170
percents (list): List of percentiles to calculate (default: [1,5,25,50,75,95,99])
171
compression (int): Compression parameter for TDigest algorithm
172
method (str): Algorithm to use ('tdigest' or 'hdr')
173
"""
174
175
class PercentileRanks:
176
"""
177
Percentile ranks aggregation.
178
"""
179
def __init__(self, field=None, values=None, **kwargs):
180
"""
181
Args:
182
field (str, optional): Field to compute percentile ranks for
183
values (list): Values to find percentile ranks for
184
**kwargs: Additional parameters same as Percentiles
185
"""
186
187
class GeoBounds:
188
"""
189
Geographic bounding box aggregation.
190
"""
191
def __init__(self, field, **kwargs):
192
"""
193
Args:
194
field (str): Geographic field
195
**kwargs: Additional parameters
196
197
Parameters:
198
wrap_longitude (bool): Wrap longitude values
199
"""
200
201
class GeoCentroid:
202
"""
203
Geographic centroid aggregation.
204
"""
205
def __init__(self, field, **kwargs):
206
"""
207
Args:
208
field (str): Geographic field
209
**kwargs: Additional parameters
210
"""
211
212
class WeightedAvg:
213
"""
214
Weighted average aggregation.
215
"""
216
def __init__(self, value=None, weight=None, **kwargs):
217
"""
218
Args:
219
value (dict): Value configuration (field or script)
220
weight (dict): Weight configuration (field or script)
221
**kwargs: Additional parameters
222
223
Parameters:
224
format (str): Output format for the value
225
"""
226
227
class ScriptedMetric:
228
"""
229
Scripted metric aggregation for custom calculations.
230
"""
231
def __init__(self, **kwargs):
232
"""
233
Args:
234
**kwargs: Scripted metric parameters
235
236
Parameters:
237
init_script (dict): Initialization script
238
map_script (dict): Map script (executed per document)
239
combine_script (dict): Combine script (executed per shard)
240
reduce_script (dict): Reduce script (executed on coordinator)
241
params (dict): Script parameters
242
"""
243
244
class TopHits:
245
"""
246
Top hits aggregation for retrieving sample documents.
247
"""
248
def __init__(self, **kwargs):
249
"""
250
Args:
251
**kwargs: Top hits parameters
252
253
Parameters:
254
from_ (int): Starting offset
255
size (int): Number of hits to return
256
sort (list): Sort configuration
257
_source (dict): Source field filtering
258
highlight (dict): Highlighting configuration
259
explain (bool): Include explanation
260
version (bool): Include document version
261
seq_no_primary_term (bool): Include sequence number and primary term
262
stored_fields (list): Stored fields to retrieve
263
docvalue_fields (list): Doc value fields to retrieve
264
script_fields (dict): Script fields to compute
265
"""
266
```
267
268
### Bucket Aggregations
269
270
Aggregations that group documents into buckets.
271
272
```python { .api }
273
class Terms:
274
"""
275
Terms aggregation for grouping by field values.
276
"""
277
def __init__(self, field=None, script=None, **kwargs):
278
"""
279
Args:
280
field (str, optional): Field to group by
281
script (dict, optional): Script to generate terms
282
**kwargs: Terms aggregation parameters
283
284
Parameters:
285
size (int): Number of buckets to return (default: 10)
286
shard_size (int): Number of buckets per shard
287
show_term_doc_count_error (bool): Show document count error
288
order (dict): Sort order for buckets
289
min_doc_count (int): Minimum document count per bucket
290
shard_min_doc_count (int): Minimum document count per shard
291
include (str or list): Include terms pattern/list
292
exclude (str or list): Exclude terms pattern/list
293
missing (str): Value for documents missing the field
294
execution_hint (str): Execution hint ('map' or 'global_ordinals')
295
collect_mode (str): Collection mode ('depth_first' or 'breadth_first')
296
"""
297
298
class Histogram:
299
"""
300
Histogram aggregation that buckets numeric values into fixed-size intervals.
301
"""
302
def __init__(self, field=None, interval=None, **kwargs):
303
"""
304
Args:
305
field (str, optional): Numeric field to histogram
306
interval (float): Histogram interval
307
**kwargs: Histogram parameters
308
309
Parameters:
310
min_doc_count (int): Minimum document count per bucket
311
extended_bounds (dict): Extended bounds (min, max)
312
hard_bounds (dict): Hard bounds (min, max)
313
order (dict): Sort order for buckets
314
keyed (bool): Return buckets as hash instead of array
315
missing (float): Value for documents missing the field
316
"""
317
318
class DateHistogram:
319
"""
320
Date histogram aggregation that buckets date values into calendar or fixed time intervals.
321
"""
322
def __init__(self, field=None, **kwargs):
323
"""
324
Args:
325
field (str, optional): Date field to histogram
326
**kwargs: Date histogram parameters
327
328
Parameters:
329
calendar_interval (str): Calendar-aware interval ('1d', '1w', '1M', etc.)
330
fixed_interval (str): Fixed interval ('60s', '1h', etc.)
331
interval (str): Deprecated interval parameter
332
time_zone (str): Time zone for bucketing
333
offset (str): Offset for bucket boundaries
334
format (str): Date format for bucket keys
335
min_doc_count (int): Minimum document count per bucket
336
extended_bounds (dict): Extended bounds
337
hard_bounds (dict): Hard bounds
338
order (dict): Sort order for buckets
339
keyed (bool): Return buckets as hash instead of array
340
missing (str): Value for documents missing the field
341
"""
342
343
class AutoDateHistogram:
344
"""
345
Auto-interval date histogram aggregation.
346
"""
347
def __init__(self, field, buckets=None, **kwargs):
348
"""
349
Args:
350
field (str): Date field to histogram
351
buckets (int, optional): Target number of buckets
352
**kwargs: Parameters same as DateHistogram plus:
353
minimum_interval (str): Minimum allowed interval
354
"""
355
356
class Range:
357
"""
358
Range aggregation for custom numeric ranges.
359
"""
360
def __init__(self, field=None, ranges=None, **kwargs):
361
"""
362
Args:
363
field (str, optional): Numeric field for ranges
364
ranges (list): List of range definitions
365
**kwargs: Range parameters
366
367
Parameters:
368
script (dict): Script to generate values
369
keyed (bool): Return buckets as hash instead of array
370
missing (float): Value for documents missing the field
371
372
Range format: {'from': 0, 'to': 100, 'key': 'low'}
373
"""
374
375
class DateRange:
376
"""
377
Date range aggregation.
378
"""
379
def __init__(self, field=None, ranges=None, **kwargs):
380
"""
381
Args and parameters same as Range, plus:
382
format (str): Date format for range boundaries
383
time_zone (str): Time zone for date ranges
384
"""
385
386
class IpRange:
387
"""
388
IP address range aggregation.
389
"""
390
def __init__(self, field=None, ranges=None, **kwargs):
391
"""
392
Args:
393
field (str, optional): IP field for ranges
394
ranges (list): List of IP range definitions
395
**kwargs: Parameters same as Range
396
397
Range format: {'from': '192.168.1.0', 'to': '192.168.1.255'}
398
or {'mask': '192.168.1.0/24'}
399
"""
400
401
class GeoDistance:
402
"""
403
Geographic distance bucket aggregation.
404
"""
405
def __init__(self, field, origin, ranges=None, **kwargs):
406
"""
407
Args:
408
field (str): Geographic field
409
origin (dict): Origin point for distance calculation
410
ranges (list): List of distance range definitions
411
**kwargs: Geographic distance parameters
412
413
Parameters:
414
unit (str): Distance unit ('m', 'km', 'mi', etc.)
415
distance_type (str): Distance calculation type
416
keyed (bool): Return buckets as hash instead of array
417
418
Range format: {'from': 0, 'to': 100, 'key': 'near'}
419
"""
420
421
class GeoHashGrid:
422
"""
423
Geohash grid aggregation.
424
"""
425
def __init__(self, field, precision=None, **kwargs):
426
"""
427
Args:
428
field (str): Geographic field
429
precision (int, optional): Geohash precision level
430
**kwargs: Geohash grid parameters
431
432
Parameters:
433
size (int): Maximum number of buckets
434
shard_size (int): Maximum buckets per shard
435
bounds (dict): Bounding box for grid
436
"""
437
438
class GeoTileGrid:
439
"""
440
Geo-tile grid aggregation.
441
"""
442
def __init__(self, field, precision=None, **kwargs):
443
"""
444
Args:
445
field (str): Geographic field
446
precision (int, optional): Tile zoom level precision
447
**kwargs: Parameters same as GeoHashGrid
448
"""
449
450
class Filter:
451
"""
452
Filter aggregation.
453
"""
454
def __init__(self, filter=None, **kwargs):
455
"""
456
Args:
457
filter (Query): Filter query
458
**kwargs: Additional parameters
459
"""
460
461
class Filters:
462
"""
463
Multiple filters aggregation.
464
"""
465
def __init__(self, filters=None, **kwargs):
466
"""
467
Args:
468
filters (dict): Named filters mapping
469
**kwargs: Filters parameters
470
471
Parameters:
472
other_bucket (bool): Include other bucket for unmatched docs
473
other_bucket_key (str): Key for other bucket
474
"""
475
476
class Missing:
477
"""
478
Missing values aggregation.
479
"""
480
def __init__(self, field, **kwargs):
481
"""
482
Args:
483
field (str): Field to check for missing values
484
**kwargs: Additional parameters
485
"""
486
487
class Nested:
488
"""
489
Nested aggregation for nested objects.
490
"""
491
def __init__(self, path, **kwargs):
492
"""
493
Args:
494
path (str): Path to nested object
495
**kwargs: Additional parameters
496
"""
497
498
class ReverseNested:
499
"""
500
Reverse nested aggregation.
501
"""
502
def __init__(self, path=None, **kwargs):
503
"""
504
Args:
505
path (str, optional): Path to reverse to (default: root)
506
**kwargs: Additional parameters
507
"""
508
509
class Global:
510
"""
511
Global aggregation ignoring query context.
512
"""
513
def __init__(self, **kwargs):
514
"""
515
Args:
516
**kwargs: Additional parameters
517
"""
518
519
class Sampler:
520
"""
521
Sampler aggregation for sampling documents.
522
"""
523
def __init__(self, shard_size=None, **kwargs):
524
"""
525
Args:
526
shard_size (int, optional): Sample size per shard
527
**kwargs: Additional parameters
528
"""
529
530
class DiversifiedSampler:
531
"""
532
Diversified sampler aggregation.
533
"""
534
def __init__(self, field=None, shard_size=None, **kwargs):
535
"""
536
Args:
537
field (str, optional): Field to diversify on
538
shard_size (int, optional): Sample size per shard
539
**kwargs: Parameters same as Sampler plus:
540
max_docs_per_value (int): Max docs per field value
541
execution_hint (str): Execution hint
542
"""
543
544
class SignificantTerms:
545
"""
546
Significant terms aggregation.
547
"""
548
def __init__(self, field=None, **kwargs):
549
"""
550
Args:
551
field (str, optional): Field to find significant terms in
552
**kwargs: Significant terms parameters
553
554
Parameters:
555
size (int): Number of terms to return
556
shard_size (int): Number of terms per shard
557
min_doc_count (int): Minimum document count
558
shard_min_doc_count (int): Minimum document count per shard
559
chi_square (dict): Chi square significance test
560
gnd (dict): Google normalized distance test
561
mutual_information (dict): Mutual information test
562
percentage (dict): Percentage test
563
script_heuristic (dict): Custom script test
564
background_filter (Query): Background filter
565
include (str or list): Include terms pattern/list
566
exclude (str or list): Exclude terms pattern/list
567
execution_hint (str): Execution hint
568
"""
569
570
class RareTerms:
571
"""
572
Rare terms aggregation.
573
"""
574
def __init__(self, field=None, **kwargs):
575
"""
576
Args:
577
field (str, optional): Field to find rare terms in
578
**kwargs: Rare terms parameters
579
580
Parameters:
581
max_doc_count (int): Maximum document count for rare terms
582
precision (float): Precision for rarity calculation
583
include (str or list): Include terms pattern/list
584
exclude (str or list): Exclude terms pattern/list
585
missing (str): Value for documents missing the field
586
"""
587
588
class Composite:
589
"""
590
Composite aggregation for pagination of bucket aggregations.
591
"""
592
def __init__(self, sources, **kwargs):
593
"""
594
Args:
595
sources (list): List of source configurations
596
**kwargs: Composite parameters
597
598
Parameters:
599
size (int): Number of buckets to return
600
after (dict): After key for pagination
601
"""
602
603
class MultiTerms:
604
"""
605
Multi-field terms aggregation.
606
"""
607
def __init__(self, terms, **kwargs):
608
"""
609
Args:
610
terms (list): List of term configurations
611
**kwargs: Multi-terms parameters
612
613
Parameters:
614
size (int): Number of buckets to return
615
shard_size (int): Number of buckets per shard
616
show_term_doc_count_error (bool): Show document count error
617
order (dict): Sort order for buckets
618
min_doc_count (int): Minimum document count per bucket
619
shard_min_doc_count (int): Minimum document count per shard
620
"""
621
622
class Adjacency:
623
"""
624
Adjacency matrix aggregation.
625
"""
626
def __init__(self, filters=None, **kwargs):
627
"""
628
Args:
629
filters (dict): Named filters for adjacency matrix
630
**kwargs: Adjacency parameters
631
632
Parameters:
633
separator (str): Separator for bucket keys
634
"""
635
636
class Parent:
637
"""
638
Parent aggregation for parent-child relationships.
639
"""
640
def __init__(self, type, **kwargs):
641
"""
642
Args:
643
type (str): Child document type
644
**kwargs: Parent aggregation parameters
645
"""
646
647
class Children:
648
"""
649
Children aggregation for parent-child relationships.
650
"""
651
def __init__(self, type, **kwargs):
652
"""
653
Args:
654
type (str): Child document type
655
**kwargs: Children aggregation parameters
656
"""
657
658
class VariableWidthHistogram:
659
"""
660
Variable width histogram aggregation.
661
"""
662
def __init__(self, field=None, buckets=None, **kwargs):
663
"""
664
Args:
665
field (str, optional): Field to histogram
666
buckets (int, optional): Target number of buckets
667
**kwargs: Variable width histogram parameters
668
669
Parameters:
670
shard_size (int): Shard size for sampling
671
initial_buffer (int): Initial buffer size
672
"""
673
674
class CategorizeText:
675
"""
676
Categorize text aggregation for ML-based text categorization.
677
"""
678
def __init__(self, field, **kwargs):
679
"""
680
Args:
681
field (str): Text field to categorize
682
**kwargs: Categorize text parameters
683
684
Parameters:
685
max_unique_tokens (int): Maximum unique tokens
686
max_matched_tokens (int): Maximum matched tokens per category
687
similarity_threshold (float): Similarity threshold for categorization
688
categorization_filters (list): Filters for categorization
689
"""
690
```
691
692
### Pipeline Aggregations
693
694
Aggregations that process the output of other aggregations.
695
696
```python { .api }
697
class AvgBucket:
698
"""
699
Average bucket pipeline aggregation.
700
"""
701
def __init__(self, buckets_path, **kwargs):
702
"""
703
Args:
704
buckets_path (str): Path to buckets to average
705
**kwargs: Pipeline parameters
706
707
Parameters:
708
gap_policy (str): Policy for data gaps ('skip' or 'insert_zeros')
709
format (str): Output format
710
"""
711
712
class MaxBucket:
713
"""
714
Max bucket pipeline aggregation.
715
"""
716
def __init__(self, buckets_path, **kwargs):
717
"""Args and parameters same as AvgBucket."""
718
719
class MinBucket:
720
"""
721
Min bucket pipeline aggregation.
722
"""
723
def __init__(self, buckets_path, **kwargs):
724
"""Args and parameters same as AvgBucket."""
725
726
class SumBucket:
727
"""
728
Sum bucket pipeline aggregation.
729
"""
730
def __init__(self, buckets_path, **kwargs):
731
"""Args and parameters same as AvgBucket."""
732
733
class StatsBucket:
734
"""
735
Stats bucket pipeline aggregation.
736
"""
737
def __init__(self, buckets_path, **kwargs):
738
"""Args and parameters same as AvgBucket."""
739
740
class ExtendedStatsBucket:
741
"""
742
Extended stats bucket pipeline aggregation.
743
"""
744
def __init__(self, buckets_path, **kwargs):
745
"""
746
Args and parameters same as AvgBucket, plus:
747
sigma (float): Standard deviations for bounds
748
"""
749
750
class PercentilesBucket:
751
"""
752
Percentiles bucket pipeline aggregation.
753
"""
754
def __init__(self, buckets_path, **kwargs):
755
"""
756
Args and parameters same as AvgBucket, plus:
757
percents (list): Percentiles to calculate
758
"""
759
760
class MovingAvg:
761
"""
762
Moving average pipeline aggregation.
763
"""
764
def __init__(self, buckets_path, **kwargs):
765
"""
766
Args:
767
buckets_path (str): Path to values for moving average
768
**kwargs: Moving average parameters
769
770
Parameters:
771
window (int): Size of moving window
772
model (str): Moving average model ('simple', 'linear', 'ewma', 'holt', 'holt_winters')
773
gap_policy (str): Policy for data gaps
774
predict (int): Number of predictions to make
775
settings (dict): Model-specific settings
776
"""
777
778
class MovingFn:
779
"""
780
Moving function pipeline aggregation.
781
"""
782
def __init__(self, buckets_path, script, window, **kwargs):
783
"""
784
Args:
785
buckets_path (str): Path to values for moving function
786
script (dict): Script to execute
787
window (int): Size of moving window
788
**kwargs: Additional parameters such as gap_policy and shift (the MovingAvg model, predict and settings options do not apply)
789
"""
790
791
class Derivative:
792
"""
793
Derivative pipeline aggregation.
794
"""
795
def __init__(self, buckets_path, **kwargs):
796
"""
797
Args:
798
buckets_path (str): Path to values for derivative
799
**kwargs: Derivative parameters
800
801
Parameters:
802
gap_policy (str): Policy for data gaps
803
format (str): Output format
804
unit (str): Unit for derivative calculation
805
"""
806
807
class SerialDiff:
808
"""
809
Serial differencing pipeline aggregation.
810
"""
811
def __init__(self, buckets_path, **kwargs):
812
"""
813
Args:
814
buckets_path (str): Path to values for differencing
815
**kwargs: Parameters same as Derivative plus:
816
lag (int): Lag for differencing calculation
817
"""
818
819
class CumulativeSum:
820
"""
821
Cumulative sum pipeline aggregation.
822
"""
823
def __init__(self, buckets_path, **kwargs):
824
"""
825
Args:
826
buckets_path (str): Path to values for cumulative sum
827
**kwargs: Parameters same as Derivative
828
"""
829
830
class BucketScript:
831
"""
832
Bucket script pipeline aggregation.
833
"""
834
def __init__(self, buckets_path=None, script=None, **kwargs):
835
"""
836
Args:
837
buckets_path (dict): Named paths to bucket values
838
script (dict): Script to execute
839
**kwargs: Bucket script parameters
840
841
Parameters:
842
gap_policy (str): Policy for data gaps
843
format (str): Output format
844
"""
845
846
class BucketSelector:
847
"""
848
Bucket selector pipeline aggregation.
849
"""
850
def __init__(self, buckets_path=None, script=None, **kwargs):
851
"""
852
Args:
853
buckets_path (dict): Named paths to bucket values
854
script (dict): Script to execute (should return boolean)
855
**kwargs: Parameters same as BucketScript
856
"""
857
858
class BucketSort:
859
"""
860
Bucket sort pipeline aggregation.
861
"""
862
def __init__(self, **kwargs):
863
"""
864
Args:
865
**kwargs: Bucket sort parameters
866
867
Parameters:
868
sort (list): Sort configuration
869
from_ (int): Starting offset
870
size (int): Number of buckets to return
871
gap_policy (str): Policy for data gaps
872
"""
873
```
874
875
## Usage Examples
876
877
### Basic Metric Aggregations
878
879
```python
880
from elasticsearch_dsl import Search, A
881
882
# Simple metric aggregations
883
search = Search(index='sales')
884
search.aggs.metric('avg_price', 'avg', field='price')
885
search.aggs.metric('total_revenue', 'sum', field='revenue')
886
search.aggs.metric('price_stats', 'stats', field='price')
887
888
response = search.execute()
889
print(f"Average price: {response.aggregations.avg_price.value}")
890
print(f"Total revenue: {response.aggregations.total_revenue.value}")
891
print(f"Price stats: {response.aggregations.price_stats}")
892
```
893
894
### Bucket Aggregations with Metrics
895
896
```python
897
# Terms aggregation with nested metrics
898
search = Search(index='sales')
899
category_agg = search.aggs.bucket('categories', 'terms', field='category', size=10)
900
category_agg.metric('avg_price', 'avg', field='price')
901
category_agg.metric('total_sales', 'sum', field='quantity')
902
903
response = search.execute()
904
for bucket in response.aggregations.categories.buckets:
905
print(f"Category: {bucket.key}")
906
print(f" Average price: {bucket.avg_price.value}")
907
print(f" Total sales: {bucket.total_sales.value}")
908
```
909
910
### Date Histogram with Multiple Metrics
911
912
```python
913
# Date histogram with multiple metrics
914
search = Search(index='sales')
915
date_agg = search.aggs.bucket(
916
'sales_over_time',
917
'date_histogram',
918
field='date',
919
calendar_interval='1d',
920
time_zone='America/New_York'
921
)
922
date_agg.metric('daily_revenue', 'sum', field='revenue')
923
date_agg.metric('daily_orders', 'value_count', field='order_id')
924
date_agg.metric('avg_order_value', 'avg', field='order_value')
925
926
response = search.execute()
927
for bucket in response.aggregations.sales_over_time.buckets:
928
print(f"Date: {bucket.key_as_string}")
929
print(f" Revenue: ${bucket.daily_revenue.value:.2f}")
930
print(f" Orders: {bucket.daily_orders.value}")
931
print(f" AOV: ${bucket.avg_order_value.value:.2f}")
932
```
933
934
### Complex Nested Aggregations
935
936
```python
937
# Multi-level nested aggregations
938
search = Search(index='sales')
939
940
# Top level: group by category
941
category_agg = search.aggs.bucket('categories', 'terms', field='category')
942
943
# Second level: group by date within each category
944
date_agg = category_agg.bucket(
945
'monthly_sales',
946
'date_histogram',
947
field='date',
948
calendar_interval='1M'
949
)
950
951
# Third level: metrics within each date bucket
952
date_agg.metric('revenue', 'sum', field='revenue')
953
date_agg.metric('avg_price', 'avg', field='price')
954
955
# Also add category-level metrics
956
category_agg.metric('total_category_revenue', 'sum', field='revenue')
957
958
response = search.execute()
959
for category in response.aggregations.categories.buckets:
960
print(f"Category: {category.key}")
961
print(f"Total revenue: ${category.total_category_revenue.value:.2f}")
962
963
for month in category.monthly_sales.buckets:
964
print(f" {month.key_as_string}: ${month.revenue.value:.2f}")
965
```
966
967
### Pipeline Aggregations
968
969
```python
970
# Pipeline aggregations for trend analysis
971
search = Search(index='sales')
972
973
# Base date histogram
974
date_agg = search.aggs.bucket(
975
'sales_over_time',
976
'date_histogram',
977
field='date',
978
calendar_interval='1d'
979
)
980
date_agg.metric('daily_sales', 'sum', field='revenue')
981
982
# Pipeline aggregations
983
search.aggs.pipeline(
984
'avg_daily_sales',
985
'avg_bucket',
986
buckets_path='sales_over_time>daily_sales'
987
)
988
989
# Parent pipeline: must be nested inside the histogram, with a path relative to it
date_agg.pipeline(
990
'sales_derivative',
991
'derivative',
992
buckets_path='daily_sales'
993
)
994
995
# Parent pipeline: must be nested inside the histogram, with a path relative to it
date_agg.pipeline(
996
'cumulative_sales',
997
'cumulative_sum',
998
buckets_path='daily_sales'
999
)
1000
1001
response = search.execute()
1002
print(f"Average daily sales: ${response.aggregations.avg_daily_sales.value:.2f}")
1003
1004
for bucket in response.aggregations.sales_over_time.buckets:
1005
print(f"Date: {bucket.key_as_string}")
1006
print(f" Daily sales: ${bucket.daily_sales.value:.2f}")
1007
1008
if hasattr(bucket, 'sales_derivative'):
1009
print(f" Change: ${bucket.sales_derivative.value:.2f}")
1010
1011
if hasattr(bucket, 'cumulative_sales'):
1012
print(f" Cumulative: ${bucket.cumulative_sales.value:.2f}")
1013
```
1014
1015
### Advanced Aggregation Patterns
1016
1017
```python
1018
from elasticsearch_dsl import Q
# Significant terms with background filter
1019
search = Search(index='reviews')
1020
search = search.filter('range', rating={'lt': 3}) # Low rated reviews
1021
1022
search.aggs.bucket(
1023
'negative_review_terms',
1024
'significant_terms',
1025
field='review_text',
1026
size=20,
1027
min_doc_count=5,
1028
background_filter=Q('match_all')
1029
)
1030
1031
# Percentiles aggregation
1032
search.aggs.metric(
1033
'response_time_percentiles',
1034
'percentiles',
1035
field='response_time_ms',
1036
percents=[50, 90, 95, 99]
1037
)
1038
1039
# Geo distance aggregation
1040
search.aggs.bucket(
1041
'distance_from_center',
1042
'geo_distance',
1043
field='location',
1044
origin={'lat': 40.7128, 'lon': -74.0060},
1045
ranges=[
1046
{'to': 1000, 'key': 'nearby'},
1047
{'from': 1000, 'to': 5000, 'key': 'close'},
1048
{'from': 5000, 'key': 'far'}
1049
],
1050
unit='m'
1051
)
1052
1053
response = search.execute()
1054
```