0
# Query Building Utilities
1
2
Comprehensive utility modules for constructing Druid queries including aggregations, filters, dimensions, having clauses, and post-aggregations. These utilities provide a declarative API that generates the appropriate JSON structures for Druid's native query format.
3
4
## Capabilities
5
6
### Aggregations
7
8
Aggregation functions that compute metrics during query execution.
9
10
```python { .api }
11
# Numeric aggregations
12
def doublesum(raw_metric: str) -> dict:
13
"""Sum aggregation for double/float values."""
14
15
def longsum(raw_metric: str) -> dict:
16
"""Sum aggregation for long/integer values."""
17
18
def doublemin(raw_metric: str) -> dict:
19
"""Minimum aggregation for double/float values."""
20
21
def doublemax(raw_metric: str) -> dict:
22
"""Maximum aggregation for double/float values."""
23
24
def longmin(raw_metric: str) -> dict:
25
"""Minimum aggregation for long/integer values."""
26
27
def longmax(raw_metric: str) -> dict:
28
"""Maximum aggregation for long/integer values."""
29
30
def count(raw_metric: str) -> dict:
31
"""Count aggregation."""
32
33
# Approximate aggregations
34
def hyperunique(raw_metric: str) -> dict:
35
"""HyperLogLog-based unique count approximation."""
36
37
def cardinality(raw_column: str, by_row: bool = False) -> dict:
38
"""
39
Cardinality estimation using HyperLogLog.
40
41
Parameters:
42
- raw_column: Column to compute cardinality for
43
- by_row: Whether to compute cardinality by row (default: False)
44
"""
45
46
def thetasketch(raw_column: str, isinputthetasketch: bool = False, size: int = 16384) -> dict:
47
"""
48
Theta sketch aggregation for set operations.
49
50
Parameters:
51
- raw_column: Column containing sketch data
52
- isinputthetasketch: Whether input is already a theta sketch
53
- size: Sketch size parameter
54
"""
55
56
def quantilesDoublesSketch(raw_column: str, k: int = 128) -> dict:
57
"""
58
Quantiles sketch aggregation for approximate quantile computation.
59
60
Parameters:
61
- raw_column: Column to compute quantiles for
62
- k: Sketch accuracy parameter (higher = more accurate)
63
"""
64
65
# String aggregations
66
def stringfirst(raw_metric: str) -> dict:
67
"""First string value aggregation."""
68
69
def stringlast(raw_metric: str) -> dict:
70
"""Last string value aggregation."""
71
72
# Conditional aggregation
73
def filtered(filter: 'Filter', agg: dict) -> dict:
74
"""
75
Apply filter condition to aggregation.
76
77
Parameters:
78
- filter: Filter condition to apply
79
- agg: Aggregation to wrap with filter
80
"""
81
82
# Custom aggregation
83
def javascript(columns_list: list, fn_aggregate: str, fn_combine: str, fn_reset: str) -> dict:
84
"""
85
JavaScript-based custom aggregation.
86
87
Parameters:
88
- columns_list: List of column names to aggregate
89
- fn_aggregate: JavaScript function for aggregating individual rows
90
- fn_combine: JavaScript function for combining partial aggregates
91
- fn_reset: JavaScript function for resetting aggregation state
92
"""
93
```
94
95
### Filters
96
97
Filter conditions for limiting query results.
98
99
```python { .api }
100
class Filter:
101
"""Base filter class supporting logical operations."""
102
103
def __init__(self, extraction_function=None, ordering: str = "lexicographic", **args) -> None:
104
"""
105
Initialize filter.
106
107
Parameters:
108
- extraction_function: Optional extraction function to apply
109
- ordering: Sort ordering for bound filters
110
- **args: Filter-specific arguments
111
"""
112
113
def __and__(self, other: 'Filter') -> 'Filter':
114
"""Logical AND operation between filters."""
115
116
def __or__(self, other: 'Filter') -> 'Filter':
117
"""Logical OR operation between filters."""
118
119
def __invert__(self) -> 'Filter':
120
"""Logical NOT operation on filter."""
121
122
class Dimension:
123
"""Helper for creating dimension-based filters."""
124
125
def __init__(self, dim: str) -> None:
126
"""
127
Initialize dimension filter helper.
128
129
Parameters:
130
- dim: Dimension name
131
"""
132
133
def __eq__(self, other) -> Filter:
134
"""Create equality filter (dimension == value)."""
135
136
def __ne__(self, other) -> Filter:
137
"""Create inequality filter (dimension != value)."""
138
139
def in_(self, values: list) -> Filter:
140
"""Create 'in' filter (dimension in [values])."""
141
142
class JavaScript:
143
"""Helper for creating JavaScript-based filters."""
144
145
def __init__(self, dim: str) -> None:
146
"""
147
Initialize JavaScript filter helper.
148
149
Parameters:
150
- dim: Dimension name
151
"""
152
153
def __eq__(self, js_function: str) -> Filter:
154
"""Create JavaScript filter with custom function."""
155
156
class Bound:
157
"""Range-based filter for numeric and string comparisons."""
158
159
def __init__(
160
self,
161
dimension: str,
162
lower: str = None,
163
upper: str = None,
164
lowerStrict: bool = False,
165
upperStrict: bool = False,
166
alphaNumeric: bool = False,
167
ordering: str = "lexicographic",
168
extraction_function = None
169
) -> None:
170
"""
171
Initialize bound filter.
172
173
Parameters:
174
- dimension: Dimension to filter
175
- lower: Lower bound value (optional)
176
- upper: Upper bound value (optional)
177
- lowerStrict: Whether lower bound is exclusive
178
- upperStrict: Whether upper bound is exclusive
179
- alphaNumeric: Whether to use alphanumeric ordering
180
- ordering: Sort ordering type
181
- extraction_function: Optional extraction function
182
"""
183
184
class Interval:
185
"""Time interval filter."""
186
187
def __init__(self, dimension: str, intervals: list, extraction_function = None) -> None:
188
"""
189
Initialize interval filter.
190
191
Parameters:
192
- dimension: Time dimension to filter
193
- intervals: List of ISO-8601 interval strings
194
- extraction_function: Optional extraction function
195
"""
196
197
class Spatial:
198
"""Geographic/spatial filter."""
199
200
def __init__(self, dimension: str, bound_type: str, **args) -> None:
201
"""
202
Initialize spatial filter.
203
204
Parameters:
205
- dimension: Spatial dimension to filter
206
- bound_type: Type of spatial bound ('rectangular', etc.)
207
- **args: Spatial bound parameters
208
"""
209
```
210
211
### Dimensions and Extraction Functions
212
213
Dimension specifications and value transformation functions.
214
215
```python { .api }
216
class DimensionSpec:
217
"""Dimension specification with optional extraction function."""
218
219
def __init__(
220
self,
221
dimension: str,
222
output_name: str,
223
extraction_function = None,
224
filter_spec = None
225
) -> None:
226
"""
227
Initialize dimension specification.
228
229
Parameters:
230
- dimension: Source dimension name
231
- output_name: Output dimension name
232
- extraction_function: Optional value extraction function
233
- filter_spec: Optional dimension filtering specification
234
"""
235
236
# Extraction Functions
237
class RegexExtraction:
238
"""Extract values using regular expressions."""
239
240
def __init__(self, expr: str) -> None:
241
"""
242
Parameters:
243
- expr: Regular expression pattern
244
"""
245
246
class TimeFormatExtraction:
247
"""Extract and format time values."""
248
249
def __init__(self, format: str, locale: str = None, time_zone: str = None) -> None:
250
"""
251
Parameters:
252
- format: Time format string
253
- locale: Locale for formatting (optional)
254
- time_zone: Time zone for formatting (optional)
255
"""
256
257
class MapLookupExtraction:
258
"""Transform values using key-value mapping."""
259
260
def __init__(
261
self,
262
mapping: dict,
263
retain_missing_values: bool = False,
264
replace_missing_values: str = None,
265
injective: bool = False
266
) -> None:
267
"""
268
Parameters:
269
- mapping: Dictionary mapping input values to output values
270
- retain_missing_values: Whether to keep unmapped values
271
- replace_missing_values: Default value for unmapped inputs
272
- injective: Whether the mapping is one-to-one
273
"""
274
```
275
276
### Having Clauses
277
278
Having clauses for filtering grouped query results.
279
280
```python { .api }
281
class Having:
282
"""Having clause builder for groupBy queries."""
283
284
def __init__(self, **args) -> None:
285
"""Initialize having clause with filter conditions."""
286
287
def __and__(self, other: 'Having') -> 'Having':
288
"""Logical AND operation between having clauses."""
289
290
def __or__(self, other: 'Having') -> 'Having':
291
"""Logical OR operation between having clauses."""
292
293
def __invert__(self) -> 'Having':
294
"""Logical NOT operation on having clause."""
295
296
class Aggregation:
297
"""Having clause based on aggregation values."""
298
299
def __init__(self, agg: str) -> None:
300
"""
301
Parameters:
302
- agg: Aggregation name to filter on
303
"""
304
305
def __eq__(self, value) -> Having:
306
"""Create equality having clause (aggregation == value)."""
307
308
def __lt__(self, value) -> Having:
309
"""Create less-than having clause (aggregation < value)."""
310
311
def __gt__(self, value) -> Having:
312
"""Create greater-than having clause (aggregation > value)."""
313
```
314
315
### Post-Aggregations
316
317
Post-aggregation operations computed after initial aggregation.
318
319
```python { .api }
320
class Postaggregator:
321
"""Base post-aggregator with arithmetic operations."""
322
323
def __init__(self, fn: str, fields: list, name: str) -> None:
324
"""
325
Parameters:
326
- fn: Post-aggregation function name
327
- fields: List of field names to operate on
328
- name: Output name for post-aggregation result
329
"""
330
331
def __add__(self, other) -> 'Postaggregator':
332
"""Addition operation."""
333
334
def __sub__(self, other) -> 'Postaggregator':
335
"""Subtraction operation."""
336
337
def __mul__(self, other) -> 'Postaggregator':
338
"""Multiplication operation."""
339
340
def __truediv__(self, other) -> 'Postaggregator':
341
"""Division operation."""
342
343
class Field:
344
"""Access aggregated field values."""
345
346
def __init__(self, name: str) -> None:
347
"""
348
Parameters:
349
- name: Name of aggregated field to access
350
"""
351
352
class Const:
353
"""Constant value for post-aggregation operations."""
354
355
def __init__(self, value: float, output_name: str = None) -> None:
356
"""
357
Parameters:
358
- value: Constant numeric value
359
- output_name: Optional output name
360
"""
361
362
# Sketch-based post-aggregations
363
class ThetaSketch:
364
"""Access theta sketch aggregation results."""
365
366
def __init__(self, name: str) -> None:
367
"""
368
Parameters:
369
- name: Name of theta sketch aggregation
370
"""
371
372
def __and__(self, other: 'ThetaSketch') -> 'ThetaSketchOp':
373
"""Intersection operation between theta sketches."""
374
375
def __or__(self, other: 'ThetaSketch') -> 'ThetaSketchOp':
376
"""Union operation between theta sketches."""
377
378
class ThetaSketchEstimate:
379
"""Estimate cardinality from theta sketch."""
380
381
def __init__(self, fields: 'ThetaSketchOp') -> None:
382
"""
383
Parameters:
384
- fields: Theta sketch operation to estimate
385
"""
386
387
class QuantilesDoublesSketchToQuantile:
388
"""Extract quantile value from quantiles sketch."""
389
390
def __init__(self, name: str, field_name: str, fraction: float) -> None:
391
"""
392
Parameters:
393
- name: Output name for the quantile value
394
- field_name: Name of the quantiles sketch aggregation
395
- fraction: Quantile fraction (0.0 to 1.0)
396
"""
397
398
class Quantile:
399
"""Access quantile value from quantiles aggregation."""
400
401
def __init__(self, name: str, probability: float) -> None:
402
"""
403
Parameters:
404
- name: Name of quantiles aggregation field
405
- probability: Quantile probability (0.0 to 1.0)
406
"""
407
408
class Quantiles:
409
"""Access multiple quantile values from quantiles aggregation."""
410
411
def __init__(self, name: str, probabilities: list) -> None:
412
"""
413
Parameters:
414
- name: Name of quantiles aggregation field
415
- probabilities: List of quantile probabilities (0.0 to 1.0)
416
"""
417
418
class HyperUniqueCardinality:
419
"""Extract cardinality estimate from hyperunique aggregation."""
420
421
def __init__(self, name: str) -> None:
422
"""
423
Parameters:
424
- name: Name of hyperunique aggregation field
425
"""
426
427
class DoubleGreatest:
428
"""Return the greatest value among double post-aggregators."""
429
430
def __init__(self, fields: list, output_name: str = None) -> None:
431
"""
432
Parameters:
433
- fields: List of post-aggregator objects to compare
434
- output_name: Optional output name (defaults to 'doubleGreatest')
435
"""
436
437
class DoubleLeast:
438
"""Return the least value among double post-aggregators."""
439
440
def __init__(self, fields: list, output_name: str = None) -> None:
441
"""
442
Parameters:
443
- fields: List of post-aggregator objects to compare
444
- output_name: Optional output name (defaults to 'doubleLeast')
445
"""
446
447
class LongGreatest:
448
"""Return the greatest value among long post-aggregators."""
449
450
def __init__(self, fields: list, output_name: str = None) -> None:
451
"""
452
Parameters:
453
- fields: List of post-aggregator objects to compare
454
- output_name: Optional output name (defaults to 'longGreatest')
455
"""
456
457
class LongLeast:
458
"""Return the least value among long post-aggregators."""
459
460
def __init__(self, fields: list, output_name: str = None) -> None:
461
"""
462
Parameters:
463
- fields: List of post-aggregator objects to compare
464
- output_name: Optional output name (defaults to 'longLeast')
465
"""
466
467
class ThetaSketchOp:
468
"""Theta sketch set operations for combining sketches."""
469
470
def __init__(self, fn: str, fields: list, name: str) -> None:
471
"""
472
Parameters:
473
- fn: Set operation function ('UNION', 'INTERSECT', 'NOT')
474
- fields: List of theta sketch fields to operate on
475
- name: Output name for the operation result
476
"""
477
478
def __or__(self, other: 'ThetaSketchOp') -> 'ThetaSketchOp':
479
"""Union operation between theta sketches."""
480
481
def __and__(self, other: 'ThetaSketchOp') -> 'ThetaSketchOp':
482
"""Intersection operation between theta sketches."""
483
484
def __ne__(self, other: 'ThetaSketchOp') -> 'ThetaSketchOp':
485
"""Not operation (set difference) between theta sketches."""
486
```
487
488
## Usage Examples
489
490
```python
491
from pydruid.utils.aggregators import doublesum, count, filtered
492
from pydruid.utils.filters import Dimension, Filter
493
from pydruid.utils.postaggregator import Field
494
from pydruid.utils.having import Aggregation
495
496
# Complex filter example
497
filter_condition = (
498
(Dimension('user_lang') == 'en') &
499
(Dimension('first_hashtag') == 'oscars') &
500
~(Dimension('user_mention_name') == 'No Mention')
501
)
502
503
# Aggregations with post-aggregation
504
aggregations = {
505
'total_tweets': count('count'),
506
'total_length': doublesum('tweet_length'),
507
'filtered_tweets': filtered(
508
Dimension('verified') == 'true',
509
count('count')
510
)
511
}
512
513
post_aggregations = {
514
'avg_length': Field('total_length') / Field('total_tweets'),
515
'verified_ratio': Field('filtered_tweets') / Field('total_tweets')
516
}
517
518
# Having clause
519
having_clause = (Aggregation('total_tweets') > 100) & (Aggregation('avg_length') < 200)
520
```