or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

asynchronous-client.mdcommand-line-interface.mddatabase-api.mdindex.mdquery-utilities.mdsqlalchemy-integration.mdsynchronous-client.md

query-utilities.mddocs/

0

# Query Building Utilities

1

2

Comprehensive utility modules for constructing Druid queries including aggregations, filters, dimensions, having clauses, and post-aggregations. These utilities provide a declarative API that generates the appropriate JSON structures for Druid's native query format.

3

4

## Capabilities

5

6

### Aggregations

7

8

Aggregation functions that compute metrics during query execution.

9

10

```python { .api }

11

# Numeric aggregations

12

def doublesum(raw_metric: str) -> dict:

13

"""Sum aggregation for double/float values."""

14

15

def longsum(raw_metric: str) -> dict:

16

"""Sum aggregation for long/integer values."""

17

18

def doublemin(raw_metric: str) -> dict:

19

"""Minimum aggregation for double/float values."""

20

21

def doublemax(raw_metric: str) -> dict:

22

"""Maximum aggregation for double/float values."""

23

24

def longmin(raw_metric: str) -> dict:

25

"""Minimum aggregation for long/integer values."""

26

27

def longmax(raw_metric: str) -> dict:

28

"""Maximum aggregation for long/integer values."""

29

30

def count(raw_metric: str) -> dict:

31

"""Count aggregation."""

32

33

# Approximate aggregations

34

def hyperunique(raw_metric: str) -> dict:

35

"""HyperLogLog-based unique count approximation."""

36

37

def cardinality(raw_column: str, by_row: bool = False) -> dict:

38

"""

39

Cardinality estimation using HyperLogLog.

40

41

Parameters:

42

- raw_column: Column to compute cardinality for

43

- by_row: Whether to compute cardinality by row (default: False)

44

"""

45

46

def thetasketch(raw_column: str, isinputthetasketch: bool = False, size: int = 16384) -> dict:

47

"""

48

Theta sketch aggregation for set operations.

49

50

Parameters:

51

- raw_column: Column containing sketch data

52

- isinputthetasketch: Whether input is already a theta sketch

53

- size: Sketch size parameter

54

"""

55

56

def quantilesDoublesSketch(raw_column: str, k: int = 128) -> dict:

57

"""

58

Quantiles sketch aggregation for approximate quantile computation.

59

60

Parameters:

61

- raw_column: Column to compute quantiles for

62

- k: Sketch accuracy parameter (higher = more accurate)

63

"""

64

65

# String aggregations

66

def stringfirst(raw_metric: str) -> dict:

67

"""First string value aggregation."""

68

69

def stringlast(raw_metric: str) -> dict:

70

"""Last string value aggregation."""

71

72

# Conditional aggregation

73

def filtered(filter: 'Filter', agg: dict) -> dict:

74

"""

75

Apply filter condition to aggregation.

76

77

Parameters:

78

- filter: Filter condition to apply

79

- agg: Aggregation to wrap with filter

80

"""

81

82

# Custom aggregation

83

def javascript(columns_list: list, fn_aggregate: str, fn_combine: str, fn_reset: str) -> dict:

84

"""

85

JavaScript-based custom aggregation.

86

87

Parameters:

88

- columns_list: List of column names to aggregate

89

- fn_aggregate: JavaScript function for aggregating individual rows

90

- fn_combine: JavaScript function for combining partial aggregates

91

- fn_reset: JavaScript function for resetting aggregation state

92

"""

93

```

94

95

### Filters

96

97

Filter conditions for limiting query results.

98

99

```python { .api }

100

class Filter:

101

"""Base filter class supporting logical operations."""

102

103

def __init__(self, extraction_function=None, ordering: str = "lexicographic", **args) -> None:

104

"""

105

Initialize filter.

106

107

Parameters:

108

- extraction_function: Optional extraction function to apply

109

- ordering: Sort ordering for bound filters

110

- **args: Filter-specific arguments

111

"""

112

113

def __and__(self, other: 'Filter') -> 'Filter':

114

"""Logical AND operation between filters."""

115

116

def __or__(self, other: 'Filter') -> 'Filter':

117

"""Logical OR operation between filters."""

118

119

def __invert__(self) -> 'Filter':

120

"""Logical NOT operation on filter."""

121

122

class Dimension:

123

"""Helper for creating dimension-based filters."""

124

125

def __init__(self, dim: str) -> None:

126

"""

127

Initialize dimension filter helper.

128

129

Parameters:

130

- dim: Dimension name

131

"""

132

133

def __eq__(self, other) -> Filter:

134

"""Create equality filter (dimension == value)."""

135

136

def __ne__(self, other) -> Filter:

137

"""Create inequality filter (dimension != value)."""

138

139

def in_(self, values: list) -> Filter:

140

"""Create 'in' filter (dimension in [values])."""

141

142

class JavaScript:

143

"""Helper for creating JavaScript-based filters."""

144

145

def __init__(self, dim: str) -> None:

146

"""

147

Initialize JavaScript filter helper.

148

149

Parameters:

150

- dim: Dimension name

151

"""

152

153

def __eq__(self, js_function: str) -> Filter:

154

"""Create JavaScript filter with custom function."""

155

156

class Bound:

157

"""Range-based filter for numeric and string comparisons."""

158

159

def __init__(

160

self,

161

dimension: str,

162

lower: str = None,

163

upper: str = None,

164

lowerStrict: bool = False,

165

upperStrict: bool = False,

166

alphaNumeric: bool = False,

167

ordering: str = "lexicographic",

168

extraction_function = None

169

) -> None:

170

"""

171

Initialize bound filter.

172

173

Parameters:

174

- dimension: Dimension to filter

175

- lower: Lower bound value (optional)

176

- upper: Upper bound value (optional)

177

- lowerStrict: Whether lower bound is exclusive

178

- upperStrict: Whether upper bound is exclusive

179

- alphaNumeric: Whether to use alphanumeric ordering

180

- ordering: Sort ordering type

181

- extraction_function: Optional extraction function

182

"""

183

184

class Interval:

185

"""Time interval filter."""

186

187

def __init__(self, dimension: str, intervals: list, extraction_function = None) -> None:

188

"""

189

Initialize interval filter.

190

191

Parameters:

192

- dimension: Time dimension to filter

193

- intervals: List of ISO-8601 interval strings

194

- extraction_function: Optional extraction function

195

"""

196

197

class Spatial:

198

"""Geographic/spatial filter."""

199

200

def __init__(self, dimension: str, bound_type: str, **args) -> None:

201

"""

202

Initialize spatial filter.

203

204

Parameters:

205

- dimension: Spatial dimension to filter

206

- bound_type: Type of spatial bound ('rectangular', etc.)

207

- **args: Spatial bound parameters

208

"""

209

```

210

211

### Dimensions and Extraction Functions

212

213

Dimension specifications and value transformation functions.

214

215

```python { .api }

216

class DimensionSpec:

217

"""Dimension specification with optional extraction function."""

218

219

def __init__(

220

self,

221

dimension: str,

222

output_name: str,

223

extraction_function = None,

224

filter_spec = None

225

) -> None:

226

"""

227

Initialize dimension specification.

228

229

Parameters:

230

- dimension: Source dimension name

231

- output_name: Output dimension name

232

- extraction_function: Optional value extraction function

233

- filter_spec: Optional dimension filtering specification

234

"""

235

236

# Extraction Functions

237

class RegexExtraction:

238

"""Extract values using regular expressions."""

239

240

def __init__(self, expr: str) -> None:

241

"""

242

Parameters:

243

- expr: Regular expression pattern

244

"""

245

246

class TimeFormatExtraction:

247

"""Extract and format time values."""

248

249

def __init__(self, format: str, locale: str = None, time_zone: str = None) -> None:

250

"""

251

Parameters:

252

- format: Time format string

253

- locale: Locale for formatting (optional)

254

- time_zone: Time zone for formatting (optional)

255

"""

256

257

class MapLookupExtraction:

258

"""Transform values using key-value mapping."""

259

260

def __init__(

261

self,

262

mapping: dict,

263

retain_missing_values: bool = False,

264

replace_missing_values: str = None,

265

injective: bool = False

266

) -> None:

267

"""

268

Parameters:

269

- mapping: Dictionary mapping input values to output values

270

- retain_missing_values: Whether to keep unmapped values

271

- replace_missing_values: Default value for unmapped inputs

272

- injective: Whether the mapping is one-to-one

273

"""

274

```

275

276

### Having Clauses

277

278

Having clauses for filtering grouped query results.

279

280

```python { .api }

281

class Having:

282

"""Having clause builder for groupBy queries."""

283

284

def __init__(self, **args) -> None:

285

"""Initialize having clause with filter conditions."""

286

287

def __and__(self, other: 'Having') -> 'Having':

288

"""Logical AND operation between having clauses."""

289

290

def __or__(self, other: 'Having') -> 'Having':

291

"""Logical OR operation between having clauses."""

292

293

def __invert__(self) -> 'Having':

294

"""Logical NOT operation on having clause."""

295

296

class Aggregation:

297

"""Having clause based on aggregation values."""

298

299

def __init__(self, agg: str) -> None:

300

"""

301

Parameters:

302

- agg: Aggregation name to filter on

303

"""

304

305

def __eq__(self, value) -> Having:

306

"""Create equality having clause (aggregation == value)."""

307

308

def __lt__(self, value) -> Having:

309

"""Create less-than having clause (aggregation < value)."""

310

311

def __gt__(self, value) -> Having:

312

"""Create greater-than having clause (aggregation > value)."""

313

```

314

315

### Post-Aggregations

316

317

Post-aggregation operations computed after initial aggregation.

318

319

```python { .api }

320

class Postaggregator:

321

"""Base post-aggregator with arithmetic operations."""

322

323

def __init__(self, fn: str, fields: list, name: str) -> None:

324

"""

325

Parameters:

326

- fn: Post-aggregation function name

327

- fields: List of field names to operate on

328

- name: Output name for post-aggregation result

329

"""

330

331

def __add__(self, other) -> 'Postaggregator':

332

"""Addition operation."""

333

334

def __sub__(self, other) -> 'Postaggregator':

335

"""Subtraction operation."""

336

337

def __mul__(self, other) -> 'Postaggregator':

338

"""Multiplication operation."""

339

340

def __truediv__(self, other) -> 'Postaggregator':

341

"""Division operation."""

342

343

class Field:

344

"""Access aggregated field values."""

345

346

def __init__(self, name: str) -> None:

347

"""

348

Parameters:

349

- name: Name of aggregated field to access

350

"""

351

352

class Const:

353

"""Constant value for post-aggregation operations."""

354

355

def __init__(self, value: float, output_name: str = None) -> None:

356

"""

357

Parameters:

358

- value: Constant numeric value

359

- output_name: Optional output name

360

"""

361

362

# Sketch-based post-aggregations

363

class ThetaSketch:

364

"""Access theta sketch aggregation results."""

365

366

def __init__(self, name: str) -> None:

367

"""

368

Parameters:

369

- name: Name of theta sketch aggregation

370

"""

371

372

def __and__(self, other: 'ThetaSketch') -> 'ThetaSketchOp':

373

"""Intersection operation between theta sketches."""

374

375

def __or__(self, other: 'ThetaSketch') -> 'ThetaSketchOp':

376

"""Union operation between theta sketches."""

377

378

class ThetaSketchEstimate:

379

"""Estimate cardinality from theta sketch."""

380

381

def __init__(self, fields: 'ThetaSketchOp') -> None:

382

"""

383

Parameters:

384

- fields: Theta sketch operation to estimate

385

"""

386

387

class QuantilesDoublesSketchToQuantile:

388

"""Extract quantile value from quantiles sketch."""

389

390

def __init__(self, name: str, field_name: str, fraction: float) -> None:

391

"""

392

Parameters:

393

- name: Output name for the quantile value

394

- field_name: Name of the quantiles sketch aggregation

395

- fraction: Quantile fraction (0.0 to 1.0)

396

"""

397

398

class Quantile:

399

"""Access quantile value from quantiles aggregation."""

400

401

def __init__(self, name: str, probability: float) -> None:

402

"""

403

Parameters:

404

- name: Name of quantiles aggregation field

405

- probability: Quantile probability (0.0 to 1.0)

406

"""

407

408

class Quantiles:

409

"""Access multiple quantile values from quantiles aggregation."""

410

411

def __init__(self, name: str, probabilities: list) -> None:

412

"""

413

Parameters:

414

- name: Name of quantiles aggregation field

415

- probabilities: List of quantile probabilities (0.0 to 1.0)

416

"""

417

418

class HyperUniqueCardinality:

419

"""Extract cardinality estimate from hyperunique aggregation."""

420

421

def __init__(self, name: str) -> None:

422

"""

423

Parameters:

424

- name: Name of hyperunique aggregation field

425

"""

426

427

class DoubleGreatest:

428

"""Return the greatest value among double post-aggregators."""

429

430

def __init__(self, fields: list, output_name: str = None) -> None:

431

"""

432

Parameters:

433

- fields: List of post-aggregator objects to compare

434

- output_name: Optional output name (defaults to 'doubleGreatest')

435

"""

436

437

class DoubleLeast:

438

"""Return the least value among double post-aggregators."""

439

440

def __init__(self, fields: list, output_name: str = None) -> None:

441

"""

442

Parameters:

443

- fields: List of post-aggregator objects to compare

444

- output_name: Optional output name (defaults to 'doubleLeast')

445

"""

446

447

class LongGreatest:

448

"""Return the greatest value among long post-aggregators."""

449

450

def __init__(self, fields: list, output_name: str = None) -> None:

451

"""

452

Parameters:

453

- fields: List of post-aggregator objects to compare

454

- output_name: Optional output name (defaults to 'longGreatest')

455

"""

456

457

class LongLeast:

458

"""Return the least value among long post-aggregators."""

459

460

def __init__(self, fields: list, output_name: str = None) -> None:

461

"""

462

Parameters:

463

- fields: List of post-aggregator objects to compare

464

- output_name: Optional output name (defaults to 'longLeast')

465

"""

466

467

class ThetaSketchOp:

468

"""Theta sketch set operations for combining sketches."""

469

470

def __init__(self, fn: str, fields: list, name: str) -> None:

471

"""

472

Parameters:

473

- fn: Set operation function ('UNION', 'INTERSECT', 'NOT')

474

- fields: List of theta sketch fields to operate on

475

- name: Output name for the operation result

476

"""

477

478

def __or__(self, other: 'ThetaSketchOp') -> 'ThetaSketchOp':

479

"""Union operation between theta sketches."""

480

481

def __and__(self, other: 'ThetaSketchOp') -> 'ThetaSketchOp':

482

"""Intersection operation between theta sketches."""

483

484

def __ne__(self, other: 'ThetaSketchOp') -> 'ThetaSketchOp':

485

"""Not operation (set difference) between theta sketches."""

486

```

487

488

## Usage Examples

489

490

```python

491

from pydruid.utils.aggregators import doublesum, count, filtered

492

from pydruid.utils.filters import Dimension, Filter

493

from pydruid.utils.postaggregator import Field

494

from pydruid.utils.having import Aggregation

495

496

# Complex filter example

497

filter_condition = (

498

(Dimension('user_lang') == 'en') &

499

(Dimension('first_hashtag') == 'oscars') &

500

~(Dimension('user_mention_name') == 'No Mention')

501

)

502

503

# Aggregations with post-aggregation

504

aggregations = {

505

'total_tweets': count('count'),

506

'total_length': doublesum('tweet_length'),

507

'filtered_tweets': filtered(

508

Dimension('verified') == 'true',

509

count('count')

510

)

511

}

512

513

post_aggregations = {

514

'avg_length': Field('total_length') / Field('total_tweets'),

515

'verified_ratio': Field('filtered_tweets') / Field('total_tweets')

516

}

517

518

# Having clause

519

having_clause = (Aggregation('total_tweets') > 100) & (Aggregation('avg_length') < 200)

520

```