0
# Query DSL
1
2
The Elasticsearch DSL (Domain Specific Language) provides a Pythonic way to construct search queries, aggregations, and document models. It offers a high-level interface that generates Elasticsearch JSON queries while maintaining the full power and flexibility of the underlying query language.
3
4
## Core Components
5
6
### Search Builder
7
8
The main search interface for constructing and executing queries.
9
10
```python { .api }
11
class Search:
12
"""
13
Main search interface for building and executing queries.
14
"""
15
def __init__(
16
self,
17
using=None,
18
index=None,
19
doc_type=None,
20
extra=None
21
): ...
22
23
def query(self, q): ...
24
def filter(self, f): ...
25
def exclude(self, f): ...
26
def post_filter(self, f): ...
27
def aggregate(self, name, agg): ...
28
def sort(self, *keys): ...
29
def source(self, fields=None, **kwargs): ...
30
def highlight(self, *args, **kwargs): ...
31
def suggest(self, name, text, **kwargs): ...
32
def script_fields(self, **kwargs): ...
33
def from_(self, from_): ...
34
def size(self, size): ...
35
def params(self, **kwargs): ...
36
def index(self, *index): ...
37
def doc_type(self, *doc_type): ...
38
def using(self, client): ...
39
def execute(self, ignore_cache=False): ...
40
def to_dict(self): ...
41
def count(self): ...
42
def delete(self): ...
43
def scan(self): ...
44
45
class AsyncSearch:
46
"""
47
Async version of Search for use with AsyncElasticsearch.
48
"""
49
# Same methods as Search but async
50
51
class MultiSearch:
52
"""
53
Multi-search interface for executing multiple searches in a single request.
54
"""
55
def __init__(self, using=None, index=None): ...
56
def add(self, search): ...
57
def execute(self): ...
58
def to_dict(self): ...
59
60
class AsyncMultiSearch:
61
"""
62
Async version of MultiSearch.
63
"""
64
async def execute(self): ...
65
66
class UpdateByQuery:
67
"""
68
Update by query interface for bulk updates using scripts.
69
"""
70
def __init__(self, using=None, index=None): ...
71
def query(self, q): ...
72
def script(self, **kwargs): ...
73
def execute(self): ...
74
75
class AsyncUpdateByQuery:
76
"""
77
Async version of UpdateByQuery.
78
"""
79
async def execute(self): ...
80
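# NOTE: the four methods below are the AsyncSearch counterparts of Search.execute/count/delete/scan; they are not part of AsyncUpdateByQuery.
async def execute(self, ignore_cache=False): ...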
81
async def count(self): ...
82
async def delete(self): ...
83
def scan(self): ... # Returns async generator
84
```
85
86
### Query Builders
87
88
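Construct various types of Elasticsearch queries. A query can also be built by passing the query type name to `Q`, for example `Q('match', title='python')`; this is the form used in the usage examples below.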
89
90
```python { .api }
91
class Q:
92
"""
93
Base query builder class with query type methods.
94
"""
95
96
@classmethod
97
def match(cls, **kwargs): ...
98
99
@classmethod
100
def match_all(cls, **kwargs): ...
101
102
@classmethod
103
def match_phrase(cls, **kwargs): ...
104
105
@classmethod
106
def match_phrase_prefix(cls, **kwargs): ...
107
108
@classmethod
109
def multi_match(cls, query, fields, **kwargs): ...
110
111
@classmethod
112
def term(cls, **kwargs): ...
113
114
@classmethod
115
def terms(cls, **kwargs): ...
116
117
@classmethod
118
def range(cls, **kwargs): ...
119
120
@classmethod
121
def exists(cls, field): ...
122
123
@classmethod
124
def missing(cls, field): ...
125
126
@classmethod
127
def bool(cls, must=None, should=None, must_not=None, filter=None, **kwargs): ...
128
129
@classmethod
130
def nested(cls, path, query, **kwargs): ...
131
132
@classmethod
133
def has_child(cls, type, query, **kwargs): ...
134
135
@classmethod
136
def has_parent(cls, type, query, **kwargs): ...
137
138
@classmethod
139
def ids(cls, values, **kwargs): ...
140
141
@classmethod
142
def prefix(cls, **kwargs): ...
143
144
@classmethod
145
def wildcard(cls, **kwargs): ...
146
147
@classmethod
148
def regexp(cls, **kwargs): ...
149
150
@classmethod
151
def fuzzy(cls, **kwargs): ...
152
153
@classmethod
154
def query_string(cls, query, **kwargs): ...
155
156
@classmethod
157
def simple_query_string(cls, query, **kwargs): ...
158
159
@classmethod
160
def geo_distance(cls, distance, **kwargs): ...
161
162
@classmethod
163
def geo_bounding_box(cls, **kwargs): ...
164
165
@classmethod
166
def geo_polygon(cls, **kwargs): ...
167
168
@classmethod
169
def geo_shape(cls, **kwargs): ...
170
171
@classmethod
172
def function_score(cls, query=None, functions=None, **kwargs): ...
173
174
@classmethod
175
def script_score(cls, query, script, **kwargs): ...
176
177
# Alias for backward compatibility
178
Query = Q
179
```
180
181
### Aggregation Builders
182
183
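Construct Elasticsearch aggregations for data analysis. Aggregations can also be attached to a search by type name through `Search.aggs`, for example `s.aggs.bucket('categories', 'terms', field='category.keyword')`; this is the form used in the usage examples below.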
184
185
```python { .api }
186
class A:
187
"""
188
Base aggregation builder class.
189
"""
190
191
@classmethod
192
def terms(cls, field, **kwargs): ...
193
194
@classmethod
195
def date_histogram(cls, field, **kwargs): ...
196
197
@classmethod
198
def histogram(cls, field, **kwargs): ...
199
200
@classmethod
201
def range(cls, field, ranges, **kwargs): ...
202
203
@classmethod
204
def date_range(cls, field, ranges, **kwargs): ...
205
206
@classmethod
207
def nested(cls, path, **kwargs): ...
208
209
@classmethod
210
def reverse_nested(cls, **kwargs): ...
211
212
@classmethod
213
def children(cls, type, **kwargs): ...
214
215
@classmethod
216
def filter(cls, filter, **kwargs): ...
217
218
@classmethod
219
def filters(cls, filters, **kwargs): ...
220
221
@classmethod
222
def global_(cls, **kwargs): ...
223
224
@classmethod
225
def missing(cls, field, **kwargs): ...
226
227
@classmethod
228
def significant_terms(cls, field, **kwargs): ...
229
230
@classmethod
231
def cardinality(cls, field, **kwargs): ...
232
233
@classmethod
234
def avg(cls, field, **kwargs): ...
235
236
@classmethod
237
def sum(cls, field, **kwargs): ...
238
239
@classmethod
240
def min(cls, field, **kwargs): ...
241
242
@classmethod
243
def max(cls, field, **kwargs): ...
244
245
@classmethod
246
def stats(cls, field, **kwargs): ...
247
248
@classmethod
249
def extended_stats(cls, field, **kwargs): ...
250
251
@classmethod
252
def percentiles(cls, field, **kwargs): ...
253
254
@classmethod
255
def percentile_ranks(cls, field, values, **kwargs): ...
256
257
@classmethod
258
def top_hits(cls, **kwargs): ...
259
260
@classmethod
261
def geo_bounds(cls, field, **kwargs): ...
262
263
@classmethod
264
def geo_centroid(cls, field, **kwargs): ...
265
266
# Alias for backward compatibility
267
Agg = A
268
```
269
270
### Document Modeling
271
272
Define document structures and mappings.
273
274
```python { .api }
275
class Document:
276
"""
277
Base document class for modeling Elasticsearch documents.
278
"""
279
280
class Index:
281
"""
282
Configuration for the document's index.
283
"""
284
name: Optional[str] = None
285
settings: Optional[Dict] = None
286
mappings: Optional[Dict] = None
287
aliases: Optional[Dict] = None
288
analyzers: Optional[Dict] = None
289
290
class Meta:
291
"""
292
Document metadata configuration.
293
"""
294
doc_type: Optional[str] = None
295
using: Optional[str] = None
296
index: Optional[str] = None
297
routing: Optional[str] = None
298
299
def __init__(self, meta=None, **kwargs): ...
300
301
def save(self, **kwargs): ...
302
303
def update(self, **kwargs): ...
304
305
def delete(self, **kwargs): ...
306
307
@classmethod
308
def get(cls, id, **kwargs): ...
309
310
@classmethod
311
def mget(cls, docs, **kwargs): ...
312
313
@classmethod
314
def search(cls, **kwargs): ...
315
316
def to_dict(self, skip_empty=True): ...
317
318
@classmethod
319
def init(cls, index=None, using=None): ...
320
321
class AsyncDocument:
322
"""
323
Async version of Document class.
324
"""
325
# Same interface as Document but with async methods
326
async def save(self, **kwargs): ...
327
async def update(self, **kwargs): ...
328
async def delete(self, **kwargs): ...
329
330
@classmethod
331
async def get(cls, id, **kwargs): ...
332
333
@classmethod
334
async def mget(cls, docs, **kwargs): ...
335
336
class InnerDoc:
337
"""
338
Base class for nested document objects.
339
"""
340
def __init__(self, **kwargs): ...
341
def to_dict(self, skip_empty=True): ...
342
```
343
344
### Field Types
345
346
Define field mappings and types for documents.
347
348
```python { .api }
349
# Text and Keyword Fields
350
class Text:
351
def __init__(self, analyzer=None, search_analyzer=None, **kwargs): ...
352
353
class Keyword:
354
def __init__(self, ignore_above=None, normalizer=None, **kwargs): ...
355
356
class SearchAsYouType:
357
def __init__(self, max_shingle_size=None, analyzer=None, **kwargs): ...
358
359
class Completion:
360
def __init__(self, analyzer=None, contexts=None, **kwargs): ...
361
362
# Numeric Fields
363
class Integer:
364
def __init__(self, coerce=None, ignore_malformed=None, **kwargs): ...
365
366
class Long:
367
def __init__(self, coerce=None, ignore_malformed=None, **kwargs): ...
368
369
class Float:
370
def __init__(self, coerce=None, ignore_malformed=None, **kwargs): ...
371
372
class Double:
373
def __init__(self, coerce=None, ignore_malformed=None, **kwargs): ...
374
375
class Boolean:
376
def __init__(self, **kwargs): ...
377
378
# Date Fields
379
class Date:
380
def __init__(self, format=None, locale=None, **kwargs): ...
381
382
class DateRange:
383
def __init__(self, format=None, **kwargs): ...
384
385
# Geographic Fields
386
class GeoPoint:
387
def __init__(self, ignore_malformed=None, ignore_z_value=None, **kwargs): ...
388
389
class GeoShape:
390
def __init__(self, tree=None, precision=None, **kwargs): ...
391
392
# Object and Nested Fields
393
class Object:
394
def __init__(self, properties=None, dynamic=None, **kwargs): ...
395
396
class Nested:
397
def __init__(self, properties=None, dynamic=None, **kwargs): ...
398
399
# Specialized Fields
400
class Binary:
401
def __init__(self, **kwargs): ...
402
403
class Join:
404
def __init__(self, relations, **kwargs): ...
405
406
class Percolator:
407
def __init__(self, **kwargs): ...
408
409
class Ip:
410
def __init__(self, ignore_malformed=None, **kwargs): ...
411
412
class TokenCount:
413
def __init__(self, analyzer, **kwargs): ...
414
415
# Vector Fields
416
class DenseVector:
417
def __init__(self, dims, index=None, similarity=None, **kwargs): ...
418
419
class SparseVector:
420
def __init__(self, **kwargs): ...
421
422
class RankFeature:
423
def __init__(self, positive_score_impact=None, **kwargs): ...
424
425
class RankFeatures:
426
def __init__(self, **kwargs): ...
427
```
428
429
### Index Management
430
431
Manage indices and their configurations.
432
433
```python { .api }
434
class Index:
435
"""
436
Index management operations.
437
"""
438
def __init__(self, name, using='default'): ...
439
440
def analyzer(self, name, **kwargs): ...
441
def tokenizer(self, name, **kwargs): ...
442
def token_filter(self, name, **kwargs): ...
443
def char_filter(self, name, **kwargs): ...
444
def normalizer(self, name, **kwargs): ...
445
446
def create(self, **kwargs): ...
447
def delete(self, **kwargs): ...
448
def exists(self): ...
449
def close(self): ...
450
def open(self): ...
451
def clone(self, target, **kwargs): ...
452
def refresh(self): ...
453
def flush(self): ...
454
def force_merge(self, **kwargs): ...
455
456
def put_mapping(self, **kwargs): ...
457
def get_mapping(self): ...
458
def put_settings(self, **kwargs): ...
459
def get_settings(self): ...
460
461
def put_alias(self, name, **kwargs): ...
462
def get_alias(self, name=None): ...
463
def delete_alias(self, name): ...
464
465
class AsyncIndex:
466
"""
467
Async version of Index management.
468
"""
469
# Same interface as Index but with async methods
470
async def create(self, **kwargs): ...
471
async def delete(self, **kwargs): ...
472
async def exists(self): ...
473
```
474
475
## Usage Examples
476
477
### Basic Query Construction
478
479
```python
480
from elasticsearch.dsl import Search, Q
481
482
# Create a search object
483
s = Search()
484
485
# Add a simple match query
486
s = s.query('match', title='python')
487
488
# Add filters
489
s = s.filter('term', status='published')
490
s = s.filter('range', publish_date={'gte': '2015-01-01'})
491
492
# Execute the search
493
response = s.execute()
494
495
# Process results
496
for hit in response:
497
print(f"Title: {hit.title}")
498
print(f"Score: {hit.meta.score}")
499
```
500
501
### Complex Boolean Queries
502
503
```python
504
from elasticsearch.dsl import Search, Q
505
506
# Construct complex boolean query
507
q = Q('bool',
508
must=[
509
Q('match', title='elasticsearch'),
510
Q('range', publish_date={'gte': '2020-01-01'})
511
],
512
should=[
513
Q('match', tags='python'),
514
Q('match', tags='search')
515
],
516
must_not=[
517
Q('term', status='draft')
518
],
519
filter=[
520
Q('term', category='tutorial')
521
]
522
)
523
524
s = Search().query(q)
525
response = s.execute()
526
```
527
528
### Aggregations
529
530
```python
531
from elasticsearch.dsl import Search, A
532
533
s = Search()
534
535
# Add aggregations
536
s.aggs.bucket('categories', 'terms', field='category.keyword', size=10)
537
s.aggs.bucket('monthly_posts', 'date_histogram',
538
field='publish_date',
539
calendar_interval='month')
540
541
# Nested aggregation
542
s.aggs.bucket('categories', 'terms', field='category.keyword')\
543
.metric('avg_score', 'avg', field='score')
544
545
# Execute and process aggregations
546
response = s.execute()
547
548
for bucket in response.aggregations.categories.buckets:
549
print(f"Category: {bucket.key}, Count: {bucket.doc_count}")
550
if hasattr(bucket, 'avg_score'):
551
print(f"Average score: {bucket.avg_score.value}")
552
```
553
554
### Document Modeling
555
556
```python
557
from elasticsearch.dsl import Document, InnerDoc, Text, Keyword, Date, Integer, Nested
558
559
class Comment(InnerDoc):
560
author = Text()
561
content = Text()
562
created_at = Date()
563
564
class Article(Document):
565
title = Text(analyzer='standard')
566
content = Text()
567
author = Keyword()
568
publish_date = Date()
569
tags = Keyword(multi=True)
570
comments = Nested(Comment)
571
view_count = Integer()
572
573
class Index:
574
name = 'articles'
575
settings = {
576
'number_of_shards': 1,
577
'number_of_replicas': 0
578
}
579
580
def save(self, **kwargs):
581
# Custom save logic
582
self.view_count = self.view_count or 0 # Initialize view count only if it has not been set yet
583
return super().save(**kwargs)
584
585
@classmethod
586
def get_published(cls):
587
"""Get only published articles."""
588
s = cls.search()
589
s = s.filter('term', status='published')
590
return s
591
592
# Initialize the index
593
Article.init()
594
595
# Create and save a document
596
article = Article(
597
title='Getting Started with Elasticsearch',
598
content='This is a comprehensive guide...',
599
author='john_doe',
600
publish_date='2024-01-01',
601
tags=['elasticsearch', 'python', 'tutorial']
602
)
603
604
article.save()
605
606
# Search for articles
607
articles = Article.search().filter('term', author='john_doe')
608
for article in articles:
609
print(f"Title: {article.title}")
610
```
611
612
### Advanced Search Features
613
614
```python
615
from elasticsearch.dsl import Search, Q, A
616
617
# Search with highlighting
618
s = Search()
619
s = s.query('match', content='elasticsearch')
620
s = s.highlight('content', fragment_size=150, number_of_fragments=3)
621
622
# Add suggestions
623
s = s.suggest('title_suggestion', 'elasicsearch', term={'field': 'title'})
624
625
# Add script fields
626
s = s.script_fields(
627
popularity_score={
628
'script': {
629
'source': 'doc["view_count"].value * doc["like_count"].value'
630
}
631
}
632
)
633
634
# Sorting
635
s = s.sort('-publish_date', {'view_count': {'order': 'desc'}})
636
637
# Pagination
638
s = s[10:20] # Skip 10, take 10
639
640
response = s.execute()
641
642
# Process highlights
643
for hit in response:
644
if hasattr(hit.meta, 'highlight'):
645
for fragment in hit.meta.highlight.content:
646
print(f"Highlight: {fragment}")
647
648
# Access script fields (returned under the hit's `fields` metadata; each value is a list)
649
if hasattr(hit.meta, 'fields'):
650
print(f"Popularity: {hit.meta.fields.popularity_score}")
651
652
# Process suggestions
653
if hasattr(response, 'suggest'):
654
for suggestion in response.suggest.title_suggestion:
655
print(f"Original: {suggestion.text}")
656
for option in suggestion.options:
657
print(f"Suggestion: {option.text}")
658
```
659
660
### Multi-Search Operations
661
662
```python
663
from elasticsearch.dsl import MultiSearch, Search
664
665
# Create multiple searches
666
ms = MultiSearch()
667
668
# Add individual searches
669
s1 = Search().query('match', title='python')
670
s2 = Search().query('match', title='elasticsearch')
671
s3 = Search().filter('range', publish_date={'gte': '2024-01-01'})
672
673
ms = ms.add(s1)
674
ms = ms.add(s2)
675
ms = ms.add(s3)
676
677
# Execute all searches
678
responses = ms.execute()
679
680
# Process results
681
for i, response in enumerate(responses):
682
print(f"Search {i+1}: {response.hits.total.value} hits")
683
for hit in response:
684
print(f" - {hit.title}")
685
```
686
687
### Faceted Search
688
689
```python
690
from elasticsearch.dsl import FacetedSearch, TermsFacet, DateHistogramFacet, RangeFacet
691
692
class ArticleSearch(FacetedSearch):
693
doc_types = [Article] # Article is the Document subclass defined in the "Document Modeling" example
694
695
facets = {
696
'category': TermsFacet(field='category.keyword'),
697
'tags': TermsFacet(field='tags.keyword'),
698
'publish_year': DateHistogramFacet(
699
field='publish_date',
700
calendar_interval='year'
701
),
702
'view_ranges': RangeFacet(
703
field='view_count',
704
ranges=[
705
('low', (None, 100)),
706
('medium', (100, 1000)),
707
('high', (1000, None))
708
]
709
)
710
}
711
712
def search(self):
713
# Base query
714
s = super().search()
715
# Add default filters
716
s = s.filter('term', status='published')
717
return s
718
719
# Use faceted search
720
search = ArticleSearch('python tutorial', {
721
'category': ['programming'],
722
'view_ranges': ['high']
723
})
724
725
response = search.execute()
726
727
# Access facets
728
for facet_name, facet in response.facets.items():
729
print(f"{facet_name}:")
730
for bucket in facet:
731
print(f" {bucket[0]}: {bucket[1]}")
732
```