0
# Document Operations
1
2
Object-document mapping for Elasticsearch providing automatic index management, CRUD operations, bulk processing, and lifecycle hooks. The Document class bridges Python objects and Elasticsearch documents while maintaining type safety and providing convenient persistence methods.
3
4
## Capabilities
5
6
### Document Class Definition
7
8
Base class for creating Elasticsearch documents with field definitions, index configuration, and persistence methods.
9
10
```python { .api }
11
class Document:
12
"""
13
Base class for Elasticsearch documents.
14
15
Attributes are automatically converted to appropriate Field instances
16
based on their type annotations or assignments.
17
"""
18
def __init__(self, meta=None, **kwargs):
19
"""
20
Initialize document instance.
21
22
Args:
23
meta (dict, optional): Document metadata (id, index, etc.)
24
**kwargs: Field values for the document
25
"""
26
27
def save(self, using=None, index=None, validate=True, skip_empty=True, **kwargs):
28
"""
29
Save document to Elasticsearch.
30
31
Args:
32
using (str, optional): Connection alias to use
33
index (str, optional): Index name override
34
validate (bool): Whether to validate before saving
35
skip_empty (bool): Skip empty fields
36
**kwargs: Additional Elasticsearch index parameters
37
38
Returns:
39
str: Operation result, 'created' if a new document was indexed or 'updated' if an existing one was overwritten
40
"""
41
42
def delete(self, using=None, index=None, **kwargs):
43
"""
44
Delete document from Elasticsearch.
45
46
Args:
47
using (str, optional): Connection alias to use
48
index (str, optional): Index name override
49
**kwargs: Additional Elasticsearch delete parameters
50
51
Returns:
52
None: This method does not return a value
53
"""
54
55
def update(self, using=None, index=None, detect_noop=True, **kwargs):
56
"""
57
Update document in Elasticsearch.
58
59
Args:
60
using (str, optional): Connection alias to use
61
index (str, optional): Index name override
62
detect_noop (bool): Detect if update is a no-op
63
**kwargs: Additional update parameters
64
65
Returns:
66
str: Operation result, 'updated' or 'noop'
67
"""
68
69
@classmethod
70
def get(cls, id, using=None, index=None, **kwargs):
71
"""
72
Retrieve document by ID.
73
74
Args:
75
id: Document ID
76
using (str, optional): Connection alias to use
77
index (str, optional): Index name override
78
**kwargs: Additional get parameters
79
80
Returns:
81
Document: Document instance
82
83
Raises:
84
NotFoundError: If document doesn't exist
85
"""
86
87
@classmethod
88
def mget(cls, docs, using=None, index=None, raise_on_error=True, **kwargs):
89
"""
90
Multi-get documents by IDs.
91
92
Args:
93
docs (list): List of document IDs or dicts with ID and other params
94
using (str, optional): Connection alias to use
95
index (str, optional): Index name override
96
raise_on_error (bool): Raise exception on missing documents
97
**kwargs: Additional mget parameters
98
99
Returns:
100
list: List of Document instances (None for missing docs if not raising)
101
"""
102
103
@classmethod
104
def search(cls, using=None, index=None):
105
"""
106
Create Search instance for this document type.
107
108
Args:
109
using (str, optional): Connection alias to use
110
index (str, optional): Index name override
111
112
Returns:
113
Search: Search instance configured for this document type
114
"""
115
116
@classmethod
117
def init(cls, index=None, using=None, **kwargs):
118
"""
119
Create index and put mapping for this document.
120
121
Args:
122
index (str, optional): Index name override
123
using (str, optional): Connection alias to use
124
**kwargs: Additional index creation parameters
125
"""
126
127
def to_dict(self, include_meta=False, skip_empty=True):
128
"""
129
Convert document to dictionary.
130
131
Args:
132
include_meta (bool): Include document metadata
133
skip_empty (bool): Skip empty fields
134
135
Returns:
136
dict: Document as dictionary
137
"""
138
139
@classmethod
140
def from_dict(cls, d):
141
"""
142
Create document instance from dictionary.
143
144
Args:
145
d (dict): Dictionary with document data
146
147
Returns:
148
Document: Document instance
149
"""
150
```
151
152
### Async Document Operations
153
154
Asynchronous version of Document class for async/await operations.
155
156
```python { .api }
157
class AsyncDocument:
158
"""
159
Async version of Document class for async/await operations.
160
"""
161
162
async def save(self, using=None, index=None, validate=True, skip_empty=True, **kwargs):
163
"""
164
Async save document to Elasticsearch.
165
166
Args:
167
using (str, optional): Connection alias to use
168
index (str, optional): Index name override
169
validate (bool): Whether to validate before saving
170
skip_empty (bool): Skip empty fields
171
**kwargs: Additional Elasticsearch index parameters
172
173
Returns:
174
str: Operation result, 'created' if a new document was indexed or 'updated' if an existing one was overwritten
175
"""
176
177
async def delete(self, using=None, index=None, **kwargs):
178
"""
179
Async delete document from Elasticsearch.
180
181
Args:
182
using (str, optional): Connection alias to use
183
index (str, optional): Index name override
184
**kwargs: Additional Elasticsearch delete parameters
185
186
Returns:
187
None: This method does not return a value
188
"""
189
190
async def update(self, using=None, index=None, detect_noop=True, **kwargs):
191
"""
192
Async update document in Elasticsearch.
193
194
Args:
195
using (str, optional): Connection alias to use
196
index (str, optional): Index name override
197
detect_noop (bool): Detect if update is a no-op
198
**kwargs: Additional update parameters
199
200
Returns:
201
str: Operation result, 'updated' or 'noop'
202
"""
203
204
@classmethod
205
async def get(cls, id, using=None, index=None, **kwargs):
206
"""
207
Async retrieve document by ID.
208
209
Args:
210
id: Document ID
211
using (str, optional): Connection alias to use
212
index (str, optional): Index name override
213
**kwargs: Additional get parameters
214
215
Returns:
216
AsyncDocument: Document instance
217
218
Raises:
219
NotFoundError: If document doesn't exist
220
"""
221
222
@classmethod
223
async def mget(cls, docs, using=None, index=None, raise_on_error=True, **kwargs):
224
"""
225
Async multi-get documents by IDs.
226
227
Args:
228
docs (list): List of document IDs or dicts with ID and other params
229
using (str, optional): Connection alias to use
230
index (str, optional): Index name override
231
raise_on_error (bool): Raise exception on missing documents
232
**kwargs: Additional mget parameters
233
234
Returns:
235
list: List of AsyncDocument instances
236
"""
237
238
@classmethod
239
async def init(cls, index=None, using=None, **kwargs):
240
"""
241
Async create index and put mapping for this document.
242
243
Args:
244
index (str, optional): Index name override
245
using (str, optional): Connection alias to use
246
**kwargs: Additional index creation parameters
247
"""
248
```
249
250
### Inner Document Definition
251
252
For nested document definitions within other documents.
253
254
```python { .api }
255
class InnerDoc:
256
"""
257
Base class for nested document definitions.
258
259
Used to define object and nested field structures within documents.
260
"""
261
262
def __init__(self, **kwargs):
263
"""
264
Initialize inner document.
265
266
Args:
267
**kwargs: Field values for the inner document
268
"""
269
270
def to_dict(self, skip_empty=True):
271
"""
272
Convert inner document to dictionary.
273
274
Args:
275
skip_empty (bool): Skip empty fields
276
277
Returns:
278
dict: Inner document as dictionary
279
"""
280
```
281
282
### Index Configuration
283
284
Configure index settings and mappings within Document classes.
285
286
```python { .api }
287
class Index:
288
"""
289
Index configuration class used within Document definitions.
290
291
Example:
292
class MyDoc(Document):
293
title = Text()
294
295
class Index:
296
name = 'my_index'
297
settings = {
298
'number_of_shards': 2,
299
'number_of_replicas': 1
300
}
301
"""
302
name: str # Index name
303
settings: dict # Index settings
304
aliases: dict # Index aliases
305
```
306
307
### Meta Information
308
309
Document metadata handling for ID, index, routing, and other Elasticsearch document properties.
310
311
```python { .api }
312
class Meta:
313
"""
314
Document metadata container.
315
316
Accessible via document.meta property.
317
"""
318
id: str # Document ID
319
index: str # Document index
320
doc_type: str # Document type (deprecated in ES 7+)
321
routing: str # Document routing
322
parent: str # Parent document ID (for parent-child)
323
version: int # Document version
324
seq_no: int # Sequence number
325
primary_term: int # Primary term
326
score: float # Search score (when from search results)
327
```
328
329
### Update by Query Operations
330
331
Update multiple documents matching a query.
332
333
```python { .api }
334
class UpdateByQuery:
335
"""
336
Update documents matching a query.
337
"""
338
339
def __init__(self, using=None, index=None):
340
"""
341
Initialize update by query operation.
342
343
Args:
344
using (str, optional): Connection alias to use
345
index (str or list, optional): Index name(s) to update
346
"""
347
348
def script(self, **kwargs):
349
"""
350
Set update script.
351
352
Args:
353
**kwargs: Script parameters
354
355
Returns:
356
UpdateByQuery: Current instance with script applied
357
"""
358
359
def query(self, query, **kwargs):
360
"""
361
Set query to match documents for update.
362
363
Args:
364
query (str or Query): Query to match documents
365
**kwargs: Query parameters if query is a string
366
367
Returns:
368
UpdateByQuery: Current instance with query applied
369
"""
370
371
def filter(self, query, **kwargs):
372
"""
373
Add filter to update by query.
374
375
Args:
376
query (str or Query): Filter query
377
**kwargs: Filter parameters if query is a string
378
379
Returns:
380
UpdateByQuery: Current instance with filter applied
381
"""
382
383
def execute(self):
384
"""
385
Execute update by query operation.
386
387
Returns:
388
dict: Update by query response with statistics
389
"""
390
391
def params(self, **kwargs):
392
"""
393
Set update by query parameters.
394
395
Args:
396
**kwargs: Update parameters
397
398
Parameters:
399
conflicts (str): How to handle conflicts ('abort' or 'proceed')
400
refresh (bool or str): Refresh policy
401
timeout (str): Operation timeout
402
wait_for_active_shards (str): Wait for active shards
403
wait_for_completion (bool): Wait for completion
404
requests_per_second (int): Throttling rate
405
scroll_size (int): Scroll batch size
406
pipeline (str): Ingest pipeline to use
407
408
Returns:
409
UpdateByQuery: Current instance with parameters applied
410
"""
411
412
class AsyncUpdateByQuery:
413
"""
414
Async version of UpdateByQuery for async/await operations.
415
"""
416
417
def __init__(self, using=None, index=None):
418
"""Initialize async update by query operation."""
419
420
def script(self, **kwargs):
421
"""Set update script (same as UpdateByQuery)."""
422
423
def query(self, query, **kwargs):
424
"""Set query to match documents (same as UpdateByQuery)."""
425
426
def filter(self, query, **kwargs):
427
"""Add filter (same as UpdateByQuery)."""
428
429
def params(self, **kwargs):
430
"""Set parameters (same as UpdateByQuery)."""
431
432
async def execute(self):
433
"""
434
Async execute update by query operation.
435
436
Returns:
437
dict: Update by query response with statistics
438
"""
439
```
440
441
### Delete by Query Operations
442
443
Delete multiple documents matching a query.
444
445
```python { .api }
446
class DeleteByQuery:
447
"""
448
Delete documents matching a query.
449
"""
450
451
def __init__(self, using=None, index=None):
452
"""
453
Initialize delete by query operation.
454
455
Args:
456
using (str, optional): Connection alias to use
457
index (str or list, optional): Index name(s) to delete from
458
"""
459
460
def query(self, query, **kwargs):
461
"""
462
Set query to match documents for deletion.
463
464
Args:
465
query (str or Query): Query to match documents
466
**kwargs: Query parameters if query is a string
467
468
Returns:
469
DeleteByQuery: Current instance with query applied
470
"""
471
472
def filter(self, query, **kwargs):
473
"""
474
Add filter to delete by query.
475
476
Args:
477
query (str or Query): Filter query
478
**kwargs: Filter parameters if query is a string
479
480
Returns:
481
DeleteByQuery: Current instance with filter applied
482
"""
483
484
def execute(self):
485
"""
486
Execute delete by query operation.
487
488
Returns:
489
dict: Delete by query response with statistics
490
"""
491
492
def params(self, **kwargs):
493
"""
494
Set delete by query parameters.
495
496
Args:
497
**kwargs: Delete parameters
498
499
Parameters:
500
conflicts (str): How to handle conflicts ('abort' or 'proceed')
501
refresh (bool or str): Refresh policy
502
timeout (str): Operation timeout
503
wait_for_active_shards (str): Wait for active shards
504
wait_for_completion (bool): Wait for completion
505
requests_per_second (int): Throttling rate
506
scroll_size (int): Scroll batch size
507
508
Returns:
509
DeleteByQuery: Current instance with parameters applied
510
"""
511
512
class AsyncDeleteByQuery:
513
"""
514
Async version of DeleteByQuery for async/await operations.
515
"""
516
517
def __init__(self, using=None, index=None):
518
"""Initialize async delete by query operation."""
519
520
def query(self, query, **kwargs):
521
"""Set query to match documents (same as DeleteByQuery)."""
522
523
def filter(self, query, **kwargs):
524
"""Add filter (same as DeleteByQuery)."""
525
526
def params(self, **kwargs):
527
"""Set parameters (same as DeleteByQuery)."""
528
529
async def execute(self):
530
"""
531
Async execute delete by query operation.
532
533
Returns:
534
dict: Delete by query response with statistics
535
"""
536
```
537
538
### Reindex Operations
539
540
Reindex documents from source to destination index.
541
542
```python { .api }
543
class Reindex:
544
"""
545
Reindex documents from source to destination.
546
"""
547
548
def __init__(self, using=None):
549
"""
550
Initialize reindex operation.
551
552
Args:
553
using (str, optional): Connection alias to use
554
"""
555
556
def source(self, **kwargs):
557
"""
558
Configure source for reindex operation.
559
560
Args:
561
**kwargs: Source configuration
562
563
Parameters:
564
index (str or list): Source index name(s)
565
query (dict): Query to filter source documents
566
sort (list): Sort order for source documents
567
_source (list or dict): Source field filtering
568
size (int): Batch size for reindexing
569
570
Returns:
571
Reindex: Current instance with source configured
572
"""
573
574
def dest(self, **kwargs):
575
"""
576
Configure destination for reindex operation.
577
578
Args:
579
**kwargs: Destination configuration
580
581
Parameters:
582
index (str): Destination index name
583
type (str): Destination document type (deprecated)
584
routing (str): Routing for destination documents
585
op_type (str): Operation type ('index' or 'create')
586
version_type (str): Version type for conflicts
587
pipeline (str): Ingest pipeline to use
588
589
Returns:
590
Reindex: Current instance with destination configured
591
"""
592
593
def script(self, **kwargs):
594
"""
595
Set reindex script for document transformation.
596
597
Args:
598
**kwargs: Script configuration
599
600
Returns:
601
Reindex: Current instance with script applied
602
"""
603
604
def execute(self):
605
"""
606
Execute reindex operation.
607
608
Returns:
609
dict: Reindex response with statistics
610
"""
611
612
def params(self, **kwargs):
613
"""
614
Set reindex parameters.
615
616
Args:
617
**kwargs: Reindex parameters
618
619
Parameters:
620
conflicts (str): How to handle conflicts ('abort' or 'proceed')
621
refresh (bool or str): Refresh policy
622
timeout (str): Operation timeout
623
wait_for_active_shards (str): Wait for active shards
624
wait_for_completion (bool): Wait for completion
625
requests_per_second (int): Throttling rate
626
627
Returns:
628
Reindex: Current instance with parameters applied
629
"""
630
631
class AsyncReindex:
632
"""
633
Async version of Reindex for async/await operations.
634
"""
635
636
def __init__(self, using=None):
637
"""Initialize async reindex operation."""
638
639
def source(self, **kwargs):
640
"""Configure source (same as Reindex)."""
641
642
def dest(self, **kwargs):
643
"""Configure destination (same as Reindex)."""
644
645
def script(self, **kwargs):
646
"""Set script (same as Reindex)."""
647
648
def params(self, **kwargs):
649
"""Set parameters (same as Reindex)."""
650
651
async def execute(self):
652
"""
653
Async execute reindex operation.
654
655
Returns:
656
dict: Reindex response with statistics
657
"""
658
```
659
660
## Usage Examples
661
662
### Basic Document Definition and Operations
663
664
```python
from elasticsearch_dsl import Document, Text, Keyword, Date, Integer, connections

# Configure connection (the URL scheme is required by the 8.x client)
connections.create_connection(hosts=['http://localhost:9200'])

class BlogPost(Document):
    title = Text(analyzer='snowball')
    content = Text()
    author = Keyword()
    published = Date()
    views = Integer()

    class Index:
        name = 'blog'
        settings = {
            'number_of_shards': 2,
        }

# Create index and mapping
BlogPost.init()

# Create and save document
post = BlogPost(
    title='My First Post',
    content='This is the content of my first blog post...',
    author='john_doe',
    published='2023-10-01T10:30:00',
    views=0
)
post.save()

# Retrieve document
retrieved_post = BlogPost.get(id=post.meta.id)
print(f"Post: {retrieved_post.title} by {retrieved_post.author}")

# Update document
retrieved_post.views = 10
retrieved_post.save()

# Delete document
retrieved_post.delete()
```
707
708
### Nested and Object Fields
709
710
```python
from elasticsearch_dsl import Document, Text, Keyword, Date, Object, Nested, InnerDoc

class Address(InnerDoc):
    street = Text()
    city = Text()
    country = Keyword()

class Comment(InnerDoc):
    author = Keyword()
    content = Text()
    timestamp = Date()

class User(Document):
    name = Text()
    email = Keyword()
    address = Object(Address)  # Single embedded object (object field type)
    comments = Nested(Comment)  # Array of nested objects

    class Index:
        name = 'users'

# Create user with nested data
user = User(
    name='John Doe',
    email='john@example.com',
    address=Address(
        street='123 Main St',
        city='New York',
        country='USA'
    ),
    comments=[
        Comment(
            author='friend1',
            content='Great profile!',
            timestamp='2023-10-01T12:00:00'
        )
    ]
)
user.save()
```
751
752
### Bulk Operations
753
754
```python
from elasticsearch_dsl import Document, Text, connections
from elasticsearch.helpers import bulk

class Article(Document):
    title = Text()
    content = Text()

    class Index:
        name = 'articles'

# Bulk create documents
articles = [
    Article(title=f'Article {i}', content=f'Content for article {i}')
    for i in range(100)
]

# Bulk save using elasticsearch-py helper
# (assumes a default connection was already configured with connections.create_connection(...))
actions = [
    article.to_dict(include_meta=True)
    for article in articles
]
bulk(connections.get_connection(), actions)
```