0
# Document Modeling
1
2
Object-relational mapping (ORM) style document modeling with field definitions, automatic mapping generation, and validation for structured data handling in OpenSearch. The DSL provides a Pythonic way to define document schemas and interact with OpenSearch indices.
3
4
## Capabilities
5
6
### Document Base Class
7
8
Base class for creating document models with ORM-like functionality.
9
10
```python { .api }
11
class Document:
12
def __init__(self, **kwargs):
13
"""
14
Initialize document instance with field values.
15
16
Parameters:
17
- **kwargs: Field values for the document
18
"""
19
20
def save(self, using=None, index=None, **kwargs):
21
"""
22
Save document to OpenSearch.
23
24
Parameters:
25
- using: OpenSearch client instance
26
- index (str, optional): Index name (defaults to the name declared on the document's inner Index class)
27
- refresh (str/bool, optional): Refresh policy
28
- routing (str, optional): Routing value
29
- pipeline (str, optional): Ingest pipeline
30
31
Returns:
32
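str: Operation result ('created' or 'updated'); the full response metadata dict if return_doc_meta=True is passed. The instance's meta attributes are updated in place.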
33
"""
34
35
def update(self, using=None, index=None, **kwargs):
36
"""
37
Update document in OpenSearch.
38
39
Parameters:
40
- using: OpenSearch client instance
41
- index (str, optional): Index name
42
- refresh (str/bool, optional): Refresh policy
43
- retry_on_conflict (int, optional): Retry on version conflict
44
45
Returns:
46
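str: Operation result (e.g. 'updated' or 'noop'); the full response metadata dict if return_doc_meta=True is passed. The instance's fields and meta attributes are updated in place.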
47
"""
48
49
def delete(self, using=None, index=None, **kwargs):
50
"""
51
Delete document from OpenSearch.
52
53
Parameters:
54
- using: OpenSearch client instance
55
- index (str, optional): Index name
56
- refresh (str/bool, optional): Refresh policy
57
58
Returns:
59
None (the delete request is issued; errors surface as exceptions)
60
"""
61
62
@classmethod
63
def get(cls, id, using=None, index=None, **kwargs):
64
"""
65
Retrieve document by ID.
66
67
Parameters:
68
- id: Document ID
69
- using: OpenSearch client instance
70
- index (str, optional): Index name
71
72
Returns:
73
Document instance
74
75
Raises:
76
NotFoundError: If document doesn't exist
77
"""
78
79
@classmethod
80
def mget(cls, docs, using=None, index=None, **kwargs):
81
"""
82
Retrieve multiple documents by ID.
83
84
Parameters:
85
- docs: List of document IDs or dicts with id/index
86
- using: OpenSearch client instance
87
- index (str, optional): Default index name
88
89
Returns:
90
List of document instances
91
"""
92
93
@classmethod
94
def search(cls, using=None, index=None):
95
"""
96
Get Search object for this document type.
97
98
Parameters:
99
- using: OpenSearch client instance
100
- index (str, optional): Index name
101
102
Returns:
103
Search instance configured for this document type
104
"""
105
106
@classmethod
107
def exists(cls, id, using=None, index=None, **kwargs):
108
"""
109
Check if document exists.
110
111
Parameters:
112
- id: Document ID
113
- using: OpenSearch client instance
114
- index (str, optional): Index name
115
116
Returns:
117
bool: True if document exists
118
"""
119
120
def to_dict(self, include_meta=False, skip_empty=True):
121
"""
122
Convert document to dictionary.
123
124
Parameters:
125
- include_meta (bool): Include metadata fields
126
- skip_empty (bool): Skip fields with empty values
127
128
Returns:
129
dict: Document as dictionary
130
"""
131
132
@classmethod
133
def from_dict(cls, d):
134
"""
135
Create document instance from dictionary.
136
137
Parameters:
138
- d (dict): Document data
139
140
Returns:
141
Document instance
142
"""
143
```
144
145
### Inner Document Class
146
147
For modeling nested objects within documents.
148
149
```python { .api }
150
class InnerDoc:
151
def __init__(self, **kwargs):
152
"""
153
Initialize inner document with field values.
154
155
Parameters:
156
- **kwargs: Field values for the inner document
157
"""
158
159
def to_dict(self, skip_empty=True):
160
"""
161
Convert inner document to dictionary.
162
163
Parameters:
164
- skip_empty (bool): Skip fields with empty values
165
166
Returns:
167
dict: Inner document as dictionary
168
"""
169
170
@classmethod
171
def from_dict(cls, d):
172
"""
173
Create inner document from dictionary.
174
175
Parameters:
176
- d (dict): Inner document data
177
178
Returns:
179
InnerDoc instance
180
"""
181
```
182
183
### Mapping Management
184
185
Define and manage index mappings programmatically.
186
187
```python { .api }
188
class Mapping:
189
def __init__(self):
190
"""Initialize empty mapping."""
191
192
def field(self, name, field_type, **kwargs):
193
"""
194
Add field to mapping.
195
196
Parameters:
197
- name (str): Field name
198
- field_type (str/Field): Field type or Field instance
199
- **kwargs: Field parameters
200
201
Returns:
202
self (for chaining)
203
"""
204
205
def meta(self, name, **kwargs):
206
"""
207
Add metadata field to mapping.
208
209
Parameters:
210
- name (str): Meta field name
211
- **kwargs: Meta field parameters
212
213
Returns:
214
self (for chaining)
215
"""
216
217
def save(self, index, using=None, **kwargs):
218
"""
219
Save mapping to OpenSearch index.
220
221
Parameters:
222
- index (str): Index name
223
- using: OpenSearch client instance
224
- **kwargs: Additional mapping parameters
225
226
Returns:
227
Mapping creation response
228
"""
229
230
def update_from_opensearch(self, index, using=None):
231
"""
232
Update mapping from existing OpenSearch index.
233
234
Parameters:
235
- index (str): Index name
236
- using: OpenSearch client instance
237
238
Returns:
239
self
240
"""
241
242
def to_dict(self):
243
"""
244
Convert mapping to dictionary.
245
246
Returns:
247
dict: Mapping as dictionary
248
"""
249
```
250
251
### Index Management
252
253
Manage OpenSearch indices with settings and mappings.
254
255
```python { .api }
256
class Index:
257
def __init__(self, name, using=None):
258
"""
259
Initialize index manager.
260
261
Parameters:
262
- name (str): Index name
263
- using: OpenSearch client instance
264
"""
265
266
def settings(self, **kwargs):
267
"""
268
Set index settings.
269
270
Parameters:
271
- **kwargs: Index settings
272
273
Returns:
274
self (for chaining)
275
"""
276
277
def mapping(self, mapping):
278
"""
279
Set index mapping.
280
281
Parameters:
282
- mapping (Mapping): Mapping instance
283
284
Returns:
285
self (for chaining)
286
"""
287
288
def doc_type(self, document):
289
"""
290
Register document type with index.
291
292
Parameters:
293
- document (Document): Document class
294
295
Returns:
296
self (for chaining)
297
"""
298
299
def analyzer(self, name, **kwargs):
300
"""
301
Add custom analyzer to index.
302
303
Parameters:
304
- name (str): Analyzer name
305
- **kwargs: Analyzer configuration
306
307
Returns:
308
self (for chaining)
309
"""
310
311
def create(self, **kwargs):
312
"""
313
Create the index in OpenSearch.
314
315
Parameters:
316
- **kwargs: Index creation parameters
317
318
Returns:
319
Index creation response
320
"""
321
322
def delete(self, **kwargs):
323
"""
324
Delete the index from OpenSearch.
325
326
Parameters:
327
- **kwargs: Index deletion parameters
328
329
Returns:
330
Index deletion response
331
"""
332
333
def exists(self):
334
"""
335
Check if index exists.
336
337
Returns:
338
bool: True if index exists
339
"""
340
341
def open(self, **kwargs):
342
"""
343
Open the index.
344
345
Parameters:
346
- **kwargs: Index open parameters
347
348
Returns:
349
Index open response
350
"""
351
352
def close(self, **kwargs):
353
"""
354
Close the index.
355
356
Parameters:
357
- **kwargs: Index close parameters
358
359
Returns:
360
Index close response
361
"""
362
```
363
364
## Usage Examples
365
366
### Basic Document Model
367
368
```python
369
from opensearchpy import Document, Text, Keyword, Integer, Date
370
from datetime import datetime
371
372
class Article(Document):
373
title = Text(analyzer='standard')
374
content = Text()
375
author = Keyword()
376
category = Keyword()
377
published_date = Date()
378
view_count = Integer()
379
tags = Keyword(multi=True)
380
381
class Index:
382
name = 'articles'
383
settings = {
384
'number_of_shards': 1,
385
'number_of_replicas': 0
386
}
387
388
class Meta:
389
doc_type = '_doc'
390
391
# Create and save document
392
article = Article(
393
title='Introduction to OpenSearch',
394
content='OpenSearch is a powerful search and analytics engine...',
395
author='john_doe',
396
category='technology',
397
published_date=datetime.now(),
398
view_count=0,
399
tags=['search', 'analytics', 'opensource']
400
)
401
402
# Save to OpenSearch
403
article.meta.id = 'article-1'
404
article.save(using=client)
405
406
print(f"Article saved with ID: {article.meta.id}")
407
```
408
409
### Nested Document Modeling
410
411
```python
412
from datetime import datetime
from opensearchpy import Document, InnerDoc, Nested, Text, Keyword, Integer, Date
413
414
class Comment(InnerDoc):
415
author = Keyword()
416
content = Text()
417
created_date = Date()
418
rating = Integer()
419
420
class Product(Document):
421
name = Text()
422
description = Text()
423
category = Keyword()
424
price = Integer()
425
comments = Nested(Comment)
426
427
class Index:
428
name = 'products'
429
430
# Create product with nested comments
431
product = Product(
432
name='Wireless Headphones',
433
description='High-quality wireless headphones with noise cancellation',
434
category='electronics',
435
price=199,
436
comments=[
437
Comment(
438
author='user1',
439
content='Great sound quality!',
440
created_date=datetime.now(),
441
rating=5
442
),
443
Comment(
444
author='user2',
445
content='Good value for money',
446
created_date=datetime.now(),
447
rating=4
448
)
449
]
450
)
451
452
product.save(using=client)
453
```
454
455
### Custom Field Types and Validation
456
457
```python
458
from opensearchpy import Document, Field, ValidationException
459
from opensearchpy import Text, Keyword, Integer
460
461
class EmailField(Keyword):
462
def clean(self, data):
463
if data and '@' not in data:
464
raise ValidationException('Invalid email format')
465
return super().clean(data)
466
467
class User(Document):
468
username = Keyword(required=True)
469
email = EmailField(required=True)
470
full_name = Text()
471
age = Integer()
472
bio = Text()
473
474
def clean(self):
475
# Document-level validation
476
if self.age and self.age < 0:
477
raise ValidationException('Age cannot be negative')
478
479
if self.username and len(self.username) < 3:
480
raise ValidationException('Username must be at least 3 characters')
481
482
def save(self, **kwargs):
483
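# Custom save logic (note: super().save() already runs full_clean(), which calls clean(), unless validate=False — the explicit call below is only needed for an early, pre-save check)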
484
self.clean()
485
return super().save(**kwargs)
486
487
class Index:
488
name = 'users'
489
490
# Create user with validation
491
user = User(
492
username='johndoe',
493
email='john@example.com',
494
full_name='John Doe',
495
age=30,
496
bio='Software developer interested in search technologies'
497
)
498
499
user.save(using=client)
500
```
501
502
### Document Relationships
503
504
```python
505
from opensearchpy import Document, Join, Text, Keyword, Integer
506
507
class BlogPost(Document):
508
title = Text()
509
content = Text()
510
author = Keyword()
511
post_comment = Join(relations={'post': 'comment'})
512
513
class Index:
514
name = 'blog'
515
516
class Comment(Document):
517
content = Text()
518
author = Keyword()
519
post_comment = Join(relations={'post': 'comment'})
520
521
class Index:
522
name = 'blog'
523
524
# Create parent document (blog post)
525
post = BlogPost(
526
title='My First Blog Post',
527
content='This is the content of my first blog post...',
528
author='blogger',
529
post_comment={'name': 'post'}
530
)
531
post.meta.id = 'post-1'
532
post.save(using=client)
533
534
# Create child document (comment)
535
comment = Comment(
536
content='Great post!',
537
author='reader',
538
post_comment={'name': 'comment', 'parent': 'post-1'}
539
)
540
comment.meta.id = 'comment-1'
541
comment.meta.routing = 'post-1' # Route to same shard as parent
542
comment.save(using=client)
543
```
544
545
### Dynamic Document Templates
546
547
```python
548
from opensearchpy import Document, MetaField, Text, Keyword

class FlexibleDocument(Document):
    """Document that accepts any fields dynamically."""
    title = Text(required=True)
    category = Keyword()

    class Index:
        name = 'flexible_docs'

    class Meta:
        # Mapping-level options are declared with MetaField; they are part of
        # the index mapping, not of the index settings.
        dynamic = MetaField('true')
        dynamic_templates = MetaField([
            {
                'strings_as_keywords': {
                    'match_mapping_type': 'string',
                    'mapping': {
                        'type': 'keyword'
                    }
                }
            }
        ])

# Create the index and its mapping (including the dynamic templates)
FlexibleDocument.init(using=client)
572
573
# Create document with dynamic fields
574
doc = FlexibleDocument(
575
title='Dynamic Document',
576
category='example',
577
# These fields will be added dynamically
578
custom_field='custom_value',
579
numerical_data=42,
580
metadata={'version': '1.0', 'source': 'api'}
581
)
582
583
doc.save(using=client)
584
```
585
586
### Bulk Document Operations
587
588
```python
589
from opensearchpy.helpers import parallel_bulk
590
591
def generate_articles(count=1000):
592
"""Generate article documents."""
593
for i in range(count):
594
article = Article(
595
title=f'Article {i}',
596
content=f'Content for article {i}...',
597
author=f'author_{i % 10}',
598
category='technology',
599
published_date=datetime.now(),
600
view_count=0,
601
tags=['tag1', 'tag2']
602
)
603
article.meta.id = f'article-{i}'
604
yield article.to_dict(include_meta=True)
605
606
# Bulk save articles
607
for success, info in parallel_bulk(
608
client,
609
generate_articles(1000),
610
index='articles',
611
chunk_size=100
612
):
613
if not success:
614
print(f'Failed to index: {info}')
615
616
print('Bulk indexing completed')
617
```
618
619
### Search with Document Models
620
621
```python
622
from opensearchpy import Search, Q
623
624
# Search using document model
625
s = Article.search(using=client)
626
s = s.query(Q('match', title='OpenSearch'))
627
s = s.filter(Q('term', category='technology'))
628
s = s.sort('-published_date')
629
630
# Execute search and get document instances
631
response = s.execute()
632
633
for article in response:
634
print(f'Title: {article.title}')
635
print(f'Author: {article.author}')
636
print(f'Published: {article.published_date}')
637
print('---')
638
639
# Aggregations with document models
640
s = Article.search(using=client)
641
s.aggs.bucket('authors', 'terms', field='author', size=10)
642
s.aggs.bucket('categories', 'terms', field='category')
643
644
response = s.execute()
645
646
print('Top authors:')
647
for bucket in response.aggregations.authors.buckets:
648
print(f' {bucket.key}: {bucket.doc_count} articles')
649
```
650
651
### Index Management with Documents
652
653
```python
654
from opensearchpy import Index
655
656
# Create index with custom settings
657
index = Index('articles', using=client)
658
index.settings(
659
number_of_shards=2,
660
number_of_replicas=1,
661
analysis={
662
'analyzer': {
663
'custom_text_analyzer': {
664
'type': 'custom',
665
'tokenizer': 'standard',
666
'filter': ['lowercase', 'stop', 'snowball']
667
}
668
}
669
}
670
)
671
672
# Register document type
673
index.doc_type(Article)
674
675
# Create the index
676
if not index.exists():
677
index.create()
678
print('Index created successfully')
679
680
# Update mapping for existing index
681
from opensearchpy import Mapping, Text, Keyword
682
683
mapping = Mapping()
684
mapping.field('title', Text(analyzer='custom_text_analyzer'))
685
mapping.field('summary', Text())
686
mapping.field('status', Keyword())
687
688
mapping.save('articles', using=client)
689
print('Mapping updated')
690
```