0
# Core Client Operations
1
2
Essential OpenSearch operations for document management, searching, and basic cluster interactions. These are the fundamental methods available on both the synchronous `OpenSearch` and asynchronous `AsyncOpenSearch` client classes.
3
4
## Capabilities
5
6
### Connection Testing
7
8
Test connectivity and retrieve basic cluster information.
9
10
```python { .api }
11
def ping(self, **kwargs):
12
"""
13
Test connection to the cluster.
14
15
Parameters:
16
- request_timeout: Request timeout in seconds
17
18
Returns:
19
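bool: True if the cluster responds, False if the request fails
20
21
Raises:
22
Nothing for transport failures: TransportError (including ConnectionError) is caught and reported as a False return value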
23
"""
24
25
def info(self, **kwargs):
26
"""
27
Get basic information about the cluster.
28
29
Parameters:
30
- request_timeout: Request timeout in seconds
31
32
Returns:
33
dict: Cluster information including version, name, and tagline
34
"""
35
```
36
37
### Document Indexing
38
39
Create and update documents in OpenSearch indices.
40
41
```python { .api }
42
def index(self, index, body, id=None, **kwargs):
43
"""
44
Index a document.
45
46
Parameters:
47
- index (str): Target index name
48
- body (dict): Document source data
49
- id (str, optional): Document ID (auto-generated if not provided)
50
- doc_type (str, optional): Document type (deprecated in newer versions)
51
- refresh (str/bool, optional): Refresh policy ('true', 'false', 'wait_for')
52
- routing (str, optional): Routing value
53
- timeout (str, optional): Operation timeout
54
- version (int, optional): Expected document version
55
- version_type (str, optional): Version type ('internal', 'external')
56
- if_seq_no (int, optional): Only perform if sequence number matches
57
- if_primary_term (int, optional): Only perform if primary term matches
58
- pipeline (str, optional): Ingest pipeline to use
59
- request_timeout (float, optional): Request timeout in seconds
60
61
Returns:
62
dict: Response containing '_index', '_id', '_version', 'result', etc.
63
64
Raises:
65
RequestError: If indexing fails due to client error
66
ConflictError: If version conflict occurs
67
"""
68
69
def create(self, index, id, body, **kwargs):
70
"""
71
Create a new document. Fails if document already exists.
72
73
Parameters:
74
- index (str): Target index name
75
- id (str): Document ID
76
- body (dict): Document source data
77
- refresh (str/bool, optional): Refresh policy
78
- routing (str, optional): Routing value
79
- timeout (str, optional): Operation timeout
80
- pipeline (str, optional): Ingest pipeline to use
81
- request_timeout (float, optional): Request timeout in seconds
82
83
Returns:
84
dict: Response containing creation details
85
86
Raises:
87
ConflictError: If document already exists
88
"""
89
```
90
91
### Document Retrieval
92
93
Retrieve documents and check for document existence.
94
95
```python { .api }
96
def get(self, index, id, **kwargs):
97
"""
98
Retrieve a document by ID.
99
100
Parameters:
101
- index (str): Index name
102
- id (str): Document ID
103
- doc_type (str, optional): Document type (deprecated)
104
- stored_fields (list, optional): List of stored fields to retrieve
105
- _source (bool/list/str, optional): Source filtering
106
- _source_excludes (list, optional): Source fields to exclude
107
- _source_includes (list, optional): Source fields to include
108
- preference (str, optional): Preference for which shard to query
109
- realtime (bool, optional): Whether to perform realtime get
110
- refresh (bool, optional): Refresh before get
111
- routing (str, optional): Routing value
112
- version (int, optional): Expected document version
113
- version_type (str, optional): Version type
114
- request_timeout (float, optional): Request timeout in seconds
115
116
Returns:
117
dict: Document data including '_source', '_id', '_version', etc.
118
119
Raises:
120
NotFoundError: If document is not found
121
"""
122
123
def exists(self, index, id, **kwargs):
124
"""
125
Check if a document exists.
126
127
Parameters:
128
- index (str): Index name
129
- id (str): Document ID
130
- doc_type (str, optional): Document type (deprecated)
131
- preference (str, optional): Preference for which shard to query
132
- realtime (bool, optional): Whether to perform realtime check
133
- refresh (bool, optional): Refresh before check
134
- routing (str, optional): Routing value
135
- request_timeout (float, optional): Request timeout in seconds
136
137
Returns:
138
bool: True if document exists, False otherwise
139
"""
140
141
def get_source(self, index, id, **kwargs):
142
"""
143
Retrieve only the source of a document.
144
145
Parameters:
146
- index (str): Index name
147
- id (str): Document ID
148
- doc_type (str, optional): Document type (deprecated)
149
- _source_excludes (list, optional): Source fields to exclude
150
- _source_includes (list, optional): Source fields to include
151
- preference (str, optional): Preference for which shard to query
152
- realtime (bool, optional): Whether to perform realtime get
153
- refresh (bool, optional): Refresh before get
154
- routing (str, optional): Routing value
155
- version (int, optional): Expected document version
156
- version_type (str, optional): Version type
157
- request_timeout (float, optional): Request timeout in seconds
158
159
Returns:
160
dict: Document source data only
161
162
Raises:
163
NotFoundError: If document is not found
164
"""
165
```
166
167
### Document Updates
168
169
Update existing documents with partial data or scripts.
170
171
```python { .api }
172
def update(self, index, id, body, **kwargs):
173
"""
174
Update a document with partial data or script.
175
176
Parameters:
177
- index (str): Index name
178
- id (str): Document ID
179
- body (dict): Update body containing 'doc', 'script', or 'upsert'
180
- doc_type (str, optional): Document type (deprecated)
181
- _source (bool/list/str, optional): Source filtering for response
182
- _source_excludes (list, optional): Source fields to exclude from response
183
- _source_includes (list, optional): Source fields to include in response
184
- if_seq_no (int, optional): Only perform if sequence number matches
185
- if_primary_term (int, optional): Only perform if primary term matches
186
- lang (str, optional): Script language
187
- refresh (str/bool, optional): Refresh policy
188
- retry_on_conflict (int, optional): Number of times to retry on conflict
189
- routing (str, optional): Routing value
190
- timeout (str, optional): Operation timeout
191
- version (int, optional): Expected document version
192
- version_type (str, optional): Version type
193
- wait_for_active_shards (str/int, optional): Wait for active shards
194
- request_timeout (float, optional): Request timeout in seconds
195
196
Returns:
197
dict: Update response containing result details
198
199
Raises:
200
NotFoundError: If document is not found
201
ConflictError: If version conflict occurs
202
"""
203
204
def update_by_query(self, index=None, body=None, **kwargs):
205
"""
206
Update documents matching a query.
207
208
Parameters:
209
- index (str/list, optional): Index name(s)
210
- body (dict, optional): Query and update specification
211
- doc_type (str/list, optional): Document type(s) (deprecated)
212
- analyzer (str, optional): Analyzer for query string
213
- analyze_wildcard (bool, optional): Whether to analyze wildcard terms
214
- conflicts (str, optional): What to do on conflicts ('abort' or 'proceed')
215
- default_operator (str, optional): Default operator for query string
216
- df (str, optional): Default field for query string
217
- expand_wildcards (str, optional): Wildcard expansion type
218
- from_ (int, optional): Starting document offset
219
- ignore_unavailable (bool, optional): Ignore unavailable indices
220
- allow_no_indices (bool, optional): Allow operation on no indices
221
- pipeline (str, optional): Ingest pipeline
222
- preference (str, optional): Preference for which shard to query
223
- q (str, optional): Query string
224
- refresh (bool, optional): Refresh after operation
225
- request_cache (bool, optional): Use request cache
226
- requests_per_second (float, optional): Throttling rate
227
- routing (str/list, optional): Routing value(s)
228
- scroll (str, optional): Scroll timeout
229
- scroll_size (int, optional): Scroll batch size
230
- search_type (str, optional): Search type
231
- search_timeout (str, optional): Search timeout
232
- size (int, optional): Number of documents to update
233
- sort (list, optional): Sort order
234
- terminate_after (int, optional): Maximum number of documents to process
235
- timeout (str, optional): Operation timeout
236
- version (bool, optional): Return document version
237
- version_type (bool, optional): Whether a matched document increments its version number (internal) on hit or not (reindex)
238
- wait_for_active_shards (str/int, optional): Wait for active shards
239
- wait_for_completion (bool, optional): Wait for operation completion
240
- request_timeout (float, optional): Request timeout in seconds
241
242
Returns:
243
dict: Update results including updated count and conflicts
244
"""
245
```
246
247
### Document Deletion
248
249
Delete documents individually or by query.
250
251
```python { .api }
252
def delete(self, index, id, **kwargs):
253
"""
254
Delete a document by ID.
255
256
Parameters:
257
- index (str): Index name
258
- id (str): Document ID
259
- doc_type (str, optional): Document type (deprecated)
260
- if_seq_no (int, optional): Only perform if sequence number matches
261
- if_primary_term (int, optional): Only perform if primary term matches
262
- refresh (str/bool, optional): Refresh policy
263
- routing (str, optional): Routing value
264
- timeout (str, optional): Operation timeout
265
- version (int, optional): Expected document version
266
- version_type (str, optional): Version type
267
- wait_for_active_shards (str/int, optional): Wait for active shards
268
- request_timeout (float, optional): Request timeout in seconds
269
270
Returns:
271
dict: Deletion response containing result details
272
273
Raises:
274
NotFoundError: If document is not found
275
"""
276
277
def delete_by_query(self, index, body=None, **kwargs):
278
"""
279
Delete documents matching a query.
280
281
Parameters:
282
- index (str/list): Index name(s)
283
- body (dict, optional): Query specification
284
- doc_type (str/list, optional): Document type(s) (deprecated)
285
- analyzer (str, optional): Analyzer for query string
286
- analyze_wildcard (bool, optional): Whether to analyze wildcard terms
287
- conflicts (str, optional): What to do on conflicts ('abort' or 'proceed')
288
- default_operator (str, optional): Default operator for query string
289
- df (str, optional): Default field for query string
290
- expand_wildcards (str, optional): Wildcard expansion type
291
- from_ (int, optional): Starting document offset
292
- ignore_unavailable (bool, optional): Ignore unavailable indices
293
- allow_no_indices (bool, optional): Allow operation on no indices
294
- preference (str, optional): Preference for which shard to query
295
- q (str, optional): Query string
296
- refresh (bool, optional): Refresh after operation
297
- request_cache (bool, optional): Use request cache
298
- requests_per_second (float, optional): Throttling rate
299
- routing (str/list, optional): Routing value(s)
300
- scroll (str, optional): Scroll timeout
301
- scroll_size (int, optional): Scroll batch size
302
- search_type (str, optional): Search type
303
- search_timeout (str, optional): Search timeout
304
- size (int, optional): Number of documents to delete
305
- sort (list, optional): Sort order
306
- terminate_after (int, optional): Maximum number of documents to process
307
- timeout (str, optional): Operation timeout
308
- version (bool, optional): Return document version
309
- version_type (bool, optional): Return version type
310
- wait_for_active_shards (str/int, optional): Wait for active shards
311
- wait_for_completion (bool, optional): Wait for operation completion
312
- request_timeout (float, optional): Request timeout in seconds
313
314
Returns:
315
dict: Deletion results including deleted count and conflicts
316
"""
317
```
318
319
### Search Operations
320
321
Perform search queries across documents and indices.
322
323
```python { .api }
324
def search(self, index=None, body=None, **kwargs):
325
"""
326
Execute a search query.
327
328
Parameters:
329
- index (str/list, optional): Index name(s) to search
330
- body (dict, optional): Search query body
331
- doc_type (str/list, optional): Document type(s) (deprecated)
332
- _source (bool/list/str, optional): Source filtering
333
- _source_excludes (list, optional): Source fields to exclude
334
- _source_includes (list, optional): Source fields to include
335
- allow_no_indices (bool, optional): Allow no indices to match
336
- allow_partial_search_results (bool, optional): Allow partial results
337
- analyzer (str, optional): Analyzer for query string
338
- analyze_wildcard (bool, optional): Analyze wildcard terms
339
- batched_reduce_size (int, optional): Batched reduce size
340
- ccs_minimize_roundtrips (bool, optional): Cross-cluster search optimization
341
- default_operator (str, optional): Default operator ('AND' or 'OR')
342
- df (str, optional): Default field for query string
343
- docvalue_fields (list, optional): Doc value fields to return
344
- expand_wildcards (str, optional): Wildcard expansion ('open', 'closed', 'hidden', 'none', 'all')
345
- explain (bool, optional): Return explanation for each hit
346
- from_ (int, optional): Starting document offset (default: 0)
347
- ignore_throttled (bool, optional): Ignore throttled indices
348
- ignore_unavailable (bool, optional): Ignore unavailable indices
349
- lenient (bool, optional): Ignore format-based query failures
350
- max_concurrent_shard_requests (int, optional): Max concurrent shard requests
351
- min_compatible_shard_node (str, optional): Minimum compatible shard node version
352
- preference (str, optional): Preference for which shard to query
353
- pre_filter_shard_size (int, optional): Pre-filter shard size threshold
354
- q (str, optional): Query string
355
- request_cache (bool, optional): Use request cache
356
- rest_total_hits_as_int (bool, optional): Return total hits as integer
357
- routing (str/list, optional): Routing value(s)
358
- scroll (str, optional): Scroll timeout for cursor-based pagination
359
- search_type (str, optional): Search type ('query_then_fetch', 'dfs_query_then_fetch')
360
- seq_no_primary_term (bool, optional): Return sequence number and primary term
361
- size (int, optional): Number of hits to return (default: 10)
362
- sort (list, optional): Sort order
363
- stats (list, optional): Statistics groups
364
- stored_fields (list, optional): Stored fields to return
365
- suggest_field (str, optional): Field to suggest on
366
- suggest_mode (str, optional): Suggest mode
367
- suggest_size (int, optional): Number of suggestions
368
- suggest_text (str, optional): Text to suggest on
369
- terminate_after (int, optional): Maximum number of documents to collect
370
- timeout (str, optional): Search timeout
371
- track_scores (bool, optional): Track scores for each hit
372
- track_total_hits (bool/int, optional): Track total hits
373
- typed_keys (bool, optional): Prefix aggregation names with type
374
- version (bool, optional): Return document version
375
- request_timeout (float, optional): Request timeout in seconds
376
377
Returns:
378
dict: Search results containing 'hits', 'aggregations', 'took', etc.
379
380
Raises:
381
RequestError: If search query is malformed
382
"""
383
384
def count(self, index=None, body=None, **kwargs):
385
"""
386
Count documents matching a query.
387
388
Parameters:
389
- index (str/list, optional): Index name(s)
390
- body (dict, optional): Query body
391
- doc_type (str/list, optional): Document type(s) (deprecated)
392
- allow_no_indices (bool, optional): Allow no indices to match
393
- analyzer (str, optional): Analyzer for query string
394
- analyze_wildcard (bool, optional): Analyze wildcard terms
395
- default_operator (str, optional): Default operator
396
- df (str, optional): Default field
397
- expand_wildcards (str, optional): Wildcard expansion
398
- ignore_throttled (bool, optional): Ignore throttled indices
399
- ignore_unavailable (bool, optional): Ignore unavailable indices
400
- lenient (bool, optional): Ignore format-based failures
401
- min_score (float, optional): Minimum score threshold
402
- preference (str, optional): Preference for which shard to query
403
- q (str, optional): Query string
404
- routing (str/list, optional): Routing value(s)
405
- terminate_after (int, optional): Maximum documents to count
406
- request_timeout (float, optional): Request timeout in seconds
407
408
Returns:
409
dict: Count result containing 'count' field
410
"""
411
412
def scroll(self, scroll_id, scroll='5m', **kwargs):
413
"""
414
Continue scrolling through search results.
415
416
Parameters:
417
- scroll_id (str): Scroll ID from previous search/scroll request
418
- scroll (str, optional): Scroll timeout (default: '5m')
419
- rest_total_hits_as_int (bool, optional): Return total hits as integer
420
- request_timeout (float, optional): Request timeout in seconds
421
422
Returns:
423
dict: Next batch of search results
424
425
Raises:
426
NotFoundError: If scroll ID is invalid or expired
427
"""
428
429
def clear_scroll(self, scroll_id=None, body=None, **kwargs):
430
"""
431
Clear scroll context to free resources.
432
433
Parameters:
434
- scroll_id (str/list, optional): Scroll ID(s) to clear
435
- body (dict, optional): Body containing scroll_id list
436
- request_timeout (float, optional): Request timeout in seconds
437
438
Returns:
439
dict: Clear scroll response
440
"""
441
```
442
443
### Bulk Operations
444
445
Perform multiple operations in a single request for improved performance.
446
447
```python { .api }
448
def bulk(self, body, index=None, **kwargs):
449
"""
450
Perform multiple index, create, update, or delete operations.
451
452
Parameters:
453
- body (list/str): List of operations or newline-delimited JSON string
454
- index (str, optional): Default index for operations without index
455
- doc_type (str, optional): Default document type (deprecated)
456
- pipeline (str, optional): Default ingest pipeline
457
- refresh (str/bool, optional): Refresh policy
458
- routing (str, optional): Default routing value
459
- timeout (str, optional): Operation timeout
460
- wait_for_active_shards (str/int, optional): Wait for active shards
461
- require_alias (bool, optional): Require target to be an alias
462
- request_timeout (float, optional): Request timeout in seconds
463
464
Returns:
465
dict: Bulk response containing 'items' list with operation results
466
467
Each operation format:
468
{"index": {"_index": "test", "_id": "1"}}
469
{"field1": "value1"}
470
{"create": {"_index": "test", "_id": "2"}}
471
{"field1": "value2"}
472
{"update": {"_index": "test", "_id": "1"}}
473
{"doc": {"field1": "updated_value"}}
474
{"delete": {"_index": "test", "_id": "2"}}
475
"""
476
```
477
478
### Multi-Get Operations
479
480
Retrieve multiple documents in a single request.
481
482
```python { .api }
483
def mget(self, body, index=None, **kwargs):
484
"""
485
Retrieve multiple documents by ID.
486
487
Parameters:
488
- body (dict): Multi-get request body with 'docs' or 'ids' field
489
- index (str, optional): Default index name
490
- doc_type (str, optional): Default document type (deprecated)
491
- _source (bool/list/str, optional): Source filtering
492
- _source_excludes (list, optional): Source fields to exclude
493
- _source_includes (list, optional): Source fields to include
494
- preference (str, optional): Preference for which shard to query
495
- realtime (bool, optional): Whether to perform realtime get
496
- refresh (bool, optional): Refresh before get
497
- routing (str, optional): Default routing value
498
- stored_fields (list, optional): Stored fields to retrieve
499
- request_timeout (float, optional): Request timeout in seconds
500
501
Body format:
502
{
503
"docs": [
504
{"_index": "test", "_id": "1"},
505
{"_index": "test", "_id": "2", "_source": ["field1"]}
506
]
507
}
508
509
Or with default index:
510
{
511
"ids": ["1", "2", "3"]
512
}
513
514
Returns:
515
dict: Multi-get response with 'docs' array containing individual responses
516
"""
517
518
def msearch(self, body, index=None, **kwargs):
519
"""
520
Execute multiple search queries.
521
522
Parameters:
523
- body (list/str): List of search requests or newline-delimited JSON
524
- index (str/list, optional): Default index name(s)
525
- doc_type (str/list, optional): Default document type(s) (deprecated)
526
- ccs_minimize_roundtrips (bool, optional): Cross-cluster search optimization
527
- max_concurrent_searches (int, optional): Maximum concurrent searches
528
- rest_total_hits_as_int (bool, optional): Return total hits as integer
529
- typed_keys (bool, optional): Prefix aggregation names with type
530
- request_timeout (float, optional): Request timeout in seconds
531
532
Body format (alternating headers and bodies):
533
[
534
{"index": "test1"},
535
{"query": {"match_all": {}}},
536
{"index": "test2", "type": "doc"},
537
{"query": {"match": {"field": "value"}}}
538
]
539
540
Returns:
541
dict: Multi-search response with 'responses' array containing individual results
542
"""
543
```
544
545
### Template Operations
546
547
Manage stored scripts and render search templates for reusable queries.
548
549
```python { .api }
550
def put_script(self, id, body, context=None, **kwargs):
551
"""
552
Store a script for later execution.
553
554
Parameters:
555
- id (str): Script ID
556
- body (dict): Script definition
557
- context (str, optional): Script context ('search', 'filter', 'update', etc.)
558
- master_timeout (str, optional): Master node timeout
559
- timeout (str, optional): Operation timeout
560
- request_timeout (float, optional): Request timeout in seconds
561
562
Body format:
563
{
564
"script": {
565
"lang": "painless",
566
"source": "Math.log(_score * 2) + params.factor",
567
"params": {"factor": 1.2}
568
}
569
}
570
571
Returns:
572
dict: Script creation response
573
"""
574
575
def get_script(self, id, **kwargs):
576
"""
577
Retrieve a stored script.
578
579
Parameters:
580
- id (str): Script ID
581
- master_timeout (str, optional): Master node timeout
582
- request_timeout (float, optional): Request timeout in seconds
583
584
Returns:
585
dict: Script definition
586
"""
587
588
def delete_script(self, id, **kwargs):
589
"""
590
Delete a stored script.
591
592
Parameters:
593
- id (str): Script ID
594
- master_timeout (str, optional): Master node timeout
595
- timeout (str, optional): Operation timeout
596
- request_timeout (float, optional): Request timeout in seconds
597
598
Returns:
599
dict: Deletion response
600
"""
601
602
def render_template(self, body=None, id=None, **kwargs):
603
"""
604
Render a search template with parameters.
605
606
Parameters:
607
- body (dict, optional): Template and parameters
608
- id (str, optional): Stored template ID
609
- request_timeout (float, optional): Request timeout in seconds
610
611
Returns:
612
dict: Rendered template output
613
"""
614
```
615
616
### Advanced Search Features
617
618
Additional search capabilities for specialized use cases.
619
620
```python { .api }
621
def explain(self, index, id, body=None, **kwargs):
622
"""
623
Explain why a document matches or doesn't match a query.
624
625
Parameters:
626
- index (str): Index name
627
- id (str): Document ID
628
- body (dict, optional): Query to explain
629
- doc_type (str, optional): Document type (deprecated)
630
- _source (bool/list/str, optional): Source filtering
631
- _source_excludes (list, optional): Source fields to exclude
632
- _source_includes (list, optional): Source fields to include
633
- analyzer (str, optional): Analyzer for query string
634
- analyze_wildcard (bool, optional): Analyze wildcard terms
635
- default_operator (str, optional): Default operator
636
- df (str, optional): Default field
637
- lenient (bool, optional): Ignore format-based failures
638
- parent (str, optional): Parent document ID
639
- preference (str, optional): Preference for which shard to query
640
- q (str, optional): Query string
641
- routing (str, optional): Routing value
642
- stored_fields (list, optional): Stored fields to return
643
- request_timeout (float, optional): Request timeout in seconds
644
645
Returns:
646
dict: Explanation of query scoring
647
"""
648
649
def field_caps(self, index=None, **kwargs):
650
"""
651
Get field capabilities across indices.
652
653
Parameters:
654
- index (str/list, optional): Index name(s)
655
- fields (str/list, optional): Field name(s) to get capabilities for
656
- allow_no_indices (bool, optional): Allow no indices to match
657
- expand_wildcards (str, optional): Wildcard expansion
658
- ignore_unavailable (bool, optional): Ignore unavailable indices
659
- include_unmapped (bool, optional): Include unmapped fields
660
- request_timeout (float, optional): Request timeout in seconds
661
662
Returns:
663
dict: Field capabilities information
664
"""
665
666
def rank_eval(self, body, index=None, **kwargs):
667
"""
668
Evaluate search query ranking quality.
669
670
Parameters:
671
- body (dict): Ranking evaluation specification
672
- index (str/list, optional): Index name(s)
673
- allow_no_indices (bool, optional): Allow no indices to match
674
- expand_wildcards (str, optional): Wildcard expansion
675
- ignore_unavailable (bool, optional): Ignore unavailable indices
676
- request_timeout (float, optional): Request timeout in seconds
677
678
Returns:
679
dict: Ranking evaluation results
680
"""
681
```
682
683
## Usage Examples
684
685
### Basic Document Operations
686
687
```python
688
from opensearchpy import OpenSearch
689
690
client = OpenSearch([{'host': 'localhost', 'port': 9200}])
691
692
# Index a document
693
doc = {'title': 'Test Document', 'content': 'This is a test'}
694
response = client.index(index='test-index', id='1', body=doc)
695
print(f"Indexed document: {response['result']}")
696
697
# Get the document
698
doc = client.get(index='test-index', id='1')
699
print(f"Retrieved: {doc['_source']}")
700
701
# Update the document
702
update_body = {'doc': {'content': 'Updated content'}}
703
client.update(index='test-index', id='1', body=update_body)
704
705
# Search for documents
706
search_body = {
707
'query': {
708
'match': {'title': 'Test'}
709
}
710
}
711
results = client.search(index='test-index', body=search_body)
712
print(f"Found {results['hits']['total']['value']} documents")
713
714
# Delete the document
715
client.delete(index='test-index', id='1')
716
```
717
718
### Bulk Operations
719
720
```python
721
# Mixed bulk request: index, update, and delete actions in one call
722
actions = [
723
{'index': {'_index': 'test-index', '_id': '1'}},
724
{'title': 'Document 1', 'content': 'Content 1'},
725
{'index': {'_index': 'test-index', '_id': '2'}},
726
{'title': 'Document 2', 'content': 'Content 2'},
727
{'update': {'_index': 'test-index', '_id': '1'}},
728
{'doc': {'status': 'updated'}},
729
{'delete': {'_index': 'test-index', '_id': '2'}}
730
]
731
732
response = client.bulk(body=actions)
733
for item in response['items']:
734
for operation, result in item.items():
735
print(f"{operation}: {result['result']}")
736
```
737
738
### Scrolling Through Large Result Sets
739
740
```python
741
# Start scroll search
742
search_body = {
743
'query': {'match_all': {}},
744
'sort': ['_doc']
745
}
746
747
response = client.search(
748
index='large-index',
749
body=search_body,
750
scroll='5m',
751
size=1000
752
)
753
754
scroll_id = response['_scroll_id']
755
hits = response['hits']['hits']
756
757
# Process first batch
758
for hit in hits:
759
print(hit['_source'])
760
761
# Continue scrolling
762
while len(hits) > 0:
763
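response = client.scroll(scroll_id=scroll_id, scroll='5m')
scroll_id = response['_scroll_id']  # the scroll ID may change between requests; always use the latest one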
764
hits = response['hits']['hits']
765
766
for hit in hits:
767
print(hit['_source'])
768
769
# Clear scroll context
770
client.clear_scroll(scroll_id=scroll_id)
771
```