# Search Service

Metadata indexing and search for research data discovery, with support for custom schemas, faceted search, and real-time indexing. The Search service enables discovery across distributed research collections through rich metadata search and filtering.

## Capabilities

### Search Client

Core client for search index management, data indexing, and query operations with comprehensive search functionality and metadata management.

```python { .api }
class SearchClient(BaseClient):
    """
    Client for Globus Search service operations.

    Provides methods for index management, data ingestion, and search queries
    with support for both simple and advanced search capabilities including
    filters, facets, and complex query structures.
    """

    def __init__(
        self,
        *,
        app: GlobusApp | None = None,
        authorizer: GlobusAuthorizer | None = None,
        environment: str | None = None,
        base_url: str | None = None,
        **kwargs
    ) -> None: ...
```

### Index Management

Create, configure, and manage search indices for organizing and discovering research data with custom schemas and policies.

```python { .api }
def create_index(
    self,
    display_name: str,
    description: str
) -> GlobusHTTPResponse:
    """
    Create a new search index.

    Creates a new index for storing and searching metadata documents.
    New indices default to trial status and may have usage limitations.

    Parameters:
    - display_name: Human-readable name for the index
    - description: Detailed description of the index purpose and content

    Returns:
    GlobusHTTPResponse with created index details including ID
    """

def get_index(
    self,
    index_id: str | UUID,
    *,
    query_params: dict[str, Any] | None = None
) -> GlobusHTTPResponse:
    """
    Get index configuration and metadata.

    Returns complete index information including schema, statistics,
    access policies, and configuration settings.

    Parameters:
    - index_id: UUID of the index to retrieve
    - query_params: Additional query parameters

    Returns:
    GlobusHTTPResponse with index configuration and statistics
    """

def get_index_list(
    self,
    *,
    query_params: dict[str, Any] | None = None
) -> IndexListResponse:
    """
    List accessible search indices.

    Returns all indices the user can access including owned indices
    and indices shared with appropriate permissions.

    Parameters:
    - query_params: Additional query parameters for filtering

    Returns:
    IndexListResponse with paginated index listings
    """

def delete_index(self, index_id: str | UUID) -> GlobusHTTPResponse:
    """
    Mark an index for deletion.

    Sets index status to "delete-pending". Actual deletion happens
    asynchronously and may take time to complete fully.

    Parameters:
    - index_id: UUID of index to delete

    Returns:
    GlobusHTTPResponse confirming deletion request
    """
```

### Data Ingestion and Management

Add, update, and remove data from search indices with support for batch operations and real-time indexing.

```python { .api }
def ingest(
    self,
    index_id: str | UUID,
    data: dict[str, Any]
) -> GlobusHTTPResponse:
    """
    Ingest data into a search index.

    Adds or updates documents in the index as an asynchronous task.
    Data can be provided as a single document or list of documents
    with flexible schema support for metadata organization.

    Parameters:
    - index_id: UUID of the target index
    - data: Document(s) to ingest, can be single dict or list of dicts

    Returns:
    GlobusHTTPResponse with ingestion task information
    """

def delete_by_query(
    self,
    index_id: str | UUID,
    data: dict[str, Any]
) -> GlobusHTTPResponse:
    """
    Delete documents matching a query.

    Removes all documents that match the specified query criteria
    as an asynchronous task, enabling bulk deletion operations.

    Parameters:
    - index_id: UUID of the index
    - data: Query specification for documents to delete

    Returns:
    GlobusHTTPResponse with deletion task information
    """

def get_task(
    self,
    task_id: str | UUID,
    *,
    query_params: dict[str, Any] | None = None
) -> GlobusHTTPResponse:
    """
    Get status of an indexing task.

    Returns current status and results of ingestion or deletion tasks,
    useful for monitoring asynchronous operations.

    Parameters:
    - task_id: UUID of the task to check
    - query_params: Additional query parameters

    Returns:
    GlobusHTTPResponse with task status and results
    """

def get_task_list(
    self,
    *,
    query_params: dict[str, Any] | None = None
) -> GlobusHTTPResponse:
    """
    List indexing tasks with optional filtering.

    Returns tasks for monitoring ingestion and deletion operations
    across all accessible indices.

    Parameters:
    - query_params: Query parameters for filtering tasks

    Returns:
    GlobusHTTPResponse with task listings
    """
```
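
Ingest payloads are GMetaList documents in which each entry pairs a unique subject with its searchable content. A minimal helper sketch that wraps flat metadata dicts into that shape (the function name and the `visible_to` default are illustrative, not part of the client API):

```python
def build_gmeta_list(entries, visible_to=("public",)):
    """Wrap flat metadata dicts into a GMetaList ingest document.

    Each entry must carry a "subject" key uniquely identifying the
    document; the whole dict becomes the entry's searchable content.
    """
    gmeta = [
        {
            "subject": entry["subject"],
            "visible_to": list(visible_to),
            "content": dict(entry),  # copy so the caller's dict is untouched
        }
        for entry in entries
    ]
    return {"ingest_type": "GMetaList", "ingest_data": {"gmeta": gmeta}}
```

The resulting dict can be passed directly as the `data` argument to `ingest`.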

### Search and Query Operations

Perform powerful searches with support for simple queries, advanced syntax, filters, facets, and result pagination.

```python { .api }
def search(
    self,
    index_id: str | UUID,
    q: str,
    *,
    offset: int = 0,
    limit: int = 10,
    advanced: bool = False,
    query_params: dict[str, Any] | None = None
) -> GlobusHTTPResponse:
    """
    Perform a simple search query.

    Executes a text search against the index with basic parameters
    for straightforward search operations.

    Parameters:
    - index_id: UUID of the index to search
    - q: Query string for search
    - offset: Starting position for results (pagination)
    - limit: Maximum number of results to return
    - advanced: Enable advanced query syntax (default: False)
    - query_params: Additional query parameters

    Returns:
    GlobusHTTPResponse with search results and metadata
    """

def post_search(
    self,
    index_id: str | UUID,
    data: dict[str, Any] | SearchQuery,
    *,
    offset: int | None = None,
    limit: int | None = None,
    query_params: dict[str, Any] | None = None
) -> GlobusHTTPResponse:
    """
    Perform an advanced search with a complex query structure.

    Supports sophisticated queries including filters, facets,
    sorting, and other advanced search features using a POST body.

    Parameters:
    - index_id: UUID of the index to search
    - data: Complex search query specification or SearchQuery object
    - offset: Starting position for results
    - limit: Maximum results to return
    - query_params: Additional parameters

    Returns:
    GlobusHTTPResponse with comprehensive search results
    """

def scroll_search(
    self,
    index_id: str | UUID,
    data: SearchScrollQuery | dict[str, Any],
    *,
    query_params: dict[str, Any] | None = None
) -> GlobusHTTPResponse:
    """
    Perform a scroll search for large result sets.

    Uses the scroll API for efficiently retrieving large numbers of
    search results without traditional pagination limitations.

    Parameters:
    - index_id: UUID of the index to search
    - data: Scroll query specification
    - query_params: Additional parameters

    Returns:
    GlobusHTTPResponse with scroll results and continuation token
    """
```
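
Since `post_search` accepts a plain dict as well as a `SearchQuery`, the request body can be built by hand. A sketch of the body shape using the filter and facet keys from the builder API (the field names and values are illustrative):

```python
# Hand-built POST body: text query plus one filter and one terms facet.
query_body = {
    "q": "climate",
    "advanced": True,
    "offset": 0,
    "limit": 10,
    "filters": [
        {"type": "match_any", "field_name": "data_type", "values": ["time-series"]},
    ],
    "facets": [
        {"name": "creator", "type": "terms", "field_name": "creator", "size": 10},
    ],
}
# search_client.post_search(index_id, query_body) would submit this directly.
```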

### Query Builder Classes

Type-safe query construction with a chainable API for building complex search queries with filters, facets, and advanced options.

```python { .api }
class SearchQuery(PayloadWrapper):
    """
    Modern search query builder for constructing complex search requests.

    Provides a fluent API for building queries with filters, facets,
    sorting, and other advanced search features with type safety.
    """

    def __init__(
        self,
        q: str | None = None,
        *,
        offset: int | None = None,
        limit: int | None = None,
        advanced: bool | None = None,
        **kwargs
    ) -> None: ...

    def set_query(self, query: str) -> SearchQuery:
        """
        Set the main query string.

        Parameters:
        - query: Text query to search for

        Returns:
        Self for method chaining
        """

    def set_limit(self, limit: int) -> SearchQuery:
        """
        Set maximum number of results to return.

        Parameters:
        - limit: Maximum results per page

        Returns:
        Self for method chaining
        """

    def set_offset(self, offset: int) -> SearchQuery:
        """
        Set starting position for results (pagination).

        Parameters:
        - offset: Starting result position

        Returns:
        Self for method chaining
        """

    def set_advanced(self, advanced: bool) -> SearchQuery:
        """
        Enable or disable advanced query syntax.

        Parameters:
        - advanced: Whether to use advanced query parsing

        Returns:
        Self for method chaining
        """

    def add_filter(
        self,
        field_name: str,
        values: list[str] | list[dict[str, Any]],
        *,
        type: str = "match_all",
        additional_fields: dict[str, Any] | None = None
    ) -> SearchQuery:
        """
        Add a filter to constrain search results.

        Parameters:
        - field_name: Field to filter on
        - values: Values to match; range-style filters take {"from": ..., "to": ...} objects
        - type: Filter type (match_all, match_any, range, etc.)
        - additional_fields: Additional filter configuration

        Returns:
        Self for method chaining
        """

    def add_facet(
        self,
        name: str,
        field_name: str,
        *,
        type: str = "terms",
        size: int | None = None,
        additional_fields: dict[str, Any] | None = None
    ) -> SearchQuery:
        """
        Add a facet for result aggregation.

        Parameters:
        - name: Name for the facet in results
        - field_name: Field to facet on
        - type: Facet type (terms, date_histogram, etc.)
        - size: Maximum facet values to return
        - additional_fields: Additional facet configuration

        Returns:
        Self for method chaining
        """

    def add_sort(
        self,
        field_name: str,
        order: str = "asc"
    ) -> SearchQuery:
        """
        Add sort criteria to results.

        Parameters:
        - field_name: Field to sort by
        - order: Sort order (asc, desc)

        Returns:
        Self for method chaining
        """

    def set_field_list(self, fields: list[str]) -> SearchQuery:
        """
        Specify which fields to return in results.

        Parameters:
        - fields: List of field names to include

        Returns:
        Self for method chaining
        """

class SearchQueryV1(SearchQuery):
    """
    Legacy search query builder for API v1 compatibility.

    Maintains compatibility with older search API versions
    while providing similar functionality to the modern SearchQuery.
    """

class SearchScrollQuery(PayloadWrapper):
    """
    Query builder for scroll-based search operations.

    Designed for efficiently retrieving large result sets using
    the scroll API pattern for deep pagination.
    """

    def __init__(
        self,
        q: str | None = None,
        *,
        limit: int | None = None,
        advanced: bool | None = None,
        scroll: str | None = None,
        scroll_id: str | None = None,
        **kwargs
    ) -> None: ...

    def set_scroll_size(self, size: int) -> SearchScrollQuery:
        """Set the scroll window size for batch retrieval."""

    def set_scroll_id(self, scroll_id: str) -> SearchScrollQuery:
        """Set scroll ID for continuing a scroll operation."""
```
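
Every setter above returns `self`, which is what makes chained construction work. A stripped-down stand-in (not the SDK class) showing the pattern and the payload it accumulates:

```python
class QuerySketch:
    """Minimal stand-in illustrating the fluent-builder pattern."""

    def __init__(self, q=None):
        self.data = {}
        if q is not None:
            self.data["q"] = q

    def set_limit(self, limit):
        self.data["limit"] = limit
        return self  # returning self is what enables chaining

    def add_filter(self, field_name, values, *, type="match_all"):
        self.data.setdefault("filters", []).append(
            {"type": type, "field_name": field_name, "values": list(values)}
        )
        return self

# All three calls mutate and return the same underlying object.
q = QuerySketch("climate").set_limit(5).add_filter("year", ["2024"], type="match_any")
```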

### Response Objects

Specialized response classes providing enhanced access to search results and index listings with iteration support.

```python { .api }
class IndexListResponse(GlobusHTTPResponse):
    """
    Response class for index listing operations.

    Provides enhanced access to index listings with metadata
    and convenient iteration over available indices.
    """

    def __iter__(self) -> Iterator[dict[str, Any]]:
        """Iterate over index records."""
```

### Error Handling

Search-specific error handling for indexing operations and query processing.

```python { .api }
class SearchAPIError(GlobusAPIError):
    """
    Error class for Search service API errors.

    Provides enhanced error handling for search-specific error
    conditions including indexing failures and query syntax errors.
    """
```
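
A common pattern is to catch `SearchAPIError`, retry transient server-side failures, and surface query errors immediately. The helpers below are a sketch that works on bare HTTP status codes (the status set and backoff schedule are illustrative choices, not SDK behavior):

```python
# HTTP statuses usually worth retrying; 4xx query errors are not.
TRANSIENT_STATUSES = {429, 500, 502, 503, 504}

def should_retry(http_status: int, attempt: int, max_attempts: int = 3) -> bool:
    """Return True if a failed Search call is worth another attempt."""
    return attempt < max_attempts and http_status in TRANSIENT_STATUSES

def backoff_seconds(attempt: int, base: float = 1.0) -> float:
    """Exponential backoff: 1s, 2s, 4s, ... for attempts 0, 1, 2, ..."""
    return base * (2 ** attempt)
```

In a `try`/`except SearchAPIError` block, the error's `http_status` feeds `should_retry`, and the loop sleeps for `backoff_seconds(attempt)` before re-issuing the request.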

## Common Usage Patterns

### Basic Index Setup and Data Ingestion

```python
from globus_sdk import SearchClient

# Initialize search client
search_client = SearchClient(authorizer=authorizer)

# Create a new index for research data
index_response = search_client.create_index(
    display_name="Climate Research Data",
    description="Searchable metadata for climate research datasets"
)
index_id = index_response["id"]

# Sample metadata documents
documents = [
    {
        "subject": "Temperature Measurements - Station A",
        "description": "Daily temperature recordings from weather station A",
        "creator": "Climate Research Lab",
        "date_created": "2024-01-15",
        "keywords": ["temperature", "climate", "weather"],
        "data_type": "time-series",
        "location": {"lat": 40.7128, "lon": -74.0060},
        "file_format": "CSV",
        "size_mb": 15.2
    },
    {
        "subject": "Precipitation Data - Regional Survey",
        "description": "Monthly precipitation measurements across the region",
        "creator": "Weather Monitoring Network",
        "date_created": "2024-02-01",
        "keywords": ["precipitation", "rainfall", "climate", "regional"],
        "data_type": "geospatial",
        "location": {"lat": 41.8781, "lon": -87.6298},
        "file_format": "NetCDF",
        "size_mb": 45.7
    }
]

# Wrap each document in a GMeta entry and ingest into the index
gmeta_entries = [
    {"subject": doc["subject"], "visible_to": ["public"], "content": doc}
    for doc in documents
]
ingest_response = search_client.ingest(
    index_id,
    {"ingest_type": "GMetaList", "ingest_data": {"gmeta": gmeta_entries}}
)
task_id = ingest_response["task_id"]

print(f"Ingestion task started: {task_id}")
```

### Advanced Search with Filters and Facets

```python
from globus_sdk import SearchQuery

# Build a complex search query
query = (SearchQuery("climate temperature")
    .set_limit(20)
    .set_advanced(True)
    .add_filter("data_type", ["time-series", "geospatial"], type="match_any")
    .add_filter("size_mb", [{"from": 0, "to": 50}], type="range")  # range filters take from/to bounds
    .add_facet("creator", "creator", size=10)
    .add_facet("keywords", "keywords", size=20)
    .add_facet("location_facet", "location.country", size=15)
    .add_sort("date_created", "desc")
    .set_field_list(["subject", "description", "creator", "date_created", "keywords"])
)

# Execute the search
results = search_client.post_search(index_id, query)

print(f"Found {results['total']} results")

# Process search results
for hit in results["gmeta"]:
    content = hit["content"][0]
    print(f"Title: {content['subject']}")
    print(f"Creator: {content['creator']}")
    print(f"Keywords: {', '.join(content.get('keywords', []))}")
    print("---")

# Process facets for building a user interface
facets = results.get("facet_results", [])
for facet in facets:
    print(f"\nFacet: {facet['name']}")
    for bucket in facet["buckets"]:
        print(f"  {bucket['value']}: {bucket['count']}")
```

### Simple Text Search

```python
# Simple keyword search
simple_results = search_client.search(
    index_id,
    q="temperature climate data",
    limit=10,
    offset=0,
    advanced=True
)

print(f"Simple search found {simple_results['total']} results")
for hit in simple_results["gmeta"]:
    content = hit["content"][0]
    print(f"- {content['subject']}")
```

### Scroll Search for Large Result Sets

```python
from globus_sdk import SearchScrollQuery

# Create a scroll query for large datasets
scroll_query = SearchScrollQuery("*")  # Match all documents
scroll_query.set_scroll_size(100)      # Retrieve 100 at a time

# Start scrolling
scroll_response = search_client.scroll_search(index_id, scroll_query)

all_results = []
scroll_id = scroll_response.get("scroll_id")

# Continue scrolling until no more results
while scroll_response.get("gmeta"):
    all_results.extend(scroll_response["gmeta"])
    print(f"Retrieved {len(scroll_response['gmeta'])} more results")

    if not scroll_id:
        break

    # Continue with the next batch, carrying the query and scroll ID forward
    continue_query = SearchScrollQuery("*").set_scroll_id(scroll_id)
    scroll_response = search_client.scroll_search(index_id, continue_query)
    scroll_id = scroll_response.get("scroll_id")

print(f"Total results retrieved: {len(all_results)}")
```

### Data Management and Updates

```python
# Update an existing document (re-ingesting the same subject replaces it)
updated_doc = {
    "subject": "Temperature Measurements - Station A",
    "description": "Updated daily temperature recordings with quality control",
    "creator": "Climate Research Lab",
    "date_created": "2024-01-15",
    "date_modified": "2024-03-01",
    "keywords": ["temperature", "climate", "weather", "quality-controlled"],
    "data_type": "time-series",
    "version": "2.0"
}

# Wrap in a GMeta entry; the entry with the matching subject is replaced
update_response = search_client.ingest(
    index_id,
    {
        "ingest_type": "GMetaList",
        "ingest_data": {
            "gmeta": [
                {
                    "subject": updated_doc["subject"],
                    "visible_to": ["public"],
                    "content": updated_doc
                }
            ]
        }
    }
)

# Delete documents matching criteria
delete_query = {
    "q": "*",
    "filters": [
        {
            "field_name": "data_type",
            "values": ["obsolete"],
            "type": "match_any"
        }
    ]
}

delete_response = search_client.delete_by_query(index_id, delete_query)
print(f"Deletion task: {delete_response['task_id']}")
```

### Task Monitoring

```python
import time

# Monitor ingestion/deletion tasks
def wait_for_task(search_client, task_id, timeout=300):
    start_time = time.time()
    while time.time() - start_time < timeout:
        task_status = search_client.get_task(task_id)

        status = task_status.get("state", "PENDING")
        print(f"Task {task_id}: {status}")

        if status in ["SUCCESS", "FAILED"]:
            return task_status

        time.sleep(5)

    raise TimeoutError(f"Task {task_id} did not complete within {timeout} seconds")

# Wait for ingestion to complete
try:
    final_status = wait_for_task(search_client, task_id)
    if final_status["state"] == "SUCCESS":
        print("Ingestion completed successfully")
    else:
        print(f"Ingestion failed: {final_status.get('message')}")
except TimeoutError as e:
    print(e)
```

### Index Management and Administration

```python
# List all accessible indices
indices = search_client.get_index_list()
for index in indices:
    print(f"Index: {index['display_name']} ({index['id']})")
    print(f"  Description: {index['description']}")
    print(f"  Status: {index.get('status', 'active')}")
    print(f"  Document count: {index.get('size', 'unknown')}")

# Get detailed index information
index_details = search_client.get_index(index_id)
print(f"Index created: {index_details['creation_date']}")
print(f"Index permissions: {index_details.get('permissions', [])}")

# List recent tasks for monitoring
tasks = search_client.get_task_list()
for task in tasks.get("tasks", []):
    print(f"Task {task['task_id']}: {task['state']} - {task.get('task_type', 'unknown')}")
```

### Advanced Query Patterns

```python
# Geographic search with location filters
geo_query = (SearchQuery("research station")
    .add_filter("location.country", ["United States", "Canada"])
    # geo_bounding_box filters take corner points rather than plain strings
    .add_filter(
        "coordinates",
        [{"top_left": {"lat": 90, "lon": -180}, "bottom_right": {"lat": -90, "lon": 180}}],
        type="geo_bounding_box",
    )
    .add_facet("country_facet", "location.country")
    .add_sort("date_created", "desc")
)

# Date range search
date_query = (SearchQuery("climate data")
    .add_filter(
        "date_created",
        [{"from": "2024-01-01", "to": "2024-12-31"}],  # range filters take from/to bounds
        type="range",
    )
    .add_facet("monthly", "date_created", type="date_histogram",
               additional_fields={"date_interval": "month"})
)

# Full-text search sorted by relevance
relevance_query = (SearchQuery("temperature precipitation climate")
    .set_advanced(True)
    .add_sort("_score", "desc")  # Sort by relevance score
    .set_field_list(["subject", "description", "creator", "keywords", "_score"])
)

# Execute queries
geo_results = search_client.post_search(index_id, geo_query)
date_results = search_client.post_search(index_id, date_query)
relevance_results = search_client.post_search(index_id, relevance_query)
```