Tessl Tile for pypi/azure-search-documents@11.5.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

async-clients.md index-management.md index.md indexer-management.md models.md search-client.md

docs/indexer-management.md

0
# Data Ingestion and AI Enrichment
1

2
The SearchIndexerClient manages automated data ingestion through indexers that connect to various data sources, with optional AI-powered content enrichment through skillsets. This enables knowledge mining, document cracking, and cognitive enrichment of content during the indexing process.
3

4
## Capabilities
5

6
### Client Initialization
7

8
Create a SearchIndexerClient to manage indexers, data sources, and skillsets.
9

10
```python { .api }
11
class SearchIndexerClient:
12
    def __init__(
13
        self,
14
        endpoint: str,
15
        credential: Union[AzureKeyCredential, TokenCredential], 
16
        **kwargs
17
    ) -> None:
18
        """
19
        Initialize SearchIndexerClient for indexer management.
20
        
21
        Parameters:
22
        - endpoint (str): The URL endpoint of an Azure search service
23
        - credential: A credential to authorize requests
24
        - api_version (str, optional): The Search API version to use
25
        - audience (str, optional): AAD audience for authentication
26
        """
27
    
28
    def close(self) -> None:
29
        """Close the session."""
30
    
31
    def __enter__(self) -> "SearchIndexerClient": ...
32
    def __exit__(self, *args) -> None: ...
33
```
34

35
### Indexer Management
36

37
Create, configure, and manage indexers for automated data ingestion.
38

39
```python { .api }
40
def create_indexer(self, indexer: SearchIndexer, **kwargs) -> SearchIndexer:
41
    """
42
    Create a new indexer.
43
    
44
    Parameters:
45
    - indexer (SearchIndexer): The indexer definition
46
    
47
    Returns:
48
    SearchIndexer: The created indexer
49
    """
50

51
def create_or_update_indexer(
52
    self,
53
    indexer: SearchIndexer,
54
    *,
55
    if_match: Optional[str] = None,
56
    if_none_match: Optional[str] = None,
57
    cache_reprocessing_change_detection_disabled: Optional[bool] = None,
58
    cache_reset_requirements_ignored: Optional[bool] = None,
59
    **kwargs
60
) -> SearchIndexer:
61
    """
62
    Create a new indexer or update an existing one.
63
    
64
    Parameters:
65
    - indexer (SearchIndexer): The indexer definition
66
    - if_match (str): ETag for conditional updates
67
    - if_none_match (str): ETag for conditional creation
68
    - cache_reprocessing_change_detection_disabled (bool): Disable change detection
69
    - cache_reset_requirements_ignored (bool): Ignore cache reset requirements
70
    
71
    Returns:
72
    SearchIndexer: The created or updated indexer
73
    """
74

75
def get_indexer(self, name: str, **kwargs) -> SearchIndexer:
76
    """
77
    Retrieve an indexer definition.
78
    
79
    Parameters:
80
    - name (str): Name of the indexer
81
    
82
    Returns:
83
    SearchIndexer: The indexer definition
84
    """
85

86
def get_indexers(
87
    self, 
88
    *, 
89
    select: Optional[List[str]] = None, 
90
    **kwargs
91
) -> Sequence[SearchIndexer]:
92
    """
93
    List all indexers in the search service.
94
    
95
    Parameters:
96
    - select (List[str], optional): Fields to include in results
97
    
98
    Returns:
99
    Sequence[SearchIndexer]: List of indexers
100
    """
101

102
def get_indexer_names(self, **kwargs) -> Sequence[str]:
103
    """
104
    List all indexer names.
105
    
106
    Returns:
107
    Sequence[str]: List of indexer names
108
    """
109

110
def delete_indexer(
111
    self,
112
    indexer: Union[str, SearchIndexer],
113
    *,
114
    if_match: Optional[str] = None,
115
    if_none_match: Optional[str] = None,
116
    **kwargs
117
) -> None:
118
    """
119
    Delete an indexer.
120
    
121
    Parameters:
122
    - indexer: Indexer name or SearchIndexer object
123
    - if_match (str): ETag for conditional deletion
124
    - if_none_match (str): ETag for conditional deletion
125
    """
126
```
127

128
### Indexer Execution Control
129

130
Run, reset, and monitor indexer execution.
131

132
```python { .api }
133
def run_indexer(self, name: str, **kwargs) -> None:
134
    """
135
    Run an indexer manually.
136
    
137
    Parameters:
138
    - name (str): Name of the indexer to run
139
    """
140

141
def reset_indexer(self, name: str, **kwargs) -> None:
142
    """
143
    Reset an indexer's execution state.
144
    
145
    Parameters:
146
    - name (str): Name of the indexer to reset
147
    """
148

149
def get_indexer_status(self, name: str, **kwargs) -> SearchIndexerStatus:
150
    """
151
    Get the execution status and history of an indexer.
152
    
153
    Parameters:
154
    - name (str): Name of the indexer
155
    
156
    Returns:
157
    SearchIndexerStatus: Indexer execution status and history
158
    """
159
```
160

161
### Data Source Management
162

163
Configure connections to external data sources.
164

165
```python { .api }
166
def create_data_source_connection(
167
    self,
168
    data_source: SearchIndexerDataSourceConnection,
169
    **kwargs
170
) -> SearchIndexerDataSourceConnection:
171
    """
172
    Create a new data source connection.
173
    
174
    Parameters:
175
    - data_source (SearchIndexerDataSourceConnection): Data source definition
176
    
177
    Returns:
178
    SearchIndexerDataSourceConnection: The created data source
179
    """
180

181
def create_or_update_data_source_connection(
182
    self,
183
    data_source: SearchIndexerDataSourceConnection,
184
    *,
185
    if_match: Optional[str] = None,
186
    if_none_match: Optional[str] = None,
187
    cache_reset_requirements_ignored: Optional[bool] = None,
188
    **kwargs
189
) -> SearchIndexerDataSourceConnection:
190
    """
191
    Create or update a data source connection.
192
    
193
    Parameters:
194
    - data_source (SearchIndexerDataSourceConnection): Data source definition
195
    - if_match (str): ETag for conditional updates
196
    - if_none_match (str): ETag for conditional creation
197
    - cache_reset_requirements_ignored (bool): Ignore cache reset requirements
198
    
199
    Returns:
200
    SearchIndexerDataSourceConnection: The created or updated data source
201
    """
202

203
def get_data_source_connection(
204
    self, 
205
    name: str, 
206
    **kwargs
207
) -> SearchIndexerDataSourceConnection:
208
    """
209
    Retrieve a data source connection.
210
    
211
    Parameters:
212
    - name (str): Name of the data source
213
    
214
    Returns:
215
    SearchIndexerDataSourceConnection: The data source definition
216
    """
217

218
def get_data_source_connections(
219
    self,
220
    *,
221
    select: Optional[List[str]] = None,
222
    **kwargs
223
) -> Sequence[SearchIndexerDataSourceConnection]:
224
    """
225
    List all data source connections.
226
    
227
    Parameters:
228
    - select (List[str], optional): Fields to include in results
229
    
230
    Returns:
231
    Sequence[SearchIndexerDataSourceConnection]: List of data sources
232
    """
233

234
def get_data_source_connection_names(self, **kwargs) -> Sequence[str]:
235
    """
236
    List all data source connection names.
237
    
238
    Returns:
239
    Sequence[str]: List of data source names
240
    """
241

242
def delete_data_source_connection(
243
    self,
244
    data_source: Union[str, SearchIndexerDataSourceConnection],
245
    *,
246
    if_match: Optional[str] = None,
247
    if_none_match: Optional[str] = None,
248
    **kwargs
249
) -> None:
250
    """
251
    Delete a data source connection.
252
    
253
    Parameters:
254
    - data_source: Data source name or object
255
    - if_match (str): ETag for conditional deletion
256
    - if_none_match (str): ETag for conditional deletion
257
    """
258
```
259

260
### Skillset Management
261

262
Define and manage AI enrichment skillsets for cognitive processing.
263

264
```python { .api }
265
def create_skillset(self, skillset: SearchIndexerSkillset, **kwargs) -> SearchIndexerSkillset:
266
    """
267
    Create a new skillset.
268
    
269
    Parameters:
270
    - skillset (SearchIndexerSkillset): The skillset definition
271
    
272
    Returns:
273
    SearchIndexerSkillset: The created skillset
274
    """
275

276
def create_or_update_skillset(
277
    self,
278
    skillset: SearchIndexerSkillset,
279
    *,
280
    if_match: Optional[str] = None,
281
    if_none_match: Optional[str] = None,
282
    cache_reset_requirements_ignored: Optional[bool] = None,
283
    **kwargs
284
) -> SearchIndexerSkillset:
285
    """
286
    Create or update a skillset.
287
    
288
    Parameters:
289
    - skillset (SearchIndexerSkillset): The skillset definition
290
    - if_match (str): ETag for conditional updates
291
    - if_none_match (str): ETag for conditional creation
292
    - cache_reset_requirements_ignored (bool): Ignore cache reset requirements
293
    
294
    Returns:
295
    SearchIndexerSkillset: The created or updated skillset
296
    """
297

298
def get_skillset(self, name: str, **kwargs) -> SearchIndexerSkillset:
299
    """
300
    Retrieve a skillset definition.
301
    
302
    Parameters:
303
    - name (str): Name of the skillset
304
    
305
    Returns:
306
    SearchIndexerSkillset: The skillset definition
307
    """
308

309
def get_skillsets(
310
    self, 
311
    *, 
312
    select: Optional[List[str]] = None, 
313
    **kwargs
314
) -> Sequence[SearchIndexerSkillset]:
315
    """
316
    List all skillsets.
317
    
318
    Parameters:
319
    - select (List[str], optional): Fields to include in results
320
    
321
    Returns:
322
    Sequence[SearchIndexerSkillset]: List of skillsets
323
    """
324

325
def get_skillset_names(self, **kwargs) -> Sequence[str]:
326
    """
327
    List all skillset names.
328
    
329
    Returns:
330
    Sequence[str]: List of skillset names
331
    """
332

333
def delete_skillset(
334
    self,
335
    skillset: Union[str, SearchIndexerSkillset],
336
    *,
337
    if_match: Optional[str] = None,
338
    if_none_match: Optional[str] = None,
339
    **kwargs
340
) -> None:
341
    """
342
    Delete a skillset.
343
    
344
    Parameters:
345
    - skillset: Skillset name or object
346
    - if_match (str): ETag for conditional deletion
347
    - if_none_match (str): ETag for conditional deletion
348
    """
349
```
350

351
## Usage Examples
352

353
### Azure Blob Storage Indexer
354

355
```python
356
from azure.search.documents.indexes import SearchIndexerClient
357
from azure.search.documents.indexes.models import (
358
    SearchIndexer, SearchIndexerDataSourceConnection, SearchIndexerDataContainer,
359
    BlobIndexerParsingMode, IndexingSchedule
360
)
361
from azure.core.credentials import AzureKeyCredential
362

363
client = SearchIndexerClient(
364
    endpoint="https://service.search.windows.net",
365
    credential=AzureKeyCredential("admin-key")
366
)
367

368
# Create data source for Blob Storage
369
data_source = SearchIndexerDataSourceConnection(
370
    name="blob-datasource",
371
    type="azureblob",
372
    connection_string="DefaultEndpointsProtocol=https;AccountName=account;AccountKey=key;EndpointSuffix=core.windows.net",
373
    container=SearchIndexerDataContainer(name="documents")
374
)
375
client.create_data_source_connection(data_source)
376

377
# Create indexer with scheduling
378
indexer = SearchIndexer(
379
    name="blob-indexer",
380
    data_source_name="blob-datasource",
381
    target_index_name="documents-index",
382
    schedule=IndexingSchedule(interval="PT2H"),  # Run every 2 hours
383
    parameters={
384
        "batchSize": 1000,
385
        "maxFailedItems": 10,
386
        "maxFailedItemsPerBatch": 5,
387
        "configuration": {
388
            "parsingMode": BlobIndexerParsingMode.TEXT,
389
            "excludedFileNameExtensions": ".png,.jpeg,.jpg"
390
        }
391
    }
392
)
393
client.create_indexer(indexer)
394
```
395

396
### AI Enrichment with Skillset
397

398
```python
399
from azure.search.documents.indexes.models import (
400
    SearchIndexerSkillset, EntityRecognitionSkill, KeyPhraseExtractionSkill,
401
    LanguageDetectionSkill, MergeSkill, OcrSkill, ImageAnalysisSkill,
402
    InputFieldMappingEntry, OutputFieldMappingEntry
403
)
404

405
# Create skillset with cognitive skills
406
skillset = SearchIndexerSkillset(
407
    name="ai-skillset",
408
    description="Extract entities, key phrases, and analyze images",
409
    skills=[
410
        # OCR skill for image text extraction
411
        OcrSkill(
412
            inputs=[InputFieldMappingEntry(name="image", source="/document/normalized_images/*")],
413
            outputs=[OutputFieldMappingEntry(name="text", target_name="myText")]
414
        ),
415
        
416
        # Language detection
417
        LanguageDetectionSkill(
418
            inputs=[InputFieldMappingEntry(name="text", source="/document/content")],
419
            outputs=[OutputFieldMappingEntry(name="languageCode", target_name="languageCode")]
420
        ),
421
        
422
        # Key phrase extraction
423
        KeyPhraseExtractionSkill(
424
            inputs=[
425
                InputFieldMappingEntry(name="text", source="/document/content"),
426
                InputFieldMappingEntry(name="languageCode", source="/document/languageCode")
427
            ],
428
            outputs=[OutputFieldMappingEntry(name="keyPhrases", target_name="keyPhrases")]
429
        ),
430
        
431
        # Entity recognition
432
        EntityRecognitionSkill(
433
            inputs=[
434
                InputFieldMappingEntry(name="text", source="/document/content"),
435
                InputFieldMappingEntry(name="languageCode", source="/document/languageCode")
436
            ],
437
            outputs=[
438
                OutputFieldMappingEntry(name="persons", target_name="persons"),
439
                OutputFieldMappingEntry(name="organizations", target_name="organizations"),
440
                OutputFieldMappingEntry(name="locations", target_name="locations")
441
            ]
442
        )
443
    ]
444
)
445
client.create_skillset(skillset)
446

447
# Create indexer that uses the skillset
448
indexer = SearchIndexer(
449
    name="ai-enriched-indexer",
450
    data_source_name="blob-datasource",
451
    target_index_name="enriched-documents",
452
    skillset_name="ai-skillset",
453
    field_mappings=[
454
        {"sourceFieldName": "metadata_storage_path", "targetFieldName": "id"},
455
        {"sourceFieldName": "metadata_storage_name", "targetFieldName": "filename"}
456
    ],
457
    output_field_mappings=[
458
        {"sourceFieldName": "/document/keyPhrases", "targetFieldName": "keyPhrases"},
459
        {"sourceFieldName": "/document/persons", "targetFieldName": "persons"},
460
        {"sourceFieldName": "/document/organizations", "targetFieldName": "organizations"}
461
    ]
462
)
463
client.create_indexer(indexer)
464
```
465

466
### Custom Web API Skill
467

468
```python
469
from azure.search.documents.indexes.models import WebApiSkill
470

471
# Custom skill that calls external API
472
custom_skill = WebApiSkill(
473
    name="CustomTextClassifier",
474
    description="Classifies text using custom ML model",
475
    uri="https://your-api.com/classify",
476
    http_method="POST",
477
    http_headers={"Content-Type": "application/json"},
478
    inputs=[
479
        InputFieldMappingEntry(name="text", source="/document/content")
480
    ],
481
    outputs=[
482
        OutputFieldMappingEntry(name="category", target_name="category"),
483
        OutputFieldMappingEntry(name="confidence", target_name="confidence")
484
    ]
485
)
486

487
skillset = SearchIndexerSkillset(
488
    name="custom-skillset",
489
    skills=[custom_skill]
490
)
491
client.create_skillset(skillset)
492
```
493

494
### Monitor Indexer Execution
495

496
```python
497
# Run indexer and monitor status
498
client.run_indexer("my-indexer")
499

500
# Get execution status
501
status = client.get_indexer_status("my-indexer")
502
print(f"Status: {status.status}")
503
print(f"Last result: {status.last_result.status}")
504

505
# Check execution history
506
if status.execution_history:
507
    for execution in status.execution_history:
508
        print(f"Start: {execution.start_time}, Status: {execution.status}")
509
        if execution.errors:
510
            for error in execution.errors:
511
                print(f"Error: {error.error_message}")
512
```
513

514
### SQL Database Data Source
515

516
```python
517
# SQL database data source
518
sql_data_source = SearchIndexerDataSourceConnection(
519
    name="sql-datasource",
520
    type="azuresql",
521
    connection_string="Server=server.database.windows.net;Database=mydb;User ID=user;Password=pass;",
522
    container=SearchIndexerDataContainer(
523
        name="Products",
524
        query="SELECT ProductId, ProductName, Description, ModifiedDate FROM Products WHERE ModifiedDate > @HighWaterMark ORDER BY ModifiedDate"
525
    ),
526
    data_change_detection_policy={
527
        "@odata.type": "#Microsoft.Azure.Search.HighWaterMarkChangeDetectionPolicy",
528
        "highWaterMarkColumnName": "ModifiedDate"
529
    }
530
)
531
client.create_data_source_connection(sql_data_source)
532
```
533

534
## Common Types
535

536
```python { .api }
537
# Indexer definition
538
class SearchIndexer:
539
    name: str
540
    description: Optional[str] = None
541
    data_source_name: str
542
    skillset_name: Optional[str] = None
543
    target_index_name: str
544
    schedule: Optional[IndexingSchedule] = None
545
    parameters: Optional[IndexingParameters] = None
546
    field_mappings: Optional[List[FieldMapping]] = None
547
    output_field_mappings: Optional[List[FieldMapping]] = None
548
    is_disabled: Optional[bool] = False
549
    e_tag: Optional[str] = None
550
    encryption_key: Optional[SearchResourceEncryptionKey] = None
551

552
# Data source connection
553
class SearchIndexerDataSourceConnection:
554
    name: str
555
    description: Optional[str] = None
556
    type: str
557
    connection_string: str
558
    container: SearchIndexerDataContainer
559
    data_change_detection_policy: Optional[DataChangeDetectionPolicy] = None
560
    data_deletion_detection_policy: Optional[DataDeletionDetectionPolicy] = None
561
    e_tag: Optional[str] = None
562
    encryption_key: Optional[SearchResourceEncryptionKey] = None
563

564
# Skillset definition
565
class SearchIndexerSkillset:
566
    name: str
567
    description: Optional[str] = None
568
    skills: List[SearchIndexerSkill]
569
    cognitive_services_account: Optional[CognitiveServicesAccount] = None
570
    knowledge_store: Optional[SearchIndexerKnowledgeStore] = None
571
    e_tag: Optional[str] = None
572
    encryption_key: Optional[SearchResourceEncryptionKey] = None
573

574
# Indexer status
575
class SearchIndexerStatus:
576
    status: str
577
    last_result: Optional[IndexerExecutionResult] = None
578
    execution_history: Optional[List[IndexerExecutionResult]] = None
579
    limits: Optional[SearchIndexerLimits] = None
580

581
# Execution result
582
class IndexerExecutionResult:
583
    status: str
584
    start_time: Optional[datetime] = None
585
    end_time: Optional[datetime] = None
586
    error_message: Optional[str] = None
587
    errors: Optional[List[SearchIndexerError]] = None
588
    warnings: Optional[List[SearchIndexerWarning]] = None
589
    item_count: Optional[int] = None
590
    failed_item_count: Optional[int] = None
591
```

Version

Tile

Files

docs/indexer-management.md

docs/indexer-management.md