or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

async-clients.md, index-management.md, index.md, indexer-management.md, models.md, search-client.md

docs/indexer-management.md

0

# Data Ingestion and AI Enrichment

1

2

The SearchIndexerClient manages automated data ingestion through indexers that connect to various data sources, with optional AI-powered content enrichment through skillsets. This enables knowledge mining, document cracking, and cognitive enrichment of content during the indexing process.

3

4

## Capabilities

5

6

### Client Initialization

7

8

Create a SearchIndexerClient to manage indexers, data sources, and skillsets.

9

10

```python { .api }

11

class SearchIndexerClient:

12

def __init__(

13

self,

14

endpoint: str,

15

credential: Union[AzureKeyCredential, TokenCredential],

16

**kwargs

17

) -> None:

18

"""

19

Initialize SearchIndexerClient for indexer management.

20

21

Parameters:

22

- endpoint (str): The URL endpoint of an Azure search service

23

- credential: A credential to authorize requests

24

- api_version (str, optional): The Search API version to use

25

- audience (str, optional): AAD audience for authentication

26

"""

27

28

def close(self) -> None:

29

"""Close the session."""

30

31

def __enter__(self) -> "SearchIndexerClient": ...

32

def __exit__(self, *args) -> None: ...

33

```

34

35

### Indexer Management

36

37

Create, configure, and manage indexers for automated data ingestion.

38

39

```python { .api }

40

def create_indexer(self, indexer: SearchIndexer, **kwargs) -> SearchIndexer:

41

"""

42

Create a new indexer.

43

44

Parameters:

45

- indexer (SearchIndexer): The indexer definition

46

47

Returns:

48

SearchIndexer: The created indexer

49

"""

50

51

def create_or_update_indexer(

52

self,

53

indexer: SearchIndexer,

54

*,

55

if_match: Optional[str] = None,

56

if_none_match: Optional[str] = None,

57

cache_reprocessing_change_detection_disabled: Optional[bool] = None,

58

cache_reset_requirements_ignored: Optional[bool] = None,

59

**kwargs

60

) -> SearchIndexer:

61

"""

62

Create a new indexer or update an existing one.

63

64

Parameters:

65

- indexer (SearchIndexer): The indexer definition

66

- if_match (str): ETag for conditional updates

67

- if_none_match (str): ETag for conditional creation

68

- cache_reprocessing_change_detection_disabled (bool): Disable change detection

69

- cache_reset_requirements_ignored (bool): Ignore cache reset requirements

70

71

Returns:

72

SearchIndexer: The created or updated indexer

73

"""

74

75

def get_indexer(self, name: str, **kwargs) -> SearchIndexer:

76

"""

77

Retrieve an indexer definition.

78

79

Parameters:

80

- name (str): Name of the indexer

81

82

Returns:

83

SearchIndexer: The indexer definition

84

"""

85

86

def get_indexers(

87

self,

88

*,

89

select: Optional[List[str]] = None,

90

**kwargs

91

) -> Sequence[SearchIndexer]:

92

"""

93

List all indexers in the search service.

94

95

Parameters:

96

- select (List[str], optional): Fields to include in results

97

98

Returns:

99

Sequence[SearchIndexer]: List of indexers

100

"""

101

102

def get_indexer_names(self, **kwargs) -> Sequence[str]:

103

"""

104

List all indexer names.

105

106

Returns:

107

Sequence[str]: List of indexer names

108

"""

109

110

def delete_indexer(

111

self,

112

indexer: Union[str, SearchIndexer],

113

*,

114

if_match: Optional[str] = None,

115

if_none_match: Optional[str] = None,

116

**kwargs

117

) -> None:

118

"""

119

Delete an indexer.

120

121

Parameters:

122

- indexer: Indexer name or SearchIndexer object

123

- if_match (str): ETag for conditional deletion

124

- if_none_match (str): ETag for conditional deletion

125

"""

126

```

127

128

### Indexer Execution Control

129

130

Run, reset, and monitor indexer execution.

131

132

```python { .api }

133

def run_indexer(self, name: str, **kwargs) -> None:

134

"""

135

Run an indexer manually.

136

137

Parameters:

138

- name (str): Name of the indexer to run

139

"""

140

141

def reset_indexer(self, name: str, **kwargs) -> None:

142

"""

143

Reset an indexer's execution state.

144

145

Parameters:

146

- name (str): Name of the indexer to reset

147

"""

148

149

def get_indexer_status(self, name: str, **kwargs) -> SearchIndexerStatus:

150

"""

151

Get the execution status and history of an indexer.

152

153

Parameters:

154

- name (str): Name of the indexer

155

156

Returns:

157

SearchIndexerStatus: Indexer execution status and history

158

"""

159

```

160

161

### Data Source Management

162

163

Configure connections to external data sources.

164

165

```python { .api }

166

def create_data_source_connection(

167

self,

168

data_source: SearchIndexerDataSourceConnection,

169

**kwargs

170

) -> SearchIndexerDataSourceConnection:

171

"""

172

Create a new data source connection.

173

174

Parameters:

175

- data_source (SearchIndexerDataSourceConnection): Data source definition

176

177

Returns:

178

SearchIndexerDataSourceConnection: The created data source

179

"""

180

181

def create_or_update_data_source_connection(

182

self,

183

data_source: SearchIndexerDataSourceConnection,

184

*,

185

if_match: Optional[str] = None,

186

if_none_match: Optional[str] = None,

187

cache_reset_requirements_ignored: Optional[bool] = None,

188

**kwargs

189

) -> SearchIndexerDataSourceConnection:

190

"""

191

Create or update a data source connection.

192

193

Parameters:

194

- data_source (SearchIndexerDataSourceConnection): Data source definition

195

- if_match (str): ETag for conditional updates

196

- if_none_match (str): ETag for conditional creation

197

- cache_reset_requirements_ignored (bool): Ignore cache reset requirements

198

199

Returns:

200

SearchIndexerDataSourceConnection: The created or updated data source

201

"""

202

203

def get_data_source_connection(

204

self,

205

name: str,

206

**kwargs

207

) -> SearchIndexerDataSourceConnection:

208

"""

209

Retrieve a data source connection.

210

211

Parameters:

212

- name (str): Name of the data source

213

214

Returns:

215

SearchIndexerDataSourceConnection: The data source definition

216

"""

217

218

def get_data_source_connections(

219

self,

220

*,

221

select: Optional[List[str]] = None,

222

**kwargs

223

) -> Sequence[SearchIndexerDataSourceConnection]:

224

"""

225

List all data source connections.

226

227

Parameters:

228

- select (List[str], optional): Fields to include in results

229

230

Returns:

231

Sequence[SearchIndexerDataSourceConnection]: List of data sources

232

"""

233

234

def get_data_source_connection_names(self, **kwargs) -> Sequence[str]:

235

"""

236

List all data source connection names.

237

238

Returns:

239

Sequence[str]: List of data source names

240

"""

241

242

def delete_data_source_connection(

243

self,

244

data_source: Union[str, SearchIndexerDataSourceConnection],

245

*,

246

if_match: Optional[str] = None,

247

if_none_match: Optional[str] = None,

248

**kwargs

249

) -> None:

250

"""

251

Delete a data source connection.

252

253

Parameters:

254

- data_source: Data source name or object

255

- if_match (str): ETag for conditional deletion

256

- if_none_match (str): ETag for conditional deletion

257

"""

258

```

259

260

### Skillset Management

261

262

Define and manage AI enrichment skillsets for cognitive processing.

263

264

```python { .api }

265

def create_skillset(self, skillset: SearchIndexerSkillset, **kwargs) -> SearchIndexerSkillset:

266

"""

267

Create a new skillset.

268

269

Parameters:

270

- skillset (SearchIndexerSkillset): The skillset definition

271

272

Returns:

273

SearchIndexerSkillset: The created skillset

274

"""

275

276

def create_or_update_skillset(

277

self,

278

skillset: SearchIndexerSkillset,

279

*,

280

if_match: Optional[str] = None,

281

if_none_match: Optional[str] = None,

282

cache_reset_requirements_ignored: Optional[bool] = None,

283

**kwargs

284

) -> SearchIndexerSkillset:

285

"""

286

Create or update a skillset.

287

288

Parameters:

289

- skillset (SearchIndexerSkillset): The skillset definition

290

- if_match (str): ETag for conditional updates

291

- if_none_match (str): ETag for conditional creation

292

- cache_reset_requirements_ignored (bool): Ignore cache reset requirements

293

294

Returns:

295

SearchIndexerSkillset: The created or updated skillset

296

"""

297

298

def get_skillset(self, name: str, **kwargs) -> SearchIndexerSkillset:

299

"""

300

Retrieve a skillset definition.

301

302

Parameters:

303

- name (str): Name of the skillset

304

305

Returns:

306

SearchIndexerSkillset: The skillset definition

307

"""

308

309

def get_skillsets(

310

self,

311

*,

312

select: Optional[List[str]] = None,

313

**kwargs

314

) -> Sequence[SearchIndexerSkillset]:

315

"""

316

List all skillsets.

317

318

Parameters:

319

- select (List[str], optional): Fields to include in results

320

321

Returns:

322

Sequence[SearchIndexerSkillset]: List of skillsets

323

"""

324

325

def get_skillset_names(self, **kwargs) -> Sequence[str]:

326

"""

327

List all skillset names.

328

329

Returns:

330

Sequence[str]: List of skillset names

331

"""

332

333

def delete_skillset(

334

self,

335

skillset: Union[str, SearchIndexerSkillset],

336

*,

337

if_match: Optional[str] = None,

338

if_none_match: Optional[str] = None,

339

**kwargs

340

) -> None:

341

"""

342

Delete a skillset.

343

344

Parameters:

345

- skillset: Skillset name or object

346

- if_match (str): ETag for conditional deletion

347

- if_none_match (str): ETag for conditional deletion

348

"""

349

```

350

351

## Usage Examples

352

353

### Azure Blob Storage Indexer

354

355

```python

356

from azure.search.documents.indexes import SearchIndexerClient

357

from azure.search.documents.indexes.models import (

358

SearchIndexer, SearchIndexerDataSourceConnection, SearchIndexerDataContainer,

359

BlobIndexerParsingMode, IndexingSchedule

360

)

361

from azure.core.credentials import AzureKeyCredential

362

363

client = SearchIndexerClient(

364

endpoint="https://service.search.windows.net",

365

credential=AzureKeyCredential("admin-key")

366

)

367

368

# Create data source for Blob Storage

369

data_source = SearchIndexerDataSourceConnection(

370

name="blob-datasource",

371

type="azureblob",

372

connection_string="DefaultEndpointsProtocol=https;AccountName=account;AccountKey=key;EndpointSuffix=core.windows.net",

373

container=SearchIndexerDataContainer(name="documents")

374

)

375

client.create_data_source_connection(data_source)

376

377

# Create indexer with scheduling

378

indexer = SearchIndexer(

379

name="blob-indexer",

380

data_source_name="blob-datasource",

381

target_index_name="documents-index",

382

schedule=IndexingSchedule(interval="PT2H"), # Run every 2 hours

383

parameters={

384

"batchSize": 1000,

385

"maxFailedItems": 10,

386

"maxFailedItemsPerBatch": 5,

387

"configuration": {

388

"parsingMode": BlobIndexerParsingMode.TEXT,

389

"excludedFileNameExtensions": ".png,.jpeg,.jpg"

390

}

391

}

392

)

393

client.create_indexer(indexer)

394

```

395

396

### AI Enrichment with Skillset

397

398

```python

399

from azure.search.documents.indexes.models import (

400

SearchIndexerSkillset, EntityRecognitionSkill, KeyPhraseExtractionSkill,

401

LanguageDetectionSkill, MergeSkill, OcrSkill, ImageAnalysisSkill,

402

InputFieldMappingEntry, OutputFieldMappingEntry

403

)

404

405

# Create skillset with cognitive skills

406

skillset = SearchIndexerSkillset(

407

name="ai-skillset",

408

description="Extract entities, key phrases, and analyze images",

409

skills=[

410

# OCR skill for image text extraction

411

OcrSkill(

412

inputs=[InputFieldMappingEntry(name="image", source="/document/normalized_images/*")],

413

outputs=[OutputFieldMappingEntry(name="text", target_name="myText")]

414

),

415

416

# Language detection

417

LanguageDetectionSkill(

418

inputs=[InputFieldMappingEntry(name="text", source="/document/content")],

419

outputs=[OutputFieldMappingEntry(name="languageCode", target_name="languageCode")]

420

),

421

422

# Key phrase extraction

423

KeyPhraseExtractionSkill(

424

inputs=[

425

InputFieldMappingEntry(name="text", source="/document/content"),

426

InputFieldMappingEntry(name="languageCode", source="/document/languageCode")

427

],

428

outputs=[OutputFieldMappingEntry(name="keyPhrases", target_name="keyPhrases")]

429

),

430

431

# Entity recognition

432

EntityRecognitionSkill(

433

inputs=[

434

InputFieldMappingEntry(name="text", source="/document/content"),

435

InputFieldMappingEntry(name="languageCode", source="/document/languageCode")

436

],

437

outputs=[

438

OutputFieldMappingEntry(name="persons", target_name="persons"),

439

OutputFieldMappingEntry(name="organizations", target_name="organizations"),

440

OutputFieldMappingEntry(name="locations", target_name="locations")

441

]

442

)

443

]

444

)

445

client.create_skillset(skillset)

446

447

# Create indexer that uses the skillset

448

indexer = SearchIndexer(

449

name="ai-enriched-indexer",

450

data_source_name="blob-datasource",

451

target_index_name="enriched-documents",

452

skillset_name="ai-skillset",

453

field_mappings=[

454

{"sourceFieldName": "metadata_storage_path", "targetFieldName": "id"},

455

{"sourceFieldName": "metadata_storage_name", "targetFieldName": "filename"}

456

],

457

output_field_mappings=[

458

{"sourceFieldName": "/document/keyPhrases", "targetFieldName": "keyPhrases"},

459

{"sourceFieldName": "/document/persons", "targetFieldName": "persons"},

460

{"sourceFieldName": "/document/organizations", "targetFieldName": "organizations"}

461

]

462

)

463

client.create_indexer(indexer)

464

```

465

466

### Custom Web API Skill

467

468

```python

469

from azure.search.documents.indexes.models import WebApiSkill

470

471

# Custom skill that calls external API

472

custom_skill = WebApiSkill(

473

name="CustomTextClassifier",

474

description="Classifies text using custom ML model",

475

uri="https://your-api.com/classify",

476

http_method="POST",

477

http_headers={"Content-Type": "application/json"},

478

inputs=[

479

InputFieldMappingEntry(name="text", source="/document/content")

480

],

481

outputs=[

482

OutputFieldMappingEntry(name="category", target_name="category"),

483

OutputFieldMappingEntry(name="confidence", target_name="confidence")

484

]

485

)

486

487

skillset = SearchIndexerSkillset(

488

name="custom-skillset",

489

skills=[custom_skill]

490

)

491

client.create_skillset(skillset)

492

```

493

494

### Monitor Indexer Execution

495

496

```python

497

# Run indexer and monitor status

498

client.run_indexer("my-indexer")

499

500

# Get execution status

501

status = client.get_indexer_status("my-indexer")

502

print(f"Status: {status.status}")

503

print(f"Last result: {status.last_result.status}")

504

505

# Check execution history

506

if status.execution_history:

507

for execution in status.execution_history:

508

print(f"Start: {execution.start_time}, Status: {execution.status}")

509

if execution.errors:

510

for error in execution.errors:

511

print(f"Error: {error.error_message}")

512

```

513

514

### SQL Database Data Source

515

516

```python

517

# SQL database data source

518

sql_data_source = SearchIndexerDataSourceConnection(

519

name="sql-datasource",

520

type="azuresql",

521

connection_string="Server=server.database.windows.net;Database=mydb;User ID=user;Password=pass;",

522

container=SearchIndexerDataContainer(

523

name="Products",

524

query="SELECT ProductId, ProductName, Description, ModifiedDate FROM Products WHERE ModifiedDate > @HighWaterMark ORDER BY ModifiedDate"

525

),

526

data_change_detection_policy={

527

"@odata.type": "#Microsoft.Azure.Search.HighWaterMarkChangeDetectionPolicy",

528

"highWaterMarkColumnName": "ModifiedDate"

529

}

530

)

531

client.create_data_source_connection(sql_data_source)

532

```

533

534

## Common Types

535

536

```python { .api }

537

# Indexer definition

538

class SearchIndexer:

539

name: str

540

description: Optional[str] = None

541

data_source_name: str

542

skillset_name: Optional[str] = None

543

target_index_name: str

544

schedule: Optional[IndexingSchedule] = None

545

parameters: Optional[IndexingParameters] = None

546

field_mappings: Optional[List[FieldMapping]] = None

547

output_field_mappings: Optional[List[FieldMapping]] = None

548

is_disabled: Optional[bool] = False

549

e_tag: Optional[str] = None

550

encryption_key: Optional[SearchResourceEncryptionKey] = None

551

552

# Data source connection

553

class SearchIndexerDataSourceConnection:

554

name: str

555

description: Optional[str] = None

556

type: str

557

connection_string: str

558

container: SearchIndexerDataContainer

559

data_change_detection_policy: Optional[DataChangeDetectionPolicy] = None

560

data_deletion_detection_policy: Optional[DataDeletionDetectionPolicy] = None

561

e_tag: Optional[str] = None

562

encryption_key: Optional[SearchResourceEncryptionKey] = None

563

564

# Skillset definition

565

class SearchIndexerSkillset:

566

name: str

567

description: Optional[str] = None

568

skills: List[SearchIndexerSkill]

569

cognitive_services_account: Optional[CognitiveServicesAccount] = None

570

knowledge_store: Optional[SearchIndexerKnowledgeStore] = None

571

e_tag: Optional[str] = None

572

encryption_key: Optional[SearchResourceEncryptionKey] = None

573

574

# Indexer status

575

class SearchIndexerStatus:

576

status: str

577

last_result: Optional[IndexerExecutionResult] = None

578

execution_history: Optional[List[IndexerExecutionResult]] = None

579

limits: Optional[SearchIndexerLimits] = None

580

581

# Execution result

582

class IndexerExecutionResult:

583

status: str

584

start_time: Optional[datetime] = None

585

end_time: Optional[datetime] = None

586

error_message: Optional[str] = None

587

errors: Optional[List[SearchIndexerError]] = None

588

warnings: Optional[List[SearchIndexerWarning]] = None

589

item_count: Optional[int] = None

590

failed_item_count: Optional[int] = None

591

```