Tessl Tile for pypi/elasticsearch@9.1.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

client-operations.md cluster-management.md esql-operations.md exception-handling.md helper-functions.md index-management.md index.md inference-api.md lifecycle-management.md machine-learning.md query-dsl.md search-operations.md security-operations.md vectorstore-helpers.md

docs/inference-api.md

0
# Inference API
1

2
The Inference API provides machine learning inference capabilities within Elasticsearch, supporting various AI services and models for text embeddings, completions, reranking, and sparse embeddings. It offers a unified interface for integrating with multiple AI providers.
3

4
## Capabilities
5

6
### Inference Operations
7

8
Run inference against a configured inference endpoint for tasks such as text embedding, sparse embedding, reranking, and completion.
9

10
```python { .api }
11
def inference(
12
    self,
13
    *,
14
    inference_id: str,
15
    input: Optional[Union[str, List[str]]] = None,
16
    query: Optional[str] = None,
17
    task_settings: Optional[Any] = None,
18
    timeout: Optional[Union[str, int]] = None,
19
    **kwargs
20
) -> ObjectApiResponse[Any]:
21
    """
22
    Perform general inference on a configured service.
23
    
24
    Parameters:
25
    - inference_id: The inference endpoint ID
26
    - input: Input text(s) for inference
27
    - query: Query text for reranking tasks
28
    - task_settings: Task-specific settings
29
    - timeout: Request timeout
30
    
31
    Returns:
32
    ObjectApiResponse with inference results
33
    """
34

35
def text_embedding(
36
    self,
37
    *,
38
    inference_id: str,
39
    input: Optional[Union[str, List[str]]] = None,
40
    task_settings: Optional[Any] = None,
41
    timeout: Optional[Union[str, int]] = None,
42
    **kwargs
43
) -> ObjectApiResponse[Any]:
44
    """
45
    Generate text embeddings using the specified inference service.
46
    
47
    Parameters:
48
    - inference_id: The embedding model inference ID
49
    - input: Text or list of texts to embed
50
    - task_settings: Model-specific embedding settings
51
    - timeout: Request timeout
52
    
53
    Returns:
54
    ObjectApiResponse with embedding vectors
55
    """
56

57
def sparse_embedding(
58
    self,
59
    *,
60
    inference_id: str,
61
    input: Optional[Union[str, List[str]]] = None,
62
    task_settings: Optional[Any] = None,
63
    timeout: Optional[Union[str, int]] = None,
64
    **kwargs
65
) -> ObjectApiResponse[Any]:
66
    """
67
    Generate sparse embeddings (e.g., SPLADE) using the specified service.
68
    
69
    Parameters:
70
    - inference_id: The sparse embedding model inference ID
71
    - input: Text or list of texts to embed
72
    - task_settings: Model-specific settings
73
    - timeout: Request timeout
74
    
75
    Returns:
76
    ObjectApiResponse with sparse embedding vectors
77
    """
78

79
def rerank(
80
    self,
81
    *,
82
    inference_id: str,
83
    input: Optional[List[str]] = None,
84
    query: Optional[str] = None,
85
    task_settings: Optional[Any] = None,
86
    timeout: Optional[Union[str, int]] = None,
87
    **kwargs
88
) -> ObjectApiResponse[Any]:
89
    """
90
    Rerank documents using the specified reranking service.
91
    
92
    Parameters:
93
    - inference_id: The reranking model inference ID
94
    - input: List of documents to rerank
95
    - query: Query text for relevance-based reranking
96
    - task_settings: Reranking-specific settings
97
    - timeout: Request timeout
98
    
99
    Returns:
100
    ObjectApiResponse with reranked documents and scores
101
    """
102

103
def completion(
104
    self,
105
    *,
106
    inference_id: str,
107
    input: Optional[Union[str, List[str]]] = None,
108
    task_settings: Optional[Any] = None,
109
    timeout: Optional[Union[str, int]] = None,
110
    **kwargs
111
) -> ObjectApiResponse[Any]:
112
    """
113
    Generate text completions using the specified language model.
114
    
115
    Parameters:
116
    - inference_id: The completion model inference ID
117
    - input: Prompt or list of prompts
118
    - task_settings: Generation settings (temperature, max_tokens, etc.)
119
    - timeout: Request timeout
120
    
121
    Returns:
122
    ObjectApiResponse with generated completions
123
    """
124
```
125

126
#### Usage Examples
127

128
```python
129
from elasticsearch import Elasticsearch
130

131
client = Elasticsearch(['http://localhost:9200'])
132

133
# Text embeddings for semantic search
134
embedding_response = client.inference.text_embedding(
135
    inference_id="my-embedding-model",
136
    input=["Hello world", "Machine learning is fascinating"]
137
)
138
embeddings = embedding_response.body['embeddings']
139

140
# Single text embedding
141
single_embedding = client.inference.text_embedding(
142
    inference_id="sentence-transformers",
143
    input="This is a sample document for embedding"
144
)
145

146
# Sparse embeddings for keyword-aware search
147
sparse_response = client.inference.sparse_embedding(
148
    inference_id="splade-model",
149
    input="Natural language processing with transformers"
150
)
151

152
# Document reranking for search relevance
153
rerank_response = client.inference.rerank(
154
    inference_id="cross-encoder-model",
155
    query="machine learning algorithms",
156
    input=[
157
        "Introduction to machine learning",
158
        "Deep learning with neural networks", 
159
        "Statistical analysis methods",
160
        "Reinforcement learning concepts"
161
    ]
162
)
163
ranked_docs = rerank_response.body['reranked']
164

165
# Text completion/generation
166
completion_response = client.inference.completion(
167
    inference_id="gpt-model",
168
    input="Explain quantum computing in simple terms:",
169
    task_settings={
170
        "max_tokens": 150,
171
        "temperature": 0.7
172
    }
173
)
174
generated_text = completion_response.body['completion']
175
```
176

177
### Inference Endpoint Management
178

179
Create, update, and manage inference endpoints for various AI services.
180

181
```python { .api }
182
def put(
183
    self,
184
    *,
185
    inference_id: str,
186
    task_type: str,
187
    inference_config: Dict[str, Any],
188
    **kwargs
189
) -> ObjectApiResponse[Any]:
190
    """
191
    Create or update a generic inference endpoint.
192
    
193
    Parameters:
194
    - inference_id: Unique identifier for the inference endpoint
195
    - task_type: Type of task (text_embedding, completion, rerank, sparse_embedding)
196
    - inference_config: Service-specific configuration
197
    
198
    Returns:
199
    ObjectApiResponse confirming endpoint creation
200
    """
201

202
def get(
203
    self,
204
    *,
205
    inference_id: Optional[str] = None,
206
    **kwargs
207
) -> ObjectApiResponse[Any]:
208
    """
209
    Get inference endpoint configuration(s).
210
    
211
    Parameters:
212
    - inference_id: Specific endpoint ID (omit for all endpoints)
213
    
214
    Returns:
215
    ObjectApiResponse with endpoint configuration(s)
216
    """
217

218
def delete(
219
    self,
220
    *,
221
    inference_id: str,
222
    **kwargs
223
) -> ObjectApiResponse[Any]:
224
    """
225
    Delete an inference endpoint.
226
    
227
    Parameters:
228
    - inference_id: The inference endpoint ID to delete
229
    
230
    Returns:
231
    ObjectApiResponse confirming deletion
232
    """
233

234
def update(
235
    self,
236
    *,
237
    inference_id: str,
238
    inference_config: Optional[Dict[str, Any]] = None,
239
    **kwargs
240
) -> ObjectApiResponse[Any]:
241
    """
242
    Update an existing inference endpoint configuration.
243
    
244
    Parameters:
245
    - inference_id: The inference endpoint ID to update
246
    - inference_config: Updated configuration
247
    
248
    Returns:
249
    ObjectApiResponse confirming update
250
    """
251
```
252

253
### AI Service Provider Support
254

255
The Inference API provides specialized methods for configuring popular AI service providers.
256

257
```python { .api }
258
def put_openai(
259
    self,
260
    *,
261
    inference_id: str,
262
    task_type: str,
263
    api_key: Optional[str] = None,
264
    model_id: Optional[str] = None,
265
    organization_id: Optional[str] = None,
266
    url: Optional[str] = None,
267
    **kwargs
268
) -> ObjectApiResponse[Any]:
269
    """Configure OpenAI inference endpoint."""
270

271
def put_azureopenai(
272
    self,
273
    *,
274
    inference_id: str,
275
    task_type: str,
276
    api_key: Optional[str] = None,
277
    api_version: Optional[str] = None,
278
    deployment_id: Optional[str] = None,
279
    resource_name: Optional[str] = None,
280
    **kwargs
281
) -> ObjectApiResponse[Any]:
282
    """Configure Azure OpenAI inference endpoint."""
283

284
def put_hugging_face(
285
    self,
286
    *,
287
    inference_id: str,
288
    task_type: str,
289
    api_key: Optional[str] = None,
290
    model_id: Optional[str] = None,
291
    url: Optional[str] = None,
292
    **kwargs
293
) -> ObjectApiResponse[Any]:
294
    """Configure Hugging Face inference endpoint."""
295

296
def put_cohere(
297
    self,
298
    *,
299
    inference_id: str,
300
    task_type: str,
301
    api_key: Optional[str] = None,
302
    model_id: Optional[str] = None,
303
    **kwargs
304
) -> ObjectApiResponse[Any]:
305
    """Configure Cohere inference endpoint."""
306

307
def put_anthropic(
308
    self,
309
    *,
310
    inference_id: str,
311
    task_type: str,
312
    api_key: Optional[str] = None,
313
    model_id: Optional[str] = None,
314
    **kwargs
315
) -> ObjectApiResponse[Any]:
316
    """Configure Anthropic inference endpoint."""
317

318
def put_amazonbedrock(
319
    self,
320
    *,
321
    inference_id: str,
322
    task_type: str,
323
    access_key_id: Optional[str] = None,
324
    secret_access_key: Optional[str] = None,
325
    region: Optional[str] = None,
326
    model_id: Optional[str] = None,
327
    **kwargs
328
) -> ObjectApiResponse[Any]:
329
    """Configure Amazon Bedrock inference endpoint."""
330

331
def put_googlevertexai(
332
    self,
333
    *,
334
    inference_id: str,
335
    task_type: str,
336
    service_account_json: Optional[str] = None,
337
    project_id: Optional[str] = None,
338
    location: Optional[str] = None,
339
    model_id: Optional[str] = None,
340
    **kwargs
341
) -> ObjectApiResponse[Any]:
342
    """Configure Google Vertex AI inference endpoint."""
343

344
def put_googleaistudio(
345
    self,
346
    *,
347
    inference_id: str,
348
    task_type: str,
349
    api_key: Optional[str] = None,
350
    model_id: Optional[str] = None,
351
    **kwargs
352
) -> ObjectApiResponse[Any]:
353
    """Configure Google AI Studio inference endpoint."""
354

355
def put_elasticsearch(
356
    self,
357
    *,
358
    inference_id: str,
359
    task_type: str,
360
    model_id: str,
361
    num_allocations: Optional[int] = None,
362
    num_threads: Optional[int] = None,
363
    **kwargs
364
) -> ObjectApiResponse[Any]:
365
    """Configure Elasticsearch built-in model inference endpoint."""
366

367
def put_elser(
368
    self,
369
    *,
370
    inference_id: str,
371
    num_allocations: Optional[int] = None,
372
    num_threads: Optional[int] = None,
373
    **kwargs
374
) -> ObjectApiResponse[Any]:
375
    """Configure Elasticsearch Learned Sparse Encoder (ELSER) endpoint."""
376
```
377

378
#### Service Configuration Examples
379

380
```python
381
# OpenAI embeddings
382
client.inference.put_openai(
383
    inference_id="openai-embeddings",
384
    task_type="text_embedding",
385
    api_key="sk-...",
386
    model_id="text-embedding-ada-002"
387
)
388

389
# Azure OpenAI completions
390
client.inference.put_azureopenai(
391
    inference_id="azure-gpt4",
392
    task_type="completion",
393
    api_key="...",
394
    api_version="2024-02-01",
395
    resource_name="my-resource",
396
    deployment_id="gpt-4-deployment"
397
)
398

399
# Hugging Face sentence transformers
400
client.inference.put_hugging_face(
401
    inference_id="sentence-transformers",
402
    task_type="text_embedding",
403
    api_key="hf_...",
404
    model_id="sentence-transformers/all-MiniLM-L6-v2"
405
)
406

407
# Cohere reranking
408
client.inference.put_cohere(
409
    inference_id="cohere-rerank",
410
    task_type="rerank",
411
    api_key="...",
412
    model_id="rerank-english-v2.0"
413
)
414

415
# Elasticsearch ELSER for sparse embeddings
416
client.inference.put_elser(
417
    inference_id="elser-sparse",
418
    num_allocations=1,
419
    num_threads=2
420
)
421

422
# Amazon Bedrock
423
client.inference.put_amazonbedrock(
424
    inference_id="bedrock-titan",
425
    task_type="text_embedding",
426
    access_key_id="AKIA...",
427
    secret_access_key="...",
428
    region="us-east-1",
429
    model_id="amazon.titan-embed-text-v1"
430
)
431

432
# Google Vertex AI
433
client.inference.put_googlevertexai(
434
    inference_id="vertex-palm",
435
    task_type="completion",
436
    service_account_json='{"type": "service_account", ...}',
437
    project_id="my-project",
438
    location="us-central1",
439
    model_id="text-bison@001"
440
)
441
```
442

443
### Additional Provider Support
444

445
Dedicated configuration methods are also available for the following additional AI service providers:
446

447
```python { .api }
448
def put_mistral(self, *, inference_id: str, task_type: str, api_key: str, model_id: str, **kwargs):
449
    """Configure Mistral AI inference endpoint."""
450

451
def put_voyageai(self, *, inference_id: str, task_type: str, api_key: str, model_id: str, **kwargs):
452
    """Configure VoyageAI inference endpoint."""
453

454
def put_jinaai(self, *, inference_id: str, task_type: str, api_key: str, model_id: str, **kwargs):
455
    """Configure Jina AI inference endpoint."""
456

457
def put_deepseek(self, *, inference_id: str, task_type: str, api_key: str, model_id: str, **kwargs):
458
    """Configure DeepSeek inference endpoint."""
459

460
def put_watsonx(self, *, inference_id: str, task_type: str, api_key: str, project_id: str, model_id: str, **kwargs):
461
    """Configure IBM watsonx inference endpoint."""
462

463
def put_azureaistudio(self, *, inference_id: str, task_type: str, api_key: str, target: str, **kwargs):
464
    """Configure Azure AI Studio inference endpoint."""
465

466
def put_alibabacloud(self, *, inference_id: str, task_type: str, api_key: str, model_id: str, **kwargs):
467
    """Configure Alibaba Cloud inference endpoint."""
468

469
def put_amazonsagemaker(self, *, inference_id: str, task_type: str, access_key_id: str, secret_access_key: str, region: str, endpoint_name: str, **kwargs):
470
    """Configure Amazon SageMaker inference endpoint."""
471

472
def put_custom(self, *, inference_id: str, task_type: str, url: str, **kwargs):
473
    """Configure custom inference endpoint."""
474
```
475

476
## Common Use Cases
477

478
### Semantic Search with Embeddings
479

480
```python
481
# 1. Configure embedding service
482
client.inference.put_openai(
483
    inference_id="embeddings",
484
    task_type="text_embedding", 
485
    api_key="sk-...",
486
    model_id="text-embedding-ada-002"
487
)
488

489
# 2. Create index with dense vector field
490
client.indices.create(
491
    index="documents",
492
    mappings={
493
        "properties": {
494
            "content": {"type": "text"},
495
            "embedding": {
496
                "type": "dense_vector",
497
                "dims": 1536,
498
                "index": True,
499
                "similarity": "cosine"
500
            }
501
        }
502
    }
503
)
504

505
# 3. Index documents with embeddings
506
doc = "Machine learning transforms data into insights"
507
embedding = client.inference.text_embedding(
508
    inference_id="embeddings",
509
    input=doc
510
)
511

512
client.index(
513
    index="documents",
514
    document={
515
        "content": doc,
516
        "embedding": embedding.body['embeddings'][0]['embedding']
517
    }
518
)
519

520
# 4. Search with semantic similarity
521
query_embedding = client.inference.text_embedding(
522
    inference_id="embeddings", 
523
    input="AI and data analysis"
524
)
525

526
results = client.search(
527
    index="documents",
528
    knn={
529
        "field": "embedding",
530
        "query_vector": query_embedding.body['embeddings'][0]['embedding'],
531
        "k": 10,
532
        "num_candidates": 100
533
    }
534
)
535
```
536

537
### RAG (Retrieval-Augmented Generation)
538

539
```python
540
# 1. Retrieve relevant documents
541
query = "What is quantum computing?"
542
query_embedding = client.inference.text_embedding(
543
    inference_id="embeddings",
544
    input=query
545
)
546

547
search_results = client.search(
548
    index="knowledge_base",
549
    knn={
550
        "field": "embedding", 
551
        "query_vector": query_embedding.body['embeddings'][0]['embedding'],
552
        "k": 5
553
    }
554
)
555

556
# 2. Rerank results for better relevance
557
documents = [hit['_source']['content'] for hit in search_results.body['hits']['hits']]
558
reranked = client.inference.rerank(
559
    inference_id="cohere-rerank",
560
    query=query,
561
    input=documents
562
)
563

564
# 3. Generate response with context
565
top_docs = [documents[idx] for idx in reranked.body['reranked'][:3]]
566
context = "\n\n".join(top_docs)
567
prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"
568

569
response = client.inference.completion(
570
    inference_id="gpt-4",
571
    input=prompt,
572
    task_settings={"max_tokens": 200, "temperature": 0.3}
573
)
574

575
answer = response.body['completion']
576
```
577

578
## Types
579

580
```python { .api }
581
from typing import Any, Dict, List, Literal, Optional, Union
582

583
# Task types
584
TaskType = Literal["text_embedding", "sparse_embedding", "completion", "rerank"]
585

586
# Service configurations
587
class InferenceConfig:
588
    service: str                    # Service provider name
589
    service_settings: Dict[str, Any]  # Provider-specific settings
590
    task_settings: Dict[str, Any]     # Task-specific settings
591

592
# Response types
593
class EmbeddingResponse:
594
    embeddings: List[Dict[str, Any]]  # Embedding vectors with metadata
595

596
class CompletionResponse:
597
    completion: str                   # Generated text
598
    usage: Optional[Dict[str, int]]   # Token usage statistics
599

600
class RerankResponse:
601
    reranked: List[int]              # Reordered document indices
602
    scores: List[float]              # Relevance scores
603

604
class SparseEmbeddingResponse:
605
    embeddings: List[Dict[str, Dict[str, float]]]  # Sparse vector representations
606
```

Version

Tile

Files

docs/inference-api.md

docs/inference-api.md