# Embeddings & Semantic Search

Generate vector embeddings for text and images, with multiple representation types and efficient batch processing. Both the modern semantic embedding API, optimized for search and similarity, and the legacy layer-based API are supported.

## Capabilities

### Semantic Embeddings

Modern embedding API for semantic search and similarity tasks, with representation types tuned to specific use cases.
```python { .api }
class SemanticEmbeddingRequest:
    prompt: Prompt
    representation: SemanticRepresentation
    compress_to_size: Optional[int] = None
    normalize: bool = False
    contextual_control_threshold: Optional[float] = None
    control_log_additive: Optional[bool] = True
    """
    Request for semantic embeddings optimized for search tasks.

    Attributes:
    - prompt: Input prompt (text, image, or multimodal)
    - representation: Embedding type for specific use cases
    - compress_to_size: Target embedding dimension (compression)
    - normalize: Normalize embeddings to unit length
    - contextual_control_threshold: Threshold for attention controls
    - control_log_additive: How to apply attention controls
    """

class SemanticEmbeddingResponse:
    model_version: str
    embedding: EmbeddingVector
    num_tokens_prompt_total: int
    message: Optional[str] = None
    """
    Response from semantic embedding request.

    Attributes:
    - model_version: Version of model used
    - embedding: Generated embedding vector
    - num_tokens_prompt_total: Total tokens processed
    - message: Optional response message
    """

def semantic_embed(
    self,
    request: SemanticEmbeddingRequest,
    model: str
) -> SemanticEmbeddingResponse:
    """
    Generate semantic embedding for single prompt.

    Parameters:
    - request: Embedding configuration
    - model: Model name to use

    Returns:
    SemanticEmbeddingResponse with embedding vector
    """
```
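
The request fields compose: `compress_to_size` shrinks the returned vector, and `normalize` yields unit length, which makes a plain dot product a cosine similarity. A minimal sketch, assuming the deployment supports a 128-dimension compression target (check your model's documentation for supported sizes):

```python
from aleph_alpha_client import Client, Prompt, SemanticEmbeddingRequest, SemanticRepresentation

client = Client(token="your-api-token")

# Compressed, unit-length embedding; the 128-dimension target is an assumption.
request = SemanticEmbeddingRequest(
    prompt=Prompt.from_text("Compact vector for caching"),
    representation=SemanticRepresentation.Symmetric,
    compress_to_size=128,
    normalize=True,
)
response = client.semantic_embed(request, model="luminous-extended")
assert len(response.embedding) == 128
```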

### Batch Semantic Embeddings

Efficient batch processing for multiple embeddings with concurrent request control and progress tracking.
```python { .api }
class BatchSemanticEmbeddingRequest:
    prompts: Sequence[Prompt]
    representation: SemanticRepresentation
    compress_to_size: Optional[int] = None
    normalize: bool = False
    contextual_control_threshold: Optional[float] = None
    control_log_additive: Optional[bool] = True
    """
    Request for batch semantic embeddings.

    Attributes:
    - prompts: Sequence of input prompts to embed
    - representation: Embedding type for all prompts
    - compress_to_size: Target embedding dimension
    - normalize: Normalize all embeddings
    - contextual_control_threshold: Threshold for attention controls
    - control_log_additive: How to apply attention controls
    """

class BatchSemanticEmbeddingResponse:
    model_version: str
    embeddings: Sequence[EmbeddingVector]
    num_tokens_prompt_total: int
    """
    Response from batch semantic embedding request.

    Attributes:
    - model_version: Version of model used
    - embeddings: Generated embedding vectors (same order as input)
    - num_tokens_prompt_total: Total tokens processed across all prompts
    """

def batch_semantic_embed(
    self,
    request: BatchSemanticEmbeddingRequest,
    model: Optional[str] = None
) -> BatchSemanticEmbeddingResponse:
    """
    Generate semantic embeddings for multiple prompts (sync).

    Parameters:
    - request: Batch embedding configuration
    - model: Model name to use (optional for some endpoints)

    Returns:
    BatchSemanticEmbeddingResponse with embedding vectors
    """

async def batch_semantic_embed(
    self,
    request: BatchSemanticEmbeddingRequest,
    model: Optional[str] = None,
    num_concurrent_requests: int = 1,
    batch_size: int = 100,
    progress_bar: bool = False
) -> BatchSemanticEmbeddingResponse:
    """
    Generate semantic embeddings for multiple prompts (async, with concurrency controls).

    Parameters:
    - request: Batch embedding configuration
    - model: Model name to use
    - num_concurrent_requests: Number of concurrent API requests
    - batch_size: Maximum prompts per batch
    - progress_bar: Show progress bar during processing

    Returns:
    BatchSemanticEmbeddingResponse with embedding vectors
    """
```
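
Judging by the parameter names, the async variant splits `prompts` into chunks of at most `batch_size` and keeps up to `num_concurrent_requests` chunks in flight, so the total request count comes out to roughly `ceil(len(prompts) / batch_size)`. A quick sketch of that arithmetic (the chunking behavior is inferred from the parameters, not taken from the client source):

```python
import math

# Assumed chunking: one underlying API call per batch of at most `batch_size` prompts.
num_prompts, batch_size, num_concurrent_requests = 1000, 50, 5

api_calls = math.ceil(num_prompts / batch_size)       # 20 calls in total
in_flight = min(num_concurrent_requests, api_calls)   # at most 5 at any time
print(api_calls, in_flight)
```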

### Legacy Embedding API

Original embedding API with layer-specific extraction and flexible pooling options.
```python { .api }
class EmbeddingRequest:
    prompt: Prompt
    layers: List[int]
    pooling: List[str]
    type: Optional[str] = None
    tokens: bool = False
    normalize: bool = False
    contextual_control_threshold: Optional[float] = None
    control_log_additive: Optional[bool] = True
    """
    Request for layer-based embeddings (legacy API).

    Attributes:
    - prompt: Input prompt
    - layers: Layer indices to extract embeddings from
    - pooling: Pooling operations to apply
    - type: Embedding type specification
    - tokens: Return token strings along with embeddings
    - normalize: Normalize embeddings
    - contextual_control_threshold: Threshold for attention controls
    - control_log_additive: How to apply attention controls
    """

class EmbeddingResponse:
    model_version: str
    num_tokens_prompt_total: int
    embeddings: Optional[Dict[Tuple[str, str], List[float]]]
    tokens: Optional[List[str]]
    message: Optional[str] = None
    """
    Response from layer-based embedding request.

    Attributes:
    - model_version: Version of model used
    - num_tokens_prompt_total: Total tokens processed
    - embeddings: Embeddings keyed by (layer, pooling) tuple
    - tokens: Token strings (if requested)
    - message: Optional response message
    """

def embed(self, request: EmbeddingRequest, model: str) -> EmbeddingResponse:
    """
    Generate layer-based embeddings.

    Parameters:
    - request: Embedding configuration
    - model: Model name to use

    Returns:
    EmbeddingResponse with layer-specific embeddings
    """
```
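
Because every layer is combined with every pooling operation, a request with `layers=[8, 12, 16]` and `pooling=["mean", "max"]` yields `3 * 2 = 6` vectors in `embeddings`. A small sketch of walking that structure; the exact key spelling (e.g. `("layer_8", "mean")` vs. `("8", "mean")`) varies by client version, so treat it as an assumption:

```python
from aleph_alpha_client import Client, EmbeddingRequest, Prompt

client = Client(token="your-api-token")
request = EmbeddingRequest(
    prompt=Prompt.from_text("Layer-wise features"),
    layers=[8, 12, 16],
    pooling=["mean", "max"],
)
response = client.embed(request, model="luminous-extended")

# One vector per (layer, pooling) combination: len(layers) * len(pooling).
if response.embeddings:
    assert len(response.embeddings) == 3 * 2
    for (layer, pooling), vector in sorted(response.embeddings.items()):
        print(f"{layer}/{pooling}: {len(vector)} dims")
```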

### OpenAI-Compatible Embeddings

OpenAI-compatible embedding API for easy migration and integration with existing tools.
```python { .api }
class EmbeddingV2Request:
    input: Union[str, List[str], List[int], List[List[int]]]
    dimensions: Optional[int] = None
    encoding_format: Optional[Literal["float", "base64"]] = None
    """
    OpenAI-compatible embedding request.

    Attributes:
    - input: Text strings or token arrays to embed
    - dimensions: Target embedding dimensions
    - encoding_format: Output encoding format
    """

class EmbeddingV2Response:
    object: str
    data: List[EmbeddingV2ResponseData]
    model: str
    usage: Usage
    """
    OpenAI-compatible embedding response.

    Attributes:
    - object: Response object type
    - data: Embedding data for each input
    - model: Model name used
    - usage: Token usage statistics
    """

def embeddings(
    self,
    request: EmbeddingV2Request,
    model: str
) -> EmbeddingV2Response:
    """
    Generate OpenAI-compatible embeddings.

    Parameters:
    - request: OpenAI-style embedding configuration
    - model: Model name to use

    Returns:
    EmbeddingV2Response with embeddings
    """
```
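
With `encoding_format="base64"`, each entry in `data` carries a base64 string instead of a float list, which cuts payload size for large batches. A decoding sketch, assuming the little-endian float32 packing that OpenAI-compatible endpoints commonly use (verify against your server):

```python
import base64
import numpy as np

def decode_base64_embedding(blob: str) -> np.ndarray:
    """Decode a base64-packed embedding; the float32 layout is an assumption."""
    return np.frombuffer(base64.b64decode(blob), dtype=np.float32)

# Usage (hypothetical): vector = decode_base64_embedding(response.data[0].embedding)
```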

### Representation Types

Enumeration defining different semantic representations optimized for specific use cases.
```python { .api }
class SemanticRepresentation(Enum):
    Symmetric = "symmetric"  # For similarity/clustering tasks
    Document = "document"    # For document representation in search
    Query = "query"          # For query representation in search
```

### Type Definitions
```python { .api }
# Type alias for embedding vectors
EmbeddingVector = List[float]

# Available pooling operations
POOLING_OPTIONS: List[str] = ["mean", "max", "last_token", "abs_max"]
```
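
Since `EmbeddingVector` is a plain `List[float]`, similarity math is left to the caller: for normalized vectors a dot product already is the cosine similarity; otherwise divide by both norms. A small helper sketch using numpy (no client-specific assumptions):

```python
from typing import List

import numpy as np

def cosine_similarity(a: List[float], b: List[float]) -> float:
    """Cosine similarity for arbitrary, not necessarily normalized, vectors."""
    a_arr = np.asarray(a, dtype=float)
    b_arr = np.asarray(b, dtype=float)
    return float(a_arr @ b_arr / (np.linalg.norm(a_arr) * np.linalg.norm(b_arr)))
```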

### Usage Examples

Various embedding use cases and batch processing patterns:
```python
from aleph_alpha_client import (
    Client, AsyncClient,
    SemanticEmbeddingRequest, SemanticEmbeddingResponse,
    BatchSemanticEmbeddingRequest,
    EmbeddingRequest, EmbeddingV2Request,
    SemanticRepresentation, Prompt
)

client = Client(token="your-api-token")

# Simple semantic embedding
request = SemanticEmbeddingRequest(
    prompt=Prompt.from_text("Machine learning is transforming technology"),
    representation=SemanticRepresentation.Symmetric,
    normalize=True
)
response = client.semantic_embed(request, model="luminous-extended")
embedding = response.embedding  # List[float]
print(f"Embedding dimension: {len(embedding)}")

# Document and query embeddings for search
documents = [
    "Python is a programming language",
    "Machine learning uses neural networks",
    "Data science involves statistical analysis"
]

# Embed documents
doc_prompts = [Prompt.from_text(doc) for doc in documents]
doc_request = BatchSemanticEmbeddingRequest(
    prompts=doc_prompts,
    representation=SemanticRepresentation.Document,
    normalize=True
)
doc_response = client.batch_semantic_embed(doc_request, model="luminous-extended")
doc_embeddings = doc_response.embeddings

# Embed query
query_request = SemanticEmbeddingRequest(
    prompt=Prompt.from_text("What is Python programming?"),
    representation=SemanticRepresentation.Query,
    normalize=True
)
query_response = client.semantic_embed(query_request, model="luminous-extended")
query_embedding = query_response.embedding

# Calculate similarities (dot product equals cosine similarity for normalized vectors)
import numpy as np

similarities = []
for doc_emb in doc_embeddings:
    similarity = np.dot(query_embedding, doc_emb)
    similarities.append(similarity)

# Find the most similar document
best_match_idx = np.argmax(similarities)
print(f"Most similar document: {documents[best_match_idx]}")
print(f"Similarity score: {similarities[best_match_idx]}")

# Multimodal embeddings
from aleph_alpha_client import Image, Text

image = Image.from_file("diagram.png")
multimodal_prompt = Prompt([
    Text.from_text("Technical diagram showing:"),
    image
])

multimodal_request = SemanticEmbeddingRequest(
    prompt=multimodal_prompt,
    representation=SemanticRepresentation.Symmetric,
    normalize=True
)
multimodal_response = client.semantic_embed(multimodal_request, model="luminous-extended")

# Batch processing with the async client
import asyncio

async def batch_embed_async():
    async with AsyncClient(token="your-api-token") as client:
        # Large batch with concurrent processing
        large_batch = [Prompt.from_text(f"Document {i}") for i in range(1000)]

        request = BatchSemanticEmbeddingRequest(
            prompts=large_batch,
            representation=SemanticRepresentation.Document,
            normalize=True
        )

        response = await client.batch_semantic_embed(
            request,
            model="luminous-extended",
            num_concurrent_requests=5,  # 5 concurrent API calls
            batch_size=50,              # 50 prompts per batch
            progress_bar=True           # Show progress
        )

        print(f"Generated {len(response.embeddings)} embeddings")
        print(f"Total tokens: {response.num_tokens_prompt_total}")

asyncio.run(batch_embed_async())

# Legacy embedding API with layer extraction
legacy_request = EmbeddingRequest(
    prompt=Prompt.from_text("Text for layer analysis"),
    layers=[8, 12, 16],      # Extract from layers 8, 12, and 16
    pooling=["mean", "max"], # Apply mean and max pooling
    tokens=True,             # Return token strings
    normalize=True
)
legacy_response = client.embed(legacy_request, model="luminous-extended")

# Access layer-specific embeddings
for (layer, pooling), embedding in legacy_response.embeddings.items():
    print(f"Layer {layer}, {pooling} pooling: {len(embedding)} dimensions")

if legacy_response.tokens:
    print(f"Tokens: {legacy_response.tokens}")

# OpenAI-compatible API
openai_request = EmbeddingV2Request(
    input=["Hello world", "Machine learning", "Data science"],
    dimensions=512,  # Compress to 512 dimensions
    encoding_format="float"
)
openai_response = client.embeddings(openai_request, model="luminous-extended")

for i, embedding_data in enumerate(openai_response.data):
    print(f"Input {i}: {len(embedding_data.embedding)} dimensions")

print(f"Usage: {openai_response.usage.total_tokens} tokens")
```
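
Putting the document/query pattern together: with normalized embeddings, ranking a whole corpus against one query reduces to a single matrix-vector product. A sketch building on the `doc_embeddings`, `query_embedding`, and `documents` variables from the examples above (`top_k` is a hypothetical helper, not part of the client):

```python
import numpy as np

def top_k(query_emb, doc_embs, k=2):
    """Indices and scores of the k most similar documents (normalized embeddings assumed)."""
    scores = np.asarray(doc_embs) @ np.asarray(query_emb)
    order = np.argsort(scores)[::-1][:k]
    return [(int(i), float(scores[i])) for i in order]

for idx, score in top_k(query_embedding, doc_embeddings):
    print(f"{score:.3f}  {documents[idx]}")
```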