# Embeddings & Semantic Search

Generate vector embeddings for text and images, with multiple representation types and efficient batch processing. Supports the modern semantic embedding API, the legacy layer-based API, and an OpenAI-compatible endpoint.

## Capabilities

### Semantic Embeddings

Modern embedding API for semantic search and similarity tasks, with representation-specific optimization.

```python { .api }
class SemanticEmbeddingRequest:
    prompt: Prompt
    representation: SemanticRepresentation
    compress_to_size: Optional[int] = None
    normalize: bool = False
    contextual_control_threshold: Optional[float] = None
    control_log_additive: Optional[bool] = True
    """
    Request for semantic embeddings optimized for search tasks.

    Attributes:
    - prompt: Input prompt (text, image, or multimodal)
    - representation: Embedding type for specific use cases
    - compress_to_size: Target embedding dimension (compression)
    - normalize: Normalize embeddings to unit length
    - contextual_control_threshold: Threshold for attention controls
    - control_log_additive: How to apply attention controls
    """

class SemanticEmbeddingResponse:
    model_version: str
    embedding: EmbeddingVector
    num_tokens_prompt_total: int
    message: Optional[str] = None
    """
    Response from a semantic embedding request.

    Attributes:
    - model_version: Version of the model used
    - embedding: Generated embedding vector
    - num_tokens_prompt_total: Total tokens processed
    - message: Optional response message
    """

def semantic_embed(
    self,
    request: SemanticEmbeddingRequest,
    model: str
) -> SemanticEmbeddingResponse:
    """
    Generate a semantic embedding for a single prompt.

    Parameters:
    - request: Embedding configuration
    - model: Model name to use

    Returns:
        SemanticEmbeddingResponse with embedding vector
    """
```
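
A minimal sketch of requesting a compressed, unit-length vector with `compress_to_size`, assuming the `Client` setup from the usage examples below and a model that supports the requested target size (128 is an illustrative value):

```python
from aleph_alpha_client import Client, Prompt, SemanticEmbeddingRequest, SemanticRepresentation

client = Client(token="your-api-token")

# Request a compressed, normalized embedding; 128 is an assumed target size,
# since models only support specific compression dimensions.
request = SemanticEmbeddingRequest(
    prompt=Prompt.from_text("compact vector for a memory-constrained index"),
    representation=SemanticRepresentation.Symmetric,
    compress_to_size=128,
    normalize=True,
)
response = client.semantic_embed(request, model="luminous-extended")
print(len(response.embedding))  # expected to match compress_to_size
```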

### Batch Semantic Embeddings

Efficient batch processing for multiple embeddings with concurrent request control and progress tracking.

```python { .api }
class BatchSemanticEmbeddingRequest:
    prompts: Sequence[Prompt]
    representation: SemanticRepresentation
    compress_to_size: Optional[int] = None
    normalize: bool = False
    contextual_control_threshold: Optional[float] = None
    control_log_additive: Optional[bool] = True
    """
    Request for batch semantic embeddings.

    Attributes:
    - prompts: Sequence of input prompts to embed
    - representation: Embedding type for all prompts
    - compress_to_size: Target embedding dimension
    - normalize: Normalize all embeddings
    - contextual_control_threshold: Threshold for attention controls
    - control_log_additive: How to apply attention controls
    """

class BatchSemanticEmbeddingResponse:
    model_version: str
    embeddings: Sequence[EmbeddingVector]
    num_tokens_prompt_total: int
    """
    Response from a batch semantic embedding request.

    Attributes:
    - model_version: Version of the model used
    - embeddings: Generated embedding vectors (same order as input)
    - num_tokens_prompt_total: Total tokens processed across all prompts
    """

def batch_semantic_embed(
    self,
    request: BatchSemanticEmbeddingRequest,
    model: Optional[str] = None
) -> BatchSemanticEmbeddingResponse:
    """
    Generate semantic embeddings for multiple prompts (sync).

    Parameters:
    - request: Batch embedding configuration
    - model: Model name to use (optional for some endpoints)

    Returns:
        BatchSemanticEmbeddingResponse with embedding vectors
    """

async def batch_semantic_embed(
    self,
    request: BatchSemanticEmbeddingRequest,
    model: Optional[str] = None,
    num_concurrent_requests: int = 1,
    batch_size: int = 100,
    progress_bar: bool = False
) -> BatchSemanticEmbeddingResponse:
    """
    Generate semantic embeddings for multiple prompts (async with concurrency controls).

    Parameters:
    - request: Batch embedding configuration
    - model: Model name to use
    - num_concurrent_requests: Number of concurrent API requests
    - batch_size: Maximum prompts per batch
    - progress_bar: Show a progress bar during processing

    Returns:
        BatchSemanticEmbeddingResponse with embedding vectors
    """
```
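
Because `embeddings` preserves input order, results can be zipped back onto caller-side identifiers. A minimal sketch, reusing the `client` from the sketch above (`doc_ids` and `texts` are illustrative names):

```python
from aleph_alpha_client import BatchSemanticEmbeddingRequest, Prompt, SemanticRepresentation

doc_ids = ["doc-1", "doc-2", "doc-3"]  # illustrative identifiers
texts = ["first text", "second text", "third text"]

request = BatchSemanticEmbeddingRequest(
    prompts=[Prompt.from_text(t) for t in texts],
    representation=SemanticRepresentation.Document,
    normalize=True,
)
response = client.batch_semantic_embed(request, model="luminous-extended")

# The response docstring guarantees embeddings arrive in input order,
# so a plain zip reconstructs the ID -> vector mapping.
id_to_vector = dict(zip(doc_ids, response.embeddings))
```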

### Legacy Embedding API

Original embedding API with layer-specific extraction and flexible pooling options.

```python { .api }
class EmbeddingRequest:
    prompt: Prompt
    layers: List[int]
    pooling: List[str]
    type: Optional[str] = None
    tokens: bool = False
    normalize: bool = False
    contextual_control_threshold: Optional[float] = None
    control_log_additive: Optional[bool] = True
    """
    Request for layer-based embeddings (legacy API).

    Attributes:
    - prompt: Input prompt
    - layers: Layer indices to extract embeddings from
    - pooling: Pooling operations to apply
    - type: Embedding type specification
    - tokens: Return token strings along with embeddings
    - normalize: Normalize embeddings
    - contextual_control_threshold: Threshold for attention controls
    - control_log_additive: How to apply attention controls
    """

class EmbeddingResponse:
    model_version: str
    num_tokens_prompt_total: int
    embeddings: Optional[Dict[Tuple[str, str], List[float]]]
    tokens: Optional[List[str]]
    message: Optional[str] = None
    """
    Response from a layer-based embedding request.

    Attributes:
    - model_version: Version of the model used
    - num_tokens_prompt_total: Total tokens processed
    - embeddings: Embeddings keyed by (layer, pooling) tuple
    - tokens: Token strings (if requested)
    - message: Optional response message
    """

def embed(self, request: EmbeddingRequest, model: str) -> EmbeddingResponse:
    """
    Generate layer-based embeddings.

    Parameters:
    - request: Embedding configuration
    - model: Model name to use

    Returns:
        EmbeddingResponse with layer-specific embeddings
    """
```

### OpenAI-Compatible Embeddings

OpenAI-compatible embedding API for easy migration and integration with existing tools.

```python { .api }
class EmbeddingV2Request:
    input: Union[str, List[str], List[int], List[List[int]]]
    dimensions: Optional[int] = None
    encoding_format: Optional[Literal["float", "base64"]] = None
    """
    OpenAI-compatible embedding request.

    Attributes:
    - input: Text strings or token arrays to embed
    - dimensions: Target embedding dimensions
    - encoding_format: Output encoding format
    """

class EmbeddingV2Response:
    object: str
    data: List[EmbeddingV2ResponseData]
    model: str
    usage: Usage
    """
    OpenAI-compatible embedding response.

    Attributes:
    - object: Response object type
    - data: Embedding data for each input
    - model: Model name used
    - usage: Token usage statistics
    """

def embeddings(
    self,
    request: EmbeddingV2Request,
    model: str
) -> EmbeddingV2Response:
    """
    Generate OpenAI-compatible embeddings.

    Parameters:
    - request: OpenAI-style embedding configuration
    - model: Model name to use

    Returns:
        EmbeddingV2Response with embeddings
    """
```
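
The usage examples below stick to `encoding_format="float"`. If `"base64"` follows OpenAI's convention (an assumption, not confirmed by this document), each embedding arrives as base64-encoded little-endian float32 bytes and can be decoded like this:

```python
import base64
import struct
from typing import List

def decode_base64_embedding(payload: str) -> List[float]:
    # Assumed OpenAI-style convention: base64 over raw little-endian float32 values.
    raw = base64.b64decode(payload)
    return list(struct.unpack(f"<{len(raw) // 4}f", raw))
```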

### Representation Types

Enumeration defining different semantic representations optimized for specific use cases.

```python { .api }
class SemanticRepresentation(Enum):
    Symmetric = "symmetric"  # For similarity/clustering tasks
    Document = "document"    # For document representation in search
    Query = "query"          # For query representation in search
```
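
A common pattern is to pick the representation from the role the text plays: corpus items get `Document`, search strings get `Query`, and same-kind comparisons (clustering, deduplication) get `Symmetric`. A small illustrative helper, not part of the library:

```python
from aleph_alpha_client import SemanticRepresentation

def representation_for(role: str) -> SemanticRepresentation:
    # Hypothetical helper: map a caller-side role onto the documented enum.
    return {
        "document": SemanticRepresentation.Document,
        "query": SemanticRepresentation.Query,
    }.get(role, SemanticRepresentation.Symmetric)
```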

### Type Definitions

```python { .api }
# Type alias for embedding vectors
EmbeddingVector = List[float]

# Available pooling operations
POOLING_OPTIONS: List[str] = ["mean", "max", "last_token", "abs_max"]
```
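
The pooling list can drive a legacy request that extracts every documented pooling at once. A sketch assuming the `Client` setup from the usage examples below and a model for which layer 12 is a valid index; the options are inlined in case `POOLING_OPTIONS` is not exported from the package root:

```python
from aleph_alpha_client import Client, EmbeddingRequest, Prompt

client = Client(token="your-api-token")

pooling_options = ["mean", "max", "last_token", "abs_max"]  # POOLING_OPTIONS from above

request = EmbeddingRequest(
    prompt=Prompt.from_text("inspect pooled representations"),
    layers=[12],  # assumed valid layer index for the chosen model
    pooling=pooling_options,
)
response = client.embed(request, model="luminous-extended")
for (layer, pooling), vector in response.embeddings.items():
    print(f"layer {layer}, {pooling}: {len(vector)} dimensions")
```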

### Usage Examples

Various embedding use cases and batch processing patterns:

```python
from aleph_alpha_client import (
    Client, AsyncClient,
    SemanticEmbeddingRequest, SemanticEmbeddingResponse,
    BatchSemanticEmbeddingRequest,
    EmbeddingRequest, EmbeddingV2Request,
    SemanticRepresentation, Prompt
)

client = Client(token="your-api-token")

# Simple semantic embedding
request = SemanticEmbeddingRequest(
    prompt=Prompt.from_text("Machine learning is transforming technology"),
    representation=SemanticRepresentation.Symmetric,
    normalize=True
)
response = client.semantic_embed(request, model="luminous-extended")
embedding = response.embedding  # List[float]
print(f"Embedding dimension: {len(embedding)}")

# Document and query embeddings for search
documents = [
    "Python is a programming language",
    "Machine learning uses neural networks",
    "Data science involves statistical analysis"
]

# Embed documents
doc_prompts = [Prompt.from_text(doc) for doc in documents]
doc_request = BatchSemanticEmbeddingRequest(
    prompts=doc_prompts,
    representation=SemanticRepresentation.Document,
    normalize=True
)
doc_response = client.batch_semantic_embed(doc_request, model="luminous-extended")
doc_embeddings = doc_response.embeddings

# Embed query
query_request = SemanticEmbeddingRequest(
    prompt=Prompt.from_text("What is Python programming?"),
    representation=SemanticRepresentation.Query,
    normalize=True
)
query_response = client.semantic_embed(query_request, model="luminous-extended")
query_embedding = query_response.embedding

# Calculate similarities (dot product equals cosine similarity for normalized vectors)
import numpy as np

similarities = []
for doc_emb in doc_embeddings:
    similarity = np.dot(query_embedding, doc_emb)
    similarities.append(similarity)

# Find the most similar document
best_match_idx = np.argmax(similarities)
print(f"Most similar document: {documents[best_match_idx]}")
print(f"Similarity score: {similarities[best_match_idx]}")

# Multimodal embeddings
from aleph_alpha_client import Image, Text

image = Image.from_file("diagram.png")
multimodal_prompt = Prompt([
    Text.from_text("Technical diagram showing:"),
    image
])

multimodal_request = SemanticEmbeddingRequest(
    prompt=multimodal_prompt,
    representation=SemanticRepresentation.Symmetric,
    normalize=True
)
multimodal_response = client.semantic_embed(multimodal_request, model="luminous-extended")

# Batch processing with async client
import asyncio

async def batch_embed_async():
    async with AsyncClient(token="your-api-token") as client:
        # Large batch with concurrent processing
        large_batch = [Prompt.from_text(f"Document {i}") for i in range(1000)]

        request = BatchSemanticEmbeddingRequest(
            prompts=large_batch,
            representation=SemanticRepresentation.Document,
            normalize=True
        )

        response = await client.batch_semantic_embed(
            request,
            model="luminous-extended",
            num_concurrent_requests=5,  # 5 concurrent API calls
            batch_size=50,              # 50 prompts per batch
            progress_bar=True           # Show progress
        )

        print(f"Generated {len(response.embeddings)} embeddings")
        print(f"Total tokens: {response.num_tokens_prompt_total}")

asyncio.run(batch_embed_async())

# Legacy embedding API with layer extraction
legacy_request = EmbeddingRequest(
    prompt=Prompt.from_text("Text for layer analysis"),
    layers=[8, 12, 16],       # Extract from layers 8, 12, and 16
    pooling=["mean", "max"],  # Apply mean and max pooling
    tokens=True,              # Return token strings
    normalize=True
)
legacy_response = client.embed(legacy_request, model="luminous-extended")

# Access layer-specific embeddings
for (layer, pooling), embedding in legacy_response.embeddings.items():
    print(f"Layer {layer}, {pooling} pooling: {len(embedding)} dimensions")

if legacy_response.tokens:
    print(f"Tokens: {legacy_response.tokens}")

# OpenAI-compatible API
openai_request = EmbeddingV2Request(
    input=["Hello world", "Machine learning", "Data science"],
    dimensions=512,  # Compress to 512 dimensions
    encoding_format="float"
)
openai_response = client.embeddings(openai_request, model="luminous-extended")

for i, embedding_data in enumerate(openai_response.data):
    print(f"Input {i}: {len(embedding_data.embedding)} dimensions")

print(f"Usage: {openai_response.usage.total_tokens} tokens")
```