or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

agents-tools.md, documents-nodes.md, evaluation.md, index.md, indices.md, llms-embeddings.md, node-parsers.md, postprocessors.md, prompts.md, query-engines.md, retrievers.md, settings.md, storage.md

docs/llms-embeddings.md

0

# LLMs & Embeddings

1

2

Pluggable interfaces for language models and embedding systems, supporting both synchronous and asynchronous operations with extensive customization options. These interfaces enable integration with various LLM providers and embedding models while maintaining consistent APIs.

3

4

## Capabilities

5

6

### Base LLM Interface

7

8

Foundation interface for all language model implementations, providing standardized completion and chat methods.

9

10

```python { .api }

11

class LLM:

12

"""

13

Base language model interface with completion and chat capabilities.

14

15

Parameters:

16

- model_name: str, name identifier for the model

17

- context_window: int, maximum context window size in tokens

18

- max_new_tokens: Optional[int], maximum new tokens to generate

19

- system_prompt: Optional[str], default system prompt

20

- messages_to_prompt: Optional[Callable], function to convert messages to prompt

21

- completion_to_prompt: Optional[Callable], function to convert completion to prompt

22

- pydantic_program_mode: PydanticProgramMode, mode for Pydantic program execution

23

- output_parser: Optional[BaseOutputParser], parser for model output

24

"""

25

def __init__(

26

self,

27

model_name: str = "unknown",

28

context_window: int = 4096,

29

max_new_tokens: Optional[int] = None,

30

system_prompt: Optional[str] = None,

31

messages_to_prompt: Optional[Callable] = None,

32

completion_to_prompt: Optional[Callable] = None,

33

pydantic_program_mode: PydanticProgramMode = PydanticProgramMode.DEFAULT,

34

output_parser: Optional[BaseOutputParser] = None,

35

**kwargs

36

): ...

37

38

def complete(

39

self,

40

prompt: str,

41

formatted: bool = False,

42

**kwargs

43

) -> CompletionResponse:

44

"""

45

Complete a text prompt.

46

47

Parameters:

48

- prompt: str, the text prompt to complete

49

- formatted: bool, whether prompt is already formatted

50

51

Returns:

52

- CompletionResponse, completion result with text and metadata

53

"""

54

55

def stream_complete(

56

self,

57

prompt: str,

58

formatted: bool = False,

59

**kwargs

60

) -> CompletionResponseGen:

61

"""

62

Stream completion results for a text prompt.

63

64

Parameters:

65

- prompt: str, the text prompt to complete

66

- formatted: bool, whether prompt is already formatted

67

68

Returns:

69

- CompletionResponseGen, streaming completion generator

70

"""

71

72

def chat(

73

self,

74

messages: Sequence[ChatMessage],

75

**kwargs

76

) -> ChatResponse:

77

"""

78

Generate chat response from message history.

79

80

Parameters:

81

- messages: Sequence[ChatMessage], conversation history

82

83

Returns:

84

- ChatResponse, chat response with message and metadata

85

"""

86

87

def stream_chat(

88

self,

89

messages: Sequence[ChatMessage],

90

**kwargs

91

) -> ChatResponseGen:

92

"""

93

Stream chat response from message history.

94

95

Parameters:

96

- messages: Sequence[ChatMessage], conversation history

97

98

Returns:

99

- ChatResponseGen, streaming chat response generator

100

"""

101

102

async def acomplete(

103

self,

104

prompt: str,

105

formatted: bool = False,

106

**kwargs

107

) -> CompletionResponse:

108

"""Async version of complete method."""

109

110

async def astream_complete(

111

self,

112

prompt: str,

113

formatted: bool = False,

114

**kwargs

115

) -> CompletionResponseAsyncGen:

116

"""Async version of stream_complete method."""

117

118

async def achat(

119

self,

120

messages: Sequence[ChatMessage],

121

**kwargs

122

) -> ChatResponse:

123

"""Async version of chat method."""

124

125

async def astream_chat(

126

self,

127

messages: Sequence[ChatMessage],

128

**kwargs

129

) -> ChatResponseAsyncGen:

130

"""Async version of stream_chat method."""

131

132

@property

133

def metadata(self) -> LLMMetadata:

134

"""Get LLM metadata including context window and token limits."""

135

136

def get_num_tokens(self, text: str) -> int:

137

"""Get token count for text."""

138

139

def get_num_tokens_from_messages(self, messages: Sequence[ChatMessage]) -> int:

140

"""Get token count for message sequence."""

141

```

142

143

### Custom LLM Implementation

144

145

Base class for implementing custom language models with standardized interfaces.

146

147

```python { .api }

148

class CustomLLM(LLM):

149

"""

150

Base class for custom LLM implementations.

151

152

Subclasses must implement:

153

- _complete: Core completion logic

154

- _stream_complete: Core streaming completion logic

155

- _chat: Core chat logic (optional, defaults to completion-based)

156

- _stream_chat: Core streaming chat logic (optional, defaults to completion-based)

157

"""

158

159

def _complete(self, prompt: str, **kwargs) -> CompletionResponse:

160

"""Core completion implementation to be overridden."""

161

162

def _stream_complete(self, prompt: str, **kwargs) -> CompletionResponseGen:

163

"""Core streaming completion implementation to be overridden."""

164

165

def _chat(self, messages: Sequence[ChatMessage], **kwargs) -> ChatResponse:

166

"""Core chat implementation, defaults to completion-based."""

167

168

def _stream_chat(self, messages: Sequence[ChatMessage], **kwargs) -> ChatResponseGen:

169

"""Core streaming chat implementation, defaults to completion-based."""

170

```

171

172

### Mock LLM Implementation

173

174

Testing and development LLM that returns predictable responses without external API calls.

175

176

```python { .api }

177

class MockLLM(CustomLLM):

178

"""

179

Mock LLM for testing and development purposes.

180

181

Parameters:

182

- max_tokens: Optional[int], maximum tokens to return

183

- system_prompt: Optional[str], default system prompt

184

"""

185

def __init__(

186

self,

187

max_tokens: Optional[int] = None,

188

system_prompt: Optional[str] = None,

189

**kwargs

190

): ...

191

```

192

193

### LLM Response Types

194

195

Response structures for various LLM operations with rich metadata and content support.

196

197

```python { .api }

198

class CompletionResponse:

199

"""

200

Response from text completion operations.

201

202

Parameters:

203

- text: str, the completed text

204

- additional_kwargs: Optional[dict], additional response metadata

205

- raw: Optional[dict], raw response from the LLM provider

206

"""

207

def __init__(

208

self,

209

text: str,

210

additional_kwargs: Optional[dict] = None,

211

raw: Optional[dict] = None,

212

**kwargs

213

): ...

214

215

@property

216

def delta(self) -> Optional[str]:

217

"""Get response delta for streaming operations."""

218

219

class ChatResponse:

220

"""

221

Response from chat operations.

222

223

Parameters:

224

- message: ChatMessage, the response message

225

- raw: Optional[dict], raw response from the LLM provider

226

- additional_kwargs: Optional[dict], additional response metadata

227

"""

228

def __init__(

229

self,

230

message: ChatMessage,

231

raw: Optional[dict] = None,

232

additional_kwargs: Optional[dict] = None,

233

**kwargs

234

): ...

235

236

@property

237

def delta(self) -> Optional[str]:

238

"""Get response delta for streaming operations."""

239

240

# Type aliases for streaming responses

241

CompletionResponseGen = Generator[CompletionResponse, None, None]

242

CompletionResponseAsyncGen = AsyncGenerator[CompletionResponse, None]

243

ChatResponseGen = Generator[ChatResponse, None, None]

244

ChatResponseAsyncGen = AsyncGenerator[ChatResponse, None]

245

```

246

247

### Chat Messages & Roles

248

249

Structured message types for chat-based interactions with role-based organization.

250

251

```python { .api }

252

class ChatMessage:

253

"""

254

Individual message in a chat conversation.

255

256

Parameters:

257

- role: MessageRole, role of the message sender

258

- content: Union[str, List[ContentBlock]], message content

259

- additional_kwargs: Optional[dict], additional message metadata

260

- tool_calls: Optional[List[ToolCall]], tool calls in the message

261

- tool_call_id: Optional[str], identifier for tool call responses

262

"""

263

def __init__(

264

self,

265

role: MessageRole,

266

content: Union[str, List[ContentBlock]] = "",

267

additional_kwargs: Optional[dict] = None,

268

tool_calls: Optional[List[ToolCall]] = None,

269

tool_call_id: Optional[str] = None,

270

**kwargs

271

): ...

272

273

@classmethod

274

def from_str(

275

cls,

276

content: str,

277

role: str = MessageRole.USER,

278

**kwargs

279

) -> "ChatMessage":

280

"""Create ChatMessage from string content."""

281

282

class MessageRole(str, Enum):

283

"""Roles for chat message participants."""

284

SYSTEM = "system" # System instructions and context

285

USER = "user" # User input messages

286

ASSISTANT = "assistant" # Assistant/model responses

287

FUNCTION = "function" # Function call results (deprecated)

288

TOOL = "tool" # Tool execution results

289

```

290

291

### Content Block Types

292

293

Rich content support for multi-modal messages including text, images, and documents.

294

295

```python { .api }

296

class TextBlock:

297

"""

298

Text content block for messages.

299

300

Parameters:

301

- text: str, the text content

302

"""

303

def __init__(self, text: str): ...

304

305

class ImageBlock:

306

"""

307

Image content block for messages.

308

309

Parameters:

310

- image: str, base64 encoded image or image URL

311

- image_url: Optional[str], URL to image resource

312

- image_mimetype: Optional[str], MIME type of the image

313

"""

314

def __init__(

315

self,

316

image: str,

317

image_url: Optional[str] = None,

318

image_mimetype: Optional[str] = None

319

): ...

320

321

class AudioBlock:

322

"""

323

Audio content block for messages.

324

325

Parameters:

326

- audio: str, base64 encoded audio data

327

- audio_url: Optional[str], URL to audio resource

328

- audio_mimetype: Optional[str], MIME type of the audio

329

"""

330

def __init__(

331

self,

332

audio: str,

333

audio_url: Optional[str] = None,

334

audio_mimetype: Optional[str] = None

335

): ...

336

337

class DocumentBlock:

338

"""

339

Document content block for messages.

340

341

Parameters:

342

- document: str, base64 encoded document data

343

- document_url: Optional[str], URL to document resource

344

- document_mimetype: Optional[str], MIME type of the document

345

"""

346

def __init__(

347

self,

348

document: str,

349

document_url: Optional[str] = None,

350

document_mimetype: Optional[str] = None

351

): ...

352

```

353

354

### LLM Metadata & Configuration

355

356

Metadata structures for describing LLM capabilities and constraints.

357

358

```python { .api }

359

class LLMMetadata:

360

"""

361

Metadata describing LLM capabilities and limitations.

362

363

Parameters:

364

- context_window: int, maximum context window size in tokens

365

- num_output: int, maximum output tokens per request

366

- is_chat_model: bool, whether model supports chat interface

367

- is_function_calling_model: bool, whether model supports function calling

368

- model_name: str, name identifier for the model

369

- system_role: MessageRole, role used for system messages

370

"""

371

def __init__(

372

self,

373

context_window: int = 4096,

374

num_output: int = 256,

375

is_chat_model: bool = False,

376

is_function_calling_model: bool = False,

377

model_name: str = "unknown",

378

system_role: MessageRole = MessageRole.SYSTEM,

379

**kwargs

380

): ...

381

```

382

383

### Cache Control & Optimization

384

385

Advanced caching mechanisms for optimizing LLM performance and reducing costs.

386

387

```python { .api }

388

class CacheControl:

389

"""

390

Cache control settings for LLM optimization.

391

392

Parameters:

393

- type: str, cache control type (ephemeral, session, etc.)

394

"""

395

def __init__(self, type: str): ...

396

397

class CachePoint:

398

"""

399

Cache point configuration for specific content blocks.

400

401

Parameters:

402

- type: str, cache point type

403

"""

404

def __init__(self, type: str): ...

405

```

406

407

### Base Embedding Interface

408

409

Foundation interface for all embedding model implementations with text and batch processing support.

410

411

```python { .api }

412

class BaseEmbedding:

413

"""

414

Base interface for embedding models.

415

416

Parameters:

417

- model_name: str, name identifier for the embedding model

418

- embed_batch_size: int, batch size for embedding operations

419

- callback_manager: Optional[CallbackManager], callback management system

420

- num_workers: Optional[int], number of worker threads for parallel processing

421

"""

422

def __init__(

423

self,

424

model_name: str = "unknown",

425

embed_batch_size: int = 10,

426

callback_manager: Optional[CallbackManager] = None,

427

num_workers: Optional[int] = None,

428

**kwargs

429

): ...

430

431

def get_text_embedding(self, text: str) -> List[float]:

432

"""

433

Get embedding for single text string.

434

435

Parameters:

436

- text: str, input text to embed

437

438

Returns:

439

- List[float], embedding vector

440

"""

441

442

def get_text_embeddings(self, texts: List[str]) -> List[List[float]]:

443

"""

444

Get embeddings for multiple text strings.

445

446

Parameters:

447

- texts: List[str], list of input texts to embed

448

449

Returns:

450

- List[List[float]], list of embedding vectors

451

"""

452

453

async def aget_text_embedding(self, text: str) -> List[float]:

454

"""Async version of get_text_embedding."""

455

456

async def aget_text_embeddings(self, texts: List[str]) -> List[List[float]]:

457

"""Async version of get_text_embeddings."""

458

459

def get_query_embedding(self, query: str) -> List[float]:

460

"""

461

Get embedding for query text (may differ from document embedding).

462

463

Parameters:

464

- query: str, query text to embed

465

466

Returns:

467

- List[float], query embedding vector

468

"""

469

470

async def aget_query_embedding(self, query: str) -> List[float]:

471

"""Async version of get_query_embedding."""

472

473

def similarity(

474

self,

475

embedding1: List[float],

476

embedding2: List[float]

477

) -> float:

478

"""

479

Compute similarity between two embeddings.

480

481

Parameters:

482

- embedding1: List[float], first embedding vector

483

- embedding2: List[float], second embedding vector

484

485

Returns:

486

- float, similarity score

487

"""

488

```

489

490

### Mock Embedding Implementation

491

492

Testing and development embedding model that generates consistent vectors without external API calls.

493

494

```python { .api }

495

class MockEmbedding(BaseEmbedding):

496

"""

497

Mock embedding model for testing and development.

498

499

Parameters:

500

- embed_dim: int, dimensionality of embedding vectors

501

- deterministic: bool, whether to generate deterministic embeddings

502

"""

503

def __init__(

504

self,

505

embed_dim: int = 1536,

506

deterministic: bool = True,

507

**kwargs

508

): ...

509

```

510

511

### Multi-Modal Embedding Support

512

513

Extended embedding interface for handling multiple content modalities.

514

515

```python { .api }

516

class MultiModalEmbedding(BaseEmbedding):

517

"""

518

Multi-modal embedding interface supporting text, images, and other content types.

519

520

Parameters:

521

- model_name: str, name identifier for the multi-modal embedding model

522

- embed_batch_size: int, batch size for embedding operations

523

"""

524

def __init__(

525

self,

526

model_name: str = "unknown",

527

embed_batch_size: int = 10,

528

**kwargs

529

): ...

530

531

def get_image_embedding(self, img_file_path: str) -> List[float]:

532

"""

533

Get embedding for image file.

534

535

Parameters:

536

- img_file_path: str, path to image file

537

538

Returns:

539

- List[float], image embedding vector

540

"""

541

542

async def aget_image_embedding(self, img_file_path: str) -> List[float]:

543

"""Async version of get_image_embedding."""

544

```

545

546

### Embedding Utilities

547

548

Utility functions and classes for embedding model management and operations.

549

550

```python { .api }

551

class Pooling:

552

"""

553

Embedding pooling operations for combining token embeddings.

554

555

Parameters:

556

- pooling_type: str, type of pooling (mean, max, cls)

557

"""

558

def __init__(self, pooling_type: str = "mean"): ...

559

560

def pool(self, embeddings: List[List[float]]) -> List[float]:

561

"""

562

Pool multiple embeddings into single vector.

563

564

Parameters:

565

- embeddings: List[List[float]], embeddings to pool

566

567

Returns:

568

- List[float], pooled embedding vector

569

"""

570

571

def resolve_embed_model(embed_model: Union[str, BaseEmbedding]) -> BaseEmbedding:

572

"""

573

Resolve embedding model from string name or return existing instance.

574

575

Parameters:

576

- embed_model: Union[str, BaseEmbedding], model name or instance

577

578

Returns:

579

- BaseEmbedding, resolved embedding model instance

580

"""

581

```

582

583

### Multi-Modal LLM Interface

584

585

Language models with vision and multi-modal capabilities for processing images alongside text.

586

587

```python { .api }

588

class MultiModalLLM:

589

"""

590

Multi-modal language model interface for vision and text processing.

591

592

Parameters:

593

- model_name: str, name identifier for the model

594

- max_new_tokens: int, maximum new tokens to generate

595

- context_window: int, maximum context window size

596

"""

597

def __init__(

598

self,

599

model_name: str = "unknown",

600

max_new_tokens: int = 300,

601

context_window: int = 4096,

602

**kwargs

603

): ...

604

605

def complete(

606

self,

607

prompt: str,

608

image_documents: Sequence[ImageDocument],

609

**kwargs

610

) -> CompletionResponse:

611

"""

612

Complete prompt with image context.

613

614

Parameters:

615

- prompt: str, text prompt

616

- image_documents: Sequence[ImageDocument], images for context

617

618

Returns:

619

- CompletionResponse, completion with image understanding

620

"""

621

622

def stream_complete(

623

self,

624

prompt: str,

625

image_documents: Sequence[ImageDocument],

626

**kwargs

627

) -> CompletionResponseGen:

628

"""Stream completion with image context."""

629

630

async def acomplete(

631

self,

632

prompt: str,

633

image_documents: Sequence[ImageDocument],

634

**kwargs

635

) -> CompletionResponse:

636

"""Async completion with image context."""

637

638

@property

639

def metadata(self) -> MultiModalLLMMetadata:

640

"""Get multi-modal LLM metadata."""

641

642

class MultiModalLLMMetadata:

643

"""

644

Metadata for multi-modal LLM capabilities.

645

646

Parameters:

647

- num_output: int, maximum output tokens

648

- model_name: str, model identifier

649

"""

650

def __init__(

651

self,

652

num_output: int = 300,

653

model_name: str = "unknown"

654

): ...

655

```

656

657

## Usage Examples

658

659

### Basic LLM Usage

660

661

```python

662

# ChatMessage and MessageRole are exported by the llama_index.core.llms
# package itself, so every name this example needs comes from one import.
from llama_index.core.llms import ChatMessage, MessageRole, MockLLM

# Initialize mock LLM (no external API calls are made)
llm = MockLLM(max_tokens=256)

# Text completion
response = llm.complete("Explain machine learning in simple terms:")
print(response.text)

# Chat conversation: a system instruction followed by the user's question
messages = [
    ChatMessage(role=MessageRole.SYSTEM, content="You are a helpful assistant."),
    ChatMessage(role=MessageRole.USER, content="What is deep learning?"),
]

chat_response = llm.chat(messages)
print(chat_response.message.content)

680

```

681

682

### Streaming Responses

683

684

```python

685

# This snippet reuses `llm` and `messages` from the Basic LLM Usage example.

# Streaming completion
stream = llm.stream_complete("Write a short story about AI:")
for response in stream:
    # delta is Optional[str]; substitute "" so a chunk without a delta
    # does not print the literal text "None".
    print(response.delta or "", end="", flush=True)

# Streaming chat
stream = llm.stream_chat(messages)
for response in stream:
    print(response.delta or "", end="", flush=True)

694

```

695

696

### Basic Embedding Usage

697

698

```python

699

from llama_index.core.embeddings import MockEmbedding

# Build a mock embedder configured for 384-dimensional vectors
embed_model = MockEmbedding(embed_dim=384)

# Embed one sentence
sentence = "Machine learning is a subset of artificial intelligence."
sentence_vector = embed_model.get_text_embedding(sentence)
print(f"Embedding dimension: {len(sentence_vector)}")

# Embed several sentences in a single call
corpus = [
    "Natural language processing helps computers understand text.",
    "Computer vision enables machines to interpret images.",
    "Reinforcement learning trains agents through rewards.",
]
corpus_vectors = embed_model.get_text_embeddings(corpus)
print(f"Generated {len(corpus_vectors)} embeddings")

# Queries go through their own entry point (may differ from document embeddings)
query_vector = embed_model.get_query_embedding("What is AI?")

# Score the single sentence against the query
score = embed_model.similarity(sentence_vector, query_vector)
print(f"Similarity: {score:.3f}")

725

```

726

727

### Custom LLM Implementation

728

729

```python

730

# All four names are exported by the llama_index.core.llms package itself.
from llama_index.core.llms import (
    CompletionResponse,
    CompletionResponseGen,
    CustomLLM,
    LLMMetadata,
)


class MyCustomLLM(CustomLLM):
    """Example custom LLM implementation.

    Parameters:
    - model_path: str, path to the model
    """

    def __init__(self, model_path: str, **kwargs):
        # NOTE(review): if CustomLLM is a pydantic model in the installed
        # version, model_path may need to be declared as a field - confirm.
        self.model_path = model_path
        super().__init__(**kwargs)

    @property
    def metadata(self) -> LLMMetadata:
        """Describe this model's context window, output limit, and name."""
        return LLMMetadata(
            context_window=4096,
            num_output=512,
            model_name="my_custom_model",
        )

    def _complete(self, prompt: str, **kwargs) -> CompletionResponse:
        """Return a placeholder completion that echoes the prompt."""
        # Custom completion logic here
        generated_text = f"Generated response for: {prompt}"
        return CompletionResponse(text=generated_text)

    def _stream_complete(self, prompt: str, **kwargs) -> CompletionResponseGen:
        """Yield the full completion as a single streamed chunk."""
        # Custom streaming logic here
        response = self._complete(prompt, **kwargs)
        yield response


# Use custom LLM
custom_llm = MyCustomLLM(model_path="/path/to/model")
response = custom_llm.complete("Hello, world!")

761

```

762

763

### Multi-Modal Content

764

765

```python

766

# MessageRole is used below, so it must be imported with the other types.
from llama_index.core.llms import (
    ChatMessage,
    ImageBlock,
    MessageRole,
    TextBlock,
)

# Create message with image and text
message = ChatMessage(
    role=MessageRole.USER,
    content=[
        TextBlock(text="What do you see in this image?"),
        ImageBlock(image="base64_encoded_image_data"),
    ],
)

# Use in chat (with compatible multi-modal LLM)
# response = multimodal_llm.chat([message])

779

```

780

781

## Types & Configuration

782

783

```python { .api }

784

# Response type unions

785

Response = Union[str, ChatResponse, CompletionResponse]

786

RESPONSE_TYPE = Union[Response, StreamingResponse]

787

788

# Content block union

789

ContentBlock = Union[TextBlock, ImageBlock, AudioBlock, DocumentBlock]

790

791

# Pydantic program modes

792

class PydanticProgramMode(str, Enum):

793

DEFAULT = "default"

794

OPENAI = "openai"

795

LLM = "llm"

796

GUIDANCE = "guidance"

797

LM_FORMAT_ENFORCER = "lm-format-enforcer"

798

```