Interface between LLMs and your data
—
Pluggable interfaces for language models and embedding systems, supporting both synchronous and asynchronous operations with extensive customization options. These interfaces enable integration with various LLM providers and embedding models while maintaining consistent APIs.
Foundation interface for all language model implementations, providing standardized completion and chat methods.
class LLM:
"""
Base language model interface with completion and chat capabilities.
Parameters:
- model_name: str, name identifier for the model
- context_window: int, maximum context window size in tokens
- max_new_tokens: Optional[int], maximum new tokens to generate
- system_prompt: Optional[str], default system prompt
- messages_to_prompt: Optional[Callable], function to convert messages to prompt
- completion_to_prompt: Optional[Callable], function to convert completion to prompt
- pydantic_program_mode: PydanticProgramMode, mode for Pydantic program execution
- output_parser: Optional[BaseOutputParser], parser for model output
"""
def __init__(
self,
model_name: str = "unknown",
context_window: int = 4096,
max_new_tokens: Optional[int] = None,
system_prompt: Optional[str] = None,
messages_to_prompt: Optional[Callable] = None,
completion_to_prompt: Optional[Callable] = None,
pydantic_program_mode: PydanticProgramMode = PydanticProgramMode.DEFAULT,
output_parser: Optional[BaseOutputParser] = None,
**kwargs
): ...
def complete(
self,
prompt: str,
formatted: bool = False,
**kwargs
) -> CompletionResponse:
"""
Complete a text prompt.
Parameters:
- prompt: str, the text prompt to complete
- formatted: bool, whether prompt is already formatted
Returns:
- CompletionResponse, completion result with text and metadata
"""
def stream_complete(
self,
prompt: str,
formatted: bool = False,
**kwargs
) -> CompletionResponseGen:
"""
Stream completion results for a text prompt.
Parameters:
- prompt: str, the text prompt to complete
- formatted: bool, whether prompt is already formatted
Returns:
- CompletionResponseGen, streaming completion generator
"""
def chat(
self,
messages: Sequence[ChatMessage],
**kwargs
) -> ChatResponse:
"""
Generate chat response from message history.
Parameters:
- messages: Sequence[ChatMessage], conversation history
Returns:
- ChatResponse, chat response with message and metadata
"""
def stream_chat(
self,
messages: Sequence[ChatMessage],
**kwargs
) -> ChatResponseGen:
"""
Stream chat response from message history.
Parameters:
- messages: Sequence[ChatMessage], conversation history
Returns:
- ChatResponseGen, streaming chat response generator
"""
async def acomplete(
self,
prompt: str,
formatted: bool = False,
**kwargs
) -> CompletionResponse:
"""Async version of complete method."""
async def astream_complete(
self,
prompt: str,
formatted: bool = False,
**kwargs
) -> CompletionResponseAsyncGen:
"""Async version of stream_complete method."""
async def achat(
self,
messages: Sequence[ChatMessage],
**kwargs
) -> ChatResponse:
"""Async version of chat method."""
async def astream_chat(
self,
messages: Sequence[ChatMessage],
**kwargs
) -> ChatResponseAsyncGen:
"""Async version of stream_chat method."""
@property
def metadata(self) -> LLMMetadata:
"""Get LLM metadata including context window and token limits."""
def get_num_tokens(self, text: str) -> int:
"""Get token count for text."""
def get_num_tokens_from_messages(self, messages: Sequence[ChatMessage]) -> int:
"""Get token count for message sequence."""Base class for implementing custom language models with standardized interfaces.
class CustomLLM(LLM):
"""
Base class for custom LLM implementations.
Subclasses must implement:
- _complete: Core completion logic
- _stream_complete: Core streaming completion logic
- _chat: Core chat logic (optional, defaults to completion-based)
- _stream_chat: Core streaming chat logic (optional)
"""
def _complete(self, prompt: str, **kwargs) -> CompletionResponse:
"""Core completion implementation to be overridden."""
def _stream_complete(self, prompt: str, **kwargs) -> CompletionResponseGen:
"""Core streaming completion implementation to be overridden."""
def _chat(self, messages: Sequence[ChatMessage], **kwargs) -> ChatResponse:
"""Core chat implementation, defaults to completion-based."""
def _stream_chat(self, messages: Sequence[ChatMessage], **kwargs) -> ChatResponseGen:
"""Core streaming chat implementation, defaults to completion-based."""Testing and development LLM that returns predictable responses without external API calls.
class MockLLM(CustomLLM):
"""
Mock LLM for testing and development purposes.
Parameters:
- max_tokens: Optional[int], maximum tokens to return
- system_prompt: Optional[str], default system prompt
"""
def __init__(
self,
max_tokens: Optional[int] = None,
system_prompt: Optional[str] = None,
**kwargs
): ...
Response structures for various LLM operations with rich metadata and content support.
class CompletionResponse:
"""
Response from text completion operations.
Parameters:
- text: str, the completed text
- additional_kwargs: Optional[dict], additional response metadata
- raw: Optional[dict], raw response from the LLM provider
"""
def __init__(
self,
text: str,
additional_kwargs: Optional[dict] = None,
raw: Optional[dict] = None,
**kwargs
): ...
@property
def delta(self) -> Optional[str]:
"""Get response delta for streaming operations."""
class ChatResponse:
"""
Response from chat operations.
Parameters:
- message: ChatMessage, the response message
- raw: Optional[dict], raw response from the LLM provider
- additional_kwargs: Optional[dict], additional response metadata
"""
def __init__(
self,
message: ChatMessage,
raw: Optional[dict] = None,
additional_kwargs: Optional[dict] = None,
**kwargs
): ...
@property
def delta(self) -> Optional[str]:
"""Get response delta for streaming operations."""
# Type aliases for streaming responses
CompletionResponseGen = Generator[CompletionResponse, None, None]
CompletionResponseAsyncGen = AsyncGenerator[CompletionResponse, None]
ChatResponseGen = Generator[ChatResponse, None, None]
ChatResponseAsyncGen = AsyncGenerator[ChatResponse, None]
Structured message types for chat-based interactions with role-based organization.
class ChatMessage:
"""
Individual message in a chat conversation.
Parameters:
- role: MessageRole, role of the message sender
- content: Union[str, List[ContentBlock]], message content
- additional_kwargs: Optional[dict], additional message metadata
- tool_calls: Optional[List[ToolCall]], tool calls in the message
- tool_call_id: Optional[str], identifier for tool call responses
"""
def __init__(
self,
role: MessageRole,
content: Union[str, List[ContentBlock]] = "",
additional_kwargs: Optional[dict] = None,
tool_calls: Optional[List[ToolCall]] = None,
tool_call_id: Optional[str] = None,
**kwargs
): ...
@classmethod
def from_str(
cls,
content: str,
role: str = MessageRole.USER,
**kwargs
) -> "ChatMessage":
"""Create ChatMessage from string content."""
class MessageRole(str, Enum):
"""Roles for chat message participants."""
SYSTEM = "system" # System instructions and context
USER = "user" # User input messages
ASSISTANT = "assistant" # Assistant/model responses
FUNCTION = "function" # Function call results (deprecated)
TOOL = "tool" # Tool execution resultsRich content support for multi-modal messages including text, images, and documents.
class TextBlock:
"""
Text content block for messages.
Parameters:
- text: str, the text content
"""
def __init__(self, text: str): ...
class ImageBlock:
"""
Image content block for messages.
Parameters:
- image: str, base64 encoded image or image URL
- image_url: Optional[str], URL to image resource
- image_mimetype: Optional[str], MIME type of the image
"""
def __init__(
self,
image: str,
image_url: Optional[str] = None,
image_mimetype: Optional[str] = None
): ...
class AudioBlock:
"""
Audio content block for messages.
Parameters:
- audio: str, base64 encoded audio data
- audio_url: Optional[str], URL to audio resource
- audio_mimetype: Optional[str], MIME type of the audio
"""
def __init__(
self,
audio: str,
audio_url: Optional[str] = None,
audio_mimetype: Optional[str] = None
): ...
class DocumentBlock:
"""
Document content block for messages.
Parameters:
- document: str, base64 encoded document data
- document_url: Optional[str], URL to document resource
- document_mimetype: Optional[str], MIME type of the document
"""
def __init__(
self,
document: str,
document_url: Optional[str] = None,
document_mimetype: Optional[str] = None
): ...
Metadata structures for describing LLM capabilities and constraints.
class LLMMetadata:
"""
Metadata describing LLM capabilities and limitations.
Parameters:
- context_window: int, maximum context window size in tokens
- num_output: int, maximum output tokens per request
- is_chat_model: bool, whether model supports chat interface
- is_function_calling_model: bool, whether model supports function calling
- model_name: str, name identifier for the model
- system_role: MessageRole, role used for system messages
"""
def __init__(
self,
context_window: int = 4096,
num_output: int = 256,
is_chat_model: bool = False,
is_function_calling_model: bool = False,
model_name: str = "unknown",
system_role: MessageRole = MessageRole.SYSTEM,
**kwargs
): ...
Advanced caching mechanisms for optimizing LLM performance and reducing costs.
class CacheControl:
"""
Cache control settings for LLM optimization.
Parameters:
- type: str, cache control type (ephemeral, session, etc.)
"""
def __init__(self, type: str): ...
class CachePoint:
"""
Cache point configuration for specific content blocks.
Parameters:
- type: str, cache point type
"""
def __init__(self, type: str): ...
Foundation interface for all embedding model implementations with text and batch processing support.
class BaseEmbedding:
"""
Base interface for embedding models.
Parameters:
- model_name: str, name identifier for the embedding model
- embed_batch_size: int, batch size for embedding operations
- callback_manager: Optional[CallbackManager], callback management system
- num_workers: Optional[int], number of worker threads for parallel processing
"""
def __init__(
self,
model_name: str = "unknown",
embed_batch_size: int = 10,
callback_manager: Optional[CallbackManager] = None,
num_workers: Optional[int] = None,
**kwargs
): ...
def get_text_embedding(self, text: str) -> List[float]:
"""
Get embedding for single text string.
Parameters:
- text: str, input text to embed
Returns:
- List[float], embedding vector
"""
def get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
"""
Get embeddings for multiple text strings.
Parameters:
- texts: List[str], list of input texts to embed
Returns:
- List[List[float]], list of embedding vectors
"""
async def aget_text_embedding(self, text: str) -> List[float]:
"""Async version of get_text_embedding."""
async def aget_text_embeddings(self, texts: List[str]) -> List[List[float]]:
"""Async version of get_text_embeddings."""
def get_query_embedding(self, query: str) -> List[float]:
"""
Get embedding for query text (may differ from document embedding).
Parameters:
- query: str, query text to embed
Returns:
- List[float], query embedding vector
"""
async def aget_query_embedding(self, query: str) -> List[float]:
"""Async version of get_query_embedding."""
def similarity(
self,
embedding1: List[float],
embedding2: List[float]
) -> float:
"""
Compute similarity between two embeddings.
Parameters:
- embedding1: List[float], first embedding vector
- embedding2: List[float], second embedding vector
Returns:
- float, similarity score
"""Testing and development embedding model that generates consistent vectors without external API calls.
class MockEmbedding(BaseEmbedding):
"""
Mock embedding model for testing and development.
Parameters:
- embed_dim: int, dimensionality of embedding vectors
- deterministic: bool, whether to generate deterministic embeddings
"""
def __init__(
self,
embed_dim: int = 1536,
deterministic: bool = True,
**kwargs
): ...
Extended embedding interface for handling multiple content modalities.
class MultiModalEmbedding(BaseEmbedding):
"""
Multi-modal embedding interface supporting text, images, and other content types.
Parameters:
- model_name: str, name identifier for the multi-modal embedding model
- embed_batch_size: int, batch size for embedding operations
"""
def __init__(
self,
model_name: str = "unknown",
embed_batch_size: int = 10,
**kwargs
): ...
def get_image_embedding(self, img_file_path: str) -> List[float]:
"""
Get embedding for image file.
Parameters:
- img_file_path: str, path to image file
Returns:
- List[float], image embedding vector
"""
async def aget_image_embedding(self, img_file_path: str) -> List[float]:
"""Async version of get_image_embedding."""Utility functions and classes for embedding model management and operations.
class Pooling:
"""
Embedding pooling operations for combining token embeddings.
Parameters:
- pooling_type: str, type of pooling (mean, max, cls)
"""
def __init__(self, pooling_type: str = "mean"): ...
def pool(self, embeddings: List[List[float]]) -> List[float]:
"""
Pool multiple embeddings into single vector.
Parameters:
- embeddings: List[List[float]], embeddings to pool
Returns:
- List[float], pooled embedding vector
"""
def resolve_embed_model(embed_model: Union[str, BaseEmbedding]) -> BaseEmbedding:
"""
Resolve embedding model from string name or return existing instance.
Parameters:
- embed_model: Union[str, BaseEmbedding], model name or instance
Returns:
- BaseEmbedding, resolved embedding model instance
"""Language models with vision and multi-modal capabilities for processing images alongside text.
class MultiModalLLM:
"""
Multi-modal language model interface for vision and text processing.
Parameters:
- model_name: str, name identifier for the model
- max_new_tokens: int, maximum new tokens to generate
- context_window: int, maximum context window size
"""
def __init__(
self,
model_name: str = "unknown",
max_new_tokens: int = 300,
context_window: int = 4096,
**kwargs
): ...
def complete(
self,
prompt: str,
image_documents: Sequence[ImageDocument],
**kwargs
) -> CompletionResponse:
"""
Complete prompt with image context.
Parameters:
- prompt: str, text prompt
- image_documents: Sequence[ImageDocument], images for context
Returns:
- CompletionResponse, completion with image understanding
"""
def stream_complete(
self,
prompt: str,
image_documents: Sequence[ImageDocument],
**kwargs
) -> CompletionResponseGen:
"""Stream completion with image context."""
async def acomplete(
self,
prompt: str,
image_documents: Sequence[ImageDocument],
**kwargs
) -> CompletionResponse:
"""Async completion with image context."""
@property
def metadata(self) -> MultiModalLLMMetadata:
"""Get multi-modal LLM metadata."""
class MultiModalLLMMetadata:
"""
Metadata for multi-modal LLM capabilities.
Parameters:
- num_output: int, maximum output tokens
- model_name: str, model identifier
"""
def __init__(
self,
num_output: int = 300,
model_name: str = "unknown"
): ...
from llama_index.core.llms import MockLLM
from llama_index.core.llms.types import ChatMessage, MessageRole
# Initialize mock LLM
llm = MockLLM(max_tokens=256)
# Text completion
response = llm.complete("Explain machine learning in simple terms:")
print(response.text)
# Chat conversation
messages = [
ChatMessage(role=MessageRole.SYSTEM, content="You are a helpful assistant."),
ChatMessage(role=MessageRole.USER, content="What is deep learning?")
]
chat_response = llm.chat(messages)
print(chat_response.message.content)
# Streaming completion
stream = llm.stream_complete("Write a short story about AI:")
for response in stream:
print(response.delta, end="", flush=True)
# Streaming chat
stream = llm.stream_chat(messages)
for response in stream:
print(response.delta, end="", flush=True)from llama_index.core.embeddings import MockEmbedding
# Initialize mock embedding
embed_model = MockEmbedding(embed_dim=384)
# Single text embedding
text = "Machine learning is a subset of artificial intelligence."
embedding = embed_model.get_text_embedding(text)
print(f"Embedding dimension: {len(embedding)}")
# Batch embeddings
texts = [
"Natural language processing helps computers understand text.",
"Computer vision enables machines to interpret images.",
"Reinforcement learning trains agents through rewards."
]
embeddings = embed_model.get_text_embeddings(texts)
print(f"Generated {len(embeddings)} embeddings")
# Query embedding (may differ from document embeddings)
query_embedding = embed_model.get_query_embedding("What is AI?")
# Compute similarity
similarity = embed_model.similarity(embedding, query_embedding)
print(f"Similarity: {similarity:.3f}")from llama_index.core.llms import CustomLLM
from llama_index.core.llms.types import CompletionResponse, LLMMetadata
class MyCustomLLM(CustomLLM):
"""Example custom LLM implementation."""
def __init__(self, model_path: str, **kwargs):
self.model_path = model_path
super().__init__(**kwargs)
@property
def metadata(self) -> LLMMetadata:
return LLMMetadata(
context_window=4096,
num_output=512,
model_name="my_custom_model"
)
def _complete(self, prompt: str, **kwargs) -> CompletionResponse:
# Custom completion logic here
generated_text = f"Generated response for: {prompt}"
return CompletionResponse(text=generated_text)
def _stream_complete(self, prompt: str, **kwargs):
# Custom streaming logic here
response = self._complete(prompt, **kwargs)
yield response
# Use custom LLM
custom_llm = MyCustomLLM(model_path="/path/to/model")
response = custom_llm.complete("Hello, world!")from llama_index.core.llms.types import ChatMessage, ImageBlock, TextBlock
# Create message with image and text
message = ChatMessage(
role=MessageRole.USER,
content=[
TextBlock(text="What do you see in this image?"),
ImageBlock(image="base64_encoded_image_data")
]
)
# Use in chat (with compatible multi-modal LLM)
# response = multimodal_llm.chat([message])
# Response type unions
Response = Union[str, ChatResponse, CompletionResponse]
RESPONSE_TYPE = Union[Response, StreamingResponse]
# Content block union
ContentBlock = Union[TextBlock, ImageBlock, AudioBlock, DocumentBlock]
# Pydantic program modes
class PydanticProgramMode(str, Enum):
DEFAULT = "default"
OPENAI = "openai"
LLM = "llm"
GUIDANCE = "guidance"
LM_FORMAT_ENFORCER = "lm-format-enforcer"Install with Tessl CLI
npx tessl i tessl/pypi-llama-index-core