CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-llama-index-core

Interface between LLMs and your data

Pending
Overview
Eval results
Files

llms-embeddings.mddocs/

LLMs & Embeddings

Pluggable interfaces for language models and embedding systems, supporting both synchronous and asynchronous operations with extensive customization options. These interfaces enable integration with various LLM providers and embedding models while maintaining consistent APIs.

Capabilities

Base LLM Interface

Foundation interface for all language model implementations, providing standardized completion and chat methods.

class LLM:
    """
    Base language model interface with completion and chat capabilities.
    
    Parameters:
    - model_name: str, name identifier for the model
    - context_window: int, maximum context window size in tokens
    - max_new_tokens: Optional[int], maximum new tokens to generate
    - system_prompt: Optional[str], default system prompt
    - messages_to_prompt: Optional[Callable], function to convert messages to prompt
    - completion_to_prompt: Optional[Callable], function to convert completion to prompt
    - pydantic_program_mode: PydanticProgramMode, mode for Pydantic program execution
    - output_parser: Optional[BaseOutputParser], parser for model output
    """
    def __init__(
        self,
        model_name: str = "unknown",
        context_window: int = 4096,
        max_new_tokens: Optional[int] = None,
        system_prompt: Optional[str] = None,
        messages_to_prompt: Optional[Callable] = None,
        completion_to_prompt: Optional[Callable] = None,
        pydantic_program_mode: PydanticProgramMode = PydanticProgramMode.DEFAULT,
        output_parser: Optional[BaseOutputParser] = None,
        **kwargs
    ): ...
    
    # --- synchronous API ---
    def complete(
        self,
        prompt: str,
        formatted: bool = False,
        **kwargs
    ) -> CompletionResponse:
        """
        Complete a text prompt.
        
        Parameters:
        - prompt: str, the text prompt to complete
        - formatted: bool, whether prompt is already formatted
          (True skips prompt-template processing)
        
        Returns:
        - CompletionResponse, completion result with text and metadata
        """
        
    def stream_complete(
        self,
        prompt: str,
        formatted: bool = False,
        **kwargs
    ) -> CompletionResponseGen:
        """
        Stream completion results for a text prompt.
        
        Parameters:
        - prompt: str, the text prompt to complete
        - formatted: bool, whether prompt is already formatted
        
        Returns:
        - CompletionResponseGen, streaming completion generator
        """
        
    def chat(
        self,
        messages: Sequence[ChatMessage],
        **kwargs
    ) -> ChatResponse:
        """
        Generate chat response from message history.
        
        Parameters:
        - messages: Sequence[ChatMessage], conversation history
        
        Returns:
        - ChatResponse, chat response with message and metadata
        """
        
    def stream_chat(
        self,
        messages: Sequence[ChatMessage],
        **kwargs
    ) -> ChatResponseGen:
        """
        Stream chat response from message history.
        
        Parameters:
        - messages: Sequence[ChatMessage], conversation history
        
        Returns:
        - ChatResponseGen, streaming chat response generator
        """
        
    # --- asynchronous counterparts (same semantics as the sync methods above) ---
    async def acomplete(
        self,
        prompt: str,
        formatted: bool = False,
        **kwargs
    ) -> CompletionResponse:
        """Async version of complete method."""
        
    async def astream_complete(
        self,
        prompt: str,
        formatted: bool = False,
        **kwargs
    ) -> CompletionResponseAsyncGen:
        """Async version of stream_complete method."""
        
    async def achat(
        self,
        messages: Sequence[ChatMessage],
        **kwargs
    ) -> ChatResponse:
        """Async version of chat method."""
        
    async def astream_chat(
        self,
        messages: Sequence[ChatMessage],
        **kwargs
    ) -> ChatResponseAsyncGen:
        """Async version of stream_chat method."""
        
    # --- introspection helpers ---
    @property
    def metadata(self) -> LLMMetadata:
        """Get LLM metadata including context window and token limits."""
        
    def get_num_tokens(self, text: str) -> int:
        """Get token count for text."""
        
    def get_num_tokens_from_messages(self, messages: Sequence[ChatMessage]) -> int:
        """Get token count for message sequence."""

Custom LLM Implementation

Base class for implementing custom language models with standardized interfaces.

class CustomLLM(LLM):
    """
    Base class for custom LLM implementations.
    
    Subclasses must implement:
    - _complete: Core completion logic
    - _stream_complete: Core streaming completion logic
    - _chat: Core chat logic (optional, defaults to completion-based)
    - _stream_chat: Core streaming chat logic (optional)
    """
    
    # NOTE(review): the public complete/chat methods inherited from LLM
    # presumably delegate to these private hooks — confirm against the
    # installed llama-index-core version.
    def _complete(self, prompt: str, **kwargs) -> CompletionResponse:
        """Core completion implementation to be overridden."""
        
    def _stream_complete(self, prompt: str, **kwargs) -> CompletionResponseGen:
        """Core streaming completion implementation to be overridden."""
        
    def _chat(self, messages: Sequence[ChatMessage], **kwargs) -> ChatResponse:
        """Core chat implementation, defaults to completion-based."""
        
    def _stream_chat(self, messages: Sequence[ChatMessage], **kwargs) -> ChatResponseGen:
        """Core streaming chat implementation, defaults to completion-based."""

Mock LLM Implementation

Testing and development LLM that returns predictable responses without external API calls.

class MockLLM(CustomLLM):
    """
    Mock LLM for testing and development purposes.
    
    Returns predictable responses without making external API calls.
    
    Parameters:
    - max_tokens: Optional[int], maximum tokens to return
    - system_prompt: Optional[str], default system prompt
    """
    def __init__(
        self,
        max_tokens: Optional[int] = None,
        system_prompt: Optional[str] = None,
        **kwargs
    ): ...

LLM Response Types

Response structures for various LLM operations with rich metadata and content support.

class CompletionResponse:
    """
    Response from text completion operations.
    
    Parameters:
    - text: str, the completed text
    - additional_kwargs: Optional[dict], additional response metadata
    - raw: Optional[dict], raw response from the LLM provider
    """
    def __init__(
        self,
        text: str,
        additional_kwargs: Optional[dict] = None,
        raw: Optional[dict] = None,
        **kwargs
    ): ...
    
    @property
    def delta(self) -> Optional[str]:
        """Get response delta for streaming operations."""
        # Optional return: presumably None for non-streaming responses — confirm.

class ChatResponse:
    """
    Response from chat operations.
    
    Parameters:
    - message: ChatMessage, the response message
    - raw: Optional[dict], raw response from the LLM provider
    - additional_kwargs: Optional[dict], additional response metadata
    """
    # Note: parameter order differs from CompletionResponse (raw before
    # additional_kwargs here) — kept as-is since it documents the upstream API.
    def __init__(
        self,
        message: ChatMessage,
        raw: Optional[dict] = None,
        additional_kwargs: Optional[dict] = None,
        **kwargs
    ): ...
    
    @property
    def delta(self) -> Optional[str]:
        """Get response delta for streaming operations."""
        # Optional return: presumably None for non-streaming responses — confirm.

# Type aliases for streaming responses.
# Sync variants are plain generators; async variants are async generators.
CompletionResponseGen = Generator[CompletionResponse, None, None]
CompletionResponseAsyncGen = AsyncGenerator[CompletionResponse, None]
ChatResponseGen = Generator[ChatResponse, None, None] 
ChatResponseAsyncGen = AsyncGenerator[ChatResponse, None]

Chat Messages & Roles

Structured message types for chat-based interactions with role-based organization.

class ChatMessage:
    """
    Individual message in a chat conversation.
    
    Parameters:
    - role: MessageRole, role of the message sender
    - content: Union[str, List[ContentBlock]], message content (plain text
      or a list of multi-modal content blocks)
    - additional_kwargs: Optional[dict], additional message metadata
    - tool_calls: Optional[List[ToolCall]], tool calls in the message
    - tool_call_id: Optional[str], identifier for tool call responses
    """
    def __init__(
        self,
        role: MessageRole,
        content: Union[str, List[ContentBlock]] = "",
        additional_kwargs: Optional[dict] = None,
        tool_calls: Optional[List[ToolCall]] = None,
        tool_call_id: Optional[str] = None,
        **kwargs
    ): ...
    
    @classmethod
    def from_str(
        cls,
        content: str,
        # Annotation widened from plain `str` to match the default value
        # (MessageRole.USER) and upstream llama-index, which accepts either
        # a str or a MessageRole here. Backward compatible for all callers.
        role: Union[str, MessageRole] = MessageRole.USER,
        **kwargs
    ) -> "ChatMessage":
        """Create ChatMessage from string content."""

class MessageRole(str, Enum):
    """Roles for chat message participants.

    Subclasses str, so members compare equal to their string values
    (e.g. MessageRole.USER == "user").
    """
    SYSTEM = "system"      # System instructions and context
    USER = "user"          # User input messages
    ASSISTANT = "assistant" # Assistant/model responses
    FUNCTION = "function"   # Function call results (deprecated)
    TOOL = "tool"          # Tool execution results

Content Block Types

Rich content support for multi-modal messages including text, images, and documents.

class TextBlock:
    """
    Text content block for messages.
    
    One of the ContentBlock variants accepted in ChatMessage.content.
    
    Parameters:
    - text: str, the text content
    """
    def __init__(self, text: str): ...

class ImageBlock:
    """
    Image content block for messages.
    
    Parameters:
    - image: str, base64 encoded image or image URL
    - image_url: Optional[str], URL to image resource
    - image_mimetype: Optional[str], MIME type of the image
    """
    def __init__(
        self,
        image: str,
        image_url: Optional[str] = None,
        image_mimetype: Optional[str] = None
    ): ...

class AudioBlock:
    """
    Audio content block for messages.
    
    Parameters:
    - audio: str, base64 encoded audio data
    - audio_url: Optional[str], URL to audio resource
    - audio_mimetype: Optional[str], MIME type of the audio
    """
    def __init__(
        self,
        audio: str,
        audio_url: Optional[str] = None,
        audio_mimetype: Optional[str] = None
    ): ...

class DocumentBlock:
    """
    Document content block for messages.
    
    Parameters:
    - document: str, base64 encoded document data
    - document_url: Optional[str], URL to document resource
    - document_mimetype: Optional[str], MIME type of the document
    """
    def __init__(
        self,
        document: str,
        document_url: Optional[str] = None,
        document_mimetype: Optional[str] = None
    ): ...

LLM Metadata & Configuration

Metadata structures for describing LLM capabilities and constraints.

class LLMMetadata:
    """
    Metadata describing LLM capabilities and limitations.
    
    Parameters:
    - context_window: int, maximum context window size in tokens
      (default 4096, matching LLM.__init__)
    - num_output: int, maximum output tokens per request
    - is_chat_model: bool, whether model supports chat interface
    - is_function_calling_model: bool, whether model supports function calling
    - model_name: str, name identifier for the model
    - system_role: MessageRole, role used for system messages
    """
    def __init__(
        self,
        context_window: int = 4096,
        num_output: int = 256,
        is_chat_model: bool = False,
        is_function_calling_model: bool = False,
        model_name: str = "unknown",
        system_role: MessageRole = MessageRole.SYSTEM,
        **kwargs
    ): ...

Cache Control & Optimization

Advanced caching mechanisms for optimizing LLM performance and reducing costs.

class CacheControl:
    """
    Cache control settings for LLM optimization.
    
    Parameters:
    - type: str, cache control type (ephemeral, session, etc.)
    """
    # `type` shadows the builtin but is kept — it documents the upstream API.
    def __init__(self, type: str): ...

class CachePoint:
    """
    Cache point configuration for specific content blocks.
    
    Parameters:
    - type: str, cache point type
    """
    # `type` shadows the builtin but is kept — it documents the upstream API.
    def __init__(self, type: str): ...

Base Embedding Interface

Foundation interface for all embedding model implementations with text and batch processing support.

class BaseEmbedding:
    """
    Base interface for embedding models.
    
    Parameters:
    - model_name: str, name identifier for the embedding model
    - embed_batch_size: int, batch size for embedding operations
    - callback_manager: Optional[CallbackManager], callback management system
    - num_workers: Optional[int], number of worker threads for parallel processing
    """
    def __init__(
        self,
        model_name: str = "unknown",
        embed_batch_size: int = 10,
        callback_manager: Optional[CallbackManager] = None,
        num_workers: Optional[int] = None,
        **kwargs
    ): ...
    
    # --- document/text embeddings ---
    def get_text_embedding(self, text: str) -> List[float]:
        """
        Get embedding for single text string.
        
        Parameters:
        - text: str, input text to embed
        
        Returns:
        - List[float], embedding vector
        """
        
    def get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
        """
        Get embeddings for multiple text strings.
        
        Parameters:
        - texts: List[str], list of input texts to embed
        
        Returns:
        - List[List[float]], list of embedding vectors
        """
        
    async def aget_text_embedding(self, text: str) -> List[float]:
        """Async version of get_text_embedding."""
        
    async def aget_text_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Async version of get_text_embeddings."""
        
    # --- query embeddings (may use a different prompt/path than documents) ---
    def get_query_embedding(self, query: str) -> List[float]:
        """
        Get embedding for query text (may differ from document embedding).
        
        Parameters:
        - query: str, query text to embed
        
        Returns:
        - List[float], query embedding vector
        """
        
    async def aget_query_embedding(self, query: str) -> List[float]:
        """Async version of get_query_embedding."""
        
    def similarity(
        self,
        embedding1: List[float],
        embedding2: List[float]
    ) -> float:
        """
        Compute similarity between two embeddings.
        
        Parameters:
        - embedding1: List[float], first embedding vector
        - embedding2: List[float], second embedding vector
        
        Returns:
        - float, similarity score
        """

Mock Embedding Implementation

Testing and development embedding model that generates consistent vectors without external API calls.

class MockEmbedding(BaseEmbedding):
    """
    Mock embedding model for testing and development.
    
    Generates vectors without external API calls.
    
    Parameters:
    - embed_dim: int, dimensionality of embedding vectors
    - deterministic: bool, whether to generate deterministic embeddings
    """
    def __init__(
        self,
        embed_dim: int = 1536,
        deterministic: bool = True,
        **kwargs
    ): ...

Multi-Modal Embedding Support

Extended embedding interface for handling multiple content modalities.

class MultiModalEmbedding(BaseEmbedding):
    """
    Multi-modal embedding interface supporting text, images, and other content types.
    
    Inherits all text/query embedding methods from BaseEmbedding and adds
    image embedding support.
    
    Parameters:
    - model_name: str, name identifier for the multi-modal embedding model
    - embed_batch_size: int, batch size for embedding operations
    """
    def __init__(
        self,
        model_name: str = "unknown",
        embed_batch_size: int = 10,
        **kwargs
    ): ...
    
    def get_image_embedding(self, img_file_path: str) -> List[float]:
        """
        Get embedding for image file.
        
        Parameters:
        - img_file_path: str, path to image file
        
        Returns:
        - List[float], image embedding vector
        """
        
    async def aget_image_embedding(self, img_file_path: str) -> List[float]:
        """Async version of get_image_embedding."""

Embedding Utilities

Utility functions and classes for embedding model management and operations.

class Pooling:
    """
    Embedding pooling operations for combining token embeddings.
    
    Parameters:
    - pooling_type: str, type of pooling (mean, max, cls)
    """
    def __init__(self, pooling_type: str = "mean"): ...
    
    def pool(self, embeddings: List[List[float]]) -> List[float]:
        """
        Pool multiple embeddings into single vector.
        
        Parameters:
        - embeddings: List[List[float]], embeddings to pool
        
        Returns:
        - List[float], pooled embedding vector (reduced per self.pooling_type)
        """

def resolve_embed_model(embed_model: Union[str, BaseEmbedding]) -> BaseEmbedding:
    """
    Resolve embedding model from string name or return existing instance.
    
    Parameters:
    - embed_model: Union[str, BaseEmbedding], model name or instance
      (instances are returned unchanged; strings are looked up)
    
    Returns:
    - BaseEmbedding, resolved embedding model instance
    """

Multi-Modal LLM Interface

Language models with vision and multi-modal capabilities for processing images alongside text.

class MultiModalLLM:
    """
    Multi-modal language model interface for vision and text processing.
    
    Parameters:
    - model_name: str, name identifier for the model
    - max_new_tokens: int, maximum new tokens to generate
    - context_window: int, maximum context window size
    """
    def __init__(
        self,
        model_name: str = "unknown",
        max_new_tokens: int = 300,
        context_window: int = 4096,
        **kwargs
    ): ...
    
    # Unlike LLM.complete, these methods require image documents as context.
    def complete(
        self,
        prompt: str,
        image_documents: Sequence[ImageDocument],
        **kwargs
    ) -> CompletionResponse:
        """
        Complete prompt with image context.
        
        Parameters:
        - prompt: str, text prompt
        - image_documents: Sequence[ImageDocument], images for context
        
        Returns:
        - CompletionResponse, completion with image understanding
        """
        
    def stream_complete(
        self,
        prompt: str,
        image_documents: Sequence[ImageDocument],
        **kwargs
    ) -> CompletionResponseGen:
        """Stream completion with image context."""
        
    async def acomplete(
        self,
        prompt: str,
        image_documents: Sequence[ImageDocument],
        **kwargs
    ) -> CompletionResponse:
        """Async completion with image context."""
        
    @property
    def metadata(self) -> MultiModalLLMMetadata:
        """Get multi-modal LLM metadata."""

class MultiModalLLMMetadata:
    """
    Metadata for multi-modal LLM capabilities.
    
    Parameters:
    - num_output: int, maximum output tokens
    - model_name: str, model identifier
    """
    def __init__(
        self,
        num_output: int = 300,
        model_name: str = "unknown"
    ): ...

Usage Examples

Basic LLM Usage

from llama_index.core.llms import MockLLM
from llama_index.core.llms.types import ChatMessage, MessageRole

# Initialize mock LLM (no external API calls; responses are predictable)
llm = MockLLM(max_tokens=256)

# Text completion — returns a CompletionResponse with a .text attribute
response = llm.complete("Explain machine learning in simple terms:")
print(response.text)

# Chat conversation — system message sets behavior, user message asks
messages = [
    ChatMessage(role=MessageRole.SYSTEM, content="You are a helpful assistant."),
    ChatMessage(role=MessageRole.USER, content="What is deep learning?")
]

chat_response = llm.chat(messages)
print(chat_response.message.content)

Streaming Responses

# Streaming completion — iterate the generator, printing each incremental delta
stream = llm.stream_complete("Write a short story about AI:")
for response in stream:
    print(response.delta, end="", flush=True)

# Streaming chat — same delta-based iteration over ChatResponse chunks
stream = llm.stream_chat(messages)
for response in stream:
    print(response.delta, end="", flush=True)

Basic Embedding Usage

from llama_index.core.embeddings import MockEmbedding

# Initialize mock embedding (384-dim vectors, no external API calls)
embed_model = MockEmbedding(embed_dim=384)

# Single text embedding
text = "Machine learning is a subset of artificial intelligence."
embedding = embed_model.get_text_embedding(text)
print(f"Embedding dimension: {len(embedding)}")

# Batch embeddings — one vector per input text
texts = [
    "Natural language processing helps computers understand text.",
    "Computer vision enables machines to interpret images.",
    "Reinforcement learning trains agents through rewards."
]

embeddings = embed_model.get_text_embeddings(texts)
print(f"Generated {len(embeddings)} embeddings")

# Query embedding (may differ from document embeddings)
query_embedding = embed_model.get_query_embedding("What is AI?")

# Compute similarity between a document embedding and the query embedding
similarity = embed_model.similarity(embedding, query_embedding)
print(f"Similarity: {similarity:.3f}")

Custom LLM Implementation

from llama_index.core.llms import CustomLLM
from llama_index.core.llms.types import CompletionResponse, LLMMetadata

class MyCustomLLM(CustomLLM):
    """Example custom LLM implementation."""
    
    def __init__(self, model_path: str, **kwargs):
        # NOTE(review): llama-index LLMs are typically Pydantic models;
        # assigning attributes before super().__init__() may not be valid
        # against the real base class — confirm before copying this pattern.
        self.model_path = model_path
        super().__init__(**kwargs)
    
    @property
    def metadata(self) -> LLMMetadata:
        # Static capability description consumed by the framework
        return LLMMetadata(
            context_window=4096,
            num_output=512,
            model_name="my_custom_model"
        )
    
    def _complete(self, prompt: str, **kwargs) -> CompletionResponse:
        # Custom completion logic here (placeholder: echoes the prompt)
        generated_text = f"Generated response for: {prompt}"
        return CompletionResponse(text=generated_text)
    
    def _stream_complete(self, prompt: str, **kwargs):
        # Custom streaming logic here — this placeholder yields the full
        # completion as a single chunk
        response = self._complete(prompt, **kwargs)
        yield response

# Use custom LLM
custom_llm = MyCustomLLM(model_path="/path/to/model")
response = custom_llm.complete("Hello, world!")

Multi-Modal Content

# MessageRole added to the import: the original example used it on the
# `role=` line below without importing it, which would raise NameError.
from llama_index.core.llms.types import ChatMessage, ImageBlock, MessageRole, TextBlock

# Create message with image and text content blocks
message = ChatMessage(
    role=MessageRole.USER,
    content=[
        TextBlock(text="What do you see in this image?"),
        ImageBlock(image="base64_encoded_image_data")
    ]
)

# Use in chat (with compatible multi-modal LLM)
# response = multimodal_llm.chat([message])

Types & Configuration

# Response type unions
Response = Union[str, ChatResponse, CompletionResponse]
RESPONSE_TYPE = Union[Response, StreamingResponse]

# Content block union — the variants accepted in ChatMessage.content lists
ContentBlock = Union[TextBlock, ImageBlock, AudioBlock, DocumentBlock]

# Pydantic program modes — selects the structured-output backend
class PydanticProgramMode(str, Enum):
    DEFAULT = "default"
    OPENAI = "openai" 
    LLM = "llm"
    GUIDANCE = "guidance"
    LM_FORMAT_ENFORCER = "lm-format-enforcer"

Install with Tessl CLI

npx tessl i tessl/pypi-llama-index-core

docs

agents-tools.md

documents-nodes.md

evaluation.md

index.md

indices.md

llms-embeddings.md

node-parsers.md

postprocessors.md

prompts.md

query-engines.md

retrievers.md

settings.md

storage.md

tile.json