CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-llama-index-core

Interface between LLMs and your data

Pending
Overview
Eval results
Files

docs/settings.md

Settings & Configuration

Centralized configuration system for managing LLMs, embeddings, callback handlers, and other global settings across the application. The settings system provides a unified way to configure and manage all LlamaIndex components with support for global defaults and context-specific overrides.

Capabilities

Global Settings Management

Centralized configuration system for managing application-wide settings and component defaults.

class Settings:
    """
    Global settings for LlamaIndex configuration and component management.
    
    The Settings class provides centralized configuration for LLMs, embeddings,
    callbacks, and other system-wide components with automatic initialization
    and lazy loading capabilities.
    
    All state lives on the class itself (note the classmethod-only API):
    Settings is used as a global namespace, so every consumer shares one state.
    
    Class Attributes:
    - llm: Optional[LLM], global language model instance
    - embed_model: Optional[BaseEmbedding], global embedding model
    - callback_manager: Optional[CallbackManager], global callback system
    - transformations: Optional[List[TransformComponent]], global transformations
    - chunk_size: int, default chunk size for text processing
    - chunk_overlap: int, default overlap between text chunks
    - context_window: int, default context window size
    - num_output: int, default number of output tokens
    """
    
    # Core model components. None means "not configured yet"; the get_llm /
    # get_embed_model accessors below fall back to mock implementations then.
    llm: Optional[LLM] = None
    embed_model: Optional[BaseEmbedding] = None
    
    # Callback and monitoring (lazily created by get_callback_manager)
    callback_manager: Optional[CallbackManager] = None
    
    # Text processing (lazily created by get_transformations)
    transformations: Optional[List[TransformComponent]] = None
    
    # Chunking configuration
    chunk_size: int = 1024
    chunk_overlap: int = 20
    
    # Model configuration
    context_window: int = 4096
    num_output: int = 256
    
    @classmethod
    def reset(cls) -> None:
        """
        Reset all settings to default values.
        
        Clears all configured components and resets settings to their
        default state for clean initialization. After a reset, the get_*
        accessors lazily re-create their defaults on next access.
        """
        
    @classmethod
    def configure(
        cls,
        llm: Optional[LLM] = None,
        embed_model: Optional[BaseEmbedding] = None,
        callback_manager: Optional[CallbackManager] = None,
        transformations: Optional[List[TransformComponent]] = None,
        chunk_size: Optional[int] = None,
        chunk_overlap: Optional[int] = None,
        context_window: Optional[int] = None,
        num_output: Optional[int] = None,
        **kwargs
    ) -> None:
        """
        Configure global settings with provided components.
        
        Parameters:
        - llm: Optional[LLM], language model for global use
        - embed_model: Optional[BaseEmbedding], embedding model for global use
        - callback_manager: Optional[CallbackManager], callback system
        - transformations: Optional[List[TransformComponent]], global transformations
        - chunk_size: Optional[int], default chunk size
        - chunk_overlap: Optional[int], default chunk overlap
        - context_window: Optional[int], default context window size
        - num_output: Optional[int], default output token count
        - kwargs: additional settings — NOTE(review): accepted keys are not
          documented here; confirm against the implementation before use
        """
        
    @classmethod
    def get_llm(cls) -> LLM:
        """
        Get configured language model with automatic initialization.
        
        Returns the configured LLM or initializes a default mock LLM
        if none is configured.
        
        Returns:
        - LLM, configured or default language model
        """
        
    @classmethod
    def get_embed_model(cls) -> BaseEmbedding:
        """
        Get configured embedding model with automatic initialization.
        
        Returns the configured embedding model or initializes a default
        mock embedding model if none is configured.
        
        Returns:
        - BaseEmbedding, configured or default embedding model
        """
        
    @classmethod
    def get_callback_manager(cls) -> CallbackManager:
        """
        Get configured callback manager with automatic initialization.
        
        Returns the configured callback manager or creates a default
        instance if none is configured.
        
        Returns:
        - CallbackManager, configured or default callback manager
        """
        
    @classmethod
    def get_transformations(cls) -> List[TransformComponent]:
        """
        Get configured transformations with automatic initialization.
        
        Returns the configured transformation pipeline or creates
        default transformations if none are configured.
        
        Returns:
        - List[TransformComponent], configured or default transformations
        """

Service Context (Legacy)

Legacy service context for backward compatibility with older LlamaIndex versions.

class ServiceContext:
    """
    Legacy service context for LLM operations and configuration.
    
    Note: ServiceContext is deprecated in favor of the Settings class.
    This class is maintained for backward compatibility.
    
    Parameters:
    - llm: Optional[LLM], language model instance
    - embed_model: Optional[BaseEmbedding], embedding model instance
    - node_parser: Optional[NodeParser], text parsing configuration
    - text_splitter: Optional[TextSplitter], text splitting configuration
    - transformations: Optional[List[TransformComponent]], transformation pipeline
    - callback_manager: Optional[CallbackManager], callback management
    """
    def __init__(
        self,
        llm: Optional[LLM] = None,
        embed_model: Optional[BaseEmbedding] = None,
        node_parser: Optional[NodeParser] = None,
        text_splitter: Optional[TextSplitter] = None,
        transformations: Optional[List[TransformComponent]] = None,
        callback_manager: Optional[CallbackManager] = None,
        **kwargs  # extra legacy options — NOTE(review): keys undocumented here
    ): ...
    
    @classmethod
    def from_defaults(
        cls,
        llm: Optional[LLM] = None,
        embed_model: Optional[BaseEmbedding] = None,
        node_parser: Optional[NodeParser] = None,
        text_splitter: Optional[TextSplitter] = None,
        transformations: Optional[List[TransformComponent]] = None,
        callback_manager: Optional[CallbackManager] = None,
        chunk_size: Optional[int] = None,
        chunk_overlap: Optional[int] = None,
        context_window: Optional[int] = None,
        num_output: Optional[int] = None,
        **kwargs
    ) -> "ServiceContext":
        """
        Create ServiceContext with default configurations.
        
        Parameters left unspecified fall back to default configurations.
        
        Parameters:
        - llm: Optional[LLM], language model
        - embed_model: Optional[BaseEmbedding], embedding model
        - node_parser: Optional[NodeParser], node parsing configuration
        - text_splitter: Optional[TextSplitter], text splitting configuration
        - transformations: Optional[List[TransformComponent]], transformations
        - callback_manager: Optional[CallbackManager], callback system
        - chunk_size: Optional[int], text chunk size
        - chunk_overlap: Optional[int], chunk overlap size
        - context_window: Optional[int], model context window
        - num_output: Optional[int], output token limit
        
        Returns:
        - ServiceContext, configured service context
        """

Global Configuration Functions

Utility functions for setting global configuration and managing system-wide settings.

def set_global_service_context(service_context: ServiceContext) -> None:
    """
    Install the given service context as the process-wide default.
    
    Deprecated: retained only so pre-Settings code keeps working; new
    code should call Settings.configure() instead.
    
    Parameters:
    - service_context: ServiceContext, context to make global
    """

def set_global_handler(handler: BaseCallbackHandler) -> None:
    """
    Register a callback handler that receives events application-wide.
    
    Parameters:
    - handler: BaseCallbackHandler, handler to install globally
    """

def set_global_tokenizer(tokenizer: Callable[[str], List]) -> None:
    """
    Install the tokenizer used globally for text/token processing.
    
    Parameters:
    - tokenizer: Callable[[str], List], function mapping text to tokens
    """

def get_tokenizer() -> Callable[[str], List]:
    """
    Return the tokenizer function currently installed globally.
    
    Returns:
    - Callable[[str], List], the active tokenizer function
    """

Prompt Helper Configuration

Configuration utilities for prompt management and optimization.

class PromptHelper:
    """
    Helper for prompt management and token optimization.
    
    Calculates how much source text fits alongside a prompt within the
    model's context window, accounting for tokens reserved for output.
    
    Parameters:
    - context_window: int, available context window size
    - num_output: int, reserved tokens for output
    - chunk_overlap_ratio: float, ratio of overlap between chunks
    - chunk_size_limit: Optional[int], maximum chunk size
    - tokenizer: Optional[Callable], tokenizer function for counting
    """
    def __init__(
        self,
        context_window: int = 4096,
        num_output: int = 256,
        chunk_overlap_ratio: float = 0.1,
        chunk_size_limit: Optional[int] = None,
        tokenizer: Optional[Callable] = None,
        **kwargs  # NOTE(review): extra options undocumented here; confirm
    ): ...
    
    def get_text_splitter_given_prompt(
        self,
        prompt: BasePromptTemplate,
        num_chunks: int = 1,
        padding: int = 5
    ) -> TokenTextSplitter:
        """
        Get text splitter configured for specific prompt requirements.
        
        Parameters:
        - prompt: BasePromptTemplate, prompt template for sizing
        - num_chunks: int, number of chunks to accommodate
        - padding: int, safety padding for token count
        
        Returns:
        - TokenTextSplitter, configured text splitter
        """
        
    def get_chunk_size_given_prompt(
        self,
        prompt: BasePromptTemplate,
        num_chunks: int = 1,
        padding: int = 5
    ) -> int:
        """
        Calculate optimal chunk size for prompt and context window.
        
        Parameters:
        - prompt: BasePromptTemplate, prompt template
        - num_chunks: int, number of chunks to fit
        - padding: int, safety padding
        
        Returns:
        - int, optimal chunk size in tokens
        """

Callback Management

System for managing callback handlers and event processing across the application.

class CallbackManager:
    """
    Manager for callback handlers and event processing.
    
    NOTE(review): presumably fans each start/end event out to every
    registered handler — confirm against the implementation.
    
    Parameters:
    - handlers: List[BaseCallbackHandler], list of callback handlers
    """
    def __init__(self, handlers: Optional[List[BaseCallbackHandler]] = None): ...
    
    def add_handler(self, handler: BaseCallbackHandler) -> None:
        """
        Add callback handler to manager.
        
        Parameters:
        - handler: BaseCallbackHandler, handler to add
        """
        
    def remove_handler(self, handler: BaseCallbackHandler) -> None:
        """
        Remove callback handler from manager.
        
        Parameters:
        - handler: BaseCallbackHandler, handler to remove
        """
        
    def set_handlers(self, handlers: List[BaseCallbackHandler]) -> None:
        """
        Set list of callback handlers, replacing existing ones.
        
        Parameters:
        - handlers: List[BaseCallbackHandler], new handler list
        """
        
    def on_event_start(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: str = "",
        **kwargs: Any
    ) -> str:
        """
        Handle event start callback.
        
        Parameters:
        - event_type: CBEventType, type of event starting
        - payload: Optional[Dict[str, Any]], event payload data
        - event_id: str, unique event identifier
        
        Returns:
        - str, event identifier for tracking; pass it to on_event_end
          to correlate the matching end event
        """
        
    def on_event_end(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: str = "",
        **kwargs: Any
    ) -> None:
        """
        Handle event end callback.
        
        Parameters:
        - event_type: CBEventType, type of event ending
        - payload: Optional[Dict[str, Any]], event payload data
        - event_id: str, event identifier returned by on_event_start
        """

class BaseCallbackHandler:
    """
    Base interface for callback handler implementations.
    
    Callback handlers receive and process events from LlamaIndex
    operations for logging, monitoring, and debugging purposes.
    Subclasses override the hook methods below; the base class only
    defines the interface.
    """
    def on_event_start(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: str = "",
        **kwargs: Any
    ) -> str:
        """Handle event start; returns the event id used for correlation."""
        
    def on_event_end(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: str = "",
        **kwargs: Any
    ) -> None:
        """Handle event end for the event started with the same event_id."""
        
    def start_trace(self, trace_id: Optional[str] = None) -> None:
        """Start an execution trace identified by trace_id."""
        
    def end_trace(
        self,
        trace_id: Optional[str] = None,
        trace_map: Optional[Dict[str, List[str]]] = None
    ) -> None:
        """End execution trace. NOTE(review): trace_map presumably maps
        parent event ids to child event ids — confirm in implementation."""

class CBEventType(str, Enum):
    """Enumeration of callback event types.

    str mixin: members compare equal to their raw string values,
    e.g. CBEventType.LLM == "llm".
    """
    LLM = "llm"
    CHUNKING = "chunking"
    NODE_PARSING = "node_parsing"
    EMBEDDING = "embedding"
    QUERY = "query"
    RETRIEVE = "retrieve"
    SYNTHESIZE = "synthesize"
    TREE = "tree"
    SUB_QUESTION = "sub_question"
    TEMPLATING = "templating"
    FUNCTION_CALL = "function_call"
    RERANKING = "reranking"
    EXCEPTION = "exception"

Usage Examples

Basic Global Configuration

from llama_index.core import Settings
from llama_index.core.llms import MockLLM
from llama_index.core.embeddings import MockEmbedding

# Configure global settings
# (mock models are stand-ins here; swap in real models for production use)
Settings.configure(
    llm=MockLLM(max_tokens=512),
    embed_model=MockEmbedding(embed_dim=384),
    chunk_size=512,
    chunk_overlap=50,
    context_window=2048,
    num_output=256
)

# Settings are now available globally
print(f"Global LLM: {type(Settings.llm).__name__}")
print(f"Global embedding model: {type(Settings.embed_model).__name__}")
print(f"Chunk size: {Settings.chunk_size}")
print(f"Context window: {Settings.context_window}")

Automatic Component Initialization

# Clear any existing configuration
Settings.reset()

# Components are initialized automatically when accessed
# (lazy initialization: nothing is created until the first get_* call)
llm = Settings.get_llm()  # Creates MockLLM if none configured
embed_model = Settings.get_embed_model()  # Creates MockEmbedding if none configured

print(f"Auto-initialized LLM: {type(llm).__name__}")
print(f"Auto-initialized embedding: {type(embed_model).__name__}")

Using Settings with Index Creation

from llama_index.core import VectorStoreIndex, Document

# Configure settings first
Settings.configure(
    llm=MockLLM(),
    embed_model=MockEmbedding(embed_dim=384),
    chunk_size=256  # Smaller chunks for this example
)

# Create documents
documents = [
    Document(text="Machine learning is a subset of artificial intelligence that focuses on algorithms."),
    Document(text="Deep learning uses neural networks with multiple layers for complex pattern recognition."),
    Document(text="Natural language processing enables computers to understand human language.")
]

# Index will use global settings automatically
# (no per-call llm/embed_model arguments are needed)
index = VectorStoreIndex.from_documents(documents)

# Query engine inherits global LLM
query_engine = index.as_query_engine()
response = query_engine.query("What is machine learning?")
print(f"Response: {response.response}")

Custom Callback Handler

from llama_index.core.callbacks import BaseCallbackHandler, CBEventType

class CustomLoggingHandler(BaseCallbackHandler):
    """Custom callback handler for logging events.

    Records every start/end event as a dict (event_type, event_id,
    stage, payload) in self.events and echoes it to stdout.
    """
    
    def __init__(self):
        super().__init__()
        self.events = []
    
    def _record(self, event_type, event_id, stage, payload):
        """Append one event record; shared by the start/end hooks."""
        self.events.append({
            "event_type": event_type,
            "event_id": event_id,
            "stage": stage,
            "payload": payload or {}
        })
    
    def on_event_start(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: str = "",
        **kwargs: Any
    ) -> str:
        """Record the start of an event and return its id for correlation."""
        self._record(event_type, event_id, "start", payload)
        print(f"Event started: {event_type} - {event_id}")
        return event_id
    
    def on_event_end(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: str = "",
        **kwargs: Any
    ) -> None:
        """Record the end of the event started with the same event_id."""
        self._record(event_type, event_id, "end", payload)
        print(f"Event ended: {event_type} - {event_id}")

# Create and configure callback handler
custom_handler = CustomLoggingHandler()
Settings.configure(callback_manager=CallbackManager([custom_handler]))

# Operations will now trigger callbacks
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()
response = query_engine.query("Explain deep learning")

# Check logged events
# (fixed: "\n" was double-escaped as "\\n", printing a literal backslash)
print(f"\nLogged {len(custom_handler.events)} events:")
for event in custom_handler.events:
    print(f"  {event['stage'].upper()}: {event['event_type']} - {event['event_id']}")

Service Context (Legacy) Usage

from llama_index.core import ServiceContext

# Create service context (legacy approach; prefer Settings for new code)
service_context = ServiceContext.from_defaults(
    llm=MockLLM(),
    embed_model=MockEmbedding(embed_dim=384),
    chunk_size=512,
    chunk_overlap=50
)

# Use with index creation
index_with_service_context = VectorStoreIndex.from_documents(
    documents,
    service_context=service_context
)

# Set globally (deprecated approach — Settings.configure() replaces this)
set_global_service_context(service_context)

Transformation Pipeline Configuration

from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.extractors import TitleExtractor

# Configure transformation pipeline
# (presumably applied in list order during ingestion — confirm)
transformations = [
    SentenceSplitter(chunk_size=512, chunk_overlap=20),
    TitleExtractor()  # Example: extract titles as metadata
]

Settings.configure(
    transformations=transformations,
    llm=MockLLM(),
    embed_model=MockEmbedding(embed_dim=384)
)

# Transformations will be applied automatically during indexing
index_with_transforms = VectorStoreIndex.from_documents(documents)

Prompt Helper Configuration

from llama_index.core.indices import PromptHelper
from llama_index.core.prompts import PromptTemplate

# Configure prompt helper for token optimization
prompt_helper = PromptHelper(
    context_window=2048,
    num_output=256,
    chunk_overlap_ratio=0.1
)

# Create a sample prompt template
# (fixed: the newlines were double-escaped as "\\n", which would have put
# literal backslash-n sequences into the rendered prompt)
sample_prompt = PromptTemplate(
    template="Context: {context_str}\n\nQuestion: {query_str}\n\nAnswer:"
)

# Get optimal chunk size for this prompt
optimal_chunk_size = prompt_helper.get_chunk_size_given_prompt(
    prompt=sample_prompt,
    num_chunks=3
)

print(f"Optimal chunk size: {optimal_chunk_size}")

# Get configured text splitter
text_splitter = prompt_helper.get_text_splitter_given_prompt(
    prompt=sample_prompt,
    num_chunks=3
)

print(f"Text splitter chunk size: {text_splitter.chunk_size}")

Environment-Specific Configuration

import os

def configure_for_environment():
    """Configure global settings for the current deployment environment.

    Reads LLAMAINDEX_ENV (defaulting to "development"); any value other
    than "production" or "development" selects the test preset.
    """
    env = os.getenv("LLAMAINDEX_ENV", "development")

    if env == "production":
        # Production preset
        preset = dict(
            llm=MockLLM(),  # Would be real LLM in production
            embed_model=MockEmbedding(embed_dim=1536),
            chunk_size=1024,
            chunk_overlap=100,
            context_window=4096
        )
        message = "Configured for production environment"
    elif env == "development":
        # Development preset
        preset = dict(
            llm=MockLLM(),
            embed_model=MockEmbedding(embed_dim=384),
            chunk_size=512,
            chunk_overlap=50,
            context_window=2048
        )
        message = "Configured for development environment"
    else:
        # Test preset (fallback for any other value)
        preset = dict(
            llm=MockLLM(),
            embed_model=MockEmbedding(embed_dim=128),
            chunk_size=256,
            chunk_overlap=25,
            context_window=1024
        )
        message = "Configured for test environment"

    Settings.configure(**preset)
    print(message)

# Configure based on environment
configure_for_environment()

Settings State Management

# Names of the Settings attributes captured in a snapshot
_STATE_KEYS = (
    "llm",
    "embed_model",
    "chunk_size",
    "chunk_overlap",
    "context_window",
    "num_output",
)

def save_settings_state():
    """Snapshot the current global settings as a plain dict."""
    return {key: getattr(Settings, key) for key in _STATE_KEYS}

def restore_settings_state(state):
    """Re-apply a snapshot previously produced by save_settings_state()."""
    Settings.configure(**state)

# Take a snapshot of the current configuration
saved_state = save_settings_state()

# Temporarily override a setting for a specific operation
Settings.configure(chunk_size=128)
print(f"Modified chunk size: {Settings.chunk_size}")

# Put everything back the way it was
restore_settings_state(saved_state)
print(f"Restored chunk size: {Settings.chunk_size}")

Global Tokenizer Configuration

def simple_tokenizer(text: str) -> List[str]:
    """Split *text* on whitespace and return the resulting tokens."""
    return text.split()

def advanced_tokenizer(text: str) -> List[str]:
    """Lowercase *text* and return its word tokens, dropping punctuation.

    Mock implementation of a "smarter" tokenizer; a real one would be
    model-specific.
    """
    import re
    # \b\w+\b = runs of word characters between word boundaries.
    # (Fixed: the backslashes were double-escaped as "\\b\\w+\\b", so the
    # pattern matched literal backslash text instead of word tokens.)
    return re.findall(r'\b\w+\b', text.lower())

# Set global tokenizer
set_global_tokenizer(advanced_tokenizer)

# Get the current tokenizer back and exercise it on a sample sentence
current_tokenizer = get_tokenizer()
sample_text = "Hello, world! This is a test."
tokens = current_tokenizer(sample_text)
print(f"Tokenized '{sample_text}' into: {tokens}")

Configuration Patterns

Configuration Factory Pattern

class ConfigurationFactory:
    """Factory producing keyword dictionaries for Settings.configure()."""

    @staticmethod
    def create_development_config():
        """Preset for local development: mid-sized mock models and chunks."""
        return dict(
            llm=MockLLM(max_tokens=256),
            embed_model=MockEmbedding(embed_dim=384),
            chunk_size=512,
            chunk_overlap=50,
            context_window=2048,
        )

    @staticmethod
    def create_production_config():
        """Preset for production scale (real LLM in actual production)."""
        return dict(
            llm=MockLLM(max_tokens=512),
            embed_model=MockEmbedding(embed_dim=1536),
            chunk_size=1024,
            chunk_overlap=100,
            context_window=4096,
        )

    @staticmethod
    def create_memory_optimized_config():
        """Preset minimizing memory use: small models and small chunks."""
        return dict(
            llm=MockLLM(max_tokens=128),
            embed_model=MockEmbedding(embed_dim=256),
            chunk_size=256,
            chunk_overlap=25,
            context_window=1024,
        )

# Pick a preset and apply it globally
config = ConfigurationFactory.create_development_config()
Settings.configure(**config)

Types & Constants

# Default configuration values (mirror the Settings class attribute defaults)
DEFAULT_CHUNK_SIZE = 1024
DEFAULT_CHUNK_OVERLAP = 20
DEFAULT_CONTEXT_WINDOW = 4096
DEFAULT_NUM_OUTPUT = 256

# Component initialization behaviour flags
LAZY_INITIALIZATION = True     # components created on first get_* access
AUTO_FALLBACK_TO_MOCK = True   # unset LLM/embedding fall back to mock models

# Callback event payload structure
EventPayload = Dict[str, Any]

# Settings validation
VALIDATE_SETTINGS_ON_CONFIGURE = True

# Legacy support flags (ServiceContext is deprecated but still accepted)
SUPPORT_LEGACY_SERVICE_CONTEXT = True
DEPRECATION_WARNINGS_ENABLED = True

Install with Tessl CLI

npx tessl i tessl/pypi-llama-index-core

docs

agents-tools.md

documents-nodes.md

evaluation.md

index.md

indices.md

llms-embeddings.md

node-parsers.md

postprocessors.md

prompts.md

query-engines.md

retrievers.md

settings.md

storage.md

tile.json