Interface between LLMs and your data
—
Centralized configuration system for managing LLMs, embeddings, callback handlers, and other global settings across the application. The settings system provides a unified way to configure and manage all LlamaIndex components with support for global defaults and context-specific overrides.
Centralized configuration system for managing application-wide settings and component defaults.
class Settings:
    """
    Global settings for LlamaIndex configuration and component management.

    The Settings class provides centralized configuration for LLMs, embeddings,
    callbacks, and other system-wide components with automatic initialization
    and lazy loading capabilities.

    Class Attributes:
    - llm: Optional[LLM], global language model instance
    - embed_model: Optional[BaseEmbedding], global embedding model
    - callback_manager: Optional[CallbackManager], global callback system
    - transformations: Optional[List[TransformComponent]], global transformations
    - chunk_size: int, default chunk size for text processing
    - chunk_overlap: int, default overlap between text chunks
    - context_window: int, default context window size
    - num_output: int, default number of output tokens
    """

    # Core model components
    llm: Optional[LLM] = None
    embed_model: Optional[BaseEmbedding] = None

    # Callback and monitoring
    callback_manager: Optional[CallbackManager] = None

    # Text processing
    transformations: Optional[List[TransformComponent]] = None

    # Chunking configuration
    chunk_size: int = 1024
    chunk_overlap: int = 20

    # Model configuration
    context_window: int = 4096
    num_output: int = 256

    @classmethod
    def reset(cls) -> None:
        """
        Reset all settings to default values.

        Clears all configured components and resets settings to their
        default state for clean initialization.
        """

    @classmethod
    def configure(
        cls,
        llm: Optional[LLM] = None,
        embed_model: Optional[BaseEmbedding] = None,
        callback_manager: Optional[CallbackManager] = None,
        transformations: Optional[List[TransformComponent]] = None,
        chunk_size: Optional[int] = None,
        chunk_overlap: Optional[int] = None,
        context_window: Optional[int] = None,
        num_output: Optional[int] = None,
        **kwargs
    ) -> None:
        """
        Configure global settings with provided components.

        Only the arguments that are passed (non-None) are applied; omitted
        settings presumably keep their current values — confirm against the
        implementation.

        Parameters:
        - llm: Optional[LLM], language model for global use
        - embed_model: Optional[BaseEmbedding], embedding model for global use
        - callback_manager: Optional[CallbackManager], callback system
        - transformations: Optional[List[TransformComponent]], global transformations
        - chunk_size: Optional[int], default chunk size
        - chunk_overlap: Optional[int], default chunk overlap
        - context_window: Optional[int], default context window size
        - num_output: Optional[int], default output token count
        """

    @classmethod
    def get_llm(cls) -> LLM:
        """
        Get configured language model with automatic initialization.

        Returns the configured LLM or initializes a default mock LLM
        if none is configured.

        Returns:
        - LLM, configured or default language model
        """

    @classmethod
    def get_embed_model(cls) -> BaseEmbedding:
        """
        Get configured embedding model with automatic initialization.

        Returns the configured embedding model or initializes a default
        mock embedding model if none is configured.

        Returns:
        - BaseEmbedding, configured or default embedding model
        """

    @classmethod
    def get_callback_manager(cls) -> CallbackManager:
        """
        Get configured callback manager with automatic initialization.

        Returns the configured callback manager or creates a default
        instance if none is configured.

        Returns:
        - CallbackManager, configured or default callback manager
        """

    @classmethod
    def get_transformations(cls) -> List[TransformComponent]:
        """
        Get configured transformations with automatic initialization.

        Returns the configured transformation pipeline or creates
        default transformations if none are configured.

        Returns:
        - List[TransformComponent], configured or default transformations
        """

Legacy service context for backward compatibility with older LlamaIndex versions.
class ServiceContext:
    """
    Legacy service context for LLM operations and configuration.

    Note: ServiceContext is deprecated in favor of the Settings class.
    This class is maintained for backward compatibility.

    Parameters:
    - llm: Optional[LLM], language model instance
    - embed_model: Optional[BaseEmbedding], embedding model instance
    - node_parser: Optional[NodeParser], text parsing configuration
    - text_splitter: Optional[TextSplitter], text splitting configuration
    - transformations: Optional[List[TransformComponent]], transformation pipeline
    - callback_manager: Optional[CallbackManager], callback management
    """

    def __init__(
        self,
        llm: Optional[LLM] = None,
        embed_model: Optional[BaseEmbedding] = None,
        node_parser: Optional[NodeParser] = None,
        text_splitter: Optional[TextSplitter] = None,
        transformations: Optional[List[TransformComponent]] = None,
        callback_manager: Optional[CallbackManager] = None,
        **kwargs
    ): ...

    @classmethod
    def from_defaults(
        cls,
        llm: Optional[LLM] = None,
        embed_model: Optional[BaseEmbedding] = None,
        node_parser: Optional[NodeParser] = None,
        text_splitter: Optional[TextSplitter] = None,
        transformations: Optional[List[TransformComponent]] = None,
        callback_manager: Optional[CallbackManager] = None,
        chunk_size: Optional[int] = None,
        chunk_overlap: Optional[int] = None,
        context_window: Optional[int] = None,
        num_output: Optional[int] = None,
        **kwargs
    ) -> "ServiceContext":
        """
        Create ServiceContext with default configurations.

        Preferred constructor for this legacy class: unspecified components
        fall back to defaults.

        Parameters:
        - llm: Optional[LLM], language model
        - embed_model: Optional[BaseEmbedding], embedding model
        - node_parser: Optional[NodeParser], node parsing configuration
        - text_splitter: Optional[TextSplitter], text splitting configuration
        - transformations: Optional[List[TransformComponent]], transformations
        - callback_manager: Optional[CallbackManager], callback system
        - chunk_size: Optional[int], text chunk size
        - chunk_overlap: Optional[int], chunk overlap size
        - context_window: Optional[int], model context window
        - num_output: Optional[int], output token limit

        Returns:
        - ServiceContext, configured service context
        """

Utility functions for setting global configuration and managing system-wide settings.
def set_global_service_context(service_context: ServiceContext) -> None:
    """
    Set global service context for legacy compatibility.

    Parameters:
    - service_context: ServiceContext, service context to set globally

    Note: This function is deprecated. Use Settings.configure() instead.
    """
def set_global_handler(handler: BaseCallbackHandler) -> None:
    """
    Set global callback handler for system-wide event handling.

    Parameters:
    - handler: BaseCallbackHandler, callback handler to set globally
    """
def set_global_tokenizer(tokenizer: Callable[[str], List]) -> None:
    """
    Set global tokenizer function for text processing.

    Parameters:
    - tokenizer: Callable[[str], List], tokenizer function mapping text to a
      list of tokens
    """
def get_tokenizer() -> Callable[[str], List]:
    """
    Get current global tokenizer function.

    Returns:
    - Callable[[str], List], current tokenizer function
    """

Configuration utilities for prompt management and optimization.
class PromptHelper:
    """
    Helper for prompt management and token optimization.

    Sizes text chunks so that prompt, context, and reserved output tokens
    all fit within the model's context window.

    Parameters:
    - context_window: int, available context window size
    - num_output: int, reserved tokens for output
    - chunk_overlap_ratio: float, ratio of overlap between chunks
    - chunk_size_limit: Optional[int], maximum chunk size
    - tokenizer: Optional[Callable], tokenizer function for counting
    """

    def __init__(
        self,
        context_window: int = 4096,
        num_output: int = 256,
        chunk_overlap_ratio: float = 0.1,
        chunk_size_limit: Optional[int] = None,
        tokenizer: Optional[Callable] = None,
        **kwargs
    ): ...

    def get_text_splitter_given_prompt(
        self,
        prompt: BasePromptTemplate,
        num_chunks: int = 1,
        padding: int = 5
    ) -> TokenTextSplitter:
        """
        Get text splitter configured for specific prompt requirements.

        Parameters:
        - prompt: BasePromptTemplate, prompt template for sizing
        - num_chunks: int, number of chunks to accommodate
        - padding: int, safety padding for token count

        Returns:
        - TokenTextSplitter, configured text splitter
        """

    def get_chunk_size_given_prompt(
        self,
        prompt: BasePromptTemplate,
        num_chunks: int = 1,
        padding: int = 5
    ) -> int:
        """
        Calculate optimal chunk size for prompt and context window.

        Parameters:
        - prompt: BasePromptTemplate, prompt template
        - num_chunks: int, number of chunks to fit
        - padding: int, safety padding

        Returns:
        - int, optimal chunk size in tokens
        """

System for managing callback handlers and event processing across the application.
class CallbackManager:
    """
    Manager for callback handlers and event processing.

    Fans event notifications out to every registered handler.

    Parameters:
    - handlers: List[BaseCallbackHandler], list of callback handlers
    """

    def __init__(self, handlers: Optional[List[BaseCallbackHandler]] = None): ...

    def add_handler(self, handler: BaseCallbackHandler) -> None:
        """
        Add callback handler to manager.

        Parameters:
        - handler: BaseCallbackHandler, handler to add
        """

    def remove_handler(self, handler: BaseCallbackHandler) -> None:
        """
        Remove callback handler from manager.

        Parameters:
        - handler: BaseCallbackHandler, handler to remove
        """

    def set_handlers(self, handlers: List[BaseCallbackHandler]) -> None:
        """
        Set list of callback handlers, replacing existing ones.

        Parameters:
        - handlers: List[BaseCallbackHandler], new handler list
        """

    def on_event_start(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: str = "",
        **kwargs: Any
    ) -> str:
        """
        Handle event start callback.

        Parameters:
        - event_type: CBEventType, type of event starting
        - payload: Optional[Dict[str, Any]], event payload data
        - event_id: str, unique event identifier

        Returns:
        - str, event identifier for tracking
        """

    def on_event_end(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: str = "",
        **kwargs: Any
    ) -> None:
        """
        Handle event end callback.

        Parameters:
        - event_type: CBEventType, type of event ending
        - payload: Optional[Dict[str, Any]], event payload data
        - event_id: str, event identifier
        """
class BaseCallbackHandler:
    """
    Base interface for callback handler implementations.

    Callback handlers receive and process events from LlamaIndex
    operations for logging, monitoring, and debugging purposes.
    Subclasses override the methods below.
    """

    def on_event_start(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: str = "",
        **kwargs: Any
    ) -> str:
        """Handle event start; returns the event identifier."""

    def on_event_end(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: str = "",
        **kwargs: Any
    ) -> None:
        """Handle event end."""

    def start_trace(self, trace_id: Optional[str] = None) -> None:
        """Start execution trace."""

    def end_trace(
        self,
        trace_id: Optional[str] = None,
        trace_map: Optional[Dict[str, List[str]]] = None
    ) -> None:
        """End execution trace."""
class CBEventType(str, Enum):
    """Enumeration of callback event types emitted during operations."""

    LLM = "llm"
    CHUNKING = "chunking"
    NODE_PARSING = "node_parsing"
    EMBEDDING = "embedding"
    QUERY = "query"
    RETRIEVE = "retrieve"
    SYNTHESIZE = "synthesize"
    TREE = "tree"
    SUB_QUESTION = "sub_question"
    TEMPLATING = "templating"
    FUNCTION_CALL = "function_call"
    RERANKING = "reranking"
    EXCEPTION = "exception"

from llama_index.core import Settings
from llama_index.core.llms import MockLLM
from llama_index.core.embeddings import MockEmbedding

# Example: configure global settings once; all later components pick them up.
Settings.configure(
    llm=MockLLM(max_tokens=512),
    embed_model=MockEmbedding(embed_dim=384),
    chunk_size=512,
    chunk_overlap=50,
    context_window=2048,
    num_output=256
)

# Settings are now available globally
print(f"Global LLM: {type(Settings.llm).__name__}")
print(f"Global embedding model: {type(Settings.embed_model).__name__}")
print(f"Chunk size: {Settings.chunk_size}")
print(f"Context window: {Settings.context_window}")

# Clear any existing configuration
Settings.reset()

# Example: components are initialized lazily on first access.
llm = Settings.get_llm()  # Creates MockLLM if none configured
embed_model = Settings.get_embed_model()  # Creates MockEmbedding if none configured
print(f"Auto-initialized LLM: {type(llm).__name__}")
print(f"Auto-initialized embedding: {type(embed_model).__name__}")

from llama_index.core import VectorStoreIndex, Document
# Example: indexes and query engines inherit the global settings.
# Configure settings first
Settings.configure(
    llm=MockLLM(),
    embed_model=MockEmbedding(embed_dim=384),
    chunk_size=256  # Smaller chunks for this example
)

# Create documents
documents = [
    Document(text="Machine learning is a subset of artificial intelligence that focuses on algorithms."),
    Document(text="Deep learning uses neural networks with multiple layers for complex pattern recognition."),
    Document(text="Natural language processing enables computers to understand human language.")
]

# Index will use global settings automatically
index = VectorStoreIndex.from_documents(documents)

# Query engine inherits global LLM
query_engine = index.as_query_engine()
response = query_engine.query("What is machine learning?")
print(f"Response: {response.response}")

from llama_index.core.callbacks import BaseCallbackHandler, CBEventType
class CustomLoggingHandler(BaseCallbackHandler):
    """Callback handler that records every event and echoes it to stdout."""

    def __init__(self):
        super().__init__()
        # Chronological record of all observed start/end events.
        self.events = []

    def _record(self, event_type, event_id, stage, payload):
        """Append one event entry to the in-memory log."""
        self.events.append({
            "event_type": event_type,
            "event_id": event_id,
            "stage": stage,
            "payload": payload or {}
        })

    def on_event_start(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: str = "",
        **kwargs: Any
    ) -> str:
        self._record(event_type, event_id, "start", payload)
        print(f"Event started: {event_type} - {event_id}")
        return event_id

    def on_event_end(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: str = "",
        **kwargs: Any
    ) -> None:
        self._record(event_type, event_id, "end", payload)
        print(f"Event ended: {event_type} - {event_id}")
# Example: wire a custom handler into the global callback manager.
# Create and configure callback handler
custom_handler = CustomLoggingHandler()
Settings.configure(callback_manager=CallbackManager([custom_handler]))

# Operations will now trigger callbacks
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()
response = query_engine.query("Explain deep learning")

# Check logged events
# FIX: "\\n" printed a literal backslash-n (extraction artifact); a real
# newline is intended before the summary line.
print(f"\nLogged {len(custom_handler.events)} events:")
for event in custom_handler.events:
    print(f" {event['stage'].upper()}: {event['event_type']} - {event['event_id']}")

from llama_index.core import ServiceContext
# Example: legacy ServiceContext workflow (deprecated; prefer Settings).
# Create service context (legacy approach)
service_context = ServiceContext.from_defaults(
    llm=MockLLM(),
    embed_model=MockEmbedding(embed_dim=384),
    chunk_size=512,
    chunk_overlap=50
)

# Use with index creation
index_with_service_context = VectorStoreIndex.from_documents(
    documents,
    service_context=service_context
)

# Set globally (deprecated approach)
set_global_service_context(service_context)

from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.extractors import TitleExtractor

# Example: register a transformation pipeline applied during indexing.
# Configure transformation pipeline
transformations = [
    SentenceSplitter(chunk_size=512, chunk_overlap=20),
    TitleExtractor()  # Example: extract titles as metadata
]
Settings.configure(
    transformations=transformations,
    llm=MockLLM(),
    embed_model=MockEmbedding(embed_dim=384)
)

# Transformations will be applied automatically during indexing
index_with_transforms = VectorStoreIndex.from_documents(documents)

from llama_index.core.indices import PromptHelper
from llama_index.core.prompts import PromptTemplate

# Example: size chunks so prompt + context + output fit the context window.
# Configure prompt helper for token optimization
prompt_helper = PromptHelper(
    context_window=2048,
    num_output=256,
    chunk_overlap_ratio=0.1
)

# Create a sample prompt template
# FIX: the template previously used "\\n" (a literal backslash + n,
# an extraction artifact); real newlines are intended between sections.
sample_prompt = PromptTemplate(
    template="Context: {context_str}\n\nQuestion: {query_str}\n\nAnswer:"
)

# Get optimal chunk size for this prompt
optimal_chunk_size = prompt_helper.get_chunk_size_given_prompt(
    prompt=sample_prompt,
    num_chunks=3
)
print(f"Optimal chunk size: {optimal_chunk_size}")

# Get configured text splitter
text_splitter = prompt_helper.get_text_splitter_given_prompt(
    prompt=sample_prompt,
    num_chunks=3
)
print(f"Text splitter chunk size: {text_splitter.chunk_size}")

import os
def configure_for_environment():
    """Configure settings based on environment.

    Reads LLAMAINDEX_ENV (default "development"); any unrecognized value
    selects the test profile.
    """

    def apply_profile(embed_dim, chunk_size, chunk_overlap, context_window):
        # Shared plumbing for every environment profile.
        Settings.configure(
            llm=MockLLM(),
            embed_model=MockEmbedding(embed_dim=embed_dim),
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            context_window=context_window
        )

    env = os.getenv("LLAMAINDEX_ENV", "development")
    if env == "production":
        # Would be a real LLM in actual production
        apply_profile(embed_dim=1536, chunk_size=1024, chunk_overlap=100, context_window=4096)
        print("Configured for production environment")
    elif env == "development":
        apply_profile(embed_dim=384, chunk_size=512, chunk_overlap=50, context_window=2048)
        print("Configured for development environment")
    else:
        apply_profile(embed_dim=128, chunk_size=256, chunk_overlap=25, context_window=1024)
        print("Configured for test environment")
# Configure based on environment
configure_for_environment()

def save_settings_state():
"""Save current settings state."""
return {
"llm": Settings.llm,
"embed_model": Settings.embed_model,
"chunk_size": Settings.chunk_size,
"chunk_overlap": Settings.chunk_overlap,
"context_window": Settings.context_window,
"num_output": Settings.num_output
}
def restore_settings_state(state):
    """Restore settings from saved state.

    Re-applies each captured field through the public configure() entry
    point; raises KeyError if a field is missing from the snapshot.
    """
    fields = ("llm", "embed_model", "chunk_size",
              "chunk_overlap", "context_window", "num_output")
    Settings.configure(**{name: state[name] for name in fields})
# Example: snapshot settings, change them temporarily, then restore.
# Save current state
saved_state = save_settings_state()

# Modify settings for specific operation
Settings.configure(chunk_size=128)
print(f"Modified chunk size: {Settings.chunk_size}")

# Restore original state
restore_settings_state(saved_state)
print(f"Restored chunk size: {Settings.chunk_size}")

def simple_tokenizer(text: str) -> List[str]:
"""Simple whitespace tokenizer."""
return text.split()
def advanced_tokenizer(text: str) -> List[str]:
    """More advanced tokenization (mock implementation).

    Lowercases the text and returns word tokens, dropping punctuation.
    """
    import re
    # FIX: the pattern was r'\\b\\w+\\b' — inside a raw string the doubled
    # backslash makes the regex match a literal backslash character, so it
    # never matched ordinary words. r'\b\w+\b' (word-boundary-delimited
    # word-character runs) is the intended pattern.
    tokens = re.findall(r'\b\w+\b', text.lower())
    return tokens
# Example: install and retrieve the process-wide tokenizer.
# Set global tokenizer
set_global_tokenizer(advanced_tokenizer)

# Get current tokenizer
current_tokenizer = get_tokenizer()

sample_text = "Hello, world! This is a test."
tokens = current_tokenizer(sample_text)
print(f"Tokenized '{sample_text}' into: {tokens}")

class ConfigurationFactory:
    """Factory for creating different configuration setups.

    Each method returns a kwargs dict suitable for Settings.configure(**cfg).
    """

    @staticmethod
    def create_development_config():
        """Create development configuration."""
        return {
            "llm": MockLLM(max_tokens=256),
            "embed_model": MockEmbedding(embed_dim=384),
            "chunk_size": 512,
            "chunk_overlap": 50,
            "context_window": 2048
        }

    @staticmethod
    def create_production_config():
        """Create production configuration."""
        return {
            "llm": MockLLM(max_tokens=512),  # Real LLM in actual production
            "embed_model": MockEmbedding(embed_dim=1536),
            "chunk_size": 1024,
            "chunk_overlap": 100,
            "context_window": 4096
        }

    @staticmethod
    def create_memory_optimized_config():
        """Create memory-optimized configuration."""
        return {
            "llm": MockLLM(max_tokens=128),
            "embed_model": MockEmbedding(embed_dim=256),
            "chunk_size": 256,
            "chunk_overlap": 25,
            "context_window": 1024
        }
# Use factory to configure
config = ConfigurationFactory.create_development_config()
Settings.configure(**config)

# Default configuration values
# Package-level defaults mirroring the Settings class attribute defaults.
DEFAULT_CHUNK_SIZE = 1024
DEFAULT_CHUNK_OVERLAP = 20
DEFAULT_CONTEXT_WINDOW = 4096
DEFAULT_NUM_OUTPUT = 256

# Component initialization
LAZY_INITIALIZATION = True
AUTO_FALLBACK_TO_MOCK = True

# Callback event payload structure (type alias)
EventPayload = Dict[str, Any]

# Settings validation
VALIDATE_SETTINGS_ON_CONFIGURE = True

# Legacy support flags
SUPPORT_LEGACY_SERVICE_CONTEXT = True
DEPRECATION_WARNINGS_ENABLED = True

Install with Tessl CLI
npx tessl i tessl/pypi-llama-index-core