# Settings & Configuration

Centralized configuration system for managing LLMs, embeddings, callback handlers, and other global settings across the application. The settings system provides a unified way to configure and manage all LlamaIndex components with support for global defaults and context-specific overrides.

## Capabilities

### Global Settings Management

Centralized configuration system for managing application-wide settings and component defaults.

```python { .api }
class Settings:
    """
    Global settings for LlamaIndex configuration and component management.

    The Settings class provides centralized configuration for LLMs, embeddings,
    callbacks, and other system-wide components with automatic initialization
    and lazy loading capabilities.

    Class Attributes:
    - llm: Optional[LLM], global language model instance
    - embed_model: Optional[BaseEmbedding], global embedding model
    - callback_manager: Optional[CallbackManager], global callback system
    - transformations: Optional[List[TransformComponent]], global transformations
    - chunk_size: int, default chunk size for text processing
    - chunk_overlap: int, default overlap between text chunks
    - context_window: int, default context window size
    - num_output: int, default number of output tokens
    """

    # Core model components
    llm: Optional[LLM] = None
    embed_model: Optional[BaseEmbedding] = None

    # Callback and monitoring
    callback_manager: Optional[CallbackManager] = None

    # Text processing
    transformations: Optional[List[TransformComponent]] = None

    # Chunking configuration
    chunk_size: int = 1024
    chunk_overlap: int = 20

    # Model configuration
    context_window: int = 4096
    num_output: int = 256

    @classmethod
    def reset(cls) -> None:
        """
        Reset all settings to default values.

        Clears all configured components and resets settings to their
        default state for clean initialization.
        """

    @classmethod
    def configure(
        cls,
        llm: Optional[LLM] = None,
        embed_model: Optional[BaseEmbedding] = None,
        callback_manager: Optional[CallbackManager] = None,
        transformations: Optional[List[TransformComponent]] = None,
        chunk_size: Optional[int] = None,
        chunk_overlap: Optional[int] = None,
        context_window: Optional[int] = None,
        num_output: Optional[int] = None,
        **kwargs
    ) -> None:
        """
        Configure global settings with provided components.

        Parameters:
        - llm: Optional[LLM], language model for global use
        - embed_model: Optional[BaseEmbedding], embedding model for global use
        - callback_manager: Optional[CallbackManager], callback system
        - transformations: Optional[List[TransformComponent]], global transformations
        - chunk_size: Optional[int], default chunk size
        - chunk_overlap: Optional[int], default chunk overlap
        - context_window: Optional[int], default context window size
        - num_output: Optional[int], default output token count
        """

    @classmethod
    def get_llm(cls) -> LLM:
        """
        Get configured language model with automatic initialization.

        Returns the configured LLM or initializes a default mock LLM
        if none is configured.

        Returns:
        - LLM, configured or default language model
        """

    @classmethod
    def get_embed_model(cls) -> BaseEmbedding:
        """
        Get configured embedding model with automatic initialization.

        Returns the configured embedding model or initializes a default
        mock embedding model if none is configured.

        Returns:
        - BaseEmbedding, configured or default embedding model
        """

    @classmethod
    def get_callback_manager(cls) -> CallbackManager:
        """
        Get configured callback manager with automatic initialization.

        Returns the configured callback manager or creates a default
        instance if none is configured.

        Returns:
        - CallbackManager, configured or default callback manager
        """

    @classmethod
    def get_transformations(cls) -> List[TransformComponent]:
        """
        Get configured transformations with automatic initialization.

        Returns the configured transformation pipeline or creates
        default transformations if none are configured.

        Returns:
        - List[TransformComponent], configured or default transformations
        """
```

### Service Context (Legacy)

Legacy service context for backward compatibility with older LlamaIndex versions.

```python { .api }
class ServiceContext:
    """
    Legacy service context for LLM operations and configuration.

    Note: ServiceContext is deprecated in favor of the Settings class.
    This class is maintained for backward compatibility.

    Parameters:
    - llm: Optional[LLM], language model instance
    - embed_model: Optional[BaseEmbedding], embedding model instance
    - node_parser: Optional[NodeParser], text parsing configuration
    - text_splitter: Optional[TextSplitter], text splitting configuration
    - transformations: Optional[List[TransformComponent]], transformation pipeline
    - callback_manager: Optional[CallbackManager], callback management
    """
    def __init__(
        self,
        llm: Optional[LLM] = None,
        embed_model: Optional[BaseEmbedding] = None,
        node_parser: Optional[NodeParser] = None,
        text_splitter: Optional[TextSplitter] = None,
        transformations: Optional[List[TransformComponent]] = None,
        callback_manager: Optional[CallbackManager] = None,
        **kwargs
    ): ...

    @classmethod
    def from_defaults(
        cls,
        llm: Optional[LLM] = None,
        embed_model: Optional[BaseEmbedding] = None,
        node_parser: Optional[NodeParser] = None,
        text_splitter: Optional[TextSplitter] = None,
        transformations: Optional[List[TransformComponent]] = None,
        callback_manager: Optional[CallbackManager] = None,
        chunk_size: Optional[int] = None,
        chunk_overlap: Optional[int] = None,
        context_window: Optional[int] = None,
        num_output: Optional[int] = None,
        **kwargs
    ) -> "ServiceContext":
        """
        Create ServiceContext with default configurations.

        Parameters:
        - llm: Optional[LLM], language model
        - embed_model: Optional[BaseEmbedding], embedding model
        - node_parser: Optional[NodeParser], node parsing configuration
        - text_splitter: Optional[TextSplitter], text splitting configuration
        - transformations: Optional[List[TransformComponent]], transformations
        - callback_manager: Optional[CallbackManager], callback system
        - chunk_size: Optional[int], text chunk size
        - chunk_overlap: Optional[int], chunk overlap size
        - context_window: Optional[int], model context window
        - num_output: Optional[int], output token limit

        Returns:
        - ServiceContext, configured service context
        """
```

### Global Configuration Functions

Utility functions for setting global configuration and managing system-wide settings.

```python { .api }
def set_global_service_context(service_context: ServiceContext) -> None:
    """
    Set global service context for legacy compatibility.

    Parameters:
    - service_context: ServiceContext, service context to set globally

    Note: This function is deprecated. Use Settings.configure() instead.
    """

def set_global_handler(handler: BaseCallbackHandler) -> None:
    """
    Set global callback handler for system-wide event handling.

    Parameters:
    - handler: BaseCallbackHandler, callback handler to set globally
    """

def set_global_tokenizer(tokenizer: Callable[[str], List]) -> None:
    """
    Set global tokenizer function for text processing.

    Parameters:
    - tokenizer: Callable[[str], List], tokenizer function
    """

def get_tokenizer() -> Callable[[str], List]:
    """
    Get current global tokenizer function.

    Returns:
    - Callable[[str], List], current tokenizer function
    """
```

### Prompt Helper Configuration

Configuration utilities for prompt management and optimization.

```python { .api }
class PromptHelper:
    """
    Helper for prompt management and token optimization.

    Parameters:
    - context_window: int, available context window size
    - num_output: int, reserved tokens for output
    - chunk_overlap_ratio: float, ratio of overlap between chunks
    - chunk_size_limit: Optional[int], maximum chunk size
    - tokenizer: Optional[Callable], tokenizer function for counting
    """
    def __init__(
        self,
        context_window: int = 4096,
        num_output: int = 256,
        chunk_overlap_ratio: float = 0.1,
        chunk_size_limit: Optional[int] = None,
        tokenizer: Optional[Callable] = None,
        **kwargs
    ): ...

    def get_text_splitter_given_prompt(
        self,
        prompt: BasePromptTemplate,
        num_chunks: int = 1,
        padding: int = 5
    ) -> TokenTextSplitter:
        """
        Get text splitter configured for specific prompt requirements.

        Parameters:
        - prompt: BasePromptTemplate, prompt template for sizing
        - num_chunks: int, number of chunks to accommodate
        - padding: int, safety padding for token count

        Returns:
        - TokenTextSplitter, configured text splitter
        """

    def get_chunk_size_given_prompt(
        self,
        prompt: BasePromptTemplate,
        num_chunks: int = 1,
        padding: int = 5
    ) -> int:
        """
        Calculate optimal chunk size for prompt and context window.

        Parameters:
        - prompt: BasePromptTemplate, prompt template
        - num_chunks: int, number of chunks to fit
        - padding: int, safety padding

        Returns:
        - int, optimal chunk size in tokens
        """
```

### Callback Management

System for managing callback handlers and event processing across the application.

```python { .api }
class CallbackManager:
    """
    Manager for callback handlers and event processing.

    Parameters:
    - handlers: List[BaseCallbackHandler], list of callback handlers
    """
    def __init__(self, handlers: Optional[List[BaseCallbackHandler]] = None): ...

    def add_handler(self, handler: BaseCallbackHandler) -> None:
        """
        Add callback handler to manager.

        Parameters:
        - handler: BaseCallbackHandler, handler to add
        """

    def remove_handler(self, handler: BaseCallbackHandler) -> None:
        """
        Remove callback handler from manager.

        Parameters:
        - handler: BaseCallbackHandler, handler to remove
        """

    def set_handlers(self, handlers: List[BaseCallbackHandler]) -> None:
        """
        Set list of callback handlers, replacing existing ones.

        Parameters:
        - handlers: List[BaseCallbackHandler], new handler list
        """

    def on_event_start(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: str = "",
        **kwargs: Any
    ) -> str:
        """
        Handle event start callback.

        Parameters:
        - event_type: CBEventType, type of event starting
        - payload: Optional[Dict[str, Any]], event payload data
        - event_id: str, unique event identifier

        Returns:
        - str, event identifier for tracking
        """

    def on_event_end(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: str = "",
        **kwargs: Any
    ) -> None:
        """
        Handle event end callback.

        Parameters:
        - event_type: CBEventType, type of event ending
        - payload: Optional[Dict[str, Any]], event payload data
        - event_id: str, event identifier
        """

class BaseCallbackHandler:
    """
    Base interface for callback handler implementations.

    Callback handlers receive and process events from LlamaIndex
    operations for logging, monitoring, and debugging purposes.
    """
    def on_event_start(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: str = "",
        **kwargs: Any
    ) -> str:
        """Handle event start."""

    def on_event_end(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: str = "",
        **kwargs: Any
    ) -> None:
        """Handle event end."""

    def start_trace(self, trace_id: Optional[str] = None) -> None:
        """Start execution trace."""

    def end_trace(
        self,
        trace_id: Optional[str] = None,
        trace_map: Optional[Dict[str, List[str]]] = None
    ) -> None:
        """End execution trace."""

class CBEventType(str, Enum):
    """Enumeration of callback event types."""
    LLM = "llm"
    CHUNKING = "chunking"
    NODE_PARSING = "node_parsing"
    EMBEDDING = "embedding"
    QUERY = "query"
    RETRIEVE = "retrieve"
    SYNTHESIZE = "synthesize"
    TREE = "tree"
    SUB_QUESTION = "sub_question"
    TEMPLATING = "templating"
    FUNCTION_CALL = "function_call"
    RERANKING = "reranking"
    EXCEPTION = "exception"
```

## Usage Examples

### Basic Global Configuration

```python
from llama_index.core import Settings
from llama_index.core.llms import MockLLM
from llama_index.core.embeddings import MockEmbedding

# Configure global settings
Settings.configure(
    llm=MockLLM(max_tokens=512),
    embed_model=MockEmbedding(embed_dim=384),
    chunk_size=512,
    chunk_overlap=50,
    context_window=2048,
    num_output=256
)

# Settings are now available globally
print(f"Global LLM: {type(Settings.llm).__name__}")
print(f"Global embedding model: {type(Settings.embed_model).__name__}")
print(f"Chunk size: {Settings.chunk_size}")
print(f"Context window: {Settings.context_window}")
```

### Automatic Component Initialization

```python
# Clear any existing configuration
Settings.reset()

# Components are initialized automatically when accessed
llm = Settings.get_llm()  # Creates MockLLM if none configured
embed_model = Settings.get_embed_model()  # Creates MockEmbedding if none configured

print(f"Auto-initialized LLM: {type(llm).__name__}")
print(f"Auto-initialized embedding: {type(embed_model).__name__}")
```

### Using Settings with Index Creation

```python
from llama_index.core import VectorStoreIndex, Document

# Configure settings first
Settings.configure(
    llm=MockLLM(),
    embed_model=MockEmbedding(embed_dim=384),
    chunk_size=256  # Smaller chunks for this example
)

# Create documents
documents = [
    Document(text="Machine learning is a subset of artificial intelligence that focuses on algorithms."),
    Document(text="Deep learning uses neural networks with multiple layers for complex pattern recognition."),
    Document(text="Natural language processing enables computers to understand human language.")
]

# Index will use global settings automatically
index = VectorStoreIndex.from_documents(documents)

# Query engine inherits global LLM
query_engine = index.as_query_engine()
response = query_engine.query("What is machine learning?")
print(f"Response: {response.response}")
```

### Custom Callback Handler

```python
from typing import Any, Dict, Optional

from llama_index.core.callbacks import BaseCallbackHandler, CallbackManager, CBEventType

class CustomLoggingHandler(BaseCallbackHandler):
    """Custom callback handler for logging events."""

    def __init__(self):
        super().__init__()
        self.events = []

    def on_event_start(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: str = "",
        **kwargs: Any
    ) -> str:
        event_info = {
            "event_type": event_type,
            "event_id": event_id,
            "stage": "start",
            "payload": payload or {}
        }
        self.events.append(event_info)
        print(f"Event started: {event_type} - {event_id}")
        return event_id

    def on_event_end(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: str = "",
        **kwargs: Any
    ) -> None:
        event_info = {
            "event_type": event_type,
            "event_id": event_id,
            "stage": "end",
            "payload": payload or {}
        }
        self.events.append(event_info)
        print(f"Event ended: {event_type} - {event_id}")

# Create and configure callback handler
custom_handler = CustomLoggingHandler()
Settings.configure(callback_manager=CallbackManager([custom_handler]))

# Operations will now trigger callbacks
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()
response = query_engine.query("Explain deep learning")

# Check logged events
print(f"\nLogged {len(custom_handler.events)} events:")
for event in custom_handler.events:
    print(f"  {event['stage'].upper()}: {event['event_type']} - {event['event_id']}")
```

### Service Context (Legacy) Usage

```python
from llama_index.core import ServiceContext, set_global_service_context

# Create service context (legacy approach)
service_context = ServiceContext.from_defaults(
    llm=MockLLM(),
    embed_model=MockEmbedding(embed_dim=384),
    chunk_size=512,
    chunk_overlap=50
)

# Use with index creation
index_with_service_context = VectorStoreIndex.from_documents(
    documents,
    service_context=service_context
)

# Set globally (deprecated approach)
set_global_service_context(service_context)
```

### Transformation Pipeline Configuration

```python
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.extractors import TitleExtractor

# Configure transformation pipeline
transformations = [
    SentenceSplitter(chunk_size=512, chunk_overlap=20),
    TitleExtractor()  # Example: extract titles as metadata
]

Settings.configure(
    transformations=transformations,
    llm=MockLLM(),
    embed_model=MockEmbedding(embed_dim=384)
)

# Transformations will be applied automatically during indexing
index_with_transforms = VectorStoreIndex.from_documents(documents)
```

### Prompt Helper Configuration

```python
from llama_index.core.indices import PromptHelper
from llama_index.core.prompts import PromptTemplate

# Configure prompt helper for token optimization
prompt_helper = PromptHelper(
    context_window=2048,
    num_output=256,
    chunk_overlap_ratio=0.1
)

# Create a sample prompt template
sample_prompt = PromptTemplate(
    template="Context: {context_str}\n\nQuestion: {query_str}\n\nAnswer:"
)

# Get optimal chunk size for this prompt
optimal_chunk_size = prompt_helper.get_chunk_size_given_prompt(
    prompt=sample_prompt,
    num_chunks=3
)

print(f"Optimal chunk size: {optimal_chunk_size}")

# Get configured text splitter
text_splitter = prompt_helper.get_text_splitter_given_prompt(
    prompt=sample_prompt,
    num_chunks=3
)

print(f"Text splitter chunk size: {text_splitter.chunk_size}")
```

### Environment-Specific Configuration

```python
import os

def configure_for_environment():
    """Configure settings based on environment."""

    env = os.getenv("LLAMAINDEX_ENV", "development")

    if env == "production":
        # Production configuration
        Settings.configure(
            llm=MockLLM(),  # Would be real LLM in production
            embed_model=MockEmbedding(embed_dim=1536),
            chunk_size=1024,
            chunk_overlap=100,
            context_window=4096
        )
        print("Configured for production environment")

    elif env == "development":
        # Development configuration
        Settings.configure(
            llm=MockLLM(),
            embed_model=MockEmbedding(embed_dim=384),
            chunk_size=512,
            chunk_overlap=50,
            context_window=2048
        )
        print("Configured for development environment")

    else:
        # Test configuration
        Settings.configure(
            llm=MockLLM(),
            embed_model=MockEmbedding(embed_dim=128),
            chunk_size=256,
            chunk_overlap=25,
            context_window=1024
        )
        print("Configured for test environment")

# Configure based on environment
configure_for_environment()
```

### Settings State Management

```python
def save_settings_state():
    """Save current settings state."""
    return {
        "llm": Settings.llm,
        "embed_model": Settings.embed_model,
        "chunk_size": Settings.chunk_size,
        "chunk_overlap": Settings.chunk_overlap,
        "context_window": Settings.context_window,
        "num_output": Settings.num_output
    }

def restore_settings_state(state):
    """Restore settings from saved state."""
    Settings.configure(
        llm=state["llm"],
        embed_model=state["embed_model"],
        chunk_size=state["chunk_size"],
        chunk_overlap=state["chunk_overlap"],
        context_window=state["context_window"],
        num_output=state["num_output"]
    )

# Save current state
saved_state = save_settings_state()

# Modify settings for specific operation
Settings.configure(chunk_size=128)
print(f"Modified chunk size: {Settings.chunk_size}")

# Restore original state
restore_settings_state(saved_state)
print(f"Restored chunk size: {Settings.chunk_size}")
```

### Global Tokenizer Configuration

```python
from typing import List

def simple_tokenizer(text: str) -> List[str]:
    """Simple whitespace tokenizer."""
    return text.split()

def advanced_tokenizer(text: str) -> List[str]:
    """More advanced tokenization (mock implementation)."""
    import re
    # Simple word tokenization with punctuation handling
    tokens = re.findall(r'\b\w+\b', text.lower())
    return tokens

# Set global tokenizer
set_global_tokenizer(advanced_tokenizer)

# Get current tokenizer
current_tokenizer = get_tokenizer()
sample_text = "Hello, world! This is a test."
tokens = current_tokenizer(sample_text)
print(f"Tokenized '{sample_text}' into: {tokens}")
```

## Configuration Patterns

### Configuration Factory Pattern

```python
class ConfigurationFactory:
    """Factory for creating different configuration setups."""

    @staticmethod
    def create_development_config():
        """Create development configuration."""
        return {
            "llm": MockLLM(max_tokens=256),
            "embed_model": MockEmbedding(embed_dim=384),
            "chunk_size": 512,
            "chunk_overlap": 50,
            "context_window": 2048
        }

    @staticmethod
    def create_production_config():
        """Create production configuration."""
        return {
            "llm": MockLLM(max_tokens=512),  # Real LLM in actual production
            "embed_model": MockEmbedding(embed_dim=1536),
            "chunk_size": 1024,
            "chunk_overlap": 100,
            "context_window": 4096
        }

    @staticmethod
    def create_memory_optimized_config():
        """Create memory-optimized configuration."""
        return {
            "llm": MockLLM(max_tokens=128),
            "embed_model": MockEmbedding(embed_dim=256),
            "chunk_size": 256,
            "chunk_overlap": 25,
            "context_window": 1024
        }

# Use factory to configure
config = ConfigurationFactory.create_development_config()
Settings.configure(**config)
```

## Types & Constants

```python { .api }
# Default configuration values
DEFAULT_CHUNK_SIZE = 1024
DEFAULT_CHUNK_OVERLAP = 20
DEFAULT_CONTEXT_WINDOW = 4096
DEFAULT_NUM_OUTPUT = 256

# Component initialization
LAZY_INITIALIZATION = True
AUTO_FALLBACK_TO_MOCK = True

# Callback event payload structure
EventPayload = Dict[str, Any]

# Settings validation
VALIDATE_SETTINGS_ON_CONFIGURE = True

# Legacy support flags
SUPPORT_LEGACY_SERVICE_CONTEXT = True
DEPRECATION_WARNINGS_ENABLED = True
```