
# Settings & Configuration

1

2

Centralized configuration system for managing LLMs, embeddings, callback handlers, and other global settings across the application. The settings system provides a unified way to configure and manage all LlamaIndex components with support for global defaults and context-specific overrides.

3

4

## Capabilities

5

6

### Global Settings Management

7

8

Centralized configuration system for managing application-wide settings and component defaults.

9

10

```python { .api }

11

class Settings:

12

"""

13

Global settings for LlamaIndex configuration and component management.

14

15

The Settings class provides centralized configuration for LLMs, embeddings,

16

callbacks, and other system-wide components with automatic initialization

17

and lazy loading capabilities.

18

19

Class Attributes:

20

- llm: Optional[LLM], global language model instance

21

- embed_model: Optional[BaseEmbedding], global embedding model

22

- callback_manager: Optional[CallbackManager], global callback system

23

- transformations: Optional[List[TransformComponent]], global transformations

24

- chunk_size: int, default chunk size for text processing

25

- chunk_overlap: int, default overlap between text chunks

26

- context_window: int, default context window size

27

- num_output: int, default number of output tokens

28

"""

29

30

# Core model components

31

llm: Optional[LLM] = None

32

embed_model: Optional[BaseEmbedding] = None

33

34

# Callback and monitoring

35

callback_manager: Optional[CallbackManager] = None

36

37

# Text processing

38

transformations: Optional[List[TransformComponent]] = None

39

40

# Chunking configuration

41

chunk_size: int = 1024

42

chunk_overlap: int = 20

43

44

# Model configuration

45

context_window: int = 4096

46

num_output: int = 256

47

48

@classmethod

49

def reset(cls) -> None:

50

"""

51

Reset all settings to default values.

52

53

Clears all configured components and resets settings to their

54

default state for clean initialization.

55

"""

56

57

@classmethod

58

def configure(

59

cls,

60

llm: Optional[LLM] = None,

61

embed_model: Optional[BaseEmbedding] = None,

62

callback_manager: Optional[CallbackManager] = None,

63

transformations: Optional[List[TransformComponent]] = None,

64

chunk_size: Optional[int] = None,

65

chunk_overlap: Optional[int] = None,

66

context_window: Optional[int] = None,

67

num_output: Optional[int] = None,

68

**kwargs

69

) -> None:

70

"""

71

Configure global settings with provided components.

72

73

Parameters:

74

- llm: Optional[LLM], language model for global use

75

- embed_model: Optional[BaseEmbedding], embedding model for global use

76

- callback_manager: Optional[CallbackManager], callback system

77

- transformations: Optional[List[TransformComponent]], global transformations

78

- chunk_size: Optional[int], default chunk size

79

- chunk_overlap: Optional[int], default chunk overlap

80

- context_window: Optional[int], default context window size

81

- num_output: Optional[int], default output token count

82

"""

83

84

@classmethod

85

def get_llm(cls) -> LLM:

86

"""

87

Get configured language model with automatic initialization.

88

89

Returns the configured LLM or initializes a default mock LLM

90

if none is configured.

91

92

Returns:

93

- LLM, configured or default language model

94

"""

95

96

@classmethod

97

def get_embed_model(cls) -> BaseEmbedding:

98

"""

99

Get configured embedding model with automatic initialization.

100

101

Returns the configured embedding model or initializes a default

102

mock embedding model if none is configured.

103

104

Returns:

105

- BaseEmbedding, configured or default embedding model

106

"""

107

108

@classmethod

109

def get_callback_manager(cls) -> CallbackManager:

110

"""

111

Get configured callback manager with automatic initialization.

112

113

Returns the configured callback manager or creates a default

114

instance if none is configured.

115

116

Returns:

117

- CallbackManager, configured or default callback manager

118

"""

119

120

@classmethod

121

def get_transformations(cls) -> List[TransformComponent]:

122

"""

123

Get configured transformations with automatic initialization.

124

125

Returns the configured transformation pipeline or creates

126

default transformations if none are configured.

127

128

Returns:

129

- List[TransformComponent], configured or default transformations

130

"""

131

```

132

133

### Service Context (Legacy)

134

135

Legacy service context for backward compatibility with older LlamaIndex versions.

136

137

```python { .api }

138

class ServiceContext:

139

"""

140

Legacy service context for LLM operations and configuration.

141

142

Note: ServiceContext is deprecated in favor of the Settings class.

143

This class is maintained for backward compatibility.

144

145

Parameters:

146

- llm: Optional[LLM], language model instance

147

- embed_model: Optional[BaseEmbedding], embedding model instance

148

- node_parser: Optional[NodeParser], text parsing configuration

149

- text_splitter: Optional[TextSplitter], text splitting configuration

150

- transformations: Optional[List[TransformComponent]], transformation pipeline

151

- callback_manager: Optional[CallbackManager], callback management

152

"""

153

def __init__(

154

self,

155

llm: Optional[LLM] = None,

156

embed_model: Optional[BaseEmbedding] = None,

157

node_parser: Optional[NodeParser] = None,

158

text_splitter: Optional[TextSplitter] = None,

159

transformations: Optional[List[TransformComponent]] = None,

160

callback_manager: Optional[CallbackManager] = None,

161

**kwargs

162

): ...

163

164

@classmethod

165

def from_defaults(

166

cls,

167

llm: Optional[LLM] = None,

168

embed_model: Optional[BaseEmbedding] = None,

169

node_parser: Optional[NodeParser] = None,

170

text_splitter: Optional[TextSplitter] = None,

171

transformations: Optional[List[TransformComponent]] = None,

172

callback_manager: Optional[CallbackManager] = None,

173

chunk_size: Optional[int] = None,

174

chunk_overlap: Optional[int] = None,

175

context_window: Optional[int] = None,

176

num_output: Optional[int] = None,

177

**kwargs

178

) -> "ServiceContext":

179

"""

180

Create ServiceContext with default configurations.

181

182

Parameters:

183

- llm: Optional[LLM], language model

184

- embed_model: Optional[BaseEmbedding], embedding model

185

- node_parser: Optional[NodeParser], node parsing configuration

186

- text_splitter: Optional[TextSplitter], text splitting configuration

187

- transformations: Optional[List[TransformComponent]], transformations

188

- callback_manager: Optional[CallbackManager], callback system

189

- chunk_size: Optional[int], text chunk size

190

- chunk_overlap: Optional[int], chunk overlap size

191

- context_window: Optional[int], model context window

192

- num_output: Optional[int], output token limit

193

194

Returns:

195

- ServiceContext, configured service context

196

"""

197

```

198

199

### Global Configuration Functions

200

201

Utility functions for setting global configuration and managing system-wide settings.

202

203

```python { .api }

204

def set_global_service_context(service_context: ServiceContext) -> None:

205

"""

206

Set global service context for legacy compatibility.

207

208

Parameters:

209

- service_context: ServiceContext, service context to set globally

210

211

Note: This function is deprecated. Use Settings.configure() instead.

212

"""

213

214

def set_global_handler(handler: BaseCallbackHandler) -> None:

215

"""

216

Set global callback handler for system-wide event handling.

217

218

Parameters:

219

- handler: BaseCallbackHandler, callback handler to set globally

220

"""

221

222

def set_global_tokenizer(tokenizer: Callable[[str], List]) -> None:

223

"""

224

Set global tokenizer function for text processing.

225

226

Parameters:

227

- tokenizer: Callable[[str], List], tokenizer function

228

"""

229

230

def get_tokenizer() -> Callable[[str], List]:

231

"""

232

Get current global tokenizer function.

233

234

Returns:

235

- Callable[[str], List], current tokenizer function

236

"""

237

```

238

239

### Prompt Helper Configuration

240

241

Configuration utilities for prompt management and optimization.

242

243

```python { .api }

244

class PromptHelper:

245

"""

246

Helper for prompt management and token optimization.

247

248

Parameters:

249

- context_window: int, available context window size

250

- num_output: int, reserved tokens for output

251

- chunk_overlap_ratio: float, ratio of overlap between chunks

252

- chunk_size_limit: Optional[int], maximum chunk size

253

- tokenizer: Optional[Callable], tokenizer function for counting

254

"""

255

def __init__(

256

self,

257

context_window: int = 4096,

258

num_output: int = 256,

259

chunk_overlap_ratio: float = 0.1,

260

chunk_size_limit: Optional[int] = None,

261

tokenizer: Optional[Callable] = None,

262

**kwargs

263

): ...

264

265

def get_text_splitter_given_prompt(

266

self,

267

prompt: BasePromptTemplate,

268

num_chunks: int = 1,

269

padding: int = 5

270

) -> TokenTextSplitter:

271

"""

272

Get text splitter configured for specific prompt requirements.

273

274

Parameters:

275

- prompt: BasePromptTemplate, prompt template for sizing

276

- num_chunks: int, number of chunks to accommodate

277

- padding: int, safety padding for token count

278

279

Returns:

280

- TokenTextSplitter, configured text splitter

281

"""

282

283

def get_chunk_size_given_prompt(

284

self,

285

prompt: BasePromptTemplate,

286

num_chunks: int = 1,

287

padding: int = 5

288

) -> int:

289

"""

290

Calculate optimal chunk size for prompt and context window.

291

292

Parameters:

293

- prompt: BasePromptTemplate, prompt template

294

- num_chunks: int, number of chunks to fit

295

- padding: int, safety padding

296

297

Returns:

298

- int, optimal chunk size in tokens

299

"""

300

```

301

302

### Callback Management

303

304

System for managing callback handlers and event processing across the application.

305

306

```python { .api }

307

class CallbackManager:

308

"""

309

Manager for callback handlers and event processing.

310

311

Parameters:

312

- handlers: List[BaseCallbackHandler], list of callback handlers

313

"""

314

def __init__(self, handlers: Optional[List[BaseCallbackHandler]] = None): ...

315

316

def add_handler(self, handler: BaseCallbackHandler) -> None:

317

"""

318

Add callback handler to manager.

319

320

Parameters:

321

- handler: BaseCallbackHandler, handler to add

322

"""

323

324

def remove_handler(self, handler: BaseCallbackHandler) -> None:

325

"""

326

Remove callback handler from manager.

327

328

Parameters:

329

- handler: BaseCallbackHandler, handler to remove

330

"""

331

332

def set_handlers(self, handlers: List[BaseCallbackHandler]) -> None:

333

"""

334

Set list of callback handlers, replacing existing ones.

335

336

Parameters:

337

- handlers: List[BaseCallbackHandler], new handler list

338

"""

339

340

def on_event_start(

341

self,

342

event_type: CBEventType,

343

payload: Optional[Dict[str, Any]] = None,

344

event_id: str = "",

345

**kwargs: Any

346

) -> str:

347

"""

348

Handle event start callback.

349

350

Parameters:

351

- event_type: CBEventType, type of event starting

352

- payload: Optional[Dict[str, Any]], event payload data

353

- event_id: str, unique event identifier

354

355

Returns:

356

- str, event identifier for tracking

357

"""

358

359

def on_event_end(

360

self,

361

event_type: CBEventType,

362

payload: Optional[Dict[str, Any]] = None,

363

event_id: str = "",

364

**kwargs: Any

365

) -> None:

366

"""

367

Handle event end callback.

368

369

Parameters:

370

- event_type: CBEventType, type of event ending

371

- payload: Optional[Dict[str, Any]], event payload data

372

- event_id: str, event identifier

373

"""

374

375

class BaseCallbackHandler:

376

"""

377

Base interface for callback handler implementations.

378

379

Callback handlers receive and process events from LlamaIndex

380

operations for logging, monitoring, and debugging purposes.

381

"""

382

def on_event_start(

383

self,

384

event_type: CBEventType,

385

payload: Optional[Dict[str, Any]] = None,

386

event_id: str = "",

387

**kwargs: Any

388

) -> str:

389

"""Handle event start."""

390

391

def on_event_end(

392

self,

393

event_type: CBEventType,

394

payload: Optional[Dict[str, Any]] = None,

395

event_id: str = "",

396

**kwargs: Any

397

) -> None:

398

"""Handle event end."""

399

400

def start_trace(self, trace_id: Optional[str] = None) -> None:

401

"""Start execution trace."""

402

403

def end_trace(

404

self,

405

trace_id: Optional[str] = None,

406

trace_map: Optional[Dict[str, List[str]]] = None

407

) -> None:

408

"""End execution trace."""

409

410

class CBEventType(str, Enum):

411

"""Enumeration of callback event types."""

412

LLM = "llm"

413

CHUNKING = "chunking"

414

NODE_PARSING = "node_parsing"

415

EMBEDDING = "embedding"

416

QUERY = "query"

417

RETRIEVE = "retrieve"

418

SYNTHESIZE = "synthesize"

419

TREE = "tree"

420

SUB_QUESTION = "sub_question"

421

TEMPLATING = "templating"

422

FUNCTION_CALL = "function_call"

423

RERANKING = "reranking"

424

EXCEPTION = "exception"

425

```

426

427

## Usage Examples

428

429

### Basic Global Configuration

430

431

```python

432

from llama_index.core import Settings

433

from llama_index.core.llms import MockLLM

434

from llama_index.core.embeddings import MockEmbedding

435

436

# Configure global settings

437

Settings.configure(

438

llm=MockLLM(max_tokens=512),

439

embed_model=MockEmbedding(embed_dim=384),

440

chunk_size=512,

441

chunk_overlap=50,

442

context_window=2048,

443

num_output=256

444

)

445

446

# Settings are now available globally

447

print(f"Global LLM: {type(Settings.llm).__name__}")

448

print(f"Global embedding model: {type(Settings.embed_model).__name__}")

449

print(f"Chunk size: {Settings.chunk_size}")

450

print(f"Context window: {Settings.context_window}")

451

```

452

453

### Automatic Component Initialization

454

455

```python

456

# Clear any existing configuration

457

Settings.reset()

458

459

# Components are initialized automatically when accessed

460

llm = Settings.get_llm() # Creates MockLLM if none configured

461

embed_model = Settings.get_embed_model() # Creates MockEmbedding if none configured

462

463

print(f"Auto-initialized LLM: {type(llm).__name__}")

464

print(f"Auto-initialized embedding: {type(embed_model).__name__}")

465

```

466

467

### Using Settings with Index Creation

468

469

```python

470

from llama_index.core import VectorStoreIndex, Document

471

472

# Configure settings first

473

Settings.configure(

474

llm=MockLLM(),

475

embed_model=MockEmbedding(embed_dim=384),

476

chunk_size=256 # Smaller chunks for this example

477

)

478

479

# Create documents

480

documents = [

481

Document(text="Machine learning is a subset of artificial intelligence that focuses on algorithms."),

482

Document(text="Deep learning uses neural networks with multiple layers for complex pattern recognition."),

483

Document(text="Natural language processing enables computers to understand human language.")

484

]

485

486

# Index will use global settings automatically

487

index = VectorStoreIndex.from_documents(documents)

488

489

# Query engine inherits global LLM

490

query_engine = index.as_query_engine()

491

response = query_engine.query("What is machine learning?")

492

print(f"Response: {response.response}")

493

```

494

495

### Custom Callback Handler

496

497

```python

498

from llama_index.core.callbacks import BaseCallbackHandler, CBEventType

499

500

class CustomLoggingHandler(BaseCallbackHandler):

501

"""Custom callback handler for logging events."""

502

503

def __init__(self):

504

super().__init__()

505

self.events = []

506

507

def on_event_start(

508

self,

509

event_type: CBEventType,

510

payload: Optional[Dict[str, Any]] = None,

511

event_id: str = "",

512

**kwargs: Any

513

) -> str:

514

event_info = {

515

"event_type": event_type,

516

"event_id": event_id,

517

"stage": "start",

518

"payload": payload or {}

519

}

520

self.events.append(event_info)

521

print(f"Event started: {event_type} - {event_id}")

522

return event_id

523

524

def on_event_end(

525

self,

526

event_type: CBEventType,

527

payload: Optional[Dict[str, Any]] = None,

528

event_id: str = "",

529

**kwargs: Any

530

) -> None:

531

event_info = {

532

"event_type": event_type,

533

"event_id": event_id,

534

"stage": "end",

535

"payload": payload or {}

536

}

537

self.events.append(event_info)

538

print(f"Event ended: {event_type} - {event_id}")

539

540

# Create and configure callback handler

541

custom_handler = CustomLoggingHandler()

542

Settings.configure(callback_manager=CallbackManager([custom_handler]))

543

544

# Operations will now trigger callbacks

545

index = VectorStoreIndex.from_documents(documents)

546

query_engine = index.as_query_engine()

547

response = query_engine.query("Explain deep learning")

548

549

# Check logged events

550

print(f"\nLogged {len(custom_handler.events)} events:")

551

for event in custom_handler.events:

552

print(f" {event['stage'].upper()}: {event['event_type']} - {event['event_id']}")

553

```

554

555

### Service Context (Legacy) Usage

556

557

```python

558

from llama_index.core import ServiceContext

559

560

# Create service context (legacy approach)

561

service_context = ServiceContext.from_defaults(

562

llm=MockLLM(),

563

embed_model=MockEmbedding(embed_dim=384),

564

chunk_size=512,

565

chunk_overlap=50

566

)

567

568

# Use with index creation

569

index_with_service_context = VectorStoreIndex.from_documents(

570

documents,

571

service_context=service_context

572

)

573

574

# Set globally (deprecated approach)

575

set_global_service_context(service_context)

576

```

577

578

### Transformation Pipeline Configuration

579

580

```python

581

from llama_index.core.node_parser import SentenceSplitter

582

from llama_index.core.extractors import TitleExtractor

583

584

# Configure transformation pipeline

585

transformations = [

586

SentenceSplitter(chunk_size=512, chunk_overlap=20),

587

TitleExtractor() # Example: extract titles as metadata

588

]

589

590

Settings.configure(

591

transformations=transformations,

592

llm=MockLLM(),

593

embed_model=MockEmbedding(embed_dim=384)

594

)

595

596

# Transformations will be applied automatically during indexing

597

index_with_transforms = VectorStoreIndex.from_documents(documents)

598

```

599

600

### Prompt Helper Configuration

601

602

```python

603

from llama_index.core.indices import PromptHelper

604

from llama_index.core.prompts import PromptTemplate

605

606

# Configure prompt helper for token optimization

607

prompt_helper = PromptHelper(

608

context_window=2048,

609

num_output=256,

610

chunk_overlap_ratio=0.1

611

)

612

613

# Create a sample prompt template

614

sample_prompt = PromptTemplate(

615

    template="Context: {context_str}\n\nQuestion: {query_str}\n\nAnswer:"

616

)

617

618

# Get optimal chunk size for this prompt

619

optimal_chunk_size = prompt_helper.get_chunk_size_given_prompt(

620

prompt=sample_prompt,

621

num_chunks=3

622

)

623

624

print(f"Optimal chunk size: {optimal_chunk_size}")

625

626

# Get configured text splitter

627

text_splitter = prompt_helper.get_text_splitter_given_prompt(

628

prompt=sample_prompt,

629

num_chunks=3

630

)

631

632

print(f"Text splitter chunk size: {text_splitter.chunk_size}")

633

```

634

635

### Environment-Specific Configuration

636

637

```python

638

import os

639

640

def configure_for_environment():

641

"""Configure settings based on environment."""

642

643

env = os.getenv("LLAMAINDEX_ENV", "development")

644

645

if env == "production":

646

# Production configuration

647

Settings.configure(

648

llm=MockLLM(), # Would be real LLM in production

649

embed_model=MockEmbedding(embed_dim=1536),

650

chunk_size=1024,

651

chunk_overlap=100,

652

context_window=4096

653

)

654

print("Configured for production environment")

655

656

elif env == "development":

657

# Development configuration

658

Settings.configure(

659

llm=MockLLM(),

660

embed_model=MockEmbedding(embed_dim=384),

661

chunk_size=512,

662

chunk_overlap=50,

663

context_window=2048

664

)

665

print("Configured for development environment")

666

667

else:

668

# Test configuration

669

Settings.configure(

670

llm=MockLLM(),

671

embed_model=MockEmbedding(embed_dim=128),

672

chunk_size=256,

673

chunk_overlap=25,

674

context_window=1024

675

)

676

print("Configured for test environment")

677

678

# Configure based on environment

679

configure_for_environment()

680

```

681

682

### Settings State Management

683

684

```python

685

def save_settings_state():

686

"""Save current settings state."""

687

return {

688

"llm": Settings.llm,

689

"embed_model": Settings.embed_model,

690

"chunk_size": Settings.chunk_size,

691

"chunk_overlap": Settings.chunk_overlap,

692

"context_window": Settings.context_window,

693

"num_output": Settings.num_output

694

}

695

696

def restore_settings_state(state):

697

"""Restore settings from saved state."""

698

Settings.configure(

699

llm=state["llm"],

700

embed_model=state["embed_model"],

701

chunk_size=state["chunk_size"],

702

chunk_overlap=state["chunk_overlap"],

703

context_window=state["context_window"],

704

num_output=state["num_output"]

705

)

706

707

# Save current state

708

saved_state = save_settings_state()

709

710

# Modify settings for specific operation

711

Settings.configure(chunk_size=128)

712

print(f"Modified chunk size: {Settings.chunk_size}")

713

714

# Restore original state

715

restore_settings_state(saved_state)

716

print(f"Restored chunk size: {Settings.chunk_size}")

717

```

718

719

### Global Tokenizer Configuration

720

721

```python

722

def simple_tokenizer(text: str) -> List[str]:

723

"""Simple whitespace tokenizer."""

724

return text.split()

725

726

def advanced_tokenizer(text: str) -> List[str]:

727

"""More advanced tokenization (mock implementation)."""

728

import re

729

# Simple word tokenization with punctuation handling

730

    tokens = re.findall(r'\b\w+\b', text.lower())

731

return tokens

732

733

# Set global tokenizer

734

set_global_tokenizer(advanced_tokenizer)

735

736

# Get current tokenizer

737

current_tokenizer = get_tokenizer()

738

sample_text = "Hello, world! This is a test."

739

tokens = current_tokenizer(sample_text)

740

print(f"Tokenized '{sample_text}' into: {tokens}")

741

```

742

743

## Configuration Patterns

744

745

### Configuration Factory Pattern

746

747

```python

748

class ConfigurationFactory:

749

"""Factory for creating different configuration setups."""

750

751

@staticmethod

752

def create_development_config():

753

"""Create development configuration."""

754

return {

755

"llm": MockLLM(max_tokens=256),

756

"embed_model": MockEmbedding(embed_dim=384),

757

"chunk_size": 512,

758

"chunk_overlap": 50,

759

"context_window": 2048

760

}

761

762

@staticmethod

763

def create_production_config():

764

"""Create production configuration."""

765

return {

766

"llm": MockLLM(max_tokens=512), # Real LLM in actual production

767

"embed_model": MockEmbedding(embed_dim=1536),

768

"chunk_size": 1024,

769

"chunk_overlap": 100,

770

"context_window": 4096

771

}

772

773

@staticmethod

774

def create_memory_optimized_config():

775

"""Create memory-optimized configuration."""

776

return {

777

"llm": MockLLM(max_tokens=128),

778

"embed_model": MockEmbedding(embed_dim=256),

779

"chunk_size": 256,

780

"chunk_overlap": 25,

781

"context_window": 1024

782

}

783

784

# Use factory to configure

785

config = ConfigurationFactory.create_development_config()

786

Settings.configure(**config)

787

```

788

789

## Types & Constants

790

791

```python { .api }

792

# Default configuration values

793

DEFAULT_CHUNK_SIZE = 1024

794

DEFAULT_CHUNK_OVERLAP = 20

795

DEFAULT_CONTEXT_WINDOW = 4096

796

DEFAULT_NUM_OUTPUT = 256

797

798

# Component initialization

799

LAZY_INITIALIZATION = True

800

AUTO_FALLBACK_TO_MOCK = True

801

802

# Callback event payload structure

803

EventPayload = Dict[str, Any]

804

805

# Settings validation

806

VALIDATE_SETTINGS_ON_CONFIGURE = True

807

808

# Legacy support flags

809

SUPPORT_LEGACY_SERVICE_CONTEXT = True

810

DEPRECATION_WARNINGS_ENABLED = True

811

```