or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

agents-workflows.md, data-indexing.md, document-processing.md, index.md, llm-integration.md, prompts.md, query-processing.md, response-synthesis.md, retrievers.md, storage-settings.md

docs/storage-settings.md

0

# Storage & Settings

1

2

Storage backends and global configuration for persisting indices, managing contexts, and configuring system-wide settings for LlamaIndex applications.

3

4

## Capabilities

5

6

### Storage Context

7

8

Central storage management for indices, documents, and metadata with support for various storage backends and persistence options.

9

10

```python { .api }

11

class StorageContext:

12

"""

13

Storage context for managing index persistence and document storage.

14

15

Args:

16

docstore: Document storage backend

17

index_store: Index metadata storage

18

vector_stores: Vector storage backends

19

graph_store: Graph storage backend

20

property_graph_store: Property graph storage

21

**kwargs: Additional storage configurations

22

"""

23

def __init__(

24

self,

25

docstore=None,

26

index_store=None,

27

vector_stores=None,

28

graph_store=None,

29

property_graph_store=None,

30

**kwargs

31

): ...

32

33

@classmethod

34

def from_defaults(

35

cls,

36

persist_dir=None,

37

docstore=None,

38

index_store=None,

39

vector_store=None,

40

graph_store=None,

41

**kwargs

42

):

43

"""

44

Create storage context with default configurations.

45

46

Args:

47

persist_dir: Directory for persistent storage

48

docstore: Custom document store

49

index_store: Custom index store

50

vector_store: Custom vector store

51

graph_store: Custom graph store

52

53

Returns:

54

StorageContext: Configured storage context

55

"""

56

57

def persist(self, persist_dir=None, **kwargs):

58

"""

59

Persist all storage components to disk.

60

61

Args:

62

persist_dir: Target directory for persistence

63

"""

64

65

@property

66

def vector_store(self):

67

"""Default vector store instance."""

68

69

def add_vector_store(self, vector_store, namespace=None):

70

"""Add additional vector store with optional namespace."""

71

```

72

73

**Usage Example:**

74

75

```python

76

from llama_index.core import StorageContext, VectorStoreIndex, SimpleDirectoryReader, load_index_from_storage

77

from llama_index.vector_stores.chroma import ChromaVectorStore

78

import chromadb

79

80

# Default file-based storage

81

storage_context = StorageContext.from_defaults(persist_dir="./storage")

82

83

# Custom vector store

84

chroma_client = chromadb.Client()

85

chroma_collection = chroma_client.create_collection("my_collection")

86

vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

87

88

storage_context = StorageContext.from_defaults(

89

vector_store=vector_store,

90

persist_dir="./custom_storage"

91

)

92

93

# Create index with custom storage

94

documents = SimpleDirectoryReader("data").load_data()

95

index = VectorStoreIndex.from_documents(

96

documents,

97

storage_context=storage_context

98

)

99

100

# Persist to disk

101

storage_context.persist()

102

103

# Load from persisted storage

104

storage_context = StorageContext.from_defaults(persist_dir="./storage")

105

index = load_index_from_storage(storage_context)

106

```

107

108

### Global Settings

109

110

System-wide configuration for LLM, embedding models, and other core components with dynamic reconfiguration support.

111

112

```python { .api }

113

class Settings:

114

"""

115

Global settings singleton for LlamaIndex configuration.

116

117

Attributes:

118

llm: Default language model instance

119

embed_model: Default embedding model instance

120

node_parser: Default node parser for document chunking

121

transformations: List of document transformations

122

chunk_size: Default chunk size for text splitting

123

chunk_overlap: Default overlap between chunks

124

callback_manager: Global callback manager

125

tokenizer: Tokenizer function for token counting

126

"""

127

# Core model settings

128

llm: LLM = None

129

embed_model: BaseEmbedding = None

130

131

# Document processing settings

132

node_parser: NodeParser = None

133

transformations: List[TransformComponent] = None

134

135

# Chunking parameters

136

chunk_size: int = 1024

137

chunk_overlap: int = 200

138

139

# System components

140

callback_manager: CallbackManager = None

141

tokenizer: Callable[[str], List] = None

142

143

@classmethod

144

def from_defaults(

145

cls,

146

llm=None,

147

embed_model=None,

148

node_parser=None,

149

chunk_size=None,

150

chunk_overlap=None,

151

**kwargs

152

):

153

"""Configure settings with default values."""

154

155

@staticmethod

156

def reset():

157

"""Reset all settings to default values."""

158

159

@staticmethod

160

def configure(**kwargs):

161

"""Configure multiple settings at once."""

162

```

163

164

**Usage Example:**

165

166

```python

167

from llama_index.core import Settings, VectorStoreIndex

168

from llama_index.llms.openai import OpenAI

169

from llama_index.embeddings.openai import OpenAIEmbedding

170

171

# Configure global settings

172

Settings.llm = OpenAI(model="gpt-4", temperature=0.1)

173

Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")

174

Settings.chunk_size = 512

175

Settings.chunk_overlap = 50

176

177

# All indices and operations will use these settings by default

178

index = VectorStoreIndex.from_documents(documents)

179

180

# Temporary override for specific operations

181

with Settings.context(llm=OpenAI(model="gpt-3.5-turbo")):

182

query_engine = index.as_query_engine()

183

response = query_engine.query("What is this about?")

184

185

# Reset to defaults

186

Settings.reset()

187

```

188

189

### Document Storage

190

191

Document storage backends for managing raw document content with metadata and efficient retrieval.

192

193

```python { .api }

194

class BaseDocumentStore:

195

"""Base class for document storage backends."""

196

197

def add_documents(self, docs, allow_update=True):

198

"""Add documents to storage."""

199

200

def get_document(self, doc_id, raise_error=True):

201

"""Retrieve document by ID."""

202

203

def delete_document(self, doc_id, raise_error=True):

204

"""Delete document by ID."""

205

206

def document_exists(self, doc_id):

207

"""Check if document exists."""

208

209

class SimpleDocumentStore(BaseDocumentStore):

210

"""

211

In-memory document store with optional file persistence.

212

213

Args:

214

simple_file_store: File store for persistence

215

"""

216

def __init__(self, simple_file_store=None): ...

217

218

def persist(self, persist_path=None):

219

"""Persist document store to file."""

220

221

@classmethod

222

def from_persist_path(cls, persist_path):

223

"""Load document store from file."""

224

225

class MongoDocumentStore(BaseDocumentStore):

226

"""

227

MongoDB-based document store.

228

229

Args:

230

mongo_client: MongoDB client instance

231

db_name: Database name

232

collection_name: Collection name

233

"""

234

def __init__(self, mongo_client, db_name="llama_index", collection_name="documents"): ...

235

```

236

237

### Index Storage

238

239

Index metadata storage for managing index structures, mappings, and retrieval metadata.

240

241

```python { .api }

242

class BaseIndexStore:

243

"""Base class for index storage backends."""

244

245

def add_index_struct(self, index_struct):

246

"""Add index structure to storage."""

247

248

def delete_index_struct(self, key):

249

"""Delete index structure."""

250

251

def get_index_struct(self, struct_id=None):

252

"""Get index structure by ID."""

253

254

class SimpleIndexStore(BaseIndexStore):

255

"""

256

Simple file-based index store.

257

258

Args:

259

simple_file_store: File store for persistence

260

"""

261

def __init__(self, simple_file_store=None): ...

262

263

def persist(self, persist_path=None):

264

"""Persist index store to file."""

265

266

@classmethod

267

def from_persist_path(cls, persist_path):

268

"""Load index store from file."""

269

```

270

271

### Vector Storage

272

273

Vector storage backends for embedding storage and similarity search with support for various vector databases.

274

275

```python { .api }

276

class VectorStore:

277

"""Base class for vector storage backends."""

278

279

def add(self, nodes, **kwargs):

280

"""Add nodes with embeddings to vector store."""

281

282

def delete(self, ref_doc_id, **kwargs):

283

"""Delete vectors by document reference ID."""

284

285

def query(self, query, **kwargs):

286

"""Query for similar vectors."""

287

288

def persist(self, persist_path=None, **kwargs):

289

"""Persist vector store if supported."""

290

291

class SimpleVectorStore(VectorStore):

292

"""

293

Simple in-memory vector store with file persistence.

294

295

Args:

296

simple_file_store: File store for persistence

297

"""

298

def __init__(self, simple_file_store=None): ...

299

300

# Integration vector stores

301

class ChromaVectorStore(VectorStore):

302

"""Chroma vector database integration."""

303

304

class PineconeVectorStore(VectorStore):

305

"""Pinecone vector database integration."""

306

307

class WeaviateVectorStore(VectorStore):

308

"""Weaviate vector database integration."""

309

310

class QdrantVectorStore(VectorStore):

311

"""Qdrant vector database integration."""

312

```

313

314

**Vector Store Usage Example:**

315

316

```python

317

from llama_index.vector_stores.chroma import ChromaVectorStore

318

from llama_index.vector_stores.pinecone import PineconeVectorStore

319

import chromadb

320

import pinecone

321

322

# Chroma setup

323

chroma_client = chromadb.Client()

324

chroma_collection = chroma_client.create_collection("my_docs")

325

chroma_vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

326

327

# Pinecone setup

328

pc = pinecone.Pinecone(api_key="your-key")

329

pinecone_index = pc.Index("my-index")

330

pinecone_vector_store = PineconeVectorStore(pinecone_index=pinecone_index)

331

332

# Use with storage context

333

storage_context = StorageContext.from_defaults(

334

vector_store=chroma_vector_store # or pinecone_vector_store

335

)

336

337

index = VectorStoreIndex.from_documents(

338

documents,

339

storage_context=storage_context

340

)

341

```

342

343

### Graph Storage

344

345

Graph storage backends for knowledge graphs and property graphs with support for various graph databases.

346

347

```python { .api }

348

class GraphStore:

349

"""Base class for graph storage backends."""

350

351

def upsert_triplet(self, subj, pred, obj):

352

"""Insert or update a knowledge triplet."""

353

354

def delete_triplet(self, subj, pred, obj):

355

"""Delete a knowledge triplet."""

356

357

def get_triplets(self, subj=None):

358

"""Get triplets by subject."""

359

360

def get_rel_map(self, subjs=None):

361

"""Get relationship map for subjects."""

362

363

class SimpleGraphStore(GraphStore):

364

"""

365

Simple in-memory graph store.

366

367

Args:

368

simple_file_store: File store for persistence

369

"""

370

def __init__(self, simple_file_store=None): ...

371

372

def persist(self, persist_path=None):

373

"""Persist graph store to file."""

374

375

class Neo4jGraphStore(GraphStore):

376

"""

377

Neo4j graph database integration.

378

379

Args:

380

url: Neo4j database URL

381

username: Database username

382

password: Database password

383

database: Database name

384

"""

385

def __init__(self, url, username, password, database="neo4j"): ...

386

387

class PropertyGraphStore:

388

"""Base class for property graph storage."""

389

390

def upsert_nodes(self, nodes):

391

"""Insert or update graph nodes."""

392

393

def upsert_relations(self, relations):

394

"""Insert or update graph relations."""

395

396

def delete(self, ids):

397

"""Delete nodes and relations by IDs."""

398

399

def structured_query(self, query, **kwargs):

400

"""Execute structured query against graph."""

401

```

402

403

### Persistence and Loading

404

405

Utility functions for saving and loading indices with storage context management.

406

407

```python { .api }

408

def load_index_from_storage(

409

storage_context,

410

index_id=None,

411

**kwargs

412

):

413

"""

414

Load index from storage context.

415

416

Args:

417

storage_context: Storage context with persisted data

418

index_id: Specific index ID to load

419

420

Returns:

421

BaseIndex: Loaded index instance

422

"""

423

424

def load_indices_from_storage(

425

storage_context,

426

index_ids=None,

427

**kwargs

428

):

429

"""

430

Load multiple indices from storage.

431

432

Args:

433

storage_context: Storage context

434

index_ids: List of index IDs to load

435

436

Returns:

437

List[BaseIndex]: List of loaded indices

438

"""

439

440

def load_graph_from_storage(

441

storage_context,

442

root_id=None,

443

**kwargs

444

):

445

"""

446

Load composable graph from storage.

447

448

Args:

449

storage_context: Storage context

450

root_id: Root graph node ID

451

452

Returns:

453

ComposableGraph: Loaded graph structure

454

"""

455

```

456

457

**Persistence Usage Example:**

458

459

```python

460

from llama_index.core import (

461

SimpleDirectoryReader,
StorageContext,

462

VectorStoreIndex,

463

load_index_from_storage,

464

load_indices_from_storage

465

)

466

467

# Create and persist index

468

documents = SimpleDirectoryReader("data").load_data()

469

index = VectorStoreIndex.from_documents(documents)

470

471

# Persist to default location

472

index.storage_context.persist()

473

474

# Persist to custom location

475

index.storage_context.persist(persist_dir="./my_storage")

476

477

# Load from storage

478

storage_context = StorageContext.from_defaults(persist_dir="./my_storage")

479

loaded_index = load_index_from_storage(storage_context)

480

481

# Load multiple indices

482

storage_context = StorageContext.from_defaults(persist_dir="./multi_storage")

483

indices = load_indices_from_storage(storage_context)

484

485

print(f"Loaded {len(indices)} indices")

486

for index in indices:

487

print(f"Index {index.index_id}: {type(index)}")

488

```

489

490

### Service Context (Legacy)

491

492

Legacy service context for backwards compatibility with older LlamaIndex versions.

493

494

```python { .api }

495

class ServiceContext:

496

"""

497

Legacy service context for backwards compatibility.

498

499

Note: Deprecated in favor of Settings class.

500

501

Args:

502

llm: Language model instance

503

embed_model: Embedding model instance

504

node_parser: Node parser for chunking

505

transformations: Document transformations

506

**kwargs: Additional service configurations

507

"""

508

def __init__(

509

self,

510

llm=None,

511

embed_model=None,

512

node_parser=None,

513

transformations=None,

514

**kwargs

515

): ...

516

517

@classmethod

518

def from_defaults(

519

cls,

520

llm=None,

521

embed_model=None,

522

chunk_size=None,

523

chunk_overlap=None,

524

**kwargs

525

):

526

"""Create service context with defaults."""

527

528

def set_global_service_context(service_context):

529

"""Set global service context (deprecated)."""

530

```

531

532

### Configuration Management

533

534

Advanced configuration patterns for complex deployments and environment management.

535

536

```python { .api }

537

class SettingsContext:

538

"""Context manager for temporary settings changes."""

539

540

def __init__(self, **kwargs):

541

"""Initialize with temporary settings."""

542

543

def __enter__(self):

544

"""Apply temporary settings."""

545

546

def __exit__(self, exc_type, exc_val, exc_tb):

547

"""Restore original settings."""

548

549

# Context manager usage

550

with Settings.context(

551

llm=OpenAI(model="gpt-3.5-turbo"),

552

chunk_size=256

553

):

554

# Temporary settings active here

555

index = VectorStoreIndex.from_documents(documents)

556

557

# Original settings restored automatically

558

```

559

560

### Environment Configuration

561

562

Environment-based configuration for deployment across different environments.

563

564

**Environment Variables:**

565

566

```python

567

# Common environment variables

568

OPENAI_API_KEY = "your-openai-key"

569

LLAMA_INDEX_CACHE_DIR = "./cache"

570

LLAMA_INDEX_GLOBAL_HANDLER = "wandb" # or "simple", "arize", etc.

571

572

# Vector store configurations

573

PINECONE_API_KEY = "your-pinecone-key"

574

PINECONE_ENVIRONMENT = "your-environment"

575

CHROMA_HOST = "localhost"

576

CHROMA_PORT = "8000"

577

578

# Graph database configurations

579

NEO4J_URL = "bolt://localhost:7687"

580

NEO4J_USERNAME = "neo4j"

581

NEO4J_PASSWORD = "password"

582

```

583

584

**Configuration Loading:**

585

586

```python

587

import os

588

from llama_index.core import Settings

589

from llama_index.llms.openai import OpenAI

590

from llama_index.embeddings.openai import OpenAIEmbedding

591

592

# Load from environment

593

Settings.llm = OpenAI(

594

api_key=os.getenv("OPENAI_API_KEY"),

595

model=os.getenv("OPENAI_MODEL", "gpt-3.5-turbo")

596

)

597

598

Settings.embed_model = OpenAIEmbedding(

599

api_key=os.getenv("OPENAI_API_KEY"),

600

model=os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-ada-002")

601

)

602

603

# Cache directory

604

cache_dir = os.getenv("LLAMA_INDEX_CACHE_DIR", "./cache")

605

```