or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

agents-tools.md documents-nodes.md evaluation.md index.md indices.md llms-embeddings.md node-parsers.md postprocessors.md prompts.md query-engines.md retrievers.md settings.md storage.md

docs/indices.md

0

# Indices

1

2

Index structures for organizing and retrieving information from documents. LlamaIndex provides multiple index types optimized for different retrieval patterns, from semantic similarity search to keyword matching and hierarchical navigation.

3

4

## Capabilities

5

6

### Vector Store Index

7

8

Primary index type for semantic similarity search using vector embeddings. Stores document chunks as embeddings and retrieves relevant content based on query similarity.

9

10

```python { .api }

11

class VectorStoreIndex:

12

"""

13

Index that stores embeddings for semantic similarity retrieval.

14

15

Parameters:

16

- nodes: Optional[Sequence[BaseNode]], nodes to index

17

- embed_model: Optional[BaseEmbedding], embedding model to use

18

- storage_context: Optional[StorageContext], storage configuration

19

- service_context: Optional[ServiceContext], service configuration (deprecated)

20

- show_progress: bool, whether to show indexing progress

21

- store_nodes_override: bool, whether to store nodes in docstore

22

"""

23

def __init__(

24

self,

25

nodes: Optional[Sequence[BaseNode]] = None,

26

embed_model: Optional[BaseEmbedding] = None,

27

storage_context: Optional[StorageContext] = None,

28

service_context: Optional[ServiceContext] = None,

29

show_progress: bool = False,

30

store_nodes_override: bool = False,

31

**kwargs

32

): ...

33

34

@classmethod

35

def from_documents(

36

cls,

37

documents: Sequence[Document],

38

storage_context: Optional[StorageContext] = None,

39

service_context: Optional[ServiceContext] = None,

40

show_progress: bool = False,

41

**kwargs

42

) -> "VectorStoreIndex":

43

"""Create index from documents."""

44

45

def as_query_engine(

46

self,

47

retriever_mode: str = "default",

48

response_mode: str = "compact",

49

**kwargs

50

) -> BaseQueryEngine:

51

"""Convert to query engine."""

52

53

def as_retriever(

54

self,

55

retriever_mode: str = "default",

56

similarity_top_k: int = 10,

57

**kwargs

58

) -> BaseRetriever:

59

"""Convert to retriever."""

60

61

def as_chat_engine(

62

self,

63

chat_mode: str = "best",

64

**kwargs

65

) -> BaseChatEngine:

66

"""Convert to chat engine."""

67

68

def insert(self, document: Document, **kwargs) -> None:

69

"""Insert document into index."""

70

71

def insert_nodes(self, nodes: List[BaseNode], **kwargs) -> None:

72

"""Insert nodes into index."""

73

74

def delete_ref_doc(self, ref_doc_id: str, **kwargs) -> None:

75

"""Delete document from index."""

76

77

def update_ref_doc(self, document: Document, **kwargs) -> None:

78

"""Update document in index."""

79

```

80

81

### Summary Index

82

83

Simple index that stores all nodes sequentially, useful for small document collections or when comprehensive retrieval is needed.

84

85

```python { .api }

86

class SummaryIndex:

87

"""

88

Simple index storing all nodes for comprehensive retrieval.

89

90

Parameters:

91

- nodes: Optional[Sequence[BaseNode]], nodes to index

92

- storage_context: Optional[StorageContext], storage configuration

93

- service_context: Optional[ServiceContext], service configuration (deprecated)

94

- show_progress: bool, whether to show indexing progress

95

"""

96

def __init__(

97

self,

98

nodes: Optional[Sequence[BaseNode]] = None,

99

storage_context: Optional[StorageContext] = None,

100

service_context: Optional[ServiceContext] = None,

101

show_progress: bool = False,

102

**kwargs

103

): ...

104

105

@classmethod

106

def from_documents(

107

cls,

108

documents: Sequence[Document],

109

storage_context: Optional[StorageContext] = None,

110

service_context: Optional[ServiceContext] = None,

111

show_progress: bool = False,

112

**kwargs

113

) -> "SummaryIndex":

114

"""Create index from documents."""

115

116

def as_query_engine(self, **kwargs) -> BaseQueryEngine:

117

"""Convert to query engine."""

118

119

def as_retriever(self, **kwargs) -> BaseRetriever:

120

"""Convert to retriever."""

121

```

122

123

### Tree Index

124

125

Hierarchical index that organizes information in a tree structure, enabling top-down traversal and summarization at different levels.

126

127

```python { .api }

128

class TreeIndex:

129

"""

130

Hierarchical tree-based index for structured information organization.

131

132

Parameters:

133

- nodes: Optional[Sequence[BaseNode]], nodes to index

134

- num_children: int, branching factor for tree construction

135

- build_tree: bool, whether to build tree during initialization

136

- storage_context: Optional[StorageContext], storage configuration

137

- service_context: Optional[ServiceContext], service configuration (deprecated)

138

- show_progress: bool, whether to show indexing progress

139

"""

140

def __init__(

141

self,

142

nodes: Optional[Sequence[BaseNode]] = None,

143

num_children: int = 10,

144

build_tree: bool = True,

145

storage_context: Optional[StorageContext] = None,

146

service_context: Optional[ServiceContext] = None,

147

show_progress: bool = False,

148

**kwargs

149

): ...

150

151

@classmethod

152

def from_documents(

153

cls,

154

documents: Sequence[Document],

155

num_children: int = 10,

156

build_tree: bool = True,

157

storage_context: Optional[StorageContext] = None,

158

service_context: Optional[ServiceContext] = None,

159

show_progress: bool = False,

160

**kwargs

161

) -> "TreeIndex":

162

"""Create tree index from documents."""

163

164

def as_query_engine(self, **kwargs) -> BaseQueryEngine:

165

"""Convert to query engine."""

166

167

def as_retriever(self, **kwargs) -> BaseRetriever:

168

"""Convert to retriever."""

169

```

170

171

### Keyword Table Index

172

173

Index based on keyword extraction and matching, supporting various keyword extraction algorithms for precise term-based retrieval.

174

175

```python { .api }

176

class KeywordTableIndex:

177

"""

178

Index based on keyword extraction and matching.

179

180

Parameters:

181

- nodes: Optional[Sequence[BaseNode]], nodes to index

182

- storage_context: Optional[StorageContext], storage configuration

183

- service_context: Optional[ServiceContext], service configuration (deprecated)

184

- show_progress: bool, whether to show indexing progress

185

"""

186

def __init__(

187

self,

188

nodes: Optional[Sequence[BaseNode]] = None,

189

storage_context: Optional[StorageContext] = None,

190

service_context: Optional[ServiceContext] = None,

191

show_progress: bool = False,

192

**kwargs

193

): ...

194

195

@classmethod

196

def from_documents(

197

cls,

198

documents: Sequence[Document],

199

storage_context: Optional[StorageContext] = None,

200

service_context: Optional[ServiceContext] = None,

201

show_progress: bool = False,

202

**kwargs

203

) -> "KeywordTableIndex":

204

"""Create keyword index from documents."""

205

206

class SimpleKeywordTableIndex(KeywordTableIndex):

207

"""Simple keyword extraction using basic text processing."""

208

209

class RAKEKeywordTableIndex(KeywordTableIndex):

210

"""Keyword extraction using RAKE (Rapid Automatic Keyword Extraction) algorithm."""

211

```

212

213

### Knowledge Graph Index

214

215

Index that constructs and queries knowledge graphs from text, extracting entities and relationships for graph-based retrieval.

216

217

```python { .api }

218

class KnowledgeGraphIndex:

219

"""

220

Index that builds knowledge graphs from text for entity-relationship queries.

221

222

Parameters:

223

- nodes: Optional[Sequence[BaseNode]], nodes to index

224

- storage_context: Optional[StorageContext], storage configuration

225

- service_context: Optional[ServiceContext], service configuration (deprecated)

226

- max_triplets_per_chunk: int, maximum triplets to extract per chunk

227

- show_progress: bool, whether to show indexing progress

228

- include_embeddings: bool, whether to include embeddings

229

"""

230

def __init__(

231

self,

232

nodes: Optional[Sequence[BaseNode]] = None,

233

storage_context: Optional[StorageContext] = None,

234

service_context: Optional[ServiceContext] = None,

235

max_triplets_per_chunk: int = 10,

236

show_progress: bool = False,

237

include_embeddings: bool = True,

238

**kwargs

239

): ...

240

241

@classmethod

242

def from_documents(

243

cls,

244

documents: Sequence[Document],

245

storage_context: Optional[StorageContext] = None,

246

service_context: Optional[ServiceContext] = None,

247

max_triplets_per_chunk: int = 10,

248

show_progress: bool = False,

249

include_embeddings: bool = True,

250

**kwargs

251

) -> "KnowledgeGraphIndex":

252

"""Create knowledge graph index from documents."""

253

254

def as_query_engine(self, **kwargs) -> BaseQueryEngine:

255

"""Convert to query engine."""

256

257

def as_retriever(self, **kwargs) -> BaseRetriever:

258

"""Convert to retriever."""

259

```

260

261

### Property Graph Index

262

263

Advanced graph index supporting property graphs with typed nodes and relationships, enabling complex graph queries and traversal.

264

265

```python { .api }

266

class PropertyGraphIndex:

267

"""

268

Index supporting property graphs with typed nodes and relationships.

269

270

Parameters:

271

- nodes: Optional[Sequence[BaseNode]], nodes to index

272

- property_graph_store: Optional[PropertyGraphStore], graph store backend

273

- embed_kg_nodes: bool, whether to embed knowledge graph nodes

274

- storage_context: Optional[StorageContext], storage configuration

275

- service_context: Optional[ServiceContext], service configuration (deprecated)

276

- show_progress: bool, whether to show indexing progress

277

"""

278

def __init__(

279

self,

280

nodes: Optional[Sequence[BaseNode]] = None,

281

property_graph_store: Optional[PropertyGraphStore] = None,

282

embed_kg_nodes: bool = True,

283

storage_context: Optional[StorageContext] = None,

284

service_context: Optional[ServiceContext] = None,

285

show_progress: bool = False,

286

**kwargs

287

): ...

288

289

@classmethod

290

def from_documents(

291

cls,

292

documents: Sequence[Document],

293

property_graph_store: Optional[PropertyGraphStore] = None,

294

embed_kg_nodes: bool = True,

295

storage_context: Optional[StorageContext] = None,

296

service_context: Optional[ServiceContext] = None,

297

show_progress: bool = False,

298

**kwargs

299

) -> "PropertyGraphIndex":

300

"""Create property graph index from documents."""

301

302

def as_query_engine(self, **kwargs) -> BaseQueryEngine:

303

"""Convert to query engine."""

304

305

def as_retriever(self, **kwargs) -> BaseRetriever:

306

"""Convert to retriever."""

307

```

308

309

### Document Summary Index

310

311

Index that creates summaries for each document, enabling summary-based retrieval and hierarchical information access.

312

313

```python { .api }

314

class DocumentSummaryIndex:

315

"""

316

Index that creates summaries for documents to enable summary-based retrieval.

317

318

Parameters:

319

- nodes: Optional[Sequence[BaseNode]], nodes to index

320

- storage_context: Optional[StorageContext], storage configuration

321

- service_context: Optional[ServiceContext], service configuration (deprecated)

322

- response_synthesizer: Optional[BaseSynthesizer], synthesizer for summaries

323

- show_progress: bool, whether to show indexing progress

324

"""

325

def __init__(

326

self,

327

nodes: Optional[Sequence[BaseNode]] = None,

328

storage_context: Optional[StorageContext] = None,

329

service_context: Optional[ServiceContext] = None,

330

response_synthesizer: Optional[BaseSynthesizer] = None,

331

show_progress: bool = False,

332

**kwargs

333

): ...

334

335

@classmethod

336

def from_documents(

337

cls,

338

documents: Sequence[Document],

339

storage_context: Optional[StorageContext] = None,

340

service_context: Optional[ServiceContext] = None,

341

response_synthesizer: Optional[BaseSynthesizer] = None,

342

show_progress: bool = False,

343

**kwargs

344

) -> "DocumentSummaryIndex":

345

"""Create document summary index from documents."""

346

347

def as_query_engine(self, **kwargs) -> BaseQueryEngine:

348

"""Convert to query engine."""

349

350

def as_retriever(self, **kwargs) -> BaseRetriever:

351

"""Convert to retriever."""

352

```

353

354

### Composable Graph

355

356

Container for multiple indices that can be queried together, enabling complex multi-index retrieval strategies.

357

358

```python { .api }

359

class ComposableGraph:

360

"""

361

Container for multiple indices enabling composable queries.

362

363

Parameters:

364

- all_indices: Dict[str, BaseIndex], dictionary of index_id to index

365

- root_id: str, identifier of the root index

366

- storage_context: Optional[StorageContext], storage configuration

367

- service_context: Optional[ServiceContext], service configuration (deprecated)

368

"""

369

def __init__(

370

self,

371

all_indices: Dict[str, BaseIndex],

372

root_id: str,

373

storage_context: Optional[StorageContext] = None,

374

service_context: Optional[ServiceContext] = None,

375

**kwargs

376

): ...

377

378

def as_query_engine(

379

self,

380

custom_query_engines: Optional[Dict[str, BaseQueryEngine]] = None,

381

**kwargs

382

) -> BaseQueryEngine:

383

"""Convert to composable query engine."""

384

385

def as_retriever(

386

self,

387

custom_retrievers: Optional[Dict[str, BaseRetriever]] = None,

388

**kwargs

389

) -> BaseRetriever:

390

"""Convert to composable retriever."""

391

```

392

393

### Index Loading & Storage

394

395

Functions for persisting and loading indices from storage backends.

396

397

```python { .api }

398

def load_index_from_storage(

399

storage_context: StorageContext,

400

index_id: Optional[str] = None,

401

service_context: Optional[ServiceContext] = None,

402

**kwargs

403

) -> BaseIndex:

404

"""

405

Load index from storage context.

406

407

Parameters:

408

- storage_context: StorageContext, storage configuration

409

- index_id: Optional[str], specific index to load

410

- service_context: Optional[ServiceContext], service configuration

411

412

Returns:

413

BaseIndex: The loaded index

414

"""

415

416

def load_indices_from_storage(

417

storage_context: StorageContext,

418

index_ids: Optional[Sequence[str]] = None,

419

service_context: Optional[ServiceContext] = None,

420

**kwargs

421

) -> List[BaseIndex]:

422

"""

423

Load multiple indices from storage context.

424

425

Parameters:

426

- storage_context: StorageContext, storage configuration

427

- index_ids: Optional[Sequence[str]], specific indices to load

428

- service_context: Optional[ServiceContext], service configuration

429

430

Returns:

431

List[BaseIndex]: List of loaded indices

432

"""

433

434

def load_graph_from_storage(

435

storage_context: StorageContext,

436

root_id: str,

437

service_context: Optional[ServiceContext] = None,

438

**kwargs

439

) -> ComposableGraph:

440

"""

441

Load composable graph from storage context.

442

443

Parameters:

444

- storage_context: StorageContext, storage configuration

445

- root_id: str, root index identifier

446

- service_context: Optional[ServiceContext], service configuration

447

448

Returns:

449

ComposableGraph: The loaded composable graph

450

"""

451

```

452

453

### Multi-Modal Vector Store Index

454

455

Specialized vector index supporting multi-modal content including text, images, and other media types.

456

457

```python { .api }

458

class MultiModalVectorStoreIndex(VectorStoreIndex):

459

"""

460

Vector store index supporting multi-modal content (text, images, etc.).

461

462

Parameters:

463

- nodes: Optional[Sequence[BaseNode]], nodes to index

464

- embed_model: Optional[MultiModalEmbedding], multi-modal embedding model

465

- storage_context: Optional[StorageContext], storage configuration

466

- service_context: Optional[ServiceContext], service configuration (deprecated)

467

- show_progress: bool, whether to show indexing progress

468

"""

469

def __init__(

470

self,

471

nodes: Optional[Sequence[BaseNode]] = None,

472

embed_model: Optional[MultiModalEmbedding] = None,

473

storage_context: Optional[StorageContext] = None,

474

service_context: Optional[ServiceContext] = None,

475

show_progress: bool = False,

476

**kwargs

477

): ...

478

```

479

480

## Legacy Aliases

481

482

For backward compatibility, GPT-prefixed aliases are available for the core index types listed below:

483

484

```python { .api }

485

# Legacy aliases (deprecated, use non-GPT versions)

486

GPTVectorStoreIndex = VectorStoreIndex

487

GPTListIndex = SummaryIndex

488

GPTTreeIndex = TreeIndex

489

GPTKeywordTableIndex = KeywordTableIndex

490

GPTSimpleKeywordTableIndex = SimpleKeywordTableIndex

491

GPTRAKEKeywordTableIndex = RAKEKeywordTableIndex

492

GPTDocumentSummaryIndex = DocumentSummaryIndex

493

```

494

495

## Usage Examples

496

497

### Creating a Vector Store Index

498

499

```python

500

from llama_index.core import VectorStoreIndex, Document, Settings

501

from llama_index.core.embeddings import MockEmbedding

502

503

# Configure embedding model

504

Settings.embed_model = MockEmbedding(embed_dim=384)

505

506

# Create documents

507

documents = [

508

Document(text="Introduction to machine learning and artificial intelligence."),

509

Document(text="Deep learning techniques for computer vision applications."),

510

Document(text="Natural language processing with transformer models.")

511

]

512

513

# Create vector store index

514

index = VectorStoreIndex.from_documents(documents, show_progress=True)

515

516

# Query the index

517

query_engine = index.as_query_engine()

518

response = query_engine.query("What is machine learning?")

519

print(response.response)

520

521

# Use as retriever

522

retriever = index.as_retriever(similarity_top_k=2)

523

nodes = retriever.retrieve("deep learning")

524

for node in nodes:

525

print(f"Score: {node.score:.3f}, Text: {node.text}")

526

```

527

528

### Working with Multiple Index Types

529

530

```python

531

from llama_index.core import (

532

VectorStoreIndex,

533

TreeIndex,

534

KeywordTableIndex,

535

ComposableGraph

536

)

537

538

# Create different index types

539

vector_index = VectorStoreIndex.from_documents(documents)

540

tree_index = TreeIndex.from_documents(documents)

541

keyword_index = KeywordTableIndex.from_documents(documents)

542

543

# Create composable graph

544

graph = ComposableGraph(

545

all_indices={

546

"vector": vector_index,

547

"tree": tree_index,

548

"keyword": keyword_index

549

},

550

root_id="vector"

551

)

552

553

# Query the composable graph

554

query_engine = graph.as_query_engine()

555

response = query_engine.query("Compare machine learning approaches")

556

```

557

558

### Persisting and Loading Indices

559

560

```python

561

from llama_index.core import StorageContext, load_index_from_storage

562

563

# Create index with storage context

564

storage_context = StorageContext.from_defaults(persist_dir="./storage")

565

index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)

566

567

# Persist index

568

index.storage_context.persist()

569

570

# Load index later

571

storage_context = StorageContext.from_defaults(persist_dir="./storage")

572

loaded_index = load_index_from_storage(storage_context)

573

```

574

575

## Types & Enums

576

577

```python { .api }

578

class IndexStructType(str, Enum):

579

"""Types of index structures."""

580

TREE = "tree"

581

LIST = "list"

582

KEYWORD_TABLE = "keyword_table"

583

VECTOR_STORE = "vector_store"

584

DOCUMENT_SUMMARY = "document_summary"

585

KNOWLEDGE_GRAPH = "kg"

586

PROPERTY_GRAPH = "property_graph"

587

EMPTY = "empty"

588

```