0
# Indices
1
2
Index structures for organizing and retrieving information from documents. LlamaIndex provides multiple index types optimized for different retrieval patterns, from semantic similarity search to keyword matching and hierarchical navigation.
3
4
## Capabilities
5
6
### Vector Store Index
7
8
Primary index type for semantic similarity search using vector embeddings. Stores document chunks as embeddings and retrieves relevant content based on query similarity.
9
10
```python { .api }
11
class VectorStoreIndex:
12
"""
13
Index that stores embeddings for semantic similarity retrieval.
14
15
Parameters:
16
- nodes: Optional[Sequence[BaseNode]], nodes to index
17
- embed_model: Optional[BaseEmbedding], embedding model to use
18
- storage_context: Optional[StorageContext], storage configuration
19
- service_context: Optional[ServiceContext], service configuration (deprecated)
20
- show_progress: bool, whether to show indexing progress
21
- store_nodes_override: bool, whether to always store nodes in the docstore and index store, even when the vector store already keeps the node text
22
"""
23
def __init__(
24
self,
25
nodes: Optional[Sequence[BaseNode]] = None,
26
embed_model: Optional[BaseEmbedding] = None,
27
storage_context: Optional[StorageContext] = None,
28
service_context: Optional[ServiceContext] = None,
29
show_progress: bool = False,
30
store_nodes_override: bool = False,
31
**kwargs
32
): ...
33
34
@classmethod
35
def from_documents(
36
cls,
37
documents: Sequence[Document],
38
storage_context: Optional[StorageContext] = None,
39
service_context: Optional[ServiceContext] = None,
40
show_progress: bool = False,
41
**kwargs
42
) -> "VectorStoreIndex":
43
"""Create index from documents."""
44
45
def as_query_engine(
46
self,
47
retriever_mode: str = "default",
48
response_mode: str = "compact",
49
**kwargs
50
) -> BaseQueryEngine:
51
"""Convert to query engine."""
52
53
def as_retriever(
54
self,
55
retriever_mode: str = "default",
56
similarity_top_k: int = 2,
57
**kwargs
58
) -> BaseRetriever:
59
"""Convert to retriever."""
60
61
def as_chat_engine(
62
self,
63
chat_mode: str = "best",
64
**kwargs
65
) -> BaseChatEngine:
66
"""Convert to chat engine."""
67
68
def insert(self, document: Document, **kwargs) -> None:
69
"""Insert document into index."""
70
71
def insert_nodes(self, nodes: List[BaseNode], **kwargs) -> None:
72
"""Insert nodes into index."""
73
74
def delete_ref_doc(self, ref_doc_id: str, **kwargs) -> None:
75
"""Delete document from index."""
76
77
def update_ref_doc(self, document: Document, **kwargs) -> None:
78
"""Update document in index."""
79
```
80
81
### Summary Index
82
83
Simple index that stores all nodes sequentially, useful for small document collections or when comprehensive retrieval is needed.
84
85
```python { .api }
86
class SummaryIndex:
87
"""
88
Simple index storing all nodes for comprehensive retrieval.
89
90
Parameters:
91
- nodes: Optional[Sequence[BaseNode]], nodes to index
92
- storage_context: Optional[StorageContext], storage configuration
93
- service_context: Optional[ServiceContext], service configuration (deprecated)
94
- show_progress: bool, whether to show indexing progress
95
"""
96
def __init__(
97
self,
98
nodes: Optional[Sequence[BaseNode]] = None,
99
storage_context: Optional[StorageContext] = None,
100
service_context: Optional[ServiceContext] = None,
101
show_progress: bool = False,
102
**kwargs
103
): ...
104
105
@classmethod
106
def from_documents(
107
cls,
108
documents: Sequence[Document],
109
storage_context: Optional[StorageContext] = None,
110
service_context: Optional[ServiceContext] = None,
111
show_progress: bool = False,
112
**kwargs
113
) -> "SummaryIndex":
114
"""Create index from documents."""
115
116
def as_query_engine(self, **kwargs) -> BaseQueryEngine:
117
"""Convert to query engine."""
118
119
def as_retriever(self, **kwargs) -> BaseRetriever:
120
"""Convert to retriever."""
121
```
122
123
### Tree Index
124
125
Hierarchical index that organizes information in a tree structure, enabling top-down traversal and summarization at different levels.
126
127
```python { .api }
128
class TreeIndex:
129
"""
130
Hierarchical tree-based index for structured information organization.
131
132
Parameters:
133
- nodes: Optional[Sequence[BaseNode]], nodes to index
134
- num_children: int, branching factor for tree construction
135
- build_tree: bool, whether to build tree during initialization
136
- storage_context: Optional[StorageContext], storage configuration
137
- service_context: Optional[ServiceContext], service configuration (deprecated)
138
- show_progress: bool, whether to show indexing progress
139
"""
140
def __init__(
141
self,
142
nodes: Optional[Sequence[BaseNode]] = None,
143
num_children: int = 10,
144
build_tree: bool = True,
145
storage_context: Optional[StorageContext] = None,
146
service_context: Optional[ServiceContext] = None,
147
show_progress: bool = False,
148
**kwargs
149
): ...
150
151
@classmethod
152
def from_documents(
153
cls,
154
documents: Sequence[Document],
155
num_children: int = 10,
156
build_tree: bool = True,
157
storage_context: Optional[StorageContext] = None,
158
service_context: Optional[ServiceContext] = None,
159
show_progress: bool = False,
160
**kwargs
161
) -> "TreeIndex":
162
"""Create tree index from documents."""
163
164
def as_query_engine(self, **kwargs) -> BaseQueryEngine:
165
"""Convert to query engine."""
166
167
def as_retriever(self, **kwargs) -> BaseRetriever:
168
"""Convert to retriever."""
169
```
170
171
### Keyword Table Index
172
173
Index based on keyword extraction and matching, supporting various keyword extraction algorithms for precise term-based retrieval.
174
175
```python { .api }
176
class KeywordTableIndex:
177
"""
178
Index based on keyword extraction and matching.
179
180
Parameters:
181
- nodes: Optional[Sequence[BaseNode]], nodes to index
182
- storage_context: Optional[StorageContext], storage configuration
183
- service_context: Optional[ServiceContext], service configuration (deprecated)
184
- show_progress: bool, whether to show indexing progress
185
"""
186
def __init__(
187
self,
188
nodes: Optional[Sequence[BaseNode]] = None,
189
storage_context: Optional[StorageContext] = None,
190
service_context: Optional[ServiceContext] = None,
191
show_progress: bool = False,
192
**kwargs
193
): ...
194
195
@classmethod
196
def from_documents(
197
cls,
198
documents: Sequence[Document],
199
storage_context: Optional[StorageContext] = None,
200
service_context: Optional[ServiceContext] = None,
201
show_progress: bool = False,
202
**kwargs
203
) -> "KeywordTableIndex":
204
"""Create keyword index from documents."""
205
206
class SimpleKeywordTableIndex(KeywordTableIndex):
207
"""Simple keyword extraction using basic text processing."""
208
209
class RAKEKeywordTableIndex(KeywordTableIndex):
210
"""Keyword extraction using RAKE (Rapid Automatic Keyword Extraction) algorithm."""
211
```
212
213
### Knowledge Graph Index
214
215
Index that constructs and queries knowledge graphs from text, extracting entities and relationships for graph-based retrieval.
216
217
```python { .api }
218
class KnowledgeGraphIndex:
219
"""
220
Index that builds knowledge graphs from text for entity-relationship queries.
221
222
Parameters:
223
- nodes: Optional[Sequence[BaseNode]], nodes to index
224
- storage_context: Optional[StorageContext], storage configuration
225
- service_context: Optional[ServiceContext], service configuration (deprecated)
226
- max_triplets_per_chunk: int, maximum triplets to extract per chunk
227
- show_progress: bool, whether to show indexing progress
228
- include_embeddings: bool, whether to include embeddings
229
"""
230
def __init__(
231
self,
232
nodes: Optional[Sequence[BaseNode]] = None,
233
storage_context: Optional[StorageContext] = None,
234
service_context: Optional[ServiceContext] = None,
235
max_triplets_per_chunk: int = 10,
236
show_progress: bool = False,
237
include_embeddings: bool = False,
238
**kwargs
239
): ...
240
241
@classmethod
242
def from_documents(
243
cls,
244
documents: Sequence[Document],
245
storage_context: Optional[StorageContext] = None,
246
service_context: Optional[ServiceContext] = None,
247
max_triplets_per_chunk: int = 10,
248
show_progress: bool = False,
249
include_embeddings: bool = False,
250
**kwargs
251
) -> "KnowledgeGraphIndex":
252
"""Create knowledge graph index from documents."""
253
254
def as_query_engine(self, **kwargs) -> BaseQueryEngine:
255
"""Convert to query engine."""
256
257
def as_retriever(self, **kwargs) -> BaseRetriever:
258
"""Convert to retriever."""
259
```
260
261
### Property Graph Index
262
263
Advanced graph index supporting property graphs with typed nodes and relationships, enabling complex graph queries and traversal.
264
265
```python { .api }
266
class PropertyGraphIndex:
267
"""
268
Index supporting property graphs with typed nodes and relationships.
269
270
Parameters:
271
- nodes: Optional[Sequence[BaseNode]], nodes to index
272
- property_graph_store: Optional[PropertyGraphStore], graph store backend
273
- embed_kg_nodes: bool, whether to embed knowledge graph nodes
274
- storage_context: Optional[StorageContext], storage configuration
275
- service_context: Optional[ServiceContext], service configuration (deprecated)
276
- show_progress: bool, whether to show indexing progress
277
"""
278
def __init__(
279
self,
280
nodes: Optional[Sequence[BaseNode]] = None,
281
property_graph_store: Optional[PropertyGraphStore] = None,
282
embed_kg_nodes: bool = True,
283
storage_context: Optional[StorageContext] = None,
284
service_context: Optional[ServiceContext] = None,
285
show_progress: bool = False,
286
**kwargs
287
): ...
288
289
@classmethod
290
def from_documents(
291
cls,
292
documents: Sequence[Document],
293
property_graph_store: Optional[PropertyGraphStore] = None,
294
embed_kg_nodes: bool = True,
295
storage_context: Optional[StorageContext] = None,
296
service_context: Optional[ServiceContext] = None,
297
show_progress: bool = False,
298
**kwargs
299
) -> "PropertyGraphIndex":
300
"""Create property graph index from documents."""
301
302
def as_query_engine(self, **kwargs) -> BaseQueryEngine:
303
"""Convert to query engine."""
304
305
def as_retriever(self, **kwargs) -> BaseRetriever:
306
"""Convert to retriever."""
307
```
308
309
### Document Summary Index
310
311
Index that creates summaries for each document, enabling summary-based retrieval and hierarchical information access.
312
313
```python { .api }
314
class DocumentSummaryIndex:
315
"""
316
Index that creates summaries for documents to enable summary-based retrieval.
317
318
Parameters:
319
- nodes: Optional[Sequence[BaseNode]], nodes to index
320
- storage_context: Optional[StorageContext], storage configuration
321
- service_context: Optional[ServiceContext], service configuration (deprecated)
322
- response_synthesizer: Optional[BaseSynthesizer], synthesizer for summaries
323
- show_progress: bool, whether to show indexing progress
324
"""
325
def __init__(
326
self,
327
nodes: Optional[Sequence[BaseNode]] = None,
328
storage_context: Optional[StorageContext] = None,
329
service_context: Optional[ServiceContext] = None,
330
response_synthesizer: Optional[BaseSynthesizer] = None,
331
show_progress: bool = False,
332
**kwargs
333
): ...
334
335
@classmethod
336
def from_documents(
337
cls,
338
documents: Sequence[Document],
339
storage_context: Optional[StorageContext] = None,
340
service_context: Optional[ServiceContext] = None,
341
response_synthesizer: Optional[BaseSynthesizer] = None,
342
show_progress: bool = False,
343
**kwargs
344
) -> "DocumentSummaryIndex":
345
"""Create document summary index from documents."""
346
347
def as_query_engine(self, **kwargs) -> BaseQueryEngine:
348
"""Convert to query engine."""
349
350
def as_retriever(self, **kwargs) -> BaseRetriever:
351
"""Convert to retriever."""
352
```
353
354
### Composable Graph
355
356
Container for multiple indices that can be queried together, enabling complex multi-index retrieval strategies.
357
358
```python { .api }
359
class ComposableGraph:
360
"""
361
Container for multiple indices enabling composable queries.
362
363
Parameters:
364
- all_indices: Dict[str, BaseIndex], dictionary of index_id to index
365
- root_id: str, identifier of the root index
366
- storage_context: Optional[StorageContext], storage configuration
367
- service_context: Optional[ServiceContext], service configuration (deprecated)
368
"""
369
def __init__(
370
self,
371
all_indices: Dict[str, BaseIndex],
372
root_id: str,
373
storage_context: Optional[StorageContext] = None,
374
service_context: Optional[ServiceContext] = None,
375
**kwargs
376
): ...
377
378
def as_query_engine(
379
self,
380
custom_query_engines: Optional[Dict[str, BaseQueryEngine]] = None,
381
**kwargs
382
) -> BaseQueryEngine:
383
"""Convert to composable query engine."""
384
385
def as_retriever(
386
self,
387
custom_retrievers: Optional[Dict[str, BaseRetriever]] = None,
388
**kwargs
389
) -> BaseRetriever:
390
"""Convert to composable retriever."""
391
```
392
393
### Index Loading & Storage
394
395
Functions for persisting and loading indices from storage backends.
396
397
```python { .api }
398
def load_index_from_storage(
399
storage_context: StorageContext,
400
index_id: Optional[str] = None,
401
service_context: Optional[ServiceContext] = None,
402
**kwargs
403
) -> BaseIndex:
404
"""
405
Load index from storage context.
406
407
Parameters:
408
- storage_context: StorageContext, storage configuration
409
- index_id: Optional[str], specific index to load
410
- service_context: Optional[ServiceContext], service configuration (deprecated)
411
412
Returns:
413
BaseIndex: The loaded index
414
"""
415
416
def load_indices_from_storage(
417
storage_context: StorageContext,
418
index_ids: Optional[Sequence[str]] = None,
419
service_context: Optional[ServiceContext] = None,
420
**kwargs
421
) -> List[BaseIndex]:
422
"""
423
Load multiple indices from storage context.
424
425
Parameters:
426
- storage_context: StorageContext, storage configuration
427
- index_ids: Optional[Sequence[str]], specific indices to load
428
- service_context: Optional[ServiceContext], service configuration (deprecated)
429
430
Returns:
431
List[BaseIndex]: List of loaded indices
432
"""
433
434
def load_graph_from_storage(
435
storage_context: StorageContext,
436
root_id: str,
437
service_context: Optional[ServiceContext] = None,
438
**kwargs
439
) -> ComposableGraph:
440
"""
441
Load composable graph from storage context.
442
443
Parameters:
444
- storage_context: StorageContext, storage configuration
445
- root_id: str, root index identifier
446
- service_context: Optional[ServiceContext], service configuration (deprecated)
447
448
Returns:
449
ComposableGraph: The loaded composable graph
450
"""
451
```
452
453
### Multi-Modal Vector Store Index
454
455
Specialized vector index supporting multi-modal content including text, images, and other media types.
456
457
```python { .api }
458
class MultiModalVectorStoreIndex(VectorStoreIndex):
459
"""
460
Vector store index supporting multi-modal content (text, images, etc.).
461
462
Parameters:
463
- nodes: Optional[Sequence[BaseNode]], nodes to index
464
- embed_model: Optional[MultiModalEmbedding], multi-modal embedding model
465
- storage_context: Optional[StorageContext], storage configuration
466
- service_context: Optional[ServiceContext], service configuration (deprecated)
467
- show_progress: bool, whether to show indexing progress
468
"""
469
def __init__(
470
self,
471
nodes: Optional[Sequence[BaseNode]] = None,
472
embed_model: Optional[MultiModalEmbedding] = None,
473
storage_context: Optional[StorageContext] = None,
474
service_context: Optional[ServiceContext] = None,
475
show_progress: bool = False,
476
**kwargs
477
): ...
478
```
479
480
## Legacy Aliases
481
482
For backward compatibility, GPT-prefixed aliases are available for the index types listed below:
483
484
```python { .api }
485
# Legacy aliases (deprecated, use non-GPT versions)
486
GPTVectorStoreIndex = VectorStoreIndex
487
GPTListIndex = SummaryIndex
488
GPTTreeIndex = TreeIndex
489
GPTKeywordTableIndex = KeywordTableIndex
490
GPTSimpleKeywordTableIndex = SimpleKeywordTableIndex
491
GPTRAKEKeywordTableIndex = RAKEKeywordTableIndex
492
GPTDocumentSummaryIndex = DocumentSummaryIndex
493
```
494
495
## Usage Examples
496
497
### Creating a Vector Store Index
498
499
```python
500
from llama_index.core import VectorStoreIndex, Document, Settings
501
from llama_index.core.embeddings import MockEmbedding
502
503
# Configure embedding model
504
Settings.embed_model = MockEmbedding(embed_dim=384)
505
506
# Create documents
507
documents = [
508
Document(text="Introduction to machine learning and artificial intelligence."),
509
Document(text="Deep learning techniques for computer vision applications."),
510
Document(text="Natural language processing with transformer models.")
511
]
512
513
# Create vector store index
514
index = VectorStoreIndex.from_documents(documents, show_progress=True)
515
516
# Query the index (response synthesis uses Settings.llm, which defaults to OpenAI unless configured)
517
query_engine = index.as_query_engine()
518
response = query_engine.query("What is machine learning?")
519
print(response.response)
520
521
# Use as retriever
522
retriever = index.as_retriever(similarity_top_k=2)
523
nodes = retriever.retrieve("deep learning")
524
for node in nodes:
525
print(f"Score: {node.score:.3f}, Text: {node.text}")
526
```
527
528
### Working with Multiple Index Types
529
530
```python
531
from llama_index.core import (
532
VectorStoreIndex,
533
TreeIndex,
534
KeywordTableIndex,
535
ComposableGraph
536
)
537
538
# Create different index types
539
vector_index = VectorStoreIndex.from_documents(documents)
540
tree_index = TreeIndex.from_documents(documents)
541
keyword_index = KeywordTableIndex.from_documents(documents)
542
543
# Create composable graph
544
graph = ComposableGraph(
545
all_indices={
546
"vector": vector_index,
547
"tree": tree_index,
548
"keyword": keyword_index
549
},
550
root_id="vector"
551
)
552
553
# Query the composable graph
554
query_engine = graph.as_query_engine()
555
response = query_engine.query("Compare machine learning approaches")
556
```
557
558
### Persisting and Loading Indices
559
560
```python
561
from llama_index.core import StorageContext, VectorStoreIndex, load_index_from_storage
562
563
# Create index with storage context
564
storage_context = StorageContext.from_defaults()  # no persist_dir here: passing one loads from it, and nothing has been persisted yet
565
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
566
567
# Persist index
568
index.storage_context.persist(persist_dir="./storage")
569
570
# Load index later
571
storage_context = StorageContext.from_defaults(persist_dir="./storage")
572
loaded_index = load_index_from_storage(storage_context)
573
```
574
575
## Types & Enums
576
577
```python { .api }
578
class IndexStructType(str, Enum):
579
"""Types of index structures."""
580
TREE = "tree"
581
LIST = "list"
582
KEYWORD_TABLE = "keyword_table"
583
VECTOR_STORE = "vector_store"
584
DOCUMENT_SUMMARY = "document_summary"
585
KNOWLEDGE_GRAPH = "kg"
586
PROPERTY_GRAPH = "property_graph"
587
EMPTY = "empty"
588
```