Interface between LLMs and your data
—
Index structures for organizing and retrieving information from documents. LlamaIndex provides multiple index types optimized for different retrieval patterns, from semantic similarity search to keyword matching and hierarchical navigation.
Primary index type for semantic similarity search using vector embeddings. Stores document chunks as embeddings and retrieves relevant content based on query similarity.
class VectorStoreIndex:
"""
Index that stores embeddings for semantic similarity retrieval.
Parameters:
- nodes: Optional[Sequence[BaseNode]], nodes to index
- embed_model: Optional[BaseEmbedding], embedding model to use
- storage_context: Optional[StorageContext], storage configuration
- service_context: Optional[ServiceContext], service configuration (deprecated)
- show_progress: bool, whether to show indexing progress
- store_nodes_override: bool, whether to store nodes in docstore
"""
def __init__(
self,
nodes: Optional[Sequence[BaseNode]] = None,
embed_model: Optional[BaseEmbedding] = None,
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
show_progress: bool = False,
store_nodes_override: bool = False,
**kwargs
): ...
@classmethod
def from_documents(
cls,
documents: Sequence[Document],
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
show_progress: bool = False,
**kwargs
) -> "VectorStoreIndex":
"""Create index from documents."""
def as_query_engine(
self,
retriever_mode: str = "default",
response_mode: str = "compact",
**kwargs
) -> BaseQueryEngine:
"""Convert to query engine."""
def as_retriever(
self,
retriever_mode: str = "default",
similarity_top_k: int = 10,
**kwargs
) -> BaseRetriever:
"""Convert to retriever."""
def as_chat_engine(
self,
chat_mode: str = "best",
**kwargs
) -> BaseChatEngine:
"""Convert to chat engine."""
def insert(self, document: Document, **kwargs) -> None:
"""Insert document into index."""
def insert_nodes(self, nodes: List[BaseNode], **kwargs) -> None:
"""Insert nodes into index."""
def delete_ref_doc(self, ref_doc_id: str, **kwargs) -> None:
"""Delete document from index."""
def update_ref_doc(self, document: Document, **kwargs) -> None:
"""Update document in index."""Simple index that stores all nodes sequentially, useful for small document collections or when comprehensive retrieval is needed.
class SummaryIndex:
"""
Simple index storing all nodes for comprehensive retrieval.
Parameters:
- nodes: Optional[Sequence[BaseNode]], nodes to index
- storage_context: Optional[StorageContext], storage configuration
- service_context: Optional[ServiceContext], service configuration (deprecated)
- show_progress: bool, whether to show indexing progress
"""
def __init__(
self,
nodes: Optional[Sequence[BaseNode]] = None,
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
show_progress: bool = False,
**kwargs
): ...
@classmethod
def from_documents(
cls,
documents: Sequence[Document],
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
show_progress: bool = False,
**kwargs
) -> "SummaryIndex":
"""Create index from documents."""
def as_query_engine(self, **kwargs) -> BaseQueryEngine:
"""Convert to query engine."""
def as_retriever(self, **kwargs) -> BaseRetriever:
"""Convert to retriever."""Hierarchical index that organizes information in a tree structure, enabling top-down traversal and summarization at different levels.
class TreeIndex:
"""
Hierarchical tree-based index for structured information organization.
Parameters:
- nodes: Optional[Sequence[BaseNode]], nodes to index
- num_children: int, branching factor for tree construction
- build_tree: bool, whether to build tree during initialization
- storage_context: Optional[StorageContext], storage configuration
- service_context: Optional[ServiceContext], service configuration (deprecated)
- show_progress: bool, whether to show indexing progress
"""
def __init__(
self,
nodes: Optional[Sequence[BaseNode]] = None,
num_children: int = 10,
build_tree: bool = True,
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
show_progress: bool = False,
**kwargs
): ...
@classmethod
def from_documents(
cls,
documents: Sequence[Document],
num_children: int = 10,
build_tree: bool = True,
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
show_progress: bool = False,
**kwargs
) -> "TreeIndex":
"""Create tree index from documents."""
def as_query_engine(self, **kwargs) -> BaseQueryEngine:
"""Convert to query engine."""
def as_retriever(self, **kwargs) -> BaseRetriever:
"""Convert to retriever."""Index based on keyword extraction and matching, supporting various keyword extraction algorithms for precise term-based retrieval.
class KeywordTableIndex:
    """
    Index based on keyword extraction and matching.

    Parameters:
    - nodes: Optional[Sequence[BaseNode]], nodes to index
    - storage_context: Optional[StorageContext], storage configuration
    - service_context: Optional[ServiceContext], service configuration (deprecated)
    - show_progress: bool, whether to show indexing progress
    """

    def __init__(
        self,
        nodes: Optional[Sequence[BaseNode]] = None,
        storage_context: Optional[StorageContext] = None,
        service_context: Optional[ServiceContext] = None,
        show_progress: bool = False,
        **kwargs
    ): ...

    @classmethod
    def from_documents(
        cls,
        documents: Sequence[Document],
        storage_context: Optional[StorageContext] = None,
        service_context: Optional[ServiceContext] = None,
        show_progress: bool = False,
        **kwargs
    ) -> "KeywordTableIndex":
        """Create keyword index from documents."""
class SimpleKeywordTableIndex(KeywordTableIndex):
"""Simple keyword extraction using basic text processing."""
class RAKEKeywordTableIndex(KeywordTableIndex):
"""Keyword extraction using RAKE (Rapid Automatic Keyword Extraction) algorithm."""Index that constructs and queries knowledge graphs from text, extracting entities and relationships for graph-based retrieval.
class KnowledgeGraphIndex:
"""
Index that builds knowledge graphs from text for entity-relationship queries.
Parameters:
- nodes: Optional[Sequence[BaseNode]], nodes to index
- storage_context: Optional[StorageContext], storage configuration
- service_context: Optional[ServiceContext], service configuration (deprecated)
- max_triplets_per_chunk: int, maximum triplets to extract per chunk
- show_progress: bool, whether to show indexing progress
- include_embeddings: bool, whether to include embeddings
"""
def __init__(
self,
nodes: Optional[Sequence[BaseNode]] = None,
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
max_triplets_per_chunk: int = 10,
show_progress: bool = False,
include_embeddings: bool = True,
**kwargs
): ...
@classmethod
def from_documents(
cls,
documents: Sequence[Document],
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
max_triplets_per_chunk: int = 10,
show_progress: bool = False,
include_embeddings: bool = True,
**kwargs
) -> "KnowledgeGraphIndex":
"""Create knowledge graph index from documents."""
def as_query_engine(self, **kwargs) -> BaseQueryEngine:
"""Convert to query engine."""
def as_retriever(self, **kwargs) -> BaseRetriever:
"""Convert to retriever."""Advanced graph index supporting property graphs with typed nodes and relationships, enabling complex graph queries and traversal.
class PropertyGraphIndex:
"""
Index supporting property graphs with typed nodes and relationships.
Parameters:
- nodes: Optional[Sequence[BaseNode]], nodes to index
- property_graph_store: Optional[PropertyGraphStore], graph store backend
- embed_kg_nodes: bool, whether to embed knowledge graph nodes
- storage_context: Optional[StorageContext], storage configuration
- service_context: Optional[ServiceContext], service configuration (deprecated)
- show_progress: bool, whether to show indexing progress
"""
def __init__(
self,
nodes: Optional[Sequence[BaseNode]] = None,
property_graph_store: Optional[PropertyGraphStore] = None,
embed_kg_nodes: bool = True,
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
show_progress: bool = False,
**kwargs
): ...
@classmethod
def from_documents(
cls,
documents: Sequence[Document],
property_graph_store: Optional[PropertyGraphStore] = None,
embed_kg_nodes: bool = True,
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
show_progress: bool = False,
**kwargs
) -> "PropertyGraphIndex":
"""Create property graph index from documents."""
def as_query_engine(self, **kwargs) -> BaseQueryEngine:
"""Convert to query engine."""
def as_retriever(self, **kwargs) -> BaseRetriever:
"""Convert to retriever."""Index that creates summaries for each document, enabling summary-based retrieval and hierarchical information access.
class DocumentSummaryIndex:
"""
Index that creates summaries for documents to enable summary-based retrieval.
Parameters:
- nodes: Optional[Sequence[BaseNode]], nodes to index
- storage_context: Optional[StorageContext], storage configuration
- service_context: Optional[ServiceContext], service configuration (deprecated)
- response_synthesizer: Optional[BaseSynthesizer], synthesizer for summaries
- show_progress: bool, whether to show indexing progress
"""
def __init__(
self,
nodes: Optional[Sequence[BaseNode]] = None,
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
response_synthesizer: Optional[BaseSynthesizer] = None,
show_progress: bool = False,
**kwargs
): ...
@classmethod
def from_documents(
cls,
documents: Sequence[Document],
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
response_synthesizer: Optional[BaseSynthesizer] = None,
show_progress: bool = False,
**kwargs
) -> "DocumentSummaryIndex":
"""Create document summary index from documents."""
def as_query_engine(self, **kwargs) -> BaseQueryEngine:
"""Convert to query engine."""
def as_retriever(self, **kwargs) -> BaseRetriever:
"""Convert to retriever."""Container for multiple indices that can be queried together, enabling complex multi-index retrieval strategies.
class ComposableGraph:
"""
Container for multiple indices enabling composable queries.
Parameters:
- all_indices: Dict[str, BaseIndex], dictionary of index_id to index
- root_id: str, identifier of the root index
- storage_context: Optional[StorageContext], storage configuration
- service_context: Optional[ServiceContext], service configuration (deprecated)
"""
def __init__(
self,
all_indices: Dict[str, BaseIndex],
root_id: str,
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
**kwargs
): ...
def as_query_engine(
self,
custom_query_engines: Optional[Dict[str, BaseQueryEngine]] = None,
**kwargs
) -> BaseQueryEngine:
"""Convert to composable query engine."""
def as_retriever(
self,
custom_retrievers: Optional[Dict[str, BaseRetriever]] = None,
**kwargs
) -> BaseRetriever:
"""Convert to composable retriever."""Functions for persisting and loading indices from storage backends.
def load_index_from_storage(
    storage_context: StorageContext,
    index_id: Optional[str] = None,
    service_context: Optional[ServiceContext] = None,
    **kwargs
) -> BaseIndex:
    """
    Load index from storage context.

    Parameters:
    - storage_context: StorageContext, storage configuration
    - index_id: Optional[str], specific index to load
    - service_context: Optional[ServiceContext], service configuration

    Returns:
    BaseIndex: The loaded index
    """
def load_indices_from_storage(
    storage_context: StorageContext,
    index_ids: Optional[Sequence[str]] = None,
    service_context: Optional[ServiceContext] = None,
    **kwargs
) -> List[BaseIndex]:
    """
    Load multiple indices from storage context.

    Parameters:
    - storage_context: StorageContext, storage configuration
    - index_ids: Optional[Sequence[str]], specific indices to load
    - service_context: Optional[ServiceContext], service configuration

    Returns:
    List[BaseIndex]: List of loaded indices
    """
def load_graph_from_storage(
storage_context: StorageContext,
root_id: str,
service_context: Optional[ServiceContext] = None,
**kwargs
) -> ComposableGraph:
"""
Load composable graph from storage context.
Parameters:
- storage_context: StorageContext, storage configuration
- root_id: str, root index identifier
- service_context: Optional[ServiceContext], service configuration
Returns:
ComposableGraph: The loaded composable graph
"""Specialized vector index supporting multi-modal content including text, images, and other media types.
class MultiModalVectorStoreIndex(VectorStoreIndex):
"""
Vector store index supporting multi-modal content (text, images, etc.).
Parameters:
- nodes: Optional[Sequence[BaseNode]], nodes to index
- embed_model: Optional[MultiModalEmbedding], multi-modal embedding model
- storage_context: Optional[StorageContext], storage configuration
- service_context: Optional[ServiceContext], service configuration (deprecated)
- show_progress: bool, whether to show indexing progress
"""
def __init__(
self,
nodes: Optional[Sequence[BaseNode]] = None,
embed_model: Optional[MultiModalEmbedding] = None,
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
show_progress: bool = False,
**kwargs
): ...For backward compatibility, GPT-prefixed aliases are available for all index types:
# Legacy aliases (deprecated, use non-GPT versions)
GPTVectorStoreIndex = VectorStoreIndex
GPTListIndex = SummaryIndex
GPTTreeIndex = TreeIndex
GPTKeywordTableIndex = KeywordTableIndex
GPTSimpleKeywordTableIndex = SimpleKeywordTableIndex
GPTRAKEKeywordTableIndex = RAKEKeywordTableIndex
GPTDocumentSummaryIndex = DocumentSummaryIndex

from llama_index.core import VectorStoreIndex, Document, Settings
from llama_index.core.embeddings import MockEmbedding

# Configure embedding model
Settings.embed_model = MockEmbedding(embed_dim=384)

# Create documents
documents = [
    Document(text="Introduction to machine learning and artificial intelligence."),
    Document(text="Deep learning techniques for computer vision applications."),
    Document(text="Natural language processing with transformer models.")
]

# Create vector store index
index = VectorStoreIndex.from_documents(documents, show_progress=True)

# Query the index
query_engine = index.as_query_engine()
response = query_engine.query("What is machine learning?")
print(response.response)

# Use as retriever
retriever = index.as_retriever(similarity_top_k=2)
nodes = retriever.retrieve("deep learning")
for node in nodes:
    print(f"Score: {node.score:.3f}, Text: {node.text}")

from llama_index.core import (
    VectorStoreIndex,
    TreeIndex,
    KeywordTableIndex,
    ComposableGraph
)

# Create different index types
vector_index = VectorStoreIndex.from_documents(documents)
tree_index = TreeIndex.from_documents(documents)
keyword_index = KeywordTableIndex.from_documents(documents)

# Create composable graph
graph = ComposableGraph(
    all_indices={
        "vector": vector_index,
        "tree": tree_index,
        "keyword": keyword_index
    },
    root_id="vector"
)

# Query the composable graph
query_engine = graph.as_query_engine()
response = query_engine.query("Compare machine learning approaches")

from llama_index.core import StorageContext, load_index_from_storage
# Create index with storage context
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
# Persist index
index.storage_context.persist()
# Load index later
storage_context = StorageContext.from_defaults(persist_dir="./storage")
loaded_index = load_index_from_storage(storage_context)class IndexStructType(str, Enum):
"""Types of index structures."""
TREE = "tree"
LIST = "list"
KEYWORD_TABLE = "keyword_table"
VECTOR_STORE = "vector_store"
DOCUMENT_SUMMARY = "document_summary"
KNOWLEDGE_GRAPH = "kg"
PROPERTY_GRAPH = "property_graph"
EMPTY = "empty"Install with Tessl CLI
npx tessl i tessl/pypi-llama-index-core