llama-index-core — interface between LLMs and your data.

Storage abstractions and context management for persisting indices, documents, and vector stores, with support for various backends. The storage system provides persistence for index state, document collections, and retrieval contexts across application sessions.

The central storage context coordinates persistence across the different storage backends and keeps them consistent.
class StorageContext:
    """
    Storage context for managing persistence of indices, documents, and vector stores.

    Parameters:
    - docstore: Optional[BaseDocumentStore], document storage backend
    - index_store: Optional[BaseIndexStore], index metadata storage
    - vector_store: Optional[VectorStore], vector embedding storage
    - property_graph_store: Optional[PropertyGraphStore], property graph storage
    - graph_store: Optional[GraphStore], knowledge graph storage (deprecated)
    - image_store: Optional[BaseImageStore], image storage backend
    - persist_dir: Optional[str], directory for persistence
    """

    def __init__(
        self,
        docstore: Optional[BaseDocumentStore] = None,
        index_store: Optional[BaseIndexStore] = None,
        vector_store: Optional[VectorStore] = None,
        property_graph_store: Optional[PropertyGraphStore] = None,
        graph_store: Optional[GraphStore] = None,
        image_store: Optional[BaseImageStore] = None,
        persist_dir: Optional[str] = None,
        **kwargs,
    ): ...

    @classmethod
    def from_defaults(
        cls,
        docstore: Optional[BaseDocumentStore] = None,
        index_store: Optional[BaseIndexStore] = None,
        vector_store: Optional[VectorStore] = None,
        property_graph_store: Optional[PropertyGraphStore] = None,
        graph_store: Optional[GraphStore] = None,
        image_store: Optional[BaseImageStore] = None,
        persist_dir: Optional[str] = None,
        **kwargs,
    ) -> "StorageContext":
        """
        Create StorageContext with default storage implementations.

        Parameters:
        - docstore: Optional[BaseDocumentStore], document storage backend
        - index_store: Optional[BaseIndexStore], index storage backend
        - vector_store: Optional[VectorStore], vector storage backend
        - property_graph_store: Optional[PropertyGraphStore], property graph storage
        - graph_store: Optional[GraphStore], knowledge graph storage
        - image_store: Optional[BaseImageStore], image storage backend
        - persist_dir: Optional[str], persistence directory path

        Returns:
        - StorageContext, configured storage context with default implementations
        """

    def persist(self, persist_dir: Optional[str] = None) -> None:
        """
        Persist all storage components to disk.

        Parameters:
        - persist_dir: Optional[str], directory path for persistence
        """

    @classmethod
    def from_persist_dir(
        cls,
        persist_dir: str,
        docstore_fname: str = "docstore.json",
        index_store_fname: str = "index_store.json",
        vector_store_fname: str = "vector_store.json",
        property_graph_store_fname: str = "property_graph_store.json",
        graph_store_fname: str = "graph_store.json",
        image_store_fname: str = "image_store.json",
        **kwargs,
    ) -> "StorageContext":
        """
        Load StorageContext from persisted directory.

        Parameters:
        - persist_dir: str, directory containing persisted storage files
        - docstore_fname: str, filename for document store data
        - index_store_fname: str, filename for index store data
        - vector_store_fname: str, filename for vector store data
        - property_graph_store_fname: str, filename for property graph data
        - graph_store_fname: str, filename for graph store data
        - image_store_fname: str, filename for image store data

        Returns:
        - StorageContext, loaded storage context from disk
        """

    def to_dict(self) -> dict:
        """Convert StorageContext to dictionary representation."""

    @classmethod
    def from_dict(cls, config_dict: dict) -> "StorageContext":
        """Create StorageContext from dictionary configuration."""


# Persistent storage for document collections with metadata and relationship management.
class BaseDocumentStore:
    """
    Base interface for document storage implementations.

    Document stores persist document objects and their metadata, supporting
    efficient retrieval by document ID and batch operations.
    """

    def add_documents(
        self,
        docs: Sequence[BaseNode],
        allow_update: bool = True,
        batch_size: Optional[int] = None,
        store_text: bool = True,
    ) -> None:
        """
        Add documents to the store.

        Parameters:
        - docs: Sequence[BaseNode], documents to store
        - allow_update: bool, whether to allow updating existing documents
        - batch_size: Optional[int], batch size for bulk operations
        - store_text: bool, whether to store document text content
        """

    def get_document(
        self,
        doc_id: str,
        raise_error: bool = True,
    ) -> Optional[BaseNode]:
        """
        Retrieve document by ID.

        Parameters:
        - doc_id: str, document identifier
        - raise_error: bool, whether to raise error if document not found

        Returns:
        - Optional[BaseNode], retrieved document or None if not found
        """

    def get_documents(
        self,
        doc_ids: List[str],
        raise_error: bool = True,
    ) -> List[BaseNode]:
        """
        Retrieve multiple documents by IDs.

        Parameters:
        - doc_ids: List[str], list of document identifiers
        - raise_error: bool, whether to raise error for missing documents

        Returns:
        - List[BaseNode], list of retrieved documents
        """

    def delete_document(self, doc_id: str, raise_error: bool = True) -> None:
        """
        Delete document from store.

        Parameters:
        - doc_id: str, document identifier to delete
        - raise_error: bool, whether to raise error if document not found
        """

    def delete_documents(
        self,
        doc_ids: List[str],
        raise_error: bool = True,
    ) -> None:
        """
        Delete multiple documents from store.

        Parameters:
        - doc_ids: List[str], list of document identifiers to delete
        - raise_error: bool, whether to raise error for missing documents
        """

    def set_document_hash(self, doc_id: str, doc_hash: str) -> None:
        """Set hash for document to track changes."""

    def get_document_hash(self, doc_id: str) -> Optional[str]:
        """Get hash for document."""

    def get_all_document_hashes(self) -> Dict[str, str]:
        """Get hashes for all documents."""

    @property
    def docs(self) -> Dict[str, BaseNode]:
        """Get all documents as dictionary."""

    def persist(
        self,
        persist_path: str,
        fs: Optional[AbstractFileSystem] = None,
    ) -> None:
        """
        Persist document store to disk.

        Parameters:
        - persist_path: str, path for persistence
        - fs: Optional[AbstractFileSystem], filesystem interface
        """
class SimpleDocumentStore(BaseDocumentStore):
    """
    Simple in-memory document store implementation.

    Parameters:
    - simple_file_store: Optional[SimpleFileStore], file store backend
    """

    def __init__(self, simple_file_store: Optional[SimpleFileStore] = None): ...

    @classmethod
    def from_persist_dir(
        cls,
        persist_dir: str,
        namespace: Optional[str] = None,
        fs: Optional[AbstractFileSystem] = None,
    ) -> "SimpleDocumentStore":
        """Load SimpleDocumentStore from persisted directory."""

    @classmethod
    def from_persist_path(
        cls,
        persist_path: str,
        fs: Optional[AbstractFileSystem] = None,
    ) -> "SimpleDocumentStore":
        """Load SimpleDocumentStore from persist path."""


# Metadata storage for index structures and configurations with support for multiple index types.
class BaseIndexStore:
    """
    Base interface for index metadata storage.

    Index stores maintain metadata about index structures, their configuration,
    and relationships without storing the actual index data.
    """

    def add_index_struct(self, index_struct: IndexStruct) -> None:
        """
        Add index structure metadata to store.

        Parameters:
        - index_struct: IndexStruct, index structure metadata to store
        """

    def delete_index_struct(self, key: str) -> None:
        """
        Delete index structure from store.

        Parameters:
        - key: str, index structure key to delete
        """

    def get_index_struct(self, struct_id: Optional[str] = None) -> Optional[IndexStruct]:
        """
        Retrieve index structure by ID.

        Parameters:
        - struct_id: Optional[str], index structure identifier

        Returns:
        - Optional[IndexStruct], retrieved index structure or None
        """

    def get_index_structs(self) -> List[IndexStruct]:
        """
        Get all index structures.

        Returns:
        - List[IndexStruct], list of all stored index structures
        """

    @property
    def index_structs(self) -> Dict[str, IndexStruct]:
        """Get all index structures as dictionary."""
class SimpleIndexStore(BaseIndexStore):
    """
    Simple in-memory index store implementation.

    Parameters:
    - simple_file_store: Optional[SimpleFileStore], file store backend
    """

    def __init__(self, simple_file_store: Optional[SimpleFileStore] = None): ...

    def persist(
        self,
        persist_path: str,
        fs: Optional[AbstractFileSystem] = None,
    ) -> None:
        """Persist index store to disk."""

    @classmethod
    def from_persist_dir(
        cls,
        persist_dir: str,
        namespace: Optional[str] = None,
        fs: Optional[AbstractFileSystem] = None,
    ) -> "SimpleIndexStore":
        """Load SimpleIndexStore from persisted directory."""


# Vector store interfaces and implementations for embedding storage and similarity search.
class VectorStore:
    """
    Base interface for vector storage implementations.

    Vector stores handle embedding storage, similarity search, and metadata
    filtering for semantic retrieval operations.
    """

    def add(self, nodes: List[BaseNode]) -> List[str]:
        """
        Add nodes with embeddings to vector store.

        Parameters:
        - nodes: List[BaseNode], nodes with embeddings to add

        Returns:
        - List[str], list of node IDs added to store
        """

    def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
        """
        Delete vectors by reference document ID.

        Parameters:
        - ref_doc_id: str, reference document ID to delete
        - **delete_kwargs: additional deletion parameters
        """

    def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
        """
        Query vector store for similar vectors.

        Parameters:
        - query: VectorStoreQuery, query specification with embedding and filters
        - **kwargs: additional query parameters

        Returns:
        - VectorStoreQueryResult, query results with nodes and similarities
        """

    def persist(
        self,
        persist_path: str,
        fs: Optional[AbstractFileSystem] = None,
    ) -> None:
        """
        Persist vector store to disk.

        Parameters:
        - persist_path: str, path for persistence
        - fs: Optional[AbstractFileSystem], filesystem interface
        """
class SimpleVectorStore(VectorStore):
    """
    Simple in-memory vector store implementation.

    Parameters:
    - data: Optional[SimpleVectorStoreData], vector store data
    - simple_file_store: Optional[SimpleFileStore], file store backend
    """

    def __init__(
        self,
        data: Optional[SimpleVectorStoreData] = None,
        simple_file_store: Optional[SimpleFileStore] = None,
        **kwargs,
    ): ...

    @classmethod
    def from_persist_dir(
        cls,
        persist_dir: str,
        namespace: Optional[str] = None,
        fs: Optional[AbstractFileSystem] = None,
    ) -> "SimpleVectorStore":
        """Load SimpleVectorStore from persisted directory."""

    def get(self, text_id: str) -> List[float]:
        """Get embedding vector by text ID."""

    def get_nodes(self, node_ids: Optional[List[str]] = None) -> List[BaseNode]:
        """Get nodes by IDs or all nodes if none specified."""


# Storage for property graph structures with nodes, relationships, and properties.
class PropertyGraphStore:
    """
    Base interface for property graph storage implementations.

    Property graph stores handle graph structures with typed nodes and
    relationships, supporting complex graph queries and traversal operations.
    """

    def get(
        self,
        properties: Optional[dict] = None,
        ids: Optional[List[str]] = None,
    ) -> List[LabelledNode]:
        """
        Retrieve nodes by properties or IDs.

        Parameters:
        - properties: Optional[dict], property filters for node retrieval
        - ids: Optional[List[str]], specific node IDs to retrieve

        Returns:
        - List[LabelledNode], matching nodes from the graph
        """

    def get_rel_map(
        self,
        subjs: Optional[List[str]] = None,
        depth: int = 2,
        limit: int = 30,
    ) -> List[Triplet]:
        """
        Get relationship map starting from subject nodes.

        Parameters:
        - subjs: Optional[List[str]], starting subject node IDs
        - depth: int, maximum traversal depth
        - limit: int, maximum number of relationships to return

        Returns:
        - List[Triplet], relationship triplets in the graph
        """

    def upsert_nodes(self, nodes: List[LabelledNode]) -> None:
        """
        Insert or update nodes in the graph.

        Parameters:
        - nodes: List[LabelledNode], nodes to insert or update
        """

    def upsert_relations(self, relations: List[Relation]) -> None:
        """
        Insert or update relationships in the graph.

        Parameters:
        - relations: List[Relation], relationships to insert or update
        """

    def delete(
        self,
        entity_names: Optional[List[str]] = None,
        relation_names: Optional[List[str]] = None,
        properties: Optional[dict] = None,
        ids: Optional[List[str]] = None,
    ) -> None:
        """
        Delete nodes and relationships from graph.

        Parameters:
        - entity_names: Optional[List[str]], entity names to delete
        - relation_names: Optional[List[str]], relation types to delete
        - properties: Optional[dict], property-based deletion filters
        - ids: Optional[List[str]], specific IDs to delete
        """
class SimplePropertyGraphStore(PropertyGraphStore):
    """
    Simple in-memory property graph store implementation.

    Parameters:
    - simple_file_store: Optional[SimpleFileStore], file store backend
    """

    def __init__(self, simple_file_store: Optional[SimpleFileStore] = None): ...

    def persist(
        self,
        persist_path: str,
        fs: Optional[AbstractFileSystem] = None,
    ) -> None:
        """Persist property graph store to disk."""

    @classmethod
    def from_persist_dir(
        cls,
        persist_dir: str,
        namespace: Optional[str] = None,
        fs: Optional[AbstractFileSystem] = None,
    ) -> "SimplePropertyGraphStore":
        """Load SimplePropertyGraphStore from persisted directory."""


# Legacy graph storage interface for knowledge graphs and relationship management.
class GraphStore:
    """
    Legacy graph storage interface for knowledge graphs.

    Note: This interface is deprecated in favor of PropertyGraphStore
    for new applications.
    """

    def get(self, subj: str) -> List[List[str]]:
        """Get relationships for a subject."""

    def get_rel_map(
        self,
        subjs: Optional[List[str]] = None,
        depth: int = 2,
    ) -> Dict[str, List[List[str]]]:
        """Get relationship map for subjects."""

    def upsert_triplet(self, subj: str, rel: str, obj: str) -> None:
        """Insert or update a relationship triplet."""

    def delete(self, subj: str, rel: str, obj: str) -> None:
        """Delete a relationship triplet."""
class SimpleGraphStore(GraphStore):
    """Simple in-memory graph store implementation (legacy)."""

    def __init__(self, simple_file_store: Optional[SimpleFileStore] = None): ...


# Storage interface for image data and multi-modal content management.
class BaseImageStore:
    """
    Base interface for image storage implementations.

    Image stores handle storage and retrieval of image data with metadata
    and support for different image formats and storage backends.
    """

    def add_image(self, image_id: str, image_data: bytes) -> None:
        """
        Add image to store.

        Parameters:
        - image_id: str, unique identifier for the image
        - image_data: bytes, raw image data
        """

    def get_image(self, image_id: str) -> Optional[bytes]:
        """
        Retrieve image by ID.

        Parameters:
        - image_id: str, image identifier

        Returns:
        - Optional[bytes], image data or None if not found
        """

    def delete_image(self, image_id: str) -> None:
        """
        Delete image from store.

        Parameters:
        - image_id: str, image identifier to delete
        """
class SimpleImageStore(BaseImageStore):
    """Simple in-memory image store implementation."""

    def __init__(self, simple_file_store: Optional[SimpleFileStore] = None): ...


# Utility functions for loading indices and data structures from persistent storage.
def load_index_from_storage(
    storage_context: StorageContext,
    index_id: Optional[str] = None,
    **kwargs,
) -> BaseIndex:
    """
    Load a single index from storage context.

    Parameters:
    - storage_context: StorageContext, storage context containing index data
    - index_id: Optional[str], specific index ID to load
    - **kwargs: additional parameters for index construction

    Returns:
    - BaseIndex, loaded index instance
    """
def load_indices_from_storage(
    storage_context: StorageContext,
    index_ids: Optional[List[str]] = None,
    **kwargs,
) -> List[BaseIndex]:
    """
    Load multiple indices from storage context.

    Parameters:
    - storage_context: StorageContext, storage context containing index data
    - index_ids: Optional[List[str]], specific index IDs to load
    - **kwargs: additional parameters for index construction

    Returns:
    - List[BaseIndex], list of loaded index instances
    """
def load_graph_from_storage(
    storage_context: StorageContext,
    root_id: str,
    **kwargs,
) -> ComposableGraph:
    """
    Load composable graph from storage context.

    Parameters:
    - storage_context: StorageContext, storage context containing graph data
    - root_id: str, root node ID for the graph
    - **kwargs: additional parameters for graph construction

    Returns:
    - ComposableGraph, loaded graph instance
    """


from llama_index.core.storage import StorageContext
from llama_index.core import VectorStoreIndex, Document

# Create documents
documents = [
    Document(text="Machine learning is a subset of artificial intelligence."),
    Document(text="Deep learning uses neural networks with multiple layers."),
    Document(text="Natural language processing helps computers understand text."),
]

# Create storage context with default implementations
storage_context = StorageContext.from_defaults(persist_dir="./storage")

# Create index with storage context
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)

# Persist to disk
storage_context.persist()
print("Index and storage persisted to ./storage")

# Load storage context from disk
loaded_storage_context = StorageContext.from_persist_dir("./storage")

# Load index from storage
loaded_index = load_index_from_storage(loaded_storage_context)

# Use loaded index
query_engine = loaded_index.as_query_engine()
response = query_engine.query("What is machine learning?")
print(response.response)

from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.storage.index_store import SimpleIndexStore
from llama_index.core.vector_stores import SimpleVectorStore

# Create custom storage components
docstore = SimpleDocumentStore()
index_store = SimpleIndexStore()
vector_store = SimpleVectorStore()

# Create custom storage context
custom_storage_context = StorageContext.from_defaults(
    docstore=docstore,
    index_store=index_store,
    vector_store=vector_store,
    persist_dir="./custom_storage",
)

# Create index with custom storage
custom_index = VectorStoreIndex.from_documents(
    documents,
    storage_context=custom_storage_context,
)

# Persist custom configuration
custom_storage_context.persist()

from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.schema import TextNode

# Create document store
docstore = SimpleDocumentStore()

# Create and add nodes
nodes = [
    TextNode(text="First document content", node_id="doc_1"),
    TextNode(text="Second document content", node_id="doc_2"),
    TextNode(text="Third document content", node_id="doc_3"),
]
docstore.add_documents(nodes)

# Retrieve a single document
doc_1 = docstore.get_document("doc_1")
print(f"Retrieved: {doc_1.text}")

# Retrieve multiple documents
docs = docstore.get_documents(["doc_1", "doc_2"])
print(f"Retrieved {len(docs)} documents")

# Get all documents
all_docs = docstore.docs
print(f"Total documents in store: {len(all_docs)}")

# Delete document
docstore.delete_document("doc_3")
print(f"Remaining documents: {len(docstore.docs)}")

from llama_index.core.vector_stores import SimpleVectorStore
from llama_index.core.vector_stores.types import VectorStoreQuery

# Create vector store
vector_store = SimpleVectorStore()

# Add nodes with embeddings (embeddings would normally be computed by an
# embedding model; mocked here for demonstration)
import random

nodes_with_embeddings = []
for i, node in enumerate(nodes):
    # Mock embeddings for demonstration
    node.embedding = [random.random() for _ in range(384)]
    nodes_with_embeddings.append(node)

# Add to vector store
vector_store.add(nodes_with_embeddings)

# Query vector store
query_embedding = [random.random() for _ in range(384)]
query = VectorStoreQuery(
    query_embedding=query_embedding,
    similarity_top_k=2,
)
results = vector_store.query(query)
print(f"Found {len(results.nodes)} similar nodes")
for node in results.nodes:
    print(f"Node ID: {node.node_id}, Text: {node.text[:50]}...")

from llama_index.core.graph_stores import SimplePropertyGraphStore
from llama_index.core.graph_stores.types import LabelledNode, Relation

# Create property graph store
graph_store = SimplePropertyGraphStore()

# Create nodes with labels and properties
nodes = [
    LabelledNode(
        id="ml",
        label="Topic",
        properties={"name": "Machine Learning", "category": "AI"},
    ),
    LabelledNode(
        id="dl",
        label="Topic",
        properties={"name": "Deep Learning", "category": "AI"},
    ),
    LabelledNode(
        id="nn",
        label="Concept",
        properties={"name": "Neural Networks", "type": "algorithm"},
    ),
]

# Create relationships
relations = [
    Relation(
        source_id="dl",
        target_id="ml",
        label="IS_SUBSET_OF",
        properties={"strength": 0.9},
    ),
    Relation(
        source_id="dl",
        target_id="nn",
        label="USES",
        properties={"importance": "high"},
    ),
]

# Add to graph store
graph_store.upsert_nodes(nodes)
graph_store.upsert_relations(relations)

# Query graph by node properties
ml_related = graph_store.get(properties={"category": "AI"})
print(f"AI-related nodes: {len(ml_related)}")

# Get relationship map
rel_map = graph_store.get_rel_map(subjs=["dl"], depth=2)
print(f"Deep learning relationships: {len(rel_map)}")

# Create multiple indices with shared storage
storage_context = StorageContext.from_defaults(persist_dir="./multi_index")

# Create different index types sharing one storage context
vector_index = VectorStoreIndex.from_documents(
    documents[:2],
    storage_context=storage_context,
)

from llama_index.core import SummaryIndex

summary_index = SummaryIndex.from_documents(
    documents[2:],
    storage_context=storage_context,
)

# Persist all indices
storage_context.persist()

# Load all indices
loaded_storage = StorageContext.from_persist_dir("./multi_index")
loaded_indices = load_indices_from_storage(loaded_storage)
print(f"Loaded {len(loaded_indices)} indices")
for i, index in enumerate(loaded_indices):
    print(f"Index {i}: {type(index).__name__}")

# Create and configure storage context
storage_context = StorageContext.from_defaults()

# Convert to dictionary
storage_dict = storage_context.to_dict()
print("Storage context configuration:")
for key, value in storage_dict.items():
    print(f"  {key}: {type(value).__name__}")

# Recreate from dictionary
restored_context = StorageContext.from_dict(storage_dict)
print("Storage context restored from configuration")

# Storage configuration constants
# Default persistence locations and filenames used by the storage components.
DEFAULT_PERSIST_DIR = "./storage"
DEFAULT_DOCSTORE_FNAME = "docstore.json"
DEFAULT_INDEX_STORE_FNAME = "index_store.json"
DEFAULT_VECTOR_STORE_FNAME = "vector_store.json"
DEFAULT_GRAPH_STORE_FNAME = "graph_store.json"
DEFAULT_PROPERTY_GRAPH_STORE_FNAME = "property_graph_store.json"
DEFAULT_IMAGE_STORE_FNAME = "image_store.json"

# File system types
from fsspec import AbstractFileSystem

# Storage backend types
StorageBackend = Union[str, AbstractFileSystem]


# Index structure types
class IndexStruct:
    """Base class for index structure metadata."""

    pass


# Install with Tessl CLI
npx tessl i tessl/pypi-llama-index-core