Interface between LLMs and your data for building retrieval-augmented generation (RAG) applications.

Storage backends and global configuration for LlamaIndex applications: persisting indices, managing storage contexts, and configuring system-wide settings. Provides central storage management for indices, documents, and metadata, with support for a variety of storage backends and persistence options.
class StorageContext:
    """
    Storage context for managing index persistence and document storage.

    Args:
        docstore: Document storage backend
        index_store: Index metadata storage
        vector_stores: Vector storage backends
        graph_store: Graph storage backend
        property_graph_store: Property graph storage
        **kwargs: Additional storage configurations
    """

    def __init__(
        self,
        docstore=None,
        index_store=None,
        vector_stores=None,
        graph_store=None,
        property_graph_store=None,
        **kwargs
    ): ...

    @classmethod
    def from_defaults(
        cls,
        persist_dir=None,
        docstore=None,
        index_store=None,
        vector_store=None,
        graph_store=None,
        **kwargs
    ):
        """
        Create a storage context with default configurations.

        Args:
            persist_dir: Directory for persistent storage
            docstore: Custom document store
            index_store: Custom index store
            vector_store: Custom vector store
            graph_store: Custom graph store

        Returns:
            StorageContext: Configured storage context
        """

    def persist(self, persist_dir=None, **kwargs):
        """
        Persist all storage components to disk.

        Args:
            persist_dir: Target directory for persistence
        """

    @property
    def vector_store(self):
        """Default vector store instance."""

    def add_vector_store(self, vector_store, namespace=None):
        """Add an additional vector store with an optional namespace."""


# Usage Example:
# NOTE(review): load_index_from_storage is used below, so it must be imported.
from llama_index.core import (
    StorageContext,
    VectorStoreIndex,
    SimpleDirectoryReader,
    load_index_from_storage,
)
from llama_index.vector_stores.chroma import ChromaVectorStore
import chromadb

# Default file-based storage
storage_context = StorageContext.from_defaults(persist_dir="./storage")

# Custom vector store
chroma_client = chromadb.Client()
chroma_collection = chroma_client.create_collection("my_collection")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
    persist_dir="./custom_storage"
)

# Create index with custom storage
documents = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context
)

# Persist to disk
storage_context.persist()

# Load from persisted storage
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context)

# System-wide configuration for LLM, embedding models, and other core
# components with dynamic reconfiguration support.
class Settings:
    """
    Global settings singleton for LlamaIndex configuration.

    Attributes:
        llm: Default language model instance
        embed_model: Default embedding model instance
        node_parser: Default node parser for document chunking
        transformations: List of document transformations
        chunk_size: Default chunk size for text splitting
        chunk_overlap: Default overlap between chunks
        callback_manager: Global callback manager
        tokenizer: Tokenizer function for token counting
    """

    # Core model settings.
    # Annotations are quoted forward references: the referenced types are
    # declared elsewhere and evaluating them here would raise NameError.
    llm: "LLM" = None
    embed_model: "BaseEmbedding" = None

    # Document processing settings
    node_parser: "NodeParser" = None
    transformations: "List[TransformComponent]" = None

    # Chunking parameters
    chunk_size: int = 1024
    chunk_overlap: int = 200

    # System components
    callback_manager: "CallbackManager" = None
    tokenizer: "Callable[[str], List]" = None

    @classmethod
    def from_defaults(
        cls,
        llm=None,
        embed_model=None,
        node_parser=None,
        chunk_size=None,
        chunk_overlap=None,
        **kwargs
    ):
        """Configure settings with default values."""

    @staticmethod
    def reset():
        """Reset all settings to default values."""

    @staticmethod
    def configure(**kwargs):
        """Configure multiple settings at once."""


# Usage Example:
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# Configure global settings
Settings.llm = OpenAI(model="gpt-4", temperature=0.1)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
Settings.chunk_size = 512
Settings.chunk_overlap = 50

# All indices and operations will use these settings by default
# (assumes `VectorStoreIndex` and `documents` from the earlier example).
index = VectorStoreIndex.from_documents(documents)

# Temporary override for specific operations
with Settings.context(llm=OpenAI(model="gpt-3.5-turbo")):
    query_engine = index.as_query_engine()
    response = query_engine.query("What is this about?")

# Reset to defaults
Settings.reset()

# Document storage backends for managing raw document content with metadata
# and efficient retrieval.
class BaseDocumentStore:
    """Base class for document storage backends."""

    def add_documents(self, docs, allow_update=True):
        """Add documents to storage."""

    def get_document(self, doc_id, raise_error=True):
        """Retrieve a document by ID."""

    def delete_document(self, doc_id, raise_error=True):
        """Delete a document by ID."""

    def document_exists(self, doc_id):
        """Check if a document exists."""
class SimpleDocumentStore(BaseDocumentStore):
    """
    In-memory document store with optional file persistence.

    Args:
        simple_file_store: File store for persistence
    """

    def __init__(self, simple_file_store=None): ...

    def persist(self, persist_path=None):
        """Persist the document store to a file."""

    @classmethod
    def from_persist_path(cls, persist_path):
        """Load a document store from a file."""
class MongoDocumentStore(BaseDocumentStore):
    """
    MongoDB-based document store.

    Args:
        mongo_client: MongoDB client instance
        db_name: Database name
        collection_name: Collection name
    """

    def __init__(self, mongo_client, db_name="llama_index", collection_name="documents"): ...


# Index metadata storage for managing index structures, mappings, and
# retrieval metadata.
class BaseIndexStore:
    """Base class for index storage backends."""

    def add_index_struct(self, index_struct):
        """Add an index structure to storage."""

    def delete_index_struct(self, key):
        """Delete an index structure."""

    def get_index_struct(self, struct_id=None):
        """Get an index structure by ID."""
class SimpleIndexStore(BaseIndexStore):
    """
    Simple file-based index store.

    Args:
        simple_file_store: File store for persistence
    """

    def __init__(self, simple_file_store=None): ...

    def persist(self, persist_path=None):
        """Persist the index store to a file."""

    @classmethod
    def from_persist_path(cls, persist_path):
        """Load an index store from a file."""


# Vector storage backends for embedding storage and similarity search with
# support for various vector databases.
class VectorStore:
    """Base class for vector storage backends."""

    def add(self, nodes, **kwargs):
        """Add nodes with embeddings to the vector store."""

    def delete(self, ref_doc_id, **kwargs):
        """Delete vectors by document reference ID."""

    def query(self, query, **kwargs):
        """Query for similar vectors."""

    def persist(self, persist_path=None, **kwargs):
        """Persist the vector store if supported."""
class SimpleVectorStore(VectorStore):
    """
    Simple in-memory vector store with file persistence.

    Args:
        simple_file_store: File store for persistence
    """

    def __init__(self, simple_file_store=None): ...
# Integration vector stores — thin wrappers around third-party vector databases.
class ChromaVectorStore(VectorStore):
    """Chroma vector database integration."""

class PineconeVectorStore(VectorStore):
    """Pinecone vector database integration."""

class WeaviateVectorStore(VectorStore):
    """Weaviate vector database integration."""

class QdrantVectorStore(VectorStore):
    """Qdrant vector database integration."""


# Vector Store Usage Example:
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.vector_stores.pinecone import PineconeVectorStore
import chromadb
import pinecone

# Chroma setup
chroma_client = chromadb.Client()
chroma_collection = chroma_client.create_collection("my_docs")
chroma_vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# Pinecone setup
pinecone.init(api_key="your-key", environment="your-env")
pinecone_index = pinecone.Index("my-index")
pinecone_vector_store = PineconeVectorStore(pinecone_index=pinecone_index)

# Use with storage context
# (assumes StorageContext, VectorStoreIndex, and `documents` from the earlier
# examples are in scope).
storage_context = StorageContext.from_defaults(
    vector_store=chroma_vector_store  # or pinecone_vector_store
)
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context
)

# Graph storage backends for knowledge graphs and property graphs with
# support for various graph databases.
class GraphStore:
    """Base class for graph storage backends."""

    def upsert_triplet(self, subj, pred, obj):
        """Insert or update a knowledge triplet."""

    def delete_triplet(self, subj, pred, obj):
        """Delete a knowledge triplet."""

    def get_triplets(self, subj=None):
        """Get triplets by subject."""

    def get_rel_map(self, subjs=None):
        """Get a relationship map for subjects."""
class SimpleGraphStore(GraphStore):
    """
    Simple in-memory graph store.

    Args:
        simple_file_store: File store for persistence
    """

    def __init__(self, simple_file_store=None): ...

    def persist(self, persist_path=None):
        """Persist the graph store to a file."""
class Neo4jGraphStore(GraphStore):
    """
    Neo4j graph database integration.

    Args:
        url: Neo4j database URL
        username: Database username
        password: Database password
        database: Database name
    """

    def __init__(self, url, username, password, database="neo4j"): ...
class PropertyGraphStore:
    """Base class for property graph storage."""

    def upsert_nodes(self, nodes):
        """Insert or update graph nodes."""

    def upsert_relations(self, relations):
        """Insert or update graph relations."""

    def delete(self, ids):
        """Delete nodes and relations by IDs."""

    def structured_query(self, query, **kwargs):
        """Execute a structured query against the graph."""


# Utility functions for saving and loading indices with storage context
# management.
def load_index_from_storage(
    storage_context,
    index_id=None,
    **kwargs
):
    """
    Load an index from a storage context.

    Args:
        storage_context: Storage context with persisted data
        index_id: Specific index ID to load

    Returns:
        BaseIndex: Loaded index instance
    """
def load_indices_from_storage(
    storage_context,
    index_ids=None,
    **kwargs
):
    """
    Load multiple indices from storage.

    Args:
        storage_context: Storage context
        index_ids: List of index IDs to load

    Returns:
        Dict[str, BaseIndex]: Dictionary of loaded indices
    """
def load_graph_from_storage(
    storage_context,
    root_id=None,
    **kwargs
):
    """
    Load a composable graph from storage.

    Args:
        storage_context: Storage context
        root_id: Root graph node ID

    Returns:
        ComposableGraph: Loaded graph structure
    """


# Persistence Usage Example:
# NOTE(review): SimpleDirectoryReader is used below, so it must be imported.
from llama_index.core import (
    StorageContext,
    VectorStoreIndex,
    SimpleDirectoryReader,
    load_index_from_storage,
    load_indices_from_storage
)

# Create and persist index
documents = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(documents)

# Persist to default location
index.storage_context.persist()

# Persist to custom location
index.storage_context.persist(persist_dir="./my_storage")

# Load from storage
storage_context = StorageContext.from_defaults(persist_dir="./my_storage")
loaded_index = load_index_from_storage(storage_context)

# Load multiple indices
storage_context = StorageContext.from_defaults(persist_dir="./multi_storage")
indices = load_indices_from_storage(storage_context)
print(f"Loaded {len(indices)} indices")
for index_id, index in indices.items():
    print(f"Index {index_id}: {type(index)}")

# Legacy service context for backwards compatibility with older LlamaIndex
# versions.
class ServiceContext:
    """
    Legacy service context for backwards compatibility.

    Note: Deprecated in favor of the Settings class.

    Args:
        llm: Language model instance
        embed_model: Embedding model instance
        node_parser: Node parser for chunking
        transformations: Document transformations
        **kwargs: Additional service configurations
    """

    def __init__(
        self,
        llm=None,
        embed_model=None,
        node_parser=None,
        transformations=None,
        **kwargs
    ): ...

    @classmethod
    def from_defaults(
        cls,
        llm=None,
        embed_model=None,
        chunk_size=None,
        chunk_overlap=None,
        **kwargs
    ):
        """Create a service context with defaults."""
def set_global_service_context(service_context):
    """Set the global service context (deprecated in favor of Settings)."""


# Advanced configuration patterns for complex deployments and environment
# management.
class SettingsContext:
    """Context manager for temporary settings changes."""

    def __init__(self, **kwargs):
        """Initialize with temporary settings."""

    def __enter__(self):
        """Apply temporary settings."""

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Restore original settings."""
# Context manager usage
# (assumes Settings, OpenAI, VectorStoreIndex, and `documents` from the
# earlier examples are in scope).
with Settings.context(
    llm=OpenAI(model="gpt-3.5-turbo"),
    chunk_size=256
):
    # Temporary settings active here
    index = VectorStoreIndex.from_documents(documents)
# Original settings restored automatically

# Environment-based configuration for deployment across different
# environments.
Environment Variables:

# Common environment variables
OPENAI_API_KEY = "your-openai-key"
LLAMA_INDEX_CACHE_DIR = "./cache"
LLAMA_INDEX_GLOBAL_HANDLER = "wandb"  # or "simple", "arize", etc.

# Vector store configurations
PINECONE_API_KEY = "your-pinecone-key"
PINECONE_ENVIRONMENT = "your-environment"
CHROMA_HOST = "localhost"
CHROMA_PORT = "8000"

# Graph database configurations
NEO4J_URL = "bolt://localhost:7687"
NEO4J_USERNAME = "neo4j"
NEO4J_PASSWORD = "password"

Configuration Loading:
import os
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# Load from environment
Settings.llm = OpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
    model=os.getenv("OPENAI_MODEL", "gpt-3.5-turbo")
)
Settings.embed_model = OpenAIEmbedding(
    api_key=os.getenv("OPENAI_API_KEY"),
    model=os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-ada-002")
)

# Cache directory
cache_dir = os.getenv("LLAMA_INDEX_CACHE_DIR", "./cache")

# Install with Tessl CLI
npx tessl i tessl/pypi-llama-index