
tessl/pypi-llama-index

Interface between LLMs and your data for building retrieval-augmented generation (RAG) applications

docs/storage-settings.md

Storage & Settings

Storage backends and global configuration for persisting indices, managing storage contexts, and applying system-wide settings in LlamaIndex applications.

Capabilities

Storage Context

Central storage management for indices, documents, and metadata with support for various storage backends and persistence options.

class StorageContext:
    """
    Storage context for managing index persistence and document storage.
    
    Args:
        docstore: Document storage backend
        index_store: Index metadata storage
        vector_stores: Vector storage backends
        graph_store: Graph storage backend
        property_graph_store: Property graph storage
        **kwargs: Additional storage configurations
    """
    def __init__(
        self,
        docstore=None,
        index_store=None,
        vector_stores=None,
        graph_store=None,
        property_graph_store=None,
        **kwargs
    ): ...
    
    @classmethod
    def from_defaults(
        cls,
        persist_dir=None,
        docstore=None,
        index_store=None,
        vector_store=None,
        graph_store=None,
        **kwargs
    ):
        """
        Create storage context with default configurations.
        
        Args:
            persist_dir: Directory for persistent storage
            docstore: Custom document store
            index_store: Custom index store
            vector_store: Custom vector store
            graph_store: Custom graph store
            
        Returns:
            StorageContext: Configured storage context
        """
    
    def persist(self, persist_dir=None, **kwargs):
        """
        Persist all storage components to disk.
        
        Args:
            persist_dir: Target directory for persistence
        """
    
    @property
    def vector_store(self):
        """Default vector store instance."""
    
    def add_vector_store(self, vector_store, namespace=None):
        """Add additional vector store with optional namespace."""

Usage Example:

from llama_index.core import StorageContext, VectorStoreIndex, SimpleDirectoryReader, load_index_from_storage
from llama_index.vector_stores.chroma import ChromaVectorStore
import chromadb

# Default file-based storage
storage_context = StorageContext.from_defaults(persist_dir="./storage")

# Custom vector store
chroma_client = chromadb.Client()
chroma_collection = chroma_client.create_collection("my_collection")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
    persist_dir="./custom_storage"
)

# Create index with custom storage
documents = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context
)

# Persist to disk
storage_context.persist()

# Load from persisted storage
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context)

Global Settings

System-wide configuration for LLM, embedding models, and other core components with dynamic reconfiguration support.

class Settings:
    """
    Global settings singleton for LlamaIndex configuration.
    
    Attributes:
        llm: Default language model instance
        embed_model: Default embedding model instance
        node_parser: Default node parser for document chunking
        transformations: List of document transformations
        chunk_size: Default chunk size for text splitting
        chunk_overlap: Default overlap between chunks
        callback_manager: Global callback manager
        tokenizer: Tokenizer function for token counting
    """
    # Core model settings
    llm: LLM = None
    embed_model: BaseEmbedding = None
    
    # Document processing settings
    node_parser: NodeParser = None
    transformations: List[TransformComponent] = None
    
    # Chunking parameters
    chunk_size: int = 1024
    chunk_overlap: int = 200
    
    # System components
    callback_manager: CallbackManager = None
    tokenizer: Callable[[str], List] = None
    
    @classmethod
    def from_defaults(
        cls,
        llm=None,
        embed_model=None,
        node_parser=None,
        chunk_size=None,
        chunk_overlap=None,
        **kwargs
    ):
        """Configure settings with default values."""
    
    @staticmethod
    def reset():
        """Reset all settings to default values."""
    
    @staticmethod
    def configure(**kwargs):
        """Configure multiple settings at once."""

Usage Example:

from llama_index.core import Settings, VectorStoreIndex
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# Configure global settings
Settings.llm = OpenAI(model="gpt-4", temperature=0.1)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
Settings.chunk_size = 512
Settings.chunk_overlap = 50

# All indices and operations will use these settings by default
index = VectorStoreIndex.from_documents(documents)

# Temporary override for specific operations
with Settings.context(llm=OpenAI(model="gpt-3.5-turbo")):
    query_engine = index.as_query_engine()
    response = query_engine.query("What is this about?")

# Reset to defaults
Settings.reset()
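The chunk_size and chunk_overlap settings above control a sliding-window split: each chunk starts chunk_overlap units before the previous chunk ended, so neighboring chunks share context. A minimal character-based sketch of that interaction in plain Python (the actual llama_index node parsers split on tokens and sentence boundaries, so this is illustrative only):

```python
# Illustrative sketch of how chunk_size / chunk_overlap interact during
# text splitting. Character-based for simplicity; not the real node parser.

def chunk_text(text: str, chunk_size: int = 512, chunk_overlap: int = 50) -> list[str]:
    """Split text into windows of chunk_size characters, each window
    starting chunk_overlap characters before the previous one ended."""
    if chunk_overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk size")
    step = chunk_size - chunk_overlap
    return [text[i:i + chunk_size] for i in range(0, len(text), step)]

chunks = chunk_text("a" * 1200, chunk_size=512, chunk_overlap=50)
print(len(chunks))  # 1200 chars with a 462-char step -> 3 chunks
```

Larger overlap improves continuity across chunk boundaries at the cost of more chunks (and more embedding calls).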

Document Storage

Document storage backends for managing raw document content with metadata and efficient retrieval.

class BaseDocumentStore:
    """Base class for document storage backends."""
    
    def add_documents(self, docs, allow_update=True):
        """Add documents to storage."""
    
    def get_document(self, doc_id, raise_error=True):
        """Retrieve document by ID."""
    
    def delete_document(self, doc_id, raise_error=True):
        """Delete document by ID."""
    
    def document_exists(self, doc_id):
        """Check if document exists."""

class SimpleDocumentStore(BaseDocumentStore):
    """
    In-memory document store with optional file persistence.
    
    Args:
        simple_file_store: File store for persistence
    """
    def __init__(self, simple_file_store=None): ...
    
    def persist(self, persist_path=None):
        """Persist document store to file."""
    
    @classmethod
    def from_persist_path(cls, persist_path):
        """Load document store from file."""

class MongoDocumentStore(BaseDocumentStore):
    """
    MongoDB-based document store.
    
    Args:
        mongo_client: MongoDB client instance
        db_name: Database name
        collection_name: Collection name
    """
    def __init__(self, mongo_client, db_name="llama_index", collection_name="documents"): ...
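To make the BaseDocumentStore contract above concrete, here is a minimal in-memory sketch in plain Python. It is not the llama_index implementation (the real stores add serialization, ref-doc tracking, and hashing), but it shows the behavior each method is expected to have:

```python
# Minimal dict-backed sketch of the document store contract. Documents are
# represented as plain dicts with an "id" key for illustration.

class InMemoryDocStore:
    def __init__(self):
        self._docs: dict[str, dict] = {}

    def add_documents(self, docs, allow_update=True):
        for doc in docs:
            if not allow_update and doc["id"] in self._docs:
                raise ValueError(f"document {doc['id']} already exists")
            self._docs[doc["id"]] = doc

    def get_document(self, doc_id, raise_error=True):
        doc = self._docs.get(doc_id)
        if doc is None and raise_error:
            raise KeyError(doc_id)
        return doc

    def delete_document(self, doc_id, raise_error=True):
        if doc_id not in self._docs:
            if raise_error:
                raise KeyError(doc_id)
            return
        del self._docs[doc_id]

    def document_exists(self, doc_id):
        return doc_id in self._docs

store = InMemoryDocStore()
store.add_documents([{"id": "doc1", "text": "hello"}])
print(store.document_exists("doc1"))  # True
```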

Index Storage

Index metadata storage for managing index structures, mappings, and retrieval metadata.

class BaseIndexStore:
    """Base class for index storage backends."""
    
    def add_index_struct(self, index_struct):
        """Add index structure to storage."""
    
    def delete_index_struct(self, key):
        """Delete index structure."""
    
    def get_index_struct(self, struct_id=None):
        """Get index structure by ID."""

class SimpleIndexStore(BaseIndexStore):
    """
    Simple file-based index store.
    
    Args:
        simple_file_store: File store for persistence
    """
    def __init__(self, simple_file_store=None): ...
    
    def persist(self, persist_path=None):
        """Persist index store to file."""
    
    @classmethod
    def from_persist_path(cls, persist_path):
        """Load index store from file."""

Vector Storage

Vector storage backends for embedding storage and similarity search with support for various vector databases.

class VectorStore:
    """Base class for vector storage backends."""
    
    def add(self, nodes, **kwargs):
        """Add nodes with embeddings to vector store."""
    
    def delete(self, ref_doc_id, **kwargs):
        """Delete vectors by document reference ID."""
    
    def query(self, query, **kwargs):
        """Query for similar vectors."""
    
    def persist(self, persist_path=None, **kwargs):
        """Persist vector store if supported."""

class SimpleVectorStore(VectorStore):
    """
    Simple in-memory vector store with file persistence.
    
    Args:
        simple_file_store: File store for persistence
    """
    def __init__(self, simple_file_store=None): ...

# Integration vector stores
class ChromaVectorStore(VectorStore):
    """Chroma vector database integration."""

class PineconeVectorStore(VectorStore):
    """Pinecone vector database integration."""

class WeaviateVectorStore(VectorStore):
    """Weaviate vector database integration."""

class QdrantVectorStore(VectorStore):
    """Qdrant vector database integration."""

Vector Store Usage Example:

from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.vector_stores.pinecone import PineconeVectorStore
import chromadb
import pinecone

# Chroma setup
chroma_client = chromadb.Client()
chroma_collection = chroma_client.create_collection("my_docs")
chroma_vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# Pinecone setup (pinecone-client v2 style; newer SDKs use pinecone.Pinecone)
pinecone.init(api_key="your-key", environment="your-env")
pinecone_index = pinecone.Index("my-index")
pinecone_vector_store = PineconeVectorStore(pinecone_index=pinecone_index)

# Use with storage context
storage_context = StorageContext.from_defaults(
    vector_store=chroma_vector_store  # or pinecone_vector_store
)

index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context
)
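Whatever the backend, the query path boils down to ranking stored embeddings by similarity to the query embedding. A brute-force cosine-similarity sketch of what a simple in-memory store does (production backends like Chroma, Pinecone, and Qdrant use approximate nearest-neighbor indexes instead; the node IDs here are made up):

```python
import math

# Brute-force similarity search over an in-memory embedding table.

def cosine(a, b):
    dot = sum(x * y for x, y in zip(a, b))
    return dot / (math.hypot(*a) * math.hypot(*b))

store = {
    "node-1": [1.0, 0.0],
    "node-2": [0.0, 1.0],
    "node-3": [0.7, 0.7],
}

def query(embedding, top_k=2):
    """Return the IDs of the top_k most similar stored embeddings."""
    ranked = sorted(store.items(),
                    key=lambda kv: cosine(embedding, kv[1]),
                    reverse=True)
    return [node_id for node_id, _ in ranked[:top_k]]

print(query([1.0, 0.1]))  # ['node-1', 'node-3']
```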

Graph Storage

Graph storage backends for knowledge graphs and property graphs with support for various graph databases.

class GraphStore:
    """Base class for graph storage backends."""
    
    def upsert_triplet(self, subj, pred, obj):
        """Insert or update a knowledge triplet."""
    
    def delete_triplet(self, subj, pred, obj):
        """Delete a knowledge triplet."""
    
    def get_triplets(self, subj=None):
        """Get triplets by subject."""
    
    def get_rel_map(self, subjs=None):
        """Get relationship map for subjects."""

class SimpleGraphStore(GraphStore):
    """
    Simple in-memory graph store.
    
    Args:
        simple_file_store: File store for persistence
    """
    def __init__(self, simple_file_store=None): ...
    
    def persist(self, persist_path=None):
        """Persist graph store to file."""

class Neo4jGraphStore(GraphStore):
    """
    Neo4j graph database integration.
    
    Args:
        url: Neo4j database URL
        username: Database username
        password: Database password
        database: Database name
    """
    def __init__(self, url, username, password, database="neo4j"): ...

class PropertyGraphStore:
    """Base class for property graph storage."""
    
    def upsert_nodes(self, nodes):
        """Insert or update graph nodes."""
    
    def upsert_relations(self, relations):
        """Insert or update graph relations."""
    
    def delete(self, ids):
        """Delete nodes and relations by IDs."""
    
    def structured_query(self, query, **kwargs):
        """Execute structured query against graph."""

Persistence and Loading

Utility functions for saving and loading indices with storage context management.

def load_index_from_storage(
    storage_context,
    index_id=None,
    **kwargs
):
    """
    Load index from storage context.
    
    Args:
        storage_context: Storage context with persisted data
        index_id: Specific index ID to load
        
    Returns:
        BaseIndex: Loaded index instance
    """

def load_indices_from_storage(
    storage_context,
    index_ids=None,
    **kwargs
):
    """
    Load multiple indices from storage.
    
    Args:
        storage_context: Storage context
        index_ids: List of index IDs to load
        
    Returns:
        Dict[str, BaseIndex]: Dictionary of loaded indices
    """

def load_graph_from_storage(
    storage_context,
    root_id=None,
    **kwargs
):
    """
    Load composable graph from storage.
    
    Args:
        storage_context: Storage context
        root_id: Root graph node ID
        
    Returns:
        ComposableGraph: Loaded graph structure
    """

Persistence Usage Example:

from llama_index.core import (
    StorageContext,
    VectorStoreIndex,
    SimpleDirectoryReader,
    load_index_from_storage,
    load_indices_from_storage,
)

# Create and persist index
documents = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(documents)

# Persist to default location
index.storage_context.persist()

# Persist to custom location
index.storage_context.persist(persist_dir="./my_storage")

# Load from storage
storage_context = StorageContext.from_defaults(persist_dir="./my_storage")
loaded_index = load_index_from_storage(storage_context)

# Load multiple indices
storage_context = StorageContext.from_defaults(persist_dir="./multi_storage")
indices = load_indices_from_storage(storage_context)

print(f"Loaded {len(indices)} indices")
for index_id, index in indices.items():
    print(f"Index {index_id}: {type(index)}")

Service Context (Legacy)

Legacy service context for backwards compatibility with older LlamaIndex versions.

class ServiceContext:
    """
    Legacy service context for backwards compatibility.
    
    Note: Deprecated in favor of Settings class.
    
    Args:
        llm: Language model instance
        embed_model: Embedding model instance
        node_parser: Node parser for chunking
        transformations: Document transformations
        **kwargs: Additional service configurations
    """
    def __init__(
        self,
        llm=None,
        embed_model=None,
        node_parser=None,
        transformations=None,
        **kwargs
    ): ...
    
    @classmethod
    def from_defaults(
        cls,
        llm=None,
        embed_model=None,
        chunk_size=None,
        chunk_overlap=None,
        **kwargs
    ):
        """Create service context with defaults."""

def set_global_service_context(service_context):
    """Set global service context (deprecated)."""

Configuration Management

Advanced configuration patterns for complex deployments and environment management.

class SettingsContext:
    """Context manager for temporary settings changes."""
    
    def __init__(self, **kwargs):
        """Initialize with temporary settings."""
    
    def __enter__(self):
        """Apply temporary settings."""
    
    def __exit__(self, exc_type, exc_val, exc_tb):
        """Restore original settings."""

# Context manager usage
with Settings.context(
    llm=OpenAI(model="gpt-3.5-turbo"),
    chunk_size=256
):
    # Temporary settings active here
    index = VectorStoreIndex.from_documents(documents)
    
# Original settings restored automatically
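The save-and-restore pattern behind a settings context manager can be sketched in plain Python. The GlobalConfig and temporary_settings names below are illustrative, not the llama_index internals:

```python
# Sketch of __enter__/__exit__ saving globals and restoring them on exit.

class GlobalConfig:
    chunk_size = 1024
    chunk_overlap = 200

class temporary_settings:
    def __init__(self, **overrides):
        self._overrides = overrides
        self._saved = {}

    def __enter__(self):
        # Save current values, then apply the overrides.
        for key, value in self._overrides.items():
            self._saved[key] = getattr(GlobalConfig, key)
            setattr(GlobalConfig, key, value)
        return GlobalConfig

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore saved values even if the body raised.
        for key, value in self._saved.items():
            setattr(GlobalConfig, key, value)
        return False  # don't swallow exceptions

with temporary_settings(chunk_size=256):
    print(GlobalConfig.chunk_size)  # 256
print(GlobalConfig.chunk_size)      # 1024
```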

Environment Configuration

Environment-based configuration for deployment across different environments.

Environment Variables:

# Common environment variables
export OPENAI_API_KEY="your-openai-key"
export LLAMA_INDEX_CACHE_DIR="./cache"
export LLAMA_INDEX_GLOBAL_HANDLER="wandb"  # or "simple", "arize", etc.

# Vector store configurations
export PINECONE_API_KEY="your-pinecone-key"
export PINECONE_ENVIRONMENT="your-environment"
export CHROMA_HOST="localhost"
export CHROMA_PORT="8000"

# Graph database configurations
export NEO4J_URL="bolt://localhost:7687"
export NEO4J_USERNAME="neo4j"
export NEO4J_PASSWORD="password"

Configuration Loading:

import os
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# Load from environment
Settings.llm = OpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
    model=os.getenv("OPENAI_MODEL", "gpt-3.5-turbo")
)

Settings.embed_model = OpenAIEmbedding(
    api_key=os.getenv("OPENAI_API_KEY"),
    model=os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-ada-002")
)

# Cache directory
cache_dir = os.getenv("LLAMA_INDEX_CACHE_DIR", "./cache")

Install with Tessl CLI

npx tessl i tessl/pypi-llama-index
