Interface between LLMs and your data
—
Index structures for organizing and retrieving information from documents. LlamaIndex provides multiple index types optimized for different retrieval patterns, from semantic similarity search to keyword matching and hierarchical navigation.
Primary index type for semantic similarity search using vector embeddings. Stores document chunks as embeddings and retrieves relevant content based on query similarity.
class VectorStoreIndex:
"""
Index that stores embeddings for semantic similarity retrieval.
Parameters:
- nodes: Optional[Sequence[BaseNode]], nodes to index
- embed_model: Optional[BaseEmbedding], embedding model to use
- storage_context: Optional[StorageContext], storage configuration
- service_context: Optional[ServiceContext], service configuration (deprecated)
- show_progress: bool, whether to show indexing progress
- store_nodes_override: bool, whether to store nodes in docstore
"""
def __init__(
self,
nodes: Optional[Sequence[BaseNode]] = None,
embed_model: Optional[BaseEmbedding] = None,
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
show_progress: bool = False,
store_nodes_override: bool = False,
**kwargs
): ...
@classmethod
def from_documents(
cls,
documents: Sequence[Document],
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
show_progress: bool = False,
**kwargs
) -> "VectorStoreIndex":
"""Create index from documents."""
def as_query_engine(
self,
retriever_mode: str = "default",
response_mode: str = "compact",
**kwargs
) -> BaseQueryEngine:
"""Convert to query engine."""
def as_retriever(
self,
retriever_mode: str = "default",
similarity_top_k: int = 10,
**kwargs
) -> BaseRetriever:
"""Convert to retriever."""
def as_chat_engine(
self,
chat_mode: str = "best",
**kwargs
) -> BaseChatEngine:
"""Convert to chat engine."""
def insert(self, document: Document, **kwargs) -> None:
"""Insert document into index."""
def insert_nodes(self, nodes: List[BaseNode], **kwargs) -> None:
"""Insert nodes into index."""
def delete_ref_doc(self, ref_doc_id: str, **kwargs) -> None:
"""Delete document from index."""
def update_ref_doc(self, document: Document, **kwargs) -> None:
"""Update document in index."""Simple index that stores all nodes sequentially, useful for small document collections or when comprehensive retrieval is needed.
class SummaryIndex:
"""
Simple index storing all nodes for comprehensive retrieval.
Parameters:
- nodes: Optional[Sequence[BaseNode]], nodes to index
- storage_context: Optional[StorageContext], storage configuration
- service_context: Optional[ServiceContext], service configuration (deprecated)
- show_progress: bool, whether to show indexing progress
"""
def __init__(
self,
nodes: Optional[Sequence[BaseNode]] = None,
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
show_progress: bool = False,
**kwargs
): ...
@classmethod
def from_documents(
cls,
documents: Sequence[Document],
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
show_progress: bool = False,
**kwargs
) -> "SummaryIndex":
"""Create index from documents."""
def as_query_engine(self, **kwargs) -> BaseQueryEngine:
"""Convert to query engine."""
def as_retriever(self, **kwargs) -> BaseRetriever:
"""Convert to retriever."""Hierarchical index that organizes information in a tree structure, enabling top-down traversal and summarization at different levels.
class TreeIndex:
"""
Hierarchical tree-based index for structured information organization.
Parameters:
- nodes: Optional[Sequence[BaseNode]], nodes to index
- num_children: int, branching factor for tree construction
- build_tree: bool, whether to build tree during initialization
- storage_context: Optional[StorageContext], storage configuration
- service_context: Optional[ServiceContext], service configuration (deprecated)
- show_progress: bool, whether to show indexing progress
"""
def __init__(
self,
nodes: Optional[Sequence[BaseNode]] = None,
num_children: int = 10,
build_tree: bool = True,
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
show_progress: bool = False,
**kwargs
): ...
@classmethod
def from_documents(
cls,
documents: Sequence[Document],
num_children: int = 10,
build_tree: bool = True,
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
show_progress: bool = False,
**kwargs
) -> "TreeIndex":
"""Create tree index from documents."""
def as_query_engine(self, **kwargs) -> BaseQueryEngine:
"""Convert to query engine."""
def as_retriever(self, **kwargs) -> BaseRetriever:
"""Convert to retriever."""Index based on keyword extraction and matching, supporting various keyword extraction algorithms for precise term-based retrieval.
class KeywordTableIndex:
    """
    Index based on keyword extraction and matching.

    Parameters:
    - nodes: Optional[Sequence[BaseNode]], nodes to index
    - storage_context: Optional[StorageContext], storage configuration
    - service_context: Optional[ServiceContext], service configuration (deprecated)
    - show_progress: bool, whether to show indexing progress
    """

    def __init__(
        self,
        nodes: Optional[Sequence[BaseNode]] = None,
        storage_context: Optional[StorageContext] = None,
        service_context: Optional[ServiceContext] = None,
        show_progress: bool = False,
        **kwargs
    ): ...

    @classmethod
    def from_documents(
        cls,
        documents: Sequence[Document],
        storage_context: Optional[StorageContext] = None,
        service_context: Optional[ServiceContext] = None,
        show_progress: bool = False,
        **kwargs
    ) -> "KeywordTableIndex":
        """Create keyword index from documents."""
class SimpleKeywordTableIndex(KeywordTableIndex):
"""Simple keyword extraction using basic text processing."""
class RAKEKeywordTableIndex(KeywordTableIndex):
"""Keyword extraction using RAKE (Rapid Automatic Keyword Extraction) algorithm."""Index that constructs and queries knowledge graphs from text, extracting entities and relationships for graph-based retrieval.
class KnowledgeGraphIndex:
"""
Index that builds knowledge graphs from text for entity-relationship queries.
Parameters:
- nodes: Optional[Sequence[BaseNode]], nodes to index
- storage_context: Optional[StorageContext], storage configuration
- service_context: Optional[ServiceContext], service configuration (deprecated)
- max_triplets_per_chunk: int, maximum triplets to extract per chunk
- show_progress: bool, whether to show indexing progress
- include_embeddings: bool, whether to include embeddings
"""
def __init__(
self,
nodes: Optional[Sequence[BaseNode]] = None,
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
max_triplets_per_chunk: int = 10,
show_progress: bool = False,
include_embeddings: bool = True,
**kwargs
): ...
@classmethod
def from_documents(
cls,
documents: Sequence[Document],
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
max_triplets_per_chunk: int = 10,
show_progress: bool = False,
include_embeddings: bool = True,
**kwargs
) -> "KnowledgeGraphIndex":
"""Create knowledge graph index from documents."""
def as_query_engine(self, **kwargs) -> BaseQueryEngine:
"""Convert to query engine."""
def as_retriever(self, **kwargs) -> BaseRetriever:
"""Convert to retriever."""Advanced graph index supporting property graphs with typed nodes and relationships, enabling complex graph queries and traversal.
class PropertyGraphIndex:
"""
Index supporting property graphs with typed nodes and relationships.
Parameters:
- nodes: Optional[Sequence[BaseNode]], nodes to index
- property_graph_store: Optional[PropertyGraphStore], graph store backend
- embed_kg_nodes: bool, whether to embed knowledge graph nodes
- storage_context: Optional[StorageContext], storage configuration
- service_context: Optional[ServiceContext], service configuration (deprecated)
- show_progress: bool, whether to show indexing progress
"""
def __init__(
self,
nodes: Optional[Sequence[BaseNode]] = None,
property_graph_store: Optional[PropertyGraphStore] = None,
embed_kg_nodes: bool = True,
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
show_progress: bool = False,
**kwargs
): ...
@classmethod
def from_documents(
cls,
documents: Sequence[Document],
property_graph_store: Optional[PropertyGraphStore] = None,
embed_kg_nodes: bool = True,
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
show_progress: bool = False,
**kwargs
) -> "PropertyGraphIndex":
"""Create property graph index from documents."""
def as_query_engine(self, **kwargs) -> BaseQueryEngine:
"""Convert to query engine."""
def as_retriever(self, **kwargs) -> BaseRetriever:
"""Convert to retriever."""Index that creates summaries for each document, enabling summary-based retrieval and hierarchical information access.
class DocumentSummaryIndex:
"""
Index that creates summaries for documents to enable summary-based retrieval.
Parameters:
- nodes: Optional[Sequence[BaseNode]], nodes to index
- storage_context: Optional[StorageContext], storage configuration
- service_context: Optional[ServiceContext], service configuration (deprecated)
- response_synthesizer: Optional[BaseSynthesizer], synthesizer for summaries
- show_progress: bool, whether to show indexing progress
"""
def __init__(
self,
nodes: Optional[Sequence[BaseNode]] = None,
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
response_synthesizer: Optional[BaseSynthesizer] = None,
show_progress: bool = False,
**kwargs
): ...
@classmethod
def from_documents(
cls,
documents: Sequence[Document],
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
response_synthesizer: Optional[BaseSynthesizer] = None,
show_progress: bool = False,
**kwargs
) -> "DocumentSummaryIndex":
"""Create document summary index from documents."""
def as_query_engine(self, **kwargs) -> BaseQueryEngine:
"""Convert to query engine."""
def as_retriever(self, **kwargs) -> BaseRetriever:
"""Convert to retriever."""Container for multiple indices that can be queried together, enabling complex multi-index retrieval strategies.
class ComposableGraph:
"""
Container for multiple indices enabling composable queries.
Parameters:
- all_indices: Dict[str, BaseIndex], dictionary of index_id to index
- root_id: str, identifier of the root index
- storage_context: Optional[StorageContext], storage configuration
- service_context: Optional[ServiceContext], service configuration (deprecated)
"""
def __init__(
self,
all_indices: Dict[str, BaseIndex],
root_id: str,
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
**kwargs
): ...
def as_query_engine(
self,
custom_query_engines: Optional[Dict[str, BaseQueryEngine]] = None,
**kwargs
) -> BaseQueryEngine:
"""Convert to composable query engine."""
def as_retriever(
self,
custom_retrievers: Optional[Dict[str, BaseRetriever]] = None,
**kwargs
) -> BaseRetriever:
"""Convert to composable retriever."""Functions for persisting and loading indices from storage backends.
def load_index_from_storage(
    storage_context: StorageContext,
    index_id: Optional[str] = None,
    service_context: Optional[ServiceContext] = None,
    **kwargs
) -> BaseIndex:
    """
    Load index from storage context.

    Parameters:
    - storage_context: StorageContext, storage configuration
    - index_id: Optional[str], specific index to load
    - service_context: Optional[ServiceContext], service configuration

    Returns:
    BaseIndex: The loaded index
    """
def load_indices_from_storage(
    storage_context: StorageContext,
    index_ids: Optional[Sequence[str]] = None,
    service_context: Optional[ServiceContext] = None,
    **kwargs
) -> List[BaseIndex]:
    """
    Load multiple indices from storage context.

    Parameters:
    - storage_context: StorageContext, storage configuration
    - index_ids: Optional[Sequence[str]], specific indices to load
    - service_context: Optional[ServiceContext], service configuration

    Returns:
    List[BaseIndex]: List of loaded indices
    """
def load_graph_from_storage(
storage_context: StorageContext,
root_id: str,
service_context: Optional[ServiceContext] = None,
**kwargs
) -> ComposableGraph:
"""
Load composable graph from storage context.
Parameters:
- storage_context: StorageContext, storage configuration
- root_id: str, root index identifier
- service_context: Optional[ServiceContext], service configuration
Returns:
ComposableGraph: The loaded composable graph
"""Specialized vector index supporting multi-modal content including text, images, and other media types.
class MultiModalVectorStoreIndex(VectorStoreIndex):
"""
Vector store index supporting multi-modal content (text, images, etc.).
Parameters:
- nodes: Optional[Sequence[BaseNode]], nodes to index
- embed_model: Optional[MultiModalEmbedding], multi-modal embedding model
- storage_context: Optional[StorageContext], storage configuration
- service_context: Optional[ServiceContext], service configuration (deprecated)
- show_progress: bool, whether to show indexing progress
"""
def __init__(
self,
nodes: Optional[Sequence[BaseNode]] = None,
embed_model: Optional[MultiModalEmbedding] = None,
storage_context: Optional[StorageContext] = None,
service_context: Optional[ServiceContext] = None,
show_progress: bool = False,
**kwargs
): ...For backward compatibility, GPT-prefixed aliases are available for all index types:
# Legacy aliases (deprecated, use non-GPT versions)
GPTVectorStoreIndex = VectorStoreIndex
GPTListIndex = SummaryIndex
GPTTreeIndex = TreeIndex
GPTKeywordTableIndex = KeywordTableIndex
GPTSimpleKeywordTableIndex = SimpleKeywordTableIndex
GPTRAKEKeywordTableIndex = RAKEKeywordTableIndex
GPTDocumentSummaryIndex = DocumentSummaryIndex

from llama_index.core import VectorStoreIndex, Document, Settings
from llama_index.core.embeddings import MockEmbedding

# Configure embedding model
Settings.embed_model = MockEmbedding(embed_dim=384)

# Create documents
documents = [
    Document(text="Introduction to machine learning and artificial intelligence."),
    Document(text="Deep learning techniques for computer vision applications."),
    Document(text="Natural language processing with transformer models.")
]

# Create vector store index
index = VectorStoreIndex.from_documents(documents, show_progress=True)

# Query the index
query_engine = index.as_query_engine()
response = query_engine.query("What is machine learning?")
print(response.response)

# Use as retriever
retriever = index.as_retriever(similarity_top_k=2)
nodes = retriever.retrieve("deep learning")
for node in nodes:
    print(f"Score: {node.score:.3f}, Text: {node.text}")

from llama_index.core import (
    VectorStoreIndex,
    TreeIndex,
    KeywordTableIndex,
    ComposableGraph
)

# Create different index types
vector_index = VectorStoreIndex.from_documents(documents)
tree_index = TreeIndex.from_documents(documents)
keyword_index = KeywordTableIndex.from_documents(documents)

# Create composable graph
graph = ComposableGraph(
    all_indices={
        "vector": vector_index,
        "tree": tree_index,
        "keyword": keyword_index
    },
    root_id="vector"
)

# Query the composable graph
query_engine = graph.as_query_engine()
response = query_engine.query("Compare machine learning approaches")

from llama_index.core import StorageContext, load_index_from_storage
# Create index with storage context
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
# Persist index
index.storage_context.persist()
# Load index later
storage_context = StorageContext.from_defaults(persist_dir="./storage")
loaded_index = load_index_from_storage(storage_context)class IndexStructType(str, Enum):
"""Types of index structures."""
TREE = "tree"
LIST = "list"
KEYWORD_TABLE = "keyword_table"
VECTOR_STORE = "vector_store"
DOCUMENT_SUMMARY = "document_summary"
KNOWLEDGE_GRAPH = "kg"
PROPERTY_GRAPH = "property_graph"
EMPTY = "empty"Install with Tessl CLI
npx tessl i tessl/pypi-llama-index-core