tessl/pypi-llama-index-core

Interface between LLMs and your data

—

Pending

Overview

Eval results

Files

Documents & Nodes

Name: tessl/pypi-llama-index-core
Author: tessl

Core data structures for representing textual content, managing metadata, and organizing information in LlamaIndex applications. Documents serve as the primary input format, while nodes provide the fundamental unit for indexing and retrieval operations.

Capabilities

Document Creation & Management

Documents are the primary input format for LlamaIndex. Each document holds text content with optional metadata, and specialized document types extend this to other content such as images.

class Document:
    """
    A document represents a piece of unstructured text with optional metadata.

    Parameters:
    - text: str, the main text content (required; this signature declares no default)
    - metadata: Optional[dict], key-value metadata pairs
    - excluded_embed_metadata_keys: Optional[List[str]], metadata keys to exclude from embedding
    - excluded_llm_metadata_keys: Optional[List[str]], metadata keys to exclude from LLM context
    - relationships: Optional[Dict[NodeRelationship, RelatedNodeInfo]], relationships to other nodes
    - mimetype: Optional[str], MIME type of the content
    - start_char_idx: Optional[int], starting character index in source
    - end_char_idx: Optional[int], ending character index in source
    - **kwargs: additional keyword arguments accepted by the constructor

    Every parameter other than text defaults to None.
    """
    def __init__(
        self,
        text: str,
        metadata: Optional[dict] = None,
        excluded_embed_metadata_keys: Optional[List[str]] = None,
        excluded_llm_metadata_keys: Optional[List[str]] = None,
        relationships: Optional[Dict[NodeRelationship, RelatedNodeInfo]] = None,
        mimetype: Optional[str] = None,
        start_char_idx: Optional[int] = None,
        end_char_idx: Optional[int] = None,
        **kwargs
    ): ...

    def get_content(self, metadata_mode: MetadataMode = MetadataMode.ALL) -> str:
        """
        Get text content with optional metadata inclusion.

        Parameters:
        - metadata_mode: MetadataMode, selects which metadata accompanies the
          text; defaults to MetadataMode.ALL

        Returns:
        - str, the document content
        """

    def set_content(self, value: str) -> None:
        """
        Set the text content.

        Parameters:
        - value: str, the new text content
        """

    def get_metadata_str(self, mode: MetadataMode = MetadataMode.ALL) -> str:
        """
        Get formatted metadata string.

        Parameters:
        - mode: MetadataMode, selects which metadata is included; defaults to
          MetadataMode.ALL

        Returns:
        - str, the formatted metadata
        """

    def get_doc_id(self) -> str:
        """
        Get document ID.

        Returns:
        - str, the document identifier
        """

    def __str__(self) -> str:
        """Return a string representation of the document."""
        ...

Image Documents

Specialized document type for handling image content with text descriptions and image-specific metadata.

class ImageDocument(Document):
    """
    Document containing image data with optional text description.

    Parameters:
    - text: str, text description of the image (defaults to an empty string)
    - image: Optional[str], base64 encoded image data or image path
    - image_path: Optional[str], path to image file
    - image_url: Optional[str], URL to image
    - metadata: Optional[dict], additional metadata
    - **kwargs: additional keyword arguments accepted by the constructor

    All image-related parameters and metadata default to None.
    """
    def __init__(
        self,
        text: str = "",
        image: Optional[str] = None,
        image_path: Optional[str] = None, 
        image_url: Optional[str] = None,
        metadata: Optional[dict] = None,
        **kwargs
    ): ...

    def resolve_image(self) -> str:
        """
        Resolve image to base64 encoded string.

        Returns:
        - str, the base64 encoded image
        """

Base Node Structure

Foundation class for all node types, providing core functionality for text content, metadata management, and relationship tracking.

class BaseNode:
    """
    Base class for all node types in LlamaIndex.

    Parameters:
    - id_: Optional[str], unique identifier for the node
    - embedding: Optional[List[float]], vector embedding for the node
    - metadata: Optional[dict], key-value metadata pairs
    - excluded_embed_metadata_keys: Optional[List[str]], metadata keys excluded from embedding
    - excluded_llm_metadata_keys: Optional[List[str]], metadata keys excluded from LLM context
    - relationships: Optional[Dict[NodeRelationship, RelatedNodeInfo]], relationships to other nodes
    - start_char_idx: Optional[int], starting character index in source
    - end_char_idx: Optional[int], ending character index in source
    - **kwargs: additional keyword arguments accepted by the constructor

    Every named parameter defaults to None.
    """
    def __init__(
        self,
        id_: Optional[str] = None,
        embedding: Optional[List[float]] = None,
        metadata: Optional[dict] = None,
        excluded_embed_metadata_keys: Optional[List[str]] = None,
        excluded_llm_metadata_keys: Optional[List[str]] = None,
        relationships: Optional[Dict[NodeRelationship, RelatedNodeInfo]] = None,
        start_char_idx: Optional[int] = None,
        end_char_idx: Optional[int] = None,
        **kwargs
    ): ...

    @property
    def node_id(self) -> str:
        """Get node identifier."""

    @node_id.setter
    def node_id(self, node_id: str) -> None:
        """
        Set node identifier.

        Parameters:
        - node_id: str, the new identifier
        """

    def get_content(self, metadata_mode: MetadataMode = MetadataMode.ALL) -> str:
        """
        Get node content with optional metadata.

        Parameters:
        - metadata_mode: MetadataMode, selects which metadata accompanies the
          content; defaults to MetadataMode.ALL

        Returns:
        - str, the node content
        """

    def get_metadata_str(self, mode: MetadataMode = MetadataMode.ALL) -> str:
        """
        Get formatted metadata string.

        Parameters:
        - mode: MetadataMode, selects which metadata is included; defaults to
          MetadataMode.ALL

        Returns:
        - str, the formatted metadata
        """

    def set_content(self, value: str) -> None:
        """
        Set node content.

        Parameters:
        - value: str, the new content
        """

    def get_embedding(self) -> List[float]:
        """
        Get node embedding vector.

        Returns:
        - List[float], the embedding
        """

    def as_related_node_info(self) -> RelatedNodeInfo:
        """
        Convert to RelatedNodeInfo for relationship tracking.

        Returns:
        - RelatedNodeInfo, a reference that can be stored in another node's
          relationships mapping
        """

Text Nodes

Primary node type for text content, extending BaseNode with text-specific functionality and serving as the fundamental unit for most LlamaIndex operations.

class TextNode(BaseNode):
    """
    Node containing text content for indexing and retrieval.

    Parameters:
    - text: str, the text content of the node (defaults to an empty string)
    - start_char_idx: Optional[int], starting character index in source document
    - end_char_idx: Optional[int], ending character index in source document
    - text_template: str, template for formatting text with metadata; uses the
      {metadata_str} and {content} placeholders, separated by a blank line in
      the default
    - metadata_template: str, template for formatting metadata; uses the {key}
      and {value} placeholders
    - metadata_separator: str, separator between metadata items (a single
      newline by default)
    - **kwargs: additional keyword arguments accepted by the constructor
    """
    def __init__(
        self,
        text: str = "",
        start_char_idx: Optional[int] = None,
        end_char_idx: Optional[int] = None,
        # Real newline escapes: a doubled backslash here would yield the two
        # literal characters backslash + "n" in formatted output.
        text_template: str = "{metadata_str}\n\n{content}",
        metadata_template: str = "{key}: {value}",
        metadata_separator: str = "\n",
        **kwargs
    ): ...

    @classmethod
    def get_type(cls) -> str:
        """
        Get node type identifier.

        Returns:
        - str, the type identifier for this node class
        """

    def get_text(self) -> str:
        """
        Get raw text content.

        Returns:
        - str, the node text
        """

    def set_text(self, text: str) -> None:
        """
        Set text content.

        Parameters:
        - text: str, the new text
        """

Image Nodes

Specialized nodes for handling image content, extending TextNode with image-specific capabilities and metadata.

class ImageNode(TextNode):
    """
    Node containing image data with optional text description.

    Parameters:
    - text: str, text description of the image (defaults to an empty string)
    - image: Optional[str], base64 encoded image data or image path
    - image_path: Optional[str], path to image file
    - image_url: Optional[str], URL to image
    - image_mimetype: Optional[str], MIME type of image
    - text_embedding: Optional[List[float]], embedding for text content
    - image_embedding: Optional[List[float]], embedding for image content
    - **kwargs: additional keyword arguments accepted by the constructor

    Every parameter other than text defaults to None.
    """
    def __init__(
        self,
        text: str = "",
        image: Optional[str] = None,
        image_path: Optional[str] = None,
        image_url: Optional[str] = None,
        image_mimetype: Optional[str] = None,
        text_embedding: Optional[List[float]] = None,
        image_embedding: Optional[List[float]] = None,
        **kwargs
    ): ...

    def resolve_image(self) -> str:
        """
        Resolve image to base64 encoded string.

        Returns:
        - str, the base64 encoded image
        """

    def set_image(self, image: Optional[str]) -> None:
        """
        Set image data.

        Parameters:
        - image: Optional[str], the new image data
        """

Index Reference Nodes

Nodes that reference other indices, enabling hierarchical and composable index structures for organizing complex document collections.

class IndexNode(BaseNode):
    """
    Node that references another index for hierarchical structures.

    Parameters:
    - text: str, text description of the referenced index (defaults to an empty string)
    - index_id: Optional[str], identifier of the referenced index (defaults to None)
    - obj: Optional[BaseIndex], the referenced index object (defaults to None)
    - **kwargs: additional keyword arguments accepted by the constructor
    """
    def __init__(
        self,
        text: str = "",
        index_id: Optional[str] = None,
        obj: Optional[BaseIndex] = None,
        **kwargs
    ): ...

    @classmethod
    def from_text_node(cls, node: TextNode, index_id: str) -> "IndexNode":
        """
        Create IndexNode from TextNode.

        Parameters:
        - node: TextNode, the node to convert
        - index_id: str, identifier of the index the new node refers to

        Returns:
        - IndexNode, the newly created node
        """

Node Scoring & Ranking

Container for nodes with associated relevance scores, used throughout retrieval and ranking operations.

class NodeWithScore:
    """
    Container for a node with an associated relevance score.

    Parameters:
    - node: BaseNode, the node content
    - score: Optional[float], relevance score (higher = more relevant);
      defaults to None
    """
    def __init__(self, node: BaseNode, score: Optional[float] = None): ...

    @property
    def node_id(self) -> str:
        """Get node identifier."""

    @property
    def text(self) -> str:
        """Get node text content."""

    def __str__(self) -> str:
        """Return a string representation of the scored node."""
        ...

    def get_content(self, metadata_mode: MetadataMode = MetadataMode.ALL) -> str:
        """
        Get node content with metadata.

        Parameters:
        - metadata_mode: MetadataMode, selects which metadata accompanies the
          content; defaults to MetadataMode.ALL

        Returns:
        - str, the node content
        """

Node Relationships & References

System for tracking relationships between nodes and managing references to related content.

class RelatedNodeInfo:
    """
    Information about a related node.

    Parameters:
    - node_id: str, identifier of the related node (required)
    - node_type: Optional[ObjectType], type of the related node
    - metadata: Optional[dict], metadata about the relationship
    - hash: Optional[str], hash of the related node content

    Every parameter other than node_id defaults to None.
    """
    def __init__(
        self,
        node_id: str,
        node_type: Optional[ObjectType] = None,
        metadata: Optional[dict] = None,
        hash: Optional[str] = None
    ): ...

class NodeRelationship(str, Enum):
    """
    Types of relationships between nodes.

    Used as the key type of a node's relationships mapping. Members also
    subclass str, so each compares equal to its string value.
    """
    SOURCE = "SOURCE"      # Source document relationship
    PREVIOUS = "PREVIOUS"  # Previous node in sequence
    NEXT = "NEXT"          # Next node in sequence
    PARENT = "PARENT"      # Parent node in hierarchy
    CHILD = "CHILD"        # Child node in hierarchy

Media Resource Handling

Support for various media types and resource management in documents and nodes.

class MediaResource:
    """
    Resource for handling media content in documents.

    Parameters:
    - text: Optional[str], text description of the resource
    - url: Optional[str], URL to the resource
    - path: Optional[str], local path to the resource
    - mimetype: Optional[str], MIME type of the resource

    Every parameter defaults to None.
    """
    def __init__(
        self,
        text: Optional[str] = None,
        url: Optional[str] = None, 
        path: Optional[str] = None,
        mimetype: Optional[str] = None
    ): ...

Usage Examples

Creating and Managing Documents

from llama_index.core import Document
from llama_index.core.schema import MetadataMode

# Create a basic document with text and a metadata dictionary
doc = Document(
    text="LlamaIndex provides tools for building RAG applications with LLMs.",
    metadata={
        "source": "documentation",
        "category": "technical",
        "author": "LlamaIndex Team"
    }
)

# Access document content
# get_content() defaults to MetadataMode.ALL, so metadata is included
print(doc.get_content())  # Includes metadata by default
print(doc.get_content(metadata_mode=MetadataMode.NONE))  # Text only

# Update document: replace the text content
doc.set_content("Updated content about LlamaIndex capabilities.")

Working with Text Nodes

# RelatedNodeInfo is the type returned by as_related_node_info() below
from llama_index.core.schema import TextNode, NodeRelationship, RelatedNodeInfo

# Create text nodes
node1 = TextNode(
    text="Introduction to machine learning concepts.",
    metadata={"chapter": "1", "topic": "intro"}
)

node2 = TextNode(
    text="Deep learning architectures and applications.", 
    metadata={"chapter": "2", "topic": "deep_learning"}
)

# Establish relationships: link the two nodes in both directions so that
# node1 is PREVIOUS of node2 and node2 is NEXT of node1
node2.relationships[NodeRelationship.PREVIOUS] = node1.as_related_node_info()
node1.relationships[NodeRelationship.NEXT] = node2.as_related_node_info()

# Access node properties
print(f"Node ID: {node1.node_id}")
print(f"Text: {node1.get_text()}")
print(f"Content with metadata: {node1.get_content()}")

Handling Scored Results

from llama_index.core.schema import NodeWithScore

# Create scored nodes (typically from retrieval)
# node1 and node2 are the TextNode objects created in the
# "Working with Text Nodes" example above
scored_nodes = [
    NodeWithScore(node=node1, score=0.85),
    NodeWithScore(node=node2, score=0.72)
]

# Process results
for scored_node in scored_nodes:
    # score is Optional[float]; the :.2f format assumes it is not None,
    # which holds here because both scores are set explicitly
    print(f"Score: {scored_node.score:.2f}")
    print(f"Text: {scored_node.text}")
    print(f"Node ID: {scored_node.node_id}")

Types & Enums

class ObjectType(str, Enum):
    """
    Types of objects in LlamaIndex.

    Used as the type of RelatedNodeInfo.node_type. Members also subclass
    str, so each compares equal to its string value.
    """
    TEXT = "text"
    IMAGE = "image" 
    INDEX = "index"
    DOCUMENT = "document"

class MetadataMode(str, Enum):
    """
    Modes for including metadata in content.

    Passed to get_content() and get_metadata_str(), both of which default to
    ALL. Members also subclass str, so each compares equal to its string value.
    """
    # NOTE(review): EMBED and LLM presumably honor excluded_embed_metadata_keys
    # and excluded_llm_metadata_keys respectively — confirm against the library.
    ALL = "all"      # Include all metadata
    EMBED = "embed"  # Include only embedding metadata
    LLM = "llm"      # Include only LLM metadata
    NONE = "none"    # Include no metadata

class Modality(str, Enum):
    """
    Content modalities supported.

    Members also subclass str, so each compares equal to its string value.
    """
    TEXT = "text"
    IMAGE = "image"
    AUDIO = "audio" 
    VIDEO = "video"

Install with Tessl CLI