Interface between LLMs and your data
—
Core data structures for representing textual content, managing metadata, and organizing information in LlamaIndex applications. Documents serve as the primary input format, while nodes provide the fundamental unit for indexing and retrieval operations.
Documents represent the primary input format for LlamaIndex, containing text content with optional metadata and supporting various content types including text and images.
class Document:
    """
    A document represents a piece of unstructured text with optional metadata.

    Parameters:
    - text: str, the main text content
    - metadata: Optional[dict], key-value metadata pairs
    - excluded_embed_metadata_keys: Optional[List[str]], metadata keys to exclude from embedding
    - excluded_llm_metadata_keys: Optional[List[str]], metadata keys to exclude from LLM context
    - relationships: Optional[Dict[NodeRelationship, RelatedNodeInfo]], relationships to other nodes
    - mimetype: Optional[str], MIME type of the content
    - start_char_idx: Optional[int], starting character index in source
    - end_char_idx: Optional[int], ending character index in source
    - **kwargs: extra keyword arguments; how they are consumed is not shown in this stub
    """

    def __init__(
        self,
        text: str,
        metadata: Optional[dict] = None,
        excluded_embed_metadata_keys: Optional[List[str]] = None,
        excluded_llm_metadata_keys: Optional[List[str]] = None,
        relationships: Optional[Dict[NodeRelationship, RelatedNodeInfo]] = None,
        mimetype: Optional[str] = None,
        start_char_idx: Optional[int] = None,
        end_char_idx: Optional[int] = None,
        **kwargs
    ): ...

    def get_content(self, metadata_mode: MetadataMode = MetadataMode.ALL) -> str:
        """Get text content with optional metadata inclusion (all metadata by default)."""

    def set_content(self, value: str) -> None:
        """Set the text content."""

    def get_metadata_str(self, mode: MetadataMode = MetadataMode.ALL) -> str:
        """Get formatted metadata string."""

    def get_doc_id(self) -> str:
        """Get document ID."""

    def __str__(self) -> str: ...


# Specialized document type for handling image content with text descriptions and image-specific metadata.
class ImageDocument(Document):
    """
    Document containing image data with optional text description.

    Parameters:
    - text: str, text description of the image (defaults to an empty string)
    - image: Optional[str], base64 encoded image data or image path
    - image_path: Optional[str], path to image file
    - image_url: Optional[str], URL to image
    - metadata: Optional[dict], additional metadata
    - **kwargs: extra keyword arguments; presumably forwarded to Document -- confirm
    """

    def __init__(
        self,
        text: str = "",
        image: Optional[str] = None,
        image_path: Optional[str] = None,
        image_url: Optional[str] = None,
        metadata: Optional[dict] = None,
        **kwargs
    ): ...

    def resolve_image(self) -> str:
        """Resolve image to base64 encoded string."""


# Foundation class for all node types, providing core functionality for text content, metadata management, and relationship tracking.
class BaseNode:
    """
    Base class for all node types in LlamaIndex.

    Parameters:
    - id_: Optional[str], unique identifier for the node
      (NOTE(review): behavior when omitted is not shown here -- presumably an id is generated; confirm)
    - embedding: Optional[List[float]], vector embedding for the node
    - metadata: Optional[dict], key-value metadata pairs
    - excluded_embed_metadata_keys: Optional[List[str]], metadata keys excluded from embedding
    - excluded_llm_metadata_keys: Optional[List[str]], metadata keys excluded from LLM context
    - relationships: Optional[Dict[NodeRelationship, RelatedNodeInfo]], relationships to other nodes
    - start_char_idx: Optional[int], starting character index in source
    - end_char_idx: Optional[int], ending character index in source
    - **kwargs: extra keyword arguments; how they are consumed is not shown in this stub
    """

    def __init__(
        self,
        id_: Optional[str] = None,
        embedding: Optional[List[float]] = None,
        metadata: Optional[dict] = None,
        excluded_embed_metadata_keys: Optional[List[str]] = None,
        excluded_llm_metadata_keys: Optional[List[str]] = None,
        relationships: Optional[Dict[NodeRelationship, RelatedNodeInfo]] = None,
        start_char_idx: Optional[int] = None,
        end_char_idx: Optional[int] = None,
        **kwargs
    ): ...

    @property
    def node_id(self) -> str:
        """Get node identifier."""

    @node_id.setter
    def node_id(self, node_id: str) -> None:
        """Set node identifier."""

    def get_content(self, metadata_mode: MetadataMode = MetadataMode.ALL) -> str:
        """Get node content with optional metadata (all metadata by default)."""

    def get_metadata_str(self, mode: MetadataMode = MetadataMode.ALL) -> str:
        """Get formatted metadata string."""

    def set_content(self, value: str) -> None:
        """Set node content."""

    def get_embedding(self) -> List[float]:
        """Get node embedding vector."""

    def as_related_node_info(self) -> RelatedNodeInfo:
        """Convert to RelatedNodeInfo for relationship tracking."""


# Primary node type for text content, extending BaseNode with text-specific functionality and serving as the fundamental unit for most LlamaIndex operations.
class TextNode(BaseNode):
    """
    Node containing text content for indexing and retrieval.

    Parameters:
    - text: str, the text content of the node (defaults to an empty string)
    - start_char_idx: Optional[int], starting character index in source document
    - end_char_idx: Optional[int], ending character index in source document
    - text_template: str, template for formatting text with metadata
    - metadata_template: str, template for formatting metadata
    - metadata_separator: str, separator between metadata items
    - **kwargs: extra keyword arguments; presumably forwarded to BaseNode -- confirm
    """

    def __init__(
        self,
        text: str = "",
        start_char_idx: Optional[int] = None,
        end_char_idx: Optional[int] = None,
        # "\n" is a real newline escape: metadata block, blank line, then content.
        text_template: str = "{metadata_str}\n\n{content}",
        metadata_template: str = "{key}: {value}",
        # One metadata item per line.
        metadata_separator: str = "\n",
        **kwargs
    ): ...

    @classmethod
    def get_type(cls) -> str:
        """Get node type identifier."""

    def get_text(self) -> str:
        """Get raw text content."""

    def set_text(self, text: str) -> None:
        """Set text content."""


# Specialized nodes for handling image content, extending TextNode with image-specific capabilities and metadata.
class ImageNode(TextNode):
    """
    Node containing image data with optional text description.

    Parameters:
    - text: str, text description of the image (defaults to an empty string)
    - image: Optional[str], base64 encoded image data or image path
    - image_path: Optional[str], path to image file
    - image_url: Optional[str], URL to image
    - image_mimetype: Optional[str], MIME type of image
    - text_embedding: Optional[List[float]], embedding for text content
    - image_embedding: Optional[List[float]], embedding for image content
    - **kwargs: extra keyword arguments; presumably forwarded to TextNode -- confirm
    """

    def __init__(
        self,
        text: str = "",
        image: Optional[str] = None,
        image_path: Optional[str] = None,
        image_url: Optional[str] = None,
        image_mimetype: Optional[str] = None,
        text_embedding: Optional[List[float]] = None,
        image_embedding: Optional[List[float]] = None,
        **kwargs
    ): ...

    def resolve_image(self) -> str:
        """Resolve image to base64 encoded string."""

    def set_image(self, image: Optional[str]) -> None:
        """Set image data."""


# Nodes that reference other indices, enabling hierarchical and composable index structures for complex document organizations.
# IndexNode carries `text` and is built from a TextNode, so it derives from
# TextNode (itself a BaseNode subclass); isinstance checks against BaseNode
# still hold.
class IndexNode(TextNode):
    """
    Node that references another index for hierarchical structures.

    Parameters:
    - text: str, text description of the referenced index (defaults to an empty string)
    - index_id: Optional[str], identifier of the referenced index
    - obj: Optional[BaseIndex], the referenced index object
    - **kwargs: extra keyword arguments; presumably forwarded to the parent node class -- confirm
    """

    def __init__(
        self,
        text: str = "",
        index_id: Optional[str] = None,
        obj: Optional[BaseIndex] = None,
        **kwargs
    ): ...

    @classmethod
    def from_text_node(cls, node: TextNode, index_id: str) -> "IndexNode":
        """Create IndexNode from TextNode, attaching the given index_id."""


# Container for nodes with associated relevance scores, used throughout retrieval and ranking operations.
class NodeWithScore:
    """
    Container for a node with an associated relevance score.

    Parameters:
    - node: BaseNode, the node content
    - score: Optional[float], relevance score (higher = more relevant)
    """

    def __init__(self, node: BaseNode, score: Optional[float] = None): ...

    @property
    def node_id(self) -> str:
        """Get node identifier."""

    @property
    def text(self) -> str:
        """Get node text content."""

    def __str__(self) -> str: ...

    def get_content(self, metadata_mode: MetadataMode = MetadataMode.ALL) -> str:
        """Get node content with metadata (all metadata by default)."""


# System for tracking relationships between nodes and managing references to related content.
class RelatedNodeInfo:
    """
    Information about a related node.

    Parameters:
    - node_id: str, identifier of the related node
    - node_type: Optional[ObjectType], type of the related node
    - metadata: Optional[dict], metadata about the relationship
    - hash: Optional[str], hash of the related node content
    """

    def __init__(
        self,
        node_id: str,
        node_type: Optional[ObjectType] = None,
        metadata: Optional[dict] = None,
        # `hash` shadows the builtin, but the name is part of the public
        # keyword interface and is kept for caller compatibility.
        hash: Optional[str] = None
    ): ...
class NodeRelationship(str, Enum):
    """Types of relationships between nodes."""

    SOURCE = "SOURCE"  # Source document relationship
    PREVIOUS = "PREVIOUS"  # Previous node in sequence
    NEXT = "NEXT"  # Next node in sequence
    PARENT = "PARENT"  # Parent node in hierarchy
    CHILD = "CHILD"  # Child node in hierarchy


# Support for various media types and resource management in documents and nodes.
class MediaResource:
    """
    Resource for handling media content in documents.

    Parameters:
    - text: Optional[str], text description of the resource
    - url: Optional[str], URL to the resource
    - path: Optional[str], local path to the resource
    - mimetype: Optional[str], MIME type of the resource
    """

    def __init__(
        self,
        text: Optional[str] = None,
        url: Optional[str] = None,
        path: Optional[str] = None,
        mimetype: Optional[str] = None
    ): ...


from llama_index.core import Document
from llama_index.core.schema import MetadataMode

# Usage example: build a Document, read it back, then replace its content.
# `Document` is brought into scope by the import that immediately precedes
# this snippet.

# Create a basic document
doc = Document(
    text="LlamaIndex provides tools for building RAG applications with LLMs.",
    metadata={
        "source": "documentation",
        "category": "technical",
        "author": "LlamaIndex Team",
    },
)

# Access document content
print(doc.get_content())  # Includes metadata by default
print(doc.get_content(metadata_mode=MetadataMode.NONE))  # Text only

# Update document
doc.set_content("Updated content about LlamaIndex capabilities.")

from llama_index.core.schema import TextNode, NodeRelationship, RelatedNodeInfo
# Usage example: create two TextNodes and link them as a sequence.

# Create text nodes
node1 = TextNode(
    text="Introduction to machine learning concepts.",
    metadata={"chapter": "1", "topic": "intro"},
)
node2 = TextNode(
    text="Deep learning architectures and applications.",
    metadata={"chapter": "2", "topic": "deep_learning"},
)

# Establish relationships (each side stores a RelatedNodeInfo for the other)
node2.relationships[NodeRelationship.PREVIOUS] = node1.as_related_node_info()
node1.relationships[NodeRelationship.NEXT] = node2.as_related_node_info()

# Access node properties
print(f"Node ID: {node1.node_id}")
print(f"Text: {node1.get_text()}")
print(f"Content with metadata: {node1.get_content()}")

from llama_index.core.schema import NodeWithScore
# Usage example: wrap nodes with relevance scores and iterate the results.
# `node1` and `node2` are the TextNodes created in the preceding snippet.

# Create scored nodes (typically from retrieval)
scored_nodes = [
    NodeWithScore(node=node1, score=0.85),
    NodeWithScore(node=node2, score=0.72),
]

# Process results
for scored_node in scored_nodes:
    print(f"Score: {scored_node.score:.2f}")
    print(f"Text: {scored_node.text}")
    print(f"Node ID: {scored_node.node_id}")


class ObjectType(str, Enum):
    """Types of objects in LlamaIndex."""

    TEXT = "text"
    IMAGE = "image"
    INDEX = "index"
    DOCUMENT = "document"
class MetadataMode(str, Enum):
    """Modes for including metadata in content."""

    ALL = "all"  # Include all metadata
    EMBED = "embed"  # Include only embedding metadata
    LLM = "llm"  # Include only LLM metadata
    NONE = "none"  # Include no metadata
class Modality(str, Enum):
    """Content modalities supported."""

    TEXT = "text"
    IMAGE = "image"
    AUDIO = "audio"
    VIDEO = "video"


# Install with Tessl CLI
npx tessl i tessl/pypi-llama-index-core