Interface between LLMs and your data
—
Components for finding and ranking relevant information from indices. Retrievers serve as the core information retrieval layer, supporting various search strategies from simple vector similarity to advanced multi-step reasoning and query fusion.
Foundation interface for all retriever implementations, providing standardized query processing and result formatting.
class BaseRetriever:
    """
    Base interface for all retriever implementations.

    Parameters:
    - callback_manager: Optional[CallbackManager], callback management system (default None)
    - object_map: Optional[ObjectMap], object mapping for retrieval (default None)
    - verbose: bool, whether to enable verbose logging (default False)
    - **kwargs: additional keyword arguments
    """

    def __init__(
        self,
        callback_manager: Optional[CallbackManager] = None,
        object_map: Optional[ObjectMap] = None,
        verbose: bool = False,
        **kwargs
    ) -> None: ...

    def retrieve(self, str_or_query_bundle: Union[str, QueryBundle]) -> List[NodeWithScore]:
        """
        Retrieve relevant nodes for a query.

        Parameters:
        - str_or_query_bundle: Union[str, QueryBundle], query string or bundle

        Returns:
        - List[NodeWithScore], ranked list of relevant nodes with scores
        """

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """
        Internal retrieval method to be implemented by subclasses.

        Parameters:
        - query_bundle: QueryBundle, the query to retrieve nodes for

        Returns:
        - List[NodeWithScore], the retrieved nodes with scores
        """

    def _get_prompt_modules(self) -> PromptMixinType:
        """Get prompt modules used by retriever."""

Retrievers that leverage vector embeddings for semantic similarity search and filtering.
class VectorIndexRetriever(BaseRetriever):
    """
    Retriever for vector-based semantic similarity search.

    Parameters:
    - index: VectorStoreIndex, the vector index to retrieve from
    - similarity_top_k: int, number of top similar nodes to retrieve (default 10)
    - vector_store_query_mode: str, query mode for vector store (default "default")
    - filters: Optional[MetadataFilters], metadata filters for retrieval (default None)
    - alpha: Optional[float], weight for sparse/dense retrieval combination (default None)
    - doc_ids: Optional[List[str]], specific document IDs to retrieve from (default None)
    - vector_store_kwargs: Optional[dict], additional vector store arguments (default None)
    - **kwargs: additional keyword arguments
    """

    def __init__(
        self,
        index: VectorStoreIndex,
        similarity_top_k: int = 10,
        vector_store_query_mode: str = "default",
        filters: Optional[MetadataFilters] = None,
        alpha: Optional[float] = None,
        doc_ids: Optional[List[str]] = None,
        vector_store_kwargs: Optional[dict] = None,
        **kwargs
    ) -> None: ...
class VectorIndexAutoRetriever(BaseRetriever):
    """
    Auto retriever with metadata filtering based on natural language queries.

    Parameters:
    - index: VectorStoreIndex, the vector index to retrieve from
    - vector_store_info: VectorStoreInfo, metadata about vector store structure
    - similarity_top_k: int, number of similar nodes to retrieve (default 10)
    - empty_query_top_k: Optional[int], top k when query is empty (default None)
    - max_top_k: int, maximum number of nodes to retrieve (default 10)
    - llm: Optional[LLM], language model for filter generation (default None)
    - **kwargs: additional keyword arguments
    """

    def __init__(
        self,
        index: VectorStoreIndex,
        vector_store_info: VectorStoreInfo,
        similarity_top_k: int = 10,
        empty_query_top_k: Optional[int] = None,
        max_top_k: int = 10,
        llm: Optional[LLM] = None,
        **kwargs
    ) -> None: ...

Retrievers for comprehensive document retrieval and selection from summary indices.
class SummaryIndexRetriever(BaseRetriever):
    """
    Retriever that returns all nodes from a summary index.

    Parameters:
    - index: SummaryIndex, the summary index to retrieve from
    - **kwargs: additional keyword arguments
    """

    def __init__(self, index: SummaryIndex, **kwargs) -> None: ...
class SummaryIndexEmbeddingRetriever(BaseRetriever):
    """
    Summary index retriever with embedding-based node selection.

    Parameters:
    - index: SummaryIndex, the summary index to retrieve from
    - similarity_top_k: int, number of similar nodes to retrieve (default 10)
    - embed_model: Optional[BaseEmbedding], embedding model for similarity (default None)
    - **kwargs: additional keyword arguments
    """

    def __init__(
        self,
        index: SummaryIndex,
        similarity_top_k: int = 10,
        embed_model: Optional[BaseEmbedding] = None,
        **kwargs
    ) -> None: ...
class SummaryIndexLLMRetriever(BaseRetriever):
    """
    Summary index retriever with LLM-based node selection.

    Parameters:
    - index: SummaryIndex, the summary index to retrieve from
    - choice_select_prompt: Optional[BasePromptTemplate], prompt for node selection (default None)
    - choice_batch_size: int, batch size for LLM selection (default 10)
    - format_node_batch_fn: Optional[Callable], function to format node batches (default None)
    - parse_choice_select_answer_fn: Optional[Callable], function to parse LLM response (default None)
    - llm: Optional[LLM], language model for selection (default None)
    - **kwargs: additional keyword arguments
    """

    def __init__(
        self,
        index: SummaryIndex,
        choice_select_prompt: Optional[BasePromptTemplate] = None,
        choice_batch_size: int = 10,
        format_node_batch_fn: Optional[Callable] = None,
        parse_choice_select_answer_fn: Optional[Callable] = None,
        llm: Optional[LLM] = None,
        **kwargs
    ) -> None: ...

Specialized retrievers for hierarchical tree-structured indices with various traversal strategies.
class TreeAllLeafRetriever(BaseRetriever):
    """
    Retriever that returns all leaf nodes from a tree index.

    Parameters:
    - index: TreeIndex, the tree index to retrieve from
    - **kwargs: additional keyword arguments
    """

    def __init__(self, index: TreeIndex, **kwargs) -> None: ...
class TreeSelectLeafEmbeddingRetriever(BaseRetriever):
    """
    Tree retriever with embedding-based leaf node selection.

    Parameters:
    - index: TreeIndex, the tree index to retrieve from
    - embed_model: Optional[BaseEmbedding], embedding model for selection (default None)
    - similarity_top_k: int, number of similar nodes to retrieve (default 10)
    - **kwargs: additional keyword arguments
    """

    def __init__(
        self,
        index: TreeIndex,
        embed_model: Optional[BaseEmbedding] = None,
        similarity_top_k: int = 10,
        **kwargs
    ) -> None: ...
class TreeSelectLeafRetriever(BaseRetriever):
    """
    Tree retriever with LLM-based leaf node selection.

    Parameters:
    - index: TreeIndex, the tree index to retrieve from
    - child_branch_factor: int, number of child nodes to consider per branch (default 1)
    - llm: Optional[LLM], language model for selection (default None)
    - **kwargs: additional keyword arguments
    """

    def __init__(
        self,
        index: TreeIndex,
        child_branch_factor: int = 1,
        llm: Optional[LLM] = None,
        **kwargs
    ) -> None: ...
class TreeRootRetriever(BaseRetriever):
    """
    Retriever that returns the root node of a tree index.

    Parameters:
    - index: TreeIndex, the tree index to retrieve from
    - **kwargs: additional keyword arguments
    """

    def __init__(self, index: TreeIndex, **kwargs) -> None: ...

Retrievers for keyword-based search and matching operations.
class KeywordTableSimpleRetriever(BaseRetriever):
    """
    Simple keyword table retriever for exact keyword matching.

    Parameters:
    - index: KeywordTableIndex, the keyword table index
    - max_keywords_per_query: int, maximum keywords to extract per query (default 10)
    - num_chunks_per_query: int, number of chunks to retrieve per query (default 10)
    - keyword_extractor: Optional[BaseKeywordExtractor], keyword extraction method (default None)
    - **kwargs: additional keyword arguments
    """

    def __init__(
        self,
        index: KeywordTableIndex,
        max_keywords_per_query: int = 10,
        num_chunks_per_query: int = 10,
        keyword_extractor: Optional[BaseKeywordExtractor] = None,
        **kwargs
    ) -> None: ...

Retrievers for graph-based knowledge representation and traversal.
class KGTableRetriever(BaseRetriever):
    """
    Knowledge graph table retriever for entity-based queries.

    Parameters:
    - index: KnowledgeGraphIndex, the knowledge graph index
    - retriever_mode: str, retrieval mode (keyword, embedding, hybrid) (default "keyword")
    - similarity_top_k: int, number of similar nodes to retrieve (default 2)
    - graph_store_query_depth: int, depth of graph traversal (default 2)
    - use_global_node_triplets: bool, whether to use global node relationships (default True)
    - max_knowledge_sequence: int, maximum knowledge sequence length (default 128)
    - keyword_extractor: Optional[BaseKeywordExtractor], keyword extraction method (default None)
    - **kwargs: additional keyword arguments
    """

    def __init__(
        self,
        index: KnowledgeGraphIndex,
        retriever_mode: str = "keyword",
        similarity_top_k: int = 2,
        graph_store_query_depth: int = 2,
        use_global_node_triplets: bool = True,
        max_knowledge_sequence: int = 128,
        keyword_extractor: Optional[BaseKeywordExtractor] = None,
        **kwargs
    ) -> None: ...
class KnowledgeGraphRAGRetriever(BaseRetriever):
    """
    RAG-based knowledge graph retriever combining entity extraction and graph traversal.

    Parameters:
    - storage_context: StorageContext, storage configuration
    - entity_extract_policy: Optional[str], entity extraction policy (default None)
    - synonym_expand_policy: Optional[str], synonym expansion policy (default None)
    - retriever_mode: str, retrieval mode configuration (default "keyword")
    - llm: Optional[LLM], language model for processing (default None)
    - verbose: bool, whether to enable verbose logging (default True)
    - **kwargs: additional keyword arguments
    """

    def __init__(
        self,
        storage_context: StorageContext,
        entity_extract_policy: Optional[str] = None,
        synonym_expand_policy: Optional[str] = None,
        retriever_mode: str = "keyword",
        llm: Optional[LLM] = None,
        verbose: bool = True,
        **kwargs
    ) -> None: ...

Advanced retrievers for property graph structures with Cypher query support.
class BasePGRetriever(BaseRetriever):
    """
    Base class for property graph retrievers.

    Parameters:
    - graph_store: PropertyGraphStore, the property graph store
    - llm: Optional[LLM], language model for processing (default None)
    - **kwargs: additional keyword arguments
    """

    def __init__(
        self,
        graph_store: PropertyGraphStore,
        llm: Optional[LLM] = None,
        **kwargs
    ) -> None: ...
class PGRetriever(BasePGRetriever):
    """
    Standard property graph retriever with multiple retrieval strategies.

    Parameters:
    - graph_store: PropertyGraphStore, the property graph store
    - include_text: bool, whether to include text content in results (default True)
    - llm: Optional[LLM], language model for processing (default None)
    - **kwargs: additional keyword arguments
    """

    def __init__(
        self,
        graph_store: PropertyGraphStore,
        include_text: bool = True,
        llm: Optional[LLM] = None,
        **kwargs
    ) -> None: ...
class LLMSynonymRetriever(BasePGRetriever):
    """
    Property graph retriever with LLM-based synonym expansion.

    Parameters:
    - graph_store: PropertyGraphStore, the property graph store
    - llm: Optional[LLM], language model for synonym generation (default None)
    - include_text: bool, whether to include text in results (default True)
    - synonym_prompt: Optional[PromptTemplate], prompt for synonym generation (default None)
    - output_parser: Optional[BaseOutputParser], parser for LLM output (default None)
    - max_keywords: int, maximum keywords to generate (default 10)
    - path_depth: int, depth of graph path traversal (default 1)
    - **kwargs: additional keyword arguments
    """

    def __init__(
        self,
        graph_store: PropertyGraphStore,
        llm: Optional[LLM] = None,
        include_text: bool = True,
        synonym_prompt: Optional[PromptTemplate] = None,
        output_parser: Optional[BaseOutputParser] = None,
        max_keywords: int = 10,
        path_depth: int = 1,
        **kwargs
    ) -> None: ...
class CypherTemplateRetriever(BasePGRetriever):
    """
    Retriever using Cypher query templates for property graphs.

    Parameters:
    - graph_store: PropertyGraphStore, the property graph store
    - cypher_query_template: str, Cypher query template
    - output_parser: Optional[BaseOutputParser], parser for query results (default None)
    - llm: Optional[LLM], language model for template processing (default None)
    - **kwargs: additional keyword arguments
    """

    def __init__(
        self,
        graph_store: PropertyGraphStore,
        cypher_query_template: str,
        output_parser: Optional[BaseOutputParser] = None,
        llm: Optional[LLM] = None,
        **kwargs
    ) -> None: ...
class TextToCypherRetriever(BasePGRetriever):
    """
    Natural language to Cypher query retriever.

    Parameters:
    - graph_store: PropertyGraphStore, the property graph store
    - nl_to_cypher_template: Optional[PromptTemplate], natural language to Cypher prompt (default None)
    - cypher_validation_template: Optional[PromptTemplate], Cypher validation prompt (default None)
    - allowed_output_fields: Optional[List[str]], allowed output fields (default None)
    - llm: Optional[LLM], language model for query generation (default None)
    - **kwargs: additional keyword arguments
    """

    def __init__(
        self,
        graph_store: PropertyGraphStore,
        nl_to_cypher_template: Optional[PromptTemplate] = None,
        cypher_validation_template: Optional[PromptTemplate] = None,
        allowed_output_fields: Optional[List[str]] = None,
        llm: Optional[LLM] = None,
        **kwargs
    ) -> None: ...

Retrievers for SQL database queries and natural language to SQL conversion.
class SQLRetriever(BaseRetriever):
    """
    SQL query-based retriever for structured database content.

    Parameters:
    - sql_database: SQLDatabase, the SQL database connection
    - return_raw: bool, whether to return raw SQL results (default True)
    - **kwargs: additional keyword arguments
    """

    def __init__(
        self,
        sql_database: SQLDatabase,
        return_raw: bool = True,
        **kwargs
    ) -> None: ...
class NLSQLRetriever(BaseRetriever):
    """
    Natural language to SQL query retriever.

    Parameters:
    - sql_database: SQLDatabase, the SQL database connection
    - text_to_sql_prompt: Optional[BasePromptTemplate], text to SQL conversion prompt (default None)
    - context_query_kwargs: Optional[dict], additional query context arguments (default None)
    - table_retriever: Optional[ObjectRetriever], table schema retriever (default None)
    - context_str_prefix: Optional[str], prefix for context strings (default None)
    - sql_parser_mode: SQLParserMode, SQL parsing mode (default, strict, or relaxed;
      defaults to SQLParserMode.DEFAULT)
    - llm: Optional[LLM], language model for SQL generation (default None)
    - **kwargs: additional keyword arguments
    """

    def __init__(
        self,
        sql_database: SQLDatabase,
        text_to_sql_prompt: Optional[BasePromptTemplate] = None,
        context_query_kwargs: Optional[dict] = None,
        table_retriever: Optional[ObjectRetriever] = None,
        context_str_prefix: Optional[str] = None,
        sql_parser_mode: SQLParserMode = SQLParserMode.DEFAULT,
        llm: Optional[LLM] = None,
        **kwargs
    ) -> None: ...

Sophisticated retrieval strategies combining multiple approaches and reasoning patterns.
class RecursiveRetriever(BaseRetriever):
    """
    Recursive retriever for multi-step information gathering.

    Parameters:
    - root_id: str, identifier of the root node to start retrieval
    - retriever_dict: Dict[str, BaseRetriever], mapping of node IDs to retrievers
    - query_transform_fn: Optional[Callable], function to transform queries (default None)
    - node_dict: Optional[Dict[str, BaseNode]], mapping of node IDs to nodes (default None)
    - verbose: bool, whether to enable verbose logging (default True)
    - **kwargs: additional keyword arguments
    """

    def __init__(
        self,
        root_id: str,
        retriever_dict: Dict[str, BaseRetriever],
        query_transform_fn: Optional[Callable] = None,
        node_dict: Optional[Dict[str, BaseNode]] = None,
        verbose: bool = True,
        **kwargs
    ) -> None: ...
class AutoMergingRetriever(BaseRetriever):
    """
    Auto-merging retriever for hierarchical node structures.

    Parameters:
    - vector_retriever: BaseRetriever, base vector retriever
    - storage_context: StorageContext, storage configuration
    - simple_ratio_thresh: float, threshold for simple merging (default 0.5)
    - verbose: bool, whether to enable verbose logging (default True)
    - **kwargs: additional keyword arguments
    """

    def __init__(
        self,
        vector_retriever: BaseRetriever,
        storage_context: StorageContext,
        simple_ratio_thresh: float = 0.5,
        verbose: bool = True,
        **kwargs
    ) -> None: ...
class RouterRetriever(BaseRetriever):
    """
    Router-based retriever for selecting appropriate retrieval strategies.

    Parameters:
    - selector: BaseSelector, selector for choosing retrievers
    - retriever_tools: List[RetrieverTool], available retriever tools
    - llm: Optional[LLM], language model for routing decisions (default None)
    - **kwargs: additional keyword arguments
    """

    def __init__(
        self,
        selector: BaseSelector,
        retriever_tools: List[RetrieverTool],
        llm: Optional[LLM] = None,
        **kwargs
    ) -> None: ...
class QueryFusionRetriever(BaseRetriever):
    """
    Query fusion retriever combining multiple query variations.

    Parameters:
    - retrievers: List[BaseRetriever], retrievers to fuse results from
    - similarity_top_k: int, number of similar nodes per retriever (default 2)
    - num_queries: int, number of query variations to generate (default 4)
    - mode: str, fusion mode (reciprocal_rank, relative_score, dist_based_score)
      (default "reciprocal_rank")
    - use_async: bool, whether to use async retrieval (default True)
    - retriever_weights: Optional[List[float]], weights for individual retrievers (default None)
    - llm: Optional[LLM], language model for query generation (default None)
    - **kwargs: additional keyword arguments
    """

    def __init__(
        self,
        retrievers: List[BaseRetriever],
        similarity_top_k: int = 2,
        num_queries: int = 4,
        mode: str = "reciprocal_rank",
        use_async: bool = True,
        retriever_weights: Optional[List[float]] = None,
        llm: Optional[LLM] = None,
        **kwargs
    ) -> None: ...
class TransformRetriever(BaseRetriever):
    """
    Transform-based retriever with query preprocessing.

    Parameters:
    - retriever: BaseRetriever, base retriever to transform
    - query_transform: BaseQueryTransform, query transformation method
    - **kwargs: additional keyword arguments
    """

    def __init__(
        self,
        retriever: BaseRetriever,
        query_transform: BaseQueryTransform,
        **kwargs
    ) -> None: ...

Placeholder retriever for empty or placeholder indices.
class EmptyIndexRetriever(BaseRetriever):
    """
    Retriever that returns empty results, used for placeholder indices.

    Parameters:
    - index: EmptyIndex, the empty index
    - **kwargs: additional keyword arguments
    """

    def __init__(self, index: EmptyIndex, **kwargs) -> None: ...

Specialized retrievers for image and multi-modal content.
class BaseImageRetriever:
    """
    Base interface for image-specific retrieval operations.

    Parameters:
    - callback_manager: Optional[CallbackManager], callback management (default None)
    """

    def __init__(self, callback_manager: Optional[CallbackManager] = None) -> None: ...

    def text_to_image_retrieve(self, str_or_query_bundle: Union[str, QueryBundle]) -> List[NodeWithScore]:
        """
        Retrieve images based on text query.

        Parameters:
        - str_or_query_bundle: Union[str, QueryBundle], query string or bundle

        Returns:
        - List[NodeWithScore], retrieved nodes with scores
        """

    def image_to_image_retrieve(self, str_or_query_bundle: Union[str, QueryBundle]) -> List[NodeWithScore]:
        """
        Retrieve similar images based on image query.

        Parameters:
        - str_or_query_bundle: Union[str, QueryBundle], query string or bundle

        Returns:
        - List[NodeWithScore], retrieved nodes with scores
        """

from llama_index.core import VectorStoreIndex, Document
from llama_index.core.retrievers import VectorIndexRetriever
# Basic usage: build a vector index over a few documents and query it.
# Create documents and index
documents = [
    Document(text="Machine learning is a subset of artificial intelligence."),
    Document(text="Deep learning uses neural networks with multiple layers."),
    Document(text="Natural language processing helps computers understand text.")
]
index = VectorStoreIndex.from_documents(documents)
# Create retriever
# similarity_top_k=2 asks for the two most similar nodes; filters=None
# applies no metadata filtering.
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=2,
    filters=None
)
# Retrieve relevant nodes
nodes = retriever.retrieve("What is machine learning?")
# Each result is a NodeWithScore; print its score and text.
for node in nodes:
    print(f"Score: {node.score:.3f}")
    print(f"Text: {node.text}")

from llama_index.core.retrievers import RecursiveRetriever
from llama_index.core.schema import IndexNode
# SummaryIndex is used below but was never imported anywhere in this document;
# VectorStoreIndex is re-imported so this snippet does not depend on the
# previous example's imports.
from llama_index.core import SummaryIndex, VectorStoreIndex
# NOTE(review): IndexNode is imported but not used in this snippet —
# presumably the summary index is meant to contain IndexNode entries that
# point at the retriever keys below; confirm against the library docs.
# NOTE: `documents`, `ml_documents` and `dl_documents` are placeholders for
# lists of Document objects that the reader must supply.
# Setup hierarchical indices
summary_index = SummaryIndex.from_documents(documents)
detail_indices = {
    "ml_detail": VectorStoreIndex.from_documents(ml_documents),
    "dl_detail": VectorStoreIndex.from_documents(dl_documents)
}
# Create retriever mapping
# Keys are the IDs RecursiveRetriever uses to look up a retriever; "summary"
# matches root_id below.
retriever_dict = {
    "summary": summary_index.as_retriever(),
    "ml_detail": detail_indices["ml_detail"].as_retriever(),
    "dl_detail": detail_indices["dl_detail"].as_retriever()
}
# Recursive retriever
recursive_retriever = RecursiveRetriever(
    root_id="summary",
    retriever_dict=retriever_dict,
    verbose=True
)
# Retrieve with multi-step reasoning
results = recursive_retriever.retrieve("Explain deep learning architectures")

from llama_index.core.retrievers import QueryFusionRetriever
# Multiple retrieval strategies
# NOTE(review): `index` is presumably the VectorStoreIndex built in the basic
# example; `keyword_index` is not defined anywhere in this document —
# assumed to be a keyword table index supplied by the reader. TODO confirm.
vector_retriever = index.as_retriever(similarity_top_k=3)
keyword_retriever = keyword_index.as_retriever(max_keywords_per_query=5)
# Fusion retriever
# The values below match the QueryFusionRetriever signature defaults
# documented above, written out explicitly.
fusion_retriever = QueryFusionRetriever(
    retrievers=[vector_retriever, keyword_retriever],
    similarity_top_k=2,
    num_queries=4,
    mode="reciprocal_rank",
    use_async=True
)
# Retrieve with query fusion
nodes = fusion_retriever.retrieve("machine learning applications")

class SQLParserMode(str, Enum):
    """
    SQL parsing modes for natural language to SQL conversion.

    String-valued enumeration; each member's value is its lowercase name.
    """
    DEFAULT = "default"
    STRICT = "strict"
    RELAXED = "relaxed"
# Legacy aliases maintained for compatibility
ListIndexEmbeddingRetriever = SummaryIndexEmbeddingRetriever
ListIndexRetriever = SummaryIndexRetriever

Install with Tessl CLI:
npx tessl i tessl/pypi-llama-index-core