0
# Retrievers
1
2
Retrievers find and rank relevant nodes in an index. They form the core information-retrieval layer and support search strategies ranging from simple vector similarity to multi-step recursive retrieval and query fusion.
3
4
## Capabilities
5
6
### Base Retriever Interface
7
8
Foundation interface for all retriever implementations, providing standardized query processing and result formatting.
9
10
```python { .api }
11
class BaseRetriever:
12
"""
13
Base interface for all retriever implementations.
14
15
Parameters:
16
- callback_manager: Optional[CallbackManager], callback management system
17
- object_map: Optional[ObjectMap], object mapping for retrieval
18
- verbose: bool, whether to enable verbose logging
19
"""
20
def __init__(
21
self,
22
callback_manager: Optional[CallbackManager] = None,
23
object_map: Optional[ObjectMap] = None,
24
verbose: bool = False,
25
**kwargs
26
): ...
27
28
def retrieve(self, str_or_query_bundle: Union[str, QueryBundle]) -> List[NodeWithScore]:
29
"""
30
Retrieve relevant nodes for a query.
31
32
Parameters:
33
- str_or_query_bundle: Union[str, QueryBundle], query string or bundle
34
35
Returns:
36
- List[NodeWithScore], ranked list of relevant nodes with scores
37
"""
38
39
def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
40
"""Internal retrieval method to be implemented by subclasses."""
41
42
def _get_prompt_modules(self) -> PromptMixinType:
43
"""Get prompt modules used by retriever."""
44
```
45
46
### Vector Store Retrievers
47
48
Retrievers that leverage vector embeddings for semantic similarity search and filtering.
49
50
```python { .api }
51
class VectorIndexRetriever(BaseRetriever):
52
"""
53
Retriever for vector-based semantic similarity search.
54
55
Parameters:
56
- index: VectorStoreIndex, the vector index to retrieve from
57
- similarity_top_k: int, number of top similar nodes to retrieve
58
- vector_store_query_mode: str, query mode for vector store
59
- filters: Optional[MetadataFilters], metadata filters for retrieval
60
- alpha: Optional[float], weight for sparse/dense retrieval combination
61
- doc_ids: Optional[List[str]], specific document IDs to retrieve from
62
- vector_store_kwargs: dict, additional vector store arguments
63
"""
64
def __init__(
65
self,
66
index: VectorStoreIndex,
67
similarity_top_k: int = 10,
68
vector_store_query_mode: str = "default",
69
filters: Optional[MetadataFilters] = None,
70
alpha: Optional[float] = None,
71
doc_ids: Optional[List[str]] = None,
72
vector_store_kwargs: Optional[dict] = None,
73
**kwargs
74
): ...
75
76
class VectorIndexAutoRetriever(BaseRetriever):
77
"""
78
Auto retriever with metadata filtering based on natural language queries.
79
80
Parameters:
81
- index: VectorStoreIndex, the vector index to retrieve from
82
- vector_store_info: VectorStoreInfo, metadata about vector store structure
83
- similarity_top_k: int, number of similar nodes to retrieve
84
- empty_query_top_k: Optional[int], top k when query is empty
85
- max_top_k: int, maximum number of nodes to retrieve
86
- llm: Optional[LLM], language model for filter generation
87
"""
88
def __init__(
89
self,
90
index: VectorStoreIndex,
91
vector_store_info: VectorStoreInfo,
92
similarity_top_k: int = 10,
93
empty_query_top_k: Optional[int] = None,
94
max_top_k: int = 10,
95
llm: Optional[LLM] = None,
96
**kwargs
97
): ...
98
```
99
100
### Summary Index Retrievers
101
102
Retrievers for summary indices: return every node in the index, or select a subset of nodes by embedding similarity or by LLM choice.
103
104
```python { .api }
105
class SummaryIndexRetriever(BaseRetriever):
106
"""
107
Retriever that returns all nodes from a summary index.
108
109
Parameters:
110
- index: SummaryIndex, the summary index to retrieve from
111
"""
112
def __init__(self, index: SummaryIndex, **kwargs): ...
113
114
class SummaryIndexEmbeddingRetriever(BaseRetriever):
115
"""
116
Summary index retriever with embedding-based node selection.
117
118
Parameters:
119
- index: SummaryIndex, the summary index to retrieve from
120
- similarity_top_k: int, number of similar nodes to retrieve
121
- embed_model: Optional[BaseEmbedding], embedding model for similarity
122
"""
123
def __init__(
124
self,
125
index: SummaryIndex,
126
similarity_top_k: int = 10,
127
embed_model: Optional[BaseEmbedding] = None,
128
**kwargs
129
): ...
130
131
class SummaryIndexLLMRetriever(BaseRetriever):
132
"""
133
Summary index retriever with LLM-based node selection.
134
135
Parameters:
136
- index: SummaryIndex, the summary index to retrieve from
137
- choice_select_prompt: Optional[BasePromptTemplate], prompt for node selection
138
- choice_batch_size: int, batch size for LLM selection
139
- format_node_batch_fn: Optional[Callable], function to format node batches
140
- parse_choice_select_answer_fn: Optional[Callable], function to parse LLM response
141
- llm: Optional[LLM], language model for selection
142
"""
143
def __init__(
144
self,
145
index: SummaryIndex,
146
choice_select_prompt: Optional[BasePromptTemplate] = None,
147
choice_batch_size: int = 10,
148
format_node_batch_fn: Optional[Callable] = None,
149
parse_choice_select_answer_fn: Optional[Callable] = None,
150
llm: Optional[LLM] = None,
151
**kwargs
152
): ...
153
```
154
155
### Tree Index Retrievers
156
157
Specialized retrievers for hierarchical tree-structured indices with various traversal strategies.
158
159
```python { .api }
160
class TreeAllLeafRetriever(BaseRetriever):
161
"""
162
Retriever that returns all leaf nodes from a tree index.
163
164
Parameters:
165
- index: TreeIndex, the tree index to retrieve from
166
"""
167
def __init__(self, index: TreeIndex, **kwargs): ...
168
169
class TreeSelectLeafEmbeddingRetriever(BaseRetriever):
170
"""
171
Tree retriever with embedding-based leaf node selection.
172
173
Parameters:
174
- index: TreeIndex, the tree index to retrieve from
175
- embed_model: Optional[BaseEmbedding], embedding model for selection
176
- similarity_top_k: int, number of similar nodes to retrieve
177
"""
178
def __init__(
179
self,
180
index: TreeIndex,
181
embed_model: Optional[BaseEmbedding] = None,
182
similarity_top_k: int = 10,
183
**kwargs
184
): ...
185
186
class TreeSelectLeafRetriever(BaseRetriever):
187
"""
188
Tree retriever with LLM-based leaf node selection.
189
190
Parameters:
191
- index: TreeIndex, the tree index to retrieve from
192
- child_branch_factor: int, number of child nodes to consider per branch
193
- llm: Optional[LLM], language model for selection
194
"""
195
def __init__(
196
self,
197
index: TreeIndex,
198
child_branch_factor: int = 1,
199
llm: Optional[LLM] = None,
200
**kwargs
201
): ...
202
203
class TreeRootRetriever(BaseRetriever):
204
"""
205
Retriever that returns the root node of a tree index.
206
207
Parameters:
208
- index: TreeIndex, the tree index to retrieve from
209
"""
210
def __init__(self, index: TreeIndex, **kwargs): ...
211
```
212
213
### Keyword Table Retrievers
214
215
Retrievers for keyword-based search and matching operations.
216
217
```python { .api }
218
class KeywordTableSimpleRetriever(BaseRetriever):
219
"""
220
Simple keyword table retriever for exact keyword matching.
221
222
Parameters:
223
- index: KeywordTableIndex, the keyword table index
224
- max_keywords_per_query: int, maximum keywords to extract per query
225
- num_chunks_per_query: int, number of chunks to retrieve per query
226
- keyword_extractor: Optional[BaseKeywordExtractor], keyword extraction method
227
"""
228
def __init__(
229
self,
230
index: KeywordTableIndex,
231
max_keywords_per_query: int = 10,
232
num_chunks_per_query: int = 10,
233
keyword_extractor: Optional[BaseKeywordExtractor] = None,
234
**kwargs
235
): ...
236
```
237
238
### Knowledge Graph Retrievers
239
240
Retrievers for graph-based knowledge representation and traversal.
241
242
```python { .api }
243
class KGTableRetriever(BaseRetriever):
244
"""
245
Knowledge graph table retriever for entity-based queries.
246
247
Parameters:
248
- index: KnowledgeGraphIndex, the knowledge graph index
249
- retriever_mode: str, retrieval mode (keyword, embedding, hybrid)
250
- similarity_top_k: int, number of similar nodes to retrieve
251
- graph_store_query_depth: int, depth of graph traversal
252
- use_global_node_triplets: bool, whether to use global node relationships
253
- max_knowledge_sequence: int, maximum knowledge sequence length
254
- keyword_extractor: Optional[BaseKeywordExtractor], keyword extraction method
255
"""
256
def __init__(
257
self,
258
index: KnowledgeGraphIndex,
259
retriever_mode: str = "keyword",
260
similarity_top_k: int = 2,
261
graph_store_query_depth: int = 2,
262
use_global_node_triplets: bool = True,
263
max_knowledge_sequence: int = 128,
264
keyword_extractor: Optional[BaseKeywordExtractor] = None,
265
**kwargs
266
): ...
267
268
class KnowledgeGraphRAGRetriever(BaseRetriever):
269
"""
270
RAG-based knowledge graph retriever combining entity extraction and graph traversal.
271
272
Parameters:
273
- storage_context: StorageContext, storage configuration
274
- entity_extract_policy: Optional[str], entity extraction policy
275
- synonym_expand_policy: Optional[str], synonym expansion policy
276
- retriever_mode: str, retrieval mode configuration
277
- llm: Optional[LLM], language model for processing
278
- verbose: bool, whether to enable verbose logging
279
"""
280
def __init__(
281
self,
282
storage_context: StorageContext,
283
entity_extract_policy: Optional[str] = None,
284
synonym_expand_policy: Optional[str] = None,
285
retriever_mode: str = "keyword",
286
llm: Optional[LLM] = None,
287
verbose: bool = True,
288
**kwargs
289
): ...
290
```
291
292
### Property Graph Retrievers
293
294
Advanced retrievers for property graph structures with Cypher query support.
295
296
```python { .api }
297
class BasePGRetriever(BaseRetriever):
298
"""
299
Base class for property graph retrievers.
300
301
Parameters:
302
- graph_store: PropertyGraphStore, the property graph store
303
- llm: Optional[LLM], language model for processing
304
"""
305
def __init__(
306
self,
307
graph_store: PropertyGraphStore,
308
llm: Optional[LLM] = None,
309
**kwargs
310
): ...
311
312
class PGRetriever(BasePGRetriever):
313
"""
314
Standard property graph retriever with multiple retrieval strategies.
315
316
Parameters:
317
- graph_store: PropertyGraphStore, the property graph store
318
- include_text: bool, whether to include text content in results
319
- llm: Optional[LLM], language model for processing
320
"""
321
def __init__(
322
self,
323
graph_store: PropertyGraphStore,
324
include_text: bool = True,
325
llm: Optional[LLM] = None,
326
**kwargs
327
): ...
328
329
class LLMSynonymRetriever(BasePGRetriever):
330
"""
331
Property graph retriever with LLM-based synonym expansion.
332
333
Parameters:
334
- graph_store: PropertyGraphStore, the property graph store
335
- llm: Optional[LLM], language model for synonym generation
336
- include_text: bool, whether to include text in results
337
- synonym_prompt: Optional[PromptTemplate], prompt for synonym generation
338
- output_parser: Optional[BaseOutputParser], parser for LLM output
339
- max_keywords: int, maximum keywords to generate
340
- path_depth: int, depth of graph path traversal
341
"""
342
def __init__(
343
self,
344
graph_store: PropertyGraphStore,
345
llm: Optional[LLM] = None,
346
include_text: bool = True,
347
synonym_prompt: Optional[PromptTemplate] = None,
348
output_parser: Optional[BaseOutputParser] = None,
349
max_keywords: int = 10,
350
path_depth: int = 1,
351
**kwargs
352
): ...
353
354
class CypherTemplateRetriever(BasePGRetriever):
355
"""
356
Retriever using Cypher query templates for property graphs.
357
358
Parameters:
359
- graph_store: PropertyGraphStore, the property graph store
360
- cypher_query_template: str, Cypher query template
361
- output_parser: Optional[BaseOutputParser], parser for query results
362
- llm: Optional[LLM], language model for template processing
363
"""
364
def __init__(
365
self,
366
graph_store: PropertyGraphStore,
367
cypher_query_template: str,
368
output_parser: Optional[BaseOutputParser] = None,
369
llm: Optional[LLM] = None,
370
**kwargs
371
): ...
372
373
class TextToCypherRetriever(BasePGRetriever):
374
"""
375
Natural language to Cypher query retriever.
376
377
Parameters:
378
- graph_store: PropertyGraphStore, the property graph store
379
- nl_to_cypher_template: Optional[PromptTemplate], natural language to Cypher prompt
380
- cypher_validation_template: Optional[PromptTemplate], Cypher validation prompt
381
- allowed_output_fields: Optional[List[str]], allowed output fields
382
- llm: Optional[LLM], language model for query generation
383
"""
384
def __init__(
385
self,
386
graph_store: PropertyGraphStore,
387
nl_to_cypher_template: Optional[PromptTemplate] = None,
388
cypher_validation_template: Optional[PromptTemplate] = None,
389
allowed_output_fields: Optional[List[str]] = None,
390
llm: Optional[LLM] = None,
391
**kwargs
392
): ...
393
```
394
395
### SQL Retrievers
396
397
Retrievers for SQL database queries and natural language to SQL conversion.
398
399
```python { .api }
400
class SQLRetriever(BaseRetriever):
401
"""
402
SQL query-based retriever for structured database content.
403
404
Parameters:
405
- sql_database: SQLDatabase, the SQL database connection
406
- return_raw: bool, whether to return raw SQL results
407
"""
408
def __init__(
409
self,
410
sql_database: SQLDatabase,
411
return_raw: bool = True,
412
**kwargs
413
): ...
414
415
class NLSQLRetriever(BaseRetriever):
416
"""
417
Natural language to SQL query retriever.
418
419
Parameters:
420
- sql_database: SQLDatabase, the SQL database connection
421
- text_to_sql_prompt: Optional[BasePromptTemplate], text to SQL conversion prompt
422
- context_query_kwargs: Optional[dict], additional query context arguments
423
- table_retriever: Optional[ObjectRetriever], table schema retriever
424
- context_str_prefix: Optional[str], prefix for context strings
425
- sql_parser_mode: SQLParserMode, SQL parsing mode (default, strict, or relaxed)
426
- llm: Optional[LLM], language model for SQL generation
427
"""
428
def __init__(
429
self,
430
sql_database: SQLDatabase,
431
text_to_sql_prompt: Optional[BasePromptTemplate] = None,
432
context_query_kwargs: Optional[dict] = None,
433
table_retriever: Optional[ObjectRetriever] = None,
434
context_str_prefix: Optional[str] = None,
435
sql_parser_mode: SQLParserMode = SQLParserMode.DEFAULT,
436
llm: Optional[LLM] = None,
437
**kwargs
438
): ...
439
```
440
441
### Advanced Retrievers
442
443
Sophisticated retrieval strategies combining multiple approaches and reasoning patterns.
444
445
```python { .api }
446
class RecursiveRetriever(BaseRetriever):
447
"""
448
Recursive retriever for multi-step information gathering.
449
450
Parameters:
451
- root_id: str, identifier of the root node to start retrieval
452
- retriever_dict: Dict[str, BaseRetriever], mapping of node IDs to retrievers
453
- query_transform_fn: Optional[Callable], function to transform queries
454
- node_dict: Optional[Dict[str, BaseNode]], mapping of node IDs to nodes
455
- verbose: bool, whether to enable verbose logging
456
"""
457
def __init__(
458
self,
459
root_id: str,
460
retriever_dict: Dict[str, BaseRetriever],
461
query_transform_fn: Optional[Callable] = None,
462
node_dict: Optional[Dict[str, BaseNode]] = None,
463
verbose: bool = True,
464
**kwargs
465
): ...
466
467
class AutoMergingRetriever(BaseRetriever):
468
"""
469
Auto-merging retriever for hierarchical node structures.
470
471
Parameters:
472
- vector_retriever: BaseRetriever, base vector retriever
473
- storage_context: StorageContext, storage configuration
474
- simple_ratio_thresh: float, threshold for simple merging
475
- verbose: bool, whether to enable verbose logging
476
"""
477
def __init__(
478
self,
479
vector_retriever: BaseRetriever,
480
storage_context: StorageContext,
481
simple_ratio_thresh: float = 0.5,
482
verbose: bool = True,
483
**kwargs
484
): ...
485
486
class RouterRetriever(BaseRetriever):
487
"""
488
Router-based retriever for selecting appropriate retrieval strategies.
489
490
Parameters:
491
- selector: BaseSelector, selector for choosing retrievers
492
- retriever_tools: List[RetrieverTool], available retriever tools
493
- llm: Optional[LLM], language model for routing decisions
494
"""
495
def __init__(
496
self,
497
selector: BaseSelector,
498
retriever_tools: List[RetrieverTool],
499
llm: Optional[LLM] = None,
500
**kwargs
501
): ...
502
503
class QueryFusionRetriever(BaseRetriever):
504
"""
505
Query fusion retriever combining multiple query variations.
506
507
Parameters:
508
- retrievers: List[BaseRetriever], retrievers to fuse results from
509
- similarity_top_k: int, number of similar nodes per retriever
510
- num_queries: int, number of query variations to generate
511
- mode: str, fusion mode (reciprocal_rank, relative_score, dist_based_score)
512
- use_async: bool, whether to use async retrieval
513
- retriever_weights: Optional[List[float]], weights for individual retrievers
514
- llm: Optional[LLM], language model for query generation
515
"""
516
def __init__(
517
self,
518
retrievers: List[BaseRetriever],
519
similarity_top_k: int = 2,
520
num_queries: int = 4,
521
mode: str = "reciprocal_rank",
522
use_async: bool = True,
523
retriever_weights: Optional[List[float]] = None,
524
llm: Optional[LLM] = None,
525
**kwargs
526
): ...
527
528
class TransformRetriever(BaseRetriever):
529
"""
530
Transform-based retriever with query preprocessing.
531
532
Parameters:
533
- retriever: BaseRetriever, base retriever to transform
534
- query_transform: BaseQueryTransform, query transformation method
535
"""
536
def __init__(
537
self,
538
retriever: BaseRetriever,
539
query_transform: BaseQueryTransform,
540
**kwargs
541
): ...
542
```
543
544
### Empty Index Retriever
545
546
Retriever for empty (placeholder) indices; it always returns an empty result.
547
548
```python { .api }
549
class EmptyIndexRetriever(BaseRetriever):
550
"""
551
Retriever that returns empty results, used for placeholder indices.
552
553
Parameters:
554
- index: EmptyIndex, the empty index
555
"""
556
def __init__(self, index: EmptyIndex, **kwargs): ...
557
```
558
559
### Image Retrievers
560
561
Specialized retrievers for image and multi-modal content.
562
563
```python { .api }
564
class BaseImageRetriever:
565
"""
566
Base interface for image-specific retrieval operations.
567
568
Parameters:
569
- callback_manager: Optional[CallbackManager], callback management
570
"""
571
def __init__(self, callback_manager: Optional[CallbackManager] = None): ...
572
573
def text_to_image_retrieve(self, str_or_query_bundle: Union[str, QueryBundle]) -> List[NodeWithScore]:
574
"""Retrieve images based on text query."""
575
576
def image_to_image_retrieve(self, str_or_query_bundle: Union[str, QueryBundle]) -> List[NodeWithScore]:
577
"""Retrieve similar images based on image query."""
578
```
579
580
## Usage Examples
581
582
### Basic Vector Retrieval
583
584
```python
585
from llama_index.core import VectorStoreIndex, Document
586
from llama_index.core.retrievers import VectorIndexRetriever
587
588
# Create documents and index
589
documents = [
590
Document(text="Machine learning is a subset of artificial intelligence."),
591
Document(text="Deep learning uses neural networks with multiple layers."),
592
Document(text="Natural language processing helps computers understand text.")
593
]
594
595
index = VectorStoreIndex.from_documents(documents)
596
597
# Create retriever
598
retriever = VectorIndexRetriever(
599
index=index,
600
similarity_top_k=2,
601
filters=None
602
)
603
604
# Retrieve relevant nodes
605
nodes = retriever.retrieve("What is machine learning?")
606
for node in nodes:
607
print(f"Score: {node.score:.3f}")
608
print(f"Text: {node.text}")
609
```
610
611
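### Multi-step Recursive Retrieval

This example assumes that `SummaryIndex` and `VectorStoreIndex` have been imported from `llama_index.core`, and that `documents`, `ml_documents`, and `dl_documents` are lists of already-loaded `Document` objects.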
612
613
```python
614
from llama_index.core.retrievers import RecursiveRetriever
615
from llama_index.core.schema import IndexNode
616
617
# Setup hierarchical indices
618
summary_index = SummaryIndex.from_documents(documents)
619
detail_indices = {
620
"ml_detail": VectorStoreIndex.from_documents(ml_documents),
621
"dl_detail": VectorStoreIndex.from_documents(dl_documents)
622
}
623
624
# Create retriever mapping
625
retriever_dict = {
626
"summary": summary_index.as_retriever(),
627
"ml_detail": detail_indices["ml_detail"].as_retriever(),
628
"dl_detail": detail_indices["dl_detail"].as_retriever()
629
}
630
631
# Recursive retriever
632
recursive_retriever = RecursiveRetriever(
633
root_id="summary",
634
retriever_dict=retriever_dict,
635
verbose=True
636
)
637
638
# Retrieve with multi-step reasoning
639
results = recursive_retriever.retrieve("Explain deep learning architectures")
640
```
641
642
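### Query Fusion Retrieval

This example assumes that `index` is an existing vector store index (such as the one built in "Basic Vector Retrieval") and that `keyword_index` is an existing keyword table index.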
643
644
```python
645
from llama_index.core.retrievers import QueryFusionRetriever
646
647
# Multiple retrieval strategies
648
vector_retriever = index.as_retriever(similarity_top_k=3)
649
keyword_retriever = keyword_index.as_retriever(max_keywords_per_query=5)
650
651
# Fusion retriever
652
fusion_retriever = QueryFusionRetriever(
653
retrievers=[vector_retriever, keyword_retriever],
654
similarity_top_k=2,
655
num_queries=4,
656
mode="reciprocal_rank",
657
use_async=True
658
)
659
660
# Retrieve with query fusion
661
nodes = fusion_retriever.retrieve("machine learning applications")
662
```
663
664
## Types & Enums
665
666
```python { .api }
667
class SQLParserMode(str, Enum):
668
"""SQL parsing modes for natural language to SQL conversion."""
669
DEFAULT = "default"
670
STRICT = "strict"
671
RELAXED = "relaxed"
672
673
# Legacy aliases maintained for compatibility
674
ListIndexEmbeddingRetriever = SummaryIndexEmbeddingRetriever
675
ListIndexRetriever = SummaryIndexRetriever
676
```