or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

agents-tools.md · documents-nodes.md · evaluation.md · index.md · indices.md · llms-embeddings.md · node-parsers.md · postprocessors.md · prompts.md · query-engines.md · retrievers.md · settings.md · storage.md

docs/retrievers.md

0

# Retrievers

1

2

Components for finding and ranking relevant information from indices. Retrievers serve as the core information retrieval layer, supporting various search strategies from simple vector similarity to advanced multi-step reasoning and query fusion.

3

4

## Capabilities

5

6

### Base Retriever Interface

7

8

Foundation interface for all retriever implementations, providing standardized query processing and result formatting.

9

10

```python { .api }

11

class BaseRetriever:

12

"""

13

Base interface for all retriever implementations.

14

15

Parameters:

16

- callback_manager: Optional[CallbackManager], callback management system

17

- object_map: Optional[ObjectMap], object mapping for retrieval

18

- verbose: bool, whether to enable verbose logging

19

"""

20

def __init__(

21

self,

22

callback_manager: Optional[CallbackManager] = None,

23

object_map: Optional[ObjectMap] = None,

24

verbose: bool = False,

25

**kwargs

26

): ...

27

28

def retrieve(self, str_or_query_bundle: Union[str, QueryBundle]) -> List[NodeWithScore]:

29

"""

30

Retrieve relevant nodes for a query.

31

32

Parameters:

33

- str_or_query_bundle: Union[str, QueryBundle], query string or bundle

34

35

Returns:

36

- List[NodeWithScore], ranked list of relevant nodes with scores

37

"""

38

39

def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:

40

"""Internal retrieval method to be implemented by subclasses."""

41

42

def _get_prompt_modules(self) -> PromptMixinType:

43

"""Get prompt modules used by retriever."""

44

```

45

46

### Vector Store Retrievers

47

48

Retrievers that leverage vector embeddings for semantic similarity search and filtering.

49

50

```python { .api }

51

class VectorIndexRetriever(BaseRetriever):

52

"""

53

Retriever for vector-based semantic similarity search.

54

55

Parameters:

56

- index: VectorStoreIndex, the vector index to retrieve from

57

- similarity_top_k: int, number of top similar nodes to retrieve

58

- vector_store_query_mode: str, query mode for vector store

59

- filters: Optional[MetadataFilters], metadata filters for retrieval

60

- alpha: Optional[float], weight for sparse/dense retrieval combination

61

- doc_ids: Optional[List[str]], specific document IDs to retrieve from

62

- vector_store_kwargs: dict, additional vector store arguments

63

"""

64

def __init__(

65

self,

66

index: VectorStoreIndex,

67

similarity_top_k: int = 10,

68

vector_store_query_mode: str = "default",

69

filters: Optional[MetadataFilters] = None,

70

alpha: Optional[float] = None,

71

doc_ids: Optional[List[str]] = None,

72

vector_store_kwargs: Optional[dict] = None,

73

**kwargs

74

): ...

75

76

class VectorIndexAutoRetriever(BaseRetriever):

77

"""

78

Auto retriever with metadata filtering based on natural language queries.

79

80

Parameters:

81

- index: VectorStoreIndex, the vector index to retrieve from

82

- vector_store_info: VectorStoreInfo, metadata about vector store structure

83

- similarity_top_k: int, number of similar nodes to retrieve

84

- empty_query_top_k: Optional[int], top k when query is empty

85

- max_top_k: int, maximum number of nodes to retrieve

86

- llm: Optional[LLM], language model for filter generation

87

"""

88

def __init__(

89

self,

90

index: VectorStoreIndex,

91

vector_store_info: VectorStoreInfo,

92

similarity_top_k: int = 10,

93

empty_query_top_k: Optional[int] = None,

94

max_top_k: int = 10,

95

llm: Optional[LLM] = None,

96

**kwargs

97

): ...

98

```

99

100

### Summary Index Retrievers

101

102

Retrievers for comprehensive document retrieval and selection from summary indices.

103

104

```python { .api }

105

class SummaryIndexRetriever(BaseRetriever):

106

"""

107

Retriever that returns all nodes from a summary index.

108

109

Parameters:

110

- index: SummaryIndex, the summary index to retrieve from

111

"""

112

def __init__(self, index: SummaryIndex, **kwargs): ...

113

114

class SummaryIndexEmbeddingRetriever(BaseRetriever):

115

"""

116

Summary index retriever with embedding-based node selection.

117

118

Parameters:

119

- index: SummaryIndex, the summary index to retrieve from

120

- similarity_top_k: int, number of similar nodes to retrieve

121

- embed_model: Optional[BaseEmbedding], embedding model for similarity

122

"""

123

def __init__(

124

self,

125

index: SummaryIndex,

126

similarity_top_k: int = 10,

127

embed_model: Optional[BaseEmbedding] = None,

128

**kwargs

129

): ...

130

131

class SummaryIndexLLMRetriever(BaseRetriever):

132

"""

133

Summary index retriever with LLM-based node selection.

134

135

Parameters:

136

- index: SummaryIndex, the summary index to retrieve from

137

- choice_select_prompt: Optional[BasePromptTemplate], prompt for node selection

138

- choice_batch_size: int, batch size for LLM selection

139

- format_node_batch_fn: Optional[Callable], function to format node batches

140

- parse_choice_select_answer_fn: Optional[Callable], function to parse LLM response

141

- llm: Optional[LLM], language model for selection

142

"""

143

def __init__(

144

self,

145

index: SummaryIndex,

146

choice_select_prompt: Optional[BasePromptTemplate] = None,

147

choice_batch_size: int = 10,

148

format_node_batch_fn: Optional[Callable] = None,

149

parse_choice_select_answer_fn: Optional[Callable] = None,

150

llm: Optional[LLM] = None,

151

**kwargs

152

): ...

153

```

154

155

### Tree Index Retrievers

156

157

Specialized retrievers for hierarchical tree-structured indices with various traversal strategies.

158

159

```python { .api }

160

class TreeAllLeafRetriever(BaseRetriever):

161

"""

162

Retriever that returns all leaf nodes from a tree index.

163

164

Parameters:

165

- index: TreeIndex, the tree index to retrieve from

166

"""

167

def __init__(self, index: TreeIndex, **kwargs): ...

168

169

class TreeSelectLeafEmbeddingRetriever(BaseRetriever):

170

"""

171

Tree retriever with embedding-based leaf node selection.

172

173

Parameters:

174

- index: TreeIndex, the tree index to retrieve from

175

- embed_model: Optional[BaseEmbedding], embedding model for selection

176

- similarity_top_k: int, number of similar nodes to retrieve

177

"""

178

def __init__(

179

self,

180

index: TreeIndex,

181

embed_model: Optional[BaseEmbedding] = None,

182

similarity_top_k: int = 10,

183

**kwargs

184

): ...

185

186

class TreeSelectLeafRetriever(BaseRetriever):

187

"""

188

Tree retriever with LLM-based leaf node selection.

189

190

Parameters:

191

- index: TreeIndex, the tree index to retrieve from

192

- child_branch_factor: int, number of child nodes to consider per branch

193

- llm: Optional[LLM], language model for selection

194

"""

195

def __init__(

196

self,

197

index: TreeIndex,

198

child_branch_factor: int = 1,

199

llm: Optional[LLM] = None,

200

**kwargs

201

): ...

202

203

class TreeRootRetriever(BaseRetriever):

204

"""

205

Retriever that returns the root node of a tree index.

206

207

Parameters:

208

- index: TreeIndex, the tree index to retrieve from

209

"""

210

def __init__(self, index: TreeIndex, **kwargs): ...

211

```

212

213

### Keyword Table Retrievers

214

215

Retrievers for keyword-based search and matching operations.

216

217

```python { .api }

218

class KeywordTableSimpleRetriever(BaseRetriever):

219

"""

220

Simple keyword table retriever for exact keyword matching.

221

222

Parameters:

223

- index: KeywordTableIndex, the keyword table index

224

- max_keywords_per_query: int, maximum keywords to extract per query

225

- num_chunks_per_query: int, number of chunks to retrieve per query

226

- keyword_extractor: Optional[BaseKeywordExtractor], keyword extraction method

227

"""

228

def __init__(

229

self,

230

index: KeywordTableIndex,

231

max_keywords_per_query: int = 10,

232

num_chunks_per_query: int = 10,

233

keyword_extractor: Optional[BaseKeywordExtractor] = None,

234

**kwargs

235

): ...

236

```

237

238

### Knowledge Graph Retrievers

239

240

Retrievers for graph-based knowledge representation and traversal.

241

242

```python { .api }

243

class KGTableRetriever(BaseRetriever):

244

"""

245

Knowledge graph table retriever for entity-based queries.

246

247

Parameters:

248

- index: KnowledgeGraphIndex, the knowledge graph index

249

- retriever_mode: str, retrieval mode (keyword, embedding, hybrid)

250

- similarity_top_k: int, number of similar nodes to retrieve

251

- graph_store_query_depth: int, depth of graph traversal

252

- use_global_node_triplets: bool, whether to use global node relationships

253

- max_knowledge_sequence: int, maximum knowledge sequence length

254

- keyword_extractor: Optional[BaseKeywordExtractor], keyword extraction method

255

"""

256

def __init__(

257

self,

258

index: KnowledgeGraphIndex,

259

retriever_mode: str = "keyword",

260

similarity_top_k: int = 2,

261

graph_store_query_depth: int = 2,

262

use_global_node_triplets: bool = True,

263

max_knowledge_sequence: int = 128,

264

keyword_extractor: Optional[BaseKeywordExtractor] = None,

265

**kwargs

266

): ...

267

268

class KnowledgeGraphRAGRetriever(BaseRetriever):

269

"""

270

RAG-based knowledge graph retriever combining entity extraction and graph traversal.

271

272

Parameters:

273

- storage_context: StorageContext, storage configuration

274

- entity_extract_policy: Optional[str], entity extraction policy

275

- synonym_expand_policy: Optional[str], synonym expansion policy

276

- retriever_mode: str, retrieval mode configuration

277

- llm: Optional[LLM], language model for processing

278

- verbose: bool, whether to enable verbose logging

279

"""

280

def __init__(

281

self,

282

storage_context: StorageContext,

283

entity_extract_policy: Optional[str] = None,

284

synonym_expand_policy: Optional[str] = None,

285

retriever_mode: str = "keyword",

286

llm: Optional[LLM] = None,

287

verbose: bool = True,

288

**kwargs

289

): ...

290

```

291

292

### Property Graph Retrievers

293

294

Advanced retrievers for property graph structures with Cypher query support.

295

296

```python { .api }

297

class BasePGRetriever(BaseRetriever):

298

"""

299

Base class for property graph retrievers.

300

301

Parameters:

302

- graph_store: PropertyGraphStore, the property graph store

303

- llm: Optional[LLM], language model for processing

304

"""

305

def __init__(

306

self,

307

graph_store: PropertyGraphStore,

308

llm: Optional[LLM] = None,

309

**kwargs

310

): ...

311

312

class PGRetriever(BasePGRetriever):

313

"""

314

Standard property graph retriever with multiple retrieval strategies.

315

316

Parameters:

317

- graph_store: PropertyGraphStore, the property graph store

318

- include_text: bool, whether to include text content in results

319

- llm: Optional[LLM], language model for processing

320

"""

321

def __init__(

322

self,

323

graph_store: PropertyGraphStore,

324

include_text: bool = True,

325

llm: Optional[LLM] = None,

326

**kwargs

327

): ...

328

329

class LLMSynonymRetriever(BasePGRetriever):

330

"""

331

Property graph retriever with LLM-based synonym expansion.

332

333

Parameters:

334

- graph_store: PropertyGraphStore, the property graph store

335

- llm: Optional[LLM], language model for synonym generation

336

- include_text: bool, whether to include text in results

337

- synonym_prompt: Optional[PromptTemplate], prompt for synonym generation

338

- output_parser: Optional[BaseOutputParser], parser for LLM output

339

- max_keywords: int, maximum keywords to generate

340

- path_depth: int, depth of graph path traversal

341

"""

342

def __init__(

343

self,

344

graph_store: PropertyGraphStore,

345

llm: Optional[LLM] = None,

346

include_text: bool = True,

347

synonym_prompt: Optional[PromptTemplate] = None,

348

output_parser: Optional[BaseOutputParser] = None,

349

max_keywords: int = 10,

350

path_depth: int = 1,

351

**kwargs

352

): ...

353

354

class CypherTemplateRetriever(BasePGRetriever):

355

"""

356

Retriever using Cypher query templates for property graphs.

357

358

Parameters:

359

- graph_store: PropertyGraphStore, the property graph store

360

- cypher_query_template: str, Cypher query template

361

- output_parser: Optional[BaseOutputParser], parser for query results

362

- llm: Optional[LLM], language model for template processing

363

"""

364

def __init__(

365

self,

366

graph_store: PropertyGraphStore,

367

cypher_query_template: str,

368

output_parser: Optional[BaseOutputParser] = None,

369

llm: Optional[LLM] = None,

370

**kwargs

371

): ...

372

373

class TextToCypherRetriever(BasePGRetriever):

374

"""

375

Natural language to Cypher query retriever.

376

377

Parameters:

378

- graph_store: PropertyGraphStore, the property graph store

379

- nl_to_cypher_template: Optional[PromptTemplate], natural language to Cypher prompt

380

- cypher_validation_template: Optional[PromptTemplate], Cypher validation prompt

381

- allowed_output_fields: Optional[List[str]], allowed output fields

382

- llm: Optional[LLM], language model for query generation

383

"""

384

def __init__(

385

self,

386

graph_store: PropertyGraphStore,

387

nl_to_cypher_template: Optional[PromptTemplate] = None,

388

cypher_validation_template: Optional[PromptTemplate] = None,

389

allowed_output_fields: Optional[List[str]] = None,

390

llm: Optional[LLM] = None,

391

**kwargs

392

): ...

393

```

394

395

### SQL Retrievers

396

397

Retrievers for SQL database queries and natural language to SQL conversion.

398

399

```python { .api }

400

class SQLRetriever(BaseRetriever):

401

"""

402

SQL query-based retriever for structured database content.

403

404

Parameters:

405

- sql_database: SQLDatabase, the SQL database connection

406

- return_raw: bool, whether to return raw SQL results

407

"""

408

def __init__(

409

self,

410

sql_database: SQLDatabase,

411

return_raw: bool = True,

412

**kwargs

413

): ...

414

415

class NLSQLRetriever(BaseRetriever):

416

"""

417

Natural language to SQL query retriever.

418

419

Parameters:

420

- sql_database: SQLDatabase, the SQL database connection

421

- text_to_sql_prompt: Optional[BasePromptTemplate], text to SQL conversion prompt

422

- context_query_kwargs: Optional[dict], additional query context arguments

423

- table_retriever: Optional[ObjectRetriever], table schema retriever

424

- context_str_prefix: Optional[str], prefix for context strings

425

- sql_parser_mode: SQLParserMode, SQL parsing mode (default, strict, or relaxed)

426

- llm: Optional[LLM], language model for SQL generation

427

"""

428

def __init__(

429

self,

430

sql_database: SQLDatabase,

431

text_to_sql_prompt: Optional[BasePromptTemplate] = None,

432

context_query_kwargs: Optional[dict] = None,

433

table_retriever: Optional[ObjectRetriever] = None,

434

context_str_prefix: Optional[str] = None,

435

sql_parser_mode: SQLParserMode = SQLParserMode.DEFAULT,

436

llm: Optional[LLM] = None,

437

**kwargs

438

): ...

439

```

440

441

### Advanced Retrievers

442

443

Sophisticated retrieval strategies combining multiple approaches and reasoning patterns.

444

445

```python { .api }

446

class RecursiveRetriever(BaseRetriever):

447

"""

448

Recursive retriever for multi-step information gathering.

449

450

Parameters:

451

- root_id: str, identifier of the root node to start retrieval

452

- retriever_dict: Dict[str, BaseRetriever], mapping of node IDs to retrievers

453

- query_transform_fn: Optional[Callable], function to transform queries

454

- node_dict: Optional[Dict[str, BaseNode]], mapping of node IDs to nodes

455

- verbose: bool, whether to enable verbose logging

456

"""

457

def __init__(

458

self,

459

root_id: str,

460

retriever_dict: Dict[str, BaseRetriever],

461

query_transform_fn: Optional[Callable] = None,

462

node_dict: Optional[Dict[str, BaseNode]] = None,

463

verbose: bool = True,

464

**kwargs

465

): ...

466

467

class AutoMergingRetriever(BaseRetriever):

468

"""

469

Auto-merging retriever for hierarchical node structures.

470

471

Parameters:

472

- vector_retriever: BaseRetriever, base vector retriever

473

- storage_context: StorageContext, storage configuration

474

- simple_ratio_thresh: float, threshold for simple merging

475

- verbose: bool, whether to enable verbose logging

476

"""

477

def __init__(

478

self,

479

vector_retriever: BaseRetriever,

480

storage_context: StorageContext,

481

simple_ratio_thresh: float = 0.5,

482

verbose: bool = True,

483

**kwargs

484

): ...

485

486

class RouterRetriever(BaseRetriever):

487

"""

488

Router-based retriever for selecting appropriate retrieval strategies.

489

490

Parameters:

491

- selector: BaseSelector, selector for choosing retrievers

492

- retriever_tools: List[RetrieverTool], available retriever tools

493

- llm: Optional[LLM], language model for routing decisions

494

"""

495

def __init__(

496

self,

497

selector: BaseSelector,

498

retriever_tools: List[RetrieverTool],

499

llm: Optional[LLM] = None,

500

**kwargs

501

): ...

502

503

class QueryFusionRetriever(BaseRetriever):

504

"""

505

Query fusion retriever combining multiple query variations.

506

507

Parameters:

508

- retrievers: List[BaseRetriever], retrievers to fuse results from

509

- similarity_top_k: int, number of similar nodes per retriever

510

- num_queries: int, number of query variations to generate

511

- mode: str, fusion mode (reciprocal_rank, relative_score, dist_based_score)

512

- use_async: bool, whether to use async retrieval

513

- retriever_weights: Optional[List[float]], weights for individual retrievers

514

- llm: Optional[LLM], language model for query generation

515

"""

516

def __init__(

517

self,

518

retrievers: List[BaseRetriever],

519

similarity_top_k: int = 2,

520

num_queries: int = 4,

521

mode: str = "reciprocal_rank",

522

use_async: bool = True,

523

retriever_weights: Optional[List[float]] = None,

524

llm: Optional[LLM] = None,

525

**kwargs

526

): ...

527

528

class TransformRetriever(BaseRetriever):

529

"""

530

Transform-based retriever with query preprocessing.

531

532

Parameters:

533

- retriever: BaseRetriever, base retriever to transform

534

- query_transform: BaseQueryTransform, query transformation method

535

"""

536

def __init__(

537

self,

538

retriever: BaseRetriever,

539

query_transform: BaseQueryTransform,

540

**kwargs

541

): ...

542

```

543

544

### Empty Index Retriever

545

546

Retriever that returns no results, for use with empty or placeholder indices.

547

548

```python { .api }

549

class EmptyIndexRetriever(BaseRetriever):

550

"""

551

Retriever that returns empty results, used for placeholder indices.

552

553

Parameters:

554

- index: EmptyIndex, the empty index

555

"""

556

def __init__(self, index: EmptyIndex, **kwargs): ...

557

```

558

559

### Image Retrievers

560

561

Specialized retrievers for image and multi-modal content.

562

563

```python { .api }

564

class BaseImageRetriever:

565

"""

566

Base interface for image-specific retrieval operations.

567

568

Parameters:

569

- callback_manager: Optional[CallbackManager], callback management

570

"""

571

def __init__(self, callback_manager: Optional[CallbackManager] = None): ...

572

573

def text_to_image_retrieve(self, str_or_query_bundle: Union[str, QueryBundle]) -> List[NodeWithScore]:

574

"""Retrieve images based on text query."""

575

576

def image_to_image_retrieve(self, str_or_query_bundle: Union[str, QueryBundle]) -> List[NodeWithScore]:

577

"""Retrieve similar images based on image query."""

578

```

579

580

## Usage Examples

581

582

### Basic Vector Retrieval

583

584

```python

585

from llama_index.core import VectorStoreIndex, Document

586

from llama_index.core.retrievers import VectorIndexRetriever

587

588

# Create documents and index

589

documents = [

590

Document(text="Machine learning is a subset of artificial intelligence."),

591

Document(text="Deep learning uses neural networks with multiple layers."),

592

Document(text="Natural language processing helps computers understand text.")

593

]

594

595

index = VectorStoreIndex.from_documents(documents)

596

597

# Create retriever

598

retriever = VectorIndexRetriever(

599

index=index,

600

similarity_top_k=2,

601

filters=None

602

)

603

604

# Retrieve relevant nodes

605

nodes = retriever.retrieve("What is machine learning?")

606

for node in nodes:

607

print(f"Score: {node.score:.3f}")

608

print(f"Text: {node.text}")

609

```

610

611

### Multi-step Recursive Retrieval

612

613

```python

614

from llama_index.core.retrievers import RecursiveRetriever

615

from llama_index.core.schema import IndexNode

616

617

# Setup hierarchical indices

618

summary_index = SummaryIndex.from_documents(documents)

619

detail_indices = {

620

"ml_detail": VectorStoreIndex.from_documents(ml_documents),

621

"dl_detail": VectorStoreIndex.from_documents(dl_documents)

622

}

623

624

# Create retriever mapping

625

retriever_dict = {

626

"summary": summary_index.as_retriever(),

627

"ml_detail": detail_indices["ml_detail"].as_retriever(),

628

"dl_detail": detail_indices["dl_detail"].as_retriever()

629

}

630

631

# Recursive retriever

632

recursive_retriever = RecursiveRetriever(

633

root_id="summary",

634

retriever_dict=retriever_dict,

635

verbose=True

636

)

637

638

# Retrieve with multi-step reasoning

639

results = recursive_retriever.retrieve("Explain deep learning architectures")

640

```

641

642

### Query Fusion Retrieval

643

644

```python

645

from llama_index.core.retrievers import QueryFusionRetriever

646

647

# Multiple retrieval strategies

648

vector_retriever = index.as_retriever(similarity_top_k=3)

649

keyword_retriever = keyword_index.as_retriever(max_keywords_per_query=5)

650

651

# Fusion retriever

652

fusion_retriever = QueryFusionRetriever(

653

retrievers=[vector_retriever, keyword_retriever],

654

similarity_top_k=2,

655

num_queries=4,

656

mode="reciprocal_rank",

657

use_async=True

658

)

659

660

# Retrieve with query fusion

661

nodes = fusion_retriever.retrieve("machine learning applications")

662

```

663

664

## Types & Enums

665

666

```python { .api }

667

class SQLParserMode(str, Enum):

668

"""SQL parsing modes for natural language to SQL conversion."""

669

DEFAULT = "default"

670

STRICT = "strict"

671

RELAXED = "relaxed"

672

673

# Legacy aliases maintained for compatibility

674

ListIndexEmbeddingRetriever = SummaryIndexEmbeddingRetriever

675

ListIndexRetriever = SummaryIndexRetriever

676

```