Data framework for your LLM application
—
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Pending
The risk profile of this skill
Query processing and response synthesis for retrieving and generating answers from indexed data in LlamaIndex.TS.
import { VectorStoreIndex } from "llamaindex";
// Or from specific submodules
import { RetrieverQueryEngine, SubQuestionQueryEngine } from "llamaindex/engines";

Query engines in LlamaIndex.TS handle the process of retrieving relevant information from indices and synthesizing coherent responses. They combine retrieval mechanisms with response generation to provide comprehensive answers to user queries.
All query engines implement the base interface.
interface BaseQueryEngine {
query(query: string, options?: QueryOptions): Promise<EngineResponse>;
aquery(query: string, options?: QueryOptions): AsyncIterable<EngineResponse>;
}
interface QueryOptions {
stream?: boolean;
preFilters?: MetadataFilters;
similarity_top_k?: number;
}

The most commonly used query engine that combines a retriever with response synthesis.
class RetrieverQueryEngine implements BaseQueryEngine {
constructor(args: {
retriever: BaseRetriever;
responseSynthesizer?: ResponseSynthesizer;
nodePostprocessors?: BasePostProcessor[];
});
query(query: string, options?: QueryOptions): Promise<EngineResponse>;
aquery(query: string, options?: QueryOptions): AsyncIterable<EngineResponse>;
retriever: BaseRetriever;
responseSynthesizer: ResponseSynthesizer;
nodePostprocessors: BasePostProcessor[];
}

Breaks down complex queries into sub-questions for better handling of multi-part queries.
class SubQuestionQueryEngine implements BaseQueryEngine {
constructor(args: {
queryEngineTools: QueryEngineTool[];
questionGen?: LLMQuestionGenerator;
responseSynthesizer?: ResponseSynthesizer;
});
query(query: string, options?: QueryOptions): Promise<EngineResponse>;
aquery(query: string, options?: QueryOptions): AsyncIterable<EngineResponse>;
queryEngineTools: QueryEngineTool[];
questionGen: LLMQuestionGenerator;
responseSynthesizer: ResponseSynthesizer;
}

Routes queries to different query engines based on query characteristics.
class RouterQueryEngine implements BaseQueryEngine {
constructor(args: {
selector: BaseSelector;
queryEngineTools: QueryEngineTool[];
defaultTool?: QueryEngineTool;
});
query(query: string, options?: QueryOptions): Promise<EngineResponse>;
aquery(query: string, options?: QueryOptions): AsyncIterable<EngineResponse>;
selector: BaseSelector;
queryEngineTools: QueryEngineTool[];
defaultTool?: QueryEngineTool;
}

Standard response object returned by query engines.
class EngineResponse {
response: string;
sourceNodes?: NodeWithScore[];
metadata?: Record<string, any>;
toString(): string;
print(): void;
}
interface NodeWithScore {
node: BaseNode;
score?: number;
}

import { VectorStoreIndex, Document } from "llamaindex";
// Create index
const documents = [
new Document({ text: "LlamaIndex is a data framework for LLM applications." }),
new Document({ text: "It provides tools for document processing and retrieval." }),
];
const index = await VectorStoreIndex.fromDocuments(documents);
// Create query engine (uses RetrieverQueryEngine internally)
const queryEngine = index.asQueryEngine();
// Query the engine
const response = await queryEngine.query("What is LlamaIndex?");
console.log("Answer:", response.toString());
// Access source information
if (response.sourceNodes) {
console.log("Sources:");
response.sourceNodes.forEach((nodeWithScore, i) => {
console.log(` ${i + 1}. ${nodeWithScore.node.text} (score: ${nodeWithScore.score})`);
});
}

// Stream responses for real-time output
const response = await queryEngine.query("Explain LlamaIndex in detail", {
stream: true
});
// For streaming, iterate over the async generator
for await (const chunk of queryEngine.aquery("What is LlamaIndex?")) {
console.log("Chunk:", chunk.response);
}

import { RetrieverQueryEngine, ResponseSynthesizer } from "llamaindex/engines";
// Create custom retriever
const retriever = index.asRetriever({
similarityTopK: 5,
});
// Create custom response synthesizer
const responseSynthesizer = new ResponseSynthesizer({
responseMode: "tree_summarize",
});
// Create query engine with custom components
const customQueryEngine = new RetrieverQueryEngine({
retriever,
responseSynthesizer,
});
const response = await customQueryEngine.query("How does semantic search work?");

// Query with metadata filters
const response = await queryEngine.query("Find financial information", {
preFilters: {
filters: [
{ key: "category", value: "finance", operator: "==" },
{ key: "year", value: 2024, operator: ">=" }
]
}
});

import { SubQuestionQueryEngine, QueryEngineTool } from "llamaindex/engines";
// Create multiple specialized indices
const techIndex = await VectorStoreIndex.fromDocuments(techDocs);
const financeIndex = await VectorStoreIndex.fromDocuments(financeDocs);
// Create query engine tools
const queryEngineTools = [
new QueryEngineTool({
queryEngine: techIndex.asQueryEngine(),
metadata: {
name: "tech_search",
description: "Useful for answering questions about technical topics"
}
}),
new QueryEngineTool({
queryEngine: financeIndex.asQueryEngine(),
metadata: {
name: "finance_search",
description: "Useful for answering questions about financial topics"
}
})
];
// Create sub-question query engine
const subQuestionQE = new SubQuestionQueryEngine({
queryEngineTools,
});
// Ask complex multi-part question
const response = await subQuestionQE.query(
"What are the technical challenges and financial implications of implementing AI in healthcare?"
);
console.log("Complex answer:", response.toString());

import { RouterQueryEngine, LLMSingleSelector } from "llamaindex/engines";
// Create selector to route queries
const selector = new LLMSingleSelector();
// Create router query engine
const routerQE = new RouterQueryEngine({
selector,
queryEngineTools,
defaultTool: queryEngineTools[0], // Fallback tool
});
// Route queries automatically
const response1 = await routerQE.query("How do neural networks work?"); // → tech_search
const response2 = await routerQE.query("What was the quarterly revenue?"); // → finance_search

Best for comprehensive answers from multiple sources.
import { ResponseSynthesizer } from "llamaindex";
const queryEngine = new RetrieverQueryEngine({
retriever: index.asRetriever(),
responseSynthesizer: new ResponseSynthesizer({
responseMode: "tree_summarize",
}),
});

Iteratively refines the answer with each retrieved chunk.
const queryEngine = new RetrieverQueryEngine({
retriever: index.asRetriever(),
responseSynthesizer: new ResponseSynthesizer({
responseMode: "refine",
}),
});

Combines chunks to maximize context window usage.
const queryEngine = new RetrieverQueryEngine({
retriever: index.asRetriever(),
responseSynthesizer: new ResponseSynthesizer({
responseMode: "compact",
}),
});

The query engine supports a comprehensive post-processing pipeline to refine and filter retrieved nodes before response synthesis.
All post-processors implement the base interface.
interface BasePostProcessor {
postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];
}

class SimilarityPostprocessor implements BasePostProcessor {
constructor(options: {
similarityCutoff?: number;
similarityTop?: number;
});
postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];
}
class KeywordNodePostprocessor implements BasePostProcessor {
constructor(options: {
requiredKeywords?: string[];
excludeKeywords?: string[];
lang?: string;
});
postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];
}
class MetadataReplacementPostProcessor implements BasePostProcessor {
constructor(targetMetadataKey: string);
postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];
}
class SentenceEmbeddingOptimizer implements BasePostProcessor {
constructor(options: {
embedModel?: BaseEmbedding;
percentilesCutoff?: number;
thresholdCutoff?: number;
});
postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];
}
class CohereRerank implements BasePostProcessor {
constructor(options: {
apiKey: string;
topN?: number;
model?: string;
});
postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];
}
class LLMRerank implements BasePostProcessor {
constructor(options: {
llm?: LLM;
topN?: number;
choice_batch_size?: number;
});
postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];
}
class FixedRecencyPostprocessor implements BasePostProcessor {
constructor(options: {
topK?: number;
dateKey?: string;
inPlace?: boolean;
});
postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];
}
class EmbeddingRecencyPostprocessor implements BasePostProcessor {
constructor(options: {
embedModel?: BaseEmbedding;
similarityTopK?: number;
dateKey?: string;
recencyWeights?: number[];
});
postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];
}
class TimeWeightedPostprocessor implements BasePostProcessor {
constructor(options: {
timeDecayFunction?: (timeDiff: number) => number;
timeKey?: string;
nowTimestamp?: Date;
});
postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];
}

import {
SimilarityPostprocessor,
KeywordNodePostprocessor,
CohereRerank,
LLMRerank,
SentenceEmbeddingOptimizer
} from "llamaindex/postprocessors";
// Multi-stage post-processing pipeline
const queryEngine = new RetrieverQueryEngine({
retriever: index.asRetriever({ similarityTopK: 20 }), // Get more candidates
nodePostprocessors: [
// Stage 1: Filter by similarity threshold
new SimilarityPostprocessor({
similarityCutoff: 0.6, // Remove low-relevance nodes
}),
// Stage 2: Filter by keywords
new KeywordNodePostprocessor({
requiredKeywords: ["important", "relevant"],
excludeKeywords: ["outdated", "deprecated"],
}),
// Stage 3: Sentence-level embedding optimization
new SentenceEmbeddingOptimizer({
percentilesCutoff: 0.8, // Keep top 80% of sentence embeddings
thresholdCutoff: 0.7, // Minimum threshold
}),
// Stage 4: Re-rank using external service
new CohereRerank({
apiKey: process.env.COHERE_API_KEY,
topN: 5, // Final top 5 results
model: "rerank-english-v2.0",
}),
],
});
// Time-weighted post-processing for temporal data
const temporalQueryEngine = new RetrieverQueryEngine({
retriever: index.asRetriever(),
nodePostprocessors: [
new TimeWeightedPostprocessor({
timeKey: "created_date",
timeDecayFunction: (timeDiffDays) => Math.exp(-timeDiffDays / 30), // Exponential decay
}),
new FixedRecencyPostprocessor({
topK: 10,
dateKey: "created_date",
}),
],
});
// LLM-based re-ranking
const llmRerankEngine = new RetrieverQueryEngine({
retriever: index.asRetriever({ similarityTopK: 10 }),
nodePostprocessors: [
new LLMRerank({
llm: /* your LLM instance */,
topN: 3,
choice_batch_size: 5, // Process in batches for efficiency
}),
],
});

import { QueryEngineTool } from "llamaindex/tools";
// Create query engine tool for use with agents
const queryTool = new QueryEngineTool({
queryEngine: index.asQueryEngine(),
metadata: {
name: "knowledge_search",
description: "Search the knowledge base for information about the company",
},
});
// Use with ReAct agent
import { ReActAgent } from "llamaindex/agent";
const agent = new ReActAgent({
tools: [queryTool],
llm: /* your LLM */,
});
const response = await agent.chat("Find information about our product roadmap");

// Process multiple queries concurrently
const queries = [
"What is machine learning?",
"How does deep learning work?",
"What are neural networks?",
];
const responses = await Promise.all(
queries.map(query => queryEngine.query(query))
);
responses.forEach((response, i) => {
console.log(`Query ${i + 1}:`, response.toString());
});

// Simple query cache implementation
// In-memory memoization wrapper around a BaseQueryEngine.
// Responses are keyed by the raw query string, so textually different
// queries are cached independently even if semantically identical.
// NOTE(review): only `query` is wrapped; streaming via `aquery` is not cached.
class CachedQueryEngine {
  // Map preserves insertion order, which we exploit for FIFO eviction.
  private cache = new Map<string, EngineResponse>();

  /**
   * @param queryEngine underlying engine whose responses are memoized
   * @param maxSize     cap on cached entries; beyond this the oldest entry
   *                    is evicted so the cache cannot grow without bound
   *                    (the original implementation leaked memory in
   *                    long-running processes)
   */
  constructor(
    private queryEngine: BaseQueryEngine,
    private readonly maxSize: number = 1000,
  ) {}

  async query(query: string): Promise<EngineResponse> {
    const hit = this.cache.get(query);
    if (hit !== undefined) {
      return hit;
    }
    const response = await this.queryEngine.query(query);
    this.cache.set(query, response);
    // FIFO eviction: drop the oldest entry once the cap is exceeded.
    if (this.cache.size > this.maxSize) {
      const oldest = this.cache.keys().next().value;
      if (oldest !== undefined) {
        this.cache.delete(oldest);
      }
    }
    return response;
  }
}
const cachedQE = new CachedQueryEngine(queryEngine);

// Process queries in batches to manage memory
// Run queries in fixed-size concurrent batches to cap memory use and backend
// load: batches execute sequentially, queries within a batch in parallel.
const batchQueries = async (queries: string[], batchSize: number = 5) => {
  const results: EngineResponse[] = [];
  for (let i = 0; i < queries.length; i += batchSize) {
    const batch = queries.slice(i, i + batchSize);
    const batchResults = await Promise.all(
      batch.map(query => queryEngine.query(query))
    );
    results.push(...batchResults);
    // Optional: Add delay between batches
    await new Promise(resolve => setTimeout(resolve, 100));
  }
  return results;
};

// Query wrapper that validates the response and resolves to null instead of
// throwing, so callers can degrade gracefully.
const safeQuery = async (query: string): Promise<EngineResponse | null> => {
  try {
    const response = await queryEngine.query(query);
    // Validate response
    if (!response.response || response.response.trim().length === 0) {
      console.warn("Empty response received");
      return null;
    }
    return response;
  } catch (error: unknown) {
    console.error("Query failed:", error);
    // Catch variables are `unknown` under strict TypeScript and thrown values
    // are not guaranteed to be Errors — narrow before reading `.message`
    // (the original accessed error.message unchecked).
    const message = error instanceof Error ? error.message : String(error);
    // Handle specific error types
    if (message.includes("embedding")) {
      console.error("Embedding service issue");
    } else if (message.includes("LLM")) {
      console.error("Language model issue");
    }
    return null;
  }
};
const response = await safeQuery("What is the meaning of life?");
if (response) {
console.log("Answer:", response.toString());
} else {
console.log("Could not generate response");
}

// Choose the right query engine for your use case
// Pick a query engine implementation appropriate for the use case.
const createQueryEngine = (useCase: string) => {
  if (useCase === "complex") {
    // Multi-step reasoning: decompose into sub-questions across tools.
    return new SubQuestionQueryEngine({ queryEngineTools });
  }
  if (useCase === "specialized") {
    // Route each query to the most relevant knowledge base.
    return new RouterQueryEngine({ selector, queryEngineTools });
  }
  // "simple" and any unrecognized value: basic retrieval and synthesis.
  return index.asQueryEngine();
};

// Configure for high-quality responses
const highQualityQE = new RetrieverQueryEngine({
retriever: index.asRetriever({
similarityTopK: 5, // Get more context
}),
responseSynthesizer: new ResponseSynthesizer({
responseMode: "tree_summarize", // Best synthesis method
}),
nodePostprocessors: [
new SimilarityPostprocessor({
similarityCutoff: 0.7, // Only high-quality sources
}),
],
});

// Add response metadata logging
// Run a query and log the answer plus its source/metadata for debugging.
const logQueryResponse = async (query: string) => {
  const response = await queryEngine.query(query);
  const sourceCount = response.sourceNodes?.length ?? 0;
  console.log("Query:", query);
  console.log("Response:", response.toString());
  console.log("Source count:", sourceCount);
  console.log("Metadata:", response.metadata);
  return response;
};