Interface between LLMs and your data for building retrieval-augmented generation (RAG) applications
—
Response generation strategies for combining retrieved context into coherent answers with various summarization approaches and synthesis modes.
Factory function for creating response synthesizers with different strategies and configurations.
def get_response_synthesizer(
    response_mode="compact",
    service_context=None,
    text_qa_template=None,
    refine_template=None,
    summary_template=None,
    simple_template=None,
    use_async=False,
    streaming=False,
    structured_answer_filtering=False,
    **kwargs
):
    """
    Create response synthesizer with specified mode and configuration.

    Args:
        response_mode: Synthesis strategy ("compact", "refine", "tree_summarize",
            "simple_summarize", "accumulate", "generation")
        service_context: Service context (deprecated, use Settings)
        text_qa_template: Template for question-answering
        refine_template: Template for iterative refinement
        summary_template: Template for summarization
        simple_template: Template for simple responses
        use_async: Enable asynchronous processing
        streaming: Enable streaming responses
        structured_answer_filtering: Filter responses for structured output

    Returns:
        BaseSynthesizer: Configured response synthesizer
    """
    ...  # stub: implementation lives in the installed package

# Usage Example:
from llama_index.core import get_response_synthesizer
# Compact mode (default) - combines chunks efficiently
synthesizer = get_response_synthesizer(
response_mode="compact",
streaming=True
)
# Tree summarize mode - hierarchical summarization
tree_synthesizer = get_response_synthesizer(
response_mode="tree_summarize",
use_async=True
)
# Refine mode - iterative improvement
refine_synthesizer = get_response_synthesizer(
response_mode="refine",
structured_answer_filtering=True
)
# Use with query engine
query_engine = index.as_query_engine(
response_synthesizer=synthesizer
)

Efficient synthesis mode that combines retrieved chunks with intelligent context compression.
class CompactAndRefine:
    """
    Compact and refine synthesis strategy.

    Combines chunks into larger contexts, then applies refinement for final answer.

    Args:
        text_qa_template: Template for initial question answering
        refine_template: Template for iterative refinement
        max_prompt_size: Maximum prompt size in tokens
        callback_manager: Callback manager for events
        use_async: Enable asynchronous processing
        streaming: Enable streaming responses
    """

    def __init__(
        self,
        text_qa_template=None,
        refine_template=None,
        max_prompt_size=None,
        callback_manager=None,
        use_async=False,
        streaming=False,
        **kwargs
    ): ...

    def synthesize(
        self,
        query,
        nodes,
        additional_source_nodes=None,
        **kwargs
    ):
        """
        Synthesize response from query and retrieved nodes.

        Args:
            query: User query or QueryBundle
            nodes: List of retrieved NodeWithScore objects
            additional_source_nodes: Extra context nodes

        Returns:
            Response: Synthesized response with sources
        """

    async def asynthesize(self, query, nodes, **kwargs):
        """Async version of synthesize."""

# Hierarchical summarization strategy that builds responses bottom-up through tree structures.
class TreeSummarize:
    """
    Tree-based summarization synthesis.

    Recursively summarizes chunks in a tree structure for comprehensive responses.

    Args:
        summary_template: Template for summarization steps
        text_qa_template: Template for final question answering
        use_async: Enable asynchronous processing
        callback_manager: Callback manager for events
    """

    def __init__(
        self,
        summary_template=None,
        text_qa_template=None,
        use_async=False,
        callback_manager=None,
        **kwargs
    ): ...

    def synthesize(self, query, nodes, **kwargs):
        """Tree-based synthesis of response."""

    async def asynthesize(self, query, nodes, **kwargs):
        """Async tree synthesis."""

# Usage Example:
from llama_index.core.response_synthesizers import TreeSummarize
from llama_index.core.prompts import PromptTemplate
# Custom summarization template
summary_template = PromptTemplate(
"Context information is below:\n"
"---------------------\n"
"{context_str}\n"
"---------------------\n"
"Summarize the key points relevant to: {query_str}\n"
"Summary: "
)
tree_synthesizer = TreeSummarize(
summary_template=summary_template,
use_async=True
)
# Use with query engine
query_engine = index.as_query_engine(
response_synthesizer=tree_synthesizer,
similarity_top_k=10 # More chunks for tree processing
)
response = query_engine.query("What are the main themes in the documents?")

Refine synthesis strategy that iteratively improves responses using additional context.
class Refine:
    """
    Iterative refinement synthesis strategy.

    Starts with initial response and refines it using additional retrieved chunks.

    Args:
        text_qa_template: Template for initial response
        refine_template: Template for refinement steps
        callback_manager: Callback manager for events
        streaming: Enable streaming responses
    """

    def __init__(
        self,
        text_qa_template=None,
        refine_template=None,
        callback_manager=None,
        streaming=False,
        **kwargs
    ): ...

    def synthesize(self, query, nodes, **kwargs):
        """Iteratively refine response using retrieved nodes."""

    async def asynthesize(self, query, nodes, **kwargs):
        """Async iterative refinement."""

# Usage Example:
from llama_index.core.response_synthesizers import Refine
from llama_index.core.prompts import PromptTemplate
# Custom refinement template
refine_template = PromptTemplate(
"The original query is as follows: {query_str}\n"
"We have provided an existing answer: {existing_answer}\n"
"We have the opportunity to refine the existing answer "
"(only if needed) with some more context below.\n"
"------------\n"
"{context_msg}\n"
"------------\n"
"Given the new context, refine the original answer to better "
"answer the query. If the context isn't useful, return the original answer.\n"
"Refined Answer: "
)
refine_synthesizer = Refine(
refine_template=refine_template,
streaming=True
)
query_engine = index.as_query_engine(
response_synthesizer=refine_synthesizer
)

Direct summarization strategy for straightforward responses without complex processing.
class SimpleSummarize:
    """
    Simple summarization synthesis.

    Directly summarizes all retrieved context in a single step.

    Args:
        text_qa_template: Template for question answering
        callback_manager: Callback manager for events
        streaming: Enable streaming responses
    """

    def __init__(
        self,
        text_qa_template=None,
        callback_manager=None,
        streaming=False,
        **kwargs
    ): ...

    def synthesize(self, query, nodes, **kwargs):
        """Simple one-step summarization."""

# Accumulation strategy that concatenates individual responses from each retrieved chunk.
class Accumulate:
    """
    Accumulate synthesis strategy.

    Generates individual responses for each chunk and concatenates them.

    Args:
        text_qa_template: Template for individual chunk responses
        output_cls: Structured output class
        callback_manager: Callback manager for events
        use_async: Enable asynchronous processing
    """

    def __init__(
        self,
        text_qa_template=None,
        output_cls=None,
        callback_manager=None,
        use_async=False,
        **kwargs
    ): ...

    def synthesize(self, query, nodes, **kwargs):
        """Accumulate responses from individual chunks."""

# Usage Example:
from llama_index.core.response_synthesizers import Accumulate
accumulate_synthesizer = Accumulate(
use_async=True # Process chunks in parallel
)
# Good for gathering diverse perspectives
query_engine = index.as_query_engine(
response_synthesizer=accumulate_synthesizer,
similarity_top_k=5
)
response = query_engine.query("What are different opinions on this topic?")
print(response.response)  # Contains accumulated individual responses

Direct generation strategy that creates responses without using retrieved context.
class Generation:
    """
    Generation synthesis strategy.

    Generates responses directly from the query without using retrieved context.

    Args:
        simple_template: Template for direct generation
        callback_manager: Callback manager for events
        streaming: Enable streaming responses
    """

    def __init__(
        self,
        simple_template=None,
        callback_manager=None,
        streaming=False,
        **kwargs
    ): ...

    def synthesize(self, query, nodes, **kwargs):
        """Generate response directly from query."""

# Base class for implementing custom response synthesis strategies.
class BaseSynthesizer:
    """
    Base class for response synthesizers.

    Args:
        callback_manager: Callback manager for events
        streaming: Enable streaming responses
    """

    def __init__(
        self,
        callback_manager=None,
        streaming=False,
        **kwargs
    ): ...

    def synthesize(
        self,
        query,
        nodes,
        additional_source_nodes=None,
        **kwargs
    ):
        """
        Synthesize response from query and nodes.

        Args:
            query: User query string or QueryBundle
            nodes: List of NodeWithScore objects from retrieval
            additional_source_nodes: Extra source nodes for context

        Returns:
            Response: Generated response with metadata
        """

    async def asynthesize(self, query, nodes, **kwargs):
        """Async version of synthesize method."""

    def get_prompts(self):
        """Get prompt templates used by synthesizer."""

    def update_prompts(self, prompts_dict):
        """Update prompt templates."""

# Advanced synthesis with structured output generation for extracting specific information formats.
class StructuredResponseSynthesizer(BaseSynthesizer):
    """
    Structured response synthesizer for typed outputs.

    Args:
        output_cls: Pydantic model class for structured output
        llm: Language model for generation
        text_qa_template: Template for question answering
        streaming: Enable streaming (limited for structured output)
    """

    def __init__(
        self,
        output_cls,
        llm=None,
        text_qa_template=None,
        streaming=False,
        **kwargs
    ): ...

    def synthesize(self, query, nodes, **kwargs):
        """Generate structured response matching output_cls schema."""

# Structured Output Example:
from pydantic import BaseModel
from typing import List
from llama_index.core.response_synthesizers import get_response_synthesizer
class SummaryOutput(BaseModel):
main_points: List[str]
sentiment: str
confidence_score: float
# Create structured synthesizer
structured_synthesizer = get_response_synthesizer(
response_mode="compact",
output_cls=SummaryOutput,
structured_answer_filtering=True
)
query_engine = index.as_query_engine(
response_synthesizer=structured_synthesizer
)
response = query_engine.query("Summarize the main points")
structured_data = response.metadata.get("structured_response")
# structured_data is now a SummaryOutput instance

Framework for implementing custom response synthesis logic with full control over the generation process.
class CustomSynthesizer(BaseSynthesizer):
    """
    Custom response synthesizer implementation.

    Args:
        custom_prompt: Custom prompt template
        processing_fn: Custom processing function
        **kwargs: BaseSynthesizer arguments
    """

    def __init__(
        self,
        custom_prompt=None,
        processing_fn=None,
        **kwargs
    ): ...

    def synthesize(self, query, nodes, **kwargs):
        """Custom synthesis logic."""
        context_str = self._prepare_context(nodes)
        # Delegate to the user-supplied hook when one was configured.
        if self.processing_fn:
            return self.processing_fn(query, context_str, **kwargs)
        # Default processing
        return self._generate_response(query, context_str)

    def _prepare_context(self, nodes):
        """Prepare context string from nodes."""
        return "\n\n".join([node.node.get_content() for node in nodes])

    def _generate_response(self, query, context):
        """Generate response using LLM."""
        # Implementation details
        pass

# Custom Synthesizer Example:
from llama_index.core.response_synthesizers import BaseSynthesizer
from llama_index.core.base.response.schema import Response
class FactCheckSynthesizer(BaseSynthesizer):
"""Custom synthesizer that fact-checks responses."""
def __init__(self, fact_check_threshold=0.8, **kwargs):
super().__init__(**kwargs)
self.fact_check_threshold = fact_check_threshold
def synthesize(self, query, nodes, **kwargs):
# Generate initial response
context_str = "\n\n".join([node.node.get_content() for node in nodes])
initial_response = self._llm.complete(
f"Context: {context_str}\n\nQuestion: {query}\n\nAnswer:"
)
# Fact-check the response
fact_check_score = self._fact_check(initial_response.text, context_str)
if fact_check_score < self.fact_check_threshold:
# Generate more conservative response
refined_response = self._llm.complete(
f"Based only on the provided context, answer: {query}\n"
f"Context: {context_str}\n"
f"Conservative Answer:"
)
response_text = refined_response.text
else:
response_text = initial_response.text
return Response(
response=response_text,
source_nodes=nodes,
metadata={"fact_check_score": fact_check_score}
)
def _fact_check(self, response_text, context_str):
# Custom fact-checking logic
# Return confidence score 0-1
return 0.9 # Placeholder
# Use custom synthesizer
fact_check_synthesizer = FactCheckSynthesizer(
fact_check_threshold=0.85,
streaming=False
)
query_engine = index.as_query_engine(
response_synthesizer=fact_check_synthesizer
)

Advanced response objects with comprehensive metadata and source attribution.
class Response:
    """
    Response object with synthesis results and metadata.

    Attributes:
        response: Generated response text
        source_nodes: List of source nodes used
        metadata: Additional response metadata
    """

    # Forward-reference (string) annotations: List/Dict/NodeWithScore are not
    # imported in this stub, and unquoted annotations would be evaluated at
    # class-creation time and raise NameError.
    response: "str"
    source_nodes: "List[NodeWithScore]"
    metadata: "Dict[str, Any]"

    def get_formatted_sources(self, length=100):
        """Get formatted source excerpts."""

    def __str__(self):
        """String representation of response."""
class StreamingResponse:
    """
    Streaming response for real-time synthesis.

    Methods:
        response_gen: Generator yielding response tokens
        get_response: Get complete response object
        print_response_stream: Print streaming response
    """

    def response_gen(self):
        """Generate response tokens in real-time."""

    def get_response(self):
        """Get final complete response."""

    def print_response_stream(self):
        """Print response as it's generated."""

# Response Usage Example:
# Regular response
response = query_engine.query("What is machine learning?")
print(f"Response: {response.response}")
print(f"Sources: {len(response.source_nodes)}")
print(f"Metadata: {response.metadata}")
# Streaming response
streaming_engine = index.as_query_engine(
response_synthesizer=get_response_synthesizer(streaming=True)
)
streaming_response = streaming_engine.query("Explain neural networks")
streaming_response.print_response_stream()

Install with Tessl CLI
npx tessl i tessl/pypi-llama-index