A declarative framework for programming foundation models through compositional Python code, enabling modular AI systems with automated optimization algorithms that synthesize examples, generate instructions, and finetune weights based on user-defined metrics.
npx @tessl/cli install tessl/pypi-dspy@3.1.0

DSPy is a declarative framework for programming foundation models that enables developers to build modular AI systems through compositional Python code rather than manual prompt engineering. It provides a structured approach to define AI behavior through natural-language signatures and reusable modules, making systems maintainable and portable across different language models.
pip install dspy

import dspy

Common imports for building DSPy programs:
from dspy import (
Module,
Signature,
Predict,
ChainOfThought,
Example,
configure,
)

For specialized functionality:
# Prediction modules
from dspy import ReAct, ProgramOfThought, CodeAct, BestOfN
# Optimization
from dspy import BootstrapFewShot, MIPROv2, Evaluate
# Language models
from dspy import LM, Embedder
# Retrieval
from dspy import Retrieve
# Signatures
from dspy import InputField, OutputField
# Datasets
from dspy.datasets import Dataset, DataLoader, HotPotQA, MATH, Colors
# Utilities
from dspy import asyncify, syncify, load, track_usage

import dspy
# Configure with a language model
dspy.configure(lm=dspy.LM('openai/gpt-4o-mini'))
# Create a simple question-answering module
qa = dspy.ChainOfThought("question -> answer")
# Use the module
result = qa(question="What is machine learning?")
print(result.answer)
print(result.reasoning)  # Chain-of-thought reasoning

Building a custom module:
class RAG(dspy.Module):
    """Retrieval-Augmented Generation system.

    Retrieves passages for a question and hands them, together with the
    question, to a chain-of-thought generator.
    """

    def __init__(self, k=3):
        """Create the retriever and the answer generator.

        Args:
            k: Value forwarded to ``dspy.Retrieve`` as ``k``.
        """
        super().__init__()
        self.retrieve = dspy.Retrieve(k=k)
        self.generate = dspy.ChainOfThought("context, question -> answer")

    def forward(self, question):
        """Answer ``question`` from retrieved context.

        Args:
            question: Question text; also used as the retrieval query.

        Returns:
            Whatever ``self.generate`` returns for the retrieved passages
            and the question.
        """
        # Retrieve relevant passages
        context = self.retrieve(query=question).passages
        # Generate answer from context
        return self.generate(context=context, question=question)
# Configure with retrieval model
dspy.configure(
lm=dspy.LM('openai/gpt-4o-mini'),
rm=my_retrieval_model
)
# Use the RAG system
rag = RAG(k=5)
result = rag(question="What is the capital of France?")
print(result.answer)

Optimizing a program:
# Define validation metric
def validate_answer(example, pred, trace=None):
    """Check if prediction matches expected answer.

    The comparison is case-insensitive and ignores leading/trailing
    whitespace, so a prediction such as ``" madrid\\n"`` matches ``"Madrid"``.

    Args:
        example: Object exposing the expected value as ``.answer``.
        pred: Object exposing the predicted value as ``.answer``.
        trace: Unused; accepted so the function has the metric signature.

    Returns:
        True when both answers are present and equal after normalization,
        False otherwise (including when either answer is missing or None).
    """
    expected = getattr(example, "answer", None)
    predicted = getattr(pred, "answer", None)
    # A missing answer is a failed match, not an AttributeError.
    if expected is None or predicted is None:
        return False
    return str(expected).strip().lower() == str(predicted).strip().lower()
# Create training examples
trainset = [
dspy.Example(question="What is 2+2?", answer="4").with_inputs("question"),
dspy.Example(question="Capital of Spain?", answer="Madrid").with_inputs("question"),
# ... more examples
]
# Optimize the program
optimizer = dspy.BootstrapFewShot(
metric=validate_answer,
max_bootstrapped_demos=4
)
compiled_rag = optimizer.compile(rag, trainset=trainset)
# The compiled program now has optimized demonstrations
result = compiled_rag(question="What is the capital of Japan?")

DSPy provides a modular architecture that separates concerns:
Signatures define the input/output structure of AI operations using natural language descriptions or typed fields. They serve as the interface between your program logic and language models.
class Signature:
"""Base class for defining task signatures."""
pass

Modules are composable building blocks that implement AI operations. They can be nested and combined to create complex pipelines.
class Module:
"""Base class for all DSPy modules."""
def __call__(self, *args, **kwargs):
"""Execute the module."""
pass
def forward(self, *args, **kwargs):
"""Forward pass (implement in subclasses)."""
pass

Language models provide the AI capabilities. DSPy supports multiple providers through a unified interface.
class LM:
"""Language model client."""
def __init__(self, model: str, **kwargs):
"""
Initialize language model.
Args:
model: Model identifier (e.g., "openai/gpt-4o-mini")
**kwargs: Additional configuration (temperature, max_tokens, etc.)
"""
pass

Optimizers (teleprompters) automatically improve programs by finding better prompts, demonstrations, and parameters.
class Teleprompter:
"""Base class for optimizers."""
def compile(self, student, *, trainset, **kwargs):
"""
Optimize and compile a program.
Args:
student: Module to optimize
trainset: Training examples
**kwargs: Additional optimization parameters
Returns:
Compiled module
"""
pass

Global configuration for language models, adapters, and execution settings. Provides context managers for temporary overrides.
def configure(
    lm=None,
    adapter=None,
    rm=None,
    trace=None,
    callbacks=None,
    async_max_workers=None,
    track_usage=False,
    **kwargs
):
    """Configure global DSPy settings.

    Args:
        lm: Language model instance.
        adapter: Adapter for formatting prompts.
        rm: Retrieval model.
        trace: List to store execution traces.
        callbacks: Callback functions.
        async_max_workers: Max async workers.
        track_usage: Enable token usage tracking.
        **kwargs: Additional settings.
    """
def context(**kwargs):
"""
Create context manager for temporary settings.
Returns:
Context manager
"""
pass

Define task structures using signatures with typed fields, custom instructions, and validation.
class Signature:
    """Task signature with input/output fields."""

    # Task instructions text.
    instructions: str
    # Input field definitions.
    input_fields: dict
    # Output field definitions.
    output_fields: dict
def InputField(
    desc: str = None,
    prefix: str = None,
    format: callable = None,
    **kwargs
):
    """Create input field for signatures.

    Args:
        desc: Field description.
        prefix: Display prefix.
        format: Formatting function.
        **kwargs: Pydantic Field parameters.

    Returns:
        Field definition.
    """
def OutputField(desc: str = None, prefix: str = None, **kwargs):
    """Create output field for signatures.

    Args:
        desc: Field description.
        prefix: Display prefix.
        **kwargs: Pydantic Field parameters.

    Returns:
        Field definition.
    """
def make_signature(
signature,
instructions: str = None,
custom_types: dict = None
):
"""
Create signature dynamically from string or dict.
Args:
signature: String spec ("input -> output") or field dict
instructions: Task instructions
custom_types: Custom type mappings
Returns:
Signature class
"""
pass

Base classes and data structures for building composable AI programs.
class Module:
    """Base class for DSPy modules."""

    def __init__(self, callbacks=None):
        """Initialize module with optional callbacks."""

    def forward(self, *args, **kwargs):
        """Forward pass (implement in subclasses)."""

    def named_predictors(self):
        """Get all named Predict instances."""

    def save(self, path: str, **kwargs):
        """Save module to disk."""
class Example:
    """Data container for examples and training data."""

    def __init__(self, base=None, **kwargs):
        """Create example with key-value pairs.

        Args:
            base: Base data to copy.
            **kwargs: Field values.
        """

    def with_inputs(self, *keys):
        """Mark fields as inputs."""
class Prediction:
"""Prediction result from module execution."""
completions: object
def get_lm_usage(self):
"""Get token usage information."""
pass

Pre-built modules for common AI patterns including reasoning, tool use, and code generation.
class Predict:
    """Basic prediction module mapping inputs to outputs."""

    def __init__(self, signature, callbacks=None, **config):
        """Initialize predictor.

        Args:
            signature: Task signature.
            callbacks: Optional callbacks.
            **config: Default LM parameters.
        """
class ChainOfThought:
    """Chain-of-thought reasoning module."""

    def __init__(self, signature, rationale_field=None, **config):
        """Initialize chain-of-thought module.

        Args:
            signature: Task signature.
            rationale_field: Field for reasoning.
            **config: LM parameters.
        """
class ReAct:
    """Reasoning and Acting agent with tool use."""

    def __init__(self, signature, tools: list, max_iters: int = 10):
        """Initialize ReAct agent.

        Args:
            signature: Task signature.
            tools: List of callable tools.
            max_iters: Maximum iterations.
        """
class ProgramOfThought:
    """Generate and execute Python code to solve problems."""

    def __init__(self, signature, max_iters: int = 3, interpreter=None):
        """Initialize program-of-thought module.

        Args:
            signature: Task signature.
            max_iters: Max code generation retries.
            interpreter: Python interpreter instance.
        """
class CodeAct:
"""Combine code interpretation with tool use."""
def __init__(self, signature, tools: list, max_iters: int = 5, interpreter=None):
"""
Initialize CodeAct module.
Args:
signature: Task signature
tools: Available tools
max_iters: Maximum iterations
interpreter: Python interpreter
"""
pass

Teleprompt optimizers that automatically improve programs through various strategies.
class BootstrapFewShot:
    """Bootstrap few-shot demonstrations from examples."""

    def __init__(
        self,
        metric=None,
        metric_threshold=None,
        teacher_settings=None,
        max_bootstrapped_demos: int = 4,
        max_labeled_demos: int = 16,
        max_rounds: int = 1,
        max_errors=None
    ):
        """Initialize bootstrap optimizer.

        Args:
            metric: Validation metric function.
            metric_threshold: Threshold for accepting demos.
            teacher_settings: Settings for teacher model.
            max_bootstrapped_demos: Max bootstrapped demonstrations.
            max_labeled_demos: Max labeled demonstrations.
            max_rounds: Bootstrap rounds.
            max_errors: Max errors before stopping.
        """

    def compile(self, student, teacher=None, trainset=None):
        """Compile program with bootstrapped demonstrations."""
class MIPROv2:
    """Multi-prompt Instruction Proposal Optimizer."""

    def __init__(
        self,
        metric,
        prompt_model=None,
        task_model=None,
        auto: str = "light",
        num_candidates=None,
        max_bootstrapped_demos: int = 4,
        **kwargs
    ):
        """Initialize MIPROv2 optimizer.

        Args:
            metric: Evaluation metric.
            prompt_model: Model for generating prompts.
            task_model: Model for task execution.
            auto: Auto-config mode ("light", "medium", "heavy").
            num_candidates: Number of candidates to try.
            max_bootstrapped_demos: Max demonstrations.
            **kwargs: Additional parameters.
        """
class LabeledFewShot:
"""Simple few-shot learning with labeled examples."""
def __init__(self, k: int = 16):
"""
Initialize labeled few-shot optimizer.
Args:
k: Number of examples per predictor
"""
pass

Language model clients with support for multiple providers, caching, and finetuning.
class LM:
    """Main language model class."""

    def __init__(
        self,
        model: str,
        model_type: str = "chat",
        temperature: float = None,
        max_tokens: int = None,
        cache: bool = True,
        callbacks=None,
        num_retries: int = 3,
        **kwargs
    ):
        """Initialize language model.

        Args:
            model: Model identifier (e.g., "openai/gpt-4o-mini").
            model_type: Type ("chat", "text", "responses").
            temperature: Sampling temperature.
            max_tokens: Max tokens per response.
            cache: Enable caching.
            callbacks: Callback functions.
            num_retries: Retry attempts.
            **kwargs: Additional parameters (n, rollout_id, etc.).
        """

    def __call__(self, prompt=None, messages=None, **kwargs):
        """Generate completion."""

    def inspect_history(self, n: int = 1):
        """Inspect recent calls."""
class Embedder:
    """Text embedding class."""

    def __init__(self, model, batch_size: int = 200, caching: bool = True, **kwargs):
        """Initialize embedder.

        Args:
            model: Model name or custom function.
            batch_size: Batch size for processing.
            caching: Cache responses.
            **kwargs: Additional parameters.
        """

    def __call__(self, texts: list, **kwargs):
        """Compute embeddings.

        Args:
            texts: List of texts to embed.
            **kwargs: Additional parameters.

        Returns:
            numpy array of embeddings.
        """
def inspect_history(lm_or_module, n: int = 1):
    """Inspect call history of LM or module.

    Args:
        lm_or_module: LM or Module instance.
        n: Number of recent calls.
    """
def configure_cache(
enable_disk_cache: bool = None,
enable_memory_cache: bool = None,
disk_cache_dir: str = None,
disk_size_limit_bytes: int = None,
memory_max_entries: int = None
):
"""
Configure caching system.
Args:
enable_disk_cache: Enable disk cache
enable_memory_cache: Enable memory cache
disk_cache_dir: Cache directory path
disk_size_limit_bytes: Disk cache size limit
memory_max_entries: Max memory cache entries
"""
pass

Model adapters for formatting prompts and custom types for multimodal inputs.
class ChatAdapter:
    """Chat-based adapter for conversation models."""

    def __init__(self, callbacks=None, **kwargs):
        """Initialize chat adapter."""
class JSONAdapter:
    """JSON-based adapter for structured I/O."""

    def __init__(self, callbacks=None, **kwargs):
        """Initialize JSON adapter."""
class Tool:
    """Wrapper for functions used as tools."""

    def __init__(
        self,
        func,
        name: str = None,
        desc: str = None,
        args: dict = None
    ):
        """Create tool from function.

        Args:
            func: Function to wrap.
            name: Tool name.
            desc: Tool description.
            args: Argument schemas.
        """
class Image:
    """Image input type for multimodal models."""
class Audio:
    """Audio input type."""
class History:
"""Conversation history type."""
pass

Retrieval components for fetching relevant passages from various vector stores including ColBERT, Weaviate, and Databricks.
class Retrieve:
    """Retrieval module for fetching passages."""

    def __init__(self, k: int = 3, callbacks=None):
        """Initialize retrieval module.

        Args:
            k: Number of passages to retrieve.
            callbacks: Optional callbacks.
        """

    def forward(self, query: str, k: int = None, **kwargs):
        """Retrieve passages for query.

        Args:
            query: Search query.
            k: Override number of passages.
            **kwargs: Additional parameters.

        Returns:
            Prediction with 'passages' field.
        """
class Embeddings:
    """Embedding-based retrieval using vector similarity."""

    def __init__(self, embedder, documents: list = None, k: int = 3):
        """Initialize embeddings retrieval."""
class ColBERTv2:
    """ColBERTv2 retrieval model for dense passage retrieval."""

    def __init__(self, url: str = "http://0.0.0.0", port: int = None, **kwargs):
        """Initialize ColBERT retriever."""
class WeaviateRM:
    """Weaviate vector database retrieval model."""

    def __init__(
        self,
        weaviate_collection_name: str,
        weaviate_client=None,
        k: int = 3
    ):
        """Initialize Weaviate retrieval model."""
class DatabricksRM:
"""Databricks Vector Search retrieval model."""
def __init__(
self,
databricks_index_name: str,
databricks_endpoint: str = None,
k: int = 3
):
"""Initialize Databricks retrieval model."""
pass

Framework for evaluating programs with metrics and parallel execution.
class Evaluate:
    """Evaluate DSPy programs on datasets."""

    def __init__(
        self,
        devset: list,
        metric=None,
        num_threads: int = None,
        display_progress: bool = False,
        display_table: bool = False,
        max_errors: int = None,
        failure_score: float = 0.0,
        **kwargs
    ):
        """Initialize evaluator.

        Args:
            devset: Evaluation dataset.
            metric: Metric function.
            num_threads: Threads for parallel evaluation.
            display_progress: Show progress bar.
            display_table: Display results table.
            max_errors: Max errors before stopping.
            failure_score: Score for failed examples.
            **kwargs: Additional parameters.
        """

    def __call__(self, program, **kwargs):
        """Evaluate program on dataset.

        Args:
            program: Module to evaluate.
            **kwargs: Additional parameters.

        Returns:
            EvaluationResult with score and details.
        """
def EM(prediction: str, answers_list: list) -> bool:
    """Exact match metric after normalization.

    Args:
        prediction: Predicted answer.
        answers_list: Reference answers.

    Returns:
        True if exact match.
    """
def F1(prediction: str, answers_list: list) -> float:
    """Token-level F1 score.

    Args:
        prediction: Predicted answer.
        answers_list: Reference answers.

    Returns:
        F1 score (0.0 to 1.0).
    """
def answer_exact_match(example, pred, trace=None, frac: float = 1.0) -> bool:
"""
Metric function for exact match evaluation.
Args:
example: Example with 'answer' field
pred: Prediction with 'answer' field
trace: Unused, for compatibility
frac: Threshold (1.0 = exact match)
Returns:
True if match
"""
pass

Support for streaming responses from language models.
def streamify(module):
"""
Enable streaming for a module.
Args:
module: Module to enable streaming for
Returns:
StreamListener context manager
"""
pass

Dataset loading and management utilities for various formats including HuggingFace, CSV, JSON, and Parquet.
class Dataset:
    """Base class for structured datasets with train/dev/test splits."""

    def __init__(
        self,
        train_seed: int = 0,
        train_size: int = None,
        eval_seed: int = 0,
        dev_size: int = None,
        test_size: int = None,
        input_keys: list = None
    ):
        """Initialize dataset with split configuration."""

    @property
    def train(self) -> list:
        """Get training split."""

    @property
    def dev(self) -> list:
        """Get development/validation split."""

    @property
    def test(self) -> list:
        """Get test split."""
class DataLoader:
    """Universal data loader for various file formats and sources."""

    def from_huggingface(
        self,
        dataset_name: str,
        *args,
        input_keys: tuple = (),
        fields: tuple = None,
        **kwargs
    ):
        """Load dataset from HuggingFace Hub."""

    def from_csv(self, file_path: str, fields: list = None, input_keys: tuple = ()) -> list:
        """Load dataset from CSV file."""

    def from_json(self, file_path: str, fields: list = None, input_keys: tuple = ()) -> list:
        """Load dataset from JSON file."""

    def from_pandas(self, df, fields: list = None, input_keys: tuple = ()) -> list:
        """Load dataset from pandas DataFrame."""
class HotPotQA(Dataset):
    """HotPotQA multi-hop question answering dataset."""
class MATH(Dataset):
    """MATH dataset for mathematical problem solving."""
class Colors(Dataset):
"""Colors dataset for testing and examples."""
pass

Helper functions for async/sync conversion, saving/loading, and usage tracking.
def asyncify(func):
    """Convert sync function to async.

    Args:
        func: Synchronous function.

    Returns:
        Async version of function.
    """
def syncify(func):
    """Convert async function to sync.

    Args:
        func: Async function.

    Returns:
        Sync version of function.
    """
def load(path: str, allow_pickle: bool = False):
    """Load saved module from disk.

    Args:
        path: Path to saved module.
        allow_pickle: Allow pickle loading (security).

    Returns:
        Module instance.
    """
def track_usage():
    """Track token usage in context.

    Returns:
        UsageTracker context manager.
    """
def enable_logging():
    """Enable DSPy logging output."""
def disable_logging():
    """Disable DSPy logging output."""