Language Models

Language model clients with support for multiple providers through LiteLLM, including caching, finetuning, embeddings, and provider-specific features.

Capabilities

Language Model Client

Main language model class supporting chat and text completion via LiteLLM.

class LM:
    """
    Main language model class.

    Provides a unified interface to 100+ LLM providers through LiteLLM,
    with built-in caching, retry logic, and finetuning support.
    """

    def __init__(
        self,
        model: str,
        model_type: str = "chat",
        temperature: float = None,
        max_tokens: int = None,
        cache: bool = True,
        callbacks: list = None,
        num_retries: int = 3,
        provider=None,
        finetuning_model: str = None,
        launch_kwargs: dict = None,
        train_kwargs: dict = None,
        use_developer_role: bool = False,
        **kwargs
    ):
        """
        Initialize language model.

        Args:
            model (str): Model identifier (e.g., "openai/gpt-4o-mini", "anthropic/claude-3-5-sonnet-20241022")
            model_type (str): Model type - "chat", "text", or "responses" (default: "chat")
            temperature (float | None): Sampling temperature (0.0 to 2.0)
            max_tokens (int | None): Maximum tokens per response
            cache (bool): Enable response caching (default: True)
            callbacks (list | None): Callback functions for monitoring
            num_retries (int): Number of retry attempts on failure (default: 3)
            provider (Provider | None): Provider instance for advanced features
            finetuning_model (str | None): Model identifier for finetuning
            launch_kwargs (dict | None): Launch configuration for local models
            train_kwargs (dict | None): Default training configuration
            use_developer_role (bool): Use developer role for responses model (default: False)
            **kwargs: Additional LM parameters:
                - n (int): Number of completions to generate
                - rollout_id (int): Seed for deterministic generation
                - stop (list[str]): Stop sequences
                - presence_penalty (float): Presence penalty
                - frequency_penalty (float): Frequency penalty
                - top_p (float): Nucleus sampling parameter
        """
        pass

    def __call__(self, prompt: str = None, messages: list = None, **kwargs):
        """
        Generate completion.

        Args:
            prompt (str | None): Text prompt (for text models)
            messages (list | None): List of message dicts (for chat models)
            **kwargs: Override default parameters (temperature, max_tokens, etc.)

        Returns:
            List of response strings (length = n parameter)
        """
        pass

    def forward(self, prompt: str = None, messages: list = None, **kwargs):
        """
        Same as __call__. Generate completion.

        Args:
            prompt (str | None): Text prompt
            messages (list | None): Message list
            **kwargs: Parameter overrides

        Returns:
            List of response strings
        """
        pass

    def copy(self, **kwargs):
        """
        Create copy with updated parameters.

        Args:
            **kwargs: Parameters to override

        Returns:
            New LM instance
        """
        pass

    def inspect_history(self, n: int = 1):
        """
        Pretty-print recent calls.

        Args:
            n (int): Number of recent calls to display (default: 1)
        """
        pass

    def get_convo(self, index: int):
        """
        Get conversation at index.

        Args:
            index (int): Conversation index in history

        Returns:
            Conversation dict with messages and responses
        """
        pass

    def dump_state(self) -> dict:
        """
        Serialize state to dictionary.

        Returns:
            Dictionary representation of LM state
        """
        pass

    def launch(self, **launch_kwargs):
        """
        Launch model instance (for local/vLLM models).

        Args:
            **launch_kwargs: Launch configuration parameters
        """
        pass

    def kill(self):
        """Kill launched model instance."""
        pass

    def finetune(
        self,
        train_data: list,
        train_kwargs: dict = None,
        cache_finetune: bool = False
    ):
        """
        Start finetuning job.

        Args:
            train_data (list): Training examples
            train_kwargs (dict | None): Training configuration
            cache_finetune (bool): Cache finetuning job (default: False)

        Returns:
            TrainingJob instance
        """
        pass

    def push_to_hf_hub(self, repo_id: str, **kwargs):
        """
        Push finetuned model to Hugging Face Hub.

        Args:
            repo_id (str): Repository ID (e.g., "username/model-name")
            **kwargs: Additional push parameters
        """
        pass

Usage:

import dspy

# Basic usage
lm = dspy.LM('openai/gpt-4o-mini')
dspy.configure(lm=lm)

# With parameters
lm = dspy.LM(
    'openai/gpt-4o-mini',
    temperature=0.7,
    max_tokens=500,
    cache=True
)

# Direct call
response = lm(prompt="Translate to French: Hello")
print(response[0])  # "Bonjour"

# Chat messages
response = lm(messages=[
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is 2+2?"}
])

# Multiple completions
lm = dspy.LM('openai/gpt-4o-mini', n=5)
responses = lm(prompt="Write a haiku about coding")
for i, resp in enumerate(responses):
    print(f"Completion {i+1}: {resp}")

# Copy with different parameters
lm_creative = lm.copy(temperature=1.5, max_tokens=1000)

# Inspect history
lm.inspect_history(n=3)  # Show last 3 calls

# Different providers
anthropic_lm = dspy.LM('anthropic/claude-3-5-sonnet-20241022')
gemini_lm = dspy.LM('gemini/gemini-1.5-pro')
mistral_lm = dspy.LM('mistral/mistral-large-latest')
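
# Per-call parameter overrides and state serialization.
# A minimal sketch; the exact contents of the dumped state depend on the LM configuration.
response = lm(
    messages=[{"role": "user", "content": "Summarize DSPy in one sentence."}],
    temperature=0.2,  # overrides the default for this call only
    max_tokens=100,
)

state = lm.dump_state()  # serializable dict of the LM's configuration
print(state)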

Base Language Model

Abstract base class for custom language model implementations.

class BaseLM:
    """
    Base class for language models.

    Defines the interface that custom LM implementations should follow.
    Extend this class to create custom model integrations.
    """

    def __call__(self, prompt: str = None, messages: list = None, **kwargs):
        """
        Generate completion (must be implemented).

        Args:
            prompt (str | None): Text prompt
            messages (list | None): Message list
            **kwargs: Model-specific parameters

        Returns:
            List of response strings
        """
        pass

    def dump_state(self) -> dict:
        """
        Serialize state (must be implemented).

        Returns:
            Dictionary representation of state
        """
        pass

Usage:

import dspy

class CustomLM(dspy.BaseLM):
    """Custom language model integration."""

    def __init__(self, model_path: str, **kwargs):
        self.model_path = model_path
        self.config = kwargs
        # Initialize your model here

    def __call__(self, prompt=None, messages=None, **kwargs):
        # Implement your model's generation logic
        # Must return list of strings
        response = self._generate(prompt or messages)
        return [response]

    def dump_state(self):
        return {
            "model_path": self.model_path,
            "config": self.config
        }

# Use custom LM
custom_lm = CustomLM("/path/to/model")
dspy.configure(lm=custom_lm)

Embedder

Text embedding class for vectorization.

class Embedder:
    """
    Embedding class for text vectorization.

    Supports both hosted embedding models and custom embedding functions.
    """

    def __init__(
        self,
        model,
        batch_size: int = 200,
        caching: bool = True,
        **kwargs
    ):
        """
        Initialize embedder.

        Args:
            model (str | callable): Model name or custom embedding function
                - str: Hosted model (e.g., "openai/text-embedding-3-small")
                - callable: Custom function that takes list of texts
            batch_size (int): Batch size for processing (default: 200)
            caching (bool): Cache embedding responses (default: True)
            **kwargs: Additional model parameters
        """
        pass

    def __call__(self, texts: list, batch_size: int = None, **kwargs):
        """
        Compute embeddings for texts.

        Args:
            texts (list): List of strings to embed
            batch_size (int | None): Override batch size
            **kwargs: Additional parameters

        Returns:
            numpy.ndarray of shape (len(texts), embedding_dim)
        """
        pass

Usage:

import dspy
import numpy as np

# Hosted model
embedder = dspy.Embedder("openai/text-embedding-3-small")
embeddings = embedder(["hello", "world", "machine learning"])
print(embeddings.shape)  # (3, 1536)

# Custom function
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")
embedder = dspy.Embedder(model.encode, batch_size=32)
embeddings = embedder(["hello", "world"])
print(embeddings.shape)  # (2, 384)

# Use with KNN
knn = dspy.KNN(k=5, trainset=trainset, vectorizer=embedder)
similar = knn(query="machine learning")

Provider System

Base class for LM providers, handling model launching, finetuning, and other provider-specific operations.

class Provider:
    """
    Base class for LM providers.

    Handles provider-specific operations like model launching,
    finetuning, and reinforcement learning.
    """

    finetunable: bool
    """Whether this provider supports finetuning."""

    reinforceable: bool
    """Whether this provider supports reinforcement learning."""

    TrainingJob: type
    """TrainingJob class for this provider."""

    ReinforceJob: type
    """ReinforceJob class for this provider."""

    @staticmethod
    def is_provider_model(model: str) -> bool:
        """
        Check if model belongs to this provider.

        Args:
            model (str): Model identifier

        Returns:
            True if model is from this provider
        """
        pass

    def launch(self, lm, launch_kwargs: dict):
        """
        Launch model instance.

        Args:
            lm: LM instance
            launch_kwargs (dict): Launch configuration
        """
        pass

    def kill(self, lm, launch_kwargs: dict):
        """
        Kill launched instance.

        Args:
            lm: LM instance
            launch_kwargs (dict): Launch configuration
        """
        pass

    def finetune(self, job, model: str, train_data: list, train_kwargs: dict):
        """
        Start finetuning job.

        Args:
            job: TrainingJob instance
            model (str): Model to finetune
            train_data (list): Training examples
            train_kwargs (dict): Training configuration
        """
        pass
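
Usage:

import dspy

# A minimal sketch of a custom provider. LocalProvider and the "local/" prefix
# are hypothetical, and the Provider base class is assumed to be exposed as
# dspy.Provider (the import path may differ between versions).
class LocalProvider(dspy.Provider):
    finetunable = False
    reinforceable = False

    @staticmethod
    def is_provider_model(model: str) -> bool:
        return model.startswith("local/")

    def launch(self, lm, launch_kwargs: dict):
        # Start a local inference server here (e.g. spawn a subprocess)
        ...

    def kill(self, lm, launch_kwargs: dict):
        # Shut the server down
        ...

# Attach the provider to an LM to enable launch/kill support
lm = dspy.LM("local/my-model", provider=LocalProvider())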

Training Job

Represents a finetuning job with async monitoring.

class TrainingJob:
    """
    Training job for model finetuning.

    Extends concurrent.futures.Future for async job monitoring.
    """

    def __init__(
        self,
        thread=None,
        model: str = None,
        train_data: list = None,
        train_data_format: str = None,
        train_kwargs: dict = None
    ):
        """
        Initialize training job.

        Args:
            thread: Thread running the job
            model (str | None): Model being finetuned
            train_data (list | None): Training data
            train_data_format (str | None): Format of training data
            train_kwargs (dict | None): Training configuration
        """
        pass

    def status(self) -> str:
        """
        Get job status.

        Returns:
            Status string ("running", "completed", "failed", etc.)
        """
        pass

    def cancel(self) -> bool:
        """
        Cancel job.

        Returns:
            True if successfully cancelled
        """
        pass

    def result(self, timeout: float = None):
        """
        Wait for and get result.

        Args:
            timeout (float | None): Maximum wait time in seconds

        Returns:
            Finetuned model identifier
        """
        pass

    def done(self) -> bool:
        """
        Check if job is done.

        Returns:
            True if job completed (success or failure)
        """
        pass

Usage:

import dspy

# Start finetuning
lm = dspy.LM('openai/gpt-4o-mini', finetuning_model='gpt-4o-mini-2024-07-18')

train_data = [
    {"messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is ML?"},
        {"role": "assistant", "content": "Machine Learning is..."}
    ]},
    # ... more examples
]

job = lm.finetune(
    train_data=train_data,
    train_kwargs={
        "n_epochs": 3,
        "batch_size": 4,
        "learning_rate_multiplier": 0.1
    }
)

# Check status
print(job.status())  # "running"

# Wait for completion
finetuned_model_id = job.result(timeout=3600)  # Wait up to 1 hour
print(f"Finetuned model: {finetuned_model_id}")

# Use finetuned model
finetuned_lm = dspy.LM(finetuned_model_id)
dspy.configure(lm=finetuned_lm)
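
# Optionally publish finetuned weights to the Hugging Face Hub.
# A minimal sketch: "username/my-finetuned-model" is a placeholder repo ID, and
# pushing applies only to providers that produce local checkpoints.
finetuned_lm.push_to_hf_hub("username/my-finetuned-model")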

History Inspection

Inspect call history of language models or modules.

def inspect_history(lm_or_module, n: int = 1):
    """
    Inspect call history with pretty printing.

    Args:
        lm_or_module: LM instance or Module instance
        n (int): Number of recent calls to display (default: 1)
    """
    pass

Usage:

import dspy

lm = dspy.LM('openai/gpt-4o-mini')
dspy.configure(lm=lm)

# Make some calls
qa = dspy.Predict("question -> answer")
qa(question="What is 2+2?")
qa(question="What is the capital of France?")

# Inspect LM history
dspy.inspect_history(lm, n=2)

# Inspect module history
dspy.inspect_history(qa, n=2)

Cache Configuration

Configure the caching system for language model responses.

def configure_cache(
    enable_disk_cache: bool = None,
    enable_memory_cache: bool = None,
    disk_cache_dir: str = None,
    disk_size_limit_bytes: int = None,
    memory_max_entries: int = None
):
    """
    Configure DSPy caching system.

    Args:
        enable_disk_cache (bool | None): Enable disk cache (default: True)
        enable_memory_cache (bool | None): Enable memory cache (default: True)
        disk_cache_dir (str | None): Directory path for disk cache
        disk_size_limit_bytes (int | None): Maximum disk cache size in bytes
        memory_max_entries (int | None): Maximum number of memory cache entries
    """
    pass

Usage:

import dspy

# Configure cache
dspy.configure_cache(
    enable_disk_cache=True,
    enable_memory_cache=True,
    disk_cache_dir="/tmp/dspy_cache",
    disk_size_limit_bytes=5_000_000_000,  # 5GB
    memory_max_entries=5000
)

# Disable caching entirely
dspy.configure_cache(
    enable_disk_cache=False,
    enable_memory_cache=False
)

LiteLLM Logging

Control LiteLLM logging output.

def enable_litellm_logging():
    """Enable LiteLLM debug logging."""
    pass

def disable_litellm_logging():
    """Disable LiteLLM debug logging."""
    pass

Usage:

import dspy

# Disable verbose LiteLLM logs
dspy.disable_litellm_logging()

# Enable for debugging
dspy.enable_litellm_logging()

Language Model Patterns

Multiple Models

Use different models for different purposes:

import dspy

# Fast model for simple tasks
fast_lm = dspy.LM('openai/gpt-4o-mini', temperature=0.0)

# Powerful model for complex reasoning
strong_lm = dspy.LM('openai/gpt-4o', temperature=0.7)

# Configure default
dspy.configure(lm=fast_lm)

# Use strong model in specific context
with dspy.context(lm=strong_lm):
    result = complex_module(input=data)

Model Cascading

Try a cheap model first and fall back to an expensive one:

import dspy

expensive_lm = dspy.LM('openai/gpt-4o')  # fallback model for hard questions

class CascadingQA(dspy.Module):
    def __init__(self):
        super().__init__()
        self.cheap_qa = dspy.Predict("question -> answer")
        self.expensive_qa = dspy.ChainOfThought("question -> answer")
        self.judge = dspy.Predict("question, answer -> confident: bool")

    def forward(self, question):
        # Try cheap model
        cheap_result = self.cheap_qa(question=question)

        # Check confidence
        judgment = self.judge(
            question=question,
            answer=cheap_result.answer
        )

        if judgment.confident:
            return cheap_result
        else:
            # Fall back to expensive model
            with dspy.context(lm=expensive_lm):
                return self.expensive_qa(question=question)

Finetuning Workflow

Complete finetuning workflow:

import dspy

# 1. Bootstrap high-quality training data
dspy.configure(lm=dspy.LM('openai/gpt-4o'))
program = MyModule()

optimizer = dspy.BootstrapFinetune(metric=my_metric)
compiled, finetune_data = optimizer.compile(program, trainset=trainset)

# 2. Finetune model
base_lm = dspy.LM(
    'openai/gpt-4o-mini',
    finetuning_model='gpt-4o-mini-2024-07-18'
)

job = base_lm.finetune(
    train_data=finetune_data,
    train_kwargs={"n_epochs": 3}
)

# 3. Wait for completion
finetuned_model_id = job.result(timeout=7200)

# 4. Use finetuned model
finetuned_lm = dspy.LM(finetuned_model_id)
dspy.configure(lm=finetuned_lm)

# 5. Evaluate improvement
evaluator = dspy.Evaluate(devset=test_set, metric=my_metric)
score = evaluator(program)

Local Model Setup

Use local models with vLLM:

import dspy

# Launch local model with vLLM
lm = dspy.LM(
    'meta-llama/Llama-3.1-8B-Instruct',
    launch_kwargs={
        "gpu_memory_utilization": 0.9,
        "max_model_len": 8192,
        "tensor_parallel_size": 2
    }
)

lm.launch()  # Start vLLM server

# Use local model
dspy.configure(lm=lm)
result = my_program(input=data)

# Clean up
lm.kill()

Embedding Patterns

Use embeddings for retrieval and similarity:

import dspy

# Create embedder
embedder = dspy.Embedder("openai/text-embedding-3-small")

# Embed documents
docs = ["Document 1 text", "Document 2 text", ...]
doc_embeddings = embedder(docs)

# Embed query
query = "search query"
query_embedding = embedder([query])

# Compute similarity
from numpy import dot
from numpy.linalg import norm

def cosine_similarity(a, b):
    return dot(a, b) / (norm(a) * norm(b))

similarities = [
    cosine_similarity(query_embedding[0], doc_emb)
    for doc_emb in doc_embeddings
]

# Get top-k documents
top_k = sorted(enumerate(similarities), key=lambda x: -x[1])[:5]
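
# Map the top-k indices back to the source documents.
# A small sketch, assuming docs above is a concrete list of strings.
for idx, score in top_k:
    print(f"{score:.3f}  {docs[idx]}")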