Language model clients with support for multiple providers through LiteLLM, including caching, finetuning, embeddings, and provider-specific features.
Main language model class supporting chat and text completion via LiteLLM.
class LM:
"""
Main language model class.
Provides a unified interface to 100+ LLM providers through LiteLLM,
with built-in caching, retry logic, and finetuning support.
"""
def __init__(
self,
model: str,
model_type: str = "chat",
temperature: float = None,
max_tokens: int = None,
cache: bool = True,
callbacks: list = None,
num_retries: int = 3,
provider=None,
finetuning_model: str = None,
launch_kwargs: dict = None,
train_kwargs: dict = None,
use_developer_role: bool = False,
**kwargs
):
"""
Initialize language model.
Args:
model (str): Model identifier (e.g., "openai/gpt-4o-mini", "anthropic/claude-3-5-sonnet-20241022")
model_type (str): Model type - "chat", "text", or "responses" (default: "chat")
temperature (float | None): Sampling temperature (0.0 to 2.0)
max_tokens (int | None): Maximum tokens per response
cache (bool): Enable response caching (default: True)
callbacks (list | None): Callback functions for monitoring
num_retries (int): Number of retry attempts on failure (default: 3)
provider (Provider | None): Provider instance for advanced features
finetuning_model (str | None): Model identifier for finetuning
launch_kwargs (dict | None): Launch configuration for local models
train_kwargs (dict | None): Default training configuration
use_developer_role (bool): Use developer role for responses model (default: False)
**kwargs: Additional LM parameters:
- n (int): Number of completions to generate
- rollout_id (int): Seed for deterministic generation
- stop (list[str]): Stop sequences
- presence_penalty (float): Presence penalty
- frequency_penalty (float): Frequency penalty
- top_p (float): Nucleus sampling parameter
"""
pass
def __call__(self, prompt: str = None, messages: list = None, **kwargs):
"""
Generate completion.
Args:
prompt (str | None): Text prompt (for text models)
messages (list | None): List of message dicts (for chat models)
**kwargs: Override default parameters (temperature, max_tokens, etc.)
Returns:
List of response strings (length = n parameter)
"""
pass
def forward(self, prompt: str = None, messages: list = None, **kwargs):
"""
Same as __call__. Generate completion.
Args:
prompt (str | None): Text prompt
messages (list | None): Message list
**kwargs: Parameter overrides
Returns:
List of response strings
"""
pass
def copy(self, **kwargs):
"""
Create copy with updated parameters.
Args:
**kwargs: Parameters to override
Returns:
New LM instance
"""
pass
def inspect_history(self, n: int = 1):
"""
Pretty-print recent calls.
Args:
n (int): Number of recent calls to display (default: 1)
"""
pass
def get_convo(self, index: int):
"""
Get conversation at index.
Args:
index (int): Conversation index in history
Returns:
Conversation dict with messages and responses
"""
pass
def dump_state(self) -> dict:
"""
Serialize state to dictionary.
Returns:
Dictionary representation of LM state
"""
pass
def launch(self, **launch_kwargs):
"""
Launch model instance (for local/vLLM models).
Args:
**launch_kwargs: Launch configuration parameters
"""
pass
def kill(self):
"""Kill launched model instance."""
pass
def finetune(
self,
train_data: list,
train_kwargs: dict = None,
cache_finetune: bool = False
):
"""
Start finetuning job.
Args:
train_data (list): Training examples
train_kwargs (dict | None): Training configuration
cache_finetune (bool): Cache finetuning job (default: False)
Returns:
TrainingJob instance
"""
pass
def push_to_hf_hub(self, repo_id: str, **kwargs):
"""
Push finetuned model to Hugging Face Hub.
Args:
repo_id (str): Repository ID (e.g., "username/model-name")
**kwargs: Additional push parameters
"""
pass
Usage:
import dspy
# Basic usage
lm = dspy.LM('openai/gpt-4o-mini')
dspy.configure(lm=lm)
# With parameters
lm = dspy.LM(
'openai/gpt-4o-mini',
temperature=0.7,
max_tokens=500,
cache=True
)
# Direct call
response = lm(prompt="Translate to French: Hello")
print(response[0]) # "Bonjour"
# Chat messages
response = lm(messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What is 2+2?"}
])
# Multiple completions
lm = dspy.LM('openai/gpt-4o-mini', n=5)
responses = lm(prompt="Write a haiku about coding")
for i, resp in enumerate(responses):
print(f"Completion {i+1}: {resp}")
# Copy with different parameters
lm_creative = lm.copy(temperature=1.5, max_tokens=1000)
# Inspect history
lm.inspect_history(n=3) # Show last 3 calls
# Different providers
anthropic_lm = dspy.LM('anthropic/claude-3-5-sonnet-20241022')
gemini_lm = dspy.LM('gemini/gemini-1.5-pro')
mistral_lm = dspy.LM('mistral/mistral-large-latest')
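The state and history helpers documented on LM above (dump_state, get_convo) fit the same pattern; a minimal sketch, assuming the return shapes described in their docstrings:
import dspy
lm = dspy.LM('openai/gpt-4o-mini')
lm(prompt="Say hello")
# Serialize the LM's configuration to a plain dict (see dump_state above)
state = lm.dump_state()
print(state)
# Fetch the first recorded conversation from the call history (see get_convo above)
convo = lm.get_convo(0)
print(convo)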
Abstract base class for custom language model implementations.
class BaseLM:
"""
Base class for language models.
Defines the interface that custom LM implementations should follow.
Extend this class to create custom model integrations.
"""
def __call__(self, prompt: str = None, messages: list = None, **kwargs):
"""
Generate completion (must be implemented).
Args:
prompt (str | None): Text prompt
messages (list | None): Message list
**kwargs: Model-specific parameters
Returns:
List of response strings
"""
pass
def dump_state(self) -> dict:
"""
Serialize state (must be implemented).
Returns:
Dictionary representation of state
"""
pass
Usage:
import dspy
class CustomLM(dspy.BaseLM):
"""Custom language model integration."""
def __init__(self, model_path: str, **kwargs):
self.model_path = model_path
self.config = kwargs
# Initialize your model here
def __call__(self, prompt=None, messages=None, **kwargs):
# Implement your model's generation logic
# Must return list of strings
response = self._generate(prompt or messages)
return [response]
def dump_state(self):
return {
"model_path": self.model_path,
"config": self.config
}
# Use custom LM
custom_lm = CustomLM("/path/to/model")
dspy.configure(lm=custom_lm)
Text embedding class for vectorization.
class Embedder:
"""
Embedding class for text vectorization.
Supports both hosted embedding models and custom embedding functions.
"""
def __init__(
self,
model,
batch_size: int = 200,
caching: bool = True,
**kwargs
):
"""
Initialize embedder.
Args:
model (str | callable): Model name or custom embedding function
- str: Hosted model (e.g., "openai/text-embedding-3-small")
- callable: Custom function that takes list of texts
batch_size (int): Batch size for processing (default: 200)
caching (bool): Cache embedding responses (default: True)
**kwargs: Additional model parameters
"""
pass
def __call__(self, texts: list, batch_size: int = None, **kwargs):
"""
Compute embeddings for texts.
Args:
texts (list): List of strings to embed
batch_size (int | None): Override batch size
**kwargs: Additional parameters
Returns:
numpy.ndarray of shape (len(texts), embedding_dim)
"""
pass
Usage:
import dspy
import numpy as np
# Hosted model
embedder = dspy.Embedder("openai/text-embedding-3-small")
embeddings = embedder(["hello", "world", "machine learning"])
print(embeddings.shape) # (3, 1536)
# Custom function
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("all-MiniLM-L6-v2")
embedder = dspy.Embedder(model.encode, batch_size=32)
embeddings = embedder(["hello", "world"])
print(embeddings.shape) # (2, 384)
# Use with KNN
knn = dspy.KNN(k=5, trainset=trainset, vectorizer=embedder)
similar = knn(query="machine learning")
Base class for LM providers handling launching, finetuning, and provider-specific operations.
class Provider:
"""
Base class for LM providers.
Handles provider-specific operations like model launching,
finetuning, and reinforcement learning.
"""
finetunable: bool
"""Whether this provider supports finetuning."""
reinforceable: bool
"""Whether this provider supports reinforcement learning."""
TrainingJob: type
"""TrainingJob class for this provider."""
ReinforceJob: type
"""ReinforceJob class for this provider."""
@staticmethod
def is_provider_model(model: str) -> bool:
"""
Check if model belongs to this provider.
Args:
model (str): Model identifier
Returns:
True if model is from this provider
"""
pass
def launch(self, lm, launch_kwargs: dict):
"""
Launch model instance.
Args:
lm: LM instance
launch_kwargs (dict): Launch configuration
"""
pass
def kill(self, lm, launch_kwargs: dict):
"""
Kill launched instance.
Args:
lm: LM instance
launch_kwargs (dict): Launch configuration
"""
pass
def finetune(self, job, model: str, train_data: list, train_kwargs: dict):
"""
Start finetuning job.
Args:
job: TrainingJob instance
model (str): Model to finetune
train_data (list): Training examples
train_kwargs (dict): Training configuration
"""
pass
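Provider subclasses are normally supplied through the provider argument of dspy.LM. A minimal sketch of a custom provider, assuming only the attributes and method signatures documented above; the import path, MyLocalProvider, and its serving logic are illustrative, not part of the library:
import dspy
from dspy.clients.provider import Provider  # import path is an assumption; adjust to your install

class MyLocalProvider(Provider):
    """Hypothetical provider for models served from a local endpoint."""
    finetunable = False
    reinforceable = False

    @staticmethod
    def is_provider_model(model: str) -> bool:
        # Claim model identifiers that use a custom prefix
        return model.startswith("mylocal/")

    def launch(self, lm, launch_kwargs: dict):
        # Start the local server here (implementation-specific)
        print(f"Launching {lm.model} with {launch_kwargs}")

    def kill(self, lm, launch_kwargs: dict):
        # Shut the local server down
        print(f"Stopping {lm.model}")

# Attach the provider when constructing the LM (see the provider argument above)
lm = dspy.LM("mylocal/my-model", provider=MyLocalProvider())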
Represents a finetuning job with async monitoring.
class TrainingJob:
"""
Training job for model finetuning.
Extends concurrent.futures.Future for async job monitoring.
"""
def __init__(
self,
thread=None,
model: str = None,
train_data: list = None,
train_data_format: str = None,
train_kwargs: dict = None
):
"""
Initialize training job.
Args:
thread: Thread running the job
model (str | None): Model being finetuned
train_data (list | None): Training data
train_data_format (str | None): Format of training data
train_kwargs (dict | None): Training configuration
"""
pass
def status(self) -> str:
"""
Get job status.
Returns:
Status string ("running", "completed", "failed", etc.)
"""
pass
def cancel(self) -> bool:
"""
Cancel job.
Returns:
True if successfully cancelled
"""
pass
def result(self, timeout: float = None):
"""
Wait for and get result.
Args:
timeout (float | None): Maximum wait time in seconds
Returns:
Finetuned model identifier
"""
pass
def done(self) -> bool:
"""
Check if job is done.
Returns:
True if job completed (success or failure)
"""
pass
Usage:
import dspy
# Start finetuning
lm = dspy.LM('openai/gpt-4o-mini', finetuning_model='gpt-4o-mini-2024-07-18')
train_data = [
{"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What is ML?"},
{"role": "assistant", "content": "Machine Learning is..."}
]},
# ... more examples
]
job = lm.finetune(
train_data=train_data,
train_kwargs={
"n_epochs": 3,
"batch_size": 4,
"learning_rate_multiplier": 0.1
}
)
# Check status
print(job.status()) # "running"
# Wait for completion
finetuned_model_id = job.result(timeout=3600) # Wait up to 1 hour
print(f"Finetuned model: {finetuned_model_id}")
# Use finetuned model
finetuned_lm = dspy.LM(finetuned_model_id)
dspy.configure(lm=finetuned_lm)
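The remaining TrainingJob helpers (done, cancel) and push_to_hf_hub follow the same pattern; a short sketch continuing the example above (the repository name is a placeholder, and pushing to the Hub applies when the finetuned weights are available locally):
# Non-blocking check instead of waiting on result()
if not job.done():
    print("Still training:", job.status())
# Abandon a job that is no longer needed
# job.cancel()
# For locally finetuned weights, publish them to the Hugging Face Hub
# lm.push_to_hf_hub("username/my-finetuned-model")  # placeholder repo_id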
Inspect call history of language models or modules.
def inspect_history(lm_or_module, n: int = 1):
"""
Inspect call history with pretty printing.
Args:
lm_or_module: LM instance or Module instance
n (int): Number of recent calls to display (default: 1)
"""
pass
Usage:
import dspy
lm = dspy.LM('openai/gpt-4o-mini')
dspy.configure(lm=lm)
# Make some calls
qa = dspy.Predict("question -> answer")
qa(question="What is 2+2?")
qa(question="What is the capital of France?")
# Inspect LM history
dspy.inspect_history(lm, n=2)
# Inspect module history
dspy.inspect_history(qa, n=2)
Configure the caching system for language model responses.
def configure_cache(
enable_disk_cache: bool = None,
enable_memory_cache: bool = None,
disk_cache_dir: str = None,
disk_size_limit_bytes: int = None,
memory_max_entries: int = None
):
"""
Configure DSPy caching system.
Args:
enable_disk_cache (bool | None): Enable disk cache (default: True)
enable_memory_cache (bool | None): Enable memory cache (default: True)
disk_cache_dir (str | None): Directory path for disk cache
disk_size_limit_bytes (int | None): Maximum disk cache size in bytes
memory_max_entries (int | None): Maximum number of memory cache entries
"""
pass
Usage:
import dspy
# Configure cache
dspy.configure_cache(
enable_disk_cache=True,
enable_memory_cache=True,
disk_cache_dir="/tmp/dspy_cache",
disk_size_limit_bytes=5_000_000_000, # 5GB
memory_max_entries=5000
)
# Disable caching entirely
dspy.configure_cache(
enable_disk_cache=False,
enable_memory_cache=False
)
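Caching can also be controlled per LM instance through the cache flag documented on the LM constructor, independently of the global settings above:
# Keep the global cache enabled, but skip caching for this particular LM
uncached_lm = dspy.LM('openai/gpt-4o-mini', cache=False)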
Control LiteLLM logging output.
def enable_litellm_logging():
"""Enable LiteLLM debug logging."""
pass
def disable_litellm_logging():
"""Disable LiteLLM debug logging."""
pass
Usage:
import dspy
# Disable verbose LiteLLM logs
dspy.disable_litellm_logging()
# Enable for debugging
dspy.enable_litellm_logging()
Use different models for different purposes:
import dspy
# Fast model for simple tasks
fast_lm = dspy.LM('openai/gpt-4o-mini', temperature=0.0)
# Powerful model for complex reasoning
strong_lm = dspy.LM('openai/gpt-4o', temperature=0.7)
# Configure default
dspy.configure(lm=fast_lm)
# Use strong model in specific context
with dspy.context(lm=strong_lm):
result = complex_module(input=data)
Try cheap model first, fall back to expensive:
import dspy
# Stronger fallback model referenced inside the module below
expensive_lm = dspy.LM('openai/gpt-4o')
class CascadingQA(dspy.Module):
def __init__(self):
super().__init__()
self.cheap_qa = dspy.Predict("question -> answer")
self.expensive_qa = dspy.ChainOfThought("question -> answer")
self.judge = dspy.Predict("question, answer -> confident: bool")
def forward(self, question):
# Try cheap model
cheap_result = self.cheap_qa(question=question)
# Check confidence
judgment = self.judge(
question=question,
answer=cheap_result.answer
)
if judgment.confident:
return cheap_result
else:
# Fall back to expensive model
with dspy.context(lm=expensive_lm):
return self.expensive_qa(question=question)
Complete finetuning workflow:
import dspy
# 1. Bootstrap high-quality training data
dspy.configure(lm=dspy.LM('openai/gpt-4o'))
program = MyModule()
optimizer = dspy.BootstrapFinetune(metric=my_metric)
compiled, finetune_data = optimizer.compile(program, trainset=trainset)
# 2. Finetune model
base_lm = dspy.LM(
'openai/gpt-4o-mini',
finetuning_model='gpt-4o-mini-2024-07-18'
)
job = base_lm.finetune(
train_data=finetune_data,
train_kwargs={"n_epochs": 3}
)
# 3. Wait for completion
finetuned_model_id = job.result(timeout=7200)
# 4. Use finetuned model
finetuned_lm = dspy.LM(finetuned_model_id)
dspy.configure(lm=finetuned_lm)
# 5. Evaluate improvement
evaluator = dspy.Evaluate(devset=test_set, metric=my_metric)
score = evaluator(program)
Use local models with vLLM:
import dspy
# Launch local model with vLLM
lm = dspy.LM(
'meta-llama/Llama-3.1-8B-Instruct',
launch_kwargs={
"gpu_memory_utilization": 0.9,
"max_model_len": 8192,
"tensor_parallel_size": 2
}
)
lm.launch() # Start vLLM server
# Use local model
dspy.configure(lm=lm)
result = my_program(input=data)
# Clean up
lm.kill()
Use embeddings for retrieval and similarity:
import dspy
# Create embedder
embedder = dspy.Embedder("openai/text-embedding-3-small")
# Embed documents
docs = ["Document 1 text", "Document 2 text", ...]
doc_embeddings = embedder(docs)
# Embed query
query = "search query"
query_embedding = embedder([query])
# Compute similarity
from numpy import dot
from numpy.linalg import norm
def cosine_similarity(a, b):
return dot(a, b) / (norm(a) * norm(b))
similarities = [
cosine_similarity(query_embedding[0], doc_emb)
for doc_emb in doc_embeddings
]
# Get top-k documents
top_k = sorted(enumerate(similarities), key=lambda x: -x[1])[:5]
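To complete the retrieval step, the indices in top_k map back into docs; a small illustrative continuation:
for idx, score in top_k:
    print(f"{score:.3f}  {docs[idx]}")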