CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-litellm

Library to easily interface with LLM API providers

Pending
Overview
Eval results
Files

docs/exceptions.md

Exception Handling

Comprehensive exception hierarchy for robust error handling across all LLM providers. LiteLLM provides consistent error types with detailed context information, enabling reliable error handling and retry logic in production applications.

Capabilities

Base Exception Classes

LiteLLM exceptions inherit from OpenAI's exception hierarchy while adding provider-specific context and enhanced error information.

# NOTE(review): these class definitions are documentation stubs — the bodies
# contain only docstrings. Each subclasses the matching `openai` SDK exception,
# so handlers written against the OpenAI SDK keep working unchanged, while
# LiteLLM adds provider context (llm_provider, model) at raise time.
# The attribute lists describe the runtime error objects; presence of the
# Optional attributes depends on the provider — confirm before relying on them.
class AuthenticationError(openai.AuthenticationError):
    """
    Authentication failure with API key or credentials.
    
    Attributes:
        message (str): Error description
        llm_provider (str): Provider that failed authentication
        model (str): Model being accessed
        response (Optional[httpx.Response]): HTTP response object
    """

class InvalidRequestError(openai.BadRequestError):
    """
    Invalid request parameters or unsupported operations.
    
    Common causes:
    - Invalid model name
    - Unsupported parameters for provider
    - Malformed request data
    
    Attributes:
        message (str): Error description
        model (str): Model that caused the error
        llm_provider (str): Provider name
    """

class NotFoundError(openai.NotFoundError):
    """
    Requested resource not found (model, deployment, etc.).
    
    Attributes:
        message (str): Error description
        model (str): Model that was not found
        llm_provider (str): Provider name
    """

class BadRequestError(openai.BadRequestError):
    """
    Malformed request or invalid parameters.
    
    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
    """

class RateLimitError(openai.RateLimitError):
    """
    Rate limit exceeded for API usage.
    
    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
        retry_after (Optional[int]): Seconds to wait before retry
    """

class ServiceUnavailableError(openai.APIStatusError):
    """
    Provider service temporarily unavailable.
    
    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
        status_code (int): HTTP status code
    """

class InternalServerError(openai.InternalServerError):
    """
    Provider internal server error (5xx status codes).
    
    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
        status_code (int): HTTP status code
    """

class Timeout(openai.APITimeoutError):
    """
    Request timeout exceeded.
    
    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
    """

class APIError(openai.APIError):
    """
    Generic API error for unexpected failures.
    
    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
        status_code (Optional[int]): HTTP status code if available
    """

class APIConnectionError(openai.APIConnectionError):
    """
    Connection failure to provider API.
    
    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
    """

class APIResponseValidationError(openai.APIResponseValidationError):
    """
    Response validation failure or unexpected format.
    
    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
    """

class OpenAIError(openai.OpenAIError):
    """
    Base class for all LiteLLM/OpenAI exceptions.
    
    Attributes:
        message (str): Error description
    """

LiteLLM-Specific Exception Classes

Advanced exception types for LiteLLM-specific functionality and enhanced error handling.

# NOTE(review): LiteLLM-specific documentation stubs. Several subclass the
# LiteLLM BadRequestError defined above, so a broad `except BadRequestError`
# also catches them. BudgetExceededError and the guardrail/PII errors subclass
# plain Exception and must be caught explicitly — they will NOT be caught by
# handlers targeting the openai exception hierarchy.
class ContextWindowExceededError(BadRequestError):
    """
    Input exceeds model's maximum context window.
    
    Raised when the input tokens exceed the model's context limit.
    Often triggers automatic fallback to models with larger context windows.
    
    Attributes:
        message (str): Error description with token counts
        model (str): Model with insufficient context
        llm_provider (str): Provider name
        max_tokens (Optional[int]): Model's maximum context size
        current_tokens (Optional[int]): Actual input tokens
    """

class ContentPolicyViolationError(BadRequestError):
    """
    Content violates provider's usage policies.
    
    Raised when input or generated content violates safety policies.
    May trigger fallback to alternative providers with different policies.
    
    Attributes:
        message (str): Error description
        model (str): Model that flagged content
        llm_provider (str): Provider name
        violation_type (Optional[str]): Type of policy violation
    """

class BudgetExceededError(Exception):
    """
    Usage exceeds configured budget limits.
    
    Raised when cumulative costs exceed budget thresholds set in LiteLLM configuration.
    
    Attributes:
        message (str): Error description with budget information
        current_cost (float): Current accumulated cost
        max_budget (float): Maximum allowed budget
    """

class UnsupportedParamsError(BadRequestError):
    """
    Parameters not supported by the target provider.
    
    Raised when using OpenAI parameters that aren't supported by other providers.
    Can be handled with drop_params=True or modify_params=True.
    
    Attributes:
        message (str): Error description
        model (str): Target model
        llm_provider (str): Provider name
        unsupported_params (List[str]): List of unsupported parameter names
    """

class JSONSchemaValidationError(APIResponseValidationError):
    """
    Response doesn't match expected JSON schema.
    
    Raised when response_format with JSON schema is specified but response
    doesn't conform to the schema.
    
    Attributes:
        message (str): Validation error details
        model (str): Model that produced invalid response
        schema_errors (List[str]): List of validation errors
    """

class UnprocessableEntityError(openai.UnprocessableEntityError):
    """
    Request was well-formed but couldn't be processed.
    
    HTTP 422 error for semantically invalid requests.
    
    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
    """

class LiteLLMUnknownProvider(BadRequestError):
    """
    Provider not recognized by LiteLLM.
    
    Raised when specifying an unknown custom_llm_provider or model format.
    
    Attributes:
        message (str): Error description
        provider (str): Unknown provider name
        available_providers (List[str]): List of supported providers
    """

class GuardrailRaisedException(Exception):
    """
    Custom guardrail check failed.
    
    Raised by user-defined guardrail functions that reject requests or responses.
    
    Attributes:
        message (str): Guardrail failure reason
        guardrail_name (str): Name of failed guardrail
    """

class BlockedPiiEntityError(Exception):
    """
    PII (Personally Identifiable Information) detected and blocked.
    
    Raised when PII detection guardrails identify and block sensitive information.
    
    Attributes:
        message (str): Error description
        detected_entities (List[str]): Types of PII detected
    """

class MockException(openai.APIError):
    """
    Exception for testing and mocking purposes.
    
    Used in test environments to simulate various error conditions.
    
    Attributes:
        message (str): Mock error message
        status_code (int): Simulated HTTP status code
    """

class ImageFetchError(BadRequestError):
    """
    Error fetching image for vision models.
    
    Raised when image URLs are inaccessible or in unsupported formats.
    
    Attributes:
        message (str): Error description
        image_url (str): URL that failed to fetch
        status_code (Optional[int]): HTTP status from image fetch
    """

class RejectedRequestError(BadRequestError):
    """
    Request was rejected by provider-specific filtering.
    
    Raised when providers reject requests based on internal policies
    beyond standard content policy violations.
    
    Attributes:
        message (str): Rejection reason
        model (str): Model that rejected request
        llm_provider (str): Provider name
    """

class MidStreamFallbackError(ServiceUnavailableError):
    """
    Error during streaming that requires fallback.
    
    Raised when streaming responses fail mid-stream and require
    switching to a fallback provider.
    
    Attributes:
        message (str): Error description
        original_model (str): Model that failed during streaming
        fallback_model (Optional[str]): Fallback model to use
    """

Exception Attributes

class LiteLLMExceptionAttributes:
    """Common attributes available on LiteLLM exceptions"""
    # NOTE(review): illustrative attribute catalogue, not a real base class.
    # Not every exception instance carries every field — treat each Optional
    # attribute as present-if-provided and guard access with getattr().
    
    # Core identification
    message: str                           # Human-readable error description
    model: Optional[str]                   # Model that caused the error
    llm_provider: Optional[str]            # Provider name (openai, anthropic, etc.)
    
    # HTTP context
    status_code: Optional[int]             # HTTP status code from provider
    response: Optional[httpx.Response]     # Full HTTP response object
    request: Optional[httpx.Request]       # Original HTTP request
    
    # Retry and timing
    retry_after: Optional[int]             # Seconds to wait before retry (rate limits)
    response_ms: Optional[float]           # Response time in milliseconds
    
    # Provider-specific context
    provider_original_error: Optional[str] # Original error from provider
    provider_error_code: Optional[str]     # Provider-specific error code
    
    # Token and cost context
    prompt_tokens: Optional[int]           # Input tokens when error occurred
    completion_tokens: Optional[int]       # Output tokens when error occurred
    total_tokens: Optional[int]            # Total tokens when error occurred
    
    # Request context
    litellm_params: Optional[Dict]         # LiteLLM parameters used
    original_response: Optional[Dict]      # Raw response from provider

Usage Examples

Basic Exception Handling

import litellm
from litellm import (
    AuthenticationError, RateLimitError, ContextWindowExceededError,
    ContentPolicyViolationError, BudgetExceededError
)

def safe_completion(model, messages, **kwargs):
    """Run a completion, converting known LiteLLM failures into None.

    Each recognized error category prints a diagnostic instead of
    propagating; only a successful call returns a response object.
    """
    try:
        return litellm.completion(model=model, messages=messages, **kwargs)

    except AuthenticationError as e:
        # Handle API key issues
        print(f"Authentication failed for {e.llm_provider}: {e.message}")

    except RateLimitError as e:
        # Implement backoff or queue request
        print(f"Rate limit hit for {e.model}: {e.message}")
        if e.retry_after:
            print(f"Retry after {e.retry_after} seconds")

    except ContextWindowExceededError as e:
        # Try with shorter input or different model
        print(f"Context window exceeded: {e.current_tokens}/{e.max_tokens} tokens")

    except ContentPolicyViolationError as e:
        # Handle content filtering
        print(f"Content policy violation: {e.message}")

    except BudgetExceededError as e:
        # Handle budget management
        print(f"Budget exceeded: ${e.current_cost:.2f}/${e.max_budget:.2f}")

    except Exception as e:
        print(f"Unexpected error: {type(e).__name__}: {e}")

    return None

# Usage
response = safe_completion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hello!"}]
)

Advanced Error Handling with Retry Logic

import time
import random
from typing import Optional

def completion_with_retry(
    model: str,
    messages: list,
    max_retries: int = 3,
    base_delay: float = 1.0,
    max_delay: float = 60.0,
    **kwargs
) -> Optional[litellm.ModelResponse]:
    """
    Completion with exponential backoff retry logic.

    Retries transient failures (rate limits, service/server errors,
    connection errors) with capped exponential backoff plus jitter, and
    fails fast on errors a retry cannot fix.

    Args:
        model: Model identifier passed to litellm.completion().
        messages: Chat messages to send.
        max_retries: Number of retries after the initial attempt.
        base_delay: Initial backoff delay in seconds.
        max_delay: Upper bound for any single delay, in seconds.
        **kwargs: Extra parameters forwarded to litellm.completion().

    Returns:
        The model response, or None if the loop exits without one.

    Raises:
        RateLimitError, ServiceUnavailableError, InternalServerError,
        APIConnectionError: re-raised once retries are exhausted.
        AuthenticationError, InvalidRequestError, NotFoundError:
        re-raised immediately (retrying cannot succeed).
    """

    def _backoff(attempt: int, retry_after=None) -> float:
        """Delay for this attempt: Retry-After if given, else exponential, plus jitter."""
        if retry_after:
            delay = min(retry_after, max_delay)
        else:
            delay = min(base_delay * (2 ** attempt), max_delay)
        # Add jitter to prevent thundering herd
        return delay + random.uniform(0, 0.1 * delay)

    for attempt in range(max_retries + 1):
        try:
            return litellm.completion(
                model=model,
                messages=messages,
                **kwargs
            )

        except RateLimitError as e:
            if attempt == max_retries:
                raise  # bare raise preserves the original traceback

            # Use retry_after if provided, otherwise exponential backoff
            total_delay = _backoff(attempt, e.retry_after)
            print(f"Rate limited, retrying in {total_delay:.1f}s (attempt {attempt + 1})")
            time.sleep(total_delay)

        except (ServiceUnavailableError, InternalServerError, APIConnectionError) as e:
            if attempt == max_retries:
                raise

            total_delay = _backoff(attempt)
            print(f"Service error, retrying in {total_delay:.1f}s: {e}")
            time.sleep(total_delay)

        except (AuthenticationError, InvalidRequestError, NotFoundError) as e:
            # Don't retry these errors
            print(f"Non-retryable error: {e}")
            raise

    return None

# Usage
try:
    response = completion_with_retry(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello!"}],
        max_retries=5
    )
except Exception as e:
    print(f"All retries failed: {e}")

Fallback Strategy Implementation

def completion_with_fallbacks(
    primary_model: str,
    messages: list,
    fallback_models: list = None,
    **kwargs
):
    """
    Try primary model, fall back to alternatives on certain errors.

    Falls back on context-window, content-policy, and rate-limit errors;
    authentication and invalid-request errors propagate immediately,
    since switching models cannot fix them.

    Args:
        primary_model: Model to attempt first.
        messages: Chat messages to send.
        fallback_models: Ordered alternatives; defaults to
            ["gpt-3.5-turbo", "claude-3-haiku-20240307"].
        **kwargs: Extra parameters forwarded to litellm.completion().

    Returns:
        Response from the first model that succeeds.

    Raises:
        The triggering exception when the last model also fails, or a
        plain Exception if the loop ends without any success.
    """

    if fallback_models is None:
        fallback_models = ["gpt-3.5-turbo", "claude-3-haiku-20240307"]

    models_to_try = [primary_model] + fallback_models
    last_index = len(models_to_try) - 1

    for i, model in enumerate(models_to_try):
        try:
            response = litellm.completion(
                model=model,
                messages=messages,
                **kwargs
            )

            if i > 0:  # Used fallback
                print(f"Successfully used fallback model: {model}")

            return response

        except (ContextWindowExceededError,
                ContentPolicyViolationError,
                RateLimitError) as e:
            # Single handler for every fallback-worthy error; pick the
            # log message that matches the concrete failure type.
            if isinstance(e, ContextWindowExceededError):
                print(f"Context window exceeded for {model}, trying fallback")
            elif isinstance(e, ContentPolicyViolationError):
                print(f"Content policy violation for {model}, trying fallback")
            else:
                print(f"Rate limit for {model}, trying fallback")

            if i == last_index:  # Last model — nothing left to try
                raise  # bare raise preserves the original traceback

        except (AuthenticationError, InvalidRequestError):
            # Don't fallback for these errors
            raise

    raise Exception("All fallback models failed")

# Usage
response = completion_with_fallbacks(
    primary_model="gpt-4",
    messages=[{"role": "user", "content": "Very long prompt..."}],
    fallback_models=["claude-3-sonnet-20240229", "gpt-3.5-turbo-16k"]
)

Budget Management with Exceptions

class BudgetManager:
    """Tracks cumulative request cost against a fixed spending cap."""

    def __init__(self, max_budget: float):
        # Cap in dollars; spending starts at zero.
        self.max_budget = max_budget
        self.current_cost = 0.0

    def check_budget(self, estimated_cost: float):
        """Raise BudgetExceededError if adding estimated_cost would bust the cap."""
        projected = self.current_cost + estimated_cost
        if projected > self.max_budget:
            raise BudgetExceededError(
                f"Estimated cost ${estimated_cost:.4f} would exceed budget "
                f"(${self.current_cost:.4f}/${self.max_budget:.2f})"
            )

    def track_completion(self, response):
        """Accumulate the actual cost reported on a completed response."""
        hidden = getattr(response, '_hidden_params', {})
        if 'response_cost' in hidden:
            cost = hidden['response_cost']
            self.current_cost += cost
            print(f"Request cost: ${cost:.4f}, Total: ${self.current_cost:.4f}")

# Usage
budget_manager = BudgetManager(max_budget=10.00)

def budget_aware_completion(model, messages, **kwargs):
    """Make a completion only if the estimated cost fits the budget.

    Estimates cost from the prompt token count plus the requested
    max_tokens, checks it against the shared budget_manager, then
    records the actual cost after a successful call.

    Returns:
        The litellm response, or None when the budget check fails.
    """
    # Estimate cost before making request
    estimated_tokens = litellm.token_counter(model=model, messages=messages)
    # litellm.cost_per_token returns a (prompt_cost, completion_cost) tuple,
    # so the two components must be summed to get a single dollar figure —
    # passing the tuple directly to check_budget would raise a TypeError.
    prompt_cost, completion_cost = litellm.cost_per_token(
        model=model,
        prompt_tokens=estimated_tokens,
        completion_tokens=kwargs.get('max_tokens', 256)
    )
    estimated_cost = prompt_cost + completion_cost

    try:
        budget_manager.check_budget(estimated_cost)

        response = litellm.completion(
            model=model,
            messages=messages,
            **kwargs
        )

        budget_manager.track_completion(response)
        return response

    except BudgetExceededError as e:
        print(f"Budget management: {e}")
        # Could fallback to cheaper model
        return None

response = budget_aware_completion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hello!"}]
)

Provider-Specific Error Handling

def handle_provider_specific_errors(model, messages, **kwargs):
    """Route provider-specific failures to targeted recovery strategies.

    Inspects the `llm_provider` attribute LiteLLM attaches to its
    exceptions and applies a provider-appropriate fallback; anything
    unhandled is re-raised unchanged.
    """
    try:
        return litellm.completion(
            model=model,
            messages=messages,
            **kwargs
        )

    except Exception as e:
        # Check provider-specific error context; getattr avoids the
        # separate hasattr lookup and tolerates non-LiteLLM exceptions.
        provider = getattr(e, 'llm_provider', None)

        if provider == "openai":
            if "insufficient_quota" in str(e).lower():
                print("OpenAI quota exceeded, switching to backup provider")
                return litellm.completion(
                    model="claude-3-sonnet-20240229",
                    messages=messages,
                    **kwargs
                )

        elif provider == "anthropic":
            if "overloaded" in str(e).lower():
                print("Anthropic overloaded, trying OpenAI")
                return litellm.completion(
                    model="gpt-4",
                    messages=messages,
                    **kwargs
                )

        elif provider == "cohere":
            if isinstance(e, UnsupportedParamsError):
                print("Removing unsupported parameters for Cohere")
                # Remove OpenAI-specific parameters
                clean_kwargs = {k: v for k, v in kwargs.items()
                                if k not in ['logit_bias', 'seed']}
                return litellm.completion(
                    model=model,
                    messages=messages,
                    **clean_kwargs
                )

        # Re-raise if not handled; bare raise preserves the traceback
        raise

Custom Exception Classes

class CustomLiteLLMError(Exception):
    """Application-specific error carrying the model name and cost incurred.

    Attributes:
        model (str): Model involved in the failure.
        cost (float): Dollars spent before the failure (0.0 if none).
    """

    def __init__(self, message: str, model: str, cost: float = 0.0):
        super().__init__(message)
        self.model = model
        self.cost = cost

def application_completion_wrapper(model, messages, **kwargs):
    """Application-specific completion wrapper with custom error handling.

    Converts LiteLLM context-window and content-policy errors into
    CustomLiteLLMError with application context, and rejects empty
    responses.

    Args:
        model: Model identifier.
        messages: Chat messages to send.
        **kwargs: Extra parameters forwarded to litellm.completion().

    Returns:
        The validated litellm response.

    Raises:
        CustomLiteLLMError: on an empty response, context-window
        overflow, or content-policy violation.
    """

    try:
        response = litellm.completion(
            model=model,
            messages=messages,
            **kwargs
        )

        # Custom validation: reject structurally empty responses
        if not response.choices or not response.choices[0].message.content:
            raise CustomLiteLLMError(
                "Empty response received",
                model=model,
                cost=litellm.completion_cost(response)
            )

        return response

    except ContextWindowExceededError as e:
        # Convert to custom error with application context; `from e`
        # keeps the original exception chained for debugging.
        raise CustomLiteLLMError(
            f"Input too long for model {model}. "
            f"Required: {e.current_tokens}, Max: {e.max_tokens}",
            model=model
        ) from e

    except ContentPolicyViolationError as e:
        # Custom content policy handling
        raise CustomLiteLLMError(
            f"Content rejected by {model}: {e.violation_type or 'policy violation'}",
            model=model
        ) from e

# Usage with custom error handling
try:
    response = application_completion_wrapper(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello!"}]
    )
except CustomLiteLLMError as e:
    print(f"Application error with {e.model}: {e}")
    print(f"Cost incurred: ${e.cost:.4f}")

Install with Tessl CLI

npx tessl i tessl/pypi-litellm

docs

core-completion.md

exceptions.md

index.md

other-apis.md

providers.md

router.md

utilities.md

tile.json