Library to easily interface with LLM API providers
—
Comprehensive exception hierarchy for robust error handling across all LLM providers. LiteLLM provides consistent error types with detailed context information, enabling reliable error handling and retry logic in production applications.
LiteLLM exceptions inherit from OpenAI's exception hierarchy while adding provider-specific context and enhanced error information.
class AuthenticationError(openai.AuthenticationError):
    """
    Authentication failure with API key or credentials.

    Attributes:
        message (str): Error description
        llm_provider (str): Provider that failed authentication
        model (str): Model being accessed
        response (Optional[httpx.Response]): HTTP response object
    """


class InvalidRequestError(openai.BadRequestError):
    """
    Invalid request parameters or unsupported operations.

    Common causes:
    - Invalid model name
    - Unsupported parameters for provider
    - Malformed request data

    Attributes:
        message (str): Error description
        model (str): Model that caused the error
        llm_provider (str): Provider name
    """


class NotFoundError(openai.NotFoundError):
    """
    Requested resource not found (model, deployment, etc.).

    Attributes:
        message (str): Error description
        model (str): Model that was not found
        llm_provider (str): Provider name
    """


class BadRequestError(openai.BadRequestError):
    """
    Malformed request or invalid parameters.

    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
    """


class RateLimitError(openai.RateLimitError):
    """
    Rate limit exceeded for API usage.

    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
        retry_after (Optional[int]): Seconds to wait before retry
    """


class ServiceUnavailableError(openai.APIStatusError):
    """
    Provider service temporarily unavailable.

    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
        status_code (int): HTTP status code
    """


class InternalServerError(openai.InternalServerError):
    """
    Provider internal server error (5xx status codes).

    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
        status_code (int): HTTP status code
    """


class Timeout(openai.APITimeoutError):
    """
    Request timeout exceeded.

    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
    """


class APIError(openai.APIError):
    """
    Generic API error for unexpected failures.

    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
        status_code (Optional[int]): HTTP status code if available
    """


class APIConnectionError(openai.APIConnectionError):
    """
    Connection failure to provider API.

    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
    """


class APIResponseValidationError(openai.APIResponseValidationError):
    """
    Response validation failure or unexpected format.

    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
    """
class OpenAIError(openai.OpenAIError):
    """
    Base class for all LiteLLM/OpenAI exceptions.

    Attributes:
        message (str): Error description
    """


# Advanced exception types for LiteLLM-specific functionality and enhanced
# error handling.
class ContextWindowExceededError(BadRequestError):
    """
    Input exceeds model's maximum context window.

    Raised when the input tokens exceed the model's context limit.
    Often triggers automatic fallback to models with larger context windows.

    Attributes:
        message (str): Error description with token counts
        model (str): Model with insufficient context
        llm_provider (str): Provider name
        max_tokens (Optional[int]): Model's maximum context size
        current_tokens (Optional[int]): Actual input tokens
    """


class ContentPolicyViolationError(BadRequestError):
    """
    Content violates provider's usage policies.

    Raised when input or generated content violates safety policies.
    May trigger fallback to alternative providers with different policies.

    Attributes:
        message (str): Error description
        model (str): Model that flagged content
        llm_provider (str): Provider name
        violation_type (Optional[str]): Type of policy violation
    """


class BudgetExceededError(Exception):
    """
    Usage exceeds configured budget limits.

    Raised when cumulative costs exceed budget thresholds set in LiteLLM
    configuration.

    Attributes:
        message (str): Error description with budget information
        current_cost (float): Current accumulated cost
        max_budget (float): Maximum allowed budget
    """


class UnsupportedParamsError(BadRequestError):
    """
    Parameters not supported by the target provider.

    Raised when using OpenAI parameters that aren't supported by other
    providers. Can be handled with drop_params=True or modify_params=True.

    Attributes:
        message (str): Error description
        model (str): Target model
        llm_provider (str): Provider name
        unsupported_params (List[str]): List of unsupported parameter names
    """


class JSONSchemaValidationError(APIResponseValidationError):
    """
    Response doesn't match expected JSON schema.

    Raised when response_format with JSON schema is specified but response
    doesn't conform to the schema.

    Attributes:
        message (str): Validation error details
        model (str): Model that produced invalid response
        schema_errors (List[str]): List of validation errors
    """


class UnprocessableEntityError(openai.UnprocessableEntityError):
    """
    Request was well-formed but couldn't be processed.

    HTTP 422 error for semantically invalid requests.

    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
    """


class LiteLLMUnknownProvider(BadRequestError):
    """
    Provider not recognized by LiteLLM.

    Raised when specifying an unknown custom_llm_provider or model format.

    Attributes:
        message (str): Error description
        provider (str): Unknown provider name
        available_providers (List[str]): List of supported providers
    """


class GuardrailRaisedException(Exception):
    """
    Custom guardrail check failed.

    Raised by user-defined guardrail functions that reject requests or
    responses.

    Attributes:
        message (str): Guardrail failure reason
        guardrail_name (str): Name of failed guardrail
    """


class BlockedPiiEntityError(Exception):
    """
    PII (Personally Identifiable Information) detected and blocked.

    Raised when PII detection guardrails identify and block sensitive
    information.

    Attributes:
        message (str): Error description
        detected_entities (List[str]): Types of PII detected
    """


class MockException(openai.APIError):
    """
    Exception for testing and mocking purposes.

    Used in test environments to simulate various error conditions.

    Attributes:
        message (str): Mock error message
        status_code (int): Simulated HTTP status code
    """


class ImageFetchError(BadRequestError):
    """
    Error fetching image for vision models.

    Raised when image URLs are inaccessible or in unsupported formats.

    Attributes:
        message (str): Error description
        image_url (str): URL that failed to fetch
        status_code (Optional[int]): HTTP status from image fetch
    """


class RejectedRequestError(BadRequestError):
    """
    Request was rejected by provider-specific filtering.

    Raised when providers reject requests based on internal policies
    beyond standard content policy violations.

    Attributes:
        message (str): Rejection reason
        model (str): Model that rejected request
        llm_provider (str): Provider name
    """
class MidStreamFallbackError(ServiceUnavailableError):
    """
    Error during streaming that requires fallback.

    Raised when streaming responses fail mid-stream and require
    switching to a fallback provider.

    Attributes:
        message (str): Error description
        original_model (str): Model that failed during streaming
        fallback_model (Optional[str]): Fallback model to use
    """


class LiteLLMExceptionAttributes:
    """Common attributes available on LiteLLM exceptions"""

    # Core identification
    message: str  # Human-readable error description
    model: Optional[str]  # Model that caused the error
    llm_provider: Optional[str]  # Provider name (openai, anthropic, etc.)

    # HTTP context
    status_code: Optional[int]  # HTTP status code from provider
    response: Optional[httpx.Response]  # Full HTTP response object
    request: Optional[httpx.Request]  # Original HTTP request

    # Retry and timing
    retry_after: Optional[int]  # Seconds to wait before retry (rate limits)
    response_ms: Optional[float]  # Response time in milliseconds

    # Provider-specific context
    provider_original_error: Optional[str]  # Original error from provider
    provider_error_code: Optional[str]  # Provider-specific error code

    # Token and cost context
    prompt_tokens: Optional[int]  # Input tokens when error occurred
    completion_tokens: Optional[int]  # Output tokens when error occurred
    total_tokens: Optional[int]  # Total tokens when error occurred

    # Request context
    litellm_params: Optional[Dict]  # LiteLLM parameters used
    original_response: Optional[Dict]  # Raw response from provider


import litellm
from litellm import (
    AuthenticationError, RateLimitError, ContextWindowExceededError,
    ContentPolicyViolationError, BudgetExceededError
)


def safe_completion(model, messages, **kwargs):
    """Call litellm.completion and handle common LiteLLM exceptions.

    Returns the completion response, or None if any handled (or
    unexpected) error occurs.
    """
    try:
        response = litellm.completion(
            model=model,
            messages=messages,
            **kwargs
        )
        return response
    except AuthenticationError as e:
        print(f"Authentication failed for {e.llm_provider}: {e.message}")
        # Handle API key issues
        return None
    except RateLimitError as e:
        print(f"Rate limit hit for {e.model}: {e.message}")
        if e.retry_after:
            print(f"Retry after {e.retry_after} seconds")
        # Implement backoff or queue request
        return None
    except ContextWindowExceededError as e:
        print(f"Context window exceeded: {e.current_tokens}/{e.max_tokens} tokens")
        # Try with shorter input or different model
        return None
    except ContentPolicyViolationError as e:
        print(f"Content policy violation: {e.message}")
        # Handle content filtering
        return None
    except BudgetExceededError as e:
        print(f"Budget exceeded: ${e.current_cost:.2f}/${e.max_budget:.2f}")
        # Handle budget management
        return None
    except Exception as e:
        print(f"Unexpected error: {type(e).__name__}: {e}")
        return None


# Usage
response = safe_completion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hello!"}]
)

import time
import random
from typing import Optional
def completion_with_retry(
model: str,
messages: list,
max_retries: int = 3,
base_delay: float = 1.0,
max_delay: float = 60.0,
**kwargs
) -> Optional[litellm.ModelResponse]:
"""
Completion with exponential backoff retry logic.
"""
for attempt in range(max_retries + 1):
try:
response = litellm.completion(
model=model,
messages=messages,
**kwargs
)
return response
except RateLimitError as e:
if attempt == max_retries:
raise e
# Use retry_after if provided, otherwise exponential backoff
if e.retry_after:
delay = min(e.retry_after, max_delay)
else:
delay = min(base_delay * (2 ** attempt), max_delay)
# Add jitter to prevent thundering herd
jitter = random.uniform(0, 0.1 * delay)
total_delay = delay + jitter
print(f"Rate limited, retrying in {total_delay:.1f}s (attempt {attempt + 1})")
time.sleep(total_delay)
except (ServiceUnavailableError, InternalServerError, APIConnectionError) as e:
if attempt == max_retries:
raise e
delay = min(base_delay * (2 ** attempt), max_delay)
jitter = random.uniform(0, 0.1 * delay)
total_delay = delay + jitter
print(f"Service error, retrying in {total_delay:.1f}s: {e}")
time.sleep(total_delay)
except (AuthenticationError, InvalidRequestError, NotFoundError) as e:
# Don't retry these errors
print(f"Non-retryable error: {e}")
raise e
return None
# Usage
try:
response = completion_with_retry(
model="gpt-4",
messages=[{"role": "user", "content": "Hello!"}],
max_retries=5
)
except Exception as e:
print(f"All retries failed: {e}")def completion_with_fallbacks(
primary_model: str,
messages: list,
fallback_models: list = None,
**kwargs
):
"""
Try primary model, fall back to alternatives on certain errors.
"""
if fallback_models is None:
fallback_models = ["gpt-3.5-turbo", "claude-3-haiku-20240307"]
models_to_try = [primary_model] + fallback_models
for i, model in enumerate(models_to_try):
try:
response = litellm.completion(
model=model,
messages=messages,
**kwargs
)
if i > 0: # Used fallback
print(f"Successfully used fallback model: {model}")
return response
except ContextWindowExceededError as e:
print(f"Context window exceeded for {model}, trying fallback")
if i == len(models_to_try) - 1: # Last model
raise e
continue
except ContentPolicyViolationError as e:
print(f"Content policy violation for {model}, trying fallback")
if i == len(models_to_try) - 1: # Last model
raise e
continue
except RateLimitError as e:
print(f"Rate limit for {model}, trying fallback")
if i == len(models_to_try) - 1: # Last model
raise e
continue
except (AuthenticationError, InvalidRequestError) as e:
# Don't fallback for these errors
raise e
raise Exception("All fallback models failed")
# Usage
response = completion_with_fallbacks(
primary_model="gpt-4",
messages=[{"role": "user", "content": "Very long prompt..."}],
fallback_models=["claude-3-sonnet-20240229", "gpt-3.5-turbo-16k"]
)class BudgetManager:
def __init__(self, max_budget: float):
self.max_budget = max_budget
self.current_cost = 0.0
def check_budget(self, estimated_cost: float):
if self.current_cost + estimated_cost > self.max_budget:
raise BudgetExceededError(
f"Estimated cost ${estimated_cost:.4f} would exceed budget "
f"(${self.current_cost:.4f}/${self.max_budget:.2f})"
)
def track_completion(self, response):
if hasattr(response, '_hidden_params') and 'response_cost' in response._hidden_params:
cost = response._hidden_params['response_cost']
self.current_cost += cost
print(f"Request cost: ${cost:.4f}, Total: ${self.current_cost:.4f}")
# Usage
budget_manager = BudgetManager(max_budget=10.00)
def budget_aware_completion(model, messages, **kwargs):
# Estimate cost before making request
estimated_tokens = litellm.token_counter(model=model, messages=messages)
estimated_cost = litellm.cost_per_token(
model=model,
prompt_tokens=estimated_tokens,
completion_tokens=kwargs.get('max_tokens', 256)
)
try:
budget_manager.check_budget(estimated_cost)
response = litellm.completion(
model=model,
messages=messages,
**kwargs
)
budget_manager.track_completion(response)
return response
except BudgetExceededError as e:
print(f"Budget management: {e}")
# Could fallback to cheaper model
return None
response = budget_aware_completion(
model="gpt-4",
messages=[{"role": "user", "content": "Hello!"}]
)def handle_provider_specific_errors(model, messages, **kwargs):
try:
response = litellm.completion(
model=model,
messages=messages,
**kwargs
)
return response
except Exception as e:
# Check provider-specific error context
if hasattr(e, 'llm_provider'):
provider = e.llm_provider
if provider == "openai":
if "insufficient_quota" in str(e).lower():
print("OpenAI quota exceeded, switching to backup provider")
return litellm.completion(
model="claude-3-sonnet-20240229",
messages=messages,
**kwargs
)
elif provider == "anthropic":
if "overloaded" in str(e).lower():
print("Anthropic overloaded, trying OpenAI")
return litellm.completion(
model="gpt-4",
messages=messages,
**kwargs
)
elif provider == "cohere":
if isinstance(e, UnsupportedParamsError):
print("Removing unsupported parameters for Cohere")
# Remove OpenAI-specific parameters
clean_kwargs = {k: v for k, v in kwargs.items()
if k not in ['logit_bias', 'seed']}
return litellm.completion(
model=model,
messages=messages,
**clean_kwargs
)
# Re-raise if not handled
raise eclass CustomLiteLLMError(Exception):
"""Custom application-specific error"""
def __init__(self, message: str, model: str, cost: float = 0.0):
super().__init__(message)
self.model = model
self.cost = cost
def application_completion_wrapper(model, messages, **kwargs):
"""Application-specific completion wrapper with custom error handling"""
try:
response = litellm.completion(
model=model,
messages=messages,
**kwargs
)
# Custom validation
if not response.choices or not response.choices[0].message.content:
raise CustomLiteLLMError(
"Empty response received",
model=model,
cost=litellm.completion_cost(response)
)
return response
except ContextWindowExceededError as e:
# Convert to custom error with application context
raise CustomLiteLLMError(
f"Input too long for model {model}. "
f"Required: {e.current_tokens}, Max: {e.max_tokens}",
model=model
)
except ContentPolicyViolationError as e:
# Custom content policy handling
raise CustomLiteLLMError(
f"Content rejected by {model}: {e.violation_type or 'policy violation'}",
model=model
)
# Usage with custom error handling
try:
response = application_completion_wrapper(
model="gpt-4",
messages=[{"role": "user", "content": "Hello!"}]
)
except CustomLiteLLMError as e:
print(f"Application error with {e.model}: {e}")
print(f"Cost incurred: ${e.cost:.4f}")Install with Tessl CLI
npx tessl i tessl/pypi-litellm