Library to easily interface with LLM API providers
npx @tessl/cli install tessl/pypi-litellm@1.76.0

A unified Python interface for calling 100+ LLM API providers including OpenAI, Anthropic, Cohere, Replicate, and more. LiteLLM provides OpenAI-compatible API formats, intelligent routing, load balancing, fallbacks, and cost tracking across all supported providers.
pip install litellm

import litellm
from litellm import completion, embedding, Router

For async functions:
from litellm import acompletion, aembedding

For specific components:
from litellm import (
    completion, text_completion, embedding, transcription, speech,
    Router, token_counter, get_model_info, completion_cost
)

import litellm
from litellm import completion
# OpenAI GPT-4
response = completion(
    model="gpt-4",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is the capital of France?"}
    ]
)
print(response.choices[0].message.content)
# Anthropic Claude
response = completion(
    model="claude-3-sonnet-20240229",
    messages=[
        {"role": "user", "content": "Explain quantum computing"}
    ]
)
# Cohere Command
response = completion(
    model="command-nightly",
    messages=[
        {"role": "user", "content": "Write a short poem"}
    ]
)
# With streaming
response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Count to 10"}],
    stream=True
)
for chunk in response:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")
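
The async variants mirror the synchronous signatures. A minimal acompletion sketch (assumes an OpenAI API key is configured in the environment):

import asyncio
from litellm import acompletion

async def main():
    # Same parameters as completion(), but awaitable
    response = await acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Say hello"}]
    )
    print(response.choices[0].message.content)

asyncio.run(main())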
LiteLLM provides a unified interface that abstracts away provider-specific differences while maintaining full OpenAI API compatibility. The library serves as a drop-in replacement for OpenAI's client while adding enterprise features such as routing, caching, and observability. Its key architectural components are described below.
Unified chat completion, text completion, and streaming interfaces that work across all supported LLM providers with OpenAI-compatible parameters.
def completion(
    model: str,
    messages: List[Dict[str, Any]],
    temperature: Optional[float] = None,
    max_tokens: Optional[int] = None,
    stream: Optional[bool] = None,
    **kwargs
) -> Union[ModelResponse, Iterator[ModelResponseStream]]
def text_completion(
    model: str,
    prompt: str,
    max_tokens: Optional[int] = None,
    **kwargs
) -> Union[TextCompletionResponse, Iterator[TextCompletionResponse]]
async def acompletion(**kwargs) -> Union[ModelResponse, AsyncIterator[ModelResponseStream]]
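
A short text_completion sketch for the prompt-style interface (the model name is illustrative; any completion-style model works):

from litellm import text_completion

response = text_completion(
    model="gpt-3.5-turbo-instruct",
    prompt="Once upon a time",
    max_tokens=50
)
print(response.choices[0].text)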
Router class for intelligent load balancing, automatic fallbacks, and retry logic across multiple model deployments with cost optimization and reliability features.

class Router:
    def __init__(
        self,
        model_list: Optional[List[DeploymentTypedDict]] = None,
        routing_strategy: Literal["simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing", "cost-based-routing"] = "simple-shuffle",
        num_retries: Optional[int] = None,
        max_fallbacks: Optional[int] = None,
        **kwargs
    )
    def completion(self, **kwargs) -> Union[ModelResponse, Iterator[ModelResponseStream]]
    def health_check(self, model: Optional[str] = None) -> Dict[str, Any]
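
A usage sketch: two deployments registered under one alias, load-balanced by the router (keys, endpoints, and deployment names are placeholders):

import os
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",  # alias callers use
            "litellm_params": {
                "model": "azure/gpt-35-turbo",  # actual deployment
                "api_key": os.environ["AZURE_API_KEY"],
                "api_base": os.environ["AZURE_API_BASE"]
            }
        },
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "gpt-3.5-turbo",  # OpenAI deployment
                "api_key": os.environ["OPENAI_API_KEY"]
            }
        }
    ],
    num_retries=2
)

response = router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}]
)
print(response.choices[0].message.content)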
Embedding generation, image creation, audio transcription/synthesis, moderation, and other specialized API endpoints with unified interfaces.

def embedding(
    model: str,
    input: Union[str, List[str], List[int], List[List[int]]],
    **kwargs
) -> EmbeddingResponse
def image_generation(
    prompt: str,
    model: Optional[str] = None,
    **kwargs
) -> ImageResponse
def transcription(model: str, file: Union[str, bytes, IO], **kwargs) -> TranscriptionResponse
def speech(model: str, input: str, voice: str, **kwargs) -> bytes
def moderation(input: Union[str, List[str]], **kwargs) -> ModerationCreateResponse
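
An embedding sketch; field access follows the OpenAI embedding response shape (the model name is illustrative):

from litellm import embedding

response = embedding(
    model="text-embedding-ada-002",
    input=["good morning", "good night"]
)
print(len(response.data[0]["embedding"]), response.usage.total_tokens)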
Comprehensive exception hierarchy with provider-specific error handling, context information, and retry logic for robust error management.

class AuthenticationError(openai.AuthenticationError): ...
class RateLimitError(openai.RateLimitError): ...
class ContextWindowExceededError(BadRequestError): ...
class ContentPolicyViolationError(BadRequestError): ...
class BudgetExceededError(Exception): ...
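
Because the exceptions subclass their OpenAI counterparts, existing OpenAI error handling keeps working; provider-agnostic failures can also be caught directly:

import litellm
from litellm import completion

try:
    response = completion(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello"}]
    )
except litellm.ContextWindowExceededError:
    # Prompt exceeded the model's context window; truncate or switch models
    raise
except litellm.RateLimitError:
    # Provider returned a 429; back off and retry
    raise
except litellm.AuthenticationError as e:
    # Missing or invalid API key for the selected provider
    print(e)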
Configuration classes and settings for 100+ LLM providers including authentication, custom endpoints, and provider-specific parameters.

class OpenAIConfig(BaseConfig):
    frequency_penalty: Optional[int] = None
    max_tokens: Optional[int] = None
    temperature: Optional[int] = None
    # ... all OpenAI parameters
class AnthropicConfig(BaseConfig):
    max_tokens: int
    temperature: Optional[float] = None
    top_k: Optional[int] = None
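
A sketch of setting provider-wide defaults by instantiating a provider config; this pattern and the values shown are assumptions, not verified here:

import litellm
from litellm import completion

# Assumption: instantiating a provider config registers defaults that are
# applied to later calls routed to that provider
litellm.OpenAIConfig(max_tokens=256)

response = completion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Summarize LiteLLM in one sentence."}]
)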
Token counting, cost calculation, model information, capability detection, and validation utilities for comprehensive LLM management.

def token_counter(model: str, text: Union[str, List[str]], **kwargs) -> int
def completion_cost(completion_response: Union[ModelResponse, EmbeddingResponse], **kwargs) -> float
def get_model_info(model: str, **kwargs) -> Dict[str, Any]
def supports_function_calling(model: str, **kwargs) -> bool
def validate_environment(model: str, **kwargs) -> Dict[str, str]
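
Typical use of the utilities: count tokens before a call, inspect model metadata, and price a finished response:

from litellm import completion, completion_cost, get_model_info, token_counter

# Count tokens locally, without an API call
n_tokens = token_counter(model="gpt-3.5-turbo", text="Hello, how are you?")

# Model metadata: context window, pricing, capabilities
info = get_model_info("gpt-3.5-turbo")
print(n_tokens, info["max_tokens"])

# Dollar cost of a finished completion
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi"}])
print(completion_cost(completion_response=response))

Response and usage objects returned by the core APIs mirror the OpenAI schema: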
class ModelResponse(BaseLiteLLMOpenAIResponseObject):
    id: str
    choices: List[Choices]
    created: int
    model: Optional[str] = None
    usage: Optional[Usage] = None
class EmbeddingResponse(OpenAIObject):
    data: List[EmbeddingData]
    model: Optional[str]
    usage: Optional[Usage]
class Usage:
    prompt_tokens: int
    completion_tokens: Optional[int] = None
    total_tokens: int
class Choices:
    finish_reason: Optional[str] = None
    index: int = 0
    message: Optional[Message] = None
class Message:
    content: Optional[str] = None
    role: str
    tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None
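
Accessing a response through the typed objects above:

from litellm import completion

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}]
)

print(response.id)
print(response.choices[0].finish_reason)
print(response.choices[0].message.content)
print(response.usage.total_tokens)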
# Authentication
litellm.api_key: Optional[str] = None
litellm.openai_key: Optional[str] = None
litellm.anthropic_key: Optional[str] = None
# Timeout & Retry Settings
litellm.request_timeout: float = 600
litellm.num_retries: Optional[int] = None
litellm.max_fallbacks: Optional[int] = None
# Debugging & Logging
litellm.set_verbose: bool = False
litellm.suppress_debug_info: bool = False
# Model Configuration
litellm.model_alias_map: Dict[str, str] = {}
litellm.drop_params: bool = False
litellm.modify_params: bool = False
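
Module-level settings apply to every subsequent call; a configuration sketch (the key is a placeholder):

import litellm
from litellm import completion

litellm.api_key = "sk-..."                     # fallback key when no provider-specific key is set
litellm.request_timeout = 120                  # seconds
litellm.num_retries = 2                        # retry transient provider errors
litellm.drop_params = True                     # drop params a provider does not support
litellm.model_alias_map = {"gpt4": "gpt-4"}    # resolve aliases to real model names

response = completion(
    model="gpt4",  # resolved via model_alias_map
    messages=[{"role": "user", "content": "Hello"}]
)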