Library to easily interface with LLM API providers
Configuration classes, settings, and authentication for 100+ LLM providers supported by LiteLLM. This includes provider-specific parameters, custom endpoints, API key management, and advanced configuration options for enterprise deployments.
All provider configurations inherit from a base configuration class that provides common parameters and validation.

```python { .api }
class BaseConfig:
"""
Base configuration class for all LLM providers.
Provides common parameters and validation for provider-specific configurations.
"""
# Core parameters available across providers
max_tokens: Optional[int] = None
temperature: Optional[float] = None
top_p: Optional[float] = None
stream: Optional[bool] = None
stop: Optional[Union[str, List[str]]] = None
user: Optional[str] = None
# LiteLLM specific parameters
timeout: Optional[float] = None
api_key: Optional[str] = None
api_base: Optional[str] = None
def __init__(self, **kwargs):
"""Initialize configuration with provider-specific parameters"""
pass
def get_config(self) -> Dict[str, Any]:
"""Get configuration dictionary for provider API calls"""
pass
```
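A minimal sketch of the intended pattern, assuming `get_config()` returns the explicitly set parameters as a dict; the `OpenAIConfig` subclass used here is defined in the next section.

```python
import litellm

# Build a provider config once and reuse it across completion calls.
config = litellm.OpenAIConfig(temperature=0.2, max_tokens=512)

response = litellm.completion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Summarize LiteLLM in one line."}],
    **config.get_config(),  # assumed to return only explicitly set parameters
)
print(response.choices[0].message.content)
```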
### OpenAI Configuration
Complete configuration for OpenAI and OpenAI-compatible providers, including Azure OpenAI.

```python { .api }
class OpenAIConfig(BaseConfig):
"""
Configuration for OpenAI API and compatible providers.
Supports all OpenAI API parameters including function calling,
response formatting, and advanced features.
"""
# Standard OpenAI parameters
frequency_penalty: Optional[float] = None
logit_bias: Optional[Dict[str, float]] = None
max_tokens: Optional[int] = None
n: Optional[int] = None
presence_penalty: Optional[float] = None
response_format: Optional[Dict[str, Any]] = None
seed: Optional[int] = None
stop: Optional[Union[str, List[str]]] = None
stream: Optional[bool] = None
temperature: Optional[float] = None
top_p: Optional[float] = None
user: Optional[str] = None
# Function calling parameters
functions: Optional[List[Dict[str, Any]]] = None
function_call: Optional[Union[str, Dict[str, Any]]] = None
tools: Optional[List[Dict[str, Any]]] = None
tool_choice: Optional[Union[str, Dict[str, Any]]] = None
# OpenAI-specific parameters
logprobs: Optional[bool] = None
top_logprobs: Optional[int] = None
# Advanced features
parallel_tool_calls: Optional[bool] = None
service_tier: Optional[Literal["auto", "default"]] = None
def __init__(
self,
frequency_penalty: Optional[float] = None,
logit_bias: Optional[Dict[str, float]] = None,
max_tokens: Optional[int] = None,
n: Optional[int] = None,
presence_penalty: Optional[float] = None,
response_format: Optional[Dict[str, Any]] = None,
seed: Optional[int] = None,
stop: Optional[Union[str, List[str]]] = None,
stream: Optional[bool] = None,
temperature: Optional[float] = None,
top_p: Optional[float] = None,
tools: Optional[List[Dict[str, Any]]] = None,
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
user: Optional[str] = None,
**kwargs
):
"""
Initialize OpenAI configuration.
Args:
frequency_penalty (Optional[float]): Penalty for token frequency (-2.0 to 2.0)
presence_penalty (Optional[float]): Penalty for token presence (-2.0 to 2.0)
temperature (Optional[float]): Sampling temperature (0.0 to 2.0)
max_tokens (Optional[int]): Maximum tokens to generate
tools (Optional[List[Dict]]): Available function tools
response_format (Optional[Dict]): Response format specification
"""class AzureOpenAIConfig(OpenAIConfig): """ Configuration for Azure OpenAI Service deployments.
Extends OpenAI configuration with Azure-specific parameters.
"""
# Azure-specific parameters
api_version: str = "2024-02-01"
azure_endpoint: Optional[str] = None
azure_deployment: Optional[str] = None
azure_ad_token: Optional[str] = None
azure_ad_token_provider: Optional[Callable[[], str]] = None
def __init__(
self,
api_version: str = "2024-02-01",
azure_endpoint: Optional[str] = None,
azure_deployment: Optional[str] = None,
azure_ad_token: Optional[str] = None,
**kwargs
):
"""
Initialize Azure OpenAI configuration.
Args:
api_version (str): Azure API version
azure_endpoint (Optional[str]): Azure endpoint URL
azure_deployment (Optional[str]): Deployment name
azure_ad_token (Optional[str]): Azure AD authentication token
"""
super().__init__(**kwargs)
```
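The function-calling and determinism parameters above pass straight through `litellm.completion` for OpenAI models. A short sketch; the tool schema is illustrative.

```python
import litellm

# Illustrative OpenAI-format function tool; the tool name is hypothetical.
tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather for a city",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]

response = litellm.completion(
    model="gpt-4",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
    tool_choice="auto",
    seed=42,  # best-effort reproducible sampling
)
print(response.choices[0].message.tool_calls)
```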
### Anthropic Configuration
Configuration for Anthropic Claude models with provider-specific parameters.
```python { .api }
class AnthropicConfig(BaseConfig):
"""
Configuration for Anthropic Claude models.
Supports Anthropic-specific parameters including system prompts,
stop sequences, and advanced sampling options.
"""
# Required parameter for Anthropic
max_tokens: int
# Optional parameters
metadata: Optional[Dict[str, Any]] = None
stop_sequences: Optional[List[str]] = None
system: Optional[str] = None
temperature: Optional[float] = None
tool_choice: Optional[Dict[str, Any]] = None
tools: Optional[List[Dict[str, Any]]] = None
top_k: Optional[int] = None
top_p: Optional[float] = None
def __init__(
self,
max_tokens: int,
metadata: Optional[Dict[str, Any]] = None,
stop_sequences: Optional[List[str]] = None,
system: Optional[str] = None,
temperature: Optional[float] = None,
top_k: Optional[int] = None,
top_p: Optional[float] = None,
**kwargs
):
"""
Initialize Anthropic configuration.
Args:
max_tokens (int): Maximum tokens to generate (required)
system (Optional[str]): System prompt for Claude
temperature (Optional[float]): Sampling temperature (0.0 to 1.0)
top_k (Optional[int]): Top-k sampling parameter
top_p (Optional[float]): Nucleus sampling parameter
stop_sequences (Optional[List[str]]): Custom stop sequences
"""Configuration for Google's AI models including Vertex AI and Gemini.
### Google Configuration
Configuration for Google's AI models, including Vertex AI and Gemini.

```python { .api }
class GoogleConfig(BaseConfig):
"""Configuration for Google AI models (Vertex AI, Gemini)"""
# Google-specific parameters
candidate_count: Optional[int] = None
max_output_tokens: Optional[int] = None
temperature: Optional[float] = None
top_p: Optional[float] = None
top_k: Optional[int] = None
# Safety settings
safety_settings: Optional[List[Dict[str, Any]]] = None
# Generation configuration
generation_config: Optional[Dict[str, Any]] = None
def __init__(
self,
candidate_count: Optional[int] = None,
max_output_tokens: Optional[int] = None,
temperature: Optional[float] = None,
top_p: Optional[float] = None,
top_k: Optional[int] = None,
safety_settings: Optional[List[Dict[str, Any]]] = None,
**kwargs
):
"""
Initialize Google AI configuration.
Args:
max_output_tokens (Optional[int]): Maximum output tokens
temperature (Optional[float]): Sampling temperature
top_p (Optional[float]): Nucleus sampling parameter
top_k (Optional[int]): Top-k sampling parameter
safety_settings (Optional[List[Dict]]): Content safety settings
"""
class VertexAIConfig(GoogleConfig):
"""
Configuration for Google Vertex AI deployments.
Extends Google configuration with Vertex AI specific parameters.
"""
# Vertex AI specific
project_id: Optional[str] = None
location: Optional[str] = None
credentials: Optional[str] = None
def __init__(
self,
project_id: Optional[str] = None,
location: Optional[str] = "us-central1",
credentials: Optional[str] = None,
**kwargs
):
"""
Initialize Vertex AI configuration.
Args:
project_id (Optional[str]): Google Cloud project ID
location (Optional[str]): Vertex AI location/region
credentials (Optional[str]): Service account credentials path
"""
super().__init__(**kwargs)
```

### Cohere Configuration
Configuration for Cohere models with provider-specific parameters.

```python { .api }
class CohereConfig(BaseConfig):
"""Configuration for Cohere models"""
# Cohere-specific parameters
max_tokens: Optional[int] = None
temperature: Optional[float] = None
k: Optional[int] = None # top-k
p: Optional[float] = None # top-p
frequency_penalty: Optional[float] = None
presence_penalty: Optional[float] = None
end_sequences: Optional[List[str]] = None
stop_sequences: Optional[List[str]] = None
return_likelihoods: Optional[str] = None
logit_bias: Optional[Dict[int, float]] = None
# Chat-specific parameters
chat_history: Optional[List[Dict[str, str]]] = None
conversation_id: Optional[str] = None
# Tool use parameters
tools: Optional[List[Dict[str, Any]]] = None
tool_results: Optional[List[Dict[str, Any]]] = None
def __init__(
self,
max_tokens: Optional[int] = None,
temperature: Optional[float] = None,
k: Optional[int] = None,
p: Optional[float] = None,
end_sequences: Optional[List[str]] = None,
**kwargs
):
"""
Initialize Cohere configuration.
Args:
max_tokens (Optional[int]): Maximum tokens to generate
temperature (Optional[float]): Sampling temperature
k (Optional[int]): Top-k sampling parameter
p (Optional[float]): Top-p sampling parameter
end_sequences (Optional[List[str]]): Stop sequences
"""Configuration for AWS Bedrock models across different providers.
### AWS Bedrock Configuration
Configuration for AWS Bedrock models across different providers.

```python { .api }
class BedrockConfig(BaseConfig):
"""Configuration for AWS Bedrock models"""
# AWS Bedrock parameters
aws_access_key_id: Optional[str] = None
aws_secret_access_key: Optional[str] = None
aws_session_token: Optional[str] = None
aws_region_name: Optional[str] = None
# Model-specific parameters (varies by provider on Bedrock)
max_tokens_to_sample: Optional[int] = None
temperature: Optional[float] = None
top_p: Optional[float] = None
top_k: Optional[int] = None
stop_sequences: Optional[List[str]] = None
def __init__(
self,
aws_region_name: str = "us-east-1",
aws_access_key_id: Optional[str] = None,
aws_secret_access_key: Optional[str] = None,
max_tokens_to_sample: Optional[int] = None,
**kwargs
):
"""
Initialize AWS Bedrock configuration.
Args:
aws_region_name (str): AWS region for Bedrock
aws_access_key_id (Optional[str]): AWS access key
aws_secret_access_key (Optional[str]): AWS secret key
max_tokens_to_sample (Optional[int]): Maximum tokens to generate
"""Configuration for Hugging Face models and inference endpoints.
class HuggingFaceConfig(BaseConfig):
"""Configuration for Hugging Face models"""
# Hugging Face parameters
max_new_tokens: Optional[int] = None
temperature: Optional[float] = None
top_p: Optional[float] = None
top_k: Optional[int] = None
repetition_penalty: Optional[float] = None
do_sample: Optional[bool] = None
use_cache: Optional[bool] = None
# Generation parameters
num_return_sequences: Optional[int] = None
pad_token_id: Optional[int] = None
eos_token_id: Optional[int] = None
# Inference endpoint parameters
endpoint_url: Optional[str] = None
huggingface_api_key: Optional[str] = None
def __init__(
self,
max_new_tokens: Optional[int] = None,
temperature: Optional[float] = None,
top_p: Optional[float] = None,
do_sample: Optional[bool] = True,
**kwargs
):
"""
Initialize Hugging Face configuration.
Args:
max_new_tokens (Optional[int]): Maximum new tokens to generate
temperature (Optional[float]): Sampling temperature
top_p (Optional[float]): Nucleus sampling parameter
do_sample (Optional[bool]): Enable sampling vs greedy decoding
"""Register custom LLM providers with LiteLLM for proprietary or specialized models.
### Custom Provider Registration
Register custom LLM providers with LiteLLM for proprietary or specialized models.

```python { .api }
def register_model(model_cost: Union[str, Dict[str, Any]]) -> None:
"""
Register custom model with cost information.
Args:
model_cost (Union[str, Dict]): Model cost configuration
Can be JSON string or dictionary with cost parameters
Example:
register_model({
"model_name": "custom-gpt-4",
"litellm_provider": "openai",
"model_cost": {
"input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006,
"litellm_provider": "openai",
"mode": "chat"
}
})
"""
def register_prompt_template(
model: str,
roles: Dict[str, str],
initial_prompt_value: str = "",
final_prompt_value: str = ""
) -> Dict[str, Any]:
"""
Register custom prompt template for model.
Args:
model (str): Model identifier
roles (Dict[str, str]): Role mapping for messages
initial_prompt_value (str): Template prefix
final_prompt_value (str): Template suffix
Returns:
Dict[str, Any]: Registered template configuration
Example:
register_prompt_template(
model="custom-model",
roles={
"system": "System: ",
"user": "Human: ",
"assistant": "Assistant: "
},
initial_prompt_value="<start>",
final_prompt_value="<end>"
)
"""
class CustomLLM:
"""
Base class for implementing custom LLM providers.
Implement this class to add support for proprietary or specialized models.
"""
def completion(
self,
model: str,
messages: List[Dict[str, Any]],
api_base: str,
model_response: ModelResponse,
print_verbose: Callable,
encoding: str,
api_key: str,
logging_obj: Any,
custom_prompt_dict: Dict[str, str] = {},
litellm_params: Dict[str, Any] = {},
logger_fn: Optional[Callable] = None,
headers: Dict[str, str] = {},
**kwargs
) -> ModelResponse:
"""
Implement completion for custom provider.
Args:
model (str): Model identifier
messages (List[Dict]): Chat messages
api_base (str): API base URL
model_response (ModelResponse): Response object to populate
print_verbose (Callable): Logging function
encoding (str): Text encoding
api_key (str): Provider API key
logging_obj (Any): Logging object
custom_prompt_dict (Dict): Custom prompt template
litellm_params (Dict): LiteLLM parameters
logger_fn (Optional[Callable]): Logger function
headers (Dict): HTTP headers
Returns:
ModelResponse: Populated response object
"""
pass
def streaming(
self,
model: str,
messages: List[Dict[str, Any]],
api_base: str,
model_response: ModelResponse,
print_verbose: Callable,
encoding: str,
api_key: str,
logging_obj: Any,
custom_prompt_dict: Dict[str, str] = {},
litellm_params: Dict[str, Any] = {},
logger_fn: Optional[Callable] = None,
headers: Dict[str, str] = {},
**kwargs
) -> Iterator[ModelResponseStream]:
"""
Implement streaming completion for custom provider.
Args:
Same as completion() method
Returns:
Iterator[ModelResponseStream]: Streaming response chunks
"""
pass
def register_custom_llm(custom_llm: CustomLLM, provider_name: str) -> None:
"""
Register custom LLM provider implementation.
Args:
custom_llm (CustomLLM): Custom provider implementation
provider_name (str): Name for the custom provider
"""# Authentication configuration
litellm.api_key: Optional[str] = None
litellm.openai_key: Optional[str] = None
litellm.anthropic_key: Optional[str] = None
litellm.cohere_key: Optional[str] = None
litellm.replicate_key: Optional[str] = None
litellm.huggingface_key: Optional[str] = None
litellm.together_ai_key: Optional[str] = None
litellm.palm_key: Optional[str] = None
litellm.vertex_project: Optional[str] = None
litellm.vertex_location: Optional[str] = None
litellm.bedrock_aws_access_key_id: Optional[str] = None
litellm.bedrock_aws_secret_access_key: Optional[str] = None
litellm.bedrock_aws_region_name: Optional[str] = None
# Provider-specific API bases
litellm.openai_api_base: Optional[str] = None
litellm.anthropic_api_base: Optional[str] = None
litellm.cohere_api_base: Optional[str] = None
# Parameter handling
litellm.drop_params: bool = False
litellm.modify_params: bool = False
litellm.model_alias_map: Dict[str, str] = {}
# Default settings
litellm.request_timeout: float = 600
litellm.max_tokens: int = 256
litellm.temperature: Optional[float] = None
# Debugging and logging
litellm.set_verbose: bool = False
litellm.suppress_debug_info: bool = False
```
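A short sketch of the parameter-handling switches above; the alias value is illustrative.

```python
import litellm

# Drop OpenAI-style parameters a provider doesn't support instead of raising.
litellm.drop_params = True

# Route a friendly alias to a concrete model string.
litellm.model_alias_map = {"prod-chat": "claude-3-sonnet-20240229"}

response = litellm.completion(
    model="prod-chat",  # resolved through model_alias_map
    messages=[{"role": "user", "content": "Hello!"}],
    frequency_penalty=0.2,  # silently dropped for providers without it
)
```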
### Basic Provider Configuration

```python
import litellm

# Configure OpenAI
litellm.openai_key = "sk-your-openai-key"
response = litellm.completion(
model="gpt-4",
messages=[{"role": "user", "content": "Hello!"}]
)
# Configure Anthropic
litellm.anthropic_key = "your-anthropic-key"
response = litellm.completion(
model="claude-3-sonnet-20240229",
messages=[{"role": "user", "content": "Hello!"}]
)
# Configure multiple providers
litellm.api_key = "fallback-key"
litellm.cohere_key = "your-cohere-key"
litellm.huggingface_key = "your-hf-key"import litellm
# Method 1: Environment variables
import os
os.environ["AZURE_API_KEY"] = "your-azure-key"
os.environ["AZURE_API_BASE"] = "https://your-resource.openai.azure.com/"
os.environ["AZURE_API_VERSION"] = "2024-02-01"
response = litellm.completion(
model="azure/gpt-4",
messages=[{"role": "user", "content": "Hello!"}]
)
# Method 2: Direct parameters
response = litellm.completion(
model="azure/gpt-4-deployment",
messages=[{"role": "user", "content": "Hello!"}],
api_key="your-azure-key",
api_base="https://your-resource.openai.azure.com/",
api_version="2024-02-01"
)
# Method 3: Using configuration class
config = litellm.AzureOpenAIConfig(
api_version="2024-02-01",
azure_endpoint="https://your-resource.openai.azure.com/",
azure_deployment="gpt-4-deployment"
)
response = litellm.completion(
model="azure/gpt-4",
messages=[{"role": "user", "content": "Hello!"}],
**config.get_config()
)
```

### AWS Bedrock Setup

```python
import litellm
# Configure AWS credentials
litellm.bedrock_aws_access_key_id = "your-access-key"
litellm.bedrock_aws_secret_access_key = "your-secret-key"
litellm.bedrock_aws_region_name = "us-east-1"
# Use Bedrock models
response = litellm.completion(
model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
messages=[{"role": "user", "content": "Hello from Bedrock!"}]
)
# With provider-specific parameters
response = litellm.completion(
model="bedrock/anthropic.claude-v2",
messages=[{"role": "user", "content": "Hello!"}],
max_tokens_to_sample=1000,
temperature=0.7,
top_k=250,
top_p=1.0,
stop_sequences=["Human:"]
)
```

### Vertex AI Setup

```python
import litellm
# Set up Vertex AI credentials
import os
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "path/to/service-account.json"
litellm.vertex_project = "your-project-id"
litellm.vertex_location = "us-central1"
# Use Vertex AI models
response = litellm.completion(
model="vertex_ai/gemini-pro",
messages=[{"role": "user", "content": "Hello from Vertex AI!"}]
)
# With Google-specific parameters
response = litellm.completion(
model="vertex_ai/gemini-pro",
messages=[{"role": "user", "content": "Hello!"}],
max_output_tokens=1024,
temperature=0.8,
top_p=0.95,
top_k=40,
safety_settings=[
{
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
"threshold": "BLOCK_MEDIUM_AND_ABOVE"
}
]
)
```

### Implementing a Custom Provider

```python
import litellm
from litellm import CustomLLM, ModelResponse
import requests
class MyCustomProvider(CustomLLM):
def completion(self, model, messages, **kwargs):
# Implement your custom API call
api_base = kwargs.get("api_base", "https://api.example.com")
api_key = kwargs.get("api_key")
headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
}
# Convert LiteLLM format to your API format
custom_payload = {
"model": model,
"messages": messages,
"temperature": kwargs.get("temperature", 0.7),
"max_tokens": kwargs.get("max_tokens", 256)
}
# Make API request
response = requests.post(
f"{api_base}/completions",
headers=headers,
json=custom_payload,
timeout=kwargs.get("timeout", 30)
)
# Convert response to LiteLLM format
response_json = response.json()
# Create ModelResponse object
model_response = ModelResponse()
model_response.choices = [{
"message": {
"content": response_json["content"],
"role": "assistant"
},
"finish_reason": "stop",
"index": 0
}]
model_response.model = model
model_response.usage = {
"prompt_tokens": response_json.get("prompt_tokens", 0),
"completion_tokens": response_json.get("completion_tokens", 0),
"total_tokens": response_json.get("total_tokens", 0)
}
return model_response
def streaming(self, model, messages, **kwargs):
# Implement streaming if supported
# Return iterator of ModelResponseStream objects
pass
# Register custom provider
custom_provider = MyCustomProvider()
litellm.register_custom_llm(custom_provider, "my_provider")
# Use custom provider
response = litellm.completion(
model="my_provider/custom-model-v1",
messages=[{"role": "user", "content": "Hello!"}],
api_key="your-custom-key",
api_base="https://api.example.com"
)
```

### Registering Models and Prompt Templates

```python
import litellm
# Register custom model with cost information
litellm.register_model({
"model_name": "custom/my-model-v1",
"litellm_provider": "custom",
"model_cost": {
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "custom",
"mode": "chat"
}
})
# Register custom prompt template
litellm.register_prompt_template(
model="custom/my-model-v1",
roles={
"system": "### System:\n",
"user": "### Human:\n",
"assistant": "### Assistant:\n"
},
initial_prompt_value="<conversation_start>",
final_prompt_value="### Assistant:\n"
)
# Use registered model
response = litellm.completion(
model="custom/my-model-v1",
messages=[
{"role": "system", "content": "You are helpful."},
{"role": "user", "content": "Hello!"}
]
)
# Check cost
cost = litellm.completion_cost(response)
print(f"Request cost: ${cost:.6f}")# Set up environment variables for multiple providers
import os
# OpenAI
os.environ["OPENAI_API_KEY"] = "sk-your-openai-key"
# Anthropic
os.environ["ANTHROPIC_API_KEY"] = "your-anthropic-key"
# Cohere
os.environ["COHERE_API_KEY"] = "your-cohere-key"
# Google
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/service-account.json"
os.environ["VERTEXAI_PROJECT"] = "your-project-id"
os.environ["VERTEXAI_LOCATION"] = "us-central1"
# AWS
os.environ["AWS_ACCESS_KEY_ID"] = "your-access-key"
os.environ["AWS_SECRET_ACCESS_KEY"] = "your-secret-key"
os.environ["AWS_REGION_NAME"] = "us-east-1"
# Azure
os.environ["AZURE_API_KEY"] = "your-azure-key"
os.environ["AZURE_API_BASE"] = "https://your-resource.openai.azure.com/"
os.environ["AZURE_API_VERSION"] = "2024-02-01"
# Hugging Face
os.environ["HUGGINGFACE_API_KEY"] = "your-hf-token"
# Now all providers are configured and ready to use
import litellm
# Use any provider without additional configuration
openai_response = litellm.completion(
model="gpt-4",
messages=[{"role": "user", "content": "Hello OpenAI!"}]
)
anthropic_response = litellm.completion(
model="claude-3-sonnet-20240229",
messages=[{"role": "user", "content": "Hello Anthropic!"}]
)
bedrock_response = litellm.completion(
model="bedrock/anthropic.claude-v2",
messages=[{"role": "user", "content": "Hello Bedrock!"}]
)
```

### Router Configuration

```python
import os

from litellm import Router
# Configure router with multiple providers
model_list = [
# OpenAI deployment
{
"model_name": "gpt-4",
"litellm_params": {
"model": "gpt-4",
"api_key": os.environ["OPENAI_API_KEY"]
}
},
# Azure OpenAI deployment
{
"model_name": "gpt-4",
"litellm_params": {
"model": "azure/gpt-4-deployment",
"api_key": os.environ["AZURE_API_KEY"],
"api_base": os.environ["AZURE_API_BASE"],
"api_version": "2024-02-01"
}
},
# Anthropic deployment
{
"model_name": "claude-3",
"litellm_params": {
"model": "claude-3-sonnet-20240229",
"api_key": os.environ["ANTHROPIC_API_KEY"]
}
},
# Bedrock deployment
{
"model_name": "claude-bedrock",
"litellm_params": {
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
"aws_access_key_id": os.environ["AWS_ACCESS_KEY_ID"],
"aws_secret_access_key": os.environ["AWS_SECRET_ACCESS_KEY"],
"aws_region_name": "us-east-1"
}
}
]
router = Router(model_list=model_list)
# Router automatically handles provider-specific configurations
response = router.completion(
model="gpt-4",
messages=[{"role": "user", "content": "Hello!"}]
)
```
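The Router also supports retries and cross-group fallbacks; a sketch reusing the `model_list` defined above.

```python
from litellm import Router

router = Router(
    model_list=model_list,                # from the example above
    num_retries=2,                        # retry transient failures per deployment
    fallbacks=[{"gpt-4": ["claude-3"]}],  # fall back to the claude-3 group
)

response = router.completion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hello with fallbacks!"}],
)
```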
Install with Tessl CLI

```
npx tessl i tessl/pypi-litellm
```