Library to easily interface with LLM API providers
Comprehensive utility functions for token counting, cost calculation, model information, capability detection, and validation. These tools enable advanced LLM management, optimization, and monitoring across all supported providers.
Accurate token counting for prompts, messages, and completions across different model tokenizers.
def token_counter(
model: str = "",
text: Union[str, List[str]] = "",
messages: Optional[List[Dict[str, str]]] = None,
count_response_tokens: Optional[bool] = False,
custom_tokenizer: Optional[Dict] = None
) -> int:
"""
Count tokens for text or messages using model-specific tokenizers.
Args:
model (str): Model identifier for tokenizer selection
text (Union[str, List[str]]): Text string or list of strings to count
messages (Optional[List[Dict]]): Chat messages in OpenAI format
count_response_tokens (Optional[bool]): Include estimated response tokens
custom_tokenizer (Optional[Dict]): Custom tokenizer configuration
Returns:
int: Total token count
Raises:
ValueError: Invalid input parameters or unsupported model
ImportError: Required tokenizer package not installed
Examples:
# Count tokens in text
tokens = token_counter(model="gpt-4", text="Hello, world!")
# Count tokens in messages
messages = [
{"role": "system", "content": "You are helpful."},
{"role": "user", "content": "Hello!"}
]
tokens = token_counter(model="gpt-4", messages=messages)
# Include response token estimation
tokens = token_counter(
model="gpt-4",
messages=messages,
count_response_tokens=True
)
"""
def encode(
model: str,
text: str,
custom_tokenizer: Optional[Dict] = None
) -> List[int]:
"""
Encode text to tokens using model-specific tokenizer.
Args:
model (str): Model identifier
text (str): Text to encode
custom_tokenizer (Optional[Dict]): Custom tokenizer configuration
Returns:
List[int]: List of token IDs
"""
def decode(
model: str,
tokens: List[int],
custom_tokenizer: Optional[Dict] = None
) -> str:
"""
Decode tokens back to text using model-specific tokenizer.
Args:
model (str): Model identifier
tokens (List[int]): Token IDs to decode
custom_tokenizer (Optional[Dict]): Custom tokenizer configuration
Returns:
str: Decoded text string
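Examples:
# Round-trip: encoding then decoding should recover the original text
token_ids = encode(model="gpt-4", text="Hello, world!")
text = decode(model="gpt-4", tokens=token_ids)
assert text == "Hello, world!"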
"""Calculate costs for completions, embeddings, and other API operations across all providers.
def completion_cost(
completion_response: Union[ModelResponse, EmbeddingResponse, ImageResponse, TranscriptionResponse],
model: Optional[str] = None,
prompt_tokens: Optional[int] = None,
completion_tokens: Optional[int] = None,
custom_cost_per_token: Optional[CostPerToken] = None
) -> float:
"""
Calculate cost for a completion response.
Args:
completion_response: Response object from LiteLLM API call
model (Optional[str]): Model identifier override
prompt_tokens (Optional[int]): Input token count override
completion_tokens (Optional[int]): Output token count override
custom_cost_per_token (Optional[CostPerToken]): Custom cost configuration
Returns:
float: Cost in USD
Raises:
ValueError: Missing required information for cost calculation
Examples:
# Calculate cost from response
response = litellm.completion(model="gpt-4", messages=messages)
cost = completion_cost(response)
# Calculate with custom token counts
cost = completion_cost(
response,
prompt_tokens=100,
completion_tokens=50
)
# Calculate with custom cost configuration
custom_costs = CostPerToken(
input_cost_per_token=0.00001,
output_cost_per_token=0.00003
)
cost = completion_cost(response, custom_cost_per_token=custom_costs)
"""
def cost_per_token(
model: str = "",
prompt_tokens: int = 0,
completion_tokens: int = 0,
custom_cost_per_token: Optional[CostPerToken] = None
) -> float:
"""
Calculate cost based on token counts and model pricing.
Args:
model (str): Model identifier
prompt_tokens (int): Input token count
completion_tokens (int): Output token count
custom_cost_per_token (Optional[CostPerToken]): Custom pricing
Returns:
float: Total cost in USD
Examples:
# Calculate cost for specific token counts
cost = cost_per_token(
model="gpt-4",
prompt_tokens=100,
completion_tokens=50
)
# Calculate with custom pricing
cost = cost_per_token(
model="custom-model",
prompt_tokens=100,
completion_tokens=50,
custom_cost_per_token=CostPerToken(
input_cost_per_token=0.00001,
output_cost_per_token=0.00002
)
)
"""
def get_max_budget() -> float:
"""
Get current maximum budget limit.
Returns:
float: Maximum budget in USD
"""
def set_max_budget(budget: float) -> None:
"""
Set maximum budget limit for API usage.
Args:
budget (float): Maximum budget in USD
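Examples:
# Cap total spend at $50 (illustrative value); get_max_budget reads the same limit
set_max_budget(50.0)
assert get_max_budget() == 50.0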
"""Retrieve detailed information about models including capabilities, pricing, and technical specifications.
def get_model_info(
model: str,
custom_llm_provider: Optional[str] = None,
api_base: Optional[str] = None
) -> Dict[str, Any]:
"""
Get comprehensive information about a model.
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
api_base (Optional[str]): Custom API base URL
Returns:
Dict[str, Any]: Model information including:
- max_tokens: Maximum context window
- max_input_tokens: Maximum input tokens
- max_output_tokens: Maximum output tokens
- input_cost_per_token: Input cost per token
- output_cost_per_token: Output cost per token
- litellm_provider: Provider name
- mode: Model mode (chat, completion, embedding)
- supports_function_calling: Function calling support
- supports_parallel_function_calling: Parallel function calling
- supports_vision: Vision capability support
- supports_tool_choice: Tool choice support
Examples:
# Get OpenAI model info
info = get_model_info("gpt-4")
print(f"Max tokens: {info['max_tokens']}")
print(f"Input cost: ${info['input_cost_per_token']}")
# Get provider-specific model info
info = get_model_info("claude-3-sonnet-20240229", "anthropic")
# Get custom model info
info = get_model_info("custom/model", api_base="https://api.example.com")
"""
def get_model_list() -> List[str]:
"""
Get list of all supported models across all providers.
Returns:
List[str]: List of model identifiers
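Examples:
# List all registered models, then filter by prefix
models = get_model_list()
gpt_models = [m for m in models if m.startswith("gpt-")]
print(f"{len(gpt_models)} GPT-family models supported")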
"""
def get_supported_openai_params(
model: str,
custom_llm_provider: str
) -> List[str]:
"""
Get list of OpenAI parameters supported by a provider/model.
Args:
model (str): Model identifier
custom_llm_provider (str): Provider name
Returns:
List[str]: List of supported parameter names
Examples:
# Check what parameters Anthropic supports
params = get_supported_openai_params("claude-3-sonnet-20240229", "anthropic")
print("Supported params:", params)
# Check Cohere parameter support
params = get_supported_openai_params("command-nightly", "cohere")
"""
def get_llm_provider(
model: str,
custom_llm_provider: Optional[str] = None,
api_base: Optional[str] = None
) -> Tuple[str, str, str, Optional[str]]:
"""
Detect and return provider information for a model.
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
api_base (Optional[str]): Custom API base
Returns:
Tuple[str, str, str, Optional[str]]: (model, custom_llm_provider, dynamic_api_key, api_base)
Examples:
# Auto-detect provider
model, provider, api_key, api_base = get_llm_provider("gpt-4")
print(f"Provider: {provider}")
# Check Azure OpenAI
model, provider, api_key, api_base = get_llm_provider("azure/gpt-4")
"""Check model capabilities and feature support across different providers.
def supports_function_calling(
model: str,
custom_llm_provider: Optional[str] = None
) -> bool:
"""
Check if model supports function calling.
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
Returns:
bool: True if function calling is supported
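Examples:
# Gate tool definitions on model support before sending them
if supports_function_calling("gpt-4"):
    response = litellm.completion(model="gpt-4", messages=messages, tools=tools)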
"""
def supports_parallel_function_calling(
model: str,
custom_llm_provider: Optional[str] = None
) -> bool:
"""
Check if model supports parallel function calling.
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
Returns:
bool: True if parallel function calling is supported
"""
def supports_vision(
model: str,
custom_llm_provider: Optional[str] = None
) -> bool:
"""
Check if model supports vision/image inputs.
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
Returns:
bool: True if vision is supported
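Examples:
# Attach image content only when the model accepts it (URL is illustrative)
if supports_vision("gpt-4-vision-preview"):
    content = [
        {"type": "text", "text": "Describe this image."},
        {"type": "image_url", "image_url": {"url": "https://example.com/photo.png"}}
    ]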
"""
def supports_response_schema(
model: str,
custom_llm_provider: Optional[str] = None
) -> bool:
"""
Check if model supports structured response schemas.
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
Returns:
bool: True if response schema is supported
"""
def supports_system_messages(
model: str,
custom_llm_provider: Optional[str] = None
) -> bool:
"""
Check if model supports system messages.
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
Returns:
bool: True if system messages are supported
"""
def supports_tool_choice(
model: str,
custom_llm_provider: Optional[str] = None
) -> bool:
"""
Check if model supports tool choice parameter.
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
Returns:
bool: True if tool choice is supported
"""
def supports_audio_input(
model: str,
custom_llm_provider: Optional[str] = None
) -> bool:
"""
Check if model supports audio input.
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
Returns:
bool: True if audio input is supported
"""
def supports_audio_output(
model: str,
custom_llm_provider: Optional[str] = None
) -> bool:
"""
Check if model supports audio output.
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
Returns:
bool: True if audio output is supported
"""
def supports_reasoning(
model: str,
custom_llm_provider: Optional[str] = None
) -> bool:
"""
Check if model supports reasoning capabilities (like OpenAI o1).
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
Returns:
bool: True if reasoning mode is supported
"""
def supports_prompt_caching(
model: str,
custom_llm_provider: Optional[str] = None
) -> bool:
"""
Check if model supports prompt caching.
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
Returns:
bool: True if prompt caching is supported
"""
def supports_computer_use(
model: str,
custom_llm_provider: Optional[str] = None
) -> bool:
"""
Check if model supports computer use/interaction capabilities.
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
Returns:
bool: True if computer use is supported
"""Validate API keys, environment setup, and provider configurations.
def validate_environment(
model: str,
api_key: Optional[str] = None
) -> Dict[str, str]:
"""
Validate environment configuration for a model.
Args:
model (str): Model to validate environment for
api_key (Optional[str]): API key to validate
Returns:
Dict[str, str]: Mapping of missing or invalid configuration keys to error messages (empty when the environment is fully configured)
Raises:
ValueError: Invalid model or missing required configuration
Examples:
# Validate OpenAI setup
validation = validate_environment("gpt-4")
if validation:
print("Missing configuration:", validation)
# Validate with specific API key
validation = validate_environment("gpt-4", "sk-test-key")
# Validate Azure setup
validation = validate_environment("azure/gpt-4")
"""
def check_valid_key(model: str, api_key: str) -> bool:
"""
Test if an API key is valid for a model.
Args:
model (str): Model identifier
api_key (str): API key to test
Returns:
bool: True if key is valid
Examples:
# Test OpenAI key
is_valid = check_valid_key("gpt-4", "sk-test-key")
# Test Anthropic key
is_valid = check_valid_key("claude-3-sonnet-20240229", "test-key")
"""
def get_optional_params(model: str) -> List[str]:
"""
Get list of optional parameters for a model.
Args:
model (str): Model identifier
Returns:
List[str]: List of optional parameter names
"""
def get_required_params(model: str) -> List[str]:
"""
Get list of required parameters for a model.
Args:
model (str): Model identifier
Returns:
List[str]: List of required parameter names
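Examples:
# Inspect which parameters a request must (and may) include
required = get_required_params("gpt-4")
optional = get_optional_params("gpt-4")
print("Required:", required)
print("Optional:", optional)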
"""Utilities for processing multiple requests efficiently.
def batch_completion(
requests: List[Dict[str, Any]],
max_workers: int = 5,
timeout: float = 60.0
) -> List[Union[ModelResponse, Exception]]:
"""
Process multiple completion requests in parallel.
Args:
requests (List[Dict]): List of completion request parameters
max_workers (int): Maximum concurrent workers
timeout (float): Timeout per request
Returns:
List[Union[ModelResponse, Exception]]: Results or exceptions for each request
Examples:
requests = [
{"model": "gpt-4", "messages": [{"role": "user", "content": "Hello 1"}]},
{"model": "gpt-4", "messages": [{"role": "user", "content": "Hello 2"}]},
{"model": "gpt-4", "messages": [{"role": "user", "content": "Hello 3"}]}
]
results = batch_completion(requests, max_workers=3)
for i, result in enumerate(results):
if isinstance(result, Exception):
print(f"Request {i} failed: {result}")
else:
print(f"Request {i}: {result.choices[0].message.content}")
"""
async def abatch_completion(
requests: List[Dict[str, Any]],
max_concurrent: int = 5
) -> List[Union[ModelResponse, Exception]]:
"""
Async version of batch completion processing.
Args:
requests (List[Dict]): List of completion request parameters
max_concurrent (int): Maximum concurrent requests
Returns:
List[Union[ModelResponse, Exception]]: Results or exceptions for each request
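Examples:
# Must be awaited inside an event loop, e.g. via asyncio.run()
results = await abatch_completion(requests, max_concurrent=10)
for result in results:
    if not isinstance(result, Exception):
        print(result.choices[0].message.content)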
"""class CostPerToken:
"""Cost configuration for custom models"""
input_cost_per_token: float
output_cost_per_token: float
litellm_provider: Optional[str] = None
mode: Optional[Literal["chat", "completion", "embedding"]] = None
class TokenizerConfig:
"""Custom tokenizer configuration"""
tokenizer_name: str
tokenizer_params: Dict[str, Any]
encoding_name: Optional[str] = None
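
A minimal sketch of supplying a custom tokenizer to token_counter; the Hugging Face tokenizer name below is illustrative, and this assumes the configuration is passed as a plain dict with these fields:
custom = {
    "tokenizer_name": "hf-internal-testing/llama-tokenizer",  # illustrative tokenizer
    "tokenizer_params": {},
    "encoding_name": None
}
tokens = token_counter(model="my-custom-model", text="Hello!", custom_tokenizer=custom)
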
class ModelCapabilities:
"""Model capability flags"""
supports_function_calling: bool = False
supports_parallel_function_calling: bool = False
supports_vision: bool = False
supports_response_schema: bool = False
supports_system_messages: bool = False
supports_tool_choice: bool = False
supports_audio_input: bool = False
supports_audio_output: bool = False
supports_reasoning: bool = False
supports_prompt_caching: bool = False
supports_computer_use: bool = False
max_tokens: Optional[int] = None
max_input_tokens: Optional[int] = None
max_output_tokens: Optional[int] = None
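
A short sketch, assuming these classes behave like dataclasses, of filling ModelCapabilities from the dict returned by get_model_info (the field names mirror the keys documented above):
info = get_model_info("gpt-4")
caps = ModelCapabilities(
    supports_function_calling=info.get("supports_function_calling", False),
    supports_vision=info.get("supports_vision", False),
    max_tokens=info.get("max_tokens")
)

import litellm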
# Count tokens for different input types
text_tokens = litellm.token_counter(model="gpt-4", text="Hello, world!")
print(f"Text tokens: {text_tokens}")
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What is machine learning?"}
]
message_tokens = litellm.token_counter(model="gpt-4", messages=messages)
print(f"Message tokens: {message_tokens}")
# Estimate total cost before making request
prompt_tokens = litellm.token_counter(model="gpt-4", messages=messages)
estimated_response_tokens = 200 # Estimate
estimated_cost = litellm.cost_per_token(
model="gpt-4",
prompt_tokens=prompt_tokens,
completion_tokens=estimated_response_tokens
)
print(f"Estimated cost: ${estimated_cost:.6f}")
# Make request and calculate actual cost
response = litellm.completion(model="gpt-4", messages=messages)
actual_cost = litellm.completion_cost(response)
print(f"Actual cost: ${actual_cost:.6f}")import litellm
def check_model_capabilities(model: str):
"""Check and display all capabilities for a model."""
capabilities = {
"Function Calling": litellm.supports_function_calling(model),
"Parallel Function Calling": litellm.supports_parallel_function_calling(model),
"Vision": litellm.supports_vision(model),
"Response Schema": litellm.supports_response_schema(model),
"System Messages": litellm.supports_system_messages(model),
"Tool Choice": litellm.supports_tool_choice(model),
"Audio Input": litellm.supports_audio_input(model),
"Audio Output": litellm.supports_audio_output(model),
"Reasoning": litellm.supports_reasoning(model),
"Prompt Caching": litellm.supports_prompt_caching(model),
"Computer Use": litellm.supports_computer_use(model)
}
print(f"Capabilities for {model}:")
for capability, supported in capabilities.items():
status = "✓" if supported else "✗"
print(f" {status} {capability}")
# Get detailed model info
model_info = litellm.get_model_info(model)
print(f"\nModel Info:")
print(f" Max tokens: {model_info.get('max_tokens', 'Unknown')}")
print(f" Provider: {model_info.get('litellm_provider', 'Unknown')}")
print(f" Input cost: ${model_info.get('input_cost_per_token', 0)}")
print(f" Output cost: ${model_info.get('output_cost_per_token', 0)}")
# Check capabilities for different models
models_to_check = [
"gpt-4",
"gpt-4-vision-preview",
"claude-3-sonnet-20240229",
"gemini-pro"
]
for model in models_to_check:
check_model_capabilities(model)
print("-" * 50)import litellm
import os
def setup_and_validate_providers():
"""Setup and validate multiple provider configurations."""
providers_to_check = [
("gpt-4", "OpenAI"),
("claude-3-sonnet-20240229", "Anthropic"),
("command-nightly", "Cohere"),
("gemini-pro", "Google"),
("bedrock/anthropic.claude-v2", "AWS Bedrock"),
("azure/gpt-4", "Azure OpenAI")
]
for model, provider_name in providers_to_check:
print(f"\nValidating {provider_name} ({model}):")
try:
# Validate environment
validation_result = litellm.validate_environment(model)
if not validation_result:
print(" ✓ Environment is properly configured")
# Test with a simple request if environment is valid
try:
response = litellm.completion(
model=model,
messages=[{"role": "user", "content": "Hello"}],
max_tokens=5
)
print(" ✓ API call successful")
# Calculate cost
cost = litellm.completion_cost(response)
print(f" ✓ Request cost: ${cost:.6f}")
except Exception as e:
print(f" ✗ API call failed: {e}")
else:
print(" ✗ Missing configuration:")
for key, message in validation_result.items():
print(f" - {key}: {message}")
except Exception as e:
print(f" ✗ Validation failed: {e}")
# Run validation
setup_and_validate_providers()
# Set up missing environment variables
def setup_missing_env_vars():
"""Interactively setup missing environment variables."""
env_vars = {
"OPENAI_API_KEY": "OpenAI API key",
"ANTHROPIC_API_KEY": "Anthropic API key",
"COHERE_API_KEY": "Cohere API key",
"GOOGLE_APPLICATION_CREDENTIALS": "Google credentials file path",
"AWS_ACCESS_KEY_ID": "AWS access key",
"AZURE_API_KEY": "Azure OpenAI API key"
}
for var_name, description in env_vars.items():
if not os.environ.get(var_name):
value = input(f"Enter {description} (or press Enter to skip): ").strip()
if value:
os.environ[var_name] = value
print(f"Set {var_name}")
# Uncomment to run interactive setup
# setup_missing_env_vars()

import litellm
import asyncio
def process_batch_sync():
"""Process multiple requests synchronously with batch utility."""
requests = [
{
"model": "gpt-3.5-turbo",
"messages": [{"role": "user", "content": f"Count to {i}"}],
"max_tokens": 50
}
for i in range(1, 6)
]
print("Processing batch synchronously...")
results = litellm.batch_completion(requests, max_workers=3)
for i, result in enumerate(results):
if isinstance(result, Exception):
print(f"Request {i+1} failed: {result}")
else:
content = result.choices[0].message.content
cost = litellm.completion_cost(result)
print(f"Request {i+1}: {content} (${cost:.6f})")
async def process_batch_async():
"""Process multiple requests asynchronously."""
requests = [
{
"model": "gpt-3.5-turbo",
"messages": [{"role": "user", "content": f"What is {i} + {i}?"}],
"max_tokens": 20
}
for i in range(1, 11)
]
print("Processing batch asynchronously...")
results = await litellm.abatch_completion(requests, max_concurrent=5)
total_cost = 0
for i, result in enumerate(results):
if isinstance(result, Exception):
print(f"Request {i+1} failed: {result}")
else:
content = result.choices[0].message.content.strip()
cost = litellm.completion_cost(result)
total_cost += cost
print(f"Request {i+1}: {content} (${cost:.6f})")
print(f"Total batch cost: ${total_cost:.6f}")
# Run batch processing examples
process_batch_sync()
asyncio.run(process_batch_async())

import litellm
from typing import List, Dict, Any
class CostTracker:
"""Advanced cost tracking and budget management."""
def __init__(self, daily_budget: float = 10.0):
self.daily_budget = daily_budget
self.current_cost = 0.0
self.requests = []
def estimate_request_cost(self, model: str, messages: List[Dict], max_tokens: int = 256) -> float:
"""Estimate cost before making request."""
prompt_tokens = litellm.token_counter(model=model, messages=messages)
estimated_cost = litellm.cost_per_token(
model=model,
prompt_tokens=prompt_tokens,
completion_tokens=max_tokens
)
return estimated_cost
def can_afford_request(self, estimated_cost: float) -> bool:
"""Check if request fits within budget."""
return (self.current_cost + estimated_cost) <= self.daily_budget
def track_request(self, model: str, response: Any, estimated_cost: float):
"""Track completed request cost."""
actual_cost = litellm.completion_cost(response)
self.current_cost += actual_cost
self.requests.append({
"model": model,
"estimated_cost": estimated_cost,
"actual_cost": actual_cost,
"tokens_used": response.usage.total_tokens if hasattr(response, 'usage') else 0
})
print(f"Request: ${actual_cost:.6f} (est: ${estimated_cost:.6f})")
print(f"Budget: ${self.current_cost:.2f}/${self.daily_budget:.2f}")
def safe_completion(self, **kwargs):
"""Make completion with budget checking."""
model = kwargs.get("model")
messages = kwargs.get("messages")
max_tokens = kwargs.get("max_tokens", 256)
# Estimate cost
estimated_cost = self.estimate_request_cost(model, messages, max_tokens)
if not self.can_afford_request(estimated_cost):
raise litellm.BudgetExceededError(
f"Request would exceed budget: ${estimated_cost:.6f} "
f"(remaining: ${self.daily_budget - self.current_cost:.6f})"
)
# Make request
response = litellm.completion(**kwargs)
# Track cost
self.track_request(model, response, estimated_cost)
return response
def get_stats(self) -> Dict[str, Any]:
"""Get cost tracking statistics."""
if not self.requests:
return {"total_requests": 0, "total_cost": 0}
total_requests = len(self.requests)
total_tokens = sum(r["tokens_used"] for r in self.requests)
avg_cost_per_request = self.current_cost / total_requests
model_usage = {}
for request in self.requests:
model = request["model"]
if model not in model_usage:
model_usage[model] = {"requests": 0, "cost": 0, "tokens": 0}
model_usage[model]["requests"] += 1
model_usage[model]["cost"] += request["actual_cost"]
model_usage[model]["tokens"] += request["tokens_used"]
return {
"total_requests": total_requests,
"total_cost": self.current_cost,
"total_tokens": total_tokens,
"avg_cost_per_request": avg_cost_per_request,
"budget_used": (self.current_cost / self.daily_budget) * 100,
"model_usage": model_usage
}
# Usage example
tracker = CostTracker(daily_budget=5.00)
try:
# Make tracked requests
response1 = tracker.safe_completion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "What is AI?"}],
max_tokens=100
)
response2 = tracker.safe_completion(
model="gpt-4",
messages=[{"role": "user", "content": "Explain quantum computing"}],
max_tokens=200
)
# Get statistics
stats = tracker.get_stats()
print("\nCost Tracking Statistics:")
print(f"Total requests: {stats['total_requests']}")
print(f"Total cost: ${stats['total_cost']:.6f}")
print(f"Budget used: {stats['budget_used']:.1f}%")
print(f"Average cost per request: ${stats['avg_cost_per_request']:.6f}")
print("\nModel usage breakdown:")
for model, usage in stats['model_usage'].items():
print(f" {model}: {usage['requests']} requests, "
f"${usage['cost']:.6f}, {usage['tokens']} tokens")
except litellm.BudgetExceededError as e:
print(f"Budget exceeded: {e}")Install with Tessl CLI
npx tessl i tessl/pypi-litellm