Library to easily interface with LLM API providers
Comprehensive utility functions for token counting, cost calculation, model information, capability detection, and validation. These tools enable advanced LLM management, optimization, and monitoring across all supported providers.
Accurate token counting for prompts, messages, and completions across different model tokenizers.
def token_counter(
model: str = "",
text: Union[str, List[str]] = "",
messages: Optional[List[Dict[str, str]]] = None,
count_response_tokens: Optional[bool] = False,
custom_tokenizer: Optional[Dict] = None
) -> int:
"""
Count tokens for text or messages using model-specific tokenizers.
Args:
model (str): Model identifier for tokenizer selection
text (Union[str, List[str]]): Text string or list of strings to count
messages (Optional[List[Dict]]): Chat messages in OpenAI format
count_response_tokens (Optional[bool]): Include estimated response tokens
custom_tokenizer (Optional[Dict]): Custom tokenizer configuration
Returns:
int: Total token count
Raises:
ValueError: Invalid input parameters or unsupported model
ImportError: Required tokenizer package not installed
Examples:
# Count tokens in text
tokens = token_counter(model="gpt-4", text="Hello, world!")
# Count tokens in messages
messages = [
{"role": "system", "content": "You are helpful."},
{"role": "user", "content": "Hello!"}
]
tokens = token_counter(model="gpt-4", messages=messages)
# Include response token estimation
tokens = token_counter(
model="gpt-4",
messages=messages,
count_response_tokens=True
)
"""
def encode(
model: str,
text: str,
custom_tokenizer: Optional[Dict] = None
) -> List[int]:
"""
Encode text to tokens using model-specific tokenizer.
Args:
model (str): Model identifier
text (str): Text to encode
custom_tokenizer (Optional[Dict]): Custom tokenizer configuration
Returns:
List[int]: List of token IDs
"""
def decode(
model: str,
tokens: List[int],
custom_tokenizer: Optional[Dict] = None
) -> str:
"""
Decode tokens back to text using model-specific tokenizer.
Args:
model (str): Model identifier
tokens (List[int]): Token IDs to decode
custom_tokenizer (Optional[Dict]): Custom tokenizer configuration
Returns:
str: Decoded text string
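Examples:
# Round-trip: encoding then decoding should recover the original text
token_ids = encode(model="gpt-4", text="Hello, world!")
text = decode(model="gpt-4", tokens=token_ids)
assert text == "Hello, world!"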
"""Calculate costs for completions, embeddings, and other API operations across all providers.
def completion_cost(
completion_response: Union[ModelResponse, EmbeddingResponse, ImageResponse, TranscriptionResponse],
model: Optional[str] = None,
prompt_tokens: Optional[int] = None,
completion_tokens: Optional[int] = None,
custom_cost_per_token: Optional[CostPerToken] = None
) -> float:
"""
Calculate cost for a completion response.
Args:
completion_response: Response object from LiteLLM API call
model (Optional[str]): Model identifier override
prompt_tokens (Optional[int]): Input token count override
completion_tokens (Optional[int]): Output token count override
custom_cost_per_token (Optional[CostPerToken]): Custom cost configuration
Returns:
float: Cost in USD
Raises:
ValueError: Missing required information for cost calculation
Examples:
# Calculate cost from response
response = litellm.completion(model="gpt-4", messages=messages)
cost = completion_cost(response)
# Calculate with custom token counts
cost = completion_cost(
response,
prompt_tokens=100,
completion_tokens=50
)
# Calculate with custom cost configuration
custom_costs = CostPerToken(
input_cost_per_token=0.00001,
output_cost_per_token=0.00003
)
cost = completion_cost(response, custom_cost_per_token=custom_costs)
"""
def cost_per_token(
model: str = "",
prompt_tokens: int = 0,
completion_tokens: int = 0,
custom_cost_per_token: Optional[CostPerToken] = None
) -> float:
"""
Calculate cost based on token counts and model pricing.
Args:
model (str): Model identifier
prompt_tokens (int): Input token count
completion_tokens (int): Output token count
custom_cost_per_token (Optional[CostPerToken]): Custom pricing
Returns:
float: Total cost in USD
Examples:
# Calculate cost for specific token counts
cost = cost_per_token(
model="gpt-4",
prompt_tokens=100,
completion_tokens=50
)
# Calculate with custom pricing
cost = cost_per_token(
model="custom-model",
prompt_tokens=100,
completion_tokens=50,
custom_cost_per_token=CostPerToken(
input_cost_per_token=0.00001,
output_cost_per_token=0.00002
)
)
"""
def get_max_budget() -> float:
"""
Get current maximum budget limit.
Returns:
float: Maximum budget in USD
"""
def set_max_budget(budget: float) -> None:
"""
Set maximum budget limit for API usage.
Args:
budget (float): Maximum budget in USD
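Examples:
# Cap total spend at $50 (illustrative value); get_max_budget reads the same limit
set_max_budget(50.0)
assert get_max_budget() == 50.0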
"""Retrieve detailed information about models including capabilities, pricing, and technical specifications.
def get_model_info(
model: str,
custom_llm_provider: Optional[str] = None,
api_base: Optional[str] = None
) -> Dict[str, Any]:
"""
Get comprehensive information about a model.
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
api_base (Optional[str]): Custom API base URL
Returns:
Dict[str, Any]: Model information including:
- max_tokens: Maximum context window
- max_input_tokens: Maximum input tokens
- max_output_tokens: Maximum output tokens
- input_cost_per_token: Input cost per token
- output_cost_per_token: Output cost per token
- litellm_provider: Provider name
- mode: Model mode (chat, completion, embedding)
- supports_function_calling: Function calling support
- supports_parallel_function_calling: Parallel function calling
- supports_vision: Vision capability support
- supports_tool_choice: Tool choice support
Examples:
# Get OpenAI model info
info = get_model_info("gpt-4")
print(f"Max tokens: {info['max_tokens']}")
print(f"Input cost: ${info['input_cost_per_token']}")
# Get provider-specific model info
info = get_model_info("claude-3-sonnet-20240229", "anthropic")
# Get custom model info
info = get_model_info("custom/model", api_base="https://api.example.com")
"""
def get_model_list() -> List[str]:
"""
Get list of all supported models across all providers.
Returns:
List[str]: List of model identifiers
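Examples:
# List all registered models, then filter by prefix
models = get_model_list()
gpt_models = [m for m in models if m.startswith("gpt-")]
print(f"{len(gpt_models)} GPT-family models supported")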
"""
def get_supported_openai_params(
model: str,
custom_llm_provider: str
) -> List[str]:
"""
Get list of OpenAI parameters supported by a provider/model.
Args:
model (str): Model identifier
custom_llm_provider (str): Provider name
Returns:
List[str]: List of supported parameter names
Examples:
# Check what parameters Anthropic supports
params = get_supported_openai_params("claude-3-sonnet-20240229", "anthropic")
print("Supported params:", params)
# Check Cohere parameter support
params = get_supported_openai_params("command-nightly", "cohere")
"""
def get_llm_provider(
model: str,
custom_llm_provider: Optional[str] = None,
api_base: Optional[str] = None
) -> Tuple[str, str, str, Optional[str]]:
"""
Detect and return provider information for a model.
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
api_base (Optional[str]): Custom API base
Returns:
Tuple[str, str, str, Optional[str]]: (model, custom_llm_provider, dynamic_api_key, api_base)
Examples:
# Auto-detect provider
model, provider, api_key, api_base = get_llm_provider("gpt-4")
print(f"Provider: {provider}")
# Check Azure OpenAI
model, provider, api_key, api_base = get_llm_provider("azure/gpt-4")
"""Check model capabilities and feature support across different providers.
def supports_function_calling(
model: str,
custom_llm_provider: Optional[str] = None
) -> bool:
"""
Check if model supports function calling.
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
Returns:
bool: True if function calling is supported
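Examples:
# Gate tool definitions on model support before sending them
if supports_function_calling("gpt-4"):
    response = litellm.completion(model="gpt-4", messages=messages, tools=tools)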
"""
def supports_parallel_function_calling(
model: str,
custom_llm_provider: Optional[str] = None
) -> bool:
"""
Check if model supports parallel function calling.
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
Returns:
bool: True if parallel function calling is supported
"""
def supports_vision(
model: str,
custom_llm_provider: Optional[str] = None
) -> bool:
"""
Check if model supports vision/image inputs.
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
Returns:
bool: True if vision is supported
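Examples:
# Attach image content only when the model accepts it (URL is illustrative)
if supports_vision("gpt-4-vision-preview"):
    content = [
        {"type": "text", "text": "Describe this image."},
        {"type": "image_url", "image_url": {"url": "https://example.com/photo.png"}}
    ]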
"""
def supports_response_schema(
model: str,
custom_llm_provider: Optional[str] = None
) -> bool:
"""
Check if model supports structured response schemas.
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
Returns:
bool: True if response schema is supported
"""
def supports_system_messages(
model: str,
custom_llm_provider: Optional[str] = None
) -> bool:
"""
Check if model supports system messages.
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
Returns:
bool: True if system messages are supported
"""
def supports_tool_choice(
model: str,
custom_llm_provider: Optional[str] = None
) -> bool:
"""
Check if model supports tool choice parameter.
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
Returns:
bool: True if tool choice is supported
"""
def supports_audio_input(
model: str,
custom_llm_provider: Optional[str] = None
) -> bool:
"""
Check if model supports audio input.
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
Returns:
bool: True if audio input is supported
"""
def supports_audio_output(
model: str,
custom_llm_provider: Optional[str] = None
) -> bool:
"""
Check if model supports audio output.
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
Returns:
bool: True if audio output is supported
"""
def supports_reasoning(
model: str,
custom_llm_provider: Optional[str] = None
) -> bool:
"""
Check if model supports reasoning capabilities (like OpenAI o1).
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
Returns:
bool: True if reasoning mode is supported
"""
def supports_prompt_caching(
model: str,
custom_llm_provider: Optional[str] = None
) -> bool:
"""
Check if model supports prompt caching.
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
Returns:
bool: True if prompt caching is supported
"""
def supports_computer_use(
model: str,
custom_llm_provider: Optional[str] = None
) -> bool:
"""
Check if model supports computer use/interaction capabilities.
Args:
model (str): Model identifier
custom_llm_provider (Optional[str]): Provider override
Returns:
bool: True if computer use is supported
"""Validate API keys, environment setup, and provider configurations.
def validate_environment(
model: str,
api_key: Optional[str] = None
) -> Dict[str, str]:
"""
Validate environment configuration for a model.
Args:
model (str): Model to validate environment for
api_key (Optional[str]): API key to validate
Returns:
Dict[str, str]: Mapping of missing or invalid configuration keys to error messages (empty when the environment is fully configured)
Raises:
ValueError: Invalid model or missing required configuration
Examples:
# Validate OpenAI setup
validation = validate_environment("gpt-4")
if validation:
print("Missing configuration:", validation)
# Validate with specific API key
validation = validate_environment("gpt-4", "sk-test-key")
# Validate Azure setup
validation = validate_environment("azure/gpt-4")
"""
def check_valid_key(model: str, api_key: str) -> bool:
"""
Test if an API key is valid for a model.
Args:
model (str): Model identifier
api_key (str): API key to test
Returns:
bool: True if key is valid
Examples:
# Test OpenAI key
is_valid = check_valid_key("gpt-4", "sk-test-key")
# Test Anthropic key
is_valid = check_valid_key("claude-3-sonnet-20240229", "test-key")
"""
def get_optional_params(model: str) -> List[str]:
"""
Get list of optional parameters for a model.
Args:
model (str): Model identifier
Returns:
List[str]: List of optional parameter names
"""
def get_required_params(model: str) -> List[str]:
"""
Get list of required parameters for a model.
Args:
model (str): Model identifier
Returns:
List[str]: List of required parameter names
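Examples:
# Inspect which parameters a request must (and may) include
required = get_required_params("gpt-4")
optional = get_optional_params("gpt-4")
print("Required:", required)
print("Optional:", optional)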
"""Utilities for processing multiple requests efficiently.
def batch_completion(
requests: List[Dict[str, Any]],
max_workers: int = 5,
timeout: float = 60.0
) -> List[Union[ModelResponse, Exception]]:
"""
Process multiple completion requests in parallel.
Args:
requests (List[Dict]): List of completion request parameters
max_workers (int): Maximum concurrent workers
timeout (float): Timeout per request
Returns:
List[Union[ModelResponse, Exception]]: Results or exceptions for each request
Examples:
requests = [
{"model": "gpt-4", "messages": [{"role": "user", "content": "Hello 1"}]},
{"model": "gpt-4", "messages": [{"role": "user", "content": "Hello 2"}]},
{"model": "gpt-4", "messages": [{"role": "user", "content": "Hello 3"}]}
]
results = batch_completion(requests, max_workers=3)
for i, result in enumerate(results):
if isinstance(result, Exception):
print(f"Request {i} failed: {result}")
else:
print(f"Request {i}: {result.choices[0].message.content}")
"""
async def abatch_completion(
requests: List[Dict[str, Any]],
max_concurrent: int = 5
) -> List[Union[ModelResponse, Exception]]:
"""
Async version of batch completion processing.
Args:
requests (List[Dict]): List of completion request parameters
max_concurrent (int): Maximum concurrent requests
Returns:
List[Union[ModelResponse, Exception]]: Results or exceptions for each request
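Examples:
# Must be awaited inside an event loop, e.g. via asyncio.run()
results = await abatch_completion(requests, max_concurrent=10)
for result in results:
    if not isinstance(result, Exception):
        print(result.choices[0].message.content)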
"""class CostPerToken:
"""Cost configuration for custom models"""
input_cost_per_token: float
output_cost_per_token: float
litellm_provider: Optional[str] = None
mode: Optional[Literal["chat", "completion", "embedding"]] = None
class TokenizerConfig:
"""Custom tokenizer configuration"""
tokenizer_name: str
tokenizer_params: Dict[str, Any]
encoding_name: Optional[str] = None
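
A minimal sketch of supplying a custom tokenizer to token_counter; the Hugging Face tokenizer name below is illustrative, and this assumes the configuration is passed as a plain dict with these fields:
custom = {
    "tokenizer_name": "hf-internal-testing/llama-tokenizer",  # illustrative tokenizer
    "tokenizer_params": {},
    "encoding_name": None
}
tokens = token_counter(model="my-custom-model", text="Hello!", custom_tokenizer=custom)
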
class ModelCapabilities:
"""Model capability flags"""
supports_function_calling: bool = False
supports_parallel_function_calling: bool = False
supports_vision: bool = False
supports_response_schema: bool = False
supports_system_messages: bool = False
supports_tool_choice: bool = False
supports_audio_input: bool = False
supports_audio_output: bool = False
supports_reasoning: bool = False
supports_prompt_caching: bool = False
supports_computer_use: bool = False
max_tokens: Optional[int] = None
max_input_tokens: Optional[int] = None
max_output_tokens: Optional[int] = None
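
A short sketch, assuming these classes behave like dataclasses, of filling ModelCapabilities from the dict returned by get_model_info (the field names mirror the keys documented above):
info = get_model_info("gpt-4")
caps = ModelCapabilities(
    supports_function_calling=info.get("supports_function_calling", False),
    supports_vision=info.get("supports_vision", False),
    max_tokens=info.get("max_tokens")
)

import litellm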
# Count tokens for different input types
text_tokens = litellm.token_counter(model="gpt-4", text="Hello, world!")
print(f"Text tokens: {text_tokens}")
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What is machine learning?"}
]
message_tokens = litellm.token_counter(model="gpt-4", messages=messages)
print(f"Message tokens: {message_tokens}")
# Estimate total cost before making request
prompt_tokens = litellm.token_counter(model="gpt-4", messages=messages)
estimated_response_tokens = 200 # Estimate
estimated_cost = litellm.cost_per_token(
model="gpt-4",
prompt_tokens=prompt_tokens,
completion_tokens=estimated_response_tokens
)
print(f"Estimated cost: ${estimated_cost:.6f}")
# Make request and calculate actual cost
response = litellm.completion(model="gpt-4", messages=messages)
actual_cost = litellm.completion_cost(response)
print(f"Actual cost: ${actual_cost:.6f}")import litellm
def check_model_capabilities(model: str):
"""Check and display all capabilities for a model."""
capabilities = {
"Function Calling": litellm.supports_function_calling(model),
"Parallel Function Calling": litellm.supports_parallel_function_calling(model),
"Vision": litellm.supports_vision(model),
"Response Schema": litellm.supports_response_schema(model),
"System Messages": litellm.supports_system_messages(model),
"Tool Choice": litellm.supports_tool_choice(model),
"Audio Input": litellm.supports_audio_input(model),
"Audio Output": litellm.supports_audio_output(model),
"Reasoning": litellm.supports_reasoning(model),
"Prompt Caching": litellm.supports_prompt_caching(model),
"Computer Use": litellm.supports_computer_use(model)
}
print(f"Capabilities for {model}:")
for capability, supported in capabilities.items():
status = "✓" if supported else "✗"
print(f" {status} {capability}")
# Get detailed model info
model_info = litellm.get_model_info(model)
print(f"\nModel Info:")
print(f" Max tokens: {model_info.get('max_tokens', 'Unknown')}")
print(f" Provider: {model_info.get('litellm_provider', 'Unknown')}")
print(f" Input cost: ${model_info.get('input_cost_per_token', 0)}")
print(f" Output cost: ${model_info.get('output_cost_per_token', 0)}")
# Check capabilities for different models
models_to_check = [
"gpt-4",
"gpt-4-vision-preview",
"claude-3-sonnet-20240229",
"gemini-pro"
]
for model in models_to_check:
check_model_capabilities(model)
print("-" * 50)import litellm
import os
def setup_and_validate_providers():
"""Setup and validate multiple provider configurations."""
providers_to_check = [
("gpt-4", "OpenAI"),
("claude-3-sonnet-20240229", "Anthropic"),
("command-nightly", "Cohere"),
("gemini-pro", "Google"),
("bedrock/anthropic.claude-v2", "AWS Bedrock"),
("azure/gpt-4", "Azure OpenAI")
]
for model, provider_name in providers_to_check:
print(f"\nValidating {provider_name} ({model}):")
try:
# Validate environment
validation_result = litellm.validate_environment(model)
if not validation_result:
print(" ✓ Environment is properly configured")
# Test with a simple request if environment is valid
try:
response = litellm.completion(
model=model,
messages=[{"role": "user", "content": "Hello"}],
max_tokens=5
)
print(" ✓ API call successful")
# Calculate cost
cost = litellm.completion_cost(response)
print(f" ✓ Request cost: ${cost:.6f}")
except Exception as e:
print(f" ✗ API call failed: {e}")
else:
print(" ✗ Missing configuration:")
for key, message in validation_result.items():
print(f" - {key}: {message}")
except Exception as e:
print(f" ✗ Validation failed: {e}")
# Run validation
setup_and_validate_providers()
# Set up missing environment variables
def setup_missing_env_vars():
"""Interactively setup missing environment variables."""
env_vars = {
"OPENAI_API_KEY": "OpenAI API key",
"ANTHROPIC_API_KEY": "Anthropic API key",
"COHERE_API_KEY": "Cohere API key",
"GOOGLE_APPLICATION_CREDENTIALS": "Google credentials file path",
"AWS_ACCESS_KEY_ID": "AWS access key",
"AZURE_API_KEY": "Azure OpenAI API key"
}
for var_name, description in env_vars.items():
if not os.environ.get(var_name):
value = input(f"Enter {description} (or press Enter to skip): ").strip()
if value:
os.environ[var_name] = value
print(f"Set {var_name}")
# Uncomment to run interactive setup
# setup_missing_env_vars()

import litellm
import asyncio
def process_batch_sync():
"""Process multiple requests synchronously with batch utility."""
requests = [
{
"model": "gpt-3.5-turbo",
"messages": [{"role": "user", "content": f"Count to {i}"}],
"max_tokens": 50
}
for i in range(1, 6)
]
print("Processing batch synchronously...")
results = litellm.batch_completion(requests, max_workers=3)
for i, result in enumerate(results):
if isinstance(result, Exception):
print(f"Request {i+1} failed: {result}")
else:
content = result.choices[0].message.content
cost = litellm.completion_cost(result)
print(f"Request {i+1}: {content} (${cost:.6f})")
async def process_batch_async():
"""Process multiple requests asynchronously."""
requests = [
{
"model": "gpt-3.5-turbo",
"messages": [{"role": "user", "content": f"What is {i} + {i}?"}],
"max_tokens": 20
}
for i in range(1, 11)
]
print("Processing batch asynchronously...")
results = await litellm.abatch_completion(requests, max_concurrent=5)
total_cost = 0
for i, result in enumerate(results):
if isinstance(result, Exception):
print(f"Request {i+1} failed: {result}")
else:
content = result.choices[0].message.content.strip()
cost = litellm.completion_cost(result)
total_cost += cost
print(f"Request {i+1}: {content} (${cost:.6f})")
print(f"Total batch cost: ${total_cost:.6f}")
# Run batch processing examples
process_batch_sync()
asyncio.run(process_batch_async())

import litellm
from typing import List, Dict, Any
class CostTracker:
"""Advanced cost tracking and budget management."""
def __init__(self, daily_budget: float = 10.0):
self.daily_budget = daily_budget
self.current_cost = 0.0
self.requests = []
def estimate_request_cost(self, model: str, messages: List[Dict], max_tokens: int = 256) -> float:
"""Estimate cost before making request."""
prompt_tokens = litellm.token_counter(model=model, messages=messages)
estimated_cost = litellm.cost_per_token(
model=model,
prompt_tokens=prompt_tokens,
completion_tokens=max_tokens
)
return estimated_cost
def can_afford_request(self, estimated_cost: float) -> bool:
"""Check if request fits within budget."""
return (self.current_cost + estimated_cost) <= self.daily_budget
def track_request(self, model: str, response: Any, estimated_cost: float):
"""Track completed request cost."""
actual_cost = litellm.completion_cost(response)
self.current_cost += actual_cost
self.requests.append({
"model": model,
"estimated_cost": estimated_cost,
"actual_cost": actual_cost,
"tokens_used": response.usage.total_tokens if hasattr(response, 'usage') else 0
})
print(f"Request: ${actual_cost:.6f} (est: ${estimated_cost:.6f})")
print(f"Budget: ${self.current_cost:.2f}/${self.daily_budget:.2f}")
def safe_completion(self, **kwargs):
"""Make completion with budget checking."""
model = kwargs.get("model")
messages = kwargs.get("messages")
max_tokens = kwargs.get("max_tokens", 256)
# Estimate cost
estimated_cost = self.estimate_request_cost(model, messages, max_tokens)
if not self.can_afford_request(estimated_cost):
raise litellm.BudgetExceededError(
f"Request would exceed budget: ${estimated_cost:.6f} "
f"(remaining: ${self.daily_budget - self.current_cost:.6f})"
)
# Make request
response = litellm.completion(**kwargs)
# Track cost
self.track_request(model, response, estimated_cost)
return response
def get_stats(self) -> Dict[str, Any]:
"""Get cost tracking statistics."""
if not self.requests:
return {"total_requests": 0, "total_cost": 0}
total_requests = len(self.requests)
total_tokens = sum(r["tokens_used"] for r in self.requests)
avg_cost_per_request = self.current_cost / total_requests
model_usage = {}
for request in self.requests:
model = request["model"]
if model not in model_usage:
model_usage[model] = {"requests": 0, "cost": 0, "tokens": 0}
model_usage[model]["requests"] += 1
model_usage[model]["cost"] += request["actual_cost"]
model_usage[model]["tokens"] += request["tokens_used"]
return {
"total_requests": total_requests,
"total_cost": self.current_cost,
"total_tokens": total_tokens,
"avg_cost_per_request": avg_cost_per_request,
"budget_used": (self.current_cost / self.daily_budget) * 100,
"model_usage": model_usage
}
# Usage example
tracker = CostTracker(daily_budget=5.00)
try:
# Make tracked requests
response1 = tracker.safe_completion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "What is AI?"}],
max_tokens=100
)
response2 = tracker.safe_completion(
model="gpt-4",
messages=[{"role": "user", "content": "Explain quantum computing"}],
max_tokens=200
)
# Get statistics
stats = tracker.get_stats()
print("\nCost Tracking Statistics:")
print(f"Total requests: {stats['total_requests']}")
print(f"Total cost: ${stats['total_cost']:.6f}")
print(f"Budget used: {stats['budget_used']:.1f}%")
print(f"Average cost per request: ${stats['avg_cost_per_request']:.6f}")
print("\nModel usage breakdown:")
for model, usage in stats['model_usage'].items():
print(f" {model}: {usage['requests']} requests, "
f"${usage['cost']:.6f}, {usage['tokens']} tokens")
except litellm.BudgetExceededError as e:
print(f"Budget exceeded: {e}")Install with Tessl CLI
npx tessl i tessl/pypi-litellm