# Utilities & Helpers

Comprehensive utility functions for token counting, cost calculation, model information, capability detection, and validation. These tools enable advanced LLM management, optimization, and monitoring across all supported providers.

## Capabilities

### Token Counting

Accurate token counting for prompts, messages, and completions across different model tokenizers.

```python { .api }
def token_counter(
    model: str = "",
    text: Union[str, List[str]] = "",
    messages: Optional[List[Dict[str, str]]] = None,
    count_response_tokens: Optional[bool] = False,
    custom_tokenizer: Optional[Dict] = None
) -> int:
    """
    Count tokens for text or messages using model-specific tokenizers.

    Args:
        model (str): Model identifier for tokenizer selection
        text (Union[str, List[str]]): Text string or list of strings to count
        messages (Optional[List[Dict]]): Chat messages in OpenAI format
        count_response_tokens (Optional[bool]): Include estimated response tokens
        custom_tokenizer (Optional[Dict]): Custom tokenizer configuration

    Returns:
        int: Total token count

    Raises:
        ValueError: Invalid input parameters or unsupported model
        ImportError: Required tokenizer package not installed

    Examples:
        # Count tokens in text
        tokens = token_counter(model="gpt-4", text="Hello, world!")

        # Count tokens in messages
        messages = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Hello!"}
        ]
        tokens = token_counter(model="gpt-4", messages=messages)

        # Include response token estimation
        tokens = token_counter(
            model="gpt-4",
            messages=messages,
            count_response_tokens=True
        )
    """

def encode(
    model: str,
    text: str,
    custom_tokenizer: Optional[Dict] = None
) -> List[int]:
    """
    Encode text to tokens using model-specific tokenizer.

    Args:
        model (str): Model identifier
        text (str): Text to encode
        custom_tokenizer (Optional[Dict]): Custom tokenizer configuration

    Returns:
        List[int]: List of token IDs
    """

def decode(
    model: str,
    tokens: List[int],
    custom_tokenizer: Optional[Dict] = None
) -> str:
    """
    Decode tokens back to text using model-specific tokenizer.

    Args:
        model (str): Model identifier
        tokens (List[int]): Token IDs to decode
        custom_tokenizer (Optional[Dict]): Custom tokenizer configuration

    Returns:
        str: Decoded text string
    """
```

### Cost Calculation

Calculate costs for completions, embeddings, and other API operations across all providers.

```python { .api }
def completion_cost(
    completion_response: Union[ModelResponse, EmbeddingResponse, ImageResponse, TranscriptionResponse],
    model: Optional[str] = None,
    prompt_tokens: Optional[int] = None,
    completion_tokens: Optional[int] = None,
    custom_cost_per_token: Optional[CostPerToken] = None
) -> float:
    """
    Calculate cost for a completion response.

    Args:
        completion_response: Response object from LiteLLM API call
        model (Optional[str]): Model identifier override
        prompt_tokens (Optional[int]): Input token count override
        completion_tokens (Optional[int]): Output token count override
        custom_cost_per_token (Optional[CostPerToken]): Custom cost configuration

    Returns:
        float: Cost in USD

    Raises:
        ValueError: Missing required information for cost calculation

    Examples:
        # Calculate cost from response
        response = litellm.completion(model="gpt-4", messages=messages)
        cost = completion_cost(response)

        # Calculate with custom token counts
        cost = completion_cost(
            response,
            prompt_tokens=100,
            completion_tokens=50
        )

        # Calculate with custom cost configuration
        custom_costs = CostPerToken(
            input_cost_per_token=0.00001,
            output_cost_per_token=0.00003
        )
        cost = completion_cost(response, custom_cost_per_token=custom_costs)
    """

def cost_per_token(
    model: str = "",
    prompt_tokens: int = 0,
    completion_tokens: int = 0,
    custom_cost_per_token: Optional[CostPerToken] = None
) -> float:
    """
    Calculate cost based on token counts and model pricing.

    Args:
        model (str): Model identifier
        prompt_tokens (int): Input token count
        completion_tokens (int): Output token count
        custom_cost_per_token (Optional[CostPerToken]): Custom pricing

    Returns:
        float: Total cost in USD

    Examples:
        # Calculate cost for specific token counts
        cost = cost_per_token(
            model="gpt-4",
            prompt_tokens=100,
            completion_tokens=50
        )

        # Calculate with custom pricing
        cost = cost_per_token(
            model="custom-model",
            prompt_tokens=100,
            completion_tokens=50,
            custom_cost_per_token=CostPerToken(
                input_cost_per_token=0.00001,
                output_cost_per_token=0.00002
            )
        )
    """

def get_max_budget() -> float:
    """
    Get current maximum budget limit.

    Returns:
        float: Maximum budget in USD
    """

def set_max_budget(budget: float) -> None:
    """
    Set maximum budget limit for API usage.

    Args:
        budget (float): Maximum budget in USD
    """
```

### Model Information

Retrieve detailed information about models including capabilities, pricing, and technical specifications.

```python { .api }
def get_model_info(
    model: str,
    custom_llm_provider: Optional[str] = None,
    api_base: Optional[str] = None
) -> Dict[str, Any]:
    """
    Get comprehensive information about a model.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override
        api_base (Optional[str]): Custom API base URL

    Returns:
        Dict[str, Any]: Model information including:
            - max_tokens: Maximum context window
            - max_input_tokens: Maximum input tokens
            - max_output_tokens: Maximum output tokens
            - input_cost_per_token: Input cost per token
            - output_cost_per_token: Output cost per token
            - litellm_provider: Provider name
            - mode: Model mode (chat, completion, embedding)
            - supports_function_calling: Function calling support
            - supports_parallel_function_calling: Parallel function calling
            - supports_vision: Vision capability support
            - supports_tool_choice: Tool choice support

    Examples:
        # Get OpenAI model info
        info = get_model_info("gpt-4")
        print(f"Max tokens: {info['max_tokens']}")
        print(f"Input cost: ${info['input_cost_per_token']}")

        # Get provider-specific model info
        info = get_model_info("claude-3-sonnet-20240229", "anthropic")

        # Get custom model info
        info = get_model_info("custom/model", api_base="https://api.example.com")
    """

def get_model_list() -> List[str]:
    """
    Get list of all supported models across all providers.

    Returns:
        List[str]: List of model identifiers
    """

def get_supported_openai_params(
    model: str,
    custom_llm_provider: str
) -> List[str]:
    """
    Get list of OpenAI parameters supported by a provider/model.

    Args:
        model (str): Model identifier
        custom_llm_provider (str): Provider name

    Returns:
        List[str]: List of supported parameter names

    Examples:
        # Check what parameters Anthropic supports
        params = get_supported_openai_params("claude-3-sonnet-20240229", "anthropic")
        print("Supported params:", params)

        # Check Cohere parameter support
        params = get_supported_openai_params("command-nightly", "cohere")
    """

def get_llm_provider(
    model: str,
    custom_llm_provider: Optional[str] = None,
    api_base: Optional[str] = None
) -> Tuple[str, str, str, Optional[str]]:
    """
    Detect and return provider information for a model.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override
        api_base (Optional[str]): Custom API base

    Returns:
        Tuple[str, str, str, Optional[str]]: (model, custom_llm_provider, dynamic_api_key, api_base)

    Examples:
        # Auto-detect provider
        model, provider, api_key, api_base = get_llm_provider("gpt-4")
        print(f"Provider: {provider}")

        # Check Azure OpenAI
        model, provider, api_key, api_base = get_llm_provider("azure/gpt-4")
    """
```

### Capability Detection

Check model capabilities and feature support across different providers.

```python { .api }
def supports_function_calling(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports function calling.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if function calling is supported
    """

def supports_parallel_function_calling(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports parallel function calling.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if parallel function calling is supported
    """

def supports_vision(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports vision/image inputs.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if vision is supported
    """

def supports_response_schema(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports structured response schemas.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if response schema is supported
    """

def supports_system_messages(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports system messages.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if system messages are supported
    """

def supports_tool_choice(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports tool choice parameter.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if tool choice is supported
    """

def supports_audio_input(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports audio input.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if audio input is supported
    """

def supports_audio_output(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports audio output.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if audio output is supported
    """

def supports_reasoning(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports reasoning capabilities (like OpenAI o1).

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if reasoning mode is supported
    """

def supports_prompt_caching(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports prompt caching.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if prompt caching is supported
    """

def supports_computer_use(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports computer use/interaction capabilities.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if computer use is supported
    """
```

### Environment Validation

Validate API keys, environment setup, and provider configurations.

```python { .api }
def validate_environment(
    model: str,
    api_key: Optional[str] = None
) -> Dict[str, str]:
    """
    Validate environment configuration for a model.

    Args:
        model (str): Model to validate environment for
        api_key (Optional[str]): API key to validate

    Returns:
        Dict[str, str]: Validation results with missing/invalid keys

    Raises:
        ValueError: Invalid model or missing required configuration

    Examples:
        # Validate OpenAI setup
        validation = validate_environment("gpt-4")
        if validation:
            print("Missing configuration:", validation)

        # Validate with specific API key
        validation = validate_environment("gpt-4", "sk-test-key")

        # Validate Azure setup
        validation = validate_environment("azure/gpt-4")
    """

def check_valid_key(model: str, api_key: str) -> bool:
    """
    Test if an API key is valid for a model.

    Args:
        model (str): Model identifier
        api_key (str): API key to test

    Returns:
        bool: True if key is valid

    Examples:
        # Test OpenAI key
        is_valid = check_valid_key("gpt-4", "sk-test-key")

        # Test Anthropic key
        is_valid = check_valid_key("claude-3-sonnet-20240229", "test-key")
    """

def get_optional_params(model: str) -> List[str]:
    """
    Get list of optional parameters for a model.

    Args:
        model (str): Model identifier

    Returns:
        List[str]: List of optional parameter names
    """

def get_required_params(model: str) -> List[str]:
    """
    Get list of required parameters for a model.

    Args:
        model (str): Model identifier

    Returns:
        List[str]: List of required parameter names
    """
```

### Batch Processing Utilities

Utilities for processing multiple requests efficiently.

```python { .api }
def batch_completion(
    requests: List[Dict[str, Any]],
    max_workers: int = 5,
    timeout: float = 60.0
) -> List[Union[ModelResponse, Exception]]:
    """
    Process multiple completion requests in parallel.

    Args:
        requests (List[Dict]): List of completion request parameters
        max_workers (int): Maximum concurrent workers
        timeout (float): Timeout per request

    Returns:
        List[Union[ModelResponse, Exception]]: Results or exceptions for each request

    Examples:
        requests = [
            {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello 1"}]},
            {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello 2"}]},
            {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello 3"}]}
        ]

        results = batch_completion(requests, max_workers=3)

        for i, result in enumerate(results):
            if isinstance(result, Exception):
                print(f"Request {i} failed: {result}")
            else:
                print(f"Request {i}: {result.choices[0].message.content}")
    """

async def abatch_completion(
    requests: List[Dict[str, Any]],
    max_concurrent: int = 5
) -> List[Union[ModelResponse, Exception]]:
    """
    Async version of batch completion processing.

    Args:
        requests (List[Dict]): List of completion request parameters
        max_concurrent (int): Maximum concurrent requests

    Returns:
        List[Union[ModelResponse, Exception]]: Results or exceptions for each request
    """
```

## Type Definitions

```python { .api }
class CostPerToken:
    """Cost configuration for custom models"""
    input_cost_per_token: float
    output_cost_per_token: float
    litellm_provider: Optional[str] = None
    mode: Optional[Literal["chat", "completion", "embedding"]] = None

class TokenizerConfig:
    """Custom tokenizer configuration"""
    tokenizer_name: str
    tokenizer_params: Dict[str, Any]
    encoding_name: Optional[str] = None

class ModelCapabilities:
    """Model capability flags"""
    supports_function_calling: bool = False
    supports_parallel_function_calling: bool = False
    supports_vision: bool = False
    supports_response_schema: bool = False
    supports_system_messages: bool = False
    supports_tool_choice: bool = False
    supports_audio_input: bool = False
    supports_audio_output: bool = False
    supports_reasoning: bool = False
    supports_prompt_caching: bool = False
    supports_computer_use: bool = False
    max_tokens: Optional[int] = None
    max_input_tokens: Optional[int] = None
    max_output_tokens: Optional[int] = None
```

## Usage Examples

### Token Counting and Cost Estimation

```python
import litellm

# Count tokens for different input types
text_tokens = litellm.token_counter(model="gpt-4", text="Hello, world!")
print(f"Text tokens: {text_tokens}")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is machine learning?"}
]
message_tokens = litellm.token_counter(model="gpt-4", messages=messages)
print(f"Message tokens: {message_tokens}")

# Estimate total cost before making request
prompt_tokens = litellm.token_counter(model="gpt-4", messages=messages)
estimated_response_tokens = 200  # Estimate
estimated_cost = litellm.cost_per_token(
    model="gpt-4",
    prompt_tokens=prompt_tokens,
    completion_tokens=estimated_response_tokens
)
print(f"Estimated cost: ${estimated_cost:.6f}")

# Make request and calculate actual cost
response = litellm.completion(model="gpt-4", messages=messages)
actual_cost = litellm.completion_cost(response)
print(f"Actual cost: ${actual_cost:.6f}")
```

### Model Capability Detection

```python
import litellm

def check_model_capabilities(model: str):
    """Check and display all capabilities for a model."""

    capabilities = {
        "Function Calling": litellm.supports_function_calling(model),
        "Parallel Function Calling": litellm.supports_parallel_function_calling(model),
        "Vision": litellm.supports_vision(model),
        "Response Schema": litellm.supports_response_schema(model),
        "System Messages": litellm.supports_system_messages(model),
        "Tool Choice": litellm.supports_tool_choice(model),
        "Audio Input": litellm.supports_audio_input(model),
        "Audio Output": litellm.supports_audio_output(model),
        "Reasoning": litellm.supports_reasoning(model),
        "Prompt Caching": litellm.supports_prompt_caching(model),
        "Computer Use": litellm.supports_computer_use(model)
    }

    print(f"Capabilities for {model}:")
    for capability, supported in capabilities.items():
        status = "✓" if supported else "✗"
        print(f" {status} {capability}")

    # Get detailed model info
    model_info = litellm.get_model_info(model)
    print(f"\nModel Info:")
    print(f" Max tokens: {model_info.get('max_tokens', 'Unknown')}")
    print(f" Provider: {model_info.get('litellm_provider', 'Unknown')}")
    print(f" Input cost: ${model_info.get('input_cost_per_token', 0)}")
    print(f" Output cost: ${model_info.get('output_cost_per_token', 0)}")

# Check capabilities for different models
models_to_check = [
    "gpt-4",
    "gpt-4-vision-preview",
    "claude-3-sonnet-20240229",
    "gemini-pro"
]

for model in models_to_check:
    check_model_capabilities(model)
    print("-" * 50)
```

### Environment Validation and Setup

```python
import litellm
import os

def setup_and_validate_providers():
    """Setup and validate multiple provider configurations."""

    providers_to_check = [
        ("gpt-4", "OpenAI"),
        ("claude-3-sonnet-20240229", "Anthropic"),
        ("command-nightly", "Cohere"),
        ("gemini-pro", "Google"),
        ("bedrock/anthropic.claude-v2", "AWS Bedrock"),
        ("azure/gpt-4", "Azure OpenAI")
    ]

    for model, provider_name in providers_to_check:
        print(f"\nValidating {provider_name} ({model}):")

        try:
            # Validate environment
            validation_result = litellm.validate_environment(model)

            if not validation_result:
                print(" ✓ Environment is properly configured")

                # Test with a simple request if environment is valid
                try:
                    response = litellm.completion(
                        model=model,
                        messages=[{"role": "user", "content": "Hello"}],
                        max_tokens=5
                    )
                    print(" ✓ API call successful")

                    # Calculate cost
                    cost = litellm.completion_cost(response)
                    print(f" ✓ Request cost: ${cost:.6f}")

                except Exception as e:
                    print(f" ✗ API call failed: {e}")
            else:
                print(" ✗ Missing configuration:")
                for key, message in validation_result.items():
                    print(f" - {key}: {message}")

        except Exception as e:
            print(f" ✗ Validation failed: {e}")

# Run validation
setup_and_validate_providers()

# Set up missing environment variables
def setup_missing_env_vars():
    """Interactively setup missing environment variables."""

    env_vars = {
        "OPENAI_API_KEY": "OpenAI API key",
        "ANTHROPIC_API_KEY": "Anthropic API key",
        "COHERE_API_KEY": "Cohere API key",
        "GOOGLE_APPLICATION_CREDENTIALS": "Google credentials file path",
        "AWS_ACCESS_KEY_ID": "AWS access key",
        "AZURE_API_KEY": "Azure OpenAI API key"
    }

    for var_name, description in env_vars.items():
        if not os.environ.get(var_name):
            value = input(f"Enter {description} (or press Enter to skip): ").strip()
            if value:
                os.environ[var_name] = value
                print(f"Set {var_name}")

# Uncomment to run interactive setup
# setup_missing_env_vars()
```

### Batch Processing

```python
import litellm
import asyncio

def process_batch_sync():
    """Process multiple requests synchronously with batch utility."""

    requests = [
        {
            "model": "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": f"Count to {i}"}],
            "max_tokens": 50
        }
        for i in range(1, 6)
    ]

    print("Processing batch synchronously...")
    results = litellm.batch_completion(requests, max_workers=3)

    for i, result in enumerate(results):
        if isinstance(result, Exception):
            print(f"Request {i+1} failed: {result}")
        else:
            content = result.choices[0].message.content
            cost = litellm.completion_cost(result)
            print(f"Request {i+1}: {content} (${cost:.6f})")

async def process_batch_async():
    """Process multiple requests asynchronously."""

    requests = [
        {
            "model": "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": f"What is {i} + {i}?"}],
            "max_tokens": 20
        }
        for i in range(1, 11)
    ]

    print("Processing batch asynchronously...")
    results = await litellm.abatch_completion(requests, max_concurrent=5)

    total_cost = 0
    for i, result in enumerate(results):
        if isinstance(result, Exception):
            print(f"Request {i+1} failed: {result}")
        else:
            content = result.choices[0].message.content.strip()
            cost = litellm.completion_cost(result)
            total_cost += cost
            print(f"Request {i+1}: {content} (${cost:.6f})")

    print(f"Total batch cost: ${total_cost:.6f}")

# Run batch processing examples
process_batch_sync()
asyncio.run(process_batch_async())
```

### Advanced Cost Management

```python
import litellm
from typing import List, Dict, Any

class CostTracker:
    """Advanced cost tracking and budget management."""

    def __init__(self, daily_budget: float = 10.0):
        self.daily_budget = daily_budget
        self.current_cost = 0.0
        self.requests = []

    def estimate_request_cost(self, model: str, messages: List[Dict], max_tokens: int = 256) -> float:
        """Estimate cost before making request."""
        prompt_tokens = litellm.token_counter(model=model, messages=messages)
        estimated_cost = litellm.cost_per_token(
            model=model,
            prompt_tokens=prompt_tokens,
            completion_tokens=max_tokens
        )
        return estimated_cost

    def can_afford_request(self, estimated_cost: float) -> bool:
        """Check if request fits within budget."""
        return (self.current_cost + estimated_cost) <= self.daily_budget

    def track_request(self, model: str, response: Any, estimated_cost: float):
        """Track completed request cost."""
        actual_cost = litellm.completion_cost(response)
        self.current_cost += actual_cost

        self.requests.append({
            "model": model,
            "estimated_cost": estimated_cost,
            "actual_cost": actual_cost,
            "tokens_used": response.usage.total_tokens if hasattr(response, 'usage') else 0
        })

        print(f"Request: ${actual_cost:.6f} (est: ${estimated_cost:.6f})")
        print(f"Budget: ${self.current_cost:.2f}/${self.daily_budget:.2f}")

    def safe_completion(self, **kwargs):
        """Make completion with budget checking."""
        model = kwargs.get("model")
        messages = kwargs.get("messages")
        max_tokens = kwargs.get("max_tokens", 256)

        # Estimate cost
        estimated_cost = self.estimate_request_cost(model, messages, max_tokens)

        if not self.can_afford_request(estimated_cost):
            raise litellm.BudgetExceededError(
                f"Request would exceed budget: ${estimated_cost:.6f} "
                f"(remaining: ${self.daily_budget - self.current_cost:.6f})"
            )

        # Make request
        response = litellm.completion(**kwargs)

        # Track cost
        self.track_request(model, response, estimated_cost)

        return response

    def get_stats(self) -> Dict[str, Any]:
        """Get cost tracking statistics."""
        if not self.requests:
            return {"total_requests": 0, "total_cost": 0}

        total_requests = len(self.requests)
        total_tokens = sum(r["tokens_used"] for r in self.requests)
        avg_cost_per_request = self.current_cost / total_requests

        model_usage = {}
        for request in self.requests:
            model = request["model"]
            if model not in model_usage:
                model_usage[model] = {"requests": 0, "cost": 0, "tokens": 0}
            model_usage[model]["requests"] += 1
            model_usage[model]["cost"] += request["actual_cost"]
            model_usage[model]["tokens"] += request["tokens_used"]

        return {
            "total_requests": total_requests,
            "total_cost": self.current_cost,
            "total_tokens": total_tokens,
            "avg_cost_per_request": avg_cost_per_request,
            "budget_used": (self.current_cost / self.daily_budget) * 100,
            "model_usage": model_usage
        }

# Usage example
tracker = CostTracker(daily_budget=5.00)

try:
    # Make tracked requests
    response1 = tracker.safe_completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "What is AI?"}],
        max_tokens=100
    )

    response2 = tracker.safe_completion(
        model="gpt-4",
        messages=[{"role": "user", "content": "Explain quantum computing"}],
        max_tokens=200
    )

    # Get statistics
    stats = tracker.get_stats()
    print("\nCost Tracking Statistics:")
    print(f"Total requests: {stats['total_requests']}")
    print(f"Total cost: ${stats['total_cost']:.6f}")
    print(f"Budget used: {stats['budget_used']:.1f}%")
    print(f"Average cost per request: ${stats['avg_cost_per_request']:.6f}")

    print("\nModel usage breakdown:")
    for model, usage in stats['model_usage'].items():
        print(f" {model}: {usage['requests']} requests, "
              f"${usage['cost']:.6f}, {usage['tokens']} tokens")

except litellm.BudgetExceededError as e:
    print(f"Budget exceeded: {e}")
```