# Utilities & Helpers

Comprehensive utility functions for token counting, cost calculation, model information, capability detection, and validation. These tools enable advanced LLM management, optimization, and monitoring across all supported providers.

## Capabilities

### Token Counting

Accurate token counting for prompts, messages, and completions across different model tokenizers.

```python { .api }
def token_counter(
    model: str = "",
    text: Union[str, List[str]] = "",
    messages: Optional[List[Dict[str, str]]] = None,
    count_response_tokens: Optional[bool] = False,
    custom_tokenizer: Optional[Dict] = None
) -> int:
    """
    Count tokens for text or messages using model-specific tokenizers.

    Args:
        model (str): Model identifier for tokenizer selection
        text (Union[str, List[str]]): Text string or list of strings to count
        messages (Optional[List[Dict]]): Chat messages in OpenAI format
        count_response_tokens (Optional[bool]): Include estimated response tokens
        custom_tokenizer (Optional[Dict]): Custom tokenizer configuration

    Returns:
        int: Total token count

    Raises:
        ValueError: Invalid input parameters or unsupported model
        ImportError: Required tokenizer package not installed

    Examples:
        # Count tokens in text
        tokens = token_counter(model="gpt-4", text="Hello, world!")

        # Count tokens in messages
        messages = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Hello!"}
        ]
        tokens = token_counter(model="gpt-4", messages=messages)

        # Include response token estimation
        tokens = token_counter(
            model="gpt-4",
            messages=messages,
            count_response_tokens=True
        )
    """

def encode(
    model: str,
    text: str,
    custom_tokenizer: Optional[Dict] = None
) -> List[int]:
    """
    Encode text to tokens using model-specific tokenizer.

    Args:
        model (str): Model identifier
        text (str): Text to encode
        custom_tokenizer (Optional[Dict]): Custom tokenizer configuration

    Returns:
        List[int]: List of token IDs
    """

def decode(
    model: str,
    tokens: List[int],
    custom_tokenizer: Optional[Dict] = None
) -> str:
    """
    Decode tokens back to text using model-specific tokenizer.

    Args:
        model (str): Model identifier
        tokens (List[int]): Token IDs to decode
        custom_tokenizer (Optional[Dict]): Custom tokenizer configuration

    Returns:
        str: Decoded text string
    """
```

### Cost Calculation

Calculate costs for completions, embeddings, and other API operations across all providers.

```python { .api }
def completion_cost(
    completion_response: Union[ModelResponse, EmbeddingResponse, ImageResponse, TranscriptionResponse],
    model: Optional[str] = None,
    prompt_tokens: Optional[int] = None,
    completion_tokens: Optional[int] = None,
    custom_cost_per_token: Optional[CostPerToken] = None
) -> float:
    """
    Calculate cost for a completion response.

    Args:
        completion_response: Response object from LiteLLM API call
        model (Optional[str]): Model identifier override
        prompt_tokens (Optional[int]): Input token count override
        completion_tokens (Optional[int]): Output token count override
        custom_cost_per_token (Optional[CostPerToken]): Custom cost configuration

    Returns:
        float: Cost in USD

    Raises:
        ValueError: Missing required information for cost calculation

    Examples:
        # Calculate cost from response
        response = litellm.completion(model="gpt-4", messages=messages)
        cost = completion_cost(response)

        # Calculate with custom token counts
        cost = completion_cost(
            response,
            prompt_tokens=100,
            completion_tokens=50
        )

        # Calculate with custom cost configuration
        custom_costs = CostPerToken(
            input_cost_per_token=0.00001,
            output_cost_per_token=0.00003
        )
        cost = completion_cost(response, custom_cost_per_token=custom_costs)
    """

def cost_per_token(
    model: str = "",
    prompt_tokens: int = 0,
    completion_tokens: int = 0,
    custom_cost_per_token: Optional[CostPerToken] = None
) -> float:
    """
    Calculate cost based on token counts and model pricing.

    Args:
        model (str): Model identifier
        prompt_tokens (int): Input token count
        completion_tokens (int): Output token count
        custom_cost_per_token (Optional[CostPerToken]): Custom pricing

    Returns:
        float: Total cost in USD

    Examples:
        # Calculate cost for specific token counts
        cost = cost_per_token(
            model="gpt-4",
            prompt_tokens=100,
            completion_tokens=50
        )

        # Calculate with custom pricing
        cost = cost_per_token(
            model="custom-model",
            prompt_tokens=100,
            completion_tokens=50,
            custom_cost_per_token=CostPerToken(
                input_cost_per_token=0.00001,
                output_cost_per_token=0.00002
            )
        )
    """

def get_max_budget() -> float:
    """
    Get current maximum budget limit.

    Returns:
        float: Maximum budget in USD
    """

def set_max_budget(budget: float) -> None:
    """
    Set maximum budget limit for API usage.

    Args:
        budget (float): Maximum budget in USD
    """
```

### Model Information

Retrieve detailed information about models including capabilities, pricing, and technical specifications.

```python { .api }
def get_model_info(
    model: str,
    custom_llm_provider: Optional[str] = None,
    api_base: Optional[str] = None
) -> Dict[str, Any]:
    """
    Get comprehensive information about a model.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override
        api_base (Optional[str]): Custom API base URL

    Returns:
        Dict[str, Any]: Model information including:
            - max_tokens: Maximum context window
            - max_input_tokens: Maximum input tokens
            - max_output_tokens: Maximum output tokens
            - input_cost_per_token: Input cost per token
            - output_cost_per_token: Output cost per token
            - litellm_provider: Provider name
            - mode: Model mode (chat, completion, embedding)
            - supports_function_calling: Function calling support
            - supports_parallel_function_calling: Parallel function calling
            - supports_vision: Vision capability support
            - supports_tool_choice: Tool choice support

    Examples:
        # Get OpenAI model info
        info = get_model_info("gpt-4")
        print(f"Max tokens: {info['max_tokens']}")
        print(f"Input cost: ${info['input_cost_per_token']}")

        # Get provider-specific model info
        info = get_model_info("claude-3-sonnet-20240229", "anthropic")

        # Get custom model info
        info = get_model_info("custom/model", api_base="https://api.example.com")
    """

def get_model_list() -> List[str]:
    """
    Get list of all supported models across all providers.

    Returns:
        List[str]: List of model identifiers
    """

def get_supported_openai_params(
    model: str,
    custom_llm_provider: str
) -> List[str]:
    """
    Get list of OpenAI parameters supported by a provider/model.

    Args:
        model (str): Model identifier
        custom_llm_provider (str): Provider name

    Returns:
        List[str]: List of supported parameter names

    Examples:
        # Check what parameters Anthropic supports
        params = get_supported_openai_params("claude-3-sonnet-20240229", "anthropic")
        print("Supported params:", params)

        # Check Cohere parameter support
        params = get_supported_openai_params("command-nightly", "cohere")
    """

def get_llm_provider(
    model: str,
    custom_llm_provider: Optional[str] = None,
    api_base: Optional[str] = None
) -> Tuple[str, str, str, Optional[str]]:
    """
    Detect and return provider information for a model.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override
        api_base (Optional[str]): Custom API base

    Returns:
        Tuple[str, str, str, Optional[str]]: (model, custom_llm_provider, dynamic_api_key, api_base)

    Examples:
        # Auto-detect provider
        model, provider, api_key, api_base = get_llm_provider("gpt-4")
        print(f"Provider: {provider}")

        # Check Azure OpenAI
        model, provider, api_key, api_base = get_llm_provider("azure/gpt-4")
    """
```

### Capability Detection

Check model capabilities and feature support across different providers.

```python { .api }
def supports_function_calling(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports function calling.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if function calling is supported
    """

def supports_parallel_function_calling(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports parallel function calling.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if parallel function calling is supported
    """

def supports_vision(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports vision/image inputs.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if vision is supported
    """

def supports_response_schema(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports structured response schemas.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if response schema is supported
    """

def supports_system_messages(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports system messages.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if system messages are supported
    """

def supports_tool_choice(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports tool choice parameter.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if tool choice is supported
    """

def supports_audio_input(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports audio input.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if audio input is supported
    """

def supports_audio_output(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports audio output.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if audio output is supported
    """

def supports_reasoning(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports reasoning capabilities (like OpenAI o1).

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if reasoning mode is supported
    """

def supports_prompt_caching(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports prompt caching.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if prompt caching is supported
    """

def supports_computer_use(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports computer use/interaction capabilities.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if computer use is supported
    """
```

### Environment Validation

Validate API keys, environment setup, and provider configurations.

```python { .api }
def validate_environment(
    model: str,
    api_key: Optional[str] = None
) -> Dict[str, str]:
    """
    Validate environment configuration for a model.

    Args:
        model (str): Model to validate environment for
        api_key (Optional[str]): API key to validate

    Returns:
        Dict[str, str]: Validation results with missing/invalid keys

    Raises:
        ValueError: Invalid model or missing required configuration

    Examples:
        # Validate OpenAI setup
        validation = validate_environment("gpt-4")
        if validation:
            print("Missing configuration:", validation)

        # Validate with specific API key
        validation = validate_environment("gpt-4", "sk-test-key")

        # Validate Azure setup
        validation = validate_environment("azure/gpt-4")
    """

def check_valid_key(model: str, api_key: str) -> bool:
    """
    Test if an API key is valid for a model.

    Args:
        model (str): Model identifier
        api_key (str): API key to test

    Returns:
        bool: True if key is valid

    Examples:
        # Test OpenAI key
        is_valid = check_valid_key("gpt-4", "sk-test-key")

        # Test Anthropic key
        is_valid = check_valid_key("claude-3-sonnet-20240229", "test-key")
    """

def get_optional_params(model: str) -> List[str]:
    """
    Get list of optional parameters for a model.

    Args:
        model (str): Model identifier

    Returns:
        List[str]: List of optional parameter names
    """

def get_required_params(model: str) -> List[str]:
    """
    Get list of required parameters for a model.

    Args:
        model (str): Model identifier

    Returns:
        List[str]: List of required parameter names
    """
```

### Batch Processing Utilities

Utilities for processing multiple requests efficiently.

```python { .api }
def batch_completion(
    requests: List[Dict[str, Any]],
    max_workers: int = 5,
    timeout: float = 60.0
) -> List[Union[ModelResponse, Exception]]:
    """
    Process multiple completion requests in parallel.

    Args:
        requests (List[Dict]): List of completion request parameters
        max_workers (int): Maximum concurrent workers
        timeout (float): Timeout per request

    Returns:
        List[Union[ModelResponse, Exception]]: Results or exceptions for each request

    Examples:
        requests = [
            {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello 1"}]},
            {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello 2"}]},
            {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello 3"}]}
        ]

        results = batch_completion(requests, max_workers=3)

        for i, result in enumerate(results):
            if isinstance(result, Exception):
                print(f"Request {i} failed: {result}")
            else:
                print(f"Request {i}: {result.choices[0].message.content}")
    """

async def abatch_completion(
    requests: List[Dict[str, Any]],
    max_concurrent: int = 5
) -> List[Union[ModelResponse, Exception]]:
    """
    Async version of batch completion processing.

    Args:
        requests (List[Dict]): List of completion request parameters
        max_concurrent (int): Maximum concurrent requests

    Returns:
        List[Union[ModelResponse, Exception]]: Results or exceptions for each request
    """
```

## Type Definitions

```python { .api }
class CostPerToken:
    """Cost configuration for custom models"""
    input_cost_per_token: float
    output_cost_per_token: float
    litellm_provider: Optional[str] = None
    mode: Optional[Literal["chat", "completion", "embedding"]] = None

class TokenizerConfig:
    """Custom tokenizer configuration"""
    tokenizer_name: str
    tokenizer_params: Dict[str, Any]
    encoding_name: Optional[str] = None

class ModelCapabilities:
    """Model capability flags"""
    supports_function_calling: bool = False
    supports_parallel_function_calling: bool = False
    supports_vision: bool = False
    supports_response_schema: bool = False
    supports_system_messages: bool = False
    supports_tool_choice: bool = False
    supports_audio_input: bool = False
    supports_audio_output: bool = False
    supports_reasoning: bool = False
    supports_prompt_caching: bool = False
    supports_computer_use: bool = False
    max_tokens: Optional[int] = None
    max_input_tokens: Optional[int] = None
    max_output_tokens: Optional[int] = None
```

## Usage Examples

### Token Counting and Cost Estimation

```python
import litellm

# Count tokens for different input types
text_tokens = litellm.token_counter(model="gpt-4", text="Hello, world!")
print(f"Text tokens: {text_tokens}")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is machine learning?"}
]
message_tokens = litellm.token_counter(model="gpt-4", messages=messages)
print(f"Message tokens: {message_tokens}")

# Estimate total cost before making request
prompt_tokens = litellm.token_counter(model="gpt-4", messages=messages)
estimated_response_tokens = 200  # Estimate
estimated_cost = litellm.cost_per_token(
    model="gpt-4",
    prompt_tokens=prompt_tokens,
    completion_tokens=estimated_response_tokens
)
print(f"Estimated cost: ${estimated_cost:.6f}")

# Make request and calculate actual cost
response = litellm.completion(model="gpt-4", messages=messages)
actual_cost = litellm.completion_cost(response)
print(f"Actual cost: ${actual_cost:.6f}")
```

### Model Capability Detection

```python
import litellm

def check_model_capabilities(model: str):
    """Check and display all capabilities for a model."""

    capabilities = {
        "Function Calling": litellm.supports_function_calling(model),
        "Parallel Function Calling": litellm.supports_parallel_function_calling(model),
        "Vision": litellm.supports_vision(model),
        "Response Schema": litellm.supports_response_schema(model),
        "System Messages": litellm.supports_system_messages(model),
        "Tool Choice": litellm.supports_tool_choice(model),
        "Audio Input": litellm.supports_audio_input(model),
        "Audio Output": litellm.supports_audio_output(model),
        "Reasoning": litellm.supports_reasoning(model),
        "Prompt Caching": litellm.supports_prompt_caching(model),
        "Computer Use": litellm.supports_computer_use(model)
    }

    print(f"Capabilities for {model}:")
    for capability, supported in capabilities.items():
        status = "✓" if supported else "✗"
        print(f"  {status} {capability}")

    # Get detailed model info
    model_info = litellm.get_model_info(model)
    print(f"\nModel Info:")
    print(f"  Max tokens: {model_info.get('max_tokens', 'Unknown')}")
    print(f"  Provider: {model_info.get('litellm_provider', 'Unknown')}")
    print(f"  Input cost: ${model_info.get('input_cost_per_token', 0)}")
    print(f"  Output cost: ${model_info.get('output_cost_per_token', 0)}")

# Check capabilities for different models
models_to_check = [
    "gpt-4",
    "gpt-4-vision-preview",
    "claude-3-sonnet-20240229",
    "gemini-pro"
]

for model in models_to_check:
    check_model_capabilities(model)
    print("-" * 50)
```

### Environment Validation and Setup

```python
import litellm
import os

def setup_and_validate_providers():
    """Setup and validate multiple provider configurations."""

    providers_to_check = [
        ("gpt-4", "OpenAI"),
        ("claude-3-sonnet-20240229", "Anthropic"),
        ("command-nightly", "Cohere"),
        ("gemini-pro", "Google"),
        ("bedrock/anthropic.claude-v2", "AWS Bedrock"),
        ("azure/gpt-4", "Azure OpenAI")
    ]

    for model, provider_name in providers_to_check:
        print(f"\nValidating {provider_name} ({model}):")

        try:
            # Validate environment
            validation_result = litellm.validate_environment(model)

            if not validation_result:
                print("  ✓ Environment is properly configured")

                # Test with a simple request if environment is valid
                try:
                    response = litellm.completion(
                        model=model,
                        messages=[{"role": "user", "content": "Hello"}],
                        max_tokens=5
                    )
                    print("  ✓ API call successful")

                    # Calculate cost
                    cost = litellm.completion_cost(response)
                    print(f"  ✓ Request cost: ${cost:.6f}")

                except Exception as e:
                    print(f"  ✗ API call failed: {e}")
            else:
                print("  ✗ Missing configuration:")
                for key, message in validation_result.items():
                    print(f"    - {key}: {message}")

        except Exception as e:
            print(f"  ✗ Validation failed: {e}")

# Run validation
setup_and_validate_providers()

# Set up missing environment variables
def setup_missing_env_vars():
    """Interactively setup missing environment variables."""

    env_vars = {
        "OPENAI_API_KEY": "OpenAI API key",
        "ANTHROPIC_API_KEY": "Anthropic API key",
        "COHERE_API_KEY": "Cohere API key",
        "GOOGLE_APPLICATION_CREDENTIALS": "Google credentials file path",
        "AWS_ACCESS_KEY_ID": "AWS access key",
        "AZURE_API_KEY": "Azure OpenAI API key"
    }

    for var_name, description in env_vars.items():
        if not os.environ.get(var_name):
            value = input(f"Enter {description} (or press Enter to skip): ").strip()
            if value:
                os.environ[var_name] = value
                print(f"Set {var_name}")

# Uncomment to run interactive setup
# setup_missing_env_vars()
```

### Batch Processing

```python
import litellm
import asyncio

def process_batch_sync():
    """Process multiple requests synchronously with batch utility."""

    requests = [
        {
            "model": "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": f"Count to {i}"}],
            "max_tokens": 50
        }
        for i in range(1, 6)
    ]

    print("Processing batch synchronously...")
    results = litellm.batch_completion(requests, max_workers=3)

    for i, result in enumerate(results):
        if isinstance(result, Exception):
            print(f"Request {i+1} failed: {result}")
        else:
            content = result.choices[0].message.content
            cost = litellm.completion_cost(result)
            print(f"Request {i+1}: {content} (${cost:.6f})")

async def process_batch_async():
    """Process multiple requests asynchronously."""

    requests = [
        {
            "model": "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": f"What is {i} + {i}?"}],
            "max_tokens": 20
        }
        for i in range(1, 11)
    ]

    print("Processing batch asynchronously...")
    results = await litellm.abatch_completion(requests, max_concurrent=5)

    total_cost = 0
    for i, result in enumerate(results):
        if isinstance(result, Exception):
            print(f"Request {i+1} failed: {result}")
        else:
            content = result.choices[0].message.content.strip()
            cost = litellm.completion_cost(result)
            total_cost += cost
            print(f"Request {i+1}: {content} (${cost:.6f})")

    print(f"Total batch cost: ${total_cost:.6f}")

# Run batch processing examples
process_batch_sync()
asyncio.run(process_batch_async())
```

### Advanced Cost Management

```python
import litellm
from typing import List, Dict, Any

class CostTracker:
    """Advanced cost tracking and budget management."""

    def __init__(self, daily_budget: float = 10.0):
        self.daily_budget = daily_budget
        self.current_cost = 0.0
        self.requests = []

    def estimate_request_cost(self, model: str, messages: List[Dict], max_tokens: int = 256) -> float:
        """Estimate cost before making request."""
        prompt_tokens = litellm.token_counter(model=model, messages=messages)
        estimated_cost = litellm.cost_per_token(
            model=model,
            prompt_tokens=prompt_tokens,
            completion_tokens=max_tokens
        )
        return estimated_cost

    def can_afford_request(self, estimated_cost: float) -> bool:
        """Check if request fits within budget."""
        return (self.current_cost + estimated_cost) <= self.daily_budget

    def track_request(self, model: str, response: Any, estimated_cost: float):
        """Track completed request cost."""
        actual_cost = litellm.completion_cost(response)
        self.current_cost += actual_cost

        self.requests.append({
            "model": model,
            "estimated_cost": estimated_cost,
            "actual_cost": actual_cost,
            "tokens_used": response.usage.total_tokens if hasattr(response, 'usage') else 0
        })

        print(f"Request: ${actual_cost:.6f} (est: ${estimated_cost:.6f})")
        print(f"Budget: ${self.current_cost:.2f}/${self.daily_budget:.2f}")

    def safe_completion(self, **kwargs):
        """Make completion with budget checking."""
        model = kwargs.get("model")
        messages = kwargs.get("messages")
        max_tokens = kwargs.get("max_tokens", 256)

        # Estimate cost
        estimated_cost = self.estimate_request_cost(model, messages, max_tokens)

        if not self.can_afford_request(estimated_cost):
            raise litellm.BudgetExceededError(
                f"Request would exceed budget: ${estimated_cost:.6f} "
                f"(remaining: ${self.daily_budget - self.current_cost:.6f})"
            )

        # Make request
        response = litellm.completion(**kwargs)

        # Track cost
        self.track_request(model, response, estimated_cost)

        return response

    def get_stats(self) -> Dict[str, Any]:
        """Get cost tracking statistics."""
        if not self.requests:
            return {"total_requests": 0, "total_cost": 0}

        total_requests = len(self.requests)
        total_tokens = sum(r["tokens_used"] for r in self.requests)
        avg_cost_per_request = self.current_cost / total_requests

        model_usage = {}
        for request in self.requests:
            model = request["model"]
            if model not in model_usage:
                model_usage[model] = {"requests": 0, "cost": 0, "tokens": 0}
            model_usage[model]["requests"] += 1
            model_usage[model]["cost"] += request["actual_cost"]
            model_usage[model]["tokens"] += request["tokens_used"]

        return {
            "total_requests": total_requests,
            "total_cost": self.current_cost,
            "total_tokens": total_tokens,
            "avg_cost_per_request": avg_cost_per_request,
            "budget_used": (self.current_cost / self.daily_budget) * 100,
            "model_usage": model_usage
        }

# Usage example
tracker = CostTracker(daily_budget=5.00)

try:
    # Make tracked requests
    response1 = tracker.safe_completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "What is AI?"}],
        max_tokens=100
    )

    response2 = tracker.safe_completion(
        model="gpt-4",
        messages=[{"role": "user", "content": "Explain quantum computing"}],
        max_tokens=200
    )

    # Get statistics
    stats = tracker.get_stats()
    print("\nCost Tracking Statistics:")
    print(f"Total requests: {stats['total_requests']}")
    print(f"Total cost: ${stats['total_cost']:.6f}")
    print(f"Budget used: {stats['budget_used']:.1f}%")
    print(f"Average cost per request: ${stats['avg_cost_per_request']:.6f}")

    print("\nModel usage breakdown:")
    for model, usage in stats['model_usage'].items():
        print(f"  {model}: {usage['requests']} requests, "
              f"${usage['cost']:.6f}, {usage['tokens']} tokens")

except litellm.BudgetExceededError as e:
    print(f"Budget exceeded: {e}")
```