# Utilities & Helpers

Comprehensive utility functions for token counting, cost calculation, model information, capability detection, and validation. These tools enable advanced LLM management, optimization, and monitoring across all supported providers.

## Capabilities

### Token Counting

Accurate token counting for prompts, messages, and completions across different model tokenizers.

```python { .api }
def token_counter(
    model: str = "",
    text: Union[str, List[str]] = "",
    messages: Optional[List[Dict[str, str]]] = None,
    count_response_tokens: Optional[bool] = False,
    custom_tokenizer: Optional[Dict] = None
) -> int:
    """
    Count tokens for text or messages using model-specific tokenizers.

    Args:
        model (str): Model identifier for tokenizer selection
        text (Union[str, List[str]]): Text string or list of strings to count
        messages (Optional[List[Dict]]): Chat messages in OpenAI format
        count_response_tokens (Optional[bool]): Include estimated response tokens
        custom_tokenizer (Optional[Dict]): Custom tokenizer configuration

    Returns:
        int: Total token count

    Raises:
        ValueError: Invalid input parameters or unsupported model
        ImportError: Required tokenizer package not installed

    Examples:
        # Count tokens in text
        tokens = token_counter(model="gpt-4", text="Hello, world!")

        # Count tokens in messages
        messages = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Hello!"}
        ]
        tokens = token_counter(model="gpt-4", messages=messages)

        # Include response token estimation
        tokens = token_counter(
            model="gpt-4",
            messages=messages,
            count_response_tokens=True
        )
    """

def encode(
    model: str,
    text: str,
    custom_tokenizer: Optional[Dict] = None
) -> List[int]:
    """
    Encode text to tokens using model-specific tokenizer.

    Args:
        model (str): Model identifier
        text (str): Text to encode
        custom_tokenizer (Optional[Dict]): Custom tokenizer configuration

    Returns:
        List[int]: List of token IDs
    """

def decode(
    model: str,
    tokens: List[int],
    custom_tokenizer: Optional[Dict] = None
) -> str:
    """
    Decode tokens back to text using model-specific tokenizer.

    Args:
        model (str): Model identifier
        tokens (List[int]): Token IDs to decode
        custom_tokenizer (Optional[Dict]): Custom tokenizer configuration

    Returns:
        str: Decoded text string
    """
```

### Cost Calculation

Calculate costs for completions, embeddings, and other API operations across all providers.

```python { .api }
def completion_cost(
    completion_response: Union[ModelResponse, EmbeddingResponse, ImageResponse, TranscriptionResponse],
    model: Optional[str] = None,
    prompt_tokens: Optional[int] = None,
    completion_tokens: Optional[int] = None,
    custom_cost_per_token: Optional[CostPerToken] = None
) -> float:
    """
    Calculate cost for a completion response.

    Args:
        completion_response: Response object from LiteLLM API call
        model (Optional[str]): Model identifier override
        prompt_tokens (Optional[int]): Input token count override
        completion_tokens (Optional[int]): Output token count override
        custom_cost_per_token (Optional[CostPerToken]): Custom cost configuration

    Returns:
        float: Cost in USD

    Raises:
        ValueError: Missing required information for cost calculation

    Examples:
        # Calculate cost from response
        response = litellm.completion(model="gpt-4", messages=messages)
        cost = completion_cost(response)

        # Calculate with custom token counts
        cost = completion_cost(
            response,
            prompt_tokens=100,
            completion_tokens=50
        )

        # Calculate with custom cost configuration
        custom_costs = CostPerToken(
            input_cost_per_token=0.00001,
            output_cost_per_token=0.00003
        )
        cost = completion_cost(response, custom_cost_per_token=custom_costs)
    """

def cost_per_token(
    model: str = "",
    prompt_tokens: int = 0,
    completion_tokens: int = 0,
    custom_cost_per_token: Optional[CostPerToken] = None
) -> float:
    """
    Calculate cost based on token counts and model pricing.

    Args:
        model (str): Model identifier
        prompt_tokens (int): Input token count
        completion_tokens (int): Output token count
        custom_cost_per_token (Optional[CostPerToken]): Custom pricing

    Returns:
        float: Total cost in USD

    Examples:
        # Calculate cost for specific token counts
        cost = cost_per_token(
            model="gpt-4",
            prompt_tokens=100,
            completion_tokens=50
        )

        # Calculate with custom pricing
        cost = cost_per_token(
            model="custom-model",
            prompt_tokens=100,
            completion_tokens=50,
            custom_cost_per_token=CostPerToken(
                input_cost_per_token=0.00001,
                output_cost_per_token=0.00002
            )
        )
    """

def get_max_budget() -> float:
    """
    Get current maximum budget limit.

    Returns:
        float: Maximum budget in USD
    """

def set_max_budget(budget: float) -> None:
    """
    Set maximum budget limit for API usage.

    Args:
        budget (float): Maximum budget in USD
    """
```

### Model Information

Retrieve detailed information about models including capabilities, pricing, and technical specifications.

```python { .api }
def get_model_info(
    model: str,
    custom_llm_provider: Optional[str] = None,
    api_base: Optional[str] = None
) -> Dict[str, Any]:
    """
    Get comprehensive information about a model.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override
        api_base (Optional[str]): Custom API base URL

    Returns:
        Dict[str, Any]: Model information including:
            - max_tokens: Maximum context window
            - max_input_tokens: Maximum input tokens
            - max_output_tokens: Maximum output tokens
            - input_cost_per_token: Input cost per token
            - output_cost_per_token: Output cost per token
            - litellm_provider: Provider name
            - mode: Model mode (chat, completion, embedding)
            - supports_function_calling: Function calling support
            - supports_parallel_function_calling: Parallel function calling
            - supports_vision: Vision capability support
            - supports_tool_choice: Tool choice support

    Examples:
        # Get OpenAI model info
        info = get_model_info("gpt-4")
        print(f"Max tokens: {info['max_tokens']}")
        print(f"Input cost: ${info['input_cost_per_token']}")

        # Get provider-specific model info
        info = get_model_info("claude-3-sonnet-20240229", "anthropic")

        # Get custom model info
        info = get_model_info("custom/model", api_base="https://api.example.com")
    """

def get_model_list() -> List[str]:
    """
    Get list of all supported models across all providers.

    Returns:
        List[str]: List of model identifiers
    """

def get_supported_openai_params(
    model: str,
    custom_llm_provider: str
) -> List[str]:
    """
    Get list of OpenAI parameters supported by a provider/model.

    Args:
        model (str): Model identifier
        custom_llm_provider (str): Provider name

    Returns:
        List[str]: List of supported parameter names

    Examples:
        # Check what parameters Anthropic supports
        params = get_supported_openai_params("claude-3-sonnet-20240229", "anthropic")
        print("Supported params:", params)

        # Check Cohere parameter support
        params = get_supported_openai_params("command-nightly", "cohere")
    """

def get_llm_provider(
    model: str,
    custom_llm_provider: Optional[str] = None,
    api_base: Optional[str] = None
) -> Tuple[str, str, str, Optional[str]]:
    """
    Detect and return provider information for a model.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override
        api_base (Optional[str]): Custom API base

    Returns:
        Tuple[str, str, str, Optional[str]]: (model, custom_llm_provider, dynamic_api_key, api_base)

    Examples:
        # Auto-detect provider
        model, provider, api_key, api_base = get_llm_provider("gpt-4")
        print(f"Provider: {provider}")

        # Check Azure OpenAI
        model, provider, api_key, api_base = get_llm_provider("azure/gpt-4")
    """
```

### Capability Detection

Check model capabilities and feature support across different providers.

```python { .api }
def supports_function_calling(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports function calling.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if function calling is supported
    """

def supports_parallel_function_calling(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports parallel function calling.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if parallel function calling is supported
    """

def supports_vision(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports vision/image inputs.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if vision is supported
    """

def supports_response_schema(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports structured response schemas.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if response schema is supported
    """

def supports_system_messages(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports system messages.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if system messages are supported
    """

def supports_tool_choice(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports tool choice parameter.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if tool choice is supported
    """

def supports_audio_input(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports audio input.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if audio input is supported
    """

def supports_audio_output(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports audio output.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if audio output is supported
    """

def supports_reasoning(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports reasoning capabilities (like OpenAI o1).

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if reasoning mode is supported
    """

def supports_prompt_caching(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports prompt caching.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if prompt caching is supported
    """

def supports_computer_use(
    model: str,
    custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if model supports computer use/interaction capabilities.

    Args:
        model (str): Model identifier
        custom_llm_provider (Optional[str]): Provider override

    Returns:
        bool: True if computer use is supported
    """
```

### Environment Validation

Validate API keys, environment setup, and provider configurations.

```python { .api }
def validate_environment(
    model: str,
    api_key: Optional[str] = None
) -> Dict[str, str]:
    """
    Validate environment configuration for a model.

    Args:
        model (str): Model to validate environment for
        api_key (Optional[str]): API key to validate

    Returns:
        Dict[str, str]: Validation results with missing/invalid keys

    Raises:
        ValueError: Invalid model or missing required configuration

    Examples:
        # Validate OpenAI setup
        validation = validate_environment("gpt-4")
        if validation:
            print("Missing configuration:", validation)

        # Validate with specific API key
        validation = validate_environment("gpt-4", "sk-test-key")

        # Validate Azure setup
        validation = validate_environment("azure/gpt-4")
    """

def check_valid_key(model: str, api_key: str) -> bool:
    """
    Test if an API key is valid for a model.

    Args:
        model (str): Model identifier
        api_key (str): API key to test

    Returns:
        bool: True if key is valid

    Examples:
        # Test OpenAI key
        is_valid = check_valid_key("gpt-4", "sk-test-key")

        # Test Anthropic key
        is_valid = check_valid_key("claude-3-sonnet-20240229", "test-key")
    """

def get_optional_params(model: str) -> List[str]:
    """
    Get list of optional parameters for a model.

    Args:
        model (str): Model identifier

    Returns:
        List[str]: List of optional parameter names
    """

def get_required_params(model: str) -> List[str]:
    """
    Get list of required parameters for a model.

    Args:
        model (str): Model identifier

    Returns:
        List[str]: List of required parameter names
    """
```

### Batch Processing Utilities

Utilities for processing multiple requests efficiently.

```python { .api }
def batch_completion(
    requests: List[Dict[str, Any]],
    max_workers: int = 5,
    timeout: float = 60.0
) -> List[Union[ModelResponse, Exception]]:
    """
    Process multiple completion requests in parallel.

    Args:
        requests (List[Dict]): List of completion request parameters
        max_workers (int): Maximum concurrent workers
        timeout (float): Timeout per request

    Returns:
        List[Union[ModelResponse, Exception]]: Results or exceptions for each request

    Examples:
        requests = [
            {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello 1"}]},
            {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello 2"}]},
            {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello 3"}]}
        ]

        results = batch_completion(requests, max_workers=3)

        for i, result in enumerate(results):
            if isinstance(result, Exception):
                print(f"Request {i} failed: {result}")
            else:
                print(f"Request {i}: {result.choices[0].message.content}")
    """

async def abatch_completion(
    requests: List[Dict[str, Any]],
    max_concurrent: int = 5
) -> List[Union[ModelResponse, Exception]]:
    """
    Async version of batch completion processing.

    Args:
        requests (List[Dict]): List of completion request parameters
        max_concurrent (int): Maximum concurrent requests

    Returns:
        List[Union[ModelResponse, Exception]]: Results or exceptions for each request
    """
```

## Type Definitions

```python { .api }
class CostPerToken:
    """Cost configuration for custom models"""
    input_cost_per_token: float
    output_cost_per_token: float
    litellm_provider: Optional[str] = None
    mode: Optional[Literal["chat", "completion", "embedding"]] = None

class TokenizerConfig:
    """Custom tokenizer configuration"""
    tokenizer_name: str
    tokenizer_params: Dict[str, Any]
    encoding_name: Optional[str] = None

class ModelCapabilities:
    """Model capability flags"""
    supports_function_calling: bool = False
    supports_parallel_function_calling: bool = False
    supports_vision: bool = False
    supports_response_schema: bool = False
    supports_system_messages: bool = False
    supports_tool_choice: bool = False
    supports_audio_input: bool = False
    supports_audio_output: bool = False
    supports_reasoning: bool = False
    supports_prompt_caching: bool = False
    supports_computer_use: bool = False
    max_tokens: Optional[int] = None
    max_input_tokens: Optional[int] = None
    max_output_tokens: Optional[int] = None
```

## Usage Examples

### Token Counting and Cost Estimation

```python
import litellm

# Count tokens for different input types
text_tokens = litellm.token_counter(model="gpt-4", text="Hello, world!")
print(f"Text tokens: {text_tokens}")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is machine learning?"}
]
message_tokens = litellm.token_counter(model="gpt-4", messages=messages)
print(f"Message tokens: {message_tokens}")

# Estimate total cost before making request
prompt_tokens = litellm.token_counter(model="gpt-4", messages=messages)
estimated_response_tokens = 200  # Estimate
estimated_cost = litellm.cost_per_token(
    model="gpt-4",
    prompt_tokens=prompt_tokens,
    completion_tokens=estimated_response_tokens
)
print(f"Estimated cost: ${estimated_cost:.6f}")

# Make request and calculate actual cost
response = litellm.completion(model="gpt-4", messages=messages)
actual_cost = litellm.completion_cost(response)
print(f"Actual cost: ${actual_cost:.6f}")
```

### Model Capability Detection

```python
import litellm

def check_model_capabilities(model: str):
    """Check and display all capabilities for a model."""

    capabilities = {
        "Function Calling": litellm.supports_function_calling(model),
        "Parallel Function Calling": litellm.supports_parallel_function_calling(model),
        "Vision": litellm.supports_vision(model),
        "Response Schema": litellm.supports_response_schema(model),
        "System Messages": litellm.supports_system_messages(model),
        "Tool Choice": litellm.supports_tool_choice(model),
        "Audio Input": litellm.supports_audio_input(model),
        "Audio Output": litellm.supports_audio_output(model),
        "Reasoning": litellm.supports_reasoning(model),
        "Prompt Caching": litellm.supports_prompt_caching(model),
        "Computer Use": litellm.supports_computer_use(model)
    }

    print(f"Capabilities for {model}:")
    for capability, supported in capabilities.items():
        status = "✓" if supported else "✗"
        print(f" {status} {capability}")

    # Get detailed model info
    model_info = litellm.get_model_info(model)
    print(f"\nModel Info:")
    print(f" Max tokens: {model_info.get('max_tokens', 'Unknown')}")
    print(f" Provider: {model_info.get('litellm_provider', 'Unknown')}")
    print(f" Input cost: ${model_info.get('input_cost_per_token', 0)}")
    print(f" Output cost: ${model_info.get('output_cost_per_token', 0)}")

# Check capabilities for different models
models_to_check = [
    "gpt-4",
    "gpt-4-vision-preview",
    "claude-3-sonnet-20240229",
    "gemini-pro"
]

for model in models_to_check:
    check_model_capabilities(model)
    print("-" * 50)
```

### Environment Validation and Setup

```python
import litellm
import os

def setup_and_validate_providers():
    """Setup and validate multiple provider configurations."""

    providers_to_check = [
        ("gpt-4", "OpenAI"),
        ("claude-3-sonnet-20240229", "Anthropic"),
        ("command-nightly", "Cohere"),
        ("gemini-pro", "Google"),
        ("bedrock/anthropic.claude-v2", "AWS Bedrock"),
        ("azure/gpt-4", "Azure OpenAI")
    ]

    for model, provider_name in providers_to_check:
        print(f"\nValidating {provider_name} ({model}):")

        try:
            # Validate environment
            validation_result = litellm.validate_environment(model)

            if not validation_result:
                print(" ✓ Environment is properly configured")

                # Test with a simple request if environment is valid
                try:
                    response = litellm.completion(
                        model=model,
                        messages=[{"role": "user", "content": "Hello"}],
                        max_tokens=5
                    )
                    print(" ✓ API call successful")

                    # Calculate cost
                    cost = litellm.completion_cost(response)
                    print(f" ✓ Request cost: ${cost:.6f}")

                except Exception as e:
                    print(f" ✗ API call failed: {e}")
            else:
                print(" ✗ Missing configuration:")
                for key, message in validation_result.items():
                    print(f" - {key}: {message}")

        except Exception as e:
            print(f" ✗ Validation failed: {e}")

# Run validation
setup_and_validate_providers()

# Set up missing environment variables
def setup_missing_env_vars():
    """Interactively setup missing environment variables."""

    env_vars = {
        "OPENAI_API_KEY": "OpenAI API key",
        "ANTHROPIC_API_KEY": "Anthropic API key",
        "COHERE_API_KEY": "Cohere API key",
        "GOOGLE_APPLICATION_CREDENTIALS": "Google credentials file path",
        "AWS_ACCESS_KEY_ID": "AWS access key",
        "AZURE_API_KEY": "Azure OpenAI API key"
    }

    for var_name, description in env_vars.items():
        if not os.environ.get(var_name):
            value = input(f"Enter {description} (or press Enter to skip): ").strip()
            if value:
                os.environ[var_name] = value
                print(f"Set {var_name}")

# Uncomment to run interactive setup
# setup_missing_env_vars()
```

### Batch Processing

```python
import litellm
import asyncio

def process_batch_sync():
    """Process multiple requests synchronously with batch utility."""

    requests = [
        {
            "model": "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": f"Count to {i}"}],
            "max_tokens": 50
        }
        for i in range(1, 6)
    ]

    print("Processing batch synchronously...")
    results = litellm.batch_completion(requests, max_workers=3)

    for i, result in enumerate(results):
        if isinstance(result, Exception):
            print(f"Request {i+1} failed: {result}")
        else:
            content = result.choices[0].message.content
            cost = litellm.completion_cost(result)
            print(f"Request {i+1}: {content} (${cost:.6f})")

async def process_batch_async():
    """Process multiple requests asynchronously."""

    requests = [
        {
            "model": "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": f"What is {i} + {i}?"}],
            "max_tokens": 20
        }
        for i in range(1, 11)
    ]

    print("Processing batch asynchronously...")
    results = await litellm.abatch_completion(requests, max_concurrent=5)

    total_cost = 0
    for i, result in enumerate(results):
        if isinstance(result, Exception):
            print(f"Request {i+1} failed: {result}")
        else:
            content = result.choices[0].message.content.strip()
            cost = litellm.completion_cost(result)
            total_cost += cost
            print(f"Request {i+1}: {content} (${cost:.6f})")

    print(f"Total batch cost: ${total_cost:.6f}")

# Run batch processing examples
process_batch_sync()
asyncio.run(process_batch_async())
```

### Advanced Cost Management

```python
import litellm
from typing import List, Dict, Any

class CostTracker:
    """Advanced cost tracking and budget management."""

    def __init__(self, daily_budget: float = 10.0):
        self.daily_budget = daily_budget
        self.current_cost = 0.0
        self.requests = []

    def estimate_request_cost(self, model: str, messages: List[Dict], max_tokens: int = 256) -> float:
        """Estimate cost before making request."""
        prompt_tokens = litellm.token_counter(model=model, messages=messages)
        estimated_cost = litellm.cost_per_token(
            model=model,
            prompt_tokens=prompt_tokens,
            completion_tokens=max_tokens
        )
        return estimated_cost

    def can_afford_request(self, estimated_cost: float) -> bool:
        """Check if request fits within budget."""
        return (self.current_cost + estimated_cost) <= self.daily_budget

    def track_request(self, model: str, response: Any, estimated_cost: float):
        """Track completed request cost."""
        actual_cost = litellm.completion_cost(response)
        self.current_cost += actual_cost

        self.requests.append({
            "model": model,
            "estimated_cost": estimated_cost,
            "actual_cost": actual_cost,
            "tokens_used": response.usage.total_tokens if hasattr(response, 'usage') else 0
        })

        print(f"Request: ${actual_cost:.6f} (est: ${estimated_cost:.6f})")
        print(f"Budget: ${self.current_cost:.2f}/${self.daily_budget:.2f}")

    def safe_completion(self, **kwargs):
        """Make completion with budget checking."""
        model = kwargs.get("model")
        messages = kwargs.get("messages")
        max_tokens = kwargs.get("max_tokens", 256)

        # Estimate cost
        estimated_cost = self.estimate_request_cost(model, messages, max_tokens)

        if not self.can_afford_request(estimated_cost):
            raise litellm.BudgetExceededError(
                f"Request would exceed budget: ${estimated_cost:.6f} "
                f"(remaining: ${self.daily_budget - self.current_cost:.6f})"
            )

        # Make request
        response = litellm.completion(**kwargs)

        # Track cost
        self.track_request(model, response, estimated_cost)

        return response

    def get_stats(self) -> Dict[str, Any]:
        """Get cost tracking statistics."""
        if not self.requests:
            return {"total_requests": 0, "total_cost": 0}

        total_requests = len(self.requests)
        total_tokens = sum(r["tokens_used"] for r in self.requests)
        avg_cost_per_request = self.current_cost / total_requests

        model_usage = {}
        for request in self.requests:
            model = request["model"]
            if model not in model_usage:
                model_usage[model] = {"requests": 0, "cost": 0, "tokens": 0}
            model_usage[model]["requests"] += 1
            model_usage[model]["cost"] += request["actual_cost"]
            model_usage[model]["tokens"] += request["tokens_used"]

        return {
            "total_requests": total_requests,
            "total_cost": self.current_cost,
            "total_tokens": total_tokens,
            "avg_cost_per_request": avg_cost_per_request,
            "budget_used": (self.current_cost / self.daily_budget) * 100,
            "model_usage": model_usage
        }

# Usage example
tracker = CostTracker(daily_budget=5.00)

try:
    # Make tracked requests
    response1 = tracker.safe_completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "What is AI?"}],
        max_tokens=100
    )

    response2 = tracker.safe_completion(
        model="gpt-4",
        messages=[{"role": "user", "content": "Explain quantum computing"}],
        max_tokens=200
    )

    # Get statistics
    stats = tracker.get_stats()
    print("\nCost Tracking Statistics:")
    print(f"Total requests: {stats['total_requests']}")
    print(f"Total cost: ${stats['total_cost']:.6f}")
    print(f"Budget used: {stats['budget_used']:.1f}%")
    print(f"Average cost per request: ${stats['avg_cost_per_request']:.6f}")

    print("\nModel usage breakdown:")
    for model, usage in stats['model_usage'].items():
        print(f" {model}: {usage['requests']} requests, "
              f"${usage['cost']:.6f}, {usage['tokens']} tokens")

except litellm.BudgetExceededError as e:
    print(f"Budget exceeded: {e}")
```