
# Exception Handling

Comprehensive exception hierarchy for robust error handling across all LLM providers. LiteLLM provides consistent error types with detailed context information, enabling reliable error handling and retry logic in production applications.

## Capabilities

### Base Exception Classes

LiteLLM exceptions inherit from OpenAI's exception hierarchy while adding provider-specific context and enhanced error information.

```python { .api }
class AuthenticationError(openai.AuthenticationError):
    """
    Authentication failure with API key or credentials.

    Attributes:
        message (str): Error description
        llm_provider (str): Provider that failed authentication
        model (str): Model being accessed
        response (Optional[httpx.Response]): HTTP response object
    """

class InvalidRequestError(openai.BadRequestError):
    """
    Invalid request parameters or unsupported operations.

    Common causes:
    - Invalid model name
    - Unsupported parameters for provider
    - Malformed request data

    Attributes:
        message (str): Error description
        model (str): Model that caused the error
        llm_provider (str): Provider name
    """

class NotFoundError(openai.NotFoundError):
    """
    Requested resource not found (model, deployment, etc.).

    Attributes:
        message (str): Error description
        model (str): Model that was not found
        llm_provider (str): Provider name
    """

class BadRequestError(openai.BadRequestError):
    """
    Malformed request or invalid parameters.

    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
    """

class RateLimitError(openai.RateLimitError):
    """
    Rate limit exceeded for API usage.

    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
        retry_after (Optional[int]): Seconds to wait before retry
    """

class ServiceUnavailableError(openai.APIStatusError):
    """
    Provider service temporarily unavailable.

    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
        status_code (int): HTTP status code
    """

class InternalServerError(openai.InternalServerError):
    """
    Provider internal server error (5xx status codes).

    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
        status_code (int): HTTP status code
    """

class Timeout(openai.APITimeoutError):
    """
    Request timeout exceeded.

    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
    """

class APIError(openai.APIError):
    """
    Generic API error for unexpected failures.

    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
        status_code (Optional[int]): HTTP status code if available
    """

class APIConnectionError(openai.APIConnectionError):
    """
    Connection failure to provider API.

    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
    """

class APIResponseValidationError(openai.APIResponseValidationError):
    """
    Response validation failure or unexpected format.

    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
    """

class OpenAIError(openai.OpenAIError):
    """
    Base class for all LiteLLM/OpenAI exceptions.

    Attributes:
        message (str): Error description
    """
```

### LiteLLM-Specific Exception Classes

Advanced exception types for LiteLLM-specific functionality and enhanced error handling.

```python { .api }
class ContextWindowExceededError(BadRequestError):
    """
    Input exceeds model's maximum context window.

    Raised when the input tokens exceed the model's context limit.
    Often triggers automatic fallback to models with larger context windows.

    Attributes:
        message (str): Error description with token counts
        model (str): Model with insufficient context
        llm_provider (str): Provider name
        max_tokens (Optional[int]): Model's maximum context size
        current_tokens (Optional[int]): Actual input tokens
    """

class ContentPolicyViolationError(BadRequestError):
    """
    Content violates provider's usage policies.

    Raised when input or generated content violates safety policies.
    May trigger fallback to alternative providers with different policies.

    Attributes:
        message (str): Error description
        model (str): Model that flagged content
        llm_provider (str): Provider name
        violation_type (Optional[str]): Type of policy violation
    """

class BudgetExceededError(Exception):
    """
    Usage exceeds configured budget limits.

    Raised when cumulative costs exceed budget thresholds set in LiteLLM configuration.

    Attributes:
        message (str): Error description with budget information
        current_cost (float): Current accumulated cost
        max_budget (float): Maximum allowed budget
    """

class UnsupportedParamsError(BadRequestError):
    """
    Parameters not supported by the target provider.

    Raised when using OpenAI parameters that aren't supported by other providers.
    Can be handled with drop_params=True or modify_params=True.

    Attributes:
        message (str): Error description
        model (str): Target model
        llm_provider (str): Provider name
        unsupported_params (List[str]): List of unsupported parameter names
    """

class JSONSchemaValidationError(APIResponseValidationError):
    """
    Response doesn't match expected JSON schema.

    Raised when response_format with JSON schema is specified but response
    doesn't conform to the schema.

    Attributes:
        message (str): Validation error details
        model (str): Model that produced invalid response
        schema_errors (List[str]): List of validation errors
    """

class UnprocessableEntityError(openai.UnprocessableEntityError):
    """
    Request was well-formed but couldn't be processed.

    HTTP 422 error for semantically invalid requests.

    Attributes:
        message (str): Error description
        model (str): Model being accessed
        llm_provider (str): Provider name
    """

class LiteLLMUnknownProvider(BadRequestError):
    """
    Provider not recognized by LiteLLM.

    Raised when specifying an unknown custom_llm_provider or model format.

    Attributes:
        message (str): Error description
        provider (str): Unknown provider name
        available_providers (List[str]): List of supported providers
    """

class GuardrailRaisedException(Exception):
    """
    Custom guardrail check failed.

    Raised by user-defined guardrail functions that reject requests or responses.

    Attributes:
        message (str): Guardrail failure reason
        guardrail_name (str): Name of failed guardrail
    """

class BlockedPiiEntityError(Exception):
    """
    PII (Personally Identifiable Information) detected and blocked.

    Raised when PII detection guardrails identify and block sensitive information.

    Attributes:
        message (str): Error description
        detected_entities (List[str]): Types of PII detected
    """

class MockException(openai.APIError):
    """
    Exception for testing and mocking purposes.

    Used in test environments to simulate various error conditions.

    Attributes:
        message (str): Mock error message
        status_code (int): Simulated HTTP status code
    """

class ImageFetchError(BadRequestError):
    """
    Error fetching image for vision models.

    Raised when image URLs are inaccessible or in unsupported formats.

    Attributes:
        message (str): Error description
        image_url (str): URL that failed to fetch
        status_code (Optional[int]): HTTP status from image fetch
    """

class RejectedRequestError(BadRequestError):
    """
    Request was rejected by provider-specific filtering.

    Raised when providers reject requests based on internal policies
    beyond standard content policy violations.

    Attributes:
        message (str): Rejection reason
        model (str): Model that rejected request
        llm_provider (str): Provider name
    """

class MidStreamFallbackError(ServiceUnavailableError):
    """
    Error during streaming that requires fallback.

    Raised when streaming responses fail mid-stream and require
    switching to a fallback provider.

    Attributes:
        message (str): Error description
        original_model (str): Model that failed during streaming
        fallback_model (Optional[str]): Fallback model to use
    """
```

### Exception Attributes

```python { .api }
class LiteLLMExceptionAttributes:
    """Common attributes available on LiteLLM exceptions"""

    # Core identification
    message: str                            # Human-readable error description
    model: Optional[str]                    # Model that caused the error
    llm_provider: Optional[str]             # Provider name (openai, anthropic, etc.)

    # HTTP context
    status_code: Optional[int]              # HTTP status code from provider
    response: Optional[httpx.Response]      # Full HTTP response object
    request: Optional[httpx.Request]        # Original HTTP request

    # Retry and timing
    retry_after: Optional[int]              # Seconds to wait before retry (rate limits)
    response_ms: Optional[float]            # Response time in milliseconds

    # Provider-specific context
    provider_original_error: Optional[str]  # Original error from provider
    provider_error_code: Optional[str]      # Provider-specific error code

    # Token and cost context
    prompt_tokens: Optional[int]            # Input tokens when error occurred
    completion_tokens: Optional[int]        # Output tokens when error occurred
    total_tokens: Optional[int]             # Total tokens when error occurred

    # Request context
    litellm_params: Optional[Dict]          # LiteLLM parameters used
    original_response: Optional[Dict]       # Raw response from provider
```

## Usage Examples

### Basic Exception Handling

```python
import litellm
from litellm import (
    AuthenticationError, RateLimitError, ContextWindowExceededError,
    ContentPolicyViolationError, BudgetExceededError
)

def safe_completion(model, messages, **kwargs):
    try:
        response = litellm.completion(
            model=model,
            messages=messages,
            **kwargs
        )
        return response

    except AuthenticationError as e:
        print(f"Authentication failed for {e.llm_provider}: {e.message}")
        # Handle API key issues
        return None

    except RateLimitError as e:
        print(f"Rate limit hit for {e.model}: {e.message}")
        if e.retry_after:
            print(f"Retry after {e.retry_after} seconds")
        # Implement backoff or queue request
        return None

    except ContextWindowExceededError as e:
        print(f"Context window exceeded: {e.current_tokens}/{e.max_tokens} tokens")
        # Try with shorter input or different model
        return None

    except ContentPolicyViolationError as e:
        print(f"Content policy violation: {e.message}")
        # Handle content filtering
        return None

    except BudgetExceededError as e:
        print(f"Budget exceeded: ${e.current_cost:.2f}/${e.max_budget:.2f}")
        # Handle budget management
        return None

    except Exception as e:
        print(f"Unexpected error: {type(e).__name__}: {e}")
        return None

# Usage
response = safe_completion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hello!"}]
)
```

### Advanced Error Handling with Retry Logic

```python
import time
import random
from typing import Optional

def completion_with_retry(
    model: str,
    messages: list,
    max_retries: int = 3,
    base_delay: float = 1.0,
    max_delay: float = 60.0,
    **kwargs
) -> Optional[litellm.ModelResponse]:
    """
    Completion with exponential backoff retry logic.
    """

    for attempt in range(max_retries + 1):
        try:
            response = litellm.completion(
                model=model,
                messages=messages,
                **kwargs
            )
            return response

        except RateLimitError as e:
            if attempt == max_retries:
                raise e

            # Use retry_after if provided, otherwise exponential backoff
            if e.retry_after:
                delay = min(e.retry_after, max_delay)
            else:
                delay = min(base_delay * (2 ** attempt), max_delay)

            # Add jitter to prevent thundering herd
            jitter = random.uniform(0, 0.1 * delay)
            total_delay = delay + jitter

            print(f"Rate limited, retrying in {total_delay:.1f}s (attempt {attempt + 1})")
            time.sleep(total_delay)

        except (ServiceUnavailableError, InternalServerError, APIConnectionError) as e:
            if attempt == max_retries:
                raise e

            delay = min(base_delay * (2 ** attempt), max_delay)
            jitter = random.uniform(0, 0.1 * delay)
            total_delay = delay + jitter

            print(f"Service error, retrying in {total_delay:.1f}s: {e}")
            time.sleep(total_delay)

        except (AuthenticationError, InvalidRequestError, NotFoundError) as e:
            # Don't retry these errors
            print(f"Non-retryable error: {e}")
            raise e

    return None

# Usage
try:
    response = completion_with_retry(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello!"}],
        max_retries=5
    )
except Exception as e:
    print(f"All retries failed: {e}")
```

### Fallback Strategy Implementation

```python
def completion_with_fallbacks(
    primary_model: str,
    messages: list,
    fallback_models: list = None,
    **kwargs
):
    """
    Try primary model, fall back to alternatives on certain errors.
    """

    if fallback_models is None:
        fallback_models = ["gpt-3.5-turbo", "claude-3-haiku-20240307"]

    models_to_try = [primary_model] + fallback_models

    for i, model in enumerate(models_to_try):
        try:
            response = litellm.completion(
                model=model,
                messages=messages,
                **kwargs
            )

            if i > 0:  # Used fallback
                print(f"Successfully used fallback model: {model}")

            return response

        except ContextWindowExceededError as e:
            print(f"Context window exceeded for {model}, trying fallback")
            if i == len(models_to_try) - 1:  # Last model
                raise e
            continue

        except ContentPolicyViolationError as e:
            print(f"Content policy violation for {model}, trying fallback")
            if i == len(models_to_try) - 1:  # Last model
                raise e
            continue

        except RateLimitError as e:
            print(f"Rate limit for {model}, trying fallback")
            if i == len(models_to_try) - 1:  # Last model
                raise e
            continue

        except (AuthenticationError, InvalidRequestError) as e:
            # Don't fallback for these errors
            raise e

    raise Exception("All fallback models failed")

# Usage
response = completion_with_fallbacks(
    primary_model="gpt-4",
    messages=[{"role": "user", "content": "Very long prompt..."}],
    fallback_models=["claude-3-sonnet-20240229", "gpt-3.5-turbo-16k"]
)
```

### Budget Management with Exceptions

```python
class BudgetManager:
    def __init__(self, max_budget: float):
        self.max_budget = max_budget
        self.current_cost = 0.0

    def check_budget(self, estimated_cost: float):
        if self.current_cost + estimated_cost > self.max_budget:
            raise BudgetExceededError(
                f"Estimated cost ${estimated_cost:.4f} would exceed budget "
                f"(${self.current_cost:.4f}/${self.max_budget:.2f})"
            )

    def track_completion(self, response):
        if hasattr(response, '_hidden_params') and 'response_cost' in response._hidden_params:
            cost = response._hidden_params['response_cost']
            self.current_cost += cost
            print(f"Request cost: ${cost:.4f}, Total: ${self.current_cost:.4f}")

# Usage
budget_manager = BudgetManager(max_budget=10.00)

def budget_aware_completion(model, messages, **kwargs):
    # Estimate cost before making request
    estimated_tokens = litellm.token_counter(model=model, messages=messages)
    estimated_cost = litellm.cost_per_token(
        model=model,
        prompt_tokens=estimated_tokens,
        completion_tokens=kwargs.get('max_tokens', 256)
    )

    try:
        budget_manager.check_budget(estimated_cost)

        response = litellm.completion(
            model=model,
            messages=messages,
            **kwargs
        )

        budget_manager.track_completion(response)
        return response

    except BudgetExceededError as e:
        print(f"Budget management: {e}")
        # Could fallback to cheaper model
        return None

response = budget_aware_completion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hello!"}]
)
```

### Provider-Specific Error Handling

```python
def handle_provider_specific_errors(model, messages, **kwargs):
    try:
        response = litellm.completion(
            model=model,
            messages=messages,
            **kwargs
        )
        return response

    except Exception as e:
        # Check provider-specific error context
        if hasattr(e, 'llm_provider'):
            provider = e.llm_provider

            if provider == "openai":
                if "insufficient_quota" in str(e).lower():
                    print("OpenAI quota exceeded, switching to backup provider")
                    return litellm.completion(
                        model="claude-3-sonnet-20240229",
                        messages=messages,
                        **kwargs
                    )

            elif provider == "anthropic":
                if "overloaded" in str(e).lower():
                    print("Anthropic overloaded, trying OpenAI")
                    return litellm.completion(
                        model="gpt-4",
                        messages=messages,
                        **kwargs
                    )

            elif provider == "cohere":
                if isinstance(e, UnsupportedParamsError):
                    print("Removing unsupported parameters for Cohere")
                    # Remove OpenAI-specific parameters
                    clean_kwargs = {k: v for k, v in kwargs.items()
                                    if k not in ['logit_bias', 'seed']}
                    return litellm.completion(
                        model=model,
                        messages=messages,
                        **clean_kwargs
                    )

        # Re-raise if not handled
        raise e
```

### Custom Exception Classes

```python
class CustomLiteLLMError(Exception):
    """Custom application-specific error"""
    def __init__(self, message: str, model: str, cost: float = 0.0):
        super().__init__(message)
        self.model = model
        self.cost = cost

def application_completion_wrapper(model, messages, **kwargs):
    """Application-specific completion wrapper with custom error handling"""

    try:
        response = litellm.completion(
            model=model,
            messages=messages,
            **kwargs
        )

        # Custom validation
        if not response.choices or not response.choices[0].message.content:
            raise CustomLiteLLMError(
                "Empty response received",
                model=model,
                cost=litellm.completion_cost(response)
            )

        return response

    except ContextWindowExceededError as e:
        # Convert to custom error with application context
        raise CustomLiteLLMError(
            f"Input too long for model {model}. "
            f"Required: {e.current_tokens}, Max: {e.max_tokens}",
            model=model
        )

    except ContentPolicyViolationError as e:
        # Custom content policy handling
        raise CustomLiteLLMError(
            f"Content rejected by {model}: {e.violation_type or 'policy violation'}",
            model=model
        )

# Usage with custom error handling
try:
    response = application_completion_wrapper(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello!"}]
    )
except CustomLiteLLMError as e:
    print(f"Application error with {e.model}: {e}")
    print(f"Cost incurred: ${e.cost:.4f}")
```