```bash
tessl install tessl/pypi-kserve@0.16.1
```

KServe is a comprehensive Python SDK that provides standardized interfaces for building and deploying machine learning model serving infrastructure on Kubernetes.
KServe provides custom exception classes for handling inference errors, model lifecycle issues, and validation failures, each associated with an appropriate HTTP status code.
```python
class InferenceError(RuntimeError):
    """
    Inference execution failure (HTTP 500).

    Raised when model inference fails during execution.

    Args:
        reason (str): Error message
        status (str, optional): Status code
        debug_details (any, optional): Debug information
    """
    def __init__(self, reason, status=None, debug_details=None): ...


class InvalidInput(ValueError):
    """
    Invalid input data (HTTP 400).

    Raised when request input data is malformed or invalid.

    Args:
        reason (str): Error message describing the validation failure
    """
    def __init__(self, reason): ...


class ModelNotFound(Exception):
    """
    Model does not exist (HTTP 404).

    Raised when requested model is not registered in the repository.

    Args:
        model_name (str): Name of the missing model
    """
    def __init__(self, model_name=None): ...


class ModelNotReady(RuntimeError):
    """
    Model not initialized (HTTP 503).

    Raised when model exists but is not ready to serve requests.

    Args:
        model_name (str): Name of the model that is not ready
        detail (str, optional): Additional detail message
    """
    def __init__(self, model_name: str, detail: str = None): ...


class ModelMissingError(Exception):
    """
    Model not found in registry.

    Raised when attempting to retrieve a model that doesn't exist in the repository.

    Args:
        path (str): Path to the missing model
    """
    def __init__(self, path): ...


class UnsupportedProtocol(Exception):
    """
    Unknown or unsupported protocol.

    Raised when an unsupported protocol version is requested.

    Args:
        protocol_version (str): The unsupported protocol identifier
    """
    def __init__(self, protocol_version=None): ...


class ServerNotReady(RuntimeError):
    """
    Server not ready (HTTP 503).

    Raised when server is starting up and not ready to accept requests.

    Args:
        detail (str, optional): Additional detail message
    """
    def __init__(self, detail: str = None): ...


class ServerNotLive(RuntimeError):
    """
    Server not live (HTTP 503).

    Raised when server process is not responding to liveness checks.

    Args:
        detail (str, optional): Additional detail message
    """
    def __init__(self, detail: str = None): ...


class NoModelReady(RuntimeError):
    """
    No models ready (HTTP 503).

    Raised when no models in the repository are ready to serve.

    Args:
        models (list): List of model objects that are not ready
        detail (str, optional): Additional detail message
    """
    def __init__(self, models: list, detail: str = None): ...


class WorkersShouldBeLessThanMaxWorkersError(Exception):
    """
    Configuration error for worker settings.

    Raised when worker configuration is invalid.

    Args:
        max_workers (int, optional): Maximum workers allowed
    """
    def __init__(self, max_workers=None): ...
```
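The optional `status` and `debug_details` arguments of `InferenceError` are not exercised by the examples later in this guide; as a minimal sketch (the status string and helper below are illustrative), they can carry extra diagnostic context along with the error message:

```python
from kserve.errors import InferenceError

def run_inference(model, instances):
    try:
        return model.predict(instances)
    except ValueError as e:
        # status and debug_details are optional; both default to None
        raise InferenceError(
            "Prediction failed on malformed tensor",
            status="invalid-shape",   # illustrative status string
            debug_details=str(e),     # surface the underlying error
        )
```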
KServe also exports standard OpenAPI exceptions for API client operations.

```python
class OpenApiException(Exception):
    """
    Base exception for OpenAPI operations.

    Attributes:
        status (int): HTTP status code
        reason (str): Error reason
        body (str): Response body
        headers (dict): Response headers
    """
    status: int
    reason: str
    body: str
    headers: dict


class ApiTypeError(OpenApiException, TypeError):
    """
    Type validation error.

    Raised when API parameter has incorrect type.
    """


class ApiValueError(OpenApiException, ValueError):
    """
    Value validation error.

    Raised when API parameter has invalid value.
    """


class ApiKeyError(OpenApiException, KeyError):
    """
    Key error in API operation.

    Raised when required key is missing.
    """


class ApiException(OpenApiException):
    """
    General API exception.

    Raised for API operation failures.

    Args:
        status (int): HTTP status code
        reason (str): Error reason
        http_resp (object): HTTP response object
    """
```
KServe provides built-in async error handlers that convert exceptions to appropriate HTTP responses.

```python
async def exception_handler(_, exc: Exception) -> JSONResponse:
    """
    Generic exception handler.

    Args:
        _ : Request (unused; positional parameter for FastAPI compatibility)
        exc (Exception): Raised exception

    Returns:
        JSONResponse: Error response with status 500
    """

async def invalid_input_handler(_, exc: InvalidInput) -> JSONResponse:
    """
    Handler for invalid input errors.

    Args:
        _ : Request (unused; positional parameter for FastAPI compatibility)
        exc (InvalidInput): Invalid input exception

    Returns:
        JSONResponse: Error response with status 400
    """

async def inference_error_handler(_, exc: InferenceError) -> JSONResponse:
    """
    Handler for inference errors.

    Args:
        _ : Request (unused; positional parameter for FastAPI compatibility)
        exc (InferenceError): Inference error exception

    Returns:
        JSONResponse: Error response with status 500
    """

async def generic_exception_handler(_, exc: Exception) -> JSONResponse:
    """
    Generic exception handler with the exception type name in the error.

    Args:
        _ : Request (unused; positional parameter for FastAPI compatibility)
        exc (Exception): Raised exception

    Returns:
        JSONResponse: Error response with status 500 including the exception type
    """

async def model_not_found_handler(_, exc: ModelNotFound) -> JSONResponse:
    """
    Handler for model not found errors.

    Args:
        _ : Request (unused; positional parameter for FastAPI compatibility)
        exc (ModelNotFound): Model not found exception

    Returns:
        JSONResponse: Error response with status 404
    """

async def model_not_ready_handler(_, exc: ModelNotReady) -> JSONResponse:
    """
    Handler for model not ready errors.

    Args:
        _ : Request (unused; positional parameter for FastAPI compatibility)
        exc (ModelNotReady): Model not ready exception

    Returns:
        JSONResponse: Error response with status 503
    """

async def not_implemented_error_handler(_, exc: NotImplementedError) -> JSONResponse:
    """
    Handler for not implemented errors.

    Args:
        _ : Request (unused; positional parameter for FastAPI compatibility)
        exc (NotImplementedError): Not implemented exception

    Returns:
        JSONResponse: Error response with status 501
    """

async def unsupported_protocol_error_handler(_, exc: UnsupportedProtocol) -> JSONResponse:
    """
    Handler for unsupported protocol errors.

    Args:
        _ : Request (unused; positional parameter for FastAPI compatibility)
        exc (UnsupportedProtocol): Unsupported protocol exception

    Returns:
        JSONResponse: Error response with status 501
    """

async def server_not_ready_handler(_, exc: ServerNotReady) -> JSONResponse:
    """
    Handler for server not ready errors.

    Args:
        _ : Request (unused; positional parameter for FastAPI compatibility)
        exc (ServerNotReady): Server not ready exception

    Returns:
        JSONResponse: Error response with status 503
    """

async def server_not_live_handler(_, exc: ServerNotLive) -> JSONResponse:
    """
    Handler for server not live errors.

    Args:
        _ : Request (unused; positional parameter for FastAPI compatibility)
        exc (ServerNotLive): Server not live exception

    Returns:
        JSONResponse: Error response with status 503
    """
```
Raising KServe exceptions inside `predict()`:

```python
from kserve import Model
from kserve.errors import InvalidInput, InferenceError, ModelNotReady

class MyModel(Model):
    def predict(self, payload, headers=None):
        # Check model readiness
        if not self.ready:
            raise ModelNotReady(self.name)

        # Validate input
        instances = payload.get("instances")
        if instances is None:
            raise InvalidInput("Missing 'instances' in request payload")
        if not isinstance(instances, list):
            raise InvalidInput("'instances' must be a list")
        if len(instances) == 0:
            raise InvalidInput("'instances' cannot be empty")

        # Run inference
        try:
            predictions = self.model.predict(instances)
            return {"predictions": predictions.tolist()}
        except Exception as e:
            raise InferenceError(f"Prediction failed: {str(e)}")
```
Handling repository lookups:

```python
from kserve import ModelRepository
from kserve.errors import ModelMissingError, ModelNotReady

repository = ModelRepository()

# Add models to the repository (model1/model2 created elsewhere)
repository.update(model1)
repository.update(model2)

# Safe model retrieval
try:
    model = repository.get_model("my-model")
    result = model.predict(payload)
except ModelMissingError as e:
    print(f"Model not found: {e}")
    # Handle missing model
except ModelNotReady as e:
    print(f"Model not ready: {e}")
    # Wait or retry
```
Validating input shape and type before inference:

```python
from kserve import Model
from kserve.errors import InvalidInput
import numpy as np

class ImageClassifier(Model):
    def predict(self, payload, headers=None):
        # Validate input structure
        instances = payload.get("instances")
        if instances is None:
            raise InvalidInput("Request must contain 'instances' field")

        # Validate data type before converting to arrays
        if not all(isinstance(x, (list, np.ndarray)) for x in instances):
            raise InvalidInput("All instances must be arrays")

        # Validate input shape
        for idx, instance in enumerate(instances):
            arr = np.array(instance)
            if arr.shape != (224, 224, 3):
                raise InvalidInput(
                    f"Input {idx} has invalid shape {arr.shape}. "
                    f"Expected (224, 224, 3)"
                )

        # Run prediction
        predictions = self.model.predict(instances)
        return {"predictions": predictions.tolist()}
```
Rejecting unsupported protocol versions:

```python
from kserve.errors import UnsupportedProtocol

def get_protocol_handler(protocol_version: str):
    """Get the handler for the specified protocol"""
    handlers = {
        "v1": V1Handler(),  # placeholder handler classes for illustration
        "v2": V2Handler(),
    }
    if protocol_version not in handlers:
        # The documented argument is the protocol identifier itself
        raise UnsupportedProtocol(protocol_version)
    return handlers[protocol_version]
```
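Continuing the sketch above, calling the function with an unknown version fails fast and can fall back to a supported protocol:

```python
try:
    handler = get_protocol_handler("v3")
except UnsupportedProtocol as e:
    print(f"Unsupported protocol requested: {e}")
    handler = get_protocol_handler("v1")  # fall back to a supported version
```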
Returning detailed validation messages:

```python
from kserve import Model
from kserve.errors import InvalidInput, InferenceError

class ValidationModel(Model):
    def predict(self, payload, headers=None):
        # Custom validation with detailed messages
        instances = payload.get("instances")
        if instances is None:
            raise InvalidInput(
                "Missing required field 'instances'. "
                "Request body must be: {\"instances\": [[...]]}"
            )
        if len(instances) > 32:
            raise InvalidInput(
                f"Batch size {len(instances)} exceeds maximum of 32. "
                f"Please reduce the number of instances."
            )

        # Validate each instance
        for idx, instance in enumerate(instances):
            if len(instance) != 4:
                raise InvalidInput(
                    f"Instance {idx} has {len(instance)} features. "
                    f"Expected 4 features."
                )

        # Run prediction with error handling
        try:
            predictions = self.model.predict(instances)
            return {"predictions": predictions.tolist()}
        except ValueError as e:
            raise InferenceError(f"Invalid values in input: {str(e)}")
        except Exception as e:
            raise InferenceError(f"Prediction failed: {str(e)}")
```
Signaling that a model is still loading:

```python
import time

from kserve import Model
from kserve.errors import ModelNotReady

class DelayedLoadModel(Model):
    def __init__(self, name: str):
        super().__init__(name)
        self.ready = False

    def load(self):
        time.sleep(5)  # Simulate slow loading
        self.model = load_model()  # load_model() is a placeholder for your loader
        self.ready = True

    def predict(self, payload, headers=None):
        if not self.ready:
            # ModelNotReady takes the model name plus an optional detail message
            raise ModelNotReady(self.name, "Model is still loading. Please try again.")
        instances = payload["instances"]
        predictions = self.model.predict(instances)
        return {"predictions": predictions.tolist()}
```
Wrapping inference in a safe helper:

```python
from kserve import ModelRepository
from kserve.errors import ModelMissingError

def safe_inference(repository: ModelRepository, model_name: str, payload: dict):
    """Safely perform inference with error handling"""
    try:
        # Check if the model is ready
        if not repository.is_model_ready(model_name):
            return {
                "error": f"Model {model_name} is not ready",
                "code": 503
            }

        # Get the model and run inference
        model = repository.get_model(model_name)
        result = model.predict(payload)
        return result
    except ModelMissingError:
        return {
            "error": f"Model {model_name} not found",
            "code": 404
        }
    except Exception as e:
        return {
            "error": f"Inference failed: {str(e)}",
            "code": 500
        }
```
Validating server worker configuration:

```python
from kserve.errors import WorkersShouldBeLessThanMaxWorkersError

def validate_worker_config(workers: int, max_workers: int):
    """Validate worker configuration"""
    if workers > max_workers:
        # The documented constructor argument is the maximum worker count
        raise WorkersShouldBeLessThanMaxWorkersError(max_workers)
```
The exceptions map to HTTP status codes as follows:

| Exception | HTTP Status | Description |
|---|---|---|
| InvalidInput | 400 | Bad Request - Invalid input data |
| ModelNotFound | 404 | Not Found - Model does not exist |
| ModelMissingError | 404 | Not Found - Model missing from registry |
| InferenceError | 500 | Internal Server Error - Inference failed |
| NotImplementedError | 501 | Not Implemented - Method not implemented |
| UnsupportedProtocol | 501 | Not Implemented - Protocol not supported |
| ModelNotReady | 503 | Service Unavailable - Model not ready |
| ServerNotReady | 503 | Service Unavailable - Server not ready |
| ServerNotLive | 503 | Service Unavailable - Server not live |
| NoModelReady | 503 | Service Unavailable - No models ready |
KServe returns errors in a consistent JSON format:

```json
{
  "error": "Error message describing what went wrong",
  "code": 500
}
```

For example:

```json
{
  "error": "Model sklearn-iris is not ready",
  "code": 503
}
```

```json
{
  "error": "Missing 'instances' in request payload",
  "code": 400
}
```
Validate early, then infer:

```python
from kserve import Model
from kserve.errors import InvalidInput

class BestPracticeModel(Model):
    def predict(self, payload, headers=None):
        # Validate at the beginning
        self._validate_input(payload)

        # Then run inference
        instances = payload["instances"]
        predictions = self.model.predict(instances)
        return {"predictions": predictions.tolist()}

    def _validate_input(self, payload):
        """Validate the input payload"""
        if "instances" not in payload:
            raise InvalidInput("Missing 'instances' field")
        instances = payload["instances"]
        if not isinstance(instances, list):
            raise InvalidInput("'instances' must be a list")
        if len(instances) == 0:
            raise InvalidInput("'instances' cannot be empty")
```
Write specific, actionable error messages:

```python
from kserve.errors import InvalidInput

# Bad: vague error message
raise InvalidInput("Invalid input")

# Good: specific error message
raise InvalidInput(
    "Input shape [1, 3, 224, 224] is invalid. Expected shape: [batch_size, 3, 224, 224] "
    "where batch_size <= 32"
)
```
Choose the exception that matches the failure (the snippets below assume they run inside a model's `predict()` method):

```python
from kserve.errors import InvalidInput, InferenceError, ModelNotReady

# Use InvalidInput for client errors (400)
if len(instances) > max_batch_size:
    raise InvalidInput(f"Batch size exceeds maximum of {max_batch_size}")

# Use InferenceError for server errors (500)
try:
    result = self.model.predict(instances)
except Exception as e:
    raise InferenceError(f"Prediction failed: {e}")

# Use ModelNotReady for service unavailable (503)
if not self.ready:
    raise ModelNotReady(self.name)
```
Log before re-raising:

```python
import logging

from kserve import Model
from kserve.errors import InferenceError

logger = logging.getLogger("kserve")

class LoggingModel(Model):
    def predict(self, payload, headers=None):
        try:
            instances = payload["instances"]
            predictions = self.model.predict(instances)
            return {"predictions": predictions.tolist()}
        except Exception as e:
            # Log error details
            logger.error(f"Inference error for model {self.name}: {e}", exc_info=True)
            # Raise the appropriate exception
            raise InferenceError(f"Prediction failed: {str(e)}")
```
Error handling works the same way in async predict methods:

```python
from kserve import Model
from kserve.errors import InferenceError, InvalidInput

class AsyncModel(Model):
    async def predict(self, payload, headers=None):
        # Validate input
        if "instances" not in payload:
            raise InvalidInput("Missing 'instances' field")

        try:
            # Async inference (async_predict is a hypothetical helper on this class)
            instances = payload["instances"]
            predictions = await self.async_predict(instances)
            return {"predictions": predictions}
        except ValueError as e:
            raise InvalidInput(f"Invalid input values: {e}")
        except Exception as e:
            raise InferenceError(f"Async prediction failed: {e}")
```
Falling back across models:

```python
from kserve import ModelRepository
from kserve.errors import ModelMissingError, NoModelReady

def get_available_model(repository: ModelRepository, preferred_models: list):
    """Try to get the first available model from a preferred list"""
    for model_name in preferred_models:
        try:
            if repository.is_model_ready(model_name):
                return repository.get_model(model_name)
        except ModelMissingError:
            continue

    # No models available; NoModelReady takes the model list plus a detail message
    raise NoModelReady(preferred_models, "No models from the preferred list are ready")
```
Registering a custom error handler:

```python
import logging

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse

logger = logging.getLogger("kserve")

async def custom_error_handler(request: Request, exc: Exception):
    """Custom error handler with logging"""
    # Log the error
    logger.error(f"Error processing request: {exc}", exc_info=True)

    # Return a custom response
    return JSONResponse(
        status_code=500,
        content={
            "error": str(exc),
            "code": 500,
            "request_id": request.headers.get("x-request-id", "unknown")
        }
    )

# Register the handler
app = FastAPI()
app.add_exception_handler(Exception, custom_error_handler)
```
Testing error behavior:

```python
import pytest

from kserve import Model
from kserve.errors import InvalidInput

class TestModel(Model):
    def predict(self, payload, headers=None):
        if "instances" not in payload:
            raise InvalidInput("Missing instances")
        return {"predictions": [1, 2, 3]}

def test_invalid_input():
    """Test that invalid input raises InvalidInput"""
    model = TestModel("test-model")
    model.ready = True
    with pytest.raises(InvalidInput):
        model.predict({})

def test_valid_input():
    """Test that valid input succeeds"""
    model = TestModel("test-model")
    model.ready = True
    result = model.predict({"instances": [[1, 2, 3]]})
    assert "predictions" in result
```