or run

tessl search
Log in

Version

Workspace
tessl
Visibility
Public
Created
Last updated
Describes
pypipkg:pypi/kserve@0.16.x

docs

index.md
tile.json

tessl/pypi-kserve

tessl install tessl/pypi-kserve@0.16.1

KServe is a comprehensive Python SDK that provides standardized interfaces for building and deploying machine learning model serving infrastructure on Kubernetes.

docs/reference/errors.md

Error Handling

Custom exception classes for handling inference errors, model lifecycle issues, and validation failures with appropriate HTTP status codes.

Capabilities

Custom KServe Errors

KServe provides specific exception classes for different error scenarios with associated HTTP status codes.

class InferenceError(RuntimeError):
    """
    Inference execution failure (HTTP 500).

    Raised when model inference fails during execution. Subclasses
    ``RuntimeError``.

    Args:
        reason (str): Error message describing the failure.
        status (str, optional): Status code. Defaults to None.
        debug_details (any, optional): Debug information. Defaults to None.
    """
    def __init__(self, reason, status=None, debug_details=None): ...

class InvalidInput(ValueError):
    """
    Invalid input data (HTTP 400).

    Raised when request input data is malformed or invalid. Subclasses
    ``ValueError``, so an ``except ValueError`` clause also catches it.

    Args:
        reason (str): Error message describing the validation failure.
    """
    def __init__(self, reason): ...

class ModelNotFound(Exception):
    """
    Model does not exist (HTTP 404).

    Raised when the requested model is not registered in the repository.

    Args:
        model_name (str, optional): Name of the missing model. Defaults to None.
    """
    def __init__(self, model_name=None): ...

class ModelNotReady(RuntimeError):
    """
    Model not initialized (HTTP 503).

    Raised when the model exists but is not ready to serve requests.
    Subclasses ``RuntimeError``.

    Args:
        model_name (str): Name of the model that is not ready (the name
            itself, not a full error message).
        detail (str, optional): Additional detail message. Defaults to None.
    """
    def __init__(self, model_name: str, detail: str = None): ...

class ModelMissingError(Exception):
    """
    Model not found in registry.

    Raised when attempting to retrieve a model that doesn't exist in the
    repository.

    Args:
        path (str): Path to the missing model.
    """
    def __init__(self, path): ...

class UnsupportedProtocol(Exception):
    """
    Unknown or unsupported protocol.

    Raised when an unsupported protocol version is requested.

    Args:
        protocol_version (str, optional): The unsupported protocol identifier
            (the identifier itself, not a full error message). Defaults to
            None.
    """
    def __init__(self, protocol_version=None): ...

class ServerNotReady(RuntimeError):
    """
    Server not ready (HTTP 503).

    Raised when the server is starting up and not ready to accept requests.
    Subclasses ``RuntimeError``.

    Args:
        detail (str, optional): Additional detail message. Defaults to None.
    """
    def __init__(self, detail: str = None): ...

class ServerNotLive(RuntimeError):
    """
    Server not live (HTTP 503).

    Raised when the server process is not responding to liveness checks.
    Subclasses ``RuntimeError``.

    Args:
        detail (str, optional): Additional detail message. Defaults to None.
    """
    def __init__(self, detail: str = None): ...

class NoModelReady(RuntimeError):
    """
    No models ready (HTTP 503).

    Raised when no models in the repository are ready to serve. Subclasses
    ``RuntimeError``.

    Args:
        models (list): List of model objects (not names or a message string)
            that are not ready.
        detail (str, optional): Additional detail message. Defaults to None.
    """
    def __init__(self, models: list, detail: str = None): ...

class WorkersShouldBeLessThanMaxWorkersError(Exception):
    """
    Configuration error for worker settings.

    Raised when the worker configuration is invalid.

    Args:
        max_workers (int, optional): Maximum workers allowed (the numeric
            limit, not a message string). Defaults to None.
    """
    def __init__(self, max_workers=None): ...

OpenAPI Exceptions

KServe also exports standard OpenAPI exceptions for API client operations.

class OpenApiException(Exception):
    """
    Base exception for OpenAPI operations.

    Attributes:
        status (int): HTTP status code.
        reason (str): Error reason.
        body (str): Response body.
        headers (dict): Response headers.
    """
    status: int  # HTTP status code
    reason: str  # Error reason
    body: str  # Response body
    headers: dict  # Response headers

class ApiTypeError(OpenApiException, TypeError):
    """
    Type validation error.

    Raised when an API parameter has an incorrect type. Inherits from both
    ``OpenApiException`` and the built-in ``TypeError``, so either can be
    used to catch it.
    """

class ApiValueError(OpenApiException, ValueError):
    """
    Value validation error.

    Raised when an API parameter has an invalid value. Inherits from both
    ``OpenApiException`` and the built-in ``ValueError``, so either can be
    used to catch it.
    """

class ApiKeyError(OpenApiException, KeyError):
    """
    Key error in API operation.

    Raised when a required key is missing. Inherits from both
    ``OpenApiException`` and the built-in ``KeyError``, so either can be
    used to catch it.
    """

class ApiException(OpenApiException):
    """
    General API exception.

    Raised for API operation failures. Subclasses ``OpenApiException``.

    Args:
        status (int): HTTP status code.
        reason (str): Error reason.
        http_resp (object): HTTP response object.
    """

Error Handlers

KServe provides built-in async error handlers that convert exceptions to appropriate HTTP responses.

async def exception_handler(_, exc: Exception) -> JSONResponse:
    """
    Generic exception handler.

    Args:
        _ : Request (unused; positional parameter kept for FastAPI compatibility)
        exc (Exception): Raised exception

    Returns:
        JSONResponse: Error response with status 500
    """

async def invalid_input_handler(_, exc: InvalidInput) -> JSONResponse:
    """
    Handler for invalid input errors.

    Args:
        _ : Request (unused; positional parameter kept for FastAPI compatibility)
        exc (InvalidInput): Invalid input exception

    Returns:
        JSONResponse: Error response with status 400
    """

async def inference_error_handler(_, exc: InferenceError) -> JSONResponse:
    """
    Handler for inference errors.

    Args:
        _ : Request (unused; positional parameter kept for FastAPI compatibility)
        exc (InferenceError): Inference error exception

    Returns:
        JSONResponse: Error response with status 500
    """

async def generic_exception_handler(_, exc: Exception) -> JSONResponse:
    """
    Generic exception handler that includes the exception type name in the error.

    Args:
        _ : Request (unused; positional parameter kept for FastAPI compatibility)
        exc (Exception): Raised exception

    Returns:
        JSONResponse: Error response with status 500 including exception type
    """

async def model_not_found_handler(_, exc: ModelNotFound) -> JSONResponse:
    """
    Handler for model not found errors.

    Args:
        _ : Request (unused; positional parameter kept for FastAPI compatibility)
        exc (ModelNotFound): Model not found exception

    Returns:
        JSONResponse: Error response with status 404
    """

async def model_not_ready_handler(_, exc: ModelNotReady) -> JSONResponse:
    """
    Handler for model not ready errors.

    Args:
        _ : Request (unused; positional parameter kept for FastAPI compatibility)
        exc (ModelNotReady): Model not ready exception

    Returns:
        JSONResponse: Error response with status 503
    """

async def not_implemented_error_handler(_, exc: NotImplementedError) -> JSONResponse:
    """
    Handler for not implemented errors.

    Args:
        _ : Request (unused; positional parameter kept for FastAPI compatibility)
        exc (NotImplementedError): Not implemented exception

    Returns:
        JSONResponse: Error response with status 501
    """

async def unsupported_protocol_error_handler(_, exc: UnsupportedProtocol) -> JSONResponse:
    """
    Handler for unsupported protocol errors.

    Args:
        _ : Request (unused; positional parameter kept for FastAPI compatibility)
        exc (UnsupportedProtocol): Unsupported protocol exception

    Returns:
        JSONResponse: Error response with status 501
    """

async def server_not_ready_handler(_, exc: ServerNotReady) -> JSONResponse:
    """
    Handler for server not ready errors.

    Args:
        _ : Request (unused; positional parameter kept for FastAPI compatibility)
        exc (ServerNotReady): Server not ready exception

    Returns:
        JSONResponse: Error response with status 503
    """

async def server_not_live_handler(_, exc: ServerNotLive) -> JSONResponse:
    """
    Handler for server not live errors.

    Args:
        _ : Request (unused; positional parameter kept for FastAPI compatibility)
        exc (ServerNotLive): Server not live exception

    Returns:
        JSONResponse: Error response with status 503
    """

Usage Examples

Raising Errors in Custom Models

from kserve import Model
from kserve.errors import InvalidInput, InferenceError, ModelNotReady

class MyModel(Model):
    """Example model that maps each failure mode to a dedicated KServe error."""

    def predict(self, payload, headers=None):
        """
        Validate the request and run inference.

        Args:
            payload: Request body mapping; must hold a non-empty list under
                the ``"instances"`` key.
            headers: Request headers (not used by this example).

        Returns:
            dict: ``{"predictions": [...]}`` built from the model output.

        Raises:
            ModelNotReady: If ``self.ready`` is falsy.
            InvalidInput: If ``"instances"`` is missing, not a list, or empty.
            InferenceError: If the underlying prediction call fails.
        """
        # Check model readiness
        if not self.ready:
            raise ModelNotReady(self.name)

        # Validate input
        instances = payload.get("instances")
        if instances is None:
            raise InvalidInput("Missing 'instances' in request payload")

        if not isinstance(instances, list):
            raise InvalidInput("'instances' must be a list")

        if not instances:
            raise InvalidInput("'instances' cannot be empty")

        # Run inference
        try:
            predictions = self.model.predict(instances)
            return {"predictions": predictions.tolist()}
        except Exception as e:
            # Chain explicitly so the root cause stays attached to the
            # InferenceError that is surfaced to the caller.
            raise InferenceError(f"Prediction failed: {str(e)}") from e

Handling Model Repository Errors

from kserve import ModelRepository
from kserve.errors import ModelMissingError, ModelNotReady

repository = ModelRepository()

# Add models to repository (model1 / model2 are model instances created elsewhere)
repository.update(model1)
repository.update(model2)

# Safe model retrieval
try:
    model = repository.get_model("my-model")
    result = model.predict(payload)
except ModelMissingError as e:
    print(f"Model not found: {e}")
    # Handle missing model
except ModelNotReady as e:
    # ModelNotReady must be imported above; otherwise evaluating this clause
    # raises NameError instead of handling the error.
    print(f"Model not ready: {e}")
    # Wait or retry

Validating Input Data

from kserve import Model
from kserve.errors import InvalidInput
import numpy as np

class ImageClassifier(Model):
    """Example classifier that validates every instance before inference."""

    def predict(self, payload, headers=None):
        """
        Validate the request structure, element types and shapes, then predict.

        Args:
            payload: Request body mapping with an ``"instances"`` entry.
            headers: Request headers (not used by this example).

        Returns:
            dict: ``{"predictions": [...]}`` built from the model output.

        Raises:
            InvalidInput: If ``"instances"`` is missing, an instance is not a
                list/ndarray, or an instance is not shaped ``(224, 224, 3)``.
        """
        # Validate input structure
        instances = payload.get("instances")
        if instances is None:
            raise InvalidInput("Request must contain 'instances' field")

        # Validate data type first: checking it after the shape loop would
        # report non-array instances as a shape problem and this message
        # would never be seen for them.
        if not all(isinstance(x, (list, np.ndarray)) for x in instances):
            raise InvalidInput("All instances must be arrays")

        # Validate input shape
        for idx, instance in enumerate(instances):
            arr = np.array(instance)
            if arr.shape != (224, 224, 3):
                raise InvalidInput(
                    f"Input {idx} has invalid shape {arr.shape}. "
                    f"Expected (224, 224, 3)"
                )

        # Run prediction
        predictions = self.model.predict(instances)
        return {"predictions": predictions.tolist()}

Handling Protocol Errors

from kserve.errors import UnsupportedProtocol

def get_protocol_handler(protocol_version: str):
    """
    Get the handler for the specified protocol.

    Args:
        protocol_version: Protocol identifier; ``"v1"`` and ``"v2"`` are
            the supported values.

    Returns:
        The handler object registered for ``protocol_version``.

    Raises:
        UnsupportedProtocol: If ``protocol_version`` has no registered handler.
    """
    handlers = {
        "v1": V1Handler(),
        "v2": V2Handler(),
    }

    if protocol_version not in handlers:
        # UnsupportedProtocol takes the offending protocol identifier, not a
        # pre-formatted sentence.
        raise UnsupportedProtocol(protocol_version)

    return handlers[protocol_version]

Custom Error Responses

from kserve import Model
from kserve.errors import InvalidInput, InferenceError

class ValidationModel(Model):
    """Example model that reports validation failures with detailed messages."""

    def predict(self, payload, headers=None):
        """
        Validate batch size and feature count, then run inference.

        Args:
            payload: Request body mapping with an ``"instances"`` entry.
            headers: Request headers (not used by this example).

        Returns:
            dict: ``{"predictions": [...]}`` built from the model output.

        Raises:
            InvalidInput: If ``"instances"`` is missing, holds more than 32
                items, or any instance does not have exactly 4 features.
            InferenceError: If the underlying prediction call fails.
        """
        # Custom validation with detailed messages
        instances = payload.get("instances")

        if instances is None:
            raise InvalidInput(
                "Missing required field 'instances'. "
                "Request body must be: {\"instances\": [[...]]}"
            )

        if len(instances) > 32:
            raise InvalidInput(
                f"Batch size {len(instances)} exceeds maximum of 32. "
                f"Please reduce the number of instances."
            )

        # Validate each instance
        for idx, instance in enumerate(instances):
            if len(instance) != 4:
                raise InvalidInput(
                    f"Instance {idx} has {len(instance)} features. "
                    f"Expected 4 features."
                )

        # Run prediction with error handling; "from e" keeps the original
        # exception attached as the cause of the InferenceError.
        try:
            predictions = self.model.predict(instances)
            return {"predictions": predictions.tolist()}
        except ValueError as e:
            raise InferenceError(f"Invalid values in input: {str(e)}") from e
        except Exception as e:
            raise InferenceError(f"Prediction failed: {str(e)}") from e

Checking Model Readiness

from kserve import Model, ModelServer
from kserve.errors import ModelNotReady, NoModelReady

class DelayedLoadModel(Model):
    """Example model that only becomes ready after a slow ``load()``."""

    def __init__(self, name: str):
        """Create the model in the not-ready state."""
        super().__init__(name)
        self.ready = False

    def load(self):
        """Load the underlying model and mark this instance as ready."""
        import time
        time.sleep(5)  # Simulate slow loading
        self.model = load_model()
        self.ready = True

    def predict(self, payload, headers=None):
        """
        Run inference, refusing requests that arrive before loading finishes.

        Args:
            payload: Request body mapping with an ``"instances"`` entry.
            headers: Request headers (not used by this example).

        Returns:
            dict: ``{"predictions": [...]}`` built from the model output.

        Raises:
            ModelNotReady: If ``load()`` has not completed yet.
        """
        if not self.ready:
            # First argument is the model name; the human-readable
            # explanation goes in the optional ``detail`` argument.
            raise ModelNotReady(
                self.name,
                "Model is still loading. Please try again."
            )

        instances = payload["instances"]
        predictions = self.model.predict(instances)
        return {"predictions": predictions.tolist()}

Error Handling in Repository

from kserve import ModelRepository
from kserve.errors import ModelMissingError, NoModelReady

def safe_inference(repository: ModelRepository, model_name: str, payload: dict):
    """
    Run inference against a repository model, reporting failures as dicts.

    Args:
        repository: Repository used to look up the model.
        model_name: Name of the model to run.
        payload: Request payload passed to the model's ``predict``.

    Returns:
        The model's prediction result on success; otherwise a dict with
        ``"error"`` and ``"code"`` keys (503 when the model is not ready,
        404 when it is missing, 500 for any other failure).
    """
    try:
        if repository.is_model_ready(model_name):
            # Ready: fetch the model and hand the payload straight to it
            return repository.get_model(model_name).predict(payload)

        # Known to the readiness check but not able to serve yet
        return {
            "error": f"Model {model_name} is not ready",
            "code": 503
        }
    except ModelMissingError:
        not_found = {
            "error": f"Model {model_name} not found",
            "code": 404
        }
        return not_found
    except Exception as e:
        failure = {
            "error": f"Inference failed: {str(e)}",
            "code": 500
        }
        return failure

Worker Configuration Error

from kserve.errors import WorkersShouldBeLessThanMaxWorkersError

def validate_worker_config(workers: int, max_workers: int):
    """
    Validate worker configuration.

    Args:
        workers: Requested number of workers.
        max_workers: Maximum number of workers allowed.

    Raises:
        WorkersShouldBeLessThanMaxWorkersError: If ``workers`` exceeds
            ``max_workers``.
    """
    if workers > max_workers:
        # The exception takes the numeric limit (``max_workers``), not a
        # pre-formatted message string.
        raise WorkersShouldBeLessThanMaxWorkersError(max_workers)

HTTP Status Code Mapping

| Exception | HTTP Status | Description |
| --- | --- | --- |
| InvalidInput | 400 | Bad Request - Invalid input data |
| ModelNotFound | 404 | Not Found - Model does not exist |
| InferenceError | 500 | Internal Server Error - Inference failed |
| ModelNotReady | 503 | Service Unavailable - Model not ready |
| ServerNotReady | 503 | Service Unavailable - Server not ready |
| ServerNotLive | 503 | Service Unavailable - Server not live |
| NoModelReady | 503 | Service Unavailable - No models ready |
| UnsupportedProtocol | 501 | Not Implemented - Protocol not supported |
| ModelMissingError | 404 | Not Found - Model missing from registry |

Error Response Format

KServe returns errors in a consistent JSON format:

{
  "error": "Error message describing what went wrong",
  "code": 500
}

For example:

{
  "error": "Model sklearn-iris is not ready",
  "code": 503
}
{
  "error": "Missing 'instances' in request payload",
  "code": 400
}

Best Practices

1. Validate Early

from kserve import Model
from kserve.errors import InvalidInput

class BestPracticeModel(Model):
    """Example model that rejects bad requests before doing any inference work."""

    def predict(self, payload, headers=None):
        """Validate ``payload`` up front, then return the model's predictions."""
        self._validate_input(payload)

        # Input is known to be well-formed from here on
        outputs = self.model.predict(payload["instances"])
        return {"predictions": outputs.tolist()}

    def _validate_input(self, payload):
        """Raise InvalidInput unless ``payload["instances"]`` is a non-empty list."""
        if "instances" not in payload:
            raise InvalidInput("Missing 'instances' field")

        candidate = payload["instances"]
        if not isinstance(candidate, list):
            raise InvalidInput("'instances' must be a list")
        if not candidate:
            raise InvalidInput("'instances' cannot be empty")

2. Provide Helpful Error Messages

from kserve.errors import InvalidInput

# The two raises below are alternatives shown side by side, not a sequence.

# Bad: Vague error message
raise InvalidInput("Invalid input")

# Good: Specific error message naming the offending value and the constraint
# it violates (a batch of 64 exceeds the stated limit of 32)
raise InvalidInput(
    "Input shape [64, 3, 224, 224] is invalid. Expected shape: [batch_size, 3, 224, 224] "
    "where batch_size <= 32"
)

3. Use Appropriate Exception Types

from kserve.errors import InvalidInput, InferenceError, ModelNotReady

# Fragment meant to live inside a model method: ``self``, ``instances`` and
# ``max_batch_size`` come from the enclosing context.

# Use InvalidInput for client errors (400)
if len(instances) > max_batch_size:
    raise InvalidInput(f"Batch size exceeds maximum of {max_batch_size}")

# Use InferenceError for server errors (500)
try:
    result = self.model.predict(instances)
except Exception as e:
    # "from e" keeps the root cause attached to the raised InferenceError
    raise InferenceError(f"Prediction failed: {e}") from e

# Use ModelNotReady for service unavailable (503)
if not self.ready:
    raise ModelNotReady(self.name)

4. Log Errors

from kserve import Model, logger
from kserve.errors import InferenceError

class LoggingModel(Model):
    """Example model that logs inference failures before re-raising them."""

    def predict(self, payload, headers=None):
        """
        Run inference and log any failure with its traceback.

        Args:
            payload: Request body mapping with an ``"instances"`` entry.
            headers: Request headers (not used by this example).

        Returns:
            dict: ``{"predictions": [...]}`` built from the model output.

        Raises:
            InferenceError: If reading the payload or predicting fails; the
                original exception is attached as the cause.
        """
        try:
            instances = payload["instances"]
            predictions = self.model.predict(instances)
            return {"predictions": predictions.tolist()}
        except Exception as e:
            # logger.exception logs at ERROR level and appends the active
            # traceback; %-style arguments defer formatting to the logger.
            logger.exception("Inference error for model %s: %s", self.name, e)
            # Raise appropriate exception, keeping the root cause attached
            raise InferenceError(f"Prediction failed: {str(e)}") from e

5. Handle Async Errors

from kserve import Model
from kserve.errors import InferenceError, InvalidInput

class AsyncModel(Model):
    """Example model with an async ``predict`` that maps failures to KServe errors."""

    async def predict(self, payload, headers=None):
        """
        Validate the request and await the asynchronous prediction.

        Args:
            payload: Request body mapping with an ``"instances"`` entry.
            headers: Request headers (not used by this example).

        Returns:
            dict: ``{"predictions": ...}`` holding the awaited result.

        Raises:
            InvalidInput: If ``"instances"`` is missing, or the prediction
                raises ``ValueError``.
            InferenceError: If the prediction fails for any other reason.
        """
        # Validate input
        if "instances" not in payload:
            raise InvalidInput("Missing 'instances' field")

        try:
            # Async inference
            instances = payload["instances"]
            predictions = await self.async_predict(instances)
            return {"predictions": predictions}
        except ValueError as e:
            # Chain the cause so the original traceback is not lost
            raise InvalidInput(f"Invalid input values: {e}") from e
        except Exception as e:
            raise InferenceError(f"Async prediction failed: {e}") from e

6. Graceful Degradation

from kserve import ModelRepository
from kserve.errors import ModelMissingError, ModelNotReady, NoModelReady

def get_available_model(repository: ModelRepository, preferred_models: list):
    """
    Try to get the first available model from the preferred list.

    Args:
        repository: Repository used to look up models.
        preferred_models: Model names in order of preference.

    Returns:
        The first model in ``preferred_models`` that reports ready.

    Raises:
        NoModelReady: If none of the preferred models is ready. It carries
            the models that were found but not ready.
    """
    not_ready = []
    for model_name in preferred_models:
        try:
            if repository.is_model_ready(model_name):
                return repository.get_model(model_name)
            # Found but not ready: keep the model object for the error report.
            # NOTE(review): assumes get_model may return None for an unknown
            # name instead of raising -- confirm against ModelRepository.
            candidate = repository.get_model(model_name)
            if candidate is not None:
                not_ready.append(candidate)
        except ModelMissingError:
            continue

    # No models available. NoModelReady expects a list of model objects
    # first; the explanatory text belongs in ``detail``.
    raise NoModelReady(not_ready, "No models from preferred list are ready")

Custom Error Handler

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from kserve import ModelServer

# Create custom error handler
async def custom_error_handler(request: Request, exc: Exception):
    """
    Log the failure and answer with a JSON 500 response.

    The response body carries the error text, the status code and the
    ``x-request-id`` request header (``"unknown"`` when the header is absent).
    """
    import logging

    log = logging.getLogger("kserve")
    log.error(f"Error processing request: {exc}", exc_info=True)

    # Assemble the response body first, then wrap it in the response object
    message = str(exc)
    request_id = request.headers.get("x-request-id", "unknown")
    body = {
        "error": message,
        "code": 500,
        "request_id": request_id,
    }
    return JSONResponse(status_code=500, content=body)

# Register handler: create a FastAPI app and register custom_error_handler
# for the base Exception type
app = FastAPI()
app.add_exception_handler(Exception, custom_error_handler)

Testing Error Handling

import pytest
from kserve import Model
from kserve.errors import InvalidInput, InferenceError

class TestModel(Model):
    """Minimal model used as a fixture for error-handling tests."""

    # The class name matches pytest's default ``Test*`` collection pattern;
    # opt out so pytest does not try to collect this fixture as a test class.
    __test__ = False

    def predict(self, payload, headers=None):
        """Return fixed predictions, or raise InvalidInput if 'instances' is absent."""
        if "instances" not in payload:
            raise InvalidInput("Missing instances")
        return {"predictions": [1, 2, 3]}

def test_invalid_input():
    """A payload without 'instances' must be rejected with InvalidInput."""
    subject = TestModel("test-model")
    subject.ready = True
    empty_payload = {}

    with pytest.raises(InvalidInput):
        subject.predict(empty_payload)

def test_valid_input():
    """A payload carrying 'instances' must yield a 'predictions' entry."""
    subject = TestModel("test-model")
    subject.ready = True
    good_payload = {"instances": [[1, 2, 3]]}

    response = subject.predict(good_payload)
    assert "predictions" in response