or run

tessl search
Log in

Version

Workspace
tessl
Visibility
Public
Created
Last updated
Describes
pypi: pkg:pypi/kserve@0.16.x

docs

index.md
tile.json

tessl/pypi-kserve

tessl install tessl/pypi-kserve@0.16.1

KServe is a comprehensive Python SDK that provides standardized interfaces for building and deploying machine learning model serving infrastructure on Kubernetes.

docs/reference/configuration.md

Configuration

Configure predictor endpoints, API clients, and server behavior with support for protocol selection, timeouts, retries, and SSL.

Capabilities

PredictorConfig

Configuration for predictor endpoints used in model chaining and transformers.

class PredictorConfig:
    """
    Configuration for predictor endpoints.

    Attributes:
        predictor_host (str): Predictor host, optionally with a port
            (e.g. "sklearn-predictor.default.svc.cluster.local" or
            "localhost:8080")
        predictor_protocol (str): Protocol version ("v1" or "v2")
        predictor_use_ssl (bool): Use SSL/TLS for connections
        predictor_request_timeout_seconds (int): Request timeout in seconds
        predictor_request_retries (int): Number of retry attempts
        predictor_health_check (bool): Enable health checks
    """
    predictor_host: str
    predictor_protocol: str
    predictor_use_ssl: bool
    predictor_request_timeout_seconds: int
    predictor_request_retries: int
    predictor_health_check: bool

    @property
    def predictor_base_url(self) -> str:
        """
        Get the base URL for the predictor.

        NOTE(review): presumably the scheme is chosen from
        ``predictor_use_ssl`` and prepended to ``predictor_host``, in which
        case ``predictor_host`` must not already contain "http://" or
        "https://" -- confirm against the kserve implementation.

        Returns:
            str: Complete predictor URL with protocol and host
        """

Usage:

from kserve import PredictorConfig

# Create predictor configuration; each field is set by attribute assignment
config = PredictorConfig()
# Host name only (no scheme), matching the other examples on this page
config.predictor_host = "sklearn-predictor.default.svc.cluster.local"
config.predictor_protocol = "v2"
config.predictor_use_ssl = False
config.predictor_request_timeout_seconds = 60
config.predictor_request_retries = 3
config.predictor_health_check = True

# Get base URL (a property, so it is read without parentheses)
base_url = config.predictor_base_url
print(f"Predictor URL: {base_url}")

Configuration

OpenAPI client configuration for Kubernetes API operations.

class Configuration:
    """
    Configuration for API client.

    Every constructor argument has a default; arguments marked "optional"
    default to ``None``.

    Args:
        host (str, optional): API server host URL
        api_key (dict, optional): API keys for authentication
        api_key_prefix (dict, optional): API key prefixes
        username (str, optional): Username for HTTP basic auth
        password (str, optional): Password for HTTP basic auth
        verify_ssl (bool): Verify SSL certificates (default: True)
        ssl_ca_cert (str, optional): Path to CA certificate file
        cert_file (str, optional): Path to client certificate file
        key_file (str, optional): Path to client key file
        connection_pool_maxsize (int): Connection pool max size (default: 4)
        proxy (str, optional): Proxy URL
        proxy_headers (dict, optional): Proxy headers
        retries (int): Number of retries (default: 0)
        debug (bool): Enable debug mode (default: False)
    """
    def __init__(
        self,
        host: Optional[str] = None,
        api_key: Optional[Dict[str, str]] = None,
        api_key_prefix: Optional[Dict[str, str]] = None,
        username: Optional[str] = None,
        password: Optional[str] = None,
        verify_ssl: bool = True,
        ssl_ca_cert: Optional[str] = None,
        cert_file: Optional[str] = None,
        key_file: Optional[str] = None,
        connection_pool_maxsize: int = 4,
        proxy: Optional[str] = None,
        proxy_headers: Optional[Dict[str, str]] = None,
        retries: int = 0,
        debug: bool = False
    ): ...

    # Properties (read accessors for a subset of the constructor arguments)
    @property
    def host(self) -> str:
        """API server host"""

    @property
    def verify_ssl(self) -> bool:
        """Verify SSL certificates"""

    @property
    def ssl_ca_cert(self) -> str:
        """Path to CA certificate"""

    @property
    def cert_file(self) -> str:
        """Path to client certificate"""

    @property
    def key_file(self) -> str:
        """Path to client key"""

    @property
    def connection_pool_maxsize(self) -> int:
        """Connection pool max size"""

    @property
    def retries(self) -> int:
        """Number of retries"""

    @property
    def debug(self) -> bool:
        """Debug mode enabled"""

Usage:

from kserve import Configuration, KServeClient

# Create configuration: in-cluster API host, verified against the
# service-account CA bundle, with a larger pool and three retries
config = Configuration(
    host="https://kubernetes.default.svc",
    verify_ssl=True,
    ssl_ca_cert="/var/run/secrets/kubernetes.io/serviceaccount/ca.crt",
    connection_pool_maxsize=10,
    retries=3
)

# Use with KServeClient (passed via the client_configuration keyword)
client = KServeClient(client_configuration=config)

RESTConfig

Configuration for REST inference clients.

class RESTConfig:
    """
    Configuration for InferenceRESTClient.

    Every constructor argument has a default; ``cert`` and ``key`` default to
    ``None``.

    Args:
        protocol (str): Protocol version ("v1" or "v2", default: "v2")
        timeout (int): Default request timeout in seconds (default: 60)
        retries (int): Number of retry attempts (default: 0)
        verify_ssl (bool): Verify SSL certificates (default: True)
        cert (str, optional): Path to SSL client certificate
        key (str, optional): Path to SSL client key
    """
    def __init__(
        self,
        protocol: str = "v2",
        timeout: int = 60,
        retries: int = 0,
        verify_ssl: bool = True,
        cert: Optional[str] = None,
        key: Optional[str] = None
    ): ...

    # Properties (one read accessor per constructor argument)
    @property
    def protocol(self) -> str:
        """Protocol version"""

    @property
    def timeout(self) -> int:
        """Request timeout"""

    @property
    def retries(self) -> int:
        """Number of retries"""

    @property
    def verify_ssl(self) -> bool:
        """Verify SSL certificates"""

    @property
    def cert(self) -> str:
        """Client certificate path"""

    @property
    def key(self) -> str:
        """Client key path"""

Usage:

from kserve import InferenceRESTClient, RESTConfig

# Create REST configuration: v2 protocol, 120-second timeout, three retries,
# certificate verification on, and a client certificate/key pair
config = RESTConfig(
    protocol="v2",
    timeout=120,
    retries=3,
    verify_ssl=True,
    cert="/path/to/client-cert.pem",
    key="/path/to/client-key.pem"
)

# Create client with configuration (the target URL is given separately)
client = InferenceRESTClient(url="https://model.example.com:8080", config=config)

Usage Examples

PredictorConfig for Transformers

from kserve import Model, PredictorConfig
import requests

class ImageTransformer(Model):
    """Example transformer that pre-processes images and tags the response.

    Args:
        name (str): Model name, forwarded to ``Model.__init__``.
        predictor_host (str): Host of the downstream predictor; stored on the
            instance as ``predictor_host``.
    """

    def __init__(self, name: str, predictor_host: str):
        super().__init__(name)
        self.predictor_host = predictor_host

    def resize_and_normalize(self, image_data):
        """Placeholder preprocessing hook.

        Returns ``image_data`` unchanged so the example is runnable; override
        or replace this with real resize/normalize logic.
        """
        return image_data

    def preprocess(self, body, headers=None):
        """Transform the ``"instances"`` of the request body.

        Args:
            body (dict): Request body; must contain an ``"instances"`` key.
            headers: Unused here.

        Returns:
            dict: ``{"instances": <processed instances>}``
        """
        # Transform image data
        image_data = body["instances"]
        processed = self.resize_and_normalize(image_data)
        return {"instances": processed}

    def postprocess(self, response, headers=None):
        """Attach this transformer's name to the predictor response.

        Args:
            response (dict): Predictor response; must contain ``"predictions"``.
            headers: Unused here.

        Returns:
            dict: The predictions plus a ``"transformer"`` entry holding
            ``self.name``.
        """
        # Add metadata to response
        predictions = response["predictions"]
        return {
            "predictions": predictions,
            "transformer": self.name
        }

# Configure predictor
config = PredictorConfig()
config.predictor_host = "resnet-predictor.default.svc.cluster.local"
config.predictor_protocol = "v1"
config.predictor_use_ssl = False
config.predictor_request_timeout_seconds = 30

# Use configuration. Only predictor_host is handed to the transformer here;
# the protocol, SSL and timeout fields are not consumed by ImageTransformer.
transformer = ImageTransformer("image-transformer", config.predictor_host)

Configuration with Custom CA Certificate

from kserve import Configuration, KServeClient

# Configuration with custom CA: verification stays on and ssl_ca_cert points
# at the CA bundle file
config = Configuration(
    host="https://k8s-api.example.com:6443",
    verify_ssl=True,
    ssl_ca_cert="/etc/ssl/certs/custom-ca.crt",
    connection_pool_maxsize=20,
    retries=5
)

# Create client
client = KServeClient(client_configuration=config)

# Use client: list resources in the "default" namespace
isvcs = client.list(namespace="default")

Configuration with Mutual TLS

from kserve import Configuration, KServeClient

# Configuration with client certificates: ssl_ca_cert is the CA file,
# cert_file/key_file are the client certificate and its private key
config = Configuration(
    host="https://k8s-api.example.com:6443",
    verify_ssl=True,
    ssl_ca_cert="/etc/ssl/certs/ca.crt",
    cert_file="/etc/ssl/certs/client.crt",
    key_file="/etc/ssl/private/client.key"
)

client = KServeClient(client_configuration=config)

Configuration with API Key

from kserve import Configuration

# Configuration with API key.
# api_key carries only the raw token; the "Bearer" scheme is supplied once
# through api_key_prefix. Putting "Bearer" in both would repeat the scheme
# in the resulting header value.
config = Configuration(
    host="https://api.example.com",
    api_key={"authorization": "token123"},
    api_key_prefix={"authorization": "Bearer"}
)

Configuration with HTTP Basic Auth

from kserve import Configuration

# Configuration with username/password (HTTP basic auth).
# The literal credentials are placeholders; load real ones from a secret
# store or environment variables rather than hard-coding them.
config = Configuration(
    host="https://api.example.com",
    username="admin",
    password="secret123",
    verify_ssl=True
)

Configuration with Proxy

from kserve import Configuration

# Configuration with proxy.
# The Proxy-Authorization value is "Basic " + base64("user:pass"), a
# placeholder credential.
config = Configuration(
    host="https://k8s-api.example.com:6443",
    proxy="http://proxy.example.com:8080",
    proxy_headers={"Proxy-Authorization": "Basic dXNlcjpwYXNz"}
)

RESTConfig for Different Protocols

from kserve import InferenceRESTClient, RESTConfig

# v1 protocol configuration (30-second timeout, two retries)
v1_config = RESTConfig(
    protocol="v1",
    timeout=30,
    retries=2
)
v1_client = InferenceRESTClient(url="http://localhost:8080", config=v1_config)

# v2 protocol configuration (60-second timeout, three retries); same URL,
# only the RESTConfig differs
v2_config = RESTConfig(
    protocol="v2",
    timeout=60,
    retries=3
)
v2_client = InferenceRESTClient(url="http://localhost:8080", config=v2_config)

RESTConfig with SSL

from kserve import InferenceRESTClient, RESTConfig

# HTTPS with certificate verification and a client certificate/key pair
ssl_config = RESTConfig(
    protocol="v2",
    verify_ssl=True,
    cert="/path/to/client.crt",
    key="/path/to/client.key",
    timeout=90
)

# The https:// URL is what selects TLS; ssl_config supplies the certificates
client = InferenceRESTClient(url="https://secure-model.example.com:8443", config=ssl_config)

RESTConfig without SSL Verification

from kserve import InferenceRESTClient, RESTConfig

# Disable SSL verification (not recommended for production).
# The connection still uses an https:// URL; only certificate checking is
# turned off.
config = RESTConfig(
    protocol="v2",
    verify_ssl=False,
    timeout=60
)

client = InferenceRESTClient(url="https://localhost:8080", config=config)

Configuration with Retries

from kserve import Configuration, KServeClient

# Configuration with retry policy (the documented default for retries is 0)
config = Configuration(
    host="https://k8s-api.example.com:6443",
    retries=5,  # Retry up to 5 times
    connection_pool_maxsize=10
)

client = KServeClient(client_configuration=config)

Debug Configuration

from kserve import Configuration, KServeClient

# Enable debug mode (the documented default for debug is False)
config = Configuration(
    host="https://k8s-api.example.com:6443",
    debug=True  # Enable detailed logging
)

client = KServeClient(client_configuration=config)

Context Management

Setting Predictor Config in Context

from kserve.context import set_predictor_config, get_predictor_config

def set_predictor_config(config: PredictorConfig) -> None:
    """
    Store predictor configuration in async context.

    Args:
        config (PredictorConfig): Predictor configuration to store

    Returns:
        None
    """

def get_predictor_config() -> Optional[PredictorConfig]:
    """
    Retrieve predictor configuration from context.

    Callers should handle the ``None`` case before using the result.

    Returns:
        PredictorConfig or None: Stored configuration or None if not set
    """

Usage:

from kserve import PredictorConfig
from kserve.context import set_predictor_config, get_predictor_config

# Set configuration in context
config = PredictorConfig()
config.predictor_host = "predictor.default.svc.cluster.local"
config.predictor_protocol = "v2"
set_predictor_config(config)

# Retrieve configuration from context; the getter may return None, so guard
# before reading attributes
retrieved_config = get_predictor_config()
if retrieved_config:
    print(f"Predictor host: {retrieved_config.predictor_host}")

Using Context in Async Models

from kserve import Model, PredictorConfig
from kserve.context import set_predictor_config, get_predictor_config
import httpx

class TransformerModel(Model):
    """Example model whose predict step forwards the payload to the predictor
    described by the context-stored PredictorConfig."""

    async def predict(self, payload, headers=None):
        """POST ``payload`` to the predictor and return its decoded JSON body.

        Args:
            payload: JSON-serialisable request body.
            headers: Unused here.

        Returns:
            The predictor response decoded from JSON.

        Raises:
            ValueError: If no predictor configuration is stored in context.
            httpx.HTTPStatusError: If the predictor answers with a 4xx/5xx
                status.
        """
        # Get predictor configuration from context
        config = get_predictor_config()
        if not config:
            raise ValueError("Predictor configuration not set")

        # Make request to predictor
        async with httpx.AsyncClient() as client:
            url = f"{config.predictor_base_url}/v2/models/predictor/infer"
            response = await client.post(
                url,
                json=payload,
                timeout=config.predictor_request_timeout_seconds
            )
            # Fail loudly on HTTP errors instead of returning an error body
            # as if it were a prediction.
            response.raise_for_status()
            return response.json()

# Set configuration before starting.
# predictor_host is host[:port] only, as in the other examples on this page;
# predictor_base_url is documented as adding the protocol, so a scheme here
# would presumably be duplicated in the final URL.
config = PredictorConfig()
config.predictor_host = "predictor:8080"
config.predictor_protocol = "v2"
set_predictor_config(config)

Environment-Based Configuration

Loading Configuration from Environment

import os
from kserve import PredictorConfig

def load_predictor_config_from_env() -> PredictorConfig:
    """Build a PredictorConfig from PREDICTOR_* environment variables.

    Unset variables fall back to the defaults written below. Boolean
    variables are true only when their value equals "true" ignoring case.
    The timeout and retry values are converted with ``int``.
    """
    env = os.getenv

    def flag(name: str, fallback: str) -> bool:
        # Anything other than "true" (case-insensitive) counts as False.
        return env(name, fallback).lower() == "true"

    loaded = PredictorConfig()
    loaded.predictor_host = env("PREDICTOR_HOST", "localhost:8080")
    loaded.predictor_protocol = env("PREDICTOR_PROTOCOL", "v2")
    loaded.predictor_use_ssl = flag("PREDICTOR_USE_SSL", "false")
    loaded.predictor_request_timeout_seconds = int(env("PREDICTOR_TIMEOUT", "60"))
    loaded.predictor_request_retries = int(env("PREDICTOR_RETRIES", "3"))
    loaded.predictor_health_check = flag("PREDICTOR_HEALTH_CHECK", "true")
    return loaded

# Build the configuration from the current process environment
config = load_predictor_config_from_env()

Kubernetes Service Account Configuration

from kserve import Configuration, KServeClient
import os

def load_in_cluster_config() -> Configuration:
    """Build a Configuration from the mounted service-account files.

    Reads the token file, strips surrounding whitespace, and passes it as the
    "authorization" API key with a "Bearer" prefix. Server certificates are
    verified against the service-account CA file.
    """
    ca_bundle = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"

    with open("/var/run/secrets/kubernetes.io/serviceaccount/token", "r") as token_file:
        bearer_token = token_file.read().strip()

    return Configuration(
        host="https://kubernetes.default.svc",
        api_key={"authorization": bearer_token},
        api_key_prefix={"authorization": "Bearer"},
        ssl_ca_cert=ca_bundle,
        verify_ssl=True
    )

# Only build the in-cluster client when the service-account directory exists
if os.path.exists("/var/run/secrets/kubernetes.io/serviceaccount"):
    config = load_in_cluster_config()
    client = KServeClient(client_configuration=config)

Complete Example

from kserve import (
    KServeClient,
    Configuration,
    InferenceRESTClient,
    RESTConfig,
    PredictorConfig
)
import os

def create_kserve_client() -> KServeClient:
    """Return a KServeClient configured for the current environment.

    When the service-account directory exists, the client authenticates with
    the mounted token and CA file; otherwise it is built with only retry and
    pool-size settings.
    """
    running_in_cluster = os.path.exists("/var/run/secrets/kubernetes.io/serviceaccount")

    if not running_in_cluster:
        # Out-of-cluster: no host or credentials are passed here
        fallback = Configuration(
            retries=3,
            connection_pool_maxsize=10
        )
        return KServeClient(client_configuration=fallback)

    # In-cluster: read the mounted service-account token
    ca_bundle = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
    with open("/var/run/secrets/kubernetes.io/serviceaccount/token", "r") as token_file:
        bearer_token = token_file.read().strip()

    in_cluster = Configuration(
        host="https://kubernetes.default.svc",
        api_key={"authorization": bearer_token},
        api_key_prefix={"authorization": "Bearer"},
        ssl_ca_cert=ca_bundle,
        verify_ssl=True,
        retries=3
    )
    return KServeClient(client_configuration=in_cluster)

def create_inference_client(url: str, use_ssl: bool = False) -> InferenceRESTClient:
    """Return an InferenceRESTClient for ``url`` using the v2 protocol.

    The timeout is fixed at 120 and retries at 3. ``use_ssl`` is forwarded as
    the ``verify_ssl`` setting, so the default ``False`` turns certificate
    verification off.
    """
    rest_settings = dict(protocol="v2", timeout=120, retries=3, verify_ssl=use_ssl)
    return InferenceRESTClient(url=url, config=RESTConfig(**rest_settings))

def create_predictor_config(host: str) -> PredictorConfig:
    """Return a PredictorConfig for ``host`` with fixed example settings.

    Settings applied: v2 protocol, SSL off, 60-second timeout, three retries,
    and health checks on.
    """
    settings = {
        "predictor_host": host,
        "predictor_protocol": "v2",
        "predictor_use_ssl": False,
        "predictor_request_timeout_seconds": 60,
        "predictor_request_retries": 3,
        "predictor_health_check": True,
    }
    predictor = PredictorConfig()
    # Apply the fields in the same order as listed above
    for field_name, value in settings.items():
        setattr(predictor, field_name, value)
    return predictor

# Usage: build one object of each kind when run as a script
if __name__ == "__main__":
    # Create KServe client for Kubernetes operations
    kserve_client = create_kserve_client()

    # Create inference client for making predictions
    # (use_ssl is left at its default of False)
    inference_client = create_inference_client("http://localhost:8080")

    # Create predictor configuration for transformers
    predictor_config = create_predictor_config("predictor.default.svc.cluster.local")

    print("Configuration created successfully")

Configuration Best Practices

1. Use Environment Variables

# Load sensitive data from environment.
# os.getenv returns None for an unset variable, so make sure K8S_API_HOST and
# K8S_API_TOKEN are defined before this runs. Requires `import os`.
config = Configuration(
    host=os.getenv("K8S_API_HOST"),
    api_key={"authorization": os.getenv("K8S_API_TOKEN")},
    api_key_prefix={"authorization": "Bearer"}
)

2. Implement Timeout Strategies

# Short timeout for health checks (5 seconds)
health_config = RESTConfig(protocol="v2", timeout=5)

# Longer timeout for inference (120 seconds)
inference_config = RESTConfig(protocol="v2", timeout=120)

3. Use Retries for Resilience

# Configure retries for transient failures (the documented default is 0)
config = Configuration(
    host="https://k8s-api.example.com",
    retries=5,
    connection_pool_maxsize=10
)

4. Separate Configs by Environment

def get_config(environment: str) -> Configuration:
    """Return the Configuration for ``environment``.

    "production" gets the production host with certificate verification
    against the production CA file and five retries. Any other value gets the
    development host with verification disabled and two retries.
    """
    is_production = environment == "production"

    if not is_production:
        return Configuration(
            host="https://k8s-dev.example.com",
            verify_ssl=False,
            retries=2
        )

    return Configuration(
        host="https://k8s-prod.example.com",
        verify_ssl=True,
        ssl_ca_cert="/etc/ssl/prod-ca.crt",
        retries=5
    )