tessl install tessl/pypi-kserve@0.16.1

KServe is a comprehensive Python SDK that provides standardized interfaces for building and deploying machine learning model serving infrastructure on Kubernetes.
Configure predictor endpoints, API clients, and server behavior with support for protocol selection, timeouts, retries, and SSL.
Configuration for predictor endpoints used in model chaining and transformers.
class PredictorConfig:
    """
    Configuration for predictor endpoints.

    Attributes:
        predictor_host (str): Predictor host URL
        predictor_protocol (str): Protocol version ("v1" or "v2")
        predictor_use_ssl (bool): Use SSL/TLS for connections
        predictor_request_timeout_seconds (int): Request timeout in seconds
        predictor_request_retries (int): Number of retry attempts
        predictor_health_check (bool): Enable health checks
    """

    predictor_host: str
    predictor_protocol: str
    predictor_use_ssl: bool
    predictor_request_timeout_seconds: int
    predictor_request_retries: int
    predictor_health_check: bool

    @property
    def predictor_base_url(self) -> str:
        """
        Get the base URL for the predictor.

        Returns:
            str: Complete predictor URL with protocol and host
        """


# Usage:
from kserve import PredictorConfig
# Create predictor configuration
config = PredictorConfig()
config.predictor_host = "sklearn-predictor.default.svc.cluster.local"
config.predictor_protocol = "v2"
config.predictor_use_ssl = False
config.predictor_request_timeout_seconds = 60
config.predictor_request_retries = 3
config.predictor_health_check = True
# Get base URL
base_url = config.predictor_base_url
print(f"Predictor URL: {base_url}")

OpenAPI client configuration for Kubernetes API operations.
class Configuration:
    """
    Configuration for API client.

    Args:
        host (str, optional): API server host URL
        api_key (dict, optional): API keys for authentication
        api_key_prefix (dict, optional): API key prefixes
        username (str, optional): Username for HTTP basic auth
        password (str, optional): Password for HTTP basic auth
        verify_ssl (bool): Verify SSL certificates (default: True)
        ssl_ca_cert (str, optional): Path to CA certificate file
        cert_file (str, optional): Path to client certificate file
        key_file (str, optional): Path to client key file
        connection_pool_maxsize (int): Connection pool max size (default: 4)
        proxy (str, optional): Proxy URL
        proxy_headers (dict, optional): Proxy headers
        retries (int): Number of retries (default: 0)
        debug (bool): Enable debug mode (default: False)
    """

    def __init__(
        self,
        host: Optional[str] = None,
        api_key: Optional[Dict[str, str]] = None,
        api_key_prefix: Optional[Dict[str, str]] = None,
        username: Optional[str] = None,
        password: Optional[str] = None,
        verify_ssl: bool = True,
        ssl_ca_cert: Optional[str] = None,
        cert_file: Optional[str] = None,
        key_file: Optional[str] = None,
        connection_pool_maxsize: int = 4,
        proxy: Optional[str] = None,
        proxy_headers: Optional[Dict[str, str]] = None,
        retries: int = 0,
        debug: bool = False,
    ): ...

    # Properties
    @property
    def host(self) -> Optional[str]:
        """API server host"""

    @property
    def verify_ssl(self) -> bool:
        """Verify SSL certificates"""

    @property
    def ssl_ca_cert(self) -> Optional[str]:
        """Path to CA certificate"""

    @property
    def cert_file(self) -> Optional[str]:
        """Path to client certificate"""

    @property
    def key_file(self) -> Optional[str]:
        """Path to client key"""

    @property
    def connection_pool_maxsize(self) -> int:
        """Connection pool max size"""

    @property
    def retries(self) -> int:
        """Number of retries"""

    @property
    def debug(self) -> bool:
        """Debug mode enabled"""


# Usage:
from kserve import Configuration, KServeClient
# Create configuration
config = Configuration(
host="https://kubernetes.default.svc",
verify_ssl=True,
ssl_ca_cert="/var/run/secrets/kubernetes.io/serviceaccount/ca.crt",
connection_pool_maxsize=10,
retries=3
)
# Use with KServeClient
client = KServeClient(client_configuration=config)

Configuration for REST inference clients.
class RESTConfig:
    """
    Configuration for InferenceRESTClient.

    Args:
        protocol (str): Protocol version ("v1" or "v2", default: "v2")
        timeout (int): Default request timeout in seconds (default: 60)
        retries (int): Number of retry attempts (default: 0)
        verify_ssl (bool): Verify SSL certificates (default: True)
        cert (str, optional): Path to SSL client certificate
        key (str, optional): Path to SSL client key
    """

    def __init__(
        self,
        protocol: str = "v2",
        timeout: int = 60,
        retries: int = 0,
        verify_ssl: bool = True,
        cert: Optional[str] = None,
        key: Optional[str] = None,
    ): ...

    # Properties
    @property
    def protocol(self) -> str:
        """Protocol version"""

    @property
    def timeout(self) -> int:
        """Request timeout"""

    @property
    def retries(self) -> int:
        """Number of retries"""

    @property
    def verify_ssl(self) -> bool:
        """Verify SSL certificates"""

    @property
    def cert(self) -> Optional[str]:
        """Client certificate path"""

    @property
    def key(self) -> Optional[str]:
        """Client key path"""


# Usage:
from kserve import InferenceRESTClient, RESTConfig
# Create REST configuration
config = RESTConfig(
protocol="v2",
timeout=120,
retries=3,
verify_ssl=True,
cert="/path/to/client-cert.pem",
key="/path/to/client-key.pem"
)
# Create client with configuration
client = InferenceRESTClient(url="https://model.example.com:8080", config=config)

from kserve import Model, PredictorConfig
import requests
class ImageTransformer(Model):
    """Example transformer that reshapes requests and annotates responses."""

    def __init__(self, name: str, predictor_host: str):
        """
        Args:
            name (str): Model name passed to the base ``Model``
            predictor_host (str): Predictor host kept on the instance
        """
        super().__init__(name)
        self.predictor_host = predictor_host

    def resize_and_normalize(self, image_data):
        """
        Placeholder image-processing hook used by ``preprocess``.

        Returns the input unchanged so the example is runnable; replace the
        body with real resizing/normalization for your model.
        NOTE(review): assumes the base ``Model`` does not already provide a
        method with this name - confirm before relying on it.
        """
        return image_data

    def preprocess(self, body, headers=None):
        """Transform the ``instances`` of the request body before prediction."""
        # Transform image data
        image_data = body["instances"]
        processed = self.resize_and_normalize(image_data)
        return {"instances": processed}

    def postprocess(self, response, headers=None):
        """Return the predictions together with this transformer's name."""
        # Add metadata to response
        predictions = response["predictions"]
        return {
            "predictions": predictions,
            "transformer": self.name,
        }
# Configure predictor
config = PredictorConfig()
config.predictor_host = "resnet-predictor.default.svc.cluster.local"
config.predictor_protocol = "v1"
config.predictor_use_ssl = False
config.predictor_request_timeout_seconds = 30
# Use configuration
transformer = ImageTransformer("image-transformer", config.predictor_host)

from kserve import Configuration, KServeClient
# Configuration with custom CA
config = Configuration(
host="https://k8s-api.example.com:6443",
verify_ssl=True,
ssl_ca_cert="/etc/ssl/certs/custom-ca.crt",
connection_pool_maxsize=20,
retries=5
)
# Create client
client = KServeClient(client_configuration=config)
# Use client
isvcs = client.list(namespace="default")

from kserve import Configuration, KServeClient
# Configuration with client certificates
config = Configuration(
host="https://k8s-api.example.com:6443",
verify_ssl=True,
ssl_ca_cert="/etc/ssl/certs/ca.crt",
cert_file="/etc/ssl/certs/client.crt",
key_file="/etc/ssl/private/client.key"
)
client = KServeClient(client_configuration=config)

from kserve import Configuration
# Configuration with API key
config = Configuration(
    host="https://api.example.com",
    # Pass the raw token only: the "Bearer" scheme is supplied separately via
    # api_key_prefix, so repeating it here would duplicate the prefix.
    api_key={"authorization": "token123"},
    api_key_prefix={"authorization": "Bearer"},
)

from kserve import Configuration
# Configuration with username/password
config = Configuration(
host="https://api.example.com",
username="admin",
password="secret123",
verify_ssl=True
)

from kserve import Configuration
# Configuration with proxy
config = Configuration(
host="https://k8s-api.example.com:6443",
proxy="http://proxy.example.com:8080",
proxy_headers={"Proxy-Authorization": "Basic dXNlcjpwYXNz"}
)

from kserve import InferenceRESTClient, RESTConfig
# v1 protocol configuration
v1_config = RESTConfig(
protocol="v1",
timeout=30,
retries=2
)
v1_client = InferenceRESTClient(url="http://localhost:8080", config=v1_config)
# v2 protocol configuration
v2_config = RESTConfig(
protocol="v2",
timeout=60,
retries=3
)
v2_client = InferenceRESTClient(url="http://localhost:8080", config=v2_config)

from kserve import InferenceRESTClient, RESTConfig
# HTTPS with certificate verification
ssl_config = RESTConfig(
protocol="v2",
verify_ssl=True,
cert="/path/to/client.crt",
key="/path/to/client.key",
timeout=90
)
client = InferenceRESTClient(url="https://secure-model.example.com:8443", config=ssl_config)

from kserve import InferenceRESTClient, RESTConfig
# Disable SSL verification (not recommended for production)
config = RESTConfig(
protocol="v2",
verify_ssl=False,
timeout=60
)
client = InferenceRESTClient(url="https://localhost:8080", config=config)

from kserve import Configuration, KServeClient
# Configuration with retry policy
config = Configuration(
host="https://k8s-api.example.com:6443",
retries=5, # Retry up to 5 times
connection_pool_maxsize=10
)
client = KServeClient(client_configuration=config)

from kserve import Configuration, KServeClient
# Enable debug mode
config = Configuration(
host="https://k8s-api.example.com:6443",
debug=True # Enable detailed logging
)
client = KServeClient(client_configuration=config)

from kserve.context import set_predictor_config, get_predictor_config
def set_predictor_config(config: PredictorConfig) -> None:
    """
    Save the given predictor configuration in the async context.

    Args:
        config (PredictorConfig): The predictor configuration to save
    """
def get_predictor_config() -> Optional[PredictorConfig]:
    """
    Fetch the predictor configuration held in the context.

    Returns:
        PredictorConfig or None: The stored configuration, or None when
        nothing has been set
    """


# Usage:
from kserve import PredictorConfig
from kserve.context import set_predictor_config, get_predictor_config
# Set configuration in context
config = PredictorConfig()
config.predictor_host = "predictor.default.svc.cluster.local"
config.predictor_protocol = "v2"
set_predictor_config(config)
# Retrieve configuration from context
retrieved_config = get_predictor_config()
if retrieved_config:
print(f"Predictor host: {retrieved_config.predictor_host}")

from kserve import Model, PredictorConfig
from kserve.context import set_predictor_config, get_predictor_config
import httpx
class TransformerModel(Model):
    """Example model that forwards prediction requests to a predictor."""

    async def predict(self, payload, headers=None):
        """
        Send ``payload`` to the predictor configured in the context.

        Args:
            payload: JSON-serializable request body posted to the predictor
            headers: Request headers (not used by this example)

        Returns:
            The predictor's response body decoded from JSON

        Raises:
            ValueError: If no predictor configuration is set in the context
            httpx.HTTPStatusError: If the predictor answers with a 4xx/5xx status
        """
        # Get predictor configuration from context
        config = get_predictor_config()
        if not config:
            raise ValueError("Predictor configuration not set")

        # Make request to predictor
        async with httpx.AsyncClient() as client:
            url = f"{config.predictor_base_url}/v2/models/predictor/infer"
            response = await client.post(
                url,
                json=payload,
                timeout=config.predictor_request_timeout_seconds,
            )
            # Surface predictor failures instead of returning an error body
            # as if it were a prediction.
            response.raise_for_status()
            return response.json()
# Set configuration before starting
config = PredictorConfig()
# Host only, without a scheme, matching the other examples in this document.
# NOTE(review): predictor_base_url is documented as returning the URL "with
# protocol and host", so a scheme here would presumably be duplicated - confirm.
config.predictor_host = "predictor:8080"
config.predictor_protocol = "v2"
set_predictor_config(config)

import os
from kserve import PredictorConfig
def load_predictor_config_from_env() -> PredictorConfig:
    """
    Build a PredictorConfig from PREDICTOR_* environment variables.

    Each setting falls back to a default when its variable is unset. Boolean
    settings are true only when the value, lower-cased, equals "true".

    Returns:
        PredictorConfig: Configuration populated from the environment

    Raises:
        ValueError: If PREDICTOR_TIMEOUT or PREDICTOR_RETRIES is not an integer
    """
    read = os.getenv

    predictor_config = PredictorConfig()
    predictor_config.predictor_host = read("PREDICTOR_HOST", "localhost:8080")
    predictor_config.predictor_protocol = read("PREDICTOR_PROTOCOL", "v2")

    ssl_text = read("PREDICTOR_USE_SSL", "false")
    predictor_config.predictor_use_ssl = ssl_text.lower() == "true"

    timeout_text = read("PREDICTOR_TIMEOUT", "60")
    predictor_config.predictor_request_timeout_seconds = int(timeout_text)

    retries_text = read("PREDICTOR_RETRIES", "3")
    predictor_config.predictor_request_retries = int(retries_text)

    health_text = read("PREDICTOR_HEALTH_CHECK", "true")
    predictor_config.predictor_health_check = health_text.lower() == "true"

    return predictor_config
# Use environment-based configuration
config = load_predictor_config_from_env()

from kserve import Configuration, KServeClient
import os
def load_in_cluster_config() -> Configuration:
    """
    Build a Configuration from the mounted service-account files.

    Reads the token file, strips surrounding whitespace, and passes it as the
    "authorization" API key with a "Bearer" prefix, alongside the CA
    certificate path.

    Returns:
        Configuration: Configuration targeting https://kubernetes.default.svc

    Raises:
        OSError: If the token file cannot be opened
    """
    ca_cert_path = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
    token_path = "/var/run/secrets/kubernetes.io/serviceaccount/token"

    with open(token_path, "r") as token_file:
        bearer_token = token_file.read().strip()

    return Configuration(
        host="https://kubernetes.default.svc",
        api_key={"authorization": bearer_token},
        api_key_prefix={"authorization": "Bearer"},
        ssl_ca_cert=ca_cert_path,
        verify_ssl=True,
    )
# Use in-cluster configuration
if os.path.exists("/var/run/secrets/kubernetes.io/serviceaccount"):
config = load_in_cluster_config()
client = KServeClient(client_configuration=config)

from kserve import (
KServeClient,
Configuration,
InferenceRESTClient,
RESTConfig,
PredictorConfig
)
import os
def create_kserve_client() -> KServeClient:
    """
    Create a KServeClient, picking settings by where the code is running.

    When the service-account directory exists, the mounted token and CA
    certificate are used against https://kubernetes.default.svc. Otherwise a
    Configuration with only retry and connection-pool settings is used.

    Returns:
        KServeClient: Client built from the selected Configuration
    """
    if not os.path.exists("/var/run/secrets/kubernetes.io/serviceaccount"):
        # Out-of-cluster: no host or credentials are passed explicitly.
        # NOTE(review): the original example says this path uses kubeconfig -
        # confirm against KServeClient.
        client_config = Configuration(
            retries=3,
            connection_pool_maxsize=10,
        )
        return KServeClient(client_configuration=client_config)

    # In-cluster: authenticate with the mounted service-account token.
    token_path = "/var/run/secrets/kubernetes.io/serviceaccount/token"
    ca_cert_path = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
    with open(token_path, "r") as token_file:
        bearer_token = token_file.read().strip()

    client_config = Configuration(
        host="https://kubernetes.default.svc",
        api_key={"authorization": bearer_token},
        api_key_prefix={"authorization": "Bearer"},
        ssl_ca_cert=ca_cert_path,
        verify_ssl=True,
        retries=3,
    )
    return KServeClient(client_configuration=client_config)
def create_inference_client(url: str, use_ssl: bool = False) -> InferenceRESTClient:
    """
    Create an InferenceRESTClient for ``url`` using the v2 protocol.

    Args:
        url (str): Base URL of the inference endpoint
        use_ssl (bool): Forwarded unchanged as RESTConfig's ``verify_ssl``
            (default: False)

    Returns:
        InferenceRESTClient: Client with a 120-second timeout and 3 retries
    """
    rest_config = RESTConfig(
        protocol="v2",
        timeout=120,
        retries=3,
        verify_ssl=use_ssl,
    )
    return InferenceRESTClient(url=url, config=rest_config)
def create_predictor_config(host: str) -> PredictorConfig:
    """
    Build a PredictorConfig for ``host`` with this example's fixed settings.

    Args:
        host (str): Value assigned to ``predictor_host``

    Returns:
        PredictorConfig: v2 protocol, SSL off, 60-second timeout, 3 retries,
        health check on
    """
    predictor_config = PredictorConfig()

    # Endpoint
    predictor_config.predictor_host = host
    predictor_config.predictor_protocol = "v2"
    predictor_config.predictor_use_ssl = False

    # Request behaviour
    predictor_config.predictor_request_timeout_seconds = 60
    predictor_config.predictor_request_retries = 3
    predictor_config.predictor_health_check = True

    return predictor_config
# Usage
if __name__ == "__main__":
# Create KServe client for Kubernetes operations
kserve_client = create_kserve_client()
# Create inference client for making predictions
inference_client = create_inference_client("http://localhost:8080")
# Create predictor configuration for transformers
predictor_config = create_predictor_config("predictor.default.svc.cluster.local")
print("Configuration created successfully")

# Load sensitive data from environment
config = Configuration(
host=os.getenv("K8S_API_HOST"),
api_key={"authorization": os.getenv("K8S_API_TOKEN")},
api_key_prefix={"authorization": "Bearer"}
)

# Short timeout for health checks
health_config = RESTConfig(protocol="v2", timeout=5)
# Longer timeout for inference
inference_config = RESTConfig(protocol="v2", timeout=120)

# Configure retries for transient failures
config = Configuration(
    host="https://k8s-api.example.com",
    retries=5,
    connection_pool_maxsize=10,
)


def get_config(environment: str) -> Configuration:
    """
    Return the Configuration for the named environment.

    Args:
        environment (str): "production" selects the production settings; any
            other value selects the development settings

    Returns:
        Configuration: Production uses SSL verification with a custom CA and
        5 retries; development disables SSL verification and uses 2 retries
    """
    if environment != "production":
        return Configuration(
            host="https://k8s-dev.example.com",
            verify_ssl=False,
            retries=2,
        )

    return Configuration(
        host="https://k8s-prod.example.com",
        verify_ssl=True,
        ssl_ca_cert="/etc/ssl/prod-ca.crt",
        retries=5,
    )