tessl install tessl/pypi-kserve@0.16.1

KServe is a comprehensive Python SDK that provides standardized interfaces for building and deploying machine learning model serving infrastructure on Kubernetes.
Manage KServe resources on Kubernetes clusters including InferenceServices, TrainedModels, InferenceGraphs, and ServingRuntimes with full CRUD operations and status tracking.
The KServeClient provides a Python interface to the Kubernetes API for managing KServe custom resources.
class KServeClient:
"""
Kubernetes API client for KServe resources.
Args:
config_file (str, optional): Path to kubeconfig file
config_dict (dict, optional): Dictionary containing kubeconfig
context (str, optional): Kubernetes context to use
client_configuration (Configuration, optional): API client configuration
persist_config (bool): Whether to persist configuration (default: True)
"""
def __init__(
self,
config_file: str = None,
config_dict: dict = None,
context: str = None,
client_configuration: Configuration = None,
persist_config: bool = True
): ...
# Properties
@property
def core_api(self):
"""CoreV1Api instance for core Kubernetes resources"""
@property
def app_api(self):
"""AppsV1Api instance for Kubernetes apps resources"""
@property
def api_instance(self):
"""CustomObjectsApi instance for custom resources"""
@property
def hpa_v2_api(self):
"""AutoscalingV2Api instance for HorizontalPodAutoscaler resources"""Create a new InferenceService resource.
Create a new InferenceService resource.

def create(
self,
inferenceservice: V1beta1InferenceService,
namespace: str = None,
watch: bool = False,
timeout_seconds: int = 600
) -> V1beta1InferenceService:
"""
Create an InferenceService.
Args:
inferenceservice (V1beta1InferenceService): InferenceService object to create
namespace (str, optional): Kubernetes namespace (default: current namespace)
watch (bool): Wait for InferenceService to be ready (default: False)
timeout_seconds (int): Timeout for watch in seconds (default: 600)
Returns:
V1beta1InferenceService: Created InferenceService object
"""Usage:
from kserve import KServeClient, V1beta1InferenceService, V1beta1InferenceServiceSpec, V1beta1PredictorSpec, V1beta1SKLearnSpec
# Create client
kserve_client = KServeClient()
# Define InferenceService
isvc = V1beta1InferenceService(
api_version="serving.kserve.io/v1beta1",
kind="InferenceService",
metadata={
"name": "sklearn-iris",
"namespace": "default"
},
spec=V1beta1InferenceServiceSpec(
predictor=V1beta1PredictorSpec(
sklearn=V1beta1SKLearnSpec(
storage_uri="gs://kfserving-examples/models/sklearn/iris"
)
)
)
)
# Create InferenceService
kserve_client.create(isvc)

Create and wait for ready:
# Create and wait
kserve_client.create(isvc, namespace="default", watch=True, timeout_seconds=300)

Retrieve an existing InferenceService.
def get(
self,
name: str,
namespace: str = None,
version: str = None
) -> V1beta1InferenceService:
"""
Get an InferenceService.
Args:
name (str): Name of the InferenceService
namespace (str, optional): Kubernetes namespace
version (str, optional): API version (default: v1beta1)
Returns:
V1beta1InferenceService: InferenceService object
"""Usage:
# Get InferenceService
isvc = kserve_client.get("sklearn-iris", namespace="default")
print(f"Name: {isvc.metadata.name}")
print(f"Status: {isvc.status}")List all InferenceServices in a namespace.
def list(
self,
namespace: str = None,
label_selector: str = None,
field_selector: str = None,
timeout_seconds: int = None,
watch: bool = False
) -> List[V1beta1InferenceService]:
"""
List InferenceServices.
Args:
namespace (str, optional): Kubernetes namespace (default: all namespaces)
label_selector (str, optional): Label selector filter (e.g., "app=myapp")
field_selector (str, optional): Field selector filter
timeout_seconds (int, optional): Request timeout
watch (bool): Watch for changes (default: False)
Returns:
list: List of V1beta1InferenceService objects
"""Usage:
# List all InferenceServices in namespace
isvcs = kserve_client.list(namespace="default")
for isvc in isvcs:
    print(f"Name: {isvc.metadata.name}, Ready: {isvc.status.conditions}")
# List with label selector
isvcs = kserve_client.list(namespace="default", label_selector="framework=sklearn")

Update an existing InferenceService.
def patch(
self,
name: str,
inferenceservice: V1beta1InferenceService,
namespace: str = None
) -> V1beta1InferenceService:
"""
Patch an InferenceService.
Args:
name (str): Name of the InferenceService
inferenceservice (V1beta1InferenceService): Updated InferenceService object
namespace (str, optional): Kubernetes namespace
Returns:
V1beta1InferenceService: Updated InferenceService object
"""
def replace(
self,
name: str,
inferenceservice: V1beta1InferenceService,
namespace: str = None
) -> V1beta1InferenceService:
"""
Replace an InferenceService.
Args:
name (str): Name of the InferenceService
inferenceservice (V1beta1InferenceService): New InferenceService object
namespace (str, optional): Kubernetes namespace
Returns:
V1beta1InferenceService: Replaced InferenceService object
"""Usage:
# Get existing InferenceService
isvc = kserve_client.get("sklearn-iris", namespace="default")
# Update storage URI
isvc.spec.predictor.sklearn.storage_uri = "gs://new-bucket/models/sklearn/iris"
# Patch the InferenceService
kserve_client.patch("sklearn-iris", isvc, namespace="default")
# Or replace entirely
kserve_client.replace("sklearn-iris", isvc, namespace="default")

Delete an InferenceService.
def delete(
self,
name: str,
namespace: str = None
) -> object:
"""
Delete an InferenceService.
Args:
name (str): Name of the InferenceService
namespace (str, optional): Kubernetes namespace
Returns:
object: Deletion status
"""Usage:
# Delete InferenceService
kserve_client.delete("sklearn-iris", namespace="default")

Wait for an InferenceService to become ready.
def wait_isvc_ready(
self,
name: str,
namespace: str = None,
timeout_seconds: int = 600,
polling_interval: int = 10
) -> V1beta1InferenceService:
"""
Wait for InferenceService to be ready.
Args:
name (str): Name of the InferenceService
namespace (str, optional): Kubernetes namespace
timeout_seconds (int): Maximum time to wait (default: 600)
polling_interval (int): Polling interval in seconds (default: 10)
Returns:
V1beta1InferenceService: Ready InferenceService object
Raises:
RuntimeError: If InferenceService does not become ready within timeout
"""Usage:
# Wait for InferenceService to be ready
try:
    isvc = kserve_client.wait_isvc_ready("sklearn-iris", namespace="default", timeout_seconds=300)
    print(f"InferenceService is ready: {isvc.status.url}")
except RuntimeError as e:
    print(f"InferenceService did not become ready: {e}")

Check if an InferenceService is ready.
def is_isvc_ready(
self,
name: str,
namespace: str = None
) -> bool:
"""
Check if InferenceService is ready.
Args:
name (str): Name of the InferenceService
namespace (str, optional): Kubernetes namespace
Returns:
bool: True if InferenceService is ready
"""Usage:
# Check readiness
if kserve_client.is_isvc_ready("sklearn-iris", namespace="default"):
    print("InferenceService is ready")
else:
    print("InferenceService is not ready")

Get the status of an InferenceService.
def get_isvc_status(
self,
name: str,
namespace: str = None
) -> V1beta1InferenceServiceStatus:
"""
Get InferenceService status.
Args:
name (str): Name of the InferenceService
namespace (str, optional): Kubernetes namespace
Returns:
V1beta1InferenceServiceStatus: Status object with conditions and URLs
"""Usage:
# Get status
status = kserve_client.get_isvc_status("sklearn-iris", namespace="default")
print(f"URL: {status.url}")
print(f"Conditions: {status.conditions}")
print(f"Components: {status.components}")Restart an InferenceService.
def rollout_restart(
self,
name: str,
namespace: str = None
) -> V1beta1InferenceService:
"""
Rollout restart an InferenceService.
Args:
name (str): Name of the InferenceService
namespace (str, optional): Kubernetes namespace
Returns:
V1beta1InferenceService: Updated InferenceService object
"""Usage:
# Restart InferenceService
kserve_client.rollout_restart("sklearn-iris", namespace="default")

Create a TrainedModel resource.
def create_trained_model(
self,
trainedmodel: V1alpha1TrainedModel,
namespace: str = None
) -> V1alpha1TrainedModel:
"""
Create a TrainedModel.
Args:
trainedmodel (V1alpha1TrainedModel): TrainedModel object to create
namespace (str, optional): Kubernetes namespace
Returns:
V1alpha1TrainedModel: Created TrainedModel object
"""Usage:
from kserve import V1alpha1TrainedModel, V1alpha1TrainedModelSpec, V1alpha1ModelSpec
# Define TrainedModel
trained_model = V1alpha1TrainedModel(
api_version="serving.kserve.io/v1alpha1",
kind="TrainedModel",
metadata={"name": "model-v1", "namespace": "default"},
spec=V1alpha1TrainedModelSpec(
inference_service="sklearn-iris",
model=V1alpha1ModelSpec(
storage_uri="gs://kfserving-examples/models/sklearn/iris/v1",
framework="sklearn",
memory="1Gi"
)
)
)
# Create TrainedModel
kserve_client.create_trained_model(trained_model, namespace="default")

def get_trained_model(
self,
name: str,
namespace: str = None
) -> V1alpha1TrainedModel:
"""
Get a TrainedModel.
Args:
name (str): Name of the TrainedModel
namespace (str, optional): Kubernetes namespace
Returns:
V1alpha1TrainedModel: TrainedModel object
"""def list_trained_models(
self,
namespace: str = None,
label_selector: str = None
) -> List[V1alpha1TrainedModel]:
"""
List TrainedModels.
Args:
namespace (str, optional): Kubernetes namespace
label_selector (str, optional): Label selector filter
Returns:
list: List of V1alpha1TrainedModel objects
"""def patch_trained_model(
self,
name: str,
trainedmodel: V1alpha1TrainedModel,
namespace: str = None
) -> V1alpha1TrainedModel:
"""
Patch a TrainedModel.
Args:
name (str): Name of the TrainedModel
trainedmodel (V1alpha1TrainedModel): Updated TrainedModel object
namespace (str, optional): Kubernetes namespace
Returns:
V1alpha1TrainedModel: Updated TrainedModel object
"""def delete_trained_model(
self,
name: str,
namespace: str = None
) -> object:
"""
Delete a TrainedModel.
Args:
name (str): Name of the TrainedModel
namespace (str, optional): Kubernetes namespace
Returns:
object: Deletion status
"""def wait_model_ready(
self,
name: str,
namespace: str = None,
timeout_seconds: int = 600,
polling_interval: int = 10
) -> V1alpha1TrainedModel:
"""
Wait for TrainedModel to be ready.
Args:
name (str): Name of the TrainedModel
namespace (str, optional): Kubernetes namespace
timeout_seconds (int): Maximum time to wait (default: 600)
polling_interval (int): Polling interval in seconds (default: 10)
Returns:
V1alpha1TrainedModel: Ready TrainedModel object
Raises:
RuntimeError: If TrainedModel does not become ready within timeout
"""def create_inference_graph(
Create an InferenceGraph resource.

def create_inference_graph(
self,
inferencegraph: V1alpha1InferenceGraph,
namespace: str = None
) -> V1alpha1InferenceGraph:
"""
Create an InferenceGraph.
Args:
inferencegraph (V1alpha1InferenceGraph): InferenceGraph object to create
namespace (str, optional): Kubernetes namespace
Returns:
V1alpha1InferenceGraph: Created InferenceGraph object
"""Usage:
from kserve import V1alpha1InferenceGraph, V1alpha1InferenceGraphSpec, V1alpha1InferenceStep
# Define InferenceGraph
graph = V1alpha1InferenceGraph(
api_version="serving.kserve.io/v1alpha1",
kind="InferenceGraph",
metadata={"name": "ensemble", "namespace": "default"},
spec=V1alpha1InferenceGraphSpec(
nodes={
"root": V1alpha1InferenceStep(
node_name="root",
service_name="sklearn-iris"
)
}
)
)
# Create InferenceGraph
kserve_client.create_inference_graph(graph, namespace="default")

def get_inference_graph(
self,
name: str,
namespace: str = None
) -> V1alpha1InferenceGraph:
"""
Get an InferenceGraph.
Args:
name (str): Name of the InferenceGraph
namespace (str, optional): Kubernetes namespace
Returns:
V1alpha1InferenceGraph: InferenceGraph object
"""def list_inference_graphs(
self,
namespace: str = None,
label_selector: str = None
) -> List[V1alpha1InferenceGraph]:
"""
List InferenceGraphs.
Args:
namespace (str, optional): Kubernetes namespace
label_selector (str, optional): Label selector filter
Returns:
list: List of V1alpha1InferenceGraph objects
"""def patch_inference_graph(
self,
name: str,
inferencegraph: V1alpha1InferenceGraph,
namespace: str = None
) -> V1alpha1InferenceGraph:
"""
Patch an InferenceGraph.
Args:
name (str): Name of the InferenceGraph
inferencegraph (V1alpha1InferenceGraph): Updated InferenceGraph object
namespace (str, optional): Kubernetes namespace
Returns:
V1alpha1InferenceGraph: Updated InferenceGraph object
"""def delete_inference_graph(
self,
name: str,
namespace: str = None
) -> object:
"""
Delete an InferenceGraph.
Args:
name (str): Name of the InferenceGraph
namespace (str, optional): Kubernetes namespace
Returns:
object: Deletion status
"""def is_ig_ready(
self,
name: str,
namespace: str = None
) -> bool:
"""
Check if InferenceGraph is ready.
Args:
name (str): Name of the InferenceGraph
namespace (str, optional): Kubernetes namespace
Returns:
bool: True if InferenceGraph is ready
"""def wait_ig_ready(
self,
name: str,
namespace: str = None,
timeout_seconds: int = 600,
polling_interval: int = 10
) -> V1alpha1InferenceGraph:
"""
Wait for InferenceGraph to be ready.
Args:
name (str): Name of the InferenceGraph
namespace (str, optional): Kubernetes namespace
timeout_seconds (int): Maximum time to wait (default: 600)
polling_interval (int): Polling interval in seconds (default: 10)
Returns:
V1alpha1InferenceGraph: Ready InferenceGraph object
Raises:
RuntimeError: If InferenceGraph does not become ready within timeout
"""def create_serving_runtime(
Create a ServingRuntime resource.

def create_serving_runtime(
self,
servingruntime: V1alpha1ServingRuntime,
namespace: str = None
) -> V1alpha1ServingRuntime:
"""
Create a ServingRuntime.
Args:
servingruntime (V1alpha1ServingRuntime): ServingRuntime object to create
namespace (str, optional): Kubernetes namespace
Returns:
V1alpha1ServingRuntime: Created ServingRuntime object
"""Usage:
from kserve import V1alpha1ServingRuntime, V1alpha1ServingRuntimeSpec, V1alpha1SupportedModelFormat
from kubernetes.client import V1Container
# Define ServingRuntime
runtime = V1alpha1ServingRuntime(
api_version="serving.kserve.io/v1alpha1",
kind="ServingRuntime",
metadata={"name": "custom-runtime", "namespace": "default"},
spec=V1alpha1ServingRuntimeSpec(
supported_model_formats=[
V1alpha1SupportedModelFormat(
name="sklearn",
version="1",
auto_select=True
)
],
containers=[
V1Container(
name="kserve-container",
image="kserve/sklearnserver:latest",
resources={
"limits": {"cpu": "1", "memory": "2Gi"},
"requests": {"cpu": "500m", "memory": "1Gi"}
}
)
]
)
)
# Create ServingRuntime
kserve_client.create_serving_runtime(runtime, namespace="default")

def get_serving_runtime(
self,
name: str,
namespace: str = None
) -> V1alpha1ServingRuntime:
"""
Get a ServingRuntime.
Args:
name (str): Name of the ServingRuntime
namespace (str, optional): Kubernetes namespace
Returns:
V1alpha1ServingRuntime: ServingRuntime object
"""def list_serving_runtimes(
self,
namespace: str = None,
label_selector: str = None
) -> List[V1alpha1ServingRuntime]:
"""
List ServingRuntimes.
Args:
namespace (str, optional): Kubernetes namespace
label_selector (str, optional): Label selector filter
Returns:
list: List of V1alpha1ServingRuntime objects
"""def patch_serving_runtime(
self,
name: str,
servingruntime: V1alpha1ServingRuntime,
namespace: str = None
) -> V1alpha1ServingRuntime:
"""
Patch a ServingRuntime.
Args:
name (str): Name of the ServingRuntime
servingruntime (V1alpha1ServingRuntime): Updated ServingRuntime object
namespace (str, optional): Kubernetes namespace
Returns:
V1alpha1ServingRuntime: Updated ServingRuntime object
"""def delete_serving_runtime(
self,
name: str,
namespace: str = None
) -> object:
"""
Delete a ServingRuntime.
Args:
name (str): Name of the ServingRuntime
namespace (str, optional): Kubernetes namespace
Returns:
object: Deletion status
"""def create_cluster_serving_runtime(
Create a ClusterServingRuntime resource.

def create_cluster_serving_runtime(
self,
clusterservingruntime: V1alpha1ClusterServingRuntime,
namespace: str = None
) -> V1alpha1ClusterServingRuntime:
"""
Create a ClusterServingRuntime.
Args:
clusterservingruntime (V1alpha1ClusterServingRuntime): ClusterServingRuntime object
namespace (str, optional): Kubernetes namespace
Returns:
V1alpha1ClusterServingRuntime: Created ClusterServingRuntime object
"""def get_cluster_serving_runtime(
self,
name: str
) -> V1alpha1ClusterServingRuntime:
"""
Get a ClusterServingRuntime.
Args:
name (str): Name of the ClusterServingRuntime
Returns:
V1alpha1ClusterServingRuntime: ClusterServingRuntime object
"""def list_cluster_serving_runtimes(
self,
label_selector: str = None
) -> List[V1alpha1ClusterServingRuntime]:
"""
List ClusterServingRuntimes.
Args:
label_selector (str, optional): Label selector filter
Returns:
list: List of V1alpha1ClusterServingRuntime objects
"""def delete_cluster_serving_runtime(
self,
name: str
) -> object:
"""
Delete a ClusterServingRuntime.
Args:
name (str): Name of the ClusterServingRuntime
Returns:
object: Deletion status
"""def create_local_model_node_group(
Create a LocalModelNodeGroup resource.

def create_local_model_node_group(
self,
localmodnodegroup: V1alpha1LocalModelNodeGroup,
namespace: str = None
) -> V1alpha1LocalModelNodeGroup:
"""
Create a LocalModelNodeGroup.
Args:
localmodnodegroup (V1alpha1LocalModelNodeGroup): LocalModelNodeGroup object to create
namespace (str, optional): Kubernetes namespace
Returns:
V1alpha1LocalModelNodeGroup: Created LocalModelNodeGroup object
"""def get_local_model_node_group(
self,
name: str,
namespace: str = None
) -> V1alpha1LocalModelNodeGroup:
"""
Get a LocalModelNodeGroup.
Args:
name (str): Name of the LocalModelNodeGroup
namespace (str, optional): Kubernetes namespace
Returns:
V1alpha1LocalModelNodeGroup: LocalModelNodeGroup object
"""def list_local_model_node_groups(
self,
namespace: str = None,
label_selector: str = None
) -> List[V1alpha1LocalModelNodeGroup]:
"""
List LocalModelNodeGroups.
Args:
namespace (str, optional): Kubernetes namespace
label_selector (str, optional): Label selector filter
Returns:
list: List of V1alpha1LocalModelNodeGroup objects
"""def delete_local_model_node_group(
self,
name: str,
namespace: str = None
) -> object:
"""
Delete a LocalModelNodeGroup.
Args:
name (str): Name of the LocalModelNodeGroup
namespace (str, optional): Kubernetes namespace
Returns:
object: Deletion status
"""def create_local_model_cache(
Create a LocalModelCache resource.

def create_local_model_cache(
self,
localmodelcache: V1alpha1LocalModelCache,
namespace: str = None
) -> V1alpha1LocalModelCache:
"""
Create a LocalModelCache.
Args:
localmodelcache (V1alpha1LocalModelCache): LocalModelCache object to create
namespace (str, optional): Kubernetes namespace
Returns:
V1alpha1LocalModelCache: Created LocalModelCache object
"""def get_local_model_cache(
self,
name: str,
namespace: str = None
) -> V1alpha1LocalModelCache:
"""
Get a LocalModelCache.
Args:
name (str): Name of the LocalModelCache
namespace (str, optional): Kubernetes namespace
Returns:
V1alpha1LocalModelCache: LocalModelCache object
"""def list_local_model_caches(
self,
namespace: str = None,
label_selector: str = None
) -> List[V1alpha1LocalModelCache]:
"""
List LocalModelCaches.
Args:
namespace (str, optional): Kubernetes namespace
label_selector (str, optional): Label selector filter
Returns:
list: List of V1alpha1LocalModelCache objects
"""def delete_local_model_cache(
self,
name: str,
namespace: str = None
) -> object:
"""
Delete a LocalModelCache.
Args:
name (str): Name of the LocalModelCache
namespace (str, optional): Kubernetes namespace
Returns:
object: Deletion status
"""def is_local_model_cache_ready(
self,
name: str,
namespace: str = None
) -> bool:
"""
Check if LocalModelCache is ready.
Args:
name (str): Name of the LocalModelCache
namespace (str, optional): Kubernetes namespace
Returns:
bool: True if LocalModelCache is ready
"""def wait_local_model_cache_ready(
self,
name: str,
namespace: str = None,
timeout_seconds: int = 600,
polling_interval: int = 10
) -> V1alpha1LocalModelCache:
"""
Wait for LocalModelCache to be ready.
Args:
name (str): Name of the LocalModelCache
namespace (str, optional): Kubernetes namespace
timeout_seconds (int): Maximum time to wait (default: 600)
polling_interval (int): Polling interval in seconds (default: 10)
Returns:
V1alpha1LocalModelCache: Ready LocalModelCache object
Raises:
RuntimeError: If LocalModelCache does not become ready within timeout
"""Configure storage credentials for GCS, S3, or Azure.
Configure storage credentials for GCS, S3, or Azure.

def set_credentials(
self,
storage_type: str,
namespace: str = None,
credentials_file: str = None,
service_account: str = None,
**kwargs
) -> None:
"""
Set storage credentials.
Args:
storage_type (str): Storage type ("GCS", "S3", or "Azure")
namespace (str, optional): Kubernetes namespace
credentials_file (str, optional): Path to credentials file
service_account (str, optional): Service account name
**kwargs: Additional storage-specific parameters
S3 kwargs:
- s3_endpoint: S3 endpoint URL
- s3_region: AWS region
- s3_use_https: Use HTTPS (default: True)
- s3_verify_ssl: Verify SSL (default: True)
- s3_use_virtual_bucket: Use virtual bucket addressing
- aws_access_key_id: AWS access key ID
- aws_secret_access_key: AWS secret access key
"""Usage:
# Set GCS credentials
kserve_client.set_credentials(
storage_type="GCS",
namespace="default",
credentials_file="/path/to/gcs-credentials.json",
service_account="kserve-sa"
)
# Set S3 credentials
kserve_client.set_credentials(
storage_type="S3",
namespace="default",
service_account="kserve-sa",
aws_access_key_id="AKIAIOSFODNN7EXAMPLE",
aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
s3_endpoint="s3.amazonaws.com",
s3_region="us-west-2"
)
# Set Azure credentials
kserve_client.set_credentials(
storage_type="Azure",
namespace="default",
credentials_file="/path/to/azure-credentials.json",
service_account="kserve-sa"
)

Make inference requests to an InferenceService.
def generate(
self,
name: str,
data: Dict,
namespace: str = None,
protocol_version: str = "v2"
) -> Dict:
"""
Generate predictions from an InferenceService.
Args:
name (str): Name of the InferenceService
data (dict): Input data for inference
namespace (str, optional): Kubernetes namespace
protocol_version (str): Protocol version ("v1" or "v2", default: "v2")
Returns:
dict: Prediction response
"""Usage:
# Make prediction (v2 protocol)
response = kserve_client.generate(
name="sklearn-iris",
namespace="default",
data={
"inputs": [
{
"name": "input-0",
"shape": [1, 4],
"datatype": "FP32",
"data": [[5.1, 3.5, 1.4, 0.2]]
}
]
}
)
print(response)
# Make prediction (v1 protocol)
response = kserve_client.generate(
name="sklearn-iris",
namespace="default",
protocol_version="v1",
data={
"instances": [
[5.1, 3.5, 1.4, 0.2]
]
}
)

Complete example:

from kserve import (
KServeClient,
V1beta1InferenceService,
V1beta1InferenceServiceSpec,
V1beta1PredictorSpec,
V1beta1SKLearnSpec
)
# Create client
kserve_client = KServeClient()
# Define InferenceService
isvc = V1beta1InferenceService(
api_version="serving.kserve.io/v1beta1",
kind="InferenceService",
metadata={
"name": "sklearn-iris",
"namespace": "default"
},
spec=V1beta1InferenceServiceSpec(
predictor=V1beta1PredictorSpec(
sklearn=V1beta1SKLearnSpec(
storage_uri="gs://kfserving-examples/models/sklearn/iris",
resources={
"limits": {"cpu": "1", "memory": "2Gi"},
"requests": {"cpu": "500m", "memory": "1Gi"}
}
)
)
)
)
# Create and wait for ready
print("Creating InferenceService...")
kserve_client.create(isvc, namespace="default", watch=True, timeout_seconds=300)
print("InferenceService is ready!")
# Get status
status = kserve_client.get_isvc_status("sklearn-iris", namespace="default")
print(f"URL: {status.url}")
# Make prediction
response = kserve_client.generate(
name="sklearn-iris",
namespace="default",
data={
"inputs": [
{
"name": "input-0",
"shape": [1, 4],
"datatype": "FP32",
"data": [[5.1, 3.5, 1.4, 0.2]]
}
]
}
)
print(f"Prediction: {response}")
# Delete InferenceService
print("Deleting InferenceService...")
kserve_client.delete("sklearn-iris", namespace="default")
print("InferenceService deleted!")from kubernetes.client.rest import ApiException
try:
    # Create InferenceService
    kserve_client.create(isvc, namespace="default")
except ApiException as e:
    if e.status == 409:
        print("InferenceService already exists")
    else:
        print(f"API error: {e}")
except RuntimeError as e:
    print(f"Runtime error: {e}")

Watch InferenceService changes:

from kserve.api.watch import isvc_watch
# Watch InferenceService changes until ready or timeout;
# isvc_watch takes the resource name directly rather than a client
isvc_watch(
    name="sklearn-iris",
    namespace="default",
    timeout_seconds=300
)