tessl/pypi-kserve

Describes pypipkg:pypi/kserve@0.16.x

tessl install tessl/pypi-kserve@0.16.1

kserve is the Python SDK for KServe, providing standardized interfaces for building and deploying machine learning model serving infrastructure on Kubernetes.

docs/reference/kserve-client.md

Kubernetes Control Plane Client

Manage KServe resources on Kubernetes clusters including InferenceServices, TrainedModels, InferenceGraphs, and ServingRuntimes with full CRUD operations and status tracking.

Capabilities

KServeClient

The KServeClient provides a Python interface to the Kubernetes API for managing KServe custom resources.

class KServeClient:
    """
    Kubernetes API client for KServe resources.

    Args:
        config_file (str, optional): Path to kubeconfig file
        config_dict (dict, optional): Dictionary containing kubeconfig
        context (str, optional): Kubernetes context to use
        client_configuration (Configuration, optional): API client configuration
        persist_config (bool): Whether to persist configuration (default: True)
    """
    def __init__(
        self,
        config_file: str = None,
        config_dict: dict = None,
        context: str = None,
        client_configuration: Configuration = None,
        persist_config: bool = True
    ): ...

    # Properties
    @property
    def core_api(self):
        """CoreV1Api instance for core Kubernetes resources"""

    @property
    def app_api(self):
        """AppsV1Api instance for Kubernetes apps resources"""

    @property
    def api_instance(self):
        """CustomObjectsApi instance for custom resources"""

    @property
    def hpa_v2_api(self):
        """AutoscalingV2Api instance for HorizontalPodAutoscaler resources"""

InferenceService Management

Create InferenceService

Create a new InferenceService resource.

def create(
    self,
    inferenceservice: V1beta1InferenceService,
    namespace: str = None,
    watch: bool = False,
    timeout_seconds: int = 600
) -> V1beta1InferenceService:
    """
    Create an InferenceService.

    Args:
        inferenceservice (V1beta1InferenceService): InferenceService object to create
        namespace (str, optional): Kubernetes namespace (default: current namespace)
        watch (bool): Wait for InferenceService to be ready (default: False)
        timeout_seconds (int): Timeout for watch in seconds (default: 600)

    Returns:
        V1beta1InferenceService: Created InferenceService object
    """

Usage:

from kserve import (
    KServeClient,
    V1beta1InferenceService,
    V1beta1InferenceServiceSpec,
    V1beta1PredictorSpec,
    V1beta1SKLearnSpec
)

# Create client
kserve_client = KServeClient()

# Define InferenceService
isvc = V1beta1InferenceService(
    api_version="serving.kserve.io/v1beta1",
    kind="InferenceService",
    metadata={
        "name": "sklearn-iris",
        "namespace": "default"
    },
    spec=V1beta1InferenceServiceSpec(
        predictor=V1beta1PredictorSpec(
            sklearn=V1beta1SKLearnSpec(
                storage_uri="gs://kfserving-examples/models/sklearn/iris"
            )
        )
    )
)

# Create InferenceService
kserve_client.create(isvc)

Create and wait for ready:

# Create and wait
kserve_client.create(isvc, namespace="default", watch=True, timeout_seconds=300)

Get InferenceService

Retrieve an existing InferenceService.

def get(
    self,
    name: str,
    namespace: str = None,
    version: str = None
) -> V1beta1InferenceService:
    """
    Get an InferenceService.

    Args:
        name (str): Name of the InferenceService
        namespace (str, optional): Kubernetes namespace
        version (str, optional): API version (default: v1beta1)

    Returns:
        V1beta1InferenceService: InferenceService object
    """

Usage:

# Get InferenceService
isvc = kserve_client.get("sklearn-iris", namespace="default")

print(f"Name: {isvc.metadata.name}")
print(f"Status: {isvc.status}")

List InferenceServices

List all InferenceServices in a namespace.

def list(
    self,
    namespace: str = None,
    label_selector: str = None,
    field_selector: str = None,
    timeout_seconds: int = None,
    watch: bool = False
) -> List[V1beta1InferenceService]:
    """
    List InferenceServices.

    Args:
        namespace (str, optional): Kubernetes namespace (default: all namespaces)
        label_selector (str, optional): Label selector filter (e.g., "app=myapp")
        field_selector (str, optional): Field selector filter
        timeout_seconds (int, optional): Request timeout
        watch (bool): Watch for changes (default: False)

    Returns:
        list: List of V1beta1InferenceService objects
    """

Usage:

# List all InferenceServices in namespace
isvcs = kserve_client.list(namespace="default")
for isvc in isvcs:
    print(f"Name: {isvc.metadata.name}, Ready: {isvc.status.conditions}")

# List with label selector
isvcs = kserve_client.list(namespace="default", label_selector="framework=sklearn")

Update InferenceService

Update an existing InferenceService.

def patch(
    self,
    name: str,
    inferenceservice: V1beta1InferenceService,
    namespace: str = None
) -> V1beta1InferenceService:
    """
    Patch an InferenceService.

    Args:
        name (str): Name of the InferenceService
        inferenceservice (V1beta1InferenceService): Updated InferenceService object
        namespace (str, optional): Kubernetes namespace

    Returns:
        V1beta1InferenceService: Updated InferenceService object
    """

def replace(
    self,
    name: str,
    inferenceservice: V1beta1InferenceService,
    namespace: str = None
) -> V1beta1InferenceService:
    """
    Replace an InferenceService.

    Args:
        name (str): Name of the InferenceService
        inferenceservice (V1beta1InferenceService): New InferenceService object
        namespace (str, optional): Kubernetes namespace

    Returns:
        V1beta1InferenceService: Replaced InferenceService object
    """

Usage:

# Get existing InferenceService
isvc = kserve_client.get("sklearn-iris", namespace="default")

# Update storage URI
isvc.spec.predictor.sklearn.storage_uri = "gs://new-bucket/models/sklearn/iris"

# Patch the InferenceService
kserve_client.patch("sklearn-iris", isvc, namespace="default")

# Or replace entirely
kserve_client.replace("sklearn-iris", isvc, namespace="default")

Delete InferenceService

Delete an InferenceService.

def delete(
    self,
    name: str,
    namespace: str = None
) -> object:
    """
    Delete an InferenceService.

    Args:
        name (str): Name of the InferenceService
        namespace (str, optional): Kubernetes namespace

    Returns:
        object: Deletion status
    """

Usage:

# Delete InferenceService
kserve_client.delete("sklearn-iris", namespace="default")

InferenceService Status Operations

Wait for InferenceService Ready

Wait for an InferenceService to become ready.

def wait_isvc_ready(
    self,
    name: str,
    namespace: str = None,
    timeout_seconds: int = 600,
    polling_interval: int = 10
) -> V1beta1InferenceService:
    """
    Wait for InferenceService to be ready.

    Args:
        name (str): Name of the InferenceService
        namespace (str, optional): Kubernetes namespace
        timeout_seconds (int): Maximum time to wait (default: 600)
        polling_interval (int): Polling interval in seconds (default: 10)

    Returns:
        V1beta1InferenceService: Ready InferenceService object

    Raises:
        RuntimeError: If InferenceService does not become ready within timeout
    """

Usage:

# Wait for InferenceService to be ready
try:
    isvc = kserve_client.wait_isvc_ready("sklearn-iris", namespace="default", timeout_seconds=300)
    print(f"InferenceService is ready: {isvc.status.url}")
except RuntimeError as e:
    print(f"InferenceService did not become ready: {e}")

Check InferenceService Ready

Check if an InferenceService is ready.

def is_isvc_ready(
    self,
    name: str,
    namespace: str = None
) -> bool:
    """
    Check if InferenceService is ready.

    Args:
        name (str): Name of the InferenceService
        namespace (str, optional): Kubernetes namespace

    Returns:
        bool: True if InferenceService is ready
    """

Usage:

# Check readiness
if kserve_client.is_isvc_ready("sklearn-iris", namespace="default"):
    print("InferenceService is ready")
else:
    print("InferenceService is not ready")

Get InferenceService Status

Get the status of an InferenceService.

def get_isvc_status(
    self,
    name: str,
    namespace: str = None
) -> V1beta1InferenceServiceStatus:
    """
    Get InferenceService status.

    Args:
        name (str): Name of the InferenceService
        namespace (str, optional): Kubernetes namespace

    Returns:
        V1beta1InferenceServiceStatus: Status object with conditions and URLs
    """

Usage:

# Get status
status = kserve_client.get_isvc_status("sklearn-iris", namespace="default")

print(f"URL: {status.url}")
print(f"Conditions: {status.conditions}")
print(f"Components: {status.components}")

Rollout Restart

Restart an InferenceService.

def rollout_restart(
    self,
    name: str,
    namespace: str = None
) -> V1beta1InferenceService:
    """
    Rollout restart an InferenceService.

    Args:
        name (str): Name of the InferenceService
        namespace (str, optional): Kubernetes namespace

    Returns:
        V1beta1InferenceService: Updated InferenceService object
    """

Usage:

# Restart InferenceService
kserve_client.rollout_restart("sklearn-iris", namespace="default")

TrainedModel Management

Create TrainedModel

Create a TrainedModel resource.

def create_trained_model(
    self,
    trainedmodel: V1alpha1TrainedModel,
    namespace: str = None
) -> V1alpha1TrainedModel:
    """
    Create a TrainedModel.

    Args:
        trainedmodel (V1alpha1TrainedModel): TrainedModel object to create
        namespace (str, optional): Kubernetes namespace

    Returns:
        V1alpha1TrainedModel: Created TrainedModel object
    """

Usage:

from kserve import V1alpha1TrainedModel, V1alpha1TrainedModelSpec, V1alpha1ModelSpec

# Define TrainedModel
trained_model = V1alpha1TrainedModel(
    api_version="serving.kserve.io/v1alpha1",
    kind="TrainedModel",
    metadata={"name": "model-v1", "namespace": "default"},
    spec=V1alpha1TrainedModelSpec(
        inference_service="sklearn-iris",
        model=V1alpha1ModelSpec(
            storage_uri="gs://kfserving-examples/models/sklearn/iris/v1",
            framework="sklearn",
            memory="1Gi"
        )
    )
)

# Create TrainedModel
kserve_client.create_trained_model(trained_model, namespace="default")

Get TrainedModel

def get_trained_model(
    self,
    name: str,
    namespace: str = None
) -> V1alpha1TrainedModel:
    """
    Get a TrainedModel.

    Args:
        name (str): Name of the TrainedModel
        namespace (str, optional): Kubernetes namespace

    Returns:
        V1alpha1TrainedModel: TrainedModel object
    """

List TrainedModels

def list_trained_models(
    self,
    namespace: str = None,
    label_selector: str = None
) -> List[V1alpha1TrainedModel]:
    """
    List TrainedModels.

    Args:
        namespace (str, optional): Kubernetes namespace
        label_selector (str, optional): Label selector filter

    Returns:
        list: List of V1alpha1TrainedModel objects
    """

Patch TrainedModel

def patch_trained_model(
    self,
    name: str,
    trainedmodel: V1alpha1TrainedModel,
    namespace: str = None
) -> V1alpha1TrainedModel:
    """
    Patch a TrainedModel.

    Args:
        name (str): Name of the TrainedModel
        trainedmodel (V1alpha1TrainedModel): Updated TrainedModel object
        namespace (str, optional): Kubernetes namespace

    Returns:
        V1alpha1TrainedModel: Updated TrainedModel object
    """

Delete TrainedModel

def delete_trained_model(
    self,
    name: str,
    namespace: str = None
) -> object:
    """
    Delete a TrainedModel.

    Args:
        name (str): Name of the TrainedModel
        namespace (str, optional): Kubernetes namespace

    Returns:
        object: Deletion status
    """

Wait for TrainedModel Ready

def wait_model_ready(
    self,
    name: str,
    namespace: str = None,
    timeout_seconds: int = 600,
    polling_interval: int = 10
) -> V1alpha1TrainedModel:
    """
    Wait for TrainedModel to be ready.

    Args:
        name (str): Name of the TrainedModel
        namespace (str, optional): Kubernetes namespace
        timeout_seconds (int): Maximum time to wait (default: 600)
        polling_interval (int): Polling interval in seconds (default: 10)

    Returns:
        V1alpha1TrainedModel: Ready TrainedModel object

    Raises:
        RuntimeError: If TrainedModel does not become ready within timeout
    """

InferenceGraph Management

Create InferenceGraph

def create_inference_graph(
    self,
    inferencegraph: V1alpha1InferenceGraph,
    namespace: str = None
) -> V1alpha1InferenceGraph:
    """
    Create an InferenceGraph.

    Args:
        inferencegraph (V1alpha1InferenceGraph): InferenceGraph object to create
        namespace (str, optional): Kubernetes namespace

    Returns:
        V1alpha1InferenceGraph: Created InferenceGraph object
    """

Usage:

from kserve import V1alpha1InferenceGraph, V1alpha1InferenceGraphSpec, V1alpha1InferenceRouter, V1alpha1InferenceStep

# Define InferenceGraph: nodes map names to routers, and each router
# routes across one or more steps
graph = V1alpha1InferenceGraph(
    api_version="serving.kserve.io/v1alpha1",
    kind="InferenceGraph",
    metadata={"name": "ensemble", "namespace": "default"},
    spec=V1alpha1InferenceGraphSpec(
        nodes={
            "root": V1alpha1InferenceRouter(
                router_type="Sequence",
                steps=[
                    V1alpha1InferenceStep(service_name="sklearn-iris")
                ]
            )
        }
    )
)

# Create InferenceGraph
kserve_client.create_inference_graph(graph, namespace="default")

Get InferenceGraph

def get_inference_graph(
    self,
    name: str,
    namespace: str = None
) -> V1alpha1InferenceGraph:
    """
    Get an InferenceGraph.

    Args:
        name (str): Name of the InferenceGraph
        namespace (str, optional): Kubernetes namespace

    Returns:
        V1alpha1InferenceGraph: InferenceGraph object
    """

List InferenceGraphs

def list_inference_graphs(
    self,
    namespace: str = None,
    label_selector: str = None
) -> List[V1alpha1InferenceGraph]:
    """
    List InferenceGraphs.

    Args:
        namespace (str, optional): Kubernetes namespace
        label_selector (str, optional): Label selector filter

    Returns:
        list: List of V1alpha1InferenceGraph objects
    """

Patch InferenceGraph

def patch_inference_graph(
    self,
    name: str,
    inferencegraph: V1alpha1InferenceGraph,
    namespace: str = None
) -> V1alpha1InferenceGraph:
    """
    Patch an InferenceGraph.

    Args:
        name (str): Name of the InferenceGraph
        inferencegraph (V1alpha1InferenceGraph): Updated InferenceGraph object
        namespace (str, optional): Kubernetes namespace

    Returns:
        V1alpha1InferenceGraph: Updated InferenceGraph object
    """

Delete InferenceGraph

def delete_inference_graph(
    self,
    name: str,
    namespace: str = None
) -> object:
    """
    Delete an InferenceGraph.

    Args:
        name (str): Name of the InferenceGraph
        namespace (str, optional): Kubernetes namespace

    Returns:
        object: Deletion status
    """

Check InferenceGraph Ready

def is_ig_ready(
    self,
    name: str,
    namespace: str = None
) -> bool:
    """
    Check if InferenceGraph is ready.

    Args:
        name (str): Name of the InferenceGraph
        namespace (str, optional): Kubernetes namespace

    Returns:
        bool: True if InferenceGraph is ready
    """

Wait for InferenceGraph Ready

def wait_ig_ready(
    self,
    name: str,
    namespace: str = None,
    timeout_seconds: int = 600,
    polling_interval: int = 10
) -> V1alpha1InferenceGraph:
    """
    Wait for InferenceGraph to be ready.

    Args:
        name (str): Name of the InferenceGraph
        namespace (str, optional): Kubernetes namespace
        timeout_seconds (int): Maximum time to wait (default: 600)
        polling_interval (int): Polling interval in seconds (default: 10)

    Returns:
        V1alpha1InferenceGraph: Ready InferenceGraph object

    Raises:
        RuntimeError: If InferenceGraph does not become ready within timeout
    """

ServingRuntime Management

Create ServingRuntime

def create_serving_runtime(
    self,
    servingruntime: V1alpha1ServingRuntime,
    namespace: str = None
) -> V1alpha1ServingRuntime:
    """
    Create a ServingRuntime.

    Args:
        servingruntime (V1alpha1ServingRuntime): ServingRuntime object to create
        namespace (str, optional): Kubernetes namespace

    Returns:
        V1alpha1ServingRuntime: Created ServingRuntime object
    """

Usage:

from kserve import V1alpha1ServingRuntime, V1alpha1ServingRuntimeSpec, V1alpha1SupportedModelFormat
from kubernetes.client import V1Container, V1ResourceRequirements

# Define ServingRuntime
runtime = V1alpha1ServingRuntime(
    api_version="serving.kserve.io/v1alpha1",
    kind="ServingRuntime",
    metadata={"name": "custom-runtime", "namespace": "default"},
    spec=V1alpha1ServingRuntimeSpec(
        supported_model_formats=[
            V1alpha1SupportedModelFormat(
                name="sklearn",
                version="1",
                auto_select=True
            )
        ],
        containers=[
            # Containers use the Kubernetes client's V1Container type
            V1Container(
                name="kserve-container",
                image="kserve/sklearnserver:latest",
                resources=V1ResourceRequirements(
                    limits={"cpu": "1", "memory": "2Gi"},
                    requests={"cpu": "500m", "memory": "1Gi"}
                )
            )
        ]
    )
)

# Create ServingRuntime
kserve_client.create_serving_runtime(runtime, namespace="default")

Get ServingRuntime

def get_serving_runtime(
    self,
    name: str,
    namespace: str = None
) -> V1alpha1ServingRuntime:
    """
    Get a ServingRuntime.

    Args:
        name (str): Name of the ServingRuntime
        namespace (str, optional): Kubernetes namespace

    Returns:
        V1alpha1ServingRuntime: ServingRuntime object
    """

List ServingRuntimes

def list_serving_runtimes(
    self,
    namespace: str = None,
    label_selector: str = None
) -> List[V1alpha1ServingRuntime]:
    """
    List ServingRuntimes.

    Args:
        namespace (str, optional): Kubernetes namespace
        label_selector (str, optional): Label selector filter

    Returns:
        list: List of V1alpha1ServingRuntime objects
    """

Patch ServingRuntime

def patch_serving_runtime(
    self,
    name: str,
    servingruntime: V1alpha1ServingRuntime,
    namespace: str = None
) -> V1alpha1ServingRuntime:
    """
    Patch a ServingRuntime.

    Args:
        name (str): Name of the ServingRuntime
        servingruntime (V1alpha1ServingRuntime): Updated ServingRuntime object
        namespace (str, optional): Kubernetes namespace

    Returns:
        V1alpha1ServingRuntime: Updated ServingRuntime object
    """

Delete ServingRuntime

def delete_serving_runtime(
    self,
    name: str,
    namespace: str = None
) -> object:
    """
    Delete a ServingRuntime.

    Args:
        name (str): Name of the ServingRuntime
        namespace (str, optional): Kubernetes namespace

    Returns:
        object: Deletion status
    """

ClusterServingRuntime Management

Create ClusterServingRuntime

def create_cluster_serving_runtime(
    self,
    clusterservingruntime: V1alpha1ClusterServingRuntime,
    namespace: str = None
) -> V1alpha1ClusterServingRuntime:
    """
    Create a ClusterServingRuntime.

    Args:
        clusterservingruntime (V1alpha1ClusterServingRuntime): ClusterServingRuntime object
        namespace (str, optional): Kubernetes namespace

    Returns:
        V1alpha1ClusterServingRuntime: Created ClusterServingRuntime object
    """

Get ClusterServingRuntime

def get_cluster_serving_runtime(
    self,
    name: str
) -> V1alpha1ClusterServingRuntime:
    """
    Get a ClusterServingRuntime.

    Args:
        name (str): Name of the ClusterServingRuntime

    Returns:
        V1alpha1ClusterServingRuntime: ClusterServingRuntime object
    """

List ClusterServingRuntimes

def list_cluster_serving_runtimes(
    self,
    label_selector: str = None
) -> List[V1alpha1ClusterServingRuntime]:
    """
    List ClusterServingRuntimes.

    Args:
        label_selector (str, optional): Label selector filter

    Returns:
        list: List of V1alpha1ClusterServingRuntime objects
    """

Delete ClusterServingRuntime

def delete_cluster_serving_runtime(
    self,
    name: str
) -> object:
    """
    Delete a ClusterServingRuntime.

    Args:
        name (str): Name of the ClusterServingRuntime

    Returns:
        object: Deletion status
    """

LocalModelNodeGroup Management

Create LocalModelNodeGroup

def create_local_model_node_group(
    self,
    localmodnodegroup: V1alpha1LocalModelNodeGroup,
    namespace: str = None
) -> V1alpha1LocalModelNodeGroup:
    """
    Create a LocalModelNodeGroup.

    Args:
        localmodnodegroup (V1alpha1LocalModelNodeGroup): LocalModelNodeGroup object to create
        namespace (str, optional): Kubernetes namespace

    Returns:
        V1alpha1LocalModelNodeGroup: Created LocalModelNodeGroup object
    """

Get LocalModelNodeGroup

def get_local_model_node_group(
    self,
    name: str,
    namespace: str = None
) -> V1alpha1LocalModelNodeGroup:
    """
    Get a LocalModelNodeGroup.

    Args:
        name (str): Name of the LocalModelNodeGroup
        namespace (str, optional): Kubernetes namespace

    Returns:
        V1alpha1LocalModelNodeGroup: LocalModelNodeGroup object
    """

List LocalModelNodeGroups

def list_local_model_node_groups(
    self,
    namespace: str = None,
    label_selector: str = None
) -> List[V1alpha1LocalModelNodeGroup]:
    """
    List LocalModelNodeGroups.

    Args:
        namespace (str, optional): Kubernetes namespace
        label_selector (str, optional): Label selector filter

    Returns:
        list: List of V1alpha1LocalModelNodeGroup objects
    """

Delete LocalModelNodeGroup

def delete_local_model_node_group(
    self,
    name: str,
    namespace: str = None
) -> object:
    """
    Delete a LocalModelNodeGroup.

    Args:
        name (str): Name of the LocalModelNodeGroup
        namespace (str, optional): Kubernetes namespace

    Returns:
        object: Deletion status
    """

LocalModelCache Management

Create LocalModelCache

def create_local_model_cache(
    self,
    localmodelcache: V1alpha1LocalModelCache,
    namespace: str = None
) -> V1alpha1LocalModelCache:
    """
    Create a LocalModelCache.

    Args:
        localmodelcache (V1alpha1LocalModelCache): LocalModelCache object to create
        namespace (str, optional): Kubernetes namespace

    Returns:
        V1alpha1LocalModelCache: Created LocalModelCache object
    """

Get LocalModelCache

def get_local_model_cache(
    self,
    name: str,
    namespace: str = None
) -> V1alpha1LocalModelCache:
    """
    Get a LocalModelCache.

    Args:
        name (str): Name of the LocalModelCache
        namespace (str, optional): Kubernetes namespace

    Returns:
        V1alpha1LocalModelCache: LocalModelCache object
    """

List LocalModelCaches

def list_local_model_caches(
    self,
    namespace: str = None,
    label_selector: str = None
) -> List[V1alpha1LocalModelCache]:
    """
    List LocalModelCaches.

    Args:
        namespace (str, optional): Kubernetes namespace
        label_selector (str, optional): Label selector filter

    Returns:
        list: List of V1alpha1LocalModelCache objects
    """

Delete LocalModelCache

def delete_local_model_cache(
    self,
    name: str,
    namespace: str = None
) -> object:
    """
    Delete a LocalModelCache.

    Args:
        name (str): Name of the LocalModelCache
        namespace (str, optional): Kubernetes namespace

    Returns:
        object: Deletion status
    """

Check LocalModelCache Ready

def is_local_model_cache_ready(
    self,
    name: str,
    namespace: str = None
) -> bool:
    """
    Check if LocalModelCache is ready.

    Args:
        name (str): Name of the LocalModelCache
        namespace (str, optional): Kubernetes namespace

    Returns:
        bool: True if LocalModelCache is ready
    """

Wait for LocalModelCache Ready

def wait_local_model_cache_ready(
    self,
    name: str,
    namespace: str = None,
    timeout_seconds: int = 600,
    polling_interval: int = 10
) -> V1alpha1LocalModelCache:
    """
    Wait for LocalModelCache to be ready.

    Args:
        name (str): Name of the LocalModelCache
        namespace (str, optional): Kubernetes namespace
        timeout_seconds (int): Maximum time to wait (default: 600)
        polling_interval (int): Polling interval in seconds (default: 10)

    Returns:
        V1alpha1LocalModelCache: Ready LocalModelCache object

    Raises:
        RuntimeError: If LocalModelCache does not become ready within timeout
    """

Credential Management

Set Storage Credentials

Configure storage credentials for GCS, S3, or Azure.

def set_credentials(
    self,
    storage_type: str,
    namespace: str = None,
    credentials_file: str = None,
    service_account: str = None,
    **kwargs
) -> None:
    """
    Set storage credentials.

    Args:
        storage_type (str): Storage type ("GCS", "S3", or "Azure")
        namespace (str, optional): Kubernetes namespace
        credentials_file (str, optional): Path to credentials file
        service_account (str, optional): Service account name
        **kwargs: Additional storage-specific parameters

    S3 kwargs:
        - s3_endpoint: S3 endpoint URL
        - s3_region: AWS region
        - s3_use_https: Use HTTPS (default: True)
        - s3_verify_ssl: Verify SSL (default: True)
        - s3_use_virtual_bucket: Use virtual bucket addressing
        - aws_access_key_id: AWS access key ID
        - aws_secret_access_key: AWS secret access key
    """

Usage:

# Set GCS credentials
kserve_client.set_credentials(
    storage_type="GCS",
    namespace="default",
    credentials_file="/path/to/gcs-credentials.json",
    service_account="kserve-sa"
)

# Set S3 credentials
kserve_client.set_credentials(
    storage_type="S3",
    namespace="default",
    service_account="kserve-sa",
    aws_access_key_id="AKIAIOSFODNN7EXAMPLE",
    aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
    s3_endpoint="s3.amazonaws.com",
    s3_region="us-west-2"
)

# Set Azure credentials
kserve_client.set_credentials(
    storage_type="Azure",
    namespace="default",
    credentials_file="/path/to/azure-credentials.json",
    service_account="kserve-sa"
)

Inference Operations

Generate Predictions

Make inference requests to an InferenceService.

def generate(
    self,
    name: str,
    data: Dict,
    namespace: str = None,
    protocol_version: str = "v2"
) -> Dict:
    """
    Generate predictions from an InferenceService.

    Args:
        name (str): Name of the InferenceService
        data (dict): Input data for inference
        namespace (str, optional): Kubernetes namespace
        protocol_version (str): Protocol version ("v1" or "v2", default: "v2")

    Returns:
        dict: Prediction response
    """

Usage:

# Make prediction (v2 protocol)
response = kserve_client.generate(
    name="sklearn-iris",
    namespace="default",
    data={
        "inputs": [
            {
                "name": "input-0",
                "shape": [1, 4],
                "datatype": "FP32",
                "data": [[5.1, 3.5, 1.4, 0.2]]
            }
        ]
    }
)

print(response)

# Make prediction (v1 protocol)
response = kserve_client.generate(
    name="sklearn-iris",
    namespace="default",
    protocol_version="v1",
    data={
        "instances": [
            [5.1, 3.5, 1.4, 0.2]
        ]
    }
)

Complete Example

from kubernetes.client import V1ResourceRequirements
from kserve import (
    KServeClient,
    V1beta1InferenceService,
    V1beta1InferenceServiceSpec,
    V1beta1PredictorSpec,
    V1beta1SKLearnSpec
)

# Create client
kserve_client = KServeClient()

# Define InferenceService
isvc = V1beta1InferenceService(
    api_version="serving.kserve.io/v1beta1",
    kind="InferenceService",
    metadata={
        "name": "sklearn-iris",
        "namespace": "default"
    },
    spec=V1beta1InferenceServiceSpec(
        predictor=V1beta1PredictorSpec(
            sklearn=V1beta1SKLearnSpec(
                storage_uri="gs://kfserving-examples/models/sklearn/iris",
                resources=V1ResourceRequirements(
                    limits={"cpu": "1", "memory": "2Gi"},
                    requests={"cpu": "500m", "memory": "1Gi"}
                )
            )
        )
    )
)

# Create and wait for ready
print("Creating InferenceService...")
kserve_client.create(isvc, namespace="default", watch=True, timeout_seconds=300)
print("InferenceService is ready!")

# Get status
status = kserve_client.get_isvc_status("sklearn-iris", namespace="default")
print(f"URL: {status.url}")

# Make prediction
response = kserve_client.generate(
    name="sklearn-iris",
    namespace="default",
    data={
        "inputs": [
            {
                "name": "input-0",
                "shape": [1, 4],
                "datatype": "FP32",
                "data": [[5.1, 3.5, 1.4, 0.2]]
            }
        ]
    }
)
print(f"Prediction: {response}")

# Delete InferenceService
print("Deleting InferenceService...")
kserve_client.delete("sklearn-iris", namespace="default")
print("InferenceService deleted!")

Error Handling

Kubernetes API failures surface as ApiException; readiness waits raise RuntimeError on timeout.

from kubernetes.client.rest import ApiException

try:
    # Create InferenceService
    kserve_client.create(isvc, namespace="default")
except ApiException as e:
    if e.status == 409:
        print("InferenceService already exists")
    else:
        print(f"API error: {e}")
except RuntimeError as e:
    print(f"Runtime error: {e}")

Watch for Changes

Monitor an InferenceService and receive status updates as it reconciles.

from kserve.api.watch import isvc_watch

# Watch InferenceService changes
isvc_watch(
    client=kserve_client,
    name="sklearn-iris",
    namespace="default",
    timeout_seconds=300
)