tessl install tessl/pypi-kserve@0.16.1

KServe is a Python SDK that provides standardized interfaces for building and deploying machine learning model serving infrastructure on Kubernetes.
Comprehensive data models for KServe Kubernetes resources including InferenceServices, TrainedModels, InferenceGraphs, ServingRuntimes, and framework-specific predictor specifications.
KServe provides auto-generated Python models for all of its Kubernetes custom resources. These models follow Kubernetes API conventions and support both the v1alpha1 and v1beta1 API versions.
The main resource for deploying machine learning models.
class V1beta1InferenceService:
"""
InferenceService custom resource.
Attributes:
api_version (str): API version ("serving.kserve.io/v1beta1")
kind (str): Resource kind ("InferenceService")
metadata (object): Kubernetes metadata
spec (V1beta1InferenceServiceSpec): InferenceService specification
status (V1beta1InferenceServiceStatus): InferenceService status
"""
api_version: str
kind: str
metadata: object
spec: V1beta1InferenceServiceSpec
    status: V1beta1InferenceServiceStatus

Specification for InferenceService.
class V1beta1InferenceServiceSpec:
"""
InferenceService specification.
Attributes:
predictor (V1beta1PredictorSpec): Predictor component (required)
transformer (V1beta1TransformerSpec): Transformer component (optional)
explainer (V1beta1ExplainerSpec): Explainer component (optional)
"""
predictor: V1beta1PredictorSpec
transformer: V1beta1TransformerSpec
    explainer: V1beta1ExplainerSpec

Status of InferenceService.
class V1beta1InferenceServiceStatus:
"""
InferenceService status.
Attributes:
url (str): External URL for accessing the service
address (KnativeAddressable): Internal address
conditions (list): Status conditions
components (dict): Component statuses
model_status (dict): Model status information
"""
url: str
address: KnativeAddressable
conditions: List[KnativeCondition]
components: Dict[str, V1beta1ComponentStatusSpec]
    model_status: dict

Main predictor specification supporting multiple frameworks.
class V1beta1PredictorSpec:
"""
Predictor component specification.
Attributes:
# Framework-specific specs
sklearn (V1beta1SKLearnSpec): Scikit-learn predictor
xgboost (V1beta1XGBoostSpec): XGBoost predictor
tensorflow (V1beta1TFServingSpec): TensorFlow Serving predictor
pytorch (V1beta1TorchServeSpec): PyTorch predictor
triton (V1beta1TritonSpec): NVIDIA Triton predictor
onnx (V1beta1ONNXRuntimeSpec): ONNX Runtime predictor
huggingface (V1beta1HuggingFaceRuntimeSpec): Hugging Face predictor
pmml (V1beta1PMMLSpec): PMML predictor
lightgbm (V1beta1LightGBMSpec): LightGBM predictor
paddle (V1beta1PaddleServerSpec): PaddlePaddle predictor
# Custom predictor
model (V1beta1ModelSpec): Generic model spec
containers (list): Custom container specs
# Configuration
min_replicas (int): Minimum replicas
max_replicas (int): Maximum replicas
scale_target (int): Scaling target
scale_metric (str): Scaling metric
        timeout (int): Request timeout in seconds
logger (V1beta1LoggerSpec): Logger configuration
batcher (V1beta1Batcher): Batching configuration
service_account_name (str): Service account
affinity (object): Pod affinity
tolerations (list): Pod tolerations
node_selector (dict): Node selector
"""
# Framework specs
sklearn: V1beta1SKLearnSpec
xgboost: V1beta1XGBoostSpec
tensorflow: V1beta1TFServingSpec
pytorch: V1beta1TorchServeSpec
triton: V1beta1TritonSpec
onnx: V1beta1ONNXRuntimeSpec
huggingface: V1beta1HuggingFaceRuntimeSpec
pmml: V1beta1PMMLSpec
lightgbm: V1beta1LightGBMSpec
paddle: V1beta1PaddleServerSpec
# Generic model
model: V1beta1ModelSpec
containers: List[object]
# Scaling
min_replicas: int
max_replicas: int
scale_target: int
scale_metric: str
# Other configs
timeout: int
logger: V1beta1LoggerSpec
batcher: V1beta1Batcher
    service_account_name: str
    affinity: object
    tolerations: List[object]
    node_selector: dict
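Besides the framework-specific fields, a predictor can be declared through the generic model field using V1beta1ModelSpec and V1beta1ModelFormat (both described later in this section). A minimal sketch; the storage URI is illustrative:

from kserve import V1beta1ModelFormat, V1beta1ModelSpec, V1beta1PredictorSpec

# Generic predictor: KServe matches model_format against the ServingRuntimes
# registered in the cluster instead of using a framework-specific field.
predictor = V1beta1PredictorSpec(
    model=V1beta1ModelSpec(
        model_format=V1beta1ModelFormat(name="sklearn"),
        storage_uri="gs://models/sklearn/iris",  # illustrative URI
        protocol_version="v2",
    ),
    min_replicas=1,
    max_replicas=3,
)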
"""
Scikit-learn predictor specification.
Attributes:
storage_uri (str): Model storage URI (required)
protocol_version (str): Protocol version ("v1" or "v2")
runtime_version (str): SKLearn server version
resources (dict): Resource requirements
env (list): Environment variables
"""
storage_uri: str
protocol_version: str
runtime_version: str
resources: dict
    env: List[object]

class V1beta1XGBoostSpec:
"""
XGBoost predictor specification.
Attributes:
storage_uri (str): Model storage URI (required)
protocol_version (str): Protocol version
runtime_version (str): XGBoost server version
resources (dict): Resource requirements
env (list): Environment variables
"""
storage_uri: str
protocol_version: str
runtime_version: str
resources: dict
    env: List[object]

class V1beta1LightGBMSpec:
"""
LightGBM predictor specification.
Attributes:
storage_uri (str): Model storage URI (required)
protocol_version (str): Protocol version
runtime_version (str): LightGBM server version
resources (dict): Resource requirements
env (list): Environment variables
"""
storage_uri: str
protocol_version: str
runtime_version: str
resources: dict
    env: List[object]

class V1beta1TFServingSpec:
"""
TensorFlow Serving predictor specification.
Attributes:
storage_uri (str): Model storage URI (required)
runtime_version (str): TensorFlow version
resources (dict): Resource requirements
env (list): Environment variables
"""
storage_uri: str
runtime_version: str
resources: dict
    env: List[object]

class V1beta1TorchServeSpec:
"""
PyTorch TorchServe predictor specification.
Attributes:
storage_uri (str): Model storage URI (required)
protocol_version (str): Protocol version
runtime_version (str): TorchServe version
resources (dict): Resource requirements
env (list): Environment variables
"""
storage_uri: str
protocol_version: str
runtime_version: str
resources: dict
    env: List[object]

class V1beta1TritonSpec:
"""
NVIDIA Triton Inference Server predictor specification.
Attributes:
storage_uri (str): Model storage URI (required)
protocol_version (str): Protocol version
runtime_version (str): Triton version
resources (dict): Resource requirements
env (list): Environment variables
args (list): Command arguments
"""
storage_uri: str
protocol_version: str
runtime_version: str
resources: dict
env: List[object]
    args: List[str]

class V1beta1ONNXRuntimeSpec:
"""
ONNX Runtime predictor specification.
Attributes:
storage_uri (str): Model storage URI (required)
protocol_version (str): Protocol version
runtime_version (str): ONNX Runtime version
resources (dict): Resource requirements
env (list): Environment variables
"""
storage_uri: str
protocol_version: str
runtime_version: str
resources: dict
    env: List[object]

class V1beta1PMMLSpec:
"""
PMML predictor specification.
Attributes:
storage_uri (str): Model storage URI (required)
protocol_version (str): Protocol version
runtime_version (str): PMML server version
resources (dict): Resource requirements
env (list): Environment variables
"""
storage_uri: str
protocol_version: str
runtime_version: str
resources: dict
    env: List[object]

class V1beta1PaddleServerSpec:
"""
PaddlePaddle predictor specification.
Attributes:
storage_uri (str): Model storage URI (required)
protocol_version (str): Protocol version
runtime_version (str): Paddle server version
resources (dict): Resource requirements
env (list): Environment variables
"""
storage_uri: str
protocol_version: str
runtime_version: str
resources: dict
    env: List[object]

class V1beta1HuggingFaceRuntimeSpec:
"""
Hugging Face transformer predictor specification.
Attributes:
storage_uri (str): Model storage URI (required)
protocol_version (str): Protocol version
runtime_version (str): Hugging Face server version
resources (dict): Resource requirements
env (list): Environment variables
"""
storage_uri: str
protocol_version: str
runtime_version: str
resources: dict
    env: List[object]

Generic model specification.
class V1beta1ModelSpec:
"""
Generic model specification.
Attributes:
model_format (V1beta1ModelFormat): Model format
storage_uri (str): Model storage URI
protocol_version (str): Protocol version
runtime (str): Runtime to use
resources (dict): Resource requirements
env (list): Environment variables
args (list): Command arguments
ports (list): Container ports
"""
model_format: V1beta1ModelFormat
storage_uri: str
protocol_version: str
runtime: str
resources: dict
env: List[object]
args: List[str]
    ports: List[object]

Model format specification.
class V1beta1ModelFormat:
"""
Model format specification.
Attributes:
name (str): Format name (e.g., "sklearn", "tensorflow", "onnx")
version (str): Format version
"""
name: str
    version: str

Transformer component specification.
class V1beta1TransformerSpec:
"""
Transformer component specification.
Attributes:
containers (list): Custom container specs
min_replicas (int): Minimum replicas
max_replicas (int): Maximum replicas
        timeout (int): Request timeout in seconds
logger (V1beta1LoggerSpec): Logger configuration
service_account_name (str): Service account
"""
containers: List[object]
min_replicas: int
max_replicas: int
timeout: int
logger: V1beta1LoggerSpec
    service_account_name: str

Explainer component specification.
class V1beta1ExplainerSpec:
"""
Explainer component specification.
Attributes:
alibi (V1beta1AlibiExplainerSpec): Alibi explainer
art (V1beta1ARTExplainerSpec): ART explainer
containers (list): Custom container specs
min_replicas (int): Minimum replicas
max_replicas (int): Maximum replicas
"""
    alibi: V1beta1AlibiExplainerSpec
art: V1beta1ARTExplainerSpec
containers: List[object]
min_replicas: int
    max_replicas: int

Adversarial Robustness Toolbox explainer specification.
class V1beta1ARTExplainerSpec:
"""
ART explainer specification.
Attributes:
type (str): Explainer type
storage_uri (str): Model storage URI
runtime_version (str): ART version
resources (dict): Resource requirements
"""
type: str
storage_uri: str
runtime_version: str
    resources: dict
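An explainer is attached alongside the predictor in the InferenceServiceSpec. A sketch with an ART explainer; the explainer type and storage URIs are illustrative:

from kserve import (
    V1beta1ARTExplainerSpec,
    V1beta1ExplainerSpec,
    V1beta1InferenceServiceSpec,
    V1beta1PredictorSpec,
    V1beta1SKLearnSpec,
)

spec = V1beta1InferenceServiceSpec(
    predictor=V1beta1PredictorSpec(
        sklearn=V1beta1SKLearnSpec(storage_uri="gs://models/sklearn/mnist")
    ),
    explainer=V1beta1ExplainerSpec(
        art=V1beta1ARTExplainerSpec(
            type="SquareAttack",  # illustrative ART explainer type
            storage_uri="gs://models/sklearn/mnist",
        )
    ),
)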
"""
Custom predictor specification.
Attributes:
containers (list): Container specifications
"""
    containers: List[object]

class V1beta1CustomTransformer:
"""
Custom transformer specification.
Attributes:
containers (list): Container specifications
"""
    containers: List[object]

class V1beta1CustomExplainer:
"""
Custom explainer specification.
Attributes:
containers (list): Container specifications
"""
    containers: List[object]

Storage configuration.
class V1beta1StorageSpec:
"""
Storage specification.
Attributes:
storage_uri (str): Storage URI (gs://, s3://, pvc://, etc.)
path (str): Path within storage
schema_path (str): Schema file path
key (str): Encryption key
parameters (dict): Additional parameters
"""
storage_uri: str
path: str
schema_path: str
key: str
    parameters: dict

Logger configuration for request/response logging.
class V1beta1LoggerSpec:
"""
Logger specification.
Attributes:
mode (str): Logging mode ("all", "request", "response")
url (str): Logger endpoint URL
"""
mode: str
    url: str

Request batching configuration.
class V1beta1Batcher:
"""
Request batcher specification.
Attributes:
max_batch_size (int): Maximum batch size
max_latency (int): Maximum latency in milliseconds
timeout (int): Batch timeout in seconds
"""
max_batch_size: int
max_latency: int
    timeout: int
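Logging and batching are configured per component. A sketch attaching both to a predictor; the logger sink URL is illustrative:

from kserve import (
    V1beta1Batcher,
    V1beta1LoggerSpec,
    V1beta1PredictorSpec,
    V1beta1SKLearnSpec,
)

predictor = V1beta1PredictorSpec(
    sklearn=V1beta1SKLearnSpec(storage_uri="gs://models/sklearn/iris"),
    # Send both requests and responses to a collector endpoint.
    logger=V1beta1LoggerSpec(mode="all", url="http://message-dumper.default"),
    # Group up to 32 requests, waiting at most 500 ms to fill a batch.
    batcher=V1beta1Batcher(max_batch_size=32, max_latency=500),
)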
Pod specification for components.

class V1beta1PodSpec:
"""
Pod specification.
Attributes:
service_account_name (str): Service account
node_selector (dict): Node selector labels
affinity (object): Pod affinity
tolerations (list): Pod tolerations
volumes (list): Volume specifications
"""
service_account_name: str
node_selector: dict
affinity: object
tolerations: List[object]
    volumes: List[object]
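These pod-level fields are also exposed inline on the component specs, so scheduling constraints can be set directly on a predictor. A sketch pinning a Triton predictor to GPU nodes; the node label and taint are illustrative:

from kserve import V1beta1PredictorSpec, V1beta1TritonSpec

predictor = V1beta1PredictorSpec(
    triton=V1beta1TritonSpec(storage_uri="gs://models/bert"),
    # Schedule only onto nodes carrying this accelerator label.
    node_selector={"cloud.google.com/gke-accelerator": "nvidia-tesla-t4"},
    # Tolerate the standard GPU taint so pods can land on those nodes.
    tolerations=[{
        "key": "nvidia.com/gpu",
        "operator": "Exists",
        "effect": "NoSchedule",
    }],
)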
"""
Autoscaling specification.
Attributes:
min_replicas (int): Minimum number of replicas
max_replicas (int): Maximum number of replicas
scale_target (int): Target value for scaling
scale_metric (str): Metric for scaling ("concurrency", "rps", "cpu")
metrics (list): Custom metrics
"""
min_replicas: int
max_replicas: int
scale_target: int
scale_metric: str
    metrics: List[V1beta1MetricsSpec]
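The scaling fields also appear inline on each component spec. A sketch that scales a predictor on request concurrency:

from kserve import V1beta1PredictorSpec, V1beta1SKLearnSpec

predictor = V1beta1PredictorSpec(
    sklearn=V1beta1SKLearnSpec(storage_uri="gs://models/sklearn/iris"),
    min_replicas=1,
    max_replicas=5,
    scale_metric="concurrency",  # or "rps", "cpu"
    scale_target=10,  # target ~10 in-flight requests per replica
)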
"""
Metrics specification for autoscaling.
Attributes:
type (str): Metric type ("Resource", "Pods", "External")
resource (V1beta1ResourceMetricSource): Resource metric
pods (V1beta1PodMetrics): Pod metric
external (V1beta1ExternalMetrics): External metric
"""
type: str
resource: V1beta1ResourceMetricSource
pods: V1beta1PodMetrics
    external: V1beta1ExternalMetrics

class V1beta1ResourceMetricSource:
"""
Resource metric source.
Attributes:
name (str): Resource name ("cpu", "memory")
target (V1beta1MetricTarget): Target specification
"""
name: str
    target: V1beta1MetricTarget

class V1beta1PodMetrics:
"""
Pod metrics specification.
Attributes:
metric (V1beta1PodMetricSource): Metric source
target (V1beta1MetricTarget): Target specification
"""
metric: V1beta1PodMetricSource
    target: V1beta1MetricTarget

class V1beta1ExternalMetrics:
"""
External metrics specification.
Attributes:
metric (V1beta1ExternalMetricSource): Metric source
target (V1beta1MetricTarget): Target specification
authentication (V1beta1ExtMetricAuthentication): Authentication config
"""
metric: V1beta1ExternalMetricSource
target: V1beta1MetricTarget
    authentication: V1beta1ExtMetricAuthentication

class V1beta1MetricTarget:
"""
Metric target specification.
Attributes:
type (str): Target type ("Value", "AverageValue", "Utilization")
value (str): Target value
average_value (str): Average target value
average_utilization (int): Average utilization percentage
"""
type: str
value: str
average_value: str
    average_utilization: int
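Putting the metric types together, a sketch of a resource metric that targets average CPU utilization; the 75% threshold is illustrative:

from kserve import (
    V1beta1MetricsSpec,
    V1beta1MetricTarget,
    V1beta1ResourceMetricSource,
)

# Scale when average CPU utilization across replicas exceeds 75%.
cpu_metric = V1beta1MetricsSpec(
    type="Resource",
    resource=V1beta1ResourceMetricSource(
        name="cpu",
        target=V1beta1MetricTarget(type="Utilization", average_utilization=75),
    ),
)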
Trained model resource for multi-model serving.

class V1alpha1TrainedModel:
"""
TrainedModel custom resource.
Attributes:
api_version (str): API version ("serving.kserve.io/v1alpha1")
kind (str): Resource kind ("TrainedModel")
metadata (object): Kubernetes metadata
spec (V1alpha1TrainedModelSpec): TrainedModel specification
"""
api_version: str
kind: str
metadata: object
    spec: V1alpha1TrainedModelSpec

class V1alpha1TrainedModelSpec:
"""
TrainedModel specification.
Attributes:
inference_service (str): Parent InferenceService name
model (V1alpha1ModelSpec): Model specification
"""
inference_service: str
    model: V1alpha1ModelSpec

class V1alpha1ModelSpec:
"""
Model specification for TrainedModel.
Attributes:
storage_uri (str): Model storage URI
framework (str): Model framework
memory (str): Memory requirement
storage_helper (V1alpha1StorageHelper): Storage helper config
"""
storage_uri: str
framework: str
memory: str
    storage_helper: V1alpha1StorageHelper

Inference graph for routing and ensemble models.
class V1alpha1InferenceGraph:
"""
InferenceGraph custom resource.
Attributes:
api_version (str): API version ("serving.kserve.io/v1alpha1")
kind (str): Resource kind ("InferenceGraph")
metadata (object): Kubernetes metadata
spec (V1alpha1InferenceGraphSpec): InferenceGraph specification
status (V1alpha1InferenceGraphStatus): InferenceGraph status
"""
api_version: str
kind: str
metadata: object
spec: V1alpha1InferenceGraphSpec
    status: V1alpha1InferenceGraphStatus

class V1alpha1InferenceGraphSpec:
"""
InferenceGraph specification.
Attributes:
nodes (dict): Graph nodes mapping node names to InferenceStep
router (V1alpha1InferenceRouter): Router configuration
affinity (object): Pod affinity
resources (dict): Resource requirements
"""
nodes: Dict[str, V1alpha1InferenceStep]
router: V1alpha1InferenceRouter
affinity: object
    resources: dict

class V1alpha1InferenceStep:
"""
Inference step in graph.
Attributes:
node_name (str): Node name
service_name (str): Service to call
service_url (str): Service URL
data (str): Data path
steps (list): Nested steps
condition (str): Conditional expression
"""
node_name: str
service_name: str
service_url: str
data: str
steps: List[V1alpha1InferenceStep]
    condition: str

class V1alpha1InferenceRouter:
"""
Router configuration for InferenceGraph.
Attributes:
router_type (str): Router type ("Sequence", "Splitter", "Ensemble", "Switch")
"""
    router_type: str
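Following the node layout used in the InferenceGraph example near the end of this section, a sketch of a Switch router that picks a branch per request; the condition string is an illustrative GJSON expression evaluated against the request payload:

from kserve import (
    V1alpha1InferenceGraphSpec,
    V1alpha1InferenceRouter,
    V1alpha1InferenceStep,
)

spec = V1alpha1InferenceGraphSpec(
    router=V1alpha1InferenceRouter(router_type="Switch"),
    nodes={
        "root": V1alpha1InferenceStep(
            node_name="root",
            steps=[
                # Taken when the condition matches the request payload.
                V1alpha1InferenceStep(
                    service_name="fraud-model",
                    condition="[@this].#(amount>1000)",  # illustrative GJSON
                ),
                # Fallback branch.
                V1alpha1InferenceStep(service_name="default-model"),
            ],
        )
    },
)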
"""
Inference target specification.
Attributes:
service_name (str): Target service name
service_url (str): Target service URL
weight (int): Traffic weight
"""
service_name: str
service_url: str
    weight: int

Serving runtime resource.
class V1alpha1ServingRuntime:
"""
ServingRuntime custom resource.
Attributes:
api_version (str): API version ("serving.kserve.io/v1alpha1")
kind (str): Resource kind ("ServingRuntime")
metadata (object): Kubernetes metadata
spec (V1alpha1ServingRuntimeSpec): ServingRuntime specification
"""
api_version: str
kind: str
metadata: object
    spec: V1alpha1ServingRuntimeSpec

class V1alpha1ServingRuntimeSpec:
"""
ServingRuntime specification.
Attributes:
supported_model_formats (list): Supported model formats
containers (list): Container specifications
protocol_versions (list): Supported protocol versions
disabled (bool): Whether runtime is disabled
multi_model (bool): Whether runtime supports multi-model serving
grpc_data_endpoint (str): gRPC data endpoint
grpc_multi_model_management_endpoint (str): gRPC management endpoint
built_in_adapter (V1alpha1BuiltInAdapter): Built-in adapter config
"""
supported_model_formats: List[V1alpha1SupportedModelFormat]
containers: List[V1alpha1Container]
protocol_versions: List[str]
disabled: bool
multi_model: bool
grpc_data_endpoint: str
grpc_multi_model_management_endpoint: str
    built_in_adapter: V1alpha1BuiltInAdapter

class V1alpha1SupportedModelFormat:
"""
Supported model format specification.
Attributes:
name (str): Format name
version (str): Format version
auto_select (bool): Auto-select this format
priority (int): Selection priority
"""
name: str
version: str
auto_select: bool
    priority: int

class V1alpha1Container:
"""
Container specification for ServingRuntime.
Attributes:
name (str): Container name
image (str): Container image
command (list): Container command
args (list): Container arguments
env (list): Environment variables
resources (dict): Resource requirements
ports (list): Container ports
"""
name: str
image: str
command: List[str]
args: List[str]
env: List[object]
resources: dict
    ports: List[object]

Cluster-scoped serving runtime.
class V1alpha1ClusterServingRuntime:
"""
ClusterServingRuntime custom resource.
Attributes:
api_version (str): API version ("serving.kserve.io/v1alpha1")
kind (str): Resource kind ("ClusterServingRuntime")
metadata (object): Kubernetes metadata
spec (V1alpha1ServingRuntimeSpec): ServingRuntime specification
"""
api_version: str
kind: str
metadata: object
    spec: V1alpha1ServingRuntimeSpec

class V1alpha1BuiltInAdapter:
"""
Built-in adapter configuration for ServingRuntime.
Attributes:
server_type (str): Server type ("mlserver", "triton", "ovms")
runtime_management_port (int): Management port
mem_buffer_bytes (int): Memory buffer size
model_loading_timeout_seconds (int): Model loading timeout
"""
server_type: str
runtime_management_port: int
mem_buffer_bytes: int
    model_loading_timeout_seconds: int
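For multi-model runtimes fronted by an off-the-shelf server, the built-in adapter tells KServe how to manage it. A sketch assuming an MLServer-based runtime; the image, port, and endpoint string are illustrative:

from kserve import (
    V1alpha1BuiltInAdapter,
    V1alpha1Container,
    V1alpha1ServingRuntimeSpec,
    V1alpha1SupportedModelFormat,
)

spec = V1alpha1ServingRuntimeSpec(
    supported_model_formats=[
        V1alpha1SupportedModelFormat(name="sklearn", version="1", auto_select=True)
    ],
    multi_model=True,
    grpc_data_endpoint="port:8001",  # illustrative endpoint string
    built_in_adapter=V1alpha1BuiltInAdapter(
        server_type="mlserver",
        runtime_management_port=8001,
        model_loading_timeout_seconds=90,
    ),
    containers=[
        V1alpha1Container(name="mlserver", image="seldonio/mlserver:1.3.5")
    ],
)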
Global configuration for InferenceServices.

class V1beta1InferenceServicesConfig:
"""
Global InferenceServices configuration.
Attributes:
transformers (V1beta1TransformersConfig): Transformers config
predictors (V1beta1PredictorsConfig): Predictors config
explainers (V1beta1ExplainersConfig): Explainers config
ingress (V1beta1IngressConfig): Ingress config
deploy (V1beta1DeployConfig): Deploy config
"""
transformers: V1beta1TransformersConfig
predictors: V1beta1PredictorsConfig
explainers: V1beta1ExplainersConfig
ingress: V1beta1IngressConfig
    deploy: V1beta1DeployConfig

class V1beta1PredictorsConfig:
"""
Predictors configuration.
Attributes:
sklearn (V1beta1PredictorConfig): SKLearn config
xgboost (V1beta1PredictorConfig): XGBoost config
tensorflow (V1beta1PredictorConfig): TensorFlow config
pytorch (V1beta1PredictorConfig): PyTorch config
triton (V1beta1PredictorConfig): Triton config
onnx (V1beta1PredictorConfig): ONNX config
"""
sklearn: V1beta1PredictorConfig
xgboost: V1beta1PredictorConfig
tensorflow: V1beta1PredictorConfig
pytorch: V1beta1PredictorConfig
triton: V1beta1PredictorConfig
    onnx: V1beta1PredictorConfig

class V1beta1PredictorConfig:
"""
Predictor configuration.
Attributes:
container_image (str): Container image
default_image_version (str): Default image version
default_timeout (int): Default timeout
protocols (V1beta1PredictorProtocols): Supported protocols
"""
container_image: str
default_image_version: str
default_timeout: int
    protocols: V1beta1PredictorProtocols

class V1beta1PredictorProtocols:
"""
Supported protocols for predictor.
Attributes:
v1 (dict): v1 protocol config
v2 (dict): v2 protocol config
"""
v1: dict
    v2: dict

class KnativeCondition:
"""
Knative condition status.
Attributes:
type (str): Condition type
status (str): Status ("True", "False", "Unknown")
reason (str): Reason for condition
message (str): Human-readable message
last_transition_time (KnativeVolatileTime): Last transition time
severity (str): Condition severity
"""
type: str
status: str
reason: str
message: str
last_transition_time: KnativeVolatileTime
    severity: str
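These condition objects surface in V1beta1InferenceServiceStatus.conditions. A small helper sketch for checking readiness; the helper name is ours, not part of the SDK:

def is_ready(isvc) -> bool:
    """Return True when a fetched InferenceService reports Ready=True."""
    if isvc.status is None or not isvc.status.conditions:
        return False
    return any(
        c.type == "Ready" and c.status == "True"
        for c in isvc.status.conditions
    )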
"""
Knative addressable specification.
Attributes:
url (KnativeURL): Addressable URL
"""
    url: KnativeURL

class KnativeURL:
"""
Knative URL type.
Attributes:
scheme (str): URL scheme
host (str): URL host
path (str): URL path
"""
scheme: str
host: str
    path: str

Creating an InferenceService for a scikit-learn model:

from kserve import (
V1beta1InferenceService,
V1beta1InferenceServiceSpec,
V1beta1PredictorSpec,
V1beta1SKLearnSpec
)
isvc = V1beta1InferenceService(
api_version="serving.kserve.io/v1beta1",
kind="InferenceService",
metadata={
"name": "sklearn-iris",
"namespace": "default",
"labels": {"app": "iris-classifier"}
},
spec=V1beta1InferenceServiceSpec(
predictor=V1beta1PredictorSpec(
min_replicas=1,
max_replicas=3,
sklearn=V1beta1SKLearnSpec(
storage_uri="gs://kfserving-examples/models/sklearn/iris",
protocol_version="v2",
resources={
"limits": {"cpu": "1", "memory": "2Gi"},
"requests": {"cpu": "500m", "memory": "1Gi"}
}
)
)
)
)

Adding a transformer for pre- and post-processing in front of a TensorFlow predictor:

from kserve import (
V1beta1InferenceService,
V1beta1InferenceServiceSpec,
V1beta1PredictorSpec,
V1beta1TransformerSpec,
V1beta1TFServingSpec
)
isvc = V1beta1InferenceService(
api_version="serving.kserve.io/v1beta1",
kind="InferenceService",
metadata={"name": "image-classifier", "namespace": "default"},
spec=V1beta1InferenceServiceSpec(
transformer=V1beta1TransformerSpec(
containers=[{
"name": "transformer",
"image": "myrepo/image-transformer:latest",
"env": [{"name": "PROTOCOL", "value": "v2"}]
}],
min_replicas=1
),
predictor=V1beta1PredictorSpec(
tensorflow=V1beta1TFServingSpec(
storage_uri="gs://models/resnet50"
)
)
)
)

Registering a TrainedModel with an existing InferenceService for multi-model serving:

from kserve import (
V1alpha1TrainedModel,
V1alpha1TrainedModelSpec,
V1alpha1ModelSpec
)
trained_model = V1alpha1TrainedModel(
api_version="serving.kserve.io/v1alpha1",
kind="TrainedModel",
metadata={"name": "model-v2", "namespace": "default"},
spec=V1alpha1TrainedModelSpec(
inference_service="sklearn-iris",
model=V1alpha1ModelSpec(
storage_uri="gs://models/sklearn/iris/v2",
framework="sklearn",
memory="1Gi"
)
)
)

Composing services with an InferenceGraph:

from kserve import (
V1alpha1InferenceGraph,
V1alpha1InferenceGraphSpec,
V1alpha1InferenceStep,
V1alpha1InferenceRouter
)
graph = V1alpha1InferenceGraph(
api_version="serving.kserve.io/v1alpha1",
kind="InferenceGraph",
metadata={"name": "ensemble-model", "namespace": "default"},
spec=V1alpha1InferenceGraphSpec(
router=V1alpha1InferenceRouter(router_type="Sequence"),
nodes={
"root": V1alpha1InferenceStep(
node_name="root",
service_name="preprocessor",
steps=[
V1alpha1InferenceStep(
node_name="model",
service_name="sklearn-iris"
)
]
)
}
)
)

Defining a custom ServingRuntime:

from kserve import (
V1alpha1ServingRuntime,
V1alpha1ServingRuntimeSpec,
V1alpha1Container,
V1alpha1SupportedModelFormat
)
runtime = V1alpha1ServingRuntime(
api_version="serving.kserve.io/v1alpha1",
kind="ServingRuntime",
metadata={"name": "custom-sklearn", "namespace": "default"},
spec=V1alpha1ServingRuntimeSpec(
supported_model_formats=[
V1alpha1SupportedModelFormat(
name="sklearn",
version="1",
auto_select=True
)
],
protocol_versions=["v1", "v2"],
containers=[
V1alpha1Container(
name="kserve-container",
image="custom/sklearn-server:latest",
args=["--model_name={{.Name}}"],
resources={
"limits": {"cpu": "2", "memory": "4Gi"},
"requests": {"cpu": "1", "memory": "2Gi"}
}
)
],
multi_model=True
)
)
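These model objects are submitted to the cluster with kserve.KServeClient, which wraps the Kubernetes API. A sketch assuming kubeconfig or in-cluster credentials are available, reusing the sklearn-iris InferenceService built in the first example:

from kserve import KServeClient

client = KServeClient()

# Create the InferenceService and block until it reports Ready.
client.create(isvc)
client.wait_isvc_ready("sklearn-iris", namespace="default")

# Fetch the live object; status.url carries the external endpoint.
deployed = client.get("sklearn-iris", namespace="default")
print(deployed["status"]["url"])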