# Resource Models

Comprehensive set of Kubernetes Custom Resource Definitions for defining inference services, serving runtimes, and model configurations. These models represent the complete KServe API for deploying and managing ML models on Kubernetes.

## Capabilities

### InferenceService Resources (V1Beta1)

Core resources for deploying and managing ML model inference services.
```python { .api }
class V1beta1InferenceService:
    def __init__(self,
                 api_version: str = "serving.kserve.io/v1beta1",
                 kind: str = "InferenceService",
                 metadata: Optional[Dict[str, Any]] = None,
                 spec: Optional['V1beta1InferenceServiceSpec'] = None,
                 status: Optional['V1beta1InferenceServiceStatus'] = None):
        """
        Main KServe inference service resource.

        Args:
            api_version (str): Kubernetes API version
            kind (str): Resource kind
            metadata (Dict[str, Any], optional): Kubernetes metadata
            spec (V1beta1InferenceServiceSpec, optional): Service specification
            status (V1beta1InferenceServiceStatus, optional): Service status
        """

class V1beta1InferenceServiceSpec:
    def __init__(self,
                 predictor: 'V1beta1PredictorSpec',
                 transformer: Optional['V1beta1TransformerSpec'] = None,
                 explainer: Optional['V1beta1ExplainerSpec'] = None):
        """
        InferenceService specification.

        Args:
            predictor (V1beta1PredictorSpec): Predictor component spec
            transformer (V1beta1TransformerSpec, optional): Transformer component spec
            explainer (V1beta1ExplainerSpec, optional): Explainer component spec
        """

class V1beta1InferenceServiceStatus:
    def __init__(self,
                 url: Optional[str] = None,
                 conditions: Optional[List['KnativeCondition']] = None,
                 components: Optional[Dict[str, Any]] = None):
        """
        InferenceService status information.

        Args:
            url (str, optional): Service endpoint URL
            conditions (List[KnativeCondition], optional): Status conditions
            components (Dict[str, Any], optional): Component statuses
        """

class V1beta1InferenceServiceList:
    def __init__(self,
                 api_version: str = "serving.kserve.io/v1beta1",
                 kind: str = "InferenceServiceList",
                 items: Optional[List['V1beta1InferenceService']] = None,
                 metadata: Optional[Dict[str, Any]] = None):
        """List of InferenceServices."""
```
### Component Specifications

Specifications for the three main components of inference services.

```python { .api }
class V1beta1PredictorSpec:
    def __init__(self,
                 sklearn: Optional['V1beta1SKLearnSpec'] = None,
                 tensorflow: Optional['V1beta1TFServingSpec'] = None,
                 pytorch: Optional['V1beta1TorchServeSpec'] = None,
                 xgboost: Optional['V1beta1XGBoostSpec'] = None,
                 lightgbm: Optional['V1beta1LightGBMSpec'] = None,
                 onnx: Optional['V1beta1ONNXRuntimeSpec'] = None,
                 triton: Optional['V1beta1TritonSpec'] = None,
                 pmml: Optional['V1beta1PMMLSpec'] = None,
                 paddle: Optional['V1beta1PaddleServerSpec'] = None,
                 custom: Optional['V1beta1CustomPredictor'] = None,
                 min_replicas: Optional[int] = None,
                 max_replicas: Optional[int] = None,
                 scale_metric: Optional[str] = None,
                 scale_target: Optional[int] = None):
        """
        Predictor component specification.

        Args:
            sklearn (V1beta1SKLearnSpec, optional): Scikit-learn predictor
            tensorflow (V1beta1TFServingSpec, optional): TensorFlow Serving predictor
            pytorch (V1beta1TorchServeSpec, optional): PyTorch TorchServe predictor
            xgboost (V1beta1XGBoostSpec, optional): XGBoost predictor
            lightgbm (V1beta1LightGBMSpec, optional): LightGBM predictor
            onnx (V1beta1ONNXRuntimeSpec, optional): ONNX Runtime predictor
            triton (V1beta1TritonSpec, optional): NVIDIA Triton predictor
            pmml (V1beta1PMMLSpec, optional): PMML predictor
            paddle (V1beta1PaddleServerSpec, optional): PaddlePaddle predictor
            custom (V1beta1CustomPredictor, optional): Custom predictor container
            min_replicas (int, optional): Minimum number of replicas
            max_replicas (int, optional): Maximum number of replicas
            scale_metric (str, optional): Autoscaling metric (e.g. "concurrency")
            scale_target (int, optional): Autoscaling target value for the metric
        """

class V1beta1TransformerSpec:
    def __init__(self,
                 custom: Optional['V1beta1CustomTransformer'] = None):
        """
        Transformer component specification.

        Args:
            custom (V1beta1CustomTransformer, optional): Custom transformer container
        """

class V1beta1ExplainerSpec:
    def __init__(self,
                 art: Optional['V1beta1ARTExplainerSpec'] = None,
                 custom: Optional['V1beta1CustomExplainer'] = None):
        """
        Explainer component specification.

        Args:
            art (V1beta1ARTExplainerSpec, optional): ART-based explainer
            custom (V1beta1CustomExplainer, optional): Custom explainer container
        """
```
### Framework-Specific Predictors

Specifications for different ML framework servers.

```python { .api }
class V1beta1SKLearnSpec:
    def __init__(self,
                 storage_uri: str,
                 protocol_version: Optional[str] = None,
                 runtime_version: Optional[str] = None,
                 resources: Optional[Dict[str, Any]] = None):
        """
        Scikit-learn predictor specification.

        Args:
            storage_uri (str): Model storage location
            protocol_version (str, optional): Protocol version (v1, v2)
            runtime_version (str, optional): Runtime version
            resources (Dict[str, Any], optional): Resource requirements
        """

class V1beta1TFServingSpec:
    def __init__(self,
                 storage_uri: str,
                 protocol_version: Optional[str] = None,
                 runtime_version: Optional[str] = None):
        """TensorFlow Serving predictor specification."""

class V1beta1TorchServeSpec:
    def __init__(self,
                 storage_uri: str,
                 protocol_version: Optional[str] = None,
                 runtime_version: Optional[str] = None):
        """PyTorch TorchServe predictor specification."""

class V1beta1XGBoostSpec:
    def __init__(self,
                 storage_uri: str,
                 protocol_version: Optional[str] = None,
                 runtime_version: Optional[str] = None):
        """XGBoost predictor specification."""

class V1beta1LightGBMSpec:
    def __init__(self,
                 storage_uri: str,
                 protocol_version: Optional[str] = None,
                 runtime_version: Optional[str] = None):
        """LightGBM predictor specification."""

class V1beta1ONNXRuntimeSpec:
    def __init__(self,
                 storage_uri: str,
                 protocol_version: Optional[str] = None,
                 runtime_version: Optional[str] = None):
        """ONNX Runtime predictor specification."""

class V1beta1TritonSpec:
    def __init__(self,
                 storage_uri: str,
                 protocol_version: Optional[str] = None,
                 runtime_version: Optional[str] = None):
        """NVIDIA Triton predictor specification."""

class V1beta1PMMLSpec:
    def __init__(self,
                 storage_uri: str,
                 protocol_version: Optional[str] = None,
                 runtime_version: Optional[str] = None):
        """PMML predictor specification."""

class V1beta1PaddleServerSpec:
    def __init__(self,
                 storage_uri: str,
                 protocol_version: Optional[str] = None,
                 runtime_version: Optional[str] = None):
        """PaddlePaddle predictor specification."""
```
### Custom Component Specifications

Specifications for custom containers and runtime configurations.

```python { .api }
class V1beta1CustomPredictor:
    def __init__(self,
                 image: str,
                 command: Optional[List[str]] = None,
                 args: Optional[List[str]] = None,
                 env: Optional[List[Dict[str, str]]] = None,
                 resources: Optional[Dict[str, Any]] = None,
                 ports: Optional[List[Dict[str, Any]]] = None):
        """
        Custom predictor container specification.

        Args:
            image (str): Container image
            command (List[str], optional): Container command
            args (List[str], optional): Container arguments
            env (List[Dict[str, str]], optional): Environment variables
            resources (Dict[str, Any], optional): Resource requirements
            ports (List[Dict[str, Any]], optional): Container ports
        """

class V1beta1CustomTransformer:
    def __init__(self,
                 image: str,
                 command: Optional[List[str]] = None,
                 args: Optional[List[str]] = None,
                 env: Optional[List[Dict[str, str]]] = None):
        """Custom transformer container specification."""

class V1beta1CustomExplainer:
    def __init__(self,
                 image: str,
                 command: Optional[List[str]] = None,
                 args: Optional[List[str]] = None,
                 env: Optional[List[Dict[str, str]]] = None):
        """Custom explainer container specification."""
```
### Autoscaling Specifications

Configuration for automatic scaling based on metrics.

```python { .api }
class V1beta1AutoScalingSpec:
    def __init__(self,
                 min_replicas: Optional[int] = None,
                 max_replicas: Optional[int] = None,
                 target_utilization_percentage: Optional[int] = None,
                 metrics: Optional[List['V1beta1MetricsSpec']] = None):
        """
        Autoscaling specification.

        Args:
            min_replicas (int, optional): Minimum number of replicas
            max_replicas (int, optional): Maximum number of replicas
            target_utilization_percentage (int, optional): Target CPU utilization
            metrics (List[V1beta1MetricsSpec], optional): Custom metrics
        """

class V1beta1MetricsSpec:
    def __init__(self,
                 type: str,
                 resource: Optional['V1beta1ResourceMetricSource'] = None,
                 pods: Optional['V1beta1PodMetricSource'] = None,
                 external: Optional['V1beta1ExternalMetricSource'] = None):
        """
        Metrics specification for autoscaling.

        Args:
            type (str): Metric type (Resource, Pods, External)
            resource (V1beta1ResourceMetricSource, optional): Resource metric
            pods (V1beta1PodMetricSource, optional): Pod metric
            external (V1beta1ExternalMetricSource, optional): External metric
        """

class V1beta1ResourceMetricSource:
    def __init__(self,
                 name: str,
                 target: 'V1beta1MetricTarget'):
        """Resource-based metric source."""

class V1beta1PodMetricSource:
    def __init__(self,
                 metric: 'V1beta1ExternalMetrics',
                 target: 'V1beta1MetricTarget'):
        """Pod-based metric source."""

class V1beta1ExternalMetricSource:
    def __init__(self,
                 metric: 'V1beta1ExternalMetrics',
                 target: 'V1beta1MetricTarget'):
        """External metric source."""

class V1beta1MetricTarget:
    def __init__(self,
                 type: str,
                 average_utilization: Optional[int] = None,
                 average_value: Optional[str] = None,
                 value: Optional[str] = None):
        """Metric target specification."""
```
### ServingRuntime Resources (V1Alpha1)

Runtime definitions for model serving containers and configurations.

```python { .api }
class V1alpha1ServingRuntime:
    def __init__(self,
                 api_version: str = "serving.kserve.io/v1alpha1",
                 kind: str = "ServingRuntime",
                 metadata: Optional[Dict[str, Any]] = None,
                 spec: Optional['V1alpha1ServingRuntimeSpec'] = None):
        """
        Namespace-scoped serving runtime definition.

        Args:
            api_version (str): Kubernetes API version
            kind (str): Resource kind
            metadata (Dict[str, Any], optional): Kubernetes metadata
            spec (V1alpha1ServingRuntimeSpec, optional): Runtime specification
        """

class V1alpha1ClusterServingRuntime:
    def __init__(self,
                 api_version: str = "serving.kserve.io/v1alpha1",
                 kind: str = "ClusterServingRuntime",
                 metadata: Optional[Dict[str, Any]] = None,
                 spec: Optional['V1alpha1ServingRuntimeSpec'] = None):
        """Cluster-wide serving runtime definition."""

class V1alpha1ServingRuntimeSpec:
    def __init__(self,
                 supported_model_formats: List['V1alpha1SupportedModelFormat'],
                 containers: List['V1alpha1Container'],
                 protocol_versions: Optional[List[str]] = None,
                 multi_model: Optional[bool] = None):
        """
        Serving runtime specification.

        Args:
            supported_model_formats (List[V1alpha1SupportedModelFormat]): Supported formats
            containers (List[V1alpha1Container]): Runtime containers
            protocol_versions (List[str], optional): Supported protocol versions
            multi_model (bool, optional): Multi-model support flag
        """

class V1alpha1SupportedModelFormat:
    def __init__(self,
                 name: str,
                 version: Optional[str] = None,
                 auto_select: Optional[bool] = None,
                 priority: Optional[int] = None):
        """
        Supported model format specification.

        Args:
            name (str): Format name (sklearn, tensorflow, pytorch, etc.)
            version (str, optional): Format version
            auto_select (bool, optional): Auto-selection flag
            priority (int, optional): Selection priority
        """

class V1alpha1Container:
    def __init__(self,
                 name: str,
                 image: str,
                 command: Optional[List[str]] = None,
                 args: Optional[List[str]] = None,
                 env: Optional[List[Dict[str, str]]] = None,
                 resources: Optional[Dict[str, Any]] = None):
        """Container specification for serving runtime."""
```
### TrainedModel Resources (V1Alpha1)

Resources for managing trained model artifacts and versions.

```python { .api }
class V1alpha1TrainedModel:
    def __init__(self,
                 api_version: str = "serving.kserve.io/v1alpha1",
                 kind: str = "TrainedModel",
                 metadata: Optional[Dict[str, Any]] = None,
                 spec: Optional['V1alpha1TrainedModelSpec'] = None):
        """
        TrainedModel resource for model artifact management.

        Args:
            api_version (str): Kubernetes API version
            kind (str): Resource kind
            metadata (Dict[str, Any], optional): Kubernetes metadata
            spec (V1alpha1TrainedModelSpec, optional): Model specification
        """

class V1alpha1TrainedModelSpec:
    def __init__(self,
                 inference_service: str,
                 model: 'V1alpha1ModelSpec'):
        """
        TrainedModel specification.

        Args:
            inference_service (str): Target InferenceService name
            model (V1alpha1ModelSpec): Model configuration
        """

class V1alpha1ModelSpec:
    def __init__(self,
                 storage_uri: str,
                 model_format: Dict[str, str],
                 memory: Optional[str] = None,
                 framework: Optional[str] = None):
        """
        Model specification.

        Args:
            storage_uri (str): Model storage location
            model_format (Dict[str, str]): Model format specification
            memory (str, optional): Memory requirements
            framework (str, optional): ML framework
        """
```
### InferenceGraph Resources (V1Alpha1)

Resources for multi-model inference pipelines and routing.

```python { .api }
class V1alpha1InferenceGraph:
    def __init__(self,
                 api_version: str = "serving.kserve.io/v1alpha1",
                 kind: str = "InferenceGraph",
                 metadata: Optional[Dict[str, Any]] = None,
                 spec: Optional['V1alpha1InferenceGraphSpec'] = None,
                 status: Optional['V1alpha1InferenceGraphStatus'] = None):
        """
        InferenceGraph for multi-model inference pipelines.

        Args:
            api_version (str): Kubernetes API version
            kind (str): Resource kind
            metadata (Dict[str, Any], optional): Kubernetes metadata
            spec (V1alpha1InferenceGraphSpec, optional): Graph specification
            status (V1alpha1InferenceGraphStatus, optional): Graph status
        """

class V1alpha1InferenceGraphSpec:
    def __init__(self,
                 nodes: Dict[str, 'V1alpha1InferenceStep']):
        """
        InferenceGraph specification.

        Args:
            nodes (Dict[str, V1alpha1InferenceStep]): Pipeline steps
        """

class V1alpha1InferenceStep:
    def __init__(self,
                 step_name: str,
                 service_name: str,
                 data: str,
                 condition: Optional[str] = None):
        """
        Individual step in inference pipeline.

        Args:
            step_name (str): Step identifier
            service_name (str): Target service name
            data (str): Data flow expression
            condition (str, optional): Conditional execution
        """

class V1alpha1InferenceRouter:
    def __init__(self,
                 rules: List[Dict[str, Any]]):
        """
        Routing configuration for inference graphs.

        Args:
            rules (List[Dict[str, Any]]): Routing rules
        """

class V1alpha1InferenceTarget:
    def __init__(self,
                 service_name: str,
                 weight: Optional[int] = None):
        """
        Target model for inference step.

        Args:
            service_name (str): Target service name
            weight (int, optional): Routing weight
        """
```
## Usage Examples

### Complete InferenceService Definition

```python
from kserve import (
    V1beta1InferenceService, V1beta1InferenceServiceSpec,
    V1beta1PredictorSpec, V1beta1SKLearnSpec
)

# Complete inference service with all components
isvc = V1beta1InferenceService(
    api_version="serving.kserve.io/v1beta1",
    kind="InferenceService",
    metadata={
        "name": "advanced-sklearn",
        "namespace": "ml-prod",
        "labels": {
            "app": "ml-inference",
            "version": "v1.0"
        },
        "annotations": {
            "serving.kserve.io/enable-prometheus-scraping": "true"
        }
    },
    spec=V1beta1InferenceServiceSpec(
        predictor=V1beta1PredictorSpec(
            sklearn=V1beta1SKLearnSpec(
                storage_uri="s3://ml-models/sklearn/iris/v1/",
                protocol_version="v2",
                runtime_version="1.3.0",
                resources={
                    "requests": {"cpu": "100m", "memory": "256Mi"},
                    "limits": {"cpu": "1", "memory": "2Gi"}
                }
            ),
            min_replicas=1,
            max_replicas=10,
            scale_metric="concurrency",
            scale_target=100
        )
    )
)
```
### Custom ServingRuntime Definition

```python
from kserve import (
    V1alpha1ClusterServingRuntime, V1alpha1ServingRuntimeSpec,
    V1alpha1SupportedModelFormat, V1alpha1Container
)

# Define custom serving runtime
runtime = V1alpha1ClusterServingRuntime(
    metadata={
        "name": "custom-pytorch-runtime"
    },
    spec=V1alpha1ServingRuntimeSpec(
        supported_model_formats=[
            V1alpha1SupportedModelFormat(
                name="pytorch",
                version="1.0",
                auto_select=True,
                priority=1
            )
        ],
        containers=[
            V1alpha1Container(
                name="kserve-container",
                image="custom-pytorch-server:latest",
                args=[
                    "--model_name={{.Name}}",
                    "--model_dir=/mnt/models",
                    "--http_port=8080"
                ],
                env=[
                    {"name": "STORAGE_URI", "value": "{{.StorageUri}}"}
                ],
                resources={
                    "requests": {"cpu": "1", "memory": "2Gi"},
                    "limits": {"cpu": "4", "memory": "8Gi", "nvidia.com/gpu": "1"}
                }
            )
        ],
        protocol_versions=["v1", "v2"],
        multi_model=True
    )
)
```
## Types

```python { .api }
from typing import List, Dict, Any, Optional, Union

ResourceMetadata = Dict[str, Any]
ResourceSpec = Dict[str, Any]
ResourceStatus = Dict[str, Any]
ContainerImage = str
StorageURI = str
ModelFormat = Dict[str, str]
```