# Kubernetes Client

Python client for managing KServe resources in Kubernetes clusters including InferenceServices, TrainedModels, and InferenceGraphs. This client provides high-level operations for deploying and managing ML models on Kubernetes.

## Capabilities

### KServe Client

Main Kubernetes API client for KServe custom resources with CRUD operations and credential management.

```python { .api }
class KServeClient:
    def __init__(self,
                 config_file: Optional[str] = None,
                 context: Optional[str] = None,
                 client_configuration: Optional['Configuration'] = None,
                 persist_config: bool = True):
        """
        Initialize KServe Kubernetes client.

        Args:
            config_file (str, optional): Path to kubeconfig file
            context (str, optional): Kubernetes context to use
            client_configuration (Configuration, optional): Custom client config
            persist_config (bool): Whether to persist config changes
        """

    def create(self,
               obj: Union['V1beta1InferenceService', 'V1alpha1TrainedModel', 'V1alpha1InferenceGraph'],
               namespace: str = "default",
               **kwargs) -> Any:
        """
        Create KServe resource in cluster.

        Args:
            obj: KServe resource object to create
            namespace (str): Kubernetes namespace
            **kwargs: Additional API parameters

        Returns:
            Created resource object

        Raises:
            ApiException: If creation fails
        """

    def get(self,
            name: str,
            namespace: str = "default",
            version: str = "v1beta1",
            **kwargs):
        """
        Get KServe resource by name.

        Args:
            name (str): Resource name
            namespace (str): Kubernetes namespace
            version (str): API version (v1beta1, v1alpha1)
            **kwargs: Additional API parameters

        Returns:
            Resource object

        Raises:
            ApiException: If resource not found
        """

    def patch(self,
              name: str,
              obj: Any,
              namespace: str = "default",
              **kwargs):
        """
        Patch existing KServe resource.

        Args:
            name (str): Resource name to patch
            obj: Resource object with changes
            namespace (str): Kubernetes namespace
            **kwargs: Additional API parameters

        Returns:
            Updated resource object
        """

    def replace(self,
                name: str,
                obj: Any,
                namespace: str = "default",
                **kwargs):
        """
        Replace existing KServe resource.

        Args:
            name (str): Resource name to replace
            obj: New resource object
            namespace (str): Kubernetes namespace
            **kwargs: Additional API parameters

        Returns:
            Replaced resource object
        """

    def delete(self,
               name: str,
               namespace: str = "default",
               version: str = "v1beta1",
               **kwargs):
        """
        Delete KServe resource.

        Args:
            name (str): Resource name to delete
            namespace (str): Kubernetes namespace
            version (str): API version
            **kwargs: Additional API parameters

        Returns:
            Deletion status
        """

    def list(self,
             namespace: str = "default",
             version: str = "v1beta1",
             **kwargs) -> List[Any]:
        """
        List KServe resources in namespace.

        Args:
            namespace (str): Kubernetes namespace
            version (str): API version
            **kwargs: Additional API parameters

        Returns:
            List[Any]: List of resources
        """

    def wait_isvc_ready(self,
                        name: str,
                        namespace: str = "default",
                        timeout_seconds: int = 600,
                        polling_interval: int = 10) -> bool:
        """
        Wait for InferenceService to become ready.

        Args:
            name (str): InferenceService name
            namespace (str): Kubernetes namespace
            timeout_seconds (int): Maximum wait time
            polling_interval (int): Polling interval in seconds

        Returns:
            bool: True if ready, False if timeout
        """
```

### TrainedModel Operations

Specialized operations for managing TrainedModel resources.

```python { .api }
class KServeClient:
    def create_trained_model(self,
                             trained_model: 'V1alpha1TrainedModel',
                             namespace: str = "default",
                             **kwargs):
        """
        Create TrainedModel resource.

        Args:
            trained_model (V1alpha1TrainedModel): TrainedModel object
            namespace (str): Kubernetes namespace
            **kwargs: Additional API parameters

        Returns:
            Created TrainedModel
        """

    def get_trained_model(self,
                          name: str,
                          namespace: str = "default",
                          **kwargs) -> 'V1alpha1TrainedModel':
        """
        Get TrainedModel by name.

        Args:
            name (str): TrainedModel name
            namespace (str): Kubernetes namespace
            **kwargs: Additional API parameters

        Returns:
            V1alpha1TrainedModel: TrainedModel object
        """

    def delete_trained_model(self,
                             name: str,
                             namespace: str = "default",
                             **kwargs):
        """
        Delete TrainedModel resource.

        Args:
            name (str): TrainedModel name
            namespace (str): Kubernetes namespace
            **kwargs: Additional API parameters

        Returns:
            Deletion status
        """

    def list_trained_models(self,
                            namespace: str = "default",
                            **kwargs) -> List['V1alpha1TrainedModel']:
        """
        List TrainedModels in namespace.

        Args:
            namespace (str): Kubernetes namespace
            **kwargs: Additional API parameters

        Returns:
            List[V1alpha1TrainedModel]: List of TrainedModels
        """
```

### InferenceGraph Operations

Operations for managing multi-model inference pipelines.

```python { .api }
class KServeClient:
    def create_inference_graph(self,
                               inference_graph: 'V1alpha1InferenceGraph',
                               namespace: str = "default",
                               **kwargs):
        """
        Create InferenceGraph resource.

        Args:
            inference_graph (V1alpha1InferenceGraph): InferenceGraph object
            namespace (str): Kubernetes namespace
            **kwargs: Additional API parameters

        Returns:
            Created InferenceGraph
        """

    def get_inference_graph(self,
                            name: str,
                            namespace: str = "default",
                            **kwargs) -> 'V1alpha1InferenceGraph':
        """
        Get InferenceGraph by name.

        Args:
            name (str): InferenceGraph name
            namespace (str): Kubernetes namespace
            **kwargs: Additional API parameters

        Returns:
            V1alpha1InferenceGraph: InferenceGraph object
        """

    def delete_inference_graph(self,
                               name: str,
                               namespace: str = "default",
                               **kwargs):
        """
        Delete InferenceGraph resource.

        Args:
            name (str): InferenceGraph name
            namespace (str): Kubernetes namespace
            **kwargs: Additional API parameters

        Returns:
            Deletion status
        """

    def list_inference_graphs(self,
                              namespace: str = "default",
                              **kwargs) -> List['V1alpha1InferenceGraph']:
        """
        List InferenceGraphs in namespace.

        Args:
            namespace (str): Kubernetes namespace
            **kwargs: Additional API parameters

        Returns:
            List[V1alpha1InferenceGraph]: List of InferenceGraphs
        """
```

### Credential Management

Methods for managing storage and authentication credentials.

```python { .api }
class KServeClient:
    def set_credentials(self,
                        storage_type: str,
                        namespace: str = "default",
                        **kwargs):
        """
        Set storage credentials for model access.

        Args:
            storage_type (str): Storage type (s3, gcs, azure, etc.)
            namespace (str): Kubernetes namespace
            **kwargs: Credential parameters (access_key, secret_key, etc.)
        """
```

## Usage Examples

### Basic InferenceService Deployment

```python
from kserve import KServeClient
from kserve import V1beta1InferenceService, V1beta1InferenceServiceSpec
from kserve import V1beta1PredictorSpec, V1beta1SKLearnSpec

# Initialize client
client = KServeClient()

# Define InferenceService
isvc = V1beta1InferenceService(
    api_version="serving.kserve.io/v1beta1",
    kind="InferenceService",
    metadata={
        "name": "sklearn-iris",
        "namespace": "default"
    },
    spec=V1beta1InferenceServiceSpec(
        predictor=V1beta1PredictorSpec(
            sklearn=V1beta1SKLearnSpec(
                storage_uri="gs://kfserving-examples/models/sklearn/1.0/model"
            )
        )
    )
)

# Create InferenceService
created_isvc = client.create(isvc, namespace="default")
print(f"Created InferenceService: {created_isvc.metadata.name}")

# Wait for readiness
ready = client.wait_isvc_ready("sklearn-iris", namespace="default")
if ready:
    print("InferenceService is ready!")
else:
    print("InferenceService failed to become ready")
```

### Advanced InferenceService with Transformer

```python
from kserve import (
    V1beta1InferenceService, V1beta1InferenceServiceSpec,
    V1beta1PredictorSpec, V1beta1TransformerSpec,
    V1beta1CustomPredictor, V1beta1CustomTransformer
)

# Define custom transformer and predictor
transformer_spec = V1beta1TransformerSpec(
    custom=V1beta1CustomTransformer(
        image="my-transformer:latest",
        env=[{"name": "STORAGE_URI", "value": "s3://my-bucket/preprocessor"}]
    )
)

predictor_spec = V1beta1PredictorSpec(
    custom=V1beta1CustomPredictor(
        image="my-predictor:latest",
        env=[{"name": "MODEL_NAME", "value": "my-model"}]
    )
)

# Create InferenceService with transformer
isvc = V1beta1InferenceService(
    api_version="serving.kserve.io/v1beta1",
    kind="InferenceService",
    metadata={"name": "custom-pipeline", "namespace": "ml-models"},
    spec=V1beta1InferenceServiceSpec(
        transformer=transformer_spec,
        predictor=predictor_spec
    )
)

client.create(isvc, namespace="ml-models")
```

### TrainedModel Management

```python
from kserve import V1alpha1TrainedModel, V1alpha1TrainedModelSpec, V1alpha1ModelSpec

# Create TrainedModel
trained_model = V1alpha1TrainedModel(
    api_version="serving.kserve.io/v1alpha1",
    kind="TrainedModel",
    metadata={
        "name": "bert-model-v1",
        "namespace": "nlp-models"
    },
    spec=V1alpha1TrainedModelSpec(
        inference_service="bert-service",
        model=V1alpha1ModelSpec(
            model_format={"name": "pytorch"},
            storage_uri="s3://ml-models/bert/v1/model.pt"
        )
    )
)

# Create and manage TrainedModel
created_model = client.create_trained_model(trained_model, namespace="nlp-models")

# List all trained models
models = client.list_trained_models(namespace="nlp-models")
for model in models.items:
    print(f"Model: {model.metadata.name}, Status: {model.status}")

# Get specific model
model = client.get_trained_model("bert-model-v1", namespace="nlp-models")
print(f"Model URI: {model.spec.model.storage_uri}")
```

### InferenceGraph for Multi-Model Pipeline

```python
from kserve import (
    V1alpha1InferenceGraph, V1alpha1InferenceGraphSpec,
    V1alpha1InferenceStep, V1alpha1InferenceTarget
)

# Define inference pipeline steps
preprocessor_step = V1alpha1InferenceStep(
    step_name="preprocessor",
    service_name="text-preprocessor",
    data="$request"
)

classifier_step = V1alpha1InferenceStep(
    step_name="classifier",
    service_name="bert-classifier",
    data="$preprocessor"
)

postprocessor_step = V1alpha1InferenceStep(
    step_name="postprocessor",
    service_name="result-formatter",
    data="$classifier"
)

# Create InferenceGraph
graph = V1alpha1InferenceGraph(
    api_version="serving.kserve.io/v1alpha1",
    kind="InferenceGraph",
    metadata={
        "name": "text-classification-pipeline",
        "namespace": "nlp-models"
    },
    spec=V1alpha1InferenceGraphSpec(
        nodes={
            "preprocessor": preprocessor_step,
            "classifier": classifier_step,
            "postprocessor": postprocessor_step
        }
    )
)

# Deploy pipeline
client.create_inference_graph(graph, namespace="nlp-models")
```

### Credential Management

```python
# Set S3 credentials
client.set_credentials(
    storage_type="s3",
    namespace="ml-models",
    access_key_id="YOUR_ACCESS_KEY",
    secret_access_key="YOUR_SECRET_KEY",
    region="us-west-2"
)

# Set GCS credentials
client.set_credentials(
    storage_type="gcs",
    namespace="ml-models",
    service_account_key="path/to/service-account.json"
)

# Set Azure credentials
client.set_credentials(
    storage_type="azure",
    namespace="ml-models",
    account_name="mystorageaccount",
    account_key="YOUR_ACCOUNT_KEY"
)
```

### Resource Monitoring and Updates

```python
import time

def monitor_inference_service(name: str, namespace: str = "default"):
    """Monitor InferenceService status and updates."""

    while True:
        try:
            # Get current status
            isvc = client.get(name, namespace=namespace)

            status = isvc.status
            if status and status.conditions:
                for condition in status.conditions:
                    print(f"{condition.type}: {condition.status} - {condition.message}")

            # Check if ready
            if status and status.url:
                print(f"Service URL: {status.url}")
                break

        except Exception as e:
            print(f"Error monitoring service: {e}")

        time.sleep(10)

# Monitor service
monitor_inference_service("sklearn-iris")
```

### Batch Operations

```python
# Deploy multiple models
models = [
    ("model-a", "s3://bucket/model-a/"),
    ("model-b", "s3://bucket/model-b/"),
    ("model-c", "s3://bucket/model-c/")
]

deployed_services = []
for name, uri in models:
    # create_inference_service is a helper (defined elsewhere) that builds a V1beta1InferenceService
    isvc = create_inference_service(name, uri)
    result = client.create(isvc, namespace="batch-deployment")
    deployed_services.append(result)

# Wait for all to be ready
for service in deployed_services:
    name = service.metadata.name
    ready = client.wait_isvc_ready(name, namespace="batch-deployment")
    print(f"Service {name}: {'Ready' if ready else 'Failed'}")

# List all services
services = client.list(namespace="batch-deployment")
print(f"Total services: {len(services.items)}")
```

## Types

```python { .api }
from typing import List, Dict, Any, Optional, Union
from kubernetes.client import ApiException

ResourceName = str
Namespace = str
ApiVersion = str
ResourceObject = Union['V1beta1InferenceService', 'V1alpha1TrainedModel', 'V1alpha1InferenceGraph']
```