or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

framework-servers.md · index.md · inference-clients.md · kubernetes-client.md · model-serving.md · protocol.md · resource-models.md · storage.md

docs/kubernetes-client.md

0

# Kubernetes Client

1

2

Python client for managing KServe resources in Kubernetes clusters including InferenceServices, TrainedModels, and InferenceGraphs. This client provides high-level operations for deploying and managing ML models on Kubernetes.

3

4

## Capabilities

5

6

### KServe Client

7

8

Main Kubernetes API client for KServe custom resources with CRUD operations and credential management.

9

10

```python { .api }

11

class KServeClient:

12

def __init__(self,

13

config_file: Optional[str] = None,

14

context: Optional[str] = None,

15

client_configuration: Optional['Configuration'] = None,

16

persist_config: bool = True):

17

"""

18

Initialize KServe Kubernetes client.

19

20

Args:

21

config_file (str, optional): Path to kubeconfig file

22

context (str, optional): Kubernetes context to use

23

client_configuration (Configuration, optional): Custom client config

24

persist_config (bool): Whether to persist config changes

25

"""

26

27

def create(self,

28

obj: Union['V1beta1InferenceService', 'V1alpha1TrainedModel', 'V1alpha1InferenceGraph'],

29

namespace: str = "default",

30

**kwargs) -> Any:

31

"""

32

Create KServe resource in cluster.

33

34

Args:

35

obj: KServe resource object to create

36

namespace (str): Kubernetes namespace

37

**kwargs: Additional API parameters

38

39

Returns:

40

Created resource object

41

42

Raises:

43

ApiException: If creation fails

44

"""

45

46

def get(self,

47

name: str,

48

namespace: str = "default",

49

version: str = "v1beta1",

50

**kwargs):

51

"""

52

Get KServe resource by name.

53

54

Args:

55

name (str): Resource name

56

namespace (str): Kubernetes namespace

57

version (str): API version (v1beta1, v1alpha1)

58

**kwargs: Additional API parameters

59

60

Returns:

61

Resource object

62

63

Raises:

64

ApiException: If resource not found

65

"""

66

67

def patch(self,

68

name: str,

69

obj: Any,

70

namespace: str = "default",

71

**kwargs):

72

"""

73

Patch existing KServe resource.

74

75

Args:

76

name (str): Resource name to patch

77

obj: Resource object with changes

78

namespace (str): Kubernetes namespace

79

**kwargs: Additional API parameters

80

81

Returns:

82

Updated resource object

83

"""

84

85

def replace(self,

86

name: str,

87

obj: Any,

88

namespace: str = "default",

89

**kwargs):

90

"""

91

Replace existing KServe resource.

92

93

Args:

94

name (str): Resource name to replace

95

obj: New resource object

96

namespace (str): Kubernetes namespace

97

**kwargs: Additional API parameters

98

99

Returns:

100

Replaced resource object

101

"""

102

103

def delete(self,

104

name: str,

105

namespace: str = "default",

106

version: str = "v1beta1",

107

**kwargs):

108

"""

109

Delete KServe resource.

110

111

Args:

112

name (str): Resource name to delete

113

namespace (str): Kubernetes namespace

114

version (str): API version

115

**kwargs: Additional API parameters

116

117

Returns:

118

Deletion status

119

"""

120

121

def list(self,

122

namespace: str = "default",

123

version: str = "v1beta1",

124

**kwargs) -> List[Any]:

125

"""

126

List KServe resources in namespace.

127

128

Args:

129

namespace (str): Kubernetes namespace

130

version (str): API version

131

**kwargs: Additional API parameters

132

133

Returns:

134

List[Any]: List of resources

135

"""

136

137

def wait_isvc_ready(self,

138

name: str,

139

namespace: str = "default",

140

timeout_seconds: int = 600,

141

polling_interval: int = 10) -> bool:

142

"""

143

Wait for InferenceService to become ready.

144

145

Args:

146

name (str): InferenceService name

147

namespace (str): Kubernetes namespace

148

timeout_seconds (int): Maximum wait time

149

polling_interval (int): Polling interval in seconds

150

151

Returns:

152

bool: True if ready, False if timeout

153

"""

154

```

155

156

### TrainedModel Operations

157

158

Specialized operations for managing TrainedModel resources.

159

160

```python { .api }

161

class KServeClient:

162

def create_trained_model(self,

163

trained_model: 'V1alpha1TrainedModel',

164

namespace: str = "default",

165

**kwargs):

166

"""

167

Create TrainedModel resource.

168

169

Args:

170

trained_model (V1alpha1TrainedModel): TrainedModel object

171

namespace (str): Kubernetes namespace

172

**kwargs: Additional API parameters

173

174

Returns:

175

Created TrainedModel

176

"""

177

178

def get_trained_model(self,

179

name: str,

180

namespace: str = "default",

181

**kwargs) -> 'V1alpha1TrainedModel':

182

"""

183

Get TrainedModel by name.

184

185

Args:

186

name (str): TrainedModel name

187

namespace (str): Kubernetes namespace

188

**kwargs: Additional API parameters

189

190

Returns:

191

V1alpha1TrainedModel: TrainedModel object

192

"""

193

194

def delete_trained_model(self,

195

name: str,

196

namespace: str = "default",

197

**kwargs):

198

"""

199

Delete TrainedModel resource.

200

201

Args:

202

name (str): TrainedModel name

203

namespace (str): Kubernetes namespace

204

**kwargs: Additional API parameters

205

206

Returns:

207

Deletion status

208

"""

209

210

def list_trained_models(self,

211

namespace: str = "default",

212

**kwargs) -> List['V1alpha1TrainedModel']:

213

"""

214

List TrainedModels in namespace.

215

216

Args:

217

namespace (str): Kubernetes namespace

218

**kwargs: Additional API parameters

219

220

Returns:

221

List[V1alpha1TrainedModel]: List of TrainedModels

222

"""

223

```

224

225

### InferenceGraph Operations

226

227

Operations for managing multi-model inference pipelines.

228

229

```python { .api }

230

class KServeClient:

231

def create_inference_graph(self,

232

inference_graph: 'V1alpha1InferenceGraph',

233

namespace: str = "default",

234

**kwargs):

235

"""

236

Create InferenceGraph resource.

237

238

Args:

239

inference_graph (V1alpha1InferenceGraph): InferenceGraph object

240

namespace (str): Kubernetes namespace

241

**kwargs: Additional API parameters

242

243

Returns:

244

Created InferenceGraph

245

"""

246

247

def get_inference_graph(self,

248

name: str,

249

namespace: str = "default",

250

**kwargs) -> 'V1alpha1InferenceGraph':

251

"""

252

Get InferenceGraph by name.

253

254

Args:

255

name (str): InferenceGraph name

256

namespace (str): Kubernetes namespace

257

**kwargs: Additional API parameters

258

259

Returns:

260

V1alpha1InferenceGraph: InferenceGraph object

261

"""

262

263

def delete_inference_graph(self,

264

name: str,

265

namespace: str = "default",

266

**kwargs):

267

"""

268

Delete InferenceGraph resource.

269

270

Args:

271

name (str): InferenceGraph name

272

namespace (str): Kubernetes namespace

273

**kwargs: Additional API parameters

274

275

Returns:

276

Deletion status

277

"""

278

279

def list_inference_graphs(self,

280

namespace: str = "default",

281

**kwargs) -> List['V1alpha1InferenceGraph']:

282

"""

283

List InferenceGraphs in namespace.

284

285

Args:

286

namespace (str): Kubernetes namespace

287

**kwargs: Additional API parameters

288

289

Returns:

290

List[V1alpha1InferenceGraph]: List of InferenceGraphs

291

"""

292

```

293

294

### Credential Management

295

296

Methods for managing storage and authentication credentials.

297

298

```python { .api }

299

class KServeClient:

300

def set_credentials(self,

301

storage_type: str,

302

namespace: str = "default",

303

**kwargs):

304

"""

305

Set storage credentials for model access.

306

307

Args:

308

storage_type (str): Storage type (s3, gcs, azure, etc.)

309

namespace (str): Kubernetes namespace

310

**kwargs: Credential parameters (access_key, secret_key, etc.)

311

"""

312

```

313

314

## Usage Examples

315

316

### Basic InferenceService Deployment

317

318

```python

319

from kserve import KServeClient

320

from kserve import V1beta1InferenceService, V1beta1InferenceServiceSpec

321

from kserve import V1beta1PredictorSpec, V1beta1SKLearnSpec

322

323

# Initialize client

324

client = KServeClient()

325

326

# Define InferenceService

327

isvc = V1beta1InferenceService(

328

api_version="serving.kserve.io/v1beta1",

329

kind="InferenceService",

330

metadata={

331

"name": "sklearn-iris",

332

"namespace": "default"

333

},

334

spec=V1beta1InferenceServiceSpec(

335

predictor=V1beta1PredictorSpec(

336

sklearn=V1beta1SKLearnSpec(

337

storage_uri="gs://kfserving-examples/models/sklearn/1.0/model"

338

)

339

)

340

)

341

)

342

343

# Create InferenceService

344

created_isvc = client.create(isvc, namespace="default")

345

print(f"Created InferenceService: {created_isvc.metadata.name}")

346

347

# Wait for readiness

348

ready = client.wait_isvc_ready("sklearn-iris", namespace="default")

349

if ready:

350

print("InferenceService is ready!")

351

else:

352

print("InferenceService failed to become ready")

353

```

354

355

### Advanced InferenceService with Transformer

356

357

```python

358

from kserve import (

359

V1beta1InferenceService, V1beta1InferenceServiceSpec,

360

V1beta1PredictorSpec, V1beta1TransformerSpec,

361

V1beta1CustomPredictor, V1beta1CustomTransformer

362

)

363

364

# Define custom transformer and predictor

365

transformer_spec = V1beta1TransformerSpec(

366

custom=V1beta1CustomTransformer(

367

image="my-transformer:latest",

368

env=[{"name": "STORAGE_URI", "value": "s3://my-bucket/preprocessor"}]

369

)

370

)

371

372

predictor_spec = V1beta1PredictorSpec(

373

custom=V1beta1CustomPredictor(

374

image="my-predictor:latest",

375

env=[{"name": "MODEL_NAME", "value": "my-model"}]

376

)

377

)

378

379

# Create InferenceService with transformer

380

isvc = V1beta1InferenceService(

381

api_version="serving.kserve.io/v1beta1",

382

kind="InferenceService",

383

metadata={"name": "custom-pipeline", "namespace": "ml-models"},

384

spec=V1beta1InferenceServiceSpec(

385

transformer=transformer_spec,

386

predictor=predictor_spec

387

)

388

)

389

390

client.create(isvc, namespace="ml-models")

391

```

392

393

### TrainedModel Management

394

395

```python

396

from kserve import V1alpha1TrainedModel, V1alpha1TrainedModelSpec, V1alpha1ModelSpec

397

398

# Create TrainedModel

399

trained_model = V1alpha1TrainedModel(

400

api_version="serving.kserve.io/v1alpha1",

401

kind="TrainedModel",

402

metadata={

403

"name": "bert-model-v1",

404

"namespace": "nlp-models"

405

},

406

spec=V1alpha1TrainedModelSpec(

407

inference_service="bert-service",

408

model=V1alpha1ModelSpec(

409

model_format={"name": "pytorch"},

410

storage_uri="s3://ml-models/bert/v1/model.pt"

411

)

412

)

413

)

414

415

# Create and manage TrainedModel

416

created_model = client.create_trained_model(trained_model, namespace="nlp-models")

417

418

# List all trained models

419

models = client.list_trained_models(namespace="nlp-models")

420

for model in models:

421

print(f"Model: {model.metadata.name}, Status: {model.status}")

422

423

# Get specific model

424

model = client.get_trained_model("bert-model-v1", namespace="nlp-models")

425

print(f"Model URI: {model.spec.model.storage_uri}")

426

```

427

428

### InferenceGraph for Multi-Model Pipeline

429

430

```python

431

from kserve import (

432

V1alpha1InferenceGraph, V1alpha1InferenceGraphSpec,

433

V1alpha1InferenceStep, V1alpha1InferenceTarget

434

)

435

436

# Define inference pipeline steps

437

preprocessor_step = V1alpha1InferenceStep(

438

step_name="preprocessor",

439

service_name="text-preprocessor",

440

data="$request"

441

)

442

443

classifier_step = V1alpha1InferenceStep(

444

step_name="classifier",

445

service_name="bert-classifier",

446

data="$preprocessor"

447

)

448

449

postprocessor_step = V1alpha1InferenceStep(

450

step_name="postprocessor",

451

service_name="result-formatter",

452

data="$classifier"

453

)

454

455

# Create InferenceGraph

456

graph = V1alpha1InferenceGraph(

457

api_version="serving.kserve.io/v1alpha1",

458

kind="InferenceGraph",

459

metadata={

460

"name": "text-classification-pipeline",

461

"namespace": "nlp-models"

462

},

463

spec=V1alpha1InferenceGraphSpec(

464

nodes={

465

"preprocessor": preprocessor_step,

466

"classifier": classifier_step,

467

"postprocessor": postprocessor_step

468

}

469

)

470

)

471

472

# Deploy pipeline

473

client.create_inference_graph(graph, namespace="nlp-models")

474

```

475

476

### Credential Management

477

478

```python

479

# Set S3 credentials

480

client.set_credentials(

481

storage_type="s3",

482

namespace="ml-models",

483

access_key_id="YOUR_ACCESS_KEY",

484

secret_access_key="YOUR_SECRET_KEY",

485

region="us-west-2"

486

)

487

488

# Set GCS credentials

489

client.set_credentials(

490

storage_type="gcs",

491

namespace="ml-models",

492

service_account_key="path/to/service-account.json"

493

)

494

495

# Set Azure credentials

496

client.set_credentials(

497

storage_type="azure",

498

namespace="ml-models",

499

account_name="mystorageaccount",

500

account_key="YOUR_ACCOUNT_KEY"

501

)

502

```

503

504

### Resource Monitoring and Updates

505

506

```python

507

import time

508

509

def monitor_inference_service(name: str, namespace: str = "default"):

510

"""Monitor InferenceService status and updates."""

511

512

while True:

513

try:

514

# Get current status

515

isvc = client.get(name, namespace=namespace)

516

517

status = isvc.status

518

if status and status.conditions:

519

for condition in status.conditions:

520

print(f"{condition.type}: {condition.status} - {condition.message}")

521

522

# Check if ready

523

if status and status.url:

524

print(f"Service URL: {status.url}")

525

break

526

527

except Exception as e:

528

print(f"Error monitoring service: {e}")

529

530

time.sleep(10)

531

532

# Monitor service

533

monitor_inference_service("sklearn-iris")

534

```

535

536

### Batch Operations

537

538

```python

539

# Deploy multiple models

540

models = [

541

("model-a", "s3://bucket/model-a/"),

542

("model-b", "s3://bucket/model-b/"),

543

("model-c", "s3://bucket/model-c/")

544

]

545

546

deployed_services = []

547

for name, uri in models:

548

isvc = create_inference_service(name, uri)  # user-defined helper that builds a V1beta1InferenceService

549

result = client.create(isvc, namespace="batch-deployment")

550

deployed_services.append(result)

551

552

# Wait for all to be ready

553

for service in deployed_services:

554

name = service.metadata.name

555

ready = client.wait_isvc_ready(name, namespace="batch-deployment")

556

print(f"Service {name}: {'Ready' if ready else 'Failed'}")

557

558

# List all services

559

services = client.list(namespace="batch-deployment")

560

print(f"Total services: {len(services.items)}")

561

```

562

563

## Types

564

565

```python { .api }

566

from typing import List, Dict, Any, Optional, Union

567

from kubernetes.client import ApiException

568

569

ResourceName = str

570

Namespace = str

571

ApiVersion = str

572

ResourceObject = Union['V1beta1InferenceService', 'V1alpha1TrainedModel', 'V1alpha1InferenceGraph']

573

```