
# Resource Models

Comprehensive set of Kubernetes Custom Resource Definitions for defining inference services, serving runtimes, and model configurations. These models represent the complete KServe API for deploying and managing ML models on Kubernetes.

## Capabilities

### InferenceService Resources (V1Beta1)

Core resources for deploying and managing ML model inference services.

```python { .api }
class V1beta1InferenceService:
    def __init__(self,
                 api_version: str = "serving.kserve.io/v1beta1",
                 kind: str = "InferenceService",
                 metadata: Optional[Dict[str, Any]] = None,
                 spec: Optional['V1beta1InferenceServiceSpec'] = None,
                 status: Optional['V1beta1InferenceServiceStatus'] = None):
        """
        Main KServe inference service resource.

        Args:
            api_version (str): Kubernetes API version
            kind (str): Resource kind
            metadata (Dict[str, Any], optional): Kubernetes metadata
            spec (V1beta1InferenceServiceSpec, optional): Service specification
            status (V1beta1InferenceServiceStatus, optional): Service status
        """

class V1beta1InferenceServiceSpec:
    def __init__(self,
                 predictor: 'V1beta1PredictorSpec',
                 transformer: Optional['V1beta1TransformerSpec'] = None,
                 explainer: Optional['V1beta1ExplainerSpec'] = None):
        """
        InferenceService specification.

        Args:
            predictor (V1beta1PredictorSpec): Predictor component spec
            transformer (V1beta1TransformerSpec, optional): Transformer component spec
            explainer (V1beta1ExplainerSpec, optional): Explainer component spec
        """

class V1beta1InferenceServiceStatus:
    def __init__(self,
                 url: Optional[str] = None,
                 conditions: Optional[List['KnativeCondition']] = None,
                 components: Optional[Dict[str, Any]] = None):
        """
        InferenceService status information.

        Args:
            url (str, optional): Service endpoint URL
            conditions (List[KnativeCondition], optional): Status conditions
            components (Dict[str, Any], optional): Component statuses
        """

class V1beta1InferenceServiceList:
    def __init__(self,
                 api_version: str = "serving.kserve.io/v1beta1",
                 kind: str = "InferenceServiceList",
                 items: Optional[List['V1beta1InferenceService']] = None,
                 metadata: Optional[Dict[str, Any]] = None):
        """List of InferenceServices."""
```

### Component Specifications

Specifications for the three main components of inference services.

```python { .api }
class V1beta1PredictorSpec:
    def __init__(self,
                 sklearn: Optional['V1beta1SKLearnSpec'] = None,
                 tensorflow: Optional['V1beta1TFServingSpec'] = None,
                 pytorch: Optional['V1beta1TorchServeSpec'] = None,
                 xgboost: Optional['V1beta1XGBoostSpec'] = None,
                 lightgbm: Optional['V1beta1LightGBMSpec'] = None,
                 onnx: Optional['V1beta1ONNXRuntimeSpec'] = None,
                 triton: Optional['V1beta1TritonSpec'] = None,
                 pmml: Optional['V1beta1PMMLSpec'] = None,
                 paddle: Optional['V1beta1PaddleServerSpec'] = None,
                 custom: Optional['V1beta1CustomPredictor'] = None):
        """
        Predictor component specification.

        Args:
            sklearn (V1beta1SKLearnSpec, optional): Scikit-learn predictor
            tensorflow (V1beta1TFServingSpec, optional): TensorFlow Serving predictor
            pytorch (V1beta1TorchServeSpec, optional): PyTorch TorchServe predictor
            xgboost (V1beta1XGBoostSpec, optional): XGBoost predictor
            lightgbm (V1beta1LightGBMSpec, optional): LightGBM predictor
            onnx (V1beta1ONNXRuntimeSpec, optional): ONNX Runtime predictor
            triton (V1beta1TritonSpec, optional): NVIDIA Triton predictor
            pmml (V1beta1PMMLSpec, optional): PMML predictor
            paddle (V1beta1PaddleServerSpec, optional): PaddlePaddle predictor
            custom (V1beta1CustomPredictor, optional): Custom predictor container
        """

class V1beta1TransformerSpec:
    def __init__(self,
                 custom: Optional['V1beta1CustomTransformer'] = None):
        """
        Transformer component specification.

        Args:
            custom (V1beta1CustomTransformer, optional): Custom transformer container
        """

class V1beta1ExplainerSpec:
    def __init__(self,
                 art: Optional['V1beta1ARTExplainerSpec'] = None,
                 custom: Optional['V1beta1CustomExplainer'] = None):
        """
        Explainer component specification.

        Args:
            art (V1beta1ARTExplainerSpec, optional): ART-based explainer
            custom (V1beta1CustomExplainer, optional): Custom explainer container
        """
```

### Framework-Specific Predictors

Specifications for different ML framework servers.

```python { .api }
class V1beta1SKLearnSpec:
    def __init__(self,
                 storage_uri: str,
                 protocol_version: Optional[str] = None,
                 runtime_version: Optional[str] = None,
                 resources: Optional[Dict[str, Any]] = None):
        """
        Scikit-learn predictor specification.

        Args:
            storage_uri (str): Model storage location
            protocol_version (str, optional): Protocol version (v1, v2)
            runtime_version (str, optional): Runtime version
            resources (Dict[str, Any], optional): Resource requirements
        """

class V1beta1TFServingSpec:
    def __init__(self,
                 storage_uri: str,
                 protocol_version: Optional[str] = None,
                 runtime_version: Optional[str] = None):
        """TensorFlow Serving predictor specification."""

class V1beta1TorchServeSpec:
    def __init__(self,
                 storage_uri: str,
                 protocol_version: Optional[str] = None,
                 runtime_version: Optional[str] = None):
        """PyTorch TorchServe predictor specification."""

class V1beta1XGBoostSpec:
    def __init__(self,
                 storage_uri: str,
                 protocol_version: Optional[str] = None,
                 runtime_version: Optional[str] = None):
        """XGBoost predictor specification."""

class V1beta1LightGBMSpec:
    def __init__(self,
                 storage_uri: str,
                 protocol_version: Optional[str] = None,
                 runtime_version: Optional[str] = None):
        """LightGBM predictor specification."""

class V1beta1ONNXRuntimeSpec:
    def __init__(self,
                 storage_uri: str,
                 protocol_version: Optional[str] = None,
                 runtime_version: Optional[str] = None):
        """ONNX Runtime predictor specification."""

class V1beta1TritonSpec:
    def __init__(self,
                 storage_uri: str,
                 protocol_version: Optional[str] = None,
                 runtime_version: Optional[str] = None):
        """NVIDIA Triton predictor specification."""

class V1beta1PMMLSpec:
    def __init__(self,
                 storage_uri: str,
                 protocol_version: Optional[str] = None,
                 runtime_version: Optional[str] = None):
        """PMML predictor specification."""

class V1beta1PaddleServerSpec:
    def __init__(self,
                 storage_uri: str,
                 protocol_version: Optional[str] = None,
                 runtime_version: Optional[str] = None):
        """PaddlePaddle predictor specification."""
```

### Custom Component Specifications

Specifications for custom containers and runtime configurations.

```python { .api }
class V1beta1CustomPredictor:
    def __init__(self,
                 image: str,
                 command: Optional[List[str]] = None,
                 args: Optional[List[str]] = None,
                 env: Optional[List[Dict[str, str]]] = None,
                 resources: Optional[Dict[str, Any]] = None,
                 ports: Optional[List[Dict[str, Any]]] = None):
        """
        Custom predictor container specification.

        Args:
            image (str): Container image
            command (List[str], optional): Container command
            args (List[str], optional): Container arguments
            env (List[Dict[str, str]], optional): Environment variables
            resources (Dict[str, Any], optional): Resource requirements
            ports (List[Dict[str, Any]], optional): Container ports
        """

class V1beta1CustomTransformer:
    def __init__(self,
                 image: str,
                 command: Optional[List[str]] = None,
                 args: Optional[List[str]] = None,
                 env: Optional[List[Dict[str, str]]] = None):
        """Custom transformer container specification."""

class V1beta1CustomExplainer:
    def __init__(self,
                 image: str,
                 command: Optional[List[str]] = None,
                 args: Optional[List[str]] = None,
                 env: Optional[List[Dict[str, str]]] = None):
        """Custom explainer container specification."""
```

### Autoscaling Specifications

Configuration for automatic scaling based on metrics.

```python { .api }
class V1beta1AutoScalingSpec:
    def __init__(self,
                 min_replicas: Optional[int] = None,
                 max_replicas: Optional[int] = None,
                 target_utilization_percentage: Optional[int] = None,
                 metrics: Optional[List['V1beta1MetricsSpec']] = None):
        """
        Autoscaling specification.

        Args:
            min_replicas (int, optional): Minimum number of replicas
            max_replicas (int, optional): Maximum number of replicas
            target_utilization_percentage (int, optional): Target CPU utilization
            metrics (List[V1beta1MetricsSpec], optional): Custom metrics
        """

class V1beta1MetricsSpec:
    def __init__(self,
                 type: str,
                 resource: Optional['V1beta1ResourceMetricSource'] = None,
                 pods: Optional['V1beta1PodMetricSource'] = None,
                 external: Optional['V1beta1ExternalMetricSource'] = None):
        """
        Metrics specification for autoscaling.

        Args:
            type (str): Metric type (Resource, Pods, External)
            resource (V1beta1ResourceMetricSource, optional): Resource metric
            pods (V1beta1PodMetricSource, optional): Pod metric
            external (V1beta1ExternalMetricSource, optional): External metric
        """

class V1beta1ResourceMetricSource:
    def __init__(self,
                 name: str,
                 target: 'V1beta1MetricTarget'):
        """Resource-based metric source."""

class V1beta1PodMetricSource:
    def __init__(self,
                 metric: 'V1beta1ExternalMetrics',
                 target: 'V1beta1MetricTarget'):
        """Pod-based metric source."""

class V1beta1ExternalMetricSource:
    def __init__(self,
                 metric: 'V1beta1ExternalMetrics',
                 target: 'V1beta1MetricTarget'):
        """External metric source."""

class V1beta1MetricTarget:
    def __init__(self,
                 type: str,
                 average_utilization: Optional[int] = None,
                 average_value: Optional[str] = None,
                 value: Optional[str] = None):
        """Metric target specification."""
```

### ServingRuntime Resources (V1Alpha1)

Runtime definitions for model serving containers and configurations.

```python { .api }
class V1alpha1ServingRuntime:
    def __init__(self,
                 api_version: str = "serving.kserve.io/v1alpha1",
                 kind: str = "ServingRuntime",
                 metadata: Optional[Dict[str, Any]] = None,
                 spec: Optional['V1alpha1ServingRuntimeSpec'] = None):
        """
        Namespace-scoped serving runtime definition.

        Args:
            api_version (str): Kubernetes API version
            kind (str): Resource kind
            metadata (Dict[str, Any], optional): Kubernetes metadata
            spec (V1alpha1ServingRuntimeSpec, optional): Runtime specification
        """

class V1alpha1ClusterServingRuntime:
    def __init__(self,
                 api_version: str = "serving.kserve.io/v1alpha1",
                 kind: str = "ClusterServingRuntime",
                 metadata: Optional[Dict[str, Any]] = None,
                 spec: Optional['V1alpha1ServingRuntimeSpec'] = None):
        """Cluster-wide serving runtime definition."""

class V1alpha1ServingRuntimeSpec:
    def __init__(self,
                 supported_model_formats: List['V1alpha1SupportedModelFormat'],
                 containers: List['V1alpha1Container'],
                 protocol_versions: Optional[List[str]] = None,
                 multi_model: Optional[bool] = None):
        """
        Serving runtime specification.

        Args:
            supported_model_formats (List[V1alpha1SupportedModelFormat]): Supported formats
            containers (List[V1alpha1Container]): Runtime containers
            protocol_versions (List[str], optional): Supported protocol versions
            multi_model (bool, optional): Multi-model support flag
        """

class V1alpha1SupportedModelFormat:
    def __init__(self,
                 name: str,
                 version: Optional[str] = None,
                 auto_select: Optional[bool] = None,
                 priority: Optional[int] = None):
        """
        Supported model format specification.

        Args:
            name (str): Format name (sklearn, tensorflow, pytorch, etc.)
            version (str, optional): Format version
            auto_select (bool, optional): Auto-selection flag
            priority (int, optional): Selection priority
        """

class V1alpha1Container:
    def __init__(self,
                 name: str,
                 image: str,
                 command: Optional[List[str]] = None,
                 args: Optional[List[str]] = None,
                 env: Optional[List[Dict[str, str]]] = None,
                 resources: Optional[Dict[str, Any]] = None):
        """Container specification for serving runtime."""
```

### TrainedModel Resources (V1Alpha1)

Resources for managing trained model artifacts and versions.

```python { .api }
class V1alpha1TrainedModel:
    def __init__(self,
                 api_version: str = "serving.kserve.io/v1alpha1",
                 kind: str = "TrainedModel",
                 metadata: Optional[Dict[str, Any]] = None,
                 spec: Optional['V1alpha1TrainedModelSpec'] = None):
        """
        TrainedModel resource for model artifact management.

        Args:
            api_version (str): Kubernetes API version
            kind (str): Resource kind
            metadata (Dict[str, Any], optional): Kubernetes metadata
            spec (V1alpha1TrainedModelSpec, optional): Model specification
        """

class V1alpha1TrainedModelSpec:
    def __init__(self,
                 inference_service: str,
                 model: 'V1alpha1ModelSpec'):
        """
        TrainedModel specification.

        Args:
            inference_service (str): Target InferenceService name
            model (V1alpha1ModelSpec): Model configuration
        """

class V1alpha1ModelSpec:
    def __init__(self,
                 storage_uri: str,
                 model_format: Dict[str, str],
                 memory: Optional[str] = None,
                 framework: Optional[str] = None):
        """
        Model specification.

        Args:
            storage_uri (str): Model storage location
            model_format (Dict[str, str]): Model format specification
            memory (str, optional): Memory requirements
            framework (str, optional): ML framework
        """
```

### InferenceGraph Resources (V1Alpha1)

Resources for multi-model inference pipelines and routing.

```python { .api }
class V1alpha1InferenceGraph:
    def __init__(self,
                 api_version: str = "serving.kserve.io/v1alpha1",
                 kind: str = "InferenceGraph",
                 metadata: Optional[Dict[str, Any]] = None,
                 spec: Optional['V1alpha1InferenceGraphSpec'] = None,
                 status: Optional['V1alpha1InferenceGraphStatus'] = None):
        """
        InferenceGraph for multi-model inference pipelines.

        Args:
            api_version (str): Kubernetes API version
            kind (str): Resource kind
            metadata (Dict[str, Any], optional): Kubernetes metadata
            spec (V1alpha1InferenceGraphSpec, optional): Graph specification
            status (V1alpha1InferenceGraphStatus, optional): Graph status
        """

class V1alpha1InferenceGraphSpec:
    def __init__(self,
                 nodes: Dict[str, 'V1alpha1InferenceStep']):
        """
        InferenceGraph specification.

        Args:
            nodes (Dict[str, V1alpha1InferenceStep]): Pipeline steps
        """

class V1alpha1InferenceStep:
    def __init__(self,
                 step_name: str,
                 service_name: str,
                 data: str,
                 condition: Optional[str] = None):
        """
        Individual step in inference pipeline.

        Args:
            step_name (str): Step identifier
            service_name (str): Target service name
            data (str): Data flow expression
            condition (str, optional): Conditional execution
        """

class V1alpha1InferenceRouter:
    def __init__(self,
                 rules: List[Dict[str, Any]]):
        """
        Routing configuration for inference graphs.

        Args:
            rules (List[Dict[str, Any]]): Routing rules
        """

class V1alpha1InferenceTarget:
    def __init__(self,
                 service_name: str,
                 weight: Optional[int] = None):
        """
        Target model for inference step.

        Args:
            service_name (str): Target service name
            weight (int, optional): Routing weight
        """
```

## Usage Examples

### Complete InferenceService Definition

```python
from kserve import (
    V1beta1InferenceService, V1beta1InferenceServiceSpec,
    V1beta1PredictorSpec, V1beta1SKLearnSpec,
    V1beta1AutoScalingSpec, V1beta1LoggerSpec
)

# Complete inference service with all components
isvc = V1beta1InferenceService(
    api_version="serving.kserve.io/v1beta1",
    kind="InferenceService",
    metadata={
        "name": "advanced-sklearn",
        "namespace": "ml-prod",
        "labels": {
            "app": "ml-inference",
            "version": "v1.0"
        },
        "annotations": {
            "serving.kserve.io/enable-prometheus-scraping": "true"
        }
    },
    spec=V1beta1InferenceServiceSpec(
        predictor=V1beta1PredictorSpec(
            sklearn=V1beta1SKLearnSpec(
                storage_uri="s3://ml-models/sklearn/iris/v1/",
                protocol_version="v2",
                runtime_version="1.3.0",
                resources={
                    "requests": {"cpu": "100m", "memory": "256Mi"},
                    "limits": {"cpu": "1", "memory": "2Gi"}
                }
            ),
            min_replicas=1,
            max_replicas=10,
            scale_metric="concurrency",
            scale_target=100
        )
    )
)
```

### Custom ServingRuntime Definition

```python
from kserve import (
    V1alpha1ClusterServingRuntime, V1alpha1ServingRuntimeSpec,
    V1alpha1SupportedModelFormat, V1alpha1Container
)

# Define custom serving runtime
runtime = V1alpha1ClusterServingRuntime(
    metadata={
        "name": "custom-pytorch-runtime"
    },
    spec=V1alpha1ServingRuntimeSpec(
        supported_model_formats=[
            V1alpha1SupportedModelFormat(
                name="pytorch",
                version="1.0",
                auto_select=True,
                priority=1
            )
        ],
        containers=[
            V1alpha1Container(
                name="kserve-container",
                image="custom-pytorch-server:latest",
                args=[
                    "--model_name={{.Name}}",
                    "--model_dir=/mnt/models",
                    "--http_port=8080"
                ],
                env=[
                    {"name": "STORAGE_URI", "value": "{{.StorageUri}}"}
                ],
                resources={
                    "requests": {"cpu": "1", "memory": "2Gi"},
                    "limits": {"cpu": "4", "memory": "8Gi", "nvidia.com/gpu": "1"}
                }
            )
        ],
        protocol_versions=["v1", "v2"],
        multi_model=True
    )
)
```

## Types

```python { .api }
from typing import List, Dict, Any, Optional, Union

ResourceMetadata = Dict[str, Any]
ResourceSpec = Dict[str, Any]
ResourceStatus = Dict[str, Any]
ContainerImage = str
StorageURI = str
ModelFormat = Dict[str, str]
```