or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

framework-servers.md, index.md, inference-clients.md, kubernetes-client.md, model-serving.md, protocol.md, resource-models.md, storage.md

docs/framework-servers.md

# Framework Servers

Pre-built model servers for popular ML frameworks that extend the core KServe functionality with framework-specific optimizations. These servers provide ready-to-use implementations for common ML frameworks without requiring custom code.

## Capabilities

### Scikit-learn Server

Ready-to-use server for scikit-learn models with support for joblib and pickle formats.

```python { .api }

from sklearnserver import SKLearnModel


class SKLearnModel(Model):
    def __init__(self, name: str, model_dir: str):
        """
        Scikit-learn model server.

        Args:
            name (str): Model name
            model_dir (str): Directory containing model files
        """

    def load(self):
        """Load scikit-learn model from joblib or pickle file."""

    async def predict(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """
        Make predictions using scikit-learn model.

        Args:
            payload (Dict[str, Any]): Input data with 'instances' key

        Returns:
            Dict[str, Any]: Predictions in {'predictions': []} format
        """

    def explain(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """Generate explanations for scikit-learn predictions."""

```

### XGBoost Server

Optimized server for XGBoost models supporting both native and scikit-learn API formats.

```python { .api }

from xgbserver import XGBoostModel


class XGBoostModel(Model):
    def __init__(self, name: str, model_dir: str):
        """
        XGBoost model server.

        Args:
            name (str): Model name
            model_dir (str): Directory containing XGBoost model files
        """

    def load(self):
        """Load XGBoost model from .bst, .json, or .pkl file."""

    async def predict(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """
        Make predictions using XGBoost model.

        Args:
            payload (Dict[str, Any]): Input data

        Returns:
            Dict[str, Any]: XGBoost predictions
        """

```

### LightGBM Server

High-performance server for LightGBM models with native format support.

```python { .api }

from lgbserver import LightGBMModel


class LightGBMModel(Model):
    def __init__(self, name: str, model_dir: str):
        """
        LightGBM model server.

        Args:
            name (str): Model name
            model_dir (str): Directory containing LightGBM model files
        """

    def load(self):
        """Load LightGBM model from native format."""

    async def predict(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """
        Make predictions using LightGBM model.

        Args:
            payload (Dict[str, Any]): Input data

        Returns:
            Dict[str, Any]: LightGBM predictions
        """

```

### HuggingFace Server

Advanced server for Hugging Face transformer models and LLMs with GPU acceleration and streaming support.

```python { .api }

from huggingfaceserver import HuggingfaceGenerativeModel, HuggingfaceEncoderModel


# Base class for HuggingFace models
class HuggingFaceModel(Model):
    def __init__(self, name: str, model_dir: str):
        """
        HuggingFace transformer model server.

        Args:
            name (str): Model name
            model_dir (str): Directory containing HuggingFace model files
        """

    def load(self):
        """Load HuggingFace model and tokenizer."""

    async def predict(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """
        Generate predictions using HuggingFace model.

        Args:
            payload (Dict[str, Any]): Input text data

        Returns:
            Dict[str, Any]: Model outputs (tokens, probabilities, etc.)
        """

    async def generate(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """
        Generate text using language model.

        Args:
            payload (Dict[str, Any]): Generation parameters and prompts

        Returns:
            Dict[str, Any]: Generated text and metadata
        """


# Usage for different HuggingFace tasks
def create_classification_model(model_name: str):
    """Create text classification model."""


def create_generation_model(model_name: str):
    """Create text generation model."""


def create_embedding_model(model_name: str):
    """Create text embedding model."""

```

### PMML Server

Server for PMML (Predictive Model Markup Language) models supporting PMML 4.x format.

```python { .api }

from pmmlserver import PMMLModel


class PMMLModel(Model):
    def __init__(self, name: str, model_dir: str):
        """
        PMML model server.

        Args:
            name (str): Model name
            model_dir (str): Directory containing PMML files
        """

    def load(self):
        """Load PMML model from .pmml file."""

    async def predict(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """
        Make predictions using PMML model.

        Args:
            payload (Dict[str, Any]): Input data

        Returns:
            Dict[str, Any]: PMML predictions
        """

```

### PaddlePaddle Server

Server for PaddlePaddle deep learning models with inference format support.

```python { .api }

from paddleserver import PaddleModel


class PaddleModel(Model):
    def __init__(self, name: str, model_dir: str):
        """
        PaddlePaddle model server.

        Args:
            name (str): Model name
            model_dir (str): Directory containing PaddlePaddle model files
        """

    def load(self):
        """Load PaddlePaddle model from inference format."""

    async def predict(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """
        Make predictions using PaddlePaddle model.

        Args:
            payload (Dict[str, Any]): Input data

        Returns:
            Dict[str, Any]: PaddlePaddle predictions
        """

```

### ART Explainer Server

Explainability server using IBM's Adversarial Robustness Toolbox (ART) for model interpretability.

```python { .api }

from artexplainer.artserver import ARTExplainer


class ARTExplainer(Model):
    def __init__(self, name: str, predictor_host: str):
        """
        ART-based explainer server.

        Args:
            name (str): Explainer name
            predictor_host (str): Host of the predictor service
        """

    def load(self):
        """Initialize ART explainer algorithms."""

    async def explain(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """
        Generate explanations using ART algorithms.

        Args:
            payload (Dict[str, Any]): Input data and explanation parameters

        Returns:
            Dict[str, Any]: Explanation results and visualizations
        """


# Available explanation methods
def lime_tabular_explainer():
    """LIME explainer for tabular data."""


def shap_explainer():
    """SHAP explainer for feature importance."""


def integrated_gradients():
    """Integrated gradients for attribution."""

```

### AIF Fairness Server

Bias detection and fairness analysis server using IBM's AI Fairness 360 (AIF360) toolkit.

```python { .api }

from aiffairness.aifserver import AIFFairnessModel


class AIFFairnessModel(Model):
    def __init__(self, name: str, predictor_host: str):
        """
        AI Fairness analysis server.

        Args:
            name (str): Fairness analyzer name
            predictor_host (str): Host of the predictor service
        """

    def load(self):
        """Initialize fairness metrics and detectors."""

    async def explain(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """
        Analyze model predictions for bias and fairness.

        Args:
            payload (Dict[str, Any]): Input data with protected attributes

        Returns:
            Dict[str, Any]: Fairness metrics and bias analysis
        """

    async def detect_bias(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """
        Detect bias in model predictions.

        Args:
            payload (Dict[str, Any]): Prediction data with demographics

        Returns:
            Dict[str, Any]: Bias detection results
        """


# Available fairness metrics
def demographic_parity():
    """Demographic parity fairness metric."""


def equalized_odds():
    """Equalized odds fairness metric."""


def individual_fairness():
    """Individual fairness analysis."""

```

## Usage Examples

### Deploying Scikit-learn Model

```python

from kserve import ModelServer
from sklearnserver import SKLearnModel

# Create and start scikit-learn server
if __name__ == "__main__":
    # Load the model from the mounted model directory before serving.
    model = SKLearnModel("iris-classifier", "/mnt/models")
    model.load()

    # Start server
    ModelServer().start([model])

```

### Using Framework Servers with Kubernetes

```python

from kserve import KServeClient, V1beta1InferenceService, V1beta1InferenceServiceSpec
from kserve import V1beta1PredictorSpec, V1beta1XGBoostSpec

# Deploy XGBoost model using built-in server
client = KServeClient()

xgb_isvc = V1beta1InferenceService(
    api_version="serving.kserve.io/v1beta1",
    kind="InferenceService",
    metadata={"name": "xgboost-model", "namespace": "default"},
    spec=V1beta1InferenceServiceSpec(
        predictor=V1beta1PredictorSpec(
            xgboost=V1beta1XGBoostSpec(
                storage_uri="s3://my-bucket/xgboost-model/",
                runtime_version="3.1.0"
            )
        )
    )
)

client.create(xgb_isvc)

```

### Advanced HuggingFace LLM Setup

```python

from huggingfaceserver import HuggingFaceModel
from kserve import ModelServer
import torch


class LLMModel(HuggingFaceModel):
    """Text-generation server with per-request sampling overrides."""

    def __init__(self, name: str, model_dir: str):
        super().__init__(name, model_dir)
        # Defaults used when the request carries no generation parameters.
        self.max_length = 512
        self.temperature = 0.8

    async def generate(self, payload):
        # Extract generation parameters (request values override defaults)
        prompts = payload.get("instances", [])
        max_length = payload.get("parameters", {}).get("max_length", self.max_length)
        temperature = payload.get("parameters", {}).get("temperature", self.temperature)

        # Generate responses
        responses = []
        for prompt in prompts:
            # Use HuggingFace generation; tokenizer/model are loaded by the
            # base class's load() — assumes load() ran before serving.
            inputs = self.tokenizer(prompt, return_tensors="pt")

            with torch.no_grad():
                outputs = self.model.generate(
                    inputs.input_ids,
                    max_length=max_length,
                    temperature=temperature,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id
                )

            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            responses.append(response)

        return {"predictions": responses}


# Start LLM server
if __name__ == "__main__":
    llm = LLMModel("gpt2-chat", "/mnt/models/gpt2")
    ModelServer(http_port=8080).start([llm])

```

### Multi-Framework Ensemble

```python

409

from kserve import ModelServer

410

from sklearnserver import SKLearnModel

411

from xgbserver import XGBoostModel

412

from lgbserver import LightGBMModel

413

414

class EnsembleModel:
    """Simple averaging ensemble over sklearn, XGBoost and LightGBM servers."""

    def __init__(self):
        self.sklearn_model = SKLearnModel("sklearn", "/models/sklearn")
        self.xgb_model = XGBoostModel("xgboost", "/models/xgb")
        self.lgb_model = LightGBMModel("lightgbm", "/models/lgb")

    async def predict(self, payload):
        """Return the element-wise mean of the member models' predictions.

        Args:
            payload: Request dict forwarded unchanged to every member model.

        Returns:
            dict: {"predictions": [...]} with the per-position average.
        """
        # Get predictions from all models
        sklearn_pred = await self.sklearn_model.predict(payload)
        xgb_pred = await self.xgb_model.predict(payload)
        lgb_pred = await self.lgb_model.predict(payload)

        # Ensemble logic (simple averaging).
        # BUG FIX: '+' on Python lists concatenates rather than adding
        # element-wise, so the original '(a + b + c) / 3' raised TypeError
        # (and '.tolist()' assumed numpy arrays that were never created).
        # Average per position instead.
        ensemble_pred = [
            sum(values) / 3
            for values in zip(
                sklearn_pred["predictions"],
                xgb_pred["predictions"],
                lgb_pred["predictions"],
            )
        ]
        return {"predictions": ensemble_pred}

# Deploy ensemble
ensemble = EnsembleModel()
ModelServer().start([ensemble])

```

### Explainability Pipeline

```python

from artexplainer.artserver import ARTExplainer
from kserve import ModelServer

# Create explainer for existing predictor
explainer = ARTExplainer(
    name="model-explainer",
    predictor_host="http://sklearn-predictor:8080"
)

# Start explainer server
ModelServer(http_port=8080).start([explainer])


# Usage example
async def explain_prediction():
    """Request a LIME explanation for a single iris-style instance."""
    payload = {
        "instances": [[5.1, 3.5, 1.4, 0.2]],
        "parameters": {
            "explanation_type": "lime",
            "num_features": 4
        }
    }

    explanation = await explainer.explain(payload)
    return explanation

```

### Custom Framework Integration

```python

from kserve import Model, ModelServer
import tensorflow as tf


class CustomTensorFlowModel(Model):
    """KServe model wrapper around a TensorFlow SavedModel."""

    def __init__(self, name: str, model_path: str):
        super().__init__(name)
        self.model_path = model_path
        self.model = None

    def load(self):
        """Load TensorFlow SavedModel."""
        self.model = tf.saved_model.load(self.model_path)
        self.ready = True

    async def predict(self, payload):
        """TensorFlow model prediction."""
        instances = payload["instances"]

        # Convert to tensor
        input_tensor = tf.convert_to_tensor(instances, dtype=tf.float32)

        # Make prediction
        predictions = self.model(input_tensor)

        return {
            "predictions": predictions.numpy().tolist()
        }


# Use custom model with ModelServer
custom_model = CustomTensorFlowModel("custom-tf", "/path/to/saved_model")
ModelServer().start([custom_model])

```

## Framework Compatibility

### Supported Model Formats

```python { .api }

# File formats each framework server will recognize when loading a model.

# Scikit-learn
SKLEARN_FORMATS = [".joblib", ".pkl", ".pickle"]

# XGBoost
XGBOOST_FORMATS = [".bst", ".json", ".pkl", ".ubj"]

# LightGBM
LIGHTGBM_FORMATS = [".txt", ".json", ".pkl"]

# HuggingFace
HUGGINGFACE_FORMATS = ["pytorch_model.bin", "tf_model.h5", "model.safetensors"]

# PMML
PMML_FORMATS = [".pmml", ".xml"]

# PaddlePaddle
PADDLE_FORMATS = ["__model__", "model.pdmodel"]

```

### Resource Requirements

```python { .api }

# Typical resource requirements by framework
FRAMEWORK_RESOURCES = {
    "sklearn": {"cpu": "100m", "memory": "256Mi"},
    "xgboost": {"cpu": "200m", "memory": "512Mi"},
    "lightgbm": {"cpu": "200m", "memory": "512Mi"},
    "huggingface": {"cpu": "1000m", "memory": "2Gi", "gpu": "optional"},
    "pmml": {"cpu": "100m", "memory": "256Mi"},
    "paddle": {"cpu": "500m", "memory": "1Gi"},
}

```

## Types

```python { .api }

from typing import Dict, Any, List, Optional

# Aliases used throughout the framework-server API documentation.
ModelDir = str
ModelFormat = str
FrameworkName = str
PredictionPayload = Dict[str, Any]
PredictionResult = Dict[str, Any]
ExplanationResult = Dict[str, Any]
BiasAnalysis = Dict[str, Any]

```