
# Model Serving and Inference

Comprehensive model deployment and inference capabilities including real-time endpoints, batch transform, serverless inference, and multi-model endpoints with custom serialization and deserialization support.

## Capabilities

### Model Builder

Unified model building interface for creating deployable models with automatic container selection and configuration.

```python { .api }
class ModelBuilder:
    """
    Unified interface for building deployable models with automatic configuration.

    Parameters:
    - model_path (str, optional): Path to model artifacts
    - schema_builder (SchemaBuilder, optional): Input/output schema builder
    - inference_spec (InferenceSpec, optional): Custom inference specification
    - env_vars (dict, optional): Environment variables
    - dependencies (dict, optional): Model dependencies
    """
    def __init__(self, model_path: str = None, schema_builder: 'SchemaBuilder' = None,
                 inference_spec: 'InferenceSpec' = None, **kwargs): ...

    def build(self, mode: 'Mode', role: str, sagemaker_session: 'Session') -> 'Model':
        """
        Build a deployable SageMaker model.

        Parameters:
        - mode (Mode): Deployment mode (LOCAL_CONTAINER, SAGEMAKER_ENDPOINT)
        - role (str): IAM role ARN
        - sagemaker_session (Session): SageMaker session

        Returns:
        Model: Deployable SageMaker model
        """

class InferenceSpec:
    """
    Base class for custom inference specifications.
    """
    def load(self, model_dir: str):
        """
        Load model artifacts and initialize inference components.

        Parameters:
        - model_dir (str): Directory containing model artifacts
        """

    def invoke(self, input_object, model):
        """
        Run inference on input data.

        Parameters:
        - input_object: Input data for inference
        - model: Loaded model object

        Returns:
        Inference results
        """

class Mode:
    """
    Deployment mode enumeration.
    """
    LOCAL_CONTAINER = "LOCAL_CONTAINER"
    SAGEMAKER_ENDPOINT = "SAGEMAKER_ENDPOINT"

class SchemaBuilder:
    """
    Builder for input/output schemas and sample data.

    Parameters:
    - sample_input: Sample input data for schema inference
    - sample_output: Sample output data for schema inference
    """
    def __init__(self, sample_input=None, sample_output=None): ...

class CustomPayloadTranslator:
    """
    Base class for custom payload transformation.
    """
    def serialize_payload_to_bytes(self, payload) -> bytes:
        """
        Serialize payload to bytes for transmission.

        Parameters:
        - payload: Input payload

        Returns:
        bytes: Serialized payload
        """

    def deserialize_payload_from_stream(self, stream) -> any:
        """
        Deserialize payload from byte stream.

        Parameters:
        - stream: Input byte stream

        Returns:
        Deserialized payload
        """

class ModelServer:
    """
    Model server type enumeration.
    """
    TORCHSERVE = "TORCHSERVE"
    TENSORFLOW_SERVING = "TENSORFLOW_SERVING"
    TRITON = "TRITON"
    DJL = "DJL"
```
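
The `InferenceSpec` hooks above are where custom loading and prediction logic plug into `ModelBuilder`. A minimal sketch, assuming a joblib-pickled scikit-learn model; the artifact name `model.pkl` and the `sagemaker.serve.spec.inference_spec` import path are illustrative assumptions:

```python
import joblib

from sagemaker.serve import ModelBuilder
from sagemaker.serve.spec.inference_spec import InferenceSpec  # assumed import path


class SklearnInferenceSpec(InferenceSpec):
    def load(self, model_dir: str):
        # Load the serialized model from the unpacked artifact directory.
        # "model.pkl" is an assumed artifact file name.
        return joblib.load(f"{model_dir}/model.pkl")

    def invoke(self, input_object, model):
        # Run inference with the object returned by load().
        return model.predict(input_object)


builder = ModelBuilder(
    model_path="./my-model",
    inference_spec=SklearnInferenceSpec(),
)
```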

### Serializers

Input serialization classes for converting data to formats expected by SageMaker endpoints.

```python { .api }
class BaseSerializer:
    """
    Base class for all serializers.
    """
    def serialize(self, data) -> bytes:
        """
        Serialize data to bytes.

        Parameters:
        - data: Data to serialize

        Returns:
        bytes: Serialized data
        """

class SimpleBaseSerializer(BaseSerializer):
    """
    Simplified base serializer with content type support.

    Parameters:
    - content_type (str): MIME content type
    """
    def __init__(self, content_type: str): ...

class CSVSerializer(SimpleBaseSerializer):
    """
    Serialize data to CSV format.
    """
    def __init__(self): ...

    def serialize(self, data) -> bytes:
        """
        Serialize data to CSV bytes.

        Parameters:
        - data: Pandas DataFrame, numpy array, or list

        Returns:
        bytes: CSV-formatted data
        """

class JSONSerializer(SimpleBaseSerializer):
    """
    Serialize data to JSON format.
    """
    def __init__(self): ...

    def serialize(self, data) -> bytes:
        """
        Serialize data to JSON bytes.

        Parameters:
        - data: JSON-serializable data

        Returns:
        bytes: JSON-formatted data
        """

class JSONLinesSerializer(SimpleBaseSerializer):
    """
    Serialize data to JSON Lines format.
    """
    def __init__(self): ...

class NumpySerializer(SimpleBaseSerializer):
    """
    Serialize NumPy arrays to binary format.
    """
    def __init__(self, dtype: str = None): ...

    def serialize(self, data) -> bytes:
        """
        Serialize NumPy array to bytes.

        Parameters:
        - data: NumPy array

        Returns:
        bytes: Serialized array data
        """

class TorchTensorSerializer(SimpleBaseSerializer):
    """
    Serialize PyTorch tensors to binary format.
    """
    def __init__(self, dtype: str = None): ...

class StringSerializer(SimpleBaseSerializer):
    """
    Serialize strings to UTF-8 bytes.
    """
    def __init__(self): ...

class DataSerializer(SimpleBaseSerializer):
    """
    Generic data serializer with custom serialization function.

    Parameters:
    - content_type (str): MIME content type
    - encoder (callable): Custom encoding function
    """
    def __init__(self, content_type: str, encoder: callable): ...

class IdentitySerializer(SimpleBaseSerializer):
    """
    Pass-through serializer for pre-serialized data.
    """
    def __init__(self, content_type: str): ...

class LibSVMSerializer(SimpleBaseSerializer):
    """
    Serialize data to LibSVM format.
    """
    def __init__(self): ...

class SparseMatrixSerializer(SimpleBaseSerializer):
    """
    Serialize sparse matrices to binary format.
    """
    def __init__(self): ...

class RecordSerializer(SimpleBaseSerializer):
    """
    Serialize data to RecordIO format.
    """
    def __init__(self): ...
```
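
Serializers can also be exercised standalone, which is handy for inspecting payloads before deploying; a small sketch against the interface above:

```python
import numpy as np

from sagemaker.serializers import CSVSerializer, JSONSerializer

# Rows of a 2-D array become comma-separated lines.
csv_payload = CSVSerializer().serialize(np.array([[1, 2, 3, 4], [5, 6, 7, 8]]))

# Dicts and lists are encoded as JSON text.
json_payload = JSONSerializer().serialize({"instances": [[1, 2, 3, 4]]})
```

A `Predictor` applies the same `serialize()` call to every request body and sends the serializer's content type as the request's Content-Type header.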

### Deserializers

Output deserialization classes for converting SageMaker endpoint responses to usable data formats.

```python { .api }
class BaseDeserializer:
    """
    Base class for all deserializers.
    """
    def deserialize(self, stream, content_type: str):
        """
        Deserialize data from byte stream.

        Parameters:
        - stream: Input byte stream
        - content_type (str): MIME content type

        Returns:
        Deserialized data
        """

class SimpleBaseDeserializer(BaseDeserializer):
    """
    Simplified base deserializer.
    """
    def deserialize(self, stream, content_type: str): ...

class BytesDeserializer(SimpleBaseDeserializer):
    """
    Deserialize data as raw bytes.
    """
    def deserialize(self, stream, content_type: str) -> bytes: ...

class StringDeserializer(SimpleBaseDeserializer):
    """
    Deserialize data as UTF-8 string.
    """
    def deserialize(self, stream, content_type: str) -> str: ...

class CSVDeserializer(SimpleBaseDeserializer):
    """
    Deserialize CSV data to DataFrame or list.

    Parameters:
    - encoding (str): Text encoding (default: "utf-8")
    """
    def __init__(self, encoding: str = "utf-8"): ...

    def deserialize(self, stream, content_type: str): ...

class JSONDeserializer(SimpleBaseDeserializer):
    """
    Deserialize JSON data to Python objects.
    """
    def deserialize(self, stream, content_type: str): ...

class JSONLinesDeserializer(SimpleBaseDeserializer):
    """
    Deserialize JSON Lines data to list of objects.
    """
    def deserialize(self, stream, content_type: str) -> list: ...

class NumpyDeserializer(SimpleBaseDeserializer):
    """
    Deserialize binary data to NumPy arrays.

    Parameters:
    - dtype (str, optional): Target NumPy data type
    - allow_pickle (bool): Allow pickle deserialization
    """
    def __init__(self, dtype: str = None, allow_pickle: bool = True): ...

class PandasDeserializer(SimpleBaseDeserializer):
    """
    Deserialize data to pandas DataFrame.
    """
    def deserialize(self, stream, content_type: str) -> 'pandas.DataFrame': ...

class TorchTensorDeserializer(SimpleBaseDeserializer):
    """
    Deserialize binary data to PyTorch tensors.

    Parameters:
    - dtype (str, optional): Target tensor data type
    """
    def __init__(self, dtype: str = None): ...

class SparseMatrixDeserializer(SimpleBaseDeserializer):
    """
    Deserialize binary data to sparse matrices.
    """
    def deserialize(self, stream, content_type: str): ...
```
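
Deserializers take a byte stream plus the response content type, so they can be tried against an in-memory stream; a minimal sketch:

```python
import io

from sagemaker.deserializers import CSVDeserializer, JSONDeserializer

# JSON responses become Python objects.
json_result = JSONDeserializer().deserialize(
    io.BytesIO(b'{"predictions": [[0.8, 0.2]]}'),
    content_type="application/json",
)

# CSV responses become rows of string values.
csv_result = CSVDeserializer().deserialize(
    io.BytesIO(b"0.8,0.2\n0.1,0.9"),
    content_type="text/csv",
)
```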

### Content Type Constants

Standard MIME content types for serialization and deserialization.

```python { .api }
# Content type constants
CONTENT_TYPE_JSON = "application/json"
CONTENT_TYPE_CSV = "text/csv"
CONTENT_TYPE_OCTET_STREAM = "application/octet-stream"
CONTENT_TYPE_NPY = "application/x-npy"
```
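
These constants pair with the serializers that take an explicit content type, such as `IdentitySerializer` and `DataSerializer`; a short sketch (the constant is restated here since its import location is not shown above):

```python
from sagemaker.serializers import IdentitySerializer

CONTENT_TYPE_NPY = "application/x-npy"  # as defined above

# Pass pre-encoded bytes through unchanged, labeled with the matching MIME type.
serializer = IdentitySerializer(content_type=CONTENT_TYPE_NPY)
payload = serializer.serialize(b"\x93NUMPY...")  # already-serialized .npy bytes
```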

### Batch Transform

Batch inference capabilities for processing large datasets asynchronously.

```python { .api }
class Transformer:
    """
    SageMaker batch transform job for batch inference.

    Parameters:
    - model_name (str): SageMaker model name
    - instance_count (int): Number of transform instances
    - instance_type (str): EC2 instance type
    - output_path (str): S3 path for transform results
    - strategy (str, optional): Data splitting strategy
    - assemble_with (str, optional): Result assembly method
    - accept (str, optional): Accept header for output format
    - max_concurrent_transforms (int, optional): Max concurrent transforms
    - max_payload (int, optional): Maximum payload size in MB
    - env (dict, optional): Environment variables
    """
    def __init__(self, model_name: str, instance_count: int, instance_type: str,
                 output_path: str, strategy: str = None, **kwargs): ...

    def transform(self, data: str, data_type: str = "S3Prefix", content_type: str = None,
                  compression_type: str = None, split_type: str = None,
                  job_name: str = None, wait: bool = True, logs: bool = True,
                  experiment_config: dict = None) -> 'TransformJob':
        """
        Start a batch transform job.

        Parameters:
        - data (str): S3 path to input data
        - data_type (str): Input data type ("S3Prefix", "ManifestFile")
        - content_type (str, optional): Input content type
        - compression_type (str, optional): Input compression ("Gzip", "None")
        - split_type (str, optional): Data splitting method
        - job_name (str, optional): Transform job name
        - wait (bool): Wait for job completion
        - logs (bool): Show job logs
        - experiment_config (dict, optional): Experiment configuration

        Returns:
        TransformJob: Transform job object
        """

    def delete_model(self): ...

class TransformJob:
    """
    Represents a running or completed batch transform job.
    """
    def __init__(self, sagemaker_session: 'Session', job_name: str): ...

    def wait(self, logs: bool = True): ...

    def stop(self): ...

    def describe(self) -> dict: ...
```
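
Since `transform()` returns a `TransformJob` (per the interface above), a job can be started with `wait=False` and monitored separately; a sketch, with the `"TransformJobStatus"` key assumed from the DescribeTransformJob response shape:

```python
from sagemaker import Transformer

transformer = Transformer(
    model_name="my-model",
    instance_count=1,
    instance_type="ml.m5.large",
    output_path="s3://my-bucket/batch-output",
)

# Start without blocking, then monitor via the returned TransformJob.
job = transformer.transform(
    data="s3://my-bucket/batch-input",
    content_type="text/csv",
    split_type="Line",
    wait=False,
)

print(job.describe().get("TransformJobStatus"))  # e.g. "InProgress"
job.wait(logs=False)  # block until completion
```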

## Usage Examples

### Real-time Endpoint with Custom Serialization

```python
from sagemaker import Model
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

# Create model
model = Model(
    image_uri="123456789012.dkr.ecr.us-west-2.amazonaws.com/my-model:latest",
    model_data="s3://my-bucket/model.tar.gz",
    role=role
)

# Deploy with custom serialization
predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large",
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer()
)

# Make predictions
input_data = {"instances": [[1, 2, 3, 4]]}
predictions = predictor.predict(input_data)
print(predictions)

# Clean up
predictor.delete_endpoint()
```

### Batch Transform Job

```python
from sagemaker import Transformer

# Create transformer
transformer = Transformer(
    model_name="my-model",
    instance_count=1,
    instance_type="ml.m5.large",
    output_path="s3://my-bucket/batch-transform-output"
)

# Start batch transform job
transformer.transform(
    data="s3://my-bucket/batch-input-data",
    content_type="text/csv",
    split_type="Line",
    wait=True
)

# Results are available in the output S3 path
```

### ModelBuilder Example

```python
from sagemaker.serve import ModelBuilder, Mode, SchemaBuilder
import pandas as pd

# Create sample data for schema inference
sample_input = pd.DataFrame([[1, 2, 3, 4]], columns=["a", "b", "c", "d"])
sample_output = [[0.8, 0.2]]

# Create schema builder
schema_builder = SchemaBuilder(
    sample_input=sample_input,
    sample_output=sample_output
)

# Configure the model builder
model_builder = ModelBuilder(
    model_path="./my-model",
    schema_builder=schema_builder
)

# Build the deployable model
model = model_builder.build(
    mode=Mode.SAGEMAKER_ENDPOINT,
    role=role,
    sagemaker_session=session
)

# Deploy the model to an endpoint
predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large"
)
```

### Multi-Model Endpoint

```python
from sagemaker import MultiDataModel

# Create multi-model endpoint
multi_model = MultiDataModel(
    name="my-multi-model",
    model_data_prefix="s3://my-bucket/models/",
    image_uri="123456789012.dkr.ecr.us-west-2.amazonaws.com/inference:latest",
    role=role
)

# Deploy multi-model endpoint
predictor = multi_model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large"
)

# Add models dynamically
multi_model.add_model("model-a", "s3://my-bucket/models/model-a.tar.gz")
multi_model.add_model("model-b", "s3://my-bucket/models/model-b.tar.gz")

# Make predictions with a specific model
data = {"instances": [[1, 2, 3, 4]]}  # example payload
predictions = predictor.predict(data, target_model="model-a")
```