# Model Serving and Inference

Comprehensive model deployment and inference capabilities including real-time endpoints, batch transform, serverless inference, and multi-model endpoints with custom serialization and deserialization support.

## Capabilities

### Model Builder

Unified model building interface for creating deployable models with automatic container selection and configuration.

```python { .api }
class ModelBuilder:
    """
    Unified interface for building deployable models with automatic configuration.

    Parameters:
    - model_path (str, optional): Path to model artifacts
    - schema_builder (SchemaBuilder, optional): Input/output schema builder
    - inference_spec (InferenceSpec, optional): Custom inference specification
    - env_vars (dict, optional): Environment variables
    - dependencies (dict, optional): Model dependencies
    """
    def __init__(self, model_path: str = None, schema_builder: 'SchemaBuilder' = None,
                 inference_spec: 'InferenceSpec' = None, **kwargs): ...

    def build(self, mode: 'Mode', role: str, sagemaker_session: 'Session') -> 'Model':
        """
        Build a deployable SageMaker model.

        Parameters:
        - mode (Mode): Deployment mode (LOCAL_CONTAINER, SAGEMAKER_ENDPOINT)
        - role (str): IAM role ARN
        - sagemaker_session (Session): SageMaker session

        Returns:
        Model: Deployable SageMaker model
        """

class InferenceSpec:
    """
    Base class for custom inference specifications.
    """
    def load(self, model_dir: str):
        """
        Load model artifacts and initialize inference components.

        Parameters:
        - model_dir (str): Directory containing model artifacts
        """

    def invoke(self, input_object, model):
        """
        Run inference on input data.

        Parameters:
        - input_object: Input data for inference
        - model: Loaded model object

        Returns:
        Inference results
        """

class Mode:
    """
    Deployment mode enumeration.
    """
    LOCAL_CONTAINER = "LOCAL_CONTAINER"
    SAGEMAKER_ENDPOINT = "SAGEMAKER_ENDPOINT"

class SchemaBuilder:
    """
    Builder for input/output schemas and sample data.

    Parameters:
    - sample_input: Sample input data for schema inference
    - sample_output: Sample output data for schema inference
    """
    def __init__(self, sample_input=None, sample_output=None): ...

class CustomPayloadTranslator:
    """
    Base class for custom payload transformation.
    """
    def serialize_payload_to_bytes(self, payload) -> bytes:
        """
        Serialize payload to bytes for transmission.

        Parameters:
        - payload: Input payload

        Returns:
        bytes: Serialized payload
        """

    def deserialize_payload_from_stream(self, stream) -> any:
        """
        Deserialize payload from byte stream.

        Parameters:
        - stream: Input byte stream

        Returns:
        Deserialized payload
        """

class ModelServer:
    """
    Model server type enumeration.
    """
    TORCHSERVE = "TORCHSERVE"
    TENSORFLOW_SERVING = "TENSORFLOW_SERVING"
    TRITON = "TRITON"
    DJL = "DJL"
```

### Serializers

Input serialization classes for converting data to formats expected by SageMaker endpoints.

```python { .api }
class BaseSerializer:
    """
    Base class for all serializers.
    """
    def serialize(self, data) -> bytes:
        """
        Serialize data to bytes.

        Parameters:
        - data: Data to serialize

        Returns:
        bytes: Serialized data
        """

class SimpleBaseSerializer(BaseSerializer):
    """
    Simplified base serializer with content type support.

    Parameters:
    - content_type (str): MIME content type
    """
    def __init__(self, content_type: str): ...

class CSVSerializer(SimpleBaseSerializer):
    """
    Serialize data to CSV format.
    """
    def __init__(self): ...

    def serialize(self, data) -> bytes:
        """
        Serialize data to CSV bytes.

        Parameters:
        - data: Pandas DataFrame, numpy array, or list

        Returns:
        bytes: CSV-formatted data
        """

class JSONSerializer(SimpleBaseSerializer):
    """
    Serialize data to JSON format.
    """
    def __init__(self): ...

    def serialize(self, data) -> bytes:
        """
        Serialize data to JSON bytes.

        Parameters:
        - data: JSON-serializable data

        Returns:
        bytes: JSON-formatted data
        """

class JSONLinesSerializer(SimpleBaseSerializer):
    """
    Serialize data to JSON Lines format.
    """
    def __init__(self): ...

class NumpySerializer(SimpleBaseSerializer):
    """
    Serialize NumPy arrays to binary format.
    """
    def __init__(self, dtype: str = None): ...

    def serialize(self, data) -> bytes:
        """
        Serialize NumPy array to bytes.

        Parameters:
        - data: NumPy array

        Returns:
        bytes: Serialized array data
        """

class TorchTensorSerializer(SimpleBaseSerializer):
    """
    Serialize PyTorch tensors to binary format.
    """
    def __init__(self, dtype: str = None): ...

class StringSerializer(SimpleBaseSerializer):
    """
    Serialize strings to UTF-8 bytes.
    """
    def __init__(self): ...

class DataSerializer(SimpleBaseSerializer):
    """
    Generic data serializer with custom serialization function.

    Parameters:
    - content_type (str): MIME content type
    - encoder (callable): Custom encoding function
    """
    def __init__(self, content_type: str, encoder: callable): ...

class IdentitySerializer(SimpleBaseSerializer):
    """
    Pass-through serializer for pre-serialized data.
    """
    def __init__(self, content_type: str): ...

class LibSVMSerializer(SimpleBaseSerializer):
    """
    Serialize data to LibSVM format.
    """
    def __init__(self): ...

class SparseMatrixSerializer(SimpleBaseSerializer):
    """
    Serialize sparse matrices to binary format.
    """
    def __init__(self): ...

class RecordSerializer(SimpleBaseSerializer):
    """
    Serialize data to RecordIO format.
    """
    def __init__(self): ...
```

### Deserializers

Output deserialization classes for converting SageMaker endpoint responses to usable data formats.

```python { .api }
class BaseDeserializer:
    """
    Base class for all deserializers.
    """
    def deserialize(self, stream, content_type: str):
        """
        Deserialize data from byte stream.

        Parameters:
        - stream: Input byte stream
        - content_type (str): MIME content type

        Returns:
        Deserialized data
        """

class SimpleBaseDeserializer(BaseDeserializer):
    """
    Simplified base deserializer.
    """
    def deserialize(self, stream, content_type: str): ...

class BytesDeserializer(SimpleBaseDeserializer):
    """
    Deserialize data as raw bytes.
    """
    def deserialize(self, stream, content_type: str) -> bytes: ...

class StringDeserializer(SimpleBaseDeserializer):
    """
    Deserialize data as UTF-8 string.
    """
    def deserialize(self, stream, content_type: str) -> str: ...

class CSVDeserializer(SimpleBaseDeserializer):
    """
    Deserialize CSV data to DataFrame or list.

    Parameters:
    - encoding (str): Text encoding (default: "utf-8")
    """
    def __init__(self, encoding: str = "utf-8"): ...

    def deserialize(self, stream, content_type: str): ...

class JSONDeserializer(SimpleBaseDeserializer):
    """
    Deserialize JSON data to Python objects.
    """
    def deserialize(self, stream, content_type: str): ...

class JSONLinesDeserializer(SimpleBaseDeserializer):
    """
    Deserialize JSON Lines data to list of objects.
    """
    def deserialize(self, stream, content_type: str) -> list: ...

class NumpyDeserializer(SimpleBaseDeserializer):
    """
    Deserialize binary data to NumPy arrays.

    Parameters:
    - dtype (str, optional): Target NumPy data type
    - allow_pickle (bool): Allow pickle deserialization
    """
    def __init__(self, dtype: str = None, allow_pickle: bool = True): ...

class PandasDeserializer(SimpleBaseDeserializer):
    """
    Deserialize data to pandas DataFrame.
    """
    def deserialize(self, stream, content_type: str) -> 'pandas.DataFrame': ...

class TorchTensorDeserializer(SimpleBaseDeserializer):
    """
    Deserialize binary data to PyTorch tensors.

    Parameters:
    - dtype (str, optional): Target tensor data type
    """
    def __init__(self, dtype: str = None): ...

class SparseMatrixDeserializer(SimpleBaseDeserializer):
    """
    Deserialize binary data to sparse matrices.
    """
    def deserialize(self, stream, content_type: str): ...
```

### Content Type Constants

Standard MIME content types for serialization and deserialization.

```python { .api }
# Content type constants
CONTENT_TYPE_JSON = "application/json"
CONTENT_TYPE_CSV = "text/csv"
CONTENT_TYPE_OCTET_STREAM = "application/octet-stream"
CONTENT_TYPE_NPY = "application/x-npy"
```

### Batch Transform

Batch inference capabilities for processing large datasets asynchronously.

```python { .api }
class Transformer:
    """
    SageMaker batch transform job for batch inference.

    Parameters:
    - model_name (str): SageMaker model name
    - instance_count (int): Number of transform instances
    - instance_type (str): EC2 instance type
    - output_path (str): S3 path for transform results
    - strategy (str, optional): Data splitting strategy
    - assemble_with (str, optional): Result assembly method
    - accept (str, optional): Accept header for output format
    - max_concurrent_transforms (int, optional): Max concurrent transforms
    - max_payload (int, optional): Maximum payload size in MB
    - env (dict, optional): Environment variables
    """
    def __init__(self, model_name: str, instance_count: int, instance_type: str,
                 output_path: str, strategy: str = None, **kwargs): ...

    def transform(self, data: str, data_type: str = "S3Prefix", content_type: str = None,
                  compression_type: str = None, split_type: str = None,
                  job_name: str = None, wait: bool = True, logs: bool = True,
                  experiment_config: dict = None) -> 'TransformJob':
        """
        Start a batch transform job.

        Parameters:
        - data (str): S3 path to input data
        - data_type (str): Input data type ("S3Prefix", "ManifestFile")
        - content_type (str, optional): Input content type
        - compression_type (str, optional): Input compression ("Gzip", "None")
        - split_type (str, optional): Data splitting method
        - job_name (str, optional): Transform job name
        - wait (bool): Wait for job completion
        - logs (bool): Show job logs
        - experiment_config (dict, optional): Experiment configuration

        Returns:
        TransformJob: Transform job object
        """

    def delete_model(self): ...

class TransformJob:
    """
    Represents a running or completed batch transform job.
    """
    def __init__(self, sagemaker_session: 'Session', job_name: str): ...

    def wait(self, logs: bool = True): ...

    def stop(self): ...

    def describe(self) -> dict: ...
```

## Usage Examples

### Real-time Endpoint with Custom Serialization

```python
from sagemaker import Model, Predictor
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

# Create model
model = Model(
    image_uri="123456789012.dkr.ecr.us-west-2.amazonaws.com/my-model:latest",
    model_data="s3://my-bucket/model.tar.gz",
    role=role
)

# Deploy with custom serialization
predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large",
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer()
)

# Make predictions
input_data = {"instances": [[1, 2, 3, 4]]}
predictions = predictor.predict(input_data)
print(predictions)

# Clean up
predictor.delete_endpoint()
```

### Batch Transform Job

```python
from sagemaker import Transformer

# Create transformer
transformer = Transformer(
    model_name="my-model",
    instance_count=1,
    instance_type="ml.m5.large",
    output_path="s3://my-bucket/batch-transform-output"
)

# Start batch transform job
transformer.transform(
    data="s3://my-bucket/batch-input-data",
    content_type="text/csv",
    split_type="Line",
    wait=True
)

# Results are available in the output S3 path
```

### ModelBuilder Example

```python
from sagemaker.serve import ModelBuilder, SchemaBuilder
import pandas as pd

# Create sample data for schema inference
sample_input = pd.DataFrame([[1, 2, 3, 4]], columns=["a", "b", "c", "d"])
sample_output = [[0.8, 0.2]]

# Create schema builder
schema_builder = SchemaBuilder(
    sample_input=sample_input,
    sample_output=sample_output
)

# Build model
model_builder = ModelBuilder(
    model_path="./my-model",
    schema_builder=schema_builder
)

# Deploy model
model = model_builder.build(
    mode=Mode.SAGEMAKER_ENDPOINT,
    role=role,
    sagemaker_session=session
)

predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large"
)
```

### Multi-Model Endpoint

```python
from sagemaker import MultiDataModel

# Create multi-model endpoint
multi_model = MultiDataModel(
    name="my-multi-model",
    model_data_prefix="s3://my-bucket/models/",
    image_uri="123456789012.dkr.ecr.us-west-2.amazonaws.com/inference:latest",
    role=role
)

# Deploy multi-model endpoint
predictor = multi_model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large"
)

# Add models dynamically
multi_model.add_model("model-a", "s3://my-bucket/models/model-a.tar.gz")
multi_model.add_model("model-b", "s3://my-bucket/models/model-b.tar.gz")

# Make predictions with specific model
predictions = predictor.predict(data, target_model="model-a")
```