Open source library for training and deploying models on Amazon SageMaker.
```
npx @tessl/cli install tessl/pypi-sagemaker@2.251.0
```

A comprehensive Python library for training and deploying machine learning models on Amazon SageMaker. Provides high-level abstractions and APIs for the complete machine learning workflow, including data preprocessing, model training, hyperparameter tuning, batch inference, and real-time endpoint deployment, across popular frameworks like TensorFlow, PyTorch, Scikit-learn, XGBoost, and Hugging Face.
```
pip install sagemaker
```

```python
import sagemaker
```

Common session and role management:
```python
from sagemaker import Session, get_execution_role
```

Training and model deployment:
```python
from sagemaker import Estimator, Model, Predictor
from sagemaker.inputs import TrainingInput
```

A minimal end-to-end example with scikit-learn:

```python
import sagemaker
from sagemaker import Session, get_execution_role
from sagemaker.sklearn import SKLearn
# Set up SageMaker session and IAM role
sagemaker_session = Session()
role = get_execution_role()
# Create a scikit-learn estimator
sklearn_estimator = SKLearn(
    entry_point="train.py",
    framework_version="1.2-1",
    instance_type="ml.m5.large",
    role=role,
    sagemaker_session=sagemaker_session,
)
# Train the model
sklearn_estimator.fit({"training": "s3://my-bucket/train-data"})
# Deploy the model
predictor = sklearn_estimator.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large",
)
# Make predictions (test_data is a placeholder for input in the model's expected format)
predictions = predictor.predict(test_data)
# Clean up
predictor.delete_endpoint()
```

The SageMaker Python SDK follows a layered architecture that abstracts AWS SageMaker complexity.
This design enables developers to focus on ML logic while the SDK handles AWS service integration, resource management, and deployment complexities.
Fundamental classes for training models and managing deployments including estimators, models, predictors, and session management. These form the foundation of the SageMaker workflow.
```python
class Estimator:
    def __init__(self, image_uri: str, role: str = None, instance_count: int = None,
                 instance_type: str = None, keep_alive_period_in_seconds: int = None,
                 volume_size: int = 30, max_run: int = 24 * 60 * 60, input_mode: str = "File",
                 output_path: str = None, base_job_name: str = None,
                 sagemaker_session: Session = None, hyperparameters: dict = None,
                 tags: list = None, subnets: list = None, security_group_ids: list = None,
                 **kwargs): ...

    def fit(self, inputs, wait: bool = True, logs: str = "All", job_name: str = None,
            experiment_config: dict = None): ...

    def deploy(self, initial_instance_count: int, instance_type: str, **kwargs) -> Predictor: ...

class Model:
    def __init__(self, image_uri: str = None, model_data: str = None, role: str = None,
                 predictor_cls: callable = None, env: dict = None, name: str = None,
                 vpc_config: dict = None, sagemaker_session: Session = None,
                 enable_network_isolation: bool = None, model_kms_key: str = None,
                 image_config: dict = None, source_dir: str = None, code_location: str = None,
                 entry_point: str = None, container_log_level: int = logging.INFO,
                 dependencies: list = None, git_config: dict = None, **kwargs): ...

    def deploy(self, initial_instance_count: int, instance_type: str, **kwargs) -> Predictor: ...

class Predictor:
    def predict(self, data, **kwargs): ...

    def delete_endpoint(self): ...

class Session:
    def __init__(self, boto_session=None, sagemaker_client=None, sagemaker_runtime_client=None,
                 sagemaker_featurestore_runtime_client=None, default_bucket: str = None,
                 settings=None, sagemaker_metrics_client=None, sagemaker_config: dict = None,
                 default_bucket_prefix: str = None): ...

    def upload_data(self, path: str, bucket: str, key_prefix: str) -> str: ...

def get_execution_role(sagemaker_session: Session = None, use_default: bool = False) -> str: ...
```
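To complement the quick start above, here is a sketch of the lower-level, bring-your-own-container path these core classes support; the ECR image URI and S3 paths are placeholders:

```python
from sagemaker import Session, get_execution_role
from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput

session = Session()
role = get_execution_role()

# Generic Estimator trains with any custom Docker image (URI is a placeholder)
estimator = Estimator(
    image_uri="123456789012.dkr.ecr.us-east-1.amazonaws.com/my-training-image:latest",
    role=role,
    instance_count=1,
    instance_type="ml.m5.xlarge",
    output_path="s3://my-bucket/output",
    sagemaker_session=session,
)

# Channel name -> data source mapping; TrainingInput adds per-channel options
estimator.fit({"train": TrainingInput(s3_data="s3://my-bucket/train", content_type="text/csv")})
```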
Support for popular ML frameworks including PyTorch, TensorFlow, Scikit-learn, XGBoost, Hugging Face, and MXNet. Each framework provides optimized containers and training configurations.

```python
# PyTorch
class PyTorch(Estimator):
    def __init__(self, entry_point: str, framework_version: str, py_version: str, **kwargs): ...

# TensorFlow
class TensorFlow(Estimator):
    def __init__(self, entry_point: str, framework_version: str, py_version: str, **kwargs): ...

# Scikit-learn
class SKLearn(Estimator):
    def __init__(self, entry_point: str, framework_version: str, **kwargs): ...

# XGBoost
class XGBoost(Estimator):
    def __init__(self, entry_point: str, framework_version: str, **kwargs): ...

# Hugging Face
class HuggingFace(Estimator):
    def __init__(self, entry_point: str, transformers_version: str, pytorch_version: str, **kwargs): ...
```
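A sketch of framework-based training with the PyTorch estimator, assuming the `role` from the setup above; the entry-point script, version strings, and S3 path are placeholders (check the supported-version tables for real values):

```python
from sagemaker.pytorch import PyTorch

pytorch_estimator = PyTorch(
    entry_point="train.py",    # placeholder training script
    framework_version="2.1",   # example version string
    py_version="py310",
    role=role,                 # IAM role from get_execution_role()
    instance_type="ml.g4dn.xlarge",
    instance_count=1,
    hyperparameters={"epochs": 10, "lr": 1e-3},
)
pytorch_estimator.fit({"training": "s3://my-bucket/pytorch-train-data"})
```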
Pre-built, optimized algorithms for common ML tasks including clustering, dimensionality reduction, classification, regression, and anomaly detection.

```python
# Clustering
class KMeans(Estimator):
    def __init__(self, role: str, instance_count: int, instance_type: str, k: int, **kwargs): ...

# Dimensionality Reduction
class PCA(Estimator):
    def __init__(self, role: str, instance_count: int, instance_type: str, num_components: int, **kwargs): ...

# Classification/Regression
class LinearLearner(Estimator):
    def __init__(self, role: str, instance_count: int, instance_type: str, **kwargs): ...

# Anomaly Detection
class RandomCutForest(Estimator):
    def __init__(self, role: str, instance_count: int, instance_type: str, **kwargs): ...
```
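A minimal sketch of training a built-in algorithm with in-memory data; `record_set` converts a NumPy array into the RecordSet format the built-in algorithms consume, and the bucket name is a placeholder:

```python
import numpy as np
from sagemaker import KMeans

kmeans = KMeans(
    role=role,
    instance_count=1,
    instance_type="ml.m5.large",
    k=10,
    output_path="s3://my-bucket/kmeans-output",  # placeholder bucket
)

# Upload training vectors in the protobuf RecordSet format and start training
train_records = kmeans.record_set(np.random.rand(1000, 4).astype("float32"))
kmeans.fit(train_records)
```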
Automated machine learning capabilities for tabular data, image classification, text classification, and time series forecasting with minimal configuration required.

```python
# AutoML v1
class AutoML:
    def __init__(self, role: str = None, target_attribute_name: str = None,
                 output_kms_key: str = None, output_path: str = None,
                 base_job_name: str = None, compression_type: str = None,
                 sagemaker_session: Session = None, volume_kms_key: str = None,
                 encrypt_inter_container_traffic: bool = None,
                 vpc_config: dict = None, problem_type: str = None,
                 max_candidates: int = None, **kwargs): ...

    def fit(self, inputs, wait: bool = True, logs: bool = True,
            job_name: str = None): ...

class AutoMLInput:
    def __init__(self, inputs, target_attribute_name: str, compression: str = None,
                 channel_type: str = None, content_type: str = None,
                 s3_data_type: str = None, sample_weight_attribute_name: str = None): ...

# AutoML v2
class AutoMLV2:
    def __init__(self, role: str = None, output_kms_key: str = None,
                 output_path: str = None, base_job_name: str = None,
                 sagemaker_session: Session = None, volume_kms_key: str = None,
                 encrypt_inter_container_traffic: bool = None, **kwargs): ...

    def fit(self, inputs, wait: bool = True, logs: bool = True,
            job_name: str = None): ...

class AutoMLDataChannel:
    def __init__(self, s3_data_source: str, target_attribute_name: str = None,
                 channel_type: str = None, content_type: str = None,
                 compression_type: str = None, sample_weight_attribute_name: str = None): ...

# Configuration classes
class AutoMLTabularConfig:
    def __init__(self, target_attribute_name: str, problem_type: str = None,
                 job_objective: dict = None, **kwargs): ...

class AutoMLTimeSeriesForecastingConfig:
    def __init__(self, forecast_frequency: str, forecast_horizon: int,
                 forecast_quantiles: list = None, **kwargs): ...
```
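A sketch of launching an AutoML v1 job on tabular data, assuming the `role` from earlier; the target column name and S3 paths are placeholders:

```python
from sagemaker.automl.automl import AutoML

automl = AutoML(
    role=role,
    target_attribute_name="label",  # placeholder: the column to predict
    max_candidates=10,
    output_path="s3://my-bucket/automl-output",
)
automl.fit(inputs="s3://my-bucket/tabular-train.csv")

# Inspect the top-performing candidate after the job completes
best = automl.best_candidate()
```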
Comprehensive model deployment options including real-time endpoints, batch transform, serverless inference, and multi-model endpoints with custom serialization support.

```python
# Model deployment
class ModelBuilder:
    def __init__(self, **kwargs): ...

    def build(self, mode: Mode, role: str, sagemaker_session: Session) -> Model: ...

# Inference specification
class InferenceSpec:
    def load(self, model_dir: str): ...

    def invoke(self, input_object, model): ...

# Serializers
class JSONSerializer(BaseSerializer):
    def serialize(self, data) -> bytes: ...

class CSVSerializer(BaseSerializer):
    def serialize(self, data) -> bytes: ...

# Deserializers
class JSONDeserializer(BaseDeserializer):
    def deserialize(self, stream, content_type: str): ...
```
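A sketch of attaching to an already-deployed endpoint with JSON serialization on both sides; the endpoint name is a placeholder and `session` comes from the setup above:

```python
from sagemaker.predictor import Predictor
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

predictor = Predictor(
    endpoint_name="my-endpoint",      # placeholder: an existing endpoint
    sagemaker_session=session,
    serializer=JSONSerializer(),      # Python object -> JSON request body
    deserializer=JSONDeserializer(),  # JSON response body -> Python object
)
result = predictor.predict({"instances": [[1.0, 2.0, 3.0]]})
```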
Data preprocessing capabilities including built-in processing containers, custom processing jobs, and Spark integration for large-scale data transformation.

```python
class Processor:
    def __init__(self, role: str, image_uri: str, instance_count: int, instance_type: str, **kwargs): ...

    def run(self, inputs: List[ProcessingInput], outputs: List[ProcessingOutput], **kwargs): ...

class ScriptProcessor(Processor):
    def __init__(self, command: List[str], **kwargs): ...

# Framework processors
class PyTorchProcessor(Processor): ...
class SKLearnProcessor(Processor): ...
class PySparkProcessor(Processor): ...
```
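A sketch of a scikit-learn processing job, assuming the `role` from earlier; the script name and S3 paths are placeholders, and the container paths follow the `/opt/ml/processing` convention:

```python
from sagemaker.sklearn.processing import SKLearnProcessor
from sagemaker.processing import ProcessingInput, ProcessingOutput

processor = SKLearnProcessor(
    framework_version="1.2-1",
    role=role,
    instance_type="ml.m5.xlarge",
    instance_count=1,
)
processor.run(
    code="preprocess.py",  # placeholder processing script
    inputs=[ProcessingInput(source="s3://my-bucket/raw",
                            destination="/opt/ml/processing/input")],
    outputs=[ProcessingOutput(source="/opt/ml/processing/output",
                              destination="s3://my-bucket/processed")],
)
```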
Comprehensive model monitoring including data quality, model quality, bias detection, and explainability analysis with scheduled monitoring jobs.

```python
class ModelMonitor:
    def __init__(self, role: str, **kwargs): ...

    def create_monitoring_schedule(self, **kwargs): ...

class DefaultModelMonitor(ModelMonitor): ...

class ModelBiasMonitor(ModelMonitor):
    def __init__(self, role: str, **kwargs): ...

class ModelExplainabilityMonitor(ModelMonitor):
    def __init__(self, role: str, **kwargs): ...

class DataCaptureConfig:
    def __init__(self, enable_capture: bool, sampling_percentage: int, **kwargs): ...
```
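A sketch of enabling data capture at deploy time and creating a monitor; `model` stands in for any configured Model from earlier, and the S3 URI is a placeholder:

```python
from sagemaker.model_monitor import DataCaptureConfig, DefaultModelMonitor

# Capture request/response traffic from the endpoint for later analysis
capture_config = DataCaptureConfig(
    enable_capture=True,
    sampling_percentage=100,
    destination_s3_uri="s3://my-bucket/data-capture",  # placeholder
)
predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large",
    data_capture_config=capture_config,
)

monitor = DefaultModelMonitor(
    role=role,
    instance_count=1,
    instance_type="ml.m5.xlarge",
)
```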
Automated hyperparameter optimization with support for multiple search strategies, early stopping, and warm starting from previous tuning jobs.

```python
class HyperparameterTuner:
    def __init__(self, estimator: Estimator, objective_metric_name: str,
                 hyperparameter_ranges: dict, **kwargs): ...

    def fit(self, inputs, **kwargs): ...

    def deploy(self, initial_instance_count: int, instance_type: str, **kwargs) -> Predictor: ...

class IntegerParameter:
    def __init__(self, min_value: int, max_value: int): ...

class ContinuousParameter:
    def __init__(self, min_value: float, max_value: float): ...

class CategoricalParameter:
    def __init__(self, values: List[str]): ...
```
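A sketch of a tuning job over two hyperparameters; the metric name and regex are placeholder assumptions that must match what the training script actually emits:

```python
from sagemaker.tuner import (
    HyperparameterTuner,
    IntegerParameter,
    ContinuousParameter,
)

tuner = HyperparameterTuner(
    estimator=sklearn_estimator,  # any configured Estimator
    objective_metric_name="validation:accuracy",
    metric_definitions=[{"Name": "validation:accuracy",
                         "Regex": "validation-accuracy: ([0-9\\.]+)"}],
    hyperparameter_ranges={
        "max_depth": IntegerParameter(1, 10),
        "learning_rate": ContinuousParameter(0.001, 0.1),
    },
    max_jobs=10,
    max_parallel_jobs=2,
)
tuner.fit({"training": "s3://my-bucket/train-data"})
```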
Experiment management and tracking capabilities for organizing ML workflows, comparing runs, and tracking metrics across training jobs.

```python
class Experiment:
    def __init__(self, experiment_name: str, description: str = None, **kwargs): ...

    def create(self) -> dict: ...

class Run:
    def __init__(self, experiment_name: str, sagemaker_session: Session = None): ...

    def log_parameter(self, name: str, value): ...

    def log_metric(self, name: str, value: float, step: int = None): ...

def load_run(sagemaker_session: Session = None, **kwargs) -> Run: ...

def list_runs(experiment_name: str = None, **kwargs) -> List[dict]: ...
```
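A sketch of tracking a run; `Run` is a context manager, so anything logged inside the block is associated with that run (experiment and run names are placeholders):

```python
from sagemaker.experiments.run import Run

with Run(experiment_name="my-experiment", run_name="trial-1",
         sagemaker_session=session) as run:
    run.log_parameter("learning_rate", 0.01)
    run.log_metric("accuracy", 0.93, step=1)
```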
Comprehensive model debugging and performance profiling tools including tensor analysis, system metrics collection, and framework-specific profiling.

```python
class ProfilerConfig:
    def __init__(self, s3_output_path: str = None, profiling_interval_millis: int = None, **kwargs): ...

class Profiler:
    def __init__(self, **kwargs): ...

class DebuggerHookConfig:
    def __init__(self, s3_output_path: str, **kwargs): ...

class Rule:
    def __init__(self, name: str, image_uri: str, **kwargs): ...

class ProfilerRule(Rule):
    def __init__(self, name: str, **kwargs): ...
```
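A sketch of wiring debugging and profiling into an estimator via the `sagemaker.debugger` module; the built-in `vanishing_gradient` rule is one example of the packaged rule configs, and the script, versions, and S3 paths are placeholders:

```python
from sagemaker.debugger import (
    DebuggerHookConfig,
    ProfilerConfig,
    Rule,
    rule_configs,
)
from sagemaker.pytorch import PyTorch

estimator = PyTorch(
    entry_point="train.py",
    framework_version="2.1",
    py_version="py310",
    role=role,
    instance_type="ml.m5.xlarge",
    # Persist tensors for offline analysis
    debugger_hook_config=DebuggerHookConfig(s3_output_path="s3://my-bucket/debug-tensors"),
    # Collect system and framework metrics
    profiler_config=ProfilerConfig(s3_output_path="s3://my-bucket/profiler"),
    # Built-in rule that watches for vanishing gradients during training
    rules=[Rule.sagemaker(rule_configs.vanishing_gradient())],
)
```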
Execute Python functions remotely on SageMaker compute with automatic dependency management, data transfer, and result retrieval.

```python
# Decorator: wraps a function so each call runs as a SageMaker job
def remote(instance_type: str = None, instance_count: int = 1,
           role: str = None, **kwargs): ...

class RemoteExecutor:
    def __init__(self, **kwargs): ...

    def submit(self, func, *args, **kwargs): ...
```
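A sketch of the `@remote` decorator in use; the wrapped call executes as a SageMaker job and the return value is serialized back to the caller (the instance type is an example):

```python
from sagemaker.remote_function import remote

@remote(instance_type="ml.m5.xlarge")
def elementwise_sum(a, b):
    # Runs on SageMaker compute, not locally
    return [x + y for x, y in zip(a, b)]

result = elementwise_sum([1, 2, 3], [4, 5, 6])
```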
Shared input, output, and configuration helper classes used across the APIs above.

```python
# Training input configuration
class TrainingInput:
    def __init__(self, s3_data: str, s3_data_type: str = "S3Prefix", **kwargs): ...

# Processing input/output
class ProcessingInput:
    def __init__(self, source: str, destination: str, **kwargs): ...

class ProcessingOutput:
    def __init__(self, source: str, destination: str, **kwargs): ...

# Model metrics
class ModelMetrics:
    def __init__(self, model_statistics: MetricsSource = None,
                 model_constraints: MetricsSource = None, **kwargs): ...

class MetricsSource:
    def __init__(self, s3_uri: str, content_type: str): ...

# Network configuration
class NetworkConfig:
    def __init__(self, enable_network_isolation: bool = False,
                 security_group_ids: List[str] = None, **kwargs): ...

# Instance configuration
class InstanceConfig:
    def __init__(self, instance_type: str, instance_count: int = 1, **kwargs): ...
```
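A sketch of how these helpers plug into a training call; the S3 prefix and channel name are placeholders, and `estimator` is any configured Estimator from the sections above:

```python
from sagemaker.inputs import TrainingInput

train_input = TrainingInput(
    s3_data="s3://my-bucket/train",   # placeholder S3 prefix
    content_type="text/csv",          # per-channel content type
    distribution="FullyReplicated",   # how data is spread across instances
)
estimator.fit({"train": train_input})
```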