Pre-trained models and solutions from SageMaker JumpStart with easy deployment and fine-tuning.
Access and discover JumpStart models with filtering and search capabilities.
class JumpStartModelsAccessor:
"""
Access JumpStart model catalog.
Methods:
list_models(filters=None, region=None) -> List[str]
List available model IDs.
Parameters:
filters: Optional[Dict] - Filter criteria
- task: str - Task type (e.g., "text-generation")
- framework: str - Framework (e.g., "pytorch")
region: Optional[str] - AWS region
Returns:
List[str]: Model IDs
Example:
models = accessor.list_models(filters={"task": "text-generation"})
get_model_specs(model_id, version="*", region=None) -> JumpStartModelSpecs
Get model specifications.
Parameters:
model_id: str - JumpStart model ID (required)
version: str - Model version (default: "*" for latest)
region: Optional[str] - AWS region
Returns:
JumpStartModelSpecs: Model specifications
Raises:
ValueError: If model not found
search(query, filters=None, region=None) -> List[str]
Search for models.
Parameters:
query: str - Search query (required)
filters: Optional[Dict] - Additional filters
region: Optional[str] - AWS region
Returns:
List[str]: Matching model IDs
Usage:
Discover and access pre-trained models from JumpStart catalog.
Notes:
- Catalog updated regularly with new models
- Not all models available in all regions
- Some models require EULA acceptance
"""

Usage:
from sagemaker.core.jumpstart import JumpStartModelsAccessor
accessor = JumpStartModelsAccessor()
# List all models
all_models = accessor.list_models()
print(f"Total JumpStart models: {len(all_models)}")
# Filter by task type
text_gen_models = accessor.list_models(
filters={"task": "text-generation"},
region="us-west-2"
)
print(f"\nText generation models: {len(text_gen_models)}")
for model_id in text_gen_models[:5]:
print(f" - {model_id}")
# Filter by framework
pytorch_models = accessor.list_models(
filters={"framework": "pytorch"}
)
# Filter by multiple criteria
vision_models = accessor.list_models(
filters={
"task": "image-classification",
"framework": "tensorflow"
}
)
# Search for specific models
llama_models = accessor.search(query="llama", region="us-west-2")
print(f"\nLlama models: {llama_models}")
falcon_models = accessor.search(query="falcon")
print(f"Falcon models: {falcon_models}")
# Get model specifications
try:
specs = accessor.get_model_specs(
model_id="meta-llama/Llama-2-7b-hf",
version="*", # Latest version
region="us-west-2"
)
print(f"\n{specs.model_id} specifications:")
print(f" Default instance: {specs.default_instance_type}")
print(f" Supported tasks: {specs.supported_tasks}")
print(f" Framework: {specs.framework}")
print(f" Framework version: {specs.framework_version}")
print(f" Model parameters: {specs.model_parameters}")
except ValueError as e:
print(f"Model not found: {e}")

Configuration for using JumpStart models with ModelBuilder and ModelTrainer.
class JumpStartConfig:
"""
JumpStart model configuration.
Parameters:
model_id: str - JumpStart model ID (required)
- Format: "provider/model-name" or a hyphenated JumpStart model ID
- Example: "meta-llama/Llama-2-7b-hf", "huggingface-llm-falcon-7b-bf16"
model_version: Optional[str] - Model version (default: "*" for latest)
- Format: "1.0.0" or "*"
region: Optional[str] - AWS region
- Defaults to session region
tolerate_vulnerable_model: bool - Allow vulnerable models (default: False)
- Set True to deploy models with known vulnerabilities
tolerate_deprecated_model: bool - Allow deprecated models (default: False)
- Set True to use deprecated models
model_type: Optional[JumpStartModelType] - Model type
- OPEN_WEIGHTS, PROPRIETARY, MARKETPLACE
hub_arn: Optional[str] - Private hub ARN
- For enterprise private JumpStart hubs
Methods:
get_model_specs() -> JumpStartModelSpecs
Get model specifications.
Returns:
JumpStartModelSpecs: Model specs
Raises:
ValueError: Invalid model or version
validate() -> None
Validate configuration.
Raises:
ValueError: Invalid configuration or model not available
Attributes:
model_id: str - Model identifier
model_version: str - Model version
Notes:
- Model availability varies by region
- Some models require EULA acceptance
- Vulnerable models blocked by default for security
- Deprecated models may lack support
"""

Usage:
from sagemaker.core.jumpstart import JumpStartConfig
# Create JumpStart config
try:
config = JumpStartConfig(
model_id="meta-llama/Llama-2-7b-hf",
model_version="1.0.0",
region="us-west-2",
tolerate_deprecated_model=False
)
# Validate configuration
config.validate()
# Get model specs
specs = config.get_model_specs()
print(f"Model: {specs.model_id}")
print(f"Default instance: {specs.default_instance_type}")
except ValueError as e:
print(f"Invalid configuration: {e}")

class ModelBuilder:
"""
ModelBuilder with JumpStart integration.
Class Methods:
from_jumpstart_config(jumpstart_config, role_arn, compute=None, network=None,
image_uri=None, env_vars=None, model_kms_key=None,
resource_requirements=None, tolerate_vulnerable_model=False,
tolerate_deprecated_model=False, sagemaker_session=None,
schema_builder=None) -> ModelBuilder
Create ModelBuilder from JumpStart config.
Parameters:
jumpstart_config: JumpStartConfig - JumpStart config (required)
role_arn: str - IAM role ARN (required)
compute: Optional[Compute] - Compute config
network: Optional[Network] - Network config
image_uri: Optional[str] - Override container image
env_vars: Optional[Dict] - Environment variables
model_kms_key: Optional[str] - KMS key for model
resource_requirements: Optional[Dict] - Resource requirements
tolerate_vulnerable_model: bool - Allow vulnerable (default: False)
tolerate_deprecated_model: bool - Allow deprecated (default: False)
sagemaker_session: Optional[Session] - Session
schema_builder: Optional[SchemaBuilder] - Schema builder
Returns:
ModelBuilder: Configured builder for JumpStart model
Raises:
ValueError: Invalid model or configuration
"""

Usage:
from sagemaker.serve import ModelBuilder
from sagemaker.core.jumpstart import JumpStartConfig
# Deploy JumpStart model
config = JumpStartConfig(
model_id="huggingface-llm-falcon-7b-bf16",
region="us-west-2"
)
builder = ModelBuilder.from_jumpstart_config(
jumpstart_config=config,
role_arn="arn:aws:iam::123456789012:role/SageMakerRole",
env_vars={
"MAX_INPUT_LENGTH": "1024",
"MAX_TOTAL_TOKENS": "2048"
}
)
# Deploy to endpoint
try:
endpoint = builder.deploy(
endpoint_name="falcon-endpoint",
initial_instance_count=1,
instance_type="ml.g5.2xlarge",
wait=True
)
# Use endpoint for inference
response = endpoint.invoke({
"inputs": "What is machine learning?",
"parameters": {
"max_new_tokens": 256,
"temperature": 0.7,
"top_p": 0.9
}
})
print(f"Response: {response}")
except ValueError as e:
if "EULA" in str(e):
print("Model requires EULA acceptance")
else:
print(f"Deployment error: {e}")

class ModelTrainer:
"""
ModelTrainer with JumpStart integration.
Class Methods:
from_jumpstart_config(jumpstart_config, compute, networking=None,
source_code=None, hyperparameters=None, role=None,
sagemaker_session=None, base_job_name=None, tags=None) -> ModelTrainer
Create ModelTrainer from JumpStart config.
Parameters:
jumpstart_config: JumpStartConfig - JumpStart config (required)
compute: Compute - Compute configuration (required)
networking: Optional[Networking] - Network config
source_code: Optional[SourceCode] - Custom training code
hyperparameters: Optional[Dict] - Training hyperparameters
role: Optional[str] - IAM role ARN
sagemaker_session: Optional[Session] - Session
base_job_name: Optional[str] - Base job name
tags: Optional[List[Tag]] - Tags
Returns:
ModelTrainer: Configured trainer for fine-tuning JumpStart model
"""

Usage:
from sagemaker.train import ModelTrainer
from sagemaker.core.jumpstart import JumpStartConfig
from sagemaker.train.configs import Compute, InputData
# Fine-tune JumpStart model
config = JumpStartConfig(
model_id="meta-llama/Llama-2-7b-hf",
region="us-west-2"
)
trainer = ModelTrainer.from_jumpstart_config(
jumpstart_config=config,
compute=Compute(
instance_type="ml.g5.12xlarge",
instance_count=1,
volume_size_in_gb=256
),
role="arn:aws:iam::123456789012:role/SageMakerRole",
hyperparameters={
"epochs": 3,
"learning_rate": 2e-4,
"per_device_train_batch_size": 4,
"lora_r": 8,
"lora_alpha": 16
}
)
# Prepare training data
train_data = InputData(
channel_name="training",
data_source="s3://my-bucket/fine-tune-data/train.jsonl"
)
val_data = InputData(
channel_name="validation",
data_source="s3://my-bucket/fine-tune-data/val.jsonl"
)
# Fine-tune model
try:
trainer.train(input_data_config=[train_data, val_data])
print("Fine-tuning completed")
job = trainer._latest_training_job
print(f"Model: {job.model_artifacts}")
except ValueError as e:
if "EULA" in str(e):
print("Model requires EULA acceptance - use SFTTrainer with accept_eula=True")

Access sample payloads for JumpStart models.
class JumpStartS3PayloadAccessor:
"""
Access sample payloads for JumpStart models.
Methods:
get_payload(model_id, version="*", region=None) -> dict
Get sample payload for model.
Parameters:
model_id: str - JumpStart model ID (required)
version: str - Model version (default: "*")
region: Optional[str] - AWS region
Returns:
dict: Sample payload for model inference
list_payloads(model_id, version="*", region=None) -> List[dict]
List all sample payloads for model.
Parameters:
model_id: str - JumpStart model ID (required)
version: str - Model version (default: "*")
region: Optional[str] - AWS region
Returns:
List[dict]: All sample payloads
Usage:
Retrieve example inputs for testing JumpStart models.
Notes:
- Payloads demonstrate correct input format
- Use for endpoint testing
- Multiple payloads show various input patterns
"""

Usage:
from sagemaker.core.jumpstart import JumpStartS3PayloadAccessor
from sagemaker.core.payloads import retrieve_example, retrieve_all_examples
# Get sample payload for specific model
payload = retrieve_example(
model_id="meta-llama/Llama-2-7b-hf",
model_version="*",
region="us-west-2"
)
print(f"Sample payload: {payload}")
# Use for testing endpoint
endpoint = builder.deploy(endpoint_name="llama-test-endpoint")
response = endpoint.invoke(payload)
print(f"Response: {response}")
# Get all example payloads
all_payloads = retrieve_all_examples(
model_id="huggingface-llm-falcon-7b-bf16",
region="us-west-2"
)
print(f"Available {len(all_payloads)} sample payloads:")
for i, payload in enumerate(all_payloads):
print(f" Payload {i+1}: {list(payload.keys())}")
# Test with all payloads
for payload in all_payloads:
try:
response = endpoint.invoke(payload)
print(f"✓ Payload {payload} succeeded")
except Exception as e:
print(f"✗ Payload {payload} failed: {e}")

class JumpStartModelType(Enum):
"""
JumpStart model types.
Values:
OPEN_WEIGHTS = "OPEN_WEIGHTS"
Open weights models (e.g., Llama, Falcon, Mistral)
- Freely available
- May require EULA acceptance
- Can be fine-tuned
PROPRIETARY = "PROPRIETARY"
Proprietary models
- Commercial licenses
- Usage restrictions
MARKETPLACE = "MARKETPLACE"
AWS Marketplace models
- Third-party providers
- Subscription required
- Additional costs
Usage:
Filter models by type in JumpStartConfig.
Notes:
- OPEN_WEIGHTS: Most flexible, no additional costs
- PROPRIETARY: Check licensing terms
- MARKETPLACE: Subscription fees apply
"""

class HubContentCapability(Enum):
"""
Model capabilities and task types.
Values:
TEXT_GENERATION = "TEXT_GENERATION"
Text generation models (LLMs)
TEXT_EMBEDDING = "TEXT_EMBEDDING"
Text embedding models
IMAGE_GENERATION = "IMAGE_GENERATION"
Image generation models (Stable Diffusion, etc.)
IMAGE_CLASSIFICATION = "IMAGE_CLASSIFICATION"
Image classification models
OBJECT_DETECTION = "OBJECT_DETECTION"
Object detection models
QUESTION_ANSWERING = "QUESTION_ANSWERING"
QA models
SUMMARIZATION = "SUMMARIZATION"
Summarization models
TRANSLATION = "TRANSLATION"
Translation models
FILL_MASK = "FILL_MASK"
Masked language models
Usage:
Filter models by capability.
"""

class ModelFramework(Enum):
"""
Model frameworks.
Values:
PYTORCH = "PYTORCH"
PyTorch models
TENSORFLOW = "TENSORFLOW"
TensorFlow models
XGBOOST = "XGBOOST"
XGBoost models
SKLEARN = "SKLEARN"
Scikit-learn models
HUGGINGFACE = "HUGGINGFACE"
HuggingFace Transformers
MXNET = "MXNET"
Apache MXNet models
Usage:
Filter models by framework.
"""

from sagemaker.core.jumpstart import JumpStartConfig
# Use enterprise private hub
config = JumpStartConfig(
model_id="my-company/custom-model",
hub_arn="arn:aws:sagemaker:us-west-2:123456789012:hub/company-hub",
region="us-west-2"
)
builder = ModelBuilder.from_jumpstart_config(
jumpstart_config=config,
role_arn=role
)
# Deploy company's private model
endpoint = builder.deploy(endpoint_name="company-model-endpoint")

from sagemaker.serve import ModelBuilder
from sagemaker.core.jumpstart import JumpStartConfig
# Get benchmark metrics for JumpStart model
config = JumpStartConfig(model_id="meta-llama/Llama-2-7b-hf")
builder = ModelBuilder.from_jumpstart_config(
jumpstart_config=config,
role_arn=role
)
# Display benchmark metrics
try:
benchmarks = builder.display_benchmark_metrics(
model_id="meta-llama/Llama-2-7b-hf",
model_version="*"
)
# Shows MMLU, BBH, and other standard benchmarks
print("Model Benchmarks:")
for benchmark, score in benchmarks.items():
print(f" {benchmark}: {score}")
except Exception as e:
print(f"Benchmarks not available: {e}")

from sagemaker.core.jumpstart import JumpStartModelsAccessor
accessor = JumpStartModelsAccessor()
specs = accessor.get_model_specs(
model_id="meta-llama/Llama-2-13b-hf",
region="us-west-2"
)
# Get instance type recommendations
print(f"Default instance: {specs.default_instance_type}")
print(f"\nSupported instance types:")
for instance_type in specs.supported_inference_instance_types:
print(f" - {instance_type}")
print(f"\nRecommended instance types:")
for instance_type in specs.recommended_instance_types:
print(f" - {instance_type}")
# Choose based on requirements:
# - ml.g5.xlarge: Single GPU, cost-effective for development
# - ml.g5.2xlarge: Single GPU, production use
# - ml.g5.12xlarge: 4 GPUs, faster inference or model parallelism
# - ml.g5.48xlarge: 8 GPUs, very large models or high throughput

from sagemaker.train import SFTTrainer
from sagemaker.train.common import TrainingType
# Fine-tune JumpStart model with LoRA
trainer = SFTTrainer(
model="meta-llama/Llama-2-7b-hf",
training_type=TrainingType.LORA, # Parameter-efficient
training_dataset="s3://my-bucket/train.jsonl",
validation_dataset="s3://my-bucket/val.jsonl",
s3_output_path="s3://my-bucket/output",
accept_eula=True # Required for Llama
)
# Configure LoRA parameters
trainer.hyperparameters.epochs = 3
trainer.hyperparameters.learning_rate = 2e-4
trainer.hyperparameters.per_device_train_batch_size = 4
# LoRA-specific settings
trainer.hyperparameters.lora_r = 8 # LoRA rank
trainer.hyperparameters.lora_alpha = 16 # LoRA alpha (typically 2*lora_r)
trainer.hyperparameters.lora_dropout = 0.05
trainer.hyperparameters.target_modules = ["q_proj", "v_proj"] # Attention layers
# Train
job = trainer.train()
print(f"Fine-tuned model: {job.model_artifacts}")

from sagemaker.core.jumpstart import JumpStartModelsAccessor
# Compare similar LLMs
models = [
"meta-llama/Llama-2-7b-hf",
"meta-llama/Llama-2-13b-hf",
"huggingface-llm-falcon-7b-bf16",
"huggingface-llm-falcon-40b-bf16"
]
accessor = JumpStartModelsAccessor()
print("Model Comparison:")
print(f"{'Model':<45} {'Parameters':<15} {'Default Instance':<20}")
print("-" * 80)
for model_id in models:
try:
specs = accessor.get_model_specs(model_id=model_id, region="us-west-2")
params = specs.model_parameters
instance = specs.default_instance_type
print(f"{model_id:<45} {params:<15} {instance:<20}")
except ValueError:
print(f"{model_id:<45} {'Not available'}")
# Choose based on:
# - Model size (parameters)
# - Inference cost (instance type)
# - Task performance (benchmarks)
# - License requirements

from sagemaker.serve import ModelBuilder
from sagemaker.core.jumpstart import JumpStartConfig
config = JumpStartConfig(model_id="meta-llama/Llama-2-7b-hf")
builder = ModelBuilder.from_jumpstart_config(
jumpstart_config=config,
role_arn=role
)
# List available deployment configs
configs = builder.list_deployment_configs()
print(f"Available configurations: {configs}")
# Set specific configuration optimized for instance type
builder.set_deployment_config(
config_name="ml.g5.2xlarge.config",
instance_type="ml.g5.2xlarge"
)
# Get current configuration
current = builder.get_deployment_config()
print(f"Current config: {current}")
# Deploy with optimized configuration
endpoint = builder.deploy(endpoint_name="optimized-llm-endpoint")

from sagemaker.serve import ModelBuilder
from sagemaker.core.jumpstart import JumpStartConfig
# Deploy and register JumpStart model
config = JumpStartConfig(model_id="meta-llama/Llama-2-7b-hf")
builder = ModelBuilder.from_jumpstart_config(
jumpstart_config=config,
role_arn=role
)
# Register to model registry
model_package = builder.register(
model_package_group_name="llama-models",
content_types=["application/json"],
response_types=["application/json"],
inference_instances=["ml.g5.2xlarge", "ml.g5.12xlarge"],
transform_instances=["ml.g5.2xlarge"],
model_approval_status="PendingManualApproval",
approval_description="Llama-2-7b baseline model",
customer_metadata_properties={
"source": "jumpstart",
"model_id": "meta-llama/Llama-2-7b-hf",
"use_case": "customer_support_chatbot"
}
)
print(f"Registered: {model_package.model_package_arn}")
# Later: deploy from registry
from sagemaker.core.resources import ModelPackage
registered_model = ModelPackage.get(model_package_arn)
if registered_model.model_approval_status == "Approved":
builder = ModelBuilder(model=registered_model, role_arn=role)
endpoint = builder.deploy(endpoint_name="approved-llama-endpoint")

class SageMakerSettings:
"""
SageMaker environment settings for JumpStart.
Attributes:
include_unsupported: bool - Include unsupported models
- Models not officially supported in region
include_vulnerable: bool - Include vulnerable models
- Models with known security vulnerabilities
include_deprecated: bool - Include deprecated models
- Models marked for deprecation
Usage:
Global settings for JumpStart model filtering.
Control security and support requirements.
Notes:
- Default: exclude unsupported, vulnerable, deprecated
- Override for specific use cases
- Security implications for vulnerable models
"""

Usage:
from sagemaker.core.jumpstart import SageMakerSettings
# Configure global settings
settings = SageMakerSettings()
settings.include_deprecated = False # Exclude deprecated
settings.include_vulnerable = False # Exclude vulnerable (security)
settings.include_unsupported = False # Exclude unsupported in region
# These settings affect:
# - Model listing
# - Model validation
# - Deployment permissions

Model Not Available in Region:
EULA Not Accepted:
Insufficient Instance Capacity:
Version Not Found:
Authentication Failed:
Deployment Config Incompatible: