Configuration classes for SageMaker Clarify online explainability, used to interpret model predictions with SHAP values.
Base configuration for model explainability.
class ExplainerConfig:
"""
Base configuration for explainability (imported from sagemaker.core.explainer).
Used with endpoints to enable online explainability for predictions.
Base class for ClarifyExplainerConfig.
Notes:
- Abstract base configuration
- Subclass: ClarifyExplainerConfig
- Enable real-time explanations
"""Main configuration for SageMaker Clarify explainer.
class ClarifyExplainerConfig:
"""
Configuration for SageMaker Clarify explainer.
Parameters:
shap_config: ClarifyShapConfig - SHAP analysis configuration (required)
enable_explanations: Optional[str] - JMESPath expression to filter records for explanation
- "`true`": Always explain
- "probability > `0.8`": Explain when probability >0.8
- JMESPath expression evaluated against prediction
inference_config: Optional[ClarifyInferenceConfig] - Model container inference configuration
- How to parse model inputs/outputs
Methods:
_to_request_dict() -> dict
Generate request dictionary for API.
Returns:
dict: API request structure
Notes:
- Attach to endpoint for online explainability
- SHAP values computed per prediction
- Filter expensive explanations with enable_explanations
- Adds latency to predictions (~100-500ms)
"""Usage:
from sagemaker.core.explainer import (
ClarifyExplainerConfig,
ClarifyShapConfig,
ClarifyShapBaselineConfig,
ClarifyInferenceConfig
)
# Configure SHAP baseline
baseline_config = ClarifyShapBaselineConfig(
mime_type="text/csv",
shap_baseline="0,0,0,0" # Baseline feature values
)
# Configure SHAP analysis
shap_config = ClarifyShapConfig(
shap_baseline_config=baseline_config,
number_of_samples=100, # Samples for Kernel SHAP
seed=42, # Reproducibility
use_logit=False
)
# Configure inference behavior
inference_config = ClarifyInferenceConfig(
feature_headers=["feature1", "feature2", "feature3", "feature4"],
max_payload_in_mb=6,
max_record_count=10 # Records per batch
)
# Create explainer config
explainer_config = ClarifyExplainerConfig(
shap_config=shap_config,
inference_config=inference_config,
enable_explanations="`true`" # Always explain
)
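# Optionally inspect the request structure the config will generate for the
# CreateEndpointConfig API call (a sketch; _to_request_dict() is the internal
# helper listed under Methods above)
print(explainer_config._to_request_dict())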
# Use with endpoint deployment
endpoint = builder.deploy(
endpoint_name="explainable-endpoint",
explainer_config=explainer_config
)
# Or update existing endpoint
endpoint.update_endpoint(explainer_config=explainer_config)
Conditional Explanations:
# Only explain high-confidence predictions
explainer_config = ClarifyExplainerConfig(
shap_config=shap_config,
inference_config=inference_config,
enable_explanations="probability > `0.9`" # Only when confident
)
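# enable_explanations is plain JMESPath evaluated against each prediction
# record; a minimal local sanity check of the filter above (the jmespath
# package is an assumption here, not part of the explainer SDK):
import jmespath
record = {"probability": 0.92, "predicted_label": 1}
print(jmespath.search("probability > `0.9`", record))  # True -> record would be explained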
# Only explain positive predictions
explainer_config = ClarifyExplainerConfig(
shap_config=shap_config,
inference_config=inference_config,
enable_explanations="predicted_label == `1`"
)
# Complex condition
explainer_config = ClarifyExplainerConfig(
shap_config=shap_config,
inference_config=inference_config,
enable_explanations="probability > `0.8` && predicted_label == `1`"
)
Configuration for model container inference behavior.
class ClarifyInferenceConfig:
"""
Inference configuration for model container.
Parameters:
feature_headers: Optional[List[str]] - Feature names for response payload
- Used for labeling SHAP values
feature_types: Optional[List[str]] - Feature data types
- Each entry is one of: "text", "numerical", "categorical"
features_attribute: Optional[str] - JMESPath to extract features from JSON Lines
- Example: "features" for {"features": [1, 2, 3]}
probability_index: Optional[int] - Index to extract probability from CSV output
- Zero-based index
probability_attribute: Optional[str] - JMESPath to extract probability from JSON Lines
- Example: "prediction.probability"
label_index: Optional[int] - Index to extract label from CSV output
label_attribute: Optional[str] - JMESPath to extract label from JSON Lines
label_headers: Optional[List[str]] - Label names for classification
- For multiclass: ["class_0", "class_1", "class_2"]
max_payload_in_mb: Optional[int] - Maximum payload size (default: 6)
- Range: 1-6 MB
max_record_count: Optional[int] - Maximum records per request
- Batching for efficiency
content_template: Optional[str] - Template for formatting JSON records
- Use $features placeholder for feature values
Methods:
_to_request_dict() -> dict
Generate request dictionary.
Returns:
dict: API request structure
Notes:
- Configure how Clarify interacts with model
- Supports CSV and JSON Lines formats
- JMESPath for flexible field extraction
- Batching improves throughput
"""CSV Format Configuration:
from sagemaker.core.explainer import ClarifyInferenceConfig
# CSV input/output
inference_config = ClarifyInferenceConfig(
feature_headers=["age", "income", "education", "occupation"],
probability_index=1, # Second column has probability
max_payload_in_mb=6
)
JSON Lines Configuration:
# JSON Lines format
# Input: {"features": [1.0, 2.0, 3.0]}
# Output: {"prediction": {"label": 1, "probability": 0.85}}
inference_config = ClarifyInferenceConfig(
feature_headers=["feature1", "feature2", "feature3"],
features_attribute="features", # Extract from input
probability_attribute="prediction.probability", # Extract from output
label_attribute="prediction.label",
content_template='{"features": $features}', # Format for model
max_record_count=10 # Batch 10 records per request
)
Multiclass Classification:
# Multiclass model output
inference_config = ClarifyInferenceConfig(
feature_headers=["text"],
feature_types=["text"],
label_headers=["negative", "neutral", "positive"], # 3 classes
probability_attribute="predictions", # Array of probabilities
max_payload_in_mb=6
)
Configuration for SHAP (SHapley Additive exPlanations) analysis.
class ClarifyShapConfig:
"""
SHAP analysis configuration.
Parameters:
shap_baseline_config: ClarifyShapBaselineConfig - Baseline configuration (required)
number_of_samples: Optional[int] - Number of samples for Kernel SHAP (default: 100)
- Range: 1-10000
- More samples = better accuracy, higher latency
seed: Optional[int] - Random seed for deterministic results
- Range: 0-2147483647
use_logit: Optional[bool] - Use logit function for predictions (default: False)
- Convert probabilities to log-odds
- Recommended for probability-based models
text_config: Optional[ClarifyTextConfig] - Text explainability configuration
- For NLP models
Methods:
_to_request_dict() -> dict
Generate request dictionary.
Notes:
- Kernel SHAP: model-agnostic method
- Baseline represents "neutral" or "typical" instance
- number_of_samples affects explanation quality and latency
- use_logit for better SHAP values with probability models
"""Usage:
from sagemaker.core.explainer import (
ClarifyShapConfig,
ClarifyShapBaselineConfig,
ClarifyTextConfig
)
# Numerical data SHAP config
baseline_config = ClarifyShapBaselineConfig(
mime_type="text/csv",
shap_baseline="0.5,0.3,0.2,0.1" # Mean or median of features
)
shap_config = ClarifyShapConfig(
shap_baseline_config=baseline_config,
number_of_samples=500, # High accuracy
seed=42,
use_logit=True # For probability models
)
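# With use_logit=True, explanations are computed in log-odds space rather than
# on raw probabilities; a minimal illustration of the transform (not part of
# the SDK):
import math
def logit(p):
    # log-odds of a predicted probability
    return math.log(p / (1.0 - p))
print(logit(0.85))  # ~1.73; with use_logit, SHAP values are additive in this space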
# Text data SHAP config
text_config = ClarifyTextConfig(
language="en",
granularity="token"
)
text_shap_config = ClarifyShapConfig(
shap_baseline_config=ClarifyShapBaselineConfig(
mime_type="application/jsonlines",
shap_baseline='{"text": ""}' # Empty text baseline
),
number_of_samples=100,
text_config=text_config
)
Configuration for SHAP baseline data.
class ClarifyShapBaselineConfig:
"""
SHAP baseline configuration.
Parameters:
mime_type: Optional[str] - Baseline data MIME type (default: "text/csv")
- "text/csv", "application/jsonlines"
shap_baseline: Optional[str] - Inline baseline data
- Maximum 4 KB
- Format matches mime_type
- Multiple instances separated by newlines
shap_baseline_uri: Optional[str] - S3 URI for baseline data
- For baselines >4 KB
- Format matches mime_type
Methods:
_to_request_dict() -> dict
Generate request dictionary.
Notes:
- Provide either shap_baseline or shap_baseline_uri, not both
- Baseline format must match training dataset format
- Multiple baseline instances recommended (3-10)
- Baseline should represent typical/neutral instances
"""Usage:
from sagemaker.core.explainer import ClarifyShapBaselineConfig
# Inline baseline (small data <4KB)
baseline_config = ClarifyShapBaselineConfig(
mime_type="text/csv",
shap_baseline="0,0,0,0\n1,1,1,1\n2,2,2,2" # 3 baseline instances
)
# S3 baseline (large data)
baseline_config = ClarifyShapBaselineConfig(
mime_type="text/csv",
shap_baseline_uri="s3://my-bucket/baselines/baseline.csv"
)
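# Minimal sketch (assumed local file and bucket/key names) for staging a
# baseline larger than 4 KB in S3 before referencing it via shap_baseline_uri:
import boto3
boto3.client("s3").upload_file(
    Filename="baseline.csv",  # local baseline file in the same format as the training data
    Bucket="my-bucket",
    Key="baselines/baseline.csv"
)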
# JSON Lines baseline
baseline_config = ClarifyShapBaselineConfig(
mime_type="application/jsonlines",
shap_baseline='{"features":[0,0,0,0]}\n{"features":[1,1,1,1]}'
)
# Baseline from training data statistics
# Use mean, median, or representative samples
import pandas as pd
df = pd.read_csv("s3://bucket/training-data.csv")  # reading directly from S3 requires s3fs; drop any label column before computing baselines
baseline_instances = [
df.mean().tolist(), # Mean
df.median().tolist(), # Median
df.quantile(0.25).tolist(), # 25th percentile
df.quantile(0.75).tolist() # 75th percentile
]
baseline_str = '\n'.join([','.join(map(str, inst)) for inst in baseline_instances])
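# Inline baselines must stay under the 4 KB limit noted above; verify before use
assert len(baseline_str.encode("utf-8")) < 4096, "baseline too large; use shap_baseline_uri instead"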
baseline_config = ClarifyShapBaselineConfig(
mime_type="text/csv",
shap_baseline=baseline_str
)
Configuration for text feature explainability.
class ClarifyTextConfig:
"""
Text explainability configuration for NLP models.
Parameters:
language: str - Language code (required)
- ISO 639-1: "en", "es", "fr", "de", "it", "pt", "zh", "ja", etc.
- ISO 639-3: For less common languages
granularity: str - Text unit granularity (required)
- "token": Token-level explanations (most granular)
- "sentence": Sentence-level explanations
- "paragraph": Paragraph-level explanations
Methods:
_to_request_dict() -> dict
Generate request dictionary.
Supported Languages:
Common: en, es, fr, de, it, pt, nl, pl, ru, ja, zh, ko, ar, hi, etc.
See AWS documentation for complete list.
Notes:
- Granularity affects explanation detail and latency
- Token: Shows individual word/subword importance
- Sentence: Shows sentence-level importance
- Paragraph: Coarsest, fastest
"""Usage:
from sagemaker.core.explainer import ClarifyTextConfig
# Token-level English text
text_config = ClarifyTextConfig(
language="en",
granularity="token"
)
# Sentence-level Spanish text
text_config = ClarifyTextConfig(
language="es",
granularity="sentence"
)
# Paragraph-level French text
text_config = ClarifyTextConfig(
language="fr",
granularity="paragraph"
)
# For multilingual models, use primary language
multilingual_config = ClarifyTextConfig(
language="en", # Primary language
granularity="token"
)
Complete Tabular Model Example:
from sagemaker.core.explainer import (
ClarifyExplainerConfig,
ClarifyShapConfig,
ClarifyShapBaselineConfig,
ClarifyInferenceConfig
)
# Step 1: Configure baseline
baseline_config = ClarifyShapBaselineConfig(
mime_type="text/csv",
shap_baseline_uri="s3://my-bucket/baselines/tabular_baseline.csv"
)
# Step 2: Configure SHAP
shap_config = ClarifyShapConfig(
shap_baseline_config=baseline_config,
number_of_samples=1000, # High accuracy
seed=42,
use_logit=True # For probability model
)
# Step 3: Configure inference
inference_config = ClarifyInferenceConfig(
feature_headers=["age", "income", "education", "employment"],
max_payload_in_mb=6,
max_record_count=100 # Batch for efficiency
)
# Step 4: Create explainer
explainer_config = ClarifyExplainerConfig(
shap_config=shap_config,
inference_config=inference_config
)
# Deploy with explainability
endpoint = builder.deploy(
endpoint_name="explainable-tabular-endpoint",
explainer_config=explainer_config
)
# Make prediction with explanation
response = endpoint.invoke(data=[35, 50000, 16, 5])
# Response includes:
# {
# "predictions": 0.85,
# "explanations": {
# "kernel_shap": [
# {"feature": "age", "shap_value": 0.02},
# {"feature": "income", "shap_value": 0.15},
# {"feature": "education", "shap_value": 0.08},
# {"feature": "employment", "shap_value": -0.01}
# ]
# }
# }
Text Model Example:
from sagemaker.core.explainer import (
ClarifyExplainerConfig,
ClarifyShapConfig,
ClarifyShapBaselineConfig,
ClarifyInferenceConfig,
ClarifyTextConfig
)
# Configure text handling
text_config = ClarifyTextConfig(
language="en",
granularity="token"
)
# Configure baseline for NLP
baseline_config = ClarifyShapBaselineConfig(
mime_type="application/jsonlines",
shap_baseline='{"text": ""}' # Empty text baseline
)
# Configure SHAP for text
shap_config = ClarifyShapConfig(
shap_baseline_config=baseline_config,
number_of_samples=50, # Lower for text (slower)
text_config=text_config,
seed=42
)
# Configure inference for text model
inference_config = ClarifyInferenceConfig(
feature_types=["text"],
feature_headers=["review_text"],
features_attribute="text", # Input field
probability_attribute="predictions", # Output field
label_headers=["negative", "positive"],
content_template='{"text": $features}'
)
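# With this content_template, Clarify formats each synthetic record it sends to
# the container roughly as (illustrative):
# {"text": "This product is amazing and works great!"}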
# Create explainer
explainer_config = ClarifyExplainerConfig(
shap_config=shap_config,
inference_config=inference_config,
enable_explanations="`true`"
)
# Deploy
endpoint = builder.deploy(
endpoint_name="explainable-nlp-endpoint",
explainer_config=explainer_config
)
# Predict with token importance
response = endpoint.invoke(data={
"text": "This product is amazing and works great!"
})
# Response includes token-level SHAP values
# Shows which words contributed to positive sentiment
Conditional Explanation Strategies:
from sagemaker.core.explainer import ClarifyExplainerConfig, ClarifyShapConfig, ClarifyShapBaselineConfig
# Configure components
baseline_config = ClarifyShapBaselineConfig(
mime_type="text/csv",
shap_baseline="0.5,0.5,0.5,0.5"
)
shap_config = ClarifyShapConfig(
shap_baseline_config=baseline_config,
number_of_samples=100
)
# Always explain
always_explain = ClarifyExplainerConfig(
shap_config=shap_config,
enable_explanations="`true`"
)
# Explain only uncertain predictions (probability near 0.5)
uncertain_explain = ClarifyExplainerConfig(
shap_config=shap_config,
enable_explanations="probability > `0.4` && probability < `0.6`"
)
# Explain only high-risk predictions
high_risk_explain = ClarifyExplainerConfig(
shap_config=shap_config,
enable_explanations="predicted_label == `1` && probability > `0.9`"
)
Complete Deployment Example:
from sagemaker.serve import ModelBuilder
from sagemaker.core.explainer import (
ClarifyExplainerConfig,
ClarifyShapConfig,
ClarifyShapBaselineConfig,
ClarifyInferenceConfig
)
# Build model
builder = ModelBuilder(
model="my-model",
model_path="s3://bucket/model.tar.gz",
role_arn=role_arn
)
model = builder.build()
# Configure explainability
baseline_config = ClarifyShapBaselineConfig(
mime_type="text/csv",
shap_baseline="0,0,0,0"
)
shap_config = ClarifyShapConfig(
shap_baseline_config=baseline_config,
number_of_samples=100
)
inference_config = ClarifyInferenceConfig(
feature_headers=["f1", "f2", "f3", "f4"]
)
explainer_config = ClarifyExplainerConfig(
shap_config=shap_config,
inference_config=inference_config
)
# Deploy with explainability
try:
    endpoint = builder.deploy(
        endpoint_name="explainable-endpoint",
        initial_instance_count=1,
        instance_type="ml.m5.xlarge",
        explainer_config=explainer_config
    )
    # Wait for deployment
    endpoint.wait_for_in_service()
    # Make prediction with explanation
    response = endpoint.invoke(data=[1.0, 2.0, 3.0, 4.0])
    print(f"Prediction: {response['predictions']}")
    print(f"SHAP values: {response['explanations']}")
except ValueError as e:
    print(f"Configuration error: {e}")
Update Existing Endpoint:
from sagemaker.core.resources import Endpoint
# Get existing endpoint
endpoint = Endpoint.get("my-existing-endpoint")
# Create new config with explainability
from sagemaker.core.resources import EndpointConfig
new_config = EndpointConfig.create(
endpoint_config_name="config-with-explainer",
production_variants=[{
"VariantName": "variant-1",
"ModelName": endpoint.endpoint_config_name,
"InitialInstanceCount": 2,
"InstanceType": "ml.m5.xlarge"
}],
explainer_config=explainer_config._to_request_dict()
)
# Update endpoint
endpoint.update(endpoint_config_name=new_config.endpoint_config_name)
endpoint.wait_for_in_service()
print("Endpoint updated with explainability")Baseline Format Mismatch:
Feature Count Mismatch:
JMESPath Evaluation Error:
Text Config Language Not Supported:
Explanation Timeout:
Endpoint Update Failed: