A unified approach to explain the output of any machine learning model.
npx @tessl/cli install tessl/pypi-shap@0.48.0

SHAP (SHapley Additive exPlanations) is a comprehensive machine learning explainability library that provides a game-theoretic approach to explain the output of any machine learning model. The library connects optimal credit allocation with local explanations using classic Shapley values from game theory, offering a unified framework that encompasses multiple explanation methods including LIME, DeepLIFT, and others.
pip install shap

import shap

Common imports for specific functionality:
# Core explanation classes
from shap import Explanation, Cohorts
# Explainers
from shap import TreeExplainer, KernelExplainer, DeepExplainer
from shap import LinearExplainer, GradientExplainer
from shap.explainers import other # Alternative explainers (LIME, MAPLE, etc.)
# Plotting functions
import shap.plots as shap_plots
# or individual imports
from shap import force_plot, waterfall_plot, summary_plot
# Datasets and utilities
from shap import datasets, utils

import shap
# Quick-start example: train a scikit-learn model, then explain its
# predictions with SHAP.
# NOTE(review): assumes `import shap` has already been executed.
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
# Load a dataset (adult() returns a feature DataFrame and a label array)
X, y = shap.datasets.adult()
# Train a model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X, y)
# Create explainer and compute SHAP values for every row of X
explainer = shap.TreeExplainer(model)
shap_values = explainer(X)
# Visualize explanations
shap.plots.waterfall(shap_values[0])  # Single prediction
shap.plots.beeswarm(shap_values)      # All predictions
shap.plots.bar(shap_values)  # Feature importance

SHAP provides a unified explainability framework built around several key components:
This design enables high-performance explanations across diverse model architectures while maintaining mathematical guarantees and providing intuitive visualizations for understanding model behavior.
High-performance explanation algorithms optimized for specific model types, providing exact or approximate SHAP values with mathematical guarantees for local accuracy and consistency.
class TreeExplainer:
    """Explainer for tree-based models (stub signature).

    Per the surrounding text, explainers of this kind provide exact or
    approximate SHAP values optimized for specific model types.
    """
    # data: optional background dataset; model_output / feature_perturbation
    # presumably select the attribution mode — TODO confirm against shap docs.
    def __init__(self, model, data=None, model_output="raw", feature_perturbation="auto", feature_names=None): ...
    # Returns an Explanation for the rows of X; interactions=True requests
    # interaction values, check_additivity toggles an additivity check.
    def __call__(self, X, y=None, interactions=False, check_additivity=True) -> Explanation: ...
class KernelExplainer:
    """Kernel-based SHAP explainer (stub signature).

    Takes any callable model plus a required background dataset `data`.
    """
    # link: transformation applied between model output and SHAP space
    # (default "identity").
    def __init__(self, model, data, feature_names=None, link="identity"): ...
    # l1_reg default "num_features(10)" suggests feature-selection
    # regularization capped at 10 features — TODO confirm.
    def __call__(self, X, l1_reg="num_features(10)", silent=False) -> Explanation: ...
class DeepExplainer:
    """Explainer for deep learning models (stub signature)."""
    # session / learning_phase_flags suggest a TensorFlow-style backend —
    # NOTE(review): confirm supported frameworks against the shap docs.
    def __init__(self, model, data, session=None, learning_phase_flags=None): ...
    def __call__(self, X) -> Explanation: ...

Comprehensive visualization functions for understanding and communicating model explanations, including interactive plots, summary visualizations, and detailed analysis charts.
# Waterfall plot for a single explanation (used in the quick-start as
# shap.plots.waterfall(shap_values[0])); shows up to max_display features.
def waterfall(shap_values, max_display=10, show=True): ...
# Beeswarm summary plot across all predictions; `order` controls feature
# ordering (default: mean absolute SHAP value, per Explanation.abs.mean(0)).
def beeswarm(shap_values, max_display=10, order=Explanation.abs.mean(0), show=True): ...
# Bar chart of feature importance (used in the quick-start for global
# importance); default ordering by absolute SHAP values.
def bar(shap_values, max_display=10, order=Explanation.abs, show=True): ...
# Force plot anchored at base_value; matplotlib=False presumably selects an
# interactive rendering, True a static one — TODO confirm.
def force(base_value, shap_values=None, features=None, matplotlib=False, show=True): ...
def heatmap(shap_values, instance_order=Explanation.hclust(), max_display=10, show=True): ...

Built-in datasets, masking strategies, utility functions, and helper classes for data preprocessing, sampling, and analysis workflows.
# Datasets — each loader returns (features, labels); n_points limits the
# number of rows returned.
def adult(display=False, n_points=None) -> tuple[pd.DataFrame, np.ndarray]: ...
def california(n_points=None) -> tuple[pd.DataFrame, np.ndarray]: ...
def imagenet50(resolution=224, n_points=None) -> tuple[np.ndarray, np.ndarray]: ...
# Maskers — masking strategies used when hiding features during explanation
class Independent:
    # data: background dataset; at most max_samples rows are used
    def __init__(self, data, max_samples=100): ...
class Text:
    # mask_token replaces hidden tokens; output_type selects the masked
    # representation (default "string")
    def __init__(self, tokenizer=None, mask_token=None, output_type="string"): ...
# Utilities
# Draw nsamples rows from X, deterministically via random_state.
def sample(X, nsamples=100, random_state=0): ...
def approximate_interactions(index, shap_values, X, feature_names=None) -> np.ndarray: ...

Core types and classes used throughout the SHAP library:
class Explanation:
    """Container for SHAP values with rich metadata and operations."""
    def __init__(self, values, base_values=None, data=None, display_data=None,
                 instance_names=None, feature_names=None, output_names=None,
                 output_indexes=None, lower_bounds=None, upper_bounds=None,
                 error_std=None, main_effects=None, hierarchical_values=None,
                 clustering=None, compute_time=None): ...
    # Core properties
    values: np.ndarray        # SHAP attribution values
    base_values: np.ndarray   # Model baseline values
    data: np.ndarray          # Original input data
    feature_names: list[str]  # Feature names
    output_names: list[str]   # Output names
    # Analysis methods — each of mean/max/sum/sample returns a new Explanation
    # (per their annotated return types).
    def mean(self, axis=None) -> 'Explanation': ...
    def max(self, axis=None) -> 'Explanation': ...
    # grouping presumably lets related columns be aggregated together — TODO confirm
    def sum(self, axis=None, grouping=None) -> 'Explanation': ...
    def sample(self, max_samples, replace=False, random_state=0) -> 'Explanation': ...
    # Hierarchical clustering along `axis` using the given distance metric.
    def hclust(self, metric="sqeuclidean", axis=0): ...
    # Partition this explanation into named cohorts for comparative analysis.
    def cohorts(self, cohorts) -> 'Cohorts': ...
class Cohorts:
    """Manages multiple explanation cohorts for comparative analysis."""
    # explanations: the per-cohort Explanation objects;
    # cohort_labels / cohort_names identify each cohort.
    def __init__(self, explanations, cohort_labels=None, cohort_names=None): ...