
tessl/pypi-shap

A unified approach to explain the output of any machine learning model.

Model Explainers

SHAP provides specialized explainer algorithms optimized for different model types, each with its own performance characteristics and mathematical guarantees. Most explainers expose both the modern callable interface (__call__), which returns an Explanation object, and the legacy shap_values interface, which returns raw arrays.

Capabilities

Tree Ensemble Explainers

High-speed exact algorithms for tree-based models including XGBoost, LightGBM, CatBoost, and scikit-learn tree ensembles.

class TreeExplainer:
    """
    Exact SHAP values for tree ensemble models using optimized algorithms.
    
    Supports XGBoost, LightGBM, CatBoost, scikit-learn tree models with
    polynomial time complexity and exact mathematical guarantees.
    """
    def __init__(
        self, 
        model, 
        data=None, 
        model_output="raw", 
        feature_perturbation="auto", 
        feature_names=None,
        link=None,
        linearize_link=None
    ):
        """
        Parameters:
        - model: Tree-based ML model (XGBoost, LightGBM, CatBoost, sklearn)
        - data: Background dataset for feature integration (optional)
        - model_output: Output format ("raw", "probability", "log_loss")
        - feature_perturbation: Perturbation method ("auto", "interventional", "tree_path_dependent")  
        - feature_names: List of feature names (optional)
        - link: Link function connecting feature attributions to the model output (optional)
        - linearize_link: Whether to linearize the link function (optional)
        """
    
    def __call__(self, X, y=None, interactions=False, check_additivity=True, approximate=False) -> Explanation:
        """
        Compute SHAP values for input samples.
        
        Parameters:
        - X: Input samples (array-like, DataFrame)
        - y: Target values for multi-output models (optional)
        - interactions: Compute interaction values (bool)
        - check_additivity: Verify SHAP values sum correctly (bool)
        - approximate: Use approximation for speed (bool)
        
        Returns:
        Explanation object with SHAP values and metadata
        """
    
    def shap_values(self, X, y=None, tree_limit=None, approximate=False, check_additivity=True):
        """Legacy interface returning raw numpy arrays."""
    
    def shap_interaction_values(self, X, y=None, tree_limit=None):
        """Compute SHAP interaction values (pairwise feature interactions)."""
    
    @property
    def expected_value(self):
        """Expected value of model output (baseline)."""

class GPUTreeExplainer(TreeExplainer):
    """
    GPU-accelerated tree explanations (experimental).
    
    Requires CUDA build with 'CUDA_PATH' environment variable.
    """
    def __init__(self, model):
        """Initialize GPU tree explainer for supported tree models."""

Usage Example:

import shap
from xgboost import XGBClassifier

# Train model
model = XGBClassifier()
model.fit(X_train, y_train)

# Create explainer and compute SHAP values
explainer = shap.TreeExplainer(model)
shap_values = explainer(X_test)

# Access components
print(f"Expected value: {explainer.expected_value}")
print(f"SHAP values shape: {shap_values.values.shape}")

Model-Agnostic Explainers

Universal explainers that work with any model type through sampling-based approaches.

class KernelExplainer:
    """
    Model-agnostic explainer using weighted linear regression.
    
    Works with any model by sampling feature coalitions around the input and
    solving a weighted least-squares problem whose solution is the SHAP values.
    Satisfies the local accuracy guarantee of the SHAP framework.
    """
    def __init__(self, model, data, feature_names=None, link="identity"):
        """
        Parameters:
        - model: Function/model taking samples and returning predictions
        - data: Background dataset for masking (array, DataFrame, sparse matrix)
        - feature_names: List of feature names (optional)
        - link: Link function ("identity" or "logit")
        """
    
    def __call__(self, X, l1_reg="num_features(10)", silent=False) -> Explanation:
        """
        Compute SHAP values through sampling and optimization.
        
        Parameters:
        - X: Input samples to explain
        - l1_reg: Regularization ("num_features(int)", "aic", "bic", or float)
        - silent: Hide progress bar (bool)
        """
    
    def shap_values(self, X, nsamples="auto", l1_reg="num_features(10)", silent=False, gc_collect=False):
        """
        Legacy interface with additional parameters.
        
        Parameters:
        - nsamples: Number of samples ("auto" or int)
        - l1_reg: Regularization method
        - silent: Hide progress bar
        - gc_collect: Run garbage collection
        """
    
    @property
    def expected_value(self):
        """Expected value of model output."""

class PermutationExplainer:
    """
    Model-agnostic explainer using permutation sampling.
    
    Approximates SHAP values by iterating through feature permutations.
    Guarantees local accuracy with hierarchical structure support.
    """
    def __init__(self, model, masker, link="identity", feature_names=None, seed=None):
        """
        Parameters:
        - model: Model function to explain
        - masker: Masker object for feature perturbation
        - link: Link function for output transformation
        - feature_names: List of feature names (optional)
        - seed: Random seed for reproducibility
        """
    
    def __call__(self, *args, max_evals=500, main_effects=False, error_bounds=False, 
                 batch_size="auto", outputs=None, silent=False):
        """Compute SHAP values using permutation sampling."""

class SamplingExplainer(KernelExplainer):
    """
    Extension of the Shapley sampling values (IME) method.
    
    Assumes feature independence and works well with large background datasets.
    """
    def __init__(self, model, data, **kwargs):
        """Initialize sampling explainer with feature independence assumption."""
    
    def __call__(self, X, y=None, nsamples=2000):
        """Compute SHAP values under feature independence."""

Deep Learning Explainers

Specialized explainers for neural networks using gradient-based and compositional approaches.

class DeepExplainer:
    """
    Deep learning explainer using compositional rules from DeepLIFT.
    
    Supports TensorFlow and PyTorch models with automatic framework detection.
    Uses backpropagation for efficient computation.
    """
    def __init__(self, model, data, session=None, learning_phase_flags=None):
        """
        Parameters:
        - model: Neural network model
          - TensorFlow: (input_tensors, output_tensor) or tf.keras.Model
          - PyTorch: nn.Module or (model, layer) tuple
        - data: Background dataset matching model input format
        - session: TensorFlow session (optional)
        - learning_phase_flags: Custom learning phase flags (TensorFlow)
        """
    
    def __call__(self, X) -> Explanation:
        """Compute SHAP values using compositional rules."""
    
    def shap_values(self, X, ranked_outputs=None, output_rank_order="max", check_additivity=True):
        """
        Legacy interface with output ranking.
        
        Parameters:
        - ranked_outputs: Number of top outputs to explain
        - output_rank_order: Ranking method ("max", "min", "max_abs")
        - check_additivity: Verify SHAP values sum correctly
        """

class GradientExplainer:
    """
    Gradient-based explainer for neural networks.
    
    Uses integration over straight-line paths in input space.
    Supports both TensorFlow and PyTorch.
    """
    def __init__(self, model, data, session=None, batch_size=50, local_smoothing=0):
        """
        Parameters:
        - model: Neural network model (TensorFlow or PyTorch)
        - data: Background dataset for integration
        - batch_size: Batch size for gradient computation
        - local_smoothing: Local smoothing parameter
        """
    
    def __call__(self, X, nsamples=200) -> Explanation:
        """
        Compute SHAP values using gradient integration.
        
        Parameters:
        - X: Input samples (framework-specific tensor format)
        - nsamples: Number of background samples for integration
        """
    
    def shap_values(self, X, nsamples=200, ranked_outputs=None, 
                    output_rank_order="max", rseed=None, return_variances=False):
        """
        Legacy interface with variance estimation.
        
        Parameters:
        - return_variances: Return variance estimates along with SHAP values
        - rseed: Random seed for reproducibility
        """
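
The straight-line path integration behind GradientExplainer (expected gradients) can be sketched in plain NumPy for a toy model with a known gradient. Nothing below is shap API; all names are illustrative.

```python
import numpy as np

rng = np.random.default_rng(0)

# Toy differentiable model and its analytic gradient.
def f(x):
    return (x ** 2).sum(axis=-1)

def grad_f(x):
    return 2.0 * x

x = np.array([1.0, 2.0])                 # sample to explain
background = rng.normal(size=(200, 2))   # reference distribution

# Expected gradients: average the gradient along straight-line paths from
# random background points to x, weighted by the input difference.
alphas = rng.uniform(size=(5000, 1))
refs = background[rng.integers(0, len(background), size=5000)]
points = refs + alphas * (x - refs)
attr = ((x - refs) * grad_f(points)).mean(axis=0)

# Completeness holds in expectation: attributions sum to f(x) - E[f(ref)].
print(attr.sum(), f(x) - f(background).mean())
```

GradientExplainer does the same kind of Monte Carlo path integration, using the framework's autograd instead of an analytic gradient and nsamples background draws per input.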

Linear Model Explainers

Optimized explainers for linear models with correlation handling.

class LinearExplainer:
    """
    Explainer for linear models with feature correlation support.
    
    Handles sklearn linear models or (coefficients, intercept) tuples
    with efficient computation and correlation-aware masking.
    """
    def __init__(self, model, masker, link="identity", nsamples=1000, feature_perturbation=None):
        """
        Parameters:
        - model: Linear model (sklearn model or (coef, intercept) tuple)
        - masker: Masker object, data matrix, or (mean, covariance) tuple
        - link: Link function for output transformation
        - nsamples: Samples for correlation estimation
        - feature_perturbation: "interventional" or "correlation_dependent" (deprecated)
        """
    
    def shap_values(self, X):
        """
        Compute SHAP values for linear model.
        
        Parameters:
        - X: Input samples (array, DataFrame, or sparse matrix)
        
        Returns:
        Array of SHAP values matching input shape
        """
    
    @property
    def expected_value(self):
        """Expected value of model output."""
    
    @property
    def coef(self):
        """Model coefficients."""
    
    @property 
    def intercept(self):
        """Model intercept."""

class AdditiveExplainer:
    """
    Explainer for generalized additive models.
    
    Optimized for models with only first-order effects (no interactions).
    Assumes additive structure for efficient computation.
    """
    def __init__(self, model, masker, link=None, feature_names=None):
        """Initialize explainer for additive models without interactions."""
    
    def __call__(self, *args, max_evals=None, silent=False):
        """Compute SHAP values assuming additive model structure."""

Exact and Advanced Explainers

Specialized explainers for specific use cases and mathematical guarantees.

class ExactExplainer:
    """
    Exact SHAP computation via optimized enumeration.
    
    Computes exact SHAP values for models with small feature sets (<15 features).
    Uses Gray codes for efficient evaluation ordering.
    """
    def __init__(self, model, masker, link="identity", linearize_link=True, feature_names=None):
        """Initialize exact explainer for small feature sets."""
    
    def __call__(self, *args, max_evals=100000, main_effects=False, 
                 error_bounds=False, batch_size="auto", interactions=1, silent=False):
        """
        Compute exact SHAP values.
        
        Parameters:
        - max_evals: Maximum model evaluations before stopping
        - main_effects: Compute main effects separately
        - error_bounds: Compute confidence bounds
        - interactions: Interaction order to compute (1 for main effects only)
        """

class PartitionExplainer:
    """
    Partition SHAP using hierarchical Owen values.
    
    Computes Owen values through feature hierarchy with quadratic runtime.
    Handles correlated features via hierarchical clustering.
    """
    def __init__(self, model, masker, output_names=None, link="identity", 
                 linearize_link=True, feature_names=None):
        """Initialize partition explainer with hierarchical feature grouping."""
    
    def __call__(self, *args, max_evals=500, fixed_context=None, main_effects=False,
                 error_bounds=False, batch_size="auto", outputs=None, silent=False):
        """Compute Owen values through feature partitioning."""

class CoalitionExplainer:
    """
    Coalition-based explanations using Winter values.
    
    Recursive Owen values for predefined feature coalitions.
    """
    def __init__(self, model, masker, output_names=None, link="identity", 
                 linearize_link=True, feature_names=None, partition_tree=None):
        """
        Initialize coalition explainer.
        
        Parameters:
        - partition_tree: Dictionary defining hierarchical feature groups
        """
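
What ExactExplainer computes can be illustrated with a brute-force Shapley enumeration over a 3-feature toy model. shap itself orders the evaluations with Gray codes to avoid redundant model calls; this sketch does not, and none of its names are shap API.

```python
import itertools
import math

# Toy model: linear in feature 0, with an interaction between features 1 and 2.
def model(x):
    return 2.0 * x[0] + x[1] * x[2]

x = [1.0, 1.0, 1.0]          # sample to explain
baseline = [0.0, 0.0, 0.0]   # background reference

def value(subset):
    # Evaluate the model with features outside `subset` set to the baseline.
    masked = [x[i] if i in subset else baseline[i] for i in range(len(x))]
    return model(masked)

n = len(x)
phi = [0.0] * n
for i in range(n):
    others = [j for j in range(n) if j != i]
    for k in range(n):
        for S in itertools.combinations(others, k):
            w = math.factorial(k) * math.factorial(n - k - 1) / math.factorial(n)
            phi[i] += w * (value(set(S) | {i}) - value(set(S)))

print(phi)   # ≈ [2.0, 0.5, 0.5]: the interaction term is split evenly
```

Note the efficiency (local accuracy) property: the attributions sum to model(x) - model(baseline). The loop makes O(2^n) model evaluations, which is why ExactExplainer is recommended only for small feature sets.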

Usage Patterns

Choosing the Right Explainer

  • TreeExplainer: Use for XGBoost, LightGBM, CatBoost, sklearn tree models (fastest, exact)
  • KernelExplainer: Universal fallback for any model (slower, model-agnostic)
  • DeepExplainer: Neural networks with TensorFlow/PyTorch (fast, compositional rules)
  • GradientExplainer: Neural networks requiring gradient information
  • LinearExplainer: Linear models with correlation handling
  • ExactExplainer: Small feature sets requiring mathematical guarantees
  • PartitionExplainer: Correlated features requiring hierarchical explanations

Common Interface Pattern

All explainers follow consistent patterns:

# Modern interface (recommended)
explainer = shap.TreeExplainer(model)
explanation = explainer(X)  # Returns Explanation object

# Legacy interface (backward compatibility)  
shap_values = explainer.shap_values(X)  # Returns numpy array

# Access baseline
baseline = explainer.expected_value

Alternative and Benchmark Explainers

Additional explainers for specialized use cases, benchmarking, and integration with other explanation libraries.

class Coefficient:
    """
    Returns model coefficients as feature attributions.
    
    Benchmark explainer that simply returns model coefficients for each
    sample. Only works with linear models having a coef_ attribute.
    """
    def __init__(self, model):
        """
        Parameters:
        - model: Linear model with coef_ attribute (sklearn linear models)
        """
    
    def attributions(self, X):
        """
        Return tiled coefficients as attributions.
        
        Parameters:
        - X: Input samples (array-like)
        
        Returns:
        numpy.ndarray: Coefficients tiled for each sample
        """

class LimeTabular:
    """
    LIME integration wrapper for tabular data explanations.
    
    Wraps lime.lime_tabular.LimeTabularExplainer into SHAP interface.
    Requires lime package installation.
    """
    def __init__(self, model, data, mode="classification"):
        """
        Parameters:
        - model: Model function taking samples and returning predictions
        - data: Background dataset for LIME (array-like or DataFrame)
        - mode: "classification" or "regression"
        """
    
    def attributions(self, X, nsamples=5000, num_features=None):
        """
        Compute LIME explanations through SHAP interface.
        
        Parameters:
        - X: Input samples to explain
        - nsamples: Number of samples for LIME perturbation
        - num_features: Number of features to include in explanation
        
        Returns:
        Attributions array(s) for each output dimension
        """

class Maple:
    """
    Model-Agnostic Locally-Accurate Explanations (MAPLE).
    
    Local linear approximation method that builds decision trees
    around query points for explanations.
    """
    def __init__(self, model, data, verbose=False):
        """
        Parameters:
        - model: Model function to explain
        - data: Training data for building local models
        - verbose: Print debugging information
        """
    
    def attributions(self, X):
        """Compute MAPLE attributions using local linear models."""

class TreeMaple(Maple):
    """
    Tree-based variant of MAPLE explainer.
    
    Uses tree ensemble models as local approximators instead
    of linear models for complex decision boundaries.
    """
    def __init__(self, model, data, verbose=False):
        """Initialize TreeMaple with tree-based local models."""

class Random:
    """
    Random baseline explainer for benchmarking.
    
    Returns random attributions for comparison with actual explainers.
    Used to establish baseline performance in evaluation studies.
    """
    def __init__(self, model, data, seed=None):
        """
        Parameters:
        - model: Model to explain (used for output dimensionality)
        - data: Background data (used for feature dimensionality)
        - seed: Random seed for reproducibility
        """
    
    def attributions(self, X):
        """
        Generate random attributions.
        
        Returns:
        Random attributions matching input shape
        """

class TreeGain:
    """
    Tree gain-based feature importance as explanations.
    
    Uses feature importance from tree models as local attributions.
    Benchmark method that doesn't provide true SHAP values.
    """
    def __init__(self, model, data=None):
        """
        Parameters:
        - model: Tree-based model with feature_importances_ attribute
        - data: Background data (optional, used for baseline estimation)
        """
    
    def attributions(self, X):
        """
        Return tree feature importances as attributions.
        
        Returns:
        Feature importances tiled for each sample
        """

Alternative Explainers Usage:

import shap

# Coefficient explainer for linear models
from sklearn.linear_model import LogisticRegression
model = LogisticRegression().fit(X_train, y_train)
explainer = shap.explainers.other.Coefficient(model)
attributions = explainer.attributions(X_test)

# LIME integration (requires: pip install lime)
explainer = shap.explainers.other.LimeTabular(model, X_train, mode="classification")
attributions = explainer.attributions(X_test, nsamples=1000)

# MAPLE local linear explanations
explainer = shap.explainers.other.Maple(model, X_train)
attributions = explainer.attributions(X_test)

# Random baseline for benchmarking
explainer = shap.explainers.other.Random(model, X_train, seed=42)
random_attributions = explainer.attributions(X_test)

Error Handling

Common exceptions and error conditions:

  • InvalidModelError: Unsupported model type for specific explainer
  • DimensionError: Input dimension mismatch with training data
  • ConvergenceError: Optimization failed to converge (exact explainers)
  • ImportError: Missing optional dependencies (TensorFlow, PyTorch, lime)

Install with Tessl CLI

npx tessl i tessl/pypi-shap
