Fit interpretable models and explain blackbox machine learning with comprehensive interpretability tools.
—
Differentially private machine learning models that provide formal privacy guarantees while maintaining interpretability for sensitive data applications.
Explainable Boosting Machine with formal differential privacy guarantees, suitable for sensitive datasets in healthcare, finance, and other privacy-critical domains.
class DPExplainableBoostingClassifier:
    """Explainable Boosting Machine classifier with formal differential privacy.

    Provides (epsilon, delta) differential-privacy guarantees while remaining
    interpretable, making it suitable for sensitive datasets in healthcare,
    finance, and other privacy-critical domains.
    """

    def __init__(
        self,
        epsilon=1.0,
        delta=None,
        feature_names=None,
        feature_types=None,
        max_bins=1024,
        interactions=0,
        validation_size=0.15,
        outer_bags=16,
        learning_rate=0.01,
        max_rounds=25000,
        early_stopping_rounds=50,
        random_state=None,
        n_jobs=-2,
        **kwargs
    ):
        """
        Differentially private EBM classifier.

        Parameters:
            epsilon (float): Privacy budget parameter
            delta (float, optional): Privacy parameter for approximate DP
            feature_names (list, optional): Names for features
            feature_types (list, optional): Types for features
            max_bins (int): Maximum bins for continuous features
            interactions (int): Number of feature interactions (limited for privacy)
            validation_size (float): Proportion for validation set
            outer_bags (int): Number of outer bags
            learning_rate (float): Learning rate
            max_rounds (int): Maximum boosting rounds
            early_stopping_rounds (int): Early stopping patience
            random_state (int, optional): Random seed
            n_jobs (int): Parallel jobs
            **kwargs: Additional EBM parameters
        """
        # sklearn estimator convention: __init__ only records hyperparameters.
        self.epsilon = epsilon
        self.delta = delta
        self.feature_names = feature_names
        self.feature_types = feature_types
        self.max_bins = max_bins
        self.interactions = interactions
        self.validation_size = validation_size
        self.outer_bags = outer_bags
        self.learning_rate = learning_rate
        self.max_rounds = max_rounds
        self.early_stopping_rounds = early_stopping_rounds
        self.random_state = random_state
        self.n_jobs = n_jobs
        self.kwargs = kwargs

    def fit(self, X, y, sample_weight=None):
        """Fit DP-EBM classifier with privacy guarantees."""

    def predict(self, X):
        """Make predictions."""

    def predict_proba(self, X):
        """Predict class probabilities."""

    def explain_global(self, name=None):
        """Get global explanation with privacy considerations."""

    def explain_local(self, X, y=None, name=None):
        """Get local explanations with privacy considerations."""
class DPExplainableBoostingRegressor:
    """Explainable Boosting Machine regressor with formal differential privacy.

    Regression counterpart of DPExplainableBoostingClassifier; same
    hyperparameters and privacy model.
    """

    def __init__(
        self,
        epsilon=1.0,
        delta=None,
        feature_names=None,
        feature_types=None,
        max_bins=1024,
        interactions=0,
        validation_size=0.15,
        outer_bags=16,
        learning_rate=0.01,
        max_rounds=25000,
        early_stopping_rounds=50,
        random_state=None,
        n_jobs=-2,
        **kwargs
    ):
        """
        Differentially private EBM regressor.

        Parameters: Same as DPExplainableBoostingClassifier
        """
        # sklearn estimator convention: __init__ only records hyperparameters.
        self.epsilon = epsilon
        self.delta = delta
        self.feature_names = feature_names
        self.feature_types = feature_types
        self.max_bins = max_bins
        self.interactions = interactions
        self.validation_size = validation_size
        self.outer_bags = outer_bags
        self.learning_rate = learning_rate
        self.max_rounds = max_rounds
        self.early_stopping_rounds = early_stopping_rounds
        self.random_state = random_state
        self.n_jobs = n_jobs
        self.kwargs = kwargs

    def fit(self, X, y, sample_weight=None):
        """Fit DP-EBM regressor with privacy guarantees."""

    def predict(self, X):
        """Make predictions."""

    def explain_global(self, name=None):
        """Get global explanation with privacy considerations."""

    def explain_local(self, X, y=None, name=None):
        """Get local explanations with privacy considerations."""


from interpret.privacy import DPExplainableBoostingClassifier
from interpret import show
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Load sensitive dataset
data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.2, random_state=42
)

# Train with differential privacy
dp_ebm = DPExplainableBoostingClassifier(
    epsilon=1.0,  # Privacy budget
    feature_names=data.feature_names,
    interactions=0,  # Disable interactions for stronger privacy
    random_state=42,
)
dp_ebm.fit(X_train, y_train)

# Get explanations (privacy-preserving)
global_exp = dp_ebm.explain_global(name="DP-EBM Global")
show(global_exp)
local_exp = dp_ebm.explain_local(X_test[:5], name="DP-EBM Local")
show(local_exp)

import numpy as np  # used by the epsilon-sweep examples below
from sklearn.metrics import accuracy_score

# Compare different epsilon values: smaller epsilon = stronger privacy,
# usually at the cost of accuracy.
epsilons = [0.1, 0.5, 1.0, 2.0, 5.0]
results = []
for eps in epsilons:
    dp_model = DPExplainableBoostingClassifier(
        epsilon=eps,
        random_state=42,
        interactions=0,
    )
    dp_model.fit(X_train, y_train)
    pred = dp_model.predict(X_test)
    acc = accuracy_score(y_test, pred)
    results.append({
        'epsilon': eps,
        'accuracy': acc,
        'privacy_strength': 'High' if eps < 1.0 else 'Medium' if eps < 5.0 else 'Low'
    })
    print(f"ε={eps}: Accuracy={acc:.4f}, Privacy={results[-1]['privacy_strength']}")

# Visualize trade-off
for result in results:
    model = DPExplainableBoostingClassifier(epsilon=result['epsilon'], random_state=42)
    model.fit(X_train, y_train)
    exp = model.explain_global(name=f"ε={result['epsilon']}")
    show(exp)

from interpret.privacy import DPExplainableBoostingRegressor
from sklearn.datasets import load_diabetes
from sklearn.metrics import mean_squared_error

# Load regression dataset
diabetes = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(
    diabetes.data, diabetes.target, test_size=0.2, random_state=42
)

# Train DP regressor
dp_regressor = DPExplainableBoostingRegressor(
    epsilon=2.0,
    feature_names=diabetes.feature_names,
    random_state=42,
)
dp_regressor.fit(X_train, y_train)

# Evaluate privacy-utility trade-off
pred = dp_regressor.predict(X_test)
mse = mean_squared_error(y_test, pred)
print(f"DP-EBM MSE: {mse:.2f}")

# Get explanations
global_exp = dp_regressor.explain_global(name="DP Regression Global")
show(global_exp)

from interpret.glassbox import ExplainableBoostingClassifier
# Compare standard EBM vs DP-EBM
# NOTE(review): X_train/y_train were last rebound to the diabetes regression
# split in the previous example — re-run the classification split first, or
# these classifiers will be fit on regression targets. TODO confirm intent.
models = {
    'Standard EBM': ExplainableBoostingClassifier(random_state=42),
    'DP-EBM (ε=1.0)': DPExplainableBoostingClassifier(epsilon=1.0, random_state=42),
    'DP-EBM (ε=0.5)': DPExplainableBoostingClassifier(epsilon=0.5, random_state=42)
}
for name, model in models.items():
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    acc = accuracy_score(y_test, pred)
    print(f"{name}: Accuracy = {acc:.4f}")
    # Show global explanations
    global_exp = model.explain_global(name=f"{name} Global")
    show(global_exp)

# Analyze privacy-utility curve
privacy_results = []
for eps in np.logspace(-1, 1, 10):  # 10 values from 0.1 to 10
    dp_model = DPExplainableBoostingClassifier(
        epsilon=eps,
        interactions=0,  # Safer for privacy
        random_state=42,
    )
    dp_model.fit(X_train, y_train)
    accuracy = accuracy_score(y_test, dp_model.predict(X_test))
    privacy_results.append((eps, accuracy))

# Plot privacy-utility curve (conceptual)
for eps, acc in privacy_results:
    print(f"ε={eps:.2f}: Accuracy={acc:.4f}")

# interactions=0 for stronger privacy
# Install with Tessl CLI
npx tessl i tessl/pypi-interpret