or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

blackbox.mddata.mdglassbox.mdgreybox.mdindex.mdperformance.mdprivacy.mdutils.mdvisualization.md

privacy.mddocs/

0

# Privacy-Preserving ML

Differentially private machine learning models that provide formal privacy guarantees while maintaining interpretability for sensitive data applications.

## Capabilities

### Differentially Private EBM

Explainable Boosting Machine with formal differential privacy guarantees, suitable for sensitive datasets in healthcare, finance, and other privacy-critical domains.

```python { .api }
class DPExplainableBoostingClassifier:
    def __init__(
        self,
        epsilon=1.0,
        delta=None,
        feature_names=None,
        feature_types=None,
        max_bins=1024,
        interactions=0,
        validation_size=0.15,
        outer_bags=16,
        learning_rate=0.01,
        max_rounds=25000,
        early_stopping_rounds=50,
        random_state=None,
        n_jobs=-2,
        **kwargs
    ):
        """
        Differentially private EBM classifier.

        Parameters:
            epsilon (float): Privacy budget parameter
            delta (float, optional): Privacy parameter for approximate DP
            feature_names (list, optional): Names for features
            feature_types (list, optional): Types for features
            max_bins (int): Maximum bins for continuous features
            interactions (int): Number of feature interactions (limited for privacy)
            validation_size (float): Proportion for validation set
            outer_bags (int): Number of outer bags
            learning_rate (float): Learning rate
            max_rounds (int): Maximum boosting rounds
            early_stopping_rounds (int): Early stopping patience
            random_state (int, optional): Random seed
            n_jobs (int): Parallel jobs
            **kwargs: Additional EBM parameters
        """

    def fit(self, X, y, sample_weight=None):
        """Fit DP-EBM classifier with privacy guarantees."""

    def predict(self, X):
        """Make predictions."""

    def predict_proba(self, X):
        """Predict class probabilities."""

    def explain_global(self, name=None):
        """Get global explanation with privacy considerations."""

    def explain_local(self, X, y=None, name=None):
        """Get local explanations with privacy considerations."""

class DPExplainableBoostingRegressor:
    def __init__(
        self,
        epsilon=1.0,
        delta=None,
        feature_names=None,
        feature_types=None,
        max_bins=1024,
        interactions=0,
        validation_size=0.15,
        outer_bags=16,
        learning_rate=0.01,
        max_rounds=25000,
        early_stopping_rounds=50,
        random_state=None,
        n_jobs=-2,
        **kwargs
    ):
        """
        Differentially private EBM regressor.

        Parameters: Same as DPExplainableBoostingClassifier
        """

    def fit(self, X, y, sample_weight=None):
        """Fit DP-EBM regressor with privacy guarantees."""

    def predict(self, X):
        """Make predictions."""

    def explain_global(self, name=None):
        """Get global explanation with privacy considerations."""

    def explain_local(self, X, y=None, name=None):
        """Get local explanations with privacy considerations."""
```

## Usage Examples

### Basic DP-EBM Usage

```python
from interpret.privacy import DPExplainableBoostingClassifier
from interpret import show
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Load sensitive dataset
data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.2, random_state=42
)

# Train with differential privacy
dp_ebm = DPExplainableBoostingClassifier(
    epsilon=1.0,  # Privacy budget
    feature_names=data.feature_names,
    interactions=0,  # Disable interactions for stronger privacy
    random_state=42
)
dp_ebm.fit(X_train, y_train)

# Get explanations (privacy-preserving)
global_exp = dp_ebm.explain_global(name="DP-EBM Global")
show(global_exp)

local_exp = dp_ebm.explain_local(X_test[:5], name="DP-EBM Local")
show(local_exp)
```

### Privacy Budget Analysis

```python
import numpy as np
from sklearn.metrics import accuracy_score

# Compare different epsilon values
epsilons = [0.1, 0.5, 1.0, 2.0, 5.0]
results = []

for eps in epsilons:
    dp_model = DPExplainableBoostingClassifier(
        epsilon=eps,
        random_state=42,
        interactions=0
    )
    dp_model.fit(X_train, y_train)

    pred = dp_model.predict(X_test)
    acc = accuracy_score(y_test, pred)

    results.append({
        'epsilon': eps,
        'accuracy': acc,
        'privacy_strength': 'High' if eps < 1.0 else 'Medium' if eps < 5.0 else 'Low'
    })

    print(f"ε={eps}: Accuracy={acc:.4f}, Privacy={results[-1]['privacy_strength']}")

# Visualize trade-off
for result in results:
    model = DPExplainableBoostingClassifier(epsilon=result['epsilon'], random_state=42)
    model.fit(X_train, y_train)
    exp = model.explain_global(name=f"ε={result['epsilon']}")
    show(exp)
```

### Regression with Privacy

```python
from interpret.privacy import DPExplainableBoostingRegressor
from sklearn.datasets import load_diabetes
from sklearn.metrics import mean_squared_error

# Load regression dataset
diabetes = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(
    diabetes.data, diabetes.target, test_size=0.2, random_state=42
)

# Train DP regressor
dp_regressor = DPExplainableBoostingRegressor(
    epsilon=2.0,
    feature_names=diabetes.feature_names,
    random_state=42
)
dp_regressor.fit(X_train, y_train)

# Evaluate privacy-utility trade-off
pred = dp_regressor.predict(X_test)
mse = mean_squared_error(y_test, pred)
print(f"DP-EBM MSE: {mse:.2f}")

# Get explanations
global_exp = dp_regressor.explain_global(name="DP Regression Global")
show(global_exp)
```

### Privacy-Preserving Model Comparison

```python
from interpret.glassbox import ExplainableBoostingClassifier

# Compare standard EBM vs DP-EBM
models = {
    'Standard EBM': ExplainableBoostingClassifier(random_state=42),
    'DP-EBM (ε=1.0)': DPExplainableBoostingClassifier(epsilon=1.0, random_state=42),
    'DP-EBM (ε=0.5)': DPExplainableBoostingClassifier(epsilon=0.5, random_state=42)
}

for name, model in models.items():
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    acc = accuracy_score(y_test, pred)

    print(f"{name}: Accuracy = {acc:.4f}")

    # Show global explanations
    global_exp = model.explain_global(name=f"{name} Global")
    show(global_exp)
```

## Privacy Considerations

### Epsilon Selection Guidelines

- **ε < 1.0**: Strong privacy protection, may reduce model utility
- **ε = 1.0**: Standard choice balancing privacy and utility
- **ε > 1.0**: Weaker privacy protection, better model utility
- **ε > 10**: Minimal privacy protection

### Privacy-Utility Trade-offs

```python
# Analyze privacy-utility curve
privacy_results = []

for eps in np.logspace(-1, 1, 10):  # 0.1 to 10
    dp_model = DPExplainableBoostingClassifier(
        epsilon=eps,
        interactions=0,  # Safer for privacy
        random_state=42
    )
    dp_model.fit(X_train, y_train)

    accuracy = accuracy_score(y_test, dp_model.predict(X_test))
    privacy_results.append((eps, accuracy))

# Plot privacy-utility curve (conceptual)
for eps, acc in privacy_results:
    print(f"ε={eps:.2f}: Accuracy={acc:.4f}")
```

### Best Practices

1. **Minimize interactions**: Set `interactions=0` for stronger privacy
2. **Validate epsilon choice**: Consider sensitivity of your data
3. **Use composition theorems**: Track cumulative privacy budget
4. **Validate explanations**: Ensure explanations don't leak private information
5. **Consider delta parameter**: Use for approximate DP when needed