A Python package to assess and improve fairness of machine learning models.
Neural network-based approaches using adversarial training to learn fair representations while maintaining predictive utility. These methods use adversarial networks to remove sensitive information from learned representations.
Implements adversarial fairness for classification tasks using neural networks. Trains a predictor network alongside an adversary network that tries to predict sensitive attributes from the predictor's internal representations.
class AdversarialFairnessClassifier:
    """Adversarial fairness classifier using neural networks.

    Trains a predictor network alongside an adversary network that tries to
    predict sensitive attributes from the predictor's internal
    representations, pushing sensitive information out of the learned
    representation while maintaining predictive utility.

    NOTE(review): the methods below are documentation stubs (docstring-only
    bodies); the actual implementation ships with the fairlearn package.
    """

    def __init__(self, backend="torch", *, predictor_model=None, adversary_model=None,
                 alpha=1.0, epochs=1, batch_size=32, shuffle=True, progress_updates=None,
                 skip_validation=False, callbacks=None, random_state=None):
        """
        Adversarial fairness classifier using neural networks.

        Parameters:
        - backend: str, neural network backend ("torch" or "tensorflow")
        - predictor_model: neural network model for the prediction task
        - adversary_model: neural network model for the adversary task
        - alpha: float, strength of adversarial training (higher = more fairness emphasis)
        - epochs: int, number of training epochs (default 1)
        - batch_size: int, batch size for training (default 32)
        - shuffle: bool, whether to shuffle training data each epoch
        - progress_updates: callable, callback for training progress updates
        - skip_validation: bool, whether to skip input validation
        - callbacks: list, training callbacks (a callback returning True signals training to stop)
        - random_state: int, random seed for reproducibility
        """

    def fit(self, X, y, *, sensitive_features, sample_weight=None):
        """
        Fit the adversarial fairness classifier.

        Parameters:
        - X: array-like, feature matrix
        - y: array-like, target values
        - sensitive_features: array-like, sensitive feature values (keyword-only)
        - sample_weight: array-like, optional per-sample weights

        Returns:
        self
        """

    def predict(self, X):
        """
        Make predictions using the trained fair classifier.

        Parameters:
        - X: array-like, feature matrix

        Returns:
        array-like: Predicted class labels
        """

    def predict_proba(self, X):
        """
        Predict class probabilities.

        Parameters:
        - X: array-like, feature matrix

        Returns:
        array-like: Predicted class probabilities, shape (n_samples, n_classes)
        """

from fairlearn.adversarial import AdversarialFairnessClassifier
import numpy as np
# Create adversarial fairness classifier
afc = AdversarialFairnessClassifier(
backend="torch", # or "tensorflow"
alpha=1.0, # Fairness strength
epochs=50, # Training epochs
batch_size=64,
random_state=42
)
# Fit the model
afc.fit(X_train, y_train, sensitive_features=A_train)
# Make predictions
predictions = afc.predict(X_test)
probabilities = afc.predict_proba(X_test)Implements adversarial fairness for regression tasks, training a predictor to minimize prediction error while preventing an adversary from predicting sensitive attributes.
class AdversarialFairnessRegressor:
    """Adversarial fairness regressor using neural networks.

    Trains a predictor to minimize regression error while preventing an
    adversary network from recovering sensitive attributes from the
    predictor's internal representations.

    NOTE(review): the methods below are documentation stubs (docstring-only
    bodies); the actual implementation ships with the fairlearn package.
    """

    def __init__(self, backend="torch", *, predictor_model=None, adversary_model=None,
                 alpha=1.0, epochs=1, batch_size=32, shuffle=True, progress_updates=None,
                 skip_validation=False, callbacks=None, random_state=None):
        """
        Adversarial fairness regressor using neural networks.

        Parameters:
        - backend: str, neural network backend ("torch" or "tensorflow")
        - predictor_model: neural network model for the regression task
        - adversary_model: neural network model for the adversary task
        - alpha: float, strength of adversarial training
        - epochs: int, number of training epochs (default 1)
        - batch_size: int, batch size for training (default 32)
        - shuffle: bool, whether to shuffle training data each epoch
        - progress_updates: callable, callback for training progress updates
        - skip_validation: bool, whether to skip input validation
        - callbacks: list, training callbacks
        - random_state: int, random seed for reproducibility
        """

    def fit(self, X, y, *, sensitive_features, sample_weight=None):
        """
        Fit the adversarial fairness regressor.

        Parameters:
        - X: array-like, feature matrix
        - y: array-like, continuous target values
        - sensitive_features: array-like, sensitive feature values (keyword-only)
        - sample_weight: array-like, optional per-sample weights

        Returns:
        self
        """

    def predict(self, X):
        """
        Make regression predictions.

        Parameters:
        - X: array-like, feature matrix

        Returns:
        array-like: Predicted continuous values
        """

The default backend uses PyTorch for neural network implementation:
# Using PyTorch backend (default)
classifier = AdversarialFairnessClassifier(
backend="torch",
epochs=100,
batch_size=128
)Alternative backend using TensorFlow:
# Using TensorFlow backend
classifier = AdversarialFairnessClassifier(
backend="tensorflow",
epochs=100,
batch_size=128
)You can provide custom neural network architectures:
import torch
import torch.nn as nn
# Define custom predictor model
class CustomPredictor(nn.Module):
    """Feed-forward binary predictor.

    Two hidden blocks (Linear -> ReLU -> Dropout(0.2)) followed by a single
    sigmoid output unit, so the forward pass maps (batch, input_dim) to
    (batch, 1) probabilities.
    """

    def __init__(self, input_dim, hidden_dim=64):
        super().__init__()
        # Assemble the module list first, then wrap it in a Sequential.
        # Plain Sequential(*stack) keeps the same numeric submodule names
        # as an inline Sequential, so state_dict keys are unchanged.
        stack = []
        for fan_in in (input_dim, hidden_dim):
            stack += [nn.Linear(fan_in, hidden_dim), nn.ReLU(), nn.Dropout(0.2)]
        stack += [nn.Linear(hidden_dim, 1), nn.Sigmoid()]
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Run the input through the stacked layers."""
        return self.layers(x)
# Use custom model
predictor = CustomPredictor(input_dim=X_train.shape[1])
classifier = AdversarialFairnessClassifier(
backend="torch",
predictor_model=predictor,
alpha=2.0,
epochs=200
)Customize the adversary network architecture:
class CustomAdversary(nn.Module):
    """Small classifier that tries to recover the sensitive attribute.

    A funnel of linear layers (input_dim -> 32 -> 16 -> n_sensitive_classes)
    with ReLU activations between hidden layers and a softmax over the
    sensitive classes at the output.
    """

    def __init__(self, input_dim, n_sensitive_classes):
        super().__init__()
        widths = [input_dim, 32, 16, n_sensitive_classes]
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())
        # Final activation is a softmax over classes, not another ReLU.
        stack[-1] = nn.Softmax(dim=1)
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Map representations to a probability distribution over sensitive classes."""
        return self.layers(x)
# Create adversary for binary sensitive attribute
adversary = CustomAdversary(
input_dim=64, # Should match predictor's representation size
n_sensitive_classes=2
)
classifier = AdversarialFairnessClassifier(
backend="torch",
predictor_model=predictor,
adversary_model=adversary,
alpha=1.5
)Key hyperparameters to tune for adversarial training:
# Alpha controls fairness-accuracy trade-off
alphas = [0.1, 0.5, 1.0, 2.0, 5.0]
results = {}
for alpha in alphas:
classifier = AdversarialFairnessClassifier(
alpha=alpha,
epochs=100,
batch_size=64,
random_state=42
)
classifier.fit(X_train, y_train, sensitive_features=A_train)
predictions = classifier.predict(X_test)
# Evaluate fairness and accuracy
results[alpha] = evaluate_model(predictions, y_test, A_test)Monitor training progress with custom callbacks:
def progress_callback(epoch, predictor_loss, adversary_loss, adversary_accuracy):
    """Print a one-line training summary every tenth epoch; silent otherwise."""
    if epoch % 10:
        return
    summary = (
        f"Epoch {epoch}: Predictor Loss={predictor_loss:.4f}, "
        f"Adversary Loss={adversary_loss:.4f}, "
        f"Adversary Acc={adversary_accuracy:.4f}"
    )
    print(summary)
classifier = AdversarialFairnessClassifier(
progress_updates=progress_callback,
epochs=200
)Handle sensitive attributes with multiple categories:
# Sensitive feature with 3 categories
sensitive_features = ['group_A', 'group_B', 'group_C'] * (len(X_train) // 3)
classifier = AdversarialFairnessClassifier(
alpha=1.0,
epochs=150
)
classifier.fit(X_train, y_train, sensitive_features=sensitive_features)Choose appropriate batch sizes based on dataset size:
# For small datasets
small_classifier = AdversarialFairnessClassifier(batch_size=16)
# For large datasets
large_classifier = AdversarialFairnessClassifier(batch_size=256)
# Adaptive batch size based on data size
batch_size = min(128, len(X_train) // 10)
adaptive_classifier = AdversarialFairnessClassifier(batch_size=batch_size)Implement custom early stopping:
class EarlyStoppingCallback:
    """Stop adversarial training once the predictor loss plateaus.

    Tracks the best predictor loss seen so far; after `patience` consecutive
    epochs without an improvement of at least `min_delta`, __call__ returns
    True to signal the trainer to halt.
    """

    def __init__(self, patience=10, min_delta=0.001):
        self.patience = patience      # epochs to tolerate without improvement
        self.min_delta = min_delta    # minimum decrease that counts as progress
        self.best_loss = float('inf')
        self.wait = 0                 # consecutive epochs without improvement

    def __call__(self, epoch, predictor_loss, adversary_loss, adversary_accuracy):
        improved = predictor_loss < self.best_loss - self.min_delta
        if improved:
            self.best_loss = predictor_loss
            self.wait = 0
        else:
            self.wait += 1
        if self.wait < self.patience:
            return False
        print(f"Early stopping at epoch {epoch}")
        return True  # Signal to stop training
early_stopping = EarlyStoppingCallback(patience=15)
classifier = AdversarialFairnessClassifier(
callbacks=[early_stopping],
epochs=1000 # Large number, early stopping will control actual epochs
)Combine with fairness assessment tools:
from fairlearn.metrics import MetricFrame, equalized_odds_difference

# Fit an adversarially fair model.
afc = AdversarialFairnessClassifier(alpha=1.0, epochs=100)
afc.fit(X_train, y_train, sensitive_features=A_train)

# Score the held-out data.
predictions = afc.predict(X_test)
probabilities = afc.predict_proba(X_test)

# Disaggregate accuracy and selection rate by sensitive group.
fairness_metrics = MetricFrame(
    metrics={
        'accuracy': lambda y_true, y_pred: (y_true == y_pred).mean(),
        'selection_rate': lambda y_true, y_pred: y_pred.mean()
    },
    y_true=y_test,
    y_pred=predictions,
    sensitive_features=A_test
)
print("Adversarial fairness results:")
print(fairness_metrics.by_group)
eod = equalized_odds_difference(y_test, predictions, sensitive_features=A_test)
print(f"Equalized odds difference: {eod}")
# Recommended starting configuration
classifier = AdversarialFairnessClassifier(
backend="torch",
alpha=1.0, # Balanced trade-off
epochs=100, # Sufficient for convergence
batch_size=64, # Good balance for most datasets
shuffle=True, # Important for training stability
random_state=42 # For reproducibility
)Install with Tessl CLI
npx tessl i tessl/pypi-fairlearn