A scikit-learn-compatible module for estimating prediction intervals using conformal prediction methods.
Conformal prediction methods for classification that provide prediction sets containing the true label with specified probability. MAPIE implements split conformal and cross conformal approaches with various conformity scores for robust uncertainty quantification in classification tasks.
Implements split conformal prediction for classification, providing prediction sets with exact finite-sample coverage guarantees. Divides data into training and conformalization sets for efficient uncertainty quantification.
class SplitConformalClassifier:
"""
Split conformal classification predictor.
Parameters:
- estimator: ClassifierMixin, base classification estimator (default: LogisticRegression())
- confidence_level: Union[float, List[float]], target coverage level (default: 0.9)
- conformity_score: Union[str, BaseClassificationScore], conformity score method (default: "lac")
- prefit: bool, whether estimator is already fitted (default: True)
- n_jobs: Optional[int], number of parallel jobs
- verbose: int, verbosity level (default: 0)
- random_state: Optional[int], random seed
"""
def __init__(self, estimator=None, confidence_level=0.9, conformity_score="lac", prefit=True, n_jobs=None, verbose=0, random_state=None): ...
def fit(self, X_train, y_train, fit_params=None):
"""
Fit the base classifier.
Parameters:
- X_train: ArrayLike, training features
- y_train: ArrayLike, training labels
- fit_params: Optional[Dict], parameters passed to estimator.fit()
Returns:
Self
"""
def conformalize(self, X_conformalize, y_conformalize, predict_params=None):
"""
Estimate prediction set thresholds using conformalization set.
Parameters:
- X_conformalize: ArrayLike, conformalization features
- y_conformalize: ArrayLike, conformalization labels
- predict_params: Optional[Dict], parameters passed to estimator.predict()
Returns:
Self
"""
def predict_set(self, X, conformity_score_params=None):
"""
Predict sets for new data.
Parameters:
- X: ArrayLike, test features
- conformity_score_params: Optional[Dict], parameters for conformity score computation
Returns:
Tuple[NDArray, NDArray]: (prediction_sets, prediction_probabilities)
"""
def predict(self, X):
"""
Predict most likely labels for new data.
Parameters:
- X: ArrayLike, test features
Returns:
NDArray: predicted labels
"""

Implements cross conformal prediction using cross-validation for classification. Provides better data utilization and robust prediction sets with ensemble aggregation.
class CrossConformalClassifier:
"""
Cross conformal classification predictor.
Parameters:
- estimator: ClassifierMixin, base classification estimator (default: LogisticRegression())
- confidence_level: Union[float, List[float]], target coverage level (default: 0.9)
- conformity_score: Union[str, BaseClassificationScore], conformity score method (default: "lac")
- cv: Union[int, BaseCrossValidator], cross-validation strategy (default: 5)
- n_jobs: Optional[int], number of parallel jobs
- verbose: int, verbosity level (default: 0)
- random_state: Optional[int], random seed
"""
def __init__(self, estimator=None, confidence_level=0.9, conformity_score="lac", cv=5, n_jobs=None, verbose=0, random_state=None): ...
def fit_conformalize(self, X, y, groups=None, fit_params=None, predict_params=None):
"""
Fit classifiers and compute conformity scores using cross-validation.
Parameters:
- X: ArrayLike, input features
- y: ArrayLike, class labels
- groups: Optional[ArrayLike], group labels for cross-validation
- fit_params: Optional[Dict], parameters passed to estimator.fit()
- predict_params: Optional[Dict], parameters passed to estimator.predict()
Returns:
Self
"""
def predict_set(self, X, conformity_score_params=None, agg_scores="mean"):
"""
Predict sets using cross conformal method.
Parameters:
- X: ArrayLike, test features
- conformity_score_params: Optional[Dict], parameters for conformity score computation
- agg_scores: str, score aggregation method ("mean", "crossval") (default: "mean")
Returns:
Tuple[NDArray, NDArray]: (prediction_sets, prediction_probabilities)
"""
def predict(self, X):
"""
Predict most likely labels using ensemble voting.
Parameters:
- X: ArrayLike, test features
Returns:
NDArray: predicted labels
"""

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from mapie.classification import SplitConformalClassifier
import numpy as np
# Prepare data
X_train, X_calib, y_train, y_calib = train_test_split(X, y, test_size=0.3, stratify=y)
# Fit base classifier
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
# Create conformal predictor
mapie_clf = SplitConformalClassifier(
estimator=rf,
prefit=True,
confidence_level=0.9
)
# Conformalize
mapie_clf.conformalize(X_calib, y_calib)
# Predict with sets
y_pred_sets, y_pred_proba = mapie_clf.predict_set(X_test)

from mapie.classification import CrossConformalClassifier
from sklearn.linear_model import LogisticRegression
# Multi-level prediction sets
mapie_clf = CrossConformalClassifier(
estimator=LogisticRegression(),
confidence_level=[0.8, 0.9, 0.95],
conformity_score="aps", # Adaptive Prediction Sets
cv=10,
random_state=42
)
# Fit and predict
mapie_clf.fit_conformalize(X, y)
y_pred_sets, y_pred_proba = mapie_clf.predict_set(X_test)
# y_pred_sets shape: (n_samples, n_classes, n_confidence_levels)

from mapie.conformity_scores.sets import RAPSConformityScore
# RAPS (Regularized Adaptive Prediction Sets) conformity score
raps_score = RAPSConformityScore(
penalty=0.01, # Regularization penalty
weight_penalty=0.1 # Weight regularization
)
mapie_clf = SplitConformalClassifier(
estimator=RandomForestClassifier(n_estimators=50),
conformity_score=raps_score,
confidence_level=0.9
)

Available conformity scores for classification:
conformity_score="lac" # Default score based on 1 - P(y_true)

Default conformity score using the complement of the true class probability. Simple and effective for most classification tasks.
conformity_score="top_k" # Based on ranking of true class

Uses the rank of the true class in the sorted probability predictions. Good for scenarios where ranking matters.
conformity_score="aps" # Adaptive prediction sets

Provides adaptive prediction sets that automatically adjust set sizes based on prediction confidence. Effective for controlling set size while maintaining coverage.
from mapie.conformity_scores.sets import RAPSConformityScore
raps_score = RAPSConformityScore(penalty=0.01)
conformity_score=raps_score

Enhanced version of APS with regularization terms to prevent overly large prediction sets. Includes penalty terms for set size control.
from mapie.conformity_scores import BaseClassificationScore
class CustomScore(BaseClassificationScore):
def __call__(self, y_prob_true, y_prob_pred):
"""
Compute conformity scores.
Parameters:
- y_prob_true: NDArray, probabilities for true classes
- y_prob_pred: NDArray, predicted probabilities
Returns:
NDArray: conformity scores
"""
return 1 - y_prob_true # Example implementation

MAPIE handles multi-class classification automatically:
# Works with any number of classes
y_multiclass = np.array([0, 1, 2, 3, 1, 2, 0]) # 4 classes
mapie_clf.fit_conformalize(X, y_multiclass)

# Analyze prediction sets
y_pred_sets, _ = mapie_clf.predict_set(X_test)
# Set sizes
set_sizes = np.sum(y_pred_sets, axis=1)
mean_set_size = np.mean(set_sizes)
# Coverage analysis
coverage = np.mean([y_test[i] in np.where(y_pred_sets[i])[0]
for i in range(len(y_test))])
print(f"Average set size: {mean_set_size:.2f}")
print(f"Empirical coverage: {coverage:.3f}")

# Stratified cross-validation for imbalanced data
from sklearn.model_selection import StratifiedKFold
stratified_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
mapie_clf = CrossConformalClassifier(
estimator=RandomForestClassifier(class_weight='balanced'),
cv=stratified_cv,
confidence_level=0.9
)

Install with Tessl CLI
npx tessl i tessl/pypi-mapie