or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

calibration.md classification.md index.md metrics.md regression.md risk-control.md utils.md

docs/classification.md

0

# Classification Methods

1

2

Conformal prediction methods for classification that produce prediction sets containing the true label with a specified probability. MAPIE implements split conformal and cross conformal approaches with a choice of conformity scores for robust uncertainty quantification in classification tasks.

3

4

## Capabilities

5

6

### Split Conformal Classification

7

8

Implements split conformal prediction for classification, providing prediction sets with finite-sample marginal coverage guarantees (coverage of at least the requested confidence level, on average over the data). The data is divided into a training set and a conformalization set for efficient uncertainty quantification.

9

10

```python { .api }

11

class SplitConformalClassifier:

12

"""

13

Split conformal classification predictor.

14

15

Parameters:

16

- estimator: ClassifierMixin, base classification estimator (default: LogisticRegression())

17

- confidence_level: Union[float, List[float]], target coverage level (default: 0.9)

18

- conformity_score: Union[str, BaseClassificationScore], conformity score method (default: "lac")

19

- prefit: bool, whether estimator is already fitted (default: True)

20

- n_jobs: Optional[int], number of parallel jobs

21

- verbose: int, verbosity level (default: 0)

22

- random_state: Optional[int], random seed

23

"""

24

def __init__(self, estimator=None, confidence_level=0.9, conformity_score="lac", prefit=True, n_jobs=None, verbose=0, random_state=None): ...

25

26

def fit(self, X_train, y_train, fit_params=None):

27

"""

28

Fit the base classifier.

29

30

Parameters:

31

- X_train: ArrayLike, training features

32

- y_train: ArrayLike, training labels

33

- fit_params: Optional[Dict], parameters passed to estimator.fit()

34

35

Returns:

36

Self

37

"""

38

39

def conformalize(self, X_conformalize, y_conformalize, predict_params=None):

40

"""

41

Estimate prediction set thresholds using conformalization set.

42

43

Parameters:

44

- X_conformalize: ArrayLike, conformalization features

45

- y_conformalize: ArrayLike, conformalization labels

46

- predict_params: Optional[Dict], parameters passed to estimator.predict()

47

48

Returns:

49

Self

50

"""

51

52

def predict_set(self, X, conformity_score_params=None):

53

"""

54

Predict sets for new data.

55

56

Parameters:

57

- X: ArrayLike, test features

58

- conformity_score_params: Optional[Dict], parameters for conformity score computation

59

60

Returns:

61

Tuple[NDArray, NDArray]: (prediction_sets, prediction_probabilities)

62

"""

63

64

def predict(self, X):

65

"""

66

Predict most likely labels for new data.

67

68

Parameters:

69

- X: ArrayLike, test features

70

71

Returns:

72

NDArray: predicted labels

73

"""

74

```

75

76

### Cross Conformal Classification

77

78

Implements cross conformal prediction using cross-validation for classification. Provides better data utilization and robust prediction sets with ensemble aggregation.

79

80

```python { .api }

81

class CrossConformalClassifier:

82

"""

83

Cross conformal classification predictor.

84

85

Parameters:

86

- estimator: ClassifierMixin, base classification estimator (default: LogisticRegression())

87

- confidence_level: Union[float, List[float]], target coverage level (default: 0.9)

88

- conformity_score: Union[str, BaseClassificationScore], conformity score method (default: "lac")

89

- cv: Union[int, BaseCrossValidator], cross-validation strategy (default: 5)

90

- n_jobs: Optional[int], number of parallel jobs

91

- verbose: int, verbosity level (default: 0)

92

- random_state: Optional[int], random seed

93

"""

94

def __init__(self, estimator=None, confidence_level=0.9, conformity_score="lac", cv=5, n_jobs=None, verbose=0, random_state=None): ...

95

96

def fit_conformalize(self, X, y, groups=None, fit_params=None, predict_params=None):

97

"""

98

Fit classifiers and compute conformity scores using cross-validation.

99

100

Parameters:

101

- X: ArrayLike, input features

102

- y: ArrayLike, class labels

103

- groups: Optional[ArrayLike], group labels for cross-validation

104

- fit_params: Optional[Dict], parameters passed to estimator.fit()

105

- predict_params: Optional[Dict], parameters passed to estimator.predict()

106

107

Returns:

108

Self

109

"""

110

111

def predict_set(self, X, conformity_score_params=None, agg_scores="mean"):

112

"""

113

Predict sets using cross conformal method.

114

115

Parameters:

116

- X: ArrayLike, test features

117

- conformity_score_params: Optional[Dict], parameters for conformity score computation

118

- agg_scores: str, score aggregation method ("mean", "crossval") (default: "mean")

119

120

Returns:

121

Tuple[NDArray, NDArray]: (prediction_sets, prediction_probabilities)

122

"""

123

124

def predict(self, X):

125

"""

126

Predict most likely labels using ensemble voting.

127

128

Parameters:

129

- X: ArrayLike, test features

130

131

Returns:

132

NDArray: predicted labels

133

"""

134

```

135

136

## Usage Examples

137

138

### Basic Split Conformal Classification

139

140

```python

141

from sklearn.ensemble import RandomForestClassifier

142

from sklearn.model_selection import train_test_split

143

from mapie.classification import SplitConformalClassifier

144

import numpy as np

145

146

# Prepare data

147

X_train, X_calib, y_train, y_calib = train_test_split(X, y, test_size=0.3, stratify=y)

148

149

# Fit base classifier

150

rf = RandomForestClassifier(n_estimators=100, random_state=42)

151

rf.fit(X_train, y_train)

152

153

# Create conformal predictor

154

mapie_clf = SplitConformalClassifier(

155

estimator=rf,

156

prefit=True,

157

confidence_level=0.9

158

)

159

160

# Conformalize

161

mapie_clf.conformalize(X_calib, y_calib)

162

163

# Predict with sets

164

y_pred_sets, y_pred_proba = mapie_clf.predict_set(X_test)

165

```

166

167

### Cross Conformal with Multiple Confidence Levels

168

169

```python

170

from mapie.classification import CrossConformalClassifier

171

from sklearn.linear_model import LogisticRegression

172

173

# Multi-level prediction sets

174

mapie_clf = CrossConformalClassifier(

175

estimator=LogisticRegression(),

176

confidence_level=[0.8, 0.9, 0.95],

177

conformity_score="aps", # Adaptive Prediction Sets

178

cv=10,

179

random_state=42

180

)

181

182

# Fit and predict

183

mapie_clf.fit_conformalize(X, y)

184

y_pred_sets, y_pred_proba = mapie_clf.predict_set(X_test)

185

186

# y_pred_sets shape: (n_samples, n_classes, n_confidence_levels)

187

```

188

189

### Using Different Conformity Scores

190

191

```python

192

from mapie.conformity_scores.sets import RAPSConformityScore

193

194

# RAPS (Regularized Adaptive Prediction Sets) conformity score

195

raps_score = RAPSConformityScore(

196

penalty=0.01, # Regularization penalty

197

weight_penalty=0.1 # Weight regularization

198

)

199

200

mapie_clf = SplitConformalClassifier(

201

estimator=RandomForestClassifier(n_estimators=50),

202

conformity_score=raps_score,

203

confidence_level=0.9

204

)

205

```

206

207

## Conformity Scores

208

209

Available conformity scores for classification:

210

211

### Least Ambiguous Classification (LAC)

212

213

```python { .api }

214

conformity_score="lac" # Default score based on 1 - P(y_true)

215

```

216

217

Default conformity score using the complement of the true class probability. Simple and effective for most classification tasks.

218

219

### Top-K Score

220

221

```python { .api }

222

conformity_score="top_k" # Based on ranking of true class

223

```

224

225

Uses the rank of the true class in the sorted probability predictions. Good for scenarios where ranking matters.

226

227

### Adaptive Prediction Sets (APS)

228

229

```python { .api }

230

conformity_score="aps" # Adaptive prediction sets

231

```

232

233

Provides adaptive prediction sets that automatically adjust set sizes based on prediction confidence. Effective for controlling set size while maintaining coverage.

234

235

### Regularized Adaptive Prediction Sets (RAPS)

236

237

```python { .api }

238

from mapie.conformity_scores.sets import RAPSConformityScore

239

240

raps_score = RAPSConformityScore(penalty=0.01)

241

conformity_score=raps_score

242

```

243

244

Enhanced version of APS with regularization terms to prevent overly large prediction sets. Includes penalty terms for set size control.

245

246

### Custom Conformity Scores

247

248

```python { .api }

249

from mapie.conformity_scores import BaseClassificationScore

250

251

class CustomScore(BaseClassificationScore):

252

def __call__(self, y_prob_true, y_prob_pred):

253

"""

254

Compute conformity scores.

255

256

Parameters:

257

- y_prob_true: NDArray, probabilities for true classes

258

- y_prob_pred: NDArray, predicted probabilities

259

260

Returns:

261

NDArray: conformity scores

262

"""

263

return 1 - y_prob_true # Example implementation

264

```

265

266

## Advanced Features

267

268

### Multi-Class Support

269

270

MAPIE handles multi-class classification automatically:

271

272

```python

273

# Works with any number of classes

274

y_multiclass = np.array([0, 1, 2, 3, 1, 2, 0]) # 4 classes

275

mapie_clf.fit_conformalize(X, y_multiclass)

276

```

277

278

### Prediction Set Analysis

279

280

```python

281

# Analyze prediction sets

282

y_pred_sets, _ = mapie_clf.predict_set(X_test)

283

284

# Set sizes

285

set_sizes = np.sum(y_pred_sets, axis=1)

286

mean_set_size = np.mean(set_sizes)

287

288

# Coverage analysis

289

coverage = np.mean([y_test[i] in np.where(y_pred_sets[i])[0]

290

for i in range(len(y_test))])

291

292

print(f"Average set size: {mean_set_size:.2f}")

293

print(f"Empirical coverage: {coverage:.3f}")

294

```

295

296

### Handling Imbalanced Classes

297

298

```python

299

# Stratified cross-validation for imbalanced data

300

from sklearn.model_selection import StratifiedKFold

301

302

stratified_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

303

304

mapie_clf = CrossConformalClassifier(

305

estimator=RandomForestClassifier(class_weight='balanced'),

306

cv=stratified_cv,

307

confidence_level=0.9

308

)

309

```