# Model Evaluation

Tools for assessing model performance including classification metrics, regression metrics, clustering evaluation, and ranking metrics. MLlib provides comprehensive evaluation capabilities for all types of machine learning tasks.

## Capabilities

### Classification Evaluation

```scala { .api }
/**
 * BinaryClassificationEvaluator - evaluation metrics for binary classification
 * Supports ROC AUC and PR AUC metrics
 */
class BinaryClassificationEvaluator extends Evaluator with HasLabelCol with HasRawPredictionCol with HasWeightCol {
  def setLabelCol(value: String): this.type
  def setRawPredictionCol(value: String): this.type
  def setMetricName(value: String): this.type
  def setWeightCol(value: String): this.type
  def setNumBins(value: Int): this.type
}

/**
 * MulticlassClassificationEvaluator - evaluation metrics for multiclass classification
 * Supports accuracy, F1, precision, recall, and other metrics
 */
class MulticlassClassificationEvaluator extends Evaluator with HasLabelCol with HasPredictionCol with HasWeightCol {
  def setLabelCol(value: String): this.type
  def setPredictionCol(value: String): this.type
  def setMetricName(value: String): this.type
  def setWeightCol(value: String): this.type
  def setMetricLabel(value: Double): this.type
  def setBeta(value: Double): this.type
  def setEps(value: Double): this.type
}

/**
 * MultilabelClassificationEvaluator - evaluation metrics for multilabel classification
 * Supports subset accuracy, accuracy, hamming loss, precision, recall, and F1
 */
class MultilabelClassificationEvaluator extends Evaluator with HasLabelCol with HasPredictionCol {
  def setLabelCol(value: String): this.type
  def setPredictionCol(value: String): this.type
  def setMetricName(value: String): this.type
}
```

### Regression Evaluation

```scala { .api }
/**
 * RegressionEvaluator - evaluation metrics for regression models
 * Supports RMSE, MSE, R-squared, MAE, and explained variance
 */
class RegressionEvaluator extends Evaluator with HasLabelCol with HasPredictionCol with HasWeightCol {
  def setLabelCol(value: String): this.type
  def setPredictionCol(value: String): this.type
  def setMetricName(value: String): this.type
  def setWeightCol(value: String): this.type
  def setThroughOrigin(value: Boolean): this.type
}
```

### Clustering Evaluation

```scala { .api }
/**
 * ClusteringEvaluator - evaluation metrics for clustering models
 * Supports silhouette analysis for cluster quality assessment
 */
class ClusteringEvaluator extends Evaluator with HasFeaturesCol with HasPredictionCol with HasWeightCol {
  def setFeaturesCol(value: String): this.type
  def setPredictionCol(value: String): this.type
  def setMetricName(value: String): this.type
  def setDistanceMeasure(value: String): this.type
  def setWeightCol(value: String): this.type
}
```

### Ranking Evaluation

```scala { .api }
/**
 * RankingEvaluator - evaluation metrics for ranking and recommendation models
 * Supports mean average precision and mean average precision at K
 */
class RankingEvaluator extends Evaluator with HasLabelCol with HasPredictionCol {
  def setLabelCol(value: String): this.type
  def setPredictionCol(value: String): this.type
  def setMetricName(value: String): this.type
  def setK(value: Int): this.type
}
```

### Base Evaluator Class

```scala { .api }
/**
 * Evaluator - abstract base class for all model evaluators
 * Provides common interface for model evaluation
 */
abstract class Evaluator extends Params {
  def evaluate(dataset: Dataset[_]): Double
  def isLargerBetter: Boolean
  def copy(extra: ParamMap): Evaluator
}
```

## Usage Examples

### Binary Classification Evaluation

```scala
import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator

// ROC AUC evaluation
val evaluatorROC = new BinaryClassificationEvaluator()
  .setLabelCol("label")
  .setRawPredictionCol("rawPrediction")
  .setMetricName("areaUnderROC")

val auc = evaluatorROC.evaluate(predictions)
println(s"AUC = $auc")

// PR AUC evaluation
val evaluatorPR = new BinaryClassificationEvaluator()
  .setLabelCol("label")
  .setRawPredictionCol("rawPrediction")
  .setMetricName("areaUnderPR")

val aupr = evaluatorPR.evaluate(predictions)
println(s"Area under PR curve = $aupr")
```

### Multiclass Classification Evaluation

```scala
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator

// Accuracy
val evaluatorAcc = new MulticlassClassificationEvaluator()
  .setLabelCol("label")
  .setPredictionCol("prediction")
  .setMetricName("accuracy")

val accuracy = evaluatorAcc.evaluate(predictions)
println(s"Accuracy = $accuracy")

// F1 Score
val evaluatorF1 = new MulticlassClassificationEvaluator()
  .setLabelCol("label")
  .setPredictionCol("prediction")
  .setMetricName("f1")

val f1 = evaluatorF1.evaluate(predictions)
println(s"F1 score = $f1")

// Precision and Recall
val precision = new MulticlassClassificationEvaluator()
  .setMetricName("weightedPrecision")
  .evaluate(predictions)

val recall = new MulticlassClassificationEvaluator()
  .setMetricName("weightedRecall")
  .evaluate(predictions)

println(s"Weighted Precision = $precision")
println(s"Weighted Recall = $recall")
```

### Regression Evaluation

```scala
import org.apache.spark.ml.evaluation.RegressionEvaluator

// RMSE
val evaluatorRMSE = new RegressionEvaluator()
  .setLabelCol("label")
  .setPredictionCol("prediction")
  .setMetricName("rmse")

val rmse = evaluatorRMSE.evaluate(predictions)
println(s"Root Mean Squared Error (RMSE) = $rmse")

// R-squared
val evaluatorR2 = new RegressionEvaluator()
  .setLabelCol("label")
  .setPredictionCol("prediction")
  .setMetricName("r2")

val r2 = evaluatorR2.evaluate(predictions)
println(s"R-squared = $r2")

// MAE
val evaluatorMAE = new RegressionEvaluator()
  .setLabelCol("label")
  .setPredictionCol("prediction")
  .setMetricName("mae")

val mae = evaluatorMAE.evaluate(predictions)
println(s"Mean Absolute Error (MAE) = $mae")
```

### Clustering Evaluation

```scala
import org.apache.spark.ml.evaluation.ClusteringEvaluator

val evaluator = new ClusteringEvaluator()
  .setFeaturesCol("features")
  .setPredictionCol("prediction")
  .setMetricName("silhouette")
  .setDistanceMeasure("squaredEuclidean")

val silhouette = evaluator.evaluate(predictions)
println(s"Silhouette with squared euclidean distance = $silhouette")
```

## Supported Metrics

### Binary Classification Metrics
- `areaUnderROC`: Area under ROC curve
- `areaUnderPR`: Area under Precision-Recall curve

### Multiclass Classification Metrics
- `accuracy`: Overall accuracy
- `f1`: F1 score (weighted-averaged)
- `weightedPrecision`: Weighted precision
- `weightedRecall`: Weighted recall
- `weightedFMeasure`: Weighted F-measure
- `truePositiveRateByLabel`: True positive rate for specific label
- `falsePositiveRateByLabel`: False positive rate for specific label
- `precisionByLabel`: Precision for specific label
- `recallByLabel`: Recall for specific label
- `fMeasureByLabel`: F-measure for specific label

### Multilabel Classification Metrics
- `subsetAccuracy`: Subset accuracy (exact match)
- `accuracy`: Accuracy
- `hammingLoss`: Hamming loss
- `precision`: Precision
- `recall`: Recall
- `f1Measure`: F1 measure

### Regression Metrics
- `rmse`: Root Mean Squared Error
- `mse`: Mean Squared Error
- `r2`: R-squared (coefficient of determination)
- `mae`: Mean Absolute Error
- `var`: Explained variance

### Clustering Metrics
- `silhouette`: Silhouette coefficient

### Ranking Metrics
- `meanAveragePrecision`: Mean Average Precision
- `meanAveragePrecisionAtK`: Mean Average Precision at K