# Model Evaluation and Validation

Orange3 provides a comprehensive evaluation framework for assessing machine learning model performance through various validation techniques and metrics.

## Capabilities

### Cross-Validation Methods

Different strategies for splitting data and validating model performance.

```python { .api }
class CrossValidation:
    """
    k-fold cross-validation.

    Args:
        data: Dataset for evaluation
        learners: List of learners to evaluate
        k: Number of folds
        stratified: Use stratified sampling
        random_state: Random seed for reproducibility
    """
    def __init__(self, data, learners, k=10, stratified=True, random_state=None): ...

class LeaveOneOut:
    """
    Leave-one-out cross-validation.

    Args:
        data: Dataset for evaluation
        learners: List of learners to evaluate
    """
    def __init__(self, data, learners): ...

class TestOnTrainingData:
    """
    Evaluate on the same data used for training.

    Args:
        data: Training dataset
        learners: List of learners to evaluate
    """
    def __init__(self, data, learners): ...

class TestOnTestData:
    """
    Train on one dataset, test on another.

    Args:
        train_data: Training dataset
        test_data: Test dataset
        learners: List of learners to evaluate
    """
    def __init__(self, train_data, test_data, learners): ...
```
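
As a quick orientation, here is a minimal sketch running two of these splitters end to end. One hedge: newer Orange3 releases favor a two-step style in which the splitter is configured first and then called with the data and learners; the single-call constructors documented above still work in those versions but may emit a deprecation warning.

```python
from Orange.data import Table
from Orange.classification import LogisticRegressionLearner
from Orange.evaluation import CA, CrossValidation, LeaveOneOut

data = Table("iris")
learners = [LogisticRegressionLearner()]

# Two-step style used by newer Orange3 releases: configure the
# splitter, then call it with data and learners to get Results
cv = CrossValidation(k=5, stratified=True, random_state=0)
results = cv(data, learners)
print("5-fold CA:", CA(results))

# Leave-one-out trains len(data) models, so expect it to be slow
# on anything larger than a small dataset
loo_results = LeaveOneOut()(data, learners)
print("LOO CA:", CA(loo_results))
```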

### Classification Metrics

Performance measures for classification tasks.

```python { .api }
def CA(results):
    """
    Classification Accuracy.

    Args:
        results: Evaluation results object

    Returns:
        ndarray: Accuracy scores for each learner
    """

def AUC(results):
    """
    Area Under the ROC Curve.

    Args:
        results: Evaluation results object

    Returns:
        ndarray: AUC scores for each learner
    """

def Precision(results, pos_label=1, average='binary'):
    """
    Precision score.

    Args:
        results: Evaluation results object
        pos_label: Positive class label
        average: Averaging strategy

    Returns:
        ndarray: Precision scores
    """

def Recall(results, pos_label=1, average='binary'):
    """
    Recall score.

    Args:
        results: Evaluation results object
        pos_label: Positive class label
        average: Averaging strategy

    Returns:
        ndarray: Recall scores
    """

def F1(results, pos_label=1, average='binary'):
    """
    F1 score.

    Args:
        results: Evaluation results object
        pos_label: Positive class label
        average: Averaging strategy

    Returns:
        ndarray: F1 scores
    """

def PrecisionRecallFSupport(results, pos_label=1, average='binary'):
    """
    Combined precision, recall, and F1 scores.

    Args:
        results: Evaluation results object
        pos_label: Positive class label
        average: Averaging strategy

    Returns:
        tuple: (precision, recall, f1, support) arrays
    """
```
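
A short sketch of the per-class and averaged forms on a binary task, relying entirely on the signatures documented above (in particular, `pos_label` is taken here to be the index of the positive class value). The `titanic` dataset is a binary-target dataset bundled with Orange3.

```python
from Orange.data import Table
from Orange.classification import LogisticRegressionLearner
from Orange.evaluation import CrossValidation, F1, Precision, Recall

data = Table("titanic")  # binary target: survived (no/yes)
results = CrossValidation(data, [LogisticRegressionLearner()], k=5)

# Per-class scores for the positive class, per the pos_label
# parameter documented above
print("Precision:", Precision(results, pos_label=1))
print("Recall:   ", Recall(results, pos_label=1))

# Averaged form: one number per learner across both classes
print("F1 (weighted):", F1(results, average='weighted'))
```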

### Regression Metrics

Performance measures for regression tasks.

```python { .api }
def MSE(results):
    """
    Mean Squared Error.

    Args:
        results: Evaluation results object

    Returns:
        ndarray: MSE scores for each learner
    """

def RMSE(results):
    """
    Root Mean Squared Error.

    Args:
        results: Evaluation results object

    Returns:
        ndarray: RMSE scores for each learner
    """

def MAE(results):
    """
    Mean Absolute Error.

    Args:
        results: Evaluation results object

    Returns:
        ndarray: MAE scores for each learner
    """

def R2(results):
    """
    R-squared coefficient of determination.

    Args:
        results: Evaluation results object

    Returns:
        ndarray: R² scores for each learner
    """
```
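
These scorers are related in the usual ways (RMSE is the square root of MSE), which gives a quick sanity check on any evaluation run. A minimal sketch on the bundled `housing` dataset:

```python
import numpy as np
from Orange.data import Table
from Orange.evaluation import MSE, RMSE, CrossValidation
from Orange.regression import LinearRegressionLearner

res = CrossValidation(Table("housing"), [LinearRegressionLearner()], k=5)

# RMSE should equal the element-wise square root of MSE
assert np.allclose(RMSE(res), np.sqrt(MSE(res)))
print("MSE:", MSE(res), "RMSE:", RMSE(res))
```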

### Advanced Classification Metrics

Specialized metrics for detailed model analysis.

```python { .api }
def LogLoss(results):
    """
    Logarithmic loss for probabilistic predictions.

    Args:
        results: Evaluation results object

    Returns:
        ndarray: Log loss scores
    """

def Specificity(results, pos_label=1):
    """
    Specificity (True Negative Rate).

    Args:
        results: Evaluation results object
        pos_label: Positive class label

    Returns:
        ndarray: Specificity scores
    """

def Sensitivity(results, pos_label=1):
    """
    Sensitivity (True Positive Rate, same as Recall).

    Args:
        results: Evaluation results object
        pos_label: Positive class label

    Returns:
        ndarray: Sensitivity scores
    """
```
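
Unlike accuracy, LogLoss scores the predicted probabilities, so a model can classify well and still fare badly if its probability estimates are over-confident. A minimal sketch comparing two learners, assuming the scorers above are importable from `Orange.evaluation`:

```python
from Orange.data import Table
from Orange.classification import NaiveBayesLearner, TreeLearner
from Orange.evaluation import CA, CrossValidation, LogLoss

data = Table("titanic")
results = CrossValidation(data, [NaiveBayesLearner(), TreeLearner()], k=5)

# Similar accuracies can hide very different probability quality;
# lower log loss is better
print("CA:     ", CA(results))
print("LogLoss:", LogLoss(results))
```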

### Results Container

Object containing evaluation results and predictions.

```python { .api }
class Results:
    """
    Container for evaluation results.

    Attributes:
        data: Original dataset
        predicted: Predicted values
        probabilities: Prediction probabilities (if available)
        actual: True target values
        learners: List of learners used
        folds: Cross-validation fold information
    """
    def __init__(self, data=None, learners=None): ...

    @property
    def predicted(self):
        """Predicted class labels."""

    @property
    def probabilities(self):
        """Prediction probabilities."""

    @property
    def actual(self):
        """True class labels."""
```
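
The arrays on `Results` make it straightforward to compute anything the built-in scorers do not cover. One hedge on shapes: `predicted` is indexed by learner first, while `actual` is a flat array of class indices. A sketch building a confusion matrix with `sklearn.metrics`, which Orange3 already depends on:

```python
from sklearn.metrics import confusion_matrix
from Orange.data import Table
from Orange.classification import TreeLearner
from Orange.evaluation import CrossValidation

data = Table("iris")
results = CrossValidation(data, [TreeLearner()], k=5)

# results.predicted has shape (n_learners, n_instances);
# take row 0 for the first (and only) learner
print(confusion_matrix(results.actual, results.predicted[0]))
```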

### Clustering Evaluation

Metrics for unsupervised learning evaluation.

```python { .api }
class ClusteringEvaluation:
    """Evaluation methods for clustering algorithms."""

    @staticmethod
    def adjusted_rand_score(labels_true, labels_pred):
        """Adjusted Rand Index."""

    @staticmethod
    def silhouette_score(X, labels):
        """Silhouette coefficient."""

    @staticmethod
    def calinski_harabasz_score(X, labels):
        """Calinski-Harabasz index."""
```
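
These static methods mirror the `sklearn.metrics` functions of the same names, which is presumably what they delegate to. A sketch computing the same quantities directly on k-means labels for iris; the use of scikit-learn here is an assumption of convenience, not part of the Orange3 API above:

```python
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score, silhouette_score
from Orange.data import Table

data = Table("iris")
labels = KMeans(n_clusters=3, n_init=10, random_state=0).fit_predict(data.X)

# Silhouette needs only the features; ARI compares cluster labels
# against the known classes, used here as ground truth
print("Silhouette:", silhouette_score(data.X, labels))
print("ARI:       ", adjusted_rand_score(data.Y, labels))
```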

### Statistical Testing

Statistical significance testing for model comparison.

```python { .api }
def compute_CD(avranks, n, alpha='0.05', test='nemenyi'):
    """
    Compute critical difference for statistical significance testing.

    Args:
        avranks: Average ranks of methods
        n: Number of datasets
        alpha: Significance level
        test: Statistical test ('nemenyi', 'bonferroni-dunn')

    Returns:
        float: Critical difference value
    """
```
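
A typical use, adapted from the Orange3 documentation: average the per-dataset ranks of the compared methods, compute the critical difference, and draw a critical-difference diagram with the companion helper `graph_ranks` (also in `Orange.evaluation`). The ranks below are illustrative numbers, not real results.

```python
import matplotlib.pyplot as plt
from Orange.evaluation import compute_CD, graph_ranks

# Illustrative average ranks of four methods over 30 datasets
names = ["method A", "method B", "method C", "method D"]
avranks = [1.9, 3.2, 2.8, 3.3]

cd = compute_CD(avranks, 30)  # Nemenyi test at alpha = 0.05
graph_ranks(avranks, names, cd=cd, width=6, textspace=1.5)
plt.show()
```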

### Usage Examples

```python
# Basic evaluation workflow
from Orange.data import Table
from Orange.classification import TreeLearner, LogisticRegressionLearner
from Orange.evaluation import CrossValidation, CA, AUC, Precision, Recall, F1

# Load data
data = Table("iris")

# Create learners
learners = [
    TreeLearner(max_depth=5),
    LogisticRegressionLearner(C=1.0)
]

# Cross-validation
results = CrossValidation(data, learners, k=10, stratified=True)

# Calculate metrics; iris has three classes, so request weighted
# averaging instead of the binary default
accuracies = CA(results)
auc_scores = AUC(results)
precisions = Precision(results, average='weighted')
recalls = Recall(results, average='weighted')
f1_scores = F1(results, average='weighted')

print("Classification Results:")
for i, learner in enumerate(learners):
    print(f"{learner.__class__.__name__}:")
    print(f"  Accuracy: {accuracies[i]:.3f}")
    print(f"  AUC: {auc_scores[i]:.3f}")
    print(f"  Precision: {precisions[i]:.3f}")
    print(f"  Recall: {recalls[i]:.3f}")
    print(f"  F1: {f1_scores[i]:.3f}")

# Train-test split evaluation; iris rows are ordered by class, so
# shuffle before splitting or the train set misses a class entirely
import numpy as np
from Orange.evaluation import TestOnTestData

indices = np.random.RandomState(42).permutation(len(data))
train_data = data[indices[:100]]
test_data = data[indices[100:]]
test_results = TestOnTestData(train_data, test_data, learners)
test_accuracies = CA(test_results)

# Regression evaluation example; alias the regression TreeLearner so
# it does not shadow the classification one imported above
from Orange.regression import LinearRegressionLearner
from Orange.regression import TreeLearner as RegressionTreeLearner
from Orange.evaluation import MSE, RMSE, MAE, R2

housing_data = Table("housing")
reg_learners = [
    LinearRegressionLearner(),
    RegressionTreeLearner()
]

reg_results = CrossValidation(housing_data, reg_learners, k=5)
mse_scores = MSE(reg_results)
rmse_scores = RMSE(reg_results)
mae_scores = MAE(reg_results)
r2_scores = R2(reg_results)

print("Regression Results:")
for i, learner in enumerate(reg_learners):
    print(f"{learner.__class__.__name__}:")
    print(f"  MSE: {mse_scores[i]:.3f}")
    print(f"  RMSE: {rmse_scores[i]:.3f}")
    print(f"  MAE: {mae_scores[i]:.3f}")
    print(f"  R²: {r2_scores[i]:.3f}")
```