# Model Evaluation and Validation

Orange3 provides a comprehensive evaluation framework for assessing machine learning model performance through various validation techniques and metrics.

## Capabilities

### Cross-Validation Methods

Different strategies for splitting data and validating model performance.

```python { .api }
class CrossValidation:
    """
    k-fold cross-validation.

    Args:
        data: Dataset for evaluation
        learners: List of learners to evaluate
        k: Number of folds
        stratified: Use stratified sampling
        random_state: Random seed for reproducibility
    """
    def __init__(self, data, learners, k=10, stratified=True, random_state=None): ...

class LeaveOneOut:
    """
    Leave-one-out cross-validation.

    Args:
        data: Dataset for evaluation
        learners: List of learners to evaluate
    """
    def __init__(self, data, learners): ...

class TestOnTrainingData:
    """
    Evaluate on the same data used for training.

    Args:
        data: Training dataset
        learners: List of learners to evaluate
    """
    def __init__(self, data, learners): ...

class TestOnTestData:
    """
    Train on one dataset, test on another.

    Args:
        train_data: Training dataset
        test_data: Test dataset
        learners: List of learners to evaluate
    """
    def __init__(self, train_data, test_data, learners): ...
```
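
The remaining strategies are invoked the same way as `CrossValidation` (see the full workflow under Usage Examples below). A minimal sketch, assuming the constructor signatures documented above; the bundled `heart_disease` dataset is used purely as an illustration:

```python
from Orange.data import Table
from Orange.classification import LogisticRegressionLearner
from Orange.evaluation import LeaveOneOut, TestOnTrainingData, CA

data = Table("heart_disease")
learners = [LogisticRegressionLearner()]

# Leave-one-out: one fold per instance, useful for small datasets
loo_results = LeaveOneOut(data, learners)
print("LOO accuracy:", CA(loo_results))

# Evaluating on the training data gives an optimistic upper bound on performance
train_results = TestOnTrainingData(data, learners)
print("Training accuracy:", CA(train_results))
```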

### Classification Metrics

Performance measures for classification tasks.

```python { .api }
def CA(results):
    """
    Classification Accuracy.

    Args:
        results: Evaluation results object

    Returns:
        ndarray: Accuracy scores for each learner
    """

def AUC(results):
    """
    Area Under the ROC Curve.

    Args:
        results: Evaluation results object

    Returns:
        ndarray: AUC scores for each learner
    """

def Precision(results, pos_label=1, average='binary'):
    """
    Precision score.

    Args:
        results: Evaluation results object
        pos_label: Positive class label
        average: Averaging strategy

    Returns:
        ndarray: Precision scores
    """

def Recall(results, pos_label=1, average='binary'):
    """
    Recall score.

    Args:
        results: Evaluation results object
        pos_label: Positive class label
        average: Averaging strategy

    Returns:
        ndarray: Recall scores
    """

def F1(results, pos_label=1, average='binary'):
    """
    F1 score.

    Args:
        results: Evaluation results object
        pos_label: Positive class label
        average: Averaging strategy

    Returns:
        ndarray: F1 scores
    """

def PrecisionRecallFSupport(results, pos_label=1, average='binary'):
    """
    Combined precision, recall, F1, and support scores.

    Args:
        results: Evaluation results object
        pos_label: Positive class label
        average: Averaging strategy

    Returns:
        tuple: (precision, recall, f1, support) arrays
    """
```
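
`PrecisionRecallFSupport` bundles the per-class scores together with the class support counts. A short sketch, assuming the signatures documented above; the bundled `titanic` dataset is used as an illustrative binary task so the default `average='binary'` applies:

```python
from Orange.data import Table
from Orange.classification import LogisticRegressionLearner
from Orange.evaluation import CrossValidation, PrecisionRecallFSupport

# Binary classification task
data = Table("titanic")
results = CrossValidation(data, [LogisticRegressionLearner()], k=5)

# Unpack the combined scores for the positive class
precision, recall, f1, support = PrecisionRecallFSupport(results)
print(f"precision={precision}, recall={recall}, f1={f1}, support={support}")
```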

### Regression Metrics

Performance measures for regression tasks.

```python { .api }
def MSE(results):
    """
    Mean Squared Error.

    Args:
        results: Evaluation results object

    Returns:
        ndarray: MSE scores for each learner
    """

def RMSE(results):
    """
    Root Mean Squared Error.

    Args:
        results: Evaluation results object

    Returns:
        ndarray: RMSE scores for each learner
    """

def MAE(results):
    """
    Mean Absolute Error.

    Args:
        results: Evaluation results object

    Returns:
        ndarray: MAE scores for each learner
    """

def R2(results):
    """
    R-squared coefficient of determination.

    Args:
        results: Evaluation results object

    Returns:
        ndarray: R² scores for each learner
    """
```
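
As a quick sanity check, RMSE is simply the square root of MSE. A minimal sketch verifying this relationship on cross-validation results, assuming the scorer signatures documented above:

```python
import numpy as np
from Orange.data import Table
from Orange.regression import LinearRegressionLearner
from Orange.evaluation import CrossValidation, MSE, RMSE

results = CrossValidation(Table("housing"), [LinearRegressionLearner()], k=5)

# RMSE should equal the elementwise square root of MSE for every learner
assert np.allclose(RMSE(results), np.sqrt(MSE(results)))
```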

### Advanced Classification Metrics

Specialized metrics for detailed model analysis.

```python { .api }
def LogLoss(results):
    """
    Logarithmic loss for probabilistic predictions.

    Args:
        results: Evaluation results object

    Returns:
        ndarray: Log loss scores
    """

def Specificity(results, pos_label=1):
    """
    Specificity (True Negative Rate).

    Args:
        results: Evaluation results object
        pos_label: Positive class label

    Returns:
        ndarray: Specificity scores
    """

def Sensitivity(results, pos_label=1):
    """
    Sensitivity (True Positive Rate, same as Recall).

    Args:
        results: Evaluation results object
        pos_label: Positive class label

    Returns:
        ndarray: Sensitivity scores
    """
```
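
A brief sketch combining these scorers on a binary task, again assuming the signatures documented above; as the docstring notes, `Sensitivity` should agree with `Recall`:

```python
from Orange.data import Table
from Orange.classification import NaiveBayesLearner
from Orange.evaluation import CrossValidation, LogLoss, Specificity, Sensitivity, Recall

data = Table("titanic")
results = CrossValidation(data, [NaiveBayesLearner()], k=10)

print("Log loss:   ", LogLoss(results))
print("Specificity:", Specificity(results))
# Sensitivity and Recall should coincide, as stated in the docstring above
print("Sensitivity:", Sensitivity(results), "== Recall:", Recall(results))
```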

226

227

### Results Container

228

229

Object containing evaluation results and predictions.

230

231

```python { .api }
class Results:
    """
    Container for evaluation results.

    Attributes:
        data: Original dataset
        predicted: Predicted values
        probabilities: Prediction probabilities (if available)
        actual: True target values
        learners: List of learners used
        folds: Cross-validation fold information
    """
    def __init__(self, data=None, learners=None): ...

    @property
    def predicted(self):
        """Predicted class labels."""

    @property
    def probabilities(self):
        """Prediction probabilities."""

    @property
    def actual(self):
        """True class labels."""
```
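
Because `Results` keeps the raw predictions next to the true values, custom statistics can be computed directly from its attributes. A minimal sketch, assuming `predicted` is indexed by learner and `probabilities` additionally by class (i.e. shapes of roughly `(n_learners, n_instances)` and `(n_learners, n_instances, n_classes)`):

```python
import numpy as np
from Orange.data import Table
from Orange.classification import TreeLearner
from Orange.evaluation import CrossValidation

results = CrossValidation(Table("iris"), [TreeLearner()], k=10)

# Manual accuracy for the first (and only) learner, computed from the container
manual_ca = np.mean(results.predicted[0] == results.actual)
print("Manual accuracy:", manual_ca)

# Per-instance class probabilities for the first learner
print("Probability matrix shape:", results.probabilities[0].shape)
```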

### Clustering Evaluation

Metrics for unsupervised learning evaluation.

```python { .api }
class ClusteringEvaluation:
    """Evaluation methods for clustering algorithms."""

    @staticmethod
    def adjusted_rand_score(labels_true, labels_pred):
        """Adjusted Rand Index."""

    @staticmethod
    def silhouette_score(X, labels):
        """Silhouette coefficient."""

    @staticmethod
    def calinski_harabasz_score(X, labels):
        """Calinski-Harabasz index."""
```
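
A sketch of how these static methods might be called; it assumes `ClusteringEvaluation` is importable from `Orange.evaluation` as documented above, and uses random placeholder labels only to demonstrate the call signatures (real labels would come from a clustering method such as k-means):

```python
import numpy as np
from Orange.data import Table
from Orange.evaluation import ClusteringEvaluation  # import path as documented above

data = Table("iris")
X = data.X

# Placeholder cluster labels; in practice these would come from a clustering model
labels = np.random.RandomState(0).randint(0, 3, size=len(X))
true_labels = data.Y.astype(int)

print("ARI:       ", ClusteringEvaluation.adjusted_rand_score(true_labels, labels))
print("Silhouette:", ClusteringEvaluation.silhouette_score(X, labels))
print("CH index:  ", ClusteringEvaluation.calinski_harabasz_score(X, labels))
```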

### Statistical Testing

Statistical significance testing for model comparison.

```python { .api }
def compute_CD(avranks, n, alpha='0.05', test='nemenyi'):
    """
    Compute critical difference for statistical significance testing.

    Args:
        avranks: Average ranks of methods
        n: Number of datasets
        alpha: Significance level
        test: Statistical test ('nemenyi', 'bonferroni-dunn')

    Returns:
        float: Critical difference value
    """
```
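
For example, to test whether the ranking of three methods over a collection of datasets is significant; the average ranks below are placeholder values chosen purely for illustration:

```python
from Orange.evaluation import compute_CD

# Illustrative average ranks of three methods over 14 datasets (placeholder values)
avranks = [1.9, 2.2, 2.9]
cd = compute_CD(avranks, n=14, alpha='0.05', test='nemenyi')
print("Critical difference:", cd)

# Two methods differ significantly if their average ranks differ by more than cd
print("Methods 1 and 3 differ:", abs(avranks[0] - avranks[2]) > cd)
```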

### Usage Examples

```python
# Basic evaluation workflow
import numpy as np
from Orange.data import Table
from Orange.classification import TreeLearner, LogisticRegressionLearner
from Orange.evaluation import CrossValidation, CA, AUC, Precision, Recall, F1

# Load data
data = Table("iris")

# Create learners
learners = [
    TreeLearner(max_depth=5),
    LogisticRegressionLearner(C=1.0)
]

# Cross-validation
results = CrossValidation(data, learners, k=10, stratified=True)

# Calculate metrics
accuracies = CA(results)
auc_scores = AUC(results)
precisions = Precision(results)
recalls = Recall(results)
f1_scores = F1(results)

print("Classification Results:")
for i, learner in enumerate(learners):
    print(f"{learner.__class__.__name__}:")
    print(f"  Accuracy: {accuracies[i]:.3f}")
    print(f"  AUC: {auc_scores[i]:.3f}")
    print(f"  Precision: {precisions[i]:.3f}")
    print(f"  Recall: {recalls[i]:.3f}")
    print(f"  F1: {f1_scores[i]:.3f}")

# Train-test split evaluation
from Orange.evaluation import TestOnTestData

# iris is ordered by class, so shuffle before splitting to keep all classes
# represented in both the training and test sets
indices = np.random.RandomState(42).permutation(len(data))
train_data = data[indices[:100]]
test_data = data[indices[100:]]
test_results = TestOnTestData(train_data, test_data, learners)
test_accuracies = CA(test_results)

# Regression evaluation example
from Orange.regression import LinearRegressionLearner, TreeLearner as RegressionTreeLearner
from Orange.evaluation import MSE, RMSE, MAE, R2

housing_data = Table("housing")
reg_learners = [
    LinearRegressionLearner(),
    RegressionTreeLearner()
]

reg_results = CrossValidation(housing_data, reg_learners, k=5)
mse_scores = MSE(reg_results)
rmse_scores = RMSE(reg_results)
mae_scores = MAE(reg_results)
r2_scores = R2(reg_results)

print("Regression Results:")
for i, learner in enumerate(reg_learners):
    print(f"{learner.__class__.__name__}:")
    print(f"  MSE: {mse_scores[i]:.3f}")
    print(f"  RMSE: {rmse_scores[i]:.3f}")
    print(f"  MAE: {mae_scores[i]:.3f}")
    print(f"  R²: {r2_scores[i]:.3f}")
```