or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

datasets.mdexport.mdhub.mdindex.mdmetrics.mdmodels.mdpipelines.mdpreprocessors.mdtraining.mdutilities.md

# Metrics and Evaluation

ModelScope's metrics framework provides comprehensive evaluation capabilities across different domains and tasks. The framework supports both built-in metrics and custom metric implementations for model performance assessment.

## Capabilities

### Base Metric Class

Abstract base class for all metrics providing common interface and functionality.

```python { .api }
class Metric:
    """
    Base metric class for model evaluation.
    """

    def __init__(self, **kwargs):
        """
        Initialize metric with configuration parameters.

        Parameters:
        - **kwargs: Metric-specific configuration options
        """

    def add(self, outputs, inputs):
        """
        Add batch outputs and inputs to metric computation.

        Parameters:
        - outputs: Model outputs for the batch
        - inputs: Corresponding inputs/targets for the batch
        """

    def evaluate(self):
        """
        Compute final metric value from accumulated data.

        Returns:
        Dictionary containing metric results
        """

    def merge(self, other):
        """
        Merge another metric instance into this one.

        Parameters:
        - other: Another metric instance of the same type
        """

    def reset(self):
        """
        Reset metric state for new evaluation round.
        """
```

### Metric Builder

Factory function for creating metrics from configuration.

```python { .api }
def task_default_metrics(task: str) -> list:
    """
    Get default metrics for a specific task.

    Parameters:
    - task: Task identifier (e.g., 'text-classification', 'image-classification')

    Returns:
    List of default metric instances for the task
    """

def build_metric(cfg: dict, default_args: dict = None):
    """
    Build metric from configuration dictionary.

    Parameters:
    - cfg: Metric configuration dictionary
    - default_args: Default arguments to merge

    Returns:
    Metric instance
    """
```

## Text and NLP Metrics

### Classification Metrics

```python { .api }
class AccuracyMetric(Metric):
    """
    Accuracy metric for classification tasks.
    """

    def __init__(self, **kwargs):
        """Initialize accuracy metric."""

class SequenceClassificationMetric(Metric):
    """
    Comprehensive metrics for sequence classification including accuracy, precision, recall, and F1.
    """

    def __init__(self, average: str = 'macro', **kwargs):
        """
        Initialize sequence classification metrics.

        Parameters:
        - average: Averaging strategy ('macro', 'micro', 'weighted')
        """

class TokenClassificationMetric(Metric):
    """
    Metrics for token-level classification tasks like NER.
    """

    def __init__(self, label_list: list = None, **kwargs):
        """
        Initialize token classification metrics.

        Parameters:
        - label_list: List of class labels
        """
```

### Text Generation Metrics

```python { .api }
class BleuMetric(Metric):
    """
    BLEU score metric for text generation and translation.
    """

    def __init__(self, n_gram: int = 4, smooth: bool = False, **kwargs):
        """
        Initialize BLEU metric.

        Parameters:
        - n_gram: Maximum n-gram order (default: 4)
        - smooth: Whether to apply smoothing
        """

class TextGenerationMetric(Metric):
    """
    Comprehensive metrics for text generation including BLEU, ROUGE, and other generation metrics.
    """

    def __init__(self, metrics: list = None, **kwargs):
        """
        Initialize text generation metrics.

        Parameters:
        - metrics: List of specific metrics to compute
        """

class PplMetric(Metric):
    """
    Perplexity metric for language modeling.
    """

    def __init__(self, **kwargs):
        """Initialize perplexity metric."""
```

### Text Ranking and Retrieval

```python { .api }
class TextRankingMetric(Metric):
    """
    Metrics for text ranking and retrieval tasks.
    """

    def __init__(self, k_values: list = None, **kwargs):
        """
        Initialize text ranking metrics.

        Parameters:
        - k_values: List of k values for top-k metrics (default: [1, 5, 10])
        """
```

## Computer Vision Metrics

### Image Quality Assessment

```python { .api }
class ImageQualityAssessmentMosMetric(Metric):
    """
    Mean Opinion Score (MOS) metric for image quality assessment.
    """

    def __init__(self, **kwargs):
        """Initialize MOS metric for image quality."""

class ImageQualityAssessmentDegradationMetric(Metric):
    """
    Image degradation assessment metric.
    """

    def __init__(self, **kwargs):
        """Initialize image degradation metric."""
```

### Image Enhancement Metrics

```python { .api }
class ImageColorEnhanceMetric(Metric):
    """
    Metrics for evaluating image color enhancement quality.
    """

    def __init__(self, **kwargs):
        """Initialize color enhancement metrics."""

class ImageColorizationMetric(Metric):
    """
    Metrics for image colorization tasks.
    """

    def __init__(self, **kwargs):
        """Initialize colorization metrics."""

class ImageDenoiseMetric(Metric):
    """
    Metrics for image denoising evaluation.
    """

    def __init__(self, **kwargs):
        """Initialize denoising metrics."""

class ImageInpaintingMetric(Metric):
    """
    Metrics for image inpainting quality assessment.
    """

    def __init__(self, **kwargs):
        """Initialize inpainting metrics."""

class ImagePortraitEnhancementMetric(Metric):
    """
    Specialized metrics for portrait enhancement evaluation.
    """

    def __init__(self, **kwargs):
        """Initialize portrait enhancement metrics."""
```

### Object Detection and Segmentation

```python { .api }
class ImageInstanceSegmentationCOCOMetric(Metric):
    """
    COCO-style metrics for instance segmentation evaluation.
    """

    def __init__(self, ann_file: str = None, **kwargs):
        """
        Initialize COCO segmentation metrics.

        Parameters:
        - ann_file: Path to COCO annotation file
        """
```

### OCR Metrics

```python { .api }
class OCRRecognitionMetric(Metric):
    """
    Metrics for Optical Character Recognition evaluation.
    """

    def __init__(self, **kwargs):
        """Initialize OCR recognition metrics."""
```

## Video Processing Metrics

### Video Enhancement and Processing

```python { .api }
class VideoFrameInterpolationMetric(Metric):
    """
    Metrics for video frame interpolation quality assessment.
    """

    def __init__(self, **kwargs):
        """Initialize frame interpolation metrics."""

class VideoStabilizationMetric(Metric):
    """
    Metrics for video stabilization evaluation.
    """

    def __init__(self, **kwargs):
        """Initialize video stabilization metrics."""

class VideoSuperResolutionMetric(Metric):
    """
    Metrics for video super-resolution quality assessment.
    """

    def __init__(self, **kwargs):
        """Initialize video super-resolution metrics."""
```

### Video Analysis Metrics

```python { .api }
class VideoSummarizationMetric(Metric):
    """
    Metrics for video summarization evaluation.
    """

    def __init__(self, **kwargs):
        """Initialize video summarization metrics."""

class MovieSceneSegmentationMetric(Metric):
    """
    Metrics for movie scene segmentation evaluation.
    """

    def __init__(self, **kwargs):
        """Initialize scene segmentation metrics."""

class ReferringVideoObjectSegmentationMetric(Metric):
    """
    Metrics for referring video object segmentation.
    """

    def __init__(self, **kwargs):
        """Initialize referring video object segmentation metrics."""
```

## Audio Processing Metrics

```python { .api }
class AudioNoiseMetric(Metric):
    """
    Metrics for audio noise evaluation and assessment.
    """

    def __init__(self, **kwargs):
        """Initialize audio noise metrics."""
```

## General Purpose Metrics

```python { .api }
class LossMetric(Metric):
    """
    Generic loss metric for tracking training and validation losses.
    """

    def __init__(self, **kwargs):
        """Initialize loss metric."""
```

## Usage Examples

### Basic Metric Usage

```python
from modelscope import AccuracyMetric, BleuMetric

# Initialize accuracy metric
accuracy = AccuracyMetric()

# Add predictions and labels
for batch_outputs, batch_labels in evaluation_data:
    accuracy.add(batch_outputs, batch_labels)

# Compute final accuracy
results = accuracy.evaluate()
print(f"Accuracy: {results['accuracy']}")

# BLEU metric for text generation
bleu = BleuMetric(n_gram=4)
for generated_texts, reference_texts in text_data:
    bleu.add(generated_texts, reference_texts)

bleu_score = bleu.evaluate()
print(f"BLEU Score: {bleu_score['bleu']}")
```

### Task-Specific Default Metrics

```python
from modelscope import task_default_metrics

# Get default metrics for text classification
text_metrics = task_default_metrics('text-classification')
print(f"Default text classification metrics: {[type(m).__name__ for m in text_metrics]}")

# Get default metrics for image classification
image_metrics = task_default_metrics('image-classification')
print(f"Default image classification metrics: {[type(m).__name__ for m in image_metrics]}")

# Use default metrics in evaluation
for metric in text_metrics:
    for outputs, inputs in eval_data:
        metric.add(outputs, inputs)
    results = metric.evaluate()
    print(f"{type(metric).__name__}: {results}")
```

### Custom Metric Implementation

```python
from modelscope import Metric

class CustomF1Metric(Metric):
    def __init__(self, average='macro', **kwargs):
        super().__init__(**kwargs)
        self.average = average
        self.predictions = []
        self.targets = []

    def add(self, outputs, inputs):
        # Extract predictions and targets
        preds = outputs['predictions']
        targets = inputs['labels']

        self.predictions.extend(preds)
        self.targets.extend(targets)

    def evaluate(self):
        from sklearn.metrics import f1_score
        f1 = f1_score(self.targets, self.predictions, average=self.average)
        return {'f1_score': f1}

    def reset(self):
        self.predictions = []
        self.targets = []

# Use custom metric
custom_metric = CustomF1Metric(average='weighted')
for outputs, inputs in eval_data:
    custom_metric.add(outputs, inputs)

results = custom_metric.evaluate()
print(f"Custom F1 Score: {results['f1_score']}")
```

### Metric Configuration from Dictionary

```python
from modelscope import build_metric

# Define metric configuration
metric_config = {
    'type': 'AccuracyMetric',
    'top_k': 5,  # For top-k accuracy
}

# Build metric from configuration
metric = build_metric(metric_config)

# Use the metric
for outputs, inputs in eval_data:
    metric.add(outputs, inputs)

results = metric.evaluate()
print(f"Top-5 Accuracy: {results}")
```

### Multiple Metrics Evaluation

```python
from modelscope import AccuracyMetric, SequenceClassificationMetric, LossMetric

# Initialize multiple metrics
metrics = {
    'accuracy': AccuracyMetric(),
    'classification': SequenceClassificationMetric(average='macro'),
    'loss': LossMetric()
}

# Evaluate with multiple metrics
for outputs, inputs in eval_data:
    for metric in metrics.values():
        metric.add(outputs, inputs)

# Collect all results
all_results = {}
for name, metric in metrics.items():
    results = metric.evaluate()
    all_results.update({f"{name}_{k}": v for k, v in results.items()})

print(f"All evaluation results: {all_results}")
```

### Metric Merging for Distributed Evaluation

```python
from modelscope import AccuracyMetric

# Create metrics on different processes/devices
metric_1 = AccuracyMetric()
metric_2 = AccuracyMetric()

# Evaluate on different data partitions
for outputs, inputs in partition_1:
    metric_1.add(outputs, inputs)

for outputs, inputs in partition_2:
    metric_2.add(outputs, inputs)

# Merge metrics for final result
metric_1.merge(metric_2)
final_results = metric_1.evaluate()
print(f"Merged accuracy: {final_results['accuracy']}")
```