ModelScope brings the notion of Model-as-a-Service to life with unified interfaces for state-of-the-art machine learning models.
ModelScope's metrics framework provides comprehensive evaluation capabilities across different domains and tasks. The framework supports both built-in metrics and custom metric implementations for model performance assessment.
Abstract base class for all metrics, providing a common interface and shared functionality.
class Metric:
    """
    Base metric class for model evaluation.
    """

    def __init__(self, **kwargs):
        """
        Initialize metric with configuration parameters.
        Parameters:
        - **kwargs: Metric-specific configuration options
        """

    def add(self, outputs, inputs):
        """
        Add batch outputs and inputs to metric computation.
        Parameters:
        - outputs: Model outputs for the batch
        - inputs: Corresponding inputs/targets for the batch
        """

    def evaluate(self):
        """
        Compute final metric value from accumulated data.
        Returns:
        Dictionary containing metric results
        """

    def merge(self, other):
        """
        Merge another metric instance into this one.
        Parameters:
        - other: Another metric instance of the same type
        """

    def reset(self):
        """
        Reset metric state for new evaluation round.
        """

Factory functions for creating metrics from configuration.
def task_default_metrics(task: str) -> list:
    """
    Get default metrics for a specific task.
    Parameters:
    - task: Task identifier (e.g., 'text-classification', 'image-classification')
    Returns:
    List of default metric instances for the task
    """

def build_metric(cfg: dict, default_args: dict = None):
    """
    Build metric from configuration dictionary.
    Parameters:
    - cfg: Metric configuration dictionary
    - default_args: Default arguments to merge
    Returns:
    Metric instance
    """

class AccuracyMetric(Metric):
"""
Accuracy metric for classification tasks.
"""
def __init__(self, **kwargs):
"""Initialize accuracy metric."""
class SequenceClassificationMetric(Metric):
"""
Comprehensive metrics for sequence classification including accuracy, precision, recall, and F1.
"""
def __init__(self, average: str = 'macro', **kwargs):
"""
Initialize sequence classification metrics.
Parameters:
- average: Averaging strategy ('macro', 'micro', 'weighted')
"""
class TokenClassificationMetric(Metric):
    """
    Metrics for token-level classification tasks like NER.
    """

    def __init__(self, label_list: list = None, **kwargs):
        """
        Initialize token classification metrics.
        Parameters:
        - label_list: List of class labels
        """
"""
BLEU score metric for text generation and translation.
"""
def __init__(self, n_gram: int = 4, smooth: bool = False, **kwargs):
"""
Initialize BLEU metric.
Parameters:
- n_gram: Maximum n-gram order (default: 4)
- smooth: Whether to apply smoothing
"""
class TextGenerationMetric(Metric):
"""
Comprehensive metrics for text generation including BLEU, ROUGE, and other generation metrics.
"""
def __init__(self, metrics: list = None, **kwargs):
"""
Initialize text generation metrics.
Parameters:
- metrics: List of specific metrics to compute
"""
class PplMetric(Metric):
"""
Perplexity metric for language modeling.
"""
def __init__(self, **kwargs):
"""Initialize perplexity metric."""class TextRankingMetric(Metric):
"""
Metrics for text ranking and retrieval tasks.
"""
def __init__(self, k_values: list = None, **kwargs):
"""
Initialize text ranking metrics.
Parameters:
- k_values: List of k values for top-k metrics (default: [1, 5, 10])
"""class ImageQualityAssessmentMosMetric(Metric):
"""
Mean Opinion Score (MOS) metric for image quality assessment.
"""
def __init__(self, **kwargs):
"""Initialize MOS metric for image quality."""
class ImageQualityAssessmentDegradationMetric(Metric):
"""
Image degradation assessment metric.
"""
def __init__(self, **kwargs):
"""Initialize image degradation metric."""class ImageColorEnhanceMetric(Metric):
"""
Metrics for evaluating image color enhancement quality.
"""
def __init__(self, **kwargs):
"""Initialize color enhancement metrics."""
class ImageColorizationMetric(Metric):
"""
Metrics for image colorization tasks.
"""
def __init__(self, **kwargs):
"""Initialize colorization metrics."""
class ImageDenoiseMetric(Metric):
"""
Metrics for image denoising evaluation.
"""
def __init__(self, **kwargs):
"""Initialize denoising metrics."""
class ImageInpaintingMetric(Metric):
"""
Metrics for image inpainting quality assessment.
"""
def __init__(self, **kwargs):
"""Initialize inpainting metrics."""
class ImagePortraitEnhancementMetric(Metric):
"""
Specialized metrics for portrait enhancement evaluation.
"""
def __init__(self, **kwargs):
"""Initialize portrait enhancement metrics."""class ImageInstanceSegmentationCOCOMetric(Metric):
"""
COCO-style metrics for instance segmentation evaluation.
"""
def __init__(self, ann_file: str = None, **kwargs):
"""
Initialize COCO segmentation metrics.
Parameters:
- ann_file: Path to COCO annotation file
"""class OCRRecognitionMetric(Metric):
"""
Metrics for Optical Character Recognition evaluation.
"""
def __init__(self, **kwargs):
"""Initialize OCR recognition metrics."""class VideoFrameInterpolationMetric(Metric):
"""
Metrics for video frame interpolation quality assessment.
"""
def __init__(self, **kwargs):
"""Initialize frame interpolation metrics."""
class VideoStabilizationMetric(Metric):
"""
Metrics for video stabilization evaluation.
"""
def __init__(self, **kwargs):
"""Initialize video stabilization metrics."""
class VideoSuperResolutionMetric(Metric):
"""
Metrics for video super-resolution quality assessment.
"""
def __init__(self, **kwargs):
"""Initialize video super-resolution metrics."""class VideoSummarizationMetric(Metric):
"""
Metrics for video summarization evaluation.
"""
def __init__(self, **kwargs):
"""Initialize video summarization metrics."""
class MovieSceneSegmentationMetric(Metric):
"""
Metrics for movie scene segmentation evaluation.
"""
def __init__(self, **kwargs):
"""Initialize scene segmentation metrics."""
class ReferringVideoObjectSegmentationMetric(Metric):
"""
Metrics for referring video object segmentation.
"""
def __init__(self, **kwargs):
"""Initialize referring video object segmentation metrics."""class AudioNoiseMetric(Metric):
"""
Metrics for audio noise evaluation and assessment.
"""
def __init__(self, **kwargs):
"""Initialize audio noise metrics."""class LossMetric(Metric):
"""
Generic loss metric for tracking training and validation losses.
"""
def __init__(self, **kwargs):
"""Initialize loss metric."""from modelscope import AccuracyMetric, BleuMetric
# Initialize accuracy metric
accuracy = AccuracyMetric()

# Add predictions and labels
for batch_outputs, batch_labels in evaluation_data:
    accuracy.add(batch_outputs, batch_labels)

# Compute final accuracy
results = accuracy.evaluate()
print(f"Accuracy: {results['accuracy']}")

# BLEU metric for text generation
bleu = BleuMetric(n_gram=4)
for generated_texts, reference_texts in text_data:
    bleu.add(generated_texts, reference_texts)

bleu_score = bleu.evaluate()
print(f"BLEU Score: {bleu_score['bleu']}")
Retrieving the default metrics for a task:

from modelscope import task_default_metrics

# Get default metrics for text classification
text_metrics = task_default_metrics('text-classification')
print(f"Default text classification metrics: {[type(m).__name__ for m in text_metrics]}")

# Get default metrics for image classification
image_metrics = task_default_metrics('image-classification')
print(f"Default image classification metrics: {[type(m).__name__ for m in image_metrics]}")

# Use default metrics in evaluation
for metric in text_metrics:
    for outputs, inputs in eval_data:
        metric.add(outputs, inputs)
    results = metric.evaluate()
    print(f"{type(metric).__name__}: {results}")

Implementing a custom metric by subclassing Metric:

from modelscope import Metric
class CustomF1Metric(Metric):
    def __init__(self, average='macro', **kwargs):
        super().__init__(**kwargs)
        self.average = average
        self.predictions = []
        self.targets = []

    def add(self, outputs, inputs):
        # Extract predictions and targets
        preds = outputs['predictions']
        targets = inputs['labels']
        self.predictions.extend(preds)
        self.targets.extend(targets)

    def evaluate(self):
        from sklearn.metrics import f1_score
        f1 = f1_score(self.targets, self.predictions, average=self.average)
        return {'f1_score': f1}

    def reset(self):
        self.predictions = []
        self.targets = []

# Use custom metric
custom_metric = CustomF1Metric(average='weighted')
for outputs, inputs in eval_data:
    custom_metric.add(outputs, inputs)

results = custom_metric.evaluate()
print(f"Custom F1 Score: {results['f1_score']}")
Building a metric from a configuration dictionary:

from modelscope import build_metric

# Define metric configuration
metric_config = {
    'type': 'AccuracyMetric',
    'top_k': 5,  # For top-k accuracy
}

# Build metric from configuration
metric = build_metric(metric_config)

# Use the metric
for outputs, inputs in eval_data:
    metric.add(outputs, inputs)

results = metric.evaluate()
print(f"Top-5 Accuracy: {results}")
Evaluating with multiple metrics at once:

from modelscope import AccuracyMetric, SequenceClassificationMetric, LossMetric

# Initialize multiple metrics
metrics = {
    'accuracy': AccuracyMetric(),
    'classification': SequenceClassificationMetric(average='macro'),
    'loss': LossMetric()
}

# Evaluate with multiple metrics
for outputs, inputs in eval_data:
    for metric in metrics.values():
        metric.add(outputs, inputs)

# Collect all results
all_results = {}
for name, metric in metrics.items():
    results = metric.evaluate()
    all_results.update({f"{name}_{k}": v for k, v in results.items()})

print(f"All evaluation results: {all_results}")

Merging metrics accumulated on separate data partitions (e.g., in distributed evaluation):

from modelscope import AccuracyMetric
# Create metrics on different processes/devices
metric_1 = AccuracyMetric()
metric_2 = AccuracyMetric()

# Evaluate on different data partitions
for outputs, inputs in partition_1:
    metric_1.add(outputs, inputs)
for outputs, inputs in partition_2:
    metric_2.add(outputs, inputs)

# Merge metrics for final result
metric_1.merge(metric_2)
final_results = metric_1.evaluate()
print(f"Merged accuracy: {final_results['accuracy']}")

Install with Tessl CLI:

npx tessl i tessl/pypi-modelscope