tessl/pypi-modelscope

ModelScope brings the notion of Model-as-a-Service to life with unified interfaces for state-of-the-art machine learning models.

Metrics and Evaluation

ModelScope's metrics framework provides evaluation capabilities across domains and tasks. It supports both built-in metrics and custom metric implementations for assessing model performance.

Capabilities

Base Metric Class

Abstract base class for all metrics, providing a common interface and shared functionality.

class Metric:
    """
    Base metric class for model evaluation.
    """
    
    def __init__(self, **kwargs):
        """
        Initialize metric with configuration parameters.
        
        Parameters:
        - **kwargs: Metric-specific configuration options
        """
    
    def add(self, outputs, inputs):
        """
        Add batch outputs and inputs to metric computation.
        
        Parameters:
        - outputs: Model outputs for the batch
        - inputs: Corresponding inputs/targets for the batch
        """
    
    def evaluate(self):
        """
        Compute final metric value from accumulated data.
        
        Returns:
        Dictionary containing metric results
        """
    
    def merge(self, other):
        """
        Merge another metric instance into this one.
        
        Parameters:
        - other: Another metric instance of the same type
        """
    
    def reset(self):
        """
        Reset metric state for new evaluation round.
        """

Metric Builder

Factory functions for creating metrics from a configuration dictionary or from a task's default metric set.

def task_default_metrics(task: str) -> list:
    """
    Get default metrics for a specific task.
    
    Parameters:
    - task: Task identifier (e.g., 'text-classification', 'image-classification')
    
    Returns:
    List of default metric instances for the task
    """

def build_metric(cfg: dict, default_args: dict = None):
    """
    Build metric from configuration dictionary.
    
    Parameters:
    - cfg: Metric configuration dictionary
    - default_args: Default arguments to merge
    
    Returns:
    Metric instance
    """

Text and NLP Metrics

Classification Metrics

class AccuracyMetric(Metric):
    """
    Accuracy metric for classification tasks.
    """
    
    def __init__(self, **kwargs):
        """Initialize accuracy metric."""

class SequenceClassificationMetric(Metric):
    """
    Comprehensive metrics for sequence classification including accuracy, precision, recall, and F1.
    """
    
    def __init__(self, average: str = 'macro', **kwargs):
        """
        Initialize sequence classification metrics.
        
        Parameters:
        - average: Averaging strategy ('macro', 'micro', 'weighted')
        """

class TokenClassificationMetric(Metric):
    """
    Metrics for token-level classification tasks like NER.
    """
    
    def __init__(self, label_list: list = None, **kwargs):
        """
        Initialize token classification metrics.
        
        Parameters:
        - label_list: List of class labels
        """

Text Generation Metrics

class BleuMetric(Metric):
    """
    BLEU score metric for text generation and translation.
    """
    
    def __init__(self, n_gram: int = 4, smooth: bool = False, **kwargs):
        """
        Initialize BLEU metric.
        
        Parameters:
        - n_gram: Maximum n-gram order (default: 4)
        - smooth: Whether to apply smoothing
        """

class TextGenerationMetric(Metric):
    """
    Comprehensive metrics for text generation, including BLEU, ROUGE, and related measures.
    """
    
    def __init__(self, metrics: list = None, **kwargs):
        """
        Initialize text generation metrics.
        
        Parameters:
        - metrics: List of specific metrics to compute
        """

class PplMetric(Metric):
    """
    Perplexity metric for language modeling.
    """
    
    def __init__(self, **kwargs):
        """Initialize perplexity metric."""

Text Ranking and Retrieval

class TextRankingMetric(Metric):
    """
    Metrics for text ranking and retrieval tasks.
    """
    
    def __init__(self, k_values: list = None, **kwargs):
        """
        Initialize text ranking metrics.
        
        Parameters:
        - k_values: List of k values for top-k metrics (default: [1, 5, 10])
        """

Computer Vision Metrics

Image Quality Assessment

class ImageQualityAssessmentMosMetric(Metric):
    """
    Mean Opinion Score (MOS) metric for image quality assessment.
    """
    
    def __init__(self, **kwargs):
        """Initialize MOS metric for image quality."""

class ImageQualityAssessmentDegradationMetric(Metric):
    """
    Image degradation assessment metric.
    """
    
    def __init__(self, **kwargs):
        """Initialize image degradation metric."""

Image Enhancement Metrics

class ImageColorEnhanceMetric(Metric):
    """
    Metrics for evaluating image color enhancement quality.
    """
    
    def __init__(self, **kwargs):
        """Initialize color enhancement metrics."""

class ImageColorizationMetric(Metric):
    """
    Metrics for image colorization tasks.
    """
    
    def __init__(self, **kwargs):
        """Initialize colorization metrics."""

class ImageDenoiseMetric(Metric):
    """
    Metrics for image denoising evaluation.
    """
    
    def __init__(self, **kwargs):
        """Initialize denoising metrics."""

class ImageInpaintingMetric(Metric):
    """
    Metrics for image inpainting quality assessment.
    """
    
    def __init__(self, **kwargs):
        """Initialize inpainting metrics."""

class ImagePortraitEnhancementMetric(Metric):
    """
    Specialized metrics for portrait enhancement evaluation.
    """
    
    def __init__(self, **kwargs):
        """Initialize portrait enhancement metrics."""

Object Detection and Segmentation

class ImageInstanceSegmentationCOCOMetric(Metric):
    """
    COCO-style metrics for instance segmentation evaluation.
    """
    
    def __init__(self, ann_file: str = None, **kwargs):
        """
        Initialize COCO segmentation metrics.
        
        Parameters:
        - ann_file: Path to COCO annotation file
        """

OCR Metrics

class OCRRecognitionMetric(Metric):
    """
    Metrics for Optical Character Recognition evaluation.
    """
    
    def __init__(self, **kwargs):
        """Initialize OCR recognition metrics."""

Video Processing Metrics

Video Enhancement and Processing

class VideoFrameInterpolationMetric(Metric):
    """
    Metrics for video frame interpolation quality assessment.
    """
    
    def __init__(self, **kwargs):
        """Initialize frame interpolation metrics."""

class VideoStabilizationMetric(Metric):
    """
    Metrics for video stabilization evaluation.
    """
    
    def __init__(self, **kwargs):
        """Initialize video stabilization metrics."""

class VideoSuperResolutionMetric(Metric):
    """
    Metrics for video super-resolution quality assessment.
    """
    
    def __init__(self, **kwargs):
        """Initialize video super-resolution metrics."""

Video Analysis Metrics

class VideoSummarizationMetric(Metric):
    """
    Metrics for video summarization evaluation.
    """
    
    def __init__(self, **kwargs):
        """Initialize video summarization metrics."""

class MovieSceneSegmentationMetric(Metric):
    """
    Metrics for movie scene segmentation evaluation.
    """
    
    def __init__(self, **kwargs):
        """Initialize scene segmentation metrics."""

class ReferringVideoObjectSegmentationMetric(Metric):
    """
    Metrics for referring video object segmentation.
    """
    
    def __init__(self, **kwargs):
        """Initialize referring video object segmentation metrics."""

Audio Processing Metrics

class AudioNoiseMetric(Metric):
    """
    Metrics for audio noise evaluation.
    """
    
    def __init__(self, **kwargs):
        """Initialize audio noise metrics."""

General Purpose Metrics

class LossMetric(Metric):
    """
    Generic loss metric for tracking training and validation losses.
    """
    
    def __init__(self, **kwargs):
        """Initialize loss metric."""

Usage Examples

Basic Metric Usage

from modelscope import AccuracyMetric, BleuMetric

# Initialize accuracy metric
accuracy = AccuracyMetric()

# Add predictions and labels
for batch_outputs, batch_labels in evaluation_data:
    accuracy.add(batch_outputs, batch_labels)

# Compute final accuracy
results = accuracy.evaluate()
print(f"Accuracy: {results['accuracy']}")

# BLEU metric for text generation
bleu = BleuMetric(n_gram=4)
for generated_texts, reference_texts in text_data:
    bleu.add(generated_texts, reference_texts)

bleu_score = bleu.evaluate()
print(f"BLEU Score: {bleu_score['bleu']}")

Task-Specific Default Metrics

from modelscope import task_default_metrics

# Get default metrics for text classification
text_metrics = task_default_metrics('text-classification')
print(f"Default text classification metrics: {[type(m).__name__ for m in text_metrics]}")

# Get default metrics for image classification
image_metrics = task_default_metrics('image-classification')
print(f"Default image classification metrics: {[type(m).__name__ for m in image_metrics]}")

# Use default metrics in evaluation
for metric in text_metrics:
    for outputs, inputs in eval_data:
        metric.add(outputs, inputs)
    results = metric.evaluate()
    print(f"{type(metric).__name__}: {results}")

Custom Metric Implementation

from modelscope import Metric

class CustomF1Metric(Metric):
    def __init__(self, average='macro', **kwargs):
        super().__init__(**kwargs)
        self.average = average
        self.predictions = []
        self.targets = []
    
    def add(self, outputs, inputs):
        # Extract predictions and targets
        preds = outputs['predictions']
        targets = inputs['labels']
        
        self.predictions.extend(preds)
        self.targets.extend(targets)
    
    def evaluate(self):
        from sklearn.metrics import f1_score
        f1 = f1_score(self.targets, self.predictions, average=self.average)
        return {'f1_score': f1}
    
    def merge(self, other):
        # Combine accumulated state from another CustomF1Metric (e.g., another worker's shard)
        self.predictions.extend(other.predictions)
        self.targets.extend(other.targets)
    
    def reset(self):
        self.predictions = []
        self.targets = []

# Use custom metric
custom_metric = CustomF1Metric(average='weighted')
for outputs, inputs in eval_data:
    custom_metric.add(outputs, inputs)

results = custom_metric.evaluate()
print(f"Custom F1 Score: {results['f1_score']}")

Metric Configuration from Dictionary

from modelscope import build_metric

# Define metric configuration
metric_config = {
    'type': 'AccuracyMetric',
    'top_k': 5,  # For top-k accuracy
}

# Build metric from configuration
metric = build_metric(metric_config)

# Use the metric
for outputs, inputs in eval_data:
    metric.add(outputs, inputs)

results = metric.evaluate()
print(f"Top-5 Accuracy: {results}")

Multiple Metrics Evaluation

from modelscope import AccuracyMetric, SequenceClassificationMetric, LossMetric

# Initialize multiple metrics
metrics = {
    'accuracy': AccuracyMetric(),
    'classification': SequenceClassificationMetric(average='macro'),
    'loss': LossMetric()
}

# Evaluate with multiple metrics
for outputs, inputs in eval_data:
    for metric in metrics.values():
        metric.add(outputs, inputs)

# Collect all results
all_results = {}
for name, metric in metrics.items():
    results = metric.evaluate()
    all_results.update({f"{name}_{k}": v for k, v in results.items()})

print(f"All evaluation results: {all_results}")

Metric Merging for Distributed Evaluation

from modelscope import AccuracyMetric

# Create metrics on different processes/devices
metric_1 = AccuracyMetric()
metric_2 = AccuracyMetric()

# Evaluate on different data partitions
for outputs, inputs in partition_1:
    metric_1.add(outputs, inputs)

for outputs, inputs in partition_2:
    metric_2.add(outputs, inputs)

# Merge metrics for final result
metric_1.merge(metric_2)
final_results = metric_1.evaluate()
print(f"Merged accuracy: {final_results['accuracy']}")

Install with Tessl CLI

npx tessl i tessl/pypi-modelscope
