PyTorch-native metrics library providing 100+ rigorously tested metrics across classification, regression, audio, image, text, and other ML domains.
Metrics for evaluating multimodal AI systems, including video-audio synchronization and cross-modal quality assessment for applications that combine multiple data modalities.
Metrics for evaluating lip-sync and audio-visual alignment quality.
class LipVertexError(Metric):
    def __init__(
        self,
        **kwargs
    ): ...

Deep learning-based metrics for evaluating cross-modal quality (require optional dependencies).
class CLIPScore(Metric):
    def __init__(
        self,
        model_name_or_path: str = "openai/clip-vit-base-patch16",
        **kwargs
    ): ...
class CLIPImageQualityAssessment(Metric):
    def __init__(
        self,
        model_name_or_path: str = "openai/clip-vit-base-patch16",
        **kwargs
    ): ...

import torch
from torchmetrics.multimodal import LipVertexError
# Lip Vertex Error for lip-sync quality evaluation
lve = LipVertexError()
# Sample video landmarks (batch, time, landmarks, coords)
preds = torch.randn(2, 10, 68, 2) # 2 videos, 10 frames, 68 landmarks, x-y coords
target = torch.randn(2, 10, 68, 2)
# Compute lip synchronization error
lve_score = lve(preds, target)
print(f"Lip Vertex Error: {lve_score:.4f}")
# CLIP Score (requires transformers)
try:
    from torchmetrics.multimodal import CLIPScore
    clip_metric = CLIPScore()
    # Sample text and images
    images = torch.randint(0, 256, (4, 3, 224, 224), dtype=torch.uint8)
    texts = ["a photo of a cat", "a dog playing", "a beautiful sunset", "a city skyline"]
    # Compute CLIP score
    clip_score = clip_metric(images, texts)
    print(f"CLIP Score: {clip_score:.4f}")
except ImportError:
print("CLIP metrics require 'transformers' package")VideoLandmarks = Tensor # Shape: (batch, time, landmarks, coordinates)
TextPrompts = List[str]  # Text descriptions or prompts

Install with Tessl CLI
npx tessl i tessl/pypi-torchmetrics