# Audio Metrics

Specialized metrics for audio processing and speech evaluation including signal-to-noise ratios, perceptual quality measures, and separation metrics for speech and audio applications.

## Capabilities

### Signal Distortion Ratio Metrics

Measures the quality of audio signal reconstruction and separation.
```python { .api }
class ScaleInvariantSignalDistortionRatio(Metric):
    def __init__(
        self,
        zero_mean: bool = True,
        **kwargs
    ): ...

class SignalDistortionRatio(Metric):
    def __init__(
        self,
        use_cg_iter: Optional[int] = None,
        filter_length: int = 512,
        zero_mean: bool = True,
        load_diag: Optional[float] = None,
        **kwargs
    ): ...

class SourceAggregatedSignalDistortionRatio(Metric):
    def __init__(
        self,
        scale_invariant: bool = True,
        zero_mean: bool = True,
        **kwargs
    ): ...
```

### Signal-to-Noise Ratio Metrics

Evaluates the ratio of signal power to noise power in audio signals.

```python { .api }
class ScaleInvariantSignalNoiseRatio(Metric):
    def __init__(
        self,
        zero_mean: bool = True,
        **kwargs
    ): ...

class SignalNoiseRatio(Metric):
    def __init__(
        self,
        zero_mean: bool = True,
        **kwargs
    ): ...

class ComplexScaleInvariantSignalNoiseRatio(Metric):
    def __init__(
        self,
        zero_mean: bool = True,
        **kwargs
    ): ...
```

### Source Separation Metrics

Specialized metrics for evaluating audio source separation tasks.

```python { .api }
class PermutationInvariantTraining(Metric):
    def __init__(
        self,
        metric: Union[Callable, Metric],
        mode: str = "speaker-wise",
        eval_func: str = "max",
        **kwargs
    ): ...
```

### Perceptual Quality Metrics

Metrics that evaluate audio quality from a human perception perspective (require optional dependencies).

```python { .api }
class PerceptualEvaluationSpeechQuality(Metric):
    def __init__(
        self,
        fs: int,
        mode: str = "wb",
        keep_same_device: bool = False,
        **kwargs
    ): ...

class ShortTimeObjectiveIntelligibility(Metric):
    def __init__(
        self,
        fs: int,
        extended: bool = False,
        keep_same_device: bool = False,
        **kwargs
    ): ...
```

### Advanced Audio Quality Metrics

Sophisticated metrics for speech and audio quality assessment (require optional dependencies).

```python { .api }
class SpeechReverberationModulationEnergyRatio(Metric):
    def __init__(
        self,
        fs: int,
        max_cf: float = 128.0,
        norm: bool = False,
        fast: bool = True,
        **kwargs
    ): ...

class DeepNoiseSuppressionMeanOpinionScore(Metric):
    def __init__(
        self,
        fs: int = 16000,
        personalized: bool = False,
        **kwargs
    ): ...

class NonIntrusiveSpeechQualityAssessment(Metric):
    def __init__(
        self,
        fs: int = 16000,
        **kwargs
    ): ...
```

## Usage Examples

### Basic Signal Quality Metrics

```python
import torch
from torchmetrics.audio import (
    ScaleInvariantSignalDistortionRatio,
    ScaleInvariantSignalNoiseRatio
)

# Initialize metrics
si_sdr = ScaleInvariantSignalDistortionRatio()
si_snr = ScaleInvariantSignalNoiseRatio()

# Sample audio data (batch_size, time)
preds = torch.randn(4, 8000)  # 4 samples, 8000 time steps
target = torch.randn(4, 8000)

# Compute signal quality metrics
sdr_score = si_sdr(preds, target)
snr_score = si_snr(preds, target)

print(f"SI-SDR: {sdr_score:.4f} dB")
print(f"SI-SNR: {snr_score:.4f} dB")
```

### Source Separation Evaluation

```python
from torchmetrics.audio import PermutationInvariantTraining, ScaleInvariantSignalDistortionRatio

# Initialize PIT wrapper with SI-SDR
base_metric = ScaleInvariantSignalDistortionRatio()
pit_metric = PermutationInvariantTraining(
    metric=base_metric,
    mode="speaker-wise",
    eval_func="max"
)

# Source separation scenario: 2 sources, 2 estimates
# Shape: (batch, num_speakers, time)
preds = torch.randn(4, 2, 8000)   # 4 batches, 2 estimated sources
target = torch.randn(4, 2, 8000)  # 4 batches, 2 true sources

# Compute PIT score (handles permutation)
pit_score = pit_metric(preds, target)
print(f"PIT SI-SDR: {pit_score:.4f} dB")
```

### Perceptual Quality Assessment

```python
from torchmetrics.audio import PerceptualEvaluationSpeechQuality

# Initialize PESQ metric (requires pesq package)
try:
    pesq_metric = PerceptualEvaluationSpeechQuality(fs=16000, mode="wb")

    # Sample speech signals at 16kHz
    preds = torch.randn(4, 16000)  # 1 second of audio
    target = torch.randn(4, 16000)

    # Compute PESQ score
    pesq_score = pesq_metric(preds, target)
    print(f"PESQ: {pesq_score:.4f}")

except ImportError:
    print("PESQ requires the 'pesq' package: pip install pesq")
```

### Speech Intelligibility

```python
from torchmetrics.audio import ShortTimeObjectiveIntelligibility

# Initialize STOI metric (requires pystoi package)
try:
    stoi_metric = ShortTimeObjectiveIntelligibility(fs=16000, extended=False)

    # Sample speech signals
    preds = torch.randn(2, 16000)
    target = torch.randn(2, 16000)

    # Compute STOI score
    stoi_score = stoi_metric(preds, target)
    print(f"STOI: {stoi_score:.4f}")

except ImportError:
    print("STOI requires the 'pystoi' package: pip install pystoi")
```

### Multi-channel Audio Processing

```python
from torchmetrics.audio import SignalDistortionRatio

# Traditional SDR for multi-channel audio
sdr_metric = SignalDistortionRatio()

# Multi-channel audio (batch, channels, time)
preds = torch.randn(2, 2, 8000)  # Stereo audio
target = torch.randn(2, 2, 8000)

# Compute SDR
sdr_score = sdr_metric(preds, target)
print(f"SDR: {sdr_score:.4f} dB")
```

### Advanced Speech Quality

```python
from torchmetrics.audio import DeepNoiseSuppressionMeanOpinionScore

# DNS-MOS for speech enhancement (requires librosa and onnxruntime)
try:
    dns_mos = DeepNoiseSuppressionMeanOpinionScore(fs=16000)

    # Enhanced speech samples
    preds = torch.randn(3, 16000)

    # Compute DNS-MOS — non-intrusive metric, so no reference signal is passed
    mos_score = dns_mos(preds)
    print(f"DNS-MOS: {mos_score}")

except ImportError:
    print("DNS-MOS requires 'librosa' and 'onnxruntime' packages")
```

## Types

```python { .api }
from typing import Union, Optional, Callable, Literal
import torch
from torch import Tensor

AudioTensor = Tensor       # Shape: (..., time) or (..., channels, time)
SeparationTensor = Tensor  # Shape: (..., num_sources, time)

EvalFunc = Literal["max", "min", "mean"]
SeparationMode = Literal["speaker-wise", "permutation-wise"]
PESQMode = Literal["wb", "nb"]  # wideband or narrowband
```