0
# Audio Effects and Filtering
1
2
Comprehensive audio effects processing including filters, EQ, dynamic effects, and spatial audio processing capabilities. TorchAudio provides both functional and transform-based interfaces for applying professional audio effects.
3
4
## Capabilities
5
6
### Biquad Filters
7
8
Second-order IIR (biquad) filters for shaping the frequency response of a signal.
9
10
```python { .api }
11
def biquad(waveform: torch.Tensor, b0: float, b1: float, b2: float,
           a0: float, a1: float, a2: float) -> torch.Tensor:
    """
    Apply a generic biquad (second-order IIR) filter.

    Args:
        waveform: Input audio (..., time)
        b0, b1, b2: Numerator coefficients applied to the current input
            sample and the inputs one and two steps back
        a0, a1, a2: Denominator coefficients applied to the current output
            sample and the outputs one and two steps back (a0 is typically 1)

    Returns:
        Tensor: Filtered audio with the same shape as the input
    """
24
25
def lowpass_biquad(waveform: torch.Tensor, sample_rate: int, cutoff_freq: float, Q: float = 0.707) -> torch.Tensor:
    """Apply a lowpass biquad filter; ``cutoff_freq`` is in Hz and ``Q`` sets the resonance."""
27
28
def highpass_biquad(waveform: torch.Tensor, sample_rate: int, cutoff_freq: float, Q: float = 0.707) -> torch.Tensor:
    """Apply a highpass biquad filter; ``cutoff_freq`` is in Hz and ``Q`` sets the resonance."""
30
31
def bandpass_biquad(waveform: torch.Tensor, sample_rate: int, central_freq: float, Q: float = 0.707,
                    const_skirt_gain: bool = False) -> torch.Tensor:
    """
    Apply a bandpass biquad filter centred on ``central_freq`` (Hz).

    When ``const_skirt_gain`` is True the filter uses a constant skirt gain
    (peak gain equals Q); when False it uses a constant 0 dB peak gain.
    """
34
35
def bandreject_biquad(waveform: torch.Tensor, sample_rate: int, central_freq: float, Q: float = 0.707) -> torch.Tensor:
    """
    Apply a band-reject (notch) biquad filter centred on ``central_freq`` (Hz).

    Unlike ``bandpass_biquad``, this function takes no ``const_skirt_gain``
    argument.
    """
38
39
def allpass_biquad(waveform: torch.Tensor, sample_rate: int, central_freq: float, Q: float = 0.707) -> torch.Tensor:
    """Apply a two-pole allpass biquad filter, changing phase around ``central_freq`` (Hz) without changing magnitude."""
41
42
def equalizer_biquad(waveform: torch.Tensor, sample_rate: int, center_freq: float, gain: float, Q: float = 0.707) -> torch.Tensor:
    """
    Apply a peaking equalizer biquad filter.

    ``gain`` is the boost (positive) or cut (negative) in dB applied around
    ``center_freq`` (Hz). Note that this function spells the frequency
    argument ``center_freq``, whereas the other biquads use ``central_freq``.
    """
44
45
def bass_biquad(waveform: torch.Tensor, sample_rate: int, gain: float, central_freq: float = 100.0, Q: float = 0.707) -> torch.Tensor:
    """Apply a bass (low-shelf) tone-control biquad; ``gain`` is the boost or cut in dB."""
47
48
def treble_biquad(waveform: torch.Tensor, sample_rate: int, gain: float, central_freq: float = 3000.0, Q: float = 0.707) -> torch.Tensor:
    """Apply a treble (high-shelf) tone-control biquad; ``gain`` is the boost or cut in dB."""
50
51
def deemph_biquad(waveform: torch.Tensor, sample_rate: int) -> torch.Tensor:
    """
    Apply the ISO 908 CD de-emphasis (shelving) biquad filter.

    ``sample_rate`` must be 44100 or 48000 Hz.
    """
53
54
def riaa_biquad(waveform: torch.Tensor, sample_rate: int) -> torch.Tensor:
    """
    Apply the RIAA vinyl playback equalization curve.

    ``sample_rate`` must be one of 44100, 48000, 88200, or 96000 Hz.
    """
56
```
57
58
### Dynamic Effects
59
60
Modulation, distortion, and enhancement effects for creative audio processing.
61
62
```python { .api }
63
def flanger(waveform: torch.Tensor, sample_rate: int, delay: float = 0.0,
            depth: float = 2.0, regen: float = 0.0, width: float = 71.0,
            speed: float = 0.5, phase: float = 25.0,
            modulation: str = "sinusoidal",
            interpolation: str = "linear") -> torch.Tensor:
    """
    Apply flanger effect.

    Args:
        waveform: Input audio (..., channel, time)
        sample_rate: Sample rate in Hz
        delay: Base delay in milliseconds (0 to 30)
        depth: Delay modulation depth in milliseconds (0 to 10)
        regen: Regeneration (feedback) gain in dB (-95 to 95)
        width: Delay-line gain in dB (0 to 100)
        speed: LFO speed in Hz (0.1 to 10)
        phase: Percentage phase shift between channels (0 to 100)
        modulation: LFO waveform, "sinusoidal" or "triangular"
        interpolation: Delay-line interpolation, "linear" or "quadratic"

    Returns:
        Tensor: Flanged audio (..., channel, time)
    """
85
86
def phaser(waveform: torch.Tensor, sample_rate: int, gain_in: float = 0.4,
           gain_out: float = 0.74, delay_ms: float = 3.0, decay: float = 0.4,
           mod_speed: float = 0.5, sinusoidal: bool = True) -> torch.Tensor:
    """
    Apply phaser effect.

    Args:
        waveform: Input audio (..., time)
        sample_rate: Sample rate in Hz
        gain_in: Input gain (0 to 1)
        gain_out: Output gain
        delay_ms: Delay in milliseconds (0 to 5)
        decay: Decay relative to gain_in (0 to 0.99)
        mod_speed: Modulation speed in Hz (0.1 to 2)
        sinusoidal: Use sinusoidal modulation when True, triangular when False

    Returns:
        Tensor: Phased audio
    """
105
106
def overdrive(waveform: torch.Tensor, gain: float = 20.0, colour: float = 20.0) -> torch.Tensor:
    """
    Apply overdrive (non-linear distortion) effect.

    Args:
        waveform: Input audio (..., time)
        gain: Overdrive gain in dB (0 to 100)
        colour: Amount of even-harmonic content in the output (0 to 100)

    Returns:
        Tensor: Overdriven audio
    """
118
119
def contrast(waveform: torch.Tensor, enhancement_amount: float = 75.0) -> torch.Tensor:
    """
    Apply contrast enhancement, which makes audio sound louder in a way
    comparable to compression.

    Args:
        waveform: Input audio (..., time)
        enhancement_amount: Enhancement amount (0 to 100); 0 still gives a
            noticeable enhancement

    Returns:
        Tensor: Enhanced audio
    """
130
```
131
132
### Gain and Level Control
133
134
Functions for adjusting signal level, shifting DC offset, and applying dither.
135
136
```python { .api }
137
def gain(waveform: torch.Tensor, gain_db: float = 1.0) -> torch.Tensor:
    """
    Amplify or attenuate the whole waveform by a gain in decibels.

    Args:
        waveform: Input audio (..., time)
        gain_db: Gain in decibels; positive boosts, negative attenuates

    Returns:
        Tensor: Gained audio
    """
148
149
def dcshift(waveform: torch.Tensor, shift: float, limiter_gain: Optional[float] = None) -> torch.Tensor:
    """
    Apply a DC shift to audio, e.g. to remove a DC offset.

    Args:
        waveform: Input audio (..., time)
        shift: DC shift amount (-2.0 to +2.0)
        limiter_gain: Optional limiter gain, applied only on peaks to
            prevent clipping; should be much less than 1 (e.g. 0.05)

    Returns:
        Tensor: DC-shifted audio
    """
161
162
def dither(waveform: torch.Tensor, density_function: str = "TPDF",
           noise_shaping: bool = False) -> torch.Tensor:
    """
    Apply dithering to audio to mask quantization distortion.

    Args:
        waveform: Input audio (..., time)
        density_function: Probability density function of the dither noise:
            "TPDF" (triangular), "RPDF" (rectangular), or "GPDF" (Gaussian)
        noise_shaping: Whether to apply noise shaping, which filters the
            spectral energy of the quantization error

    Returns:
        Tensor: Dithered audio
    """
175
```
176
177
### Voice Activity Detection
178
179
Trim leading silence and quiet background noise from speech recordings using voice activity detection.
180
181
```python { .api }
182
def vad(waveform: torch.Tensor, sample_rate: int, trigger_level: float = 7.0,
        trigger_time: float = 0.25, search_time: float = 1.0, allowed_gap: float = 0.25,
        pre_trigger_time: float = 0.0, boot_time: float = 0.35, noise_up_time: float = 0.1,
        noise_down_time: float = 0.01, noise_reduction_amount: float = 1.35,
        measure_freq: float = 20.0, measure_duration: Optional[float] = None,
        measure_smooth_time: float = 0.4, hp_filter_freq: float = 50.0,
        lp_filter_freq: float = 6000.0, hp_lifter_freq: float = 150.0,
        lp_lifter_freq: float = 2000.0) -> torch.Tensor:
    """
    Voice Activity Detection - trim silence/noise before the start of speech.

    Only the front of the audio is trimmed. To trim the end as well, reverse
    the waveform along time, apply ``vad`` again, and reverse back.

    Args:
        waveform: Input audio (..., time)
        sample_rate: Sample rate in Hz
        trigger_level: Measurement level used to trigger activity detection
        trigger_time: Time constant (s) used to ignore short bursts of sound
        search_time: Amount of audio (s) to search for quieter/shorter
            bursts to include before the trigger point
        allowed_gap: Allowed gap (s) between quieter/shorter bursts
        pre_trigger_time: Amount of audio (s) to keep before the trigger point
        boot_time: Initial period (s) assumed to be noise when estimating
            the noise level
        noise_up_time: Time constant for noise level adaptation (rising)
        noise_down_time: Time constant for noise level adaptation (falling)
        noise_reduction_amount: Amount of noise reduction used by the detector
        measure_freq: Frequency (Hz) of level measurements
        measure_duration: Duration (s) of each measurement; None means twice
            the measurement period
        measure_smooth_time: Time constant for smoothing spectral measurements
        hp_filter_freq: High-pass filter frequency applied at the detector input
        lp_filter_freq: Low-pass filter frequency applied at the detector input
        hp_lifter_freq: High-pass lifter frequency used in the detector
        lp_lifter_freq: Low-pass lifter frequency used in the detector

    Returns:
        Tensor: Audio with leading non-speech removed (..., shorter time)
    """
214
```
215
216
### Advanced Filtering
217
218
Higher-order and specialized filtering algorithms.
219
220
```python { .api }
221
def lfilter(waveform: torch.Tensor, a_coeffs: torch.Tensor, b_coeffs: torch.Tensor,
            clamp: bool = True, batching: bool = True) -> torch.Tensor:
    """
    Apply IIR filter using difference equation.

    Args:
        waveform: Input signal (..., time)
        a_coeffs: Denominator coefficients (autoregressive), lowest delay
            first, shape (num_order + 1) or (num_filters, num_order + 1)
        b_coeffs: Numerator coefficients (moving average), same layout and
            size as a_coeffs
        clamp: Clamp the output to [-1, 1] when True
        batching: Only relevant for 2D coefficients; when True each filter
            is applied to the matching channel of the waveform

    Returns:
        Tensor: Filtered signal
    """
235
236
def filtfilt(waveform: torch.Tensor, a_coeffs: torch.Tensor, b_coeffs: torch.Tensor,
             clamp: bool = True) -> torch.Tensor:
    """
    Apply zero-phase filtering by running an IIR filter forward and backward.

    Args:
        waveform: Input signal (..., time)
        a_coeffs: Denominator coefficients, lowest delay first
        b_coeffs: Numerator coefficients, lowest delay first
        clamp: Clamp the output to [-1, 1] when True

    Returns:
        Tensor: Zero-phase filtered signal
    """
250
```
251
252
### Pre/De-emphasis
253
254
Frequency response shaping filters commonly used in speech processing.
255
256
```python { .api }
257
def preemphasis(waveform: torch.Tensor, coeff: float = 0.97) -> torch.Tensor:
    """
    Apply pre-emphasis filter (high-pass): y[i] = x[i] - coeff * x[i - 1].

    Args:
        waveform: Input audio (..., time)
        coeff: Pre-emphasis coefficient, typically between 0.0 and 1.0

    Returns:
        Tensor: Pre-emphasized audio
    """
268
269
def deemphasis(waveform: torch.Tensor, coeff: float = 0.97) -> torch.Tensor:
    """
    Apply de-emphasis filter (low-pass): y[i] = x[i] + coeff * y[i - 1].

    This is the inverse of ``preemphasis`` with the same coefficient.

    Args:
        waveform: Input audio (..., time)
        coeff: De-emphasis coefficient, typically between 0.0 and 1.0

    Returns:
        Tensor: De-emphasized audio
    """
280
```
281
282
## Usage Examples
283
284
### Multi-band EQ
285
286
```python
287
import torch
import torchaudio
import torchaudio.functional as F


def apply_multiband_eq(waveform: torch.Tensor, sample_rate: int) -> torch.Tensor:
    """Apply a four-band EQ: bass shelf, two peaking bands, and treble shelf.

    Args:
        waveform: Input audio (..., time)
        sample_rate: Sample rate of ``waveform`` in Hz

    Returns:
        Tensor: Equalized audio
    """
    # Bass boost: +3 dB shelf at 80 Hz
    audio = F.bass_biquad(waveform, sample_rate, gain=3.0, central_freq=80.0, Q=0.7)

    # Mid cut: -2 dB peaking band at 500 Hz
    audio = F.equalizer_biquad(audio, sample_rate, center_freq=500.0, gain=-2.0, Q=1.0)

    # Presence boost: +2 dB peaking band at 3 kHz
    audio = F.equalizer_biquad(audio, sample_rate, center_freq=3000.0, gain=2.0, Q=0.8)

    # Treble boost: +1.5 dB shelf at 8 kHz
    audio = F.treble_biquad(audio, sample_rate, gain=1.5, central_freq=8000.0, Q=0.7)

    return audio


# Load and process audio
waveform, sr = torchaudio.load("audio.wav")
eq_audio = apply_multiband_eq(waveform, sr)
torchaudio.save("eq_audio.wav", eq_audio, sr)
312
```
313
314
### Creative Effects Chain
315
316
```python
317
import torch
import torchaudio  # required by torchaudio.load / torchaudio.save below
import torchaudio.functional as F


def creative_effects_chain(waveform: torch.Tensor, sample_rate: int) -> torch.Tensor:
    """Apply overdrive, flanger, phaser, and contrast in series.

    Args:
        waveform: Input audio (..., channel, time); the flanger stage needs
            the channel dimension, which ``torchaudio.load`` provides
        sample_rate: Sample rate of ``waveform`` in Hz

    Returns:
        Tensor: Processed audio
    """
    # Apply slight overdrive
    audio = F.overdrive(waveform, gain=5.0, colour=15.0)

    # Add flanger effect
    audio = F.flanger(audio, sample_rate, delay=2.0, depth=3.0, regen=0.3, speed=0.8)

    # Apply phaser for movement
    audio = F.phaser(audio, sample_rate, gain_in=0.5, gain_out=0.8, mod_speed=1.5)

    # Final contrast enhancement
    audio = F.contrast(audio, enhancement_amount=50.0)

    return audio


# Apply creative effects
waveform, sr = torchaudio.load("input.wav")
processed = creative_effects_chain(waveform, sr)
torchaudio.save("creative_output.wav", processed, sr)
341
```
342
343
### Noise Reduction Pipeline
344
345
```python
346
import torch
import torchaudio  # required by torchaudio.load / torchaudio.save below
import torchaudio.functional as F


def noise_reduction_pipeline(waveform: torch.Tensor, sample_rate: int) -> torch.Tensor:
    """Clean up a speech recording: high-pass, trim the lead-in, de-emphasize.

    Args:
        waveform: Input audio (..., time)
        sample_rate: Sample rate of ``waveform`` in Hz

    Returns:
        Tensor: Processed audio; it may be shorter than the input because
        the VAD stage drops audio before the first detected speech
    """
    # High-pass filter to remove low-frequency noise
    audio = F.highpass_biquad(waveform, sample_rate, cutoff_freq=80.0, Q=0.707)

    # Trim silence/noise before the first detected speech.
    # vad only trims from the front of the signal; it does not remove
    # noise from within or after the speech.
    audio = F.vad(
        audio, sample_rate,
        trigger_level=6.0,
        trigger_time=0.2,
        noise_reduction_amount=2.0,
        hp_filter_freq=60.0,
    )

    # De-emphasis to counteract any harshness
    audio = F.deemphasis(audio, coeff=0.95)

    return audio


# Clean up noisy audio
noisy_audio, sr = torchaudio.load("noisy_speech.wav")
clean_audio = noise_reduction_pipeline(noisy_audio, sr)
torchaudio.save("clean_speech.wav", clean_audio, sr)
373
```
374
375
### Mastering Chain
376
377
```python
378
import torch
import torchaudio  # required by torchaudio.load / torchaudio.save below
import torchaudio.functional as F


def mastering_chain(waveform: torch.Tensor, sample_rate: int) -> torch.Tensor:
    """Apply a simple mastering chain: high-pass, saturation, EQ, and trim.

    Args:
        waveform: Input audio (..., time)
        sample_rate: Sample rate of ``waveform`` in Hz

    Returns:
        Tensor: Mastered audio
    """
    # Subtle high-pass to clean up sub-bass
    audio = F.highpass_biquad(waveform, sample_rate, cutoff_freq=30.0, Q=0.5)

    # Gentle compression simulation with overdrive
    audio = F.overdrive(audio, gain=2.0, colour=5.0)

    # Enhance presence
    audio = F.equalizer_biquad(audio, sample_rate, center_freq=2500.0, gain=1.0, Q=0.8)

    # Add air with treble boost
    audio = F.treble_biquad(audio, sample_rate, gain=0.8, central_freq=10000.0, Q=1.0)

    # Final gain adjustment: slight level reduction for headroom
    audio = F.gain(audio, gain_db=-1.0)

    return audio


# Master audio track
raw_mix, sr = torchaudio.load("raw_mix.wav")
mastered = mastering_chain(raw_mix, sr)
torchaudio.save("mastered_track.wav", mastered, sr)
405
```
406
407
These audio effects provide professional-grade processing capabilities for music production, audio restoration, creative sound design, and broadcast audio applications.