An audio package for PyTorch providing GPU-accelerated audio I/O operations, signal processing transforms, and machine learning utilities for audio data.
Comprehensive audio effects processing including filters, EQ, dynamic effects, and spatial audio processing capabilities. TorchAudio provides both functional and transform-based interfaces for applying professional audio effects.
Second-order IIR (biquad) filters for shaping a signal's frequency response.
def biquad(
    waveform: torch.Tensor,
    b0: float,
    b1: float,
    b2: float,
    a0: float,
    a1: float,
    a2: float,
) -> torch.Tensor:
    """
    Run a general second-order (biquad) IIR filter over the input.

    Args:
        waveform: Audio tensor of shape (..., time)
        b0, b1, b2: Coefficients of the numerator
        a0, a1, a2: Coefficients of the denominator

    Returns:
        Tensor: The filtered audio
    """
def lowpass_biquad(
    waveform: torch.Tensor,
    sample_rate: int,
    cutoff_freq: float,
    Q: float = 0.707,
) -> torch.Tensor:
    """Filter ``waveform`` with a second-order (biquad) low-pass section.

    Args:
        waveform: Input audio (..., time)
        sample_rate: Sample rate of ``waveform``
        cutoff_freq: Cutoff frequency of the filter
        Q: Filter quality factor

    Returns:
        Tensor: Low-pass filtered audio
    """
def highpass_biquad(
    waveform: torch.Tensor,
    sample_rate: int,
    cutoff_freq: float,
    Q: float = 0.707,
) -> torch.Tensor:
    """Filter ``waveform`` with a second-order (biquad) high-pass section.

    Args:
        waveform: Input audio (..., time)
        sample_rate: Sample rate of ``waveform``
        cutoff_freq: Cutoff frequency of the filter
        Q: Filter quality factor

    Returns:
        Tensor: High-pass filtered audio
    """
def bandpass_biquad(
    waveform: torch.Tensor,
    sample_rate: int,
    central_freq: float,
    Q: float = 0.707,
    const_skirt_gain: bool = False,
) -> torch.Tensor:
    """Filter ``waveform`` with a second-order (biquad) band-pass section.

    Args:
        waveform: Input audio (..., time)
        sample_rate: Sample rate of ``waveform``
        central_freq: Centre frequency of the pass band
        Q: Filter quality factor
        const_skirt_gain: Select the constant-skirt-gain variant of the
            filter instead of the default one

    Returns:
        Tensor: Band-pass filtered audio
    """
def bandreject_biquad(
    waveform: torch.Tensor,
    sample_rate: int,
    central_freq: float,
    Q: float = 0.707,
) -> torch.Tensor:
    """Apply band-reject (notch) biquad filter.

    Mirrors ``torchaudio.functional.bandreject_biquad``, which takes no
    ``const_skirt_gain`` argument (that option belongs to the band-pass
    filter only).

    Args:
        waveform: Input audio (..., time)
        sample_rate: Sample rate of ``waveform``
        central_freq: Centre frequency of the rejected band
        Q: Filter quality factor

    Returns:
        Tensor: Band-reject filtered audio
    """
def allpass_biquad(
    waveform: torch.Tensor,
    sample_rate: int,
    central_freq: float,
    Q: float = 0.707,
) -> torch.Tensor:
    """Run ``waveform`` through a second-order all-pass filter (phase adjustment).

    Args:
        waveform: Input audio (..., time)
        sample_rate: Sample rate of ``waveform``
        central_freq: Centre frequency of the filter
        Q: Filter quality factor

    Returns:
        Tensor: All-pass filtered audio
    """
def equalizer_biquad(
    waveform: torch.Tensor,
    sample_rate: int,
    center_freq: float,
    gain: float,
    Q: float = 0.707,
) -> torch.Tensor:
    """Boost or cut a band of ``waveform`` with a peaking-EQ biquad.

    Args:
        waveform: Input audio (..., time)
        sample_rate: Sample rate of ``waveform``
        center_freq: Centre frequency of the peak
        gain: Boost (positive) or cut (negative) applied at the peak
        Q: Filter quality factor

    Returns:
        Tensor: Equalized audio
    """
def bass_biquad(
    waveform: torch.Tensor,
    sample_rate: int,
    gain: float,
    central_freq: float = 100.0,
    Q: float = 0.707,
) -> torch.Tensor:
    """Boost or cut the low end of ``waveform`` with a bass-shelf biquad.

    Args:
        waveform: Input audio (..., time)
        sample_rate: Sample rate of ``waveform``
        gain: Boost (positive) or cut (negative) applied by the shelf
        central_freq: Centre frequency of the shelf
        Q: Filter quality factor

    Returns:
        Tensor: Filtered audio
    """
def treble_biquad(
    waveform: torch.Tensor,
    sample_rate: int,
    gain: float,
    central_freq: float = 3000.0,
    Q: float = 0.707,
) -> torch.Tensor:
    """Boost or cut the high end of ``waveform`` with a treble-shelf biquad.

    Args:
        waveform: Input audio (..., time)
        sample_rate: Sample rate of ``waveform``
        gain: Boost (positive) or cut (negative) applied by the shelf
        central_freq: Centre frequency of the shelf
        Q: Filter quality factor

    Returns:
        Tensor: Filtered audio
    """
def deemph_biquad(
    waveform: torch.Tensor,
    sample_rate: int,
) -> torch.Tensor:
    """Run ``waveform`` through the de-emphasis biquad filter.

    Args:
        waveform: Input audio (..., time)
        sample_rate: Sample rate of ``waveform``

    Returns:
        Tensor: De-emphasized audio
    """
def riaa_biquad(waveform: torch.Tensor, sample_rate: int) -> torch.Tensor:
    """Apply RIAA equalization curve.

    Args:
        waveform: Input audio (..., time)
        sample_rate: Sample rate of ``waveform``

    Returns:
        Tensor: Equalized audio
    """


# Time-varying effects for creative audio processing.
def flanger(
    waveform: torch.Tensor,
    sample_rate: int,
    delay: float = 0.0,
    depth: float = 2.0,
    regen: float = 0.0,
    width: float = 71.0,
    speed: float = 0.5,
    phase: float = 25.0,
    modulation: str = "sinusoidal",
    interpolation: str = "linear",
) -> torch.Tensor:
    """
    Apply flanger effect.

    The parameter list mirrors ``torchaudio.functional.flanger``.

    Args:
        waveform: Input audio (..., channel, time)
        sample_rate: Sample rate
        delay: Base delay in milliseconds
        depth: Delay modulation depth in milliseconds
        regen: Regeneration (feedback) amount
        width: Delay line width (delay gain)
        speed: LFO speed in Hz
        phase: Phase shift between channels for multi-channel input
        modulation: LFO waveform, "sinusoidal" or "triangular"
        interpolation: Delay-line interpolation, "linear" or "quadratic"

    Returns:
        Tensor: Flanged audio
    """
def phaser(
    waveform: torch.Tensor,
    sample_rate: int,
    gain_in: float = 0.4,
    gain_out: float = 0.74,
    delay_ms: float = 3.0,
    decay: float = 0.4,
    mod_speed: float = 0.5,
    sinusoidal: bool = True,
) -> torch.Tensor:
    """
    Apply phaser effect.

    Defaults mirror ``torchaudio.functional.phaser``.

    Args:
        waveform: Input audio (..., time)
        sample_rate: Sample rate
        gain_in: Input gain
        gain_out: Output gain
        delay_ms: Delay in milliseconds
        decay: Decay amount
        mod_speed: Modulation speed in Hz
        sinusoidal: Use sinusoidal modulation when True, triangular
            modulation when False

    Returns:
        Tensor: Phased audio
    """
def overdrive(
    waveform: torch.Tensor,
    gain: float = 20.0,
    colour: float = 20.0,
) -> torch.Tensor:
    """
    Distort the input with an overdrive effect.

    Args:
        waveform: Audio tensor of shape (..., time)
        gain: Amount of overdrive gain
        colour: Colour control of the effect

    Returns:
        Tensor: The overdriven audio
    """
def contrast(waveform: torch.Tensor, enhancement_amount: float = 75.0) -> torch.Tensor:
    """
    Apply contrast enhancement.

    Args:
        waveform: Input audio (..., time)
        enhancement_amount: Enhancement amount

    Returns:
        Tensor: Enhanced audio
    """


# Functions for controlling audio levels and dynamics.
def gain(
    waveform: torch.Tensor,
    gain_db: float,
) -> torch.Tensor:
    """
    Raise or lower the level of the input by a gain given in decibels.

    Args:
        waveform: Audio tensor of shape (..., time)
        gain_db: Gain to apply, in decibels

    Returns:
        Tensor: The level-adjusted audio
    """
def dcshift(
    waveform: torch.Tensor,
    shift: float,
    limiter_gain: Optional[float] = None,
) -> torch.Tensor:
    """
    Offset the input by a constant (DC) amount.

    Args:
        waveform: Audio tensor of shape (..., time)
        shift: Size of the DC offset to apply
        limiter_gain: Gain of the optional limiter; ``None`` leaves it unset

    Returns:
        Tensor: The DC-shifted audio
    """
def dither(
    waveform: torch.Tensor,
    density_function: str = "TPDF",
    noise_shaping: bool = False,
) -> torch.Tensor:
    """
    Apply dithering to audio.

    Signature and defaults mirror ``torchaudio.functional.dither``.

    Args:
        waveform: Input audio (..., time)
        density_function: Probability density function of the dither noise
            ("TPDF", "RPDF" or "GPDF")
        noise_shaping: Whether to apply noise shaping (a boolean switch,
            not a method name)

    Returns:
        Tensor: Dithered audio
    """


# Detect speech/non-speech segments in audio.
def vad(
    waveform: torch.Tensor,
    sample_rate: int,
    trigger_level: float = 7.0,
    trigger_time: float = 0.25,
    search_time: float = 1.0,
    allowed_gap: float = 0.25,
    pre_trigger_time: float = 0.0,
    boot_time: float = 0.35,
    noise_up_time: float = 0.1,
    noise_down_time: float = 0.01,
    noise_reduction_amount: float = 1.35,
    measure_freq: float = 20.0,
    measure_duration: Optional[float] = None,
    measure_smooth_time: float = 0.4,
    hp_filter_freq: float = 50.0,
    lp_filter_freq: float = 6000.0,
    hp_lifter_freq: float = 150.0,
    lp_lifter_freq: float = 2000.0,
) -> torch.Tensor:
    """
    Voice Activity Detection - trim silence/noise preceding speech.

    Signature and defaults mirror ``torchaudio.functional.vad``. The effect
    only trims from the front of the recording; to trim the tail as well,
    reverse the audio, apply it again and reverse back.

    Args:
        waveform: Input audio (..., time)
        sample_rate: Sample rate
        trigger_level: Measurement level used to trigger activity detection
        trigger_time: Time constant (seconds) used to ignore short bursts
        search_time: Time (seconds) to search for quieter/shorter bursts
            to include before the detected trigger point
        allowed_gap: Allowed gap (seconds) between quieter/shorter bursts
        pre_trigger_time: Audio (seconds) to keep before the trigger point
        boot_time: Time to gather initial noise estimate at start
        noise_up_time: Noise estimator time constant while noise rises
        noise_down_time: Noise estimator time constant while noise falls
        noise_reduction_amount: Amount of noise reduction used inside the
            detection algorithm
        measure_freq: Frequency of level measurements
        measure_duration: Duration of each measurement; ``None`` selects
            twice the measurement period
        measure_smooth_time: Time constant for smoothing spectral measurements
        hp_filter_freq: High-pass filter frequency applied at the detector input
        lp_filter_freq: Low-pass filter frequency applied at the detector input
        hp_lifter_freq: High-pass lifter frequency used by the detector
        lp_lifter_freq: Low-pass lifter frequency used by the detector

    Returns:
        Tensor: Audio (..., time) with leading silence/noise trimmed
    """


# Higher-order and specialized filtering algorithms.
def lfilter(
    waveform: torch.Tensor,
    a_coeffs: torch.Tensor,
    b_coeffs: torch.Tensor,
    clamp: bool = True,
    batching: bool = True,
) -> torch.Tensor:
    """
    Apply IIR filter using difference equation.

    Signature mirrors ``torchaudio.functional.lfilter``, which exposes no
    initial-condition (``zi``) argument.

    Args:
        waveform: Input signal (..., time)
        a_coeffs: Denominator coefficients (autoregressive), lower delays
            first; shape (num_order + 1) or (num_filters, num_order + 1)
        b_coeffs: Numerator coefficients (moving average), same layout as
            ``a_coeffs``
        clamp: If True, clamp the output to the range [-1, 1]
        batching: If True, each filter in a 2D coefficient bank is applied
            to its matching entry along the second-to-last axis of
            ``waveform``

    Returns:
        Tensor: Filtered signal
    """
def filtfilt(
    waveform: torch.Tensor,
    a_coeffs: torch.Tensor,
    b_coeffs: torch.Tensor,
    clamp: bool = True,
) -> torch.Tensor:
    """
    Apply zero-phase filtering using forward-backward filter.

    Args:
        waveform: Input signal (..., time)
        a_coeffs: Denominator coefficients
        b_coeffs: Numerator coefficients
        clamp: Whether to clamp the output signal

    Returns:
        Tensor: Zero-phase filtered signal
    """


# Frequency response shaping filters commonly used in speech processing.
def preemphasis(
    waveform: torch.Tensor,
    coeff: float = 0.97,
) -> torch.Tensor:
    """
    Emphasize high frequencies with a pre-emphasis (high-pass) filter.

    Args:
        waveform: Audio tensor of shape (..., time)
        coeff: Coefficient of the pre-emphasis filter

    Returns:
        Tensor: The pre-emphasized audio
    """
def deemphasis(waveform: torch.Tensor, coeff: float = 0.97) -> torch.Tensor:
    """
    Apply de-emphasis filter (low-pass).

    Args:
        waveform: Input audio (..., time)
        coeff: De-emphasis coefficient

    Returns:
        Tensor: De-emphasized audio
    """


import torch
import torchaudio
import torchaudio.functional as F
def apply_multiband_eq(waveform: torch.Tensor, sample_rate: int) -> torch.Tensor:
    """Run a fixed four-stage equalizer over ``waveform``.

    The stages run in series, each feeding the next: a bass shelf at
    80 Hz (gain 3.0), a peaking cut at 500 Hz (gain -2.0), a peaking
    boost at 3 kHz (gain 2.0) and a treble shelf at 8 kHz (gain 1.5).

    Args:
        waveform: Input audio tensor
        sample_rate: Sample rate of ``waveform``

    Returns:
        Tensor: The equalized audio
    """
    # Low shelf: lift the bottom end around 80 Hz
    lows_lifted = F.bass_biquad(waveform, sample_rate, gain=3.0, central_freq=80.0, Q=0.7)
    # Peaking band: pull the mids down around 500 Hz
    mids_cut = F.equalizer_biquad(lows_lifted, sample_rate, center_freq=500.0, gain=-2.0, Q=1.0)
    # Peaking band: add presence around 3 kHz
    presence_lifted = F.equalizer_biquad(mids_cut, sample_rate, center_freq=3000.0, gain=2.0, Q=0.8)
    # High shelf: lift the top end around 8 kHz
    return F.treble_biquad(presence_lifted, sample_rate, gain=1.5, central_freq=8000.0, Q=0.7)
# Load a file, run it through the multi-band EQ and write the result to disk.
waveform, sr = torchaudio.load("audio.wav")
eq_audio = apply_multiband_eq(waveform, sr)
torchaudio.save("eq_audio.wav", eq_audio, sr)


import torch
import torchaudio  # needed by the torchaudio.load/save calls in the next example
import torchaudio.functional as F
def creative_effects_chain(waveform: torch.Tensor, sample_rate: int) -> torch.Tensor:
    """Run ``waveform`` through overdrive, flanger, phaser and contrast in series.

    Args:
        waveform: Input audio tensor
        sample_rate: Sample rate of ``waveform``

    Returns:
        Tensor: The processed audio
    """
    # Mild distortion first, so the modulation effects act on the driven signal
    driven = F.overdrive(waveform, gain=5.0, colour=15.0)
    # Modulated-delay flanging
    flanged = F.flanger(driven, sample_rate, delay=2.0, depth=3.0, regen=0.3, speed=0.8)
    # Phaser adds movement on top
    phased = F.phaser(flanged, sample_rate, gain_in=0.5, gain_out=0.8, mod_speed=1.5)
    # Contrast enhancement as the last stage
    return F.contrast(phased, enhancement_amount=50.0)
# Load a file, run the creative effects chain and write the result to disk.
waveform, sr = torchaudio.load("input.wav")
processed = creative_effects_chain(waveform, sr)
torchaudio.save("creative_output.wav", processed, sr)


import torch
import torchaudio  # needed by the torchaudio.load/save calls in the next example
import torchaudio.functional as F
def noise_reduction_pipeline(waveform: torch.Tensor, sample_rate: int) -> torch.Tensor:
    """Clean up a recording with a high-pass filter, VAD and de-emphasis.

    Args:
        waveform: Input audio tensor
        sample_rate: Sample rate of ``waveform``

    Returns:
        Tensor: The processed audio
    """
    # Detector settings that differ from the F.vad defaults
    vad_settings = {
        "trigger_level": 6.0,
        "trigger_time": 0.2,
        "noise_reduction_amount": 2.0,
        "hp_filter_freq": 60.0,
    }

    # Remove low-frequency noise below 80 Hz
    rumble_free = F.highpass_biquad(waveform, sample_rate, cutoff_freq=80.0, Q=0.707)
    # Voice activity detection on the filtered signal
    voiced = F.vad(rumble_free, sample_rate, **vad_settings)
    # De-emphasis to counteract any harshness
    return F.deemphasis(voiced, coeff=0.95)
# Load a noisy recording, run the clean-up pipeline and write the result to disk.
noisy_audio, sr = torchaudio.load("noisy_speech.wav")
clean_audio = noise_reduction_pipeline(noisy_audio, sr)
torchaudio.save("clean_speech.wav", clean_audio, sr)


import torch
import torchaudio  # needed by the torchaudio.load/save calls in the next example
import torchaudio.functional as F
def mastering_chain(waveform: torch.Tensor, sample_rate: int) -> torch.Tensor:
    """Run ``waveform`` through a fixed five-stage mastering chain.

    Args:
        waveform: Input audio tensor
        sample_rate: Sample rate of ``waveform``

    Returns:
        Tensor: The mastered audio
    """
    # Each stage takes the previous stage's output; order matters.
    stages = (
        # Subtle high-pass to clean up sub-bass
        lambda x: F.highpass_biquad(x, sample_rate, cutoff_freq=30.0, Q=0.5),
        # Gentle compression simulated with a light overdrive
        lambda x: F.overdrive(x, gain=2.0, colour=5.0),
        # Presence lift
        lambda x: F.equalizer_biquad(x, sample_rate, center_freq=2500.0, gain=1.0, Q=0.8),
        # "Air" from a treble shelf
        lambda x: F.treble_biquad(x, sample_rate, gain=0.8, central_freq=10000.0, Q=1.0),
        # Slight level reduction at the end
        lambda x: F.gain(x, gain_db=-1.0),
    )

    result = waveform
    for stage in stages:
        result = stage(result)
    return result
# Load the raw mix, run the mastering chain and write the result to disk.
raw_mix, sr = torchaudio.load("raw_mix.wav")
mastered = mastering_chain(raw_mix, sr)
torchaudio.save("mastered_track.wav", mastered, sr)


# These audio effects provide professional-grade processing capabilities for music production, audio restoration, creative sound design, and broadcast audio applications.
Install with Tessl CLI
npx tessl i tessl/pypi-torchaudio