tessl install tessl/pypi-livekit@1.0.0

Python Real-time SDK for LiveKit providing WebRTC-based video, audio, and data streaming capabilities.

The LiveKit SDK provides WebRTC audio processing capabilities through the AudioProcessingModule (APM) class, including echo cancellation, noise suppression, high-pass filtering, and automatic gain control.

Key concepts:
from livekit import AudioProcessingModule


class AudioProcessingModule:
    """WebRTC audio processing (echo cancellation, noise suppression, etc).

    The APM processes audio frames to improve quality by removing echo,
    suppressing noise, filtering low frequencies, and controlling gain.
    All processing is done in-place (modifies frames).
    """

    def __init__(
        self,
        *,
        echo_cancellation: bool = False,
        noise_suppression: bool = False,
        high_pass_filter: bool = False,
        auto_gain_control: bool = False,
    ) -> None:
        """Initialize AudioProcessingModule with desired features.

        Args:
            echo_cancellation: Enable acoustic echo cancellation (AEC).
                Removes speaker audio from the microphone signal.
                Requires process_reverse_stream() to be fed playback audio.
            noise_suppression: Enable noise suppression (NS).
                Reduces stationary background noise (fans, AC, hum).
            high_pass_filter: Enable high-pass filter (HPF).
                Removes low-frequency noise (rumble, wind, <80-100Hz).
            auto_gain_control: Enable automatic gain control (AGC).
                Normalizes audio levels to maintain consistent volume.

        Example:
            >>> # Enable all features
            >>> apm = AudioProcessingModule(
            ...     echo_cancellation=True,
            ...     noise_suppression=True,
            ...     high_pass_filter=True,
            ...     auto_gain_control=True,
            ... )
            >>>
            >>> # Only noise suppression
            >>> apm = AudioProcessingModule(noise_suppression=True)

        Note:
            Features can be enabled/disabled independently.
            More features = higher CPU usage but better quality.
            Performance impact:
                - Noise suppression: Low
                - High-pass filter: Very low
                - Auto gain control: Low
                - Echo cancellation: Medium (requires reverse stream)
        """

    def process_stream(self, data: "AudioFrame") -> None:
        """Process a capture (microphone) frame using the configured features.

        Processing is in-place: the original frame data is overwritten.

        Args:
            data: AudioFrame to process. MUST be exactly 10ms duration
                (samples_per_channel == sample_rate / 100):
                - 8kHz: 80 samples per channel
                - 16kHz: 160 samples per channel
                - 32kHz: 320 samples per channel
                - 48kHz: 480 samples per channel

        Returns:
            None (modifies frame in-place).

        Raises:
            RuntimeError: If processing fails or frame duration is not 10ms.
                Common cause: wrong sample count for the sample rate.

        Example:
            >>> from livekit import AudioFrame
            >>>
            >>> # Create 10ms frame at 48kHz
            >>> frame = AudioFrame.create(
            ...     sample_rate=48000,
            ...     num_channels=1,
            ...     samples_per_channel=480,  # 10ms at 48kHz
            ... )
            >>>
            >>> # Process in-place
            >>> apm.process_stream(frame)
            >>> # Frame is now processed (noise suppressed, etc.)

        Note:
            Frame MUST be exactly 10ms; calculate samples as sample_rate / 100.
            When echo cancellation is enabled, use this call order:
            1. process_reverse_stream() with speaker audio
            2. set_stream_delay_ms() for accurate timing
            3. process_stream() with microphone audio
        """

    def process_reverse_stream(self, data: "AudioFrame") -> None:
        """Process a reverse (playback/speaker) frame for echo cancellation.

        The reverse stream is the audio being played out (speaker audio).
        This is required for echo cancellation to work effectively.
        Processing is in-place.

        Args:
            data: AudioFrame to process. MUST be exactly 10ms duration and
                should use the same sample rate as the capture stream.

        Returns:
            None (modifies frame in-place).

        Raises:
            RuntimeError: If processing fails or frame duration is not 10ms.

        Example:
            >>> # Process playback audio for echo cancellation
            >>> playback_frame = AudioFrame.create(48000, 1, 480)
            >>> apm.process_reverse_stream(playback_frame)
            >>>
            >>> # Then process microphone audio
            >>> capture_frame = AudioFrame.create(48000, 1, 480)
            >>> apm.process_stream(capture_frame)

        Note:
            Only needed when echo_cancellation=True.
            Reverse stream = audio played to the speaker;
            capture stream = audio from the microphone.
            For echo cancellation: (1) process reverse stream, (2) set stream
            delay, (3) process capture stream. Without the reverse stream,
            echo cancellation won't work.
        """

    def set_stream_delay_ms(self, delay_ms: int) -> None:
        """Set stream delay for echo cancellation.

        The delay represents the time difference between when audio is
        captured and when it's played back. Accurate delay information
        improves echo cancellation quality.

        Args:
            delay_ms: Delay in milliseconds. Typical range: 0-500ms.
                Higher values = worse echo cancellation.

        Returns:
            None

        Raises:
            RuntimeError: If setting delay fails.

        Example:
            >>> # Set 50ms delay (typical)
            >>> apm.set_stream_delay_ms(50)
            >>>
            >>> # Measure actual delay and set
            >>> measured_delay = measure_audio_delay()
            >>> apm.set_stream_delay_ms(measured_delay)

        Note:
            Only used when echo_cancellation=True.
            More accurate delay = better echo cancellation.
            Delay sources: audio driver buffering, processing latency,
            system latency. Typical delays: low-latency setup 10-30ms,
            normal 30-100ms, high-latency 100-500ms.
            Update the delay if it changes (e.g., buffer size change).
        """


from livekit import AudioProcessingModule, AudioFrame
# Create APM with noise suppression only
apm = AudioProcessingModule(noise_suppression=True)

# Process audio frames (must be 10ms each)
frame = AudioFrame.create(
    sample_rate=48000,
    num_channels=1,
    samples_per_channel=480,  # 10ms at 48kHz
)

# Fill frame with audio data from microphone
# ... (frame data population)

# Process in-place
apm.process_stream(frame)

# Frame now has noise suppressed
# Use for capture, encoding, or further processing

from livekit import AudioProcessingModule, AudioFrame
# Enable all features
apm = AudioProcessingModule(
    echo_cancellation=True,
    noise_suppression=True,
    high_pass_filter=True,
    auto_gain_control=True,
)

# FIX: follow the documented AEC call order —
# reverse stream first, then delay, then capture stream.

# 1. Process reverse stream (speaker output)
playback_frame = AudioFrame.create(48000, 1, 480)
# Fill with speaker audio
apm.process_reverse_stream(playback_frame)

# 2. Set delay for echo cancellation (adjust based on system)
apm.set_stream_delay_ms(50)

# 3. Process capture stream (microphone)
capture_frame = AudioFrame.create(48000, 1, 480)
# Fill with microphone audio
apm.process_stream(capture_frame)

import asyncio
from livekit import (
AudioSource,
LocalAudioTrack,
AudioFrame,
AudioProcessingModule,
)
async def capture_with_apm(source: AudioSource):
    """Continuously capture device audio, run it through the APM, and feed it to *source*."""
    # Create APM
    apm = AudioProcessingModule(
        noise_suppression=True,
        auto_gain_control=True,
        high_pass_filter=True,
    )

    sample_rate = 48000
    samples_per_channel = 480  # 10ms at 48kHz
    frame_duration = 0.01  # 10ms

    while True:
        # Get audio from input device
        raw_frame = get_audio_from_device(sample_rate, samples_per_channel)

        # Process with APM (in-place)
        apm.process_stream(raw_frame)

        # Capture processed audio to source
        await source.capture_frame(raw_frame)

        # Wait for next frame
        await asyncio.sleep(frame_duration)
def get_audio_from_device(sample_rate: int, samples_per_channel: int) -> "AudioFrame":
    """Placeholder for actual audio capture.

    In production, replace with actual device capture using:
    - sounddevice library
    - pyaudio
    - MediaDevices (see utilities.md)
    """
    frame = AudioFrame.create(sample_rate, 1, samples_per_channel)
    # TODO: Fill with actual microphone data
    return frame


from livekit import MediaDevices, AudioProcessingModule
# Media devices with built-in APM
# Requires: pip install sounddevice
devices = MediaDevices()
# Open input with echo cancellation
# APM is applied automatically internally
capture = devices.open_input(
enable_aec=True, # Echo cancellation
noise_suppression=True, # Noise suppression
auto_gain_control=True, # Gain control
high_pass_filter=True # High-pass filter
)
# The capture.source already has APM applied
source = capture.source
# Create track and publish
track = LocalAudioTrack.create_audio_track("mic", source)
await room.local_participant.publish_track(track)
# Open output for playback (for reverse stream)
player = devices.open_output()
await player.add_track(remote_audio_track)
await player.start()
# MediaDevices handles APM reverse stream automaticallyThe APM requires exactly 10ms audio frames. Calculate samples per channel:
# Sample rate to samples per channel mapping (for 10ms)
SAMPLE_RATES_10MS = {
    8000: 80,     # 8kHz: 80 samples = 10ms (narrowband)
    16000: 160,   # 16kHz: 160 samples = 10ms (wideband)
    32000: 320,   # 32kHz: 320 samples = 10ms (super-wideband)
    48000: 480,   # 48kHz: 480 samples = 10ms (full-band, recommended)
}

sample_rate = 48000
samples_per_channel = SAMPLE_RATES_10MS[sample_rate]

# Create frame with correct duration
frame = AudioFrame.create(sample_rate, 1, samples_per_channel)

# Verify duration
assert frame.duration == 0.01, "Frame must be exactly 10ms"

# Process
apm.process_stream(frame)

import asyncio
import numpy as np
from livekit import (
Room,
AudioSource,
LocalAudioTrack,
AudioFrame,
AudioProcessingModule,
TrackPublishOptions,
)
async def main():
    """Connect to a room, publish an APM-processed audio track, run for 60s, then clean up."""
    # Create room and connect
    room = Room()
    # NOTE(review): `url` and `token` are not defined in this snippet —
    # supply real LiveKit credentials here.
    await room.connect(url, token)

    # Create APM with all features
    apm = AudioProcessingModule(
        echo_cancellation=True,
        noise_suppression=True,
        high_pass_filter=True,
        auto_gain_control=True,
    )

    # Set delay for echo cancellation
    apm.set_stream_delay_ms(50)

    # Create audio source and track
    source = AudioSource(sample_rate=48000, num_channels=1)
    track = LocalAudioTrack.create_audio_track("processed-audio", source)

    # Publish track
    await room.local_participant.publish_track(track, TrackPublishOptions())

    # Start audio processing task
    asyncio.create_task(process_audio_loop(source, apm))

    # Keep running
    await asyncio.sleep(60)

    # Cleanup
    await source.aclose()
    await room.disconnect()
async def process_audio_loop(source: "AudioSource", apm: "AudioProcessingModule"):
    """Continuously capture and process audio."""
    sample_rate = 48000
    num_channels = 1
    samples_per_channel = 480  # 10ms at 48kHz
    frame_duration = samples_per_channel / sample_rate  # 0.01 seconds

    while True:
        # Simulate audio capture
        # In production, get from actual microphone
        frame = AudioFrame.create(
            sample_rate=sample_rate,
            num_channels=num_channels,
            samples_per_channel=samples_per_channel,
        )
        # TODO: Fill frame with actual audio data from microphone
        # For now, frame is initialized with zeros (silence)

        # Process with APM (in-place modification)
        apm.process_stream(frame)

        # Capture processed frame to source
        await source.capture_frame(frame)

        # Wait for next frame
        await asyncio.sleep(frame_duration)
if __name__ == "__main__":
    asyncio.run(main())


# ALWAYS use exactly 10ms frames

# Correct: 10ms at 48kHz
sample_rate = 48000
samples_per_channel = 480  # sample_rate / 100
frame = AudioFrame.create(sample_rate, 1, samples_per_channel)
apm.process_stream(frame)  # Works

# Incorrect: 20ms at 48kHz
# samples_per_channel = 960  # 20ms
# frame = AudioFrame.create(48000, 1, 960)
# apm.process_stream(frame)  # Raises RuntimeError

# Correct: 10ms at 16kHz
frame_16k = AudioFrame.create(16000, 1, 160)
apm.process_stream(frame_16k)  # Works

# For echo cancellation to work effectively
apm = AudioProcessingModule(echo_cancellation=True)

# MUST process both streams:

# 1. Process speaker audio (what's being played)
playback_frame = get_speaker_audio()
apm.process_reverse_stream(playback_frame)

# 2. Set accurate delay
apm.set_stream_delay_ms(measured_delay)

# 3. Process microphone audio (what's being captured)
capture_frame = get_microphone_audio()
apm.process_stream(capture_frame)

# Without reverse stream, echo cancellation won't work

# For noisy environments (office, outdoor)
apm_noisy = AudioProcessingModule(
noise_suppression=True, # Remove background noise
high_pass_filter=True # Remove rumble/wind
)
# For echo-prone setups (laptop speakers, speakerphone)
apm_echo = AudioProcessingModule(
echo_cancellation=True, # Remove speaker echo
noise_suppression=True # Also suppress noise
)
# For varying microphone volumes
apm_gain = AudioProcessingModule(
auto_gain_control=True # Normalize levels
)
# For professional setup (all features)
apm_pro = AudioProcessingModule(
echo_cancellation=True,
noise_suppression=True,
high_pass_filter=True,
auto_gain_control=True
)def safe_process_stream(apm: AudioProcessingModule, frame: AudioFrame) -> bool:
"""Process stream with error handling.
Returns:
bool: True if processed successfully, False otherwise
"""
try:
apm.process_stream(frame)
return True
except RuntimeError as e:
print(f"APM processing failed: {e}")
# Verify frame is exactly 10ms
expected_samples = frame.sample_rate // 100
if frame.samples_per_channel != expected_samples:
print(f"Wrong frame size: {frame.samples_per_channel}, "
f"expected: {expected_samples}")
return False
# Usage
if safe_process_stream(apm, frame):
await source.capture_frame(frame)
else:
# Use unprocessed frame or skip
print("Skipping frame due to processing error")# MediaDevices handles APM automatically (recommended)
from livekit import MediaDevices
devices = MediaDevices()
# Open input with APM features
# APM is applied automatically to captured audio
capture = devices.open_input(
enable_aec=True, # Echo cancellation
noise_suppression=True, # Noise suppression
auto_gain_control=True, # Gain control
high_pass_filter=True # High-pass filter
)
# The capture.source already has APM applied
# No need to manually call process_stream()
source = capture.source
track = LocalAudioTrack.create_audio_track("mic", source)
await room.local_participant.publish_track(track)
# MediaDevices manages reverse stream automaticallyPurpose: Removes acoustic echo caused by speaker audio being picked up by the microphone.
How it works: uses the playback (reverse-stream) audio as a reference to remove echo from the captured signal. (Detail list lost during extraction.)
Requirements: echo_cancellation=True, process_reverse_stream() fed with playback audio, and set_stream_delay_ms() set to the measured capture/playback delay.
Use cases: laptop speakers, speakerphone, and other echo-prone setups.

Noise Suppression (NS)
Purpose: Reduces stationary background noise.
Examples of noise removed: fans, air conditioning, electrical hum.
Use cases: offices, outdoor capture, and other noisy environments.

High-Pass Filter (HPF)
Purpose: Removes low-frequency components.
Frequency range: Typically removes <80-100Hz
Examples of noise removed: rumble and wind noise.
Use cases: microphones picking up low-frequency noise.

Automatic Gain Control (AGC)
Purpose: Automatically adjusts audio levels to maintain consistent volume.
How it works: adapts gain so the output level stays consistent. (Detail list lost during extraction.)
Use cases: setups with varying microphone volumes.
class AdaptiveAPM:
    """APM that adapts to audio conditions."""

    def __init__(self):
        self.apm = AudioProcessingModule(
            noise_suppression=True,
            auto_gain_control=True,
        )
        # Most recent normalized noise estimate in [0, 1]
        self.noise_level = 0.0
        # True once the aggressive (NS + HPF) configuration is active
        self._aggressive = False

    def process_adaptive(self, frame: "AudioFrame"):
        """Measure noise on *frame*, reconfigure the APM if needed, then process in-place."""
        # Measure noise level
        self.noise_level = self.measure_noise(frame)

        # Adjust processing based on conditions.
        # BUG FIX: the original tested `if not self.apm`, which is never true
        # because __init__ always creates an APM — the aggressive branch was
        # dead code. Track the active mode explicitly instead.
        if self.noise_level > 0.1 and not self._aggressive:
            # High noise environment: recreate with aggressive noise suppression
            self.apm = AudioProcessingModule(
                noise_suppression=True,
                high_pass_filter=True,
            )
            self._aggressive = True

        # Process frame
        self.apm.process_stream(frame)

    def measure_noise(self, frame: "AudioFrame") -> float:
        """Estimate background noise level as normalized RMS in [0, 1].

        Assumes frame.data holds interleaved signed 16-bit PCM samples.
        """
        import numpy as np

        samples = np.frombuffer(frame.data, dtype=np.int16)
        if samples.size == 0:
            # Empty buffer: avoid mean-of-empty warning/NaN
            return 0.0
        # Simple RMS calculation
        rms = np.sqrt(np.mean(samples.astype(np.float32) ** 2))
        return rms / 32767.0  # Normalize