tessl/pypi-sounddevice

Python bindings for PortAudio library providing cross-platform audio I/O functionality with NumPy integration.

—

Pending

Overview

Eval results

Files

Stream Processing

Name: tessl/pypi-sounddevice
Author: tessl

Low-level stream classes for advanced audio processing with full control over buffering, callbacks, and real-time operation. These classes provide the foundation for building sophisticated audio applications with precise timing and custom processing workflows.

Capabilities

NumPy-based Streams

High-performance audio streams that work directly with NumPy arrays, providing efficient memory management and integration with scientific computing workflows.

class Stream:
    """
    Full-duplex audio stream that records and plays NumPy arrays simultaneously.

    Constructor parameters (all optional):
    - samplerate (float): Sampling frequency in Hz
    - blocksize (int): Number of frames handled per processing block
    - device (int or str or tuple): Input/output device specification
    - channels (int or tuple): Number of channels
    - dtype (str or numpy.dtype): Sample data type
    - latency (float or str): Target latency in seconds, or 'low'/'high'
    - extra_settings (object): Platform-specific settings
    - callback (function): User function invoked for real-time processing
    - finished_callback (function): User function invoked when the stream finishes
    - clip_off (bool): Turn off clipping of out-of-range samples
    - dither_off (bool): Turn off dithering
    - never_drop_input (bool): Never drop input samples
    - prime_output_buffers_using_stream_callback (bool): Prime buffers with the callback
    """

    # --- Lifecycle -------------------------------------------------------

    def start(self):
        """Begin audio processing."""

    def stop(self, ignore_errors=True):
        """End audio processing after pending buffers have completed."""

    def abort(self, ignore_errors=True):
        """End audio processing immediately, without waiting for pending buffers."""

    def close(self, ignore_errors=True):
        """Close the stream."""

    # --- Blocking I/O ----------------------------------------------------

    def read(self, frames):
        """Read `frames` frames of input; yields a (data, overflowed) pair."""

    def write(self, data):
        """Write `data` to the output."""

    @property
    def read_available(self):
        """Number of frames that can be read without waiting."""

    @property
    def write_available(self):
        """Number of frames that can be written without waiting."""

    # --- State -----------------------------------------------------------

    @property
    def active(self):
        """Whether the stream is currently active."""

    @property
    def stopped(self):
        """Whether the stream is currently stopped."""

    @property
    def closed(self):
        """Whether the stream has been closed."""

    # --- Configuration and timing ----------------------------------------

    @property
    def samplerate(self):
        """Sampling frequency in Hz."""

    @property
    def blocksize(self):
        """Number of frames per block."""

    @property
    def device(self):
        """Device(s) used by the stream."""

    @property
    def channels(self):
        """Number of channels."""

    @property
    def dtype(self):
        """Data type of the audio samples."""

    @property
    def samplesize(self):
        """Size in bytes of a single sample."""

    @property
    def latency(self):
        """Stream latency in seconds."""

    @property
    def time(self):
        """Current stream time in seconds."""

    @property
    def cpu_load(self):
        """CPU usage information for the stream."""

class InputStream:
    """
    Recording-only audio stream that delivers NumPy arrays.

    Parameters: Same as Stream class
    """

    # --- Lifecycle -------------------------------------------------------

    def start(self):
        """Begin audio processing."""

    def stop(self, ignore_errors=True):
        """End audio processing after pending buffers have completed."""

    def abort(self, ignore_errors=True):
        """End audio processing immediately, without waiting for pending buffers."""

    def close(self, ignore_errors=True):
        """Close the stream."""

    # --- Blocking input --------------------------------------------------

    def read(self, frames):
        """Read `frames` frames of input; yields a (data, overflowed) pair."""

    @property
    def read_available(self):
        """Number of frames that can be read without waiting."""

    # --- State -----------------------------------------------------------

    @property
    def active(self):
        """Whether the stream is currently active."""

    @property
    def stopped(self):
        """Whether the stream is currently stopped."""

    @property
    def closed(self):
        """Whether the stream has been closed."""

    # --- Configuration and timing ----------------------------------------

    @property
    def samplerate(self):
        """Sampling frequency in Hz."""

    @property
    def blocksize(self):
        """Number of frames per block."""

    @property
    def device(self):
        """Device used by the stream."""

    @property
    def channels(self):
        """Number of channels."""

    @property
    def dtype(self):
        """Data type of the audio samples."""

    @property
    def samplesize(self):
        """Size in bytes of a single sample."""

    @property
    def latency(self):
        """Stream latency in seconds."""

    @property
    def time(self):
        """Current stream time in seconds."""

    @property
    def cpu_load(self):
        """CPU usage information for the stream."""

class OutputStream:
    """
    Playback-only audio stream that accepts NumPy arrays.

    Parameters: Same as Stream class
    """

    # --- Lifecycle -------------------------------------------------------

    def start(self):
        """Begin audio processing."""

    def stop(self, ignore_errors=True):
        """End audio processing after pending buffers have completed."""

    def abort(self, ignore_errors=True):
        """End audio processing immediately, without waiting for pending buffers."""

    def close(self, ignore_errors=True):
        """Close the stream."""

    # --- Blocking output -------------------------------------------------

    def write(self, data):
        """Write `data` to the output."""

    @property
    def write_available(self):
        """Number of frames that can be written without waiting."""

    # --- State -----------------------------------------------------------

    @property
    def active(self):
        """Whether the stream is currently active."""

    @property
    def stopped(self):
        """Whether the stream is currently stopped."""

    @property
    def closed(self):
        """Whether the stream has been closed."""

    # --- Configuration and timing ----------------------------------------

    @property
    def samplerate(self):
        """Sampling frequency in Hz."""

    @property
    def blocksize(self):
        """Number of frames per block."""

    @property
    def device(self):
        """Device used by the stream."""

    @property
    def channels(self):
        """Number of channels."""

    @property
    def dtype(self):
        """Data type of the audio samples."""

    @property
    def samplesize(self):
        """Size in bytes of a single sample."""

    @property
    def latency(self):
        """Stream latency in seconds."""

    @property
    def time(self):
        """Current stream time in seconds."""

    @property
    def cpu_load(self):
        """CPU usage information for the stream."""

Raw Buffer Streams

Buffer-based audio streams that work with Python buffer objects when NumPy is not available or when direct memory control is needed.

class RawStream:
    """
    Full-duplex raw audio stream that exchanges Python buffer objects.

    Parameters: Same as Stream class
    """

    # --- Lifecycle -------------------------------------------------------

    def start(self):
        """Begin audio processing."""

    def stop(self, ignore_errors=True):
        """End audio processing after pending buffers have completed."""

    def abort(self, ignore_errors=True):
        """End audio processing immediately, without waiting for pending buffers."""

    def close(self, ignore_errors=True):
        """Close the stream."""

    # --- Blocking I/O ----------------------------------------------------

    def read(self, frames):
        """Read `frames` frames of input; yields a (buffer, overflowed) pair."""

    def write(self, data):
        """Write the buffer `data` to the output."""

    @property
    def read_available(self):
        """Number of frames that can be read without waiting."""

    @property
    def write_available(self):
        """Number of frames that can be written without waiting."""

    # --- State -----------------------------------------------------------

    @property
    def active(self):
        """Whether the stream is currently active."""

    @property
    def stopped(self):
        """Whether the stream is currently stopped."""

    @property
    def closed(self):
        """Whether the stream has been closed."""

    # --- Configuration and timing ----------------------------------------

    @property
    def samplerate(self):
        """Sampling frequency in Hz."""

    @property
    def blocksize(self):
        """Number of frames per block."""

    @property
    def device(self):
        """Device(s) used by the stream."""

    @property
    def channels(self):
        """Number of channels."""

    @property
    def dtype(self):
        """Data type of the audio samples."""

    @property
    def samplesize(self):
        """Size in bytes of a single sample."""

    @property
    def latency(self):
        """Stream latency in seconds."""

    @property
    def time(self):
        """Current stream time in seconds."""

    @property
    def cpu_load(self):
        """CPU usage information for the stream."""

class RawInputStream:
    """
    Input-only raw audio stream using Python buffer objects.

    Exposes the same lifecycle methods and read-only properties as the
    NumPy-based InputStream; only the type of the data returned by
    read() differs (a buffer object instead of an array).

    Parameters: Same as Stream class
    """

    def start(self):
        """Begin audio processing."""

    def stop(self, ignore_errors=True):
        """End audio processing after pending buffers have completed."""

    def abort(self, ignore_errors=True):
        """End audio processing immediately, without waiting for pending buffers."""

    def close(self, ignore_errors=True):
        """Close the stream."""

    def read(self, frames):
        """Read `frames` frames of input; yields a (buffer, overflowed) pair."""

    @property
    def read_available(self):
        """Number of frames that can be read without waiting."""

    @property
    def active(self):
        """Whether the stream is currently active."""

    @property
    def stopped(self):
        """Whether the stream is currently stopped."""

    @property
    def closed(self):
        """Whether the stream has been closed."""

    @property
    def samplerate(self):
        """Sampling frequency in Hz."""

    @property
    def blocksize(self):
        """Number of frames per block."""

    @property
    def device(self):
        """Device used by the stream."""

    @property
    def channels(self):
        """Number of channels."""

    @property
    def dtype(self):
        """Data type of the audio samples."""

    @property
    def samplesize(self):
        """Size in bytes of a single sample."""

    @property
    def latency(self):
        """Stream latency in seconds."""

    @property
    def time(self):
        """Current stream time in seconds."""

    @property
    def cpu_load(self):
        """CPU usage information for the stream."""

class RawOutputStream:
    """
    Output-only raw audio stream using Python buffer objects.

    Exposes the same lifecycle methods and read-only properties as the
    NumPy-based OutputStream; only the type of the data accepted by
    write() differs (a buffer object instead of an array).

    Parameters: Same as Stream class
    """

    def start(self):
        """Begin audio processing."""

    def stop(self, ignore_errors=True):
        """End audio processing after pending buffers have completed."""

    def abort(self, ignore_errors=True):
        """End audio processing immediately, without waiting for pending buffers."""

    def close(self, ignore_errors=True):
        """Close the stream."""

    def write(self, data):
        """Write the buffer `data` to the output."""

    @property
    def write_available(self):
        """Number of frames that can be written without waiting."""

    @property
    def active(self):
        """Whether the stream is currently active."""

    @property
    def stopped(self):
        """Whether the stream is currently stopped."""

    @property
    def closed(self):
        """Whether the stream has been closed."""

    @property
    def samplerate(self):
        """Sampling frequency in Hz."""

    @property
    def blocksize(self):
        """Number of frames per block."""

    @property
    def device(self):
        """Device used by the stream."""

    @property
    def channels(self):
        """Number of channels."""

    @property
    def dtype(self):
        """Data type of the audio samples."""

    @property
    def samplesize(self):
        """Size in bytes of a single sample."""

    @property
    def latency(self):
        """Stream latency in seconds."""

    @property
    def time(self):
        """Current stream time in seconds."""

    @property
    def cpu_load(self):
        """CPU usage information for the stream."""

Usage Examples

Basic Stream Recording

import sounddevice as sd
import numpy as np

# Open a stereo input stream; the `with` block keeps it running until exit
with sd.InputStream(samplerate=44100, channels=2, blocksize=1024) as mic:
    print("Recording... Press Ctrl+C to stop")
    try:
        while True:
            # Blocking read of one block; the second item flags lost input
            block, overflow = mic.read(1024)
            if overflow:
                print("Input overflow detected!")

            # Per-block processing goes here.
            # As a demonstration, report the block's RMS level.
            mean_square = np.mean(block**2)
            level = np.sqrt(mean_square)
            print(f"RMS level: {level:.4f}")

    except KeyboardInterrupt:
        # Ctrl+C is the intended way to end the loop
        print("Recording stopped")

Real-time Audio Processing with Callbacks

import sounddevice as sd
import numpy as np

# Global list of processed blocks, filled by the callback.
# NOTE: one array is appended per callback, so this grows for as long as
# the stream runs; keep sessions short or replace it with a bounded buffer.
processed_audio = []

def audio_callback(indata, outdata, frames, time, status):
    """Real-time audio processing callback: gain, then a first-order high-pass.

    Every block is scaled by 0.5 and filtered with

        y[n] = x[n] - 0.95 * x[n-1]

    which attenuates DC and low frequencies. The last scaled input frame is
    kept in ``audio_callback.prev_input`` so the filter runs continuously
    across block boundaries instead of restarting with each block.

    Parameters:
    - indata: Input block, indexed by frame along the first axis
    - outdata: Output buffer of the same shape; filled in place
    - frames: Number of frames in the block (not used directly)
    - time: Timing information for the block (not used)
    - status: Stream status flags; printed when set

    Returns:
    None (the result is written to outdata and appended to processed_audio)
    """
    if status:
        print(f"Stream status: {status}")

    # Apply simple gain
    gain = 0.5
    scaled = indata * gain

    # Build x[n-1]: the block shifted by one frame, with the first frame
    # taken from the end of the previous block (0 on the very first call).
    delayed = scaled.copy()
    if len(scaled) > 0:
        delayed[1:] = scaled[:-1]
        delayed[0] = getattr(audio_callback, 'prev_input', 0)
        # Copy so the saved state does not alias this block's buffer
        audio_callback.prev_input = scaled[-1].copy()

    # Simple high-pass filter (remove DC offset)
    processed = scaled - 0.95 * delayed

    # Copy processed audio to output
    outdata[:] = processed
    processed_audio.append(processed.copy())

# Open a bidirectional stream that routes every block through audio_callback
duplex_stream = sd.Stream(callback=audio_callback, channels=2, samplerate=44100)
with duplex_stream:
    print("Real-time processing active. Press Enter to stop.")
    input()  # Keep the main thread waiting while the callback does the work

block_count = len(processed_audio)
print(f"Processed {block_count} blocks of audio")

Custom Stream Configuration

import sounddevice as sd
import numpy as np

# List the available devices so suitable indices can be picked below
device_info = sd.query_devices()
print("Available devices:")
for index, entry in enumerate(device_info):
    print(f"  {index}: {entry['name']}")

# Select specific input and output devices
input_device = 1  # Replace with desired input device index
output_device = 2  # Replace with desired output device index

# Sizes shared by the stream setup, the recording loop and the playback loop
BLOCK_SIZE = 512
NUM_BLOCKS = 100

# Low-latency stream: mono in, stereo out
stream = sd.Stream(
    device=(input_device, output_device),
    samplerate=48000,
    channels=(1, 2),  # 1 input channel, 2 output channels
    dtype=np.float32,
    latency='low',
    blocksize=BLOCK_SIZE,
)

with stream:
    print("Stream info:")
    print(f"  Sample rate: {stream.samplerate}")
    print(f"  Block size: {stream.blocksize}")
    print(f"  Latency: {stream.latency}")
    print(f"  CPU load: {stream.cpu_load}")

    # Record NUM_BLOCKS blocks, keeping only the data part of each read
    chunks = [stream.read(BLOCK_SIZE)[0] for _ in range(NUM_BLOCKS)]

    # Join the blocks into one array
    recording = np.concatenate(chunks, axis=0)
    print(f"Recorded {len(recording)} samples")

    # Play the recording back block by block
    for chunk in np.array_split(recording, NUM_BLOCKS):
        # Duplicate the mono input into both output channels
        stream.write(np.column_stack((chunk, chunk)))

Using Raw Streams (without NumPy)

import sounddevice as sd
import array

# Raw streams hand back plain buffer objects, so NumPy is not needed
NUM_FRAMES = 4410  # 0.1 seconds at 44100 Hz

with sd.RawInputStream(samplerate=44100, channels=1, dtype='int16') as raw_stream:
    print("Recording raw audio...")

    # Blocking read of the raw sample bytes
    raw_block, overflowed = raw_stream.read(NUM_FRAMES)

    # Decode the bytes as signed 16-bit integers ('h' matches dtype='int16')
    samples = array.array('h')
    samples.frombytes(raw_block)

    print(f"Recorded {len(samples)} samples")
    print(f"Sample values: {samples[:10]}...")  # First 10 samples

Stream Context Management

import sounddevice as sd
import numpy as np

# Streams automatically start and stop with context manager
def process_audio_file(input_file, output_file):
    """Relay live mono input to stereo output until interrupted with Ctrl+C.

    Both streams are opened in one `with` statement, which starts them on
    entry and stops them on exit.

    NOTE(review): `input_file` and `output_file` are accepted but never used;
    the example works on live device audio. In practice, audio data would be
    loaded from a file using scipy.io.wavfile or similar.
    """
    frames_per_chunk = 1024

    # Output is opened first and input second, so they close in reverse order
    with sd.OutputStream(samplerate=44100, channels=2) as output_stream, \
            sd.InputStream(samplerate=44100, channels=1) as input_stream:

        print("Streams started automatically")

        # Chunked pass-through loop
        while True:
            try:
                # Blocking read; the overflow flag is ignored here
                mono, _ = input_stream.read(frames_per_chunk)

                # Mono -> stereo by duplicating the single input column
                stereo = np.column_stack((mono, mono))

                output_stream.write(stereo)

            except KeyboardInterrupt:
                break

    print("Streams stopped automatically")

Callback Function Signature

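Audio callback functions for the bidirectional stream classes (Stream, RawStream) must follow this signature. Input-only streams omit the outdata argument and output-only streams omit the indata argument; raw streams pass buffer objects instead of NumPy arrays: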

def callback(indata, outdata, frames, time, status):
    """
    Signature template for an audio processing callback.

    Arguments:
    - indata (numpy.ndarray): Audio received from the input
    - outdata (numpy.ndarray): Buffer to fill with audio for the output
    - frames (int): Frame count for this invocation
    - time (object): Timing information for this invocation
    - status (CallbackFlags): Flags describing the stream conditions

    Return value:
    None (the result is delivered by modifying outdata in-place)

    Exceptions used for control flow:
    CallbackStop: Raise to stop the stream
    CallbackAbort: Raise to abort the stream
    """