tessl/pypi-soundfile

An audio library based on libsndfile, CFFI and NumPy for reading and writing sound files

—

Pending

Overview

Eval results

Files

File Information and Format Support

Name: tessl/pypi-soundfile
Author: tessl

Utilities for querying file information, discovering available formats and subtypes, validating format combinations, and retrieving format defaults. These functions help with format detection and validation.

Capabilities

File Information

Query detailed information about audio files without loading the full audio data.

def info(file, verbose=False):
    """
    Get information about a sound file without loading the full audio data.
    
    Parameters:
    - file: str or file-like, path to audio file or file-like object
    - verbose: bool, if True include extra format information
      NOTE(review): in upstream soundfile this flag appears to affect only
      the printed representation of the result; extra_info looks to be
      populated either way - confirm.
    
    Returns:
    - _SoundFileInfo: object with file information attributes:
      - name: str, file name or path
      - samplerate: int, sample rate in Hz
      - frames: int, total number of frames
      - channels: int, number of audio channels
      - duration: float, duration in seconds (frames / samplerate)
      - format: str, major format
      - subtype: str, audio subtype
      - endian: str, byte order
      - format_info: str, human-readable format description
      - subtype_info: str, human-readable subtype description
      - sections: int, number of sections
      - extra_info: str, additional information
      - verbose: bool, whether verbose information was requested
    """

class _SoundFileInfo:
    """
    Information object returned by info() function.
    
    Attributes:
    - name: str, file name or path
    - samplerate: int, sample rate in Hz
    - frames: int, total number of frames
    - channels: int, number of audio channels
    - duration: float, duration in seconds (frames / samplerate)
    - format: str, major format (e.g., 'WAV', 'FLAC')
    - subtype: str, audio subtype (e.g., 'PCM_16', 'FLOAT')
    - endian: str, byte order
    - format_info: str, human-readable description of the major format
    - subtype_info: str, human-readable description of the subtype
    - sections: int, number of sections
    - extra_info: str, additional information
    - verbose: bool, whether verbose information was requested
    """

Available Formats

Discover supported file formats and their capabilities.

def available_formats():
    """
    Get dictionary of available major file formats.
    
    Returns:
    - dict: mapping of format names to human-readable descriptions
      Keys include: 'WAV', 'AIFF', 'AU', 'RAW', 'PAF', 'SVX', 'NIST', 
                   'VOC', 'IRCAM', 'W64', 'MAT4', 'MAT5', 'PVF', 'XI', 
                   'HTK', 'SDS', 'AVR', 'WAVEX', 'SD2', 'FLAC', 'CAF', 
                   'WVE', 'OGG', 'MPC2K', 'RF64', 'MP3'
    
    NOTE(review): the exact key set presumably depends on the installed
    libsndfile build (e.g. 'MP3' may be absent on older versions) - confirm
    before relying on a particular key being present.
    """

Available Subtypes

Discover supported audio data subtypes, optionally filtered by format.

def available_subtypes(format=None):
    """
    Get dictionary of available audio subtypes.
    
    Parameters:
    - format: str, optional major format name (e.g., 'WAV'); when given,
      the result is filtered to subtypes for that format. With the default
      of None, all subtypes are returned.
    
    Returns:
    - dict: mapping of subtype names to human-readable descriptions
      Keys include: 'PCM_S8', 'PCM_16', 'PCM_24', 'PCM_32', 'PCM_U8',
                   'FLOAT', 'DOUBLE', 'ULAW', 'ALAW', 'IMA_ADPCM',
                   'MS_ADPCM', 'GSM610', 'VOX_ADPCM', 'VORBIS', 'OPUS',
                   'ALAC_16', 'ALAC_20', 'ALAC_24', 'ALAC_32',
                   'MPEG_LAYER_I', 'MPEG_LAYER_II', 'MPEG_LAYER_III'
    """

Format Validation

Check if format combinations are valid before attempting file operations.

def check_format(format, subtype=None, endian=None):
    """
    Check if combination of format/subtype/endian is valid.
    
    Parameters:
    - format: str, major format name (e.g., 'WAV', 'FLAC')
    - subtype: str, optional subtype (e.g., 'PCM_16', 'FLOAT');
      defaults to None
    - endian: str, optional endianness ('FILE', 'LITTLE', 'BIG', 'CPU');
      defaults to None
    
    Returns:
    - bool: True if the combination is valid, False otherwise
      (for example, 'WAV' with subtype 'VORBIS' is an invalid combination)
    """

Default Subtypes

Get the default subtype for a given format.

def default_subtype(format):
    """
    Get the default subtype for a given format.
    
    Parameters:
    - format: str, major format name (e.g., 'WAV', 'FLAC')
    
    Returns:
    - str or None: default subtype name
    
    NOTE(review): upstream soundfile appears to validate `format` first and
    raise ValueError for an unknown format name (TypeError for a non-string)
    instead of returning None; None would then only occur for a known format
    without a registered default - confirm against the installed version.
    """

Usage Examples

File Exploration

import soundfile as sf

# Basic metadata: build the summary lines first, then print them in order
file_info = sf.info('audio.wav')
summary = (
    f'File: {file_info.name}',
    f'Duration: {file_info.frames / file_info.samplerate:.2f} seconds',
    f'Sample rate: {file_info.samplerate} Hz',
    f'Channels: {file_info.channels}',
    f'Format: {file_info.format} ({file_info.format_info})',
    f'Subtype: {file_info.subtype} ({file_info.subtype_info})',
)
for line in summary:
    print(line)

# Query the same file again, this time requesting verbose information
verbose_info = sf.info('audio.wav', verbose=True)
print(f'Extra info: {verbose_info.extra_info}')

Format Discovery

import soundfile as sf

# List all available formats as "NAME: description" lines
formats = sf.available_formats()
print("Available formats:")
for fmt, description in formats.items():
    print(f'  {fmt}: {description}')

# List subtypes for a specific format
wav_subtypes = sf.available_subtypes('WAV')
# The leading "\n" prints a blank separator line before the heading
print("\nWAV subtypes:")
for subtype, description in wav_subtypes.items():
    print(f'  {subtype}: {description}')

# Get all subtypes (no format filter) and report how many there are
all_subtypes = sf.available_subtypes()
print(f"\nTotal subtypes available: {len(all_subtypes)}")

Format Validation and Defaults

import soundfile as sf

# (format, subtype, endian) triples to run through check_format
combinations = [
    ('WAV', 'PCM_16', 'LITTLE'),
    ('FLAC', 'PCM_24', None),
    ('OGG', 'VORBIS', None),
    ('MP3', 'MPEG_LAYER_III', None),
    ('WAV', 'VORBIS', None),  # Invalid combination
]

for fmt, subtype, endian in combinations:
    if sf.check_format(fmt, subtype, endian):
        verdict = "Valid"
    else:
        verdict = "Invalid"
    print(f'{fmt}/{subtype}/{endian}: {verdict}')

# Report the default subtype of each format; map() is lazy, so every lookup
# still happens right before its line is printed
formats_to_check = ['WAV', 'FLAC', 'OGG', 'AIFF']
for fmt, default in zip(formats_to_check,
                        map(sf.default_subtype, formats_to_check)):
    print(f'{fmt} default subtype: {default}')

Smart Format Selection

import soundfile as sf
import numpy as np

def write_with_best_quality(filename, data, samplerate):
    """
    Write audio using the most preferred subtype the target format offers.

    Parameters:
    - filename: str, output path; the text after the last '.' selects the
      format ('WAV', 'FLAC', 'OGG' or 'AIFF'), anything else falls back
      to 'WAV'
    - data: audio data, passed unchanged to sf.write
    - samplerate: sample rate, passed unchanged to sf.write

    The subtype is the first entry of the preference order FLOAT, DOUBLE,
    PCM_32, PCM_24, PCM_16 that the format lists and check_format accepts;
    if none qualifies, the format's default subtype is used.
    """
    known_formats = {
        'WAV': 'WAV',
        'FLAC': 'FLAC',
        'OGG': 'OGG',
        'AIFF': 'AIFF',
    }

    # Text after the last dot, upper-cased; the whole name if there is no dot
    extension = filename.rsplit('.', 1)[-1].upper()
    format_name = known_formats.get(extension, 'WAV')

    supported = sf.available_subtypes(format_name)

    # First preferred subtype that is both listed and validated, else None
    best_subtype = next(
        (
            candidate
            for candidate in ('FLOAT', 'DOUBLE', 'PCM_32', 'PCM_24', 'PCM_16')
            if candidate in supported
            and sf.check_format(format_name, candidate)
        ),
        None,
    )
    if best_subtype is None:
        # Nothing from the preference list applies: use the format's default
        best_subtype = sf.default_subtype(format_name)

    print(f'Writing {filename} as {format_name}/{best_subtype}')
    sf.write(filename, data, samplerate, format=format_name, subtype=best_subtype)

# Example usage: one second of random stereo samples, written to two formats
data = np.random.randn(44100, 2)
for target in ('output.wav', 'output.flac'):
    write_with_best_quality(target, data, 44100)

File Format Analysis

import soundfile as sf
import os

def analyze_audio_directory(directory):
    """
    Print a short report for each audio file found directly in a directory.

    Parameters:
    - directory: str, directory whose entries are listed (not recursive)

    Entries are selected by file extension, compared case-insensitively.
    A file that cannot be read is reported with an error line and the scan
    continues with the next entry.
    """
    recognised_suffixes = ('.wav', '.flac', '.ogg', '.aiff', '.au', '.mp3')
    bytes_per_mb = 1024 * 1024

    print(f"Analyzing audio files in: {directory}")
    print("-" * 50)

    for filename in os.listdir(directory):
        # Skip anything that does not carry a recognised audio extension
        if not filename.lower().endswith(recognised_suffixes):
            continue

        path = os.path.join(directory, filename)
        try:
            info = sf.info(path)
            duration = info.frames / info.samplerate
            file_size = os.path.getsize(path) / bytes_per_mb  # MB

            print(f"File: {filename}")
            print(f"  Format: {info.format}/{info.subtype}")
            print(f"  Duration: {duration:.2f}s")
            print(f"  Sample Rate: {info.samplerate} Hz")
            print(f"  Channels: {info.channels}")
            print(f"  Size: {file_size:.1f} MB")
            print()

        except Exception as e:
            # Best effort: report the failure and move on to the next file
            print(f"Error reading {filename}: {e}")

# Example usage (commented out since directory may not exist)
# analyze_audio_directory('/path/to/audio/files')

Install with Tessl CLI