An audio library based on libsndfile, CFFI and NumPy for reading and writing sound files
—
Utilities for querying file information, discovering available formats and subtypes, validating format combinations, and retrieving format defaults. These functions help with format detection and validation.
Query detailed information about audio files without loading the full audio data.
def info(file, verbose=False):
"""
Get information about a sound file.
Parameters:
- file: str or file-like, path to audio file or file-like object
- verbose: bool, if True include extra format information
Returns:
- _SoundFileInfo: object with file information attributes:
- name: str, file name
- samplerate: int, sample rate in Hz
- frames: int, total number of frames
- channels: int, number of audio channels
- duration: float, duration in seconds (frames / samplerate)
- format: str, major format
- subtype: str, audio subtype
- endian: str, byte order
- format_info: str, human-readable format description
- subtype_info: str, human-readable subtype description
- sections: int, number of sections
- extra_info: str, additional information
- verbose: bool, whether verbose information was requested
"""
class _SoundFileInfo:
"""
Information object returned by info() function.
Attributes:
- name: str, file name or path
- samplerate: int, sample rate in Hz
- frames: int, total number of frames
- channels: int, number of audio channels
- duration: float, duration in seconds (frames / samplerate)
- format: str, major format
- subtype: str, audio subtype
- endian: str, byte order
- format_info: str, human-readable format description
- subtype_info: str, human-readable subtype description
- sections: int, number of sections
- extra_info: str, additional information
- verbose: bool, whether verbose information was requested
"""
Discover supported file formats and their capabilities.
def available_formats():
"""
Get dictionary of available major file formats.
Returns:
- dict: mapping of format names to descriptions
Keys include: 'WAV', 'AIFF', 'AU', 'RAW', 'PAF', 'SVX', 'NIST',
'VOC', 'IRCAM', 'W64', 'MAT4', 'MAT5', 'PVF', 'XI',
'HTK', 'SDS', 'AVR', 'WAVEX', 'SD2', 'FLAC', 'CAF',
'WVE', 'OGG', 'MPC2K', 'RF64', 'MP3'
"""
Discover supported audio data subtypes, optionally filtered by format.
def available_subtypes(format=None):
"""
Get dictionary of available audio subtypes.
Parameters:
- format: str, optional format to filter subtypes for
Returns:
- dict: mapping of subtype names to descriptions
Keys include: 'PCM_S8', 'PCM_16', 'PCM_24', 'PCM_32', 'PCM_U8',
'FLOAT', 'DOUBLE', 'ULAW', 'ALAW', 'IMA_ADPCM',
'MS_ADPCM', 'GSM610', 'VOX_ADPCM', 'VORBIS', 'OPUS',
'ALAC_16', 'ALAC_20', 'ALAC_24', 'ALAC_32',
'MPEG_LAYER_I', 'MPEG_LAYER_II', 'MPEG_LAYER_III'
"""
Check if format combinations are valid before attempting file operations.
def check_format(format, subtype=None, endian=None):
"""
Check if combination of format/subtype/endian is valid.
Parameters:
- format: str, major format name (e.g., 'WAV', 'FLAC')
- subtype: str, optional subtype (e.g., 'PCM_16', 'FLOAT')
- endian: str, optional endianness ('FILE', 'LITTLE', 'BIG', 'CPU')
Returns:
- bool: True if the combination is valid, False otherwise
"""
Get the default subtype for a given format.
def default_subtype(format):
"""
Get the default subtype for a given format.
Parameters:
- format: str, major format name
Returns:
- str or None: default subtype name, or None if the format has no default subtype (an unknown format name raises ValueError)
"""
import soundfile as sf
# Get basic file information without loading the audio data
file_info = sf.info('audio.wav')
print(f'File: {file_info.name}')
# Duration in seconds is the frame count divided by the sample rate
print(f'Duration: {file_info.frames / file_info.samplerate:.2f} seconds')
print(f'Sample rate: {file_info.samplerate} Hz')
print(f'Channels: {file_info.channels}')
print(f'Format: {file_info.format} ({file_info.format_info})')
print(f'Subtype: {file_info.subtype} ({file_info.subtype_info})')

# Get verbose information; extra_info carries the additional details
verbose_info = sf.info('audio.wav', verbose=True)
print(f'Extra info: {verbose_info.extra_info}')

import soundfile as sf
# List all available formats
formats = sf.available_formats()
print("Available formats:")
for fmt, description in formats.items():
    print(f' {fmt}: {description}')

# List subtypes for a specific format
wav_subtypes = sf.available_subtypes('WAV')
# A single backslash so the newline escape prints a blank line, not "\n"
print("\nWAV subtypes:")
for subtype, description in wav_subtypes.items():
    print(f' {subtype}: {description}')

# Get all subtypes (no format filter)
all_subtypes = sf.available_subtypes()
print(f"\nTotal subtypes available: {len(all_subtypes)}")

import soundfile as sf
# Check if format combinations are valid
# Each entry is (format, subtype, endian); None leaves that part unspecified.
combinations = [
    ('WAV', 'PCM_16', 'LITTLE'),
    ('FLAC', 'PCM_24', None),
    ('OGG', 'VORBIS', None),
    ('MP3', 'MPEG_LAYER_III', None),
    ('WAV', 'VORBIS', None),  # Invalid combination
]
for fmt, subtype, endian in combinations:
    is_valid = sf.check_format(fmt, subtype, endian)
    print(f'{fmt}/{subtype}/{endian}: {"Valid" if is_valid else "Invalid"}')

# Get default subtypes for formats
formats_to_check = ['WAV', 'FLAC', 'OGG', 'AIFF']
for fmt in formats_to_check:
    default = sf.default_subtype(fmt)
    print(f'{fmt} default subtype: {default}')

import soundfile as sf
import numpy as np
def write_with_best_quality(filename, data, samplerate):
    """Write *data* to *filename* using the highest-ranked usable subtype.

    Parameters:
    - filename: str, output path; the text after its last '.' selects the
      container format (WAV, FLAC, OGG or AIFF). Anything else is written
      as WAV.
    - data: audio samples, passed through unchanged to sf.write
    - samplerate: sample rate, passed through unchanged to sf.write

    The subtype is the first entry of the internal ranking that the format
    lists as available and that sf.check_format accepts. If none qualifies,
    the format's default subtype is used. The chosen format/subtype pair is
    printed before the file is written.
    """
    # Upper-cased text after the final dot acts as the lookup key.
    suffix = filename.split('.')[-1].upper()
    known_formats = {
        'WAV': 'WAV',
        'FLAC': 'FLAC',
        'OGG': 'OGG',
        'AIFF': 'AIFF'
    }
    format_name = known_formats.get(suffix, 'WAV')

    supported = sf.available_subtypes(format_name)

    # Ranked from most to least preferred; the first usable entry wins.
    ranking = ['FLOAT', 'DOUBLE', 'PCM_32', 'PCM_24', 'PCM_16']
    best_subtype = next(
        (
            candidate
            for candidate in ranking
            if candidate in supported
            and sf.check_format(format_name, candidate)
        ),
        None,
    )

    # Nothing from the ranking applies (e.g. a format with its own codec).
    if best_subtype is None:
        best_subtype = sf.default_subtype(format_name)

    print(f'Writing {filename} as {format_name}/{best_subtype}')
    sf.write(filename, data, samplerate, format=format_name, subtype=best_subtype)
# Example usage
# NOTE(review): randn samples can fall outside [-1.0, 1.0]; integer PCM
# subtypes (e.g. the one chosen for FLAC) would presumably clip those values.
# Confirm that is acceptable for this demo.
data = np.random.randn(44100, 2)  # 1 second stereo
write_with_best_quality('output.wav', data, 44100)
write_with_best_quality('output.flac', data, 44100)

import soundfile as sf
import os
def analyze_audio_directory(directory):
    """Print a short report for each audio file directly inside *directory*.

    Parameters:
    - directory: str, path of the directory to scan (not recursive)

    Returns:
    - None; the report is written to standard output.

    Entries are matched by file extension (case-insensitive) and visited in
    sorted order so the report is reproducible. An entry that is not a
    regular file is skipped. A file that cannot be read produces a one-line
    error message and the scan moves on to the next file.
    """
    # str.endswith accepts a tuple, so one call covers every extension.
    audio_extensions = ('.wav', '.flac', '.ogg', '.aiff', '.au', '.mp3')

    print(f"Analyzing audio files in: {directory}")
    print("-" * 50)

    # os.listdir returns entries in arbitrary order; sort for stable output.
    for filename in sorted(os.listdir(directory)):
        if not filename.lower().endswith(audio_extensions):
            continue

        filepath = os.path.join(directory, filename)
        if not os.path.isfile(filepath):
            # e.g. a sub-directory whose name merely ends in ".wav"
            continue

        try:
            info = sf.info(filepath)
            duration = info.frames / info.samplerate
            file_size = os.path.getsize(filepath) / (1024 * 1024)  # MB
        except Exception as e:
            # Deliberately broad: one unreadable file must not stop the scan.
            print(f"Error reading {filename}: {e}")
            continue

        print(f"File: {filename}")
        print(f" Format: {info.format}/{info.subtype}")
        print(f" Duration: {duration:.2f}s")
        print(f" Sample Rate: {info.samplerate} Hz")
        print(f" Channels: {info.channels}")
        print(f" Size: {file_size:.1f} MB")
        print()
# Example usage (commented out since directory may not exist)
# analyze_audio_directory('/path/to/audio/files')

Install with Tessl CLI
npx tessl i tessl/pypi-soundfile