Pythonic bindings for FFmpeg's libraries enabling multimedia processing with audio/video encoding, decoding, format conversion, and stream manipulation.
—
Quality: Pending — does it follow best practices? (review not yet completed)
Impact: Pending — no eval scenarios have been run
Comprehensive audio handling capabilities including frames, streams, format conversion, resampling, and FIFO buffering. PyAV provides full access to FFmpeg's audio processing with NumPy integration.
Audio frame objects contain uncompressed audio data with format and timing information.
class AudioFrame:
"""Container for uncompressed audio data."""
# Properties
samples: int # Number of audio samples
sample_rate: int # Sample rate in Hz
rate: int # Alias for sample_rate
format: AudioFormat # Audio sample format
layout: AudioLayout # Channel layout
planes: tuple[AudioPlane, ...] # Audio data planes
pts: int # Presentation timestamp
time: float # Time in seconds
side_data: SideDataContainer # Additional frame data
def __init__(self, format='s16', layout='stereo', samples=0, align=1):
"""
Create an audio frame.
Parameters:
- format: str | AudioFormat - Sample format
- layout: str | AudioLayout - Channel layout
- samples: int - Number of samples per channel
- align: int - Memory alignment
"""
@staticmethod
def from_ndarray(array, format='s16', layout='stereo') -> 'AudioFrame':
"""
Create frame from NumPy array.
Parameters:
- array: np.ndarray - Audio data array
- format: str - Target sample format
- layout: str - Channel layout
Returns:
New AudioFrame object
"""
def to_ndarray(self, format=None) -> np.ndarray:
"""
Convert to NumPy array.
Parameters:
- format: str - Target format (None uses current format)
Returns:
NumPy array with audio data
"""
def make_writable(self) -> None:
"""Ensure frame data is writable."""Audio sample format specifications and conversions.
class AudioFormat:
"""Audio sample format specification."""
# Properties
name: str # Format name (e.g., 's16', 'flt')
bytes: int # Bytes per sample
bits: int # Bits per sample
is_planar: bool # True if planar format
is_packed: bool # True if packed format
planar: 'AudioFormat' # Equivalent planar format
packed: 'AudioFormat' # Equivalent packed format
container_name: str # Container-friendly name
def __init__(self, name):
"""
Create audio format.
Parameters:
- name: str | AudioFormat - Format name or existing format
"""Channel layout specifications for multi-channel audio.
class AudioLayout:
    """Audio channel layout specification."""

    # Properties
    name: str                            # Layout name (e.g., 'mono', 'stereo', '5.1')
    nb_channels: int                     # Number of channels
    channels: tuple[AudioChannel, ...]   # Individual channel objects

    def __init__(self, layout):
        """
        Create audio layout.

        Parameters:
        - layout: str | int | AudioLayout - Layout specification
        """
class AudioChannel:
"""Individual audio channel."""
name: str # Channel name (e.g., 'FL', 'FR', 'C')
description: str # Human-readable descriptionAudio format conversion and resampling for compatibility between different audio specifications.
class AudioResampler:
"""Audio format converter and resampler."""
# Properties
rate: int # Output sample rate
frame_size: int # Output frame size
format: AudioFormat # Output format
graph: Graph | None # Filter graph used
def __init__(self, format=None, layout=None, rate=None, frame_size=None):
"""
Create audio resampler.
Parameters:
- format: str | AudioFormat - Output format
- layout: str | AudioLayout - Output layout
- rate: int - Output sample rate
- frame_size: int - Output frame size
"""
def resample(self, frame=None) -> list[AudioFrame]:
"""
Resample audio frame.
Parameters:
- frame: AudioFrame | None - Input frame (None flushes)
Returns:
List of resampled frames
"""First-in-first-out buffer for audio frames, useful for managing variable frame sizes.
class AudioFifo:
"""FIFO buffer for audio frames."""
# Properties
format: AudioFormat # Audio format
layout: AudioLayout # Channel layout
sample_rate: int # Sample rate
samples: int # Current samples in buffer
samples_written: int # Total samples written
samples_read: int # Total samples read
pts_per_sample: Fraction # PTS increment per sample
def __init__(self, format='s16', layout='stereo', sample_rate=48000):
"""
Create audio FIFO.
Parameters:
- format: str - Audio format
- layout: str - Channel layout
- sample_rate: int - Sample rate
"""
def write(self, frame) -> None:
"""
Write frame to FIFO.
Parameters:
- frame: AudioFrame - Frame to write
"""
def read(self, samples=0, partial=False) -> AudioFrame | None:
"""
Read frame from FIFO.
Parameters:
- samples: int - Number of samples to read (0 for all)
- partial: bool - Allow partial reads
Returns:
AudioFrame or None if insufficient data
"""
def read_many(self, samples, partial=True) -> list[AudioFrame]:
"""
Read multiple frames.
Parameters:
- samples: int - Samples per frame
- partial: bool - Allow partial final frame
Returns:
List of audio frames
"""Audio stream objects for encoding and decoding.
class AudioStream:
"""Audio stream in a container."""
# Properties
type: Literal['audio'] # Stream type
codec_context: AudioCodecContext # Codec context
frame_size: int # Encoder frame size
sample_rate: int # Sample rate
rate: int # Alias for sample_rate
bit_rate: int # Bitrate
channels: int # Number of channels
format: AudioFormat # Sample format
layout: AudioLayout # Channel layout
def encode(self, frame=None) -> list[Packet]:
"""
Encode audio frame.
Parameters:
- frame: AudioFrame | None - Frame to encode (None flushes)
Returns:
List of encoded packets
"""
def decode(self, packet=None) -> list[AudioFrame]:
"""
Decode audio packet.
Parameters:
- packet: Packet | None - Packet to decode (None flushes)
Returns:
List of decoded frames
"""Audio-specific codec context for encoding and decoding.
class AudioCodecContext:
"""Audio codec context."""
# Properties
type: Literal['audio'] # Context type
frame_size: int # Samples per frame
sample_rate: int # Sample rate
rate: int # Alias for sample_rate
format: AudioFormat # Sample format
layout: AudioLayout # Channel layout
channels: int # Number of channels
bit_rate: int # Target bitrate
def encode(self, frame=None) -> list[Packet]:
"""Encode audio frame to packets."""
def encode_lazy(self, frame=None) -> Iterator[Packet]:
"""Lazy encoding iterator."""
def decode(self, packet=None) -> list[AudioFrame]:
"""Decode packet to audio frames."""Individual audio data planes for planar formats.
class AudioPlane:
    """Audio data plane."""

    buffer_size: int    # Size of audio buffer
    frame: AudioFrame   # Parent frame
    index: int          # Plane index

    # Inherits Buffer methods for data access
    def update(self, input: bytes) -> None: ...
    def __buffer__(self, flags: int) -> memoryview: ...
    def __bytes__(self) -> bytes: ...

import av
import numpy as np

# Open audio file
container = av.open('audio.wav')
audio_stream = container.streams.audio[0]

print(f"Sample rate: {audio_stream.sample_rate}")
print(f"Channels: {audio_stream.channels}")
print(f"Format: {audio_stream.format}")

# Decode all frames
for frame in container.decode(audio_stream):
    # Convert to numpy array
    array = frame.to_ndarray()
    print(f"Frame: {array.shape} samples")

    # Process audio data
    processed = np.multiply(array, 0.5)  # Reduce volume

    # Create new frame from processed data.
    # from_ndarray() does not accept a sample_rate keyword; assign the
    # rate on the resulting frame instead.
    new_frame = av.AudioFrame.from_ndarray(
        processed,
        format=frame.format.name,
        layout=frame.layout.name,
    )
    new_frame.sample_rate = frame.sample_rate

container.close()

import av
# Setup resampler
resampler = av.AudioResampler(
    format='s16',     # 16-bit signed integer
    layout='stereo',  # 2 channels
    rate=44100,       # 44.1kHz
)

# Open input
container = av.open('input.flac')
stream = container.streams.audio[0]

# Process frames
for frame in container.decode(stream):
    # Resample each decoded frame to the target format
    for out_frame in resampler.resample(frame):
        print(f"Resampled: {out_frame.format.name} "
              f"{out_frame.layout.name} "
              f"{out_frame.sample_rate}Hz")

# Flush resampler (passing None drains buffered audio)
for frame in resampler.resample(None):
    print(f"Final frame: {frame.samples} samples")

container.close()

import av
import numpy as np

# Create output container
output = av.open('output.aac', 'w')

# Add audio stream
stream = output.add_stream('aac', rate=44100)
stream.channels = 2
stream.layout = 'stereo'
stream.sample_rate = 44100

# Create FIFO for frame size management.
# NOTE(review): the FIFO format must match the frames written into it
# ('flt' below). If the encoder expects a different format (e.g. planar
# 'fltp' for AAC), insert an AudioResampler before encoding — confirm.
fifo = av.AudioFifo(
    format='flt',
    layout=stream.layout.name,
    sample_rate=stream.sample_rate
)

# Generate audio data
duration = 5.0  # seconds
sample_count = int(duration * stream.sample_rate)
t = np.linspace(0, duration, sample_count)
frequency = 440  # A4 note

# Generate stereo sine wave
left_channel = np.sin(2 * np.pi * frequency * t) * 0.3
right_channel = np.sin(2 * np.pi * frequency * 1.5 * t) * 0.3

# Packed formats take a (1, samples * channels) interleaved array;
# reshaping the (samples, 2) stack row-major interleaves L/R correctly.
audio_data = np.column_stack([left_channel, right_channel]).reshape(1, -1)

# Create frame and write to FIFO.
# from_ndarray() does not take a sample_rate keyword; set it afterwards.
frame = av.AudioFrame.from_ndarray(
    audio_data.astype(np.float32),
    format='flt',
    layout='stereo',
)
frame.sample_rate = stream.sample_rate
fifo.write(frame)

# Read and encode in codec-appropriate frame sizes
frame_count = 0
while fifo.samples >= stream.frame_size:
    frame = fifo.read(stream.frame_size)
    frame.pts = frame_count * stream.frame_size
    frame.time_base = stream.time_base
    for packet in stream.encode(frame):
        output.mux(packet)
    frame_count += 1

# Drain any leftover samples as a short final frame so no audio is lost
if fifo.samples:
    frame = fifo.read(fifo.samples, partial=True)
    frame.pts = frame_count * stream.frame_size
    frame.time_base = stream.time_base
    for packet in stream.encode(frame):
        output.mux(packet)

# Flush encoder
for packet in stream.encode():
    output.mux(packet)

output.close()

import av
import numpy as np

# Open 5.1 surround sound file
container = av.open('surround.ac3')
stream = container.streams.audio[0]

print(f"Layout: {stream.layout.name}")
print(f"Channels: {stream.channels}")

for i, channel in enumerate(stream.layout.channels):
    print(f" Channel {i}: {channel.name} ({channel.description})")

# Process each channel separately
for frame in container.decode(stream):
    array = frame.to_ndarray()
    # AudioFrame exposes channel count via its layout, not a .channels
    # attribute (see the AudioFrame property list).
    nb_channels = frame.layout.nb_channels

    if frame.format.is_planar:
        # Planar format - each channel is a separate plane.
        # NOTE(review): assumes 32-bit float samples — derive the dtype
        # from frame.format for other sample formats; confirm.
        for i, plane in enumerate(frame.planes):
            channel_data = np.frombuffer(plane, dtype=np.float32)
            print(f"Channel {i}: {len(channel_data)} samples")
    else:
        # Packed format - channels interleaved along the sample axis.
        # to_ndarray() yields shape (1, samples * channels) here, so
        # de-interleave with a stride over the second axis.
        for i in range(nb_channels):
            channel_data = array[0, i::nb_channels]
            print(f"Channel {i}: {len(channel_data)} samples")

container.close()

import av
import numpy as np
def analyze_audio(filename):
container = av.open(filename)
stream = container.streams.audio[0]
# Collect all audio data
all_samples = []
frame_count = 0
for frame in container.decode(stream):
array = frame.to_ndarray()
all_samples.append(array)
frame_count += 1
# Frame-level analysis
rms = np.sqrt(np.mean(array**2))
peak = np.max(np.abs(array))
print(f"Frame {frame_count}: RMS={rms:.3f}, Peak={peak:.3f}")
# Overall analysis
if all_samples:
all_audio = np.concatenate(all_samples)
duration = len(all_audio) / stream.sample_rate
overall_rms = np.sqrt(np.mean(all_audio**2))
overall_peak = np.max(np.abs(all_audio))
print(f"\nOverall Analysis:")
print(f"Duration: {duration:.2f} seconds")
print(f"RMS Level: {overall_rms:.3f}")
print(f"Peak Level: {overall_peak:.3f}")
print(f"Dynamic Range: {20*np.log10(overall_peak/overall_rms):.1f} dB")
container.close()
# Analyze audio file
analyze_audio('music.wav')Install with Tessl CLI
npx tessl i tessl/pypi-av