Tessl Tile for pypi/av@15.1.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

audio.md codecs.md containers.md filters.md index.md streams.md video.md

docs/audio.md

0
# Audio Processing
1

2
Comprehensive audio handling capabilities including frames, streams, format conversion, resampling, and FIFO buffering. PyAV provides full access to FFmpeg's audio processing with NumPy integration.
3

4
## Capabilities
5

6
### Audio Frames
7

8
Audio frame objects contain uncompressed audio data with format and timing information.
9

10
```python { .api }
11
class AudioFrame:
12
    """Container for uncompressed audio data."""
13
    
14
    # Properties
15
    samples: int                    # Number of audio samples
16
    sample_rate: int               # Sample rate in Hz
17
    rate: int                      # Alias for sample_rate
18
    format: AudioFormat            # Audio sample format
19
    layout: AudioLayout            # Channel layout
20
    planes: tuple[AudioPlane, ...] # Audio data planes
21
    pts: int                       # Presentation timestamp
22
    time: float                    # Time in seconds
23
    side_data: SideDataContainer   # Additional frame data
24
    
25
    def __init__(self, format='s16', layout='stereo', samples=0, align=1):
26
        """
27
        Create an audio frame.
28
        
29
        Parameters:
30
        - format: str | AudioFormat - Sample format
31
        - layout: str | AudioLayout - Channel layout  
32
        - samples: int - Number of samples per channel
33
        - align: int - Memory alignment
34
        """
35
    
36
    @staticmethod
37
    def from_ndarray(array, format='s16', layout='stereo') -> 'AudioFrame':
38
        """
39
        Create frame from NumPy array.
40
        
41
        Parameters:
42
        - array: np.ndarray - Audio data array
43
        - format: str - Target sample format
44
        - layout: str - Channel layout
45
        
46
        Returns:
47
        New AudioFrame object
48
        """
49
    
50
    def to_ndarray(self, format=None) -> np.ndarray:
51
        """
52
        Convert to NumPy array.
53
        
54
        Parameters:
55
        - format: str - Target format (None uses current format)
56
        
57
        Returns:
58
        NumPy array with audio data
59
        """
60
    
61
    def make_writable(self) -> None:
62
        """Ensure frame data is writable."""
63
```
64

65
### Audio Formats
66

67
Audio sample format specifications and conversions.
68

69
```python { .api }
70
class AudioFormat:
71
    """Audio sample format specification."""
72
    
73
    # Properties
74
    name: str              # Format name (e.g., 's16', 'flt')
75
    bytes: int             # Bytes per sample
76
    bits: int              # Bits per sample
77
    is_planar: bool        # True if planar format
78
    is_packed: bool        # True if packed format
79
    planar: 'AudioFormat'  # Equivalent planar format
80
    packed: 'AudioFormat'  # Equivalent packed format
81
    container_name: str    # Container-friendly name
82
    
83
    def __init__(self, name):
84
        """
85
        Create audio format.
86
        
87
        Parameters:
88
        - name: str | AudioFormat - Format name or existing format
89
        """
90
```
91

92
### Audio Layouts
93

94
Channel layout specifications for multi-channel audio.
95

96
```python { .api }
97
class AudioLayout:
98
    """Audio channel layout specification."""
99
    
100
    # Properties  
101
    name: str                       # Layout name (e.g., 'mono', 'stereo', '5.1')
102
    nb_channels: int                # Number of channels
103
    channels: tuple[AudioChannel, ...] # Individual channel objects
104
    
105
    def __init__(self, layout):
106
        """
107
        Create audio layout.
108
        
109
        Parameters:
110
        - layout: str | int | AudioLayout - Layout specification
111
        """
112

113
class AudioChannel:
114
    """Individual audio channel."""
115
    
116
    name: str         # Channel name (e.g., 'FL', 'FR', 'C')
117
    description: str  # Human-readable description
118
```
119

120
### Audio Resampling
121

122
Audio format conversion and resampling for compatibility between different audio specifications.
123

124
```python { .api }
125
class AudioResampler:
126
    """Audio format converter and resampler."""
127
    
128
    # Properties
129
    rate: int              # Output sample rate
130
    frame_size: int        # Output frame size
131
    format: AudioFormat    # Output format
132
    graph: Graph | None    # Filter graph used
133
    
134
    def __init__(self, format=None, layout=None, rate=None, frame_size=None):
135
        """
136
        Create audio resampler.
137
        
138
        Parameters:
139
        - format: str | AudioFormat - Output format
140
        - layout: str | AudioLayout - Output layout
141
        - rate: int - Output sample rate
142
        - frame_size: int - Output frame size
143
        """
144
    
145
    def resample(self, frame=None) -> list[AudioFrame]:
146
        """
147
        Resample audio frame.
148
        
149
        Parameters:
150
        - frame: AudioFrame | None - Input frame (None flushes)
151
        
152
        Returns:
153
        List of resampled frames
154
        """
155
```
156

157
### Audio FIFO
158

159
First-in-first-out buffer for audio frames, useful for managing variable frame sizes.
160

161
```python { .api }
162
class AudioFifo:
163
    """FIFO buffer for audio frames."""
164
    
165
    # Properties
166
    format: AudioFormat      # Audio format
167
    layout: AudioLayout      # Channel layout
168
    sample_rate: int         # Sample rate
169
    samples: int             # Current samples in buffer
170
    samples_written: int     # Total samples written
171
    samples_read: int        # Total samples read
172
    pts_per_sample: Fraction # PTS increment per sample
173
    
174
    def __init__(self, format='s16', layout='stereo', sample_rate=48000):
175
        """
176
        Create audio FIFO.
177
        
178
        Parameters:
179
        - format: str - Audio format
180
        - layout: str - Channel layout
181
        - sample_rate: int - Sample rate
182
        """
183
    
184
    def write(self, frame) -> None:
185
        """
186
        Write frame to FIFO.
187
        
188
        Parameters:
189
        - frame: AudioFrame - Frame to write
190
        """
191
    
192
    def read(self, samples=0, partial=False) -> AudioFrame | None:
193
        """
194
        Read frame from FIFO.
195
        
196
        Parameters:
197
        - samples: int - Number of samples to read (0 for all)
198
        - partial: bool - Allow partial reads
199
        
200
        Returns:
201
        AudioFrame or None if insufficient data
202
        """
203
    
204
    def read_many(self, samples, partial=True) -> list[AudioFrame]:
205
        """
206
        Read multiple frames.
207
        
208
        Parameters:
209
        - samples: int - Samples per frame
210
        - partial: bool - Allow partial final frame
211
        
212
        Returns:
213
        List of audio frames
214
        """
215
```
216

217
### Audio Streams
218

219
Audio stream objects for encoding and decoding.
220

221
```python { .api }
222
class AudioStream:
223
    """Audio stream in a container."""
224
    
225
    # Properties
226
    type: Literal['audio']      # Stream type
227
    codec_context: AudioCodecContext # Codec context
228
    frame_size: int             # Encoder frame size
229
    sample_rate: int            # Sample rate
230
    rate: int                   # Alias for sample_rate
231
    bit_rate: int              # Bitrate
232
    channels: int               # Number of channels
233
    format: AudioFormat         # Sample format
234
    layout: AudioLayout         # Channel layout
235
    
236
    def encode(self, frame=None) -> list[Packet]:
237
        """
238
        Encode audio frame.
239
        
240
        Parameters:
241
        - frame: AudioFrame | None - Frame to encode (None flushes)
242
        
243
        Returns:
244
        List of encoded packets
245
        """
246
    
247
    def decode(self, packet=None) -> list[AudioFrame]:
248
        """
249
        Decode audio packet.
250
        
251
        Parameters:
252
        - packet: Packet | None - Packet to decode (None flushes)
253
        
254
        Returns:
255
        List of decoded frames
256
        """
257
```
258

259
### Audio Codec Context
260

261
Audio-specific codec context for encoding and decoding.
262

263
```python { .api }
264
class AudioCodecContext:
265
    """Audio codec context."""
266
    
267
    # Properties
268
    type: Literal['audio']     # Context type
269
    frame_size: int            # Samples per frame
270
    sample_rate: int           # Sample rate
271
    rate: int                  # Alias for sample_rate  
272
    format: AudioFormat        # Sample format
273
    layout: AudioLayout        # Channel layout
274
    channels: int              # Number of channels
275
    bit_rate: int             # Target bitrate
276
    
277
    def encode(self, frame=None) -> list[Packet]:
278
        """Encode audio frame to packets."""
279
    
280
    def encode_lazy(self, frame=None) -> Iterator[Packet]:
281
        """Lazy encoding iterator."""
282
    
283
    def decode(self, packet=None) -> list[AudioFrame]:
284
        """Decode packet to audio frames."""
285
```
286

287
### Audio Planes
288

289
Individual audio data planes for planar formats.
290

291
```python { .api }
292
class AudioPlane:
293
    """Audio data plane."""
294
    
295
    buffer_size: int        # Size of audio buffer
296
    frame: AudioFrame       # Parent frame
297
    index: int             # Plane index
298
    
299
    # Inherits Buffer methods for data access
300
    def update(self, input: bytes) -> None: ...
301
    def __buffer__(self, flags: int) -> memoryview: ...
302
    def __bytes__(self) -> bytes: ...
303
```
304

305
## Usage Examples
306

307
### Basic Audio Processing
308

309
```python
310
import av
311
import numpy as np
312

313
# Open audio file
314
container = av.open('audio.wav')
315
audio_stream = container.streams.audio[0]
316

317
print(f"Sample rate: {audio_stream.sample_rate}")
318
print(f"Channels: {audio_stream.channels}")
319
print(f"Format: {audio_stream.format}")
320

321
# Decode all frames
322
for frame in container.decode(audio_stream):
323
    # Convert to numpy array
324
    array = frame.to_ndarray()
325
    print(f"Frame: {array.shape} samples")
326
    
327
    # Process audio data
328
    processed = np.multiply(array, 0.5)  # Reduce volume
329
    
330
    # Create new frame from processed data
331
    new_frame = av.AudioFrame.from_ndarray(
332
        processed, 
333
        format=frame.format.name,
334
        layout=frame.layout.name,
335
        sample_rate=frame.sample_rate
336
    )
337

338
container.close()
339
```
340

341
### Audio Format Conversion
342

343
```python
344
import av
345

346
# Setup resampler
347
resampler = av.AudioResampler(
348
    format='s16',      # 16-bit signed integer
349
    layout='stereo',   # 2 channels
350
    rate=44100         # 44.1kHz
351
)
352

353
# Open input
354
container = av.open('input.flac')
355
stream = container.streams.audio[0]
356

357
# Process frames
358
for frame in container.decode(stream):
359
    # Resample to target format
360
    resampled_frames = resampler.resample(frame)
361
    
362
    for resampled_frame in resampled_frames:
363
        print(f"Resampled: {resampled_frame.format.name} "
364
              f"{resampled_frame.layout.name} "
365
              f"{resampled_frame.sample_rate}Hz")
366

367
# Flush resampler
368
final_frames = resampler.resample(None)
369
for frame in final_frames:
370
    print(f"Final frame: {frame.samples} samples")
371

372
container.close()
373
```
374

375
### Audio Encoding
376

377
```python
378
import av
379
import numpy as np
380

381
# Create output container
382
output = av.open('output.aac', 'w')
383

384
# Add audio stream
385
stream = output.add_stream('aac', rate=44100)
386
stream.channels = 2
387
stream.layout = 'stereo'
388
stream.sample_rate = 44100
389

390
# Create FIFO for frame size management
391
fifo = av.AudioFifo(
392
    format=stream.format.name,
393
    layout=stream.layout.name,
394
    sample_rate=stream.sample_rate
395
)
396

397
# Generate audio data
398
duration = 5.0  # seconds
399
sample_count = int(duration * stream.sample_rate)
400
t = np.linspace(0, duration, sample_count)
401
frequency = 440  # A4 note
402

403
# Generate stereo sine wave
404
left_channel = np.sin(2 * np.pi * frequency * t) * 0.3
405
right_channel = np.sin(2 * np.pi * frequency * 1.5 * t) * 0.3
406
audio_data = np.column_stack([left_channel, right_channel])
407

408
# Create frame and write to FIFO
409
frame = av.AudioFrame.from_ndarray(
410
    audio_data.astype(np.float32),
411
    format='flt',
412
    layout='stereo',
413
    sample_rate=stream.sample_rate
414
)
415
fifo.write(frame)
416

417
# Read and encode in codec-appropriate frame sizes
418
frame_count = 0
419
while fifo.samples >= stream.frame_size:
420
    frame = fifo.read(stream.frame_size)
421
    frame.pts = frame_count * stream.frame_size
422
    frame.time_base = stream.time_base
423
    
424
    for packet in stream.encode(frame):
425
        output.mux(packet)
426
    
427
    frame_count += 1
428

429
# Flush encoder
430
for packet in stream.encode():
431
    output.mux(packet)
432

433
output.close()
434
```
435

436
### Multi-Channel Audio Processing
437

438
```python
439
import av
440
import numpy as np
441

442
# Open 5.1 surround sound file
443
container = av.open('surround.ac3')
444
stream = container.streams.audio[0]
445

446
print(f"Layout: {stream.layout.name}")
447
print(f"Channels: {stream.channels}")
448
for i, channel in enumerate(stream.layout.channels):
449
    print(f"  Channel {i}: {channel.name} ({channel.description})")
450

451
# Process each channel separately
452
for frame in container.decode(stream):
453
    array = frame.to_ndarray()
454
    
455
    if frame.format.is_planar:
456
        # Planar format - each channel is separate plane
457
        for i, plane in enumerate(frame.planes):
458
            channel_data = np.frombuffer(plane, dtype=np.float32)
459
            print(f"Channel {i}: {len(channel_data)} samples")
460
    else:
461
        # Packed format - channels interleaved
462
        for i in range(frame.channels):
463
            channel_data = array[i::frame.channels]
464
            print(f"Channel {i}: {len(channel_data)} samples")
465

466
container.close()
467
```
468

469
### Audio Analysis
470

471
```python
472
import av
473
import numpy as np
474

475
def analyze_audio(filename):
476
    container = av.open(filename)
477
    stream = container.streams.audio[0]
478
    
479
    # Collect all audio data
480
    all_samples = []
481
    frame_count = 0
482
    
483
    for frame in container.decode(stream):
484
        array = frame.to_ndarray()
485
        all_samples.append(array)
486
        frame_count += 1
487
        
488
        # Frame-level analysis
489
        rms = np.sqrt(np.mean(array**2))
490
        peak = np.max(np.abs(array))
491
        print(f"Frame {frame_count}: RMS={rms:.3f}, Peak={peak:.3f}")
492
    
493
    # Overall analysis
494
    if all_samples:
495
        all_audio = np.concatenate(all_samples)
496
        duration = len(all_audio) / stream.sample_rate
497
        overall_rms = np.sqrt(np.mean(all_audio**2))
498
        overall_peak = np.max(np.abs(all_audio))
499
        
500
        print(f"\nOverall Analysis:")
501
        print(f"Duration: {duration:.2f} seconds")
502
        print(f"RMS Level: {overall_rms:.3f}")
503
        print(f"Peak Level: {overall_peak:.3f}")
504
        print(f"Dynamic Range: {20*np.log10(overall_peak/overall_rms):.1f} dB")
505
    
506
    container.close()
507

508
# Analyze audio file
509
analyze_audio('music.wav')
510
```

Version

Tile

Files

docs/audio.md

docs/audio.md