0
# Packet and Stream Management
1
2
Low-level packet handling and stream operations for precise control over media data flow and timing. PyAV provides comprehensive access to FFmpeg's stream and packet management capabilities.
3
4
## Capabilities
5
6
### Packets
7
8
Packets contain compressed media data with timing and metadata information.
9
10
```python { .api }
11
class Packet:
    """Container for compressed media data.

    A packet carries one chunk of encoded data together with the timing
    information needed to decode or mux it.
    """

    # Properties
    stream: Stream  # Associated stream
    stream_index: int  # Stream index in container
    time_base: Fraction  # Time base for timestamps
    pts: int | None  # Presentation timestamp (None if unknown)
    dts: int | None  # Decode timestamp (None if unknown)
    pos: int | None  # Byte position in stream (None if unknown)
    size: int  # Packet size in bytes
    duration: int | None  # Packet duration in time_base units (None if unknown)
    opaque: object | None  # User data

    # Packet flags
    is_keyframe: bool  # True if keyframe
    is_corrupt: bool  # True if corrupt
    is_discard: bool  # True if should be discarded
    is_trusted: bool  # True if trusted
    is_disposable: bool  # True if disposable

    def __init__(self, size=0):
        """
        Create packet.

        Parameters:
        - size: int - Initial packet size
        """

    def decode(self) -> list[Frame] | list[SubtitleSet]:
        """
        Decode the packet with the decoder of its associated stream.

        Returns:
        List of decoded frames for audio/video packets, or a list of
        subtitle sets for subtitle packets
        """

    # Inherits Buffer methods
    def update(self, input: bytes) -> None: ...
    def __buffer__(self, flags: int) -> memoryview: ...
    def __bytes__(self) -> bytes: ...
52
```
53
54
### Base Stream
55
56
Base stream class with common properties and methods.
57
58
```python { .api }
59
class Disposition(Flag):
    """Stream disposition flags.

    Bit positions follow FFmpeg's ``AV_DISPOSITION_*`` constants. Bits 12-15
    are not assigned to any member listed here, so CAPTIONS starts at bit 16.
    """

    # NOTE(review): PyAV may expose these members in lowercase
    # (e.g. ``Disposition.default``) -- confirm against the installed version.
    DEFAULT = 1 << 0  # Default stream
    DUB = 1 << 1  # Dubbed stream
    ORIGINAL = 1 << 2  # Original language
    COMMENT = 1 << 3  # Commentary
    LYRICS = 1 << 4  # Lyrics
    KARAOKE = 1 << 5  # Karaoke
    FORCED = 1 << 6  # Forced subtitles
    HEARING_IMPAIRED = 1 << 7  # Hearing impaired
    VISUAL_IMPAIRED = 1 << 8  # Visual impaired
    CLEAN_EFFECTS = 1 << 9  # Clean effects
    ATTACHED_PIC = 1 << 10  # Attached picture
    TIMED_THUMBNAILS = 1 << 11  # Timed thumbnails
    CAPTIONS = 1 << 16  # Captions
    DESCRIPTIONS = 1 << 17  # Descriptions
    METADATA = 1 << 18  # Metadata
    DEPENDENT = 1 << 19  # Dependent stream
    STILL_IMAGE = 1 << 20  # Still image
78
79
class Stream:
    """Base media stream."""

    # Properties
    index: int  # Stream index
    id: int  # Stream ID
    type: str  # Stream type ('video', 'audio', 'subtitle', etc.)
    profile: str | None  # Codec profile
    codec_context: CodecContext  # Codec context
    container: Container  # Parent container
    metadata: dict[str, str]  # Stream metadata
    disposition: Disposition  # Disposition flags (test with `&`)

    # Timing
    time_base: Fraction  # Stream time base
    start_time: int | None  # Start time in time_base units
    duration: int | None  # Duration in time_base units
    frames: int  # Number of frames (0 if unknown)

    # Language and title
    language: str | None  # Language code
    title: str | None  # Stream title

    def encode(self, frame=None) -> list[Packet]:
        """
        Encode frame to packets.

        Parameters:
        - frame: Frame | None - Frame to encode (None flushes)

        Returns:
        List of encoded packets
        """

    def decode(self, packet=None) -> list[Frame]:
        """
        Decode packet to frames.

        Parameters:
        - packet: Packet | None - Packet to decode (None flushes)

        Returns:
        List of decoded frames
        """
123
```
124
125
### Specialized Stream Types
126
127
Stream subclasses for different media types.
128
129
```python { .api }
130
class AudioStream(Stream):
    """Audio stream with audio-specific properties."""

    type: Literal['audio']  # Stream type
    codec_context: AudioCodecContext

    # Audio properties (delegated to codec context)
    frame_size: int
    sample_rate: int
    rate: int
    bit_rate: int
    channels: int
    format: AudioFormat
    layout: AudioLayout
144
145
class VideoStream(Stream):
    """Video stream with video-specific properties."""

    type: Literal['video']  # Stream type
    codec_context: VideoCodecContext

    # Video properties (delegated to codec context)
    width: int
    height: int
    format: VideoFormat
    pix_fmt: str
    framerate: Fraction
    rate: Fraction
    bit_rate: int
    max_bit_rate: int
    sample_aspect_ratio: Fraction
    display_aspect_ratio: Fraction
162
163
class SubtitleStream(Stream):
    """Subtitle stream."""

    type: Literal['subtitle']  # Stream type
    codec_context: SubtitleCodecContext

    def decode(self, packet=None) -> list[SubtitleSet]:
        """Decode subtitle packet."""

    # NOTE(review): how decode2 differs from decode (argument handling,
    # return shape) is not stated here -- confirm against the PyAV API
    # reference before relying on it.
    def decode2(self, packet=None) -> list[SubtitleSet]:
        """Alternative decode method."""
174
175
class DataStream(Stream):
    """Data stream for non-media data."""

    type: Literal['data']  # Stream type
179
180
class AttachmentStream(Stream):
    """Attachment stream (e.g., cover art)."""

    type: Literal['attachment']  # Stream type
    mimetype: str | None  # MIME type of attachment
185
```
186
187
### Stream Container Management
188
189
The StreamContainer provides organized access to streams by type.
190
191
```python { .api }
192
class StreamContainer:
    """Container managing streams in a media file."""

    # Stream collections by type
    video: tuple[VideoStream, ...]
    audio: tuple[AudioStream, ...]
    subtitles: tuple[SubtitleStream, ...]
    attachments: tuple[AttachmentStream, ...]
    data: tuple[DataStream, ...]
    other: tuple[Stream, ...]

    def __len__(self) -> int:
        """Total number of streams."""

    def __iter__(self) -> Iterator[Stream]:
        """Iterate over all streams."""

    def __getitem__(self, index: int) -> Stream:
        """Get stream by index."""

    def get(self, *, video=None, audio=None, subtitles=None, data=None) -> list[Stream]:
        """
        Get streams by type and criteria.

        Parameters:
        - video: int | tuple - Video stream selection
        - audio: int | tuple - Audio stream selection
        - subtitles: int | tuple - Subtitle stream selection
        - data: int | tuple - Data stream selection

        Returns:
        List of matching streams
        """

    def best(self, kind) -> Stream | None:
        """
        Get the best stream of a given type.

        Parameters:
        - kind: str - Stream type ('video', 'audio', 'subtitle')

        Returns:
        Best stream of the specified type or None
        """
236
```
237
238
## Usage Examples
239
240
### Basic Packet Inspection
241
242
```python
243
import av
244
245
# Open container and examine packets. The context manager closes the
# container even if inspection raises part-way through.
with av.open('sample.mp4') as container:
    print(f"Container has {len(container.streams)} streams:")
    for i, stream in enumerate(container.streams):
        print(f" Stream {i}: {stream.type} ({stream.codec_context.name})")
        if stream.language:
            print(f" Language: {stream.language}")
        if stream.title:
            print(f" Title: {stream.title}")

    # Process packets directly
    packet_count = 0
    for packet in container.demux():
        stream = packet.stream

        # A PTS of 0 is a valid timestamp; only None means "unknown".
        if packet.pts is not None:
            time_seconds = packet.pts * stream.time_base
        else:
            time_seconds = None

        print(f"Packet {packet_count}:")
        print(f" Stream: {stream.index} ({stream.type})")
        print(f" Size: {packet.size} bytes")
        print(f" PTS: {packet.pts}")
        print(f" DTS: {packet.dts}")
        print(f" Duration: {packet.duration}")
        print(f" Keyframe: {packet.is_keyframe}")
        print(f" Time: {time_seconds}")

        packet_count += 1
        if packet_count >= 10:  # Examine first 10 packets
            break
275
```
276
277
### Stream Selection and Analysis
278
279
```python
280
import av
281
282
def analyze_streams(filename):
    """Print a per-type summary of every stream in a media file.

    Parameters:
    - filename: str - Path of the media file to inspect
    """
    # Local alias keeps the disposition checks below readable.
    Disposition = av.stream.Disposition

    # The context manager closes the container even if a lookup fails.
    with av.open(filename) as container:
        streams = container.streams

        print(f"File: {filename}")
        print(f"Format: {container.format.name} ({container.format.long_name})")
        # The container duration can be unknown (None), e.g. for live input.
        if container.duration is not None:
            print(f"Duration: {container.duration / av.time_base:.2f} seconds")
        else:
            print("Duration: Unknown")
        print(f"Total streams: {len(streams)}")

        # Video streams
        if streams.video:
            print(f"\nVideo streams ({len(streams.video)}):")
            for stream in streams.video:
                # Only None means "unknown"; 0 is a legitimate duration.
                if stream.duration is not None:
                    duration = stream.duration * stream.time_base
                else:
                    duration = 'Unknown'

                print(f" Stream {stream.index}:")
                print(f" Codec: {stream.codec_context.name}")
                print(f" Resolution: {stream.width}x{stream.height}")
                print(f" Pixel format: {stream.format.name}")
                print(f" Frame rate: {stream.framerate}")
                print(f" Bitrate: {stream.bit_rate}")
                print(f" Duration: {duration}")

                # Check disposition
                if stream.disposition & Disposition.DEFAULT:
                    print(" Default: Yes")
                if stream.language:
                    print(f" Language: {stream.language}")

        # Audio streams
        if streams.audio:
            print(f"\nAudio streams ({len(streams.audio)}):")
            for stream in streams.audio:
                print(f" Stream {stream.index}:")
                print(f" Codec: {stream.codec_context.name}")
                print(f" Sample rate: {stream.sample_rate}")
                print(f" Channels: {stream.channels}")
                print(f" Layout: {stream.layout.name}")
                print(f" Format: {stream.format.name}")
                print(f" Bitrate: {stream.bit_rate}")

                if stream.disposition & Disposition.DEFAULT:
                    print(" Default: Yes")
                if stream.language:
                    print(f" Language: {stream.language}")
                if stream.title:
                    print(f" Title: {stream.title}")

        # Subtitle streams
        if streams.subtitles:
            print(f"\nSubtitle streams ({len(streams.subtitles)}):")
            for stream in streams.subtitles:
                print(f" Stream {stream.index}:")
                print(f" Codec: {stream.codec_context.name}")
                if stream.language:
                    print(f" Language: {stream.language}")
                if stream.title:
                    print(f" Title: {stream.title}")

                # Check subtitle disposition
                if stream.disposition & Disposition.FORCED:
                    print(" Forced: Yes")
                if stream.disposition & Disposition.HEARING_IMPAIRED:
                    print(" Hearing impaired: Yes")

        # Attachment streams (cover art, etc.)
        if streams.attachments:
            print(f"\nAttachment streams ({len(streams.attachments)}):")
            for stream in streams.attachments:
                print(f" Stream {stream.index}:")
                print(f" Codec: {stream.codec_context.name}")
                print(f" MIME type: {stream.mimetype}")
                if stream.title:
                    print(f" Filename: {stream.title}")

        # Find best streams
        best_video = streams.best('video')
        best_audio = streams.best('audio')

        if best_video:
            print(f"\nBest video stream: {best_video.index}")
        if best_audio:
            print(f"Best audio stream: {best_audio.index}")


# Analyze file
analyze_streams('movie.mkv')
369
```
370
371
### Precise Packet Timing
372
373
```python
374
import av
375
376
def extract_keyframes(input_file, output_dir, max_keyframes=20):
    """Extract keyframes with precise timing information.

    Parameters:
    - input_file: str - Path of the video file to read
    - output_dir: str - Directory that receives the JPEG files (created if missing)
    - max_keyframes: int - Stop after saving this many keyframes
    """
    import os

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    keyframe_count = 0

    # The context manager closes the container even if decoding or saving fails.
    with av.open(input_file) as container:
        video_stream = container.streams.video[0]

        print("Video stream info:")
        print(f" Time base: {video_stream.time_base}")
        print(f" Frame rate: {video_stream.framerate}")
        # The container duration can be unknown (None).
        if container.duration is not None:
            print(f" Total duration: {container.duration / av.time_base:.2f}s")
        else:
            print(" Total duration: Unknown")

        for packet in container.demux(video_stream):
            if not packet.is_keyframe:
                continue

            # Calculate precise timing. Timestamp 0 is valid, so test against
            # None explicitly. Convert to float because Fraction only accepts
            # the ".3f" format spec from Python 3.12 onwards.
            if packet.pts is not None:
                pts_seconds = float(packet.pts * video_stream.time_base)
            else:
                pts_seconds = 0.0
            if packet.dts is not None:
                dts_seconds = float(packet.dts * video_stream.time_base)
            else:
                dts_seconds = 0.0

            # Decode keyframe packet
            for frame in packet.decode():
                print(f"Keyframe {keyframe_count}:")
                print(f" PTS: {packet.pts} ({pts_seconds:.3f}s)")
                print(f" DTS: {packet.dts} ({dts_seconds:.3f}s)")
                print(f" Size: {packet.size} bytes")
                print(f" Position: {packet.pos}")

                # Save keyframe
                output_path = os.path.join(
                    output_dir,
                    f"keyframe_{keyframe_count:04d}_{pts_seconds:.3f}s.jpg"
                )
                frame.save(output_path)

                keyframe_count += 1

                # Limit extraction
                if keyframe_count >= max_keyframes:
                    break

            # Leave the packet loop too once the limit has been reached.
            if keyframe_count >= max_keyframes:
                break

    print(f"Extracted {keyframe_count} keyframes to {output_dir}")


# Extract keyframes
extract_keyframes('video.mp4', 'keyframes/')
429
```
430
431
### Multi-Stream Processing
432
433
```python
434
import av
435
436
def process_multi_stream(input_file, output_file):
    """Process multiple streams with different handling.

    Every video stream is re-encoded as H.264 at roughly half resolution and
    the first audio stream is re-encoded as AAC. All other streams are dropped.

    Parameters:
    - input_file: str - Path of the media file to read
    - output_file: str - Path of the media file to write
    """
    # Context managers close both containers even if transcoding fails, so
    # the output file is always finalized or released.
    with av.open(input_file) as input_container, \
            av.open(output_file, 'w') as output_container:

        # Map input stream index -> output stream
        stream_mapping = {}

        # Process video streams
        for input_stream in input_container.streams.video:
            output_stream = output_container.add_stream('h264', rate=input_stream.framerate)
            # Half resolution, rounded down to an even size: 4:2:0 H.264
            # encoders reject odd dimensions.
            output_stream.width = input_stream.width // 4 * 2
            output_stream.height = input_stream.height // 4 * 2
            output_stream.pix_fmt = 'yuv420p'

            stream_mapping[input_stream.index] = output_stream
            print(f"Video stream {input_stream.index}: {input_stream.width}x{input_stream.height} -> {output_stream.width}x{output_stream.height}")

        # Process audio streams (copy first audio stream only)
        if input_container.streams.audio:
            input_stream = input_container.streams.audio[0]
            output_stream = output_container.add_stream('aac', rate=input_stream.sample_rate)
            # The channel count follows from the layout, so only the layout
            # is set. NOTE(review): confirm `channels` needs no explicit
            # assignment on the installed PyAV version.
            output_stream.layout = input_stream.layout

            stream_mapping[input_stream.index] = output_stream
            print(f"Audio stream {input_stream.index}: {input_stream.sample_rate}Hz {input_stream.channels}ch")

        # Process packets by stream
        frame_counts = {}
        # Running number of audio samples sent to the encoder. Only one audio
        # stream is mapped, so a single counter is enough.
        audio_samples = 0

        for packet in input_container.demux():
            input_stream_index = packet.stream_index

            output_stream = stream_mapping.get(input_stream_index)
            if output_stream is None:
                continue  # Skip unmapped streams

            input_stream = input_container.streams[input_stream_index]

            # Initialize frame counter
            frame_counts.setdefault(input_stream_index, 0)

            # Decode and process frames
            for frame in packet.decode():
                if input_stream.type == 'video':
                    # Resize video frame
                    resized_frame = frame.reformat(
                        width=output_stream.width,
                        height=output_stream.height
                    )
                    resized_frame.pts = frame_counts[input_stream_index]
                    resized_frame.time_base = output_stream.time_base

                    # Encode and mux
                    for out_packet in output_stream.encode(resized_frame):
                        output_container.mux(out_packet)

                elif input_stream.type == 'audio':
                    # Pass through audio (could apply processing here).
                    # Timestamp by samples actually seen: decoded frames need
                    # not match the encoder's frame_size, which may also
                    # still be 0 before the encoder is opened.
                    frame.pts = audio_samples
                    frame.time_base = output_stream.time_base
                    audio_samples += frame.samples

                    for out_packet in output_stream.encode(frame):
                        output_container.mux(out_packet)

                frame_counts[input_stream_index] += 1

        # Flush all encoders
        for output_stream in stream_mapping.values():
            for out_packet in output_stream.encode():
                output_container.mux(out_packet)

        # Report processing
        for stream_index, count in frame_counts.items():
            stream_type = input_container.streams[stream_index].type
            print(f"Processed {count} {stream_type} frames from stream {stream_index}")


# Process multiple streams
process_multi_stream('input.mkv', 'processed.mp4')
521
```
522
523
### Stream Metadata Manipulation
524
525
```python
526
import av
527
528
def copy_with_metadata(input_file, output_file, new_metadata=None):
    """Copy file while modifying stream metadata.

    Video and audio streams are remuxed without re-encoding. Streams of any
    other type are not copied.

    Parameters:
    - input_file: str - Path of the media file to read
    - output_file: str - Path of the media file to write
    - new_metadata: dict | None - Container-level entries to add or override
    """
    # Context managers close both containers even if remuxing fails.
    with av.open(input_file) as input_container, \
            av.open(output_file, 'w') as output_container:

        # Copy container metadata, then let the caller's entries win.
        output_container.metadata.update(input_container.metadata)
        if new_metadata:
            output_container.metadata.update(new_metadata)

        # Input stream index -> output stream. Output indices differ from
        # input indices as soon as one input stream is skipped, so packets
        # must be routed through this mapping.
        stream_mapping = {}

        # Process streams
        for input_stream in input_container.streams:
            if input_stream.type not in ('video', 'audio'):
                continue

            output_stream = output_container.add_stream_from_template(input_stream)
            output_stream.metadata.update(input_stream.metadata)

            if input_stream.type == 'video':
                # Set custom metadata
                output_stream.metadata['encoder'] = 'PyAV'
                output_stream.metadata['processed_by'] = 'Python script'
            else:
                # Language and title are written as metadata tags.
                # NOTE(review): presumes `stream.language` / `stream.title`
                # read these tags back -- confirm on the installed PyAV.
                if not input_stream.language:
                    output_stream.metadata['language'] = 'eng'  # Default to English

                # Title modification
                if input_stream.title:
                    output_stream.metadata['title'] = f"Enhanced {input_stream.title}"
                else:
                    output_stream.metadata['title'] = f"Audio Track {input_stream.index}"

            stream_mapping[input_stream.index] = output_stream

        # Copy data with metadata preservation
        for packet in input_container.demux():
            # demux() ends with empty "flush" packets that must not be muxed.
            if packet.dts is None:
                continue

            output_stream = stream_mapping.get(packet.stream_index)
            if output_stream is None:
                continue  # Stream type was not copied

            # Update packet stream reference
            packet.stream = output_stream
            output_container.mux(packet)

        print("Metadata copying complete:")
        print(f" Container metadata: {len(output_container.metadata)} entries")
        for i, stream in enumerate(output_container.streams):
            print(f" Stream {i} metadata: {len(stream.metadata)} entries")
            if stream.language:
                print(f" Language: {stream.language}")
            if stream.title:
                print(f" Title: {stream.title}")


# Copy with metadata
new_metadata = {
    'title': 'Processed Video',
    'artist': 'PyAV Processing',
    'creation_time': '2024-01-01T00:00:00.000000Z',
}

copy_with_metadata('input.mp4', 'output_with_metadata.mp4', new_metadata)
602
```
603
604
### Stream Time Synchronization
605
606
```python
607
import av
608
609
def synchronize_streams(input_file, output_file, audio_delay_ms=0):
    """Synchronize audio and video streams with optional delay.

    Frames of the first video and first audio stream are re-timestamped from
    zero and re-encoded; the audio timeline is shifted by ``audio_delay_ms``.

    Parameters:
    - input_file: str - Path of the media file to read
    - output_file: str - Path of the media file to write
    - audio_delay_ms: int | float - Delay applied to the audio, in milliseconds
    """
    from fractions import Fraction

    # Context managers close both containers even if processing fails.
    with av.open(input_file) as input_container, \
            av.open(output_file, 'w') as output_container:

        # Get streams
        video_stream = input_container.streams.video[0]
        audio_stream = input_container.streams.audio[0]

        # Create output streams
        out_video = output_container.add_stream_from_template(video_stream)
        out_audio = output_container.add_stream_from_template(audio_stream)

        print("Input timing:")
        print(f" Video time base: {video_stream.time_base}")
        print(f" Audio time base: {audio_stream.time_base}")
        print(f" Audio delay: {audio_delay_ms}ms")

        # The counters below tick once per video frame and once per audio
        # sample, so each frame is stamped with the matching tick duration
        # rather than an unrelated stream time base.
        video_tick = 1 / Fraction(video_stream.framerate)
        audio_tick = Fraction(1, audio_stream.sample_rate)

        # Calculate delay in audio samples
        audio_delay_samples = int(audio_delay_ms * audio_stream.sample_rate / 1000)

        # Track timing
        video_pts = 0
        audio_pts = audio_delay_samples  # Start with delay

        # Process packets with timing adjustment
        for packet in input_container.demux():
            if packet.stream_index == video_stream.index:
                # Process video packets
                for frame in packet.decode():
                    frame.pts = video_pts
                    frame.time_base = video_tick

                    for out_packet in out_video.encode(frame):
                        output_container.mux(out_packet)

                    video_pts += 1

            elif packet.stream_index == audio_stream.index:
                # Process audio packets with delay
                for frame in packet.decode():
                    frame.pts = audio_pts
                    frame.time_base = audio_tick
                    # Read the sample count before encoding hands the frame on.
                    samples = frame.samples

                    for out_packet in out_audio.encode(frame):
                        output_container.mux(out_packet)

                    audio_pts += samples

        # Flush encoders
        for out_packet in out_video.encode():
            output_container.mux(out_packet)
        for out_packet in out_audio.encode():
            output_container.mux(out_packet)

        print("Synchronization complete:")
        print(f" Final video PTS: {video_pts}")
        print(f" Final audio PTS: {audio_pts}")
        print(f" Audio delay applied: {audio_delay_ms}ms")


# Synchronize with 100ms audio delay
synchronize_streams('input.mp4', 'synchronized.mp4', audio_delay_ms=100)
675
```