or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

audio.md · codecs.md · containers.md · filters.md · index.md · streams.md · video.md

docs/streams.md

0

# Packet and Stream Management

1

2

Low-level packet handling and stream operations for precise control over media data flow and timing. PyAV provides comprehensive access to FFmpeg's stream and packet management capabilities.

3

4

## Capabilities

5

6

### Packets

7

8

Packets contain compressed media data with timing and metadata information.

9

10

```python { .api }

11

class Packet:
    """Container for compressed media data.

    A packet carries one chunk of encoded data for a single stream together
    with the timestamps and flags needed to schedule and decode it.
    """

    # Properties
    stream: Stream            # Associated stream
    stream_index: int         # Stream index in container
    time_base: Fraction       # Time base for timestamps
    pts: int | None           # Presentation timestamp (None if unset)
    dts: int | None           # Decode timestamp (None if unset)
    pos: int | None           # Byte position in stream (None if unknown)
    size: int                 # Packet size in bytes
    duration: int | None      # Packet duration in time_base units (None if unknown)
    opaque: object | None     # User data

    # Packet flags
    is_keyframe: bool         # True if keyframe
    is_corrupt: bool          # True if corrupt
    is_discard: bool          # True if should be discarded
    is_trusted: bool          # True if trusted
    is_disposable: bool       # True if disposable

    def __init__(self, size=0):
        """
        Create packet.

        Parameters:
        - size: int - Initial packet size
        """

    def decode(self) -> list[Frame] | list[SubtitleSet]:
        """
        Decode this packet using the decoder of its associated stream.

        Returns:
        List of decoded frames for audio/video packets, or list of
        subtitle sets for subtitle packets. The list may be empty when
        the decoder needs more data before it can emit output.
        """

    # Inherits Buffer methods
    def update(self, input: bytes) -> None: ...
    def __buffer__(self, flags: int) -> memoryview: ...
    def __bytes__(self) -> bytes: ...

52

```

53

54

### Base Stream

55

56

Base stream class with common properties and methods.

57

58

```python { .api }

59

class Disposition(Flag):
    """Stream disposition flags.

    Each member is a single bit mirroring FFmpeg's ``AV_DISPOSITION_*``
    constants. Bits 13-15 are unused, so the values jump from bit 12
    straight to bit 16.
    """
    DEFAULT = 1 << 0            # Default stream
    DUB = 1 << 1                # Dubbed stream
    ORIGINAL = 1 << 2           # Original language
    COMMENT = 1 << 3            # Commentary
    LYRICS = 1 << 4             # Lyrics
    KARAOKE = 1 << 5            # Karaoke
    FORCED = 1 << 6             # Forced subtitles
    HEARING_IMPAIRED = 1 << 7   # Hearing impaired
    VISUAL_IMPAIRED = 1 << 8    # Visual impaired
    CLEAN_EFFECTS = 1 << 9      # Clean effects
    ATTACHED_PIC = 1 << 10      # Attached picture
    TIMED_THUMBNAILS = 1 << 11  # Timed thumbnails
    NON_DIEGETIC = 1 << 12      # Non-diegetic audio
    CAPTIONS = 1 << 16          # Captions
    DESCRIPTIONS = 1 << 17      # Descriptions
    METADATA = 1 << 18          # Metadata
    DEPENDENT = 1 << 19         # Dependent stream
    STILL_IMAGE = 1 << 20       # Still image

78

79

class Stream:
    """Base media stream.

    Holds the properties shared by every stream type; the specialised
    subclasses add media-specific attributes.
    """

    # Properties
    index: int                    # Stream index
    id: int                       # Stream ID
    type: str                     # Stream type ('video', 'audio', 'subtitle', etc.)
    profile: str | None           # Codec profile
    codec_context: CodecContext   # Codec context
    container: Container          # Parent container
    metadata: dict[str, str]      # Stream metadata
    # Typed as the flag enum so it can be tested with
    # ``stream.disposition & Disposition.DEFAULT``.
    disposition: Disposition      # Disposition flags

    # Timing
    time_base: Fraction           # Stream time base
    start_time: int | None        # Start time in time_base units
    duration: int | None          # Duration in time_base units
    frames: int                   # Number of frames (0 if unknown)

    # Language and title
    language: str | None          # Language code
    title: str | None             # Stream title

    def encode(self, frame=None) -> list[Packet]:
        """
        Encode frame to packets.

        Parameters:
        - frame: Frame | None - Frame to encode (None flushes)

        Returns:
        List of encoded packets
        """

    def decode(self, packet=None) -> list[Frame]:
        """
        Decode packet to frames.

        Parameters:
        - packet: Packet | None - Packet to decode (None flushes)

        Returns:
        List of decoded frames
        """

123

```

124

125

### Specialized Stream Types

126

127

Stream subclasses for different media types.

128

129

```python { .api }

130

class AudioStream(Stream):
    """Stream carrying audio; exposes the audio settings of its codec context."""

    type: Literal['audio']              # Always 'audio' for this class
    codec_context: AudioCodecContext    # Audio-specific codec context

    # The attributes below are delegated to ``codec_context``.
    frame_size: int
    sample_rate: int
    rate: int
    bit_rate: int
    channels: int
    format: AudioFormat
    layout: AudioLayout

144

145

class VideoStream(Stream):
    """Stream carrying video; exposes the video settings of its codec context."""

    type: Literal['video']              # Always 'video' for this class
    codec_context: VideoCodecContext    # Video-specific codec context

    # The attributes below are delegated to ``codec_context``.
    width: int
    height: int
    format: VideoFormat
    pix_fmt: str
    framerate: Fraction
    rate: Fraction
    bit_rate: int
    max_bit_rate: int
    sample_aspect_ratio: Fraction
    display_aspect_ratio: Fraction

162

163

class SubtitleStream(Stream):
    """Stream carrying subtitles."""

    type: Literal['subtitle']               # Always 'subtitle' for this class
    codec_context: SubtitleCodecContext     # Subtitle-specific codec context

    def decode(self, packet=None) -> list[SubtitleSet]:
        """Decode a subtitle packet into subtitle sets."""

    def decode2(self, packet=None) -> list[SubtitleSet]:
        """Alternative entry point for decoding a subtitle packet."""

174

175

class DataStream(Stream):
    """Stream carrying non-media data."""

    type: Literal['data']   # Always 'data' for this class

179

180

class AttachmentStream(Stream):
    """Stream holding an attachment such as cover art."""

    type: Literal['attachment']   # Always 'attachment' for this class
    mimetype: str | None          # MIME type of the attachment, if any

185

```

186

187

### Stream Container Management

188

189

The StreamContainer provides organized access to streams by type.

190

191

```python { .api }

192

class StreamContainer:
    """Collection of the streams in a media file, grouped by type."""

    # One tuple per stream type
    video: tuple[VideoStream, ...]
    audio: tuple[AudioStream, ...]
    subtitles: tuple[SubtitleStream, ...]
    attachments: tuple[AttachmentStream, ...]
    data: tuple[DataStream, ...]
    other: tuple[Stream, ...]

    def __len__(self) -> int:
        """Return the total number of streams."""

    def __iter__(self) -> Iterator[Stream]:
        """Iterate over every stream."""

    def __getitem__(self, index: int) -> Stream:
        """Return the stream at ``index``."""

    def get(self, *, video=None, audio=None, subtitles=None, data=None) -> list[Stream]:
        """
        Select streams by type and criteria.

        Parameters:
        - video: int | tuple - Video stream selection
        - audio: int | tuple - Audio stream selection
        - subtitles: int | tuple - Subtitle stream selection
        - data: int | tuple - Data stream selection

        Returns:
        List of matching streams
        """

    def best(self, kind) -> Stream | None:
        """
        Pick the best stream of one type.

        Parameters:
        - kind: str - Stream type ('video', 'audio', 'subtitle')

        Returns:
        Best stream of the specified type or None
        """

236

```

237

238

## Usage Examples

239

240

### Basic Packet Inspection

241

242

```python

243

import av

244

245

# Open container and examine packets

246

# The context manager closes the container even if inspection fails part-way.
with av.open('sample.mp4') as container:
    print(f"Container has {len(container.streams)} streams:")
    for i, stream in enumerate(container.streams):
        print(f" Stream {i}: {stream.type} ({stream.codec_context.name})")
        if stream.language:
            print(f" Language: {stream.language}")
        if stream.title:
            print(f" Title: {stream.title}")

    # Process packets directly; only the first 10 packets are examined.
    for packet_count, packet in enumerate(container.demux()):
        if packet_count >= 10:
            break

        stream = packet.stream

        # A PTS of 0 is a valid timestamp, so compare against None rather
        # than relying on truthiness (which would report the first packet
        # as having no time).
        if packet.pts is not None:
            packet_time = packet.pts * stream.time_base
        else:
            packet_time = None

        print(f"Packet {packet_count}:")
        print(f" Stream: {stream.index} ({stream.type})")
        print(f" Size: {packet.size} bytes")
        print(f" PTS: {packet.pts}")
        print(f" DTS: {packet.dts}")
        print(f" Duration: {packet.duration}")
        print(f" Keyframe: {packet.is_keyframe}")
        print(f" Time: {packet_time}")

275

```

276

277

### Stream Selection and Analysis

278

279

```python

280

import av

281

282

def analyze_streams(filename):
    """Analyze all streams in a media file.

    Prints a report covering the container format and duration, every
    video, audio, subtitle and attachment stream, and the streams chosen
    by ``container.streams.best`` for video and audio.

    Parameters:
    - filename: path or URL accepted by ``av.open``
    """
    # The context manager closes the container even if reporting fails.
    with av.open(filename) as container:
        streams = container.streams

        print(f"File: {filename}")
        print(f"Format: {container.format.name} ({container.format.long_name})")
        # Guard the division: presumably container.duration can be None
        # when the format reports no duration, like Stream.duration.
        if container.duration is not None:
            print(f"Duration: {container.duration / av.time_base:.2f} seconds")
        else:
            print("Duration: Unknown")
        print(f"Total streams: {len(streams)}")

        # Video streams
        if streams.video:
            print(f"\nVideo streams ({len(streams.video)}):")
            for stream in streams.video:
                print(f" Stream {stream.index}:")
                print(f" Codec: {stream.codec_context.name}")
                print(f" Resolution: {stream.width}x{stream.height}")
                print(f" Pixel format: {stream.format.name}")
                print(f" Frame rate: {stream.framerate}")
                print(f" Bitrate: {stream.bit_rate}")
                print(f" Duration: {stream.duration * stream.time_base if stream.duration else 'Unknown'}")

                # Check disposition
                if stream.disposition & av.stream.Disposition.DEFAULT:
                    print(" Default: Yes")
                if stream.language:
                    print(f" Language: {stream.language}")

        # Audio streams
        if streams.audio:
            print(f"\nAudio streams ({len(streams.audio)}):")
            for stream in streams.audio:
                print(f" Stream {stream.index}:")
                print(f" Codec: {stream.codec_context.name}")
                print(f" Sample rate: {stream.sample_rate}")
                print(f" Channels: {stream.channels}")
                print(f" Layout: {stream.layout.name}")
                print(f" Format: {stream.format.name}")
                print(f" Bitrate: {stream.bit_rate}")

                if stream.disposition & av.stream.Disposition.DEFAULT:
                    print(" Default: Yes")
                if stream.language:
                    print(f" Language: {stream.language}")
                if stream.title:
                    print(f" Title: {stream.title}")

        # Subtitle streams
        if streams.subtitles:
            print(f"\nSubtitle streams ({len(streams.subtitles)}):")
            for stream in streams.subtitles:
                print(f" Stream {stream.index}:")
                print(f" Codec: {stream.codec_context.name}")
                if stream.language:
                    print(f" Language: {stream.language}")
                if stream.title:
                    print(f" Title: {stream.title}")

                # Check subtitle disposition
                if stream.disposition & av.stream.Disposition.FORCED:
                    print(" Forced: Yes")
                if stream.disposition & av.stream.Disposition.HEARING_IMPAIRED:
                    print(" Hearing impaired: Yes")

        # Attachment streams (cover art, etc.)
        if streams.attachments:
            print(f"\nAttachment streams ({len(streams.attachments)}):")
            for stream in streams.attachments:
                print(f" Stream {stream.index}:")
                print(f" Codec: {stream.codec_context.name}")
                print(f" MIME type: {stream.mimetype}")
                if stream.title:
                    print(f" Filename: {stream.title}")

        # Find best streams
        best_video = streams.best('video')
        best_audio = streams.best('audio')

        if best_video:
            print(f"\nBest video stream: {best_video.index}")
        if best_audio:
            print(f"Best audio stream: {best_audio.index}")

366

367

# Analyze file

368

analyze_streams('movie.mkv')

369

```

370

371

### Precise Packet Timing

372

373

```python

374

import av

375

376

def extract_keyframes(input_file, output_dir):
    """Extract keyframes with precise timing information.

    Demuxes the first video stream of ``input_file``, decodes each packet
    flagged as a keyframe, prints its timing, and saves every resulting
    frame as a JPEG in ``output_dir``. At most 20 frames are written.

    Parameters:
    - input_file: path or URL accepted by ``av.open``
    - output_dir: directory for the images; created if it does not exist
    """
    import os

    max_keyframes = 20  # Limit extraction

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    keyframe_count = 0

    # The context manager closes the container even if decoding fails.
    with av.open(input_file) as container:
        video_stream = container.streams.video[0]
        time_base = video_stream.time_base

        print("Video stream info:")
        print(f" Time base: {time_base}")
        print(f" Frame rate: {video_stream.framerate}")
        # Guard the division: presumably container.duration can be None
        # when the format reports no duration.
        if container.duration is not None:
            print(f" Total duration: {container.duration / av.time_base:.2f}s")
        else:
            print(" Total duration: Unknown")

        for packet in container.demux(video_stream):
            if keyframe_count >= max_keyframes:
                break
            if not packet.is_keyframe:
                continue

            # Timestamps multiplied by the time base are Fractions. Convert
            # to float before using the '.3f' format, which Fraction only
            # supports from Python 3.12 onwards. Missing timestamps are
            # reported as 0, as before.
            pts_seconds = float(packet.pts * time_base) if packet.pts is not None else 0.0
            dts_seconds = float(packet.dts * time_base) if packet.dts is not None else 0.0

            # Decode keyframe packet
            for frame in packet.decode():
                print(f"Keyframe {keyframe_count}:")
                print(f" PTS: {packet.pts} ({pts_seconds:.3f}s)")
                print(f" DTS: {packet.dts} ({dts_seconds:.3f}s)")
                print(f" Size: {packet.size} bytes")
                print(f" Position: {packet.pos}")

                # Save keyframe
                output_path = os.path.join(
                    output_dir,
                    f"keyframe_{keyframe_count:04d}_{pts_seconds:.3f}s.jpg"
                )
                frame.save(output_path)

                keyframe_count += 1
                if keyframe_count >= max_keyframes:
                    break

    print(f"Extracted {keyframe_count} keyframes to {output_dir}")

426

427

# Extract keyframes

428

extract_keyframes('video.mp4', 'keyframes/')

429

```

430

431

### Multi-Stream Processing

432

433

```python

434

import av

435

436

def process_multi_stream(input_file, output_file):
    """Process multiple streams with different handling.

    Every video stream is re-encoded to H.264 at half resolution and the
    first audio stream is re-encoded to AAC; all other streams are dropped.
    A per-stream count of processed frames is printed at the end.

    Parameters:
    - input_file: path or URL of the source media
    - output_file: path of the file to write
    """
    from fractions import Fraction

    # Both containers are closed (and the output finalised) even on error.
    with av.open(input_file) as input_container, \
            av.open(output_file, 'w') as output_container:

        # Map input stream index -> output stream
        stream_mapping = {}

        # Process video streams
        for input_stream in input_container.streams.video:
            output_stream = output_container.add_stream('h264', rate=input_stream.framerate)
            output_stream.width = input_stream.width // 2  # Half resolution
            output_stream.height = input_stream.height // 2
            output_stream.pix_fmt = 'yuv420p'

            stream_mapping[input_stream.index] = output_stream
            print(f"Video stream {input_stream.index}: {input_stream.width}x{input_stream.height} -> {output_stream.width}x{output_stream.height}")

        # Process audio streams (copy first audio stream only)
        if input_container.streams.audio:
            input_stream = input_container.streams.audio[0]
            output_stream = output_container.add_stream('aac', rate=input_stream.sample_rate)
            # The layout already determines the channel count.
            # NOTE(review): `channels` is not assigned separately because it
            # appears to be read-only in recent PyAV releases - confirm for
            # the pinned version.
            output_stream.layout = input_stream.layout

            stream_mapping[input_stream.index] = output_stream
            print(f"Audio stream {input_stream.index}: {input_stream.sample_rate}Hz {input_stream.channels}ch")

        # Process packets by stream
        frame_counts = {}
        # Running sample total for audio timestamps. Decoded frames need not
        # match the encoder's frame_size, so count * frame_size would drift.
        audio_samples = 0

        for packet in input_container.demux():
            input_stream_index = packet.stream_index
            output_stream = stream_mapping.get(input_stream_index)
            if output_stream is None:
                continue  # Skip unmapped streams

            input_stream = input_container.streams[input_stream_index]

            # Initialize frame counter
            frame_counts.setdefault(input_stream_index, 0)

            # Decode and process frames
            for frame in packet.decode():
                if input_stream.type == 'video':
                    # Resize video frame
                    resized_frame = frame.reformat(
                        width=output_stream.width,
                        height=output_stream.height
                    )
                    resized_frame.pts = frame_counts[input_stream_index]
                    resized_frame.time_base = output_stream.time_base

                    # Encode and mux
                    for out_packet in output_stream.encode(resized_frame):
                        output_container.mux(out_packet)

                elif input_stream.type == 'audio':
                    # Pass through audio (could apply processing here).
                    # PTS is counted in samples, so the matching time base
                    # is one tick per sample.
                    frame.pts = audio_samples
                    frame.time_base = Fraction(1, frame.sample_rate)
                    audio_samples += frame.samples

                    for out_packet in output_stream.encode(frame):
                        output_container.mux(out_packet)

                frame_counts[input_stream_index] += 1

        # Flush all encoders
        for output_stream in stream_mapping.values():
            for packet in output_stream.encode():
                output_container.mux(packet)

        # Report processing
        for stream_index, count in frame_counts.items():
            stream_type = input_container.streams[stream_index].type
            print(f"Processed {count} {stream_type} frames from stream {stream_index}")

518

519

# Process multiple streams

520

process_multi_stream('input.mkv', 'processed.mp4')

521

```

522

523

### Stream Metadata Manipulation

524

525

```python

526

import av

527

528

def copy_with_metadata(input_file, output_file, new_metadata=None):
    """Copy file while modifying stream metadata.

    Video and audio streams are remuxed (packets are copied, not
    re-encoded); other stream types are not carried over. Container and
    stream metadata are copied, then extended with custom tags.

    Parameters:
    - input_file: path or URL of the source media
    - output_file: path of the file to write
    - new_metadata: dict[str, str] | None - Extra container-level tags;
      these override copied tags with the same key
    """
    # Both containers are closed (and the output finalised) even on error.
    with av.open(input_file) as input_container, \
            av.open(output_file, 'w') as output_container:

        # Copy container metadata, then add new container metadata
        output_container.metadata.update(input_container.metadata)
        if new_metadata:
            output_container.metadata.update(new_metadata)

        # Map input stream index -> output stream. Only audio/video streams
        # are copied, so output indices do not line up with input indices
        # whenever the input has subtitle, data or attachment streams.
        stream_mapping = {}

        # Process streams
        for input_stream in input_container.streams:
            if input_stream.type not in ('video', 'audio'):
                continue

            output_stream = output_container.add_stream_from_template(input_stream)
            stream_mapping[input_stream.index] = output_stream

            # Copy stream metadata
            output_stream.metadata.update(input_stream.metadata)

            if input_stream.type == 'video':
                # Set custom metadata
                output_stream.metadata['encoder'] = 'PyAV'
                output_stream.metadata['processed_by'] = 'Python script'
            else:
                # Language and title are written through the metadata dict,
                # which works whether or not the convenience attributes are
                # settable (NOTE(review): they look read-only - confirm).

                # Language tagging
                if not input_stream.metadata.get('language'):
                    output_stream.metadata['language'] = 'eng'  # Default to English

                # Title modification
                title = input_stream.metadata.get('title')
                if title:
                    output_stream.metadata['title'] = f"Enhanced {title}"
                else:
                    output_stream.metadata['title'] = f"Audio Track {input_stream.index}"

        # Copy data with metadata preservation
        for packet in input_container.demux():
            # demux() also yields empty "flush" packets (dts is None);
            # they carry no data and must not be muxed.
            if packet.dts is None:
                continue

            output_stream = stream_mapping.get(packet.stream.index)
            if output_stream is None:
                continue  # Stream type that was not copied

            # Update packet stream reference
            packet.stream = output_stream
            output_container.mux(packet)

        print("Metadata copying complete:")
        print(f" Container metadata: {len(output_container.metadata)} entries")
        for i, stream in enumerate(output_container.streams):
            print(f" Stream {i} metadata: {len(stream.metadata)} entries")
            language = stream.metadata.get('language')
            if language:
                print(f" Language: {language}")
            title = stream.metadata.get('title')
            if title:
                print(f" Title: {title}")

593

594

# Copy with metadata

595

new_metadata = {

596

'title': 'Processed Video',

597

'artist': 'PyAV Processing',

598

'creation_time': '2024-01-01T00:00:00.000000Z'

599

}

600

601

copy_with_metadata('input.mp4', 'output_with_metadata.mp4', new_metadata)

602

```

603

604

### Stream Time Synchronization

605

606

```python

607

import av

608

609

def synchronize_streams(input_file, output_file, audio_delay_ms=0):
    """Synchronize audio and video streams with optional delay.

    Re-stamps the first video and first audio stream of ``input_file``
    with fresh, monotonically increasing timestamps and writes them to
    ``output_file``. Audio timestamps start at ``audio_delay_ms``.

    Parameters:
    - input_file: path or URL of the source media
    - output_file: path of the file to write
    - audio_delay_ms: int - Offset applied to the audio, in milliseconds

    Raises:
    IndexError if the input has no video stream or no audio stream.
    """
    from fractions import Fraction

    # Both containers are closed (and the output finalised) even on error.
    with av.open(input_file) as input_container, \
            av.open(output_file, 'w') as output_container:

        # Get streams
        video_stream = input_container.streams.video[0]
        audio_stream = input_container.streams.audio[0]

        # Create output streams
        out_video = output_container.add_stream_from_template(video_stream)
        out_audio = output_container.add_stream_from_template(audio_stream)

        print("Input timing:")
        print(f" Video time base: {video_stream.time_base}")
        print(f" Audio time base: {audio_stream.time_base}")
        print(f" Audio delay: {audio_delay_ms}ms")

        # Audio timestamps are counted in samples, so the delay is converted
        # to samples and frames are stamped with a 1/sample_rate time base.
        # Using the stream's own time base would mix units whenever it is
        # not exactly 1/sample_rate.
        audio_time_base = Fraction(1, audio_stream.sample_rate)
        audio_delay_units = int(audio_delay_ms * audio_stream.sample_rate / 1000)

        # Track timing
        video_pts = 0
        audio_pts = audio_delay_units  # Start with delay

        # Process packets with timing adjustment
        for packet in input_container.demux():
            if packet.stream == video_stream:
                # Process video packets
                for frame in packet.decode():
                    # NOTE(review): one tick per frame assumes
                    # out_video.time_base equals 1/frame_rate - confirm.
                    frame.pts = video_pts
                    frame.time_base = out_video.time_base

                    for out_packet in out_video.encode(frame):
                        output_container.mux(out_packet)

                    video_pts += 1

            elif packet.stream == audio_stream:
                # Process audio packets with delay
                for frame in packet.decode():
                    frame.pts = audio_pts
                    frame.time_base = audio_time_base

                    for out_packet in out_audio.encode(frame):
                        output_container.mux(out_packet)

                    audio_pts += frame.samples

        # Flush encoders
        for packet in out_video.encode():
            output_container.mux(packet)
        for packet in out_audio.encode():
            output_container.mux(packet)

        print("Synchronization complete:")
        print(f" Final video PTS: {video_pts}")
        print(f" Final audio PTS: {audio_pts}")
        print(f" Audio delay applied: {audio_delay_ms}ms")

672

673

# Synchronize with 100ms audio delay

674

synchronize_streams('input.mp4', 'synchronized.mp4', audio_delay_ms=100)

675

```