or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

audio.md codecs.md containers.md filters.md index.md streams.md video.md

docs/video.md

0

# Video Processing

1

2

Complete video handling with frames, streams, format conversion, reformatting, and image operations. PyAV provides comprehensive video processing capabilities with NumPy and PIL integration.

3

4

## Capabilities

5

6

### Video Frames

7

8

Video frame objects contain uncompressed video data with format, timing, and metadata information.

9

10

```python { .api }

11

class VideoFrame:

12

"""Container for uncompressed video data."""

13

14

# Properties

15

width: int # Frame width in pixels

16

height: int # Frame height in pixels

17

format: VideoFormat # Pixel format

18

planes: tuple[VideoPlane, ...] # Video data planes

19

pts: int # Presentation timestamp

20

time: float # Time in seconds

21

pict_type: int # Picture type (I, P, B frame)

22

interlaced_frame: bool # True if interlaced

23

colorspace: int # Color space

24

color_range: int # Color range (limited/full)

25

side_data: SideDataContainer # Additional frame data

26

27

def __init__(self, width=0, height=0, format='yuv420p'):

28

"""

29

Create a video frame.

30

31

Parameters:

32

- width: int - Frame width

33

- height: int - Frame height

34

- format: str | VideoFormat - Pixel format

35

"""

36

37

@staticmethod

38

def from_ndarray(array, format='rgb24') -> 'VideoFrame':

39

"""

40

Create frame from NumPy array.

41

42

Parameters:

43

- array: np.ndarray - Image data (HxWxC or HxW)

44

- format: str - Target pixel format

45

46

Returns:

47

New VideoFrame object

48

"""

49

50

@staticmethod

51

def from_image(img) -> 'VideoFrame':

52

"""

53

Create frame from PIL Image.

54

55

Parameters:

56

- img: PIL.Image - Source image

57

58

Returns:

59

New VideoFrame object

60

"""

61

62

def to_ndarray(self, format=None, width=None, height=None) -> np.ndarray:

63

"""

64

Convert to NumPy array.

65

66

Parameters:

67

- format: str - Target format (None uses current)

68

- width: int - Target width (None uses current)

69

- height: int - Target height (None uses current)

70

71

Returns:

72

NumPy array with image data

73

"""

74

75

def to_image(self, **kwargs):

76

"""

77

Convert to PIL Image.

78

79

Returns:

80

PIL.Image object

81

"""

82

83

def reformat(self, width=None, height=None, format=None,

84

src_colorspace=None, dst_colorspace=None,

85

interpolation=None) -> 'VideoFrame':

86

"""

87

Convert frame format/size.

88

89

Parameters:

90

- width: int - Target width

91

- height: int - Target height

92

- format: str - Target pixel format

93

- src_colorspace: int - Source colorspace

94

- dst_colorspace: int - Destination colorspace

95

- interpolation: int - Scaling algorithm

96

97

Returns:

98

New reformatted frame

99

"""

100

101

def save(self, file, **kwargs) -> None:

102

"""

103

Save frame to image file.

104

105

Parameters:

106

- file: str - Output file path

107

- **kwargs: Format-specific options

108

"""

109

```

110

111

### Video Formats

112

113

Pixel format specifications and properties.

114

115

```python { .api }

116

class VideoFormat:

117

"""Video pixel format specification."""

118

119

# Properties

120

name: str # Format name (e.g., 'yuv420p', 'rgb24')

121

bits_per_pixel: int # Bits per pixel

122

padded_bits_per_pixel: int # Padded bits per pixel

123

is_big_endian: bool # True if big endian

124

has_palette: bool # True if paletted format

125

is_bit_stream: bool # True if bitstream format

126

is_planar: bool # True if planar format

127

is_rgb: bool # True if RGB format

128

width: int # Format width

129

height: int # Format height

130

components: tuple[VideoFormatComponent, ...] # Format components

131

132

def __init__(self, name):

133

"""

134

Create video format.

135

136

Parameters:

137

- name: str | VideoFormat - Format name or existing format

138

"""

139

140

def chroma_width(self, luma_width=0) -> int:

141

"""Get chroma width for given luma width."""

142

143

def chroma_height(self, luma_height=0) -> int:

144

"""Get chroma height for given luma height."""

145

146

class VideoFormatComponent:

147

"""Video format component (color channel)."""

148

149

plane: int # Plane index

150

bits: int # Bits per component

151

is_alpha: bool # True if alpha channel

152

is_luma: bool # True if luma channel

153

is_chroma: bool # True if chroma channel

154

width: int # Component width

155

height: int # Component height

156

```

157

158

### Video Reformatting

159

160

Advanced video format conversion and scaling operations.

161

162

```python { .api }

163

class VideoReformatter:

164

"""Video format converter and scaler."""

165

166

def reformat(self, frame, width=None, height=None, format=None,

167

src_colorspace=None, dst_colorspace=None,

168

interpolation=None) -> VideoFrame:

169

"""

170

Reformat video frame.

171

172

Parameters:

173

- frame: VideoFrame - Input frame

174

- width: int - Target width

175

- height: int - Target height

176

- format: str - Target format

177

- src_colorspace: int - Source colorspace

178

- dst_colorspace: int - Target colorspace

179

- interpolation: int - Scaling algorithm

180

181

Returns:

182

Reformatted video frame

183

"""

184

185

# Enumeration constants

186

class Interpolation(IntEnum):

187

"""Scaling interpolation methods."""

188

FAST_BILINEAR = 1

189

BILINEAR = 2

190

BICUBIC = 4

191

X = 8

192

POINT = 16

193

AREA = 32

194

BICUBLIN = 64

195

GAUSS = 128

196

SINC = 256

197

LANCZOS = 512

198

SPLINE = 1024

199

200

class Colorspace(IntEnum):

201

"""Video colorspaces."""

202

RGB = 0

203

BT709 = 1

204

UNSPECIFIED = 2

205

RESERVED = 3

206

FCC = 4

207

BT470BG = 5

208

SMPTE170M = 6

209

SMPTE240M = 7

210

YCGCO = 8

211

BT2020_NCL = 9

212

BT2020_CL = 10

213

SMPTE2085 = 11

214

215

class ColorRange(IntEnum):

216

"""Color value ranges."""

217

UNSPECIFIED = 0

218

MPEG = 1 # Limited range (TV)

219

JPEG = 2 # Full range (PC)

220

```

221

222

### Video Streams

223

224

Video stream objects for encoding and decoding.

225

226

```python { .api }

227

class VideoStream:

228

"""Video stream in a container."""

229

230

# Properties

231

type: Literal['video'] # Stream type

232

codec_context: VideoCodecContext # Codec context

233

width: int # Frame width

234

height: int # Frame height

235

format: VideoFormat # Pixel format

236

pix_fmt: str # Pixel format name

237

framerate: Fraction # Frame rate

238

rate: Fraction # Alias for framerate

239

bit_rate: int # Bitrate

240

max_bit_rate: int # Maximum bitrate

241

sample_aspect_ratio: Fraction # Sample aspect ratio

242

display_aspect_ratio: Fraction # Display aspect ratio

243

244

def encode(self, frame=None) -> list[Packet]:

245

"""

246

Encode video frame.

247

248

Parameters:

249

- frame: VideoFrame | None - Frame to encode (None flushes)

250

251

Returns:

252

List of encoded packets

253

"""

254

255

def encode_lazy(self, frame=None) -> Iterator[Packet]:

256

"""

257

Lazy encoding iterator.

258

259

Parameters:

260

- frame: VideoFrame | None - Frame to encode (None flushes)

261

262

Yields:

263

Encoded packets

264

"""

265

266

def decode(self, packet=None) -> list[VideoFrame]:

267

"""

268

Decode video packet.

269

270

Parameters:

271

- packet: Packet | None - Packet to decode (None flushes)

272

273

Returns:

274

List of decoded frames

275

"""

276

```

277

278

### Video Codec Context

279

280

Video-specific codec context for encoding and decoding.

281

282

```python { .api }

283

class VideoCodecContext:

284

"""Video codec context."""

285

286

# Properties

287

type: Literal['video'] # Context type

288

format: VideoFormat | None # Pixel format

289

width: int # Frame width

290

height: int # Frame height

291

bits_per_coded_sample: int # Bits per coded sample

292

pix_fmt: str | None # Pixel format name

293

framerate: Fraction # Frame rate

294

rate: Fraction # Alias for framerate

295

gop_size: int # GOP size

296

sample_aspect_ratio: Fraction # Sample aspect ratio

297

display_aspect_ratio: Fraction # Display aspect ratio

298

has_b_frames: bool # Uses B-frames

299

max_b_frames: int # Maximum B-frames

300

bit_rate: int # Target bitrate

301

302

# Color properties

303

colorspace: int # Color space

304

color_range: int # Color range

305

color_primaries: int # Color primaries

306

color_trc: int # Transfer characteristics

307

308

# Quality control

309

qmin: int # Minimum quantizer

310

qmax: int # Maximum quantizer

311

312

def encode(self, frame=None) -> list[Packet]:

313

"""Encode video frame to packets."""

314

315

def encode_lazy(self, frame=None) -> Iterator[Packet]:

316

"""Lazy encoding iterator."""

317

318

def decode(self, packet=None) -> list[VideoFrame]:

319

"""Decode packet to video frames."""

320

```

321

322

### Video Planes

323

324

Individual video data planes for planar formats.

325

326

```python { .api }

327

class VideoPlane:

328

"""Video data plane."""

329

330

line_size: int # Bytes per line (including padding)

331

width: int # Plane width

332

height: int # Plane height

333

buffer_size: int # Total buffer size

334

frame: VideoFrame # Parent frame

335

index: int # Plane index

336

337

# Inherits Buffer methods for data access

338

def update(self, input: bytes) -> None: ...

339

def __buffer__(self, flags: int) -> memoryview: ...

340

def __bytes__(self) -> bytes: ...

341

```

342

343

### Picture Types

344

345

```python { .api }

346

class PictureType(IntEnum):

347

"""Video frame types."""

348

NONE = 0 # Undefined

349

I = 1 # Intra frame (keyframe)

350

P = 2 # Predicted frame

351

B = 3 # Bidirectional frame

352

S = 4 # S(GMC)-VOP MPEG-4

353

SI = 5 # SI-VOP MPEG-4

354

SP = 6 # SP-VOP MPEG-4

355

BI = 7 # BI-VOP

356

```

357

358

## Usage Examples

359

360

### Basic Video Processing

361

362

```python

363

import av

364

import numpy as np

365

366

# Open the video file. The context manager closes the container on every
# exit path, including an exception raised while decoding.
with av.open('video.mp4') as container:
    video_stream = container.streams.video[0]

    print(f"Resolution: {video_stream.width}x{video_stream.height}")
    print(f"Frame rate: {video_stream.framerate}")
    print(f"Pixel format: {video_stream.format}")
    # NOTE(review): PyAV documents Container.duration as possibly None
    # (unknown duration), so guard the division instead of crashing.
    if container.duration is not None:
        print(f"Duration: {container.duration / av.time_base} seconds")

    # Process the first few frames only
    max_frames = 10
    for frame_count, frame in enumerate(container.decode(video_stream)):
        print(f"Frame {frame_count}: {frame.width}x{frame.height} "
              f"at {frame.time:.3f}s")

        # Convert to numpy array
        array = frame.to_ndarray(format='rgb24')
        print(f"Array shape: {array.shape}")

        # frame_count is zero-based; stop before decoding an extra frame
        if frame_count + 1 >= max_frames:
            break

391

```

392

393

### Video Format Conversion

394

395

```python

396

import av

397

398

# Open the input video and create the output container. The context
# managers close both files even if decoding or encoding raises.
with av.open('input.avi') as input_container, \
        av.open('output.mp4', 'w') as output_container:
    input_stream = input_container.streams.video[0]

    # Add video stream with different settings
    output_stream = output_container.add_stream('h264', rate=30)
    output_stream.width = 1280
    output_stream.height = 720
    output_stream.pix_fmt = 'yuv420p'
    output_stream.bit_rate = 2000000  # 2 Mbps

    for frame_count, frame in enumerate(input_container.decode(input_stream)):
        # Reformat frame to target specifications
        new_frame = frame.reformat(
            width=output_stream.width,
            height=output_stream.height,
            format=output_stream.pix_fmt,
        )

        # Set timing: one tick of the output time base per frame
        new_frame.pts = frame_count
        new_frame.time_base = output_stream.time_base

        # Encode and write
        for packet in output_stream.encode(new_frame):
            output_container.mux(packet)

    # Flush encoder: encode() with no frame drains the buffered packets
    for packet in output_stream.encode():
        output_container.mux(packet)

437

```

438

439

### Creating Video from Images

440

441

```python

442

import av

443

import numpy as np

444

from PIL import Image

445

446

width, height = 640, 480
fps = 24
total_frames = 120  # 5 seconds at 24fps

# Pixel coordinate grids are the same for every frame, so build them once.
ys, xs = np.mgrid[0:height, 0:width]
x_term = xs / 100
y_term = ys / 100
xy_term = (xs + ys) / 100

# Create output container; the context manager closes it on any exit path.
with av.open('generated.mp4', 'w') as output:
    # Add video stream
    stream = output.add_stream('h264', rate=fps)
    stream.width = width
    stream.height = height
    stream.pix_fmt = 'yuv420p'

    # Generate frames
    for i in range(total_frames):
        # Animated color gradient, computed for the whole image at once
        # instead of one Python-level np.sin call per pixel and channel.
        phase = i / 120.0 * 2 * np.pi
        array = np.stack(
            [
                128 + 127 * np.sin(phase + x_term),   # Red
                128 + 127 * np.sin(phase + y_term),   # Green
                128 + 127 * np.sin(phase + xy_term),  # Blue
            ],
            axis=-1,
        ).astype(np.uint8)  # values lie in [1, 255]; truncation matches int()

        # Create frame
        frame = av.VideoFrame.from_ndarray(array, format='rgb24')
        frame.pts = i
        frame.time_base = stream.time_base

        # Encode and write
        for packet in stream.encode(frame):
            output.mux(packet)

    # Flush encoder: encode() with no frame drains the buffered packets
    for packet in stream.encode():
        output.mux(packet)

482

```

483

484

### Frame Analysis and Processing

485

486

```python

487

import av

488

import numpy as np

489

490

def analyze_frame(frame):
    """
    Analyze video frame properties.

    Parameters:
    - frame: object exposing to_ndarray(format=...), width and height
      (e.g. a VideoFrame)

    Returns:
    Dict with 'brightness_mean', 'brightness_std', 'red_mean',
    'green_mean', 'blue_mean' and 'aspect_ratio'
    """
    array = frame.to_ndarray(format='rgb24')

    # Color channel analysis: a single reduction over both spatial axes
    # yields all three channel means (array is indexed [row, column, channel]).
    red_mean, green_mean, blue_mean = array.mean(axis=(0, 1))

    return {
        # Basic statistics over every sample of every channel
        'brightness_mean': np.mean(array),
        'brightness_std': np.std(array),
        'red_mean': red_mean,
        'green_mean': green_mean,
        'blue_mean': blue_mean,
        'aspect_ratio': frame.width / frame.height,
    }

511

512

# Process video
max_frames = 100
scene_changes = []
prev_brightness = None

# The context manager closes the container even if analysis raises.
with av.open('video.mp4') as container:
    stream = container.streams.video[0]

    for i, frame in enumerate(container.decode(stream)):
        stats = analyze_frame(frame)

        print(f"Frame {i}: brightness={stats['brightness_mean']:.1f} "
              f"aspect={stats['aspect_ratio']:.2f}")

        # Detect scene changes
        if prev_brightness is not None:
            brightness_change = abs(stats['brightness_mean'] - prev_brightness)
            if brightness_change > 50:  # Threshold for scene change
                scene_changes.append((i, frame.time))
                print(f" Scene change detected at {frame.time:.2f}s")

        prev_brightness = stats['brightness_mean']

        # Process first 100 frames only. i is zero-based, so compare i + 1;
        # testing `i >= 100` would let a 101st frame through.
        if i + 1 >= max_frames:
            break

print(f"\nFound {len(scene_changes)} scene changes:")
for frame_num, timestamp in scene_changes:
    print(f" Frame {frame_num} at {timestamp:.2f}s")

543

```

544

545

### Video Thumbnails

546

547

```python

548

import av

549

import os

550

551

def extract_thumbnails(video_path, output_dir, count=10):
    """
    Extract thumbnails from video at regular intervals.

    Parameters:
    - video_path: str - Path of the video file to read
    - output_dir: str - Directory for the JPEG files (created if missing)
    - count: int - Maximum number of thumbnails to save

    Returns:
    Number of thumbnails actually saved
    """
    # exist_ok avoids the race between a separate exists() check and creation
    os.makedirs(output_dir, exist_ok=True)

    # Nothing to extract; also avoids dividing by zero below
    if count <= 0:
        return 0

    container = av.open(video_path)
    try:
        stream = container.streams.video[0]

        # Calculate frame interval
        total_frames = stream.frames
        if total_frames == 0:
            # Estimate from duration and frame rate.
            # NOTE(review): PyAV documents both values as possibly None when
            # unknown; in that case fall back to sampling every frame.
            if container.duration is not None and stream.framerate is not None:
                duration = container.duration / av.time_base
                total_frames = int(duration * float(stream.framerate))

        frame_interval = max(1, total_frames // count)

        print(f"Extracting {count} thumbnails from {total_frames} frames")

        thumbnails_saved = 0
        for i, frame in enumerate(container.decode(stream)):
            if i % frame_interval != 0:
                continue

            # Convert to RGB and save
            image = frame.reformat(format='rgb24').to_image()

            # Save thumbnail
            thumbnail_path = os.path.join(
                output_dir,
                f"thumbnail_{thumbnails_saved:03d}_{frame.time:.2f}s.jpg"
            )
            image.save(thumbnail_path, quality=85)

            print(f"Saved thumbnail {thumbnails_saved + 1}: {thumbnail_path}")
            thumbnails_saved += 1

            # Stop decoding once enough thumbnails exist instead of reading
            # the rest of the file for nothing
            if thumbnails_saved >= count:
                break
    finally:
        # Release the file even when decoding or saving fails
        container.close()

    return thumbnails_saved

590

591

# Pull up to twelve evenly spaced thumbnails out of the movie
count = extract_thumbnails(
    video_path='movie.mp4',
    output_dir='thumbnails/',
    count=12,
)
print(f"Successfully extracted {count} thumbnails")

594

```

595

596

### Advanced Color Processing

597

598

```python

599

import av

600

import numpy as np

601

602

def apply_color_grading(frame, brightness=0, contrast=1.0, saturation=1.0):
    """
    Apply color grading to video frame.

    Parameters:
    - frame: VideoFrame - Source frame (left unmodified)
    - brightness: number - Offset added to every sample, in 0-255 units
    - contrast: float - Gain applied around mid-gray (1.0 leaves it unchanged)
    - saturation: float - Blend factor between grayscale (0.0) and the
      original color (1.0); larger values exaggerate color

    Returns:
    New rgb24 VideoFrame carrying the source frame's pts and time_base
    """
    # Convert to RGB for processing
    array = frame.reformat(format='rgb24').to_ndarray()

    # Work in normalized floats so intermediate results are not quantized
    graded = array.astype(np.float32) / 255.0

    # Apply brightness
    graded += brightness / 255.0

    # Apply contrast, pivoting around mid-gray (0.5)
    graded = (graded - 0.5) * contrast + 0.5

    # Simplified saturation adjustment: scale each pixel's distance from its
    # weighted gray value (no HSV conversion is performed)
    if saturation != 1.0:
        gray = np.dot(graded, [0.299, 0.587, 0.114])[..., np.newaxis]
        graded = gray + (graded - gray) * saturation

    # Clamp values and convert back to uint8. Round to nearest first: a bare
    # astype() truncates, which darkens the image by up to one level and can
    # alter pixels even when no grading is requested.
    graded = np.clip(graded, 0.0, 1.0)
    processed_array = np.rint(graded * 255).astype(np.uint8)

    # Create new frame, preserving the source timing
    processed_frame = av.VideoFrame.from_ndarray(processed_array, format='rgb24')
    processed_frame.pts = frame.pts
    processed_frame.time_base = frame.time_base

    return processed_frame

634

635

# Process video with color grading. The context managers close both
# containers even if decoding, grading or encoding raises.
with av.open('input.mp4') as input_container, \
        av.open('graded.mp4', 'w') as output_container:
    input_stream = input_container.streams.video[0]

    # Mirror the input geometry and frame rate on the output stream
    output_stream = output_container.add_stream('h264', rate=input_stream.framerate)
    output_stream.width = input_stream.width
    output_stream.height = input_stream.height
    output_stream.pix_fmt = 'yuv420p'

    for frame in input_container.decode(input_stream):
        # Apply color grading
        graded_frame = apply_color_grading(
            frame,
            brightness=10,   # Slightly brighter
            contrast=1.1,    # Slightly more contrast
            saturation=1.2,  # More saturated
        )

        # Convert to output format
        output_frame = graded_frame.reformat(format='yuv420p')

        # Encode and write
        for packet in output_stream.encode(output_frame):
            output_container.mux(packet)

    # Flush encoder: encode() with no frame drains the buffered packets
    for packet in output_stream.encode():
        output_container.mux(packet)

667

```