0
# Video Processing
1
2
This section covers PyAV's video handling: frames, streams, pixel-format conversion and scaling (reformatting), and conversion to and from NumPy arrays and PIL images.
3
4
## Capabilities
5
6
### Video Frames
7
8
Video frame objects contain uncompressed video data with format, timing, and metadata information.
9
10
```python { .api }
11
class VideoFrame:
12
"""Container for uncompressed video data."""
13
14
# Properties
15
width: int # Frame width in pixels
16
height: int # Frame height in pixels
17
format: VideoFormat # Pixel format
18
planes: tuple[VideoPlane, ...] # Video data planes
19
pts: int # Presentation timestamp
20
time: float # Time in seconds
21
pict_type: int # Picture type (I, P, B frame)
22
interlaced_frame: bool # True if interlaced
23
colorspace: int # Color space
24
color_range: int # Color range (limited/full)
25
side_data: SideDataContainer # Additional frame data
26
27
def __init__(self, width=0, height=0, format='yuv420p'):
28
"""
29
Create a video frame.
30
31
Parameters:
32
- width: int - Frame width
33
- height: int - Frame height
34
- format: str | VideoFormat - Pixel format
35
"""
36
37
@staticmethod
38
def from_ndarray(array, format='rgb24') -> 'VideoFrame':
39
"""
40
Create frame from NumPy array.
41
42
Parameters:
43
- array: np.ndarray - Image data (HxWxC or HxW)
44
- format: str - Target pixel format
45
46
Returns:
47
New VideoFrame object
48
"""
49
50
@staticmethod
51
def from_image(img) -> 'VideoFrame':
52
"""
53
Create frame from PIL Image.
54
55
Parameters:
56
- img: PIL.Image - Source image
57
58
Returns:
59
New VideoFrame object
60
"""
61
62
def to_ndarray(self, format=None, width=None, height=None) -> np.ndarray:
63
"""
64
Convert to NumPy array.
65
66
Parameters:
67
- format: str - Target format (None uses current)
68
- width: int - Target width (None uses current)
69
- height: int - Target height (None uses current)
70
71
Returns:
72
NumPy array with image data
73
"""
74
75
def to_image(self, **kwargs):
76
"""
77
Convert to PIL Image.
78
79
Returns:
80
PIL.Image object
81
"""
82
83
def reformat(self, width=None, height=None, format=None,
84
src_colorspace=None, dst_colorspace=None,
85
interpolation=None) -> 'VideoFrame':
86
"""
87
Convert frame format/size.
88
89
Parameters:
90
- width: int - Target width
91
- height: int - Target height
92
- format: str - Target pixel format
93
- src_colorspace: int - Source colorspace
94
- dst_colorspace: int - Destination colorspace
95
- interpolation: int - Scaling algorithm
96
97
Returns:
98
New reformatted frame
99
"""
100
101
def save(self, file, **kwargs) -> None:
102
"""
103
Save frame to image file.
104
105
Parameters:
106
- file: str - Output file path
107
- **kwargs: Format-specific options
108
"""
109
```
110
111
### Video Formats
112
113
Pixel format specifications and properties.
114
115
```python { .api }
116
class VideoFormat:
117
"""Video pixel format specification."""
118
119
# Properties
120
name: str # Format name (e.g., 'yuv420p', 'rgb24')
121
bits_per_pixel: int # Bits per pixel
122
padded_bits_per_pixel: int # Padded bits per pixel
123
is_big_endian: bool # True if big endian
124
has_palette: bool # True if paletted format
125
is_bit_stream: bool # True if bitstream format
126
is_planar: bool # True if planar format
127
is_rgb: bool # True if RGB format
128
width: int # Format width
129
height: int # Format height
130
components: tuple[VideoFormatComponent, ...] # Format components
131
132
def __init__(self, name):
133
"""
134
Create video format.
135
136
Parameters:
137
- name: str | VideoFormat - Format name or existing format
138
"""
139
140
def chroma_width(self, luma_width=0) -> int:
141
"""Get chroma width for given luma width."""
142
143
def chroma_height(self, luma_height=0) -> int:
144
"""Get chroma height for given luma height."""
145
146
class VideoFormatComponent:
147
"""Video format component (color channel)."""
148
149
plane: int # Plane index
150
bits: int # Bits per component
151
is_alpha: bool # True if alpha channel
152
is_luma: bool # True if luma channel
153
is_chroma: bool # True if chroma channel
154
width: int # Component width
155
height: int # Component height
156
```
157
158
### Video Reformatting
159
160
Advanced video format conversion and scaling operations.
161
162
```python { .api }
163
class VideoReformatter:
164
"""Video format converter and scaler."""
165
166
def reformat(self, frame, width=None, height=None, format=None,
167
src_colorspace=None, dst_colorspace=None,
168
interpolation=None) -> VideoFrame:
169
"""
170
Reformat video frame.
171
172
Parameters:
173
- frame: VideoFrame - Input frame
174
- width: int - Target width
175
- height: int - Target height
176
- format: str - Target format
177
- src_colorspace: int - Source colorspace
178
- dst_colorspace: int - Target colorspace
179
- interpolation: int - Scaling algorithm
180
181
Returns:
182
Reformatted video frame
183
"""
184
185
# Enumeration constants
186
class Interpolation(IntEnum):
187
"""Scaling interpolation methods."""
188
FAST_BILINEAR = 1
189
BILINEAR = 2
190
BICUBIC = 4
191
X = 8
192
POINT = 16
193
AREA = 32
194
BICUBLIN = 64
195
GAUSS = 128
196
SINC = 256
197
LANCZOS = 512
198
SPLINE = 1024
199
200
class Colorspace(IntEnum):
201
"""Video colorspaces."""
202
RGB = 0
203
BT709 = 1
204
UNSPECIFIED = 2
205
RESERVED = 3
206
FCC = 4
207
BT470BG = 5
208
SMPTE170M = 6
209
SMPTE240M = 7
210
YCGCO = 8
211
BT2020_NCL = 9
212
BT2020_CL = 10
213
SMPTE2085 = 11
214
215
class ColorRange(IntEnum):
216
"""Color value ranges."""
217
UNSPECIFIED = 0
218
MPEG = 1 # Limited range (TV)
219
JPEG = 2 # Full range (PC)
220
```
221
222
### Video Streams
223
224
Video stream objects for encoding and decoding.
225
226
```python { .api }
227
class VideoStream:
228
"""Video stream in a container."""
229
230
# Properties
231
type: Literal['video'] # Stream type
232
codec_context: VideoCodecContext # Codec context
233
width: int # Frame width
234
height: int # Frame height
235
format: VideoFormat # Pixel format
236
pix_fmt: str # Pixel format name
237
framerate: Fraction # Frame rate
238
rate: Fraction # Alias for framerate
239
bit_rate: int # Bitrate
240
max_bit_rate: int # Maximum bitrate
241
sample_aspect_ratio: Fraction # Sample aspect ratio
242
display_aspect_ratio: Fraction # Display aspect ratio
243
244
def encode(self, frame=None) -> list[Packet]:
245
"""
246
Encode video frame.
247
248
Parameters:
249
- frame: VideoFrame | None - Frame to encode (None flushes)
250
251
Returns:
252
List of encoded packets
253
"""
254
255
def encode_lazy(self, frame=None) -> Iterator[Packet]:
256
"""
257
Lazy encoding iterator.
258
259
Parameters:
260
- frame: VideoFrame | None - Frame to encode (None flushes)
261
262
Yields:
263
Encoded packets
264
"""
265
266
def decode(self, packet=None) -> list[VideoFrame]:
267
"""
268
Decode video packet.
269
270
Parameters:
271
- packet: Packet | None - Packet to decode (None flushes)
272
273
Returns:
274
List of decoded frames
275
"""
276
```
277
278
### Video Codec Context
279
280
Video-specific codec context for encoding and decoding.
281
282
```python { .api }
283
class VideoCodecContext:
284
"""Video codec context."""
285
286
# Properties
287
type: Literal['video'] # Context type
288
format: VideoFormat | None # Pixel format
289
width: int # Frame width
290
height: int # Frame height
291
bits_per_coded_sample: int # Bits per coded sample
292
pix_fmt: str | None # Pixel format name
293
framerate: Fraction # Frame rate
294
rate: Fraction # Alias for framerate
295
gop_size: int # GOP size
296
sample_aspect_ratio: Fraction # Sample aspect ratio
297
display_aspect_ratio: Fraction # Display aspect ratio
298
has_b_frames: bool # Uses B-frames
299
max_b_frames: int # Maximum B-frames
300
bit_rate: int # Target bitrate
301
302
# Color properties
303
colorspace: int # Color space
304
color_range: int # Color range
305
color_primaries: int # Color primaries
306
color_trc: int # Transfer characteristics
307
308
# Quality control
309
qmin: int # Minimum quantizer
310
qmax: int # Maximum quantizer
311
312
def encode(self, frame=None) -> list[Packet]:
313
"""Encode video frame to packets."""
314
315
def encode_lazy(self, frame=None) -> Iterator[Packet]:
316
"""Lazy encoding iterator."""
317
318
def decode(self, packet=None) -> list[VideoFrame]:
319
"""Decode packet to video frames."""
320
```
321
322
### Video Planes
323
324
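Individual data planes of a video frame. Planar formats (e.g. `yuv420p`) store each component in its own plane; packed formats (e.g. `rgb24`) store all components in a single plane.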
325
326
```python { .api }
327
class VideoPlane:
328
"""Video data plane."""
329
330
line_size: int # Bytes per line (including padding)
331
width: int # Plane width
332
height: int # Plane height
333
buffer_size: int # Total buffer size
334
frame: VideoFrame # Parent frame
335
index: int # Plane index
336
337
# Inherits Buffer methods for data access
338
def update(self, input: bytes) -> None: ...
339
def __buffer__(self, flags: int) -> memoryview: ...
340
def __bytes__(self) -> bytes: ...
341
```
342
343
### Picture Types
344
345
```python { .api }
346
class PictureType(IntEnum):
347
"""Video frame types."""
348
NONE = 0 # Undefined
349
I = 1 # Intra frame (keyframe)
350
P = 2 # Predicted frame
351
B = 3 # Bidirectional frame
352
S = 4 # S(GMC)-VOP MPEG-4
353
    SI = 5                 # Switching intra frame
354
    SP = 6                 # Switching predicted frame
355
BI = 7 # BI-VOP
356
```
357
358
## Usage Examples
359
360
### Basic Video Processing
361
362
```python
363
import av
364
import numpy as np
365
366
# Number of frames to inspect before stopping
MAX_FRAMES = 10

# Open video file; the context manager closes it even if decoding fails
with av.open('video.mp4') as container:
    video_stream = container.streams.video[0]

    print(f"Resolution: {video_stream.width}x{video_stream.height}")
    print(f"Frame rate: {video_stream.framerate}")
    print(f"Pixel format: {video_stream.format}")
    # The duration can be missing (None) for some inputs, e.g. live streams
    if container.duration is not None:
        print(f"Duration: {container.duration / av.time_base} seconds")

    # Process frames
    for frame_count, frame in enumerate(container.decode(video_stream)):
        print(f"Frame {frame_count}: {frame.width}x{frame.height} "
              f"at {frame.time:.3f}s")

        # Convert to numpy array
        array = frame.to_ndarray(format='rgb24')
        print(f"Array shape: {array.shape}")

        # Process first few frames only; check after processing so no
        # extra frame is decoded just to be thrown away
        if frame_count + 1 >= MAX_FRAMES:
            break
391
```
392
393
### Video Format Conversion
394
395
```python
396
import av
397
398
# Open input video and create output container. Using context managers
# guarantees both are closed (and the output file finalized) even if
# decoding or encoding raises part-way through.
with av.open('input.avi') as input_container, \
        av.open('output.mp4', 'w') as output_container:
    input_stream = input_container.streams.video[0]

    # Add video stream with different settings
    output_stream = output_container.add_stream('h264', rate=30)
    output_stream.width = 1280
    output_stream.height = 720
    output_stream.pix_fmt = 'yuv420p'
    output_stream.bit_rate = 2000000  # 2 Mbps

    for frame_count, frame in enumerate(input_container.decode(input_stream)):
        # Reformat frame to target specifications
        new_frame = frame.reformat(
            width=output_stream.width,
            height=output_stream.height,
            format=output_stream.pix_fmt,
        )

        # Set timing: one tick per output frame
        new_frame.pts = frame_count
        new_frame.time_base = output_stream.time_base

        # Encode and write
        for packet in output_stream.encode(new_frame):
            output_container.mux(packet)

    # Flush encoder (encode() with no frame drains buffered packets)
    for packet in output_stream.encode():
        output_container.mux(packet)
437
```
438
439
### Creating Video from Images
440
441
```python
442
import av
443
import numpy as np
444
from PIL import Image
445
446
WIDTH, HEIGHT = 640, 480
FRAME_COUNT = 120  # 5 seconds at 24fps

# Pixel coordinate grids, built once and reused for every frame.
# xs has shape (1, WIDTH) and ys has shape (HEIGHT, 1), so expressions
# combining them broadcast to (HEIGHT, WIDTH) without Python-level loops.
xs = np.arange(WIDTH)[np.newaxis, :]
ys = np.arange(HEIGHT)[:, np.newaxis]

# Create output container; closed (and finalized) even if encoding fails
with av.open('generated.mp4', 'w') as output:
    # Add video stream
    stream = output.add_stream('h264', rate=24)
    stream.width = WIDTH
    stream.height = HEIGHT
    stream.pix_fmt = 'yuv420p'

    # Generate frames
    for i in range(FRAME_COUNT):
        # Animated color gradient
        phase = i / 120.0 * 2 * np.pi
        array = np.empty((HEIGHT, WIDTH, 3), dtype=np.uint8)
        # Values stay within [1, 255]; astype truncates like int() did
        array[..., 0] = (128 + 127 * np.sin(phase + xs / 100)).astype(np.uint8)  # Red
        array[..., 1] = (128 + 127 * np.sin(phase + ys / 100)).astype(np.uint8)  # Green
        array[..., 2] = (128 + 127 * np.sin(phase + (xs + ys) / 100)).astype(np.uint8)  # Blue

        # Create frame
        frame = av.VideoFrame.from_ndarray(array, format='rgb24')
        frame.pts = i
        frame.time_base = stream.time_base

        # Encode and write
        for packet in stream.encode(frame):
            output.mux(packet)

    # Flush encoder
    for packet in stream.encode():
        output.mux(packet)
482
```
483
484
### Frame Analysis and Processing
485
486
```python
487
import av
488
import numpy as np
489
490
def analyze_frame(frame):
    """
    Analyze video frame properties.

    Parameters:
    - frame: VideoFrame - Frame to analyze; it is converted to 'rgb24'

    Returns:
    Dict with 'brightness_mean' and 'brightness_std' (over all channel
    values), 'red_mean', 'green_mean', 'blue_mean' (per channel) and
    'aspect_ratio' (frame width divided by frame height)
    """
    pixels = frame.to_ndarray(format='rgb24')

    # Per-channel means in a single reduction over the two spatial axes;
    # the last axis holds the R, G, B channels in that order.
    red_mean, green_mean, blue_mean = pixels.mean(axis=(0, 1))

    return {
        'brightness_mean': np.mean(pixels),
        'brightness_std': np.std(pixels),
        'red_mean': red_mean,
        'green_mean': green_mean,
        'blue_mean': blue_mean,
        'aspect_ratio': frame.width / frame.height,
    }
511
512
# Process video
MAX_FRAMES = 100             # Analyze the first 100 frames only
SCENE_CHANGE_THRESHOLD = 50  # Mean-brightness jump treated as a scene change

scene_changes = []
prev_brightness = None

# The context manager closes the container even if analysis raises
with av.open('video.mp4') as container:
    stream = container.streams.video[0]

    for i, frame in enumerate(container.decode(stream)):
        stats = analyze_frame(frame)

        print(f"Frame {i}: brightness={stats['brightness_mean']:.1f} "
              f"aspect={stats['aspect_ratio']:.2f}")

        # Detect scene changes
        if prev_brightness is not None:
            brightness_change = abs(stats['brightness_mean'] - prev_brightness)
            if brightness_change > SCENE_CHANGE_THRESHOLD:
                scene_changes.append((i, frame.time))
                print(f" Scene change detected at {frame.time:.2f}s")

        prev_brightness = stats['brightness_mean']

        # i is zero-based, so stop once frame index MAX_FRAMES - 1 is done
        if i + 1 >= MAX_FRAMES:
            break

print(f"\nFound {len(scene_changes)} scene changes:")
for frame_num, timestamp in scene_changes:
    print(f" Frame {frame_num} at {timestamp:.2f}s")
543
```
544
545
### Video Thumbnails
546
547
```python
548
import av
549
import os
550
551
def extract_thumbnails(video_path, output_dir, count=10):
    """
    Extract thumbnails from video at regular intervals.

    Parameters:
    - video_path: str - Path of the video file to read
    - output_dir: str - Directory for the JPEG files (created if missing)
    - count: int - Maximum number of thumbnails to save

    Returns:
    Number of thumbnails actually saved (0 when count is not positive)
    """
    # Nothing to do; also avoids dividing by zero when computing the interval
    if count <= 0:
        return 0

    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(output_dir, exist_ok=True)

    thumbnails_saved = 0

    # The context manager closes the container even if decoding/saving fails
    with av.open(video_path) as container:
        stream = container.streams.video[0]

        # Calculate frame interval
        total_frames = stream.frames
        if (total_frames == 0 and container.duration is not None
                and stream.framerate):
            # Estimate from duration and frame rate
            duration = container.duration / av.time_base
            total_frames = int(duration * float(stream.framerate))

        # If the frame count is still unknown (0) this falls back to 1,
        # i.e. the first `count` frames are used.
        frame_interval = max(1, total_frames // count)

        print(f"Extracting {count} thumbnails from {total_frames} frames")

        for i, frame in enumerate(container.decode(stream)):
            if i % frame_interval != 0:
                continue

            # Convert to RGB and save
            rgb_frame = frame.reformat(format='rgb24')
            image = rgb_frame.to_image()

            # Save thumbnail
            thumbnail_path = os.path.join(
                output_dir,
                f"thumbnail_{thumbnails_saved:03d}_{frame.time:.2f}s.jpg"
            )
            image.save(thumbnail_path, quality=85)

            print(f"Saved thumbnail {thumbnails_saved + 1}: {thumbnail_path}")
            thumbnails_saved += 1

            # Stop decoding as soon as enough thumbnails exist
            if thumbnails_saved >= count:
                break

    return thumbnails_saved
590
591
# Extract thumbnails
592
count = extract_thumbnails('movie.mp4', 'thumbnails/', count=12)
593
print(f"Successfully extracted {count} thumbnails")
594
```
595
596
### Advanced Color Processing
597
598
```python
599
import av
600
import numpy as np
601
602
def grade_rgb_array(array, brightness=0, contrast=1.0, saturation=1.0):
    """
    Apply brightness, contrast and saturation to an RGB pixel array.

    Parameters:
    - array: np.ndarray - uint8 RGB data with the channels on the last axis
    - brightness: float - Offset added to every value, in 0-255 units
    - contrast: float - Gain applied around mid-gray (1.0 = unchanged)
    - saturation: float - Blend factor between gray (0.0) and the
      original color (1.0); values above 1.0 exaggerate color

    Returns:
    New uint8 array with the same shape as the input
    """
    # Work in normalized floating point, 0.0-1.0
    values = array.astype(np.float32) / 255.0

    # Apply brightness
    values = values + brightness / 255.0

    # Apply contrast, pivoting around mid-gray
    values = (values - 0.5) * contrast + 0.5

    # Saturation: blend each pixel with its luma (Rec. 601 weights).
    # This is a simplified adjustment; no HSV conversion is performed.
    if saturation != 1.0:
        gray = np.dot(values, [0.299, 0.587, 0.114])[..., np.newaxis]
        values = gray + (values - gray) * saturation

    # Clamp, then ROUND before the integer cast: a plain astype() truncates,
    # so float round-off would turn e.g. 99.99999 into 99 and a neutral
    # grade would not reproduce the input.
    values = np.clip(values, 0.0, 1.0)
    return np.rint(values * 255).astype(np.uint8)


def apply_color_grading(frame, brightness=0, contrast=1.0, saturation=1.0):
    """
    Apply color grading to video frame.

    Parameters:
    - frame: VideoFrame - Source frame (any pixel format)
    - brightness: float - Offset in 0-255 units (0 = unchanged)
    - contrast: float - Contrast gain (1.0 = unchanged)
    - saturation: float - Saturation factor (1.0 = unchanged)

    Returns:
    New 'rgb24' VideoFrame carrying the source frame's pts and time_base
    """
    # Convert to RGB for processing
    rgb_frame = frame.reformat(format='rgb24')
    graded = grade_rgb_array(rgb_frame.to_ndarray(), brightness, contrast, saturation)

    # Create new frame and carry the timing over from the source
    processed_frame = av.VideoFrame.from_ndarray(graded, format='rgb24')
    processed_frame.pts = frame.pts
    processed_frame.time_base = frame.time_base

    return processed_frame
634
635
# Process video with color grading. Context managers guarantee that both
# containers are closed (and the output finalized) even if a frame fails.
with av.open('input.mp4') as input_container, \
        av.open('graded.mp4', 'w') as output_container:
    input_stream = input_container.streams.video[0]

    # Mirror the input's frame rate and dimensions on the output stream
    output_stream = output_container.add_stream('h264', rate=input_stream.framerate)
    output_stream.width = input_stream.width
    output_stream.height = input_stream.height
    output_stream.pix_fmt = 'yuv420p'

    for frame in input_container.decode(input_stream):
        # Apply color grading
        graded_frame = apply_color_grading(
            frame,
            brightness=10,   # Slightly brighter
            contrast=1.1,    # Slightly more contrast
            saturation=1.2,  # More saturated
        )

        # Convert to output format
        output_frame = graded_frame.reformat(format='yuv420p')

        # Encode and write
        for packet in output_stream.encode(output_frame):
            output_container.mux(packet)

    # Flush encoder before the containers are closed
    for packet in output_stream.encode():
        output_container.mux(packet)
667
```