Pythonic bindings for FFmpeg's libraries enabling multimedia processing with audio/video encoding, decoding, format conversion, and stream manipulation.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Complete video handling with frames, streams, format conversion, reformatting, and image operations. PyAV provides comprehensive video processing capabilities with NumPy and PIL integration.
Video frame objects contain uncompressed video data with format, timing, and metadata information.
class VideoFrame:
    """Container for uncompressed video data."""
    # Properties
    width: int  # Frame width in pixels
    height: int  # Frame height in pixels
    format: VideoFormat  # Pixel format
    planes: tuple[VideoPlane, ...]  # Video data planes
    pts: int  # Presentation timestamp
    time: float  # Time in seconds
    pict_type: int  # Picture type (I, P, B frame)
    interlaced_frame: bool  # True if interlaced
    colorspace: int  # Color space
    color_range: int  # Color range (limited/full)
    side_data: SideDataContainer  # Additional frame data

    def __init__(self, width=0, height=0, format='yuv420p'):
        """
        Create a video frame.
        Parameters:
        - width: int - Frame width
        - height: int - Frame height
        - format: str | VideoFormat - Pixel format
        """

    @staticmethod
    def from_ndarray(array, format='rgb24') -> 'VideoFrame':
        """
        Create frame from NumPy array.
        Parameters:
        - array: np.ndarray - Image data (HxWxC or HxW)
        - format: str - Target pixel format
        Returns:
            New VideoFrame object
        """

    @staticmethod
    def from_image(img) -> 'VideoFrame':
        """
        Create frame from PIL Image.
        Parameters:
        - img: PIL.Image - Source image
        Returns:
            New VideoFrame object
        """

    def to_ndarray(self, format=None, width=None, height=None) -> np.ndarray:
        """
        Convert to NumPy array.
        Parameters:
        - format: str - Target format (None uses current)
        - width: int - Target width (None uses current)
        - height: int - Target height (None uses current)
        Returns:
            NumPy array with image data
        """

    def to_image(self, **kwargs):
        """
        Convert to PIL Image.
        Returns:
            PIL.Image object
        """

    def reformat(self, width=None, height=None, format=None,
                 src_colorspace=None, dst_colorspace=None,
                 interpolation=None) -> 'VideoFrame':
        """
        Convert frame format/size.
        Parameters:
        - width: int - Target width
        - height: int - Target height
        - format: str - Target pixel format
        - src_colorspace: int - Source colorspace
        - dst_colorspace: int - Destination colorspace
        - interpolation: int - Scaling algorithm
        Returns:
            New reformatted frame
        """

    def save(self, file, **kwargs) -> None:
        """
        Save frame to image file.
        Parameters:
        - file: str - Output file path
        - **kwargs: Format-specific options
        """

# Pixel format specifications and properties.
class VideoFormat:
    """Video pixel format specification."""
    # Properties
    name: str  # Format name (e.g., 'yuv420p', 'rgb24')
    bits_per_pixel: int  # Bits per pixel
    padded_bits_per_pixel: int  # Padded bits per pixel
    is_big_endian: bool  # True if big endian
    has_palette: bool  # True if paletted format
    is_bit_stream: bool  # True if bitstream format
    is_planar: bool  # True if planar format
    is_rgb: bool  # True if RGB format
    width: int  # Format width
    height: int  # Format height
    components: tuple[VideoFormatComponent, ...]  # Format components

    def __init__(self, name):
        """
        Create video format.
        Parameters:
        - name: str | VideoFormat - Format name or existing format
        """

    def chroma_width(self, luma_width=0) -> int:
        """Get chroma width for given luma width (accounts for subsampling)."""

    def chroma_height(self, luma_height=0) -> int:
        """Get chroma height for given luma height (accounts for subsampling)."""
class VideoFormatComponent:
    """Video format component (color channel)."""
    plane: int  # Plane index
    bits: int  # Bits per component
    is_alpha: bool  # True if alpha channel
    is_luma: bool  # True if luma channel
    is_chroma: bool  # True if chroma channel
    width: int  # Component width
    height: int  # Component height

# Advanced video format conversion and scaling operations.
class VideoReformatter:
    """Video format converter and scaler."""

    def reformat(self, frame, width=None, height=None, format=None,
                 src_colorspace=None, dst_colorspace=None,
                 interpolation=None) -> VideoFrame:
        """
        Reformat video frame.
        Parameters:
        - frame: VideoFrame - Input frame
        - width: int - Target width
        - height: int - Target height
        - format: str - Target format
        - src_colorspace: int - Source colorspace
        - dst_colorspace: int - Target colorspace
        - interpolation: int - Scaling algorithm
        Returns:
            Reformatted video frame
        """
# Enumeration constants
class Interpolation(IntEnum):
    """Scaling interpolation methods.

    Values are bit flags; they appear to mirror FFmpeg's SWS_* scaler
    flags (TODO confirm against libswscale headers).
    """
    FAST_BILINEAR = 1  # Fast bilinear (lower quality, fastest)
    BILINEAR = 2  # Bilinear
    BICUBIC = 4  # Bicubic
    X = 8  # Experimental
    POINT = 16  # Nearest neighbor
    AREA = 32  # Area averaging
    BICUBLIN = 64  # Bicubic for luma, bilinear for chroma
    GAUSS = 128  # Gaussian
    SINC = 256  # Sinc
    LANCZOS = 512  # Lanczos
    SPLINE = 1024  # Natural bicubic spline
class Colorspace(IntEnum):
    """Video colorspaces.

    Values appear to follow FFmpeg's AVCOL_SPC_* constants
    (TODO confirm against libavutil/pixfmt.h).
    """
    RGB = 0  # RGB (identity matrix)
    BT709 = 1  # ITU-R BT.709 (HD)
    UNSPECIFIED = 2  # Not specified
    RESERVED = 3  # Reserved
    FCC = 4  # FCC Title 47
    BT470BG = 5  # ITU-R BT.470BG (PAL/SECAM SD)
    SMPTE170M = 6  # SMPTE 170M (NTSC SD)
    SMPTE240M = 7  # SMPTE 240M
    YCGCO = 8  # YCgCo
    BT2020_NCL = 9  # ITU-R BT.2020, non-constant luminance
    BT2020_CL = 10  # ITU-R BT.2020, constant luminance
    SMPTE2085 = 11  # SMPTE 2085
class ColorRange(IntEnum):
    """Color value ranges."""
    UNSPECIFIED = 0  # Not specified
    MPEG = 1  # Limited range (TV, e.g. 16-235 for 8-bit luma)
    JPEG = 2  # Full range (PC, 0-255)

# Video stream objects for encoding and decoding.
class VideoStream:
    """Video stream in a container."""
    # Properties
    type: Literal['video']  # Stream type
    codec_context: VideoCodecContext  # Codec context
    width: int  # Frame width
    height: int  # Frame height
    format: VideoFormat  # Pixel format
    pix_fmt: str  # Pixel format name
    framerate: Fraction  # Frame rate
    rate: Fraction  # Alias for framerate
    bit_rate: int  # Bitrate
    max_bit_rate: int  # Maximum bitrate
    sample_aspect_ratio: Fraction  # Sample aspect ratio
    display_aspect_ratio: Fraction  # Display aspect ratio

    def encode(self, frame=None) -> list[Packet]:
        """
        Encode video frame.
        Parameters:
        - frame: VideoFrame | None - Frame to encode (None flushes)
        Returns:
            List of encoded packets
        """

    def encode_lazy(self, frame=None) -> Iterator[Packet]:
        """
        Lazy encoding iterator.
        Parameters:
        - frame: VideoFrame | None - Frame to encode (None flushes)
        Yields:
            Encoded packets
        """

    def decode(self, packet=None) -> list[VideoFrame]:
        """
        Decode video packet.
        Parameters:
        - packet: Packet | None - Packet to decode (None flushes)
        Returns:
            List of decoded frames
        """

# Video-specific codec context for encoding and decoding.
class VideoCodecContext:
    """Video codec context."""
    # Properties
    type: Literal['video']  # Context type
    format: VideoFormat | None  # Pixel format
    width: int  # Frame width
    height: int  # Frame height
    bits_per_coded_sample: int  # Bits per coded sample
    pix_fmt: str | None  # Pixel format name
    framerate: Fraction  # Frame rate
    rate: Fraction  # Alias for framerate
    gop_size: int  # GOP size (keyframe interval)
    sample_aspect_ratio: Fraction  # Sample aspect ratio
    display_aspect_ratio: Fraction  # Display aspect ratio
    has_b_frames: bool  # Uses B-frames
    max_b_frames: int  # Maximum B-frames
    bit_rate: int  # Target bitrate
    # Color properties
    colorspace: int  # Color space
    color_range: int  # Color range
    color_primaries: int  # Color primaries
    color_trc: int  # Transfer characteristics
    # Quality control
    qmin: int  # Minimum quantizer
    qmax: int  # Maximum quantizer

    def encode(self, frame=None) -> list[Packet]:
        """Encode video frame to packets (None flushes the encoder)."""

    def encode_lazy(self, frame=None) -> Iterator[Packet]:
        """Lazy encoding iterator."""

    def decode(self, packet=None) -> list[VideoFrame]:
        """Decode packet to video frames (None flushes the decoder)."""

# Individual video data planes for planar formats.
class VideoPlane:
    """Video data plane."""
    line_size: int  # Bytes per line (including padding)
    width: int  # Plane width
    height: int  # Plane height
    buffer_size: int  # Total buffer size
    frame: VideoFrame  # Parent frame
    index: int  # Plane index

    # Inherits Buffer methods for data access
    def update(self, input: bytes) -> None: ...
    def __buffer__(self, flags: int) -> memoryview: ...
    def __bytes__(self) -> bytes: ...


class PictureType(IntEnum):
    """Video frame types."""
    NONE = 0  # Undefined
    I = 1  # Intra frame (keyframe)
    P = 2  # Predicted frame
    B = 3  # Bidirectional frame
    S = 4  # S(GMC)-VOP MPEG-4
    SI = 5  # SI-VOP MPEG-4
    SP = 6  # SP-VOP MPEG-4
    BI = 7  # BI-VOP


import av
import numpy as np

# Open video file
container = av.open('video.mp4')
video_stream = container.streams.video[0]

# Report basic stream properties.
print(f"Resolution: {video_stream.width}x{video_stream.height}")
print(f"Frame rate: {video_stream.framerate}")
print(f"Pixel format: {video_stream.format}")
print(f"Duration: {container.duration / av.time_base} seconds")

# Process frames -- enumerate replaces the manual frame counter.
for idx, frame in enumerate(container.decode(video_stream)):
    print(f"Frame {idx}: {frame.width}x{frame.height} "
          f"at {frame.time:.3f}s")
    # Convert to numpy array
    pixels = frame.to_ndarray(format='rgb24')
    print(f"Array shape: {pixels.shape}")
    # Process first few frames only (frames 0-9)
    if idx >= 9:
        break
container.close()
import av
# Open input video
src = av.open('input.avi')
src_video = src.streams.video[0]

# Create output container
dst = av.open('output.mp4', 'w')

# Add video stream with different settings
dst_video = dst.add_stream('h264', rate=30)
dst_video.width = 1280
dst_video.height = 720
dst_video.pix_fmt = 'yuv420p'
dst_video.bit_rate = 2000000  # 2 Mbps

for index, frame in enumerate(src.decode(src_video)):
    # Reformat frame to target specifications
    converted = frame.reformat(
        width=dst_video.width,
        height=dst_video.height,
        format=dst_video.pix_fmt,
    )
    # Set timing: one tick per frame in the output time base
    converted.pts = index
    converted.time_base = dst_video.time_base
    # Encode and write
    for packet in dst_video.encode(converted):
        dst.mux(packet)

# Flush encoder (drain buffered packets)
for packet in dst_video.encode():
    dst.mux(packet)
src.close()
dst.close()
import av
import numpy as np
from PIL import Image
# Create output container
output = av.open('generated.mp4', 'w')
# Add video stream
stream = output.add_stream('h264', rate=24)
stream.width = 640
stream.height = 480
stream.pix_fmt = 'yuv420p'
# Coordinate grids, computed once. Broadcasting (1,640)+(480,1) -> (480,640)
# replaces the per-pixel Python double loop (~36M iterations for 120 frames).
xs = np.arange(640, dtype=np.float64)[np.newaxis, :]  # (1, 640)
ys = np.arange(480, dtype=np.float64)[:, np.newaxis]  # (480, 1)
# Generate frames
for i in range(120):  # 5 seconds at 24fps
    # Animated color gradient
    phase = i / 120.0 * 2 * np.pi
    array = np.empty((480, 640, 3), dtype=np.uint8)
    # 128 + 127*sin(...) lies in [1, 255]; float->uint8 truncation matches
    # the int() conversion of the scalar formula.
    array[..., 0] = (128 + 127 * np.sin(phase + xs / 100)).astype(np.uint8)  # Red
    array[..., 1] = (128 + 127 * np.sin(phase + ys / 100)).astype(np.uint8)  # Green
    array[..., 2] = (128 + 127 * np.sin(phase + (xs + ys) / 100)).astype(np.uint8)  # Blue
    # Create frame
    frame = av.VideoFrame.from_ndarray(array, format='rgb24')
    frame.pts = i
    frame.time_base = stream.time_base
    # Encode and write
    for packet in stream.encode(frame):
        output.mux(packet)
# Flush encoder
for packet in stream.encode():
    output.mux(packet)
output.close()
import av
import numpy as np
def analyze_frame(frame):
    """Analyze video frame properties.

    Returns a dict with overall brightness mean/std, per-channel means,
    and the frame's width/height aspect ratio.
    """
    pixels = frame.to_ndarray(format='rgb24')
    # Overall brightness statistics across all channels
    stats = {
        'brightness_mean': np.mean(pixels),
        'brightness_std': np.std(pixels),
    }
    # Per-channel means (RGB channel order)
    for channel, key in enumerate(('red_mean', 'green_mean', 'blue_mean')):
        stats[key] = np.mean(pixels[:, :, channel])
    stats['aspect_ratio'] = frame.width / frame.height
    return stats
# Process video
container = av.open('video.mp4')
vstream = container.streams.video[0]
scene_changes = []
last_brightness = None
for idx, frame in enumerate(container.decode(vstream)):
    stats = analyze_frame(frame)
    print(f"Frame {idx}: brightness={stats['brightness_mean']:.1f} "
          f"aspect={stats['aspect_ratio']:.2f}")
    # Detect scene changes via large jumps in mean brightness
    if last_brightness is not None:
        delta = abs(stats['brightness_mean'] - last_brightness)
        if delta > 50:  # Threshold for scene change
            scene_changes.append((idx, frame.time))
            print(f"  Scene change detected at {frame.time:.2f}s")
    last_brightness = stats['brightness_mean']
    # Stop after the first ~100 frames
    if idx >= 100:
        break
print(f"\nFound {len(scene_changes)} scene changes:")
for frame_num, time in scene_changes:
    print(f"  Frame {frame_num} at {time:.2f}s")
container.close()
import av
import os
def extract_thumbnails(video_path, output_dir, count=10):
    """Extract thumbnails from video at regular intervals.

    Parameters:
    - video_path: str - Input video file
    - output_dir: str - Directory for thumbnail JPEGs (created if missing)
    - count: int - Number of thumbnails to extract

    Returns:
        Number of thumbnails actually saved.
    """
    os.makedirs(output_dir, exist_ok=True)  # idempotent; replaces exists() check
    container = av.open(video_path)
    try:
        stream = container.streams.video[0]
        # Calculate frame interval
        total_frames = stream.frames
        if not total_frames:
            # Unknown frame count: estimate from duration and frame rate
            duration = container.duration / av.time_base
            total_frames = int(duration * float(stream.framerate))
        frame_interval = max(1, total_frames // count)
        print(f"Extracting {count} thumbnails from {total_frames} frames")
        thumbnails_saved = 0
        for i, frame in enumerate(container.decode(stream)):
            if thumbnails_saved >= count:
                break  # all thumbnails saved -- no need to decode the rest
            if i % frame_interval == 0:
                # Convert to RGB and save
                rgb_frame = frame.reformat(format='rgb24')
                image = rgb_frame.to_image()
                # Save thumbnail
                thumbnail_path = os.path.join(
                    output_dir,
                    f"thumbnail_{thumbnails_saved:03d}_{frame.time:.2f}s.jpg"
                )
                image.save(thumbnail_path, quality=85)
                print(f"Saved thumbnail {thumbnails_saved + 1}: {thumbnail_path}")
                thumbnails_saved += 1
    finally:
        container.close()  # close even if decoding fails mid-way
    return thumbnails_saved
# Extract thumbnails
count = extract_thumbnails('movie.mp4', 'thumbnails/', count=12)
print(f"Successfully extracted {count} thumbnails")
import av
import numpy as np
def apply_color_grading(frame, brightness=0, contrast=1.0, saturation=1.0):
    """Apply color grading (brightness, contrast, saturation) to a video frame.

    Returns a new rgb24 VideoFrame with the original pts/time_base.
    """
    # Work on a normalized float copy of the RGB pixel data
    img = frame.reformat(format='rgb24').to_ndarray().astype(np.float32) / 255.0
    # Brightness: uniform offset (given in 0-255 units)
    img = img + brightness / 255.0
    # Contrast: scale around mid-gray
    img = (img - 0.5) * contrast + 0.5
    # Saturation: blend each pixel with its luma (simplified; no full HSV round trip)
    if saturation != 1.0:
        luma = np.dot(img, [0.299, 0.587, 0.114])[..., np.newaxis]
        img = luma + (img - luma) * saturation
    # Clamp to [0, 1] and convert back to uint8
    graded = (np.clip(img, 0.0, 1.0) * 255).astype(np.uint8)
    # Rebuild the frame, carrying over timing information
    result = av.VideoFrame.from_ndarray(graded, format='rgb24')
    result.pts = frame.pts
    result.time_base = frame.time_base
    return result
# Process video with color grading
src = av.open('input.mp4')
dst = av.open('graded.mp4', 'w')
src_stream = src.streams.video[0]
dst_stream = dst.add_stream('h264', rate=src_stream.framerate)
dst_stream.width = src_stream.width
dst_stream.height = src_stream.height
dst_stream.pix_fmt = 'yuv420p'

for frame in src.decode(src_stream):
    # Apply color grading: slightly brighter, more contrast, more saturated
    graded = apply_color_grading(
        frame,
        brightness=10,
        contrast=1.1,
        saturation=1.2,
    )
    # Encoder expects yuv420p; convert the graded RGB frame first
    encoded = dst_stream.encode(graded.reformat(format='yuv420p'))
    for packet in encoded:
        dst.mux(packet)

# Flush and close
for packet in dst_stream.encode():
    dst.mux(packet)
src.close()
dst.close()
# Install with Tessl CLI
npx tessl i tessl/pypi-av