CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-av

Pythonic bindings for FFmpeg's libraries enabling multimedia processing with audio/video encoding, decoding, format conversion, and stream manipulation.

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

video.mddocs/

Video Processing

Complete video handling with frames, streams, format conversion, reformatting, and image operations. PyAV provides comprehensive video processing capabilities with NumPy and PIL integration.

Capabilities

Video Frames

Video frame objects contain uncompressed video data with format, timing, and metadata information.

class VideoFrame:
    """Container for uncompressed video data."""
    
    # Properties
    width: int                      # Frame width in pixels
    height: int                     # Frame height in pixels
    format: VideoFormat             # Pixel format
    planes: tuple[VideoPlane, ...]  # Video data planes (one per color plane)
    pts: int                        # Presentation timestamp (presumably in time_base units — confirm)
    time: float                     # Time in seconds (derived from pts; verify behavior when pts is unset)
    pict_type: int                  # Picture type (I, P, B frame); see PictureType enum
    interlaced_frame: bool          # True if interlaced
    colorspace: int                 # Color space; see Colorspace enum
    color_range: int                # Color range (limited/full); see ColorRange enum
    side_data: SideDataContainer    # Additional frame data
    
    def __init__(self, width=0, height=0, format='yuv420p'):
        """
        Create a video frame.
        
        Parameters:
        - width: int - Frame width
        - height: int - Frame height  
        - format: str | VideoFormat - Pixel format
        """
    
    @staticmethod
    def from_ndarray(array, format='rgb24') -> 'VideoFrame':
        """
        Create frame from NumPy array.
        
        Parameters:
        - array: np.ndarray - Image data (HxWxC or HxW); dtype/shape must
          match the target format (e.g. uint8 HxWx3 for 'rgb24') — confirm
          exact requirements in the PyAV docs
        - format: str - Target pixel format
        
        Returns:
        New VideoFrame object
        """
    
    @staticmethod
    def from_image(img) -> 'VideoFrame':
        """
        Create frame from PIL Image.
        
        Parameters:
        - img: PIL.Image - Source image
        
        Returns:
        New VideoFrame object
        """
    
    def to_ndarray(self, format=None, width=None, height=None) -> np.ndarray:
        """
        Convert to NumPy array.
        
        Parameters:
        - format: str - Target format (None uses current)
        - width: int - Target width (None uses current)
        - height: int - Target height (None uses current)
        
        Returns:
        NumPy array with image data
        """
    
    def to_image(self, **kwargs):
        """
        Convert to PIL Image.
        
        Returns:
        PIL.Image object
        """
    
    def reformat(self, width=None, height=None, format=None, 
                 src_colorspace=None, dst_colorspace=None, 
                 interpolation=None) -> 'VideoFrame':
        """
        Convert frame format/size.
        
        Parameters:
        - width: int - Target width (None keeps source width)
        - height: int - Target height (None keeps source height)
        - format: str - Target pixel format (None keeps source format)
        - src_colorspace: int - Source colorspace (see Colorspace)
        - dst_colorspace: int - Destination colorspace (see Colorspace)
        - interpolation: int - Scaling algorithm (see Interpolation)
        
        Returns:
        New reformatted frame
        """
    
    def save(self, file, **kwargs) -> None:
        """
        Save frame to image file.
        
        Parameters:
        - file: str - Output file path
        - **kwargs: Format-specific options
        """

Video Formats

Pixel format specifications and properties.

class VideoFormat:
    """Video pixel format specification."""
    
    # Properties
    name: str                               # Format name (e.g., 'yuv420p', 'rgb24')
    bits_per_pixel: int                     # Bits per pixel
    padded_bits_per_pixel: int              # Bits per pixel including padding
    is_big_endian: bool                     # True if big endian
    has_palette: bool                       # True if paletted format
    is_bit_stream: bool                     # True if bitstream format
    is_planar: bool                         # True if planar format
    is_rgb: bool                            # True if RGB format
    width: int                              # Format width
    height: int                             # Format height
    components: tuple[VideoFormatComponent, ...] # Format components (one per color channel)
    
    def __init__(self, name):
        """
        Create video format.
        
        Parameters:
        - name: str | VideoFormat - Format name or existing format
        """
    
    def chroma_width(self, luma_width=0) -> int:
        """Get chroma width for given luma width (accounts for chroma subsampling)."""
    
    def chroma_height(self, luma_height=0) -> int:
        """Get chroma height for given luma height (accounts for chroma subsampling)."""

class VideoFormatComponent:
    """Video format component (a single color channel of a pixel format)."""
    
    plane: int       # Index of the plane holding this component
    bits: int        # Bits per component
    is_alpha: bool   # True if alpha channel
    is_luma: bool    # True if luma channel
    is_chroma: bool  # True if chroma channel
    width: int       # Component width (chroma may be subsampled — TODO confirm)
    height: int      # Component height (chroma may be subsampled — TODO confirm)

Video Reformatting

Advanced video format conversion and scaling operations.

class VideoReformatter:
    """Video format converter and scaler.

    NOTE(review): presumably reusable across frames, caching scaler state
    between calls — confirm against PyAV documentation.
    """
    
    def reformat(self, frame, width=None, height=None, format=None,
                 src_colorspace=None, dst_colorspace=None,
                 interpolation=None) -> VideoFrame:
        """
        Reformat video frame.
        
        Parameters:
        - frame: VideoFrame - Input frame
        - width: int - Target width (None keeps source width)
        - height: int - Target height (None keeps source height)
        - format: str - Target format (None keeps source format)
        - src_colorspace: int - Source colorspace (see Colorspace)
        - dst_colorspace: int - Target colorspace (see Colorspace)
        - interpolation: int - Scaling algorithm (see Interpolation)
        
        Returns:
        Reformatted video frame
        """

# Enumeration constants
class Interpolation(IntEnum):
    """Scaling interpolation methods.

    Values are single-bit flags matching FFmpeg's libswscale SWS_* constants.
    """
    FAST_BILINEAR = 1   # SWS_FAST_BILINEAR
    BILINEAR = 2        # SWS_BILINEAR
    BICUBIC = 4         # SWS_BICUBIC
    X = 8               # SWS_X (experimental)
    POINT = 16          # SWS_POINT (nearest neighbor)
    AREA = 32           # SWS_AREA (area averaging)
    BICUBLIN = 64       # SWS_BICUBLIN (bicubic luma, bilinear chroma)
    GAUSS = 128         # SWS_GAUSS
    SINC = 256          # SWS_SINC
    LANCZOS = 512       # SWS_LANCZOS
    SPLINE = 1024       # SWS_SPLINE

class Colorspace(IntEnum):
    """Video colorspaces (values mirror FFmpeg's AVColorSpace)."""
    RGB = 0             # Identity matrix (no YUV conversion)
    BT709 = 1           # HD video (Rec. 709)
    UNSPECIFIED = 2
    RESERVED = 3
    FCC = 4
    BT470BG = 5         # PAL/SECAM SD (Rec. 601 variant)
    SMPTE170M = 6       # NTSC SD (Rec. 601 variant)
    SMPTE240M = 7
    YCGCO = 8
    BT2020_NCL = 9      # UHD, non-constant luminance
    BT2020_CL = 10      # UHD, constant luminance
    SMPTE2085 = 11

class ColorRange(IntEnum):
    """Color value ranges."""
    UNSPECIFIED = 0
    MPEG = 1        # Limited range (TV): 16-235 luma at 8 bits
    JPEG = 2        # Full range (PC): 0-255 at 8 bits

Video Streams

Video stream objects for encoding and decoding.

class VideoStream:
    """Video stream in a container."""
    
    # Properties
    type: Literal['video']          # Stream type
    codec_context: VideoCodecContext # Codec context
    width: int                      # Frame width
    height: int                     # Frame height
    format: VideoFormat             # Pixel format
    pix_fmt: str                    # Pixel format name
    framerate: Fraction             # Frame rate (frames per second)
    rate: Fraction                  # Alias for framerate
    bit_rate: int                   # Bitrate (bits per second)
    max_bit_rate: int               # Maximum bitrate
    sample_aspect_ratio: Fraction   # Sample (pixel) aspect ratio
    display_aspect_ratio: Fraction  # Display aspect ratio
    
    def encode(self, frame=None) -> list[Packet]:
        """
        Encode video frame.
        
        Call repeatedly with frames, then with None at end of stream to
        drain any packets the encoder is still buffering.
        
        Parameters:
        - frame: VideoFrame | None - Frame to encode (None flushes)
        
        Returns:
        List of encoded packets (may be empty while the encoder buffers)
        """
    
    def encode_lazy(self, frame=None) -> Iterator[Packet]:
        """
        Lazy encoding iterator.
        
        Parameters:
        - frame: VideoFrame | None - Frame to encode (None flushes)
        
        Yields:
        Encoded packets
        """
    
    def decode(self, packet=None) -> list[VideoFrame]:
        """
        Decode video packet.
        
        Parameters:
        - packet: Packet | None - Packet to decode (None flushes)
        
        Returns:
        List of decoded frames (may be empty while the decoder buffers)
        """

Video Codec Context

Video-specific codec context for encoding and decoding.

class VideoCodecContext:
    """Video codec context (video-specific encode/decode state)."""
    
    # Properties
    type: Literal['video']         # Context type
    format: VideoFormat | None     # Pixel format
    width: int                     # Frame width
    height: int                    # Frame height
    bits_per_coded_sample: int     # Bits per coded sample
    pix_fmt: str | None            # Pixel format name
    framerate: Fraction            # Frame rate
    rate: Fraction                 # Alias for framerate
    gop_size: int                  # GOP size (frames between keyframes)
    sample_aspect_ratio: Fraction  # Sample (pixel) aspect ratio
    display_aspect_ratio: Fraction # Display aspect ratio
    has_b_frames: bool             # Uses B-frames
    max_b_frames: int              # Maximum consecutive B-frames
    bit_rate: int                  # Target bitrate (bits per second)
    
    # Color properties (see Colorspace / ColorRange enums)
    colorspace: int               # Color space
    color_range: int              # Color range
    color_primaries: int          # Color primaries
    color_trc: int                # Transfer characteristics
    
    # Quality control (lower quantizer = higher quality, larger output)
    qmin: int                     # Minimum quantizer
    qmax: int                     # Maximum quantizer
    
    def encode(self, frame=None) -> list[Packet]:
        """Encode video frame to packets (None flushes the encoder)."""
    
    def encode_lazy(self, frame=None) -> Iterator[Packet]:
        """Lazy encoding iterator (None flushes the encoder)."""
    
    def decode(self, packet=None) -> list[VideoFrame]:
        """Decode packet to video frames (None flushes the decoder)."""

Video Planes

Individual video data planes for planar formats.

class VideoPlane:
    """Video data plane (raw pixel data of one plane of a frame)."""
    
    line_size: int      # Bytes per line (stride, including padding)
    width: int          # Plane width
    height: int         # Plane height
    buffer_size: int    # Total buffer size in bytes
    frame: VideoFrame   # Parent frame (presumably keeps the buffer alive — confirm)
    index: int          # Plane index within the parent frame
    
    # Inherits Buffer methods for data access; implements the buffer
    # protocol, so memoryview(plane) and bytes(plane) work directly.
    def update(self, input: bytes) -> None: ...
    def __buffer__(self, flags: int) -> memoryview: ...
    def __bytes__(self) -> bytes: ...

Picture Types

class PictureType(IntEnum):
    """Video frame types (values mirror FFmpeg's AVPictureType)."""
    NONE = 0    # Undefined
    I = 1       # Intra frame (keyframe)
    P = 2       # Predicted frame
    B = 3       # Bidirectional frame
    S = 4       # S(GMC)-VOP MPEG-4
    SI = 5      # SI-VOP MPEG-4  
    SP = 6      # SP-VOP MPEG-4
    BI = 7      # BI-VOP

Usage Examples

Basic Video Processing

import av
import numpy as np

# Open video file
container = av.open('video.mp4')
video_stream = container.streams.video[0]

print(f"Resolution: {video_stream.width}x{video_stream.height}")
print(f"Frame rate: {video_stream.framerate}")
print(f"Pixel format: {video_stream.format}")
# container.duration is in av.time_base units. NOTE(review): duration can
# be unset for some inputs — confirm before dividing.
print(f"Duration: {container.duration / av.time_base} seconds")

# Process frames
frame_count = 0
for frame in container.decode(video_stream):
    print(f"Frame {frame_count}: {frame.width}x{frame.height} "
          f"at {frame.time:.3f}s")
    
    # Convert to numpy array (HxWx3 uint8 for 'rgb24')
    array = frame.to_ndarray(format='rgb24')
    print(f"Array shape: {array.shape}")
    
    # Process first few frames only
    frame_count += 1
    if frame_count >= 10:
        break

container.close()

Video Format Conversion

import av

# Open input video
input_container = av.open('input.avi')
input_stream = input_container.streams.video[0]

# Create output container ('w' = write mode)
output_container = av.open('output.mp4', 'w')

# Add video stream with different settings
output_stream = output_container.add_stream('h264', rate=30)
output_stream.width = 1280
output_stream.height = 720
output_stream.pix_fmt = 'yuv420p'
output_stream.bit_rate = 2000000  # 2 Mbps

frame_count = 0
for frame in input_container.decode(input_stream):
    # Reformat frame to target specifications (scale + pixel format)
    new_frame = frame.reformat(
        width=output_stream.width,
        height=output_stream.height,
        format=output_stream.pix_fmt
    )
    
    # Set timing. NOTE(review): pts is expressed in time_base units, so
    # pts = frame_count only gives 30 fps if the stream time_base is
    # 1/30 — confirm against the actual output_stream.time_base.
    new_frame.pts = frame_count
    new_frame.time_base = output_stream.time_base
    
    # Encode and write
    for packet in output_stream.encode(new_frame):
        output_container.mux(packet)
    
    frame_count += 1

# Flush encoder (drain any buffered packets)
for packet in output_stream.encode():
    output_container.mux(packet)

input_container.close()
output_container.close()

Creating Video from Images

import av
import numpy as np
from PIL import Image

# Create output container
output = av.open('generated.mp4', 'w')

# Add video stream
stream = output.add_stream('h264', rate=24)
stream.width = 640
stream.height = 480
stream.pix_fmt = 'yuv420p'

# Coordinate grids, computed once and reused for every frame. Broadcasting
# these replaces the original per-pixel Python loops (640*480*3 interpreted
# iterations per frame) with a few vectorized NumPy operations.
xs = np.arange(640) / 100.0                    # shape (640,)   - varies along width
ys = (np.arange(480) / 100.0)[:, np.newaxis]   # shape (480, 1) - varies along height

# Generate frames
for i in range(120):  # 5 seconds at 24fps
    phase = i / 120.0 * 2 * np.pi

    # Animated color gradient. Values are always in [1, 255], so the
    # uint8 cast truncates exactly like the original per-pixel int(...).
    array = np.empty((480, 640, 3), dtype=np.uint8)
    array[:, :, 0] = (128 + 127 * np.sin(phase + xs)).astype(np.uint8)       # Red
    array[:, :, 1] = (128 + 127 * np.sin(phase + ys)).astype(np.uint8)       # Green
    array[:, :, 2] = (128 + 127 * np.sin(phase + xs + ys)).astype(np.uint8)  # Blue

    # Create frame from the RGB array and stamp its timing
    frame = av.VideoFrame.from_ndarray(array, format='rgb24')
    frame.pts = i
    frame.time_base = stream.time_base

    # Encode and write
    for packet in stream.encode(frame):
        output.mux(packet)

# Flush encoder (drain buffered packets)
for packet in stream.encode():
    output.mux(packet)

output.close()

Frame Analysis and Processing

import av
import numpy as np

def analyze_frame(frame):
    """Compute simple brightness and color statistics for a video frame.

    Parameters:
    - frame: VideoFrame - Frame to analyze

    Returns:
    Dict with overall brightness mean/std, per-channel (RGB) means,
    and the width/height aspect ratio.
    """
    rgb = frame.to_ndarray(format='rgb24')

    stats = {
        'brightness_mean': np.mean(rgb),
        'brightness_std': np.std(rgb),
    }

    # Mean of each color channel (last axis: 0=R, 1=G, 2=B).
    for key, channel in zip(('red_mean', 'green_mean', 'blue_mean'), range(3)):
        stats[key] = np.mean(rgb[:, :, channel])

    stats['aspect_ratio'] = frame.width / frame.height
    return stats

# Process video
container = av.open('video.mp4')
stream = container.streams.video[0]

scene_changes = []
prev_brightness = None

for i, frame in enumerate(container.decode(stream)):
    stats = analyze_frame(frame)
    
    print(f"Frame {i}: brightness={stats['brightness_mean']:.1f} "
          f"aspect={stats['aspect_ratio']:.2f}")
    
    # Detect scene changes via large jumps in mean brightness between
    # consecutive frames (crude heuristic: misses same-brightness cuts)
    if prev_brightness is not None:
        brightness_change = abs(stats['brightness_mean'] - prev_brightness)
        if brightness_change > 50:  # Threshold for scene change
            scene_changes.append((i, frame.time))
            print(f"  Scene change detected at {frame.time:.2f}s")
    
    prev_brightness = stats['brightness_mean']
    
    # Stop early (note: processes frames 0-100 inclusive, i.e. 101 frames)
    if i >= 100:
        break

print(f"\nFound {len(scene_changes)} scene changes:")
for frame_num, time in scene_changes:
    print(f"  Frame {frame_num} at {time:.2f}s")

container.close()

Video Thumbnails

import av
import os

def extract_thumbnails(video_path, output_dir, count=10):
    """Extract thumbnails from video at regular intervals.

    Parameters:
    - video_path: str - Path of the source video file
    - output_dir: str - Directory thumbnails are written to (created if missing)
    - count: int - Maximum number of thumbnails to extract

    Returns:
    Number of thumbnails actually saved.
    """
    os.makedirs(output_dir, exist_ok=True)

    container = av.open(video_path)
    try:
        stream = container.streams.video[0]

        # Calculate frame interval
        total_frames = stream.frames
        if not total_frames:
            # Frame count unknown for this container format: estimate it
            # from duration and frame rate instead.
            duration = container.duration / av.time_base
            total_frames = int(duration * float(stream.framerate))

        frame_interval = max(1, total_frames // count)

        print(f"Extracting {count} thumbnails from {total_frames} frames")

        thumbnails_saved = 0
        for i, frame in enumerate(container.decode(stream)):
            if thumbnails_saved >= count:
                # Done: stop decoding instead of draining the whole file.
                break
            if i % frame_interval != 0:
                continue

            # Convert to RGB and hand off to PIL for JPEG encoding
            image = frame.reformat(format='rgb24').to_image()

            thumbnail_path = os.path.join(
                output_dir,
                f"thumbnail_{thumbnails_saved:03d}_{frame.time:.2f}s.jpg"
            )
            image.save(thumbnail_path, quality=85)

            print(f"Saved thumbnail {thumbnails_saved + 1}: {thumbnail_path}")
            thumbnails_saved += 1
    finally:
        # Release the demuxer/decoder even if decoding or saving raised.
        container.close()
    return thumbnails_saved

# Extract up to 12 evenly spaced thumbnails into thumbnails/
count = extract_thumbnails('movie.mp4', 'thumbnails/', count=12)
print(f"Successfully extracted {count} thumbnails")

Advanced Color Processing

import av
import numpy as np

def apply_color_grading(frame, brightness=0, contrast=1.0, saturation=1.0):
    """Return a color-graded copy of a video frame.

    Parameters:
    - frame: VideoFrame - Input frame (any pixel format)
    - brightness: int - Offset added to every channel, in 0-255 units
    - contrast: float - Gain applied around mid-gray (1.0 = unchanged)
    - saturation: float - Color intensity multiplier (1.0 = unchanged)

    Returns:
    New rgb24 VideoFrame with the source frame's pts/time_base preserved.
    """
    # Work in normalized float RGB [0, 1].
    img = frame.reformat(format='rgb24').to_ndarray().astype(np.float32) / 255.0

    # Brightness: uniform offset, scaled to the normalized range.
    img += brightness / 255.0

    # Contrast: scale distances from mid-gray (0.5).
    img = (img - 0.5) * contrast + 0.5

    # Saturation: blend each pixel toward its luma (Rec. 601 weights).
    # A full HSV round-trip is deliberately avoided for brevity.
    if saturation != 1.0:
        luma = np.dot(img, [0.299, 0.587, 0.114])[..., np.newaxis]
        img = luma + (img - luma) * saturation

    # Clamp out-of-range values and return to 8-bit.
    img = np.clip(img, 0.0, 1.0)
    graded = av.VideoFrame.from_ndarray((img * 255).astype(np.uint8), format='rgb24')

    # Preserve the source frame's timing.
    graded.pts = frame.pts
    graded.time_base = frame.time_base

    return graded

# Process video with color grading
input_container = av.open('input.mp4')
output_container = av.open('graded.mp4', 'w')

input_stream = input_container.streams.video[0]
# Keep the source frame rate and dimensions for the output stream
output_stream = output_container.add_stream('h264', rate=input_stream.framerate)
output_stream.width = input_stream.width
output_stream.height = input_stream.height
output_stream.pix_fmt = 'yuv420p'

for frame in input_container.decode(input_stream):
    # Apply color grading
    graded_frame = apply_color_grading(
        frame,
        brightness=10,    # Slightly brighter
        contrast=1.1,     # Slightly more contrast
        saturation=1.2    # More saturated
    )
    
    # Convert to output format. NOTE(review): confirm reformat() carries
    # over pts/time_base from graded_frame — otherwise timing is lost here.
    output_frame = graded_frame.reformat(format='yuv420p')
    
    # Encode and write
    for packet in output_stream.encode(output_frame):
        output_container.mux(packet)

# Flush and close (drain buffered packets, then finalize the file)
for packet in output_stream.encode():
    output_container.mux(packet)

input_container.close()
output_container.close()

Install with Tessl CLI

npx tessl i tessl/pypi-av

docs

audio.md

codecs.md

containers.md

filters.md

index.md

streams.md

video.md

tile.json