CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-av

Pythonic bindings for FFmpeg's libraries enabling multimedia processing with audio/video encoding, decoding, format conversion, and stream manipulation.

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/codecs.md

Codec Management

Codec contexts for encoding and decoding, with hardware acceleration support. PyAV provides access to the codecs available in the linked FFmpeg build, with comprehensive parameter control and hardware acceleration capabilities.

Capabilities

Codec Discovery

Find and inspect available codecs in the system.

# Global codec availability.
# The discovery example below iterates this set and passes each name to
# av.Codec(name, mode).
codecs_available: set[str]  # Set of available codec names

class Codec:
    """
    Codec information and factory.

    Describes a single codec (identity, direction, supported formats) and
    builds codec contexts for it through create().
    """
    
    # Properties
    name: str                    # Codec name (e.g., 'h264', 'aac')
    canonical_name: str          # Canonical codec name
    long_name: str              # Descriptive, human-readable name
    type: str                   # Codec type ('video', 'audio', 'subtitle')
    id: int                     # Codec ID number
    is_encoder: bool            # True if can encode
    is_decoder: bool            # True if can decode
    mode: str                   # 'r' for decoder, 'w' for encoder
    descriptor: Descriptor      # Codec descriptor with options
    
    # Format support
    frame_rates: tuple[Fraction, ...]      # Supported frame rates
    audio_rates: tuple[int, ...]           # Supported sample rates
    video_formats: tuple[VideoFormat, ...] # Supported pixel formats
    audio_formats: tuple[AudioFormat, ...] # Supported sample formats
    
    # Capabilities
    properties: int             # Codec properties flags (see Properties)
    
    def create(self, kind=None) -> CodecContext:
        """
        Create a codec context for this codec.
        
        Parameters:
        - kind: str - Context type ('encoder' or 'decoder')
          NOTE(review): the return types below are keyed by media type, and
          PyAV's type stubs appear to treat `kind` as a
          'video'/'audio'/'subtitle' hint rather than a direction - confirm
          against the installed PyAV version.
        
        Returns:
        Appropriate codec context (AudioCodecContext or VideoCodecContext)
        """

def dump_codecs() -> None:
    """
    Print all available codecs to stdout.

    Returns:
    None - the listing is printed, not returned.
    """

def dump_hwconfigs() -> None:
    """
    Print hardware configurations to stdout.

    Returns:
    None - the listing is printed, not returned.
    """

Base Codec Context

Base codec context with common encoding/decoding functionality.

class CodecContext:
    """
    Base codec context for encoding/decoding.

    Holds the settings shared by every codec kind (identity, options,
    bitrate, threading, flags). VideoCodecContext and AudioCodecContext
    extend it with media-specific attributes and encode()/decode().
    """
    
    # Properties
    name: str                   # Codec name
    type: str                   # Context type ('audio', 'video', 'subtitle')
    codec: Codec               # Associated codec
    options: dict[str, str]    # Codec options (string keys and string values)
    extradata: bytes | None    # Codec extradata
    time_base: Fraction        # Time base for timestamps
    codec_tag: int            # Codec tag
    profile: str | None       # Codec profile
    profiles: tuple[str, ...]  # Available profiles
    
    # Bitrate control
    bit_rate: int             # Target bitrate
    bit_rate_tolerance: int   # Bitrate tolerance
    
    # Threading
    thread_count: int         # Number of threads
    thread_type: int          # Threading type flags (see ThreadType)
    
    # Flags
    flags: int                # Codec flags (see Flags)
    flags2: int               # Additional codec flags (see Flags2)
    
    def open(self, codec=None, **kwargs) -> None:
        """
        Open codec context.
        
        Parameters:
        - codec: Codec | str - Codec to use
        - **kwargs: Additional codec options
        """
    
    # create() is a factory: it builds a new context and never reads an
    # existing instance, so it is a static method (like HWAccel.create) and
    # can be called as CodecContext.create('h264', 'r').
    @staticmethod
    def create(codec, mode='r') -> 'CodecContext':
        """
        Create new codec context.
        
        Parameters:
        - codec: str | Codec - Codec name or object
        - mode: str - 'r' for decoder, 'w' for encoder
        
        Returns:
        New codec context
        """
    
    def parse(self, data: bytes = b'') -> list[Packet]:
        """
        Parse raw data into packets.
        
        Parameters:
        - data: bytes - Raw data to parse
        
        Returns:
        List of parsed packets
        """
    
    def flush_buffers(self) -> None:
        """Flush internal codec buffers."""

# Threading and flag enums
class ThreadType(Flag):
    """Bit flags selecting how a codec context threads its work."""
    FRAME = 1 << 0   # Frame-level threading
    SLICE = 1 << 1   # Slice-level threading

class Flags(IntEnum):
    """
    Primary codec flags.

    Values are single bits, written as shifts so the bit position is
    visible; they are combined into CodecContext.flags with bitwise OR.
    """
    QSCALE = 1 << 1          # Use fixed qscale
    # FFmpeg's AV_CODEC_FLAG_TRUNCATED is bit 16. Bit 3 (value 8) is the
    # *capability* bit AV_CODEC_CAP_TRUNCATED, and as a flag means
    # OUTPUT_CORRUPT, so 8 was the wrong value here.
    TRUNCATED = 1 << 16      # Input bitstream might be truncated
    LOW_DELAY = 1 << 19      # Force low delay
    GLOBAL_HEADER = 1 << 22  # Place global headers in extradata
    
class Flags2(IntEnum):
    """Second bank of codec flags (stored in CodecContext.flags2)."""
    FAST = 1 << 0          # Allow non-spec compliant speedup tricks
    LOCAL_HEADER = 1 << 3  # Place global headers in each keyframe

Hardware Acceleration

Hardware-accelerated encoding and decoding support.

class HWDeviceType(IntEnum):
    """
    Hardware device types.

    Members are numbered sequentially from 0 (NONE). The hardware example
    in this document tests for the lower-case names 'cuda' and 'vaapi' in
    hwdevices_available(); presumably every member has such a lower-case
    string form - confirm against the installed PyAV version.
    """
    NONE = 0
    VDPAU = 1
    CUDA = 2          # Paired with the NVIDIA 'h264_nvenc' encoder in the example
    VAAPI = 3         # Described as "Linux Intel/AMD" in the example
    DXVA2 = 4
    QSV = 5
    VIDEOTOOLBOX = 6
    D3D11VA = 7
    DRM = 8
    OPENCL = 9
    MEDIACODEC = 10
    VULKAN = 11

class HWConfigMethod(IntEnum):
    """Single-bit identifiers for the ways a hardware config can be set up."""
    HW_DEVICE_CTX = 1 << 0
    HW_FRAMES_CTX = 1 << 1
    INTERNAL = 1 << 2
    AD_HOC = 1 << 3

class HWConfig:
    """
    Hardware acceleration configuration.

    Pairs a device type with the pixel format and setup methods it supports.
    """
    
    device_type: HWDeviceType   # Hardware device type
    pix_fmt: str               # Hardware pixel format
    # presumably a bitwise OR of HWConfigMethod values - TODO confirm
    methods: int               # Supported methods

class HWAccel:
    """Hardware acceleration interface."""
    
    # NOTE(review): recent PyAV releases appear to construct this directly
    # (HWAccel(device_type=..., device=...)) rather than through a create()
    # factory - confirm against the installed version.
    @staticmethod
    def create(device_type, device=None) -> 'HWAccel':
        """
        Create hardware acceleration context.
        
        Parameters:
        - device_type: str | HWDeviceType - Device type
        - device: str - Specific device (optional; defaults to None)
        
        Returns:
        Hardware acceleration context
        """

def hwdevices_available() -> list[str]:
    """
    Get available hardware devices.
    
    Returns:
    List of available hardware device names (the hardware example in this
    document checks for 'cuda' and 'vaapi').

    NOTE(review): in PyAV this function appears to live in
    av.codec.hwaccel rather than the top-level av package - confirm the
    import path against the installed version.
    """

Video Codec Context

Video-specific codec context with video encoding/decoding parameters.

class VideoCodecContext(CodecContext):
    """
    Video codec context.

    Extends CodecContext with picture geometry, timing, GOP structure,
    colour description, quantizer and rate-control attributes, plus
    encode()/decode() for video frames.
    """
    
    # Video properties
    format: VideoFormat | None     # Pixel format (as a VideoFormat object)
    width: int                     # Frame width
    height: int                    # Frame height
    coded_width: int              # Coded width (with padding)
    coded_height: int             # Coded height (with padding)
    bits_per_coded_sample: int    # Bits per coded sample
    pix_fmt: str | None           # Pixel format name (e.g. 'yuv420p' in the examples)
    
    # Frame rate and timing
    framerate: Fraction           # Frame rate
    rate: Fraction               # Alias for framerate
    time_base: Fraction          # Time base (also declared on CodecContext)
    ticks_per_frame: int         # Ticks per frame
    
    # GOP structure
    gop_size: int                # GOP size
    has_b_frames: bool           # Uses B-frames  
    max_b_frames: int            # Maximum B-frames
    
    # Aspect ratios
    sample_aspect_ratio: Fraction  # Sample aspect ratio
    display_aspect_ratio: Fraction # Display aspect ratio
    
    # Color properties
    colorspace: int              # Color space
    color_range: int             # Color range
    color_primaries: int         # Color primaries
    color_trc: int              # Transfer characteristics
    chroma_sample_location: int  # Chroma sample location
    
    # Quality control
    qmin: int                    # Minimum quantizer
    qmax: int                    # Maximum quantizer
    qcompress: float             # Quantizer compression
    qblur: float                 # Quantizer blur
    
    # Rate control
    rc_max_rate: int            # Maximum bitrate
    rc_min_rate: int            # Minimum bitrate
    rc_buffer_size: int         # Rate control buffer size
    
    def encode(self, frame=None) -> list[Packet]:
        """
        Encode video frame.
        
        Parameters:
        - frame: VideoFrame | None - Frame to encode (None flushes)
        
        Returns:
        List of encoded packets
        """
    
    def encode_lazy(self, frame=None) -> Iterator[Packet]:
        """
        Lazy encoding iterator.
        
        Same inputs as encode(), but packets are yielded one at a time
        instead of being collected into a list.
        
        Parameters:
        - frame: VideoFrame | None - Frame to encode (None flushes)
        
        Yields:
        Encoded packets as they become available
        """
    
    def decode(self, packet=None) -> list[VideoFrame]:
        """
        Decode video packet.
        
        Parameters:
        - packet: Packet | None - Packet to decode (None flushes)
        
        Returns:
        List of decoded frames
        """

Audio Codec Context

Audio-specific codec context with audio encoding/decoding parameters.

class AudioCodecContext(CodecContext):
    """
    Audio codec context.

    Extends CodecContext with sample-rate, sample-format and channel
    attributes, plus encode()/decode() for audio frames.
    """
    
    # Audio properties
    frame_size: int              # Samples per frame
    sample_rate: int             # Sample rate in Hz
    rate: int                    # Alias for sample_rate
    format: AudioFormat          # Sample format
    layout: AudioLayout          # Channel layout (e.g. 'stereo' in the examples)
    channels: int                # Number of channels
    
    # Quality control
    cutoff: int                  # Cutoff frequency
    
    def encode(self, frame=None) -> list[Packet]:
        """
        Encode audio frame.
        
        Parameters:
        - frame: AudioFrame | None - Frame to encode (None flushes)
        
        Returns:
        List of encoded packets
        """
    
    def encode_lazy(self, frame=None) -> Iterator[Packet]:
        """
        Lazy encoding iterator.
        
        Same inputs as encode(), but packets are yielded one at a time
        instead of being collected into a list.
        
        Parameters:
        - frame: AudioFrame | None - Frame to encode (None flushes)
        
        Yields:
        Encoded packets as they become available
        """
    
    def decode(self, packet=None) -> list[AudioFrame]:
        """
        Decode audio packet.
        
        Parameters:
        - packet: Packet | None - Packet to decode (None flushes)
        
        Returns:
        List of decoded frames
        """

Properties and Capabilities

class Properties(Flag):
    """
    Codec properties.

    Single-bit flags describing a codec (exposed as Codec.properties).
    The values follow FFmpeg's AV_CODEC_PROP_* constants: the subtitle
    bits sit at positions 16 and 17, not directly after REORDER.
    """
    INTRA_ONLY = 1 << 0    # Intra frames only
    LOSSY = 1 << 1         # Lossy compression
    LOSSLESS = 1 << 2      # Lossless compression
    REORDER = 1 << 3       # Codec reorders frames
    BITMAP_SUB = 1 << 16   # Bitmap subtitles (was 16, i.e. bit 4, which is wrong)
    TEXT_SUB = 1 << 17     # Text subtitles (was 32, i.e. bit 5, which is wrong)

class Capabilities(IntEnum):
    """Single-bit codec capability flags, written as bit positions."""
    DRAW_HORIZ_BAND = 1 << 0       # Supports draw_horiz_band
    DR1 = 1 << 1                   # Supports direct rendering
    TRUNCATED = 1 << 3             # Supports truncated bitstreams
    HWACCEL = 1 << 4               # Supports hardware acceleration
    DELAY = 1 << 5                 # Has encoding/decoding delay
    SMALL_LAST_FRAME = 1 << 6      # Supports small last frame
    HWACCEL_VDPAU = 1 << 7         # VDPAU hardware acceleration
    SUBFRAMES = 1 << 8             # Supports subframes
    EXPERIMENTAL = 1 << 9          # Experimental codec
    CHANNEL_CONF = 1 << 10         # Channel configuration
    NEG_LINESIZES = 1 << 11        # Negative line sizes
    FRAME_THREADS = 1 << 12        # Frame threading
    SLICE_THREADS = 1 << 13        # Slice threading
    PARAM_CHANGE = 1 << 14         # Parameter changes
    AUTO_THREADS = 1 << 15         # Automatic threading
    VARIABLE_FRAME_SIZE = 1 << 16  # Variable frame size

Usage Examples

Basic Encoding

import av
import numpy as np

# Create output container. The context manager closes the file even if
# encoding raises part-way through, so no explicit close() is needed.
with av.open('encoded.mp4', 'w') as output:
    # Add video stream with specific codec
    video_stream = output.add_stream('libx264', rate=30)
    video_stream.width = 1920
    video_stream.height = 1080
    video_stream.pix_fmt = 'yuv420p'

    # Configure codec options (option values are strings)
    video_stream.codec_context.options = {
        'preset': 'medium',
        'crf': '23',
    }

    # Add audio stream
    audio_stream = output.add_stream('aac', rate=44100)
    audio_stream.channels = 2
    audio_stream.layout = 'stereo'

    # Generate and encode frames
    for i in range(90):  # 3 seconds at 30fps
        # Create video frame. randint's upper bound is exclusive, so 256 is
        # needed to cover the full 0-255 range of a uint8 channel.
        array = np.random.randint(0, 256, (1080, 1920, 3), dtype=np.uint8)
        frame = av.VideoFrame.from_ndarray(array, format='rgb24')
        frame.pts = i
        frame.time_base = video_stream.time_base

        # Encode and mux
        for packet in video_stream.encode(frame):
            output.mux(packet)

    # Flush encoders: encode() with no frame drains any buffered packets
    for packet in video_stream.encode():
        output.mux(packet)
    for packet in audio_stream.encode():
        output.mux(packet)

Codec Discovery and Selection

import av

def _print_encoders(kind):
    """Print every available encoder whose codec type equals *kind*."""
    # codecs_available is a set; sort it so the listing is stable.
    for codec_name in sorted(av.codecs_available):
        try:
            codec = av.Codec(codec_name, 'w')
        except av.codec.codec.UnknownCodecError:
            # Decode-only codec: there is no encoder to open in 'w' mode.
            continue
        if codec.type == kind and codec.is_encoder:
            print(f"  {codec.name}: {codec.long_name}")


# List all available codecs
print("Available video encoders:")
_print_encoders('video')

print("\nAvailable audio encoders:")
_print_encoders('audio')

# Check specific codec capabilities
h264_codec = av.Codec('h264', 'w')
print("\nH.264 codec info:")
print(f"  Long name: {h264_codec.long_name}")
print(f"  Is encoder: {h264_codec.is_encoder}")
print(f"  Supported pixel formats: {[fmt.name for fmt in h264_codec.video_formats]}")
print(f"  Properties: {h264_codec.properties}")

# Create codec context
ctx = h264_codec.create()
print(f"  Context type: {ctx.type}")
print(f"  Available profiles: {ctx.profiles}")

Hardware Acceleration

import av

# hwdevices_available is provided by the hwaccel submodule; importing it
# from there works whether or not the top-level package re-exports it.
from av.codec.hwaccel import hwdevices_available

# Check available hardware devices
hw_devices = hwdevices_available()
print(f"Available hardware devices: {hw_devices}")

# Pick the encoder settings first, then run one shared stream setup, so the
# three variants cannot drift apart.
if 'cuda' in hw_devices:
    # NVIDIA hardware encoder
    filename, codec_name, pix_fmt = 'hw_encoded.mp4', 'h264_nvenc', 'yuv420p'
    # Hardware-specific options
    options = {
        'preset': 'fast',
        'rc': 'cbr',
        'cbr': 'true',
        'b': '5M',
    }
    print("Using NVIDIA hardware encoding")
elif 'vaapi' in hw_devices:
    # Use VAAPI (Linux Intel/AMD); nv12 is the VAAPI preferred format
    filename, codec_name, pix_fmt = 'hw_encoded.mp4', 'h264_vaapi', 'nv12'
    options = None
    print("Using VAAPI hardware encoding")
else:
    filename, codec_name, pix_fmt = 'sw_encoded.mp4', 'libx264', 'yuv420p'
    options = None
    print("No hardware acceleration available, using software encoding")

# The context manager closes the output even if encoding raises.
with av.open(filename, 'w') as output:
    stream = output.add_stream(codec_name, rate=30)
    stream.width = 1920
    stream.height = 1080
    stream.pix_fmt = pix_fmt
    if options is not None:
        stream.codec_context.options = options

    # ... encoding loop ...

Advanced Codec Configuration

import av

# Create output with advanced codec settings. The context manager closes
# the file even if configuration or encoding raises.
with av.open('advanced.mp4', 'w') as output:
    # Video stream with detailed configuration
    video_stream = output.add_stream('libx264', rate=24)
    video_stream.width = 1920
    video_stream.height = 1080
    video_stream.pix_fmt = 'yuv420p'

    # Configure video codec context
    ctx = video_stream.codec_context
    ctx.bit_rate = 5000000  # 5 Mbps
    ctx.gop_size = 48       # GOP size (2 seconds at 24fps)
    ctx.max_b_frames = 2    # B-frame configuration
    ctx.flags |= av.codec.context.Flags.GLOBAL_HEADER

    # Advanced x264 options (option values are strings)
    ctx.options = {
        'preset': 'slow',           # Quality preset
        'tune': 'film',             # Content tuning
        'crf': '18',                # Constant rate factor
        'profile': 'high',          # H.264 profile
        'level': '4.1',             # H.264 level
        'x264-params': 'keyint=48:min-keyint=12:scenecut=40',
    }

    # Audio stream with AAC configuration
    audio_stream = output.add_stream('aac', rate=48000)
    audio_stream.channels = 2
    audio_stream.layout = 'stereo'

    # Configure audio codec
    audio_ctx = audio_stream.codec_context
    audio_ctx.bit_rate = 192000  # 192 kbps
    audio_ctx.options = {
        'profile': 'aac_low',
        'cutoff': '18000',
    }

    print(f"Video codec: {video_stream.codec_context.name}")
    print(f"  Bitrate: {ctx.bit_rate}")
    print(f"  GOP size: {ctx.gop_size}")
    print(f"  B-frames: {ctx.max_b_frames}")

    print(f"Audio codec: {audio_stream.codec_context.name}")
    print(f"  Bitrate: {audio_ctx.bit_rate}")
    print(f"  Sample rate: {audio_ctx.sample_rate}")

    # ... encoding process ...

Decoding with Different Codecs

import av

def analyze_container_codecs(filename):
    """
    Analyze codecs used in a media file.

    Prints the container format and duration, a per-stream codec summary,
    and the result of decoding up to five frames of the first video stream.

    Parameters:
    - filename: str - Path of the media file to open

    Returns:
    None - the report is printed to stdout.
    """
    # The context manager closes the container even if a stream fails to
    # report or decode.
    with av.open(filename) as container:
        print(f"Container format: {container.format.name}")
        # duration is None when the container does not report one; dividing
        # None by av.time_base would raise TypeError.
        if container.duration is None:
            print("Duration: unknown")
        else:
            print(f"Duration: {container.duration / av.time_base:.2f} seconds")

        for i, stream in enumerate(container.streams):
            codec = stream.codec_context

            print(f"\nStream {i} ({stream.type}):")
            if codec is None:
                # Streams without a codec (e.g. data/attachment streams)
                # have nothing further to report.
                print("  Codec: none")
                continue
            print(f"  Codec: {codec.name}")
            print(f"  Bitrate: {codec.bit_rate}")

            if stream.type == 'video':
                print(f"  Resolution: {codec.width}x{codec.height}")
                print(f"  Pixel format: {codec.pix_fmt}")
                print(f"  Frame rate: {stream.framerate}")
                print(f"  Profile: {codec.profile}")

            elif stream.type == 'audio':
                print(f"  Sample rate: {codec.sample_rate}")
                print(f"  Channels: {codec.channels}")
                print(f"  Sample format: {codec.format.name}")
                print(f"  Channel layout: {codec.layout.name}")

        # Test decoding capabilities
        for stream in container.streams.video[:1]:  # First video stream
            print("\nTesting video decoding...")
            frame_count = 0
            for frame in container.decode(stream):
                frame_count += 1
                if frame_count >= 5:  # Test first 5 frames
                    break
            print(f"Successfully decoded {frame_count} frames")

# Analyze file: prints the container and per-stream codec report.
# 'sample.mp4' is presumably a placeholder path - point it at a real file.
analyze_container_codecs('sample.mp4')

Custom Codec Parameters

import av

def create_high_quality_encoder(filename='high_quality.mp4'):
    """
    Create high-quality video encoder with custom parameters.

    Parameters:
    - filename: str - Output path (defaults to 'high_quality.mp4')

    Returns:
    (output, stream) tuple. The caller owns `output` and must close it.
    """
    
    output = av.open(filename, 'w')
    try:
        # Create video stream
        stream = output.add_stream('libx264', rate=25)
        stream.width = 3840
        stream.height = 2160
        stream.pix_fmt = 'yuv420p10le'  # 10-bit encoding
        
        # High quality settings
        ctx = stream.codec_context
        ctx.bit_rate = 50000000  # 50 Mbps
        ctx.gop_size = 50        # 2-second GOP
        ctx.max_b_frames = 4     # More B-frames for efficiency
        ctx.qmin = 10            # Minimum quantizer
        ctx.qmax = 30            # Maximum quantizer
        
        # Professional encoding options. Values must be strings, so the
        # value-less x264 switches are passed through 'x264-params' as
        # key=value pairs instead of as None-valued entries.
        ctx.options = {
            'preset': 'veryslow',    # Best compression
            'tune': 'film',          # Film content
            'crf': '16',             # Very high quality
            'profile': 'high10',     # 10-bit profile  
            'level': '5.1',          # 4K level
            'psy-rd': '1.0:0.15',    # Psychovisual optimization
            'deblock': '1:1',        # Deblocking filter
            'ref': '8',              # Reference frames
            'bframes': '4',          # B-frame count
            'b-adapt': '2',          # Adaptive B-frames
            'direct': 'auto',        # Direct MV prediction
            'me': 'umh',             # Motion estimation
            'subme': '10',           # Sub-pixel motion estimation
            'analyse': 'all',        # Partition analysis
            'trellis': '2',          # Trellis quantization
            # Disable fast P-skip and DCT decimation
            'x264-params': 'no-fast-pskip=1:no-dct-decimate=1',
        }
    except Exception:
        # Do not leak the half-configured container if setup fails.
        output.close()
        raise
    
    return output, stream

# Create high-quality encoder
output, stream = create_high_quality_encoder()

# The container is a context manager: it is closed even if reporting or
# encoding below raises.
with output:
    print("Created high-quality encoder:")
    print(f"  Resolution: {stream.width}x{stream.height}")
    print(f"  Pixel format: {stream.pix_fmt}")
    print(f"  Bitrate: {stream.codec_context.bit_rate}")
    print(f"  Preset: {stream.codec_context.options.get('preset')}")

    # ... encoding process ...

Install with Tessl CLI

npx tessl i tessl/pypi-av

docs

audio.md

codecs.md

containers.md

filters.md

index.md

streams.md

video.md

tile.json