tessl install tessl/pypi-livekit@1.0.0

Python Real-time SDK for LiveKit providing WebRTC-based video, audio, and data streaming capabilities.
Video processing in the LiveKit SDK primarily involves frame format conversion and manipulation through the VideoFrame class. The SDK provides efficient format conversion between RGB and YUV formats, as well as vertical flipping.
Key concepts:
from livekit import VideoFrame, VideoBufferType

def convert(
self,
type: proto_video.VideoBufferType.ValueType,
*,
flip_y: bool = False
) -> VideoFrame:
"""Convert frame to different format.
Args:
type: Target format type
Type: VideoBufferType enum value
Examples:
- VideoBufferType.RGBA: RGB with alpha (packed)
- VideoBufferType.I420: YUV 4:2:0 (planar)
- VideoBufferType.NV12: YUV 4:2:0 semi-planar
flip_y: Whether to flip vertically
Type: bool
Default: False
Useful for camera APIs that produce inverted frames
Returns:
VideoFrame: New frame in target format
Original frame unchanged
Raises:
Exception: If conversion not supported
Some format combinations may not be available
RuntimeError: If conversion fails
Example:
>>> # Convert RGBA to I420 (for encoding)
>>> frame_rgba: VideoFrame = ...
>>> frame_i420 = frame_rgba.convert(VideoBufferType.I420)
>>>
>>> # Convert with vertical flip
>>> frame_flipped = frame.convert(VideoBufferType.RGBA, flip_y=True)
>>>
>>> # Convert I420 to RGBA (for display)
>>> frame_display = frame_i420.convert(VideoBufferType.RGBA)
Note:
Conversion creates new frame (not in-place).
Original frame remains valid and unchanged.
Conversion performance:
- Packed to packed (RGBA to BGRA): Fast
- Planar to planar (I420 to I422): Fast
- Packed to planar (RGBA to I420): Moderate
- Planar to packed (I420 to RGBA): Moderate
Memory allocation:
Each conversion allocates new buffer.
For hot paths, minimize conversions or reuse.
"""def get_plane(self, plane_nth: int) -> Optional[memoryview]:
"""Get a specific plane for multi-plane formats.
Args:
plane_nth: Plane index (0-based)
Type: int
For I420:
- 0: Y plane (luminance)
- 1: U plane (chrominance blue)
- 2: V plane (chrominance red)
For I420A:
- 0: Y plane
- 1: U plane
- 2: V plane
- 3: A plane (alpha)
Returns:
memoryview: View of plane data
None: If plane doesn't exist or format is packed
Raises:
IndexError: If plane_nth out of range
Example:
>>> # Access I420 planes
>>> frame_i420: VideoFrame = ...
>>> y_plane = frame_i420.get_plane(0)  # Y plane (luminance)
>>> u_plane = frame_i420.get_plane(1)  # U plane (chrominance)
>>> v_plane = frame_i420.get_plane(2)  # V plane (chrominance)
>>>
>>> if y_plane:
... print(f"Y plane size: {len(y_plane)}")
... # Modify Y plane directly
... # y_plane[0] = 128
Note:
Only works for planar formats (I420, I422, I444, etc.).
Returns None for packed formats (RGBA, BGRA, etc.).
Plane sizes for I420 (width W, height H):
- Y plane: W * H bytes
- U plane: (W/2) * (H/2) bytes
- V plane: (W/2) * (H/2) bytes
Modifying plane data directly modifies the frame.
"""from livekit import VideoFrame, VideoBufferType
# RGBA to I420 (common for encoding)
# I420 is more efficient for video codecs
rgba_frame: VideoFrame = ...
i420_frame = rgba_frame.convert(VideoBufferType.I420)
# I420 to RGBA (common for display/processing)
# RGBA is easier to work with for pixel manipulation
rgba_frame = i420_frame.convert(VideoBufferType.RGBA)
# RGBA to NV12 (for hardware encoders)
nv12_frame = rgba_frame.convert(VideoBufferType.NV12)

# Flip video vertically (useful for some camera APIs)
# Some cameras produce upside-down frames
flipped = frame.convert(VideoBufferType.RGBA, flip_y=True)
# Common use case: OpenGL textures (origin bottom-left)
opengl_frame = camera_frame.convert(VideoBufferType.RGBA, flip_y=True)

from livekit import VideoFrame, VideoBufferType
frame_i420: VideoFrame = ...
# Access individual planes
y_plane = frame_i420.get_plane(0) # Luminance (brightness)
u_plane = frame_i420.get_plane(1) # Chrominance U (blue difference)
v_plane = frame_i420.get_plane(2) # Chrominance V (red difference)
if y_plane and u_plane and v_plane:
print(f"Y plane size: {len(y_plane)}") # width * height
print(f"U plane size: {len(u_plane)}") # (width/2) * (height/2)
print(f"V plane size: {len(v_plane)}") # (width/2) * (height/2)
# Modify brightness (Y plane)
# Increase brightness by 10
for i in range(len(y_plane)):
    y_plane[i] = min(255, y_plane[i] + 10)

from livekit import VideoBufferType
# Packed RGB formats (single buffer)
VideoBufferType.RGBA # 4 bytes per pixel: R, G, B, A
VideoBufferType.ABGR # 4 bytes per pixel: A, B, G, R
VideoBufferType.ARGB # 4 bytes per pixel: A, R, G, B
VideoBufferType.BGRA # 4 bytes per pixel: B, G, R, A
VideoBufferType.RGB24 # 3 bytes per pixel: R, G, B (no alpha)
# Planar YUV formats (multiple planes)
VideoBufferType.I420 # YUV 4:2:0 (most common, best for encoding)
VideoBufferType.I420A # YUV 4:2:0 with alpha plane
VideoBufferType.I422 # YUV 4:2:2 (higher chroma resolution)
VideoBufferType.I444 # YUV 4:4:4 (full chroma, no subsampling)
VideoBufferType.I010 # YUV 4:2:0 10-bit (HDR content)
VideoBufferType.NV12 # YUV 4:2:0 semi-planar (hardware encoders)
# Buffer size calculations:
# RGBA: width * height * 4 bytes
# RGB24: width * height * 3 bytes
# I420: width * height * 1.5 bytes (Y + U/4 + V/4)
# I422: width * height * 2 bytes (Y + U/2 + V/2)
# I444: width * height * 3 bytes (Y + U + V)

from livekit import VideoFrame, VideoBufferType
# Create RGBA frame
width, height = 1920, 1080
rgba_data = bytearray(width * height * 4)
# Fill with solid color (red)
for i in range(0, len(rgba_data), 4):
rgba_data[i] = 255 # Red
rgba_data[i+1] = 0 # Green
rgba_data[i+2] = 0 # Blue
rgba_data[i+3] = 255 # Alpha
frame_rgba = VideoFrame(width, height, VideoBufferType.RGBA, rgba_data)
# Convert to I420 for encoding
frame_i420 = frame_rgba.convert(VideoBufferType.I420)
# Access I420 planes
y_plane = frame_i420.get_plane(0)
u_plane = frame_i420.get_plane(1)
v_plane = frame_i420.get_plane(2)
print(f"Y plane: {len(y_plane)} bytes") # 1920 * 1080
print(f"U plane: {len(u_plane)} bytes") # 960 * 540
print(f"V plane: {len(v_plane)} bytes") # 960 * 540
# Convert back to RGBA for display
frame_display = frame_i420.convert(VideoBufferType.RGBA)
# Flip if needed (for OpenGL, etc.)
frame_flipped = frame_display.convert(VideoBufferType.RGBA, flip_y=True)

# Use I420 for encoding/transmission (efficient)
# I420 uses 1.5 bytes per pixel vs 4 for RGBA
frame_i420 = frame.convert(VideoBufferType.I420)
source.capture_frame(frame_i420)
# Use RGBA for processing/display (convenient)
# RGBA is easier for pixel manipulation
frame_rgba = frame.convert(VideoBufferType.RGBA)
# Process pixels...

# Bad: Multiple conversions in loop
for _ in range(100):
frame_i420 = frame_rgba.convert(VideoBufferType.I420)
frame_back = frame_i420.convert(VideoBufferType.RGBA)
frame_i420_again = frame_back.convert(VideoBufferType.I420)
# 300 conversions total!
# Good: Convert once, reuse
frame_i420 = frame_rgba.convert(VideoBufferType.I420)
for _ in range(100):
# Use frame_i420 directly
source.capture_frame(frame_i420)def safe_convert(frame: VideoFrame, target_type: VideoBufferType) -> Optional[VideoFrame]:
"""Convert with error handling.
Returns:
VideoFrame if successful, None otherwise
"""
try:
return frame.convert(target_type)
except Exception as e:
print(f"Conversion failed: {e}")
print(f" From: {frame.type}")
print(f" To: {target_type}")
print(f" Size: {frame.width}x{frame.height}")
return None
# Usage
converted = safe_convert(frame, VideoBufferType.I420)
if converted:
    source.capture_frame(converted)

# For video encoding: Use I420 or NV12
encoding_frame = rgba_frame.convert(VideoBufferType.I420)
# For hardware encoding: Use NV12
hw_encoding_frame = rgba_frame.convert(VideoBufferType.NV12)
# For display/UI: Use RGBA or BGRA
display_frame = i420_frame.convert(VideoBufferType.RGBA)
# For OpenCV processing: Use BGRA or BGR24
opencv_frame = frame.convert(VideoBufferType.BGRA)
# For storage/archival: Use I420 (smaller size)
storage_frame = frame.convert(VideoBufferType.I420)

# YUV formats have different chroma subsampling:
# I420 (4:2:0): Most common, smallest size
# - Full luminance resolution
# - Chroma at 1/4 resolution
# - Good for most use cases
frame_420 = frame.convert(VideoBufferType.I420)
# I422 (4:2:2): Better chroma horizontally
# - Full luminance resolution
# - Chroma at 1/2 resolution horizontally, full vertically
# - Better for editing, graphics
frame_422 = frame.convert(VideoBufferType.I422)
# I444 (4:4:4): No subsampling
# - Full resolution for all components
# - Highest quality
# - Largest size (3 bytes per pixel)
frame_444 = frame.convert(VideoBufferType.I444)
# Choose based on use case:
# - Streaming/conferencing: I420
# - Video editing: I422 or I444
# - High quality archival: I444

def adjust_brightness(frame: "VideoFrame", delta: int) -> "VideoFrame":
    """Adjust frame brightness by shifting the luminance (Y) plane.

    Args:
        frame: Input frame (any format).
        delta: Brightness adjustment; each Y value is shifted by ``delta``
            and clamped to 0-255. Negative darkens, positive brightens.

    Returns:
        VideoFrame in I420 format with adjusted brightness.

    Note:
        If ``frame`` is already I420 its Y plane is modified in place and
        the same frame is returned; otherwise a converted copy is adjusted
        and the original frame is left untouched.
    """
    # I420 keeps brightness in a dedicated Y plane, so only one plane is touched.
    if frame.type != VideoBufferType.I420:
        frame = frame.convert(VideoBufferType.I420)

    y_plane = frame.get_plane(0)
    if y_plane:
        # A 256-entry lookup table + bytes.translate() applies the clamped
        # shift in a single C-level pass instead of a per-byte Python loop.
        table = bytes(min(255, max(0, v + delta)) for v in range(256))
        y_plane[:] = bytes(y_plane).translate(table)
    return frame
# Usage
brighter_frame = adjust_brightness(frame, delta=20)


def to_grayscale(frame: "VideoFrame") -> "VideoFrame":
    """Convert a frame to grayscale by neutralizing its chroma planes.

    Args:
        frame: Input frame (any format).

    Returns:
        VideoFrame in I420 format with neutral (gray) chroma.

    Note:
        In YUV, 128 (0x80) is the *neutral* chroma value — "removing color"
        means filling U and V with 128, not 0 (0 would tint the image).
        If ``frame`` is already I420 it is modified in place.
    """
    if frame.type != VideoBufferType.I420:
        frame = frame.convert(VideoBufferType.I420)

    # Fill U and V with the neutral value via one bulk slice assignment
    # per plane instead of a per-byte Python loop.
    for plane_nth in (1, 2):
        plane = frame.get_plane(plane_nth)
        if plane:
            plane[:] = b"\x80" * len(plane)  # 0x80 == 128, neutral chroma
    return frame


from livekit import VideoFrame, VideoBufferType
# Create RGBA frame
width, height = 1920, 1080
rgba_data = bytearray(width * height * 4)
# Fill with gradient
for y in range(height):
for x in range(width):
idx = (y * width + x) * 4
rgba_data[idx] = int(255 * x / width) # Red gradient
rgba_data[idx+1] = int(255 * y / height) # Green gradient
rgba_data[idx+2] = 128 # Blue constant
rgba_data[idx+3] = 255 # Alpha opaque
frame_rgba = VideoFrame(width, height, VideoBufferType.RGBA, rgba_data)
# Convert to I420 for encoding
frame_i420 = frame_rgba.convert(VideoBufferType.I420)
# Access I420 planes
y_plane = frame_i420.get_plane(0)
u_plane = frame_i420.get_plane(1)
v_plane = frame_i420.get_plane(2)
# Modify Y plane (brightness)
if y_plane:
for i in range(len(y_plane)):
y_plane[i] = min(255, y_plane[i] + 10) # Increase brightness
# Convert back to RGBA for display
frame_display = frame_i420.convert(VideoBufferType.RGBA)
# Flip for OpenGL rendering
frame_flipped = frame_display.convert(VideoBufferType.RGBA, flip_y=True)

# Format characteristics for 1920x1080 frame:
width, height = 1920, 1080
# RGBA (packed)
# - Size: 1920 * 1080 * 4 = 8,294,400 bytes (~8.3 MB)
# - Easy to process (direct pixel access)
# - Good for: Display, simple processing
# I420 (planar)
# - Size: 1920 * 1080 * 1.5 = 3,110,400 bytes (~3.1 MB)
# - More efficient (37.5% of RGBA size)
# - Good for: Encoding, streaming
# I422 (planar)
# - Size: 1920 * 1080 * 2 = 4,147,200 bytes (~4.1 MB)
# - Better chroma than I420
# - Good for: Editing, processing
# NV12 (semi-planar)
# - Size: 1920 * 1080 * 1.5 = 3,110,400 bytes (~3.1 MB)
# - Same size as I420
# - Good for: Hardware encoding