tessl/pypi-pyturbo-jpeg

A Python wrapper of libjpeg-turbo for decoding and encoding JPEG images.

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview

Eval results

Files

Utility Functions

Name: tessl/pypi-pyturbo-jpeg
Author: tessl

Buffer size calculation, scaling factor information, and helper utilities for optimizing memory usage, determining available scaling options, and working with JPEG processing parameters.

Capabilities

Buffer Size Calculation

Calculate the maximum buffer size needed for JPEG encoding operations to optimize memory allocation.

def buffer_size(img_array: np.ndarray, jpeg_subsample: int = TJSAMP_422) -> int:
    """
    Calculate the maximum number of bytes of compressed JPEG data.

    The usage examples invoke this on a TurboJPEG instance
    (``jpeg.buffer_size(image)``); ``self`` is simply omitted from this
    signature listing.

    Args:
        img_array: Input image array
        jpeg_subsample: JPEG subsampling type (TJSAMP_* constants);
            defaults to TJSAMP_422

    Returns:
        int: Maximum buffer size needed in bytes

    Notes:
        - Use this for pre-allocating buffers for in-place encoding
          (the buffer is then passed to ``encode`` as ``dst``)
        - The result is an upper bound: actual encoded size will
          typically be smaller
        - Calculation accounts for worst-case compression scenario
        - NOTE(review): presumably the subsampling given here should match
          the one used for the subsequent encode — confirm
    """

Scaling Factor Information

Access available scaling factors supported by the libjpeg-turbo library for decode operations.

@property
def scaling_factors(self) -> frozenset[tuple[int, int]]:
    """
    Available scaling factors for decode operations.

    Returns:
        frozenset: Set of (numerator, denominator) tuples representing valid scaling factors

    Notes:
        - Common factors include (1,8), (1,4), (1,2), (1,1), (2,1) etc.
        - The effective scale of a factor is numerator / denominator
        - Pass one of these tuples as the ``scaling_factor`` argument of
          ``decode`` for efficient decode-time scaling
        - Scaling happens during JPEG decompression for better performance
        - Being a frozenset, the result is unordered; sort it explicitly
          when a stable listing is needed
    """

Constants and Arrays

Pixel Size Information

tjPixelSize: list[int] = [3, 3, 4, 4, 4, 4, 1, 4, 4, 4, 4, 4]    # Bytes per pixel for each pixel format
# Index corresponds to TJPF_* constants:
# TJPF_RGB=0: 3 bytes, TJPF_BGR=1: 3 bytes, TJPF_RGBX=2: 4 bytes, 
# TJPF_BGRX=3: 4 bytes, TJPF_XBGR=4: 4 bytes, TJPF_XRGB=5: 4 bytes,
# TJPF_GRAY=6: 1 byte, TJPF_RGBA=7: 4 bytes, TJPF_BGRA=8: 4 bytes,
# TJPF_ABGR=9: 4 bytes, TJPF_ARGB=10: 4 bytes, TJPF_CMYK=11: 4 bytes

MCU Dimensions

tjMCUWidth: list[int] = [8, 16, 16, 8, 8, 32]     # MCU width in pixels for each subsampling type
tjMCUHeight: list[int] = [8, 8, 16, 8, 16, 8]    # MCU height in pixels for each subsampling type

# MCU sizes by subsampling index:
# TJSAMP_444=0: 8x8, TJSAMP_422=1: 16x8, TJSAMP_420=2: 16x16
# TJSAMP_GRAY=3: 8x8, TJSAMP_440=4: 8x16, TJSAMP_411=5: 32x8

Error Constants

TJERR_WARNING: int = 0    # Warning error level
TJERR_FATAL: int = 1      # Fatal error level

MCU Constants

MCU_WIDTH: int = 8        # Base MCU width in pixels
MCU_HEIGHT: int = 8       # Base MCU height in pixels
MCU_SIZE: int = 64        # Base MCU size in pixels (8x8)

Platform Library Paths

DEFAULT_LIB_PATHS: dict[str, list[str]]    # Default libjpeg-turbo library paths by platform

# Platform-specific paths:
# 'Darwin': macOS paths (/usr/local/opt/jpeg-turbo/lib/, /opt/homebrew/opt/jpeg-turbo/lib/)
# 'Linux': Linux paths (/usr/lib/x86_64-linux-gnu/, /usr/lib64/, etc.)
# 'Windows': Windows paths (C:/libjpeg-turbo64/bin/)
# 'FreeBSD': FreeBSD paths (/usr/local/lib/)
# 'NetBSD': NetBSD paths (/usr/pkg/lib/)

Utility Functions

Nibble Operations

def split_byte_into_nibbles(value: int) -> tuple[int, int]:
    """
    Split a byte into two 4-bit nibbles.

    Args:
        value: Byte value (0-255)

    Returns:
        tuple[int, int]: (first_nibble, second_nibble) where each is 0-15

    Notes:
        - NOTE(review): this listing does not state which nibble comes
          first; presumably first_nibble is the high-order four bits and
          second_nibble the low-order four — confirm against the
          implementation
    """

Usage Examples

Buffer Size Calculation

import numpy as np
from turbojpeg import TurboJPEG, TJSAMP_420, TJSAMP_444

# Codec instance; later snippets on this page reuse the name `jpeg`
jpeg = TurboJPEG()

# Create sample image: random 1080x1920 frame with three uint8 channels
image = np.random.randint(0, 256, (1080, 1920, 3), dtype=np.uint8)

# Calculate worst-case buffer sizes for different subsampling modes
size_422 = jpeg.buffer_size(image)  # Default TJSAMP_422
size_420 = jpeg.buffer_size(image, TJSAMP_420)
size_444 = jpeg.buffer_size(image, TJSAMP_444)

print(f"Image shape: {image.shape}")
print(f"Buffer size 4:2:2: {size_422:,} bytes")
print(f"Buffer size 4:2:0: {size_420:,} bytes")  
print(f"Buffer size 4:4:4: {size_444:,} bytes")

# Pre-allocate buffer for in-place encoding.
# NOTE(review): size_422 is used because encode() is called without a
# subsampling argument; presumably it defaults to 4:2:2 as well — confirm.
buffer = bytearray(size_422)
# With dst supplied, encode() is unpacked as (buffer, bytes_written)
encoded_data, actual_size = jpeg.encode(image, dst=buffer)

# Only the first actual_size bytes of the buffer hold JPEG data
print(f"Actual encoded size: {actual_size:,} bytes")
print(f"Buffer utilization: {actual_size/size_422*100:.1f}%")

Scaling Factor Usage

# Check all available scaling factors.
# `jpeg` is the TurboJPEG instance created in the buffer-size example.
# sorted() orders the (numerator, denominator) tuples lexicographically,
# i.e. by numerator then denominator — not by the resulting scale value.
print("Available scaling factors:")
for num, denom in sorted(jpeg.scaling_factors):
    scale = num / denom
    print(f"  {num}/{denom} = {scale:.3f}x")

# Common scaling factors and their uses (factor tuple -> description)
common_scales = {
    (1, 8): "1/8 scale - tiny thumbnails",
    (1, 4): "1/4 scale - small thumbnails", 
    (1, 2): "1/2 scale - medium thumbnails",
    (1, 1): "1:1 scale - original size",
    (2, 1): "2x scale - upscaling"
}

# Report only the common factors this library build actually offers
print("\nCommon uses:")
for factor, description in common_scales.items():
    if factor in jpeg.scaling_factors:
        print(f"  {factor[0]}/{factor[1]}: {description}")

Efficient Thumbnail Generation

def generate_thumbnails(jpeg_data, sizes):
    """Generate multiple thumbnail sizes efficiently.

    Each thumbnail is produced by decode-time scaling, using the available
    downscaling factor whose scale is closest to the requested one.

    Args:
        jpeg_data: Encoded JPEG data to decode.
        sizes: Mapping of thumbnail name -> target scale, expressed as a
            fraction of the original size (e.g. 0.25 for quarter size).

    Returns:
        dict: Thumbnail name -> result of ``jpeg.decode``. Every key of
        ``sizes`` is present; when no downscaling factor is available the
        entry holds the full-size decode, which the caller may resize.
    """
    jpeg = TurboJPEG()

    # The candidate set does not depend on the requested size, so build it
    # once. Only downscaling factors (scale within [0.1, 1.0]) qualify.
    candidates = [
        (num / denom, (num, denom))
        for num, denom in jpeg.scaling_factors
        if 0.1 <= num / denom <= 1.0
    ]

    thumbnails = {}
    for name, target_size in sizes.items():
        if candidates:
            # Closest scale wins; min() keeps the first of any tied entries.
            best_scale, best_factor = min(
                candidates, key=lambda entry: abs(entry[0] - target_size)
            )
            # Decode at reduced scale
            thumbnails[name] = jpeg.decode(
                jpeg_data,
                scaling_factor=best_factor
            )
            print(f"{name}: using {best_factor[0]}/{best_factor[1]} = {best_scale:.3f}x")
        else:
            # Fallback: actually perform the full decode the message
            # announces, so the caller always gets an entry for this name.
            print(f"{name}: no suitable scaling factor, using full decode")
            thumbnails[name] = jpeg.decode(jpeg_data)

    return thumbnails

# Usage: target scales are fractions of the original image size
thumbnail_sizes = dict(
    tiny=0.125,    # 1/8 scale
    small=0.25,    # 1/4 scale
    medium=0.5,    # 1/2 scale
)

# Read the encoded JPEG as raw bytes; decoding happens inside the helper
with open('photo.jpg', 'rb') as f:
    photo_data = f.read()

thumbs = generate_thumbnails(photo_data, thumbnail_sizes)

Memory Optimization

def encode_with_optimal_buffer(image_array, quality=85):
    """Encode an image to JPEG through a pre-allocated destination buffer.

    The destination is sized with ``buffer_size`` so the encoder can write
    directly into it; only the bytes actually produced are returned.

    Args:
        image_array: Image array to encode.
        quality: JPEG quality passed through to ``encode``.

    Returns:
        bytes: The encoded JPEG data, trimmed to its actual length.
    """
    codec = TurboJPEG()

    # buffer_size() yields an upper bound on the encoded size, which is
    # exactly how large the destination has to be.
    scratch = bytearray(codec.buffer_size(image_array))

    # With dst supplied, encode() returns a (destination, bytes_written)
    # pair; the destination is already in hand, so only the length is kept.
    _, written = codec.encode(image_array, quality=quality, dst=scratch)

    # Drop the unused tail of the buffer.
    return bytes(scratch[:written])

# Compare with standard encoding.
# This snippet relies on `np` and the `jpeg` instance from the
# buffer-size example earlier on this page.
image = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)

# Standard encoding (allocates internally); called without dst, the
# result is used directly as the encoded data
standard_result = jpeg.encode(image)

# Optimized encoding (pre-allocated buffer)  
optimal_result = encode_with_optimal_buffer(image)

# Both calls pass the same image; the helper's quality default is 85.
# NOTE(review): identical output presumes encode() also defaults to
# quality 85 — confirm.
print(f"Standard result size: {len(standard_result)} bytes")
print(f"Optimal result size: {len(optimal_result)} bytes")
print(f"Results identical: {standard_result == optimal_result}")

Platform-Specific Library Loading

import platform
from turbojpeg import TurboJPEG, DEFAULT_LIB_PATHS

def create_jpeg_processor():
    """Create TurboJPEG processor with platform-specific fallbacks.

    The default constructor is tried first; if it fails, every path listed
    in ``DEFAULT_LIB_PATHS`` for the current platform is tried in order.

    Returns:
        TurboJPEG: The first instance that initializes successfully.

    Raises:
        RuntimeError: If no attempt succeeds. The most recent underlying
            load error is attached as ``__cause__``.
    """
    # Loading a shared library can fail with either exception type, so
    # every attempt (including the default one) handles both.
    load_errors = (RuntimeError, OSError)

    # Try default initialization first
    try:
        return TurboJPEG()
    except load_errors as e:
        print(f"Default initialization failed: {e}")
        last_error = e

    # Try platform-specific paths; platforms without an entry try nothing
    for lib_path in DEFAULT_LIB_PATHS.get(platform.system(), ()):
        print(f"Trying {lib_path}...")
        try:
            return TurboJPEG(lib_path=lib_path)
        except load_errors as e:
            last_error = e

    # Final fallback - let user specify
    raise RuntimeError(
        "Could not locate libjpeg-turbo library. "
        "Please install libjpeg-turbo or specify lib_path manually."
    ) from last_error

# Usage: report the failure, or summarize the processor once it loaded
try:
    jpeg = create_jpeg_processor()
except RuntimeError as e:
    print(f"Failed to initialize: {e}")
else:
    print("TurboJPEG initialized successfully")
    print(f"Available scaling factors: {len(jpeg.scaling_factors)}")

Working with MCU Alignment

def get_mcu_aligned_crop(width, height, subsample, x, y, w, h):
    """Calculate MCU-aligned crop coordinates.

    The requested rectangle is expanded outward to MCU boundaries: the
    origin is snapped down and the far edges are snapped up, so the result
    always contains the requested region (clipped to the image).

    Args:
        width: Image width in pixels.
        height: Image height in pixels.
        subsample: Subsampling index used to look up the MCU dimensions
            in ``tjMCUWidth`` / ``tjMCUHeight``.
        x: Left edge of the desired crop.
        y: Top edge of the desired crop.
        w: Width of the desired crop.
        h: Height of the desired crop.

    Returns:
        tuple: ``(aligned_x, aligned_y, aligned_w, aligned_h)``.
    """
    from turbojpeg import tjMCUWidth, tjMCUHeight

    mcu_w = tjMCUWidth[subsample]
    mcu_h = tjMCUHeight[subsample]

    # Snap the origin down to the MCU grid.
    aligned_x = (x // mcu_w) * mcu_w
    aligned_y = (y // mcu_h) * mcu_h

    # Snap the far edges up to the MCU grid. Rounding x + w (rather than w
    # alone) compensates for the origin having moved left/up; otherwise the
    # requested right/bottom edge could fall outside the aligned rectangle.
    right = ((x + w + mcu_w - 1) // mcu_w) * mcu_w
    bottom = ((y + h + mcu_h - 1) // mcu_h) * mcu_h

    # Ensure within image bounds; at the image edge the size may therefore
    # end on a partial MCU.
    right = min(right, width)
    bottom = min(bottom, height)

    return aligned_x, aligned_y, right - aligned_x, bottom - aligned_y

# Usage
# The MCU tables are needed at module level for the summary print below;
# the helper's own import is local to the function and not visible here.
from turbojpeg import TurboJPEG, tjMCUWidth, tjMCUHeight

jpeg = TurboJPEG()

with open('image.jpg', 'rb') as f:
    jpeg_data = f.read()

# Get image properties (the fourth header field is not needed here)
width, height, subsample, _ = jpeg.decode_header(jpeg_data)

# Desired crop as (x, y, w, h)
desired_crop = (100, 150, 300, 200)

# Get MCU-aligned version
aligned_crop = get_mcu_aligned_crop(
    width, height, subsample, *desired_crop
)

print(f"Desired crop: {desired_crop}")
print(f"MCU-aligned crop: {aligned_crop}")
print(f"Subsampling: {subsample} (MCU: {tjMCUWidth[subsample]}x{tjMCUHeight[subsample]})")

# Perform aligned crop (fast)
cropped = jpeg.crop(jpeg_data, *aligned_crop)

Install with Tessl CLI