CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-pypdfium2

Python bindings to PDFium for comprehensive PDF manipulation, rendering, and processing

Pending
Overview
Eval results
Files

docs/image-bitmap.md

Image and Bitmap Operations

Image rendering, manipulation, and extraction with support for multiple output formats including PIL Images, NumPy arrays, and raw bitmaps. The PdfBitmap class provides comprehensive bitmap handling capabilities.

Capabilities

Bitmap Creation

Create bitmap objects from various sources including raw PDFium bitmaps, PIL Images, and new empty bitmaps.

class PdfBitmap:
    # API stub: signatures and documentation only; no implementation is shown here.
    # Return annotations are quoted because the class name is not yet bound
    # while its own body is being evaluated.

    @classmethod
    def from_raw(cls, raw, rev_byteorder=False, ex_buffer=None) -> "PdfBitmap":
        """
        Create bitmap from raw PDFium bitmap handle.

        Parameters:
        - raw: FPDF_BITMAP, raw PDFium bitmap handle
        - rev_byteorder: bool, reverse byte order for pixel data
        - ex_buffer: optional external buffer for pixel data

        Returns:
        PdfBitmap: Bitmap object wrapping the raw handle
        """

    @classmethod
    def new_native(cls, width: int, height: int, format: int, rev_byteorder=False, buffer=None) -> "PdfBitmap":
        """
        Create new native PDFium bitmap.

        Parameters:
        - width: int, bitmap width in pixels
        - height: int, bitmap height in pixels
        - format: int, PDFium bitmap format constant (an FPDFBitmap_* value)
        - rev_byteorder: bool, reverse byte order
        - buffer: optional ctypes array for pixel data

        Returns:
        PdfBitmap: New native bitmap
        """

    @classmethod
    def new_foreign(cls, width: int, height: int, format: int, rev_byteorder=False, force_packed=False) -> "PdfBitmap":
        """
        Create new foreign bitmap with external buffer.

        Parameters:
        - width: int, bitmap width in pixels
        - height: int, bitmap height in pixels
        - format: int, PDFium bitmap format constant (an FPDFBitmap_* value)
        - rev_byteorder: bool, reverse byte order
        - force_packed: bool, force packed pixel format

        Returns:
        PdfBitmap: New foreign bitmap
        """

    @classmethod
    def new_foreign_simple(cls, width: int, height: int, use_alpha: bool, rev_byteorder=False) -> "PdfBitmap":
        """
        Create simple foreign bitmap with automatic format selection.

        Parameters:
        - width: int, bitmap width in pixels
        - height: int, bitmap height in pixels
        - use_alpha: bool, include alpha channel
        - rev_byteorder: bool, reverse byte order

        Returns:
        PdfBitmap: New foreign bitmap with RGB or RGBA format
        """

    @classmethod
    def from_pil(cls, pil_image: "PIL.Image.Image", recopy=False) -> "PdfBitmap":
        """
        Create bitmap from PIL Image.

        Parameters:
        - pil_image: PIL.Image.Image, source image
        - recopy: bool, force copy of image data

        Returns:
        PdfBitmap: Bitmap containing PIL image data
        """

Bitmap creation examples:

import pypdfium2 as pdfium
from PIL import Image
import numpy as np

# Create empty 800x600 bitmap without an alpha channel
bitmap = pdfium.PdfBitmap.new_foreign_simple(800, 600, use_alpha=False)

# Same size, but with an alpha channel
alpha_bitmap = pdfium.PdfBitmap.new_foreign_simple(800, 600, use_alpha=True)

# Wrap an existing PIL Image (pass recopy=True to force a copy of the pixel data)
pil_img = Image.open("photo.jpg")
bitmap_from_pil = pdfium.PdfBitmap.from_pil(pil_img)

# Create native PDFium bitmap with an explicitly chosen pixel format
native_bitmap = pdfium.PdfBitmap.new_native(
    width=1024,
    height=768,
    format=pdfium.raw.FPDFBitmap_BGRA,  # Format constants live in pypdfium2.raw
    rev_byteorder=False
)

Bitmap Properties

Access bitmap metadata and properties for format and dimension information.

# Read-only PdfBitmap properties (API stubs: documentation only).

@property
def raw(self) -> FPDF_BITMAP:
    """Raw PDFium bitmap handle for low-level operations."""

@property
def buffer(self) -> ctypes.Array:
    """Ctypes array containing pixel data."""

@property
def width(self) -> int:
    """Bitmap width in pixels."""

@property
def height(self) -> int:
    """Bitmap height in pixels."""

@property
def stride(self) -> int:
    """Number of bytes per scanline."""

@property
def format(self) -> int:
    """PDFium bitmap format constant (one of the FPDFBitmap_* values)."""

@property
def rev_byteorder(self) -> bool:
    """Whether byte order is reversed."""

@property
def n_channels(self) -> int:
    """Number of color channels per pixel."""

@property
def mode(self) -> str:
    """PIL-compatible mode string (RGB, RGBA, etc.)."""

Property usage:

# Examine bitmap properties
bitmap = page.render(scale=2.0)

print(f"Bitmap dimensions: {bitmap.width} x {bitmap.height}")
print(f"Stride: {bitmap.stride} bytes per line")
print(f"Channels: {bitmap.n_channels}")
print(f"Mode: {bitmap.mode}")
print(f"Format: {bitmap.format}")

# Calculate memory usage from the buffer layout: every scanline occupies
# `stride` bytes, so stride * height is the size of the pixel buffer.
# (width * height * n_channels under-reports whenever rows carry padding.)
buffer_bytes = bitmap.stride * bitmap.height
memory_mb = buffer_bytes / (1024 * 1024)
print(f"Memory usage: {memory_mb:.1f} MB")

Bitmap Information

Get structured bitmap information as a named tuple.

def get_info(self) -> "PdfBitmapInfo":
    """
    Get comprehensive bitmap information.

    API stub: documentation only. The return annotation is quoted because
    PdfBitmapInfo is declared after this definition.

    Returns:
    PdfBitmapInfo: Named tuple with bitmap metadata
    """

# PdfBitmapInfo named tuple: the bitmap metadata returned by PdfBitmap.get_info().
# Each field mirrors the PdfBitmap property of the same name.
class PdfBitmapInfo(NamedTuple):
    width: int            # bitmap width in pixels
    height: int           # bitmap height in pixels
    stride: int           # number of bytes per scanline
    format: int           # PDFium bitmap format constant
    rev_byteorder: bool   # whether byte order is reversed
    n_channels: int       # number of color channels per pixel
    mode: str             # PIL-compatible mode string

Example:

# Render the page and collect its metadata in one named tuple
bitmap = page.render()
info = bitmap.get_info()

# Build the whole report first, then emit it with a single print call
report_lines = [
    "Bitmap Info:",
    f"  Dimensions: {info.width} x {info.height}",
    f"  Stride: {info.stride}",
    f"  Format: {info.format}",
    f"  Channels: {info.n_channels}",
    f"  Mode: {info.mode}",
    f"  Byte order reversed: {info.rev_byteorder}",
]
print("\n".join(report_lines))

Bitmap Manipulation

Modify bitmap content with drawing and filling operations.

def fill_rect(self, left: int, top: int, width: int, height: int, color: "tuple[int, int, int, int]"):
    """
    Fill rectangular area with solid color.

    API stub: documentation only.

    Parameters:
    - left: int, left edge of rectangle in pixels
    - top: int, top edge of rectangle in pixels
    - width: int, rectangle width in pixels
    - height: int, rectangle height in pixels
    - color: RGBA fill color as four integers in the range 0-255.
      NOTE(review): this follows the upstream pypdfium2 4.x API reference,
      which describes an RGBA tuple rather than a packed integer; confirm
      against the installed version.
    """

Fill operations:

# Create bitmap and fill areas
bitmap = pdfium.PdfBitmap.new_foreign_simple(800, 600, use_alpha=False)

# Colors are RGBA tuples with components 0-255 (the upstream 4.x API rejects
# packed integers such as 0xFFFFFF); the library handles the byte order.
WHITE = (255, 255, 255, 255)
RED = (255, 0, 0, 255)
GREEN = (0, 255, 0, 255)
BLUE = (0, 0, 255, 255)

# Fill entire bitmap with white background
bitmap.fill_rect(0, 0, bitmap.width, bitmap.height, WHITE)

# Add colored rectangles: (left, top, width, height, color)
bitmap.fill_rect(100, 150, 200, 100, RED)
bitmap.fill_rect(400, 200, 150, 150, GREEN)
bitmap.fill_rect(200, 350, 300, 50, BLUE)

# Convert to PIL and save
pil_image = bitmap.to_pil()
pil_image.save("colored_rectangles.png")

Format Conversion

Convert bitmaps to various output formats including PIL Images and NumPy arrays.

# Conversion methods of PdfBitmap (API stubs: documentation only).
# Third-party return types are quoted so the stubs do not require the
# optional numpy / PIL packages to be importable.

def to_numpy(self) -> "numpy.ndarray":
    """
    Convert bitmap to NumPy array.

    Returns:
    numpy.ndarray: Image data as NumPy array with shape (height, width, channels)
    """

def to_pil(self) -> "PIL.Image.Image":
    """
    Convert bitmap to PIL Image.

    Returns:
    PIL.Image.Image: PIL Image object with bitmap data
    """

Conversion examples:

import numpy as np
from PIL import Image

# Render page to bitmap.
# rev_byteorder=True requests RGB(A) channel order instead of PDFium's native
# BGR(A), so the per-channel math on the NumPy array below uses the right channels.
bitmap = page.render(scale=2.0, draw_annots=True, rev_byteorder=True)

# Convert to PIL Image for further processing
pil_image = bitmap.to_pil()
# expand=True grows the canvas so a non-square page is not cropped by the rotation
pil_image = pil_image.rotate(90, expand=True)
pil_image.save("rotated_page.png")

# Convert to NumPy array for analysis
numpy_array = bitmap.to_numpy()
print(f"Array shape: {numpy_array.shape}")
print(f"Data type: {numpy_array.dtype}")
print(f"Min/Max values: {numpy_array.min()}, {numpy_array.max()}")

# Analyze image statistics (mean over rows and columns -> one value per channel)
mean_color = np.mean(numpy_array, axis=(0, 1))
print(f"Average color: {mean_color}")

# Create grayscale version
if numpy_array.ndim == 3 and numpy_array.shape[2] >= 3:
    luma_weights = [0.299, 0.587, 0.114]  # weights for R, G, B in that order
    grayscale = np.dot(numpy_array[..., :3], luma_weights)
    # A 2-D uint8 array is interpreted as an 8-bit grayscale ("L") image
    gray_image = Image.fromarray(grayscale.astype(np.uint8))
    gray_image.save("grayscale_page.png")

High-Quality Rendering

Advanced rendering options for high-quality output and specific use cases.

def render_high_quality(page, dpi=300, format='PNG'):
    """
    High-quality page rendering example.

    Parameters:
    - page: page object providing render(); the result must provide to_pil()
    - dpi: target resolution in dots per inch
    - format: 'PNG', 'JPEG' or 'TIFF' (case-insensitive); selects the output
      file (high_quality.png / .jpg / .tiff) written to the working directory.
      Any other value skips saving and only returns the image.

    Returns:
    The rendered page as a PIL image.
    """

    # Calculate scale for desired DPI (scale 1.0 corresponds to 72 DPI)
    scale = dpi / 72.0

    # Render with quality settings. Keyword names follow the pypdfium2 4.x
    # render options, matching the fill_color usage elsewhere on this page.
    bitmap = page.render(
        scale=scale,
        fill_color=(255, 255, 255, 255),  # White background (RGBA)
        draw_annots=True,                 # Include annotations
        may_draw_forms=True,              # Include form fields
        no_smoothtext=False,              # Keep anti-aliasing for text,
        no_smoothimage=False,             # images
        no_smoothpath=False,              # and paths
        optimize_mode='print'             # Optimize for printing
    )

    # Convert to PIL with high quality
    pil_image = bitmap.to_pil()

    # Format-specific file name and save options
    save_targets = {
        'PNG': ("high_quality.png", {'format': 'PNG', 'optimize': True}),
        'JPEG': ("high_quality.jpg", {'format': 'JPEG', 'quality': 95, 'optimize': True}),
        # Pillow names its LZW codec 'tiff_lzw'; plain 'lzw' is not accepted
        'TIFF': ("high_quality.tiff", {'format': 'TIFF', 'compression': 'tiff_lzw'}),
    }
    target = save_targets.get(format.upper())
    if target is not None:
        filename, save_options = target
        pil_image.save(filename, **save_options)

    return pil_image

# Usage: open the document and take its first page
pdf = pdfium.PdfDocument("document.pdf")
page = pdf[0]

# Render at print quality (300 DPI); also writes high_quality.png
high_res_image = render_high_quality(page, dpi=300, format='PNG')
print(f"High resolution image: {high_res_image.size}")

Batch Processing

Process multiple pages efficiently with consistent rendering settings.

def render_all_pages(pdf_path, output_dir, dpi=150, format='PNG'):
    """
    Render all pages of a PDF to images.

    Parameters:
    - pdf_path: path of the PDF document to open
    - output_dir: directory for the page images (created if missing)
    - dpi: target resolution in dots per inch
    - format: image format, also used as the file extension. 'PNG' and 'JPEG'
      get tuned save options; for any other value the image library picks the
      format from the file extension.

    Files are named page_001.<ext>, page_002.<ext>, ...
    """
    import os

    # Tuned save options per format; an empty dict lets the format be
    # inferred from the file extension instead of silently skipping the save.
    save_options = {
        'PNG': {'format': 'PNG', 'optimize': True},
        'JPEG': {'format': 'JPEG', 'quality': 90},
    }.get(format.upper(), {})

    os.makedirs(output_dir, exist_ok=True)

    pdf = pdfium.PdfDocument(pdf_path)
    try:
        # Read the page count while the document is still open
        page_count = len(pdf)

        for i, page in enumerate(pdf):
            print(f"Rendering page {i+1}/{page_count}")

            # Render page on an opaque white background (RGBA)
            bitmap = page.render(
                scale=dpi/72.0,
                fill_color=(255, 255, 255, 255),
                draw_annots=True
            )

            # Convert and save
            pil_image = bitmap.to_pil()

            filename = f"page_{i+1:03d}.{format.lower()}"
            filepath = os.path.join(output_dir, filename)
            pil_image.save(filepath, **save_options)

            print(f"Saved: {filepath}")
    finally:
        # Release the document even if rendering or saving fails
        pdf.close()

    print(f"Rendered {page_count} pages")

# Usage: render every page of document.pdf at 200 DPI as PNG files in output_images/
render_all_pages("document.pdf", "output_images/", dpi=200, format='PNG')

Memory Management

Efficient memory handling for large bitmaps and batch processing.

def process_large_pdf_efficiently(pdf_path):
    """
    Process large PDF with memory management.

    Pages are handled one at a time. Only text-heavy pages (more than 1000
    characters) are rendered, and every handle is closed as soon as it is
    no longer needed, including when an error interrupts processing.

    Parameters:
    - pdf_path: path of the PDF document to open
    """
    from contextlib import closing

    # closing() calls .close() on exit, so resources are released
    # deterministically rather than whenever the objects get collected.
    with closing(pdfium.PdfDocument(pdf_path)) as pdf:
        for i in range(len(pdf)):
            # Process one page at a time
            with closing(pdf[i]) as page:
                # The text page is only needed for the character count
                with closing(page.get_textpage()) as textpage:
                    char_count = textpage.count_chars()

                if char_count > 1000:  # Only render text-heavy pages
                    # Render at reasonable resolution
                    bitmap = page.render(scale=1.5)

                    # Process bitmap
                    numpy_array = bitmap.to_numpy()

                    # Analyze or save as needed
                    print(f"Page {i+1}: {char_count} chars, image shape {numpy_array.shape}")

                    # Drop the large pixel data before moving to the next page
                    del numpy_array
                    del bitmap

Bitmap Format Constants

Common PDFium bitmap format constants available through the raw module:

# Available through pypdfium2.raw (e.g. pypdfium2.raw.FPDFBitmap_BGRA).
# These are the values accepted by the `format` parameter of
# PdfBitmap.new_native / PdfBitmap.new_foreign.
FPDFBitmap_Unknown = 0  # Unknown format
FPDFBitmap_Gray = 1     # Grayscale
FPDFBitmap_BGR = 2      # BGR 24-bit
FPDFBitmap_BGRx = 3     # BGRx 32-bit
FPDFBitmap_BGRA = 4     # BGRA 32-bit

Usage:

import pypdfium2 as pdfium

# Create an 800x600 native bitmap in BGRA format
bitmap = pdfium.PdfBitmap.new_native(
    800, 600,
    pdfium.raw.FPDFBitmap_BGRA,
    rev_byteorder=False
)

Custom Rendering Colors

PdfColorScheme Class

Custom color scheme for rendering PDF pages with specific color mappings.

class PdfColorScheme:
    """
    Rendering color scheme for customizing PDF page appearance.

    Allows specification of custom colors for different PDF elements
    during rendering operations. Each color should be provided as
    RGBA values with components ranging from 0 to 255.

    API stub: signatures and documentation only.

    Attributes:
    - colors: dict, color mappings for different PDF elements
    """

    def __init__(self, path_fill: list[int], path_stroke: list[int], text_fill: list[int], text_stroke: list[int]):
        """
        Initialize color scheme with element colors.

        Parameters:
        - path_fill: list[int], RGBA color for path fill operations [R, G, B, A]
        - path_stroke: list[int], RGBA color for path stroke operations [R, G, B, A]
        - text_fill: list[int], RGBA color for text fill operations [R, G, B, A]
        - text_stroke: list[int], RGBA color for text stroke operations [R, G, B, A]

        Each color component should be an integer from 0-255.
        """

    def convert(self, rev_byteorder: bool) -> "FPDF_COLORSCHEME":
        """
        Convert color scheme to PDFium format.

        Parameters:
        - rev_byteorder: bool, whether to use reverse byte order

        Returns:
        FPDF_COLORSCHEME: PDFium-compatible color scheme object

        Internal method used during rendering to convert Python color
        values to the format expected by PDFium's rendering engine.
        """

Creating and using custom color schemes:

import pypdfium2 as pdfium

# Define custom colors (RGBA values 0-255)
dark_theme = pdfium.PdfColorScheme(
    path_fill=[40, 40, 40, 255],    # Dark gray for filled shapes
    path_stroke=[100, 100, 100, 255], # Mid gray for shape outlines
    text_fill=[220, 220, 220, 255],  # Light gray for text
    text_stroke=[255, 255, 255, 255] # White for text outlines
)

# Render page with custom colors
pdf = pdfium.PdfDocument("document.pdf")
page = pdf[0]

bitmap = page.render(
    scale=2.0,
    color_scheme=dark_theme,
    fill_color=(30, 30, 30, 255)  # Dark background (RGBA)
)

# Save dark theme version
pil_image = bitmap.to_pil()
pil_image.save("document_dark_theme.png")

# High contrast color scheme for accessibility: every element is forced to black
high_contrast = pdfium.PdfColorScheme(
    path_fill=[0, 0, 0, 255],        # Black for shapes
    path_stroke=[0, 0, 0, 255],      # Black for outlines
    text_fill=[0, 0, 0, 255],        # Black for text
    text_stroke=[0, 0, 0, 255]       # Black for text outlines
)

# Render with high contrast on white background (same page, second pass)
high_contrast_bitmap = page.render(
    scale=2.0,
    color_scheme=high_contrast,
    fill_color=(255, 255, 255, 255)  # White background (RGBA)
)

high_contrast_pil = high_contrast_bitmap.to_pil()
high_contrast_pil.save("document_high_contrast.png")

Advanced color scheme examples:

def create_sepia_scheme():
    """Build a sepia color scheme: brown path fills, dark sepia for strokes and text."""
    dark_tone = [101, 67, 33, 255]     # dark sepia, RGBA
    fill_tone = [160, 130, 98, 255]    # sepia brown, RGBA

    scheme_colors = {
        "path_fill": fill_tone,
        "path_stroke": dark_tone,
        "text_fill": dark_tone,
        "text_stroke": dark_tone,
    }
    return pdfium.PdfColorScheme(**scheme_colors)

def create_blueprint_scheme():
    """Build a blueprint color scheme: blue path fills with white strokes and text."""
    line_color = [255, 255, 255, 255]  # white lines, RGBA
    fill_color = [0, 162, 232, 255]    # blueprint blue, RGBA

    scheme_colors = {
        "path_fill": fill_color,
        "path_stroke": line_color,
        "text_fill": line_color,
        "text_stroke": line_color,
    }
    return pdfium.PdfColorScheme(**scheme_colors)

def render_with_multiple_themes(page, output_prefix):
    """
    Render page with different color themes.

    Writes one PNG per theme, named "<output_prefix>_<theme>.png", for the
    themes original, sepia, blueprint and dark.

    Parameters:
    - page: page to render
    - output_prefix: leading part of each output file name
    """
    # One row per theme: (name, color scheme, RGBA background).
    # A scheme of None renders the page with its original colors.
    variants = [
        ('original', None, (255, 255, 255, 255)),                    # White
        ('sepia', create_sepia_scheme(), (245, 235, 215, 255)),      # Antique white
        ('blueprint', create_blueprint_scheme(), (25, 25, 112, 255)),  # Dark blue
        (
            'dark',
            pdfium.PdfColorScheme(
                path_fill=[60, 60, 60, 255],
                path_stroke=[120, 120, 120, 255],
                text_fill=[200, 200, 200, 255],
                text_stroke=[240, 240, 240, 255]
            ),
            (20, 20, 20, 255),                                       # Very dark gray
        ),
    ]

    for name, scheme, background in variants:
        print(f"Rendering {name} theme...")

        rendered = page.render(scale=2.0, color_scheme=scheme, fill_color=background)

        target = f"{output_prefix}_{name}.png"
        rendered.to_pil().save(target)

        print(f"Saved: {target}")

# Usage: writes themed_page_original.png, themed_page_sepia.png,
# themed_page_blueprint.png and themed_page_dark.png for the first page
pdf = pdfium.PdfDocument("document.pdf")
page = pdf[0]
render_with_multiple_themes(page, "themed_page")

# Batch process with custom theme
def batch_render_with_theme(pdf_path, color_scheme, output_dir):
    """
    Render all pages with custom color scheme.

    Parameters:
    - pdf_path: path of the PDF document to open
    - color_scheme: color scheme passed to each page render
    - output_dir: directory for the PNG files (created if missing)

    Files are named page_001_themed.png, page_002_themed.png, ...
    """
    import os

    # Make sure the target directory exists before opening the document
    os.makedirs(output_dir, exist_ok=True)

    pdf = pdfium.PdfDocument(pdf_path)
    try:
        for page_number, page in enumerate(pdf, start=1):
            bitmap = page.render(
                scale=1.5,
                color_scheme=color_scheme,
                fill_color=(245, 245, 245, 255)  # Light background
            )

            pil_image = bitmap.to_pil()
            filename = f"page_{page_number:03d}_themed.png"
            filepath = os.path.join(output_dir, filename)
            pil_image.save(filepath)

            print(f"Rendered page {page_number} with custom theme")
    finally:
        # Release the document even if rendering or saving fails
        pdf.close()

# Apply sepia theme to entire document; images are written to sepia_output/
sepia_theme = create_sepia_scheme()
batch_render_with_theme("document.pdf", sepia_theme, "sepia_output/")

Install with Tessl CLI

npx tessl i tessl/pypi-pypdfium2

docs

attachments.md

cli-tools.md

document-management.md

image-bitmap.md

index.md

page-manipulation.md

page-objects.md

text-processing.md

transformation.md

version-info.md

tile.json