Python bindings to PDFium for comprehensive PDF manipulation, rendering, and processing
—
Image rendering, manipulation, and extraction with support for multiple output formats including PIL Images, NumPy arrays, and raw bitmaps. The PdfBitmap class provides comprehensive bitmap handling capabilities.
Create bitmap objects from various sources including raw PDFium bitmaps, PIL Images, and new empty bitmaps.
class PdfBitmap:
@classmethod
def from_raw(cls, raw, rev_byteorder=False, ex_buffer=None) -> PdfBitmap:
"""
Create bitmap from raw PDFium bitmap handle.
Parameters:
- raw: FPDF_BITMAP, raw PDFium bitmap handle
- rev_byteorder: bool, reverse byte order for pixel data
- ex_buffer: optional external buffer for pixel data
Returns:
PdfBitmap: Bitmap object wrapping the raw handle
"""
@classmethod
def new_native(cls, width: int, height: int, format: int, rev_byteorder=False, buffer=None) -> PdfBitmap:
"""
Create new native PDFium bitmap.
Parameters:
- width: int, bitmap width in pixels
- height: int, bitmap height in pixels
- format: int, PDFium bitmap format constant
- rev_byteorder: bool, reverse byte order
- buffer: optional ctypes array for pixel data
Returns:
PdfBitmap: New native bitmap
"""
@classmethod
def new_foreign(cls, width: int, height: int, format: int, rev_byteorder=False, force_packed=False) -> PdfBitmap:
"""
Create new foreign bitmap with external buffer.
Parameters:
- width: int, bitmap width in pixels
- height: int, bitmap height in pixels
- format: int, PDFium bitmap format constant
- rev_byteorder: bool, reverse byte order
- force_packed: bool, force packed pixel format
Returns:
PdfBitmap: New foreign bitmap
"""
@classmethod
def new_foreign_simple(cls, width: int, height: int, use_alpha: bool, rev_byteorder=False) -> PdfBitmap:
"""
Create simple foreign bitmap with automatic format selection.
Parameters:
- width: int, bitmap width in pixels
- height: int, bitmap height in pixels
- use_alpha: bool, include alpha channel
- rev_byteorder: bool, reverse byte order
Returns:
PdfBitmap: New foreign bitmap with RGB or RGBA format
"""
@classmethod
def from_pil(cls, pil_image: PIL.Image, recopy=False) -> PdfBitmap:
"""
Create bitmap from PIL Image.
Parameters:
- pil_image: PIL.Image, source image
- recopy: bool, force copy of image data
Returns:
PdfBitmap: Bitmap containing PIL image data
"""

Bitmap creation examples:
import pypdfium2 as pdfium
from PIL import Image
import numpy as np
# Create empty RGB bitmap (800x600)
bitmap = pdfium.PdfBitmap.new_foreign_simple(800, 600, use_alpha=False)
# Create bitmap with alpha channel
alpha_bitmap = pdfium.PdfBitmap.new_foreign_simple(800, 600, use_alpha=True)
# Create from PIL Image
pil_img = Image.open("photo.jpg")
bitmap_from_pil = pdfium.PdfBitmap.from_pil(pil_img)
# Create native PDFium bitmap with specific format
native_bitmap = pdfium.PdfBitmap.new_native(
width=1024,
height=768,
format=pdfium.raw.FPDFBitmap_BGRA, # Using raw PDFium constants
rev_byteorder=False
)

Access bitmap metadata and properties for format and dimension information.
@property
def raw(self) -> FPDF_BITMAP:
"""Raw PDFium bitmap handle for low-level operations."""
@property
def buffer(self) -> ctypes.Array:
"""Ctypes array containing pixel data."""
@property
def width(self) -> int:
"""Bitmap width in pixels."""
@property
def height(self) -> int:
"""Bitmap height in pixels."""
@property
def stride(self) -> int:
"""Number of bytes per scanline."""
@property
def format(self) -> int:
"""PDFium bitmap format constant."""
@property
def rev_byteorder(self) -> bool:
"""Whether byte order is reversed."""
@property
def n_channels(self) -> int:
"""Number of color channels per pixel."""
@property
def mode(self) -> str:
"""PIL-compatible mode string (RGB, RGBA, etc.)."""

Property usage:
# Examine bitmap properties
bitmap = page.render(scale=2.0)
print(f"Bitmap dimensions: {bitmap.width} x {bitmap.height}")
print(f"Stride: {bitmap.stride} bytes per line")
print(f"Channels: {bitmap.n_channels}")
print(f"Mode: {bitmap.mode}")
print(f"Format: {bitmap.format}")
# Calculate memory usage
pixels = bitmap.width * bitmap.height
memory_mb = (pixels * bitmap.n_channels) / (1024 * 1024)
print(f"Memory usage: {memory_mb:.1f} MB")Get structured bitmap information as a named tuple.
def get_info(self) -> PdfBitmapInfo:
"""
Get comprehensive bitmap information.
Returns:
PdfBitmapInfo: Named tuple with bitmap metadata
"""
# PdfBitmapInfo named tuple
class PdfBitmapInfo(NamedTuple):
width: int
height: int
stride: int
format: int
rev_byteorder: bool
n_channels: int
mode: str

Example:
bitmap = page.render()
info = bitmap.get_info()
print(f"Bitmap Info:")
print(f" Dimensions: {info.width} x {info.height}")
print(f" Stride: {info.stride}")
print(f" Format: {info.format}")
print(f" Channels: {info.n_channels}")
print(f" Mode: {info.mode}")
print(f" Byte order reversed: {info.rev_byteorder}")

Modify bitmap content with drawing and filling operations.
def fill_rect(self, left: int, top: int, width: int, height: int, color: int):
"""
Fill rectangular area with solid color.
Parameters:
- left: int, left edge of rectangle in pixels
- top: int, top edge of rectangle in pixels
- width: int, rectangle width in pixels
- height: int, rectangle height in pixels
- color: int, color value (format depends on bitmap format)
"""Fill operations:
# Create bitmap and fill areas
bitmap = pdfium.PdfBitmap.new_foreign_simple(800, 600, use_alpha=False)
# Fill entire bitmap with white background
bitmap.fill_rect(0, 0, 800, 600, 0xFFFFFF) # White
# Add colored rectangles
bitmap.fill_rect(100, 150, 200, 100, 0xFF0000) # Red rectangle
bitmap.fill_rect(400, 200, 150, 150, 0x00FF00) # Green rectangle
bitmap.fill_rect(200, 350, 300, 50, 0x0000FF) # Blue rectangle
# Convert to PIL and save
pil_image = bitmap.to_pil()
pil_image.save("colored_rectangles.png")Convert bitmaps to various output formats including PIL Images and NumPy arrays.
def to_numpy(self) -> numpy.ndarray:
"""
Convert bitmap to NumPy array.
Returns:
numpy.ndarray: Image data as NumPy array with shape (height, width, channels)
"""
def to_pil(self) -> PIL.Image:
"""
Convert bitmap to PIL Image.
Returns:
PIL.Image: PIL Image object with bitmap data
"""

Conversion examples:
# Render page to bitmap
bitmap = page.render(scale=2.0, draw_annots=True)
# Convert to PIL Image for further processing
pil_image = bitmap.to_pil()
pil_image = pil_image.rotate(90) # Rotate image
pil_image.save("rotated_page.png")
# Convert to NumPy array for analysis
numpy_array = bitmap.to_numpy()
print(f"Array shape: {numpy_array.shape}")
print(f"Data type: {numpy_array.dtype}")
print(f"Min/Max values: {numpy_array.min()}, {numpy_array.max()}")
# Analyze image statistics
import numpy as np
mean_color = np.mean(numpy_array, axis=(0, 1))
print(f"Average color: {mean_color}")
# Create grayscale version
if len(numpy_array.shape) == 3 and numpy_array.shape[2] >= 3:
grayscale = np.dot(numpy_array[...,:3], [0.299, 0.587, 0.114])
gray_image = Image.fromarray(grayscale.astype(np.uint8), mode='L')
gray_image.save("grayscale_page.png")Advanced rendering options for high-quality output and specific use cases.
def render_high_quality(page, dpi=300, format='PNG'):
    """
    Render a page at high resolution, save it, and return the PIL image.
    Parameters:
    - page: page object to render (e.g. an item of a PdfDocument)
    - dpi: target resolution; converted to a scale factor relative to 72 DPI
    - format: str, 'PNG', 'JPEG' or 'TIFF' (case-insensitive)
    Returns:
    PIL.Image: the rendered page. The image is written to the current
    directory as high_quality.png / high_quality.jpg / high_quality.tiff.
    For any other format nothing is written and the image is only returned.
    """
    # Calculate scale for desired DPI (PDF canvas units are 1/72 inch)
    scale = dpi / 72.0
    # Render with quality settings.
    # NOTE(review): form fields are presumably only drawn when the document's
    # form environment was initialised before the page was loaded -- confirm.
    bitmap = page.render(
        scale=scale,
        fill_color=(255, 255, 255, 255),  # White background
        draw_annots=True,                 # Include annotations
        may_draw_forms=True,              # Include form fields
        optimize_mode='print',            # Optimize for printing
        # Anti-aliasing stays enabled: none of the no_smooth* flags is set
    )
    # Convert to PIL with high quality
    pil_image = bitmap.to_pil()
    # Save with format-specific options
    save_targets = {
        'PNG': ("high_quality.png", {'format': 'PNG', 'optimize': True}),
        'JPEG': ("high_quality.jpg", {'format': 'JPEG', 'quality': 95, 'optimize': True}),
        'TIFF': ("high_quality.tiff", {'format': 'TIFF', 'compression': 'lzw'}),
    }
    target = save_targets.get(format.upper())
    if target is not None:
        filename, options = target
        pil_image.save(filename, **options)
    return pil_image
# Usage
pdf = pdfium.PdfDocument("document.pdf")
page = pdf[0]
# Render at print quality (300 DPI)
high_res_image = render_high_quality(page, dpi=300, format='PNG')
print(f"High resolution image: {high_res_image.size}")

Process multiple pages efficiently with consistent rendering settings.
def render_all_pages(pdf_path, output_dir, dpi=150, format='PNG'):
    """
    Render all pages of a PDF to images.
    Parameters:
    - pdf_path: path of the PDF document to open
    - output_dir: directory receiving the images (created if missing)
    - dpi: target resolution; converted to a scale factor relative to 72 DPI
    - format: str, image format name (case-insensitive). 'PNG' and 'JPEG'
      get tuned save options; any other name is passed to PIL as-is.
    Files are named page_001.<format>, page_002.<format>, ...
    """
    import os
    # Make sure the target directory exists before the first save
    os.makedirs(output_dir, exist_ok=True)
    fmt = format.upper()
    pdf = pdfium.PdfDocument(pdf_path)
    try:
        # Read the page count while the document is still open; it is
        # reported again after the document has been closed.
        n_pages = len(pdf)
        for page_no, page in enumerate(pdf, start=1):
            print(f"Rendering page {page_no}/{n_pages}")
            # Render page
            bitmap = page.render(
                scale=dpi/72.0,
                fill_color=(255, 255, 255, 255),  # Opaque white (RGBA)
                draw_annots=True
            )
            # Convert and save
            pil_image = bitmap.to_pil()
            filename = f"page_{page_no:03d}.{format.lower()}"
            filepath = os.path.join(output_dir, filename)
            if fmt == 'PNG':
                pil_image.save(filepath, format='PNG', optimize=True)
            elif fmt == 'JPEG':
                pil_image.save(filepath, format='JPEG', quality=90)
            else:
                # Other formats: let PIL save with its defaults (it raises
                # for names it does not know) instead of skipping silently
                pil_image.save(filepath, format=fmt)
            print(f"Saved: {filepath}")
    finally:
        # Close the document even if rendering or saving failed
        pdf.close()
    print(f"Rendered {n_pages} pages")
# Usage
render_all_pages("document.pdf", "output_images/", dpi=200, format='PNG')

Efficient memory handling for large bitmaps and batch processing.
def process_large_pdf_efficiently(pdf_path):
"""Process large PDF with memory management."""
pdf = pdfium.PdfDocument(pdf_path)
for i in range(len(pdf)):
# Process one page at a time
page = pdf[i]
# Create text page for analysis
textpage = page.get_textpage()
char_count = textpage.count_chars()
if char_count > 1000: # Only render text-heavy pages
# Render at reasonable resolution
bitmap = page.render(scale=1.5)
# Process bitmap
numpy_array = bitmap.to_numpy()
# Analyze or save as needed
print(f"Page {i+1}: {char_count} chars, image shape {numpy_array.shape}")
# Clean up explicitly to free memory
del bitmap
del numpy_array
# Clean up page objects
del textpage
del page
pdf.close()Common PDFium bitmap format constants available through the raw module:
# Available through pypdfium2.raw
FPDFBitmap_Unknown = 0 # Unknown format
FPDFBitmap_Gray = 1 # Grayscale
FPDFBitmap_BGR = 2 # BGR 24-bit
FPDFBitmap_BGRx = 3 # BGRx 32-bit
FPDFBitmap_BGRA = 4 # BGRA 32-bit

Usage:
import pypdfium2 as pdfium
# Create bitmap with specific format
bitmap = pdfium.PdfBitmap.new_native(
800, 600,
pdfium.raw.FPDFBitmap_BGRA,
rev_byteorder=False
)

Custom color scheme for rendering PDF pages with specific color mappings.
class PdfColorScheme:
"""
Rendering color scheme for customizing PDF page appearance.
Allows specification of custom colors for different PDF elements
during rendering operations. Each color should be provided as
RGBA values with components ranging from 0 to 255.
Attributes:
- colors: dict, color mappings for different PDF elements
"""
def __init__(self, path_fill: list[int], path_stroke: list[int], text_fill: list[int], text_stroke: list[int]):
"""
Initialize color scheme with element colors.
Parameters:
- path_fill: list[int], RGBA color for path fill operations [R, G, B, A]
- path_stroke: list[int], RGBA color for path stroke operations [R, G, B, A]
- text_fill: list[int], RGBA color for text fill operations [R, G, B, A]
- text_stroke: list[int], RGBA color for text stroke operations [R, G, B, A]
Each color component should be an integer from 0-255.
"""
def convert(self, rev_byteorder: bool) -> FPDF_COLORSCHEME:
"""
Convert color scheme to PDFium format.
Parameters:
- rev_byteorder: bool, whether to use reverse byte order
Returns:
FPDF_COLORSCHEME: PDFium-compatible color scheme object
Internal method used during rendering to convert Python color
values to the format expected by PDFium's rendering engine.
"""

Creating and using custom color schemes:
import pypdfium2 as pdfium
# Define custom colors (RGBA values 0-255)
dark_theme = pdfium.PdfColorScheme(
path_fill=[40, 40, 40, 255], # Dark gray for filled shapes
path_stroke=[100, 100, 100, 255], # Light gray for shape outlines
text_fill=[220, 220, 220, 255], # Light gray for text
text_stroke=[255, 255, 255, 255] # White for text outlines
)
# Render page with custom colors
pdf = pdfium.PdfDocument("document.pdf")
page = pdf[0]
bitmap = page.render(
scale=2.0,
color_scheme=dark_theme,
fill_color=(30, 30, 30, 255) # Dark background
)
# Save dark theme version
pil_image = bitmap.to_pil()
pil_image.save("document_dark_theme.png")
# High contrast color scheme for accessibility
high_contrast = pdfium.PdfColorScheme(
path_fill=[0, 0, 0, 255], # Black for shapes
path_stroke=[0, 0, 0, 255], # Black for outlines
text_fill=[0, 0, 0, 255], # Black for text
text_stroke=[0, 0, 0, 255] # Black for text outlines
)
# Render with high contrast on white background
high_contrast_bitmap = page.render(
scale=2.0,
color_scheme=high_contrast,
fill_color=(255, 255, 255, 255) # White background
)
high_contrast_pil = high_contrast_bitmap.to_pil()
high_contrast_pil.save("document_high_contrast.png")

Advanced color scheme examples:
def create_sepia_scheme():
    """
    Build a sepia-toned PdfColorScheme.
    Returns:
    PdfColorScheme: sepia brown path fill; dark sepia for path strokes,
    text fill and text strokes
    """
    # One dark tone is shared by every element except the path fill
    dark_tone = [101, 67, 33, 255]
    return pdfium.PdfColorScheme(
        path_fill=[160, 130, 98, 255],  # Sepia brown
        path_stroke=dark_tone,
        text_fill=dark_tone,
        text_stroke=dark_tone,
    )
def create_blueprint_scheme():
    """
    Build a blueprint-style PdfColorScheme.
    Returns:
    PdfColorScheme: blueprint blue path fill; white for path strokes,
    text fill and text strokes
    """
    # White is used for every line and for all text
    line_white = [255, 255, 255, 255]
    return pdfium.PdfColorScheme(
        path_fill=[0, 162, 232, 255],  # Blueprint blue
        path_stroke=line_white,
        text_fill=line_white,
        text_stroke=line_white,
    )
def render_with_multiple_themes(page, output_prefix):
    """
    Render one page once per theme and save each result as a PNG.
    Parameters:
    - page: page object to render
    - output_prefix: str, files are written as <output_prefix>_<theme>.png
    Themes rendered, in order: original, sepia, blueprint, dark.
    """
    # Each entry: (theme name, color scheme or None, RGBA background)
    variants = [
        ('original', None, (255, 255, 255, 255)),  # Original colors on white
        ('sepia', create_sepia_scheme(), (245, 235, 215, 255)),  # Antique white
        ('blueprint', create_blueprint_scheme(), (25, 25, 112, 255)),  # Dark blue
        (
            'dark',
            pdfium.PdfColorScheme(
                path_fill=[60, 60, 60, 255],
                path_stroke=[120, 120, 120, 255],
                text_fill=[200, 200, 200, 255],
                text_stroke=[240, 240, 240, 255],
            ),
            (20, 20, 20, 255),  # Very dark gray
        ),
    ]
    for name, scheme, background in variants:
        print(f"Rendering {name} theme...")
        rendered = page.render(
            scale=2.0,
            color_scheme=scheme,
            fill_color=background,
        )
        target = f"{output_prefix}_{name}.png"
        rendered.to_pil().save(target)
        print(f"Saved: {target}")
# Usage
pdf = pdfium.PdfDocument("document.pdf")
page = pdf[0]
render_with_multiple_themes(page, "themed_page")
# Batch process with custom theme
def batch_render_with_theme(pdf_path, color_scheme, output_dir):
    """
    Render all pages with custom color scheme.
    Parameters:
    - pdf_path: path of the PDF document to open
    - color_scheme: color scheme passed to every page render
    - output_dir: directory receiving the images (created if missing)
    Files are named page_001_themed.png, page_002_themed.png, ...
    """
    import os
    pdf = pdfium.PdfDocument(pdf_path)
    try:
        os.makedirs(output_dir, exist_ok=True)
        for page_no, page in enumerate(pdf, start=1):
            bitmap = page.render(
                scale=1.5,
                color_scheme=color_scheme,
                fill_color=(245, 245, 245, 255)  # Light background
            )
            pil_image = bitmap.to_pil()
            filename = f"page_{page_no:03d}_themed.png"
            filepath = os.path.join(output_dir, filename)
            pil_image.save(filepath)
            print(f"Rendered page {page_no} with custom theme")
    finally:
        # Close the document even if rendering or saving failed
        pdf.close()
# Apply sepia theme to entire document
sepia_theme = create_sepia_scheme()
batch_render_with_theme("document.pdf", sepia_theme, "sepia_output/")

Install with Tessl CLI
npx tessl i tessl/pypi-pypdfium2