High performance Python library for data extraction, analysis, conversion & manipulation of PDF and other documents.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
High-performance rendering of document pages to various image formats with comprehensive control over resolution, color spaces, transformations, and output quality. PyMuPDF provides efficient pixel-level rendering suitable for both display and print applications.
Render document pages to raster images with full control over output parameters.
class Page:
def get_pixmap(self, matrix: Matrix = None, colorspace: Colorspace = None,
clip: Rect = None, alpha: bool = False, annots: bool = True,
aa: int = 8) -> Pixmap:
"""
Render page to Pixmap image.
Parameters:
- matrix: transformation matrix for scaling/rotation
- colorspace: target color space (csRGB, csGRAY, csCMYK)
- clip: rectangle to limit rendering area
- alpha: include alpha channel for transparency
- annots: include annotations in rendering
- aa: anti-aliasing level (0-8, higher = smoother)
Returns:
Pixmap object containing rendered page image
"""Raster image representation with comprehensive manipulation capabilities.
class Pixmap:
def __init__(self, colorspace: Colorspace, bbox: typing.Union[Rect, IRect],
alpha: bool = False):
"""
Create empty pixmap.
Parameters:
- colorspace: color space (csRGB, csGRAY, csCMYK)
- bbox: bounding rectangle
- alpha: include alpha channel
"""
def save(self, filename: str, output: str = None, jpg_quality: int = 95) -> None:
"""
Save pixmap to file.
Parameters:
- filename: output file path
- output: format override ("png", "jpg", "pnm", "pam", "psd", "ps")
- jpg_quality: JPEG quality (0-100)
"""
def tobytes(self, output: str = "png", jpg_quality: int = 95) -> bytes:
"""
Convert pixmap to bytes.
Parameters:
- output: output format ("png", "jpg", "pnm", "pam", "psd", "ps")
- jpg_quality: JPEG quality for JPG output
Returns:
Image data as bytes
"""
def pil_save(self, filename: str, format: str = None, **kwargs) -> None:
"""
Save using PIL with additional format support.
Parameters:
- filename: output file path
- format: PIL format ("PNG", "JPEG", "TIFF", etc.)
- kwargs: additional PIL save parameters
"""
def pil_tobytes(self, format: str = "PNG", **kwargs) -> bytes:
"""
Convert to bytes using PIL.
Parameters:
- format: PIL format ("PNG", "JPEG", "TIFF", etc.)
- kwargs: additional PIL parameters
Returns:
Image data as bytes
"""
def copy(self) -> Pixmap:
"""
Create a copy of the pixmap.
Returns:
New Pixmap object with identical content
"""
def pixel(self, x: int, y: int) -> list:
"""
Get pixel color values at coordinates.
Parameters:
- x: x coordinate
- y: y coordinate
Returns:
List of color component values
"""
def set_pixel(self, x: int, y: int, color: typing.Union[list, tuple]) -> None:
"""
Set pixel color at coordinates.
Parameters:
- x: x coordinate
- y: y coordinate
- color: color values as list/tuple
"""
def invert_irect(self, irect: IRect = None) -> None:
"""
Invert colors in rectangle area.
Parameters:
- irect: rectangle to invert (None for entire pixmap)
"""
def gamma_with(self, gamma: float) -> None:
"""
Apply gamma correction.
Parameters:
- gamma: gamma correction value
"""
def tint_with(self, red: int, green: int, blue: int) -> None:
"""
Apply color tint.
Parameters:
- red: red tint value (0-255)
- green: green tint value (0-255)
- blue: blue tint value (0-255)
"""
def shrink(self, factor: int) -> None:
"""
Shrink pixmap by factor.
Parameters:
- factor: shrink factor (must be > 1)
"""
def set_rect(self, rect: IRect, color: typing.Union[list, tuple]) -> None:
"""
Fill rectangle with color.
Parameters:
- rect: rectangle to fill
- color: fill color values
"""
def clear_with(self, value: int = 255) -> None:
"""
Clear pixmap with value.
Parameters:
- value: clear value for all channels
"""
@property
def width(self) -> int:
"""Pixmap width in pixels."""
@property
def height(self) -> int:
"""Pixmap height in pixels."""
@property
def n(self) -> int:
"""Number of color components per pixel."""
@property
def stride(self) -> int:
"""Number of bytes per row."""
@property
def samples(self) -> bytes:
"""Raw pixel data as bytes."""
@property
def colorspace(self) -> Colorspace:
"""Pixmap color space."""
@property
def alpha(self) -> bool:
"""True if pixmap has alpha channel."""
@property
def size(self) -> int:
"""Size of pixel data in bytes."""
@property
def irect(self) -> IRect:
"""Integer rectangle of pixmap bounds."""
def set_origin(self, x: int, y: int) -> None:
"""
Set pixmap origin coordinates.
Parameters:
- x: x origin
- y: y origin
"""Manage color space conversions and properties.
class Colorspace:
def __init__(self, n: int):
"""
Create color space.
Parameters:
- n: number of color components
"""
@property
def name(self) -> str:
"""Color space name."""
@property
def n(self) -> int:
"""Number of color components."""
# Pre-defined color spaces
csRGB: Colorspace # RGB color space
csGRAY: Colorspace # Grayscale color space
csCMYK: Colorspace # CMYK color spaceIntermediate rendering format for reusable page rendering.
class DisplayList:
    def __init__(self, mediabox: Rect):
        """
        Create a new, empty display list.

        To obtain the display list of an existing page, call
        Page.get_displaylist() instead of this constructor.

        Parameters:
        - mediabox: the page's rectangle
        """

    def run(self, device, matrix: Matrix, area: Rect) -> None:
        """
        Run display list through device.

        Parameters:
        - device: target device
        - matrix: transformation matrix
        - area: clipping area
        """

    def get_pixmap(self, matrix: Matrix = None, colorspace: Colorspace = None,
                   alpha: bool = False, clip: Rect = None) -> Pixmap:
        """
        Render display list to pixmap.

        Parameters:
        - matrix: transformation matrix
        - colorspace: target color space
        - alpha: include alpha channel
        - clip: clipping rectangle

        Returns:
        Rendered Pixmap object
        """

    def get_textpage(self, flags: int = 0) -> TextPage:
        """
        Extract text from display list.

        Parameters:
        - flags: text extraction flags

        Returns:
        TextPage object
        """

    @property
    def rect(self) -> Rect:
        """Display list bounding rectangle."""

import pymupdf
doc = pymupdf.open("document.pdf")
page = doc.load_page(0)

# One entry per rendering: (output file, keyword arguments for get_pixmap).
# Order matters only for the order in which the files are written.
render_jobs = [
    # Default resolution
    ("page_default.png", {}),
    # 2x resolution via a scaling matrix
    ("page_hires.png", {"matrix": pymupdf.Matrix(2, 2)}),
    # Only the area inside the clip rectangle
    ("page_clipped.png", {"clip": pymupdf.Rect(100, 100, 400, 400)}),
]

for target_file, render_options in render_jobs:
    page.get_pixmap(**render_options).save(target_file)
doc.close()import pymupdf
doc = pymupdf.open("document.pdf")
page = doc.load_page(0)

# RGB rendering (default)
pix_rgb = page.get_pixmap()
pix_rgb.save("page_rgb.png")

# Grayscale rendering
pix_gray = page.get_pixmap(colorspace=pymupdf.csGRAY)
pix_gray.save("page_gray.png")

# High-quality JPEG output
pix_rgb.save("page_quality.jpg", jpg_quality=95)

# Formats not written natively go through Pillow; extra keyword arguments
# are Pillow save options. Pillow names LZW compression "tiff_lzw".
pix_rgb.pil_save("page.tiff", format="TIFF", compression="tiff_lzw")
doc.close()import pymupdf
doc = pymupdf.open("document.pdf")
page = doc.load_page(0)

# Calculate matrix for specific DPI
zoom_x = 300 / 72  # 300 DPI (default is 72 DPI)
zoom_y = 300 / 72
mat = pymupdf.Matrix(zoom_x, zoom_y)

# Anti-aliasing is a global setting, not a get_pixmap() argument.
# Level 8 is the highest (smoothest) level.
pymupdf.TOOLS.set_aa_level(8)
pix = page.get_pixmap(matrix=mat)
pix.save("page_300dpi.png")

# Check output dimensions
print(f"Rendered size: {pix.width} x {pix.height}")
doc.close()import pymupdf
doc = pymupdf.open("document.pdf")
page = doc.load_page(0)

# Render page
pix = page.get_pixmap()

# Apply image enhancements
pix.gamma_with(1.2)  # gamma above 1.0 darkens the image
# tint_with(black, white) takes two sRGB integers: the replacement for black
# and the replacement for white. Black stays unchanged (0x000000) and white
# becomes a light yellow.
pix.tint_with(0x000000, 0xFFFFE0)  # Slight yellow tint

# Get pixel information
width, height = pix.width, pix.height
print(f"Image size: {width} x {height}")
print(f"Color components: {pix.n}")
print(f"Has alpha: {pix.alpha}")

# Access individual pixels
center_x, center_y = width // 2, height // 2
pixel_color = pix.pixel(center_x, center_y)
print(f"Center pixel color: {pixel_color}")

# Save modified image
pix.save("page_enhanced.png")
doc.close()import pymupdf
def render_all_pages(doc_path: str, output_dir: str, dpi: int = 150):
    """Render all pages of a document to PNG files.

    Parameters:
    - doc_path: path of the document to render
    - output_dir: directory that receives the PNG files; created if missing
    - dpi: output resolution; pages are scaled by dpi / 72

    Files are written as page_001.png, page_002.png, ... in page order, and
    progress is printed after each page.
    """
    import os

    # The context manager closes the document even when rendering or saving
    # a page raises, so the file handle is never leaked.
    with pymupdf.open(doc_path) as doc:
        zoom = dpi / 72
        mat = pymupdf.Matrix(zoom, zoom)
        os.makedirs(output_dir, exist_ok=True)

        total_pages = doc.page_count
        for page_num in range(total_pages):
            page = doc.load_page(page_num)
            pix = page.get_pixmap(matrix=mat)
            output_path = os.path.join(output_dir, f"page_{page_num + 1:03d}.png")
            pix.save(output_path)
            print(f"Rendered page {page_num + 1}/{total_pages}")

    print(f"All pages rendered to {output_dir}")
# Usage
render_all_pages("document.pdf", "output_images", dpi=200)import pymupdf
doc = pymupdf.open("document.pdf")
page = doc.load_page(0)

# Create display list once. The DisplayList constructor takes a rectangle,
# not a page; a page's display list comes from Page.get_displaylist().
display_list = page.get_displaylist()

# Render multiple times with different parameters
matrices = [
    pymupdf.Matrix(1, 1),      # 1x scale
    pymupdf.Matrix(2, 2),      # 2x scale
    pymupdf.Matrix(0.5, 0.5),  # 0.5x scale
]

for i, mat in enumerate(matrices):
    pix = display_list.get_pixmap(matrix=mat)
    pix.save(f"page_scale_{i}.png")
doc.close()import pymupdf
doc = pymupdf.open("document.pdf")
page = doc.load_page(0)

# Render with alpha channel
pix = page.get_pixmap(alpha=True)

# Save as PNG to preserve transparency
pix.save("page_transparent.png")

# Check if image has transparency
if pix.alpha:
    print("Image has alpha channel")

    # Create version without alpha: Pixmap(source, alpha) copies a pixmap,
    # and alpha=0 drops the alpha channel.
    pix_no_alpha = pymupdf.Pixmap(pix, 0)
    pix_no_alpha.save("page_no_alpha.png")
doc.close()Install with Tessl CLI
npx tessl i tessl/pypi-pymupdf