Read and write PDFs with Python, powered by qpdf
—
Page-level operations for PDF documents: manipulation, rotation, content parsing, overlays, and coordinate transformations.
The Page class exposes these operations for a single page, covering content manipulation, geometric transformations, and overlay functionality.
class Page(Object):
    """
    PDF page object with content and properties.

    Represents a single page in a PDF document with all its content,
    resources, and geometric properties.
    """

    def rotate(self, angle: int, *, relative: bool = True) -> None:
        """
        Rotate the page by the specified angle.

        Parameters:
        - angle (int): Rotation angle in degrees (must be a multiple of 90)
        - relative (bool): If True, rotate relative to the current rotation;
          if False, set the absolute rotation

        Raises:
            ValueError: If angle is not a multiple of 90 degrees
        """

    # The annotation is quoted because the name ``Page`` is not bound yet
    # while the class body is still being executed.
    def add_overlay(self, other: "Page") -> None:
        """
        Add another page as an overlay on top of this page.

        The overlay page content is drawn on top of this page's content.
        Both pages must be from the same PDF or compatible PDFs.

        Parameters:
        - other (Page): Page to use as overlay

        Raises:
            ForeignObjectError: If pages are from incompatible PDFs
        """

    def add_underlay(self, other: "Page") -> None:
        """
        Add another page as an underlay beneath this page.

        The underlay page content is drawn beneath this page's content.
        Both pages must be from the same PDF or compatible PDFs.

        Parameters:
        - other (Page): Page to use as underlay

        Raises:
            ForeignObjectError: If pages are from incompatible PDFs
        """

    def parse_contents(self) -> list[ContentStreamInstruction]:
        """
        Parse the page's content stream into individual instructions.

        Returns:
            list[ContentStreamInstruction]: List of content stream
            instructions that make up the page content

        Raises:
            PdfParsingError: If the content stream cannot be parsed
        """

    @property
    def mediabox(self) -> Rectangle:
        """
        The page's media box defining the physical page boundaries.

        The media box defines the boundaries of the physical medium
        on which the page is intended to be displayed or printed.

        Returns:
            Rectangle: Media box coordinates (llx, lly, urx, ury)
        """

    @property
    def cropbox(self) -> Rectangle:
        """
        The page's crop box defining the visible page region.

        The crop box defines the region to which the contents of the page
        should be clipped when displayed or printed.

        Returns:
            Rectangle: Crop box coordinates (llx, lly, urx, ury)
        """

    @property
    def trimbox(self) -> Rectangle:
        """
        The page's trim box defining the intended finished page size.

        Returns:
            Rectangle: Trim box coordinates (llx, lly, urx, ury)
        """

    @property
    def artbox(self) -> Rectangle:
        """
        The page's art box defining the meaningful content area.

        Returns:
            Rectangle: Art box coordinates (llx, lly, urx, ury)
        """

    @property
    def bleedbox(self) -> Rectangle:
        """
        The page's bleed box defining the clipping path for production.

        Returns:
            Rectangle: Bleed box coordinates (llx, lly, urx, ury)
        """

    @property
    def resources(self) -> Dictionary:
        """
        The page's resource dictionary containing fonts, images, etc.

        Returns:
            Dictionary: Resource dictionary with fonts, XObjects,
            patterns, etc.
        """

    @property
    def images(self) -> dict[Name, PdfImage]:
        """
        Dictionary of images referenced by this page.

        Returns:
            dict[Name, PdfImage]: Mapping of image names to PdfImage objects
        """

    @property
    def rotation(self) -> int:
        """
        Current rotation of the page in degrees.

        Returns:
            int: Rotation angle (0, 90, 180, or 270 degrees)
        """

    @property
    def contents(self) -> Object:
        """
        The page's content stream(s).

        May be a single Stream object or an Array of Stream objects.

        Returns:
            Object: Content stream or array of content streams
        """


# Geometric rectangle representation for page boundaries and coordinate operations.
class Rectangle:
    """
    PDF rectangle representing a bounding box with four coordinates.

    Coordinates are specified as (llx, lly, urx, ury) where:
    - llx, lly: lower-left corner coordinates
    - urx, ury: upper-right corner coordinates
    """

    def __init__(self, llx: float, lly: float, urx: float, ury: float) -> None:
        """
        Create a rectangle with the specified coordinates.

        Parameters:
        - llx (float): Lower-left X coordinate
        - lly (float): Lower-left Y coordinate
        - urx (float): Upper-right X coordinate
        - ury (float): Upper-right Y coordinate
        """

    @property
    def width(self) -> float:
        """
        Rectangle width (urx - llx).

        Returns:
            float: Width of the rectangle
        """

    @property
    def height(self) -> float:
        """
        Rectangle height (ury - lly).

        Returns:
            float: Height of the rectangle
        """

    @property
    def lower_left(self) -> tuple[float, float]:
        """
        Lower-left corner coordinates.

        Returns:
            tuple[float, float]: (llx, lly) coordinates
        """

    @property
    def upper_right(self) -> tuple[float, float]:
        """
        Upper-right corner coordinates.

        Returns:
            tuple[float, float]: (urx, ury) coordinates
        """

    @property
    def lower_right(self) -> tuple[float, float]:
        """
        Lower-right corner coordinates.

        Returns:
            tuple[float, float]: (urx, lly) coordinates
        """

    @property
    def upper_left(self) -> tuple[float, float]:
        """
        Upper-left corner coordinates.

        Returns:
            tuple[float, float]: (llx, ury) coordinates
        """

    # Self-referencing annotations are quoted: the name ``Rectangle`` is not
    # bound until the class body has finished executing.
    def __and__(self, other: "Rectangle") -> "Rectangle":
        """
        Rectangle intersection (& operator).

        Parameters:
        - other (Rectangle): Rectangle to intersect with

        Returns:
            Rectangle: Intersection of the two rectangles
        """

    def __le__(self, other: "Rectangle") -> bool:
        """
        Test if this rectangle is contained within another (<= operator).

        Parameters:
        - other (Rectangle): Rectangle to test containment against

        Returns:
            bool: True if this rectangle is fully contained in other
        """

    # ``==`` may be invoked with any object, so the parameter is typed as
    # ``object`` rather than narrowed to Rectangle.
    def __eq__(self, other: object) -> bool:
        """
        Test rectangle equality.

        Parameters:
        - other (object): Object to compare with, normally another Rectangle

        Returns:
            bool: True if rectangles have same coordinates
        """


# Objects representing parsed content stream instructions for low-level content manipulation.
class ContentStreamInstruction:
    """
    One parsed content stream instruction: an operator plus its operands.

    A content stream is a sequence of such instructions; together they
    define what appears on a page (text, graphics, images, etc.).
    """

    @property
    def operands(self) -> list[Object]:
        """
        Operand objects belonging to this instruction.

        Returns:
            list[Object]: PDF objects that serve as operands to the operator
        """

    @property
    def operator(self) -> Operator:
        """
        PDF operator of this instruction.

        Returns:
            Operator: PDF operator object (e.g., 'Tj' for show text)
        """
class ContentStreamInlineImage:
    """
    Inline image encountered inside a content stream.

    Covers images that are embedded directly in the content stream
    instead of being referenced as external objects.
    """

    @property
    def operands(self) -> list[Object]:
        """
        Operands that accompany the inline image.

        Returns:
            list[Object]: Image operands
        """

    @property
    def operator(self) -> Operator:
        """
        Operator that accompanies this inline image.

        Returns:
            Operator: Usually the 'EI' (end inline image) operator
        """

    @property
    def iimage(self) -> PdfInlineImage:
        """
        The inline image itself.

        Returns:
            PdfInlineImage: Inline image that can be processed or extracted
        """


import pikepdf
# Open the PDF in a context manager so it is closed even if a step fails
with pikepdf.open('document.pdf') as pdf:
    # Get the first page
    page = pdf.pages[0]

    # Rotate page 90 degrees clockwise, relative to its current rotation
    page.rotate(90, relative=True)

    # Get page dimensions
    media_box = page.mediabox
    print(f"Page size: {media_box.width} x {media_box.height} points")

    # Access page rotation
    # NOTE(review): confirm that the installed pikepdf exposes
    # Page.rotation; the underlying page dictionary key is /Rotate.
    current_rotation = page.rotation
    print(f"Current rotation: {current_rotation} degrees")

    pdf.save('rotated_document.pdf')

import pikepdf
# Open both PDFs in context managers; the overlay source has to stay open
# until the combined document has been saved.
with pikepdf.open('main_document.pdf') as main_pdf:
    with pikepdf.open('overlay_content.pdf') as overlay_pdf:
        # Get pages
        main_page = main_pdf.pages[0]
        overlay_page = overlay_pdf.pages[0]

        # Add as overlay (on top of existing content).
        # NOTE(review): the foreign page is passed directly instead of going
        # through main_pdf.copy_foreign(); pikepdf documents copy_foreign()
        # for plain objects and rejects Page objects - confirm against the
        # installed version.
        main_page.add_overlay(overlay_page)

        # Or add as underlay (beneath existing content)
        # main_page.add_underlay(overlay_page)

        main_pdf.save('document_with_overlay.pdf')

import pikepdf
# The context manager closes the PDF even if a step below raises
with pikepdf.open('document.pdf') as pdf:
    page = pdf.pages[0]

    # Access different page boxes
    media_box = page.mediabox
    crop_box = page.cropbox
    trim_box = page.trimbox
    art_box = page.artbox
    bleed_box = page.bleedbox

    print(f"Media box: {media_box.width} x {media_box.height}")
    print(f"Crop box: {crop_box.width} x {crop_box.height}")

    # Modify crop box to create margins: shrink the media box by the same
    # amount on every side
    margin = 36  # 0.5 inch margin
    llx, lly = media_box.lower_left
    urx, ury = media_box.upper_right
    new_crop = pikepdf.Rectangle(
        llx + margin,
        lly + margin,
        urx - margin,
        ury - margin,
    )
    page.cropbox = new_crop

    pdf.save('cropped_document.pdf')

import pikepdf
# The context manager closes the PDF even if parsing raises
with pikepdf.open('document.pdf') as pdf:
    page = pdf.pages[0]

    # Parse page content into instructions.
    # NOTE(review): pikepdf's documented call that returns the instruction
    # list is the module-level parse_content_stream(); Page.parse_contents()
    # expects a StreamParser callback - confirm against the installed version.
    instructions = pikepdf.parse_content_stream(page)

    # Iterate through content stream instructions
    for instruction in instructions:
        operator = str(instruction.operator)
        operands = instruction.operands

        # Look for text showing operations
        if operator == 'Tj':  # Show text
            text_string = operands[0]
            print(f"Found text: {text_string}")
        # Look for image placement operations
        elif operator == 'Do':  # Invoke XObject
            xobject_name = operands[0]
            print(f"Found XObject reference: {xobject_name}")

import pikepdf
# The context manager closes the PDF even if a lookup below raises
with pikepdf.open('document.pdf') as pdf:
    page = pdf.pages[0]

    # Access page resources
    resources = page.resources

    # Check for fonts
    if '/Font' in resources:
        fonts = resources['/Font']
        print(f"Page uses {len(fonts)} fonts:")
        for font_name, font_obj in fonts.items():
            print(f"  {font_name}: {font_obj.get('/BaseFont', 'Unknown')}")

    # Check for images
    if '/XObject' in resources:
        xobjects = resources['/XObject']
        for name, obj in xobjects.items():
            if obj.get('/Subtype') == pikepdf.Name.Image:
                print(f"Found image: {name}")

    # Access images through convenience property.
    # NOTE(review): pikepdf documents page.images as yielding raw image
    # objects, so each one is wrapped in PdfImage to read its width, height
    # and bits_per_component - confirm against the installed version.
    for name, raw_image in page.images.items():
        image = pikepdf.PdfImage(raw_image)
        print(
            f"Image {name}: {image.width}x{image.height}, "
            f"{image.bits_per_component} bpc"
        )

import pikepdf
# Build a full-page rectangle and an inset rectangle for the margins
page_rect = pikepdf.Rectangle(0, 0, 612, 792)  # US Letter
margin_rect = pikepdf.Rectangle(36, 36, 576, 756)  # 0.5" margins

# Report the size of each rectangle
print(f"Page dimensions: {page_rect.width} x {page_rect.height}")
print(f"Margin area: {margin_rect.width} x {margin_rect.height}")

# Containment check via the <= operator
is_contained = margin_rect <= page_rect
print(f"Margin rect fits in page: {is_contained}")

# Intersection via the & operator
if margin_rect <= page_rect:
    intersection = page_rect & margin_rect
    print(f"Intersection: {intersection.width} x {intersection.height}")

# Corner coordinates of the page rectangle
ll, ur = page_rect.lower_left, page_rect.upper_right
print(f"Lower-left: {ll}, Upper-right: {ur}")

import pikepdf
# The context manager closes the source PDF even if a step below raises
with pikepdf.open('multi_page_document.pdf') as pdf:
    # Rotate all pages. relative=False sets the absolute rotation, so the
    # outcome does not depend on whatever rotation a page already has.
    for i, page in enumerate(pdf.pages):
        if i % 2 == 0:  # Even pages (0, 2, 4...)
            page.rotate(0, relative=False)  # Portrait
        else:  # Odd pages (1, 3, 5...)
            page.rotate(90, relative=False)  # Landscape
        print(f"Page {i+1}: {page.mediabox.width} x {page.mediabox.height}")

    # Extract pages into separate PDFs; each output file is closed as soon
    # as it has been written.
    for i, page in enumerate(pdf.pages):
        with pikepdf.new() as single_page_pdf:
            single_page_pdf.pages.append(page)
            single_page_pdf.save(f'page_{i+1}.pdf')

# Install with Tessl CLI
# npx tessl i tessl/pypi-pikepdf