tessl/pypi-pikepdf

Read and write PDFs with Python, powered by qpdf

—

Pending

Overview

Eval results

Files

Page Operations

Name: tessl/pypi-pikepdf
Author: tessl

Page-level operations including manipulation, rotation, content parsing, overlays, and coordinate transformations. These capabilities enable comprehensive page handling for PDF documents.

Capabilities

Page Class

The Page class provides comprehensive page-level operations including content manipulation, geometric transformations, and overlay functionality.

class Page(Object):
    """
    A single page of a PDF document.

    Bundles the page's content stream(s), its resource dictionary and its
    geometric properties (page boxes and rotation). Pages are obtained by
    indexing a document's page list, e.g. ``pdf.pages[0]``.
    """
    
    def rotate(self, angle: int, *, relative: bool = True) -> None:
        """
        Rotate the page by the specified angle.

        Positive angles rotate clockwise (``rotate(90)`` turns the page
        90 degrees clockwise).

        Parameters:
        - angle (int): Rotation angle in degrees (must be multiple of 90)
        - relative (bool): If True, add ``angle`` to the current rotation;
                          if False, set the rotation to exactly ``angle``

        Raises:
        - ValueError: If angle is not a multiple of 90 degrees
        """
    
    def add_overlay(self, other: "Page") -> None:
        """
        Add another page as an overlay on top of this page.

        The overlay page content is drawn on top of this page's content.
        Both pages must be from the same PDF or compatible PDFs.

        NOTE(review): upstream pikepdf documents overlaying a page taken
        from a different PDF directly - confirm the compatibility
        requirement and the ForeignObjectError claim below.

        Parameters:
        - other (Page): Page to use as overlay

        Raises:
        - ForeignObjectError: If pages are from incompatible PDFs
        """
    
    def add_underlay(self, other: "Page") -> None:
        """
        Add another page as an underlay beneath this page.

        The underlay page content is drawn beneath this page's content.
        Both pages must be from the same PDF or compatible PDFs.

        NOTE(review): same caveat as add_overlay - confirm whether pages
        from a different PDF are really rejected.

        Parameters:
        - other (Page): Page to use as underlay

        Raises:
        - ForeignObjectError: If pages are from incompatible PDFs
        """
    
    def parse_contents(self) -> list[ContentStreamInstruction]:
        """
        Parse the page's content stream into individual instructions.

        NOTE(review): upstream pikepdf exposes the list-returning parser as
        the module-level ``pikepdf.parse_content_stream(page)``, while
        ``Page.parse_contents`` there takes a stream parser argument -
        confirm this signature before relying on it.

        Returns:
        list[ContentStreamInstruction]: Content stream instructions that
                                      make up the page content, in order

        Raises:
        - PdfParsingError: If content stream cannot be parsed
        """
    
    @property
    def mediabox(self) -> Rectangle:
        """
        The page's media box defining the physical page boundaries.

        The media box defines the boundaries of the physical medium
        on which the page is intended to be displayed or printed.

        Returns:
        Rectangle: Media box coordinates (llx, lly, urx, ury)
        """
    
    @property
    def cropbox(self) -> Rectangle:
        """
        The page's crop box defining the visible page region.

        The crop box defines the region to which the contents of the page
        should be clipped when displayed or printed.

        NOTE(review): declared read-only here, but cropping is done by
        assigning to it (``page.cropbox = pikepdf.Rectangle(...)``) -
        confirm that a setter is exposed.

        Returns:
        Rectangle: Crop box coordinates (llx, lly, urx, ury)
        """
    
    @property
    def trimbox(self) -> Rectangle:
        """
        The page's trim box defining the intended finished page size.

        Returns:
        Rectangle: Trim box coordinates (llx, lly, urx, ury)
        """
    
    @property
    def artbox(self) -> Rectangle:
        """
        The page's art box defining the meaningful content area.

        Returns:
        Rectangle: Art box coordinates (llx, lly, urx, ury)
        """
    
    @property
    def bleedbox(self) -> Rectangle:
        """
        The page's bleed box defining the clipping path for production.

        Returns:
        Rectangle: Bleed box coordinates (llx, lly, urx, ury)
        """
    
    @property
    def resources(self) -> Dictionary:
        """
        The page's resource dictionary containing fonts, images, etc.

        Entries are looked up by PDF name, e.g. ``resources['/Font']`` or
        ``resources['/XObject']``; a category may be absent, so test with
        ``in`` before indexing.

        Returns:
        Dictionary: Resource dictionary with fonts, XObjects, patterns, etc.
        """
    
    @property
    def images(self) -> dict[Name, PdfImage]:
        """
        Dictionary of images referenced by this page.

        NOTE(review): upstream pikepdf yields the raw image objects here,
        which are wrapped with ``pikepdf.PdfImage(obj)`` to inspect them -
        confirm the value type.

        Returns:
        dict[Name, PdfImage]: Mapping of image names to PdfImage objects
        """
    
    @property
    def rotation(self) -> int:
        """
        Current rotation of the page in degrees.

        Returns:
        int: Rotation angle (0, 90, 180, or 270 degrees)
        """
    
    @property
    def contents(self) -> Object:
        """
        The page's content stream(s).

        May be a single Stream object or Array of Stream objects, so
        callers must handle both shapes.

        Returns:
        Object: Content stream or array of content streams
        """

Rectangle Class

Geometric rectangle representation for page boundaries and coordinate operations.

class Rectangle:
    """
    PDF rectangle representing a bounding box with four coordinates.

    Coordinates are specified as (llx, lly, urx, ury) where:
    - llx, lly: lower-left corner coordinates
    - urx, ury: upper-right corner coordinates

    Example: ``Rectangle(0, 0, 612, 792)`` describes a US Letter page.
    """
    
    def __init__(self, llx: float, lly: float, urx: float, ury: float) -> None:
        """
        Create a rectangle with the specified coordinates.

        Parameters:
        - llx (float): Lower-left X coordinate
        - lly (float): Lower-left Y coordinate
        - urx (float): Upper-right X coordinate
        - ury (float): Upper-right Y coordinate
        """
    
    @property
    def width(self) -> float:
        """
        Rectangle width (urx - llx).

        Returns:
        float: Width of the rectangle
        """
    
    @property
    def height(self) -> float:
        """
        Rectangle height (ury - lly).

        Returns:
        float: Height of the rectangle
        """
    
    @property
    def lower_left(self) -> tuple[float, float]:
        """
        Lower-left corner coordinates.

        Returns:
        tuple[float, float]: (llx, lly) coordinates
        """
    
    @property
    def upper_right(self) -> tuple[float, float]:
        """
        Upper-right corner coordinates.

        Returns:
        tuple[float, float]: (urx, ury) coordinates
        """
    
    @property
    def lower_right(self) -> tuple[float, float]:
        """
        Lower-right corner coordinates.

        Returns:
        tuple[float, float]: (urx, lly) coordinates
        """
    
    @property
    def upper_left(self) -> tuple[float, float]:
        """
        Upper-left corner coordinates.

        Returns:
        tuple[float, float]: (llx, ury) coordinates
        """
    
    def __and__(self, other: "Rectangle") -> "Rectangle":
        """
        Rectangle intersection (& operator), e.g. ``page_rect & margin_rect``.

        Parameters:
        - other (Rectangle): Rectangle to intersect with

        Returns:
        Rectangle: Intersection of the two rectangles
        """
    
    def __le__(self, other: "Rectangle") -> bool:
        """
        Test if this rectangle is contained within another (<= operator),
        e.g. ``margin_rect <= page_rect``.

        Parameters:
        - other (Rectangle): Rectangle to test containment against

        Returns:
        bool: True if this rectangle is fully contained in other
        """
    
    def __eq__(self, other: object) -> bool:
        """
        Test rectangle equality.

        Parameters:
        - other (Rectangle): Rectangle to compare with; annotated as
          ``object`` because ``==`` may be invoked with any operand

        Returns:
        bool: True if rectangles have same coordinates
        """

Content Stream Instructions

Objects representing parsed content stream instructions for low-level content manipulation.

class ContentStreamInstruction:
    """
    Parsed content stream instruction representing an operator and its operands.

    Content streams contain sequences of instructions that define what
    appears on a page (text, graphics, images, etc.). Each instruction
    pairs one operator with the operands that apply to it.
    """
    
    @property
    def operands(self) -> list[Object]:
        """
        List of operand objects for this instruction.

        Operands are indexed positionally; for example the first operand
        of a 'Tj' instruction is the text being shown and the first
        operand of a 'Do' instruction is the XObject name.

        Returns:
        list[Object]: PDF objects that serve as operands to the operator
        """
    
    @property
    def operator(self) -> Operator:
        """
        The PDF operator for this instruction.

        Compare against an operator name through ``str()``, e.g.
        ``str(instruction.operator) == 'Tj'``.

        Returns:
        Operator: PDF operator object (e.g., 'Tj' for show text)
        """

class ContentStreamInlineImage:
    """
    Inline image found within a content stream.

    Represents images embedded directly in the content stream
    rather than referenced as external objects. It exposes the same
    ``operands`` / ``operator`` pair as ContentStreamInstruction, plus
    ``iimage`` for the image itself.
    """
    
    @property
    def operands(self) -> list[Object]:
        """
        Operands associated with the inline image.

        Returns:
        list[Object]: Image operands
        """
    
    @property
    def operator(self) -> Operator:
        """
        The operator associated with this inline image.

        NOTE(review): upstream pikepdf documents this as always being the
        fictitious operator 'INLINE IMAGE' rather than 'EI' - confirm
        before matching on the operator name.

        Returns:
        Operator: Usually the 'EI' (end inline image) operator
        """
    
    @property
    def iimage(self) -> PdfInlineImage:
        """
        The inline image object.

        Returns:
        PdfInlineImage: Inline image that can be processed or extracted
        """

Usage Examples

Basic Page Operations

import pikepdf

# Open the PDF as a context manager so the file is closed even if one of
# the steps below raises.
with pikepdf.open('document.pdf') as pdf:
    # Get the first page
    page = pdf.pages[0]

    # Rotate page 90 degrees clockwise, relative to its current rotation
    page.rotate(90, relative=True)

    # Get page dimensions
    media_box = page.mediabox
    print(f"Page size: {media_box.width} x {media_box.height} points")

    # Access page rotation
    current_rotation = page.rotation
    print(f"Current rotation: {current_rotation} degrees")

    # Save before the block exits, while the PDF is still open
    pdf.save('rotated_document.pdf')

Page Overlays and Underlays

import pikepdf

# Open both PDFs as context managers so they are closed even if an error
# occurs part-way through.
with pikepdf.open('main_document.pdf') as main_pdf:
    with pikepdf.open('overlay_content.pdf') as overlay_pdf:
        # Get pages
        main_page = main_pdf.pages[0]
        overlay_page = overlay_pdf.pages[0]

        # Add as overlay (on top of existing content). The page from the
        # other PDF is passed directly: Pdf.copy_foreign() does not accept
        # page objects, and add_overlay() brings the foreign page's content
        # into the destination PDF itself.
        main_page.add_overlay(overlay_page)

        # Or add as underlay (beneath existing content)
        # main_page.add_underlay(overlay_page)

        # Save while the overlay source is still open
        main_pdf.save('document_with_overlay.pdf')

Working with Page Boxes

import pikepdf

# The context manager closes the PDF even if an error occurs below
with pikepdf.open('document.pdf') as pdf:
    page = pdf.pages[0]

    # Access different page boxes
    media_box = page.mediabox
    crop_box = page.cropbox
    trim_box = page.trimbox
    art_box = page.artbox
    bleed_box = page.bleedbox

    print(f"Media box: {media_box.width} x {media_box.height}")
    print(f"Crop box: {crop_box.width} x {crop_box.height}")

    # Modify crop box to create margins: shrink the media box by 36 points
    # (0.5 inch) on every side
    margin = 36
    llx, lly = media_box.lower_left
    urx, ury = media_box.upper_right
    new_crop = pikepdf.Rectangle(
        llx + margin,
        lly + margin,
        urx - margin,
        ury - margin,
    )
    page.cropbox = new_crop

    # Save before the block exits, while the PDF is still open
    pdf.save('cropped_document.pdf')

Content Stream Parsing

import pikepdf

# The context manager closes the PDF even if parsing raises
with pikepdf.open('document.pdf') as pdf:
    page = pdf.pages[0]

    # Parse page content into instructions. The module-level
    # parse_content_stream() returns the instruction list; calling
    # page.parse_contents() with no arguments does not.
    instructions = pikepdf.parse_content_stream(page)

    # Iterate through content stream instructions
    for instruction in instructions:
        # Operators compare by name once converted to str
        operator_name = str(instruction.operator)
        operands = instruction.operands

        # Look for text showing operations
        if operator_name == 'Tj':  # Show text
            text_string = operands[0]
            print(f"Found text: {text_string}")

        # Look for image placement operations
        elif operator_name == 'Do':  # Invoke XObject
            xobject_name = operands[0]
            print(f"Found XObject reference: {xobject_name}")

Page Resource Management

import pikepdf

# The context manager closes the PDF even if an error occurs below
with pikepdf.open('document.pdf') as pdf:
    page = pdf.pages[0]

    # Access page resources
    resources = page.resources

    # Check for fonts (the /Font entry is optional)
    if '/Font' in resources:
        fonts = resources['/Font']
        print(f"Page uses {len(fonts)} fonts:")
        for font_name, font_obj in fonts.items():
            print(f"  {font_name}: {font_obj.get('/BaseFont', 'Unknown')}")

    # Check for images among the XObjects (form XObjects are skipped)
    if '/XObject' in resources:
        xobjects = resources['/XObject']
        for name, obj in xobjects.items():
            if obj.get('/Subtype') == pikepdf.Name.Image:
                print(f"Found image: {name}")

    # Access images through convenience property. The mapping holds the raw
    # image objects, so wrap each one in PdfImage to read its dimensions and
    # bit depth.
    page_images = page.images
    for name, raw_image in page_images.items():
        image = pikepdf.PdfImage(raw_image)
        print(
            f"Image {name}: {image.width}x{image.height}, "
            f"{image.bits_per_component} bpc"
        )

Rectangle Operations

import pikepdf

# Two rectangles: a US Letter page and the area left inside 0.5" margins
page_rect = pikepdf.Rectangle(0, 0, 612, 792)
margin_rect = pikepdf.Rectangle(36, 36, 576, 756)

# Report the size of each rectangle
print(f"Page dimensions: {page_rect.width} x {page_rect.height}")
print(f"Margin area: {margin_rect.width} x {margin_rect.height}")

# `<=` tests whether the left rectangle lies entirely inside the right one
is_contained = margin_rect <= page_rect
print(f"Margin rect fits in page: {is_contained}")

# `&` yields the overlapping region; shown here only when the margin
# rectangle fits inside the page
if is_contained:
    intersection = page_rect & margin_rect
    print(f"Intersection: {intersection.width} x {intersection.height}")

# Corners are exposed as (x, y) tuples
ll, ur = page_rect.lower_left, page_rect.upper_right
print(f"Lower-left: {ll}, Upper-right: {ur}")

Multiple Page Operations

import pikepdf

# The context manager closes the source PDF even if an error occurs below
with pikepdf.open('multi_page_document.pdf') as pdf:
    # Rotate all pages. relative=False sets the rotation outright; a
    # relative rotation would add to whatever rotation a page already has,
    # so rotate(0) would be a no-op rather than "portrait".
    for i, page in enumerate(pdf.pages):
        if i % 2 == 0:  # Even indices (0, 2, 4...)
            page.rotate(0, relative=False)  # Portrait
        else:  # Odd indices (1, 3, 5...)
            page.rotate(90, relative=False)  # Landscape

        print(f"Page {i+1}: {page.mediabox.width} x {page.mediabox.height}")

    # Extract pages into separate PDFs, numbering output files from 1
    for page_number, page in enumerate(pdf.pages, start=1):
        # Each single-page PDF is closed as soon as it has been saved
        with pikepdf.new() as single_page_pdf:
            single_page_pdf.pages.append(page)
            single_page_pdf.save(f'page_{page_number}.pdf')

Install with Tessl CLI