tessl/pypi-pypdf

A pure-Python PDF library capable of splitting, merging, cropping, and transforming PDF files

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview

Eval results

Files

Page Operations

Name: tessl/pypi-pypdf
Author: tessl

Comprehensive page manipulation capabilities including transformations, merging, cropping, and geometric operations. The PageObject class provides the foundation for all page-level operations in pypdf.

Capabilities

Page Objects

PageObject represents individual PDF pages with complete access to page content, properties, and transformation capabilities.

class PageObject:
    """
    API summary of pypdf's page type.

    Groups the page-level operations covered by this section: blank page
    creation, text extraction, scaling, rotation, merging of other pages and
    raw matrix transformations. Only signatures and docstrings are listed;
    no method bodies are shown here.
    """

    @staticmethod
    def create_blank_page(width: float, height: float) -> PageObject:
        """
        Create a blank page with specified dimensions.

        NOTE(review): upstream pypdf appears to declare an optional ``pdf``
        parameter ahead of ``width``/``height``; passing the dimensions by
        keyword sidesteps any positional mismatch -- confirm against the
        installed version.

        Args:
            width: Page width in points
            height: Page height in points

        Returns:
            New blank PageObject
        """

    def extract_text(
        self,
        extraction_mode: str = "plain",
        layout_mode_space_vertically: bool = True,
        layout_mode_scale_weight: float = 1.25,
        layout_mode_strip_rotated: bool = True,
        orientations: tuple | int = (0, 90, 180, 270),
        space_width: float = 200.0,
        visitor_text=None
    ) -> str:
        """
        Extract text from the page.

        The three ``layout_mode_*`` options presumably only take effect when
        ``extraction_mode`` is "layout" -- TODO confirm.

        Args:
            extraction_mode: Text extraction mode ("plain" or "layout", default: "plain")
            layout_mode_space_vertically: Insert spaces for vertical gaps (default: True)
            layout_mode_scale_weight: Weight for layout scaling (default: 1.25)
            layout_mode_strip_rotated: Strip rotated text (default: True)
            orientations: Text orientations to consider, as a tuple of angles
                or a single angle (default: all of 0, 90, 180, 270)
            space_width: Width threshold for spaces (default: 200.0)
            visitor_text: Custom text visitor function (default: None)

        Returns:
            Extracted text as string
        """

    def scale(self, sx: float, sy: float) -> PageObject:
        """
        Scale the page by independent horizontal and vertical factors.

        NOTE(review): upstream pypdf appears to scale in place and return
        None rather than the page -- confirm before chaining on the result.

        Args:
            sx: Horizontal scaling factor
            sy: Vertical scaling factor

        Returns:
            Self for method chaining
        """

    def scale_by(self, factor: float) -> PageObject:
        """
        Scale the page uniformly by a single factor.

        NOTE(review): upstream pypdf appears to scale in place and return
        None rather than the page -- confirm before chaining on the result.

        Args:
            factor: Scaling factor applied to both axes

        Returns:
            Self for method chaining
        """

    def scale_to(self, width: float, height: float) -> PageObject:
        """
        Scale the page to specific target dimensions.

        NOTE(review): upstream pypdf appears to scale in place and return
        None rather than the page -- confirm before chaining on the result.

        Args:
            width: Target width in points
            height: Target height in points

        Returns:
            Self for method chaining
        """

    def rotate(self, angle: int) -> PageObject:
        """
        Rotate the page by the given angle.

        NOTE(review): positive angles are presumably clockwise -- confirm.

        Args:
            angle: Rotation angle in degrees (90, 180, 270, etc.)

        Returns:
            Self for method chaining
        """

    def rotate_clockwise(self, angle: int) -> PageObject:
        """
        Rotate the page clockwise.

        NOTE(review): this looks like a legacy name that may no longer exist
        in current pypdf releases, where ``rotate`` is the supported call --
        confirm against the installed version.

        Args:
            angle: Rotation angle in degrees

        Returns:
            Self for method chaining
        """

    def rotate_counter_clockwise(self, angle: int) -> PageObject:
        """
        Rotate the page counter-clockwise.

        NOTE(review): this looks like a legacy name that may no longer exist
        in current pypdf releases -- confirm against the installed version.

        Args:
            angle: Rotation angle in degrees

        Returns:
            Self for method chaining
        """

    def transfer_rotation_to_content(self) -> PageObject:
        """
        Apply the page's rotation to its content and reset rotation to 0.

        NOTE(review): upstream pypdf may return None here -- confirm before
        chaining on the result.

        Returns:
            Self for method chaining
        """

    def merge_page(self, page2: PageObject) -> None:
        """
        Merge another page's content onto this page.

        Nothing is returned; this page is the merge target.

        Args:
            page2: PageObject to merge onto this page
        """

    def merge_translated_page(self, page2: PageObject, tx: float, ty: float) -> None:
        """
        Merge another page onto this one, shifted by a translation offset.

        Args:
            page2: PageObject to merge
            tx: Translation offset in x direction
            ty: Translation offset in y direction
        """

    def merge_rotated_page(self, page2: PageObject, rotation: float) -> None:
        """
        Merge another page onto this one, rotated by the given angle.

        Args:
            page2: PageObject to merge
            rotation: Rotation angle in degrees
        """

    def merge_scaled_page(self, page2: PageObject, scale: float, expand: bool = False) -> None:
        """
        Merge another page onto this one, scaled by the given factor.

        Args:
            page2: PageObject to merge
            scale: Scaling factor
            expand: Whether to expand the page to fit scaled content (default: False)
        """

    def merge_rotated_scaled_page(
        self,
        page2: PageObject,
        rotation: float,
        scale: float,
        expand: bool = False
    ) -> None:
        """
        Merge another page onto this one with both rotation and scaling.

        NOTE(review): confirm this method exists in the installed pypdf
        version; the same effect can be expressed via
        ``merge_transformed_page``.

        Args:
            page2: PageObject to merge
            rotation: Rotation angle in degrees
            scale: Scaling factor
            expand: Whether to expand the page to fit transformed content (default: False)
        """

    def merge_transformed_page(
        self,
        page2: PageObject,
        ctm,
        expand: bool = False
    ) -> None:
        """
        Merge another page onto this one using a custom transformation matrix.

        Args:
            page2: PageObject to merge
            ctm: Transformation matrix; presumably the 6-element
                (a, b, c, d, e, f) form or a Transformation instance --
                TODO confirm accepted types
            expand: Whether to expand the page to fit transformed content (default: False)
        """

    def add_transformation(self, ctm) -> None:
        """
        Apply a transformation matrix to the page.

        Args:
            ctm: Transformation matrix; presumably the 6-element
                (a, b, c, d, e, f) form or a Transformation instance --
                TODO confirm accepted types
        """

### Page Box Properties

Access and modify PDF page boundaries and dimensions through five different box types, each serving specific purposes in the PDF specification.

```python { .api }
# Box Properties (all return RectangleObject)
@property
def mediabox(self) -> RectangleObject:
    """
    Return the page's media box.

    The boundaries of the physical medium on which the page is intended
    to be displayed or printed. This is the largest box and defines the
    overall page size.
    """

@property
def cropbox(self) -> RectangleObject:
    """
    Return the page's crop box.

    The visible region of default user space. When displayed or printed,
    contents outside this box are clipped. Falls back to mediabox if not set.
    """

@property
def bleedbox(self) -> RectangleObject:
    """
    Return the page's bleed box.

    The region to which contents should be clipped when output in a
    production environment. Used for printing with bleed margins.
    Falls back to cropbox, then mediabox if not set.
    """

@property
def trimbox(self) -> RectangleObject:
    """
    Return the page's trim box.

    The intended dimensions of the finished page after trimming.
    Falls back to cropbox, then mediabox if not set.
    """

@property
def artbox(self) -> RectangleObject:
    """
    Return the page's art box.

    The extent of the page's meaningful content as intended by the
    page's creator. Falls back to cropbox, then mediabox if not set.
    """

# RectangleObject Properties and Methods
class RectangleObject:
    """
    Rectangle type returned by the page box properties.

    Exposes the four edges, the four corners, and the derived width/height.
    Only signatures are listed here; property bodies are elided with ``...``.
    """

    # Individual coordinates (read/write)
    # Each edge is exposed as a FloatObject.
    @property
    def left(self) -> FloatObject: ...

    @property
    def bottom(self) -> FloatObject: ...

    @property
    def right(self) -> FloatObject: ...

    @property
    def top(self) -> FloatObject: ...

    # Corner positions (read/write)
    # Each corner is a two-float tuple; presumably ordered (x, y) -- TODO confirm.
    @property
    def lower_left(self) -> tuple[float, float]: ...

    @property
    def lower_right(self) -> tuple[float, float]: ...

    @property
    def upper_left(self) -> tuple[float, float]: ...

    @property
    def upper_right(self) -> tuple[float, float]: ...

    # Dimensions (read-only)
    # Presumably derived from the edges (right - left, top - bottom) -- TODO confirm.
    @property
    def width(self) -> float: ...

    @property
    def height(self) -> float: ...

    def scale(self, sx: float, sy: float) -> RectangleObject:
        """
        Create a new scaled rectangle.

        A new rectangle is returned, as opposed to modifying this one.

        Args:
            sx: Horizontal scale factor
            sy: Vertical scale factor

        Returns:
            New scaled RectangleObject
        """

@property
def rotation(self) -> int:
    """
    Get the page rotation angle in degrees.

    Returns:
        The rotation as an integer number of degrees.
    """

@property
def user_unit(self) -> float:
    """
    Get the user unit scale factor.

    NOTE(review): presumably the size of one user-space unit expressed in
    multiples of 1/72 inch, as in the PDF specification -- confirm.

    Returns:
        The scale factor as a float.
    """

@property
def images(self):
    """
    Get images on the page.

    NOTE(review): the return type is not annotated in this listing;
    presumably a sequence-like collection of image objects -- confirm.
    """

@property
def page_number(self) -> int | None:
    """
    Get the page number in the document.

    NOTE(review): ``None`` presumably means the page is not attached to a
    document, and the number is presumably zero-based -- confirm both.

    Returns:
        The page number, or None.
    """

@property
def annotations(self):
    """
    Get page annotations.

    NOTE(review): the return type is not annotated in this listing and may
    be empty or None for pages without annotations -- confirm.
    """

@property
def mediabox(self) -> RectangleObject:
    """
    Get the media box (page boundaries).

    Repeats the ``mediabox`` entry from the "Box Properties" listing
    earlier in this section; the value is a RectangleObject.
    """

@property
def cropbox(self) -> RectangleObject:
    """
    Get the crop box (visible page area).

    Repeats the ``cropbox`` entry from the "Box Properties" listing
    earlier in this section; the value is a RectangleObject.
    """

@property
def bleedbox(self) -> RectangleObject:
    """
    Get the bleed box (printable area with bleed).

    Repeats the ``bleedbox`` entry from the "Box Properties" listing
    earlier in this section; the value is a RectangleObject.
    """

@property
def trimbox(self) -> RectangleObject:
    """
    Get the trim box (final trimmed page size).

    Repeats the ``trimbox`` entry from the "Box Properties" listing
    earlier in this section; the value is a RectangleObject.
    """

@property
def artbox(self) -> RectangleObject:
    """
    Get the art box (meaningful content area).

    Repeats the ``artbox`` entry from the "Box Properties" listing
    earlier in this section; the value is a RectangleObject.
    """

```

### Transformation Matrix

The Transformation class provides a convenient interface for creating and combining geometric transformations.

```python { .api }
class Transformation:
    """
    Builder for 2D affine transformation matrices used by page operations.

    NOTE(review): the builder methods below are documented as returning
    "self", but upstream pypdf appears to return a NEW Transformation and
    leave the receiver unchanged -- confirm. Either way, using the return
    value (for example by chaining the calls) is the safe pattern.
    """

    def __init__(self, ctm=(1, 0, 0, 1, 0, 0)):
        """
        Initialize a transformation matrix.

        Args:
            ctm: 6-element transformation matrix tuple (a, b, c, d, e, f);
                the default (1, 0, 0, 1, 0, 0) is the identity
        """

    def translate(self, tx: float = 0, ty: float = 0) -> Transformation:
        """
        Add translation to the transformation.

        Args:
            tx: Translation in x direction (default: 0)
            ty: Translation in y direction (default: 0)

        Returns:
            Self for method chaining
            (NOTE(review): possibly a new Transformation -- use the return value)
        """

    def scale(self, sx: float = 1, sy: float | None = None) -> Transformation:
        """
        Add scaling to the transformation.

        Args:
            sx: Horizontal scaling factor (default: 1)
            sy: Vertical scaling factor (defaults to sx)

        Returns:
            Self for method chaining
            (NOTE(review): possibly a new Transformation -- use the return value)
        """

    def rotate(self, rotation: float) -> Transformation:
        """
        Add rotation to the transformation.

        Args:
            rotation: Rotation angle in degrees

        Returns:
            Self for method chaining
            (NOTE(review): possibly a new Transformation -- use the return value)
        """

    def transform(self, m) -> Transformation:
        """
        Apply another transformation matrix.

        Args:
            m: Transformation matrix to apply; presumably another
                Transformation -- TODO confirm accepted types

        Returns:
            Self for method chaining
            (NOTE(review): possibly a new Transformation -- use the return value)
        """

    def apply_on(self, pt, as_object: bool = False):
        """
        Apply the transformation to a point.

        Args:
            pt: Point coordinates
            as_object: Return as object instead of tuple (default: False)

        Returns:
            Transformed point coordinates
        """

    @property
    def matrix(self):
        """
        Get the transformation matrix.

        NOTE(review): upstream pypdf appears to return this as a nested 3x3
        tuple rather than the 6-element ctm form accepted by ``__init__`` --
        confirm before passing it where a ctm is expected.
        """

```

Usage Examples

Basic Page Transformations

from pypdf import PdfReader, PdfWriter

# Scale every page of input.pdf to 150% and rotate it a quarter turn,
# then write the result to transformed.pdf.
reader = PdfReader("input.pdf")
writer = PdfWriter()

for page in reader.pages:
    # Scale page to 150%
    page.scale_by(1.5)

    # Rotate page 90 degrees clockwise. rotate() treats positive angles as
    # clockwise and expects a multiple of 90; the legacy rotate_clockwise()
    # helper is not available in current pypdf releases.
    page.rotate(90)

    writer.add_page(page)

with open("transformed.pdf", "wb") as output:
    writer.write(output)

Page Merging

from pypdf import PdfReader, PdfWriter

# Stamp the first page of watermark.pdf onto every page of document.pdf
# and save the result as watermarked.pdf.
source_pdf = PdfReader("document.pdf")
stamp_pdf = PdfReader("watermark.pdf")

result = PdfWriter()

for current_page in source_pdf.pages:
    # The stamp page is drawn over the current page's existing content.
    current_page.merge_page(stamp_pdf.pages[0])
    result.add_page(current_page)

with open("watermarked.pdf", "wb") as out_file:
    result.write(out_file)

Advanced Transformations

from pypdf import PdfReader, PdfWriter, Transformation

reader = PdfReader("input.pdf")
writer = PdfWriter()

# Create complex transformation.
# Each Transformation method returns a new Transformation instead of
# modifying the receiver, so the calls must be chained (or reassigned);
# calling them as bare statements would leave the identity matrix in place.
transform = (
    Transformation()
    .translate(100, 50)  # Move 100 points right, 50 up
    .scale(0.8, 1.2)     # Scale 80% horizontally, 120% vertically
    .rotate(15)          # Rotate 15 degrees
)

for page in reader.pages:
    # Pass the Transformation itself: add_transformation() accepts a
    # Transformation or a 6-element ctm tuple, whereas `.matrix` is the
    # nested 3x3 form and is not a valid argument here.
    page.add_transformation(transform)
    writer.add_page(page)

with open("complex_transform.pdf", "wb") as output:
    writer.write(output)

Creating Blank Pages

from pypdf import PdfWriter, PageObject, PaperSize

writer = PdfWriter()

# Create pages with different sizes.
# Dimensions are passed by keyword: pypdf's create_blank_page() takes an
# optional `pdf` argument first, so positional numbers would be bound to
# the wrong parameters.
letter_page = PageObject.create_blank_page(width=612, height=792)  # Letter size
a4_page = PageObject.create_blank_page(
    width=PaperSize.A4.width,
    height=PaperSize.A4.height,
)  # A4 size

writer.add_page(letter_page)
writer.add_page(a4_page)

with open("blank_pages.pdf", "wb") as output:
    writer.write(output)

Page Cropping and Boundaries

from pypdf import PdfReader, PdfWriter
from pypdf.generic import RectangleObject

reader = PdfReader("input.pdf")
writer = PdfWriter()

for page in reader.pages:
    # Get current page boundaries
    media_box = page.mediabox

    # Create crop box (crop 50 points from each side).
    # The box must be a RectangleObject: page entries have to be PDF
    # objects, so assigning a plain Python list is rejected.
    crop_box = RectangleObject(
        (
            media_box.left + 50,
            media_box.bottom + 50,
            media_box.right - 50,
            media_box.top - 50,
        )
    )

    # Apply crop box
    page.cropbox = crop_box
    writer.add_page(page)

with open("cropped.pdf", "wb") as output:
    writer.write(output)

Multi-Page Overlay

from pypdf import PdfReader, PdfWriter, Transformation

base_doc = PdfReader("base.pdf")
overlay_doc = PdfReader("overlay.pdf")

writer = PdfWriter()

# Look the overlay pages up once; they are cycled through when the base
# document has more pages than the overlay document.
overlay_pages = overlay_doc.pages
overlay_count = len(overlay_pages)

for i, page in enumerate(base_doc.pages):
    # Use different overlay pages if available
    overlay_page = overlay_pages[i % overlay_count]

    # Largest uniform scale at which the overlay still fits inside the page
    scale = min(
        float(page.mediabox.width) / float(overlay_page.mediabox.width),
        float(page.mediabox.height) / float(overlay_page.mediabox.height),
    )

    # Scale only while merging. Calling overlay_page.scale_by() would modify
    # the shared overlay page itself, so every reuse of that page would stack
    # another transformation on top of the previous ones.
    page.merge_transformed_page(overlay_page, Transformation().scale(scale))

    writer.add_page(page)

with open("multi_overlay.pdf", "wb") as output:
    writer.write(output)

Install with Tessl CLI