CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-pypdf

A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

page-operations.mddocs/

Page Operations

Comprehensive page manipulation capabilities including transformations, merging, cropping, and geometric operations. The PageObject class provides the foundation for all page-level operations in pypdf.

Capabilities

Page Objects

PageObject represents individual PDF pages with complete access to page content, properties, and transformation capabilities.

class PageObject:
    """
    A single PDF page: content, boundary boxes, rotation and merge helpers.

    This listing is an API summary: it shows signatures and docstrings only,
    not the implementation.
    """

    @staticmethod
    def create_blank_page(width: float, height: float) -> PageObject:
        """
        Create a blank page with specified dimensions.

        NOTE(review): the upstream pypdf signature is
        ``create_blank_page(pdf=None, width=None, height=None)``, so pass
        ``width`` and ``height`` by keyword when calling the real method --
        confirm against the installed pypdf version.

        Args:
            width: Page width in points
            height: Page height in points

        Returns:
            New blank PageObject
        """

    def extract_text(
        self,
        extraction_mode: str = "plain",
        layout_mode_space_vertically: bool = True,
        layout_mode_scale_weight: float = 1.25,
        layout_mode_strip_rotated: bool = True,
        orientations: tuple | int = (0, 90, 180, 270),
        space_width: float = 200.0,
        visitor_text=None
    ) -> str:
        """
        Extract the text content of the page.

        The ``layout_mode_*`` options only matter when ``extraction_mode`` is
        "layout".

        Args:
            extraction_mode: Text extraction mode ("plain" or "layout", default: "plain")
            layout_mode_space_vertically: Insert spaces for vertical gaps
            layout_mode_scale_weight: Weight for layout scaling
            layout_mode_strip_rotated: Strip rotated text
            orientations: Text orientations to consider, as a single angle or
                a tuple of angles (default: 0, 90, 180 and 270)
            space_width: Width threshold for spaces
            visitor_text: Custom text visitor function (optional)

        Returns:
            Extracted text as string
        """

    def scale(self, sx: float, sy: float) -> None:
        """
        Scale the page in place by the given factors.

        Args:
            sx: Horizontal scaling factor
            sy: Vertical scaling factor

        Returns:
            None; the page itself is modified, so calls cannot be chained
        """

    def scale_by(self, factor: float) -> None:
        """
        Scale the page in place, uniformly in both directions.

        Args:
            factor: Scaling factor applied to both width and height

        Returns:
            None; the page itself is modified, so calls cannot be chained
        """

    def scale_to(self, width: float, height: float) -> None:
        """
        Scale the page in place to specific dimensions.

        Args:
            width: Target width in points
            height: Target height in points

        Returns:
            None; the page itself is modified, so calls cannot be chained
        """

    def rotate(self, angle: int) -> PageObject:
        """
        Rotate the page clockwise by the given angle.

        Args:
            angle: Rotation angle in degrees; must be a multiple of 90
                (90, 180, 270, etc.)

        Returns:
            Self for method chaining
        """

    def rotate_clockwise(self, angle: int) -> PageObject:
        """
        Rotate the page clockwise.

        NOTE(review): this looks like a legacy alias of ``rotate()`` that is
        deprecated/removed in current pypdf releases -- prefer
        ``rotate(angle)`` and confirm against the installed version.

        Args:
            angle: Rotation angle in degrees

        Returns:
            Self for method chaining
        """

    def rotate_counter_clockwise(self, angle: int) -> PageObject:
        """
        Rotate the page counter-clockwise.

        NOTE(review): not found in the current pypdf API reference -- prefer
        ``rotate(-angle)`` and confirm against the installed version.

        Args:
            angle: Rotation angle in degrees

        Returns:
            Self for method chaining
        """

    def transfer_rotation_to_content(self) -> None:
        """
        Apply the page's rotation to its content and reset rotation to 0.

        Returns:
            None; the page itself is modified
        """

    def merge_page(self, page2: PageObject) -> None:
        """
        Merge another page's content onto this page.

        Args:
            page2: PageObject to merge onto this page
        """

    def merge_translated_page(self, page2: PageObject, tx: float, ty: float) -> None:
        """
        Merge another page onto this page, shifted by a translation offset.

        Args:
            page2: PageObject to merge
            tx: Translation offset in x direction
            ty: Translation offset in y direction
        """

    def merge_rotated_page(self, page2: PageObject, rotation: float) -> None:
        """
        Merge another page onto this page, rotated by the given angle.

        Args:
            page2: PageObject to merge
            rotation: Rotation angle in degrees
        """

    def merge_scaled_page(self, page2: PageObject, scale: float, expand: bool = False) -> None:
        """
        Merge another page onto this page, scaled by the given factor.

        Args:
            page2: PageObject to merge
            scale: Scaling factor
            expand: Whether to expand the page to fit scaled content
        """

    def merge_rotated_scaled_page(
        self, 
        page2: PageObject, 
        rotation: float, 
        scale: float, 
        expand: bool = False
    ) -> None:
        """
        Merge another page onto this page with rotation and scaling.

        Args:
            page2: PageObject to merge
            rotation: Rotation angle in degrees
            scale: Scaling factor
            expand: Whether to expand the page to fit transformed content
        """

    def merge_transformed_page(
        self, 
        page2: PageObject, 
        ctm, 
        expand: bool = False
    ) -> None:
        """
        Merge another page onto this page using a custom transformation.

        Args:
            page2: PageObject to merge
            ctm: Transformation matrix, given as a Transformation object or a
                6-element tuple (a, b, c, d, e, f)
            expand: Whether to expand the page to fit transformed content
        """

    def add_transformation(self, ctm) -> None:
        """
        Apply a transformation matrix to the page content.

        Args:
            ctm: Transformation matrix, given as a Transformation object or a
                6-element tuple (a, b, c, d, e, f)
        """

### Page Box Properties

Access and modify PDF page boundaries and dimensions through five different box types, each serving specific purposes in the PDF specification.

```python { .api }
# Box Properties (all return RectangleObject)
@property
def mediabox(self) -> RectangleObject:
    """
    Return the media box: the boundaries of the physical medium.

    This is the area on which the page is intended to be displayed or
    printed. It is the largest box and defines the overall page size.
    """

@property  
def cropbox(self) -> RectangleObject:
    """
    Return the crop box: the visible region of default user space.

    When the page is displayed or printed, contents outside this box are
    clipped. Falls back to mediabox if not set.
    """

@property
def bleedbox(self) -> RectangleObject:
    """
    Return the bleed box: the clipping region for production output.

    Used for printing with bleed margins.
    Falls back to cropbox, then mediabox if not set.
    """

@property
def trimbox(self) -> RectangleObject:
    """
    Return the trim box: the intended size of the finished, trimmed page.

    Falls back to cropbox, then mediabox if not set.
    """

@property
def artbox(self) -> RectangleObject:
    """
    Return the art box: the extent of the page's meaningful content.

    This is the area intended by the page's creator.
    Falls back to cropbox, then mediabox if not set.
    """

# RectangleObject Properties and Methods
class RectangleObject:
    """
    Rectangle described by its left, bottom, right and top coordinates.

    This is the type returned by the page box properties (mediabox, cropbox,
    bleedbox, trimbox, artbox). API summary only: bodies are elided.
    """

    # Individual edge coordinates (read/write)
    @property
    def left(self) -> FloatObject: ...
    
    @property
    def bottom(self) -> FloatObject: ...
    
    @property
    def right(self) -> FloatObject: ...
    
    @property
    def top(self) -> FloatObject: ...
    
    # Corner positions as (x, y) pairs (read/write)
    @property
    def lower_left(self) -> tuple[float, float]: ...
    
    @property
    def lower_right(self) -> tuple[float, float]: ...
    
    @property
    def upper_left(self) -> tuple[float, float]: ...
    
    @property
    def upper_right(self) -> tuple[float, float]: ...
    
    # Dimensions (read-only)
    @property
    def width(self) -> float: ...
    
    @property
    def height(self) -> float: ...
    
    def scale(self, sx: float, sy: float) -> RectangleObject:
        """
        Create a new scaled rectangle; this rectangle is left unchanged.

        Args:
            sx: Horizontal scale factor
            sy: Vertical scale factor

        Returns:
            New scaled RectangleObject
        """
# Additional PageObject properties (summary list).
@property
def rotation(self) -> int:
    """Get the page's visual rotation angle in degrees (clockwise)."""

@property
def user_unit(self) -> float:
    """Get the user unit scale factor (size of one user-space unit, in multiples of 1/72 inch)."""

@property
def images(self):
    """Get the images on the page as a list-like collection."""

@property
def page_number(self) -> int | None:
    """Get the page number in the document, or None if the page is not attached to one."""

@property
def annotations(self):
    """Get the page annotations."""

@property
def mediabox(self) -> RectangleObject:
    """Get the media box (page boundaries)."""

@property
def cropbox(self) -> RectangleObject:
    """Get the crop box (visible page area)."""

@property
def bleedbox(self) -> RectangleObject:
    """Get the bleed box (printable area with bleed)."""

@property
def trimbox(self) -> RectangleObject:
    """Get the trim box (final trimmed page size)."""

@property
def artbox(self) -> RectangleObject:
    """Get the art box (meaningful content area)."""
```

### Transformation Matrix

The Transformation class provides a convenient interface for creating and combining geometric transformations.

```python { .api }
class Transformation:
    """
    A 2D affine transformation built up from translate/scale/rotate steps.

    The builder methods do not modify the object they are called on: each
    returns a new Transformation, so the result must be chained or assigned,
    e.g. ``op = Transformation().scale(2).translate(10, 0)``.
    """

    def __init__(self, ctm=(1, 0, 0, 1, 0, 0)):
        """
        Initialize a transformation matrix.

        Args:
            ctm: 6-element transformation matrix tuple (a, b, c, d, e, f);
                the default is the identity transformation
        """

    def translate(self, tx: float = 0, ty: float = 0) -> Transformation:
        """
        Return a new transformation with a translation added.

        Args:
            tx: Translation in x direction
            ty: Translation in y direction

        Returns:
            New Transformation; the original is left unchanged
        """

    def scale(self, sx: float = 1, sy: float | None = None) -> Transformation:
        """
        Return a new transformation with scaling added.

        Args:
            sx: Horizontal scaling factor
            sy: Vertical scaling factor (defaults to sx)

        Returns:
            New Transformation; the original is left unchanged
        """

    def rotate(self, rotation: float) -> Transformation:
        """
        Return a new transformation with a rotation added.

        Args:
            rotation: Rotation angle in degrees

        Returns:
            New Transformation; the original is left unchanged
        """

    def transform(self, m) -> Transformation:
        """
        Return a new transformation combined with another one.

        Args:
            m: Transformation to apply after this one

        Returns:
            New Transformation; the original is left unchanged
        """

    def apply_on(self, pt, as_object: bool = False):
        """
        Apply the transformation to a point.

        Args:
            pt: Point coordinates as an (x, y) pair
            as_object: Return the coordinates as PDF number objects instead
                of plain floats

        Returns:
            Transformed point coordinates
        """

    @property
    def matrix(self):
        """
        Get the transformation as a 3x3 matrix.

        The value is a tuple of rows ((a, b, 0), (c, d, 0), (e, f, 1)), which
        is a different shape from the 6-element ``ctm`` tuple accepted by the
        constructor and by the page transformation methods.
        """

```

Usage Examples

Basic Page Transformations

from pypdf import PdfReader, PdfWriter

reader = PdfReader("input.pdf")
writer = PdfWriter()

for page in reader.pages:
    # Scale page to 150% (scale_by modifies the page in place and returns None)
    page.scale_by(1.5)

    # Rotate page 90 degrees clockwise. rotate() takes a clockwise angle that
    # must be a multiple of 90; it replaces the legacy rotate_clockwise()
    # alias, which is deprecated/removed in current pypdf releases.
    page.rotate(90)

    writer.add_page(page)

with open("transformed.pdf", "wb") as output:
    writer.write(output)

Page Merging

from pypdf import PdfReader, PdfWriter

# Open the document to stamp and the PDF whose first page is the watermark.
reader = PdfReader("document.pdf")
overlay = PdfReader("watermark.pdf")

writer = PdfWriter()

# Stamp the watermark onto every page, then queue the page for output.
for page in reader.pages:
    watermark_page = overlay.pages[0]
    page.merge_page(watermark_page)
    writer.add_page(page)

# Write the watermarked document to disk.
with open("watermarked.pdf", "wb") as output:
    writer.write(output)

Advanced Transformations

from pypdf import PdfReader, PdfWriter, Transformation

reader = PdfReader("input.pdf")
writer = PdfWriter()

# Create complex transformation.
# Each Transformation method returns a NEW object instead of modifying the
# receiver, so the calls must be chained (or re-assigned); calling them as
# bare statements would leave `transform` as the identity.
transform = (
    Transformation()
    .translate(100, 50)  # Move 100 points right, 50 up
    .scale(0.8, 1.2)     # Scale 80% horizontally, 120% vertically
    .rotate(15)          # Rotate 15 degrees
)

for page in reader.pages:
    # Pass the Transformation itself: add_transformation expects either a
    # Transformation or a 6-element (a, b, c, d, e, f) tuple, whereas
    # `transform.matrix` is a 3x3 tuple of rows.
    page.add_transformation(transform)
    writer.add_page(page)

with open("complex_transform.pdf", "wb") as output:
    writer.write(output)

Creating Blank Pages

from pypdf import PdfWriter, PageObject, PaperSize

writer = PdfWriter()

# Create pages with different sizes.
# width/height are passed by keyword because the first positional parameter
# of create_blank_page is an optional `pdf` document, not the width.
letter_page = PageObject.create_blank_page(width=612, height=792)  # Letter size
a4_page = PageObject.create_blank_page(
    width=PaperSize.A4.width,
    height=PaperSize.A4.height,
)  # A4 size

writer.add_page(letter_page)
writer.add_page(a4_page)

with open("blank_pages.pdf", "wb") as output:
    writer.write(output)

Page Cropping and Boundaries

from pypdf import PdfReader, PdfWriter
from pypdf.generic import RectangleObject

reader = PdfReader("input.pdf")
writer = PdfWriter()

# Margin removed from every side, in points
CROP_MARGIN = 50

for page in reader.pages:
    # Get current page boundaries
    media_box = page.mediabox

    # Create crop box (crop CROP_MARGIN points from each side).
    # The box is wrapped in RectangleObject because page dictionary entries
    # must be PDF objects; a plain Python list is not accepted.
    crop_box = RectangleObject(
        (
            media_box.left + CROP_MARGIN,
            media_box.bottom + CROP_MARGIN,
            media_box.right - CROP_MARGIN,
            media_box.top - CROP_MARGIN,
        )
    )

    # Apply crop box
    page.cropbox = crop_box
    writer.add_page(page)

with open("cropped.pdf", "wb") as output:
    writer.write(output)

Multi-Page Overlay

from pypdf import PdfReader, PdfWriter, Transformation

base_doc = PdfReader("base.pdf")
overlay_doc = PdfReader("overlay.pdf")

writer = PdfWriter()

for i, page in enumerate(base_doc.pages):
    # Use different overlay pages if available (cycle when there are fewer
    # overlay pages than base pages)
    overlay_index = i % len(overlay_doc.pages)
    overlay_page = overlay_doc.pages[overlay_index]

    # Compute the uniform scale that fits the overlay inside the page
    page_width = float(page.mediabox.width)
    page_height = float(page.mediabox.height)
    overlay_width = float(overlay_page.mediabox.width)
    overlay_height = float(overlay_page.mediabox.height)

    scale_x = page_width / overlay_width
    scale_y = page_height / overlay_height
    scale = min(scale_x, scale_y)

    # Scale at merge time instead of calling overlay_page.scale_by(): that
    # method mutates the overlay page in place, so an overlay page reused for
    # several base pages would be scaled again on every reuse.
    page.merge_transformed_page(overlay_page, Transformation().scale(scale, scale))

    writer.add_page(page)

with open("multi_overlay.pdf", "wb") as output:
    writer.write(output)

Install with Tessl CLI

npx tessl i tessl/pypi-pypdf

docs

annotations.md

form-fields.md

index.md

metadata.md

page-operations.md

reading-writing.md

text-extraction.md

utilities.md

tile.json