tessl/pypi-py-pdf2

A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files.

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview

Eval results

Files

Page Manipulation

Name: tessl/pypi-py-pdf2
Author: tessl

Transform, scale, rotate, crop, and merge individual PDF pages with precise control over page geometry and content. The PageObject class and Transformation utilities provide comprehensive page manipulation capabilities.

Capabilities

PageObject Class

Represents a single PDF page with methods for content extraction, geometric transformations, and page merging.

class PageObject(DictionaryObject):
    """
    A single PDF page: boundary boxes, text extraction, transforms, merging.

    This is an interface listing; every member below carries a signature and
    a docstring only. Lengths are expressed in PDF points (72 points = 1 inch).
    """

    @property
    def mediabox(self) -> RectangleObject:
        """The page's media box (full page size), as a RectangleObject."""

    @property
    def cropbox(self) -> RectangleObject:
        """
        The page's crop box (visible area).

        The usage examples also assign a new RectangleObject to this
        attribute in order to crop a page.
        """

    @property
    def bleedbox(self) -> RectangleObject:
        """The page's bleed box (printing area), as a RectangleObject."""

    @property
    def trimbox(self) -> RectangleObject:
        """The page's trim box (finished page size), as a RectangleObject."""

    @property
    def artbox(self) -> RectangleObject:
        """The page's art box (meaningful content area), as a RectangleObject."""

    @property
    def annotations(self) -> Optional[ArrayObject]:
        """Page annotations if present; the Optional return type allows None."""

    def extract_text(self, visitor_text=None) -> str:
        """
        Extract text content from the page.

        Args:
            visitor_text (callable, optional): Custom text visitor function.
                NOTE(review): the arguments passed to this callback are not
                shown in this listing - confirm against the upstream docs.

        Returns:
            str: Extracted text content
        """

    def scale(self, sx: float, sy: float) -> None:
        """
        Scale the page by independent horizontal and vertical factors.

        Returns None; the usage examples call it for its effect on the page
        and then keep using the same page object.

        Args:
            sx (float): Horizontal scale factor (0.5 is described as 50% size)
            sy (float): Vertical scale factor
        """

    def scale_by(self, factor: float) -> None:
        """
        Scale the page uniformly.

        Args:
            factor (float): Single scale factor used for both dimensions
        """

    def scale_to(self, width: float, height: float) -> None:
        """
        Scale the page to specific dimensions.

        Args:
            width (float): Target width in points
            height (float): Target height in points
        """

    def rotate(self, angle: int) -> 'PageObject':
        """
        Rotate the page by the given angle.

        The usage example describes ``rotate(90)`` as a 90-degree clockwise
        turn.

        Args:
            angle (int): Rotation angle in degrees (must be multiple of 90)

        Returns:
            PageObject: Self for method chaining
        """

    def rotate_clockwise(self, angle: int) -> 'PageObject':
        """
        DEPRECATED: Use rotate() instead.
        Rotate the page clockwise.

        Args:
            angle (int): Rotation angle in degrees
                (presumably a multiple of 90, as for rotate() - confirm)

        Returns:
            PageObject: Self for method chaining
        """

    def rotate_counter_clockwise(self, angle: int) -> 'PageObject':
        """
        DEPRECATED: Use rotate() instead.
        Rotate the page counter-clockwise.

        Args:
            angle (int): Rotation angle in degrees
                (presumably a multiple of 90, as for rotate() - confirm)

        Returns:
            PageObject: Self for method chaining
        """

    def merge_page(self, page2: 'PageObject') -> None:
        """
        Merge another page's content onto this page.

        Args:
            page2 (PageObject): Page whose content is merged onto this page
        """

    def merge_scaled_page(self, page2: 'PageObject', scale: float, expand: bool = False) -> None:
        """
        Merge a scaled page onto this page.

        Args:
            page2 (PageObject): Page to merge
            scale (float): Scale factor for the merged page
            expand (bool): Whether to expand page size if needed
                (defaults to False)
        """

    def merge_rotated_page(self, page2: 'PageObject', rotation: int, expand: bool = False) -> None:
        """
        Merge a rotated page onto this page.

        Args:
            page2 (PageObject): Page to merge
            rotation (int): Rotation angle in degrees
            expand (bool): Whether to expand page size if needed
                (defaults to False)
        """

    def merge_scaled_translated_page(
        self, 
        page2: 'PageObject', 
        scale: float, 
        tx: float, 
        ty: float, 
        expand: bool = False
    ) -> None:
        """
        Merge a scaled and translated page onto this page.

        Args:
            page2 (PageObject): Page to merge
            scale (float): Scale factor
            tx (float): X translation in points
            ty (float): Y translation in points
            expand (bool): Whether to expand page size if needed
                (defaults to False)
        """

    def merge_rotated_scaled_page(
        self, 
        page2: 'PageObject', 
        rotation: int, 
        scale: float, 
        expand: bool = False
    ) -> None:
        """
        Merge a rotated and scaled page onto this page.

        Args:
            page2 (PageObject): Page to merge
            rotation (int): Rotation angle in degrees
            scale (float): Scale factor
            expand (bool): Whether to expand page size if needed
                (defaults to False)
        """

    def merge_rotated_scaled_translated_page(
        self, 
        page2: 'PageObject', 
        rotation: int, 
        scale: float, 
        tx: float, 
        ty: float, 
        expand: bool = False
    ) -> None:
        """
        Merge a page onto this page with rotation, scaling and translation.

        NOTE(review): the order in which the three operations are applied is
        not stated in this listing - confirm against the upstream docs.

        Args:
            page2 (PageObject): Page to merge
            rotation (int): Rotation angle in degrees
            scale (float): Scale factor
            tx (float): X translation in points
            ty (float): Y translation in points
            expand (bool): Whether to expand page size if needed
                (defaults to False)
        """

    def merge_transformed_page(
        self, 
        page2: 'PageObject', 
        ctm: Transformation, 
        expand: bool = False
    ) -> None:
        """
        Merge a page with custom transformation matrix.

        Args:
            page2 (PageObject): Page to merge
            ctm (Transformation): Current transformation matrix
                (presumably applied to page2's content - confirm)
            expand (bool): Whether to expand page size if needed
                (defaults to False)
        """

    def add_transformation(self, ctm: Transformation) -> None:
        """
        Add a transformation to the page.

        Returns None; the usage example calls it and then adds the same page
        object to a writer.

        Args:
            ctm (Transformation): Transformation matrix to apply
        """

    def get_fonts(self) -> Tuple[Set[str], Set[str]]:
        """
        Get fonts used on the page.

        Returns:
            tuple: (font_names, font_subsets), each a set of strings
        """

    def get_images(self) -> Dict[str, Any]:
        """
        Get images embedded in the page.

        Returns:
            dict: Image information keyed by image name
        """

    @staticmethod
    def create_blank_page(pdf=None, width: float = 612, height: float = 792) -> 'PageObject':
        """
        Create a blank page.

        The default size is US Letter: 612 x 792 points, i.e. 8.5 x 11 inches
        at 72 points per inch.

        Args:
            pdf: Optional PDF reader reference
            width (float): Page width in points (default: 612)
            height (float): Page height in points (default: 792)

        Returns:
            PageObject: New blank page
        """

Transformation Class

2D coordinate transformation operations for precise page geometry control.

class Transformation:
    """
    2D transformation matrix for page operations.

    Interface listing only (signatures and docstrings). ``translate``,
    ``scale`` and ``rotate`` each return a ``Transformation``, so calls can be
    chained, e.g. ``Transformation().rotate(45).scale(0.8, 1.2)``.
    """

    def __init__(self, ctm: Tuple[float, float, float, float, float, float] = (1, 0, 0, 1, 0, 0)):
        """
        Initialize transformation matrix.

        Args:
            ctm: 6-element transformation matrix (a, b, c, d, e, f). The
                default (1, 0, 0, 1, 0, 0) is the identity in that convention.
        """

    def translate(self, tx: float = 0, ty: float = 0) -> 'Transformation':
        """
        Add translation to the transformation.

        Args:
            tx (float): X translation in points (default: 0)
            ty (float): Y translation in points (default: 0)

        Returns:
            Transformation: New transformation with translation applied
        """

    def scale(self, sx: Optional[float] = None, sy: Optional[float] = None) -> 'Transformation':
        """
        Add scaling to the transformation.

        Both parameters default to None in the signature.
        NOTE(review): upstream PyPDF2 presumably substitutes the other factor
        for an omitted one and rejects a call where both are omitted -
        confirm before relying on an implicit default.

        Args:
            sx: X scale factor (default: None)
            sy: Y scale factor (default: None; documented as falling back
                to sx)

        Returns:
            Transformation: New transformation with scaling applied
        """

    def rotate(self, rotation: float) -> 'Transformation':
        """
        Add rotation to the transformation.

        Args:
            rotation (float): Rotation angle in degrees; the usage example
                passes 45, so it is not limited to multiples of 90

        Returns:
            Transformation: New transformation with rotation applied
        """

Rectangle Objects

Geometric rectangle representation for page boundaries and regions.

class RectangleObject(ArrayObject):
    """
    PDF rectangle object for geometric regions.

    Interface listing only (signatures and docstrings). The usage examples
    build one from a four-element list ordered [left, bottom, right, top] and
    wrap coordinate values in float() before doing arithmetic on them.
    """

    @property
    def left(self) -> float:
        """Left coordinate."""

    @property
    def bottom(self) -> float:
        """Bottom coordinate."""

    @property
    def right(self) -> float:
        """Right coordinate."""

    @property
    def top(self) -> float:
        """Top coordinate."""

    @property
    def width(self) -> float:
        """Rectangle width (presumably right minus left - confirm)."""

    @property
    def height(self) -> float:
        """Rectangle height (presumably top minus bottom - confirm)."""

    def scale(self, sx: float, sy: float) -> 'RectangleObject':
        """
        Scale the rectangle.

        Args:
            sx (float): X scale factor
            sy (float): Y scale factor

        Returns:
            RectangleObject: New scaled rectangle
        """

    def normalize(self) -> 'RectangleObject':
        """
        Normalize rectangle coordinates.

        NOTE(review): presumably reorders the corners so that left <= right
        and bottom <= top - the listing does not say; confirm upstream.

        Returns:
            RectangleObject: Normalized rectangle
        """

    def intersect(self, other: 'RectangleObject') -> 'RectangleObject':
        """
        Calculate intersection with another rectangle.

        NOTE(review): the result for rectangles that do not overlap is not
        specified in this listing - confirm upstream.

        Args:
            other (RectangleObject): Rectangle to intersect with

        Returns:
            RectangleObject: Intersection rectangle
        """

    def union(self, other: 'RectangleObject') -> 'RectangleObject':
        """
        Calculate union with another rectangle.

        NOTE(review): presumably the smallest rectangle enclosing both -
        confirm upstream.

        Args:
            other (RectangleObject): Rectangle to union with

        Returns:
            RectangleObject: Union rectangle
        """

Usage Examples

Basic Page Transformations

from PyPDF2 import PdfReader, PdfWriter

# Open the input document and pick out its first page.
source_doc = PdfReader("source.pdf")
first_page = source_doc.pages[0]

# Shrink the page to half size in both directions, then give it a
# 90-degree clockwise turn.
half = 0.5
first_page.scale(half, half)
first_page.rotate(90)

# Collect the transformed page in a fresh document and write it to disk.
output_doc = PdfWriter()
output_doc.add_page(first_page)
with open("transformed.pdf", "wb") as out_stream:
    output_doc.write(out_stream)

Advanced Page Merging

from PyPDF2 import PdfReader, PdfWriter

# Open both inputs; the first page of each is used.
base_doc = PdfReader("background.pdf")
stamp_doc = PdfReader("overlay.pdf")
base_page = base_doc.pages[0]
stamp_page = stamp_doc.pages[0]

# Pre-shrink the overlay page itself to 30% in both directions.
# NOTE(review): the merge call below applies a further scale of 0.5, so the
# two factors presumably compound - confirm that this is intended.
stamp_page.scale(0.3, 0.3)

# Stamp the overlay onto the base page. tx/ty are translations in points
# that place it toward the bottom-right; the base page size is left alone.
base_page.merge_scaled_translated_page(
    stamp_page, scale=0.5, tx=400, ty=100, expand=False
)

# Write the combined page out as a one-page document.
result_doc = PdfWriter()
result_doc.add_page(base_page)
with open("merged_pages.pdf", "wb") as out_stream:
    result_doc.write(out_stream)

Working with Page Dimensions

from PyPDF2 import PdfReader, PdfWriter
from PyPDF2.generic import RectangleObject

POINTS_PER_INCH = 72  # 72 points = 1 inch
INSET = 50  # points trimmed from every edge when cropping

src_doc = PdfReader("document.pdf")
dst_doc = PdfWriter()

for page in src_doc.pages:
    # Report the page size in points, then in inches.
    mediabox = page.mediabox
    print(f"Page size: {mediabox.width} x {mediabox.height} points")

    width_inches = float(mediabox.width) / POINTS_PER_INCH
    height_inches = float(mediabox.height) / POINTS_PER_INCH
    print(f"Page size: {width_inches:.1f}\" x {height_inches:.1f}\"")

    # Pull every edge of the media box inwards by INSET and use the result
    # as the new crop box (corner order: left, bottom, right, top).
    lower_left = (float(mediabox.left) + INSET, float(mediabox.bottom) + INSET)
    upper_right = (float(mediabox.right) - INSET, float(mediabox.top) - INSET)
    page.cropbox = RectangleObject([*lower_left, *upper_right])

    dst_doc.add_page(page)

with open("cropped.pdf", "wb") as out_stream:
    dst_doc.write(out_stream)

Creating Custom Transformations

from PyPDF2 import PdfReader, PdfWriter, Transformation

# Take the first page of the input document.
source_doc = PdfReader("source.pdf")
first_page = source_doc.pages[0]

# Build the matrix one step at a time; each call hands back a Transformation.
ctm = Transformation()
ctm = ctm.rotate(45)          # rotate 45 degrees
ctm = ctm.scale(0.8, 1.2)     # different factors for X and Y
ctm = ctm.translate(100, 50)  # shift to the new position

# Apply the combined matrix to the page and save it.
first_page.add_transformation(ctm)

output_doc = PdfWriter()
output_doc.add_page(first_page)
with open("custom_transform.pdf", "wb") as out_stream:
    output_doc.write(out_stream)

Text and Image Extraction

from PyPDF2 import PdfReader

PREVIEW_CHARS = 200  # longest text preview printed per page

document = PdfReader("document.pdf")

for page_num, page in enumerate(document.pages):
    # Text: print at most PREVIEW_CHARS characters, marking any truncation.
    text = page.extract_text()
    print(f"Page {page_num + 1} text:")
    if len(text) > PREVIEW_CHARS:
        preview = text[:PREVIEW_CHARS] + "..."
    else:
        preview = text
    print(preview)

    # Fonts: only the first of the two returned sets is reported.
    font_names, _font_subsets = page.get_fonts()
    print(f"Fonts used: {font_names}")

    # Images: report the count, then one line per image.
    images = page.get_images()
    print(f"Images found: {len(images)}")
    for img_name, img_info in images.items():
        print(f"  - {img_name}: {img_info}")

Creating Blank Pages with Content

from PyPDF2 import PdfWriter, PageObject
# NOTE(review): RectangleObject is imported but not referenced in this example.
from PyPDF2.generic import RectangleObject

# Page sizes as (width, height) in points.
A4_SIZE = (595, 842)
US_LETTER_SIZE = (612, 792)  # 8.5" x 11"

output_doc = PdfWriter()

# Create one blank page per size, A4 first. Content can be added to a blank
# page, or other pages merged onto it, before it goes into the writer.
for page_width, page_height in (A4_SIZE, US_LETTER_SIZE):
    empty_page = PageObject.create_blank_page(width=page_width, height=page_height)
    output_doc.add_page(empty_page)

with open("blank_pages.pdf", "wb") as out_stream:
    output_doc.write(out_stream)

Utility Functions

Text Extraction Utilities

def set_custom_rtl(_min: int, _max: int, specials: List[int]) -> Tuple[int, int, List[int]]:
    """
    Configure right-to-left (RTL) text parameters for text extraction.

    Interface listing only; no implementation is shown.
    NOTE(review): presumably _min/_max bound a range of character codes and
    the setting applies to later extract_text() calls - confirm upstream.

    Args:
        _min (int): Minimum character code for RTL
        _max (int): Maximum character code for RTL
        specials (list): Special character codes to handle as RTL

    Returns:
        tuple: Configuration tuple with min, max, and specials, in that order
    """

This function helps configure text extraction for right-to-left languages and custom character sets.

Install with Tessl CLI