A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Comprehensive page manipulation capabilities including transformations, merging, cropping, and geometric operations. The PageObject class provides the foundation for all page-level operations in pypdf.
PageObject represents individual PDF pages with complete access to page content, properties, and transformation capabilities.
class PageObject:
@staticmethod
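def create_blank_page(pdf=None, width: float | None = None, height: float | None = None) -> PageObject:
"""
Create a blank page with specified dimensions.
Args:
pdf: Optional document (reader or writer) the page is associated with; when width or height is omitted, the size of that document's last page is used
width: Page width in points
height: Page height in points
Returns:
New blank PageObject
Note:
pdf is the first positional parameter, so pass the dimensions by keyword: create_blank_page(width=612, height=792)
"""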
def extract_text(
self,
extraction_mode: str = "plain",
layout_mode_space_vertically: bool = True,
layout_mode_scale_weight: float = 1.25,
layout_mode_strip_rotated: bool = True,
orientations: tuple | int = (0, 90, 180, 270),
space_width: float = 200.0,
visitor_text=None
) -> str:
"""
Extract text from the page.
Args:
extraction_mode: Text extraction mode ("plain" or "layout", default: "plain")
layout_mode_space_vertically: Insert spaces for vertical gaps
layout_mode_scale_weight: Weight for layout scaling
layout_mode_strip_rotated: Strip rotated text
orientations: Text orientations to consider
space_width: Width threshold for spaces
visitor_text: Custom text visitor function
Returns:
Extracted text as string
"""
def scale(self, sx: float, sy: float) -> None:
"""
Scale the page in place by the given factors; both the content and the page boxes are scaled.
Args:
sx: Horizontal scaling factor
sy: Vertical scaling factor
Returns:
None; the page itself is modified, so the call cannot be chained
"""
def scale_by(self, factor: float) -> None:
"""
Scale the page in place, uniformly in both directions, by a factor.
Args:
factor: Scaling factor applied to both width and height
Returns:
None; the page itself is modified, so the call cannot be chained
"""
def scale_to(self, width: float, height: float) -> None:
"""
Scale the page in place so that it has the given dimensions.
Args:
width: Target width in points
height: Target height in points
Returns:
None; the page itself is modified, so the call cannot be chained
"""
def rotate(self, angle: int) -> PageObject:
"""
Rotate the page by the given angle.
Args:
angle: Rotation angle in degrees (90, 180, 270, etc.)
Returns:
Self for method chaining
"""
def rotate_clockwise(self, angle: int) -> PageObject:
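"""
Rotate the page clockwise (legacy alias of rotate()).
Args:
angle: Rotation angle in degrees; must be a multiple of 90
Returns:
Self for method chaining
Note:
Deprecated in favor of rotate(angle). Recent pypdf releases may no longer provide this method; verify against the installed version before relying on it.
"""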
def rotate_counter_clockwise(self, angle: int) -> PageObject:
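"""
Rotate the page counter-clockwise (legacy helper).
Args:
angle: Rotation angle in degrees; must be a multiple of 90
Returns:
Self for method chaining
Note:
Prefer rotate(-angle). Recent pypdf releases may not provide this method; verify against the installed version before relying on it.
"""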
def transfer_rotation_to_content(self) -> None:
"""
Apply the page's rotation to its content and boxes, then reset the page rotation to 0.
Returns:
None; the page itself is modified, so the call cannot be chained
"""
def merge_page(self, page2: PageObject) -> None:
"""
Merge another page's content onto this page.
Args:
page2: PageObject to merge onto this page
"""
def merge_translated_page(self, page2: PageObject, tx: float, ty: float) -> None:
"""
Merge another page with translation offset.
Args:
page2: PageObject to merge
tx: Translation offset in x direction
ty: Translation offset in y direction
"""
def merge_rotated_page(self, page2: PageObject, rotation: float) -> None:
"""
Merge another page with rotation.
Args:
page2: PageObject to merge
rotation: Rotation angle in degrees
"""
def merge_scaled_page(self, page2: PageObject, scale: float, expand: bool = False) -> None:
"""
Merge another page with scaling.
Args:
page2: PageObject to merge
scale: Scaling factor
expand: Whether to expand the page to fit scaled content
"""
def merge_rotated_scaled_page(
self,
page2: PageObject,
rotation: float,
scale: float,
expand: bool = False
) -> None:
"""
Merge another page with rotation and scaling.
Args:
page2: PageObject to merge
rotation: Rotation angle in degrees
scale: Scaling factor
expand: Whether to expand the page to fit transformed content
"""
def merge_transformed_page(
self,
page2: PageObject,
ctm,
expand: bool = False
) -> None:
"""
Merge another page with custom transformation matrix.
Args:
page2: PageObject to merge
ctm: Transformation matrix
expand: Whether to expand the page to fit transformed content
"""
def add_transformation(self, ctm) -> None:
"""
Apply a transformation matrix to the page.
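Args:
ctm: A Transformation object, or a 6-element tuple (a, b, c, d, e, f). Do not pass Transformation.matrix, which is the 3x3 nested form.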
"""
### Page Box Properties
Access and modify PDF page boundaries and dimensions through five different box types, each serving specific purposes in the PDF specification.
```python { .api }
# Box Properties (all return RectangleObject)
@property
def mediabox(self) -> RectangleObject:
"""
The boundaries of the physical medium on which the page is intended
to be displayed or printed. This is the largest box and defines the
overall page size.
"""
@property
def cropbox(self) -> RectangleObject:
"""
The visible region of default user space. When displayed or printed,
contents outside this box are clipped. Falls back to mediabox if not set.
"""
@property
def bleedbox(self) -> RectangleObject:
"""
The region to which contents should be clipped when output in a
production environment. Used for printing with bleed margins.
Falls back to cropbox, then mediabox if not set.
"""
@property
def trimbox(self) -> RectangleObject:
"""
The intended dimensions of the finished page after trimming.
Falls back to cropbox, then mediabox if not set.
"""
@property
def artbox(self) -> RectangleObject:
"""
The extent of the page's meaningful content as intended by the
page's creator. Falls back to cropbox, then mediabox if not set.
"""
# RectangleObject Properties and Methods
class RectangleObject:
# Individual coordinates (read/write)
@property
def left(self) -> FloatObject: ...
@property
def bottom(self) -> FloatObject: ...
@property
def right(self) -> FloatObject: ...
@property
def top(self) -> FloatObject: ...
# Corner positions (read/write)
@property
def lower_left(self) -> tuple[float, float]: ...
@property
def lower_right(self) -> tuple[float, float]: ...
@property
def upper_left(self) -> tuple[float, float]: ...
@property
def upper_right(self) -> tuple[float, float]: ...
# Dimensions (read-only)
@property
def width(self) -> float: ...
@property
def height(self) -> float: ...
def scale(self, sx: float, sy: float) -> RectangleObject:
"""
Create a new scaled rectangle.
Args:
sx: Horizontal scale factor
sy: Vertical scale factor
Returns:
New scaled RectangleObject
"""
@property
def rotation(self) -> int:
"""Get the page rotation angle in degrees."""
@property
def user_unit(self) -> float:
"""Get the user unit scale factor."""
@property
def images(self):
"""Get images on the page."""
@property
def page_number(self) -> int | None:
"""Get the page number in the document."""
@property
def annotations(self):
"""Get page annotations."""
@property
def mediabox(self):
"""Get the media box (page boundaries)."""
@property
def cropbox(self):
"""Get the crop box (visible page area)."""
@property
def bleedbox(self):
"""Get the bleed box (printable area with bleed)."""
@property
def trimbox(self):
"""Get the trim box (final trimmed page size)."""
@property
def artbox(self):
"""Get the art box (meaningful content area)."""
```
### Transformation Matrix
The Transformation class provides a convenient interface for creating and combining geometric transformations.
```python { .api }
class Transformation:
def __init__(self, ctm=(1, 0, 0, 1, 0, 0)):
"""
Initialize a transformation matrix.
Args:
ctm: 6-element transformation matrix tuple (a, b, c, d, e, f)
"""
def translate(self, tx: float = 0, ty: float = 0) -> Transformation:
"""
Add translation to the transformation.
Args:
tx: Translation in x direction
ty: Translation in y direction
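Returns:
A new Transformation that includes the translation; the original object is not modified, so chain the calls or keep the return value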
"""
def scale(self, sx: float = 1, sy: float | None = None) -> Transformation:
"""
Add scaling to the transformation.
Args:
sx: Horizontal scaling factor
sy: Vertical scaling factor (defaults to sx)
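Returns:
A new Transformation that includes the scaling; the original object is not modified, so chain the calls or keep the return value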
"""
def rotate(self, rotation: float) -> Transformation:
"""
Add rotation to the transformation.
Args:
rotation: Rotation angle in degrees
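Returns:
A new Transformation that includes the rotation; the original object is not modified, so chain the calls or keep the return value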
"""
def transform(self, m) -> Transformation:
"""
Apply another transformation matrix.
Args:
m: Transformation matrix to apply
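Returns:
A new Transformation combining this one with m; the original object is not modified, so chain the calls or keep the return value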
"""
def apply_on(self, pt, as_object: bool = False):
"""
Apply the transformation to a point.
Args:
pt: Point coordinates
as_object: Return as object instead of tuple
Returns:
Transformed point coordinates
"""
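@property
def matrix(self):
"""Get the transformation as a 3x3 nested tuple ((a, b, 0), (c, d, 0), (e, f, 1)); the 6-element form is available as the ctm attribute."""
```
```python
from pypdf import PdfReader, PdfWriter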
reader = PdfReader("input.pdf")
writer = PdfWriter()

for page in reader.pages:
    # Scale the page (content and boxes) to 150% of its original size.
    page.scale_by(1.5)
    # Rotate 90 degrees clockwise. rotate() expects a multiple of 90 and
    # treats positive angles as clockwise; it replaces the deprecated
    # rotate_clockwise() helper.
    page.rotate(90)
    writer.add_page(page)
with open("transformed.pdf", "wb") as output:
    writer.write(output)
```
```python
from pypdf import PdfReader, PdfWriter
reader = PdfReader("document.pdf")
overlay = PdfReader("watermark.pdf")
writer = PdfWriter()

# The same watermark page is stamped on every page, so look it up once
# instead of indexing into the overlay document on each iteration.
watermark = overlay.pages[0]

for page in reader.pages:
    # Draw the watermark's content on top of the page's own content.
    page.merge_page(watermark)
    writer.add_page(page)
with open("watermarked.pdf", "wb") as output:
    writer.write(output)
```
```python
from pypdf import PdfReader, PdfWriter, Transformation
reader = PdfReader("input.pdf")
writer = PdfWriter()

# Build a combined transformation. Each Transformation method returns a NEW
# object and leaves the receiver untouched, so the calls must be chained (or
# their results reassigned); calling them as bare statements would leave
# `transform` as the identity.
transform = (
    Transformation()
    .translate(100, 50)  # move 100 points right, 50 points up
    .scale(0.8, 1.2)     # 80% horizontally, 120% vertically
    .rotate(15)          # rotate by 15 degrees
)

for page in reader.pages:
    # Pass the Transformation itself. Its `.matrix` property is the 3x3
    # nested form, not the 6-element form that add_transformation() expects.
    page.add_transformation(transform)
    writer.add_page(page)
with open("complex_transform.pdf", "wb") as output:
    writer.write(output)
```
```python
from pypdf import PdfWriter, PageObject, PaperSize
writer = PdfWriter()

# Create pages with different sizes. The first positional parameter of
# create_blank_page() is an optional source document, so the dimensions are
# passed by keyword to make sure they bind to width and height.
letter_page = PageObject.create_blank_page(width=612, height=792)  # US Letter
a4_page = PageObject.create_blank_page(
    width=PaperSize.A4.width,
    height=PaperSize.A4.height,
)

writer.add_page(letter_page)
writer.add_page(a4_page)
with open("blank_pages.pdf", "wb") as output:
    writer.write(output)
```
```python
from pypdf import PdfReader, PdfWriter
from pypdf.generic import RectangleObject

reader = PdfReader("input.pdf")
writer = PdfWriter()

# Amount to trim from every side of the page, in points.
crop_margin = 50

for page in reader.pages:
    # Current page boundaries.
    media_box = page.mediabox

    # Page boxes are stored as PDF objects, so build a RectangleObject rather
    # than assigning a plain Python list.
    # NOTE(review): no check is made that the page is larger than twice the
    # margin; very small pages would yield an inverted box.
    page.cropbox = RectangleObject(
        (
            media_box.left + crop_margin,
            media_box.bottom + crop_margin,
            media_box.right - crop_margin,
            media_box.top - crop_margin,
        )
    )
    writer.add_page(page)
with open("cropped.pdf", "wb") as output:
    writer.write(output)
```
```python
from pypdf import PdfReader, PdfWriter
base_doc = PdfReader("base.pdf")
overlay_doc = PdfReader("overlay.pdf")
writer = PdfWriter()

# Loop-invariant: the number of overlay pages to cycle through.
overlay_count = len(overlay_doc.pages)

for index, page in enumerate(base_doc.pages):
    # Cycle through the overlay pages when the base document is longer.
    overlay_page = overlay_doc.pages[index % overlay_count]

    # Largest uniform factor that keeps the overlay inside the page.
    width_ratio = float(page.mediabox.width) / float(overlay_page.mediabox.width)
    height_ratio = float(page.mediabox.height) / float(overlay_page.mediabox.height)
    scale = min(width_ratio, height_ratio)

    # scale_by() modifies the overlay page in place; when an overlay page is
    # reused on a later iteration, the ratio above is computed from its
    # already-scaled size.
    overlay_page.scale_by(scale)
    page.merge_page(overlay_page)
    writer.add_page(page)
with open("multi_overlay.pdf", "wb") as output:
    writer.write(output)
```
Install with Tessl CLI
```bash
npx tessl i tessl/pypi-pypdf
```