A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Complete annotation system supporting markup annotations (highlights, text annotations, shapes) and interactive elements (links, popups) with full customization capabilities. pypdf provides comprehensive annotation support for creating interactive PDFs.
Foundation classes for all annotation types with common properties and methods.
class AnnotationDictionary:
    """Common ancestor shared by every PDF annotation type."""

    def __init__(self, **kwargs):
        """
        Set up the annotation from its properties.

        Args:
            **kwargs: Properties of the annotation.
        """
class NO_FLAGS:
    """Marker constant for an annotation that has no flags set."""


# Annotations that mark up document content with visual highlighting, text, and shapes.
class MarkupAnnotation(AnnotationDictionary):
    """Common parent of the markup annotation classes."""
class Highlight(MarkupAnnotation):
    """Markup annotation that highlights important text."""

    def __init__(
        self,
        rect,
        quad_points,
        highlight_color: str = "ffff00",
        **kwargs
    ):
        """
        Build a highlight annotation.

        Args:
            rect: Rectangle that bounds the annotation.
            quad_points: Points that outline the highlighted area.
            highlight_color: Colour of the highlight, as a hex string.
            **kwargs: Any further annotation properties.
        """
class Text(MarkupAnnotation):
    """Sticky-note style text annotation."""

    def __init__(
        self,
        rect,
        text: str,
        icon: str = "Note",
        **kwargs
    ):
        """
        Build a text annotation.

        Args:
            rect: Rectangle that positions the annotation.
            text: Text shown by the annotation.
            icon: Kind of icon to use ("Note", "Comment", "Key", etc.).
            **kwargs: Any further annotation properties.
        """
class FreeText(MarkupAnnotation):
    """Annotation that places text directly on the page."""

    def __init__(
        self,
        rect,
        text: str,
        font: str = "Helvetica",
        font_size: float = 12,
        **kwargs
    ):
        """
        Build a free text annotation.

        Args:
            rect: Rectangle that bounds the text area.
            text: The text to show.
            font: Name of the font.
            font_size: Size of the font, in points.
            **kwargs: Any further annotation properties.
        """
class Line(MarkupAnnotation):
    """Annotation that draws a straight line."""

    def __init__(
        self,
        p1: tuple,
        p2: tuple,
        line_color: str = "000000",
        line_width: float = 1,
        **kwargs
    ):
        """
        Build a line annotation.

        Args:
            p1: Where the line starts, as (x, y).
            p2: Where the line ends, as (x, y).
            line_color: Colour of the line, as a hex string.
            line_width: Thickness of the line, in points.
            **kwargs: Any further annotation properties.
        """
class Rectangle(MarkupAnnotation):
    """Annotation that draws a rectangle."""

    def __init__(
        self,
        rect,
        stroke_color: str = "000000",
        fill_color: str | None = None,
        line_width: float = 1,
        **kwargs
    ):
        """
        Build a rectangle annotation.

        Args:
            rect: Coordinates of the rectangle.
            stroke_color: Colour of the border, as a hex string.
            fill_color: Colour of the interior, as a hex string (None leaves
                the rectangle unfilled).
            line_width: Thickness of the border, in points.
            **kwargs: Any further annotation properties.
        """
class Ellipse(MarkupAnnotation):
    """Annotation that draws an ellipse or a circle."""

    def __init__(
        self,
        rect,
        stroke_color: str = "000000",
        fill_color: str | None = None,
        line_width: float = 1,
        **kwargs
    ):
        """
        Build an ellipse annotation.

        Args:
            rect: Rectangle that bounds the ellipse.
            stroke_color: Colour of the border, as a hex string.
            fill_color: Colour of the interior, as a hex string (None leaves
                the ellipse unfilled).
            line_width: Thickness of the border, in points.
            **kwargs: Any further annotation properties.
        """
class Polygon(MarkupAnnotation):
    """Annotation that draws a multi-sided shape."""

    def __init__(
        self,
        vertices: list,
        stroke_color: str = "000000",
        fill_color: str | None = None,
        line_width: float = 1,
        **kwargs
    ):
        """
        Build a polygon annotation.

        Args:
            vertices: The polygon's corners, as a list of (x, y) coordinates.
            stroke_color: Colour of the border, as a hex string.
            fill_color: Colour of the interior, as a hex string (None leaves
                the polygon unfilled).
            line_width: Thickness of the border, in points.
            **kwargs: Any further annotation properties.
        """
class PolyLine(MarkupAnnotation):
    """Annotation that draws a chain of connected line segments."""

    def __init__(
        self,
        vertices: list,
        line_color: str = "000000",
        line_width: float = 1,
        **kwargs
    ):
        """
        Build a polyline annotation.

        Args:
            vertices: The points of the line, as a list of (x, y) coordinates.
            line_color: Colour of the line, as a hex string.
            line_width: Thickness of the line, in points.
            **kwargs: Any further annotation properties.
        """


# Non-markup annotations that provide interactive functionality.
# Derives from AnnotationDictionary, which is documented as the base class
# for all PDF annotations; a link is a (non-markup) annotation too.
class Link(AnnotationDictionary):
    """Link annotation for creating clickable links."""

    def __init__(
        self,
        rect,
        target,
        **kwargs
    ):
        """
        Create a link annotation.

        Args:
            rect: Rectangle defining clickable area
            target: Target URL or internal destination
            **kwargs: Additional annotation properties
        """
# Derives from AnnotationDictionary, which is documented as the base class
# for all PDF annotations; a popup is a (non-markup) annotation too.
class Popup(AnnotationDictionary):
    """Popup annotation associated with other annotations."""

    def __init__(
        self,
        rect,
        parent,
        **kwargs
    ):
        """
        Create a popup annotation.

        Args:
            rect: Rectangle defining popup area
            parent: Parent annotation this popup belongs to
            **kwargs: Additional annotation properties
        """


from pypdf import PdfReader, PdfWriter
from pypdf.annotations import Text
reader = PdfReader("document.pdf")
writer = PdfWriter()

# Copy every page into the writer first: an annotation is attached to a page
# that the writer already holds.
for page in reader.pages:
    writer.add_page(page)

# Text annotation for the first page
text_annotation = Text(
    rect=(100, 100, 200, 150),  # x1, y1, x2, y2
    text="This is a note about this section",
    icon="Comment",
)

# Attach through the writer instead of page.annotations.append():
# page.annotations is None for a page that has no annotations yet, so the
# append would fail with AttributeError on such a page.
writer.add_annotation(page_number=0, annotation=text_annotation)

with open("annotated.pdf", "wb") as output:
    writer.write(output)


from pypdf import PdfReader, PdfWriter
from pypdf.annotations import Highlight
reader = PdfReader("document.pdf")
writer = PdfWriter()

# Copy every page into the writer first: an annotation is attached to a page
# that the writer already holds.
for page in reader.pages:
    writer.add_page(page)

# Highlight annotation requires quad points defining the highlighted area.
# For simplicity, using rectangle coordinates.
highlight = Highlight(
    rect=(100, 200, 300, 220),
    quad_points=[(100, 200), (300, 200), (100, 220), (300, 220)],
    highlight_color="ffff00",  # Yellow highlight
)

# Attach through the writer instead of page.annotations.append():
# page.annotations is None for a page that has no annotations yet, so the
# append would fail with AttributeError on such a page.
writer.add_annotation(page_number=0, annotation=highlight)

with open("highlighted.pdf", "wb") as output:
    writer.write(output)


from pypdf import PdfReader, PdfWriter
from pypdf.annotations import Rectangle, Ellipse, Line
reader = PdfReader("document.pdf")
writer = PdfWriter()

# Copy every page into the writer first: an annotation is attached to a page
# that the writer already holds.
for page in reader.pages:
    writer.add_page(page)

# Rectangle
rectangle = Rectangle(
    rect=(50, 50, 150, 100),
    stroke_color="ff0000",  # Red border
    fill_color="ffcccc",  # Light red fill
    line_width=2,
)

# Ellipse
ellipse = Ellipse(
    rect=(200, 200, 300, 250),
    stroke_color="0000ff",  # Blue border
    fill_color="ccccff",  # Light blue fill
    line_width=1.5,
)

# Line
line = Line(
    p1=(100, 300),
    p2=(400, 350),
    line_color="00ff00",  # Green line
    line_width=3,
)

# Attach all three to the first page through the writer instead of
# page.annotations.extend(): page.annotations is None for a page that has no
# annotations yet, so the extend would fail with AttributeError there.
for shape in (rectangle, ellipse, line):
    writer.add_annotation(page_number=0, annotation=shape)

with open("shapes.pdf", "wb") as output:
    writer.write(output)


from pypdf import PdfReader, PdfWriter
from pypdf.annotations import FreeText
reader = PdfReader("document.pdf")
writer = PdfWriter()

# Copy every page into the writer first: an annotation is attached to a page
# that the writer already holds.
for page in reader.pages:
    writer.add_page(page)

# Free text annotation for the first page
free_text = FreeText(
    rect=(100, 400, 300, 450),
    text="This text appears directly on the page",
    font="Arial",
    font_size=14,
)

# Attach through the writer instead of page.annotations.append():
# page.annotations is None for a page that has no annotations yet, so the
# append would fail with AttributeError on such a page.
writer.add_annotation(page_number=0, annotation=free_text)

with open("free_text.pdf", "wb") as output:
    writer.write(output)


from pypdf import PdfReader, PdfWriter
from pypdf.annotations import Link
reader = PdfReader("document.pdf")
writer = PdfWriter()

# Copy every page into the writer first: an annotation is attached to a page
# that the writer already holds.
for page in reader.pages:
    writer.add_page(page)

# External URL link
url_link = Link(
    rect=(100, 100, 200, 120),
    target="https://example.com",
)

# Internal page link (go to page 2)
page_link = Link(
    rect=(100, 150, 200, 170),
    target={"type": "goto", "page": 1},  # 0-indexed page number
)

# Attach both to the first page through the writer instead of
# page.annotations.extend(): page.annotations is None for a page that has no
# annotations yet, so the extend would fail with AttributeError there.
for link in (url_link, page_link):
    writer.add_annotation(page_number=0, annotation=link)

with open("links.pdf", "wb") as output:
    writer.write(output)


from pypdf import PdfReader, PdfWriter
from pypdf.annotations import Polygon
reader = PdfReader("document.pdf")
writer = PdfWriter()

# Copy every page into the writer first: an annotation is attached to a page
# that the writer already holds.
for page in reader.pages:
    writer.add_page(page)

# Create a pentagon
pentagon_vertices = [
    (200, 300),  # Top point
    (250, 250),  # Top right
    (225, 200),  # Bottom right
    (175, 200),  # Bottom left
    (150, 250),  # Top left
]
pentagon = Polygon(
    vertices=pentagon_vertices,
    stroke_color="800080",  # Purple border
    fill_color="dda0dd",  # Plum fill
    line_width=2,
)

# Attach through the writer instead of page.annotations.append():
# page.annotations is None for a page that has no annotations yet, so the
# append would fail with AttributeError on such a page.
writer.add_annotation(page_number=0, annotation=pentagon)

with open("polygon.pdf", "wb") as output:
    writer.write(output)


from pypdf import PdfReader
reader = PdfReader("annotated_document.pdf")

# Print a short report of the annotations found on each page.
for page_num, page in enumerate(reader.pages):
    print(f"Page {page_num + 1}:")

    if page.annotations:
        for i, annotation in enumerate(page.annotations):
            # Entries of the annotation array may be indirect references;
            # resolve each one before reading its keys.
            annotation = annotation.get_object()

            print(f" Annotation {i + 1}:")
            print(f" Type: {annotation.get('/Subtype', 'Unknown')}")
            print(f" Rectangle: {annotation.get('/Rect', 'Not specified')}")

            # Check for text content
            if '/Contents' in annotation:
                print(f" Text: {annotation['/Contents']}")

            # Check for appearance
            if '/AP' in annotation:
                print(" Has appearance stream")
            print()
    else:
        print(" No annotations found")
    print()


from pypdf import PdfReader, PdfWriter
def remove_annotations(input_pdf: str, output_pdf: str):
    """
    Write a copy of a PDF with the annotations of every page emptied.

    Args:
        input_pdf: Path of the PDF to read.
        output_pdf: Path the stripped copy is written to.
    """
    source = PdfReader(input_pdf)
    sink = PdfWriter()

    for current_page in source.pages:
        # Pages without annotations are copied untouched.
        existing = current_page.annotations
        if existing:
            existing.clear()
        sink.add_page(current_page)

    with open(output_pdf, "wb") as handle:
        sink.write(handle)
def filter_annotations_by_type(input_pdf: str, output_pdf: str, keep_types: list):
    """
    Write a copy of a PDF that keeps only annotations of the given subtypes.

    Args:
        input_pdf: Path of the PDF to read.
        output_pdf: Path the filtered copy is written to.
        keep_types: Values of ``/Subtype`` to keep, e.g.
            ``['/Text', '/Highlight']``. Annotations with any other subtype
            (or with none) are dropped.
    """
    reader = PdfReader(input_pdf)
    writer = PdfWriter()

    for page in reader.pages:
        annotations = page.annotations
        if annotations:
            # Entries may be indirect references: resolve each one only to
            # read its subtype, and keep the original entry in the array.
            kept = [
                annotation
                for annotation in annotations
                if annotation.get_object().get('/Subtype') in keep_types
            ]

            # Replace the contents of the array in place with the kept entries.
            annotations.clear()
            annotations.extend(kept)

        writer.add_page(page)

    with open(output_pdf, "wb") as output:
        writer.write(output)
# Strip every annotation
remove_annotations(input_pdf="annotated.pdf", output_pdf="clean.pdf")

# Retain text and highlight annotations only
filter_annotations_by_type(
    input_pdf="annotated.pdf",
    output_pdf="filtered.pdf",
    keep_types=['/Text', '/Highlight'],
)


from pypdf import PdfReader, PdfWriter
from pypdf.annotations import Text
from pathlib import Path
def add_review_annotations(pdf_directory: str, reviewer_name: str):
    """
    Add a "Reviewed by" note to the first page of every PDF in a directory.

    Each ``<name>.pdf`` is written out again as ``<name>_reviewed.pdf`` next
    to the original. Files whose stem already ends in ``_reviewed`` are
    skipped, so running the function again does not annotate its own output.
    A failure on one file is printed and the batch carries on with the next.

    Args:
        pdf_directory: Directory searched (non-recursively) for ``*.pdf`` files.
        reviewer_name: Name inserted into the text of the note.
    """
    # Snapshot the listing up front: the loop writes new PDFs into this same
    # directory and they must not be picked up by the ongoing iteration.
    pdf_paths = sorted(Path(pdf_directory).glob("*.pdf"))

    for pdf_path in pdf_paths:
        if pdf_path.stem.endswith("_reviewed"):
            continue

        try:
            reader = PdfReader(str(pdf_path))
            writer = PdfWriter()

            for page in reader.pages:
                writer.add_page(page)

            if reader.pages:
                review_note = Text(
                    rect=(50, 750, 150, 800),  # Top-left corner
                    text=f"Reviewed by: {reviewer_name}",
                    icon="Key",
                )
                # Attach through the writer: page.annotations is None for a
                # page without annotations, so appending to it would fail.
                writer.add_annotation(page_number=0, annotation=review_note)

            # Save with "_reviewed" suffix
            output_path = pdf_path.with_stem(f"{pdf_path.stem}_reviewed")
            with open(output_path, "wb") as output:
                writer.write(output)

            print(f"Added review annotation to {pdf_path.name}")
        except Exception as e:  # Batch boundary: report the file and go on
            print(f"Error processing {pdf_path.name}: {e}")
# Annotate every PDF under documents/ on behalf of the reviewer
add_review_annotations(pdf_directory="documents/", reviewer_name="John Reviewer")


# Install with Tessl CLI:
#     npx tessl i tessl/pypi-pypdf