tessl/pypi-pypdf

A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview

Eval results

Files

Annotations

Name: tessl/pypi-pypdf
Author: tessl

Complete annotation system supporting markup annotations (highlights, text annotations, shapes) and interactive elements (links, popups) with full customization capabilities. pypdf provides comprehensive annotation support for creating interactive PDFs.

Capabilities

Base Annotation Classes

Foundation classes for all annotation types with common properties and methods.

class AnnotationDictionary:
    """Base class for all PDF annotations."""

    def __init__(self, **kwargs) -> None:
        """
        Initialize annotation with properties.

        This listing is an API stub: it documents the signature only and
        contains no implementation.

        Args:
            **kwargs: Annotation properties. The accepted keys are not
                enumerated in this reference.
                NOTE(review): confirm the supported keys against the
                upstream pypdf.annotations documentation.
        """

# NOTE(review): listed here with the `class` keyword although it is described
# as a constant; presumably upstream exposes NO_FLAGS as a plain value that is
# passed as an annotation's flags - confirm before relying on this stub.
class NO_FLAGS:
    """Constant for annotations with no flags."""

Markup Annotations

Annotations that mark up document content with visual highlighting, text, and shapes.

class MarkupAnnotation(AnnotationDictionary):
    """Base class for markup annotations.

    Parent of the highlight, text, free-text and shape annotation classes
    listed in this section; it adds no members of its own in this listing.
    """

class Highlight(MarkupAnnotation):
    """Highlight annotation for marking important text."""

    def __init__(
        self,
        rect,
        quad_points,
        highlight_color: str = "ffff00",
        **kwargs
    ) -> None:
        """
        Create a highlight annotation.

        Args:
            rect: Rectangle defining annotation bounds; the usage examples
                pass an (x1, y1, x2, y2) tuple.
            quad_points: Points defining highlighted area.
                NOTE(review): the usage example passes a list of four
                (x, y) tuples; confirm the container type and point order
                expected by upstream pypdf.
            highlight_color: Highlight color as a six-digit hex string
                without a leading "#" (default "ffff00").
                NOTE(review): confirm this default against upstream pypdf.
            **kwargs: Additional annotation properties
        """

class Text(MarkupAnnotation):
    """Text annotation (sticky note)."""

    def __init__(
        self,
        rect,
        text: str,
        icon: str = "Note",
        **kwargs
    ) -> None:
        """
        Create a text annotation.

        Args:
            rect: Rectangle defining annotation position; the usage
                examples pass an (x1, y1, x2, y2) tuple.
            text: Annotation text content
            icon: Icon type ("Note", "Comment", "Key", etc.); defaults to
                "Note".
                NOTE(review): confirm that upstream pypdf's Text accepts an
                `icon` argument and which names are valid.
            **kwargs: Additional annotation properties
        """

class FreeText(MarkupAnnotation):
    """Free text annotation for adding text directly to the page."""

    def __init__(
        self,
        rect,
        text: str,
        font: str = "Helvetica",
        font_size: float = 12,
        **kwargs
    ) -> None:
        """
        Create a free text annotation.

        Args:
            rect: Rectangle defining text area; the usage examples pass an
                (x1, y1, x2, y2) tuple.
            text: Text content
            font: Font name (default "Helvetica")
            font_size: Font size in points (default 12).
                NOTE(review): confirm the type upstream pypdf expects here;
                it may be a string such as "14pt" rather than a number.
            **kwargs: Additional annotation properties
        """

class Line(MarkupAnnotation):
    """Line annotation for drawing lines."""

    def __init__(
        self,
        p1: tuple,
        p2: tuple,
        line_color: str = "000000",
        line_width: float = 1,
        **kwargs
    ) -> None:
        """
        Create a line annotation.

        Args:
            p1: Start point (x, y)
            p2: End point (x, y)
            line_color: Line color as a six-digit hex string without a
                leading "#" (default "000000")
            line_width: Line width in points (default 1)
            **kwargs: Additional annotation properties
                NOTE(review): this stub takes no `rect`; confirm whether
                upstream pypdf requires one for Line.
        """

class Rectangle(MarkupAnnotation):
    """Rectangle annotation for drawing rectangles."""

    def __init__(
        self,
        rect,
        stroke_color: str = "000000",
        fill_color: str | None = None,
        line_width: float = 1,
        **kwargs
    ) -> None:
        """
        Create a rectangle annotation.

        Args:
            rect: Rectangle coordinates; the usage examples pass an
                (x1, y1, x2, y2) tuple.
            stroke_color: Border color as a six-digit hex string without a
                leading "#" (default "000000")
            fill_color: Fill color in hex format (None for no fill)
            line_width: Border width in points (default 1)
            **kwargs: Additional annotation properties
                NOTE(review): confirm the color/width parameter names
                against upstream pypdf before relying on this stub.
        """

class Ellipse(MarkupAnnotation):
    """Ellipse annotation for drawing ellipses and circles."""

    def __init__(
        self,
        rect,
        stroke_color: str = "000000",
        fill_color: str | None = None,
        line_width: float = 1,
        **kwargs
    ) -> None:
        """
        Create an ellipse annotation.

        Args:
            rect: Bounding rectangle for ellipse; the usage examples pass
                an (x1, y1, x2, y2) tuple.
            stroke_color: Border color as a six-digit hex string without a
                leading "#" (default "000000")
            fill_color: Fill color in hex format (None for no fill)
            line_width: Border width in points (default 1)
            **kwargs: Additional annotation properties
                NOTE(review): confirm the color/width parameter names
                against upstream pypdf before relying on this stub.
        """

class Polygon(MarkupAnnotation):
    """Polygon annotation for drawing multi-sided shapes."""

    def __init__(
        self,
        vertices: list,
        stroke_color: str = "000000",
        fill_color: str | None = None,
        line_width: float = 1,
        **kwargs
    ) -> None:
        """
        Create a polygon annotation.

        Args:
            vertices: List of (x, y) coordinates defining polygon vertices,
                given in drawing order as in the pentagon usage example.
            stroke_color: Border color as a six-digit hex string without a
                leading "#" (default "000000")
            fill_color: Fill color in hex format (None for no fill)
            line_width: Border width in points (default 1)
            **kwargs: Additional annotation properties
                NOTE(review): confirm the color/width parameter names
                against upstream pypdf before relying on this stub.
        """

class PolyLine(MarkupAnnotation):
    """Polyline annotation for drawing connected line segments."""

    def __init__(
        self,
        vertices: list,
        line_color: str = "000000",
        line_width: float = 1,
        **kwargs
    ) -> None:
        """
        Create a polyline annotation.

        Args:
            vertices: List of (x, y) coordinates defining line points; the
                segments connect consecutive entries.
            line_color: Line color as a six-digit hex string without a
                leading "#" (default "000000")
            line_width: Line width in points (default 1)
            **kwargs: Additional annotation properties
                NOTE(review): confirm the color/width parameter names
                against upstream pypdf before relying on this stub.
        """

Interactive Annotations

Non-markup annotations that provide interactive functionality.

class Link(AnnotationDictionary):
    """Link annotation for creating clickable links.

    A non-markup annotation: it derives from ``AnnotationDictionary``, the
    base class for all PDF annotations, rather than from
    ``MarkupAnnotation``.
    """

    def __init__(
        self,
        rect,
        target,
        **kwargs
    ) -> None:
        """
        Create a link annotation.

        Args:
            rect: Rectangle defining clickable area; the usage examples
                pass an (x1, y1, x2, y2) tuple.
            target: Target URL or internal destination. The usage examples
                pass either a URL string or a dict such as
                ``{"type": "goto", "page": 1}`` (0-indexed page number).
                NOTE(review): confirm the accepted target forms against
                upstream pypdf.
            **kwargs: Additional annotation properties
        """

class Popup(AnnotationDictionary):
    """Popup annotation associated with other annotations.

    A non-markup annotation: it derives from ``AnnotationDictionary``, the
    base class for all PDF annotations, rather than from
    ``MarkupAnnotation``.
    """

    def __init__(
        self,
        rect,
        parent,
        **kwargs
    ) -> None:
        """
        Create a popup annotation.

        Args:
            rect: Rectangle defining popup area; the usage examples for
                other annotations pass an (x1, y1, x2, y2) tuple.
            parent: Parent annotation this popup belongs to
            **kwargs: Additional annotation properties
        """

Usage Examples

Adding Text Annotations

from pypdf import PdfReader, PdfWriter
from pypdf.annotations import Text

reader = PdfReader("document.pdf")
writer = PdfWriter()

# Copy every page into the writer first: annotations are attached to pages
# that already belong to the writer.
for page in reader.pages:
    writer.add_page(page)

# Sticky note for the first page
text_annotation = Text(
    rect=(100, 100, 200, 150),  # x1, y1, x2, y2
    text="This is a note about this section",
    icon="Comment",
)

# page.annotations is None for a page that has no /Annots array, so calling
# .append() on it would raise AttributeError. PdfWriter.add_annotation
# creates the array when it is missing.
writer.add_annotation(page_number=0, annotation=text_annotation)

with open("annotated.pdf", "wb") as output:
    writer.write(output)

Adding Highlight Annotations

from pypdf import PdfReader, PdfWriter
from pypdf.annotations import Highlight

reader = PdfReader("document.pdf")
writer = PdfWriter()

# Copy every page into the writer first: annotations are attached to pages
# that already belong to the writer.
for page in reader.pages:
    writer.add_page(page)

# Highlight annotation requires quad points defining the highlighted area
# For simplicity, using rectangle coordinates
highlight = Highlight(
    rect=(100, 200, 300, 220),
    quad_points=[(100, 200), (300, 200), (100, 220), (300, 220)],
    highlight_color="ffff00",  # Yellow highlight
)

# page.annotations is None for a page that has no /Annots array, so calling
# .append() on it would raise AttributeError. PdfWriter.add_annotation
# creates the array when it is missing.
writer.add_annotation(page_number=0, annotation=highlight)

with open("highlighted.pdf", "wb") as output:
    writer.write(output)

Adding Shape Annotations

from pypdf import PdfReader, PdfWriter
from pypdf.annotations import Rectangle, Ellipse, Line

reader = PdfReader("document.pdf")
writer = PdfWriter()

# Copy every page into the writer first: annotations are attached to pages
# that already belong to the writer.
for page in reader.pages:
    writer.add_page(page)

# Add rectangle
rectangle = Rectangle(
    rect=(50, 50, 150, 100),
    stroke_color="ff0000",  # Red border
    fill_color="ffcccc",    # Light red fill
    line_width=2,
)

# Add ellipse
ellipse = Ellipse(
    rect=(200, 200, 300, 250),
    stroke_color="0000ff",  # Blue border
    fill_color="ccccff",    # Light blue fill
    line_width=1.5,
)

# Add line
line = Line(
    p1=(100, 300),
    p2=(400, 350),
    line_color="00ff00",    # Green line
    line_width=3,
)

# Add all annotations to the first page. page.annotations is None for a page
# without an /Annots array, so .extend() on it would raise AttributeError;
# PdfWriter.add_annotation creates the array when it is missing.
for shape in (rectangle, ellipse, line):
    writer.add_annotation(page_number=0, annotation=shape)

with open("shapes.pdf", "wb") as output:
    writer.write(output)

Adding Free Text Annotations

from pypdf import PdfReader, PdfWriter
from pypdf.annotations import FreeText

reader = PdfReader("document.pdf")
writer = PdfWriter()

# Copy every page into the writer first: annotations are attached to pages
# that already belong to the writer.
for page in reader.pages:
    writer.add_page(page)

# Add free text annotation
free_text = FreeText(
    rect=(100, 400, 300, 450),
    text="This text appears directly on the page",
    font="Arial",
    font_size=14,
)

# page.annotations is None for a page that has no /Annots array, so calling
# .append() on it would raise AttributeError. PdfWriter.add_annotation
# creates the array when it is missing.
writer.add_annotation(page_number=0, annotation=free_text)

with open("free_text.pdf", "wb") as output:
    writer.write(output)

Creating Link Annotations

from pypdf import PdfReader, PdfWriter
from pypdf.annotations import Link

reader = PdfReader("document.pdf")
writer = PdfWriter()

# Copy every page into the writer first: annotations are attached to pages
# that already belong to the writer, and an internal link needs its target
# page to exist in the output.
for page in reader.pages:
    writer.add_page(page)

# External URL link
url_link = Link(
    rect=(100, 100, 200, 120),
    target="https://example.com",
)

# Internal page link (go to page 2)
page_link = Link(
    rect=(100, 150, 200, 170),
    target={"type": "goto", "page": 1},  # 0-indexed page number
)

# page.annotations is None for a page that has no /Annots array, so calling
# .extend() on it would raise AttributeError. PdfWriter.add_annotation
# creates the array when it is missing.
for link in (url_link, page_link):
    writer.add_annotation(page_number=0, annotation=link)

with open("links.pdf", "wb") as output:
    writer.write(output)

Complex Polygon Annotation

from pypdf import PdfReader, PdfWriter
from pypdf.annotations import Polygon

reader = PdfReader("document.pdf")
writer = PdfWriter()

# Copy every page into the writer first: annotations are attached to pages
# that already belong to the writer.
for page in reader.pages:
    writer.add_page(page)

# Create a pentagon
pentagon_vertices = [
    (200, 300),  # Top point
    (250, 250),  # Top right
    (225, 200),  # Bottom right
    (175, 200),  # Bottom left
    (150, 250),  # Top left
]

pentagon = Polygon(
    vertices=pentagon_vertices,
    stroke_color="800080",  # Purple border
    fill_color="dda0dd",    # Plum fill
    line_width=2,
)

# page.annotations is None for a page that has no /Annots array, so calling
# .append() on it would raise AttributeError. PdfWriter.add_annotation
# creates the array when it is missing.
writer.add_annotation(page_number=0, annotation=pentagon)

with open("polygon.pdf", "wb") as output:
    writer.write(output)

Reading Existing Annotations

from pypdf import PdfReader

reader = PdfReader("annotated_document.pdf")

for page_num, page in enumerate(reader.pages, start=1):
    print(f"Page {page_num}:")

    # page.annotations is None when the page has no /Annots entry
    if page.annotations:
        for index, entry in enumerate(page.annotations, start=1):
            # Array entries may be indirect references; resolve them to the
            # annotation dictionary before reading keys.
            annotation = entry.get_object()

            print(f"  Annotation {index}:")
            print(f"    Type: {annotation.get('/Subtype', 'Unknown')}")
            print(f"    Rectangle: {annotation.get('/Rect', 'Not specified')}")

            # Check for text content
            if '/Contents' in annotation:
                print(f"    Text: {annotation['/Contents']}")

            # Check for appearance
            if '/AP' in annotation:
                print("    Has appearance stream")

            print()
    else:
        print("  No annotations found")
    print()

Annotation Management

from pypdf import PdfReader, PdfWriter

def remove_annotations(input_pdf: str, output_pdf: str):
    """Write a copy of ``input_pdf`` to ``output_pdf`` with annotations emptied.

    Args:
        input_pdf: Path of the PDF to read.
        output_pdf: Path the stripped PDF is written to.
    """
    source = PdfReader(input_pdf)
    target = PdfWriter()

    for current_page in source.pages:
        existing = current_page.annotations
        # Pages without annotations are copied through untouched
        if existing:
            existing.clear()
        target.add_page(current_page)

    with open(output_pdf, "wb") as sink:
        target.write(sink)

def filter_annotations_by_type(input_pdf: str, output_pdf: str, keep_types: list):
    """Keep only specific annotation types.

    Copies ``input_pdf`` to ``output_pdf``, dropping every annotation whose
    ``/Subtype`` is not listed in ``keep_types``.

    Args:
        input_pdf: Path of the PDF to read.
        output_pdf: Path the filtered PDF is written to.
        keep_types: Subtype names to keep, e.g. ``['/Text', '/Highlight']``.
    """
    reader = PdfReader(input_pdf)
    writer = PdfWriter()

    for page in reader.pages:
        annotations = page.annotations
        if annotations:
            filtered_annotations = []
            for entry in annotations:
                # Entries may be indirect references: resolve them before
                # reading /Subtype, but keep the original entry so existing
                # references are preserved in the output.
                resolved = entry.get_object()
                # A dangling reference resolves to nothing; drop it
                if resolved is None:
                    continue
                if resolved.get('/Subtype') in keep_types:
                    filtered_annotations.append(entry)

            # Replace annotations with filtered list
            annotations.clear()
            annotations.extend(filtered_annotations)

        writer.add_page(page)

    with open(output_pdf, "wb") as output:
        writer.write(output)

# Strip every annotation from the document
remove_annotations(input_pdf="annotated.pdf", output_pdf="clean.pdf")

# Retain just the sticky-note and highlight subtypes
kept_subtypes = ['/Text', '/Highlight']
filter_annotations_by_type("annotated.pdf", "filtered.pdf", kept_subtypes)

Batch Annotation Processing

from pypdf import PdfReader, PdfWriter
from pypdf.annotations import Text
from pathlib import Path

def add_review_annotations(pdf_directory: str, reviewer_name: str):
    """Add review annotations to all PDFs in a directory.

    For each ``*.pdf`` directly inside ``pdf_directory`` a copy named
    ``<stem>_reviewed.pdf`` is written next to it, with a text annotation
    naming the reviewer on the first page. Failures are reported per file
    and do not stop the batch.

    Args:
        pdf_directory: Directory whose PDFs are processed (not recursive).
        reviewer_name: Name inserted into the annotation text.
    """
    # Snapshot the listing up front: outputs are written into the same
    # directory, so it must not be iterated lazily while it changes.
    for pdf_path in sorted(Path(pdf_directory).glob("*.pdf")):
        try:
            reader = PdfReader(str(pdf_path))

            # Nothing to annotate in a document without pages
            if not reader.pages:
                continue

            writer = PdfWriter()
            for page in reader.pages:
                writer.add_page(page)

            review_note = Text(
                rect=(50, 750, 150, 800),  # Top-left corner
                text=f"Reviewed by: {reviewer_name}",
                icon="Key",
            )

            # page.annotations is None when the first page has no /Annots
            # array, so appending to it would fail; add_annotation creates
            # the array when it is missing.
            writer.add_annotation(page_number=0, annotation=review_note)

            # Save with "_reviewed" suffix
            output_path = pdf_path.with_stem(f"{pdf_path.stem}_reviewed")
            with open(output_path, "wb") as output:
                writer.write(output)

            print(f"Added review annotation to {pdf_path.name}")

        except Exception as e:
            # Deliberate best-effort boundary: one unreadable PDF must not
            # abort the rest of the batch.
            print(f"Error processing {pdf_path.name}: {e}")

# Stamp a reviewer note onto every PDF in the folder
add_review_annotations(pdf_directory="documents/", reviewer_name="John Reviewer")

Install with Tessl CLI