CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-pymupdf

High performance Python library for data extraction, analysis, conversion & manipulation of PDF and other documents.

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/annotations-forms.md

Annotations and Forms

Comprehensive annotation handling and interactive forms support for PDF documents. PyMuPDF provides complete control over PDF annotations including creation, modification, deletion, and rendering of various annotation types.

Capabilities

Annotation Management

Core annotation operations for working with PDF annotations.

class Page:
    """Annotation lookup and creation methods of a page (API summary stubs)."""

    # NOTE(review): upstream PyMuPDF appears to expose ``first_annot`` as a
    # read-only property rather than a method — confirm against the API docs.
    def first_annot(self) -> typing.Optional[Annot]:
        """
        Get first annotation on page.

        Returns:
        First Annot object or None if no annotations
        """

    def load_annot(self, ident: typing.Union[str, int]) -> Annot:
        """
        Load specific annotation by identifier.

        Parameters:
        - ident: annotation identifier (xref number or unique name)

        Returns:
        Annot object
        """

    # NOTE(review): upstream PyMuPDF appears to return a generator here, not a
    # list; callers that only iterate the result are unaffected — confirm.
    def annots(self, types: typing.Optional[list] = None) -> list:
        """
        Get list of annotations on page.

        Parameters:
        - types: filter by annotation types (list of integers); None applies
          no filter

        Returns:
        List of Annot objects
        """

    def annot_names(self) -> list:
        """
        Get list of annotation names on page.

        Returns:
        List of annotation unique names
        """

    def add_text_annot(self, point: Point, text: str, icon: str = "Note") -> Annot:
        """
        Add text annotation.

        Parameters:
        - point: annotation position
        - text: annotation content
        - icon: icon name ("Note", "Comment", "Key", "Help", etc.)

        Returns:
        New Annot object
        """

    def add_highlight_annot(self, quads: typing.Union[Quad, list]) -> Annot:
        """
        Add highlight annotation.

        Parameters:
        - quads: Quad object or list of Quad objects to highlight

        Returns:
        New Annot object
        """

    def add_underline_annot(self, quads: typing.Union[Quad, list]) -> Annot:
        """
        Add underline annotation.

        Parameters:
        - quads: Quad object or list of Quad objects to underline

        Returns:
        New Annot object
        """

    def add_strikeout_annot(self, quads: typing.Union[Quad, list]) -> Annot:
        """
        Add strikeout annotation.

        Parameters:
        - quads: Quad object or list of Quad objects to strike out

        Returns:
        New Annot object
        """

    def add_squiggly_annot(self, quads: typing.Union[Quad, list]) -> Annot:
        """
        Add squiggly underline annotation.

        Parameters:
        - quads: Quad object or list of Quad objects for squiggly underline

        Returns:
        New Annot object
        """

    def add_rect_annot(self, rect: Rect) -> Annot:
        """
        Add rectangle annotation.

        Parameters:
        - rect: rectangle coordinates

        Returns:
        New Annot object
        """

    def add_circle_annot(self, rect: Rect) -> Annot:
        """
        Add circle annotation.

        Parameters:
        - rect: bounding rectangle for circle

        Returns:
        New Annot object
        """

    def add_line_annot(self, p1: Point, p2: Point) -> Annot:
        """
        Add line annotation.

        Parameters:
        - p1: start point
        - p2: end point

        Returns:
        New Annot object
        """

    def add_polyline_annot(self, points: list) -> Annot:
        """
        Add polyline annotation.

        Parameters:
        - points: list of Point objects

        Returns:
        New Annot object
        """

    def add_polygon_annot(self, points: list) -> Annot:
        """
        Add polygon annotation.

        Parameters:
        - points: list of Point objects

        Returns:
        New Annot object
        """

    def add_freetext_annot(self, rect: Rect, text: str, **kwargs) -> Annot:
        """
        Add free text annotation.

        Parameters:
        - rect: annotation rectangle
        - text: text content

        Keyword arguments (accepted through **kwargs):
        - fontsize: font size
        - fontname: font name
        - text_color: text color
        - fill_color: background color
        - align: text alignment (0=left, 1=center, 2=right)

        Returns:
        New Annot object
        """

    def add_ink_annot(self, handwriting: list) -> Annot:
        """
        Add ink annotation (freehand drawing).

        Parameters:
        - handwriting: list of lists of Point objects (strokes)

        Returns:
        New Annot object
        """

    def add_stamp_annot(self, rect: Rect, stamp: int = 0) -> Annot:
        """
        Add stamp annotation.

        Parameters:
        - rect: stamp rectangle
        - stamp: stamp type (0-13 for predefined stamps); defaults to 0.
          NOTE(review): upstream appears to name these values with
          ``STAMP_*`` constants (0 = Approved ... 13 = Draft) — confirm.

        Returns:
        New Annot object
        """

Annotation Class

Individual annotation object with comprehensive manipulation capabilities.

class Annot:
    """A single PDF annotation (API summary stubs)."""

    def set_info(self, content: typing.Optional[str] = None,
                 title: typing.Optional[str] = None,
                 creationDate: typing.Optional[str] = None,
                 modDate: typing.Optional[str] = None,
                 subject: typing.Optional[str] = None) -> None:
        """
        Set annotation information.

        Every parameter defaults to None.

        Parameters:
        - content: annotation content/text
        - title: annotation title/author
        - creationDate: creation date string
        - modDate: modification date string
        - subject: annotation subject
        """

    # NOTE(review): upstream PyMuPDF appears to expose this data as the
    # read-only property ``Annot.info`` and has no ``get_info()`` — confirm.
    def get_info(self) -> dict:
        """
        Get annotation information.

        Returns:
        Dictionary with content, title, creationDate, modDate, subject
        """

    def set_rect(self, rect: Rect) -> None:
        """
        Set annotation rectangle.

        Parameters:
        - rect: new annotation rectangle
        """

    def set_colors(self, colors: typing.Optional[dict] = None) -> None:
        """
        Set annotation colors.

        Parameters:
        - colors: dictionary with 'stroke' and/or 'fill' color lists
        """

    def set_border(self, border: typing.Optional[dict] = None) -> None:
        """
        Set annotation border properties.

        Parameters:
        - border: dictionary with 'width', 'style', 'dashes' keys
        """

    def set_flags(self, flags: int) -> None:
        """
        Set annotation flags.

        Parameters:
        - flags: annotation flags (bitwise combination)
        """

    def set_oc(self, xref: int) -> None:
        """
        Set optional content (layer) reference.

        Parameters:
        - xref: optional content group xref
        """

    def update(self, opacity: float = -1,
               blend_mode: typing.Optional[str] = None,
               fontsize: float = 0,
               text_color: typing.Optional[list] = None,
               border_color: typing.Optional[list] = None,
               fill_color: typing.Optional[list] = None) -> None:
        """
        Update annotation appearance.

        Parameters:
        - opacity: annotation opacity (0-1); the default is -1
        - blend_mode: PDF blend mode
        - fontsize: font size for text annotations
        - text_color: text color as RGB list
        - border_color: border color as RGB list
        - fill_color: fill color as RGB list
        """

    # NOTE(review): upstream PyMuPDF appears to delete annotations through
    # ``Page.delete_annot(annot)``; ``Annot.delete()`` may not exist — confirm.
    def delete(self) -> None:
        """Delete annotation from page."""

    def get_pixmap(self, matrix: typing.Optional[Matrix] = None,
                   colorspace: typing.Optional[Colorspace] = None,
                   alpha: bool = False) -> Pixmap:
        """
        Render annotation to pixmap.

        Parameters:
        - matrix: transformation matrix
        - colorspace: target color space
        - alpha: include alpha channel

        Returns:
        Pixmap with annotation rendering
        """

    def get_sound(self) -> dict:
        """
        Get sound annotation data.

        Returns:
        Dictionary with sound properties
        """

    def get_file(self) -> bytes:
        """
        Get file attachment annotation data.

        Returns:
        File data as bytes
        """

    def set_name(self, name: str) -> None:
        """
        Set annotation unique name.

        Parameters:
        - name: unique annotation name
        """

    @property
    def type(self) -> list:
        """Annotation type as [type_number, type_string]."""

    @property
    def rect(self) -> Rect:
        """Annotation rectangle."""

    @property
    def next(self) -> Annot:
        """Next annotation on page."""

    @property
    def xref(self) -> int:
        """Annotation xref number."""

    @property
    def parent(self) -> Page:
        """Parent page object."""

    @property
    def flags(self) -> int:
        """Annotation flags."""

    @property
    def line_ends(self) -> list:
        """Line ending styles for line annotations."""

    @property
    def vertices(self) -> list:
        """Vertices for polygon/polyline annotations."""

    @property
    def colors(self) -> dict:
        """Annotation colors dictionary."""

    @property
    def border(self) -> dict:
        """Annotation border properties."""

Form Field Operations

Handle interactive PDF forms and form fields.

class Page:
    """Form-widget lookup methods of a page (API summary stubs)."""

    # NOTE(review): upstream PyMuPDF appears to expose ``first_widget`` as a
    # read-only property rather than a method — confirm against the API docs.
    def first_widget(self) -> typing.Optional[Widget]:
        """
        Get first form widget on page.

        Returns:
        First Widget object or None
        """

    def load_widget(self, xref: int) -> Widget:
        """
        Load widget by xref number.

        Parameters:
        - xref: widget xref number

        Returns:
        Widget object
        """

Widget Class

Interactive form field representation.

class Widget:
    """An interactive form field on a page (API summary stubs)."""

    # NOTE(review): upstream PyMuPDF appears to expose field_name, field_value,
    # field_type, field_type_string, field_flags and field_display as plain
    # attributes (a value is changed by assigning ``field_value`` and calling
    # ``update()``), and to name the reset method ``reset()`` — confirm
    # against the upstream Widget documentation before relying on the
    # method-style signatures below.

    def field_name(self) -> str:
        """
        Get field name.

        Returns:
        Form field name
        """

    def field_value(self) -> typing.Any:
        """
        Get field value.

        Returns:
        Current field value
        """

    def field_type(self) -> int:
        """
        Get field type.

        Returns:
        Field type number
        """

    def field_type_string(self) -> str:
        """
        Get field type as string.

        Returns:
        Field type string ("Text", "Button", "Choice", etc.)
        """

    def field_flags(self) -> int:
        """
        Get field flags.

        Returns:
        Field flags bitfield
        """

    def field_display(self) -> int:
        """
        Get field display mode.

        Returns:
        Display mode (0=visible, 1=hidden, 2=no print, 3=no view)
        """

    def set_field_value(self, value: typing.Any, ignore_limits: bool = False) -> bool:
        """
        Set field value.

        Parameters:
        - value: new field value
        - ignore_limits: ignore field validation limits (off by default)

        Returns:
        True if value was set successfully
        """

    def reset_field(self) -> None:
        """Reset field to default value."""

    def update(self) -> None:
        """Update widget appearance."""

    @property
    def rect(self) -> Rect:
        """Widget rectangle."""

    @property
    def xref(self) -> int:
        """Widget xref number."""

    @property
    def parent(self) -> Page:
        """Parent page object."""

    @property
    def next(self) -> Widget:
        """Next widget on page (presumably None after the last one — TODO confirm)."""

Redaction Operations

Handle content redaction (permanent removal).

class Page:
    """Redaction methods of a page (API summary stubs)."""

    def add_redact_annot(self, rect: Rect, text: str = "",
                         fill: typing.Optional[list] = None,
                         text_color: typing.Optional[list] = None,
                         cross_out: bool = True, **kwargs) -> Annot:
        """
        Add redaction annotation.

        The annotation only marks the area; see apply_redactions for applying
        the redaction annotations on the page.

        Parameters:
        - rect: area to redact
        - text: replacement text (optional)
        - fill: fill color for redacted area
        - text_color: replacement text color
        - cross_out: draw diagonal lines over area

        Keyword arguments (accepted through **kwargs):
        - fontname: font for replacement text
        - fontsize: font size for replacement text
        - align: text alignment (0=left, 1=center, 2=right)

        Returns:
        New redaction Annot object
        """

    def apply_redactions(self, images: int = 2, graphics: int = 2,
                         text: int = 2) -> bool:
        """
        Apply all redaction annotations on page.

        NOTE(review): the option descriptions below do not distinguish value
        1 from value 2, and upstream PyMuPDF appears to use different
        per-option meanings and a default of ``text=0`` (remove text) —
        confirm against the upstream ``Page.apply_redactions`` documentation.

        Parameters:
        - images: how to handle images (0=ignore, 1=remove if overlapping, 2=remove if any overlap)
        - graphics: how to handle graphics (0=ignore, 1=remove if overlapping, 2=remove if any overlap)
        - text: how to handle text (0=ignore, 1=remove if overlapping, 2=remove if any overlap)

        Returns:
        True if redactions were applied
        """

    def get_redactions(self) -> list:
        """
        Get list of redaction annotations.

        Returns:
        List of redaction Annot objects
        """

Usage Examples

Basic Annotation Operations

import pymupdf

# Open the document in a ``with`` block so it is closed even if one of the
# annotation calls raises.
with pymupdf.open("document.pdf") as doc:
    page = doc.load_page(0)

    # Add text annotation
    point = pymupdf.Point(100, 100)
    annot = page.add_text_annot(point, "This is a note", icon="Comment")
    annot.set_info(title="Author Name", subject="Review Comment")
    annot.update()

    # Add highlight annotation (markup annotations are placed on quads)
    rect = pymupdf.Rect(100, 200, 300, 220)
    highlight = page.add_highlight_annot(rect.quad)
    highlight.set_colors({"stroke": [1, 1, 0]})  # Yellow highlight
    highlight.update()

    # Save document with annotations
    doc.save("annotated_document.pdf")

Working with Existing Annotations

import pymupdf

# The ``with`` block closes the document even if an annotation call raises.
with pymupdf.open("annotated_document.pdf") as doc:
    page = doc.load_page(0)

    # Iterate through all annotations
    for annot in page.annots():
        info = annot.info  # ``info`` is a property in PyMuPDF, not a method
        print(f"Type: {annot.type[1]}")
        print(f"Content: {info['content']}")
        print(f"Author: {info['title']}")
        print(f"Rectangle: {annot.rect}")

        # Modify annotation
        if annot.type[1] == "Text":
            annot.set_info(content="Updated content")
            annot.update()

    # Remove all highlight annotations. Deletion goes through the page;
    # ``delete_annot`` returns the annotation following the deleted one, so
    # the walk continues without touching an invalidated object.
    annot = page.first_annot
    while annot:
        if annot.type[1] == "Highlight":
            annot = page.delete_annot(annot)
        else:
            annot = annot.next

    doc.save("modified_annotations.pdf")

Advanced Annotation Creation

import pymupdf

# The ``with`` block closes the document even if an annotation call raises.
with pymupdf.open("document.pdf") as doc:
    page = doc.load_page(0)

    # Add free text annotation with formatting
    rect = pymupdf.Rect(100, 100, 400, 150)
    freetext = page.add_freetext_annot(
        rect,
        "This is formatted text",
        fontsize=12,
        # FreeText annotations only accept the built-in font names
        # ("helv", "cour", "tiro", ...); "Arial" is not one of them.
        fontname="helv",
        text_color=[0, 0, 1],  # Blue text
        fill_color=[1, 1, 0.8],  # Light yellow background
        align=1,  # Center aligned
    )
    freetext.update()

    # Add ink annotation (freehand drawing): one inner list per stroke
    strokes = [
        [pymupdf.Point(200, 200), pymupdf.Point(250, 180), pymupdf.Point(300, 200)],
        [pymupdf.Point(200, 220), pymupdf.Point(250, 240), pymupdf.Point(300, 220)],
    ]
    ink = page.add_ink_annot(strokes)
    ink.set_colors({"stroke": [1, 0, 0]})  # Red ink
    ink.set_border({"width": 2})
    ink.update()

    # Add stamp annotation. Use the named constant: the numeric value 5 is
    # the "Expired" stamp, while "Approved" is 0.
    stamp_rect = pymupdf.Rect(400, 400, 500, 450)
    stamp = page.add_stamp_annot(stamp_rect, stamp=pymupdf.STAMP_Approved)
    stamp.update()

    doc.save("advanced_annotations.pdf")

Form Field Manipulation

import pymupdf

# The ``with`` block closes the document even if a widget call raises.
with pymupdf.open("form_document.pdf") as doc:
    # Iterate through all form fields
    for page in doc:
        for widget in page.widgets():
            # Field metadata is exposed as attributes, not methods.
            field_name = widget.field_name
            field_type = widget.field_type_string
            current_value = widget.field_value

            print(f"Field: {field_name}, Type: {field_type}, Value: {current_value}")

            # Set field values based on name
            if field_name == "Name":
                widget.field_value = "John Doe"
            elif field_name == "Email":
                widget.field_value = "john.doe@example.com"
            elif field_name == "Subscribe" and field_type == "CheckBox":
                widget.field_value = True
            else:
                continue  # nothing changed, no need to rewrite the widget

            # Write the new value (and regenerated appearance) into the PDF.
            widget.update()

    # Save filled form
    doc.save("filled_form.pdf")

Content Redaction

import pymupdf

# The ``with`` block closes the document even if a redaction call raises.
with pymupdf.open("sensitive_document.pdf") as doc:
    page = doc.load_page(0)

    # Search for sensitive information
    sensitive_terms = ["SSN", "Social Security", "confidential"]

    for term in sensitive_terms:
        # search_for yields one hit area per occurrence of the term
        for inst in page.search_for(term):
            # Mark the area; nothing is removed until apply_redactions()
            page.add_redact_annot(
                inst,
                text="[REDACTED]",
                fill=[0, 0, 0],  # Black fill
                text_color=[1, 1, 1],  # White text
                cross_out=True,
            )

    # Apply all redaction annotations
    page.apply_redactions()

    # Save redacted document
    doc.save("redacted_document.pdf")

Annotation Export and Import

import pymupdf
import json

def export_annotations(doc_path: str) -> dict:
    """Export all annotations of a document to a JSON-friendly dictionary.

    Parameters:
    - doc_path: path of the document to read

    Returns:
    Dictionary mapping page number to a list of per-annotation dictionaries
    with the keys "type", "rect", "info", "colors" and "border". Pages
    without annotations are omitted.
    """
    annotations = {}

    # The ``with`` block closes the document even if reading a page fails.
    with pymupdf.open(doc_path) as doc:
        for page_num in range(doc.page_count):
            page = doc.load_page(page_num)
            page_annots = [
                {
                    "type": list(annot.type),
                    "rect": list(annot.rect),
                    "info": annot.info,  # property in PyMuPDF, not a method
                    "colors": annot.colors,
                    "border": annot.border,
                }
                for annot in page.annots()
            ]

            if page_annots:
                annotations[page_num] = page_annots

    return annotations

def import_annotations(doc_path: str, annotations: dict, output_path: str):
    """Import annotations from a dictionary into a document and save a copy.

    Parameters:
    - doc_path: path of the document that receives the annotations
    - annotations: mapping of page number (int, or str after a JSON round
      trip) to a list of annotation dictionaries with the keys "type",
      "rect", "info", "colors" and "border"
    - output_path: path the annotated document is saved to

    Only "Text" and "Highlight" annotations are recreated; entries of any
    other type are skipped.
    """
    # Keyword arguments accepted by Annot.set_info; any other key stored in
    # an exported "info" dictionary must not be forwarded.
    info_keys = ("content", "title", "creationDate", "modDate", "subject")

    # The ``with`` block closes the document even if an annotation fails.
    with pymupdf.open(doc_path) as doc:
        for page_num, page_annots in annotations.items():
            page = doc.load_page(int(page_num))

            for annot_data in page_annots:
                rect = pymupdf.Rect(annot_data["rect"])
                type_name = annot_data["type"][1]

                # Create annotation based on type
                if type_name == "Text":
                    annot = page.add_text_annot(rect.tl, annot_data["info"]["content"])
                elif type_name == "Highlight":
                    annot = page.add_highlight_annot(rect.quad)
                else:
                    # ... handle other types
                    # Skip for now: without this, ``annot`` would be unbound
                    # or still refer to the previously created annotation.
                    continue

                # Apply properties
                info = {
                    key: value
                    for key, value in annot_data["info"].items()
                    if key in info_keys
                }
                annot.set_info(**info)
                if annot_data["colors"]:
                    annot.set_colors(annot_data["colors"])
                if annot_data["border"]:
                    annot.set_border(annot_data["border"])
                annot.update()

        doc.save(output_path)

# Usage: write the annotations of one document to a JSON file ...
annotations = export_annotations("source.pdf")
serialized = json.dumps(annotations, indent=2)
with open("annotations.json", "w") as out_file:
    out_file.write(serialized)

# ... and later read the file back and apply it to another document.
with open("annotations.json", "r") as in_file:
    annotations = json.loads(in_file.read())
import_annotations("target.pdf", annotations, "target_with_annotations.pdf")

Install with Tessl CLI

npx tessl i tessl/pypi-pymupdf

docs

annotations-forms.md

document-creation-modification.md

document-operations.md

document-rendering.md

geometry-transformations.md

index.md

page-content-extraction.md

table-extraction.md

tile.json