High performance Python library for data extraction, analysis, conversion & manipulation of PDF and other documents.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Comprehensive annotation handling and interactive forms support for PDF documents. PyMuPDF provides complete control over PDF annotations including creation, modification, deletion, and rendering of various annotation types.
Core annotation operations for working with PDF annotations.
class Page:
    """Annotation-related members of a PDF page (API reference stub)."""

    @property
    def first_annot(self) -> typing.Optional[Annot]:
        """
        First annotation on page.

        This is a property in PyMuPDF: read it as ``page.first_annot``,
        without parentheses.

        Returns:
            First Annot object or None if no annotations
        """

    def load_annot(self, ident: typing.Union[str, int]) -> Annot:
        """
        Load specific annotation by identifier.

        Parameters:
        - ident: annotation identifier (xref number or unique name)

        Returns:
            Annot object
        """

    def annots(self, types: list = None) -> typing.Iterator[Annot]:
        """
        Iterate over the annotations on page.

        The result is a generator, not a list: wrap it in ``list(...)`` if
        it has to be indexed or traversed more than once.

        Parameters:
        - types: filter by annotation types (list of integers)

        Returns:
            Generator yielding Annot objects
        """

    def annot_names(self) -> list:
        """
        Get list of annotation names on page.

        Returns:
            List of annotation unique names
        """

    def delete_annot(self, annot: Annot) -> typing.Optional[Annot]:
        """
        Delete an annotation from the page.

        Parameters:
        - annot: annotation to remove

        Returns:
            The annotation following the deleted one, or None
        """

    def add_text_annot(self, point: Point, text: str, icon: str = "Note") -> Annot:
        """
        Add text annotation.

        Parameters:
        - point: annotation position
        - text: annotation content
        - icon: icon name ("Note", "Comment", "Key", "Help", etc.)

        Returns:
            New Annot object
        """

    def add_highlight_annot(self, quads: typing.Union[Quad, list]) -> Annot:
        """
        Add highlight annotation.

        Parameters:
        - quads: Quad object or list of Quad objects to highlight

        Returns:
            New Annot object
        """

    def add_underline_annot(self, quads: typing.Union[Quad, list]) -> Annot:
        """
        Add underline annotation.

        Parameters:
        - quads: Quad object or list of Quad objects to underline

        Returns:
            New Annot object
        """

    def add_strikeout_annot(self, quads: typing.Union[Quad, list]) -> Annot:
        """
        Add strikeout annotation.

        Parameters:
        - quads: Quad object or list of Quad objects to strike out

        Returns:
            New Annot object
        """

    def add_squiggly_annot(self, quads: typing.Union[Quad, list]) -> Annot:
        """
        Add squiggly underline annotation.

        Parameters:
        - quads: Quad object or list of Quad objects for squiggly underline

        Returns:
            New Annot object
        """

    def add_rect_annot(self, rect: Rect) -> Annot:
        """
        Add rectangle annotation.

        Parameters:
        - rect: rectangle coordinates

        Returns:
            New Annot object
        """

    def add_circle_annot(self, rect: Rect) -> Annot:
        """
        Add circle annotation.

        Parameters:
        - rect: bounding rectangle for circle

        Returns:
            New Annot object
        """

    def add_line_annot(self, p1: Point, p2: Point) -> Annot:
        """
        Add line annotation.

        Parameters:
        - p1: start point
        - p2: end point

        Returns:
            New Annot object
        """

    def add_polyline_annot(self, points: list) -> Annot:
        """
        Add polyline annotation.

        Parameters:
        - points: list of Point objects

        Returns:
            New Annot object
        """

    def add_polygon_annot(self, points: list) -> Annot:
        """
        Add polygon annotation.

        Parameters:
        - points: list of Point objects

        Returns:
            New Annot object
        """

    def add_freetext_annot(self, rect: Rect, text: str, **kwargs) -> Annot:
        """
        Add free text annotation.

        Parameters:
        - rect: annotation rectangle
        - text: text content
        - fontsize: font size
        - fontname: font name; the PyMuPDF reference lists "Helv" (default),
          "Cour", "TiRo", "ZaDb" and "Symb" as accepted values
        - text_color: text color
        - fill_color: background color
        - align: text alignment (0=left, 1=center, 2=right)

        Returns:
            New Annot object
        """

    def add_ink_annot(self, handwriting: list) -> Annot:
        """
        Add ink annotation (freehand drawing).

        Parameters:
        - handwriting: list of lists of Point objects (strokes)

        Returns:
            New Annot object
        """

    def add_stamp_annot(self, rect: Rect, stamp: int = 0) -> Annot:
        """
        Add stamp annotation.

        Parameters:
        - rect: stamp rectangle
        - stamp: stamp type (0-13 for predefined stamps; for example
          0 = Approved, 5 = Expired, 13 = Draft)

        Returns:
            New Annot object
        """


# Individual annotation object with comprehensive manipulation capabilities.
class Annot:
    """A single PDF annotation (API reference stub)."""

    def set_info(self, info: dict = None, content: str = None,
                 title: str = None, creationDate: str = None,
                 modDate: str = None, subject: str = None) -> None:
        """
        Set annotation information.

        Parameters:
        - info: dictionary holding any of the items below; when given, the
          values are taken from it (unknown keys are ignored)
        - content: annotation content/text
        - title: annotation title/author
        - creationDate: creation date string
        - modDate: modification date string
        - subject: annotation subject
        """

    @property
    def info(self) -> dict:
        """
        Annotation information.

        Dictionary with (at least) the keys content, title, creationDate,
        modDate and subject.
        """

    def get_info(self) -> dict:
        """
        Get annotation information.

        NOTE(review): the PyMuPDF Annot reference exposes this data through
        the ``info`` property and lists no ``get_info()`` method; prefer
        ``annot.info`` and confirm before relying on this call.

        Returns:
            Dictionary with content, title, creationDate, modDate, subject
        """

    def set_rect(self, rect: Rect) -> None:
        """
        Set annotation rectangle.

        Parameters:
        - rect: new annotation rectangle
        """

    def set_colors(self, colors: dict = None) -> None:
        """
        Set annotation colors.

        Parameters:
        - colors: dictionary with 'stroke' and/or 'fill' color lists
        """

    def set_border(self, border: dict = None) -> None:
        """
        Set annotation border properties.

        Parameters:
        - border: dictionary with 'width', 'style', 'dashes' keys
        """

    def set_flags(self, flags: int) -> None:
        """
        Set annotation flags.

        Parameters:
        - flags: annotation flags (bitwise combination)
        """

    def set_oc(self, xref: int) -> None:
        """
        Set optional content (layer) reference.

        Parameters:
        - xref: optional content group xref
        """

    def update(self, opacity: float = -1, blend_mode: str = None,
               fontsize: float = 0, text_color: list = None,
               border_color: list = None, fill_color: list = None) -> None:
        """
        Update annotation appearance.

        Parameters:
        - opacity: annotation opacity (0-1)
        - blend_mode: PDF blend mode
        - fontsize: font size for text annotations
        - text_color: text color as RGB list
        - border_color: border color as RGB list
        - fill_color: fill color as RGB list
        """

    def delete(self) -> None:
        """
        Delete annotation from page.

        NOTE(review): the PyMuPDF reference removes annotations with
        ``Page.delete_annot(annot)`` and lists no ``Annot.delete()``;
        confirm before relying on this call.
        """

    def get_pixmap(self, matrix: Matrix = None, colorspace: Colorspace = None,
                   alpha: bool = False) -> Pixmap:
        """
        Render annotation to pixmap.

        Parameters:
        - matrix: transformation matrix
        - colorspace: target color space
        - alpha: include alpha channel

        Returns:
            Pixmap with annotation rendering
        """

    def get_sound(self) -> dict:
        """
        Get sound annotation data.

        Returns:
            Dictionary with sound properties
        """

    def get_file(self) -> bytes:
        """
        Get file attachment annotation data.

        Returns:
            File data as bytes
        """

    def set_name(self, name: str) -> None:
        """
        Set annotation unique name.

        Parameters:
        - name: unique annotation name
        """

    @property
    def type(self) -> list:
        """Annotation type as [type_number, type_string]."""

    @property
    def rect(self) -> Rect:
        """Annotation rectangle."""

    @property
    def next(self) -> Annot:
        """Next annotation on page."""

    @property
    def xref(self) -> int:
        """Annotation xref number."""

    @property
    def parent(self) -> Page:
        """Parent page object."""

    @property
    def flags(self) -> int:
        """Annotation flags."""

    @property
    def line_ends(self) -> list:
        """Line ending styles for line annotations."""

    @property
    def vertices(self) -> list:
        """Vertices for polygon/polyline annotations."""

    @property
    def colors(self) -> dict:
        """Annotation colors dictionary."""

    @property
    def border(self) -> dict:
        """Annotation border properties."""


# Handle interactive PDF forms and form fields.
class Page:
    """Form-widget members of a PDF page (API reference stub)."""

    @property
    def first_widget(self) -> typing.Optional[Widget]:
        """
        First form widget on page.

        This is a property in PyMuPDF: read it as ``page.first_widget``,
        without parentheses.

        Returns:
            First Widget object or None
        """

    def widgets(self, types: list = None) -> typing.Iterator[Widget]:
        """
        Iterate over the form widgets on page.

        Parameters:
        - types: filter by field types (list of integers)

        Returns:
            Generator yielding Widget objects
        """

    def load_widget(self, xref: int) -> Widget:
        """
        Load widget by xref number.

        Parameters:
        - xref: widget xref number

        Returns:
            Widget object
        """


# Interactive form field representation.
class Widget:
    """
    Interactive form field (API reference stub).

    The ``field_*`` members are plain attributes in PyMuPDF: read them
    without parentheses, and change a value by assigning to ``field_value``
    and then calling ``update()``.
    """

    @property
    def field_name(self) -> str:
        """Form field name."""

    @property
    def field_value(self) -> typing.Any:
        """Current field value (assignable)."""

    @property
    def field_type(self) -> int:
        """Field type number."""

    @property
    def field_type_string(self) -> str:
        """Field type as string ("Text", "Button", "Choice", etc.)."""

    @property
    def field_flags(self) -> int:
        """Field flags bitfield."""

    @property
    def field_display(self) -> int:
        """Display mode (0=visible, 1=hidden, 2=no print, 3=no view)."""

    def set_field_value(self, value: typing.Any, ignore_limits: bool = False) -> bool:
        """
        Set field value.

        NOTE(review): the PyMuPDF Widget reference documents assigning to
        ``field_value`` followed by ``update()`` and lists no
        ``set_field_value()``; confirm before relying on this call.

        Parameters:
        - value: new field value
        - ignore_limits: ignore field validation limits

        Returns:
            True if value was set successfully
        """

    def reset_field(self) -> None:
        """
        Reset field to default value.

        NOTE(review): the PyMuPDF Widget reference names this operation
        ``reset()``; confirm which spelling the installed version provides.
        """

    def update(self) -> None:
        """Update widget appearance."""

    @property
    def rect(self) -> Rect:
        """Widget rectangle."""

    @property
    def xref(self) -> int:
        """Widget xref number."""

    @property
    def parent(self) -> Page:
        """Parent page object."""

    @property
    def next(self) -> Widget:
        """Next widget on page."""


# Handle content redaction (permanent removal).
class Page:
    """Redaction-related members of a PDF page (API reference stub)."""

    def add_redact_annot(self, rect: Rect, text: str = "",
                         fill: list = None, text_color: list = None,
                         cross_out: bool = True, **kwargs) -> Annot:
        """
        Add redaction annotation.

        Parameters:
        - rect: area to redact
        - text: replacement text (optional)
        - fill: fill color for redacted area
        - text_color: replacement text color
        - cross_out: draw diagonal lines over area
        - fontname: font for replacement text
        - fontsize: font size for replacement text
        - align: text alignment (0=left, 1=center, 2=right)

        Returns:
            New redaction Annot object
        """

    def apply_redactions(self, images: int = 2, graphics: int = 2,
                         text: int = 0) -> bool:
        """
        Apply all redaction annotations on page.

        Parameters:
        - images: how to handle overlapping images (0=ignore, 1=remove the
          whole image, 2=blank out the overlapping pixels, 3=remove the
          image unless it is invisible)
        - graphics: how to handle vector graphics (0=ignore, 1=remove if
          fully covered by a redaction, 2=remove if there is any overlap)
        - text: how to handle text (0=remove overlapping characters,
          1=keep the text)

        Returns:
            True if at least one redaction was applied
        """

    def get_redactions(self) -> list:
        """
        Get list of redaction annotations.

        NOTE(review): this method is not listed in the PyMuPDF Page
        reference; redactions can be enumerated with
        ``page.annots(types=[pymupdf.PDF_ANNOT_REDACT])`` - confirm.

        Returns:
            List of redaction Annot objects
        """


import pymupdf
# Basic workflow: add a note and a highlight to the first page, then save.
# The context manager closes the document even if one of the calls raises.
with pymupdf.open("document.pdf") as doc:
    page = doc.load_page(0)

    # Add text annotation
    point = pymupdf.Point(100, 100)
    annot = page.add_text_annot(point, "This is a note", icon="Comment")
    annot.set_info(title="Author Name", subject="Review Comment")
    annot.update()

    # Add highlight annotation
    rect = pymupdf.Rect(100, 200, 300, 220)
    quad = rect.quad
    highlight = page.add_highlight_annot(quad)
    highlight.set_colors({"stroke": [1, 1, 0]})  # Yellow highlight
    highlight.update()

    # Save document with annotations
    doc.save("annotated_document.pdf")


import pymupdf
# Inspect, modify and delete annotations on the first page.
with pymupdf.open("annotated_document.pdf") as doc:
    page = doc.load_page(0)

    # Snapshot the xrefs first so that annotations are not modified while
    # the annots() generator is still being consumed.
    xrefs = [annot.xref for annot in page.annots()]

    # Iterate through all annotations
    for xref in xrefs:
        annot = page.load_annot(xref)
        info = annot.info  # a dict property; Annot has no get_info() method
        print(f"Type: {annot.type[1]}")
        print(f"Content: {info['content']}")
        print(f"Author: {info['title']}")
        print(f"Rectangle: {annot.rect}")

        # Modify annotation
        if annot.type[1] == "Text":
            annot.set_info(content="Updated content")
            annot.update()

    # Remove all highlight annotations.
    # Deletion goes through the page; delete_annot() returns the annotation
    # that follows the deleted one, which keeps the walk valid.
    annot = page.first_annot
    while annot:
        if annot.type[1] == "Highlight":
            annot = page.delete_annot(annot)
        else:
            annot = annot.next

    doc.save("modified_annotations.pdf")


import pymupdf
# Advanced annotation types: free text, ink and stamp.
with pymupdf.open("document.pdf") as doc:
    page = doc.load_page(0)

    # Add free text annotation with formatting
    rect = pymupdf.Rect(100, 100, 400, 150)
    freetext = page.add_freetext_annot(
        rect,
        "This is formatted text",
        fontsize=12,
        # FreeText annotations accept the built-in font codes
        # ("Helv", "Cour", "TiRo", "ZaDb", "Symb"), not names like "Arial".
        fontname="Helv",
        text_color=[0, 0, 1],  # Blue text
        fill_color=[1, 1, 0.8],  # Light yellow background
        align=1,  # Center aligned
    )
    freetext.update()

    # Add ink annotation (freehand drawing)
    strokes = [
        [pymupdf.Point(200, 200), pymupdf.Point(250, 180), pymupdf.Point(300, 200)],
        [pymupdf.Point(200, 220), pymupdf.Point(250, 240), pymupdf.Point(300, 220)],
    ]
    ink = page.add_ink_annot(strokes)
    ink.set_colors({"stroke": [1, 0, 0]})  # Red ink
    ink.set_border({"width": 2})
    ink.update()

    # Add stamp annotation
    stamp_rect = pymupdf.Rect(400, 400, 500, 450)
    # Stamp 0 is "Approved" (pymupdf.STAMP_Approved); 5 would be "Expired".
    stamp = page.add_stamp_annot(stamp_rect, stamp=0)  # "APPROVED" stamp
    stamp.update()

    doc.save("advanced_annotations.pdf")


import pymupdf
# Fill in form fields by name and save the result.
with pymupdf.open("form_document.pdf") as doc:
    # Iterate through all form fields
    for page in doc:
        for widget in page.widgets():
            # field_name, field_type_string and field_value are attributes
            # of Widget in PyMuPDF, not methods.
            field_name = widget.field_name
            field_type = widget.field_type_string
            current_value = widget.field_value
            print(f"Field: {field_name}, Type: {field_type}, Value: {current_value}")

            # Set field values based on name
            if field_name == "Name":
                widget.field_value = "John Doe"
            elif field_name == "Email":
                widget.field_value = "john.doe@example.com"
            elif field_name == "Subscribe" and field_type == "CheckBox":
                widget.field_value = True
            else:
                continue  # untouched field: leave its appearance alone

            # Write the new value back into the PDF
            widget.update()

    # Save filled form
    doc.save("filled_form.pdf")


import pymupdf
# Redact every occurrence of the listed terms on the first page.
with pymupdf.open("sensitive_document.pdf") as doc:
    page = doc.load_page(0)

    # Search for sensitive information
    sensitive_terms = ["SSN", "Social Security", "confidential"]
    for term in sensitive_terms:
        for inst in page.search_for(term):
            # Add redaction annotation (only marks the area; nothing is
            # removed until apply_redactions() runs)
            page.add_redact_annot(
                inst,
                text="[REDACTED]",
                fill=[0, 0, 0],  # Black fill
                text_color=[1, 1, 1],  # White text
                cross_out=True,
            )

    # Apply all redaction annotations
    page.apply_redactions()

    # Save redacted document
    doc.save("redacted_document.pdf")


import pymupdf
import json
def export_annotations(doc_path: str) -> dict:
    """
    Export all annotations of a document to a dictionary.

    Parameters:
    - doc_path: path of the PDF to read

    Returns:
        Dictionary mapping page number (int) to a list of annotation
        records with the keys "type", "rect", "info", "colors" and
        "border". Pages without annotations are omitted.
    """
    annotations = {}
    # The context manager closes the document even if reading fails.
    with pymupdf.open(doc_path) as doc:
        for page_num in range(doc.page_count):
            page = doc.load_page(page_num)
            page_annots = [
                {
                    "type": annot.type,
                    "rect": list(annot.rect),
                    # `info` is a dict property; Annot has no get_info()
                    "info": annot.info,
                    "colors": annot.colors,
                    "border": annot.border,
                }
                for annot in page.annots()
            ]
            if page_annots:
                annotations[page_num] = page_annots
    return annotations
def import_annotations(doc_path: str, annotations: dict, output_path: str):
    """
    Import annotations from dictionary to document.

    Only "Text" and "Highlight" records are recreated; records of any other
    type are skipped.

    Parameters:
    - doc_path: path of the PDF that receives the annotations
    - annotations: mapping of page number (int, or str after a JSON round
      trip) to a list of records with the keys "type", "rect", "info",
      "colors" and "border"
    - output_path: path the annotated PDF is saved to
    """
    # The context manager closes the document even if a call below raises.
    with pymupdf.open(doc_path) as doc:
        for page_num, page_annots in annotations.items():
            # JSON turns integer keys into strings, hence the int().
            page = doc.load_page(int(page_num))
            for annot_data in page_annots:
                rect = pymupdf.Rect(annot_data["rect"])
                kind = annot_data["type"][1]
                info = annot_data["info"]

                # Create annotation based on type
                if kind == "Text":
                    annot = page.add_text_annot(rect.tl, info["content"])
                elif kind == "Highlight":
                    annot = page.add_highlight_annot(rect.quad)
                else:
                    # ... handle other types
                    # Skip instead of falling through: otherwise `annot`
                    # is unbound or still refers to the previous record.
                    continue

                # Apply properties. The info dict is passed as a whole
                # because it may carry keys (e.g. "name", "id") that are
                # not keyword parameters of set_info().
                annot.set_info(info=info)
                if annot_data["colors"]:
                    annot.set_colors(annot_data["colors"])
                if annot_data["border"]:
                    annot.set_border(annot_data["border"])
                annot.update()
        doc.save(output_path)
# Usage
annotations = export_annotations("source.pdf")
with open("annotations.json", "w", encoding="utf-8") as f:
    json.dump(annotations, f, indent=2)

# Later, import to another document
with open("annotations.json", "r", encoding="utf-8") as f:
    annotations = json.load(f)
import_annotations("target.pdf", annotations, "target_with_annotations.pdf")

# Install with Tessl CLI
npx tessl i tessl/pypi-pymupdf