High performance Python library for data extraction, analysis, conversion & manipulation of PDF and other documents.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
This section covers creating new documents and modifying existing ones, including page insertion, deletion, content manipulation, and adding text, images, and other content elements. PyMuPDF provides comprehensive tools both for creating documents from scratch and for programmatically modifying existing documents.
Create new PDF documents with custom page sizes and properties.
def open() -> Document:
    """
    Create a new, empty document.

    Returns:
    The newly created Document object
    """
class Document:
def new_page(self, pno: int = -1, width: float = 595, height: float = 842) -> Page:
"""
Create a new page in the document.
Parameters:
- pno: insertion position (-1 to append at end)
- width: page width in points (default: A4 width)
- height: page height in points (default: A4 height)
Returns:
New Page object
"""
def insert_page(self, pno: int, text: str = None, fontsize: int = 11,
width: float = 595, height: float = 842, **kwargs) -> Page:
"""
Insert page with optional text content.
Parameters:
- pno: insertion position
- text: initial text content
- fontsize: text font size
- width: page width in points
- height: page height in points
- fontname: font name
- fontfile: path to font file
- color: text color as RGB list
Returns:
New Page object
"""Add text content to pages with comprehensive formatting options.
class Page:
def insert_text(self, point: Point, text: str, fontsize: float = 11,
fontname: str = "helv", fontfile: str = None,
set_simple: bool = False, encoding: int = 0,
color: list = None, fill: list = None,
render_mode: int = 0, border_width: float = 1,
rotate: int = 0, morph: tuple = None,
stroke_opacity: float = 1, fill_opacity: float = 1,
oc: int = 0) -> int:
"""
Insert text at specified position.
Parameters:
- point: insertion point (bottom-left of text)
- text: text content to insert
- fontsize: font size in points
- fontname: font name ("helv", "times", "cour", etc.)
- fontfile: path to external font file
- set_simple: use simple font encoding
- encoding: text encoding (0=Latin, 1=Greek, 2=Cyrillic)
- color: text color as RGB list [r, g, b]
- fill: fill color as RGB list
- render_mode: text rendering mode (0=fill, 1=stroke, 2=fill+stroke, etc.)
- border_width: stroke width for outlined text
- rotate: rotation angle in degrees
- morph: morphing parameters (point, matrix) tuple
- stroke_opacity: stroke opacity (0-1)
- fill_opacity: fill opacity (0-1)
- oc: optional content group reference
Returns:
Number of successfully inserted characters
"""
def insert_textbox(self, rect: Rect, buffer: str, fontsize: float = 11,
fontname: str = "helv", fontfile: str = None,
set_simple: bool = False, encoding: int = 0,
color: list = None, fill: list = None,
render_mode: int = 0, border_width: float = 1,
lineheight: float = None, align: int = 0,
rotate: int = 0, morph: tuple = None,
stroke_opacity: float = 1, fill_opacity: float = 1,
oc: int = 0, expandtabs: int = 8) -> float:
"""
Insert text within a rectangle with automatic wrapping.
Parameters:
- rect: rectangle to contain text
- buffer: text content
- fontsize: font size in points
- fontname: font name
- fontfile: path to external font file
- set_simple: use simple font encoding
- encoding: text encoding
- color: text color as RGB list
- fill: fill color as RGB list
- render_mode: text rendering mode
- border_width: stroke width
- lineheight: line height multiplier
- align: text alignment (0=left, 1=center, 2=right, 3=justify)
- rotate: rotation angle
- morph: morphing parameters
- stroke_opacity: stroke opacity
- fill_opacity: fill opacity
- oc: optional content group reference
- expandtabs: tab expansion size
Returns:
Unused vertical space in rectangle
"""Add images to pages with positioning and scaling options.
class Page:
def insert_image(self, rect: Rect, filename: str = None,
stream: bytes = None, pixmap: Pixmap = None,
mask: Pixmap = None, rotate: int = 0,
xref: int = 0, oc: int = 0, keep_proportion: bool = True,
overlay: bool = True, alpha: int = -1) -> int:
"""
Insert image into page.
Parameters:
- rect: target rectangle for image
- filename: path to image file
- stream: image data as bytes
- pixmap: Pixmap object to insert
- mask: optional mask Pixmap for transparency
- rotate: rotation angle (0, 90, 180, 270)
- xref: reuse existing image by xref number
- oc: optional content group reference
- keep_proportion: maintain image aspect ratio
- overlay: draw as overlay (True) or underlay (False)
- alpha: alpha/transparency value (0-255, -1 for automatic)
Returns:
Cross-reference number of inserted image
"""Add vector graphics and shapes to pages.
class Shape:
def __init__(self, page: Page):
"""
Create shape drawing context for page.
Parameters:
- page: target Page object
"""
def draw_line(self, p1: Point, p2: Point) -> Point:
"""
Draw line between two points.
Parameters:
- p1: start point
- p2: end point
Returns:
End point for chaining
"""
def draw_bezier(self, p1: Point, p2: Point, p3: Point, p4: Point) -> Point:
"""
Draw cubic Bezier curve.
Parameters:
- p1: start point
- p2: first control point
- p3: second control point
- p4: end point
Returns:
End point for chaining
"""
def draw_rect(self, rect: Rect) -> Point:
"""
Draw rectangle.
Parameters:
- rect: rectangle to draw
Returns:
Bottom-right corner point
"""
def draw_oval(self, rect: Rect) -> Point:
"""
Draw oval/ellipse within rectangle.
Parameters:
- rect: bounding rectangle
Returns:
Bottom-right corner point
"""
def draw_circle(self, center: Point, radius: float) -> Point:
"""
Draw circle.
Parameters:
- center: circle center point
- radius: circle radius
Returns:
Center point
"""
def draw_sector(self, center: Point, point: Point, angle: float) -> Point:
"""
Draw circular sector.
Parameters:
- center: sector center
- point: radius end point
- angle: sector angle in degrees
Returns:
Center point
"""
def draw_polyline(self, points: list) -> Point:
"""
Draw connected line segments.
Parameters:
- points: list of Point objects
Returns:
Last point
"""
def draw_polygon(self, points: list) -> Point:
"""
Draw closed polygon.
Parameters:
- points: list of Point objects defining vertices
Returns:
First point
"""
def draw_squiggle(self, p1: Point, p2: Point, breadth: float = 2) -> Point:
"""
Draw squiggly line (wavy underline).
Parameters:
- p1: start point
- p2: end point
- breadth: wave amplitude
Returns:
End point
"""
def finish(self, fill: list = None, color: list = None,
dashes: str = None, even_odd: bool = False,
closePath: bool = False, lineJoin: int = 0,
lineCap: int = 0, width: float = 1,
stroke_opacity: float = 1, fill_opacity: float = 1,
oc: int = 0) -> None:
"""
Apply styling and finalize drawing operations.
Parameters:
- fill: fill color as RGB list
- color: stroke color as RGB list
- dashes: dash pattern string
- even_odd: use even-odd fill rule
- closePath: close the current path
- lineJoin: line join style (0=miter, 1=round, 2=bevel)
- lineCap: line cap style (0=butt, 1=round, 2=square)
- width: line width
- stroke_opacity: stroke opacity (0-1)
- fill_opacity: fill opacity (0-1)
- oc: optional content group reference
"""
def commit(self, overlay: bool = True) -> None:
"""
Commit all drawing operations to page.
Parameters:
- overlay: draw as overlay (True) or underlay (False)
"""Modify page properties and content arrangement.
class Page:
def set_rotation(self, rotation: int) -> None:
"""
Set page rotation.
Parameters:
- rotation: rotation angle (0, 90, 180, 270)
"""
def set_cropbox(self, rect: Rect) -> None:
"""
Set page crop box.
Parameters:
- rect: new crop box rectangle
"""
def set_mediabox(self, rect: Rect) -> None:
"""
Set page media box.
Parameters:
- rect: new media box rectangle
"""
def clean_contents(self) -> bool:
"""
Clean and optimize page content stream.
Returns:
True if changes were made
"""
def wrap_contents(self) -> None:
"""Wrap page contents in a balanced way."""Work with fonts for text insertion and formatting.
class Font:
def __init__(self, fontname: str = "helv", fontfile: str = None,
fontbuffer: bytes = None, script: int = 0,
language: str = None, ordering: int = -1,
is_bold: bool = False, is_italic: bool = False,
is_serif: bool = False, embed: bool = True):
"""
Create or load font object.
Parameters:
- fontname: font name or base14 font identifier
- fontfile: path to font file (.ttf, .otf, etc.)
- fontbuffer: font data as bytes
- script: script identifier for Unicode
- language: language code
- ordering: CJK font ordering
- is_bold: prefer bold variant
- is_italic: prefer italic variant
- is_serif: prefer serif variant
- embed: embed font in PDF
"""
def glyph_advance(self, chr: int, script: int = 0, language: str = None,
wmode: int = 0) -> float:
"""
Get glyph advance width.
Parameters:
- chr: character code
- script: script identifier
- language: language code
- wmode: writing mode (0=horizontal, 1=vertical)
Returns:
Glyph advance width
"""
def glyph_bbox(self, chr: int, script: int = 0, language: str = None,
wmode: int = 0) -> Rect:
"""
Get glyph bounding box.
Parameters:
- chr: character code
- script: script identifier
- language: language code
- wmode: writing mode
Returns:
Glyph bounding rectangle
"""
def text_length(self, text: str, fontsize: float = 11, script: int = 0,
language: str = None, wmode: int = 0) -> float:
"""
Calculate text length in points.
Parameters:
- text: text string
- fontsize: font size in points
- script: script identifier
- language: language code
- wmode: writing mode
Returns:
Text width in points
"""
@property
def name(self) -> str:
"""Font name."""
@property
def flags(self) -> dict:
"""Font flags dictionary."""
@property
def bbox(self) -> Rect:
"""Font bounding box."""
@property
def is_writable(self) -> bool:
"""True if font can be used for text insertion."""Sophisticated text layout and formatting capabilities.
class TextWriter:
    def __init__(self, page_rect: Rect, opacity: float = 1, color: list = None):
        """
        Create text writer for advanced text layout.

        Parameters:
        - page_rect: page rectangle bounds
        - opacity: text opacity (0-1)
        - color: default text color as RGB list
        """

    def append(self, pos: Point, text: str, font: Font = None,
               fontsize: float = 11, language: str = None,
               script: int = 0, wmode: int = 0, bidi_level: int = 0,
               markup_dir: int = 0, small_caps: bool = False) -> tuple:
        """
        Append text at position.

        Nothing is written to a page until write_text() is called.

        Parameters:
        - pos: text position (start of the baseline)
        - text: text content
        - font: Font object to use
        - fontsize: font size in points
        - language: language code for text shaping
        - script: script identifier
        - wmode: writing mode (0=horizontal, 1=vertical)
        - bidi_level: bidirectional text level
        - markup_dir: markup direction
        - small_caps: use small capitals

        Returns:
        Tuple (text_rect, last_point): the bounding rectangle of the text
        and the position following the last character
        """

    def write_text(self, page: Page, opacity: float = None,
                   color: list = None, oc: int = 0,
                   overlay: bool = True, morph: tuple = None,
                   matrix: Matrix = None, render_mode: int = 0,
                   stroke_opacity: float = 1, fill_opacity: float = 1,
                   stroke_color: list = None) -> None:
        """
        Write accumulated text to page.

        Parameters:
        - page: target Page object
        - opacity: text opacity override
        - color: text color override
        - oc: optional content group reference
        - overlay: draw as overlay (True) or underlay (False)
        - morph: morphing transformation
        - matrix: additional transformation matrix
        - render_mode: text rendering mode
        - stroke_opacity: stroke opacity
        - fill_opacity: fill opacity
        - stroke_color: stroke color for outlined text
        """

    def fill_textbox(self, rect: Rect, text: str, pos: Point = None,
                     font: Font = None, fontsize: float = 11,
                     lineheight: float = None, align: int = 0,
                     warn: bool = True) -> list:
        """
        Fill rectangle with text and automatic line wrapping.

        Text that does not fit is never written.

        Parameters:
        - rect: containing rectangle
        - text: text content
        - pos: starting position within rectangle
        - font: Font object
        - fontsize: font size
        - lineheight: line height multiplier
        - align: text alignment (0=left, 1=center, 2=right, 3=justify)
        - warn: warn if text doesn't fit

        Returns:
        List of the lines that did not fit, each as a (text, length) tuple;
        an empty list means everything was placed
        """

    @property
    def text_rect(self) -> Rect:
        """Bounding rectangle of all added text."""

    @property
    def last_point(self) -> Point:
        """Position after last text insertion."""

    @property
    def opacity(self) -> float:
        """Text opacity."""


import pymupdf
# Create new document
doc = pymupdf.open()

# Add pages with different sizes.
# Page dimensions are given in points (1 point = 1/72 inch), not millimetres.
page1 = doc.new_page()  # Default A4 portrait (595 x 842)
page2 = doc.new_page(width=792, height=612)  # US Letter landscape
page3 = doc.new_page(width=842, height=1191)  # A3 portrait (297 mm x 420 mm)

# Set document metadata
doc.set_metadata({
    "title": "My New Document",
    "author": "Author Name",
    "subject": "Document Subject",
    "creator": "PyMuPDF Script",
    "producer": "PyMuPDF",
    "creationDate": pymupdf.get_pdf_now(),
    "modDate": pymupdf.get_pdf_now(),
})

# Save new document
doc.save("new_document.pdf")
doc.close()

import pymupdf
doc = pymupdf.open()
page = doc.new_page()

# PyMuPDF page coordinates start at the top-left corner and y grows
# downward, so small y values are near the top of the page.

# Insert simple text
point = pymupdf.Point(50, 72)  # Top-left area (start of the text baseline)
page.insert_text(point, "Hello, World!", fontsize=16, color=[0, 0, 1])

# Insert formatted text box
rect = pymupdf.Rect(50, 100, 500, 200)
text = """This is a longer text that will be automatically wrapped within the
specified rectangle. It demonstrates text box functionality with automatic
line breaks and formatting options."""
unused_space = page.insert_textbox(
    rect,
    text,
    fontsize=12,
    align=3,  # Justified
    lineheight=1.2,
    color=[0.2, 0.2, 0.2],
)
# A negative result means the text did not fit and nothing was written.
if unused_space < 0:
    print(f"Text box too small by {-unused_space:.1f} points")

# Insert text with a different built-in font.
# "tiro" is the Base-14 abbreviation for Times-Roman; "times" is not a
# recognised font name.
page.insert_text(
    pymupdf.Point(50, 240),
    "Text with Times font",
    fontname="tiro",
    fontsize=14,
    color=[1, 0, 0],
)

doc.save("text_document.pdf")
doc.close()

import pymupdf
doc = pymupdf.open()
page = doc.new_page()

# Create TextWriter for advanced text layout
writer = pymupdf.TextWriter(page.rect, color=[0, 0, 0])

# Create font object
font = pymupdf.Font("helv")  # Helvetica

# Add text with different formatting.
# y grows downward from the top of the page, so each following element
# gets a larger y value.
y_pos = 72

# Title
writer.append(
    pymupdf.Point(50, y_pos),
    "Document Title",
    font=font,
    fontsize=24,
)
y_pos += 40

# Subtitle
writer.append(
    pymupdf.Point(50, y_pos),
    "Subtitle with different formatting",
    font=font,
    fontsize=16,
)
y_pos += 30

# Body text
body_text = "This is body text with normal formatting. "
writer.append(
    pymupdf.Point(50, y_pos),
    body_text,
    font=font,
    fontsize=12,
)

# Fill text box with automatic wrapping
rect = pymupdf.Rect(50, 180, 550, 330)
long_text = """Lorem ipsum dolor sit amet, consectetur adipiscing elit.
Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris."""
overflow = writer.fill_textbox(
    rect,
    long_text,
    font=font,
    fontsize=11,
    lineheight=1.4,
    align=0,  # Left aligned
)
# fill_textbox returns the list of lines that did not fit (empty on success).
if overflow:
    print(f"Warning: {len(overflow)} lines didn't fit")

# Write all text to page
writer.write_text(page)

doc.save("advanced_text.pdf")
doc.close()

import pymupdf
doc = pymupdf.open()
page = doc.new_page()

# Insert image from file
image_rect = pymupdf.Rect(100, 400, 400, 600)
try:
    # Only the insertion itself can fail (e.g. missing or unreadable file).
    page.insert_image(
        image_rect,
        filename="sample_image.jpg",
        keep_proportion=True,
        overlay=True,
    )
except Exception as e:
    print(f"Image insertion failed: {e}")

# Create simple colored rectangle as image substitute
placeholder_rect = pymupdf.Rect(100, 200, 400, 350)
shape = pymupdf.Shape(page)
shape.draw_rect(placeholder_rect)
shape.finish(fill=[0.8, 0.8, 1.0], color=[0, 0, 1], width=2)
shape.commit()

# Add caption just below the rectangle (y grows downward on the page)
page.insert_text(
    pymupdf.Point(100, placeholder_rect.y1 + 15),
    "Image Caption",
    fontsize=10,
    color=[0.5, 0.5, 0.5],
)

doc.save("document_with_images.pdf")
doc.close()

import pymupdf
doc = pymupdf.open()
page = doc.new_page()

# Create shape drawing context
shape = pymupdf.Shape(page)

# Draw various shapes. Each finish() styles the draw_* calls made since
# the previous finish().

# Rectangle
shape.draw_rect(pymupdf.Rect(50, 700, 150, 750))
shape.finish(fill=[1, 0, 0], color=[0.5, 0, 0], width=2)

# Circle
shape.draw_circle(pymupdf.Point(250, 725), 25)
shape.finish(fill=[0, 1, 0], color=[0, 0.5, 0], width=2)

# Line
shape.draw_line(pymupdf.Point(50, 650), pymupdf.Point(300, 650))
shape.finish(color=[0, 0, 1], width=3)

# Polygon (triangle): Shape has no draw_polygon method, so draw the outline
# with draw_polyline and let finish() close the path back to the first point.
triangle_points = [
    pymupdf.Point(400, 700),
    pymupdf.Point(450, 750),
    pymupdf.Point(350, 750),
]
shape.draw_polyline(triangle_points)
shape.finish(fill=[1, 1, 0], color=[0.5, 0.5, 0], width=2, closePath=True)

# Bezier curve (open path: do not connect the end point back to the start)
shape.draw_bezier(
    pymupdf.Point(50, 600),
    pymupdf.Point(150, 550),
    pymupdf.Point(250, 550),
    pymupdf.Point(350, 600),
)
shape.finish(color=[1, 0, 1], width=3, closePath=False)

# Commit all shapes
shape.commit(overlay=True)

# Add labels
labels = [
    (pymupdf.Point(100, 680), "Rectangle"),
    (pymupdf.Point(250, 680), "Circle"),
    (pymupdf.Point(100, 630), "Line"),
    (pymupdf.Point(400, 680), "Triangle"),
    (pymupdf.Point(200, 580), "Bezier Curve"),
]
for point, text in labels:
    page.insert_text(point, text, fontsize=10)

doc.save("shapes_document.pdf")
doc.close()

import pymupdf
doc = pymupdf.open()
page = doc.new_page()

# Layout runs top to bottom: page coordinates start at the top-left corner
# and y grows downward.

# Add form title
page.insert_text(
    pymupdf.Point(50, 72),
    "Sample Form",
    fontsize=18,
    color=[0, 0, 0.5],
)

# Simulate form fields with annotations and drawings.
# NOTE: these are visual stand-ins only; they are not interactive PDF form
# fields (widgets).

# Text field
text_field_rect = pymupdf.Rect(150, 100, 400, 120)
text_annot = page.add_freetext_annot(
    text_field_rect,
    "Enter your name",
    fontsize=12,
    align=0,
)
text_annot.set_border({"width": 1, "style": "solid"})
text_annot.update()

# Label for text field
page.insert_text(
    pymupdf.Point(50, 115),
    "Name:",
    fontsize=12,
)

# Checkbox simulation (empty bordered square)
checkbox_rect = pymupdf.Rect(150, 150, 170, 170)
shape = pymupdf.Shape(page)
shape.draw_rect(checkbox_rect)
shape.finish(fill=[1, 1, 1], color=[0, 0, 0], width=1)
shape.commit()

# Add checkbox label
page.insert_text(
    pymupdf.Point(50, 165),
    "Subscribe to newsletter:",
    fontsize=12,
)

# Instructions (placed below the fields they refer to)
instructions = """Instructions:
1. Fill in your name in the text field above
2. Check the box if you want to subscribe
3. Save the document when complete"""
page.insert_textbox(
    pymupdf.Rect(50, 220, 500, 320),
    instructions,
    fontsize=10,
    lineheight=1.3,
)

doc.save("form_document.pdf")
doc.close()

import pymupdf
def create_report_document(title: str, content_files: list, output_path: str):
    """Create report by combining multiple content sources.

    A title page is written first; then each entry of content_files is
    appended in order. Files that cannot be processed are reported on
    stdout and skipped, so one bad input does not abort the report.

    Parameters:
    - title: text shown on the title page
    - content_files: paths of ".pdf" files (all pages are appended) and
      ".txt" files (text is placed on one new page); the extension check is
      case-insensitive and other extensions are ignored
    - output_path: path of the PDF file to write
    """
    import datetime

    # Create new document
    doc = pymupdf.open()
    try:
        # Title page
        title_page = doc.new_page()

        # Add title
        title_page.insert_text(
            pymupdf.Point(50, 400),
            title,
            fontsize=24,
            color=[0, 0, 0.5],
        )

        # Add creation date below the title (y grows downward on the page)
        date_str = datetime.datetime.now().strftime("%B %d, %Y")
        title_page.insert_text(
            pymupdf.Point(50, 430),
            f"Generated on {date_str}",
            fontsize=12,
            color=[0.5, 0.5, 0.5],
        )

        # Process content files
        for file_path in content_files:
            lowered = file_path.lower()
            try:
                if lowered.endswith(".pdf"):
                    # Insert PDF content; always release the source document
                    source_doc = pymupdf.open(file_path)
                    try:
                        doc.insert_pdf(source_doc)
                    finally:
                        source_doc.close()
                elif lowered.endswith(".txt"):
                    # Insert text content
                    with open(file_path, "r", encoding="utf-8") as f:
                        text_content = f.read()
                    content_page = doc.new_page()
                    unused_space = content_page.insert_textbox(
                        pymupdf.Rect(50, 50, 545, 792),
                        text_content,
                        fontsize=11,
                        lineheight=1.3,
                    )
                    # A negative result means the text was too long for the
                    # box and nothing was written to the page.
                    if unused_space < 0:
                        print(
                            f"Warning: text of {file_path} does not fit "
                            "on one page and was not inserted"
                        )
            except Exception as e:
                print(f"Error processing {file_path}: {e}")

        # Save combined document
        doc.save(output_path)
    finally:
        # Release the document even when processing or saving fails
        doc.close()
    print(f"Report saved to {output_path}")
# Usage: build a report from two text files and one PDF
content_files = [
    "introduction.txt",
    "data_analysis.pdf",
    "conclusions.txt",
]
create_report_document(
    title="Monthly Report",
    content_files=content_files,
    output_path="monthly_report.pdf",
)

import pymupdf
class DocumentTemplate:
    """PDF template whose placeholder markers can be replaced with content."""

    def __init__(self, template_path: str = None):
        """Create document template.

        Parameters:
        - template_path: path of an existing template PDF; when omitted, a
          one-page default template with {{TITLE}} and {{CONTENT}}
          placeholders is generated
        """
        if template_path:
            self.doc = pymupdf.open(template_path)
        else:
            self.doc = pymupdf.open()
            self._create_default_template()

    def _create_default_template(self):
        """Create a default template."""
        page = self.doc.new_page()

        # Header area at the top of the page (page coordinates start at the
        # top-left corner and y grows downward)
        header_rect = pymupdf.Rect(50, 50, 550, 92)
        shape = pymupdf.Shape(page)
        shape.draw_rect(header_rect)
        shape.finish(fill=[0.9, 0.9, 0.9], color=[0.5, 0.5, 0.5])
        shape.commit()

        # Placeholder text: title inside the header, content below it
        page.insert_text(
            pymupdf.Point(60, 77),
            "{{TITLE}}",
            fontsize=16,
            color=[0.5, 0.5, 0.5],
        )
        page.insert_text(
            pymupdf.Point(60, 120),
            "{{CONTENT}}",
            fontsize=12,
            color=[0.5, 0.5, 0.5],
        )

    def fill_template(self, replacements: dict) -> pymupdf.Document:
        """Fill template with actual content.

        The template itself is left untouched; a filled copy is returned
        and must be closed by the caller.

        Parameters:
        - replacements: mapping of placeholder string to replacement string

        Returns:
        New Document with every text span that contained a placeholder
        removed and re-inserted with the replacements applied

        Limitations: the replacement text is written as a single line with
        the span's font name and size, so the span's font must be one that
        insert_text accepts by name (e.g. a Base-14 font), and long
        replacements are not wrapped.
        """
        # Create copy of template
        new_doc = pymupdf.open()
        new_doc.insert_pdf(self.doc)

        for page in new_doc:
            # Collect all affected spans first: the page must not be changed
            # while its extracted text is still being examined.
            pending = []
            for block in page.get_text("dict")["blocks"]:
                for line in block.get("lines", []):
                    for span in line["spans"]:
                        filled = span["text"]
                        # Apply every replacement before inserting, so a span
                        # holding several placeholders is written only once.
                        for placeholder, replacement in replacements.items():
                            filled = filled.replace(placeholder, replacement)
                        if filled != span["text"]:
                            pending.append((span, filled))

            if not pending:
                continue

            # Remove the old placeholder text. fill=False avoids painting a
            # white box; images=0 / graphics=0 keep pictures and drawings
            # (such as the header background) that overlap the text.
            for span, _ in pending:
                page.add_redact_annot(span["bbox"], fill=False)
            page.apply_redactions(images=0, graphics=0)

            # Re-insert at the span origin, which is the start of the text
            # baseline - the position insert_text expects. The bbox top-left
            # corner would place the text one line too high.
            for span, filled in pending:
                page.insert_text(
                    pymupdf.Point(span["origin"]),
                    filled,
                    fontsize=span["size"],
                    fontname=span["font"],
                )
        return new_doc

    def close(self):
        """Close template document."""
        self.doc.close()
# Usage
template = DocumentTemplate()
filled_doc = template.fill_template({
"{{TITLE}}": "Project Status Report",
"{{CONTENT}}": "This project is proceeding according to schedule..."
})
filled_doc.save("filled_document.pdf")
filled_doc.close()
template.close()Install with Tessl CLI
npx tessl i tessl/pypi-pymupdf