A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Create new PDF files, add and manage pages, insert content, add metadata and security features. The PdfWriter class provides comprehensive PDF creation and modification capabilities.
Main class for creating and writing PDF files with full control over pages, metadata, security, and advanced features.
class PdfWriter:
def __init__(self, fileobj: Union[str, bytes] = ""):
"""
Initialize a PdfWriter instance.
Args:
fileobj: Optional output file path or object
"""
@property
def pdf_header(self) -> bytes:
"""PDF version header bytes."""
@property
def pages(self) -> List[PageObject]:
"""List of pages in the current PDF."""
@property
def page_layout(self) -> Optional[str]:
"""Page layout preference for the document."""
@property
def page_mode(self) -> Optional[PagemodeType]:
"""Page mode preference for the document."""
@property
def open_destination(self) -> Union[None, Destination, TextStringObject, ByteStringObject]:
"""Destination to open when document is first displayed."""
@property
def threads(self) -> ArrayObject:
"""Article threads in the document."""
def add_page(self, page: PageObject) -> None:
"""
Add a page to the PDF.
Args:
page (PageObject): Page to add to the document
"""
def insert_page(self, page: PageObject, index: int = 0) -> None:
"""
Insert a page at a specific position.
Args:
page (PageObject): Page to insert
index (int): Position to insert page (default: 0 - beginning)
"""
def get_page(self, page_number: int) -> PageObject:
"""
Get a specific page by number.
Args:
page_number (int): Zero-based page index
Returns:
PageObject: The requested page
Raises:
IndexError: If page number is out of range
"""
def add_blank_page(self, width: float, height: float) -> PageObject:
"""
Add a blank page to the PDF.
Args:
width (float): Page width in points
height (float): Page height in points
Returns:
PageObject: The created blank page
"""
def insert_blank_page(self, width: float, height: float, index: int = 0) -> PageObject:
"""
Insert a blank page at a specific position.
Args:
width (float): Page width in points
height (float): Page height in points
index (int): Position to insert page (default: 0 - beginning)
Returns:
PageObject: The created blank page
"""
def write(self, stream) -> Tuple[bool, Union[FileIO, BytesIO, BufferedReader, BufferedWriter]]:
"""
Write the PDF to a stream.
Args:
stream (file-like object): Output stream for PDF data
Returns:
tuple: Success status and stream object
"""
def add_metadata(self, infos: Dict[str, Any]) -> None:
"""
Add metadata to the PDF.
Args:
infos (dict): Metadata dictionary whose keys are slash-prefixed PDF names such as '/Title', '/Author', '/Subject', etc.
"""
def add_js(self, javascript: str) -> None:
"""
Add JavaScript to the PDF.
Args:
javascript (str): JavaScript code to embed
"""
def add_attachment(self, filename: str, data: bytes) -> None:
"""
Add a file attachment to the PDF.
Args:
filename (str): Name for the attached file
data (bytes): File content as bytes
"""
def encrypt(
self,
user_password: str,
owner_password: str = "",
use_128bit: bool = True,
permissions_flag: int = -1
) -> None:
"""
Encrypt the PDF with password protection.
Args:
user_password (str): Password for opening the PDF
owner_password (str): Password for full access (default: same as user_password)
use_128bit (bool): Use 128-bit encryption (default: True)
permissions_flag (int): Permission flags (-1 for full permissions)
"""
def add_outline_item(
self,
title: str,
page_number: int,
parent: IndirectObject = None,
color: Tuple[float, float, float] = None,
bold: bool = False,
italic: bool = False,
fit: str = "/Fit",
*args
) -> IndirectObject:
"""
Add an outline (bookmark) item.
Args:
title (str): Bookmark title
page_number (int): Target page number
parent (IndirectObject, optional): Parent bookmark
color (tuple, optional): RGB color tuple
bold (bool): Bold text (default: False)
italic (bool): Italic text (default: False)
fit (str): Fit type for destination (default: "/Fit")
Returns:
IndirectObject: Created outline item
"""
def add_named_destination(self, title: str, page_number: int) -> IndirectObject:
"""
Add a named destination.
Args:
title (str): Destination name
page_number (int): Target page number
Returns:
IndirectObject: Created destination
"""
def remove_links(self) -> None:
"""Remove all links from all pages."""
def remove_images(self, ignore_byte_string_object: bool = False) -> None:
"""
Remove images from all pages.
Args:
ignore_byte_string_object (bool): Whether to ignore byte string objects
"""
def remove_text(self, ignore_byte_string_object: bool = False) -> None:
"""
Remove text from all pages.
Args:
ignore_byte_string_object (bool): Whether to ignore byte string objects
"""
def add_annotation(self, page_number: int, annotation) -> None:
"""
Add an annotation to a specific page.
Args:
page_number (int): Target page number
annotation: Annotation object to add
"""
def clone_document_from_reader(
self,
reader: PdfReader,
after_page_append: Callable = None
) -> None:
"""
Clone all pages and metadata from a reader.
Args:
reader (PdfReader): Source reader to clone from
after_page_append (callable, optional): Callback after each page
"""
def append_pages_from_reader(
self,
reader: PdfReader,
after_page_append: Callable = None
) -> None:
"""
Append all pages from a reader.
Args:
reader (PdfReader): Source reader
after_page_append (callable, optional): Callback after each page
"""
def update_page_form_field_values(
self,
page: PageObject,
fields: Dict[str, Any],
flags: int = 0
) -> None:
"""
Update form field values on a page.
Args:
page (PageObject): Target page
fields (dict): Field names and new values
flags (int): Update flags
"""

from PyPDF2 import PdfWriter, PageObject
from PyPDF2.generic import RectangleObject
# Create a new PDF writer
writer = PdfWriter()
# Add a blank page (8.5" x 11" = 612 x 792 points)
page = writer.add_blank_page(612, 792)
# Write to file
with open("new_document.pdf", "wb") as output_file:
writer.write(output_file)

from PyPDF2 import PdfReader, PdfWriter
# Read source PDF
reader = PdfReader("source.pdf")
writer = PdfWriter()
# Copy specific pages
writer.add_page(reader.pages[0]) # First page
writer.add_page(reader.pages[2]) # Third page
# Or copy all pages
for page in reader.pages:
writer.add_page(page)
# Write output
with open("copied_pages.pdf", "wb") as output_file:
writer.write(output_file)

from PyPDF2 import PdfWriter, PdfReader
reader = PdfReader("input.pdf")
writer = PdfWriter()
# Copy pages
for page in reader.pages:
writer.add_page(page)
# Add metadata
writer.add_metadata({
'/Title': 'My Document',
'/Author': 'Jane Doe',
'/Subject': 'PDF Creation Example',
'/Creator': 'PyPDF2 Script',
'/Producer': 'PyPDF2'
})
# Encrypt the PDF
writer.encrypt(
user_password="user123",
owner_password="owner456",
use_128bit=True,
permissions_flag=0b11111100 # Allow printing, copying, etc.
)
with open("secure_document.pdf", "wb") as output_file:
writer.write(output_file)

from PyPDF2 import PdfWriter, PdfReader
reader = PdfReader("chapters.pdf")
writer = PdfWriter()
# Copy all pages
for page in reader.pages:
writer.add_page(page)
# Add bookmarks
chapter1 = writer.add_outline_item("Chapter 1: Introduction", 0)
chapter2 = writer.add_outline_item("Chapter 2: Methods", 5)
chapter3 = writer.add_outline_item("Chapter 3: Results", 10)
# Add sub-bookmarks
writer.add_outline_item("1.1 Background", 1, parent=chapter1)
writer.add_outline_item("1.2 Objectives", 3, parent=chapter1)
# Add named destinations
writer.add_named_destination("Introduction", 0)
writer.add_named_destination("Conclusion", len(reader.pages) - 1)
with open("structured_document.pdf", "wb") as output_file:
writer.write(output_file)

from PyPDF2 import PdfWriter, PdfReader
reader = PdfReader("base.pdf")
writer = PdfWriter()
# Copy pages
for page in reader.pages:
writer.add_page(page)
# Add JavaScript for automatic printing
writer.add_js("""
this.print({
bUI: true,
bSilent: false,
bShrinkToFit: true
});
""")
# Add file attachment
with open("data.xlsx", "rb") as attachment_file:
attachment_data = attachment_file.read()
writer.add_attachment("data.xlsx", attachment_data)
with open("enhanced_document.pdf", "wb") as output_file:
writer.write(output_file)

from PyPDF2 import PdfReader, PdfWriter
# Read PDF with form fields
reader = PdfReader("form.pdf")
writer = PdfWriter()
# Copy pages
for page in reader.pages:
writer.add_page(page)
# Update form field values
form_data = {
"name": "John Smith",
"email": "john@example.com",
"phone": "(555) 123-4567"
}
# Update fields on first page
writer.update_page_form_field_values(writer.pages[0], form_data)
with open("filled_form.pdf", "wb") as output_file:
writer.write(output_file)

class PdfFileWriter:
"""DEPRECATED: Use PdfWriter instead. Will be removed in PyPDF2 3.0.0."""

This class is deprecated and should not be used in new code. All functionality has been moved to PdfWriter with the same API.
Install with Tessl CLI
npx tessl i tessl/pypi-py-pdf2