A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Core functionality for opening, reading, creating, and saving PDF documents. This module provides the fundamental classes for all PDF operations in pypdf.
The PdfReader class provides comprehensive PDF file reading capabilities with support for encrypted documents, strict parsing modes, and complete document analysis.
class PdfReader:
def __init__(self, stream, strict: bool = False, password: str | None = None):
"""
Initialize a PDF reader.
Args:
stream: Path to PDF file, file-like object, or bytes
strict: Enable strict parsing mode (default: False)
password: Password for encrypted PDFs (default: None)
"""
def decrypt(self, password: str) -> PasswordType:
"""
Decrypt an encrypted PDF.
Args:
password: Password to decrypt the PDF
Returns:
PasswordType indicating the type of password used
"""
def get_object(self, indirect_reference):
"""
Retrieve a PDF object by its indirect reference.
Args:
indirect_reference: Indirect object reference
Returns:
The requested PDF object
"""
def close(self) -> None:
"""Close the PDF file and free resources."""
def __enter__(self):
"""Context manager entry."""
def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit."""
@property
def is_encrypted(self) -> bool:
"""Check if the PDF is encrypted."""
@property
def metadata(self) -> DocumentInformation | None:
"""Get document metadata."""
@property
def pages(self):
"""Access to PDF pages collection."""
@property
def root_object(self):
"""Get the PDF catalog (root) object."""
@property
def pdf_header(self) -> str:
"""Get the PDF version header."""
@property
def xmp_metadata(self):
"""Get XMP metadata if present."""The PdfWriter class enables PDF creation, modification, and output generation with support for encryption, incremental updates, and comprehensive page management.
class PdfWriter:
def __init__(self, clone_from=None, incremental: bool = False):
"""
Initialize a PDF writer.
Args:
clone_from: PdfReader to clone structure from (optional)
incremental: Enable incremental updates (default: False)
"""
def add_page(self, page: PageObject) -> None:
"""
Add a page to the document.
Args:
page: PageObject to add
"""
def insert_page(self, page: PageObject, index: int) -> None:
"""
Insert a page at a specific position.
Args:
page: PageObject to insert
index: Position to insert at
"""
def add_blank_page(self, width: float, height: float) -> PageObject:
"""
Add a blank page with specified dimensions.
Args:
width: Page width in points
height: Page height in points
Returns:
The created PageObject
"""
def insert_blank_page(self, width: float, height: float, index: int) -> PageObject:
"""
Insert a blank page at a specific position.
Args:
width: Page width in points
height: Page height in points
index: Position to insert at
Returns:
The created PageObject
"""
def append_pages_from_reader(self, reader: PdfReader, after_page_append=None) -> None:
"""
Append all pages from another PDF reader.
Args:
reader: PdfReader to copy pages from
after_page_append: Optional callback function called after each page
"""
def write(self, stream) -> None:
"""
Write the PDF to a stream.
Args:
stream: Output stream (file-like object)
"""
def write_stream(self, stream) -> None:
"""
Alias for write() method.
Args:
stream: Output stream (file-like object)
"""
def encrypt(
self,
user_password: str,
owner_password: str | None = None,
use_128bit: bool = True,
permissions_flag: int = -1,
user_access_permissions: int | None = None
) -> None:
"""
Encrypt the PDF with password protection.
Args:
user_password: Password for opening the PDF
owner_password: Password for full access (defaults to user_password)
use_128bit: Use 128-bit encryption (default: True)
permissions_flag: Permissions bit flags
user_access_permissions: User access permissions
"""
def add_js(self, javascript: str) -> None:
"""
Add JavaScript to the PDF.
Args:
javascript: JavaScript code to add
"""
def add_attachment(self, filename: str, data: bytes) -> None:
"""
Add a file attachment to the PDF.
Args:
filename: Name of the attached file
data: File data as bytes
"""
def set_need_appearances_writer(self, state: bool = True) -> None:
"""
Set the needAppearances flag for form fields.
Args:
state: Whether to enable automatic appearance generation
"""
def clone_reader_document_root(self, reader: PdfReader) -> None:
"""
Clone the document structure from another PDF reader.
Args:
reader: PdfReader to clone from
"""
def clone_document_from_reader(self, reader: PdfReader, after_page_append=None) -> None:
"""
Clone an entire document from a reader.
Args:
reader: PdfReader to clone from
after_page_append: Optional callback after each page
"""
def compress_identical_objects(self, remove_duplicate_page_inheritable_objects: bool = True) -> None:
"""
Compress identical objects to reduce file size.
Args:
remove_duplicate_page_inheritable_objects: Remove duplicate inheritable objects
"""
def generate_file_identifiers(self) -> None:
"""Generate unique file identifiers for the PDF."""
def add_metadata(self, infos: dict[str, Any]) -> None:
"""
Add metadata dictionary to the PDF.
Args:
infos: Dictionary of metadata key-value pairs
"""
def get_reference(self, obj: PdfObject) -> IndirectObject:
"""
Get indirect reference for a PDF object.
Args:
obj: PDF object to get reference for
Returns:
Indirect object reference
"""
def update_page_form_field_values(
self,
page: PageObject,
fields: dict,
flags: int = 0
) -> None:
"""
Update form field values on a page.
Args:
page: PageObject containing the form
fields: Dictionary mapping field names to values
flags: Form field flags
"""
def __enter__(self):
"""Context manager entry."""
def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit."""
@property
def is_encrypted(self) -> bool:
"""Check if the writer will produce an encrypted PDF."""
@property
def root_object(self):
"""Get the PDF catalog (root) object."""
@property
def pdf_header(self) -> str:
"""Get the PDF version header."""
@property
def xmp_metadata(self):
"""Get XMP metadata if present."""
@property
def metadata(self) -> DocumentInformation | None:
"""Get document metadata."""
@property
def page_layout(self):
"""Get or set the page layout mode."""
@property
def page_mode(self):
"""Get or set the page viewing mode."""from pypdf import PdfReader
# Read from file path
reader = PdfReader("document.pdf")
print(f"Number of pages: {len(reader.pages)}")
# Read encrypted PDF
reader = PdfReader("encrypted.pdf", password="secret")
# Context manager usage
with PdfReader("document.pdf") as reader:
for page in reader.pages:
text = page.extract_text()
print(text)
from pypdf import PdfWriter, PdfReader
# Create new PDF
writer = PdfWriter()
writer.add_blank_page(612, 792) # Letter size
with open("blank.pdf", "wb") as output:
writer.write(output)
# Copy pages from existing PDF
reader = PdfReader("source.pdf")
writer = PdfWriter()
writer.append_pages_from_reader(reader)
with open("copy.pdf", "wb") as output:
writer.write(output)
# Encrypt PDF
# Note: encryption is applied when the document is written, so call write() after encrypt()
writer.encrypt("user_password", "owner_password")
from pypdf import PdfReader, PdfWriter
def merge_pdfs(input_files: list[str], output_file: str) -> None:
    """Concatenate the pages of several PDF files into a single output file.

    Args:
        input_files: Paths of the PDFs to merge; pages are appended in
            the order the paths are given.
        output_file: Path the merged PDF is written to.
    """
    writer = PdfWriter()
    for filename in input_files:
        reader = PdfReader(filename)
        writer.append_pages_from_reader(reader)
    # PDF output is binary, so the destination is opened in "wb" mode.
    with open(output_file, "wb") as output:
        writer.write(output)
merge_pdfs(["doc1.pdf", "doc2.pdf", "doc3.pdf"], "merged.pdf")
from pypdf import PdfReader, PdfWriter
# Open existing PDF for incremental update
reader = PdfReader("existing.pdf")
writer = PdfWriter(clone_from=reader, incremental=True)
# Make modifications
writer.add_blank_page(612, 792)
# Save with incremental update
with open("existing.pdf", "wb") as output:
writer.write(output)
Install with Tessl CLI
npx tessl i tessl/pypi-pypdf