Read and write PDFs with Python, powered by qpdf
—
Fundamental PDF document operations providing the essential functionality for opening, creating, saving, and manipulating PDF files. These operations form the foundation of all pikepdf functionality.
The main Pdf class provides comprehensive document-level operations including file I/O, metadata access, and document structure manipulation.
class Pdf:
    """
    Main PDF document class representing a complete PDF file.

    Groups the document-level operations of the library: opening and
    creating documents, saving, copying objects between documents, and
    access to the document structure (catalog, pages, trailer, document
    information). Instances can be used in a ``with`` statement.
    """

    @staticmethod
    def open(filename, *, password=None, hex_password=None, ignore_xref_streams=False,
             suppress_warnings=True, attempt_recovery=True, inherit_page_attributes=True,
             access_mode=AccessMode.default) -> Pdf:
        """
        Open an existing PDF file.

        Parameters:
        - filename (str | pathlib.Path | IO): Path to PDF file or file-like object
        - password (str, optional): Password for encrypted PDFs
        - hex_password (bool, optional): Flag, not a second password. When
          true, ``password`` is interpreted as a hex-encoded encryption key
          instead of a regular password
        - ignore_xref_streams (bool): Ignore cross-reference streams
        - suppress_warnings (bool): Suppress QPDF warnings
        - attempt_recovery (bool): Attempt to recover damaged PDFs
        - inherit_page_attributes (bool): Inherit page attributes from page tree
        - access_mode (AccessMode): File access mode

        Returns:
            Pdf: The opened PDF document

        Raises:
            PdfError: If the file cannot be opened
            PasswordError: If password is required or incorrect
        """

    @staticmethod
    def new() -> Pdf:
        """
        Create a new empty PDF document.

        Returns:
            Pdf: A new empty PDF document
        """

    def save(self, filename, *, static_id=False, preserve_pdfa=True,
             min_version=None, force_version=None, fix_metadata_version=True,
             compress_streams=True, stream_decode_level=None,
             object_stream_mode=ObjectStreamMode.preserve,
             normalize_content=False, linearize=False, qdf=False,
             progress=None, encryption=None, samefile_check=True) -> None:
        """
        Save the PDF to a file.

        Parameters:
        - filename (str | pathlib.Path | IO): Output path or file-like object
        - static_id (bool): Use static document ID for reproducible output
        - preserve_pdfa (bool): Maintain PDF/A compliance
        - min_version (str, optional): Minimum PDF version (e.g., '1.4')
        - force_version (str, optional): Force specific PDF version
        - fix_metadata_version (bool): Update metadata version to match PDF version
        - compress_streams (bool): Compress stream objects
        - stream_decode_level (StreamDecodeLevel, optional): Stream decoding level
        - object_stream_mode (ObjectStreamMode): Object stream handling
        - normalize_content (bool): Normalize content streams
        - linearize (bool): Create linearized (fast web view) PDF
        - qdf (bool): Save in QPDF's inspection format
        - progress (callable, optional): Progress callback function
        - encryption (Encryption, optional): Encryption settings
        - samefile_check (bool): Check if saving to same file

        Raises:
            PdfError: If the file cannot be saved
        """

    def close(self) -> None:
        """
        Close the PDF and release resources.

        The PDF object becomes unusable after closing.
        """

    def copy_foreign(self, other_pdf_obj: Object) -> Object:
        """
        Copy an object from another PDF into this PDF.

        Parameters:
        - other_pdf_obj (Object): Object from another PDF to copy

        Returns:
            Object: The copied object owned by this PDF

        Raises:
            ForeignObjectError: If the object cannot be copied
        """

    def make_indirect(self, obj: Object) -> Object:
        """
        Convert a direct object to an indirect object.

        Parameters:
        - obj (Object): Object to make indirect

        Returns:
            Object: The indirect object
        """

    def add_blank_page(self, *, page_size=(612, 792)) -> Page:
        """
        Add a blank page to the PDF.

        Parameters:
        - page_size (tuple): Page dimensions (width, height) in points

        Returns:
            Page: The newly created page
        """

    @property
    def Root(self) -> Dictionary:
        """
        The PDF's document catalog (root object).

        Returns:
            Dictionary: Document catalog containing page tree and other references
        """

    @property
    def pages(self) -> list[Page]:
        """
        List of all pages in the PDF.

        Returns:
            list[Page]: Pages that can be indexed, sliced, and modified
        """

    # NOTE(review): upstream pikepdf appears to document ``objects`` as an
    # iterable list of objects rather than a dict -- confirm before relying
    # on the mapping shape described here.
    @property
    def objects(self) -> dict[tuple[int, int], Object]:
        """
        Mapping of all indirect objects in the PDF.

        Returns:
            dict: Mapping from (objid, generation) to Object
        """

    @property
    def is_encrypted(self) -> bool:
        """
        Whether the PDF is encrypted.

        Returns:
            bool: True if the PDF has encryption
        """

    @property
    def pdf_version(self) -> str:
        """
        PDF version string (e.g., '1.4', '1.7').

        Returns:
            str: PDF version
        """

    @property
    def trailer(self) -> Dictionary:
        """
        The PDF's trailer dictionary.

        Returns:
            Dictionary: Trailer containing cross-reference information
        """

    @property
    def docinfo(self) -> Dictionary:
        """
        Document information dictionary.

        Returns:
            Dictionary: Document metadata (title, author, etc.)
        """

    def check(self) -> list[str]:
        """
        Check PDF for structural problems.

        Returns:
            list[str]: List of problems found (empty if no problems)
        """


# Global functions that provide shortcuts to common PDF operations.
# NOTE: at module level this name shadows the builtin ``open``.
def open(filename, **kwargs) -> Pdf:
    """
    Open an existing PDF file (alias for Pdf.open).

    Parameters:
    - filename: Path to PDF file or file-like object
    - **kwargs: Same arguments as Pdf.open()

    Returns:
        Pdf: The opened PDF document
    """
def new() -> Pdf:
    """
    Create a new empty PDF document (alias for Pdf.new).

    Returns:
        Pdf: A new empty PDF document
    """


# Control how PDF files are accessed and loaded into memory.
from enum import Enum
class AccessMode(Enum):
    """File access modes for opening PDFs.

    Passed as the ``access_mode`` argument of ``Pdf.open``.
    """

    # ``...`` is a placeholder: this listing does not show the member values.
    default = ...  # Standard file access
    mmap = ...  # Memory-mapped file access when possible
    mmap_only = ...  # Require memory-mapped access
    stream = ...  # Stream-based access for large files


# Control how object streams are handled during save operations.
class ObjectStreamMode(Enum):
    """Object stream handling modes.

    Passed as the ``object_stream_mode`` argument of ``Pdf.save``.
    """

    # ``...`` is a placeholder: this listing does not show the member values.
    disable = ...  # Don't use object streams
    preserve = ...  # Keep existing object streams
    generate = ...  # Generate new object streams for compression


# Control the level of stream decoding performed when reading PDFs.
class StreamDecodeLevel(Enum):
    """Stream decoding levels.

    Accepted by the ``stream_decode_level`` argument of ``Pdf.save``.
    """

    # ``...`` is a placeholder: this listing does not show the member values.
    none = ...  # No stream decoding
    generalized = ...  # Decode common filters
    specialized = ...  # Decode specialized filters
    all = ...  # Decode all supported filters


import pikepdf
# Open and read a PDF
with pikepdf.open('document.pdf') as pdf:
    print(f"PDF version: {pdf.pdf_version}")
    print(f"Number of pages: {len(pdf.pages)}")
    print(f"Encrypted: {pdf.is_encrypted}")

# Create a new PDF with a blank page.
# The ``with`` block closes the document even if saving fails.
with pikepdf.new() as new_pdf:
    new_pdf.add_blank_page(page_size=(612, 792))  # US Letter
    new_pdf.save('blank.pdf')


import pikepdf
# Open password-protected PDF
try:
    # The ``with`` block closes the document on every exit path.
    with pikepdf.open('encrypted.pdf', password='secret') as pdf:
        print("Successfully opened encrypted PDF")
except pikepdf.PasswordError:
    print("Incorrect password")


import pikepdf
from pikepdf import Encryption, Permissions

# Configure encryption
encryption = Encryption(
    owner='owner_password',
    user='user_password',
    allow=Permissions(print_highres=True, extract=False),
)

# Open, then save with compression, linearization and encryption.
# The ``with`` block closes the input even if saving fails.
with pikepdf.open('input.pdf') as pdf:
    # Save with advanced options
    pdf.save(
        'output.pdf',
        linearize=True,  # Fast web view
        compress_streams=True,
        encryption=encryption,
        fix_metadata_version=True,
    )


import pikepdf
# Combine multiple PDFs
# All three documents are closed when the block exits, even on error.
with pikepdf.open('doc1.pdf') as pdf1, \
        pikepdf.open('doc2.pdf') as pdf2, \
        pikepdf.new() as combined:
    # Copy all pages from both PDFs
    combined.pages.extend(pdf1.pages)
    combined.pages.extend(pdf2.pages)
    # Save before the source documents are closed.
    combined.save('combined.pdf')


# Install with Tessl CLI
npx tessl i tessl/pypi-pikepdf