Read and write PDFs with Python, powered by qpdf
npx @tessl/cli install tessl/pypi-pikepdf@9.10.0

A comprehensive Python library for reading, writing, and manipulating PDF files, built on top of the mature qpdf C++ library. It provides a Pythonic API for PDF operations including page manipulation, metadata editing, form field handling, encryption/decryption, and content transformation with superior performance compared to pure Python alternatives.
pip install pikepdf

import pikepdf

Common import for working with PDFs:
from pikepdf import Pdf

import pikepdf
# Open an existing PDF
pdf = pikepdf.open('input.pdf')
# Or use the Pdf class directly
pdf = pikepdf.Pdf.open('input.pdf')
# Create a new empty PDF
new_pdf = pikepdf.new()
# Add a blank page
new_pdf.add_blank_page(page_size=(612, 792)) # Letter size
# Access pages
first_page = pdf.pages[0]
# Rotate a page
first_page.rotate(90, relative=True)
# Copy pages between PDFs
new_pdf.pages.append(first_page)
# Save the PDF
pdf.save('output.pdf')
new_pdf.save('new_document.pdf')
# Always close PDFs when done
pdf.close()
new_pdf.close()

pikepdf is built on a layered architecture that provides both low-level control and high-level convenience:
This design enables pikepdf to handle all PDF versions (1.1-1.7), maintain compatibility with PDF/A standards, and provide exceptional performance for production applications.
Fundamental PDF document operations including opening, creating, saving, and basic manipulation of PDF files and their structure.
class Pdf:
@staticmethod
def open(filename, *, password=None, hex_password=None, ignore_xref_streams=False,
suppress_warnings=True, attempt_recovery=True, inherit_page_attributes=True,
access_mode=AccessMode.default) -> Pdf: ...
@staticmethod
def new() -> Pdf: ...
def save(self, filename, *, static_id=False, preserve_pdfa=True,
min_version=None, force_version=None, fix_metadata_version=True,
compress_streams=True, stream_decode_level=None,
object_stream_mode=ObjectStreamMode.preserve,
normalize_content=False, linearize=False, qdf=False,
progress=None, encryption=None, samefile_check=True) -> None: ...
def close(self) -> None: ...
def open(filename, **kwargs) -> Pdf: ... # Alias for Pdf.open()
def new() -> Pdf: ... # Alias for Pdf.new()

PDF object types and data structures for manipulating the internal representation of PDF content, including arrays, dictionaries, names, strings, and streams.
class Object:
def is_owned_by(self, possible_owner: Pdf) -> bool: ...
def same_owner_as(self, other: Object) -> bool: ...
def with_same_owner_as(self, other: Object) -> Object: ...
@staticmethod
def parse(data: str, *, pdf_context: Pdf = None) -> Object: ...
def unparse(self, *, resolved: bool = False) -> str: ...
class Array(Object): ...
class Dictionary(Object): ...
class Name(Object): ...
class String(Object): ...
class Stream(Object): ...

Page-level operations including manipulation, rotation, content parsing, overlays, and coordinate transformations.
class Page(Object):
def rotate(self, angle: int, *, relative: bool = True) -> None: ...
def add_overlay(self, other: Page) -> None: ...
def add_underlay(self, other: Page) -> None: ...
def parse_contents(self) -> list[ContentStreamInstruction]: ...
@property
def mediabox(self) -> Rectangle: ...
@property
def cropbox(self) -> Rectangle: ...

Interactive PDF elements including form fields, annotations, and user input handling with comprehensive field type support.
class AcroForm:
@property
def exists(self) -> bool: ...
@property
def fields(self) -> list[AcroFormField]: ...
def add_field(self, field: AcroFormField) -> None: ...
def remove_fields(self, names: list[str]) -> None: ...
class AcroFormField:
@property
def field_type(self) -> str: ...
@property
def fully_qualified_name(self) -> str: ...
def set_value(self, value) -> None: ...
class Annotation(Object):
@property
def subtype(self) -> Name: ...
@property
def rect(self) -> Rectangle: ...

Image extraction, manipulation, and graphics operations including support for various formats and color spaces.
class PdfImage:
def extract_to(self, *, fileprefix: str = 'image') -> str: ...
def as_pil_image(self) -> Any: ... # PIL.Image
@property
def width(self) -> int: ...
@property
def height(self) -> int: ...
@property
def bpc(self) -> int: ... # bits per component
@property
def colorspace(self) -> Name: ...
class PdfInlineImage:
def as_pil_image(self) -> Any: ... # PIL.Image

PDF encryption, decryption, password handling, and permission management for document security.
class Encryption:
def __init__(self, *, owner: str = '', user: str = '', R: int = 6,
allow: Permissions = None, aes: bool = True,
metadata: bool = True) -> None: ...
class Permissions:
accessibility: bool
assemble: bool
extract: bool
modify_annotation: bool
modify_assembly: bool
modify_form: bool
modify_other: bool
print_lowres: bool
print_highres: bool

Document metadata, XMP data, and PDF properties including titles, authors, creation dates, and custom metadata fields.
class PdfMetadata:
def __init__(self, pdf: Pdf, *, sync_docinfo: bool = True) -> None: ...
@property
def pdfa_status(self) -> str: ...
def load_from_docinfo(self, docinfo: Dictionary, *, delete_missing: bool = False) -> None: ...

Metadata and Document Properties
Document navigation structure including bookmarks, table of contents, and document outline management.
class Outline:
@property
def root(self) -> OutlineItem: ...
def open_all(self) -> None: ...
def close_all(self) -> None: ...
class OutlineItem:
@property
def title(self) -> str: ...
@property
def destination(self) -> PageLocation: ...
@property
def action(self) -> Dictionary: ...
def make_page_destination(pdf: Pdf, page_num: int, *, view_type: str = 'Fit') -> Array: ...

Low-level content stream parsing, token filtering, and PDF operator manipulation for advanced content processing.
def parse_content_stream(page_or_stream) -> list[ContentStreamInstruction]: ...
def unparse_content_stream(instructions: list[ContentStreamInstruction]) -> bytes: ...
class ContentStreamInstruction:
@property
def operands(self) -> list[Object]: ...
@property
def operator(self) -> Operator: ...
class TokenFilter:
def handle_token(self, token: Token) -> None: ...
class Token:
@property
def type_(self) -> TokenType: ...
@property
def raw_value(self) -> bytes: ...
@property
def value(self) -> Object: ...

Embedded file management including attachment, extraction, and metadata handling for portfolio PDFs and file attachments.
class AttachedFileSpec:
@staticmethod
def from_filepath(pdf: Pdf, path: str, *, description: str = '',
relationship: str = '/Unspecified') -> AttachedFileSpec: ...
def get_file(self) -> bytes: ...
def get_all_filenames(self) -> dict[str, str]: ...
@property
def filename(self) -> str: ...
@property
def description(self) -> str: ...

Specialized operations including matrix transformations, coordinate systems, job interface, and tree structures for advanced PDF manipulation.
class Matrix:
def __init__(self, *args) -> None: ...
@staticmethod
def identity() -> Matrix: ...
def translated(self, dx: float, dy: float) -> Matrix: ...
def scaled(self, sx: float, sy: float) -> Matrix: ...
def rotated(self, angle_degrees: float) -> Matrix: ...
class Rectangle:
def __init__(self, llx: float, lly: float, urx: float, ury: float) -> None: ...
@property
def width(self) -> float: ...
@property
def height(self) -> float: ...
class Job:
def run(self) -> int: ...
def check_configuration(self) -> bool: ...
def create_pdf(self) -> Pdf: ...

from enum import Enum
class ObjectType(Enum):
uninitialized = ...
null = ...
boolean = ...
integer = ...
real = ...
string = ...
name_ = ...
array = ...
dictionary = ...
stream = ...
operator = ...
inlineimage = ...
class AccessMode(Enum):
default = ...
mmap = ...
mmap_only = ...
stream = ...
class StreamDecodeLevel(Enum):
none = ...
generalized = ...
specialized = ...
all = ...
class ObjectStreamMode(Enum):
disable = ...
preserve = ...
generate = ...

# Core exceptions
class PdfError(Exception): ...
class PasswordError(PdfError): ...
class DataDecodingError(PdfError): ...
class JobUsageError(PdfError): ...
class ForeignObjectError(PdfError): ...
class DeletedObjectError(PdfError): ...
# Model exceptions
class DependencyError(Exception): ...
class OutlineStructureError(Exception): ...
class UnsupportedImageTypeError(Exception): ...
class InvalidPdfImageError(Exception): ...
class HifiPrintImageNotTranscodableError(Exception): ...

Access to higher-level PDF constructs and specialized functionality through the models submodule.
import pikepdf.models
# Direct access to model classes and functions:
# pikepdf.models.PdfMetadata
# pikepdf.models.EncryptionInfo
# pikepdf.models.ContentStreamInstructions
# pikepdf.models.UnparseableContentStreamInstructions
# All model classes are also available directly from the main pikepdf module

def get_decimal_precision() -> int: ...
def set_decimal_precision(precision: int) -> None: ...
def set_flate_compression_level(level: int) -> None: ...

__version__: str # pikepdf package version
__libqpdf_version__: str # Underlying QPDF library version