CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-pikepdf

Read and write PDFs with Python, powered by qpdf

Pending
Overview
Eval results
Files

core-operations.mddocs/

Core PDF Operations

Fundamental PDF document operations providing the essential functionality for opening, creating, saving, and manipulating PDF files. These operations form the foundation of all pikepdf functionality.

Capabilities

PDF Document Management

The main Pdf class provides comprehensive document-level operations including file I/O, metadata access, and document structure manipulation.

class Pdf:
    """
    Main PDF document class representing a complete PDF file.

    Instances are obtained from Pdf.open() or Pdf.new() and can be used as
    context managers so the document is closed automatically.
    """

    @staticmethod
    def open(filename, *, password=None, hex_password=False, ignore_xref_streams=False,
             suppress_warnings=True, attempt_recovery=True, inherit_page_attributes=True,
             access_mode=AccessMode.default, allow_overwriting_input=False) -> Pdf:
        """
        Open an existing PDF file.

        Parameters:
        - filename (str | pathlib.Path | IO): Path to PDF file or file-like object
        - password (str, optional): Password for encrypted PDFs
        - hex_password (bool): If True, interpret `password` as a hex-encoded
          encryption key instead of a regular password
        - ignore_xref_streams (bool): Ignore cross-reference streams
        - suppress_warnings (bool): Suppress QPDF warnings
        - attempt_recovery (bool): Attempt to recover damaged PDFs
        - inherit_page_attributes (bool): Inherit page attributes from page tree
        - access_mode (AccessMode): File access mode
        - allow_overwriting_input (bool): Allow save() to overwrite the input
          file; the whole input is loaded into memory at open time

        Returns:
        Pdf: The opened PDF document

        Raises:
        PdfError: If the file cannot be opened
        PasswordError: If password is required or incorrect
        """

    @staticmethod
    def new() -> Pdf:
        """
        Create a new empty PDF document.

        Returns:
        Pdf: A new empty PDF document
        """

    def save(self, filename, *, static_id=False, preserve_pdfa=True,
             min_version=None, force_version=None, fix_metadata_version=True,
             compress_streams=True, stream_decode_level=None,
             object_stream_mode=ObjectStreamMode.preserve,
             normalize_content=False, linearize=False, qdf=False,
             progress=None, encryption=None, samefile_check=True,
             recompress_flate=False, deterministic_id=False) -> None:
        """
        Save the PDF to a file.

        Parameters:
        - filename (str | pathlib.Path | IO): Output path or file-like object
        - static_id (bool): Use static document ID for reproducible output
        - preserve_pdfa (bool): Maintain PDF/A compliance
        - min_version (str, optional): Minimum PDF version (e.g., '1.4')
        - force_version (str, optional): Force specific PDF version
        - fix_metadata_version (bool): Update metadata version to match PDF version
        - compress_streams (bool): Compress stream objects
        - stream_decode_level (StreamDecodeLevel, optional): Stream decoding level
        - object_stream_mode (ObjectStreamMode): Object stream handling
        - normalize_content (bool): Normalize content streams
        - linearize (bool): Create linearized (fast web view) PDF
        - qdf (bool): Save in QPDF's inspection format
        - progress (callable, optional): Progress callback function
        - encryption (Encryption, optional): Encryption settings
        - samefile_check (bool): Check if saving to same file.
          NOTE(review): not part of the public pikepdf Pdf.save signature as
          far as this reviewer can tell -- confirm before relying on it
        - recompress_flate (bool): Uncompress and recompress Flate-compressed
          streams instead of copying them as they are
        - deterministic_id (bool): Generate the document ID deterministically
          from the PDF's contents

        Raises:
        PdfError: If the file cannot be saved
        """

    def close(self) -> None:
        """
        Close the PDF and release resources.

        The PDF object becomes unusable after closing.
        """

    def copy_foreign(self, other_pdf_obj: Object) -> Object:
        """
        Copy an object from another PDF into this PDF.

        Parameters:
        - other_pdf_obj (Object): Object from another PDF to copy

        Returns:
        Object: The copied object owned by this PDF

        Raises:
        ForeignObjectError: If the object cannot be copied
        """

    def make_indirect(self, obj: Object) -> Object:
        """
        Convert a direct object to an indirect object.

        Parameters:
        - obj (Object): Object to make indirect

        Returns:
        Object: The indirect object
        """

    def add_blank_page(self, *, page_size=(612, 792)) -> Page:
        """
        Add a blank page to the PDF.

        Parameters:
        - page_size (tuple): Page dimensions (width, height) in points;
          the default (612, 792) is US Letter

        Returns:
        Page: The newly created page
        """

    @property
    def Root(self) -> Dictionary:
        """
        The PDF's document catalog (root object).

        Returns:
        Dictionary: Document catalog containing page tree and other references
        """

    @property
    def pages(self) -> list[Page]:
        """
        List of all pages in the PDF.

        Returns:
        list[Page]: Pages that can be indexed, sliced, and modified
        """

    @property
    def objects(self) -> list[Object]:
        """
        All objects in the PDF.

        Returns:
        list[Object]: List-like, iterable collection of the PDF's objects
        """

    @property
    def is_encrypted(self) -> bool:
        """
        Whether the PDF is encrypted.

        Returns:
        bool: True if the PDF has encryption
        """

    @property
    def pdf_version(self) -> str:
        """
        PDF version string (e.g., '1.4', '1.7').

        Returns:
        str: PDF version
        """

    @property
    def trailer(self) -> Dictionary:
        """
        The PDF's trailer dictionary.

        Returns:
        Dictionary: Trailer containing cross-reference information
        """

    @property
    def docinfo(self) -> Dictionary:
        """
        Document information dictionary.

        Returns:
        Dictionary: Document metadata (title, author, etc.)
        """

    def check(self) -> list[str]:
        """
        Check PDF for structural problems.

        Returns:
        list[str]: List of problems found (empty if no problems)
        """

Convenience Functions

Module-level functions (called as `pikepdf.open(...)` and `pikepdf.new()`) that provide shortcuts to common PDF operations.

def open(filename, **kwargs) -> Pdf:
    """
    Open an existing PDF file (alias for Pdf.open).

    The returned Pdf can be used as a context manager
    (``with pikepdf.open(...) as pdf:``) so it is closed automatically.

    Parameters:
    - filename: Path to PDF file or file-like object
    - **kwargs: Same keyword arguments as Pdf.open()
      (e.g. password, attempt_recovery, access_mode)

    Returns:
    Pdf: The opened PDF document

    Raises:
    PdfError: If the file cannot be opened (as documented for Pdf.open)
    PasswordError: If password is required or incorrect (as documented for Pdf.open)
    """

def new() -> Pdf:
    """
    Create a new empty PDF document (alias for Pdf.new).

    Takes no arguments. Pages can be added afterwards, for example with
    Pdf.add_blank_page().

    Returns:
    Pdf: A new empty PDF document
    """

Access Modes

Control how PDF files are accessed and loaded into memory.

from enum import Enum

class AccessMode(Enum):
    """File access modes for opening PDFs (the ``access_mode`` argument of Pdf.open)."""
    # Member values are elided (``...``) in this reference listing.
    default = ...  # Standard file access; used when access_mode is not given
    mmap = ...  # Memory-mapped file access when possible
    mmap_only = ...  # Require memory-mapped access
    stream = ...  # Stream-based access for large files

Object Stream Modes

Control how object streams are handled during save operations.

class ObjectStreamMode(Enum):
    """Object stream handling modes (the ``object_stream_mode`` argument of Pdf.save)."""
    # Member values are elided (``...``) in this reference listing.
    disable = ...  # Don't use object streams
    preserve = ...  # Keep existing object streams; the default used by Pdf.save
    generate = ...  # Generate new object streams for compression

Stream Decode Levels

Control the level of stream decoding performed when reading PDFs.

class StreamDecodeLevel(Enum):
    """Stream decoding levels (the ``stream_decode_level`` argument of Pdf.save)."""
    # Member values are elided (``...``) in this reference listing.
    none = ...  # No stream decoding
    generalized = ...  # Decode common filters
    specialized = ...  # Decode specialized filters
    all = ...  # Decode all supported filters

Usage Examples

Basic PDF Operations

import pikepdf

# Open and read a PDF; the context manager closes it even if an error occurs
with pikepdf.open('document.pdf') as pdf:
    print(f"PDF version: {pdf.pdf_version}")
    print(f"Number of pages: {len(pdf.pages)}")
    print(f"Encrypted: {pdf.is_encrypted}")

# Create a new PDF with a blank page; `with` replaces the manual close()
# so the document is released even if save() raises
with pikepdf.new() as new_pdf:
    new_pdf.add_blank_page(page_size=(612, 792))  # US Letter
    new_pdf.save('blank.pdf')

Working with Encrypted PDFs

import pikepdf

# Open password-protected PDF; the context manager closes it on every exit
# path, not only when the body completes normally
try:
    with pikepdf.open('encrypted.pdf', password='secret') as pdf:
        print("Successfully opened encrypted PDF")
except pikepdf.PasswordError:
    # Raised by open() when a password is required or the given one is wrong
    print("Incorrect password")

Advanced Save Options

import pikepdf
from pikepdf import Encryption, Permissions

# Configure encryption before opening the file, so a failure here
# cannot leave an open PDF behind
encryption = Encryption(
    owner='owner_password',
    user='user_password',
    allow=Permissions(print_highres=True, extract=False)
)

# Open, then save with compression, linearization and encryption;
# the context manager closes the PDF even if save() raises
with pikepdf.open('input.pdf') as pdf:
    pdf.save('output.pdf',
             linearize=True,  # Fast web view
             compress_streams=True,
             encryption=encryption,
             fix_metadata_version=True)

Page Management

import pikepdf

# Combine multiple PDFs. All three documents are managed by one `with`
# statement so each is closed even if an error occurs part-way through.
# The sources stay open until after save(), matching the original ordering
# (close only once the combined file has been written).
with pikepdf.open('doc1.pdf') as pdf1, \
        pikepdf.open('doc2.pdf') as pdf2, \
        pikepdf.new() as combined:
    # Copy all pages from both PDFs, in order
    for source in (pdf1, pdf2):
        combined.pages.extend(source.pages)

    combined.save('combined.pdf')

Install with Tessl CLI

npx tessl i tessl/pypi-pikepdf

docs

advanced.md

attachments.md

content-streams.md

core-operations.md

encryption.md

forms.md

images.md

index.md

metadata.md

objects.md

outlines.md

pages.md

tile.json