A pure-Python PDF library capable of splitting, merging, cropping, and transforming PDF files.
npx @tessl/cli install tessl/pypi-py-pdf2@2.12.0

A pure-Python PDF library capable of splitting, merging, cropping, and transforming PDF files. PyPDF2 can retrieve text and metadata from PDFs as well as add custom data, viewing options, and passwords to PDF files. It provides comprehensive PDF processing capabilities for developers working with PDF documents programmatically.
pip install PyPDF2

import PyPDF2

Common patterns for specific functionality:
from PyPDF2 import PdfReader, PdfWriter, PdfMerger
from PyPDF2 import PageObject, Transformation
from PyPDF2 import DocumentInformation, PasswordType
from PyPDF2 import PageRange, PaperSize, parse_filename_page_ranges

from PyPDF2 import PdfReader, PdfWriter, PdfMerger
# Reading a PDF file
reader = PdfReader("input.pdf")
print(f"Number of pages: {len(reader.pages)}")
print(f"Title: {reader.metadata.title}")
# Extract text from first page
page = reader.pages[0]
text = page.extract_text()
print(text)
# Writing a new PDF
writer = PdfWriter()
writer.add_page(page)
with open("output.pdf", "wb") as output_file:
    writer.write(output_file)
# Merging multiple PDFs
merger = PdfMerger()
merger.append("file1.pdf")
merger.append("file2.pdf")
merger.write("merged.pdf")
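merger.close()

PyPDF2 is built around four core components:

- PdfReader: reads PDF files and gives access to pages, metadata, and text content.
- PdfWriter: creates new PDF files and adds pages, metadata, and encryption.
- PdfMerger: merges multiple PDF files with control over page ranges and bookmarks.
- PageObject: represents a single page and supports scaling, rotating, cropping, and merging.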
The library maintains both high-level convenience classes and low-level generic objects, enabling everything from simple PDF operations to advanced PDF specification-level manipulation.
Read PDF files, access pages, extract metadata and text content, handle encrypted documents with password protection.
class PdfReader:
    def __init__(self, stream: Union[str, bytes, Path], strict: bool = False, password: Union[None, str, bytes] = None): ...
    @property
    def pages(self) -> List[PageObject]: ...
    @property
    def metadata(self) -> DocumentInformation: ...
    @property
    def is_encrypted(self) -> bool: ...
    def decrypt(self, password: Union[str, bytes]) -> PasswordType: ...
    def get_page(self, page_number: int) -> PageObject: ...

Create new PDF files, add pages, insert blank pages, add metadata, encryption, annotations, and JavaScript.
class PdfWriter:
    def __init__(self, fileobj: Union[str, bytes] = ""): ...
    def add_page(self, page: PageObject) -> None: ...
    def insert_page(self, page: PageObject, index: int = 0) -> None: ...
    def add_blank_page(self, width: float, height: float) -> PageObject: ...
    def write(self, stream) -> None: ...
    def encrypt(self, user_password: str, owner_password: str = "", use_128bit: bool = True, permissions_flag: int = -1) -> None: ...

Merge multiple PDF files with control over page ranges, bookmarks, and document properties.
class PdfMerger:
    def __init__(self, strict: bool = False, fileobj: Union[Path, str, bytes] = ""): ...
    def merge(self, page_number: int, fileobj, outline_item: str = None, pages = None, import_outline: bool = True) -> None: ...
    def append(self, fileobj, outline_item: str = None, pages = None, import_outline: bool = True) -> None: ...
    def write(self, fileobj) -> None: ...
    def close(self) -> None: ...

Transform, scale, rotate, crop, and merge individual PDF pages with precise control over page geometry.
class PageObject:
    def extract_text(self, visitor_text=None) -> str: ...
    def scale(self, sx: float, sy: float) -> None: ...
    def rotate(self, angle: int) -> 'PageObject': ...
    def merge_page(self, page2: 'PageObject') -> None: ...
    @property
    def mediabox(self) -> RectangleObject: ...
    @property
    def cropbox(self) -> RectangleObject: ...

Low-level PDF object types for advanced manipulation, constants, and type definitions used throughout the library.
class DictionaryObject(dict): ...
class ArrayObject(list): ...
class RectangleObject(ArrayObject): ...
class IndirectObject: ...
# Page Range Utilities
class PageRange:
    def __init__(self, arg: Union[slice, "PageRange", str]): ...
    @staticmethod
    def valid(input: Any) -> bool: ...
    def to_slice(self) -> slice: ...
    def indices(self, n: int) -> Tuple[int, int, int]: ...
# Transformation
class Transformation:
    def __init__(self, ctm: Tuple[float, float, float, float, float, float] = (1, 0, 0, 1, 0, 0)): ...
    @property
    def matrix(self) -> Tuple[Tuple[float, float, float], Tuple[float, float, float], Tuple[float, float, float]]: ...
    def scale(self, sx: Optional[float] = None, sy: Optional[float] = None) -> "Transformation": ...
    def translate(self, tx: float = 0, ty: float = 0) -> "Transformation": ...
    def rotate(self, rotation: float) -> "Transformation": ...
# Enumerations
class PasswordType: ...
# Utility functions
def parse_filename_page_ranges(args: List[Union[str, PageRange, None]]) -> List[Tuple[str, PageRange]]: ...
# Version information
__version__: str # Current PyPDF2 version

Exception classes for comprehensive error handling and utility functions for specialized operations.
class PyPdfError(Exception): ...
class PdfReadError(PyPdfError): ...
class WrongPasswordError(PdfReadError): ...
class FileNotDecryptedError(PdfReadError): ...
# Paper size utilities
class PaperSize:
    A0: Dimensions
    A4: Dimensions
    # ... more sizes