Python bindings to PDFium for comprehensive PDF manipulation, rendering, and processing
—
pypdfium2 provides a comprehensive command-line interface for common PDF operations. All tools can be accessed via the pypdfium2 command or programmatically through the API.
from pypdfium2 import cli_main

# Show version information
pypdfium2 --version
# Get help for all commands
pypdfium2 --help
# Get help for specific command
pypdfium2 render --help

def cli_main(raw_args=None):
    """
    Main CLI entry point for pypdfium2 command-line tools.

    Parameters:
    - raw_args: list[str] | None, command arguments (defaults to sys.argv[1:])

    Returns:
    int: Exit code (0 for success, non-zero for errors)

    Provides programmatic access to all CLI functionality,
    allowing integration of pypdfium2 tools in Python applications.
    """

def api_main(raw_args=None):
    """
    Alternative API entry point with same functionality as cli_main.

    Parameters:
    - raw_args: list[str] | None, command arguments

    Returns:
    int: Exit code
    """

Programmatic usage example:
import pypdfium2
# Convert images to PDF programmatically
exit_code = pypdfium2.cli_main([
'imgtopdf',
'image1.jpg', 'image2.png',
'--output', 'combined.pdf'
])
if exit_code == 0:
    print("Successfully created PDF")

Display comprehensive information about PDF documents, including metadata, page details, and document properties.
pypdfium2 pdfinfo document.pdf
pypdfium2 pdfinfo --password secret encrypted.pdf
pypdfium2 pdfinfo --pages document.pdf # Include page-level details

Extract text content from PDF pages with various formatting and output options.
pypdfium2 extract-text document.pdf
pypdfium2 extract-text document.pdf --pages 1-5
pypdfium2 extract-text document.pdf --output text.txt
pypdfium2 extract-text document.pdf --no-layout # Disable layout preservation

Extract embedded images from PDF pages to image files.
pypdfium2 extract-images document.pdf
pypdfium2 extract-images document.pdf --pages 1,3,5
pypdfium2 extract-images document.pdf --output images/
pypdfium2 extract-images document.pdf --format png

Convert image files to PDF documents with size and layout options.
pypdfium2 imgtopdf image1.jpg image2.png --output combined.pdf
pypdfium2 imgtopdf *.jpg --size letter --output photos.pdf
pypdfium2 imgtopdf image.png --width 8.5 --height 11 --output sized.pdf

Render PDF pages to image files with customizable resolution, format, and rendering options.
pypdfium2 render document.pdf
pypdfium2 render document.pdf --scale 2.0 # High resolution
pypdfium2 render document.pdf --pages 1-10 --format png
pypdfium2 render document.pdf --width 1920 --height 1080
pypdfium2 render document.pdf --output rendered/

Rearrange, merge, and reorganize PDF documents and pages.
pypdfium2 arrange input1.pdf input2.pdf --output merged.pdf
pypdfium2 arrange document.pdf --pages 1,3,5-10 --output selected.pdf
pypdfium2 arrange doc1.pdf doc2.pdf --rotate 90 --output rotated.pdf

Arrange multiple source pages on a single output page in various grid layouts.
pypdfium2 tile document.pdf --grid 2x2 --output tiled.pdf
pypdfium2 tile document.pdf --grid 1x2 --pages 1-20 --output booklet.pdf
pypdfium2 tile document.pdf --grid 3x3 --scale 0.8 --output ninup.pdf

Display and extract PDF document outline/bookmark structure.
pypdfium2 toc document.pdf
pypdfium2 toc document.pdf --max-depth 3
pypdfium2 toc document.pdf --output bookmarks.txt

Analyze and display information about objects within PDF pages.
pypdfium2 pageobjects document.pdf
pypdfium2 pageobjects document.pdf --pages 1-5
pypdfium2 pageobjects document.pdf --type image # Only image objects
pypdfium2 pageobjects document.pdf --verbose

List, extract, and manage embedded file attachments within PDF documents.
pypdfium2 attachments document.pdf # List attachments
pypdfium2 attachments document.pdf --extract # Extract all
pypdfium2 attachments document.pdf --extract --output attachments/
pypdfium2 attachments document.pdf --index 0 --extract # Extract specific

Most commands support these common options:
- --help, -h: Show command-specific help
- --pages: Specify page ranges (e.g., 1-5, 1,3,5, all)
- --password: Password for encrypted PDFs
- --output, -o: Output file or directory path
- --verbose, -v: Enable verbose output
- --quiet, -q: Suppress non-error output

CLI tools return standard exit codes:
- 0: Success
- 1: General error
- 2: Invalid arguments
- 3: File not found or access error

When using the tools programmatically, check the exit codes:
import pypdfium2
result = pypdfium2.cli_main(['pdfinfo', 'nonexistent.pdf'])
if result != 0:
    print("Command failed")

import pypdfium2
import glob
# Process all PDFs in directory
for pdf_file in glob.glob("*.pdf"):
    # Extract text from each PDF
    exit_code = pypdfium2.cli_main([
        'extract-text',
        pdf_file,
        '--output', f"{pdf_file}.txt"
    ])
    if exit_code == 0:
        print(f"Processed {pdf_file}")
    else:
        print(f"Failed to process {pdf_file}")

import pypdfium2
import subprocess
def process_document(pdf_path, output_dir):
    """Run several pypdfium2 CLI operations on one PDF and report which succeeded.

    Parameters:
    - pdf_path: path of the PDF handed to every command
    - output_dir: directory in which each command's output file is placed

    Returns:
    dict[str, bool]: maps each command name (the first element of its
    argument list) to True when cli_main returned 0 for it, else False
    """
    # Local import so this example stays self-contained.
    import os

    def output_path(filename):
        # os.path.join avoids a doubled separator when output_dir already
        # ends with one (e.g. "output/").
        return os.path.join(output_dir, filename)

    operations = [
        # Get document info
        ['pdfinfo', pdf_path, '--output', output_path("info.txt")],
        # Extract text
        ['extract-text', pdf_path, '--output', output_path("text.txt")],
        # Render first page as thumbnail
        ['render', pdf_path, '--pages', '1', '--width', '200',
         '--output', output_path("thumbnail.png")],
        # Extract table of contents
        ['toc', pdf_path, '--output', output_path("toc.txt")],
    ]

    results = {}
    for operation in operations:
        exit_code = pypdfium2.cli_main(operation)
        operation_name = operation[0]
        # Only the success flag is kept, not the raw exit code.
        results[operation_name] = exit_code == 0
    return results

# Process document
results = process_document("report.pdf", "output/")
print(f"Processing results: {results}")

Install with Tessl CLI
npx tessl i tessl/pypi-pypdfium2