Read and write PDFs with Python, powered by qpdf
—
Document navigation structure including bookmarks, table of contents, and document outline management. These capabilities enable comprehensive navigation and document structure organization.
Comprehensive document outline and bookmark management with hierarchical navigation support.
# NOTE(review): the signatures below are reproduced as this document gives
# them; confirm them against the installed pikepdf release before relying
# on them.
class Outline:
    """
    PDF bookmark/outline tree manager.

    Provides access to the document's navigation structure including
    bookmarks, table of contents, and hierarchical outline items.
    """

    @property
    def root(self) -> OutlineItem:
        """
        Root outline item containing all top-level bookmarks.

        Returns:
            OutlineItem: Root of the outline hierarchy
        """

    def open_all(self) -> None:
        """
        Expand all outline items to show the complete structure.

        Makes all bookmark levels visible in the outline panel
        by setting their open state to True.
        """

    def close_all(self) -> None:
        """
        Collapse all outline items to show only top-level bookmarks.

        Hides all nested bookmark levels by setting their
        open state to False.
        """

    def __len__(self) -> int:
        """
        Number of top-level outline items.

        Returns:
            int: Count of direct children of the root outline item
        """

    def __iter__(self) -> Iterator[OutlineItem]:
        """
        Iterate over top-level outline items.

        Yields:
            OutlineItem: Each top-level bookmark item
        """

    def __getitem__(self, index: int) -> OutlineItem:
        """
        Get a top-level outline item by index.

        Parameters:
        - index (int): Index of the outline item

        Returns:
            OutlineItem: Outline item at the specified index
        """

    def __delitem__(self, index: int) -> None:
        """
        Delete a top-level outline item by index.

        Parameters:
        - index (int): Index of the outline item to delete
        """

    def insert(self, index: int, item: OutlineItem) -> None:
        """
        Insert an outline item at the specified index.

        Parameters:
        - index (int): Position to insert at
        - item (OutlineItem): Outline item to insert
        """

    def append(self, item: OutlineItem) -> None:
        """
        Add an outline item at the end of the top level.

        Parameters:
        - item (OutlineItem): Outline item to append
        """


# Individual bookmark entries with navigation destinations and hierarchical structure.
# NOTE(review): the signatures below are reproduced as this document gives
# them; confirm them against the installed pikepdf release before relying
# on them.
class OutlineItem:
    """
    Individual bookmark/outline item with title, destination, and children.

    Represents a single entry in the document's outline hierarchy,
    containing navigation information and potential child items.
    """

    def __init__(self, title: str, destination: PageLocation = None,
                 action: Dictionary = None, *, obj: Object = None) -> None:
        """
        Create a new outline item.

        Parameters:
        - title (str): Display title for the bookmark
        - destination (PageLocation, optional): Page destination to navigate to
        - action (Dictionary, optional): PDF action dictionary for navigation
        - obj (Object, optional): Existing PDF outline object to wrap
        """

    @property
    def title(self) -> str:
        """
        Display title of the bookmark.

        Returns:
            str: Bookmark title shown in outline panel
        """

    @title.setter
    def title(self, value: str) -> None:
        """Set the bookmark title."""

    @property
    def destination(self) -> PageLocation:
        """
        Page destination for this bookmark.

        Returns:
            PageLocation: Destination within the document
        """

    @destination.setter
    def destination(self, value: PageLocation) -> None:
        """Set the bookmark destination."""

    @property
    def action(self) -> Dictionary:
        """
        PDF action dictionary for navigation or other operations.

        Returns:
            Dictionary: Action dictionary (e.g., GoTo, URI, Named actions)
        """

    @action.setter
    def action(self, value: Dictionary) -> None:
        """Set the bookmark action."""

    @property
    def is_open(self) -> bool:
        """
        Whether this outline item is expanded to show children.

        Returns:
            bool: True if children are visible in the outline
        """

    @is_open.setter
    def is_open(self, value: bool) -> None:
        """Set whether this outline item is expanded."""

    @property
    def color(self) -> tuple[float, float, float]:
        """
        RGB color for the bookmark text.

        Returns:
            tuple[float, float, float]: RGB values (0.0 to 1.0)
        """

    @color.setter
    def color(self, value: tuple[float, float, float]) -> None:
        """Set the bookmark text color."""

    @property
    def italic(self) -> bool:
        """
        Whether the bookmark text is displayed in italics.

        Returns:
            bool: True if text should be italic
        """

    @italic.setter
    def italic(self, value: bool) -> None:
        """Set whether bookmark text is italic."""

    @property
    def bold(self) -> bool:
        """
        Whether the bookmark text is displayed in bold.

        Returns:
            bool: True if text should be bold
        """

    @bold.setter
    def bold(self, value: bool) -> None:
        """Set whether bookmark text is bold."""

    @property
    def children(self) -> list[OutlineItem]:
        """
        Child outline items under this item.

        Returns:
            list[OutlineItem]: Nested bookmark items
        """

    def __len__(self) -> int:
        """Number of child outline items."""

    def __iter__(self) -> Iterator[OutlineItem]:
        """Iterate over child outline items."""

    def __getitem__(self, index: int) -> OutlineItem:
        """Get a child outline item by index."""

    def __delitem__(self, index: int) -> None:
        """Delete a child outline item by index."""

    def insert(self, index: int, item: OutlineItem) -> None:
        """
        Insert a child outline item at the specified index.

        Parameters:
        - index (int): Position to insert at
        - item (OutlineItem): Child outline item to insert
        """

    def append(self, item: OutlineItem) -> None:
        """
        Add a child outline item at the end.

        Parameters:
        - item (OutlineItem): Child outline item to append
        """


# Destination specifications for bookmarks and navigation actions.
# NOTE(review): the signatures below are reproduced as this document gives
# them; confirm them against the installed pikepdf release before relying
# on them.
class PageLocation:
    """
    Page location specification for bookmark destinations.

    Defines where within a page the destination should navigate to,
    including zoom level and viewport positioning.
    """

    def __init__(self, page: Page, *, view_type: str = 'Fit',
                 top: float = None, left: float = None,
                 bottom: float = None, right: float = None,
                 zoom: float = None) -> None:
        """
        Create a page destination.

        Parameters:
        - page (Page): Target page for navigation
        - view_type (str): Destination type ('Fit', 'FitH', 'FitV', 'FitR', 'XYZ')
        - top (float, optional): Top coordinate for view
        - left (float, optional): Left coordinate for view
        - bottom (float, optional): Bottom coordinate for view
        - right (float, optional): Right coordinate for view
        - zoom (float, optional): Zoom factor for view
        """

    @property
    def page(self) -> Page:
        """
        Target page for this destination.

        Returns:
            Page: Page object to navigate to
        """

    @property
    def view_type(self) -> str:
        """
        Type of destination view.

        Common view types:
        - 'Fit': Fit entire page in window
        - 'FitH': Fit page width, specific top coordinate
        - 'FitV': Fit page height, specific left coordinate
        - 'FitR': Fit rectangle in window
        - 'XYZ': Specific coordinates and zoom

        Returns:
            str: View type identifier
        """

    # NOTE(review): __init__ also accepts `bottom` and `right`, but no
    # matching read accessors are listed here — confirm whether that is
    # intentional.
    @property
    def top(self) -> float:
        """Top coordinate for the destination view."""

    @property
    def left(self) -> float:
        """Left coordinate for the destination view."""

    @property
    def zoom(self) -> float:
        """Zoom factor for the destination view."""


# Helper functions for creating destinations and managing outline operations.
# NOTE(review): the signature below is reproduced as this document gives it;
# confirm it against the installed pikepdf release before relying on it.
def make_page_destination(pdf: Pdf, page_num: int, *,
                          view_type: str = 'Fit', top: float = None,
                          left: float = None, zoom: float = None) -> Array:
    """
    Create a page destination array for bookmarks.

    Parameters:
    - pdf (Pdf): PDF document containing the target page
    - page_num (int): Zero-based page number (0 = first page)
    - view_type (str): Destination view type
    - top (float, optional): Top coordinate for view positioning
    - left (float, optional): Left coordinate for view positioning
    - zoom (float, optional): Zoom level for view

    Returns:
        Array: PDF destination array for use in outline items

    Examples:
    - make_page_destination(pdf, 0): Go to page 1, fit in window
    - make_page_destination(pdf, 5, view_type='XYZ', top=700, zoom=1.5):
      Go to page 6, position at top=700, zoom 150%
    """


# Specialized exceptions for outline operations.
class OutlineStructureError(Exception):
    """
    Raised when outline structure operations fail.

    This can occur with:
    - Circular references in outline hierarchy
    - Invalid outline item relationships
    - Corrupted outline data structures
    """


import pikepdf
# Example: list every top-level bookmark and its direct children.
#
# Open the PDF with bookmarks. The context manager closes the file even if
# reading the outline raises part-way through.
with pikepdf.open('document_with_bookmarks.pdf') as pdf:
    # Access the outline
    with pdf.open_outline() as outline:
        print(f"Document has {len(outline)} top-level bookmarks")

        # Iterate through top-level bookmarks, numbering them from 1
        for number, item in enumerate(outline, start=1):
            print(f"{number}. {item.title}")

            # Check destination
            if item.destination:
                dest_page = item.destination.page
                page_num = pdf.pages.index(dest_page) + 1  # Convert to 1-based
                print(f" -> Page {page_num} ({item.destination.view_type})")

            # Check for children
            if item.children:
                print(f" Has {len(item.children)} sub-items:")
                for child_number, child in enumerate(item.children, start=1):
                    print(f" {child_number}. {child.title}")
                    if child.destination:
                        child_page_num = pdf.pages.index(child.destination.page) + 1
                        print(f" -> Page {child_page_num}")


import pikepdf
# Example: add one top-level bookmark per page.
#
# Open the PDF. The context manager closes the file even if building the
# outline or saving raises.
with pikepdf.open('document.pdf') as pdf:
    # Opening the outline creates it if it doesn't exist
    with pdf.open_outline() as outline:
        # Create bookmarks for each page
        for i, page in enumerate(pdf.pages):
            # The destination is given directly as a PageLocation; no
            # separate make_page_destination() call is needed.
            bookmark = pikepdf.OutlineItem(
                title=f"Page {i+1}",
                destination=pikepdf.PageLocation(page, view_type='Fit'),
            )
            # Add to outline
            outline.append(bookmark)

        print(f"Created {len(outline)} bookmarks")

    pdf.save('document_with_bookmarks.pdf')


import pikepdf
# Example: build a three-chapter outline with nested sections.
#
# The page indices used below (0 through 5) require the input document to
# have at least six pages. The context manager closes the file even if
# building the outline or saving raises.
with pikepdf.open('structured_document.pdf') as pdf:
    with pdf.open_outline() as outline:
        # Chapter 1
        chapter1 = pikepdf.OutlineItem(
            title="1. Introduction",
            destination=pikepdf.PageLocation(pdf.pages[0], view_type='Fit'),
        )
        # Add sections to Chapter 1
        section1_1 = pikepdf.OutlineItem(
            title="1.1 Overview",
            destination=pikepdf.PageLocation(pdf.pages[0], view_type='XYZ', top=600),
        )
        section1_2 = pikepdf.OutlineItem(
            title="1.2 Scope",
            destination=pikepdf.PageLocation(pdf.pages[1], view_type='Fit'),
        )
        chapter1.append(section1_1)
        chapter1.append(section1_2)

        # Chapter 2
        chapter2 = pikepdf.OutlineItem(
            title="2. Technical Details",
            destination=pikepdf.PageLocation(pdf.pages[2], view_type='Fit'),
        )
        # Add sections to Chapter 2
        section2_1 = pikepdf.OutlineItem(
            title="2.1 Architecture",
            destination=pikepdf.PageLocation(pdf.pages[2], view_type='FitH', top=700),
        )
        section2_2 = pikepdf.OutlineItem(
            title="2.2 Implementation",
            destination=pikepdf.PageLocation(pdf.pages[3], view_type='Fit'),
        )
        # Add subsections to 2.2
        subsection2_2_1 = pikepdf.OutlineItem(
            title="2.2.1 Core Components",
            destination=pikepdf.PageLocation(pdf.pages[3], view_type='XYZ', top=500),
        )
        subsection2_2_2 = pikepdf.OutlineItem(
            title="2.2.2 Integration",
            destination=pikepdf.PageLocation(pdf.pages[4], view_type='Fit'),
        )
        section2_2.append(subsection2_2_1)
        section2_2.append(subsection2_2_2)
        chapter2.append(section2_1)
        chapter2.append(section2_2)

        # Chapter 3
        chapter3 = pikepdf.OutlineItem(
            title="3. Conclusion",
            destination=pikepdf.PageLocation(pdf.pages[5], view_type='Fit'),
        )

        # Add all chapters to outline
        outline.append(chapter1)
        outline.append(chapter2)
        outline.append(chapter3)

        # Expand Chapter 2 by default; done while the outline is still open
        # so the change is part of the same edit session.
        chapter2.is_open = True

    print("Created hierarchical outline structure")
    pdf.save('structured_with_outline.pdf')


import pikepdf
# Example: bookmarks with custom colour and font style.
#
# The context manager closes the file even if building the outline or
# saving raises.
with pikepdf.open('document.pdf') as pdf:
    with pdf.open_outline() as outline:
        # Create styled bookmarks

        # Red, bold chapter heading
        chapter = pikepdf.OutlineItem(
            title="Important Chapter",
            destination=pikepdf.PageLocation(pdf.pages[0]),
        )
        chapter.color = (1.0, 0.0, 0.0)  # Red
        chapter.bold = True

        # Blue, italic section
        section = pikepdf.OutlineItem(
            title="Special Section",
            destination=pikepdf.PageLocation(pdf.pages[1]),
        )
        section.color = (0.0, 0.0, 1.0)  # Blue
        section.italic = True

        # Green, bold and italic subsection
        subsection = pikepdf.OutlineItem(
            title="Critical Information",
            destination=pikepdf.PageLocation(pdf.pages[1], view_type='XYZ', top=400),
        )
        subsection.color = (0.0, 0.8, 0.0)  # Green
        subsection.bold = True
        subsection.italic = True

        # Build hierarchy: chapter > section > subsection
        section.append(subsection)
        chapter.append(section)
        outline.append(chapter)

    pdf.save('styled_bookmarks.pdf')


import pikepdf
import re
# Heading heuristics, compiled once instead of on every content instruction.
_NUMBERED_HEADING = re.compile(r'^\d+\.')
_KEYWORD_HEADING = re.compile(r'^Chapter|^Section', re.I)


def _looks_like_heading(text):
    """Return True when *text* passes the simple heading heuristics."""
    if len(text) >= 100:  # Too long to be a heading
        return False
    return bool(
        _NUMBERED_HEADING.match(text)      # Starts with number
        or text.isupper()                  # All caps
        or _KEYWORD_HEADING.match(text)    # Keywords
    )


def generate_outline_from_content(pdf_path, output_path):
    """
    Generate outline based on content analysis.

    Scans each page's content stream for ``Tj`` operands whose text looks
    like a heading, appends one top-level bookmark per match, and saves
    the result to a new file.

    This is a simplified example - real implementation would need
    sophisticated text extraction and analysis (font sizes, positions,
    styles).

    Parameters:
    - pdf_path: Path of the PDF to analyse
    - output_path: Path the outlined PDF is saved to
    """
    # The context manager closes the file even if analysis or saving raises.
    with pikepdf.open(pdf_path) as pdf:
        outline_items = []

        for page_num, page in enumerate(pdf.pages):
            # Deliberately best-effort: a page that cannot be analysed is
            # reported and skipped rather than aborting the whole run.
            try:
                # Parse page content to find headings
                instructions = page.parse_contents()

                # Look for text that might be headings
                potential_headings = []
                for instruction in instructions:
                    if (hasattr(instruction, 'operator') and
                            str(instruction.operator) == 'Tj' and
                            instruction.operands):
                        text = str(instruction.operands[0])
                        if _looks_like_heading(text):
                            potential_headings.append(text)

                # Create bookmarks for found headings, all pointing at
                # the page they were found on.
                for heading_text in potential_headings:
                    bookmark = pikepdf.OutlineItem(
                        title=heading_text.strip(),
                        destination=pikepdf.PageLocation(page, view_type='Fit'),
                    )
                    outline_items.append(bookmark)
            except Exception as e:
                print(f"Could not analyze page {page_num}: {e}")

        # Add bookmarks to outline
        with pdf.open_outline() as outline:
            for item in outline_items:
                outline.append(item)

        print(f"Generated {len(outline_items)} bookmarks")
        pdf.save(output_path)
# Generate outline automatically
# generate_outline_from_content('document.pdf', 'auto_outlined.pdf')

import pikepdf
def reorganize_outline(pdf_path):
    """
    Reorganize and clean up document outline.

    Flattens the existing outline, groups the items by destination page,
    rebuilds the top level in page order, and saves the result back to
    ``pdf_path``. A page with a single item gets that item directly; a
    page with several gets a "Page N" group containing them.

    Parameters:
    - pdf_path: Path of the PDF to reorganize; the file is overwritten
    """

    def collect_items(parent_item):
        """Return every descendant of parent_item, depth-first."""
        items = []
        for child in parent_item.children:
            items.append(child)
            items.extend(collect_items(child))
        return items

    # allow_overwriting_input=True is what permits the argument-less
    # pdf.save() below to write back to the file that was opened. The
    # context manager closes the file even if reorganizing raises.
    with pikepdf.open(pdf_path, allow_overwriting_input=True) as pdf:
        with pdf.open_outline() as outline:
            print(f"Original outline has {len(outline)} items")

            # Collect all outline items recursively
            # NOTE(review): collected items keep their own children, so a
            # nested bookmark may end up both under its parent and as a
            # separate entry after the rebuild — confirm this is intended.
            all_items = []
            for top_level in outline:
                all_items.append(top_level)
                all_items.extend(collect_items(top_level))

            # Group items by page
            # NOTE(review): items without a page destination are not
            # grouped and therefore do not reappear in the rebuilt
            # outline — confirm this is intended.
            page_items = {}
            for item in all_items:
                if item.destination and item.destination.page:
                    page_num = pdf.pages.index(item.destination.page)
                    page_items.setdefault(page_num, []).append(item)

            # Clear existing outline
            while len(outline) > 0:
                del outline[0]

            # Rebuild outline sorted by page
            for page_num in sorted(page_items):
                items = page_items[page_num]
                if len(items) == 1:
                    # Single item for this page
                    outline.append(items[0])
                else:
                    # Multiple items - create page group
                    page_group = pikepdf.OutlineItem(
                        title=f"Page {page_num + 1}",
                        destination=pikepdf.PageLocation(pdf.pages[page_num]),
                    )
                    for item in items:
                        page_group.append(item)
                    outline.append(page_group)

            print(f"Reorganized outline now has {len(outline)} top-level items")

        pdf.save()
# Reorganize outline structure
# reorganize_outline('document.pdf')

import pikepdf
import json
def export_outline_to_json(pdf_path, json_path):
    """
    Export outline structure to JSON format.

    Writes a JSON document of the form ``{"outline": [...]}``. Each entry
    has ``title`` and ``is_open``, plus ``destination``, ``color``,
    ``bold``, ``italic`` and ``children`` where applicable.

    Parameters:
    - pdf_path: Path of the PDF whose outline is exported
    - json_path: Path of the JSON file to write (UTF-8)
    """
    # The context manager closes the file even if reading the outline raises.
    with pikepdf.open(pdf_path) as pdf:

        def item_to_dict(item):
            """Convert outline item to dictionary."""
            item_dict = {
                'title': item.title,
                'is_open': item.is_open,
            }

            destination = item.destination
            if destination:
                dest_dict = {
                    'page': pdf.pages.index(destination.page),  # zero-based
                    'view_type': destination.view_type,
                }
                # Only record the coordinates that are actually set
                if destination.top is not None:
                    dest_dict['top'] = destination.top
                if destination.left is not None:
                    dest_dict['left'] = destination.left
                if destination.zoom is not None:
                    dest_dict['zoom'] = destination.zoom
                item_dict['destination'] = dest_dict

            # Style information
            if item.color != (0.0, 0.0, 0.0):  # Non-black
                item_dict['color'] = list(item.color)
            if item.bold:
                item_dict['bold'] = True
            if item.italic:
                item_dict['italic'] = True

            # Children
            if item.children:
                item_dict['children'] = [item_to_dict(child) for child in item.children]

            return item_dict

        # Export outline
        with pdf.open_outline() as outline:
            outline_data = {
                'outline': [item_to_dict(item) for item in outline]
            }

        # Save to JSON
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(outline_data, f, indent=2, ensure_ascii=False)

    print(f"Exported outline to {json_path}")
def import_outline_from_json(pdf_path, json_path, output_path):
    """
    Import outline structure from JSON format.

    Replaces the outline of the PDF at ``pdf_path`` with the one described
    by the ``"outline"`` list in the JSON file, and saves the result to
    ``output_path``.

    Parameters:
    - pdf_path: Path of the PDF that receives the outline
    - json_path: Path of the JSON file to read (UTF-8)
    - output_path: Path the updated PDF is saved to
    """
    # Load JSON
    with open(json_path, 'r', encoding='utf-8') as f:
        outline_data = json.load(f)

    # The context manager closes the file even if importing or saving raises.
    with pikepdf.open(pdf_path) as pdf:
        page_count = len(pdf.pages)

        def dict_to_item(item_dict):
            """Convert dictionary to outline item."""
            # Create destination
            destination = None
            if 'destination' in item_dict:
                dest_info = item_dict['destination']
                page_num = dest_info['page']
                # Accept only a valid zero-based index. A negative value
                # would otherwise silently index from the end of the
                # document, and a non-integer would raise. An item with
                # an unusable page is still imported, without a
                # destination.
                if isinstance(page_num, int) and 0 <= page_num < page_count:
                    destination = pikepdf.PageLocation(
                        pdf.pages[page_num],
                        view_type=dest_info.get('view_type', 'Fit'),
                        top=dest_info.get('top'),
                        left=dest_info.get('left'),
                        zoom=dest_info.get('zoom'),
                    )

            # Create item
            item = pikepdf.OutlineItem(
                title=item_dict['title'],
                destination=destination,
            )

            # Apply styling
            if 'color' in item_dict:
                item.color = tuple(item_dict['color'])
            if item_dict.get('bold'):
                item.bold = True
            if item_dict.get('italic'):
                item.italic = True
            if 'is_open' in item_dict:
                item.is_open = item_dict['is_open']

            # Add children
            for child_dict in item_dict.get('children', ()):
                item.append(dict_to_item(child_dict))

            return item

        # Import outline
        with pdf.open_outline() as outline:
            # Clear existing outline
            while len(outline) > 0:
                del outline[0]

            # Add imported items
            for item_dict in outline_data['outline']:
                outline.append(dict_to_item(item_dict))

        pdf.save(output_path)

    print(f"Imported outline from {json_path} to {output_path}")
# Export and import outline
# export_outline_to_json('document.pdf', 'outline.json')
# import_outline_from_json('new_document.pdf', 'outline.json', 'document_with_imported_outline.pdf')

Install with Tessl CLI
npx tessl i tessl/pypi-pikepdf