CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-pikepdf

Read and write PDFs with Python, powered by qpdf

Pending
Overview
Eval results
Files

docs/outlines.md

Outlines and Bookmarks

Document navigation structure including bookmarks, table of contents, and document outline management. These capabilities enable comprehensive navigation and document structure organization.

Capabilities

Outline Class

Comprehensive document outline and bookmark management with hierarchical navigation support.

class Outline:
    """
    PDF bookmark/outline tree manager.

    Provides access to the document's navigation structure including
    bookmarks, table of contents, and hierarchical outline items.

    The top level is exposed through a list-like interface: len(),
    iteration, indexing, deletion by index, insert() and append() all
    operate on the top-level OutlineItem entries.

    NOTE(review): this is a signature-only summary (no method bodies are
    shown); confirm member names and signatures against the installed
    pikepdf release before relying on them.
    """

    @property
    def root(self) -> OutlineItem:
        """
        Root outline item containing all top-level bookmarks.

        Returns:
        OutlineItem: Root of the outline hierarchy
        """

    def open_all(self) -> None:
        """
        Expand all outline items to show the complete structure.

        Makes all bookmark levels visible in the outline panel
        by setting their open state to True.
        """

    def close_all(self) -> None:
        """
        Collapse all outline items to show only top-level bookmarks.

        Hides all nested bookmark levels by setting their
        open state to False.
        """

    def __len__(self) -> int:
        """
        Number of top-level outline items.

        Nested items are not counted; only direct children of the
        root are.

        Returns:
        int: Count of direct children of the root outline item
        """

    def __iter__(self) -> Iterator[OutlineItem]:
        """
        Iterate over top-level outline items.

        Nested items are reached through each yielded item's own
        children.

        Yields:
        OutlineItem: Each top-level bookmark item
        """

    def __getitem__(self, index: int) -> OutlineItem:
        """
        Get a top-level outline item by index.

        Parameters:
        - index (int): Index of the outline item

        Returns:
        OutlineItem: Outline item at the specified index
        """

    def __delitem__(self, index: int) -> None:
        """
        Delete a top-level outline item by index.

        Parameters:
        - index (int): Index of the outline item to delete
        """

    def insert(self, index: int, item: OutlineItem) -> None:
        """
        Insert an outline item at the specified index of the top level.

        Parameters:
        - index (int): Position to insert at
        - item (OutlineItem): Outline item to insert
        """

    def append(self, item: OutlineItem) -> None:
        """
        Add an outline item at the end of the top level.

        Parameters:
        - item (OutlineItem): Outline item to append
        """

OutlineItem Class

Individual bookmark entries with navigation destinations and hierarchical structure.

class OutlineItem:
    """
    Individual bookmark/outline item with title, destination, and children.

    Represents a single entry in the document's outline hierarchy,
    containing navigation information and potential child items.

    An item also acts as a list-like container of its own children:
    len(), iteration, indexing, deletion by index, insert() and append()
    all operate on the direct child items.

    NOTE(review): this is a signature-only summary (no method bodies are
    shown); confirm member names and signatures against the installed
    pikepdf release before relying on them.
    """

    def __init__(self, title: str, destination: PageLocation | None = None, 
                 action: Dictionary | None = None, *, obj: Object | None = None) -> None:
        """
        Create a new outline item.

        Only title is required; destination and action are optional
        positional parameters and obj is keyword-only.

        Parameters:
        - title (str): Display title for the bookmark
        - destination (PageLocation, optional): Page destination to navigate to
        - action (Dictionary, optional): PDF action dictionary for navigation
        - obj (Object, optional): Existing PDF outline object to wrap
        """

    @property
    def title(self) -> str:
        """
        Display title of the bookmark.

        Returns:
        str: Bookmark title shown in outline panel
        """

    @title.setter
    def title(self, value: str) -> None:
        """
        Set the bookmark title.

        Parameters:
        - value (str): New display title
        """

    @property
    def destination(self) -> PageLocation | None:
        """
        Page destination for this bookmark.

        Returns:
        PageLocation: Destination within the document; presumably None
        when no destination was supplied (the constructor default) —
        confirm
        """

    @destination.setter
    def destination(self, value: PageLocation) -> None:
        """
        Set the bookmark destination.

        Parameters:
        - value (PageLocation): New destination within the document
        """

    @property
    def action(self) -> Dictionary | None:
        """
        PDF action dictionary for navigation or other operations.

        Returns:
        Dictionary: Action dictionary (e.g., GoTo, URI, Named actions);
        presumably None when no action was supplied (the constructor
        default) — confirm
        """

    @action.setter
    def action(self, value: Dictionary) -> None:
        """
        Set the bookmark action.

        Parameters:
        - value (Dictionary): New PDF action dictionary
        """

    @property
    def is_open(self) -> bool:
        """
        Whether this outline item is expanded to show children.

        Returns:
        bool: True if children are visible in the outline
        """

    @is_open.setter
    def is_open(self, value: bool) -> None:
        """
        Set whether this outline item is expanded.

        Parameters:
        - value (bool): True to show children, False to collapse them
        """

    @property
    def color(self) -> tuple[float, float, float]:
        """
        RGB color for the bookmark text.

        Returns:
        tuple[float, float, float]: RGB values (0.0 to 1.0)
        """

    @color.setter
    def color(self, value: tuple[float, float, float]) -> None:
        """
        Set the bookmark text color.

        Parameters:
        - value (tuple[float, float, float]): RGB values (0.0 to 1.0)
        """

    @property
    def italic(self) -> bool:
        """
        Whether the bookmark text is displayed in italics.

        Returns:
        bool: True if text should be italic
        """

    @italic.setter
    def italic(self, value: bool) -> None:
        """
        Set whether bookmark text is italic.

        Parameters:
        - value (bool): True to display the title in italics
        """

    @property
    def bold(self) -> bool:
        """
        Whether the bookmark text is displayed in bold.

        Returns:
        bool: True if text should be bold
        """

    @bold.setter
    def bold(self, value: bool) -> None:
        """
        Set whether bookmark text is bold.

        Parameters:
        - value (bool): True to display the title in bold
        """

    @property
    def children(self) -> list[OutlineItem]:
        """
        Child outline items under this item.

        Returns:
        list[OutlineItem]: Nested bookmark items
        """

    def __len__(self) -> int:
        """
        Number of child outline items.

        Returns:
        int: Count of child items under this item
        """

    def __iter__(self) -> Iterator[OutlineItem]:
        """
        Iterate over child outline items.

        Yields:
        OutlineItem: Each child bookmark item
        """

    def __getitem__(self, index: int) -> OutlineItem:
        """
        Get a child outline item by index.

        Parameters:
        - index (int): Index of the child item

        Returns:
        OutlineItem: Child item at the specified index
        """

    def __delitem__(self, index: int) -> None:
        """
        Delete a child outline item by index.

        Parameters:
        - index (int): Index of the child item to delete
        """

    def insert(self, index: int, item: OutlineItem) -> None:
        """
        Insert a child outline item at the specified index.

        Parameters:
        - index (int): Position to insert at
        - item (OutlineItem): Child outline item to insert
        """

    def append(self, item: OutlineItem) -> None:
        """
        Add a child outline item at the end.

        Parameters:
        - item (OutlineItem): Child outline item to append
        """

PageLocation Class

Destination specifications for bookmarks and navigation actions.

class PageLocation:
    """
    Page location specification for bookmark destinations.

    Defines where within a page the destination should navigate to,
    including zoom level and viewport positioning.

    NOTE(review): the constructor accepts bottom and right, but this
    summary lists read-only properties only for page, view_type, top,
    left and zoom. Confirm against the installed pikepdf release whether
    bottom/right are readable and whether this class-style signature
    matches it.
    """

    def __init__(self, page: Page, *, view_type: str = 'Fit', 
                 top: float | None = None, left: float | None = None, 
                 bottom: float | None = None, right: float | None = None,
                 zoom: float | None = None) -> None:
        """
        Create a page destination.

        Everything except page is keyword-only. Coordinates and zoom
        default to None, i.e. not specified.

        Parameters:
        - page (Page): Target page for navigation
        - view_type (str): Destination type ('Fit', 'FitH', 'FitV', 'FitR', 'XYZ')
        - top (float, optional): Top coordinate for view
        - left (float, optional): Left coordinate for view
        - bottom (float, optional): Bottom coordinate for view  
        - right (float, optional): Right coordinate for view
        - zoom (float, optional): Zoom factor for view
        """

    @property
    def page(self) -> Page:
        """
        Target page for this destination.

        Returns:
        Page: Page object to navigate to
        """

    @property
    def view_type(self) -> str:
        """
        Type of destination view.

        Common view types:
        - 'Fit': Fit entire page in window
        - 'FitH': Fit page width, specific top coordinate
        - 'FitV': Fit page height, specific left coordinate
        - 'FitR': Fit rectangle in window
        - 'XYZ': Specific coordinates and zoom

        Returns:
        str: View type identifier
        """

    @property
    def top(self) -> float | None:
        """
        Top coordinate for the destination view.

        Returns:
        float: Top coordinate; presumably None when it was not
        specified (the constructor default) — confirm
        """

    @property
    def left(self) -> float | None:
        """
        Left coordinate for the destination view.

        Returns:
        float: Left coordinate; presumably None when it was not
        specified (the constructor default) — confirm
        """

    @property
    def zoom(self) -> float | None:
        """
        Zoom factor for the destination view.

        Returns:
        float: Zoom factor; presumably None when it was not
        specified (the constructor default) — confirm
        """

Outline Utility Functions

Helper functions for creating destinations and managing outline operations.

def make_page_destination(pdf: Pdf, page_num: int, *, 
                         view_type: str = 'Fit', top: float | None = None, 
                         left: float | None = None, zoom: float | None = None) -> Array:
    """
    Create a page destination array for bookmarks.

    The page is identified by its position in the document rather than
    by a Page object. All view options are keyword-only.

    Parameters:
    - pdf (Pdf): PDF document containing the target page
    - page_num (int): Zero-based page number (0 = first page)
    - view_type (str): Destination view type (defaults to 'Fit')
    - top (float, optional): Top coordinate for view positioning
    - left (float, optional): Left coordinate for view positioning
    - zoom (float, optional): Zoom level for view

    Returns:
    Array: PDF destination array for use in outline items

    Examples:
    - make_page_destination(pdf, 0): Go to page 1, fit in window
    - make_page_destination(pdf, 5, view_type='XYZ', top=700, zoom=1.5): 
      Go to page 6, position at top=700, zoom 150%
    """

Outline Structure Exceptions

Specialized exceptions for outline operations.

class OutlineStructureError(Exception):
    """
    Error signalling that an outline structure operation failed.

    Derives directly from Exception and adds no members of its own.

    Typical causes:
    - Circular references in outline hierarchy
    - Invalid outline item relationships
    - Corrupted outline data structures
    """

Usage Examples

Reading Existing Outlines

import pikepdf

# Open PDF with bookmarks; the context manager closes it even if an
# error is raised while the outline is being read
with pikepdf.open('document_with_bookmarks.pdf') as pdf:
    # Access the outline
    with pdf.open_outline() as outline:
        print(f"Document has {len(outline)} top-level bookmarks")

        # Iterate through top-level bookmarks, numbering them from 1
        for i, item in enumerate(outline, start=1):
            print(f"{i}. {item.title}")

            # Check destination
            if item.destination:
                dest_page = item.destination.page
                page_num = pdf.pages.index(dest_page) + 1  # Convert to 1-based
                print(f"   -> Page {page_num} ({item.destination.view_type})")

            # Check for children (an empty list is falsy)
            children = item.children
            if children:
                print(f"   Has {len(children)} sub-items:")
                for j, child in enumerate(children, start=1):
                    print(f"     {j}. {child.title}")
                    if child.destination:
                        child_page_num = pdf.pages.index(child.destination.page) + 1
                        print(f"        -> Page {child_page_num}")

Creating Simple Bookmarks

import pikepdf

# Open the PDF; the context manager closes it even if saving fails
with pikepdf.open('document.pdf') as pdf:
    # Create outline if it doesn't exist
    with pdf.open_outline() as outline:
        # Create one bookmark per page, titled with the 1-based page number
        for page_number, page in enumerate(pdf.pages, start=1):
            bookmark = pikepdf.OutlineItem(
                title=f"Page {page_number}",
                destination=pikepdf.PageLocation(page, view_type='Fit')
            )

            # Add to outline
            outline.append(bookmark)

        print(f"Created {len(outline)} bookmarks")

    # Save after the outline block has finished, while the PDF is still open
    pdf.save('document_with_bookmarks.pdf')

Creating Hierarchical Outlines

import pikepdf

# Open a PDF with structured content; the context manager closes it even
# if building or saving the outline fails.
# The page indexes below (0-5) assume the document has at least six pages.
with pikepdf.open('structured_document.pdf') as pdf:
    with pdf.open_outline() as outline:
        # Chapter 1
        chapter1 = pikepdf.OutlineItem(
            title="1. Introduction",
            destination=pikepdf.PageLocation(pdf.pages[0], view_type='Fit')
        )

        # Add sections to Chapter 1
        section1_1 = pikepdf.OutlineItem(
            title="1.1 Overview",
            destination=pikepdf.PageLocation(pdf.pages[0], view_type='XYZ', top=600)
        )
        section1_2 = pikepdf.OutlineItem(
            title="1.2 Scope",
            destination=pikepdf.PageLocation(pdf.pages[1], view_type='Fit')
        )

        chapter1.append(section1_1)
        chapter1.append(section1_2)

        # Chapter 2
        chapter2 = pikepdf.OutlineItem(
            title="2. Technical Details",
            destination=pikepdf.PageLocation(pdf.pages[2], view_type='Fit')
        )

        # Add sections to Chapter 2
        section2_1 = pikepdf.OutlineItem(
            title="2.1 Architecture",
            destination=pikepdf.PageLocation(pdf.pages[2], view_type='FitH', top=700)
        )
        section2_2 = pikepdf.OutlineItem(
            title="2.2 Implementation",
            destination=pikepdf.PageLocation(pdf.pages[3], view_type='Fit')
        )

        # Add subsections to 2.2
        subsection2_2_1 = pikepdf.OutlineItem(
            title="2.2.1 Core Components",
            destination=pikepdf.PageLocation(pdf.pages[3], view_type='XYZ', top=500)
        )
        subsection2_2_2 = pikepdf.OutlineItem(
            title="2.2.2 Integration",
            destination=pikepdf.PageLocation(pdf.pages[4], view_type='Fit')
        )

        section2_2.append(subsection2_2_1)
        section2_2.append(subsection2_2_2)

        chapter2.append(section2_1)
        chapter2.append(section2_2)

        # Chapter 3
        chapter3 = pikepdf.OutlineItem(
            title="3. Conclusion",
            destination=pikepdf.PageLocation(pdf.pages[5], view_type='Fit')
        )

        # Add all chapters to outline
        outline.append(chapter1)
        outline.append(chapter2)
        outline.append(chapter3)

        # Expand Chapter 2 by default
        chapter2.is_open = True

    print("Created hierarchical outline structure")
    # Save after the outline block has finished, while the PDF is still open
    pdf.save('structured_with_outline.pdf')

Styling Bookmarks

import pikepdf

# The context manager closes the PDF even if styling or saving fails
with pikepdf.open('document.pdf') as pdf:
    with pdf.open_outline() as outline:
        # Create styled bookmarks

        # Red, bold chapter heading
        chapter = pikepdf.OutlineItem(
            title="Important Chapter",
            destination=pikepdf.PageLocation(pdf.pages[0])
        )
        chapter.color = (1.0, 0.0, 0.0)  # Red
        chapter.bold = True

        # Blue, italic section
        section = pikepdf.OutlineItem(
            title="Special Section",
            destination=pikepdf.PageLocation(pdf.pages[1])
        )
        section.color = (0.0, 0.0, 1.0)  # Blue
        section.italic = True

        # Green, bold and italic subsection
        subsection = pikepdf.OutlineItem(
            title="Critical Information",
            destination=pikepdf.PageLocation(pdf.pages[1], view_type='XYZ', top=400)
        )
        subsection.color = (0.0, 0.8, 0.0)  # Green
        subsection.bold = True
        subsection.italic = True

        # Build hierarchy: chapter -> section -> subsection
        section.append(subsection)
        chapter.append(section)
        outline.append(chapter)

    # Save after the outline block has finished, while the PDF is still open
    pdf.save('styled_bookmarks.pdf')

Automated Outline Generation

import pikepdf
import re

def generate_outline_from_content(pdf_path, output_path):
    """
    Generate outline based on content analysis.

    Scans each page's parsed content for 'Tj' text operators whose text
    looks like a heading, creates one bookmark per match pointing at that
    page, appends the bookmarks to the outline and saves the result.

    This is a simplified example - real implementation would need
    sophisticated text extraction and analysis.

    Parameters:
    - pdf_path: Path of the PDF to analyze
    - output_path: Path the outlined PDF is saved to
    """
    # Compiled once up front instead of once per text operand
    starts_with_number = re.compile(r'^\d+\.')
    starts_with_keyword = re.compile(r'^Chapter|^Section', re.I)

    def looks_like_heading(text):
        # Simple heuristics (in practice, you'd analyze font sizes,
        # positions, styles): short text that starts with a number, is
        # all caps, or starts with a heading keyword
        if len(text) >= 100:
            return False
        return bool(
            starts_with_number.match(text)
            or text.isupper()
            or starts_with_keyword.match(text)
        )

    # The context manager closes the PDF even if analysis or saving raises
    with pikepdf.open(pdf_path) as pdf:
        outline_items = []

        for page_num, page in enumerate(pdf.pages):
            try:
                # Gather the page's headings before creating any bookmark,
                # so a parse failure part-way through a page adds nothing
                # for that page
                headings = []
                for instruction in page.parse_contents():
                    if (hasattr(instruction, 'operator')
                            and str(instruction.operator) == 'Tj'
                            and instruction.operands):
                        text = str(instruction.operands[0])
                        if looks_like_heading(text):
                            headings.append(text)

                # Create bookmarks for found headings
                for heading_text in headings:
                    outline_items.append(
                        pikepdf.OutlineItem(
                            title=heading_text.strip(),
                            destination=pikepdf.PageLocation(page, view_type='Fit')
                        )
                    )

            except Exception as e:
                # Deliberately best-effort: report the page and carry on
                # with the remaining pages
                print(f"Could not analyze page {page_num}: {e}")

        # Add bookmarks to outline
        with pdf.open_outline() as outline:
            for item in outline_items:
                outline.append(item)

            print(f"Generated {len(outline_items)} bookmarks")

        pdf.save(output_path)

# Generate outline automatically
# generate_outline_from_content('document.pdf', 'auto_outlined.pdf')

Outline Manipulation and Editing

import pikepdf

def reorganize_outline(pdf_path):
    """
    Reorganize and clean up document outline.

    Collects every outline item at every depth, groups the items by the
    page their destination points to, clears the outline and rebuilds it
    in page order. A page with one item gets that item at the top level;
    a page with several gets a "Page N" group containing them. Items
    without a destination page are not re-added. The result is saved back
    over pdf_path.

    Parameters:
    - pdf_path: Path of the PDF to rewrite in place
    """

    def collect_items(parent_item):
        """Return every descendant of parent_item, parents before children."""
        items = []
        for child in parent_item.children:
            items.append(child)
            items.extend(collect_items(child))
        return items

    # allow_overwriting_input=True is what lets the argument-less
    # pdf.save() below write back to pdf_path. The context manager closes
    # the PDF even if reorganizing or saving raises.
    with pikepdf.open(pdf_path, allow_overwriting_input=True) as pdf:
        with pdf.open_outline() as outline:
            print(f"Original outline has {len(outline)} items")

            # Collect all outline items recursively
            all_items = []
            for top_level in outline:
                all_items.append(top_level)
                all_items.extend(collect_items(top_level))

            # Group items by page
            # NOTE(review): nested items are regrouped without being
            # detached from their original parents, so a child may end up
            # both under its parent and in a page group — confirm whether
            # that is intended.
            page_items = {}
            for item in all_items:
                if item.destination and item.destination.page:
                    page_num = pdf.pages.index(item.destination.page)
                    page_items.setdefault(page_num, []).append(item)

            # Clear existing outline
            while len(outline) > 0:
                del outline[0]

            # Rebuild outline sorted by page
            for page_num in sorted(page_items):
                items = page_items[page_num]

                if len(items) == 1:
                    # Single item for this page
                    outline.append(items[0])
                else:
                    # Multiple items - create page group
                    page_group = pikepdf.OutlineItem(
                        title=f"Page {page_num + 1}",
                        destination=pikepdf.PageLocation(pdf.pages[page_num])
                    )

                    for item in items:
                        page_group.append(item)

                    outline.append(page_group)

            print(f"Reorganized outline now has {len(outline)} top-level items")

        pdf.save()

# Reorganize outline structure
# reorganize_outline('document.pdf')

Outline Export and Import

import pikepdf
import json

def export_outline_to_json(pdf_path, json_path):
    """
    Export outline structure to JSON format.

    Writes a JSON object with a single 'outline' key holding one
    dictionary per top-level item; nested items are stored under each
    dictionary's 'children' key.

    Parameters:
    - pdf_path: Path of the PDF whose outline is exported
    - json_path: Path of the JSON file to write (UTF-8)
    """
    # The context manager closes the PDF even if reading the outline
    # raises; everything needed is copied into plain dictionaries first
    with pikepdf.open(pdf_path) as pdf:

        def item_to_dict(item):
            """Convert outline item to dictionary."""
            item_dict = {
                'title': item.title,
                'is_open': item.is_open
            }

            destination = item.destination
            if destination:
                dest_dict = {
                    # Stored as the page's index in pdf.pages
                    'page': pdf.pages.index(destination.page),
                    'view_type': destination.view_type
                }

                # Optional coordinates are written only when set
                for key in ('top', 'left', 'zoom'):
                    value = getattr(destination, key)
                    if value is not None:
                        dest_dict[key] = value

                item_dict['destination'] = dest_dict

            # Style information
            if item.color != (0.0, 0.0, 0.0):  # Non-black
                item_dict['color'] = list(item.color)
            if item.bold:
                item_dict['bold'] = True
            if item.italic:
                item_dict['italic'] = True

            # Children
            if len(item.children) > 0:
                item_dict['children'] = [item_to_dict(child) for child in item.children]

            return item_dict

        # Export outline
        with pdf.open_outline() as outline:
            outline_data = {
                'outline': [item_to_dict(item) for item in outline]
            }

    # Save to JSON; the PDF is already closed at this point
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(outline_data, f, indent=2, ensure_ascii=False)

    print(f"Exported outline to {json_path}")

def import_outline_from_json(pdf_path, json_path, output_path):
    """
    Import outline structure from JSON format.

    Reads a JSON object with an 'outline' list of item dictionaries
    ('title', plus optional 'destination', 'color', 'bold', 'italic',
    'is_open' and 'children'), replaces the PDF's existing outline with
    the items built from it and saves the result to output_path.

    Parameters:
    - pdf_path: Path of the PDF that receives the outline
    - json_path: Path of the JSON file to read (UTF-8)
    - output_path: Path the updated PDF is saved to
    """

    # Load JSON
    with open(json_path, 'r', encoding='utf-8') as f:
        outline_data = json.load(f)

    # The context manager closes the PDF even if building or saving raises
    with pikepdf.open(pdf_path) as pdf:

        def dict_to_item(item_dict):
            """Convert dictionary to outline item."""

            # Create destination
            destination = None
            if 'destination' in item_dict:
                dest_info = item_dict['destination']
                page_num = dest_info['page']

                # Pages outside this document are skipped, leaving the
                # item without a destination. The lower bound keeps a
                # negative index from silently selecting a page counted
                # from the end.
                if 0 <= page_num < len(pdf.pages):
                    destination = pikepdf.PageLocation(
                        pdf.pages[page_num],
                        view_type=dest_info.get('view_type', 'Fit'),
                        top=dest_info.get('top'),
                        left=dest_info.get('left'),
                        zoom=dest_info.get('zoom')
                    )

            # Create item
            item = pikepdf.OutlineItem(
                title=item_dict['title'],
                destination=destination
            )

            # Apply styling
            if 'color' in item_dict:
                item.color = tuple(item_dict['color'])
            if item_dict.get('bold'):
                item.bold = True
            if item_dict.get('italic'):
                item.italic = True
            if 'is_open' in item_dict:
                item.is_open = item_dict['is_open']

            # Add children
            for child_dict in item_dict.get('children', ()):
                item.append(dict_to_item(child_dict))

            return item

        # Import outline
        with pdf.open_outline() as outline:
            # Clear existing outline
            while len(outline) > 0:
                del outline[0]

            # Add imported items
            for item_dict in outline_data['outline']:
                outline.append(dict_to_item(item_dict))

        pdf.save(output_path)

    print(f"Imported outline from {json_path} to {output_path}")

# Export and import outline
# export_outline_to_json('document.pdf', 'outline.json')
# import_outline_from_json('new_document.pdf', 'outline.json', 'document_with_imported_outline.pdf')

Install with Tessl CLI

npx tessl i tessl/pypi-pikepdf

docs

advanced.md

attachments.md

content-streams.md

core-operations.md

encryption.md

forms.md

images.md

index.md

metadata.md

objects.md

outlines.md

pages.md

tile.json