# Outlines and Bookmarks

This section covers a document's navigation structure: bookmarks, the table of contents, and management of the document outline. Together, these capabilities let you read, build, and reorganize the way readers navigate a PDF.
3
4
## Capabilities
5
6
### Outline Class
7
8
Manages the document outline and its bookmarks, with support for hierarchical navigation.
9
10
```python { .api }
11
class Outline:
    """
    PDF bookmark/outline tree manager.

    Provides access to the document's navigation structure including
    bookmarks, table of contents, and hierarchical outline items.
    The sequence-style methods (len, iteration, indexing, insert, append)
    all operate on the top-level outline items.
    """

    @property
    def root(self) -> OutlineItem:
        """
        Root outline item containing all top-level bookmarks.

        Returns:
            OutlineItem: Root of the outline hierarchy
        """

    def open_all(self) -> None:
        """
        Expand all outline items to show the complete structure.

        Makes all bookmark levels visible in the outline panel
        by setting their open state to True.
        """

    def close_all(self) -> None:
        """
        Collapse all outline items to show only top-level bookmarks.

        Hides all nested bookmark levels by setting their
        open state to False.
        """

    def __len__(self) -> int:
        """
        Number of top-level outline items.

        Returns:
            int: Count of direct children of the root outline item
        """

    def __iter__(self) -> Iterator[OutlineItem]:
        """
        Iterate over top-level outline items.

        Yields:
            OutlineItem: Each top-level bookmark item
        """

    def __getitem__(self, index: int) -> OutlineItem:
        """
        Get a top-level outline item by index.

        Parameters:
        - index (int): Index of the outline item

        Returns:
            OutlineItem: Outline item at the specified index
        """

    def __delitem__(self, index: int) -> None:
        """
        Delete a top-level outline item by index.

        Parameters:
        - index (int): Index of the outline item to delete
        """

    def insert(self, index: int, item: OutlineItem) -> None:
        """
        Insert an outline item at the specified index.

        Parameters:
        - index (int): Position to insert at
        - item (OutlineItem): Outline item to insert
        """

    def append(self, item: OutlineItem) -> None:
        """
        Add an outline item at the end of the top level.

        Parameters:
        - item (OutlineItem): Outline item to append
        """
95
```
96
97
### OutlineItem Class
98
99
Each bookmark entry carries a title, a navigation destination or action, and an optional list of child entries that forms the hierarchy.
100
101
```python { .api }
102
class OutlineItem:
    """
    Individual bookmark/outline item with title, destination, and children.

    Represents a single entry in the document's outline hierarchy,
    containing navigation information and potential child items.
    The sequence-style methods (len, iteration, indexing, insert, append)
    operate on this item's children.
    """

    def __init__(self, title: str, destination: PageLocation | None = None,
                 action: Dictionary | None = None, *,
                 obj: Object | None = None) -> None:
        """
        Create a new outline item.

        Parameters:
        - title (str): Display title for the bookmark
        - destination (PageLocation, optional): Page destination to navigate to
        - action (Dictionary, optional): PDF action dictionary for navigation
        - obj (Object, optional): Existing PDF outline object to wrap
        """

    @property
    def title(self) -> str:
        """
        Display title of the bookmark.

        Returns:
            str: Bookmark title shown in outline panel
        """

    @title.setter
    def title(self, value: str) -> None:
        """Set the bookmark title."""

    @property
    def destination(self) -> PageLocation | None:
        """
        Page destination for this bookmark.

        Returns:
            PageLocation | None: Destination within the document, or None
            when no destination was set (test it before dereferencing)
        """

    @destination.setter
    def destination(self, value: PageLocation | None) -> None:
        """Set the bookmark destination."""

    @property
    def action(self) -> Dictionary | None:
        """
        PDF action dictionary for navigation or other operations.

        Returns:
            Dictionary | None: Action dictionary (e.g., GoTo, URI, Named
            actions), or None when no action was set
        """

    @action.setter
    def action(self, value: Dictionary | None) -> None:
        """Set the bookmark action."""

    @property
    def is_open(self) -> bool:
        """
        Whether this outline item is expanded to show children.

        Returns:
            bool: True if children are visible in the outline
        """

    @is_open.setter
    def is_open(self, value: bool) -> None:
        """Set whether this outline item is expanded."""

    @property
    def color(self) -> tuple[float, float, float]:
        """
        RGB color for the bookmark text.

        Returns:
            tuple[float, float, float]: RGB values (0.0 to 1.0)
        """

    @color.setter
    def color(self, value: tuple[float, float, float]) -> None:
        """Set the bookmark text color."""

    @property
    def italic(self) -> bool:
        """
        Whether the bookmark text is displayed in italics.

        Returns:
            bool: True if text should be italic
        """

    @italic.setter
    def italic(self, value: bool) -> None:
        """Set whether bookmark text is italic."""

    @property
    def bold(self) -> bool:
        """
        Whether the bookmark text is displayed in bold.

        Returns:
            bool: True if text should be bold
        """

    @bold.setter
    def bold(self, value: bool) -> None:
        """Set whether bookmark text is bold."""

    @property
    def children(self) -> list[OutlineItem]:
        """
        Child outline items under this item.

        Returns:
            list[OutlineItem]: Nested bookmark items
        """

    def __len__(self) -> int:
        """Number of child outline items."""

    def __iter__(self) -> Iterator[OutlineItem]:
        """Iterate over child outline items."""

    def __getitem__(self, index: int) -> OutlineItem:
        """Get a child outline item by index."""

    def __delitem__(self, index: int) -> None:
        """Delete a child outline item by index."""

    def insert(self, index: int, item: OutlineItem) -> None:
        """
        Insert a child outline item at the specified index.

        Parameters:
        - index (int): Position to insert at
        - item (OutlineItem): Child outline item to insert
        """

    def append(self, item: OutlineItem) -> None:
        """
        Add a child outline item at the end.

        Parameters:
        - item (OutlineItem): Child outline item to append
        """
250
```
251
252
### PageLocation Class
253
254
Destination specifications for bookmarks and navigation actions.
255
256
```python { .api }
257
class PageLocation:
    """
    Page location specification for bookmark destinations.

    Defines where within a page the destination should navigate to,
    including zoom level and viewport positioning.
    """

    def __init__(self, page: Page, *, view_type: str = 'Fit',
                 top: float | None = None, left: float | None = None,
                 bottom: float | None = None, right: float | None = None,
                 zoom: float | None = None) -> None:
        """
        Create a page destination.

        Parameters:
        - page (Page): Target page for navigation
        - view_type (str): Destination type ('Fit', 'FitH', 'FitV', 'FitR', 'XYZ')
        - top (float, optional): Top coordinate for view
        - left (float, optional): Left coordinate for view
        - bottom (float, optional): Bottom coordinate for view
        - right (float, optional): Right coordinate for view
        - zoom (float, optional): Zoom factor for view
        """

    @property
    def page(self) -> Page:
        """
        Target page for this destination.

        Returns:
            Page: Page object to navigate to
        """

    @property
    def view_type(self) -> str:
        """
        Type of destination view.

        Common view types:
        - 'Fit': Fit entire page in window
        - 'FitH': Fit page width, specific top coordinate
        - 'FitV': Fit page height, specific left coordinate
        - 'FitR': Fit rectangle in window
        - 'XYZ': Specific coordinates and zoom

        Returns:
            str: View type identifier
        """

    @property
    def top(self) -> float | None:
        """Top coordinate for the destination view, or None if not set."""

    @property
    def left(self) -> float | None:
        """Left coordinate for the destination view, or None if not set."""

    @property
    def zoom(self) -> float | None:
        """Zoom factor for the destination view, or None if not set."""
318
```
319
320
### Outline Utility Functions
321
322
Helper function for creating the destination arrays used by outline items.
323
324
```python { .api }
325
def make_page_destination(pdf: Pdf, page_num: int, *,
                          view_type: str = 'Fit', top: float | None = None,
                          left: float | None = None,
                          zoom: float | None = None) -> Array:
    """
    Create a page destination array for bookmarks.

    Parameters:
    - pdf (Pdf): PDF document containing the target page
    - page_num (int): Zero-based page number (0 = first page)
    - view_type (str): Destination view type
    - top (float, optional): Top coordinate for view positioning
    - left (float, optional): Left coordinate for view positioning
    - zoom (float, optional): Zoom level for view

    Returns:
        Array: PDF destination array for use in outline items

    Examples:
    - make_page_destination(pdf, 0): Go to page 1, fit in window
    - make_page_destination(pdf, 5, view_type='XYZ', top=700, zoom=1.5):
      Go to page 6, position at top=700, zoom 150%
    """
347
```
348
349
### Outline Structure Exceptions
350
351
Specialized exceptions for outline operations.
352
353
```python { .api }
354
class OutlineStructureError(Exception):
    """
    Raised when outline structure operations fail.

    This can occur with:
    - Circular references in outline hierarchy
    - Invalid outline item relationships
    - Corrupted outline data structures
    """
363
```
364
365
## Usage Examples
366
367
### Reading Existing Outlines
368
369
```python
370
import pikepdf
371
372
# Open PDF with bookmarks. The context manager closes the file even if
# reading the outline raises.
with pikepdf.open('document_with_bookmarks.pdf') as pdf:
    # Access the outline
    with pdf.open_outline() as outline:
        print(f"Document has {len(outline)} top-level bookmarks")

        # Iterate through top-level bookmarks (numbered from 1 for display)
        for i, item in enumerate(outline, start=1):
            print(f"{i}. {item.title}")

            # Check destination (an item may have none)
            if item.destination:
                dest_page = item.destination.page
                page_num = pdf.pages.index(dest_page) + 1  # Convert to 1-based
                print(f" -> Page {page_num} ({item.destination.view_type})")

            # Check for children
            if item.children:
                print(f" Has {len(item.children)} sub-items:")
                for j, child in enumerate(item.children, start=1):
                    print(f" {j}. {child.title}")
                    if child.destination:
                        child_page_num = pdf.pages.index(child.destination.page) + 1
                        print(f" -> Page {child_page_num}")
399
```
400
401
### Creating Simple Bookmarks
402
403
```python
404
import pikepdf
405
406
# Open the PDF. The context manager closes the file even if a step fails.
with pikepdf.open('document.pdf') as pdf:
    # Create outline if it doesn't exist
    with pdf.open_outline() as outline:
        # Create one bookmark per page (titles are 1-based)
        for page_number, page in enumerate(pdf.pages, start=1):
            bookmark = pikepdf.OutlineItem(
                title=f"Page {page_number}",
                destination=pikepdf.PageLocation(page, view_type='Fit')
            )

            # Add to outline
            outline.append(bookmark)

        print(f"Created {len(outline)} bookmarks")

    # Save only after the outline block has exited, so the edits are included
    pdf.save('document_with_bookmarks.pdf')
429
```
430
431
### Creating Hierarchical Outlines
432
433
```python
434
import pikepdf
435
436
# Create a PDF with structured content.
# The context manager closes the file even if building the outline fails.
# NOTE: this example indexes pdf.pages[0] through pdf.pages[5], so the input
# document needs at least six pages.
with pikepdf.open('structured_document.pdf') as pdf:
    with pdf.open_outline() as outline:
        # Chapter 1
        chapter1 = pikepdf.OutlineItem(
            title="1. Introduction",
            destination=pikepdf.PageLocation(pdf.pages[0], view_type='Fit')
        )

        # Add sections to Chapter 1
        section1_1 = pikepdf.OutlineItem(
            title="1.1 Overview",
            destination=pikepdf.PageLocation(pdf.pages[0], view_type='XYZ', top=600)
        )
        section1_2 = pikepdf.OutlineItem(
            title="1.2 Scope",
            destination=pikepdf.PageLocation(pdf.pages[1], view_type='Fit')
        )

        chapter1.append(section1_1)
        chapter1.append(section1_2)

        # Chapter 2
        chapter2 = pikepdf.OutlineItem(
            title="2. Technical Details",
            destination=pikepdf.PageLocation(pdf.pages[2], view_type='Fit')
        )

        # Add sections to Chapter 2
        section2_1 = pikepdf.OutlineItem(
            title="2.1 Architecture",
            destination=pikepdf.PageLocation(pdf.pages[2], view_type='FitH', top=700)
        )
        section2_2 = pikepdf.OutlineItem(
            title="2.2 Implementation",
            destination=pikepdf.PageLocation(pdf.pages[3], view_type='Fit')
        )

        # Add subsections to 2.2
        subsection2_2_1 = pikepdf.OutlineItem(
            title="2.2.1 Core Components",
            destination=pikepdf.PageLocation(pdf.pages[3], view_type='XYZ', top=500)
        )
        subsection2_2_2 = pikepdf.OutlineItem(
            title="2.2.2 Integration",
            destination=pikepdf.PageLocation(pdf.pages[4], view_type='Fit')
        )

        section2_2.append(subsection2_2_1)
        section2_2.append(subsection2_2_2)

        chapter2.append(section2_1)
        chapter2.append(section2_2)

        # Chapter 3
        chapter3 = pikepdf.OutlineItem(
            title="3. Conclusion",
            destination=pikepdf.PageLocation(pdf.pages[5], view_type='Fit')
        )

        # Add all chapters to outline
        outline.append(chapter1)
        outline.append(chapter2)
        outline.append(chapter3)

        # Expand Chapter 2 by default
        chapter2.is_open = True

    print("Created hierarchical outline structure")
    # Save only after the outline block has exited, so the edits are included
    pdf.save('structured_with_outline.pdf')
508
```
509
510
### Styling Bookmarks
511
512
```python
513
import pikepdf
514
515
# The context manager closes the file even if building the outline fails.
# NOTE: this example uses pdf.pages[0] and pdf.pages[1], so the input
# document needs at least two pages.
with pikepdf.open('document.pdf') as pdf:
    with pdf.open_outline() as outline:
        # Create styled bookmarks

        # Red, bold chapter heading
        chapter = pikepdf.OutlineItem(
            title="Important Chapter",
            destination=pikepdf.PageLocation(pdf.pages[0])
        )
        chapter.color = (1.0, 0.0, 0.0)  # Red
        chapter.bold = True

        # Blue, italic section
        section = pikepdf.OutlineItem(
            title="Special Section",
            destination=pikepdf.PageLocation(pdf.pages[1])
        )
        section.color = (0.0, 0.0, 1.0)  # Blue
        section.italic = True

        # Green, bold and italic subsection
        subsection = pikepdf.OutlineItem(
            title="Critical Information",
            destination=pikepdf.PageLocation(pdf.pages[1], view_type='XYZ', top=400)
        )
        subsection.color = (0.0, 0.8, 0.0)  # Green
        subsection.bold = True
        subsection.italic = True

        # Build hierarchy: chapter > section > subsection
        section.append(subsection)
        chapter.append(section)
        outline.append(chapter)

    # Save only after the outline block has exited, so the edits are included
    pdf.save('styled_bookmarks.pdf')
552
```
553
554
### Automated Outline Generation
555
556
```python
557
import pikepdf
558
import re
559
560
def generate_outline_from_content(pdf_path, output_path):
    """Generate outline based on content analysis.

    Scans each page's content stream for `Tj` text operators and turns short
    strings that look like headings into top-level bookmarks pointing at the
    page they were found on.

    This is a simplified example - a real implementation would need
    sophisticated text extraction and analysis (font sizes, positions,
    styles).

    Parameters:
    - pdf_path: Path of the PDF to analyze
    - output_path: Path the PDF with the generated outline is saved to
    """
    # Compile the heading patterns once rather than once per text operand
    numbered_heading = re.compile(r'^\d+\.')                    # Starts with number
    keyword_heading = re.compile(r'^Chapter|^Section', re.I)    # Keywords

    def looks_like_heading(text):
        """Simple heuristics: not too long, and numbered, all caps, or keyword-led."""
        return len(text) < 100 and bool(
            numbered_heading.match(text)
            or text.isupper()
            or keyword_heading.match(text)
        )

    # The context manager closes the file even if analysis or saving fails
    with pikepdf.open(pdf_path) as pdf:
        outline_items = []

        for page_num, page in enumerate(pdf.pages):
            try:
                # Page.parse_contents() needs a StreamParser argument;
                # parse_content_stream() returns the instruction list directly
                instructions = pikepdf.parse_content_stream(page)

                page_bookmarks = []
                for instruction in instructions:
                    is_text_op = (
                        hasattr(instruction, 'operator')
                        and str(instruction.operator) == 'Tj'
                        and instruction.operands
                    )
                    if not is_text_op:
                        continue

                    text = str(instruction.operands[0])
                    if looks_like_heading(text):
                        page_bookmarks.append(pikepdf.OutlineItem(
                            title=text.strip(),
                            destination=pikepdf.PageLocation(page, view_type='Fit')
                        ))
            except Exception as e:
                # Deliberately broad: analysis is best-effort, so a page that
                # cannot be parsed is reported and skipped, not fatal
                print(f"Could not analyze page {page_num}: {e}")
            else:
                # Only keep a page's bookmarks if the whole page was analyzed
                outline_items.extend(page_bookmarks)

        # Add bookmarks to outline
        with pdf.open_outline() as outline:
            for item in outline_items:
                outline.append(item)

        print(f"Generated {len(outline_items)} bookmarks")

        # Save only after the outline block has exited, so the edits are included
        pdf.save(output_path)
617
618
# Generate outline automatically
619
# generate_outline_from_content('document.pdf', 'auto_outlined.pdf')
620
```
621
622
### Outline Manipulation and Editing
623
624
```python
625
import pikepdf
626
627
def reorganize_outline(pdf_path, output_path=None):
    """Reorganize and clean up document outline.

    Flattens the existing outline, groups the bookmarks by the page they
    point to, and rebuilds the outline in page order. A page with exactly
    one bookmark keeps that bookmark at the top level; a page with several
    gets a "Page N" group that contains them. Bookmarks without a page
    destination are not carried over.

    Parameters:
    - pdf_path: Path of the PDF to reorganize
    - output_path (optional): Where to save the result; when omitted the
      input file is overwritten
    """

    def collect_items(parent_item):
        """Return every descendant of parent_item, parents before children."""
        items = []
        for child in parent_item.children:
            items.append(child)
            items.extend(collect_items(child))
        return items

    # allow_overwriting_input is required to save back onto the input file;
    # the context manager closes the file even if reorganizing fails
    with pikepdf.open(pdf_path, allow_overwriting_input=True) as pdf:
        with pdf.open_outline() as outline:
            print(f"Original outline has {len(outline)} items")

            # Collect all outline items recursively
            all_items = []
            for top_level in outline:
                all_items.append(top_level)
                all_items.extend(collect_items(top_level))

            # Group items by page
            page_items = {}
            for item in all_items:
                if item.destination and item.destination.page:
                    page_num = pdf.pages.index(item.destination.page)
                    page_items.setdefault(page_num, []).append(item)

            # Detach every item's children: each descendant is already in
            # all_items and is re-added on its own below, so leaving it under
            # its old parent would make it appear twice in the new outline.
            # NOTE(review): assumes deleting a child only unlinks it from its
            # parent and leaves the item itself usable - confirm.
            for item in all_items:
                while len(item) > 0:
                    del item[0]

            # Clear existing outline
            while len(outline) > 0:
                del outline[0]

            # Rebuild outline sorted by page
            for page_num in sorted(page_items):
                items = page_items[page_num]

                if len(items) == 1:
                    # Single item for this page
                    outline.append(items[0])
                else:
                    # Multiple items - create page group
                    page_group = pikepdf.OutlineItem(
                        title=f"Page {page_num + 1}",
                        destination=pikepdf.PageLocation(pdf.pages[page_num])
                    )

                    for item in items:
                        page_group.append(item)

                    outline.append(page_group)

            print(f"Reorganized outline now has {len(outline)} top-level items")

        # Save only after the outline block has exited, so the edits are
        # included; an explicit target avoids relying on an implicit filename
        pdf.save(output_path if output_path is not None else pdf_path)
684
685
# Reorganize outline structure
686
# reorganize_outline('document.pdf')
687
```
688
689
### Outline Export and Import
690
691
```python
692
import pikepdf
693
import json
694
695
def export_outline_to_json(pdf_path, json_path):
    """Export outline structure to JSON format.

    Writes a document of the form {"outline": [item, ...]} where each item
    has 'title' and 'is_open', plus optional 'destination' (zero-based
    'page', 'view_type', and any of 'top'/'left'/'zoom' that are set),
    optional 'color'/'bold'/'italic' styling, and optional nested 'children'.

    Parameters:
    - pdf_path: Path of the PDF whose outline is exported
    - json_path: Path of the JSON file to write (UTF-8)
    """
    # The context manager closes the file even if the export fails part-way
    with pikepdf.open(pdf_path) as pdf:

        def item_to_dict(item):
            """Convert outline item to dictionary."""
            item_dict = {
                'title': item.title,
                'is_open': item.is_open
            }

            destination = item.destination
            if destination:
                dest_dict = {
                    'page': pdf.pages.index(destination.page),  # zero-based
                    'view_type': destination.view_type
                }

                # Coordinates are optional; only record the ones that are set
                for key in ('top', 'left', 'zoom'):
                    value = getattr(destination, key)
                    if value is not None:
                        dest_dict[key] = value

                item_dict['destination'] = dest_dict

            # Style information (black, non-bold, non-italic is the default
            # and is left out)
            if item.color != (0.0, 0.0, 0.0):  # Non-black
                item_dict['color'] = list(item.color)
            if item.bold:
                item_dict['bold'] = True
            if item.italic:
                item_dict['italic'] = True

            # Children
            if len(item.children) > 0:
                item_dict['children'] = [item_to_dict(child) for child in item.children]

            return item_dict

        # Export outline
        with pdf.open_outline() as outline:
            outline_data = {
                'outline': [item_to_dict(item) for item in outline]
            }

    # Save to JSON (plain data by now, so the PDF is already closed)
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(outline_data, f, indent=2, ensure_ascii=False)

    print(f"Exported outline to {json_path}")
747
748
def import_outline_from_json(pdf_path, json_path, output_path):
    """Import outline structure from JSON format.

    Replaces the outline of the PDF at pdf_path with the structure read from
    json_path (a {"outline": [item, ...]} document with 'title', optional
    'destination', styling and 'children' per item) and saves the result.

    Parameters:
    - pdf_path: Path of the PDF that receives the outline
    - json_path: Path of the JSON file to read (UTF-8)
    - output_path: Path the updated PDF is saved to
    """
    # Load JSON
    with open(json_path, 'r', encoding='utf-8') as f:
        outline_data = json.load(f)

    # The context manager closes the file even if the import fails part-way
    with pikepdf.open(pdf_path) as pdf:

        def dict_to_item(item_dict):
            """Convert dictionary to outline item."""

            # Create destination
            destination = None
            if 'destination' in item_dict:
                dest_info = item_dict['destination']
                page_num = dest_info['page']

                # Page numbers are zero-based. Out-of-range values leave the
                # item without a destination; the lower bound matters because
                # a negative index would silently select a page counted from
                # the end of the document.
                if 0 <= page_num < len(pdf.pages):
                    destination = pikepdf.PageLocation(
                        pdf.pages[page_num],
                        view_type=dest_info.get('view_type', 'Fit'),
                        top=dest_info.get('top'),
                        left=dest_info.get('left'),
                        zoom=dest_info.get('zoom')
                    )

            # Create item
            item = pikepdf.OutlineItem(
                title=item_dict['title'],
                destination=destination
            )

            # Apply styling
            if 'color' in item_dict:
                item.color = tuple(item_dict['color'])
            if item_dict.get('bold'):
                item.bold = True
            if item_dict.get('italic'):
                item.italic = True
            if 'is_open' in item_dict:
                item.is_open = item_dict['is_open']

            # Add children (recursively, preserving their order)
            for child_dict in item_dict.get('children', ()):
                item.append(dict_to_item(child_dict))

            return item

        # Import outline
        with pdf.open_outline() as outline:
            # Clear existing outline
            while len(outline) > 0:
                del outline[0]

            # Add imported items
            for item_dict in outline_data['outline']:
                outline.append(dict_to_item(item_dict))

        # Save only after the outline block has exited, so the edits are included
        pdf.save(output_path)

    print(f"Imported outline from {json_path} to {output_path}")
813
814
# Export and import outline
815
# export_outline_to_json('document.pdf', 'outline.json')
816
# import_outline_from_json('new_document.pdf', 'outline.json', 'document_with_imported_outline.pdf')
817
```