0
# Page Operations
1
2
Page-level operations including manipulation, rotation, content parsing, overlays, and coordinate transformations. These capabilities enable comprehensive page handling for PDF documents.
3
4
## Capabilities
5
6
### Page Class
7
8
The Page class provides comprehensive page-level operations including content manipulation, geometric transformations, and overlay functionality.
9
10
```python { .api }
11
class Page(Object):
12
"""
13
PDF page object with content and properties.
14
15
Represents a single page in a PDF document with all its content,
16
resources, and geometric properties.
17
"""
18
19
def rotate(self, angle: int, *, relative: bool = True) -> None:
20
"""
21
Rotate the page by the specified angle.
22
23
Parameters:
24
- angle (int): Rotation angle in degrees (must be multiple of 90)
25
- relative (bool): If True, rotate relative to current rotation;
26
if False, set absolute rotation
27
28
Raises:
29
ValueError: If angle is not a multiple of 90 degrees
30
"""
31
32
def add_overlay(self, other: Page) -> None:
33
"""
34
Add another page as an overlay on top of this page.
35
36
The overlay page content is drawn on top of this page's content.
37
Both pages must be from the same PDF or compatible PDFs.
38
39
Parameters:
40
- other (Page): Page to use as overlay
41
42
Raises:
43
ForeignObjectError: If pages are from incompatible PDFs
44
"""
45
46
def add_underlay(self, other: Page) -> None:
47
"""
48
Add another page as an underlay beneath this page.
49
50
The underlay page content is drawn beneath this page's content.
51
Both pages must be from the same PDF or compatible PDFs.
52
53
Parameters:
54
- other (Page): Page to use as underlay
55
56
Raises:
57
ForeignObjectError: If pages are from incompatible PDFs
58
"""
59
60
def parse_contents(self) -> list[ContentStreamInstruction]:
61
"""
62
Parse the page's content stream into individual instructions.
63
64
Returns:
65
list[ContentStreamInstruction]: List of content stream instructions
66
that make up the page content
67
68
Raises:
69
PdfParsingError: If content stream cannot be parsed
70
"""
71
72
@property
73
def mediabox(self) -> Rectangle:
74
"""
75
The page's media box defining the physical page boundaries.
76
77
The media box defines the boundaries of the physical medium
78
on which the page is intended to be displayed or printed.
79
80
Returns:
81
Rectangle: Media box coordinates (llx, lly, urx, ury)
82
"""
83
84
@property
85
def cropbox(self) -> Rectangle:
86
"""
87
The page's crop box defining the visible page region.
88
89
The crop box defines the region to which the contents of the page
90
should be clipped when displayed or printed.
91
92
Returns:
93
Rectangle: Crop box coordinates (llx, lly, urx, ury)
94
"""
95
96
@property
97
def trimbox(self) -> Rectangle:
98
"""
99
The page's trim box defining the intended finished page size.
100
101
Returns:
102
Rectangle: Trim box coordinates (llx, lly, urx, ury)
103
"""
104
105
@property
106
def artbox(self) -> Rectangle:
107
"""
108
The page's art box defining the meaningful content area.
109
110
Returns:
111
Rectangle: Art box coordinates (llx, lly, urx, ury)
112
"""
113
114
@property
115
def bleedbox(self) -> Rectangle:
116
"""
117
The page's bleed box defining the clipping path for production.
118
119
Returns:
120
Rectangle: Bleed box coordinates (llx, lly, urx, ury)
121
"""
122
123
@property
124
def resources(self) -> Dictionary:
125
"""
126
The page's resource dictionary containing fonts, images, etc.
127
128
Returns:
129
Dictionary: Resource dictionary with fonts, XObjects, patterns, etc.
130
"""
131
132
@property
133
def images(self) -> dict[Name, PdfImage]:
134
"""
135
Dictionary of images referenced by this page.
136
137
Returns:
138
dict[Name, PdfImage]: Mapping of image names to PdfImage objects
139
"""
140
141
@property
142
def rotation(self) -> int:
143
"""
144
Current rotation of the page in degrees.
145
146
Returns:
147
int: Rotation angle (0, 90, 180, or 270 degrees)
148
"""
149
150
@property
151
def contents(self) -> Object:
152
"""
153
The page's content stream(s).
154
155
May be a single Stream object or Array of Stream objects.
156
157
Returns:
158
Object: Content stream or array of content streams
159
"""
160
```
161
162
### Rectangle Class
163
164
Geometric rectangle representation for page boundaries and coordinate operations.
165
166
```python { .api }
167
class Rectangle:
168
"""
169
PDF rectangle representing a bounding box with four coordinates.
170
171
Coordinates are specified as (llx, lly, urx, ury) where:
172
- llx, lly: lower-left corner coordinates
173
- urx, ury: upper-right corner coordinates
174
"""
175
176
def __init__(self, llx: float, lly: float, urx: float, ury: float) -> None:
177
"""
178
Create a rectangle with the specified coordinates.
179
180
Parameters:
181
- llx (float): Lower-left X coordinate
182
- lly (float): Lower-left Y coordinate
183
- urx (float): Upper-right X coordinate
184
- ury (float): Upper-right Y coordinate
185
"""
186
187
@property
188
def width(self) -> float:
189
"""
190
Rectangle width (urx - llx).
191
192
Returns:
193
float: Width of the rectangle
194
"""
195
196
@property
197
def height(self) -> float:
198
"""
199
Rectangle height (ury - lly).
200
201
Returns:
202
float: Height of the rectangle
203
"""
204
205
@property
206
def lower_left(self) -> tuple[float, float]:
207
"""
208
Lower-left corner coordinates.
209
210
Returns:
211
tuple[float, float]: (llx, lly) coordinates
212
"""
213
214
@property
215
def upper_right(self) -> tuple[float, float]:
216
"""
217
Upper-right corner coordinates.
218
219
Returns:
220
tuple[float, float]: (urx, ury) coordinates
221
"""
222
223
@property
224
def lower_right(self) -> tuple[float, float]:
225
"""
226
Lower-right corner coordinates.
227
228
Returns:
229
tuple[float, float]: (urx, lly) coordinates
230
"""
231
232
@property
233
def upper_left(self) -> tuple[float, float]:
234
"""
235
Upper-left corner coordinates.
236
237
Returns:
238
tuple[float, float]: (llx, ury) coordinates
239
"""
240
241
def __and__(self, other: Rectangle) -> Rectangle:
242
"""
243
Rectangle intersection (& operator).
244
245
Parameters:
246
- other (Rectangle): Rectangle to intersect with
247
248
Returns:
249
Rectangle: Intersection of the two rectangles
250
"""
251
252
def __le__(self, other: Rectangle) -> bool:
253
"""
254
Test if this rectangle is contained within another (<= operator).
255
256
Parameters:
257
- other (Rectangle): Rectangle to test containment against
258
259
Returns:
260
bool: True if this rectangle is fully contained in other
261
"""
262
263
def __eq__(self, other: Rectangle) -> bool:
264
"""
265
Test rectangle equality.
266
267
Parameters:
268
- other (Rectangle): Rectangle to compare with
269
270
Returns:
271
bool: True if rectangles have same coordinates
272
"""
273
```
274
275
### Content Stream Instructions
276
277
Objects representing parsed content stream instructions for low-level content manipulation.
278
279
```python { .api }
280
class ContentStreamInstruction:
281
"""
282
Parsed content stream instruction representing an operator and its operands.
283
284
Content streams contain sequences of instructions that define what
285
appears on a page (text, graphics, images, etc.).
286
"""
287
288
@property
289
def operands(self) -> list[Object]:
290
"""
291
List of operand objects for this instruction.
292
293
Returns:
294
list[Object]: PDF objects that serve as operands to the operator
295
"""
296
297
@property
298
def operator(self) -> Operator:
299
"""
300
The PDF operator for this instruction.
301
302
Returns:
303
Operator: PDF operator object (e.g., 'Tj' for show text)
304
"""
305
306
class ContentStreamInlineImage:
307
"""
308
Inline image found within a content stream.
309
310
Represents images embedded directly in the content stream
311
rather than referenced as external objects.
312
"""
313
314
@property
315
def operands(self) -> list[Object]:
316
"""
317
Operands associated with the inline image.
318
319
Returns:
320
list[Object]: Image operands
321
"""
322
323
@property
324
def operator(self) -> Operator:
325
"""
326
The operator associated with this inline image.
327
328
Returns:
329
Operator: Always the synthetic 'INLINE IMAGE' operator; pikepdf folds the BI/ID/EI operator sequence into this single instruction
330
"""
331
332
@property
333
def iimage(self) -> PdfInlineImage:
334
"""
335
The inline image object.
336
337
Returns:
338
PdfInlineImage: Inline image that can be processed or extracted
339
"""
340
```
341
342
## Usage Examples
343
344
### Basic Page Operations
345
346
```python
347
import pikepdf
348
349
# Open a PDF
350
pdf = pikepdf.open('document.pdf')
351
352
# Get the first page
353
page = pdf.pages[0]
354
355
# Rotate page 90 degrees clockwise
356
page.rotate(90, relative=True)
357
358
# Get page dimensions
359
media_box = page.mediabox
360
print(f"Page size: {media_box.width} x {media_box.height} points")
361
362
# Access page rotation
363
current_rotation = page.rotation
364
print(f"Current rotation: {current_rotation} degrees")
365
366
pdf.save('rotated_document.pdf')
367
pdf.close()
368
```
369
370
### Page Overlays and Underlays
371
372
```python
373
import pikepdf
374
375
# Open PDFs
376
main_pdf = pikepdf.open('main_document.pdf')
377
overlay_pdf = pikepdf.open('overlay_content.pdf')
378
379
# Get pages
380
main_page = main_pdf.pages[0]
381
overlay_page = overlay_pdf.pages[0]
382
383
# Copy overlay page to main PDF
384
copied_overlay = main_pdf.copy_foreign(overlay_page)
385
386
# Add as overlay (on top of existing content)
387
main_page.add_overlay(copied_overlay)
388
389
# Or add as underlay (beneath existing content)
390
# main_page.add_underlay(copied_overlay)
391
392
main_pdf.save('document_with_overlay.pdf')
393
main_pdf.close()
394
overlay_pdf.close()
395
```
396
397
### Working with Page Boxes
398
399
```python
400
import pikepdf
401
402
pdf = pikepdf.open('document.pdf')
403
page = pdf.pages[0]
404
405
# Access different page boxes
406
media_box = page.mediabox
407
crop_box = page.cropbox
408
trim_box = page.trimbox
409
art_box = page.artbox
410
bleed_box = page.bleedbox
411
412
print(f"Media box: {media_box.width} x {media_box.height}")
413
print(f"Crop box: {crop_box.width} x {crop_box.height}")
414
415
# Modify crop box to create margins
416
new_crop = pikepdf.Rectangle(
417
media_box.lower_left[0] + 36, # 0.5 inch margin
418
media_box.lower_left[1] + 36,
419
media_box.upper_right[0] - 36,
420
media_box.upper_right[1] - 36
421
)
422
page.cropbox = new_crop
423
424
pdf.save('cropped_document.pdf')
425
pdf.close()
426
```
427
428
### Content Stream Parsing
429
430
```python
431
import pikepdf
432
433
pdf = pikepdf.open('document.pdf')
434
page = pdf.pages[0]
435
436
# Parse page content into instructions
437
instructions = page.parse_contents()
438
439
# Iterate through content stream instructions
440
for instruction in instructions:
441
operator = instruction.operator
442
operands = instruction.operands
443
444
# Look for text showing operations
445
if str(operator) == 'Tj': # Show text
446
text_string = operands[0]
447
print(f"Found text: {text_string}")
448
449
# Look for image placement operations
450
elif str(operator) == 'Do': # Invoke XObject
451
xobject_name = operands[0]
452
print(f"Found XObject reference: {xobject_name}")
453
454
pdf.close()
455
```
456
457
### Page Resource Management
458
459
```python
460
import pikepdf
461
462
pdf = pikepdf.open('document.pdf')
463
page = pdf.pages[0]
464
465
# Access page resources
466
resources = page.resources
467
468
# Check for fonts
469
if '/Font' in resources:
470
fonts = resources['/Font']
471
print(f"Page uses {len(fonts)} fonts:")
472
for font_name, font_obj in fonts.items():
473
print(f" {font_name}: {font_obj.get('/BaseFont', 'Unknown')}")
474
475
# Check for images
476
if '/XObject' in resources:
477
xobjects = resources['/XObject']
478
for name, obj in xobjects.items():
479
if obj.get('/Subtype') == pikepdf.Name.Image:
480
print(f"Found image: {name}")
481
482
# Access images through convenience property
483
page_images = page.images
484
for name, image in page_images.items():
485
print(f"Image {name}: {image.width}x{image.height}, {image.bpc} bpc")
486
487
pdf.close()
488
```
489
490
### Rectangle Operations
491
492
```python
493
import pikepdf
494
495
# Create rectangles
496
page_rect = pikepdf.Rectangle(0, 0, 612, 792) # US Letter
497
margin_rect = pikepdf.Rectangle(36, 36, 576, 756) # 0.5" margins
498
499
# Calculate dimensions
500
print(f"Page dimensions: {page_rect.width} x {page_rect.height}")
501
print(f"Margin area: {margin_rect.width} x {margin_rect.height}")
502
503
# Test containment
504
is_contained = margin_rect <= page_rect
505
print(f"Margin rect fits in page: {is_contained}")
506
507
# Calculate intersection
508
if margin_rect <= page_rect:
509
intersection = page_rect & margin_rect
510
print(f"Intersection: {intersection.width} x {intersection.height}")
511
512
# Access corner coordinates
513
ll = page_rect.lower_left
514
ur = page_rect.upper_right
515
print(f"Lower-left: {ll}, Upper-right: {ur}")
516
```
517
518
### Multiple Page Operations
519
520
```python
521
import pikepdf
522
523
pdf = pikepdf.open('multi_page_document.pdf')
524
525
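# Rotate every other page by 90 degrees
526
for i, page in enumerate(pdf.pages):
527
if i % 2 == 0: # Even indices 0, 2, 4... (pages 1, 3, 5...)
528
page.rotate(0) # Relative rotation by 0 degrees: orientation unchanged
529
else: # Odd indices 1, 3, 5... (pages 2, 4, 6...)
530
page.rotate(90) # Rotate 90 degrees clockwise from the current orientation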
531
532
print(f"Page {i+1}: {page.mediabox.width} x {page.mediabox.height}")
533
534
# Extract pages into separate PDFs
535
for i, page in enumerate(pdf.pages):
536
single_page_pdf = pikepdf.new()
537
single_page_pdf.pages.append(page)
538
single_page_pdf.save(f'page_{i+1}.pdf')
539
single_page_pdf.close()
540
541
pdf.close()
542
```