0
# Page Operations
1
2
Comprehensive page manipulation capabilities including transformations, merging, cropping, and geometric operations. The PageObject class provides the foundation for all page-level operations in pypdf.
3
4
## Capabilities
5
6
### Page Objects
7
8
PageObject represents individual PDF pages with complete access to page content, properties, and transformation capabilities.
9
10
```python { .api }
11
class PageObject:
12
@staticmethod
13
def create_blank_page(pdf=None, width: float | None = None, height: float | None = None) -> PageObject:
14
"""
15
Create a blank page with specified dimensions.
16
17
Args:
18
width: Page width in points
19
height: Page height in points
20
21
Returns:
22
New blank PageObject
23
"""
24
25
def extract_text(
26
self,
27
extraction_mode: str = "plain",
28
layout_mode_space_vertically: bool = True,
29
layout_mode_scale_weight: float = 1.25,
30
layout_mode_strip_rotated: bool = True,
31
orientations: tuple | int = (0, 90, 180, 270),
32
space_width: float = 200.0,
33
visitor_text=None
34
) -> str:
35
"""
36
Extract text from the page.
37
38
Args:
39
extraction_mode: Text extraction mode ("plain" or "layout", default: "plain")
40
layout_mode_space_vertically: Insert spaces for vertical gaps
41
layout_mode_scale_weight: Weight for layout scaling
42
layout_mode_strip_rotated: Strip rotated text
43
orientations: Text orientations to consider
44
space_width: Width threshold for spaces
45
visitor_text: Custom text visitor function
46
47
Returns:
48
Extracted text as string
49
"""
50
51
def scale(self, sx: float, sy: float) -> PageObject:
52
"""
53
Scale the page by given factors.
54
55
Args:
56
sx: Horizontal scaling factor
57
sy: Vertical scaling factor
58
59
Returns:
60
Self for method chaining
61
"""
62
63
def scale_by(self, factor: float) -> PageObject:
64
"""
65
Scale the page uniformly by a factor.
66
67
Args:
68
factor: Scaling factor
69
70
Returns:
71
Self for method chaining
72
"""
73
74
def scale_to(self, width: float, height: float) -> PageObject:
75
"""
76
Scale the page to specific dimensions.
77
78
Args:
79
width: Target width in points
80
height: Target height in points
81
82
Returns:
83
Self for method chaining
84
"""
85
86
def rotate(self, angle: int) -> PageObject:
87
"""
88
Rotate the page by the given angle.
89
90
Args:
91
angle: Rotation angle in degrees (90, 180, 270, etc.)
92
93
Returns:
94
Self for method chaining
95
"""
96
97
def rotate_clockwise(self, angle: int) -> PageObject:
98
"""
99
Rotate the page clockwise.
100
101
Args:
102
angle: Rotation angle in degrees
103
104
Returns:
105
Self for method chaining
106
"""
107
108
def rotate_counter_clockwise(self, angle: int) -> PageObject:
109
"""
110
Rotate the page counter-clockwise.
111
112
Args:
113
angle: Rotation angle in degrees
114
115
Returns:
116
Self for method chaining
117
"""
118
119
def transfer_rotation_to_content(self) -> PageObject:
120
"""
121
Apply the page's rotation to its content and reset rotation to 0.
122
123
Returns:
124
Self for method chaining
125
"""
126
127
def merge_page(self, page2: PageObject) -> None:
128
"""
129
Merge another page's content onto this page.
130
131
Args:
132
page2: PageObject to merge onto this page
133
"""
134
135
def merge_translated_page(self, page2: PageObject, tx: float, ty: float) -> None:
136
"""
137
Merge another page with translation offset.
138
139
Args:
140
page2: PageObject to merge
141
tx: Translation offset in x direction
142
ty: Translation offset in y direction
143
"""
144
145
def merge_rotated_page(self, page2: PageObject, rotation: float) -> None:
146
"""
147
Merge another page with rotation.
148
149
Args:
150
page2: PageObject to merge
151
rotation: Rotation angle in degrees
152
"""
153
154
def merge_scaled_page(self, page2: PageObject, scale: float, expand: bool = False) -> None:
155
"""
156
Merge another page with scaling.
157
158
Args:
159
page2: PageObject to merge
160
scale: Scaling factor
161
expand: Whether to expand the page to fit scaled content
162
"""
163
164
def merge_rotated_scaled_page(
165
self,
166
page2: PageObject,
167
rotation: float,
168
scale: float,
169
expand: bool = False
170
) -> None:
171
"""
172
Merge another page with rotation and scaling.
173
174
Args:
175
page2: PageObject to merge
176
rotation: Rotation angle in degrees
177
scale: Scaling factor
178
expand: Whether to expand the page to fit transformed content
179
"""
180
181
def merge_transformed_page(
182
self,
183
page2: PageObject,
184
ctm,
185
expand: bool = False
186
) -> None:
187
"""
188
Merge another page with custom transformation matrix.
189
190
Args:
191
page2: PageObject to merge
192
ctm: Transformation matrix
193
expand: Whether to expand the page to fit transformed content
194
"""
195
196
def add_transformation(self, ctm) -> None:
197
"""
198
Apply a transformation matrix to the page.
199
200
Args:
201
ctm: Transformation matrix
202
"""
203
204
```

### Page Box Properties
205
206
Access and modify PDF page boundaries and dimensions through five different box types, each serving specific purposes in the PDF specification.
207
208
```python { .api }
209
# Box Properties (all return RectangleObject)
210
@property
211
def mediabox(self) -> RectangleObject:
212
"""
213
The boundaries of the physical medium on which the page is intended
214
to be displayed or printed. This is the largest box and defines the
215
overall page size.
216
"""
217
218
@property
219
def cropbox(self) -> RectangleObject:
220
"""
221
The visible region of default user space. When displayed or printed,
222
contents outside this box are clipped. Falls back to mediabox if not set.
223
"""
224
225
@property
226
def bleedbox(self) -> RectangleObject:
227
"""
228
The region to which contents should be clipped when output in a
229
production environment. Used for printing with bleed margins.
230
Falls back to cropbox, then mediabox if not set.
231
"""
232
233
@property
234
def trimbox(self) -> RectangleObject:
235
"""
236
The intended dimensions of the finished page after trimming.
237
Falls back to cropbox, then mediabox if not set.
238
"""
239
240
@property
241
def artbox(self) -> RectangleObject:
242
"""
243
The extent of the page's meaningful content as intended by the
244
page's creator. Falls back to cropbox, then mediabox if not set.
245
"""
246
247
# RectangleObject Properties and Methods
248
class RectangleObject:
249
# Individual coordinates (read/write)
250
@property
251
def left(self) -> FloatObject: ...
252
253
@property
254
def bottom(self) -> FloatObject: ...
255
256
@property
257
def right(self) -> FloatObject: ...
258
259
@property
260
def top(self) -> FloatObject: ...
261
262
# Corner positions (read/write)
263
@property
264
def lower_left(self) -> tuple[float, float]: ...
265
266
@property
267
def lower_right(self) -> tuple[float, float]: ...
268
269
@property
270
def upper_left(self) -> tuple[float, float]: ...
271
272
@property
273
def upper_right(self) -> tuple[float, float]: ...
274
275
# Dimensions (read-only)
276
@property
277
def width(self) -> float: ...
278
279
@property
280
def height(self) -> float: ...
281
282
def scale(self, sx: float, sy: float) -> RectangleObject:
283
"""
284
Create a new scaled rectangle.
285
286
Args:
287
sx: Horizontal scale factor
288
sy: Vertical scale factor
289
290
Returns:
291
New scaled RectangleObject
292
"""
293
```

```python { .api }
# Additional PageObject properties
294
295
@property
296
def rotation(self) -> int:
297
"""Get the page rotation angle in degrees."""
298
299
@property
300
def user_unit(self) -> float:
301
"""Get the user unit scale factor."""
302
303
@property
304
def images(self):
305
"""Get images on the page."""
306
307
@property
308
def page_number(self) -> int | None:
309
"""Get the page number in the document."""
310
311
@property
312
def annotations(self):
313
"""Get page annotations."""
314
315
@property
316
def mediabox(self):
317
"""Get the media box (page boundaries)."""
318
319
@property
320
def cropbox(self):
321
"""Get the crop box (visible page area)."""
322
323
@property
324
def bleedbox(self):
325
"""Get the bleed box (printable area with bleed)."""
326
327
@property
328
def trimbox(self):
329
"""Get the trim box (final trimmed page size)."""
330
331
@property
332
def artbox(self):
333
"""Get the art box (meaningful content area)."""
334
```
335
336
### Transformation Matrix
337
338
The Transformation class provides a convenient interface for creating and combining geometric transformations.
339
340
```python { .api }
341
class Transformation:
342
def __init__(self, ctm=(1, 0, 0, 1, 0, 0)):
343
"""
344
Initialize a transformation matrix.
345
346
Args:
347
ctm: 6-element transformation matrix tuple (a, b, c, d, e, f)
348
"""
349
350
def translate(self, tx: float = 0, ty: float = 0) -> Transformation:
351
"""
352
Add translation to the transformation.
353
354
Args:
355
tx: Translation in x direction
356
ty: Translation in y direction
357
358
Returns:
359
New Transformation with the translation applied (the original is not modified)
360
"""
361
362
def scale(self, sx: float = 1, sy: float | None = None) -> Transformation:
363
"""
364
Add scaling to the transformation.
365
366
Args:
367
sx: Horizontal scaling factor
368
sy: Vertical scaling factor (defaults to sx)
369
370
Returns:
371
New Transformation with the scaling applied (the original is not modified)
372
"""
373
374
def rotate(self, rotation: float) -> Transformation:
375
"""
376
Add rotation to the transformation.
377
378
Args:
379
rotation: Rotation angle in degrees
380
381
Returns:
382
New Transformation with the rotation applied (the original is not modified)
383
"""
384
385
def transform(self, m) -> Transformation:
386
"""
387
Apply another transformation matrix.
388
389
Args:
390
m: Transformation matrix to apply
391
392
Returns:
393
New Transformation combining both matrices (the original is not modified)
394
"""
395
396
def apply_on(self, pt, as_object: bool = False):
397
"""
398
Apply the transformation to a point.
399
400
Args:
401
pt: Point coordinates
402
as_object: Return as object instead of tuple
403
404
Returns:
405
Transformed point coordinates
406
"""
407
408
@property
409
def matrix(self):
410
"""Get the transformation matrix."""
411
```
412
413
## Usage Examples
414
415
### Basic Page Transformations
416
417
```python
418
from pypdf import PdfReader, PdfWriter

reader = PdfReader("input.pdf")
writer = PdfWriter()

for page in reader.pages:
    # Scale page to 150%
    page.scale_by(1.5)

    # Rotate page 90 degrees clockwise.
    # rotate() turns the page clockwise and expects a multiple of 90;
    # it replaces the older rotate_clockwise() spelling.
    page.rotate(90)

    writer.add_page(page)

with open("transformed.pdf", "wb") as output:
    writer.write(output)
434
```
435
436
### Page Merging
437
438
```python
439
from pypdf import PdfReader, PdfWriter

source = PdfReader("document.pdf")
watermark_doc = PdfReader("watermark.pdf")

out = PdfWriter()

for current in source.pages:
    # Stamp the first watermark page on top of the current page,
    # then queue the combined page for output
    current.merge_page(watermark_doc.pages[0])
    out.add_page(current)

with open("watermarked.pdf", "wb") as sink:
    out.write(sink)
453
```
454
455
### Advanced Transformations
456
457
```python
458
from pypdf import PdfReader, PdfWriter, Transformation

reader = PdfReader("input.pdf")
writer = PdfWriter()

# Create complex transformation.
# Every Transformation method returns a NEW Transformation instead of
# mutating the receiver, so the calls are chained and the final result kept.
transform = (
    Transformation()
    .translate(100, 50)  # Move 100 points right, 50 up
    .scale(0.8, 1.2)  # Scale 80% horizontally, 120% vertically
    .rotate(15)  # Rotate 15 degrees
)

for page in reader.pages:
    # Pass the Transformation itself: add_transformation expects a
    # Transformation or a 6-element (a, b, c, d, e, f) tuple, whereas
    # transform.matrix is the expanded 3x3 form.
    page.add_transformation(transform)
    writer.add_page(page)

with open("complex_transform.pdf", "wb") as output:
    writer.write(output)
476
```
477
478
### Creating Blank Pages
479
480
```python
481
from pypdf import PdfWriter, PageObject, PaperSize

writer = PdfWriter()

# Create pages with different sizes.
# Dimensions are passed by keyword because the first positional parameter of
# create_blank_page is the owning document (pdf), not the width.
letter_page = PageObject.create_blank_page(width=612, height=792)  # Letter size
a4_page = PageObject.create_blank_page(
    width=PaperSize.A4.width,
    height=PaperSize.A4.height,
)  # A4 size

writer.add_page(letter_page)
writer.add_page(a4_page)

with open("blank_pages.pdf", "wb") as output:
    writer.write(output)
494
```
495
496
### Page Cropping and Boundaries
497
498
```python
499
from pypdf import PdfReader, PdfWriter
from pypdf.generic import RectangleObject

reader = PdfReader("input.pdf")
writer = PdfWriter()

for page in reader.pages:
    # Get current page boundaries
    media_box = page.mediabox

    # Create crop box (crop 50 points from each side).
    # The box is wrapped in RectangleObject because page entries must be
    # PDF objects; a plain Python list is rejected on assignment.
    crop_box = RectangleObject(
        (
            media_box.left + 50,
            media_box.bottom + 50,
            media_box.right - 50,
            media_box.top - 50,
        )
    )

    # Apply crop box
    page.cropbox = crop_box
    writer.add_page(page)

with open("cropped.pdf", "wb") as output:
    writer.write(output)
522
```
523
524
### Multi-Page Overlay
525
526
```python
527
from pypdf import PdfReader, PdfWriter

base_doc = PdfReader("base.pdf")
overlay_doc = PdfReader("overlay.pdf")

writer = PdfWriter()

for index, target in enumerate(base_doc.pages):
    # Cycle through the overlay pages when the base document is longer
    stamp = overlay_doc.pages[index % len(overlay_doc.pages)]

    # Largest uniform factor at which the overlay still fits inside the page
    fit = min(
        float(target.mediabox.width) / float(stamp.mediabox.width),
        float(target.mediabox.height) / float(stamp.mediabox.height),
    )

    # NOTE(review): scale_by is called on the overlay page object itself, and
    # that same object is reused whenever the overlay index wraps around.
    stamp.scale_by(fit)
    target.merge_page(stamp)

    writer.add_page(target)

with open("multi_overlay.pdf", "wb") as sink:
    writer.write(sink)
556
```