0
# Page Manipulation
1
2
Transform, scale, rotate, crop, and merge individual PDF pages with precise control over page geometry and content. The `PageObject` class, the `Transformation` helper, and `RectangleObject` together make up the page-manipulation API described below.
3
4
## Capabilities
5
6
### PageObject Class
7
8
Represents a single PDF page with methods for content extraction, geometric transformations, and page merging.
9
10
```python { .api }
11
class PageObject(DictionaryObject):
12
"""PDF page object with transformation and content capabilities."""
13
14
@property
15
def mediabox(self) -> RectangleObject:
16
"""The page's media box (full page size)."""
17
18
@property
19
def cropbox(self) -> RectangleObject:
20
"""The page's crop box (visible area)."""
21
22
@property
23
def bleedbox(self) -> RectangleObject:
24
"""The page's bleed box (printing area)."""
25
26
@property
27
def trimbox(self) -> RectangleObject:
28
"""The page's trim box (finished page size)."""
29
30
@property
31
def artbox(self) -> RectangleObject:
32
"""The page's art box (meaningful content area)."""
33
34
@property
35
def annotations(self) -> Optional[ArrayObject]:
36
"""Page annotations if present."""
37
38
def extract_text(self, visitor_text=None) -> str:
39
"""
40
Extract text content from the page.
41
42
Args:
43
visitor_text (callable, optional): Custom text visitor function
44
45
Returns:
46
str: Extracted text content
47
"""
48
49
def scale(self, sx: float, sy: float) -> None:
50
"""
51
Scale the page by given factors.
52
53
Args:
54
sx (float): Horizontal scale factor
55
sy (float): Vertical scale factor
56
"""
57
58
def scale_by(self, factor: float) -> None:
59
"""
60
Scale the page uniformly.
61
62
Args:
63
factor (float): Scale factor for both dimensions
64
"""
65
66
def scale_to(self, width: float, height: float) -> None:
67
"""
68
Scale the page to specific dimensions.
69
70
Args:
71
width (float): Target width in points
72
height (float): Target height in points
73
"""
74
75
def rotate(self, angle: int) -> 'PageObject':
76
"""
77
Rotate the page by the given angle.
78
79
Args:
80
angle (int): Rotation angle in degrees (must be multiple of 90)
81
82
Returns:
83
PageObject: Self for method chaining
84
"""
85
86
def rotate_clockwise(self, angle: int) -> 'PageObject':
87
"""
88
DEPRECATED: Use rotate() instead.
89
Rotate the page clockwise.
90
91
Args:
92
angle (int): Rotation angle in degrees
93
94
Returns:
95
PageObject: Self for method chaining
96
"""
97
98
def rotate_counter_clockwise(self, angle: int) -> 'PageObject':
99
"""
100
DEPRECATED: Use rotate() instead.
101
Rotate the page counter-clockwise.
102
103
Args:
104
angle (int): Rotation angle in degrees
105
106
Returns:
107
PageObject: Self for method chaining
108
"""
109
110
def merge_page(self, page2: 'PageObject') -> None:
111
"""
112
Merge another page's content onto this page.
113
114
Args:
115
page2 (PageObject): Page to merge onto this page
116
"""
117
118
def merge_scaled_page(self, page2: 'PageObject', scale: float, expand: bool = False) -> None:
119
"""
120
Merge a scaled page onto this page.
121
122
Args:
123
page2 (PageObject): Page to merge
124
scale (float): Scale factor for the merged page
125
expand (bool): Whether to expand page size if needed
126
"""
127
128
def merge_rotated_page(self, page2: 'PageObject', rotation: int, expand: bool = False) -> None:
129
"""
130
Merge a rotated page onto this page.
131
132
Args:
133
page2 (PageObject): Page to merge
134
rotation (int): Rotation angle in degrees
135
expand (bool): Whether to expand page size if needed
136
"""
137
138
def merge_scaled_translated_page(
139
self,
140
page2: 'PageObject',
141
scale: float,
142
tx: float,
143
ty: float,
144
expand: bool = False
145
) -> None:
146
"""
147
Merge a scaled and translated page onto this page.
148
149
Args:
150
page2 (PageObject): Page to merge
151
scale (float): Scale factor
152
tx (float): X translation in points
153
ty (float): Y translation in points
154
expand (bool): Whether to expand page size if needed
155
"""
156
157
def merge_rotated_scaled_page(
158
self,
159
page2: 'PageObject',
160
rotation: int,
161
scale: float,
162
expand: bool = False
163
) -> None:
164
"""
165
Merge a rotated and scaled page onto this page.
166
167
Args:
168
page2 (PageObject): Page to merge
169
rotation (int): Rotation angle in degrees
170
scale (float): Scale factor
171
expand (bool): Whether to expand page size if needed
172
"""
173
174
def merge_rotated_scaled_translated_page(
175
self,
176
page2: 'PageObject',
177
rotation: int,
178
scale: float,
179
tx: float,
180
ty: float,
181
expand: bool = False
182
) -> None:
183
"""
184
Merge a page with full transformation onto this page.
185
186
Args:
187
page2 (PageObject): Page to merge
188
rotation (int): Rotation angle in degrees
189
scale (float): Scale factor
190
tx (float): X translation in points
191
ty (float): Y translation in points
192
expand (bool): Whether to expand page size if needed
193
"""
194
195
def merge_transformed_page(
196
self,
197
page2: 'PageObject',
198
ctm: Transformation,
199
expand: bool = False
200
) -> None:
201
"""
202
Merge a page with custom transformation matrix.
203
204
Args:
205
page2 (PageObject): Page to merge
206
ctm (Transformation): Current transformation matrix
207
expand (bool): Whether to expand page size if needed
208
"""
209
210
def add_transformation(self, ctm: Transformation) -> None:
211
"""
212
Add a transformation to the page.
213
214
Args:
215
ctm (Transformation): Transformation matrix to apply
216
"""
217
218
def get_fonts(self) -> Tuple[Set[str], Set[str]]:
219
"""
220
Get fonts used on the page.
221
222
Returns:
223
tuple: (font_names, font_subsets) sets
224
"""
225
226
def get_images(self) -> Dict[str, Any]:
227
"""
228
Get images embedded in the page.
229
230
Returns:
231
dict: Image information by name
232
"""
233
234
@staticmethod
235
def create_blank_page(pdf=None, width: float = 612, height: float = 792) -> 'PageObject':
236
"""
237
Create a blank page.
238
239
Args:
240
pdf: Optional PDF reader reference
241
width (float): Page width in points (default: 612, i.e. 8.5 inches)
242
height (float): Page height in points (default: 792, i.e. 11 inches)
243
244
Returns:
245
PageObject: New blank page
246
"""
247
```
248
249
### Transformation Class
250
251
2D coordinate transformation operations for precise page geometry control.
252
253
```python { .api }
254
class Transformation:
255
"""2D transformation matrix for page operations."""
256
257
def __init__(self, ctm: Tuple[float, float, float, float, float, float] = (1, 0, 0, 1, 0, 0)):
258
"""
259
Initialize transformation matrix.
260
261
Args:
262
ctm: 6-element transformation matrix (a, b, c, d, e, f)
263
"""
264
265
def translate(self, tx: float = 0, ty: float = 0) -> 'Transformation':
266
"""
267
Add translation to the transformation.
268
269
Args:
270
tx (float): X translation in points
271
ty (float): Y translation in points
272
273
Returns:
274
Transformation: New transformation with translation applied
275
"""
276
277
def scale(self, sx: Optional[float] = None, sy: Optional[float] = None) -> 'Transformation':
278
"""
279
Add scaling to the transformation.
280
281
Args:
282
sx: X scale factor (default: 1.0)
283
sy: Y scale factor (default: same as sx)
284
285
Returns:
286
Transformation: New transformation with scaling applied
287
"""
288
289
def rotate(self, rotation: float) -> 'Transformation':
290
"""
291
Add rotation to the transformation.
292
293
Args:
294
rotation (float): Rotation angle in degrees
295
296
Returns:
297
Transformation: New transformation with rotation applied
298
"""
299
300
```
301
302
### Rectangle Objects
303
304
Geometric rectangle representation for page boundaries and regions.
305
306
```python { .api }
307
class RectangleObject(ArrayObject):
308
"""PDF rectangle object for geometric regions."""
309
310
@property
311
def left(self) -> float:
312
"""Left coordinate."""
313
314
@property
315
def bottom(self) -> float:
316
"""Bottom coordinate."""
317
318
@property
319
def right(self) -> float:
320
"""Right coordinate."""
321
322
@property
323
def top(self) -> float:
324
"""Top coordinate."""
325
326
@property
327
def width(self) -> float:
328
"""Rectangle width."""
329
330
@property
331
def height(self) -> float:
332
"""Rectangle height."""
333
334
def scale(self, sx: float, sy: float) -> 'RectangleObject':
335
"""
336
Scale the rectangle.
337
338
Args:
339
sx (float): X scale factor
340
sy (float): Y scale factor
341
342
Returns:
343
RectangleObject: New scaled rectangle
344
"""
345
346
def normalize(self) -> 'RectangleObject':
347
"""
348
Normalize rectangle coordinates.
349
350
Returns:
351
RectangleObject: Normalized rectangle
352
"""
353
354
def intersect(self, other: 'RectangleObject') -> 'RectangleObject':
355
"""
356
Calculate intersection with another rectangle.
357
358
Args:
359
other (RectangleObject): Rectangle to intersect with
360
361
Returns:
362
RectangleObject: Intersection rectangle
363
"""
364
365
def union(self, other: 'RectangleObject') -> 'RectangleObject':
366
"""
367
Calculate union with another rectangle.
368
369
Args:
370
other (RectangleObject): Rectangle to union with
371
372
Returns:
373
RectangleObject: Union rectangle
374
"""
375
```
376
377
## Usage Examples
378
379
### Basic Page Transformations
380
381
```python
382
from PyPDF2 import PdfReader, PdfWriter
383
384
# Read source PDF
385
reader = PdfReader("source.pdf")
386
writer = PdfWriter()
387
388
# Get first page
389
page = reader.pages[0]
390
391
# Scale the page to 50% size
392
page.scale(0.5, 0.5)
393
394
# Rotate 90 degrees clockwise
395
page.rotate(90)
396
397
# Add to writer
398
writer.add_page(page)
399
400
# Save result
401
with open("transformed.pdf", "wb") as output_file:
402
writer.write(output_file)
403
```
404
405
### Advanced Page Merging
406
407
```python
408
from PyPDF2 import PdfReader, PdfWriter
409
410
# Read source files
411
reader1 = PdfReader("background.pdf")
412
reader2 = PdfReader("overlay.pdf")
413
writer = PdfWriter()
414
415
# Get pages
416
background = reader1.pages[0]
417
overlay = reader2.pages[0]
418
419
# Scale overlay to fit in corner
420
overlay.scale(0.3, 0.3)
421
422
# Merge overlay onto background (scale=0.5 is applied on top of the 0.3 pre-scale above)
423
background.merge_scaled_translated_page(
424
overlay,
425
scale=0.5,
426
tx=400, # Position in bottom-right
427
ty=100,
428
expand=False
429
)
430
431
writer.add_page(background)
432
433
with open("merged_pages.pdf", "wb") as output_file:
434
writer.write(output_file)
435
```
436
437
### Working with Page Dimensions
438
439
```python
440
from PyPDF2 import PdfReader, PdfWriter
441
from PyPDF2.generic import RectangleObject
442
443
reader = PdfReader("document.pdf")
444
writer = PdfWriter()
445
446
for page in reader.pages:
447
# Get current dimensions
448
mediabox = page.mediabox
449
print(f"Page size: {mediabox.width} x {mediabox.height} points")
450
451
# Convert to inches (72 points = 1 inch)
452
width_inches = float(mediabox.width) / 72
453
height_inches = float(mediabox.height) / 72
454
print(f"Page size: {width_inches:.1f}\" x {height_inches:.1f}\"")
455
456
# Crop page to center area
457
crop_margin = 50 # 50 points margin
458
page.cropbox = RectangleObject([
459
float(mediabox.left) + crop_margin,
460
float(mediabox.bottom) + crop_margin,
461
float(mediabox.right) - crop_margin,
462
float(mediabox.top) - crop_margin
463
])
464
465
writer.add_page(page)
466
467
with open("cropped.pdf", "wb") as output_file:
468
writer.write(output_file)
469
```
470
471
### Creating Custom Transformations
472
473
```python
474
from PyPDF2 import PdfReader, PdfWriter, Transformation
475
476
reader = PdfReader("source.pdf")
477
writer = PdfWriter()
478
479
page = reader.pages[0]
480
481
# Create complex transformation
482
transform = (Transformation()
483
.rotate(45) # Rotate 45 degrees
484
.scale(0.8, 1.2) # Scale differently in X and Y
485
.translate(100, 50) # Move to new position
486
)
487
488
# Apply transformation
489
page.add_transformation(transform)
490
writer.add_page(page)
491
492
with open("custom_transform.pdf", "wb") as output_file:
493
writer.write(output_file)
494
```
495
496
### Text and Image Extraction
497
498
```python
499
from PyPDF2 import PdfReader
500
501
reader = PdfReader("document.pdf")
502
503
for page_num, page in enumerate(reader.pages):
504
# Extract text
505
text = page.extract_text()
506
print(f"Page {page_num + 1} text:")
507
print(text[:200] + "..." if len(text) > 200 else text)
508
509
# Get font information
510
font_names, font_subsets = page.get_fonts()
511
print(f"Fonts used: {font_names}")
512
513
# Get images
514
images = page.get_images()
515
print(f"Images found: {len(images)}")
516
for img_name, img_info in images.items():
517
print(f" - {img_name}: {img_info}")
518
```
519
520
### Creating Blank Pages
521
522
```python
523
from PyPDF2 import PdfWriter, PageObject
524
from PyPDF2.generic import RectangleObject
525
526
writer = PdfWriter()
527
528
# Create custom sized blank page (A4: 595 x 842 points)
529
blank_page = PageObject.create_blank_page(width=595, height=842)
530
531
# You can then add content or merge other pages onto it
532
writer.add_page(blank_page)
533
534
# Create US Letter sized page (8.5" x 11" = 612 x 792 points)
535
letter_page = PageObject.create_blank_page(width=612, height=792)
536
writer.add_page(letter_page)
537
538
with open("blank_pages.pdf", "wb") as output_file:
539
writer.write(output_file)
540
```
541
542
## Utility Functions
543
544
### Right-to-Left Text Extraction Settings
545
546
```python { .api }
547
def set_custom_rtl(_min: int, _max: int, specials: List[int]) -> Tuple[int, int, List[int]]:
548
"""
549
Configure right-to-left text parameters for text extraction.
550
551
Args:
552
_min (int): Minimum character code for RTL
553
_max (int): Maximum character code for RTL
554
specials (list): Special character codes to handle as RTL
555
556
Returns:
557
tuple: Configuration tuple with min, max, and specials
558
"""
559
```
560
561
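`set_custom_rtl` configures which character codes text extraction treats as right-to-left: the range from `_min` to `_max`, plus any codes listed in `specials`. It returns the configuration as a `(min, max, specials)` tuple.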