0
# Page Manipulation
1
2
Page-level operations including rendering, rotation, dimension management, bounding box manipulation, and content processing. The PdfPage class provides comprehensive access to individual PDF pages.
3
4
## Capabilities
5
6
### Page Dimensions
7
8
Read page dimensions in PDF coordinate units (points, where 1 unit = 1/72 inch).
9
10
```python { .api }
11
def get_width(self) -> float:
12
"""Get page width in PDF units."""
13
14
def get_height(self) -> float:
15
"""Get page height in PDF units."""
16
17
def get_size(self) -> tuple[float, float]:
18
"""Get page dimensions as (width, height) tuple."""
19
```
20
21
Example:
22
23
```python
24
import pypdfium2 as pdfium
25
26
pdf = pdfium.PdfDocument("document.pdf")
27
page = pdf[0]
28
29
width = page.get_width()
30
height = page.get_height()
31
w, h = page.get_size()
32
33
print(f"Page size: {width} x {height} PDF units")
34
print(f"Page size: {width/72:.1f} x {height/72:.1f} inches")
35
```
36
37
### Page Rotation
38
39
Manage page rotation in 90-degree increments.
40
41
```python { .api }
42
def get_rotation(self) -> int:
43
"""
44
Get current page rotation in degrees.
45
46
Returns:
47
int: Clockwise rotation (0, 90, 180, or 270 degrees)
48
"""
49
50
def set_rotation(self, rotation: int):
51
"""
52
Set page rotation.
53
54
Parameters:
55
- rotation: int, clockwise rotation in degrees (0, 90, 180, 270)
56
"""
57
```
58
59
Example:
60
61
```python
62
page = pdf[0]
63
64
# Check current rotation
65
current_rotation = page.get_rotation()
66
print(f"Current rotation: {current_rotation} degrees")
67
68
# Set the page rotation to 90 degrees clockwise (an absolute value, not relative to the current rotation)
69
page.set_rotation(90)
70
71
# Reset the rotation to 0 degrees (pass current_rotation instead to restore the original value)
72
page.set_rotation(0)
73
```
74
75
### Bounding Boxes
76
77
Access and modify various page bounding boxes that define page geometry and layout.
78
79
```python { .api }
80
def get_mediabox(self, fallback_ok=True) -> tuple | None:
81
"""
82
Get media box coordinates.
83
84
Parameters:
85
- fallback_ok: bool, if True and the media box is not defined, return the default ANSI A box (0, 0, 612, 792) instead of None
86
87
Returns:
88
tuple: (left, bottom, right, top) coordinates or None
89
"""
90
91
def set_mediabox(self, left: float, bottom: float, right: float, top: float):
92
"""Set media box coordinates."""
93
94
def get_cropbox(self, fallback_ok=True) -> tuple | None:
95
"""Get crop box coordinates (visible area)."""
96
97
def set_cropbox(self, left: float, bottom: float, right: float, top: float):
98
"""Set crop box coordinates."""
99
100
def get_bleedbox(self, fallback_ok=True) -> tuple | None:
101
"""Get bleed box coordinates (printing bleed area)."""
102
103
def set_bleedbox(self, left: float, bottom: float, right: float, top: float):
104
"""Set bleed box coordinates."""
105
106
def get_trimbox(self, fallback_ok=True) -> tuple | None:
107
"""Get trim box coordinates (final trimmed page)."""
108
109
def set_trimbox(self, left: float, bottom: float, right: float, top: float):
110
"""Set trim box coordinates."""
111
112
def get_artbox(self, fallback_ok=True) -> tuple | None:
113
"""Get art box coordinates (meaningful content area)."""
114
115
def set_artbox(self, left: float, bottom: float, right: float, top: float):
116
"""Set art box coordinates."""
117
```
118
119
Box hierarchy and usage:
120
121
```python
122
page = pdf[0]
123
124
# Get media box (full page dimensions)
125
mediabox = page.get_mediabox()
126
print(f"Media box: {mediabox}")
127
128
# Get crop box (visible area when displayed)
129
cropbox = page.get_cropbox()
130
if cropbox:
131
print(f"Crop box: {cropbox}")
132
133
# Set custom crop area
134
page.set_cropbox(50, 50, 550, 750)  # (left, bottom, right, top) in PDF units; a uniform 50-unit margin only on a 600 x 800 page
135
```
136
137
### Page Rendering
138
139
Render pages to a bitmap, a PIL image, or a NumPy array, with extensive customization options.
140
141
```python { .api }
142
def render(self, rotation=0, scale=1, ...) -> PdfBitmap:
143
"""
144
Render page to bitmap.
145
146
Parameters:
147
- rotation: int, additional rotation in degrees (0, 90, 180, 270)
148
- scale: float, scaling factor (1.0 = 72 DPI, 2.0 = 144 DPI)
149
- crop: tuple, optional crop box (left, bottom, right, top)
150
- colour: tuple, background color as (R, G, B) or (R, G, B, A)
151
- fill_to_size: bool, scale to exact size maintaining aspect ratio
152
- optimize_mode: str, rendering optimization mode
153
- draw_annots: bool, include annotations in rendering
154
- draw_forms: bool, include form fields in rendering
155
- grayscale: bool, render in grayscale
156
- rev_byteorder: bool, reverse byte order for pixel data
157
- prefer_bgrx: bool, prefer BGRX pixel format
158
- no_smoothing: bool, disable anti-aliasing
159
- force_halftone: bool, force halftone for images
160
- limit_image_cache: bool, limit image cache usage
161
- force_text_matrix: bool, force text matrix transformation
162
163
Returns:
164
PdfBitmap: Rendered bitmap object
165
"""
166
167
def render_topil(self, **kwargs) -> PIL.Image:
168
"""
169
Render page directly to PIL Image.
170
171
Same parameters as render() method.
172
173
Returns:
174
PIL.Image: PIL Image object
175
"""
176
177
def render_tonumpy(self, **kwargs) -> numpy.ndarray:
178
"""
179
Render page directly to NumPy array.
180
181
Same parameters as render() method.
182
183
Returns:
184
numpy.ndarray: Image data as NumPy array
185
"""
186
```
187
188
Rendering examples:
189
190
```python
191
page = pdf[0]
192
193
# Basic rendering at 150 DPI
194
bitmap = page.render(scale=150/72)
195
pil_image = bitmap.to_pil()
196
pil_image.save("page_150dpi.png")
197
198
# Direct PIL rendering with custom background
199
pil_image = page.render_topil(
200
scale=2.0,
201
colour=(255, 255, 255, 255), # White background
202
draw_annots=True
203
)
204
pil_image.save("page_with_annotations.png")
205
206
# Render rotated page
207
rotated_bitmap = page.render(rotation=90, scale=1.5)
208
209
# Render cropped area
210
cropped_bitmap = page.render(
211
crop=(100, 100, 500, 700),
212
scale=2.0
213
)
214
215
# Grayscale rendering
216
gray_array = page.render_tonumpy(
217
grayscale=True,
218
scale=1.0
219
)
220
```
221
222
### Text Processing
223
224
Access text content and structure within the page.
225
226
```python { .api }
227
def get_textpage(self) -> PdfTextPage:
228
"""
229
Get text page object for text extraction and analysis.
230
231
Returns:
232
PdfTextPage: Text page object providing text extraction capabilities
233
"""
234
```
235
236
Example:
237
238
```python
239
page = pdf[0]
240
textpage = page.get_textpage()
241
242
# Extract all text
243
all_text = textpage.get_text_range()
244
print(f"Page text: {all_text}")
245
246
# Extract text from specific area
247
bounded_text = textpage.get_text_bounded(
248
left=100, bottom=100, right=500, top=700
249
)
250
```
251
252
### Page Objects
253
254
Access and manipulate individual objects within the page (text, images, graphics).
255
256
```python { .api }
257
def count_objects(self) -> int:
258
"""Get number of page objects."""
259
260
def get_object(self, index: int) -> PdfObject:
261
"""
262
Get page object by index.
263
264
Parameters:
265
- index: int, object index (0-based)
266
267
Returns:
268
PdfObject: Page object (may be PdfImage, PdfText, etc.)
269
"""
270
271
def insert_object(self, obj: PdfObject):
272
"""
273
Insert page object into page.
274
275
Parameters:
276
- obj: PdfObject, object to insert
277
"""
278
279
def remove_object(self, obj: PdfObject):
280
"""
281
Remove page object from page.
282
283
Parameters:
284
- obj: PdfObject, object to remove
285
"""
286
287
def gen_content(self):
288
"""Generate content stream from page objects."""
289
```
290
291
Working with page objects:
292
293
```python
294
page = pdf[0]
295
296
# Count objects on page
297
obj_count = page.count_objects()
298
print(f"Page has {obj_count} objects")
299
300
# Iterate through objects
301
for i in range(obj_count):
302
obj = page.get_object(i)
303
print(f"Object {i}: type {obj.type}")
304
305
# Check if it's an image
306
if isinstance(obj, pdfium.PdfImage):
307
print(f" Image size: {obj.get_size()}")
308
# Extract image
309
obj.extract(f"image_{i}.png")
310
311
# Create new image object (requires raw image data)
312
# new_image = pdfium.PdfImage.new(pdf)
313
# page.insert_object(new_image)
314
```
315
316
## Properties
317
318
```python { .api }
319
@property
320
def raw(self) -> FPDF_PAGE:
321
"""Raw PDFium page handle for low-level operations."""
322
323
@property
324
def pdf(self) -> PdfDocument:
325
"""Parent document containing this page."""
326
327
@property
328
def formenv(self):
329
"""Form environment reference for interactive elements."""
330
```
331
332
## Common Page Operations
333
334
### Page Size Conversion
335
336
```python
337
# Convert between units
338
def pdf_to_inches(pdf_units):
339
return pdf_units / 72
340
341
def inches_to_pdf(inches):
342
return inches * 72
343
344
def pdf_to_mm(pdf_units):
345
return pdf_units * 25.4 / 72
346
347
# Standard portrait page sizes as (width, height) in PDF units; A4 is rounded from 595.28 x 841.89
348
PAGE_SIZES = {
349
'letter': (612, 792),
350
'a4': (595, 842),
351
'legal': (612, 1008),
352
'tabloid': (792, 1224)
353
}
354
```
355
356
### Aspect Ratio Calculations
357
358
```python
359
page = pdf[0]
360
width, height = page.get_size()
361
aspect_ratio = width / height
362
363
print(f"Aspect ratio: {aspect_ratio:.2f}")
364
if abs(aspect_ratio - 8.5/11) < 0.01:
365
print("This is likely US Letter size")
366
elif abs(aspect_ratio - 210/297) < 0.01:
367
print("This is likely A4 size")
368
```