Tessl Tile for pypi/pypdfium2@4.30.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

attachments.md cli-tools.md document-management.md image-bitmap.md index.md page-manipulation.md page-objects.md text-processing.md transformation.md version-info.md

docs/page-manipulation.md

0
# Page Manipulation
1

2
Page-level operations including rendering, rotation, dimension management, bounding box manipulation, and content processing. The PdfPage class provides comprehensive access to individual PDF pages.
3

4
## Capabilities
5

6
### Page Dimensions
7

8
Access and manage page dimensions in PDF coordinate units (1/72 inch).
9

10
```python { .api }
11
def get_width(self) -> float:
12
    """Get page width in PDF units."""
13

14
def get_height(self) -> float:
15
    """Get page height in PDF units."""
16

17
def get_size(self) -> tuple[float, float]:
18
    """Get page dimensions as (width, height) tuple."""
19
```
20

21
Example:
22

23
```python
24
import pypdfium2 as pdfium
25

26
pdf = pdfium.PdfDocument("document.pdf")
27
page = pdf[0]
28

29
width = page.get_width()
30
height = page.get_height()
31
w, h = page.get_size()
32

33
print(f"Page size: {width} x {height} PDF units")
34
print(f"Page size: {width/72:.1f} x {height/72:.1f} inches")
35
```
36

37
### Page Rotation
38

39
Manage page rotation in 90-degree increments.
40

41
```python { .api }
42
def get_rotation(self) -> int:
43
    """
44
    Get current page rotation in degrees.
45
    
46
    Returns:
47
    int: Clockwise rotation (0, 90, 180, or 270 degrees)
48
    """
49

50
def set_rotation(self, rotation: int):
51
    """
52
    Set page rotation.
53
    
54
    Parameters:
55
    - rotation: int, clockwise rotation in degrees (0, 90, 180, 270)
56
    """
57
```
58

59
Example:
60

61
```python
62
page = pdf[0]
63

64
# Check current rotation
65
current_rotation = page.get_rotation()
66
print(f"Current rotation: {current_rotation} degrees")
67

68
# Rotate page 90 degrees clockwise
69
page.set_rotation(90)
70

71
# Rotate back to normal
72
page.set_rotation(0)
73
```
74

75
### Bounding Boxes
76

77
Access and modify various page bounding boxes that define page geometry and layout.
78

79
```python { .api }
80
def get_mediabox(self, fallback_ok=True) -> tuple | None:
81
    """
82
    Get media box coordinates.
83
    
84
    Parameters:
85
    - fallback_ok: bool, if the media box is not defined, return the default ANSI A box (0, 0, 612, 792) instead of None
86
    
87
    Returns:  
88
    tuple: (left, bottom, right, top) coordinates or None
89
    """
90

91
def set_mediabox(self, left: float, bottom: float, right: float, top: float):
92
    """Set media box coordinates."""
93

94
def get_cropbox(self, fallback_ok=True) -> tuple | None:
95
    """Get crop box coordinates (visible area)."""
96

97
def set_cropbox(self, left: float, bottom: float, right: float, top: float):
98
    """Set crop box coordinates."""
99

100
def get_bleedbox(self, fallback_ok=True) -> tuple | None:
101
    """Get bleed box coordinates (printing bleed area)."""
102

103
def set_bleedbox(self, left: float, bottom: float, right: float, top: float):
104
    """Set bleed box coordinates."""
105

106
def get_trimbox(self, fallback_ok=True) -> tuple | None:
107
    """Get trim box coordinates (final trimmed page)."""
108

109
def set_trimbox(self, left: float, bottom: float, right: float, top: float):
110
    """Set trim box coordinates."""
111

112
def get_artbox(self, fallback_ok=True) -> tuple | None:
113
    """Get art box coordinates (meaningful content area)."""
114

115
def set_artbox(self, left: float, bottom: float, right: float, top: float):
116
    """Set art box coordinates."""
117
```
118

119
Box hierarchy and usage:
120

121
```python
122
page = pdf[0]
123

124
# Get media box (full page dimensions)
125
mediabox = page.get_mediabox()
126
print(f"Media box: {mediabox}")
127

128
# Get crop box (visible area when displayed)
129
cropbox = page.get_cropbox()
130
if cropbox:
131
    print(f"Crop box: {cropbox}")
132

133
# Set custom crop area
134
page.set_cropbox(50, 50, 550, 750)  # (left, bottom, right, top) in PDF units; a 50-unit inset on a 600 x 800 page
135
```
136

137
### Page Rendering
138

139
Render pages to various image formats with extensive customization options.
140

141
```python { .api }
142
def render(self, rotation=0, scale=1, ...) -> PdfBitmap:
143
    """
144
    Render page to bitmap.
145
    
146
    Parameters:
147
    - rotation: int, additional rotation in degrees (0, 90, 180, 270)
148
    - scale: float, scaling factor (1.0 = 72 DPI, 2.0 = 144 DPI)
149
    - crop: tuple, optional crop box (left, bottom, right, top)
150
    - colour: tuple, background color as (R, G, B) or (R, G, B, A)
151
    - fill_to_size: bool, scale to exact size maintaining aspect ratio
152
    - optimize_mode: str, rendering optimization mode
153
    - draw_annots: bool, include annotations in rendering
154
    - draw_forms: bool, include form fields in rendering
155
    - grayscale: bool, render in grayscale
156
    - rev_byteorder: bool, reverse byte order for pixel data
157
    - prefer_bgrx: bool, prefer BGRX pixel format
158
    - no_smoothing: bool, disable anti-aliasing
159
    - force_halftone: bool, force halftone for images
160
    - limit_image_cache: bool, limit image cache usage
161
    - force_text_matrix: bool, force text matrix transformation
162
    
163
    Returns:
164
    PdfBitmap: Rendered bitmap object
165
    """
166

167
def render_topil(self, **kwargs) -> PIL.Image:
168
    """
169
    Render page directly to PIL Image.
170
    
171
    Same parameters as render() method.
172
    
173
    Returns:
174
    PIL.Image: PIL Image object
175
    """
176

177
def render_tonumpy(self, **kwargs) -> numpy.ndarray:
178
    """
179
    Render page directly to NumPy array.
180
    
181
    Same parameters as render() method.
182
    
183
    Returns:
184
    numpy.ndarray: Image data as NumPy array
185
    """
186
```
187

188
Rendering examples:
189

190
```python
191
page = pdf[0]
192

193
# Basic rendering at 150 DPI
194
bitmap = page.render(scale=150/72)
195
pil_image = bitmap.to_pil()
196
pil_image.save("page_150dpi.png")
197

198
# Direct PIL rendering with custom background
199
pil_image = page.render_topil(
200
    scale=2.0,
201
    colour=(255, 255, 255, 255),  # White background
202
    draw_annots=True
203
)
204
pil_image.save("page_with_annotations.png")
205

206
# Render rotated page
207
rotated_bitmap = page.render(rotation=90, scale=1.5)
208

209
# Render cropped area
210
cropped_bitmap = page.render(
211
    crop=(100, 100, 500, 700),
212
    scale=2.0
213
)
214

215
# Grayscale rendering
216
gray_array = page.render_tonumpy(
217
    grayscale=True,
218
    scale=1.0
219
)
220
```
221

222
### Text Processing
223

224
Access text content and structure within the page.
225

226
```python { .api }
227
def get_textpage(self) -> PdfTextPage:
228
    """
229
    Get text page object for text extraction and analysis.
230
    
231
    Returns:
232
    PdfTextPage: Text page object providing text extraction capabilities
233
    """
234
```
235

236
Example:
237

238
```python
239
page = pdf[0]
240
textpage = page.get_textpage()
241

242
# Extract all text
243
all_text = textpage.get_text_range()
244
print(f"Page text: {all_text}")
245

246
# Extract text from specific area
247
bounded_text = textpage.get_text_bounded(
248
    left=100, bottom=100, right=500, top=700
249
)
250
```
251

252
### Page Objects
253

254
Access and manipulate individual objects within the page (text, images, graphics).
255

256
```python { .api }
257
def count_objects(self) -> int:
258
    """Get number of page objects."""
259

260
def get_object(self, index: int) -> PdfObject:
261
    """
262
    Get page object by index.
263
    
264
    Parameters:
265
    - index: int, object index (0-based)
266
    
267
    Returns:
268
    PdfObject: Page object (may be PdfImage, PdfText, etc.)
269
    """
270

271
def insert_object(self, obj: PdfObject):
272
    """
273
    Insert page object into page.
274
    
275
    Parameters:
276
    - obj: PdfObject, object to insert
277
    """
278

279
def remove_object(self, obj: PdfObject):
280
    """
281
    Remove page object from page.
282
    
283
    Parameters:
284
    - obj: PdfObject, object to remove
285
    """
286

287
def gen_content(self):
288
    """Generate content stream from page objects."""
289
```
290

291
Working with page objects:
292

293
```python
294
page = pdf[0]
295

296
# Count objects on page
297
obj_count = page.count_objects()
298
print(f"Page has {obj_count} objects")
299

300
# Iterate through objects
301
for i in range(obj_count):
302
    obj = page.get_object(i)
303
    print(f"Object {i}: type {obj.type}")
304
    
305
    # Check if it's an image
306
    if isinstance(obj, pdfium.PdfImage):
307
        print(f"  Image size: {obj.get_size()}")
308
        # Extract image
309
        obj.extract(f"image_{i}.png")
310

311
# Create new image object (requires raw image data)
312
# new_image = pdfium.PdfImage.new(pdf)
313
# page.insert_object(new_image)
314
```
315

316
## Properties
317

318
```python { .api }
319
@property
320
def raw(self) -> FPDF_PAGE:
321
    """Raw PDFium page handle for low-level operations."""
322

323
@property
324
def pdf(self) -> PdfDocument:
325
    """Parent document containing this page."""
326

327
@property  
328
def formenv(self):
329
    """Form environment reference for interactive elements."""
330
```
331

332
## Common Page Operations
333

334
### Page Size Conversion
335

336
```python
337
# Convert between units
338
def pdf_to_inches(pdf_units):
339
    return pdf_units / 72
340

341
def inches_to_pdf(inches):
342
    return inches * 72
343

344
def pdf_to_mm(pdf_units):
345
    return pdf_units * 25.4 / 72
346

347
# Standard page sizes in PDF units
348
PAGE_SIZES = {
349
    'letter': (612, 792),
350
    'a4': (595, 842),
351
    'legal': (612, 1008),
352
    'tabloid': (792, 1224)
353
}
354
```
355

356
### Aspect Ratio Calculations
357

358
```python
359
page = pdf[0]
360
width, height = page.get_size()
361
aspect_ratio = width / height
362

363
print(f"Aspect ratio: {aspect_ratio:.2f}")
364
if abs(aspect_ratio - 8.5/11) < 0.01:
365
    print("This is likely US Letter size")
366
elif abs(aspect_ratio - 210/297) < 0.01:
367
    print("This is likely A4 size")
368
```

Version

Tile

Files

docs/page-manipulation.md

docs/page-manipulation.md