0
# Image and Bitmap Operations
1
2
Image rendering, manipulation, and extraction with support for multiple output formats including PIL Images, NumPy arrays, and raw bitmaps. The PdfBitmap class provides comprehensive bitmap handling capabilities.
3
4
## Capabilities
5
6
### Bitmap Creation
7
8
Create bitmap objects from various sources including raw PDFium bitmaps, PIL Images, and new empty bitmaps.
9
10
```python { .api }
11
class PdfBitmap:
12
@classmethod
13
def from_raw(cls, raw, rev_byteorder=False, ex_buffer=None) -> PdfBitmap:
14
"""
15
Create bitmap from raw PDFium bitmap handle.
16
17
Parameters:
18
- raw: FPDF_BITMAP, raw PDFium bitmap handle
19
- rev_byteorder: bool, reverse byte order for pixel data
20
- ex_buffer: optional external buffer for pixel data
21
22
Returns:
23
PdfBitmap: Bitmap object wrapping the raw handle
24
"""
25
26
@classmethod
27
def new_native(cls, width: int, height: int, format: int, rev_byteorder=False, buffer=None) -> PdfBitmap:
28
"""
29
Create new native PDFium bitmap.
30
31
Parameters:
32
- width: int, bitmap width in pixels
33
- height: int, bitmap height in pixels
34
- format: int, PDFium bitmap format constant
35
- rev_byteorder: bool, reverse byte order
36
- buffer: optional ctypes array for pixel data
37
38
Returns:
39
PdfBitmap: New native bitmap
40
"""
41
42
@classmethod
43
def new_foreign(cls, width: int, height: int, format: int, rev_byteorder=False, force_packed=False) -> PdfBitmap:
44
"""
45
Create new foreign bitmap with external buffer.
46
47
Parameters:
48
- width: int, bitmap width in pixels
49
- height: int, bitmap height in pixels
50
- format: int, PDFium bitmap format constant
51
- rev_byteorder: bool, reverse byte order
52
- force_packed: bool, force packed pixel format
53
54
Returns:
55
PdfBitmap: New foreign bitmap
56
"""
57
58
@classmethod
59
def new_foreign_simple(cls, width: int, height: int, use_alpha: bool, rev_byteorder=False) -> PdfBitmap:
60
"""
61
Create simple foreign bitmap with automatic format selection.
62
63
Parameters:
64
- width: int, bitmap width in pixels
65
- height: int, bitmap height in pixels
66
- use_alpha: bool, include alpha channel
67
- rev_byteorder: bool, reverse byte order
68
69
Returns:
70
PdfBitmap: New foreign bitmap with RGB or RGBA format
71
"""
72
73
@classmethod
74
def from_pil(cls, pil_image: PIL.Image, recopy=False) -> PdfBitmap:
75
"""
76
Create bitmap from PIL Image.
77
78
Parameters:
79
- pil_image: PIL.Image, source image
80
- recopy: bool, force copy of image data
81
82
Returns:
83
PdfBitmap: Bitmap containing PIL image data
84
"""
85
```
86
87
Bitmap creation examples:
88
89
```python
90
import pypdfium2 as pdfium
91
from PIL import Image
92
import numpy as np
93
94
# Create empty RGB bitmap (800x600)
95
bitmap = pdfium.PdfBitmap.new_foreign_simple(800, 600, use_alpha=False)
96
97
# Create bitmap with alpha channel
98
alpha_bitmap = pdfium.PdfBitmap.new_foreign_simple(800, 600, use_alpha=True)
99
100
# Create from PIL Image
101
pil_img = Image.open("photo.jpg")
102
bitmap_from_pil = pdfium.PdfBitmap.from_pil(pil_img)
103
104
# Create native PDFium bitmap with specific format
105
native_bitmap = pdfium.PdfBitmap.new_native(
106
width=1024,
107
height=768,
108
format=pdfium.raw.FPDFBitmap_BGRA, # Using raw PDFium constants
109
rev_byteorder=False
110
)
111
```
112
113
### Bitmap Properties
114
115
Access bitmap metadata and properties for format and dimension information.
116
117
```python { .api }
118
@property
119
def raw(self) -> FPDF_BITMAP:
120
"""Raw PDFium bitmap handle for low-level operations."""
121
122
@property
123
def buffer(self) -> ctypes.Array:
124
"""Ctypes array containing pixel data."""
125
126
@property
127
def width(self) -> int:
128
"""Bitmap width in pixels."""
129
130
@property
131
def height(self) -> int:
132
"""Bitmap height in pixels."""
133
134
@property
135
def stride(self) -> int:
136
"""Number of bytes per scanline."""
137
138
@property
139
def format(self) -> int:
140
"""PDFium bitmap format constant."""
141
142
@property
143
def rev_byteorder(self) -> bool:
144
"""Whether byte order is reversed."""
145
146
@property
147
def n_channels(self) -> int:
148
"""Number of color channels per pixel."""
149
150
@property
151
def mode(self) -> str:
152
"""PIL-compatible mode string (RGB, RGBA, etc.)."""
153
```
154
155
Property usage:
156
157
```python
158
# Examine bitmap properties
bitmap = page.render(scale=2.0)

print(f"Bitmap dimensions: {bitmap.width} x {bitmap.height}")
print(f"Stride: {bitmap.stride} bytes per line")
print(f"Channels: {bitmap.n_channels}")
print(f"Mode: {bitmap.mode}")
print(f"Format: {bitmap.format}")

# Calculate memory usage.
# Use the stride (bytes per scanline) rather than width * n_channels, so the
# figure stays correct if scanlines are wider than the packed pixel data.
buffer_bytes = bitmap.stride * bitmap.height
memory_mb = buffer_bytes / (1024 * 1024)
print(f"Memory usage: {memory_mb:.1f} MB")
171
```
172
173
### Bitmap Information
174
175
Get structured bitmap information as a named tuple.
176
177
```python { .api }
178
def get_info(self) -> PdfBitmapInfo:
179
"""
180
Get comprehensive bitmap information.
181
182
Returns:
183
PdfBitmapInfo: Named tuple with bitmap metadata
184
"""
185
186
# PdfBitmapInfo named tuple
187
class PdfBitmapInfo(NamedTuple):
188
width: int
189
height: int
190
stride: int
191
format: int
192
rev_byteorder: bool
193
n_channels: int
194
mode: str
195
```
196
197
Example:
198
199
```python
200
bitmap = page.render()
201
info = bitmap.get_info()
202
203
print(f"Bitmap Info:")
204
print(f" Dimensions: {info.width} x {info.height}")
205
print(f" Stride: {info.stride}")
206
print(f" Format: {info.format}")
207
print(f" Channels: {info.n_channels}")
208
print(f" Mode: {info.mode}")
209
print(f" Byte order reversed: {info.rev_byteorder}")
210
```
211
212
### Bitmap Manipulation
213
214
Modify bitmap content with drawing and filling operations.
215
216
```python { .api }
217
def fill_rect(self, left: int, top: int, width: int, height: int, color: int):
218
"""
219
Fill rectangular area with solid color.
220
221
Parameters:
222
- left: int, left edge of rectangle in pixels
223
- top: int, top edge of rectangle in pixels
224
- width: int, rectangle width in pixels
225
- height: int, rectangle height in pixels
226
- color: int, color value (format depends on bitmap format)
227
"""
228
```
229
230
Fill operations:
231
232
```python
233
# Create bitmap and fill areas
234
bitmap = pdfium.PdfBitmap.new_foreign_simple(800, 600, use_alpha=False)
235
236
# Fill entire bitmap with white background
237
bitmap.fill_rect(0, 0, 800, 600, 0xFFFFFF) # White
238
239
# Add colored rectangles
240
bitmap.fill_rect(100, 150, 200, 100, 0xFF0000) # Red rectangle
241
bitmap.fill_rect(400, 200, 150, 150, 0x00FF00) # Green rectangle
242
bitmap.fill_rect(200, 350, 300, 50, 0x0000FF) # Blue rectangle
243
244
# Convert to PIL and save
245
pil_image = bitmap.to_pil()
246
pil_image.save("colored_rectangles.png")
247
```
248
249
### Format Conversion
250
251
Convert bitmaps to various output formats including PIL Images and NumPy arrays.
252
253
```python { .api }
254
def to_numpy(self) -> numpy.ndarray:
255
"""
256
Convert bitmap to NumPy array.
257
258
Returns:
259
numpy.ndarray: Image data as NumPy array with shape (height, width, channels)
260
"""
261
262
def to_pil(self) -> PIL.Image:
263
"""
264
Convert bitmap to PIL Image.
265
266
Returns:
267
PIL.Image: PIL Image object with bitmap data
268
"""
269
```
270
271
Conversion examples:
272
273
```python
274
import numpy as np
from PIL import Image

# Render page to bitmap.
# NOTE(review): rev_byteorder=True is requested so that the NumPy array below
# is in RGB(A) channel order; this assumes to_numpy() exposes the buffer in the
# bitmap's own channel order (BGR by default) - confirm against pypdfium2 docs.
bitmap = page.render(scale=2.0, draw_annots=True, rev_byteorder=True)

# Convert to PIL Image for further processing
pil_image = bitmap.to_pil()
pil_image = pil_image.rotate(90)  # Rotate image
pil_image.save("rotated_page.png")

# Convert to NumPy array for analysis
numpy_array = bitmap.to_numpy()
print(f"Array shape: {numpy_array.shape}")
print(f"Data type: {numpy_array.dtype}")
print(f"Min/Max values: {numpy_array.min()}, {numpy_array.max()}")

# Analyze image statistics (mean per channel over all rows and columns)
mean_color = np.mean(numpy_array, axis=(0, 1))
print(f"Average color: {mean_color}")

# Create grayscale version using RGB luma weights on the first three channels
if len(numpy_array.shape) == 3 and numpy_array.shape[2] >= 3:
    grayscale = np.dot(numpy_array[..., :3], [0.299, 0.587, 0.114])
    # A 2-D uint8 array is interpreted as mode 'L'; no explicit mode needed.
    gray_image = Image.fromarray(grayscale.astype(np.uint8))
    gray_image.save("grayscale_page.png")
298
```
299
300
### High-Quality Rendering
301
302
Advanced rendering options for high-quality output and specific use cases.
303
304
```python
305
def render_high_quality(page, dpi=300, format='PNG'):
    """
    High-quality page rendering example.

    Parameters:
    - page: page object to render
    - dpi: int, target resolution; converted to a render scale as dpi / 72
    - format: str, one of 'PNG', 'JPEG' or 'TIFF' (case-insensitive);
      any other value skips saving and only returns the image

    Returns:
    PIL image obtained from the rendered bitmap
    """
    # Calculate scale for desired DPI
    scale = dpi / 72.0

    # Render with quality settings
    bitmap = page.render(
        scale=scale,
        fill_color=(255, 255, 255, 255),  # White background
        draw_annots=True,                 # Include annotations
        draw_forms=True,                  # Include form fields
        no_smoothing=False,               # Enable anti-aliasing
        optimize_mode='print',            # Optimize for printing
    )

    # Convert to PIL with high quality
    pil_image = bitmap.to_pil()

    # Save with format-specific options
    fmt = format.upper()
    if fmt == 'PNG':
        pil_image.save("high_quality.png", format='PNG', optimize=True)
    elif fmt == 'JPEG':
        pil_image.save("high_quality.jpg", format='JPEG', quality=95, optimize=True)
    elif fmt == 'TIFF':
        # Pillow names the LZW codec 'tiff_lzw'; plain 'lzw' is not accepted.
        pil_image.save("high_quality.tiff", format='TIFF', compression='tiff_lzw')

    return pil_image
340
341
# Usage
342
pdf = pdfium.PdfDocument("document.pdf")
343
page = pdf[0]
344
345
# Render at print quality (300 DPI)
346
high_res_image = render_high_quality(page, dpi=300, format='PNG')
347
print(f"High resolution image: {high_res_image.size}")
348
```
349
350
### Batch Processing
351
352
Process multiple pages efficiently with consistent rendering settings.
353
354
```python
355
def render_all_pages(pdf_path, output_dir, dpi=150, format='PNG'):
    """
    Render all pages of a PDF to images.

    Parameters:
    - pdf_path: path of the PDF document to open
    - output_dir: directory receiving the page images (created if missing)
    - dpi: int, target resolution; converted to a render scale as dpi / 72
    - format: str, 'PNG' or 'JPEG' (case-insensitive)
    """
    import os

    # Make sure the target directory exists before any page is saved.
    os.makedirs(output_dir, exist_ok=True)

    pdf = pdfium.PdfDocument(pdf_path)
    try:
        # Read the page count while the document is still open; it is
        # needed again after close() for the summary line.
        n_pages = len(pdf)
        fmt = format.upper()

        for i, page in enumerate(pdf):
            print(f"Rendering page {i+1}/{n_pages}")

            # Render page
            bitmap = page.render(
                scale=dpi / 72.0,
                fill_color=(255, 255, 255, 255),
                draw_annots=True,
            )

            # Convert and save
            pil_image = bitmap.to_pil()

            filename = f"page_{i+1:03d}.{format.lower()}"
            filepath = os.path.join(output_dir, filename)

            if fmt == 'PNG':
                pil_image.save(filepath, format='PNG', optimize=True)
            elif fmt == 'JPEG':
                pil_image.save(filepath, format='JPEG', quality=90)

            print(f"Saved: {filepath}")
    finally:
        # Release the document even if rendering or saving fails.
        pdf.close()

    print(f"Rendered {n_pages} pages")
386
387
# Usage
388
render_all_pages("document.pdf", "output_images/", dpi=200, format='PNG')
389
```
390
391
### Memory Management
392
393
Efficient memory handling for large bitmaps and batch processing.
394
395
```python
396
def process_large_pdf_efficiently(pdf_path):
    """
    Process large PDF with memory management.

    Only pages with more than 1000 characters are rendered; per-page objects
    are dropped as soon as they are no longer needed.

    Parameters:
    - pdf_path: path of the PDF document to open
    """
    pdf = pdfium.PdfDocument(pdf_path)
    try:
        # Process one page at a time
        for i, page in enumerate(pdf):
            # Create text page for analysis
            textpage = page.get_textpage()
            char_count = textpage.count_chars()

            if char_count > 1000:  # Only render text-heavy pages
                # Render at reasonable resolution
                bitmap = page.render(scale=1.5)

                # Process bitmap
                numpy_array = bitmap.to_numpy()

                # Analyze or save as needed
                print(f"Page {i+1}: {char_count} chars, image shape {numpy_array.shape}")

                # Drop the references explicitly so the pixel data can be freed
                del bitmap
                del numpy_array

            # Drop the page objects before moving on to the next page
            del textpage
            del page
    finally:
        # Close the document even if a page fails to process.
        pdf.close()
428
```
429
430
## Bitmap Format Constants
431
432
Common PDFium bitmap format constants available through the raw module:
433
434
```python
435
# Available through pypdfium2.raw
436
FPDFBitmap_Unknown = 0 # Unknown format
437
FPDFBitmap_Gray = 1 # Grayscale
438
FPDFBitmap_BGR = 2 # BGR 24-bit
439
FPDFBitmap_BGRx = 3 # BGRx 32-bit
440
FPDFBitmap_BGRA = 4 # BGRA 32-bit
441
```
442
443
Usage:
444
445
```python
446
import pypdfium2 as pdfium
447
448
# Create bitmap with specific format
449
bitmap = pdfium.PdfBitmap.new_native(
450
800, 600,
451
pdfium.raw.FPDFBitmap_BGRA,
452
rev_byteorder=False
453
)
454
```
455
456
## Custom Rendering Colors
457
458
### PdfColorScheme Class
459
460
Custom color scheme for rendering PDF pages with specific color mappings.
461
462
```python { .api }
463
class PdfColorScheme:
464
"""
465
Rendering color scheme for customizing PDF page appearance.
466
467
Allows specification of custom colors for different PDF elements
468
during rendering operations. Each color should be provided as
469
RGBA values with components ranging from 0 to 255.
470
471
Attributes:
472
- colors: dict, color mappings for different PDF elements
473
"""
474
475
def __init__(self, path_fill: list[int], path_stroke: list[int], text_fill: list[int], text_stroke: list[int]):
476
"""
477
Initialize color scheme with element colors.
478
479
Parameters:
480
- path_fill: list[int], RGBA color for path fill operations [R, G, B, A]
481
- path_stroke: list[int], RGBA color for path stroke operations [R, G, B, A]
482
- text_fill: list[int], RGBA color for text fill operations [R, G, B, A]
483
- text_stroke: list[int], RGBA color for text stroke operations [R, G, B, A]
484
485
Each color component should be an integer from 0-255.
486
"""
487
488
def convert(self, rev_byteorder: bool) -> FPDF_COLORSCHEME:
489
"""
490
Convert color scheme to PDFium format.
491
492
Parameters:
493
- rev_byteorder: bool, whether to use reverse byte order
494
495
Returns:
496
FPDF_COLORSCHEME: PDFium-compatible color scheme object
497
498
Internal method used during rendering to convert Python color
499
values to the format expected by PDFium's rendering engine.
500
"""
501
```
502
503
Creating and using custom color schemes:
504
505
```python
506
import pypdfium2 as pdfium
507
508
# Define custom colors (RGBA values 0-255)
509
dark_theme = pdfium.PdfColorScheme(
510
path_fill=[40, 40, 40, 255], # Dark gray for filled shapes
511
path_stroke=[100, 100, 100, 255], # Light gray for shape outlines
512
text_fill=[220, 220, 220, 255], # Light gray for text
513
text_stroke=[255, 255, 255, 255] # White for text outlines
514
)
515
516
# Render page with custom colors
517
pdf = pdfium.PdfDocument("document.pdf")
518
page = pdf[0]
519
520
bitmap = page.render(
521
scale=2.0,
522
color_scheme=dark_theme,
523
fill_color=(30, 30, 30, 255) # Dark background
524
)
525
526
# Save dark theme version
527
pil_image = bitmap.to_pil()
528
pil_image.save("document_dark_theme.png")
529
530
# High contrast color scheme for accessibility
531
high_contrast = pdfium.PdfColorScheme(
532
path_fill=[0, 0, 0, 255], # Black for shapes
533
path_stroke=[0, 0, 0, 255], # Black for outlines
534
text_fill=[0, 0, 0, 255], # Black for text
535
text_stroke=[0, 0, 0, 255] # Black for text outlines
536
)
537
538
# Render with high contrast on white background
539
high_contrast_bitmap = page.render(
540
scale=2.0,
541
color_scheme=high_contrast,
542
fill_color=(255, 255, 255, 255) # White background
543
)
544
545
high_contrast_pil = high_contrast_bitmap.to_pil()
546
high_contrast_pil.save("document_high_contrast.png")
547
```
548
549
Advanced color scheme examples:
550
551
```python
552
def create_sepia_scheme():
    """Build a PdfColorScheme using sepia tones (RGBA lists, 0-255)."""
    fill_tone = [160, 130, 98, 255]   # Sepia brown, used for filled paths
    ink_tone = [101, 67, 33, 255]     # Dark sepia, shared by strokes and text

    scheme = pdfium.PdfColorScheme(
        path_fill=fill_tone,
        path_stroke=ink_tone,
        text_fill=ink_tone,
        text_stroke=ink_tone,
    )
    return scheme
563
564
def create_blueprint_scheme():
    """Build a PdfColorScheme imitating a blueprint (RGBA lists, 0-255)."""
    fill_tone = [0, 162, 232, 255]     # Blueprint blue, used for filled paths
    line_tone = [255, 255, 255, 255]   # White, shared by strokes and text

    scheme = pdfium.PdfColorScheme(
        path_fill=fill_tone,
        path_stroke=line_tone,
        text_fill=line_tone,
        text_stroke=line_tone,
    )
    return scheme
575
576
def render_with_multiple_themes(page, output_prefix):
    """
    Render one page once per theme and save each result as a PNG.

    Parameters:
    - page: page object to render
    - output_prefix: str, prefix of the output files; each file is named
      "<output_prefix>_<theme name>.png"
    """
    dark_scheme_colors = dict(
        path_fill=[60, 60, 60, 255],
        path_stroke=[120, 120, 120, 255],
        text_fill=[200, 200, 200, 255],
        text_stroke=[240, 240, 240, 255],
    )

    # One row per theme: (name, color scheme, page background).
    # A scheme of None keeps the document's original colors.
    theme_table = [
        ('original', None, (255, 255, 255, 255)),                      # White
        ('sepia', create_sepia_scheme(), (245, 235, 215, 255)),        # Antique white
        ('blueprint', create_blueprint_scheme(), (25, 25, 112, 255)),  # Dark blue
        ('dark', pdfium.PdfColorScheme(**dark_scheme_colors), (20, 20, 20, 255)),  # Very dark gray
    ]

    for theme_name, scheme, background in theme_table:
        print(f"Rendering {theme_name} theme...")

        rendered = page.render(
            scale=2.0,
            color_scheme=scheme,
            fill_color=background,
        )
        rendered.to_pil().save(f"{output_prefix}_{theme_name}.png")

        print(f"Saved: {output_prefix}_{theme_name}.png")
611
612
# Usage
613
pdf = pdfium.PdfDocument("document.pdf")
614
page = pdf[0]
615
render_with_multiple_themes(page, "themed_page")
616
617
# Batch process with custom theme
618
def batch_render_with_theme(pdf_path, color_scheme, output_dir):
    """
    Render all pages with custom color scheme.

    Parameters:
    - pdf_path: path of the PDF document to open
    - color_scheme: color scheme passed to each page.render() call
    - output_dir: directory receiving the PNG files (created if missing)
    """
    import os

    # Create the directory first so a failure here cannot leak an open document.
    os.makedirs(output_dir, exist_ok=True)

    pdf = pdfium.PdfDocument(pdf_path)
    try:
        for i, page in enumerate(pdf):
            bitmap = page.render(
                scale=1.5,
                color_scheme=color_scheme,
                fill_color=(245, 245, 245, 255),  # Light background
            )

            pil_image = bitmap.to_pil()
            filename = f"page_{i+1:03d}_themed.png"
            filepath = os.path.join(output_dir, filename)
            pil_image.save(filepath)

            print(f"Rendered page {i+1} with custom theme")
    finally:
        # Close the document even if rendering or saving raises.
        pdf.close()
640
641
# Apply sepia theme to entire document
642
sepia_theme = create_sepia_scheme()
643
batch_render_with_theme("document.pdf", sepia_theme, "sepia_output/")
644
```