or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

attachments.md, cli-tools.md, document-management.md, image-bitmap.md, index.md, page-manipulation.md, page-objects.md, text-processing.md, transformation.md, version-info.md

docs/image-bitmap.md

0

# Image and Bitmap Operations

1

2

Image rendering, manipulation, and extraction with support for multiple output formats including PIL Images, NumPy arrays, and raw bitmaps. The PdfBitmap class provides comprehensive bitmap handling capabilities.

3

4

## Capabilities

5

6

### Bitmap Creation

7

8

Create bitmap objects from various sources including raw PDFium bitmaps, PIL Images, and new empty bitmaps.

9

10

```python { .api }

11

class PdfBitmap:

12

@classmethod

13

def from_raw(cls, raw, rev_byteorder=False, ex_buffer=None) -> PdfBitmap:

14

"""

15

Create bitmap from raw PDFium bitmap handle.

16

17

Parameters:

18

- raw: FPDF_BITMAP, raw PDFium bitmap handle

19

- rev_byteorder: bool, reverse byte order for pixel data

20

- ex_buffer: optional external buffer for pixel data

21

22

Returns:

23

PdfBitmap: Bitmap object wrapping the raw handle

24

"""

25

26

@classmethod

27

def new_native(cls, width: int, height: int, format: int, rev_byteorder=False, buffer=None) -> PdfBitmap:

28

"""

29

Create new native PDFium bitmap.

30

31

Parameters:

32

- width: int, bitmap width in pixels

33

- height: int, bitmap height in pixels

34

- format: int, PDFium bitmap format constant

35

- rev_byteorder: bool, reverse byte order

36

- buffer: optional ctypes array for pixel data

37

38

Returns:

39

PdfBitmap: New native bitmap

40

"""

41

42

@classmethod

43

def new_foreign(cls, width: int, height: int, format: int, rev_byteorder=False, force_packed=False) -> PdfBitmap:

44

"""

45

Create new foreign bitmap with external buffer.

46

47

Parameters:

48

- width: int, bitmap width in pixels

49

- height: int, bitmap height in pixels

50

- format: int, PDFium bitmap format constant

51

- rev_byteorder: bool, reverse byte order

52

- force_packed: bool, force packed pixel format

53

54

Returns:

55

PdfBitmap: New foreign bitmap

56

"""

57

58

@classmethod

59

def new_foreign_simple(cls, width: int, height: int, use_alpha: bool, rev_byteorder=False) -> PdfBitmap:

60

"""

61

Create simple foreign bitmap with automatic format selection.

62

63

Parameters:

64

- width: int, bitmap width in pixels

65

- height: int, bitmap height in pixels

66

- use_alpha: bool, include alpha channel

67

- rev_byteorder: bool, reverse byte order

68

69

Returns:

70

PdfBitmap: New foreign bitmap with RGB or RGBA format

71

"""

72

73

@classmethod

74

def from_pil(cls, pil_image: PIL.Image, recopy=False) -> PdfBitmap:

75

"""

76

Create bitmap from PIL Image.

77

78

Parameters:

79

- pil_image: PIL.Image, source image

80

- recopy: bool, force copy of image data

81

82

Returns:

83

PdfBitmap: Bitmap containing PIL image data

84

"""

85

```

86

87

Bitmap creation examples:

88

89

```python

90

import pypdfium2 as pdfium

91

from PIL import Image

92

import numpy as np

93

94

# Create empty RGB bitmap (800x600)

95

bitmap = pdfium.PdfBitmap.new_foreign_simple(800, 600, use_alpha=False)

96

97

# Create bitmap with alpha channel

98

alpha_bitmap = pdfium.PdfBitmap.new_foreign_simple(800, 600, use_alpha=True)

99

100

# Create from PIL Image

101

pil_img = Image.open("photo.jpg")

102

bitmap_from_pil = pdfium.PdfBitmap.from_pil(pil_img)

103

104

# Create native PDFium bitmap with specific format

105

native_bitmap = pdfium.PdfBitmap.new_native(

106

width=1024,

107

height=768,

108

format=pdfium.raw.FPDFBitmap_BGRA, # Using raw PDFium constants

109

rev_byteorder=False

110

)

111

```

112

113

### Bitmap Properties

114

115

Properties that expose a bitmap's dimensions, pixel format, channel layout, and underlying pixel buffer.

116

117

```python { .api }

118

@property

119

def raw(self) -> FPDF_BITMAP:

120

"""Raw PDFium bitmap handle for low-level operations."""

121

122

@property

123

def buffer(self) -> ctypes.Array:

124

"""Ctypes array containing pixel data."""

125

126

@property

127

def width(self) -> int:

128

"""Bitmap width in pixels."""

129

130

@property

131

def height(self) -> int:

132

"""Bitmap height in pixels."""

133

134

@property

135

def stride(self) -> int:

136

"""Number of bytes per scanline."""

137

138

@property

139

def format(self) -> int:

140

"""PDFium bitmap format constant."""

141

142

@property

143

def rev_byteorder(self) -> bool:

144

"""Whether byte order is reversed."""

145

146

@property

147

def n_channels(self) -> int:

148

"""Number of color channels per pixel."""

149

150

@property

151

def mode(self) -> str:

152

"""PIL-compatible mode string (RGB, RGBA, etc.)."""

153

```

154

155

Property usage:

156

157

```python

158

# Examine bitmap properties

159

bitmap = page.render(scale=2.0)

160

161

print(f"Bitmap dimensions: {bitmap.width} x {bitmap.height}")

162

print(f"Stride: {bitmap.stride} bytes per line")

163

print(f"Channels: {bitmap.n_channels}")

164

print(f"Mode: {bitmap.mode}")

165

print(f"Format: {bitmap.format}")

166

167

# Calculate memory usage

168

pixels = bitmap.width * bitmap.height

169

memory_mb = (pixels * bitmap.n_channels) / (1024 * 1024)

170

print(f"Memory usage: {memory_mb:.1f} MB")

171

```

172

173

### Bitmap Information

174

175

Get structured bitmap information as a named tuple.

176

177

```python { .api }

178

def get_info(self) -> PdfBitmapInfo:

179

"""

180

Get comprehensive bitmap information.

181

182

Returns:

183

PdfBitmapInfo: Named tuple with bitmap metadata

184

"""

185

186

# PdfBitmapInfo named tuple

187

class PdfBitmapInfo(NamedTuple):

188

width: int

189

height: int

190

stride: int

191

format: int

192

rev_byteorder: bool

193

n_channels: int

194

mode: str

195

```

196

197

Example:

198

199

```python

200

bitmap = page.render()

201

info = bitmap.get_info()

202

203

print(f"Bitmap Info:")

204

print(f" Dimensions: {info.width} x {info.height}")

205

print(f" Stride: {info.stride}")

206

print(f" Format: {info.format}")

207

print(f" Channels: {info.n_channels}")

208

print(f" Mode: {info.mode}")

209

print(f" Byte order reversed: {info.rev_byteorder}")

210

```

211

212

### Bitmap Manipulation

213

214

Modify bitmap content by filling rectangular areas with a solid color.

215

216

```python { .api }

217

def fill_rect(self, left: int, top: int, width: int, height: int, color: int):

218

"""

219

Fill rectangular area with solid color.

220

221

Parameters:

222

- left: int, left edge of rectangle in pixels

223

- top: int, top edge of rectangle in pixels

224

- width: int, rectangle width in pixels

225

- height: int, rectangle height in pixels

226

- color: int, color value (format depends on bitmap format)

227

"""

228

```

229

230

Fill operations:

231

232

```python

233

# Create bitmap and fill areas

234

bitmap = pdfium.PdfBitmap.new_foreign_simple(800, 600, use_alpha=False)

235

236

# Fill entire bitmap with white background

237

bitmap.fill_rect(0, 0, 800, 600, 0xFFFFFF) # White

238

239

# Add colored rectangles

240

bitmap.fill_rect(100, 150, 200, 100, 0xFF0000) # Red rectangle

241

bitmap.fill_rect(400, 200, 150, 150, 0x00FF00) # Green rectangle

242

bitmap.fill_rect(200, 350, 300, 50, 0x0000FF) # Blue rectangle

243

244

# Convert to PIL and save

245

pil_image = bitmap.to_pil()

246

pil_image.save("colored_rectangles.png")

247

```

248

249

### Format Conversion

250

251

Convert bitmaps to various output formats including PIL Images and NumPy arrays.

252

253

```python { .api }

254

def to_numpy(self) -> numpy.ndarray:

255

"""

256

Convert bitmap to NumPy array.

257

258

Returns:

259

numpy.ndarray: Image data as NumPy array with shape (height, width, channels)

260

"""

261

262

def to_pil(self) -> PIL.Image:

263

"""

264

Convert bitmap to PIL Image.

265

266

Returns:

267

PIL.Image: PIL Image object with bitmap data

268

"""

269

```

270

271

Conversion examples:

272

273

```python

274

# Render page to bitmap

275

bitmap = page.render(scale=2.0, draw_annots=True)

276

277

# Convert to PIL Image for further processing

278

pil_image = bitmap.to_pil()

279

pil_image = pil_image.rotate(90) # Rotate image

280

pil_image.save("rotated_page.png")

281

282

# Convert to NumPy array for analysis

283

numpy_array = bitmap.to_numpy()

284

print(f"Array shape: {numpy_array.shape}")

285

print(f"Data type: {numpy_array.dtype}")

286

print(f"Min/Max values: {numpy_array.min()}, {numpy_array.max()}")

287

288

# Analyze image statistics

289

import numpy as np

290

mean_color = np.mean(numpy_array, axis=(0, 1))

291

print(f"Average color: {mean_color}")

292

293

# Create grayscale version

294

if len(numpy_array.shape) == 3 and numpy_array.shape[2] >= 3:

295

grayscale = np.dot(numpy_array[...,:3], [0.299, 0.587, 0.114])

296

gray_image = Image.fromarray(grayscale.astype(np.uint8), mode='L')

297

gray_image.save("grayscale_page.png")

298

```

299

300

### High-Quality Rendering

301

302

Advanced rendering options for high-quality output and specific use cases.

303

304

```python

305

def render_high_quality(page, dpi=300, format='PNG'):

306

"""High-quality page rendering example."""

307

308

# Calculate scale for desired DPI

309

scale = dpi / 72.0

310

311

# Render with quality settings

312

bitmap = page.render(

313

scale=scale,

314

colour=(255, 255, 255, 255), # White background

315

draw_annots=True, # Include annotations

316

draw_forms=True, # Include form fields

317

no_smoothing=False, # Enable anti-aliasing

318

optimize_mode='print' # Optimize for printing

319

)

320

321

# Convert to PIL with high quality

322

pil_image = bitmap.to_pil()

323

324

# Save with format-specific options

325

if format.upper() == 'PNG':

326

pil_image.save("high_quality.png",

327

format='PNG',

328

optimize=True)

329

elif format.upper() == 'JPEG':

330

pil_image.save("high_quality.jpg",

331

format='JPEG',

332

quality=95,

333

optimize=True)

334

elif format.upper() == 'TIFF':

335

pil_image.save("high_quality.tiff",

336

format='TIFF',

337

compression='lzw')

338

339

return pil_image

340

341

# Usage

342

pdf = pdfium.PdfDocument("document.pdf")

343

page = pdf[0]

344

345

# Render at print quality (300 DPI)

346

high_res_image = render_high_quality(page, dpi=300, format='PNG')

347

print(f"High resolution image: {high_res_image.size}")

348

```

349

350

### Batch Processing

351

352

Process multiple pages efficiently with consistent rendering settings.

353

354

```python

355

def render_all_pages(pdf_path, output_dir, dpi=150, format='PNG'):

356

"""Render all pages of a PDF to images."""

357

import os

358

359

pdf = pdfium.PdfDocument(pdf_path)

360

361

for i, page in enumerate(pdf):

362

print(f"Rendering page {i+1}/{len(pdf)}")

363

364

# Render page

365

bitmap = page.render(

366

scale=dpi/72.0,

367

colour=(255, 255, 255),

368

draw_annots=True

369

)

370

371

# Convert and save

372

pil_image = bitmap.to_pil()

373

374

filename = f"page_{i+1:03d}.{format.lower()}"

375

filepath = os.path.join(output_dir, filename)

376

377

if format.upper() == 'PNG':

378

pil_image.save(filepath, format='PNG', optimize=True)

379

elif format.upper() == 'JPEG':

380

pil_image.save(filepath, format='JPEG', quality=90)

381

382

print(f"Saved: {filepath}")

383

384

pdf.close()

385

print(f"Rendered {len(pdf)} pages")

386

387

# Usage

388

render_all_pages("document.pdf", "output_images/", dpi=200, format='PNG')

389

```

390

391

### Memory Management

392

393

Efficient memory handling for large bitmaps and batch processing.

394

395

```python

396

def process_large_pdf_efficiently(pdf_path):

397

"""Process large PDF with memory management."""

398

399

pdf = pdfium.PdfDocument(pdf_path)

400

401

for i in range(len(pdf)):

402

# Process one page at a time

403

page = pdf[i]

404

405

# Create text page for analysis

406

textpage = page.get_textpage()

407

char_count = textpage.count_chars()

408

409

if char_count > 1000: # Only render text-heavy pages

410

# Render at reasonable resolution

411

bitmap = page.render(scale=1.5)

412

413

# Process bitmap

414

numpy_array = bitmap.to_numpy()

415

416

# Analyze or save as needed

417

print(f"Page {i+1}: {char_count} chars, image shape {numpy_array.shape}")

418

419

# Clean up explicitly to free memory

420

del bitmap

421

del numpy_array

422

423

# Clean up page objects

424

del textpage

425

del page

426

427

pdf.close()

428

```

429

430

## Bitmap Format Constants

431

432

Common PDFium bitmap format constants available through the raw module:

433

434

```python

435

# Available through pypdfium2.raw

436

FPDFBitmap_Unknown = 0 # Unknown format

437

FPDFBitmap_Gray = 1 # Grayscale

438

FPDFBitmap_BGR = 2 # BGR 24-bit

439

FPDFBitmap_BGRx = 3 # BGRx 32-bit

440

FPDFBitmap_BGRA = 4 # BGRA 32-bit

441

```

442

443

Usage:

444

445

```python

446

import pypdfium2 as pdfium

447

448

# Create bitmap with specific format

449

bitmap = pdfium.PdfBitmap.new_native(

450

800, 600,

451

pdfium.raw.FPDFBitmap_BGRA,

452

rev_byteorder=False

453

)

454

```

455

456

## Custom Rendering Colors

457

458

### PdfColorScheme Class

459

460

Custom color scheme for rendering PDF pages with specific color mappings.

461

462

```python { .api }

463

class PdfColorScheme:

464

"""

465

Rendering color scheme for customizing PDF page appearance.

466

467

Allows specification of custom colors for different PDF elements

468

during rendering operations. Each color should be provided as

469

RGBA values with components ranging from 0 to 255.

470

471

Attributes:

472

- colors: dict, color mappings for different PDF elements

473

"""

474

475

def __init__(self, path_fill: list[int], path_stroke: list[int], text_fill: list[int], text_stroke: list[int]):

476

"""

477

Initialize color scheme with element colors.

478

479

Parameters:

480

- path_fill: list[int], RGBA color for path fill operations [R, G, B, A]

481

- path_stroke: list[int], RGBA color for path stroke operations [R, G, B, A]

482

- text_fill: list[int], RGBA color for text fill operations [R, G, B, A]

483

- text_stroke: list[int], RGBA color for text stroke operations [R, G, B, A]

484

485

Each color component should be an integer from 0-255.

486

"""

487

488

def convert(self, rev_byteorder: bool) -> FPDF_COLORSCHEME:

489

"""

490

Convert color scheme to PDFium format.

491

492

Parameters:

493

- rev_byteorder: bool, whether to use reverse byte order

494

495

Returns:

496

FPDF_COLORSCHEME: PDFium-compatible color scheme object

497

498

Internal method used during rendering to convert Python color

499

values to the format expected by PDFium's rendering engine.

500

"""

501

```

502

503

Creating and using custom color schemes:

504

505

```python

506

import pypdfium2 as pdfium

507

508

# Define custom colors (RGBA values 0-255)

509

dark_theme = pdfium.PdfColorScheme(

510

path_fill=[40, 40, 40, 255], # Dark gray for filled shapes

511

path_stroke=[100, 100, 100, 255], # Light gray for shape outlines

512

text_fill=[220, 220, 220, 255], # Light gray for text

513

text_stroke=[255, 255, 255, 255] # White for text outlines

514

)

515

516

# Render page with custom colors

517

pdf = pdfium.PdfDocument("document.pdf")

518

page = pdf[0]

519

520

bitmap = page.render(

521

scale=2.0,

522

color_scheme=dark_theme,

523

fill_color=(30, 30, 30, 255) # Dark background

524

)

525

526

# Save dark theme version

527

pil_image = bitmap.to_pil()

528

pil_image.save("document_dark_theme.png")

529

530

# High contrast color scheme for accessibility

531

high_contrast = pdfium.PdfColorScheme(

532

path_fill=[0, 0, 0, 255], # Black for shapes

533

path_stroke=[0, 0, 0, 255], # Black for outlines

534

text_fill=[0, 0, 0, 255], # Black for text

535

text_stroke=[0, 0, 0, 255] # Black for text outlines

536

)

537

538

# Render with high contrast on white background

539

high_contrast_bitmap = page.render(

540

scale=2.0,

541

color_scheme=high_contrast,

542

fill_color=(255, 255, 255, 255) # White background

543

)

544

545

high_contrast_pil = high_contrast_bitmap.to_pil()

546

high_contrast_pil.save("document_high_contrast.png")

547

```

548

549

Advanced color scheme examples:

550

551

```python

552

def create_sepia_scheme():

553

"""Create sepia-toned color scheme."""

554

sepia_brown = [160, 130, 98, 255] # Sepia brown

555

sepia_dark = [101, 67, 33, 255] # Dark sepia

556

557

return pdfium.PdfColorScheme(

558

path_fill=sepia_brown,

559

path_stroke=sepia_dark,

560

text_fill=sepia_dark,

561

text_stroke=sepia_dark

562

)

563

564

def create_blueprint_scheme():

565

"""Create blueprint-style color scheme."""

566

blueprint_blue = [0, 162, 232, 255] # Blueprint blue

567

blueprint_white = [255, 255, 255, 255] # White lines

568

569

return pdfium.PdfColorScheme(

570

path_fill=blueprint_blue,

571

path_stroke=blueprint_white,

572

text_fill=blueprint_white,

573

text_stroke=blueprint_white

574

)

575

576

def render_with_multiple_themes(page, output_prefix):

577

"""Render page with different color themes."""

578

579

themes = {

580

'original': None, # No color scheme = original colors

581

'sepia': create_sepia_scheme(),

582

'blueprint': create_blueprint_scheme(),

583

'dark': pdfium.PdfColorScheme(

584

path_fill=[60, 60, 60, 255],

585

path_stroke=[120, 120, 120, 255],

586

text_fill=[200, 200, 200, 255],

587

text_stroke=[240, 240, 240, 255]

588

)

589

}

590

591

backgrounds = {

592

'original': (255, 255, 255, 255), # White

593

'sepia': (245, 235, 215, 255), # Antique white

594

'blueprint': (25, 25, 112, 255), # Dark blue

595

'dark': (20, 20, 20, 255) # Very dark gray

596

}

597

598

for theme_name, color_scheme in themes.items():

599

print(f"Rendering {theme_name} theme...")

600

601

bitmap = page.render(

602

scale=2.0,

603

color_scheme=color_scheme,

604

fill_color=backgrounds[theme_name]

605

)

606

607

pil_image = bitmap.to_pil()

608

pil_image.save(f"{output_prefix}_{theme_name}.png")

609

610

print(f"Saved: {output_prefix}_{theme_name}.png")

611

612

# Usage

613

pdf = pdfium.PdfDocument("document.pdf")

614

page = pdf[0]

615

render_with_multiple_themes(page, "themed_page")

616

617

# Batch process with custom theme

618

def batch_render_with_theme(pdf_path, color_scheme, output_dir):

619

"""Render all pages with custom color scheme."""

620

import os

621

622

pdf = pdfium.PdfDocument(pdf_path)

623

os.makedirs(output_dir, exist_ok=True)

624

625

for i, page in enumerate(pdf):

626

bitmap = page.render(

627

scale=1.5,

628

color_scheme=color_scheme,

629

fill_color=(245, 245, 245, 255) # Light background

630

)

631

632

pil_image = bitmap.to_pil()

633

filename = f"page_{i+1:03d}_themed.png"

634

filepath = os.path.join(output_dir, filename)

635

pil_image.save(filepath)

636

637

print(f"Rendered page {i+1} with custom theme")

638

639

pdf.close()

640

641

# Apply sepia theme to entire document

642

sepia_theme = create_sepia_scheme()

643

batch_render_with_theme("document.pdf", sepia_theme, "sepia_output/")

644

```