or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

advanced.md attachments.md content-streams.md core-operations.md encryption.md forms.md images.md index.md metadata.md objects.md outlines.md pages.md

docs/advanced.md

0

# Advanced Operations

1

2

Specialized capabilities for advanced PDF manipulation: 2D transformation matrices and coordinate-system utilities, the qpdf job interface, name and number trees, global settings, and helper classes. Together they support sophisticated PDF processing and analysis workflows.

3

4

## Capabilities

5

6

### Matrix Class

7

8

2D transformation matrix for coordinate transformations and geometric operations.

9

10

```python { .api }

11

class Matrix:

12

"""

13

PDF transformation matrix for geometric operations.

14

15

Represents a 2D transformation matrix with 6 elements in the form:

16

[a b c d e f] which corresponds to the transformation:

17

x' = a*x + c*y + e

18

y' = b*x + d*y + f

19

20

Used for scaling, rotation, translation, and skewing operations.

21

"""

22

23

def __init__(self, a: float = 1, b: float = 0, c: float = 0,

24

d: float = 1, e: float = 0, f: float = 0) -> None:

25

"""

26

Create a transformation matrix with specified elements.

27

28

Parameters:

29

- a (float): X-scaling component

30

- b (float): Y-skewing component

31

- c (float): X-skewing component

32

- d (float): Y-scaling component

33

- e (float): X-translation component

34

- f (float): Y-translation component

35

36

Default creates an identity matrix (no transformation).

37

"""

38

39

@staticmethod

40

def identity() -> Matrix:

41

"""

42

Create an identity matrix that performs no transformation.

43

44

Returns:

45

Matrix: Identity matrix [1 0 0 1 0 0]

46

"""

47

48

def translated(self, dx: float, dy: float) -> Matrix:

49

"""

50

Create a new matrix with translation applied.

51

52

Parameters:

53

- dx (float): Translation distance in X direction

54

- dy (float): Translation distance in Y direction

55

56

Returns:

57

Matrix: New matrix with translation transformation

58

"""

59

60

def scaled(self, sx: float, sy: float = None) -> Matrix:

61

"""

62

Create a new matrix with scaling applied.

63

64

Parameters:

65

- sx (float): Scale factor in X direction

66

- sy (float, optional): Scale factor in Y direction (defaults to sx for uniform scaling)

67

68

Returns:

69

Matrix: New matrix with scaling transformation

70

"""

71

72

def rotated(self, angle_degrees: float) -> Matrix:

73

"""

74

Create a new matrix with rotation applied.

75

76

Parameters:

77

- angle_degrees (float): Rotation angle in degrees (positive = counterclockwise)

78

79

Returns:

80

Matrix: New matrix with rotation transformation

81

"""

82

83

def inverse(self) -> Matrix:

84

"""

85

Calculate the inverse of this transformation matrix.

86

87

Returns:

88

Matrix: Inverse transformation matrix

89

90

Raises:

91

ValueError: If matrix is not invertible (determinant is zero)

92

"""

93

94

def transform(self, point: tuple[float, float]) -> tuple[float, float]:

95

"""

96

Transform a point using this matrix.

97

98

Parameters:

99

- point (tuple[float, float]): Point coordinates (x, y)

100

101

Returns:

102

tuple[float, float]: Transformed point coordinates (x', y')

103

"""

104

105

def __mul__(self, other: Matrix) -> Matrix:

106

"""

107

Matrix multiplication (composition of transformations).

108

109

Parameters:

110

- other (Matrix): Matrix to multiply with

111

112

Returns:

113

Matrix: Result of matrix multiplication

114

"""

115

116

@property

117

def a(self) -> float:

118

"""X-scaling component of the transformation."""

119

120

@property

121

def b(self) -> float:

122

"""Y-skewing component of the transformation."""

123

124

@property

125

def c(self) -> float:

126

"""X-skewing component of the transformation."""

127

128

@property

129

def d(self) -> float:

130

"""Y-scaling component of the transformation."""

131

132

@property

133

def e(self) -> float:

134

"""X-translation component of the transformation."""

135

136

@property

137

def f(self) -> float:

138

"""Y-translation component of the transformation."""

139

```

140

141

### Job Interface

142

143

Programmatic access to qpdf's command-line job functionality through the `Job` class.

144

145

```python { .api }

146

class Job:

147

"""

148

Command-line job interface for advanced PDF operations.

149

150

Provides access to qpdf's command-line functionality through

151

a programmatic interface, enabling complex PDF processing workflows.

152

"""

153

154

def run(self) -> int:

155

"""

156

Execute the configured job.

157

158

Returns:

159

int: Exit code (0 for success, non-zero for failure)

160

"""

161

162

def check_configuration(self) -> bool:

163

"""

164

Validate the job configuration without executing.

165

166

Returns:

167

bool: True if configuration is valid

168

169

Raises:

170

JobUsageError: If configuration has errors

171

"""

172

173

def create_pdf(self) -> Pdf:

174

"""

175

Create a PDF object from the job configuration.

176

177

Returns:

178

Pdf: PDF object created by the job

179

180

Raises:

181

JobUsageError: If job doesn't create a PDF

182

"""

183

184

def write_pdf(self, pdf: Pdf) -> None:

185

"""

186

Write a PDF using the job's output configuration.

187

188

Parameters:

189

- pdf (Pdf): PDF to write using job settings

190

"""

191

192

@property

193

def creates_output(self) -> bool:

194

"""

195

Whether this job creates output files.

196

197

Returns:

198

bool: True if job will create output

199

"""

200

201

@property

202

def has_warnings(self) -> bool:

203

"""

204

Whether the job execution produced warnings.

205

206

Returns:

207

bool: True if warnings were generated

208

"""

209

210

@property

211

def exit_code(self) -> int:

212

"""

213

Exit code from the last job execution.

214

215

Returns:

216

int: Exit code (0 = success)

217

"""

218

219

@staticmethod

220

def json_out_schema() -> dict:

221

"""

222

Get the JSON schema for job output format.

223

224

Returns:

225

dict: JSON schema describing output structure

226

"""

227

228

@staticmethod

229

def job_json_schema() -> dict:

230

"""

231

Get the JSON schema for job configuration format.

232

233

Returns:

234

dict: JSON schema describing job configuration structure

235

"""

236

```

237

238

### Tree Structures

239

240

Mapping-style interfaces to PDF name trees (byte-string keys in lexical order) and number trees (integer keys in numeric order).

241

242

```python { .api }

243

class NameTree:

244

"""

245

PDF name tree structure for sorted key-value storage.

246

247

Name trees provide efficient storage and retrieval of key-value pairs

248

where keys are byte strings sorted in lexical order.

249

250

Implements MutableMapping[bytes, Object] interface.

251

"""

252

253

@staticmethod

254

def new(pdf: Pdf) -> NameTree:

255

"""

256

Create a new empty name tree.

257

258

Parameters:

259

- pdf (Pdf): PDF document to create the tree in

260

261

Returns:

262

NameTree: New empty name tree

263

"""

264

265

def __len__(self) -> int:

266

"""Number of entries in the name tree."""

267

268

def __iter__(self) -> Iterator[bytes]:

269

"""Iterate over keys in the name tree."""

270

271

def __getitem__(self, key: bytes) -> Object:

272

"""

273

Get value by key.

274

275

Parameters:

276

- key (bytes): Key to look up

277

278

Returns:

279

Object: Value associated with the key

280

281

Raises:

282

KeyError: If key is not found

283

"""

284

285

def __setitem__(self, key: bytes, value: Object) -> None:

286

"""

287

Set key-value pair.

288

289

Parameters:

290

- key (bytes): Key for the entry

291

- value (Object): Value to store

292

"""

293

294

def __delitem__(self, key: bytes) -> None:

295

"""

296

Delete entry by key.

297

298

Parameters:

299

- key (bytes): Key to delete

300

301

Raises:

302

KeyError: If key is not found

303

"""

304

305

def __contains__(self, key: bytes) -> bool:

306

"""Check if key exists in the tree."""

307

308

class NumberTree:

309

"""

310

PDF number tree structure for sorted numeric key-value storage.

311

312

Number trees provide efficient storage and retrieval of key-value pairs

313

where keys are integers sorted in numeric order.

314

315

Implements MutableMapping[int, Object] interface.

316

"""

317

318

@staticmethod

319

def new(pdf: Pdf) -> NumberTree:

320

"""

321

Create a new empty number tree.

322

323

Parameters:

324

- pdf (Pdf): PDF document to create the tree in

325

326

Returns:

327

NumberTree: New empty number tree

328

"""

329

330

def __len__(self) -> int:

331

"""Number of entries in the number tree."""

332

333

def __iter__(self) -> Iterator[int]:

334

"""Iterate over keys in the number tree."""

335

336

def __getitem__(self, key: int) -> Object:

337

"""

338

Get value by numeric key.

339

340

Parameters:

341

- key (int): Numeric key to look up

342

343

Returns:

344

Object: Value associated with the key

345

346

Raises:

347

KeyError: If key is not found

348

"""

349

350

def __setitem__(self, key: int, value: Object) -> None:

351

"""

352

Set key-value pair.

353

354

Parameters:

355

- key (int): Numeric key for the entry

356

- value (Object): Value to store

357

"""

358

359

def __delitem__(self, key: int) -> None:

360

"""

361

Delete entry by numeric key.

362

363

Parameters:

364

- key (int): Key to delete

365

366

Raises:

367

KeyError: If key is not found

368

"""

369

370

def __contains__(self, key: int) -> bool:

371

"""Check if numeric key exists in the tree."""

372

```

373

374

### Coordinate Transformation Utilities

375

376

Helper function for finding objects in a PDF together with their associated transformation matrix (CTM).

377

378

```python { .api }

379

def get_objects_with_ctm(pdf: Pdf) -> list[tuple[Object, Matrix]]:

380

"""

381

Find objects with coordinate transformation matrices (CTM).

382

383

Scans the PDF for objects that have associated transformation

384

matrices, useful for analyzing coordinate system changes.

385

386

Parameters:

387

- pdf (Pdf): PDF document to analyze

388

389

Returns:

390

list[tuple[Object, Matrix]]: List of (object, transformation_matrix) pairs

391

"""

392

```

393

394

### Settings and Configuration

395

396

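Functions in the `pikepdf.settings` module that control library-wide behavior: the decimal precision used for floating-point output and the Flate compression level.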

397

398

```python { .api }

399

def get_decimal_precision() -> int:

400

"""

401

Get the current decimal precision for floating-point output.

402

403

Controls how many decimal places are used when writing

404

floating-point numbers to PDF files.

405

406

Returns:

407

int: Current precision (number of decimal places)

408

"""

409

410

def set_decimal_precision(precision: int) -> None:

411

"""

412

Set the decimal precision for floating-point output.

413

414

Parameters:

415

- precision (int): Number of decimal places (typically 2-6)

416

417

Raises:

418

ValueError: If precision is out of valid range

419

"""

420

421

def set_flate_compression_level(level: int) -> None:

422

"""

423

Set the compression level for Flate (deflate) streams.

424

425

Controls the trade-off between compression speed and compression ratio

426

when compressing PDF streams using Flate encoding.

427

428

Parameters:

429

- level (int): Compression level (0-9, where 0=no compression, 9=maximum compression)

430

431

Raises:

432

ValueError: If level is out of valid range (0-9)

433

"""

434

```

435

436

### Helper Classes

437

438

Utility class for low-level PDF object manipulation; its specific methods are not documented here.

439

440

```python { .api }

441

class ObjectHelper:

442

"""

443

Helper class for PDF object operations.

444

445

Provides utility methods for advanced object manipulation

446

and analysis that don't fit into the main object classes.

447

"""

448

449

# Note: Specific methods would be documented based on actual implementation

450

# This class provides low-level object utilities

451

```

452

453

## Usage Examples

454

455

### Matrix Transformations

456

457

```python

458

import pikepdf

459

import math

460

461

# Create various transformation matrices

462

identity = pikepdf.Matrix.identity()

463

print(f"Identity matrix: [{identity.a}, {identity.b}, {identity.c}, {identity.d}, {identity.e}, {identity.f}]")

464

465

# Translation

466

translate = pikepdf.Matrix().translated(100, 50)

467

print(f"Translation (100, 50): [{translate.a}, {translate.b}, {translate.c}, {translate.d}, {translate.e}, {translate.f}]")

468

469

# Scaling

470

scale = pikepdf.Matrix().scaled(2.0, 1.5) # 2x width, 1.5x height

471

print(f"Scaling (2.0, 1.5): [{scale.a}, {scale.b}, {scale.c}, {scale.d}, {scale.e}, {scale.f}]")

472

473

# Rotation (45 degrees)

474

rotate = pikepdf.Matrix().rotated(45)

475

print(f"Rotation 45°: [{rotate.a:.3f}, {rotate.b:.3f}, {rotate.c:.3f}, {rotate.d:.3f}, {rotate.e}, {rotate.f}]")

476

477

# Combined transformation: scale, then rotate, then translate

478

combined = pikepdf.Matrix().scaled(1.5, 1.5).rotated(30).translated(100, 200)

479

print(f"Combined transform: [{combined.a:.3f}, {combined.b:.3f}, {combined.c:.3f}, {combined.d:.3f}, {combined.e:.1f}, {combined.f:.1f}]")

480

481

# Transform points

482

original_point = (10, 20)

483

transformed_point = combined.transform(original_point)

484

print(f"Point {original_point} -> {transformed_point}")

485

486

# Matrix multiplication (composition)

487

m1 = pikepdf.Matrix().scaled(2, 2)

488

m2 = pikepdf.Matrix().rotated(90)

489

m3 = m1 * m2 # Apply m1 first, then m2

490

print(f"Matrix multiplication result: [{m3.a:.3f}, {m3.b:.3f}, {m3.c:.3f}, {m3.d:.3f}, {m3.e}, {m3.f}]")

491

492

# Inverse transformation

493

original_matrix = pikepdf.Matrix().scaled(2, 3).translated(10, 15)

494

inverse_matrix = original_matrix.inverse()

495

496

# Verify inverse (should return original point)

497

point = (5, 7)

498

transformed = original_matrix.transform(point)

499

back_to_original = inverse_matrix.transform(transformed)

500

print(f"Original: {point}, Transformed: {transformed}, Back: {back_to_original}")

501

```

502

503

### Applying Transformations to PDF Content

504

505

```python

506

import pikepdf

507

508

def apply_transformation_to_page(page, matrix):

509

"""Apply a transformation matrix to all content on a page."""

510

511

# Get existing content

512

if '/Contents' in page:

513

existing_content = page['/Contents']

514

515

# Create transformation commands

516

transform_commands = f"""

517

q

518

{matrix.a} {matrix.b} {matrix.c} {matrix.d} {matrix.e} {matrix.f} cm

519

"""

520

521

restore_commands = "\nQ"

522

523

# Wrap existing content with transformation

524

if isinstance(existing_content, pikepdf.Array):

525

# Multiple content streams

526

transform_stream = pikepdf.Stream(page.owner, transform_commands.encode())

527

restore_stream = pikepdf.Stream(page.owner, restore_commands.encode())

528

529

new_contents = pikepdf.Array([transform_stream])

530

new_contents.extend(existing_content)

531

new_contents.append(restore_stream)

532

533

page['/Contents'] = new_contents

534

else:

535

# Single content stream

536

new_content = transform_commands.encode() + existing_content.read_bytes() + restore_commands.encode()

537

page['/Contents'] = pikepdf.Stream(page.owner, new_content)

538

539

# Apply transformation to a PDF page

540

pdf = pikepdf.open('document.pdf')

541

page = pdf.pages[0]

542

543

# Create a transformation matrix (rotate 15 degrees and scale 90%)

544

transform_matrix = pikepdf.Matrix().rotated(15).scaled(0.9, 0.9)

545

546

# Apply transformation

547

apply_transformation_to_page(page, transform_matrix)

548

549

pdf.save('transformed_document.pdf')

550

pdf.close()

551

print("Applied transformation to page content")

552

```

553

554

### Working with Name and Number Trees

555

556

```python

557

import pikepdf

558

559

# Create a PDF with name tree

560

pdf = pikepdf.new()

561

562

# Create a name tree for storing named destinations

563

name_tree = pikepdf.NameTree.new(pdf)

564

565

# Add entries to the name tree

566

destinations = {

567

b'chapter1': pikepdf.Array([pdf.pages[0], pikepdf.Name.Fit]),

568

b'section1.1': pikepdf.Array([pdf.pages[0], pikepdf.Name.FitH, 700]),

569

b'appendix': pikepdf.Array([pdf.pages[0], pikepdf.Name.FitV, 100]),

570

}

571

572

for name, destination in destinations.items():

573

name_tree[name] = destination

574

print(f"Added destination: {name.decode()} -> {destination}")

575

576

print(f"Name tree contains {len(name_tree)} entries")

577

578

# Iterate through name tree

579

print("All entries in name tree:")

580

for key in name_tree:

581

value = name_tree[key]

582

print(f" {key.decode()}: {value}")

583

584

# Create a number tree for page labels

585

number_tree = pikepdf.NumberTree.new(pdf)

586

587

# Add page labels (page number -> label format)

588

page_labels = {

589

0: pikepdf.Dictionary({'/S': pikepdf.Name.r}), # Roman numerals

590

5: pikepdf.Dictionary({'/S': pikepdf.Name.D, '/P': pikepdf.String('Page ')}), # Decimal with prefix

591

10: pikepdf.Dictionary({'/S': pikepdf.Name.a}), # Lowercase letters

592

}

593

594

for page_num, label_dict in page_labels.items():

595

number_tree[page_num] = label_dict

596

print(f"Added page label: Page {page_num} -> {label_dict}")

597

598

print(f"Number tree contains {len(number_tree)} entries")

599

600

# Save PDF with trees

601

pdf.save('document_with_trees.pdf')

602

pdf.close()

603

```

604

605

### Advanced Job Interface Usage

606

607

```python

608

import pikepdf

609

import json

610

611

def process_pdf_with_job_interface(input_pdf, output_pdf, operations):

612

"""Use job interface for complex PDF processing."""

613

614

try:

615

# Create a job configuration

616

job_config = {

617

'inputFile': input_pdf,

618

'outputFile': output_pdf,

619

'staticId': True, # Reproducible output

620

'deterministicId': True,

621

'operations': operations

622

}

623

624

# Create job from configuration

625

job = pikepdf.Job()

626

627

# Configure job (this is simplified - actual API may differ)

628

# In practice, you'd use specific job configuration methods

629

630

# Validate configuration

631

if job.check_configuration():

632

print("Job configuration is valid")

633

634

# Execute the job

635

exit_code = job.run()

636

637

if exit_code == 0:

638

print(f"Job completed successfully: {input_pdf} -> {output_pdf}")

639

640

if job.has_warnings:

641

print("Job completed with warnings")

642

643

return True

644

else:

645

print(f"Job failed with exit code: {exit_code}")

646

return False

647

else:

648

print("Job configuration is invalid")

649

return False

650

651

except pikepdf.JobUsageError as e:

652

print(f"Job usage error: {e}")

653

return False

654

655

# Example job operations

656

operations = [

657

{'operation': 'qdf', 'parameters': {}}, # Convert to QDF format for inspection

658

{'operation': 'optimize', 'parameters': {'compress-streams': True}},

659

{'operation': 'linearize', 'parameters': {}} # Linearize for fast web view

660

]

661

662

# Process PDF with job interface

663

# success = process_pdf_with_job_interface('input.pdf', 'output.pdf', operations)

664

```

665

666

### Configuration and Settings Management

667

668

```python

669

import pikepdf

670

671

def configure_pikepdf_settings():

672

"""Configure pikepdf global settings for optimal performance."""

673

674

# Get current settings

675

current_precision = pikepdf.settings.get_decimal_precision()

676

print(f"Current decimal precision: {current_precision}")

677

678

# Set precision for clean output (fewer decimal places)

679

pikepdf.settings.set_decimal_precision(3)

680

print("Set decimal precision to 3 places")

681

682

# Set compression level for optimal balance of speed and size

683

pikepdf.settings.set_flate_compression_level(6) # Medium compression

684

print("Set Flate compression level to 6 (medium)")

685

686

# Verify settings

687

new_precision = pikepdf.settings.get_decimal_precision()

688

print(f"New decimal precision: {new_precision}")

689

690

def create_optimized_pdf():

691

"""Create a PDF with optimized settings."""

692

693

# Configure settings for clean, compact output

694

configure_pikepdf_settings()

695

696

# Create PDF

697

pdf = pikepdf.new()

698

page = pdf.add_blank_page()

699

700

# Add content with floating-point coordinates

701

content = """

702

BT

703

/F1 12 Tf

704

100.123456789 700.987654321 Td

705

(Optimized PDF with controlled precision) Tj

706

ET

707

"""

708

709

content_stream = pikepdf.Stream(pdf, content.encode())

710

page['/Contents'] = content_stream

711

712

# Save with compression and optimization

713

pdf.save('optimized_output.pdf',

714

compress_streams=True,

715

normalize_content=True)

716

pdf.close()

717

718

print("Created optimized PDF with controlled precision and compression")

719

720

# Configure and create optimized PDF

721

# create_optimized_pdf()

722

```

723

724

### Advanced Object Analysis

725

726

```python

727

import pikepdf

728

729

def analyze_object_relationships(pdf_path):

730

"""Analyze complex object relationships in a PDF."""

731

732

pdf = pikepdf.open(pdf_path)

733

734

analysis = {

735

'total_objects': len(pdf.objects),

736

'object_types': {},

737

'indirect_objects': 0,

738

'shared_objects': {},

739

'complex_structures': {}

740

}

741

742

# Analyze all objects

743

for (obj_id, gen), obj in pdf.objects.items():

744

# Count object types

745

obj_type = str(obj._type_code)

746

analysis['object_types'][obj_type] = analysis['object_types'].get(obj_type, 0) + 1

747

748

if obj.is_indirect:

749

analysis['indirect_objects'] += 1

750

751

# Find shared objects (referenced multiple times)

752

if obj.is_indirect:

753

# Count references (this is simplified - would need full PDF traversal)

754

analysis['shared_objects'][f"{obj_id}/{gen}"] = {

755

'type': obj_type,

756

'size': len(str(obj)) if hasattr(obj, '__str__') else 0

757

}

758

759

# Find coordinate transformation matrices

760

try:

761

ctm_objects = pikepdf.get_objects_with_ctm(pdf)

762

analysis['complex_structures']['objects_with_ctm'] = len(ctm_objects)

763

764

print(f"Found {len(ctm_objects)} objects with coordinate transformations:")

765

for obj, matrix in ctm_objects[:5]: # Show first 5

766

print(f" Object {obj}: Matrix [{matrix.a:.2f}, {matrix.b:.2f}, {matrix.c:.2f}, {matrix.d:.2f}, {matrix.e:.2f}, {matrix.f:.2f}]")

767

768

except Exception as e:

769

print(f"Could not analyze CTM objects: {e}")

770

771

pdf.close()

772

773

print(f"\nPDF Object Analysis for {pdf_path}:")

774

print(f"Total objects: {analysis['total_objects']}")

775

print(f"Indirect objects: {analysis['indirect_objects']}")

776

777

print(f"\nObject types:")

778

for obj_type, count in sorted(analysis['object_types'].items()):

779

print(f" {obj_type}: {count}")

780

781

return analysis

782

783

# Analyze object relationships

784

# analysis = analyze_object_relationships('complex_document.pdf')

785

```

786

787

### Performance Optimization Techniques

788

789

```python

790

import pikepdf

791

import time

792

from pathlib import Path

793

794

def benchmark_pdf_operations(pdf_path):

795

"""Benchmark various PDF operations for performance analysis."""

796

797

operations = {}

798

799

# Time PDF opening

800

start_time = time.time()

801

pdf = pikepdf.open(pdf_path)

802

operations['open'] = time.time() - start_time

803

804

# Time page access

805

start_time = time.time()

806

page_count = len(pdf.pages)

807

first_page = pdf.pages[0] if page_count > 0 else None

808

operations['page_access'] = time.time() - start_time

809

810

# Time content parsing

811

if first_page:

812

start_time = time.time()

813

try:

814

instructions = pikepdf.parse_content_stream(first_page)

815

operations['content_parsing'] = time.time() - start_time

816

operations['instruction_count'] = len(instructions)

817

except Exception as e:

818

operations['content_parsing'] = f"Failed: {e}"

819

820

# Time object iteration

821

start_time = time.time()

822

object_count = len(pdf.objects)

823

operations['object_iteration'] = time.time() - start_time

824

operations['object_count'] = object_count

825

826

# Time save operation

827

output_path = Path(pdf_path).with_suffix('.benchmark.pdf')

828

start_time = time.time()

829

pdf.save(str(output_path))

830

operations['save'] = time.time() - start_time

831

832

pdf.close()

833

834

# Clean up benchmark file

835

if output_path.exists():

836

output_path.unlink()

837

838

print(f"Performance Benchmark for {pdf_path}:")

839

print(f" Open: {operations['open']:.3f}s")

840

print(f" Page access ({page_count} pages): {operations['page_access']:.3f}s")

841

if 'content_parsing' in operations:

842

if isinstance(operations['content_parsing'], str):

843

print(f" Content parsing: {operations['content_parsing']}")

844

else:

845

print(f" Content parsing ({operations.get('instruction_count', 0)} instructions): {operations['content_parsing']:.3f}s")

846

print(f" Object iteration ({object_count} objects): {operations['object_iteration']:.3f}s")

847

print(f" Save: {operations['save']:.3f}s")

848

849

return operations

850

851

def optimize_pdf_processing():

852

"""Demonstrate techniques for optimizing PDF processing performance."""

853

854

# Configure for optimal performance

855

pikepdf.settings.set_decimal_precision(2) # Reduce precision for speed

856

pikepdf.settings.set_flate_compression_level(1) # Fast compression

857

858

print("Configured pikepdf for performance:")

859

print(f" Decimal precision: {pikepdf.settings.get_decimal_precision()}")

860

print(" Compression level: 1 (fast)")

861

862

# Performance tips:

863

print("\nPerformance optimization tips:")

864

print("1. Use access_mode=pikepdf.AccessMode.mmap for large files")

865

print("2. Set suppress_warnings=True to reduce overhead")

866

print("3. Use static_id=True for reproducible output without timestamp overhead")

867

print("4. Consider stream_decode_level for controlling decoding complexity")

868

print("5. Process pages in batches for large documents")

869

print("6. Cache parsed content streams if reusing")

870

print("7. Use pikepdf.new() instead of opening/clearing for new documents")

871

872

# Run performance analysis

873

# if Path('document.pdf').exists():

874

# benchmark_pdf_operations('document.pdf')

875

876

optimize_pdf_processing()

877

```