or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

annotations-forms.md · document-creation-modification.md · document-operations.md · document-rendering.md · geometry-transformations.md · index.md · page-content-extraction.md · table-extraction.md

docs/document-creation-modification.md

0

# Document Creation and Modification

1

2

Creating new documents and modifying existing ones including page insertion, deletion, content manipulation, and adding text, images, and other content elements. PyMuPDF provides comprehensive tools for both creating documents from scratch and programmatically modifying existing documents.

3

4

## Capabilities

5

6

### New Document Creation

7

8

Create new PDF documents with custom page sizes and properties.

9

10

```python { .api }

11

def open() -> Document:

12

"""

13

Create new empty document.

14

15

Returns:

16

New Document object

17

"""

18

19

class Document:

20

def new_page(self, pno: int = -1, width: float = 595, height: float = 842) -> Page:

21

"""

22

Create a new page in the document.

23

24

Parameters:

25

- pno: insertion position (-1 to append at end)

26

- width: page width in points (default: A4 width)

27

- height: page height in points (default: A4 height)

28

29

Returns:

30

New Page object

31

"""

32

33

def insert_page(self, pno: int, text: str = None, fontsize: int = 11,

34

width: float = 595, height: float = 842, **kwargs) -> Page:

35

"""

36

Insert page with optional text content.

37

38

Parameters:

39

- pno: insertion position

40

- text: initial text content

41

- fontsize: text font size

42

- width: page width in points

43

- height: page height in points

44

- fontname: font name

45

- fontfile: path to font file

46

- color: text color as RGB list

47

48

Returns:

49

New Page object

50

"""

51

```

52

53

### Text Insertion and Formatting

54

55

Add text content to pages with comprehensive formatting options.

56

57

```python { .api }

58

class Page:

59

def insert_text(self, point: Point, text: str, fontsize: float = 11,

60

fontname: str = "helv", fontfile: str = None,

61

set_simple: bool = False, encoding: int = 0,

62

color: list = None, fill: list = None,

63

render_mode: int = 0, border_width: float = 1,

64

rotate: int = 0, morph: tuple = None,

65

stroke_opacity: float = 1, fill_opacity: float = 1,

66

oc: int = 0) -> int:

67

"""

68

Insert text at specified position.

69

70

Parameters:

71

- point: insertion point (bottom-left of text)

72

- text: text content to insert

73

- fontsize: font size in points

74

- fontname: font name ("helv", "times", "cour", etc.)

75

- fontfile: path to external font file

76

- set_simple: use simple font encoding

77

- encoding: text encoding (0=Latin, 1=Greek, 2=Cyrillic)

78

- color: text color as RGB list [r, g, b]

79

- fill: fill color as RGB list

80

- render_mode: text rendering mode (0=fill, 1=stroke, 2=fill+stroke, etc.)

81

- border_width: stroke width for outlined text

82

- rotate: rotation angle in degrees

83

- morph: morphing parameters (point, matrix) tuple

84

- stroke_opacity: stroke opacity (0-1)

85

- fill_opacity: fill opacity (0-1)

86

- oc: optional content group reference

87

88

Returns:

89

Number of successfully inserted characters

90

"""

91

92

def insert_textbox(self, rect: Rect, buffer: str, fontsize: float = 11,

93

fontname: str = "helv", fontfile: str = None,

94

set_simple: bool = False, encoding: int = 0,

95

color: list = None, fill: list = None,

96

render_mode: int = 0, border_width: float = 1,

97

lineheight: float = None, align: int = 0,

98

rotate: int = 0, morph: tuple = None,

99

stroke_opacity: float = 1, fill_opacity: float = 1,

100

oc: int = 0, expandtabs: int = 8) -> float:

101

"""

102

Insert text within a rectangle with automatic wrapping.

103

104

Parameters:

105

- rect: rectangle to contain text

106

- buffer: text content

107

- fontsize: font size in points

108

- fontname: font name

109

- fontfile: path to external font file

110

- set_simple: use simple font encoding

111

- encoding: text encoding

112

- color: text color as RGB list

113

- fill: fill color as RGB list

114

- render_mode: text rendering mode

115

- border_width: stroke width

116

- lineheight: line height multiplier

117

- align: text alignment (0=left, 1=center, 2=right, 3=justify)

118

- rotate: rotation angle

119

- morph: morphing parameters

120

- stroke_opacity: stroke opacity

121

- fill_opacity: fill opacity

122

- oc: optional content group reference

123

- expandtabs: tab expansion size

124

125

Returns:

126

Unused vertical space in rectangle

127

"""

128

```

129

130

### Image Insertion

131

132

Add images to pages with positioning and scaling options.

133

134

```python { .api }

135

class Page:

136

def insert_image(self, rect: Rect, filename: str = None,

137

stream: bytes = None, pixmap: Pixmap = None,

138

mask: Pixmap = None, rotate: int = 0,

139

xref: int = 0, oc: int = 0, keep_proportion: bool = True,

140

overlay: bool = True, alpha: int = -1) -> int:

141

"""

142

Insert image into page.

143

144

Parameters:

145

- rect: target rectangle for image

146

- filename: path to image file

147

- stream: image data as bytes

148

- pixmap: Pixmap object to insert

149

- mask: optional mask Pixmap for transparency

150

- rotate: rotation angle (0, 90, 180, 270)

151

- xref: reuse existing image by xref number

152

- oc: optional content group reference

153

- keep_proportion: maintain image aspect ratio

154

- overlay: draw as overlay (True) or underlay (False)

155

- alpha: alpha/transparency value (0-255, -1 for automatic)

156

157

Returns:

158

Cross-reference number of inserted image

159

"""

160

```

161

162

### Drawing Operations

163

164

Add vector graphics and shapes to pages.

165

166

```python { .api }

167

class Shape:

168

def __init__(self, page: Page):

169

"""

170

Create shape drawing context for page.

171

172

Parameters:

173

- page: target Page object

174

"""

175

176

def draw_line(self, p1: Point, p2: Point) -> Point:

177

"""

178

Draw line between two points.

179

180

Parameters:

181

- p1: start point

182

- p2: end point

183

184

Returns:

185

End point for chaining

186

"""

187

188

def draw_bezier(self, p1: Point, p2: Point, p3: Point, p4: Point) -> Point:

189

"""

190

Draw cubic Bezier curve.

191

192

Parameters:

193

- p1: start point

194

- p2: first control point

195

- p3: second control point

196

- p4: end point

197

198

Returns:

199

End point for chaining

200

"""

201

202

def draw_rect(self, rect: Rect) -> Point:

203

"""

204

Draw rectangle.

205

206

Parameters:

207

- rect: rectangle to draw

208

209

Returns:

210

Bottom-right corner point

211

"""

212

213

def draw_oval(self, rect: Rect) -> Point:

214

"""

215

Draw oval/ellipse within rectangle.

216

217

Parameters:

218

- rect: bounding rectangle

219

220

Returns:

221

Bottom-right corner point

222

"""

223

224

def draw_circle(self, center: Point, radius: float) -> Point:

225

"""

226

Draw circle.

227

228

Parameters:

229

- center: circle center point

230

- radius: circle radius

231

232

Returns:

233

Center point

234

"""

235

236

def draw_sector(self, center: Point, point: Point, angle: float) -> Point:

237

"""

238

Draw circular sector.

239

240

Parameters:

241

- center: sector center

242

- point: radius end point

243

- angle: sector angle in degrees

244

245

Returns:

246

Center point

247

"""

248

249

def draw_polyline(self, points: list) -> Point:

250

"""

251

Draw connected line segments.

252

253

Parameters:

254

- points: list of Point objects

255

256

Returns:

257

Last point

258

"""

259

260

def draw_polygon(self, points: list) -> Point:

261

"""

262

Draw closed polygon.

263

264

Parameters:

265

- points: list of Point objects defining vertices

266

267

Returns:

268

First point

269

"""

270

271

def draw_squiggle(self, p1: Point, p2: Point, breadth: float = 2) -> Point:

272

"""

273

Draw squiggly line (wavy underline).

274

275

Parameters:

276

- p1: start point

277

- p2: end point

278

- breadth: wave amplitude

279

280

Returns:

281

End point

282

"""

283

284

def finish(self, fill: list = None, color: list = None,

285

dashes: str = None, even_odd: bool = False,

286

closePath: bool = False, lineJoin: int = 0,

287

lineCap: int = 0, width: float = 1,

288

stroke_opacity: float = 1, fill_opacity: float = 1,

289

oc: int = 0) -> None:

290

"""

291

Apply styling and finalize drawing operations.

292

293

Parameters:

294

- fill: fill color as RGB list

295

- color: stroke color as RGB list

296

- dashes: dash pattern string

297

- even_odd: use even-odd fill rule

298

- closePath: close the current path

299

- lineJoin: line join style (0=miter, 1=round, 2=bevel)

300

- lineCap: line cap style (0=butt, 1=round, 2=square)

301

- width: line width

302

- stroke_opacity: stroke opacity (0-1)

303

- fill_opacity: fill opacity (0-1)

304

- oc: optional content group reference

305

"""

306

307

def commit(self, overlay: bool = True) -> None:

308

"""

309

Commit all drawing operations to page.

310

311

Parameters:

312

- overlay: draw as overlay (True) or underlay (False)

313

"""

314

```

315

316

### Page Manipulation

317

318

Modify page properties and content arrangement.

319

320

```python { .api }

321

class Page:

322

def set_rotation(self, rotation: int) -> None:

323

"""

324

Set page rotation.

325

326

Parameters:

327

- rotation: rotation angle (0, 90, 180, 270)

328

"""

329

330

def set_cropbox(self, rect: Rect) -> None:

331

"""

332

Set page crop box.

333

334

Parameters:

335

- rect: new crop box rectangle

336

"""

337

338

def set_mediabox(self, rect: Rect) -> None:

339

"""

340

Set page media box.

341

342

Parameters:

343

- rect: new media box rectangle

344

"""

345

346

def clean_contents(self) -> bool:

347

"""

348

Clean and optimize page content stream.

349

350

Returns:

351

True if changes were made

352

"""

353

354

def wrap_contents(self) -> None:

355

"""Wrap page contents in a balanced way."""

356

```

357

358

### Font Management

359

360

Work with fonts for text insertion and formatting.

361

362

```python { .api }

363

class Font:

364

def __init__(self, fontname: str = "helv", fontfile: str = None,

365

fontbuffer: bytes = None, script: int = 0,

366

language: str = None, ordering: int = -1,

367

is_bold: bool = False, is_italic: bool = False,

368

is_serif: bool = False, embed: bool = True):

369

"""

370

Create or load font object.

371

372

Parameters:

373

- fontname: font name or base14 font identifier

374

- fontfile: path to font file (.ttf, .otf, etc.)

375

- fontbuffer: font data as bytes

376

- script: script identifier for Unicode

377

- language: language code

378

- ordering: CJK font ordering

379

- is_bold: prefer bold variant

380

- is_italic: prefer italic variant

381

- is_serif: prefer serif variant

382

- embed: embed font in PDF

383

"""

384

385

def glyph_advance(self, chr: int, script: int = 0, language: str = None,

386

wmode: int = 0) -> float:

387

"""

388

Get glyph advance width.

389

390

Parameters:

391

- chr: character code

392

- script: script identifier

393

- language: language code

394

- wmode: writing mode (0=horizontal, 1=vertical)

395

396

Returns:

397

Glyph advance width

398

"""

399

400

def glyph_bbox(self, chr: int, script: int = 0, language: str = None,

401

wmode: int = 0) -> Rect:

402

"""

403

Get glyph bounding box.

404

405

Parameters:

406

- chr: character code

407

- script: script identifier

408

- language: language code

409

- wmode: writing mode

410

411

Returns:

412

Glyph bounding rectangle

413

"""

414

415

def text_length(self, text: str, fontsize: float = 11, script: int = 0,

416

language: str = None, wmode: int = 0) -> float:

417

"""

418

Calculate text length in points.

419

420

Parameters:

421

- text: text string

422

- fontsize: font size in points

423

- script: script identifier

424

- language: language code

425

- wmode: writing mode

426

427

Returns:

428

Text width in points

429

"""

430

431

@property

432

def name(self) -> str:

433

"""Font name."""

434

435

@property

436

def flags(self) -> dict:

437

"""Font flags dictionary."""

438

439

@property

440

def bbox(self) -> Rect:

441

"""Font bounding box."""

442

443

@property

444

def is_writable(self) -> bool:

445

"""True if font can be used for text insertion."""

446

```

447

448

### Advanced Text Writing

449

450

Sophisticated text layout and formatting capabilities.

451

452

```python { .api }

453

class TextWriter:

454

def __init__(self, page_rect: Rect, opacity: float = 1, color: list = None):

455

"""

456

Create text writer for advanced text layout.

457

458

Parameters:

459

- page_rect: page rectangle bounds

460

- opacity: text opacity (0-1)

461

- color: default text color as RGB list

462

"""

463

464

def append(self, pos: Point, text: str, font: Font = None,

465

fontsize: float = 11, language: str = None,

466

script: int = 0, wmode: int = 0, bidi_level: int = 0,

467

markup_dir: int = 0, small_caps: bool = False) -> Rect:

468

"""

469

Append text at position.

470

471

Parameters:

472

- pos: text position

473

- text: text content

474

- font: Font object to use

475

- fontsize: font size in points

476

- language: language code for text shaping

477

- script: script identifier

478

- wmode: writing mode (0=horizontal, 1=vertical)

479

- bidi_level: bidirectional text level

480

- markup_dir: markup direction

481

- small_caps: use small capitals

482

483

Returns:

484

Text bounding rectangle

485

"""

486

487

def write_text(self, page: Page, opacity: float = None,

488

color: list = None, oc: int = 0,

489

overlay: bool = True, morph: tuple = None,

490

matrix: Matrix = None, render_mode: int = 0,

491

stroke_opacity: float = 1, fill_opacity: float = 1,

492

stroke_color: list = None) -> None:

493

"""

494

Write accumulated text to page.

495

496

Parameters:

497

- page: target Page object

498

- opacity: text opacity override

499

- color: text color override

500

- oc: optional content group reference

501

- overlay: draw as overlay (True) or underlay (False)

502

- morph: morphing transformation

503

- matrix: additional transformation matrix

504

- render_mode: text rendering mode

505

- stroke_opacity: stroke opacity

506

- fill_opacity: fill opacity

507

- stroke_color: stroke color for outlined text

508

"""

509

510

def fill_textbox(self, rect: Rect, text: str, pos: Point = None,

511

font: Font = None, fontsize: float = 11,

512

lineheight: float = None, align: int = 0,

513

warn: bool = True) -> int:

514

"""

515

Fill rectangle with text and automatic line wrapping.

516

517

Parameters:

518

- rect: containing rectangle

519

- text: text content

520

- pos: starting position within rectangle

521

- font: Font object

522

- fontsize: font size

523

- lineheight: line height multiplier

524

- align: text alignment (0=left, 1=center, 2=right, 3=justify)

525

- warn: warn if text doesn't fit

526

527

Returns:

528

Number of characters that didn't fit

529

"""

530

531

@property

532

def text_rect(self) -> Rect:

533

"""Bounding rectangle of all added text."""

534

535

@property

536

def last_point(self) -> Point:

537

"""Position after last text insertion."""

538

539

@property

540

def opacity(self) -> float:

541

"""Text opacity."""

542

```

543

544

## Usage Examples

545

546

### Creating New Documents

547

548

```python

549

import pymupdf

550

551

# Create new document

552

doc = pymupdf.open()

553

554

# Add pages with different sizes

555

page1 = doc.new_page() # Default A4

556

page2 = doc.new_page(width=792, height=612) # US Letter landscape

557

page3 = doc.new_page(width=297, height=420) # Sizes are in points: 297 x 420 pt is roughly A6; A3 would be width=842, height=1191

558

559

# Set document metadata

560

doc.set_metadata({

561

"title": "My New Document",

562

"author": "Author Name",

563

"subject": "Document Subject",

564

"creator": "PyMuPDF Script",

565

"producer": "PyMuPDF",

566

"creationDate": pymupdf.get_pdf_now(),

567

"modDate": pymupdf.get_pdf_now()

568

})

569

570

# Save new document

571

doc.save("new_document.pdf")

572

doc.close()

573

```

574

575

### Adding Text Content

576

577

```python

578

import pymupdf

579

580

doc = pymupdf.open()

581

page = doc.new_page()

582

583

# Insert simple text

584

point = pymupdf.Point(50, 750) # Lower-left area of an A4 page: PyMuPDF's origin is the top-left corner and y grows downward

585

page.insert_text(point, "Hello, World!", fontsize=16, color=[0, 0, 1])

586

587

# Insert formatted text box

588

rect = pymupdf.Rect(50, 600, 500, 700)

589

text = """This is a longer text that will be automatically wrapped within the

590

specified rectangle. It demonstrates text box functionality with automatic

591

line breaks and formatting options."""

592

593

page.insert_textbox(

594

rect,

595

text,

596

fontsize=12,

597

align=3, # Justified

598

lineheight=1.2,

599

color=[0.2, 0.2, 0.2]

600

)

601

602

# Insert text with custom font

603

try:

604

# Use built-in font

605

page.insert_text(

606

pymupdf.Point(50, 550),

607

"Text with Times font",

608

fontname="times",

609

fontsize=14,

610

color=[1, 0, 0]

611

)

612

except Exception as e:

613

print(f"Font error: {e}")

614

615

doc.save("text_document.pdf")

616

doc.close()

617

```

618

619

### Advanced Text Formatting

620

621

```python

622

import pymupdf

623

624

doc = pymupdf.open()

625

page = doc.new_page()

626

627

# Create TextWriter for advanced text layout

628

writer = pymupdf.TextWriter(page.rect, color=[0, 0, 0])

629

630

# Create custom font

631

font = pymupdf.Font("helv") # Helvetica

632

633

# Add text with different formatting

634

y_pos = 750

635

636

# Title

637

writer.append(

638

pymupdf.Point(50, y_pos),

639

"Document Title",

640

font=font,

641

fontsize=24

642

)

643

y_pos -= 40

644

645

# Subtitle

646

writer.append(

647

pymupdf.Point(50, y_pos),

648

"Subtitle with different formatting",

649

font=font,

650

fontsize=16

651

)

652

y_pos -= 30

653

654

# Body text

655

body_text = "This is body text with normal formatting. "

656

writer.append(

657

pymupdf.Point(50, y_pos),

658

body_text,

659

font=font,

660

fontsize=12

661

)

662

663

# Fill text box with automatic wrapping

664

rect = pymupdf.Rect(50, 500, 550, 650)

665

long_text = """Lorem ipsum dolor sit amet, consectetur adipiscing elit.

666

Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.

667

Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris."""

668

669

overflow = writer.fill_textbox(

670

rect,

671

long_text,

672

font=font,

673

fontsize=11,

674

lineheight=1.4,

675

align=0 # Left aligned

676

)

677

678

if overflow > 0:

679

print(f"Warning: {overflow} characters didn't fit")

680

681

# Write all text to page

682

writer.write_text(page)

683

684

doc.save("advanced_text.pdf")

685

doc.close()

686

```

687

688

### Adding Images

689

690

```python

691

import pymupdf

692

693

doc = pymupdf.open()

694

page = doc.new_page()

695

696

# Insert image from file

697

try:

698

image_rect = pymupdf.Rect(100, 400, 400, 600)

699

page.insert_image(

700

image_rect,

701

filename="sample_image.jpg",

702

keep_proportion=True,

703

overlay=True

704

)

705

except Exception as e:

706

print(f"Image insertion failed: {e}")

707

708

# Create simple colored rectangle as image substitute

709

shape = pymupdf.Shape(page)

710

shape.draw_rect(pymupdf.Rect(100, 200, 400, 350))

711

shape.finish(fill=[0.8, 0.8, 1.0], color=[0, 0, 1], width=2)

712

shape.commit()

713

714

# Add caption

715

page.insert_text(

716

pymupdf.Point(100, 180),

717

"Image Caption",

718

fontsize=10,

719

color=[0.5, 0.5, 0.5]

720

)

721

722

doc.save("document_with_images.pdf")

723

doc.close()

724

```

725

726

### Drawing Shapes and Graphics

727

728

```python

729

import pymupdf

730

731

doc = pymupdf.open()

732

page = doc.new_page()

733

734

# Create shape drawing context

735

shape = pymupdf.Shape(page)

736

737

# Draw various shapes

738

# Rectangle

739

shape.draw_rect(pymupdf.Rect(50, 700, 150, 750))

740

shape.finish(fill=[1, 0, 0], color=[0.5, 0, 0], width=2)

741

742

# Circle

743

shape.draw_circle(pymupdf.Point(250, 725), 25)

744

shape.finish(fill=[0, 1, 0], color=[0, 0.5, 0], width=2)

745

746

# Line

747

shape.draw_line(pymupdf.Point(50, 650), pymupdf.Point(300, 650))

748

shape.finish(color=[0, 0, 1], width=3)

749

750

# Polygon (triangle)

751

triangle_points = [

752

pymupdf.Point(400, 700),

753

pymupdf.Point(450, 750),

754

pymupdf.Point(350, 750)

755

]

756

shape.draw_polygon(triangle_points)

757

shape.finish(fill=[1, 1, 0], color=[0.5, 0.5, 0], width=2)

758

759

# Bezier curve

760

shape.draw_bezier(

761

pymupdf.Point(50, 600),

762

pymupdf.Point(150, 550),

763

pymupdf.Point(250, 550),

764

pymupdf.Point(350, 600)

765

)

766

shape.finish(color=[1, 0, 1], width=3)

767

768

# Commit all shapes

769

shape.commit(overlay=True)

770

771

# Add labels

772

labels = [

773

(pymupdf.Point(100, 680), "Rectangle"),

774

(pymupdf.Point(250, 680), "Circle"),

775

(pymupdf.Point(100, 630), "Line"),

776

(pymupdf.Point(400, 680), "Triangle"),

777

(pymupdf.Point(200, 580), "Bezier Curve")

778

]

779

780

for point, text in labels:

781

page.insert_text(point, text, fontsize=10)

782

783

doc.save("shapes_document.pdf")

784

doc.close()

785

```

786

787

### Creating Forms and Interactive Elements

788

789

```python

790

import pymupdf

791

792

doc = pymupdf.open()

793

page = doc.new_page()

794

795

# Add form title

796

page.insert_text(

797

pymupdf.Point(50, 750),

798

"Sample Form",

799

fontsize=18,

800

color=[0, 0, 0.5]

801

)

802

803

# Create form fields by adding annotations

804

# Text field

805

text_field_rect = pymupdf.Rect(150, 700, 400, 720)

806

text_annot = page.add_freetext_annot(

807

text_field_rect,

808

"Enter your name",

809

fontsize=12,

810

align=0

811

)

812

text_annot.set_border({"width": 1, "style": "solid"})

813

text_annot.update()

814

815

# Label for text field

816

page.insert_text(

817

pymupdf.Point(50, 715),

818

"Name:",

819

fontsize=12

820

)

821

822

# Checkbox simulation (rectangle with X)

823

checkbox_rect = pymupdf.Rect(150, 650, 170, 670)

824

shape = pymupdf.Shape(page)

825

shape.draw_rect(checkbox_rect)

826

shape.finish(fill=[1, 1, 1], color=[0, 0, 0], width=1)

827

shape.commit()

828

829

# Add checkbox label

830

page.insert_text(

831

pymupdf.Point(50, 665),

832

"Subscribe to newsletter:",

833

fontsize=12

834

)

835

836

# Instructions

837

instructions = """Instructions:

838

1. Fill in your name in the text field above

839

2. Check the box if you want to subscribe

840

3. Save the document when complete"""

841

842

page.insert_textbox(

843

pymupdf.Rect(50, 500, 500, 600),

844

instructions,

845

fontsize=10,

846

lineheight=1.3

847

)

848

849

doc.save("form_document.pdf")

850

doc.close()

851

```

852

853

### Document Assembly from Multiple Sources

854

855

```python

856

import pymupdf

857

858

def create_report_document(title: str, content_files: list, output_path: str):

859

"""Create report by combining multiple content sources."""

860

861

# Create new document

862

doc = pymupdf.open()

863

864

# Title page

865

title_page = doc.new_page()

866

867

# Add title

868

title_page.insert_text(

869

pymupdf.Point(50, 400),

870

title,

871

fontsize=24,

872

color=[0, 0, 0.5]

873

)

874

875

# Add creation date

876

import datetime

877

date_str = datetime.datetime.now().strftime("%B %d, %Y")

878

title_page.insert_text(

879

pymupdf.Point(50, 350),

880

f"Generated on {date_str}",

881

fontsize=12,

882

color=[0.5, 0.5, 0.5]

883

)

884

885

# Process content files

886

for file_path in content_files:

887

try:

888

if file_path.endswith('.pdf'):

889

# Insert PDF content

890

source_doc = pymupdf.open(file_path)

891

doc.insert_pdf(source_doc)

892

source_doc.close()

893

894

elif file_path.endswith('.txt'):

895

# Insert text content

896

with open(file_path, 'r', encoding='utf-8') as f:

897

text_content = f.read()

898

899

content_page = doc.new_page()

900

content_page.insert_textbox(

901

pymupdf.Rect(50, 50, 545, 792),

902

text_content,

903

fontsize=11,

904

lineheight=1.3

905

)

906

907

except Exception as e:

908

print(f"Error processing {file_path}: {e}")

909

910

# Save combined document

911

doc.save(output_path)

912

doc.close()

913

print(f"Report saved to {output_path}")

914

915

# Usage

916

content_files = [

917

"introduction.txt",

918

"data_analysis.pdf",

919

"conclusions.txt"

920

]

921

922

create_report_document(

923

"Monthly Report",

924

content_files,

925

"monthly_report.pdf"

926

)

927

```

928

929

### Template-Based Document Generation

930

931

```python

932

import pymupdf

933

934

class DocumentTemplate:

935

def __init__(self, template_path: str = None):

936

"""Create document template."""

937

if template_path:

938

self.doc = pymupdf.open(template_path)

939

else:

940

self.doc = pymupdf.open()

941

self._create_default_template()

942

943

def _create_default_template(self):

944

"""Create a default template."""

945

page = self.doc.new_page()

946

947

# Header area

948

header_rect = pymupdf.Rect(50, 750, 550, 792)

949

shape = pymupdf.Shape(page)

950

shape.draw_rect(header_rect)

951

shape.finish(fill=[0.9, 0.9, 0.9], color=[0.5, 0.5, 0.5])

952

shape.commit()

953

954

# Placeholder text

955

page.insert_text(

956

pymupdf.Point(60, 775),

957

"{{TITLE}}",

958

fontsize=16,

959

color=[0.5, 0.5, 0.5]

960

)

961

962

page.insert_text(

963

pymupdf.Point(60, 720),

964

"{{CONTENT}}",

965

fontsize=12,

966

color=[0.5, 0.5, 0.5]

967

)

968

969

def fill_template(self, replacements: dict) -> pymupdf.Document:

970

"""Fill template with actual content."""

971

# Create copy of template

972

new_doc = pymupdf.open()

973

new_doc.insert_pdf(self.doc)

974

975

for page_num in range(new_doc.page_count):

976

page = new_doc.load_page(page_num)

977

978

# Get existing text

979

text_dict = page.get_text("dict")

980

981

# Remove placeholder text and add real content

982

for block in text_dict["blocks"]:

983

if "lines" in block:

984

for line in block["lines"]:

985

for span in line["spans"]:

986

text = span["text"]

987

for placeholder, replacement in replacements.items():

988

if placeholder in text:

989

# NOTE: the old placeholder text is NOT removed in this simplified approach; the new text is drawn on top of it

990

# In practice, you'd need more sophisticated replacement

991

992

# Add new text

993

bbox = span["bbox"]

994

point = pymupdf.Point(bbox[0], bbox[1])

995

996

new_text = text.replace(placeholder, replacement)

997

page.insert_text(

998

point,

999

new_text,

1000

fontsize=span["size"],

1001

fontname=span["font"]

1002

)

1003

1004

return new_doc

1005

1006

def close(self):

1007

"""Close template document."""

1008

self.doc.close()

1009

1010

# Usage

1011

template = DocumentTemplate()

1012

1013

filled_doc = template.fill_template({

1014

"{{TITLE}}": "Project Status Report",

1015

"{{CONTENT}}": "This project is proceeding according to schedule..."

1016

})

1017

1018

filled_doc.save("filled_document.pdf")

1019

filled_doc.close()

1020

template.close()

1021

```