0
# Document Creation and Modification
1
2
Creating new documents and modifying existing ones including page insertion, deletion, content manipulation, and adding text, images, and other content elements. PyMuPDF provides comprehensive tools for both creating documents from scratch and programmatically modifying existing documents.
3
4
## Capabilities
5
6
### New Document Creation
7
8
Create new PDF documents with custom page sizes and properties.
9
10
```python { .api }
11
def open() -> Document:
12
"""
13
Create new empty document.
14
15
Returns:
16
New Document object
17
"""
18
19
class Document:
20
def new_page(self, pno: int = -1, width: float = 595, height: float = 842) -> Page:
21
"""
22
Create a new page in the document.
23
24
Parameters:
25
- pno: insertion position (-1 to append at end)
26
- width: page width in points (default: A4 width)
27
- height: page height in points (default: A4 height)
28
29
Returns:
30
New Page object
31
"""
32
33
def insert_page(self, pno: int, text: str = None, fontsize: int = 11,
34
width: float = 595, height: float = 842, **kwargs) -> int:
35
"""
36
Insert page with optional text content.
37
38
Parameters:
39
- pno: insertion position
40
- text: initial text content
41
- fontsize: text font size
42
- width: page width in points
43
- height: page height in points
44
- fontname: font name
45
- fontfile: path to font file
46
- color: text color as RGB list
47
48
Returns:
49
Number of text lines inserted (the result of Page.insert_text()); use new_page() or doc[pno] to obtain the Page object
50
"""
51
```
52
53
### Text Insertion and Formatting
54
55
Add text content to pages with comprehensive formatting options.
56
57
```python { .api }
58
class Page:
59
def insert_text(self, point: Point, text: str, fontsize: float = 11,
60
fontname: str = "helv", fontfile: str = None,
61
set_simple: bool = False, encoding: int = 0,
62
color: list = None, fill: list = None,
63
render_mode: int = 0, border_width: float = 1,
64
rotate: int = 0, morph: tuple = None,
65
stroke_opacity: float = 1, fill_opacity: float = 1,
66
oc: int = 0) -> int:
67
"""
68
Insert text at specified position.
69
70
Parameters:
71
- point: insertion point (bottom-left of text)
72
- text: text content to insert
73
- fontsize: font size in points
74
- fontname: font name ("helv", "times", "cour", etc.)
75
- fontfile: path to external font file
76
- set_simple: use simple font encoding
77
- encoding: text encoding (0=Latin, 1=Greek, 2=Cyrillic)
78
- color: text color as RGB list [r, g, b]
79
- fill: fill color as RGB list
80
- render_mode: text rendering mode (0=fill, 1=stroke, 2=fill+stroke, etc.)
81
- border_width: stroke width for outlined text
82
- rotate: rotation angle in degrees
83
- morph: morphing parameters (point, matrix) tuple
84
- stroke_opacity: stroke opacity (0-1)
85
- fill_opacity: fill opacity (0-1)
86
- oc: optional content group reference
87
88
Returns:
89
Number of text lines inserted
90
"""
91
92
def insert_textbox(self, rect: Rect, buffer: str, fontsize: float = 11,
93
fontname: str = "helv", fontfile: str = None,
94
set_simple: bool = False, encoding: int = 0,
95
color: list = None, fill: list = None,
96
render_mode: int = 0, border_width: float = 1,
97
lineheight: float = None, align: int = 0,
98
rotate: int = 0, morph: tuple = None,
99
stroke_opacity: float = 1, fill_opacity: float = 1,
100
oc: int = 0, expandtabs: int = 8) -> float:
101
"""
102
Insert text within a rectangle with automatic wrapping.
103
104
Parameters:
105
- rect: rectangle to contain text
106
- buffer: text content
107
- fontsize: font size in points
108
- fontname: font name
109
- fontfile: path to external font file
110
- set_simple: use simple font encoding
111
- encoding: text encoding
112
- color: text color as RGB list
113
- fill: fill color as RGB list
114
- render_mode: text rendering mode
115
- border_width: stroke width
116
- lineheight: line height multiplier
117
- align: text alignment (0=left, 1=center, 2=right, 3=justify)
118
- rotate: rotation angle
119
- morph: morphing parameters
120
- stroke_opacity: stroke opacity
121
- fill_opacity: fill opacity
122
- oc: optional content group reference
123
- expandtabs: tab expansion size
124
125
Returns:
126
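Unused vertical space in the rectangle when >= 0; a negative value is the space deficit and means the text did not fit and NOTHING was written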
127
"""
128
```
129
130
### Image Insertion
131
132
Add images to pages with positioning and scaling options.
133
134
```python { .api }
135
class Page:
136
def insert_image(self, rect: Rect, filename: str = None,
137
stream: bytes = None, pixmap: Pixmap = None,
138
mask: Pixmap = None, rotate: int = 0,
139
xref: int = 0, oc: int = 0, keep_proportion: bool = True,
140
overlay: bool = True, alpha: int = -1) -> int:
141
"""
142
Insert image into page.
143
144
Parameters:
145
- rect: target rectangle for image
146
- filename: path to image file
147
- stream: image data as bytes
148
- pixmap: Pixmap object to insert
149
- mask: optional mask Pixmap for transparency
150
- rotate: rotation angle (0, 90, 180, 270)
151
- xref: reuse existing image by xref number
152
- oc: optional content group reference
153
- keep_proportion: maintain image aspect ratio
154
- overlay: draw as overlay (True) or underlay (False)
155
- alpha: alpha/transparency value (0-255, -1 for automatic)
156
157
Returns:
158
Cross-reference number of inserted image
159
"""
160
```
161
162
### Drawing Operations
163
164
Add vector graphics and shapes to pages.
165
166
```python { .api }
167
class Shape:
168
def __init__(self, page: Page):
169
"""
170
Create shape drawing context for page.
171
172
Parameters:
173
- page: target Page object
174
"""
175
176
def draw_line(self, p1: Point, p2: Point) -> Point:
177
"""
178
Draw line between two points.
179
180
Parameters:
181
- p1: start point
182
- p2: end point
183
184
Returns:
185
End point for chaining
186
"""
187
188
def draw_bezier(self, p1: Point, p2: Point, p3: Point, p4: Point) -> Point:
189
"""
190
Draw cubic Bezier curve.
191
192
Parameters:
193
- p1: start point
194
- p2: first control point
195
- p3: second control point
196
- p4: end point
197
198
Returns:
199
End point for chaining
200
"""
201
202
def draw_rect(self, rect: Rect) -> Point:
203
"""
204
Draw rectangle.
205
206
Parameters:
207
- rect: rectangle to draw
208
209
Returns:
210
Top-left corner of the rectangle (the path starts and ends there)
211
"""
212
213
def draw_oval(self, rect: Rect) -> Point:
214
"""
215
Draw oval/ellipse within rectangle.
216
217
Parameters:
218
- rect: bounding rectangle
219
220
Returns:
221
Middle point of the rectangle's left edge (the path starts and ends there)
222
"""
223
224
def draw_circle(self, center: Point, radius: float) -> Point:
225
"""
226
Draw circle.
227
228
Parameters:
229
- center: circle center point
230
- radius: circle radius
231
232
Returns:
233
Point(center.x - radius, center.y), where the circle starts and ends
234
"""
235
236
def draw_sector(self, center: Point, point: Point, angle: float) -> Point:
237
"""
238
Draw circular sector.
239
240
Parameters:
241
- center: sector center
242
- point: radius end point
243
- angle: sector angle in degrees
244
245
Returns:
246
The other end point of the sector's arc
247
"""
248
249
def draw_polyline(self, points: list) -> Point:
250
"""
251
Draw connected line segments.
252
253
Parameters:
254
- points: list of Point objects
255
256
Returns:
257
Last point
258
"""
259
260
def draw_polygon(self, points: list) -> Point:
261
"""
262
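Draw closed polygon. NOTE: draw_polygon is not listed in PyMuPDF's documented Shape API; the documented way to draw a closed polygon is draw_polyline(points) followed by finish(closePath=True).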
263
264
Parameters:
265
- points: list of Point objects defining vertices
266
267
Returns:
268
First point
269
"""
270
271
def draw_squiggle(self, p1: Point, p2: Point, breadth: float = 2) -> Point:
272
"""
273
Draw squiggly line (wavy underline).
274
275
Parameters:
276
- p1: start point
277
- p2: end point
278
- breadth: wave amplitude
279
280
Returns:
281
End point
282
"""
283
284
def finish(self, fill: list = None, color: list = None,
285
dashes: str = None, even_odd: bool = False,
286
closePath: bool = True, lineJoin: int = 0,
287
lineCap: int = 0, width: float = 1,
288
stroke_opacity: float = 1, fill_opacity: float = 1,
289
oc: int = 0) -> None:
290
"""
291
Apply styling and finalize drawing operations.
292
293
Parameters:
294
- fill: fill color as RGB list
295
- color: stroke color as RGB list
296
- dashes: dash pattern string
297
- even_odd: use even-odd fill rule
298
- closePath: close the current path
299
- lineJoin: line join style (0=miter, 1=round, 2=bevel)
300
- lineCap: line cap style (0=butt, 1=round, 2=square)
301
- width: line width
302
- stroke_opacity: stroke opacity (0-1)
303
- fill_opacity: fill opacity (0-1)
304
- oc: optional content group reference
305
"""
306
307
def commit(self, overlay: bool = True) -> None:
308
"""
309
Commit all drawing operations to page.
310
311
Parameters:
312
- overlay: draw as overlay (True) or underlay (False)
313
"""
314
```
315
316
### Page Manipulation
317
318
Modify page properties and content arrangement.
319
320
```python { .api }
321
class Page:
322
def set_rotation(self, rotation: int) -> None:
323
"""
324
Set page rotation.
325
326
Parameters:
327
- rotation: rotation angle (0, 90, 180, 270)
328
"""
329
330
def set_cropbox(self, rect: Rect) -> None:
331
"""
332
Set page crop box.
333
334
Parameters:
335
- rect: new crop box rectangle
336
"""
337
338
def set_mediabox(self, rect: Rect) -> None:
339
"""
340
Set page media box.
341
342
Parameters:
343
- rect: new media box rectangle
344
"""
345
346
def clean_contents(self) -> bool:
347
"""
348
Clean and optimize page content stream.
349
350
Returns:
351
True if changes were made
352
"""
353
354
def wrap_contents(self) -> None:
355
"""Wrap page contents in a balanced way."""
356
```
357
358
### Font Management
359
360
Work with fonts for text insertion and formatting.
361
362
```python { .api }
363
class Font:
364
def __init__(self, fontname: str = "helv", fontfile: str = None,
365
fontbuffer: bytes = None, script: int = 0,
366
language: str = None, ordering: int = -1,
367
is_bold: bool = False, is_italic: bool = False,
368
is_serif: bool = False, embed: bool = True):
369
"""
370
Create or load font object.
371
372
Parameters:
373
- fontname: font name or base14 font identifier
374
- fontfile: path to font file (.ttf, .otf, etc.)
375
- fontbuffer: font data as bytes
376
- script: script identifier for Unicode
377
- language: language code
378
- ordering: CJK font ordering
379
- is_bold: prefer bold variant
380
- is_italic: prefer italic variant
381
- is_serif: prefer serif variant
382
- embed: embed font in PDF
383
"""
384
385
def glyph_advance(self, chr: int, script: int = 0, language: str = None,
386
wmode: int = 0) -> float:
387
"""
388
Get glyph advance width.
389
390
Parameters:
391
- chr: character code
392
- script: script identifier
393
- language: language code
394
- wmode: writing mode (0=horizontal, 1=vertical)
395
396
Returns:
397
Glyph advance width
398
"""
399
400
def glyph_bbox(self, chr: int, script: int = 0, language: str = None,
401
wmode: int = 0) -> Rect:
402
"""
403
Get glyph bounding box.
404
405
Parameters:
406
- chr: character code
407
- script: script identifier
408
- language: language code
409
- wmode: writing mode
410
411
Returns:
412
Glyph bounding rectangle
413
"""
414
415
def text_length(self, text: str, fontsize: float = 11, script: int = 0,
416
language: str = None, wmode: int = 0) -> float:
417
"""
418
Calculate text length in points.
419
420
Parameters:
421
- text: text string
422
- fontsize: font size in points
423
- script: script identifier
424
- language: language code
425
- wmode: writing mode
426
427
Returns:
428
Text width in points
429
"""
430
431
@property
432
def name(self) -> str:
433
"""Font name."""
434
435
@property
436
def flags(self) -> dict:
437
"""Font flags dictionary."""
438
439
@property
440
def bbox(self) -> Rect:
441
"""Font bounding box."""
442
443
@property
444
def is_writable(self) -> bool:
445
"""True if font can be used for text insertion."""
446
```
447
448
### Advanced Text Writing
449
450
Sophisticated text layout and formatting capabilities.
451
452
```python { .api }
453
class TextWriter:
454
def __init__(self, page_rect: Rect, opacity: float = 1, color: list = None):
455
"""
456
Create text writer for advanced text layout.
457
458
Parameters:
459
- page_rect: page rectangle bounds
460
- opacity: text opacity (0-1)
461
- color: default text color as RGB list
462
"""
463
464
def append(self, pos: Point, text: str, font: Font = None,
465
fontsize: float = 11, language: str = None,
466
script: int = 0, wmode: int = 0, bidi_level: int = 0,
467
markup_dir: int = 0, small_caps: bool = False) -> tuple:
468
"""
469
Append text at position.
470
471
Parameters:
472
- pos: text position
473
- text: text content
474
- font: Font object to use
475
- fontsize: font size in points
476
- language: language code for text shaping
477
- script: script identifier
478
- wmode: writing mode (0=horizontal, 1=vertical)
479
- bidi_level: bidirectional text level
480
- markup_dir: markup direction
481
- small_caps: use small capitals
482
483
Returns:
484
Tuple (text_rect, last_point): bounding rectangle of the appended text and the position after its last character
485
"""
486
487
def write_text(self, page: Page, opacity: float = None,
488
color: list = None, oc: int = 0,
489
overlay: bool = True, morph: tuple = None,
490
matrix: Matrix = None, render_mode: int = 0,
491
stroke_opacity: float = 1, fill_opacity: float = 1,
492
stroke_color: list = None) -> None:
493
"""
494
Write accumulated text to page.
495
496
Parameters:
497
- page: target Page object
498
- opacity: text opacity override
499
- color: text color override
500
- oc: optional content group reference
501
- overlay: draw as overlay (True) or underlay (False)
502
- morph: morphing transformation
503
- matrix: additional transformation matrix
504
- render_mode: text rendering mode
505
- stroke_opacity: stroke opacity
506
- fill_opacity: fill opacity
507
- stroke_color: stroke color for outlined text
508
"""
509
510
def fill_textbox(self, rect: Rect, text: str, pos: Point = None,
511
font: Font = None, fontsize: float = 11,
512
lineheight: float = None, align: int = 0,
513
warn: bool = None) -> list:
514
"""
515
Fill rectangle with text and automatic line wrapping.
516
517
Parameters:
518
- rect: containing rectangle
519
- text: text content
520
- pos: starting position within rectangle
521
- font: Font object
522
- fontsize: font size
523
- lineheight: line height multiplier
524
- align: text alignment (0=left, 1=center, 2=right, 3=justify)
525
- warn: overflow handling (None=only return the overflow lines, True=also print a warning, False=raise an exception); overflow text is never written
526
527
Returns:
528
List of lines that did not fit, each a (text, length) tuple; empty if everything fit
529
"""
530
531
@property
532
def text_rect(self) -> Rect:
533
"""Bounding rectangle of all added text."""
534
535
@property
536
def last_point(self) -> Point:
537
"""Position after last text insertion."""
538
539
@property
540
def opacity(self) -> float:
541
"""Text opacity."""
542
```
543
544
## Usage Examples
545
546
### Creating New Documents
547
548
```python
549
import pymupdf
550
551
# Create new document
552
doc = pymupdf.open()
553
554
# Add pages with different sizes (all dimensions are in points, 1 pt = 1/72 inch)
page1 = doc.new_page()  # Default A4 portrait (595 x 842)
page2 = doc.new_page(width=792, height=612)  # US Letter landscape
page3 = doc.new_page(width=842, height=1191)  # A3 portrait (297 x 420 would be millimetres)
558
559
# Set document metadata
560
doc.set_metadata({
561
"title": "My New Document",
562
"author": "Author Name",
563
"subject": "Document Subject",
564
"creator": "PyMuPDF Script",
565
"producer": "PyMuPDF",
566
"creationDate": pymupdf.get_pdf_now(),
567
"modDate": pymupdf.get_pdf_now()
568
})
569
570
# Save new document
571
doc.save("new_document.pdf")
572
doc.close()
573
```
574
575
### Adding Text Content
576
577
```python
578
import pymupdf
579
580
doc = pymupdf.open()
581
page = doc.new_page()
582
583
# Insert simple text
584
point = pymupdf.Point(50, 750)  # Near the bottom-left: PyMuPDF's origin is the page's top-left corner and y grows downward
585
page.insert_text(point, "Hello, World!", fontsize=16, color=[0, 0, 1])
586
587
# Insert formatted text box
588
rect = pymupdf.Rect(50, 600, 500, 700)
589
text = """This is a longer text that will be automatically wrapped within the
590
specified rectangle. It demonstrates text box functionality with automatic
591
line breaks and formatting options."""
592
593
page.insert_textbox(
594
rect,
595
text,
596
fontsize=12,
597
align=3, # Justified
598
lineheight=1.2,
599
color=[0.2, 0.2, 0.2]
600
)
601
602
# Insert text with custom font
603
try:
604
# Use built-in font
605
page.insert_text(
606
pymupdf.Point(50, 550),
607
"Text with Times font",
608
fontname="times",
609
fontsize=14,
610
color=[1, 0, 0]
611
)
612
except Exception as e:
613
print(f"Font error: {e}")
614
615
doc.save("text_document.pdf")
616
doc.close()
617
```
618
619
### Advanced Text Formatting
620
621
```python
622
import pymupdf
623
624
doc = pymupdf.open()
625
page = doc.new_page()
626
627
# Create TextWriter for advanced text layout
628
writer = pymupdf.TextWriter(page.rect, color=[0, 0, 0])
629
630
# Create custom font
631
font = pymupdf.Font("helv") # Helvetica
632
633
# Add text with different formatting
634
y_pos = 750
635
636
# Title
637
writer.append(
638
pymupdf.Point(50, y_pos),
639
"Document Title",
640
font=font,
641
fontsize=24
642
)
643
y_pos -= 40
644
645
# Subtitle
646
writer.append(
647
pymupdf.Point(50, y_pos),
648
"Subtitle with different formatting",
649
font=font,
650
fontsize=16
651
)
652
y_pos -= 30
653
654
# Body text
655
body_text = "This is body text with normal formatting. "
656
writer.append(
657
pymupdf.Point(50, y_pos),
658
body_text,
659
font=font,
660
fontsize=12
661
)
662
663
# Fill text box with automatic wrapping
664
rect = pymupdf.Rect(50, 500, 550, 650)
665
long_text = """Lorem ipsum dolor sit amet, consectetur adipiscing elit.
666
Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
667
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris."""
668
669
# fill_textbox() returns the lines that did NOT fit into the rectangle
# (a list of (text, length) tuples), not a character count.
overflow = writer.fill_textbox(
    rect,
    long_text,
    font=font,
    fontsize=11,
    lineheight=1.4,
    align=0,  # Left aligned
)

# An empty list means everything fit; comparing the list with 0 would
# raise TypeError, so test its truthiness instead.
if overflow:
    print(f"Warning: {len(overflow)} lines didn't fit")
680
681
# Write all text to page
682
writer.write_text(page)
683
684
doc.save("advanced_text.pdf")
685
doc.close()
686
```
687
688
### Adding Images
689
690
```python
691
import pymupdf
692
693
doc = pymupdf.open()
694
page = doc.new_page()
695
696
# Insert image from file
697
try:
698
image_rect = pymupdf.Rect(100, 400, 400, 600)
699
page.insert_image(
700
image_rect,
701
filename="sample_image.jpg",
702
keep_proportion=True,
703
overlay=True
704
)
705
except Exception as e:
706
print(f"Image insertion failed: {e}")
707
708
# Create simple colored rectangle as image substitute
709
shape = pymupdf.Shape(page)
710
shape.draw_rect(pymupdf.Rect(100, 200, 400, 350))
711
shape.finish(fill=[0.8, 0.8, 1.0], color=[0, 0, 1], width=2)
712
shape.commit()
713
714
# Add caption
715
page.insert_text(
716
pymupdf.Point(100, 180),
717
"Image Caption",
718
fontsize=10,
719
color=[0.5, 0.5, 0.5]
720
)
721
722
doc.save("document_with_images.pdf")
723
doc.close()
724
```
725
726
### Drawing Shapes and Graphics
727
728
```python
729
import pymupdf
730
731
doc = pymupdf.open()
732
page = doc.new_page()
733
734
# Create shape drawing context
735
shape = pymupdf.Shape(page)
736
737
# Draw various shapes
738
# Rectangle
739
shape.draw_rect(pymupdf.Rect(50, 700, 150, 750))
740
shape.finish(fill=[1, 0, 0], color=[0.5, 0, 0], width=2)
741
742
# Circle
743
shape.draw_circle(pymupdf.Point(250, 725), 25)
744
shape.finish(fill=[0, 1, 0], color=[0, 0.5, 0], width=2)
745
746
# Line
747
shape.draw_line(pymupdf.Point(50, 650), pymupdf.Point(300, 650))
748
shape.finish(color=[0, 0, 1], width=3)
749
750
# Polygon (triangle): Shape has no draw_polygon(); draw the outline with
# draw_polyline() and let finish(closePath=True) connect the last vertex
# back to the first one.
triangle_points = [
    pymupdf.Point(400, 700),
    pymupdf.Point(450, 750),
    pymupdf.Point(350, 750),
]
shape.draw_polyline(triangle_points)
shape.finish(fill=[1, 1, 0], color=[0.5, 0.5, 0], width=2, closePath=True)
758
759
# Bezier curve
760
shape.draw_bezier(
761
pymupdf.Point(50, 600),
762
pymupdf.Point(150, 550),
763
pymupdf.Point(250, 550),
764
pymupdf.Point(350, 600)
765
)
766
shape.finish(color=[1, 0, 1], width=3)
767
768
# Commit all shapes
769
shape.commit(overlay=True)
770
771
# Add labels
772
labels = [
773
(pymupdf.Point(100, 680), "Rectangle"),
774
(pymupdf.Point(250, 680), "Circle"),
775
(pymupdf.Point(100, 630), "Line"),
776
(pymupdf.Point(400, 680), "Triangle"),
777
(pymupdf.Point(200, 580), "Bezier Curve")
778
]
779
780
for point, text in labels:
781
page.insert_text(point, text, fontsize=10)
782
783
doc.save("shapes_document.pdf")
784
doc.close()
785
```
786
787
### Creating Forms and Interactive Elements
788
789
```python
790
import pymupdf
791
792
doc = pymupdf.open()
793
page = doc.new_page()
794
795
# Add form title
796
page.insert_text(
797
pymupdf.Point(50, 750),
798
"Sample Form",
799
fontsize=18,
800
color=[0, 0, 0.5]
801
)
802
803
# Simulate form fields with annotations (these are not fillable widgets; use page.add_widget() for real interactive fields)
804
# Text field
805
text_field_rect = pymupdf.Rect(150, 700, 400, 720)
806
text_annot = page.add_freetext_annot(
807
text_field_rect,
808
"Enter your name",
809
fontsize=12,
810
align=0
811
)
812
text_annot.set_border({"width": 1, "style": "solid"})
813
text_annot.update()
814
815
# Label for text field
816
page.insert_text(
817
pymupdf.Point(50, 715),
818
"Name:",
819
fontsize=12
820
)
821
822
# Checkbox simulation (empty box only; no check mark is drawn)
823
checkbox_rect = pymupdf.Rect(150, 650, 170, 670)
824
shape = pymupdf.Shape(page)
825
shape.draw_rect(checkbox_rect)
826
shape.finish(fill=[1, 1, 1], color=[0, 0, 0], width=1)
827
shape.commit()
828
829
# Add checkbox label
830
page.insert_text(
831
pymupdf.Point(50, 665),
832
"Subscribe to newsletter:",
833
fontsize=12
834
)
835
836
# Instructions
837
instructions = """Instructions:
838
1. Fill in your name in the text field above
839
2. Check the box if you want to subscribe
840
3. Save the document when complete"""
841
842
page.insert_textbox(
843
pymupdf.Rect(50, 500, 500, 600),
844
instructions,
845
fontsize=10,
846
lineheight=1.3
847
)
848
849
doc.save("form_document.pdf")
850
doc.close()
851
```
852
853
### Document Assembly from Multiple Sources
854
855
```python
856
import pymupdf
857
858
def create_report_document(title: str, content_files: list, output_path: str):
    """Create a report PDF: a title page followed by every content source.

    Parameters:
    - title: text printed on the title page
    - content_files: paths to combine, in order; ``.pdf`` files are appended
      page by page, ``.txt`` files are written onto one new page each, and
      any other extension is skipped
    - output_path: where the combined PDF is saved

    A file that cannot be processed is reported on stdout and skipped
    (deliberate best effort); a failure to save propagates to the caller.
    """
    import datetime

    doc = pymupdf.open()
    try:
        # Title page
        title_page = doc.new_page()
        title_page.insert_text(
            pymupdf.Point(50, 400),
            title,
            fontsize=24,
            color=[0, 0, 0.5],
        )

        # Creation date below the title
        date_str = datetime.datetime.now().strftime("%B %d, %Y")
        title_page.insert_text(
            pymupdf.Point(50, 350),
            f"Generated on {date_str}",
            fontsize=12,
            color=[0.5, 0.5, 0.5],
        )

        for file_path in content_files:
            try:
                if file_path.endswith('.pdf'):
                    # The context manager closes the source document even
                    # when insert_pdf() raises.
                    with pymupdf.open(file_path) as source_doc:
                        doc.insert_pdf(source_doc)

                elif file_path.endswith('.txt'):
                    with open(file_path, 'r', encoding='utf-8') as f:
                        text_content = f.read()

                    content_page = doc.new_page()
                    unused = content_page.insert_textbox(
                        pymupdf.Rect(50, 50, 545, 792),
                        text_content,
                        fontsize=11,
                        lineheight=1.3,
                    )
                    # A negative result is the space deficit: the text did
                    # not fit and insert_textbox() wrote nothing at all.
                    if unused < 0:
                        print(
                            f"Warning: text of {file_path} does not fit on "
                            "one page and was not inserted"
                        )

            except Exception as e:
                print(f"Error processing {file_path}: {e}")

        # Save combined document
        doc.save(output_path)
    finally:
        # Release the document even if building or saving failed.
        doc.close()
    print(f"Report saved to {output_path}")
914
915
# Usage
916
content_files = [
917
"introduction.txt",
918
"data_analysis.pdf",
919
"conclusions.txt"
920
]
921
922
create_report_document(
923
"Monthly Report",
924
content_files,
925
"monthly_report.pdf"
926
)
927
```
928
929
### Template-Based Document Generation
930
931
```python
932
import pymupdf
933
934
class DocumentTemplate:
    """PDF template whose ``{{PLACEHOLDER}}`` texts can be replaced.

    The template document stays open until close() is called;
    fill_template() works on a copy and never modifies the template.
    """

    def __init__(self, template_path: str = None):
        """Open ``template_path`` as the template, or build a default one.

        Parameters:
        - template_path: path of an existing template document; when it is
          omitted or empty, a one-page default template is generated
        """
        if template_path:
            self.doc = pymupdf.open(template_path)
        else:
            self.doc = pymupdf.open()
            self._create_default_template()

    def _create_default_template(self):
        """Add one page holding a shaded banner and two placeholders."""
        page = self.doc.new_page()

        # Shaded banner behind the title placeholder.
        # NOTE(review): PyMuPDF's y axis grows downward, so y=750..792 is
        # near the bottom of the default A4 page - confirm the intended
        # placement.
        header_rect = pymupdf.Rect(50, 750, 550, 792)
        shape = pymupdf.Shape(page)
        shape.draw_rect(header_rect)
        shape.finish(fill=[0.9, 0.9, 0.9], color=[0.5, 0.5, 0.5])
        shape.commit()

        # Placeholder texts that fill_template() looks for
        page.insert_text(
            pymupdf.Point(60, 775),
            "{{TITLE}}",
            fontsize=16,
            color=[0.5, 0.5, 0.5],
        )

        page.insert_text(
            pymupdf.Point(60, 720),
            "{{CONTENT}}",
            fontsize=12,
            color=[0.5, 0.5, 0.5],
        )

    def fill_template(self, replacements: dict) -> pymupdf.Document:
        """Return a filled copy of the template.

        Parameters:
        - replacements: mapping of placeholder text to replacement text

        Returns:
        New Document; the caller is responsible for saving and closing it.

        Every text span containing at least one placeholder is removed by
        a redaction and written again with all replacements applied, at
        the span's original baseline and font size.
        """
        # Work on a copy so the template itself stays reusable
        new_doc = pymupdf.open()
        new_doc.insert_pdf(self.doc)

        for page in new_doc:
            # Collect first: the page must not change while its extracted
            # text is still being walked.
            pending = []
            for block in page.get_text("dict")["blocks"]:
                for line in block.get("lines", ()):  # image blocks have no lines
                    for span in line["spans"]:
                        old_text = span["text"]
                        new_text = old_text
                        for placeholder, replacement in replacements.items():
                            new_text = new_text.replace(placeholder, replacement)
                        if new_text != old_text:
                            pending.append((span, new_text))

            if not pending:
                continue

            # Remove the placeholder text; fill=False keeps the redaction
            # from painting a white box over what lies underneath.
            for span, _ in pending:
                page.add_redact_annot(span["bbox"], fill=False)
            page.apply_redactions()

            for span, new_text in pending:
                # insert_text() expects the baseline start of the first
                # character, which is the span's "origin", not the top-left
                # corner of its bbox.
                # NOTE(review): span["font"] is the extracted font name; it
                # is presumably only usable here for built-in (Base-14)
                # fonts - verify for custom templates.
                page.insert_text(
                    pymupdf.Point(span["origin"]),
                    new_text,
                    fontsize=span["size"],
                    fontname=span["font"],
                )

        return new_doc

    def close(self):
        """Close template document."""
        self.doc.close()
1009
1010
# Usage
1011
template = DocumentTemplate()
1012
1013
filled_doc = template.fill_template({
1014
"{{TITLE}}": "Project Status Report",
1015
"{{CONTENT}}": "This project is proceeding according to schedule..."
1016
})
1017
1018
filled_doc.save("filled_document.pdf")
1019
filled_doc.close()
1020
template.close()
1021
```