0
# Visual Debugging
1
2
Comprehensive visualization tools for overlaying debug information on PDF pages, including object highlighting, table structure visualization, custom drawing operations, and image export capabilities.
3
4
## Capabilities
5
6
### Page to Image Conversion
7
8
Convert PDF pages to images for visualization and debugging purposes.
9
10
```python { .api }
11
def to_image(resolution=None, width=None, height=None, antialias=False,
12
force_mediabox=False):
13
"""
14
Convert page to image for debugging.
15
16
Parameters (pass at most one of resolution, width, height; otherwise ValueError is raised):
17
- resolution: int or float, optional - Image resolution in DPI (default: 72)
18
- width: int, optional - Target image width in pixels
19
- height: int, optional - Target image height in pixels
20
- antialias: bool - Enable antialiasing for smoother rendering
21
- force_mediabox: bool - Use MediaBox instead of CropBox for dimensions
22
23
Returns:
24
PageImage: Image object with drawing capabilities
25
"""
26
```
27
28
**Usage Examples:**
29
30
```python
31
with pdfplumber.open("document.pdf") as pdf:
32
page = pdf.pages[0]
33
34
# Basic image conversion
35
im = page.to_image()
36
im.save("page.png")
37
38
# High resolution image
39
hires = page.to_image(resolution=300)
40
hires.save("page_hires.png")
41
42
# Specific dimensions
43
thumb = page.to_image(width=400)  # pass only one of resolution, width, or height
44
thumb.save("thumbnail.png")
45
46
# Antialiased rendering
47
smooth = page.to_image(antialias=True)
48
smooth.save("smooth.png")
49
```
50
51
### PageImage Class
52
53
Image representation with comprehensive drawing and debugging capabilities.
54
55
```python { .api }
56
class PageImage:
57
"""Image representation with drawing capabilities."""
58
59
def __init__(self, page, original=None, resolution=72, antialias=False,
60
force_mediabox=False):
61
"""Initialize PageImage from page."""
62
63
@property
64
def page(self) -> Page:
65
"""Source page object."""
66
67
@property
68
def original(self) -> PIL.Image.Image:
69
"""Original page image without annotations."""
70
71
@property
72
def annotated(self) -> PIL.Image.Image:
73
"""Current image with annotations."""
74
75
@property
76
def resolution(self) -> Union[int, float]:
77
"""Image resolution in DPI."""
78
79
@property
80
def scale(self) -> float:
81
"""Scale factor from PDF coordinates to image pixels."""
82
83
def reset(self):
84
"""Reset annotations to original image."""
85
86
def copy(self):
87
"""Create copy of PageImage."""
88
89
def save(self, dest, format="PNG", quantize=True, colors=256, bits=8, **kwargs):
90
"""Save image to file."""
91
92
def show(self):
93
"""Display image (in interactive environments)."""
94
```
95
96
### Drawing Lines
97
98
Draw lines and line collections on the image.
99
100
```python { .api }
101
def draw_line(points_or_obj, stroke=(255, 0, 0, 200), stroke_width=1):
102
"""
103
Draw single line.
104
105
Parameters:
106
- points_or_obj: List of points or line object with coordinates
107
- stroke: Tuple[int, int, int, int] - RGBA color for line
108
- stroke_width: int - Line width in pixels
109
110
Returns:
111
PageImage: Self for method chaining
112
"""
113
114
def draw_lines(list_of_lines, stroke=(255, 0, 0, 200), stroke_width=1):
115
"""
116
Draw multiple lines.
117
118
Parameters:
119
- list_of_lines: List of line objects or point lists
120
- stroke: RGBA color tuple
121
- stroke_width: int - Line width
122
123
Returns:
124
PageImage: Self for method chaining
125
"""
126
127
def draw_vline(location, stroke=(255, 0, 0, 200), stroke_width=1):
128
"""Draw vertical line at X coordinate."""
129
130
def draw_vlines(locations, stroke=(255, 0, 0, 200), stroke_width=1):
131
"""Draw multiple vertical lines."""
132
133
def draw_hline(location, stroke=(255, 0, 0, 200), stroke_width=1):
134
"""Draw horizontal line at Y coordinate."""
135
136
def draw_hlines(locations, stroke=(255, 0, 0, 200), stroke_width=1):
137
"""Draw multiple horizontal lines."""
138
```
139
140
**Usage Examples:**
141
142
```python
143
with pdfplumber.open("document.pdf") as pdf:
144
page = pdf.pages[0]
145
im = page.to_image()
146
147
# Draw all lines on page
148
im.draw_lines(page.lines)
149
150
# Draw custom line
151
im.draw_line([(100, 100), (200, 200)], stroke=(0, 255, 0, 255), stroke_width=3)
152
153
# Draw grid lines
154
im.draw_vlines([100, 200, 300, 400], stroke=(0, 0, 255, 100))
155
im.draw_hlines([100, 200, 300], stroke=(0, 0, 255, 100))
156
157
im.save("lines_debug.png")
158
```
159
160
### Drawing Rectangles
161
162
Draw rectangles and rectangle collections with fill and stroke options.
163
164
```python { .api }
165
def draw_rect(bbox_or_obj, fill=(0, 0, 255, 50), stroke=(255, 0, 0, 200),
166
stroke_width=1):
167
"""
168
Draw rectangle.
169
170
Parameters:
171
- bbox_or_obj: Bounding box tuple or object with bbox coordinates
172
- fill: RGBA color tuple for rectangle fill
173
- stroke: RGBA color tuple for rectangle outline
174
- stroke_width: int - Outline width
175
176
Returns:
177
PageImage: Self for method chaining
178
"""
179
180
def draw_rects(list_of_rects, fill=(0, 0, 255, 50), stroke=(255, 0, 0, 200),
181
stroke_width=1):
182
"""Draw multiple rectangles."""
183
```
184
185
**Usage Examples:**
186
187
```python
188
with pdfplumber.open("document.pdf") as pdf:
189
page = pdf.pages[0]
190
im = page.to_image()
191
192
# Highlight all rectangles
193
im.draw_rects(page.rects)
194
195
# Highlight character bounding boxes
196
im.draw_rects(page.chars, fill=(255, 0, 0, 30), stroke=(255, 0, 0, 100))
197
198
# Custom rectangle
199
im.draw_rect((100, 100, 300, 200), fill=(0, 255, 0, 100))
200
201
im.save("rects_debug.png")
202
```
203
204
### Drawing Circles
205
206
Draw circles and circular markers.
207
208
```python { .api }
209
def draw_circle(center_or_obj, radius=5, fill=(0, 0, 255, 50),
210
stroke=(255, 0, 0, 200)):
211
"""
212
Draw circle.
213
214
Parameters:
215
- center_or_obj: Center point tuple (x, y), or an object whose bounding box (x0, x1, top, bottom) determines the center
216
- radius: int - Circle radius in pixels
217
- fill: RGBA color tuple for circle fill
218
- stroke: RGBA color tuple for circle outline
219
220
Returns:
221
PageImage: Self for method chaining
222
"""
223
224
def draw_circles(list_of_circles, radius=5, fill=(0, 0, 255, 50),
225
stroke=(255, 0, 0, 200)):
226
"""Draw multiple circles."""
227
```
228
229
**Usage Examples:**
230
231
```python
232
with pdfplumber.open("document.pdf") as pdf:
233
page = pdf.pages[0]
234
im = page.to_image()
235
236
# Mark character centers
237
char_centers = [((c['x0'] + c['x1'])/2, (c['top'] + c['bottom'])/2)
238
for c in page.chars]
239
im.draw_circles(char_centers, radius=2, fill=(255, 0, 0, 100))
240
241
# Mark specific points
242
im.draw_circle((page.width/2, page.height/2), radius=10,
243
fill=(0, 255, 0, 200))
244
245
im.save("circles_debug.png")
246
```
247
248
### Text Visualization
249
250
Specialized methods for visualizing text elements and word boundaries.
251
252
```python { .api }
253
def outline_words(stroke=(255, 0, 0, 200), fill=(255, 0, 0, 50),
254
stroke_width=1, x_tolerance=3, y_tolerance=3):
255
"""
256
Outline detected words.
257
258
Parameters:
259
- stroke: RGBA color for word outlines
260
- fill: RGBA color for word fill
261
- stroke_width: int - Outline width
262
- x_tolerance: float - Horizontal tolerance for word detection
263
- y_tolerance: float - Vertical tolerance for word detection
264
265
Returns:
266
PageImage: Self for method chaining
267
"""
268
269
def outline_chars(stroke=(255, 0, 0, 255), fill=(255, 0, 0, 63),
270
stroke_width=1):
271
"""
272
Outline individual characters.
273
274
Parameters:
275
- stroke: RGBA color for character outlines
276
- fill: RGBA color for character fill
277
- stroke_width: int - Outline width
278
279
Returns:
280
PageImage: Self for method chaining
281
"""
282
```
283
284
**Usage Examples:**
285
286
```python
287
with pdfplumber.open("document.pdf") as pdf:
288
page = pdf.pages[0]
289
im = page.to_image()
290
291
# Outline all words
292
im.outline_words()
293
294
# Outline characters with custom colors
295
im.outline_chars(stroke=(0, 255, 0, 255), fill=(0, 255, 0, 30))
296
297
# Fine-tuned word detection
298
im.outline_words(x_tolerance=1, y_tolerance=1,
299
stroke=(0, 0, 255, 200))
300
301
im.save("text_debug.png")
302
```
303
304
### Table Debugging
305
306
Specialized visualization for table detection and structure analysis.
307
308
```python { .api }
309
def debug_table(table, fill=(0, 0, 255, 50), stroke=(255, 0, 0, 200),
310
stroke_width=1):
311
"""
312
Visualize table structure.
313
314
Parameters:
315
- table: Table object to visualize
316
- fill: RGBA color for cell fill
317
- stroke: RGBA color for cell outlines
318
- stroke_width: int - Outline width
319
320
Returns:
321
PageImage: Self for method chaining
322
"""
323
324
def debug_tablefinder(table_settings=None):
325
"""
326
Visualize table detection process.
327
328
Parameters:
329
- table_settings: TableSettings or dict for detection configuration
330
331
Returns:
332
PageImage: Self for method chaining
333
"""
334
```
335
336
**Usage Examples:**
337
338
```python
339
with pdfplumber.open("document.pdf") as pdf:
340
page = pdf.pages[0]
341
im = page.to_image()
342
343
# Debug all detected tables
344
tables = page.find_tables()
345
for i, table in enumerate(tables):
346
color = [(255, 0, 0, 50), (0, 255, 0, 50), (0, 0, 255, 50)][i % 3]
347
im.debug_table(table, fill=color)
348
349
# Debug table detection algorithm
350
im.debug_tablefinder()
351
352
# Debug with custom settings
353
custom_settings = {"vertical_strategy": "text", "horizontal_strategy": "lines"}
354
im.debug_tablefinder(table_settings=custom_settings)
355
356
im.save("table_debug.png")
357
```
358
359
### Drawing Constants
360
361
Default colors and styling options for drawing operations.
362
363
```python { .api }
364
# Default drawing constants
365
DEFAULT_RESOLUTION = 72
366
DEFAULT_FILL = (0, 0, 255, 50) # Semi-transparent blue
367
DEFAULT_STROKE = (255, 0, 0, 200) # Semi-transparent red
368
DEFAULT_STROKE_WIDTH = 1
369
```
370
371
### Advanced Visualization Workflows
372
373
**Multi-layer Debugging:**
374
375
```python
376
with pdfplumber.open("document.pdf") as pdf:
377
page = pdf.pages[0]
378
im = page.to_image(resolution=150)
379
380
# Layer 1: Page structure
381
im.draw_rects(page.rects, fill=(200, 200, 200, 30))
382
im.draw_lines(page.lines, stroke=(100, 100, 100, 150))
383
384
# Layer 2: Text elements
385
im.outline_chars(stroke=(255, 0, 0, 100), fill=(255, 0, 0, 20))
386
387
# Layer 3: Tables
388
tables = page.find_tables()
389
for table in tables:
390
im.debug_table(table, fill=(0, 255, 0, 40), stroke=(0, 255, 0, 200))
391
392
# Layer 4: Custom annotations
393
# Highlight large text
394
large_chars = [c for c in page.chars if c.get('size', 0) > 12]
395
im.draw_rects(large_chars, fill=(255, 255, 0, 80))
396
397
im.save("comprehensive_debug.png")
398
```
399
400
**Comparative Analysis:**
401
402
```python
403
with pdfplumber.open("document.pdf") as pdf:
404
page = pdf.pages[0]
405
406
# Compare different table detection strategies
407
strategies = [
408
{"vertical_strategy": "lines", "horizontal_strategy": "lines"},
409
{"vertical_strategy": "text", "horizontal_strategy": "text"}
410
]
411
412
for i, settings in enumerate(strategies):
413
im = page.to_image()
414
im.debug_tablefinder(table_settings=settings)
415
im.save(f"table_strategy_{i+1}.png")
416
```
417
418
**Region-Specific Debugging:**
419
420
```python
421
with pdfplumber.open("document.pdf") as pdf:
422
page = pdf.pages[0]
423
424
# Debug specific page regions
425
regions = [
426
("header", (0, 0, page.width, 100)),
427
("content", (0, 100, page.width, page.height-50)),
428
("footer", (0, page.height-50, page.width, page.height))
429
]
430
431
for name, bbox in regions:
432
cropped = page.crop(bbox)
433
im = cropped.to_image()
434
im.outline_words()
435
im.save(f"{name}_debug.png")
436
```