Tessl Tile for pypi/pdfplumber@0.11.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

cli.md index.md page-manipulation.md pdf-operations.md table-extraction.md text-extraction.md utilities.md visual-debugging.md

visual-debugging.mddocs/

0
# Visual Debugging
1

2
Comprehensive visualization tools for overlaying debug information on PDF pages, including object highlighting, table structure visualization, custom drawing operations, and image export capabilities.
3

4
## Capabilities
5

6
### Page to Image Conversion
7

8
Convert PDF pages to images for visualization and debugging purposes.
9

10
```python { .api }
11
def to_image(resolution=None, width=None, height=None, antialias=False, 
12
             force_mediabox=False):
13
    """
14
    Convert page to image for debugging.
15
    
16
    Parameters:
17
    - resolution: int or float, optional - Image resolution in DPI (default: 72)
18
    - width: int, optional - Target image width in pixels
19
    - height: int, optional - Target image height in pixels
20
    - antialias: bool - Enable antialiasing for smoother rendering
21
    - force_mediabox: bool - Use MediaBox instead of CropBox for dimensions
22
    
23
    Returns:
24
    PageImage: Image object with drawing capabilities
25
    """
26
```
27

28
**Usage Examples:**
29

30
```python
31
with pdfplumber.open("document.pdf") as pdf:
32
    page = pdf.pages[0]
33
    
34
    # Basic image conversion
35
    im = page.to_image()
36
    im.save("page.png")
37
    
38
    # High resolution image
39
    hires = page.to_image(resolution=300)
40
    hires.save("page_hires.png")
41
    
42
    # Specific width (pass at most one of resolution, width, height)
42
    thumb = page.to_image(width=400)
44
    thumb.save("thumbnail.png")
45
    
46
    # Antialiased rendering
47
    smooth = page.to_image(antialias=True)
48
    smooth.save("smooth.png")
49
```
50

51
### PageImage Class
52

53
Image representation with comprehensive drawing and debugging capabilities.
54

55
```python { .api }
56
class PageImage:
57
    """Image representation with drawing capabilities."""
58
    
59
    def __init__(self, page, original=None, resolution=72, antialias=False, 
60
                 force_mediabox=False):
61
        """Initialize PageImage from page."""
62
    
63
    @property
64
    def page(self) -> Page:
65
        """Source page object."""
66
    
67
    @property
68
    def original(self) -> PIL.Image.Image:
69
        """Original page image without annotations."""
70
    
71
    @property
72
    def annotated(self) -> PIL.Image.Image:
73
        """Current image with annotations."""
74
    
75
    @property
76
    def resolution(self) -> Union[int, float]:
77
        """Image resolution in DPI."""
78
    
79
    @property
80
    def scale(self) -> float:
81
        """Scale factor from PDF coordinates to image pixels."""
82
    
83
    def reset(self):
84
        """Reset annotations to original image."""
85
    
86
    def copy(self):
87
        """Create copy of PageImage."""
88
    
89
    def save(self, dest, format="PNG", quantize=True, colors=256, bits=8, **kwargs):
90
        """Save image to file."""
91
    
92
    def show(self):
93
        """Display image (in interactive environments)."""
94
```
95

96
### Drawing Lines
97

98
Draw lines and line collections on the image.
99

100
```python { .api }
101
def draw_line(points_or_obj, stroke=(255, 0, 0, 200), stroke_width=1):
102
    """
103
    Draw single line.
104
    
105
    Parameters:
106
    - points_or_obj: List of points or line object with coordinates
107
    - stroke: Tuple[int, int, int, int] - RGBA color for line
108
    - stroke_width: int - Line width in pixels
109
    
110
    Returns:
111
    PageImage: Self for method chaining
112
    """
113

114
def draw_lines(list_of_lines, stroke=(255, 0, 0, 200), stroke_width=1):
115
    """
116
    Draw multiple lines.
117
    
118
    Parameters:
119
    - list_of_lines: List of line objects or point lists
120
    - stroke: RGBA color tuple
121
    - stroke_width: int - Line width
122
    
123
    Returns:
124
    PageImage: Self for method chaining
125
    """
126

127
def draw_vline(location, stroke=(255, 0, 0, 200), stroke_width=1):
128
    """Draw vertical line at X coordinate."""
129

130
def draw_vlines(locations, stroke=(255, 0, 0, 200), stroke_width=1):
131
    """Draw multiple vertical lines."""
132

133
def draw_hline(location, stroke=(255, 0, 0, 200), stroke_width=1):
134
    """Draw horizontal line at Y coordinate."""
135

136
def draw_hlines(locations, stroke=(255, 0, 0, 200), stroke_width=1):
137
    """Draw multiple horizontal lines."""
138
```
139

140
**Usage Examples:**
141

142
```python
143
with pdfplumber.open("document.pdf") as pdf:
144
    page = pdf.pages[0]
145
    im = page.to_image()
146
    
147
    # Draw all lines on page
148
    im.draw_lines(page.lines)
149
    
150
    # Draw custom line
151
    im.draw_line([(100, 100), (200, 200)], stroke=(0, 255, 0, 255), stroke_width=3)
152
    
153
    # Draw grid lines
154
    im.draw_vlines([100, 200, 300, 400], stroke=(0, 0, 255, 100))
155
    im.draw_hlines([100, 200, 300], stroke=(0, 0, 255, 100))
156
    
157
    im.save("lines_debug.png")
158
```
159

160
### Drawing Rectangles
161

162
Draw rectangles and rectangle collections with fill and stroke options.
163

164
```python { .api }
165
def draw_rect(bbox_or_obj, fill=(0, 0, 255, 50), stroke=(255, 0, 0, 200), 
166
              stroke_width=1):
167
    """
168
    Draw rectangle.
169
    
170
    Parameters:
171
    - bbox_or_obj: Bounding box tuple or object with bbox coordinates
172
    - fill: RGBA color tuple for rectangle fill
173
    - stroke: RGBA color tuple for rectangle outline
174
    - stroke_width: int - Outline width
175
    
176
    Returns:
177
    PageImage: Self for method chaining
178
    """
179

180
def draw_rects(list_of_rects, fill=(0, 0, 255, 50), stroke=(255, 0, 0, 200), 
181
               stroke_width=1):
182
    """Draw multiple rectangles."""
183
```
184

185
**Usage Examples:**
186

187
```python
188
with pdfplumber.open("document.pdf") as pdf:
189
    page = pdf.pages[0]
190
    im = page.to_image()
191
    
192
    # Highlight all rectangles
193
    im.draw_rects(page.rects)
194
    
195
    # Highlight character bounding boxes
196
    im.draw_rects(page.chars, fill=(255, 0, 0, 30), stroke=(255, 0, 0, 100))
197
    
198
    # Custom rectangle
199
    im.draw_rect((100, 100, 300, 200), fill=(0, 255, 0, 100))
200
    
201
    im.save("rects_debug.png")
202
```
203

204
### Drawing Circles
205

206
Draw circles and circular markers.
207

208
```python { .api }
209
def draw_circle(center_or_obj, radius=5, fill=(0, 0, 255, 50), 
210
                stroke=(255, 0, 0, 200)):
211
    """
212
    Draw circle.
213
    
214
    Parameters:
215
    - center_or_obj: Center point tuple or object with center coordinates
216
    - radius: int - Circle radius in pixels
217
    - fill: RGBA color tuple for circle fill
218
    - stroke: RGBA color tuple for circle outline
219
    
220
    Returns:
221
    PageImage: Self for method chaining
222
    """
223

224
def draw_circles(list_of_circles, radius=5, fill=(0, 0, 255, 50), 
225
                 stroke=(255, 0, 0, 200)):
226
    """Draw multiple circles."""
227
```
228

229
**Usage Examples:**
230

231
```python
232
with pdfplumber.open("document.pdf") as pdf:
233
    page = pdf.pages[0]
234
    im = page.to_image()
235
    
236
    # Mark character centers
237
    char_centers = [((c['x0'] + c['x1'])/2, (c['top'] + c['bottom'])/2) 
238
                    for c in page.chars]
239
    im.draw_circles(char_centers, radius=2, fill=(255, 0, 0, 100))
240
    
241
    # Mark specific points
242
    im.draw_circle((page.width/2, page.height/2), radius=10, 
243
                   fill=(0, 255, 0, 200))
244
    
245
    im.save("circles_debug.png")
246
```
247

248
### Text Visualization
249

250
Specialized methods for visualizing text elements and word boundaries.
251

252
```python { .api }
253
def outline_words(stroke=(255, 0, 0, 200), fill=(255, 0, 0, 50), 
254
                  stroke_width=1, x_tolerance=3, y_tolerance=3):
255
    """
256
    Outline detected words.
257
    
258
    Parameters:
259
    - stroke: RGBA color for word outlines
260
    - fill: RGBA color for word fill
261
    - stroke_width: int - Outline width
262
    - x_tolerance: float - Horizontal tolerance for word detection
263
    - y_tolerance: float - Vertical tolerance for word detection
264
    
265
    Returns:
266
    PageImage: Self for method chaining
267
    """
268

269
def outline_chars(stroke=(255, 0, 0, 255), fill=(255, 0, 0, 63), 
270
                  stroke_width=1):
271
    """
272
    Outline individual characters.
273
    
274
    Parameters:
275
    - stroke: RGBA color for character outlines
276
    - fill: RGBA color for character fill
277
    - stroke_width: int - Outline width
278
    
279
    Returns:
280
    PageImage: Self for method chaining
281
    """
282
```
283

284
**Usage Examples:**
285

286
```python
287
with pdfplumber.open("document.pdf") as pdf:
288
    page = pdf.pages[0]
289
    im = page.to_image()
290
    
291
    # Outline all words
292
    im.outline_words()
293
    
294
    # Outline characters with custom colors
295
    im.outline_chars(stroke=(0, 255, 0, 255), fill=(0, 255, 0, 30))
296
    
297
    # Fine-tuned word detection
298
    im.outline_words(x_tolerance=1, y_tolerance=1, 
299
                     stroke=(0, 0, 255, 200))
300
    
301
    im.save("text_debug.png")
302
```
303

304
### Table Debugging
305

306
Specialized visualization for table detection and structure analysis.
307

308
```python { .api }
309
def debug_table(table, fill=(0, 0, 255, 50), stroke=(255, 0, 0, 200), 
310
                stroke_width=1):
311
    """
312
    Visualize table structure.
313
    
314
    Parameters:
315
    - table: Table object to visualize
316
    - fill: RGBA color for cell fill
317
    - stroke: RGBA color for cell outlines
318
    - stroke_width: int - Outline width
319
    
320
    Returns:
321
    PageImage: Self for method chaining
322
    """
323

324
def debug_tablefinder(table_settings=None):
325
    """
326
    Visualize table detection process.
327
    
328
    Parameters:
329
    - table_settings: TableSettings or dict for detection configuration
330
    
331
    Returns:
332
    PageImage: Self for method chaining
333
    """
334
```
335

336
**Usage Examples:**
337

338
```python
339
with pdfplumber.open("document.pdf") as pdf:
340
    page = pdf.pages[0]
341
    im = page.to_image()
342
    
343
    # Debug all detected tables
344
    tables = page.find_tables()
345
    for i, table in enumerate(tables):
346
        color = [(255, 0, 0, 50), (0, 255, 0, 50), (0, 0, 255, 50)][i % 3]
347
        im.debug_table(table, fill=color)
348
    
349
    # Debug table detection algorithm
350
    im.debug_tablefinder()
351
    
352
    # Debug with custom settings
353
    custom_settings = {"vertical_strategy": "text", "horizontal_strategy": "lines"}
354
    im.debug_tablefinder(table_settings=custom_settings)
355
    
356
    im.save("table_debug.png")
357
```
358

359
### Drawing Constants
360

361
Default colors and styling options for drawing operations.
362

363
```python { .api }
364
# Default drawing constants
365
DEFAULT_RESOLUTION = 72
366
DEFAULT_FILL = (0, 0, 255, 50)     # Semi-transparent blue
367
DEFAULT_STROKE = (255, 0, 0, 200)  # Semi-transparent red
368
DEFAULT_STROKE_WIDTH = 1
369
```
370

371
### Advanced Visualization Workflows
372

373
**Multi-layer Debugging:**
374

375
```python
376
with pdfplumber.open("document.pdf") as pdf:
377
    page = pdf.pages[0]
378
    im = page.to_image(resolution=150)
379
    
380
    # Layer 1: Page structure
381
    im.draw_rects(page.rects, fill=(200, 200, 200, 30))
382
    im.draw_lines(page.lines, stroke=(100, 100, 100, 150))
383
    
384
    # Layer 2: Text elements
385
    im.outline_chars(stroke=(255, 0, 0, 100), fill=(255, 0, 0, 20))
386
    
387
    # Layer 3: Tables
388
    tables = page.find_tables()
389
    for table in tables:
390
        im.debug_table(table, fill=(0, 255, 0, 40), stroke=(0, 255, 0, 200))
391
    
392
    # Layer 4: Custom annotations
393
    # Highlight large text
394
    large_chars = [c for c in page.chars if c.get('size', 0) > 12]
395
    im.draw_rects(large_chars, fill=(255, 255, 0, 80))
396
    
397
    im.save("comprehensive_debug.png")
398
```
399

400
**Comparative Analysis:**
401

402
```python
403
with pdfplumber.open("document.pdf") as pdf:
404
    page = pdf.pages[0]
405
    
406
    # Compare different table detection strategies
407
    strategies = [
408
        {"vertical_strategy": "lines", "horizontal_strategy": "lines"},
409
        {"vertical_strategy": "text", "horizontal_strategy": "text"}
410
    ]
411
    
412
    for i, settings in enumerate(strategies):
413
        im = page.to_image()
414
        im.debug_tablefinder(table_settings=settings)
415
        im.save(f"table_strategy_{i+1}.png")
416
```
417

418
**Region-Specific Debugging:**
419

420
```python
421
with pdfplumber.open("document.pdf") as pdf:
422
    page = pdf.pages[0]
423
    
424
    # Debug specific page regions
425
    regions = [
426
        ("header", (0, 0, page.width, 100)),
427
        ("content", (0, 100, page.width, page.height-100)),
428
        ("footer", (0, page.height-50, page.width, page.height))
429
    ]
430
    
431
    for name, bbox in regions:
432
        cropped = page.crop(bbox)
433
        im = cropped.to_image()
434
        im.outline_words()
435
        im.save(f"{name}_debug.png")
436
```

Version

Tile

Files

visual-debugging.md docs/

visual-debugging.mddocs/