or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

cli.md index.md page-manipulation.md pdf-operations.md table-extraction.md text-extraction.md utilities.md visual-debugging.md

visual-debugging.mddocs/

0

# Visual Debugging

1

2

Comprehensive visualization tools for overlaying debug information on PDF pages, including object highlighting, table structure visualization, custom drawing operations, and image export capabilities.

3

4

## Capabilities

5

6

### Page to Image Conversion

7

8

Convert PDF pages to images for visualization and debugging purposes.

9

10

```python { .api }

11

def to_image(resolution=None, width=None, height=None, antialias=False,

12

force_mediabox=False):

13

"""

14

Convert page to image for debugging.

15

16

Parameters:

17

- resolution: int or float, optional - Image resolution in DPI (default: 72)

18

- width: int, optional - Target image width in pixels

19

- height: int, optional - Target image height in pixels (pass at most one of resolution, width, or height; the other dimension is scaled proportionally)

20

- antialias: bool - Enable antialiasing for smoother rendering

21

- force_mediabox: bool - Use MediaBox instead of CropBox for dimensions

22

23

Returns:

24

PageImage: Image object with drawing capabilities

25

"""

26

```

27

28

**Usage Examples:**

29

30

```python

31

with pdfplumber.open("document.pdf") as pdf:

32

page = pdf.pages[0]

33

34

# Basic image conversion

35

im = page.to_image()

36

im.save("page.png")

37

38

# High resolution image

39

hires = page.to_image(resolution=300)

40

hires.save("page_hires.png")

41

42

# Specific dimensions

43

thumb = page.to_image(width=400)  # only one of resolution/width/height may be given; height scales proportionally

44

thumb.save("thumbnail.png")

45

46

# Antialiased rendering

47

smooth = page.to_image(antialias=True)

48

smooth.save("smooth.png")

49

```

50

51

### PageImage Class

52

53

Image representation with comprehensive drawing and debugging capabilities.

54

55

```python { .api }

56

class PageImage:

57

"""Image representation with drawing capabilities."""

58

59

def __init__(self, page, original=None, resolution=72, antialias=False,

60

force_mediabox=False):

61

"""Initialize PageImage from page."""

62

63

@property

64

def page(self) -> Page:

65

"""Source page object."""

66

67

@property

68

def original(self) -> PIL.Image.Image:

69

"""Original page image without annotations."""

70

71

@property

72

def annotated(self) -> PIL.Image.Image:

73

"""Current image with annotations."""

74

75

@property

76

def resolution(self) -> Union[int, float]:

77

"""Image resolution in DPI."""

78

79

@property

80

def scale(self) -> float:

81

"""Scale factor from PDF coordinates to image pixels."""

82

83

def reset(self):

84

"""Reset annotations to original image."""

85

86

def copy(self):

87

"""Create copy of PageImage."""

88

89

def save(self, dest, format="PNG", quantize=True, colors=256, bits=8, **kwargs):

90

"""Save image to file."""

91

92

def show(self):

93

"""Display image (in interactive environments)."""

94

```

95

96

### Drawing Lines

97

98

Draw lines and line collections on the image.

99

100

```python { .api }

101

def draw_line(points_or_obj, stroke=(255, 0, 0, 200), stroke_width=1):

102

"""

103

Draw single line.

104

105

Parameters:

106

- points_or_obj: List of points or line object with coordinates

107

- stroke: Tuple[int, int, int, int] - RGBA color for line

108

- stroke_width: int - Line width in pixels

109

110

Returns:

111

PageImage: Self for method chaining

112

"""

113

114

def draw_lines(list_of_lines, stroke=(255, 0, 0, 200), stroke_width=1):

115

"""

116

Draw multiple lines.

117

118

Parameters:

119

- list_of_lines: List of line objects or point lists

120

- stroke: RGBA color tuple

121

- stroke_width: int - Line width

122

123

Returns:

124

PageImage: Self for method chaining

125

"""

126

127

def draw_vline(location, stroke=(255, 0, 0, 200), stroke_width=1):

128

"""Draw vertical line at X coordinate."""

129

130

def draw_vlines(locations, stroke=(255, 0, 0, 200), stroke_width=1):

131

"""Draw multiple vertical lines."""

132

133

def draw_hline(location, stroke=(255, 0, 0, 200), stroke_width=1):

134

"""Draw horizontal line at Y coordinate."""

135

136

def draw_hlines(locations, stroke=(255, 0, 0, 200), stroke_width=1):

137

"""Draw multiple horizontal lines."""

138

```

139

140

**Usage Examples:**

141

142

```python

143

with pdfplumber.open("document.pdf") as pdf:

144

page = pdf.pages[0]

145

im = page.to_image()

146

147

# Draw all lines on page

148

im.draw_lines(page.lines)

149

150

# Draw custom line

151

im.draw_line([(100, 100), (200, 200)], stroke=(0, 255, 0, 255), stroke_width=3)

152

153

# Draw grid lines

154

im.draw_vlines([100, 200, 300, 400], stroke=(0, 0, 255, 100))

155

im.draw_hlines([100, 200, 300], stroke=(0, 0, 255, 100))

156

157

im.save("lines_debug.png")

158

```

159

160

### Drawing Rectangles

161

162

Draw rectangles and rectangle collections with fill and stroke options.

163

164

```python { .api }

165

def draw_rect(bbox_or_obj, fill=(0, 0, 255, 50), stroke=(255, 0, 0, 200),

166

stroke_width=1):

167

"""

168

Draw rectangle.

169

170

Parameters:

171

- bbox_or_obj: Bounding box tuple or object with bbox coordinates

172

- fill: RGBA color tuple for rectangle fill

173

- stroke: RGBA color tuple for rectangle outline

174

- stroke_width: int - Outline width

175

176

Returns:

177

PageImage: Self for method chaining

178

"""

179

180

def draw_rects(list_of_rects, fill=(0, 0, 255, 50), stroke=(255, 0, 0, 200),

181

stroke_width=1):

182

"""Draw multiple rectangles."""

183

```

184

185

**Usage Examples:**

186

187

```python

188

with pdfplumber.open("document.pdf") as pdf:

189

page = pdf.pages[0]

190

im = page.to_image()

191

192

# Highlight all rectangles

193

im.draw_rects(page.rects)

194

195

# Highlight character bounding boxes

196

im.draw_rects(page.chars, fill=(255, 0, 0, 30), stroke=(255, 0, 0, 100))

197

198

# Custom rectangle

199

im.draw_rect((100, 100, 300, 200), fill=(0, 255, 0, 100))

200

201

im.save("rects_debug.png")

202

```

203

204

### Drawing Circles

205

206

Draw circles and circular markers.

207

208

```python { .api }

209

def draw_circle(center_or_obj, radius=5, fill=(0, 0, 255, 50),

210

stroke=(255, 0, 0, 200)):

211

"""

212

Draw circle.

213

214

Parameters:

215

- center_or_obj: Center point tuple or object with center coordinates

216

- radius: int - Circle radius in pixels

217

- fill: RGBA color tuple for circle fill

218

- stroke: RGBA color tuple for circle outline

219

220

Returns:

221

PageImage: Self for method chaining

222

"""

223

224

def draw_circles(list_of_circles, radius=5, fill=(0, 0, 255, 50),

225

stroke=(255, 0, 0, 200)):

226

"""Draw multiple circles."""

227

```

228

229

**Usage Examples:**

230

231

```python

232

with pdfplumber.open("document.pdf") as pdf:

233

page = pdf.pages[0]

234

im = page.to_image()

235

236

# Mark character centers

237

char_centers = [(c['x0'] + c['x1'])/2, (c['top'] + c['bottom'])/2)

238

for c in page.chars]

239

im.draw_circles(char_centers, radius=2, fill=(255, 0, 0, 100))

240

241

# Mark specific points

242

im.draw_circle((page.width/2, page.height/2), radius=10,

243

fill=(0, 255, 0, 200))

244

245

im.save("circles_debug.png")

246

```

247

248

### Text Visualization

249

250

Specialized methods for visualizing text elements and word boundaries.

251

252

```python { .api }

253

def outline_words(stroke=(255, 0, 0, 200), fill=(255, 0, 0, 50),

254

stroke_width=1, x_tolerance=3, y_tolerance=3):

255

"""

256

Outline detected words.

257

258

Parameters:

259

- stroke: RGBA color for word outlines

260

- fill: RGBA color for word fill

261

- stroke_width: int - Outline width

262

- x_tolerance: float - Horizontal tolerance for word detection

263

- y_tolerance: float - Vertical tolerance for word detection

264

265

Returns:

266

PageImage: Self for method chaining

267

"""

268

269

def outline_chars(stroke=(255, 0, 0, 255), fill=(255, 0, 0, 63),

270

stroke_width=1):

271

"""

272

Outline individual characters.

273

274

Parameters:

275

- stroke: RGBA color for character outlines

276

- fill: RGBA color for character fill

277

- stroke_width: int - Outline width

278

279

Returns:

280

PageImage: Self for method chaining

281

"""

282

```

283

284

**Usage Examples:**

285

286

```python

287

with pdfplumber.open("document.pdf") as pdf:

288

page = pdf.pages[0]

289

im = page.to_image()

290

291

# Outline all words

292

im.outline_words()

293

294

# Outline characters with custom colors

295

im.outline_chars(stroke=(0, 255, 0, 255), fill=(0, 255, 0, 30))

296

297

# Fine-tuned word detection

298

im.outline_words(x_tolerance=1, y_tolerance=1,

299

stroke=(0, 0, 255, 200))

300

301

im.save("text_debug.png")

302

```

303

304

### Table Debugging

305

306

Specialized visualization for table detection and structure analysis.

307

308

```python { .api }

309

def debug_table(table, fill=(0, 0, 255, 50), stroke=(255, 0, 0, 200),

310

stroke_width=1):

311

"""

312

Visualize table structure.

313

314

Parameters:

315

- table: Table object to visualize

316

- fill: RGBA color for cell fill

317

- stroke: RGBA color for cell outlines

318

- stroke_width: int - Outline width

319

320

Returns:

321

PageImage: Self for method chaining

322

"""

323

324

def debug_tablefinder(table_settings=None):

325

"""

326

Visualize table detection process.

327

328

Parameters:

329

- table_settings: TableSettings or dict for detection configuration

330

331

Returns:

332

PageImage: Self for method chaining

333

"""

334

```

335

336

**Usage Examples:**

337

338

```python

339

with pdfplumber.open("document.pdf") as pdf:

340

page = pdf.pages[0]

341

im = page.to_image()

342

343

# Debug all detected tables

344

tables = page.find_tables()

345

for i, table in enumerate(tables):

346

color = [(255, 0, 0, 50), (0, 255, 0, 50), (0, 0, 255, 50)][i % 3]

347

im.debug_table(table, fill=color)

348

349

# Debug table detection algorithm

350

im.debug_tablefinder()

351

352

# Debug with custom settings

353

custom_settings = {"vertical_strategy": "text", "horizontal_strategy": "lines"}

354

im.debug_tablefinder(table_settings=custom_settings)

355

356

im.save("table_debug.png")

357

```

358

359

### Drawing Constants

360

361

Default colors and styling options for drawing operations.

362

363

```python { .api }

364

# Default drawing constants

365

DEFAULT_RESOLUTION = 72

366

DEFAULT_FILL = (0, 0, 255, 50) # Semi-transparent blue

367

DEFAULT_STROKE = (255, 0, 0, 200) # Semi-transparent red

368

DEFAULT_STROKE_WIDTH = 1

369

```

370

371

### Advanced Visualization Workflows

372

373

**Multi-layer Debugging:**

374

375

```python

376

with pdfplumber.open("document.pdf") as pdf:

377

page = pdf.pages[0]

378

im = page.to_image(resolution=150)

379

380

# Layer 1: Page structure

381

im.draw_rects(page.rects, fill=(200, 200, 200, 30))

382

im.draw_lines(page.lines, stroke=(100, 100, 100, 150))

383

384

# Layer 2: Text elements

385

im.outline_chars(stroke=(255, 0, 0, 100), fill=(255, 0, 0, 20))

386

387

# Layer 3: Tables

388

tables = page.find_tables()

389

for table in tables:

390

im.debug_table(table, fill=(0, 255, 0, 40), stroke=(0, 255, 0, 200))

391

392

# Layer 4: Custom annotations

393

# Highlight large text

394

large_chars = [c for c in page.chars if c.get('size', 0) > 12]

395

im.draw_rects(large_chars, fill=(255, 255, 0, 80))

396

397

im.save("comprehensive_debug.png")

398

```

399

400

**Comparative Analysis:**

401

402

```python

403

with pdfplumber.open("document.pdf") as pdf:

404

page = pdf.pages[0]

405

406

# Compare different table detection strategies

407

strategies = [

408

{"vertical_strategy": "lines", "horizontal_strategy": "lines"},

409

{"vertical_strategy": "text", "horizontal_strategy": "text"}

410

]

411

412

for i, settings in enumerate(strategies):

413

im = page.to_image()

414

im.debug_tablefinder(table_settings=settings)

415

im.save(f"table_strategy_{i+1}.png")

416

```

417

418

**Region-Specific Debugging:**

419

420

```python

421

with pdfplumber.open("document.pdf") as pdf:

422

page = pdf.pages[0]

423

424

# Debug specific page regions

425

regions = [

426

("header", (0, 0, page.width, 100)),

427

("content", (0, 100, page.width, page.height-100)),

428

("footer", (0, page.height-50, page.width, page.height))

429

]

430

431

for name, bbox in regions:

432

cropped = page.crop(bbox)

433

im = cropped.to_image()

434

im.outline_words()

435

im.save(f"{name}_debug.png")

436

```