or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

advanced.md attachments.md content-streams.md core-operations.md encryption.md forms.md images.md index.md metadata.md objects.md outlines.md pages.md

docs/pages.md

0

# Page Operations

1

2

Page-level operations including manipulation, rotation, content parsing, overlays, and coordinate transformations. These capabilities enable comprehensive page handling for PDF documents.

3

4

## Capabilities

5

6

### Page Class

7

8

The Page class provides comprehensive page-level operations including content manipulation, geometric transformations, and overlay functionality.

9

10

```python { .api }

11

class Page(Object):

12

"""

13

PDF page object with content and properties.

14

15

Represents a single page in a PDF document with all its content,

16

resources, and geometric properties.

17

"""

18

19

def rotate(self, angle: int, *, relative: bool = True) -> None:

20

"""

21

Rotate the page by the specified angle.

22

23

Parameters:

24

- angle (int): Rotation angle in degrees (must be multiple of 90)

25

- relative (bool): If True, rotate relative to current rotation;

26

if False, set absolute rotation

27

28

Raises:

29

ValueError: If angle is not a multiple of 90 degrees

30

"""

31

32

def add_overlay(self, other: Page) -> None:

33

"""

34

Add another page as an overlay on top of this page.

35

36

The overlay page content is drawn on top of this page's content.

37

Both pages must be from the same PDF or compatible PDFs.

38

39

Parameters:

40

- other (Page): Page to use as overlay

41

42

Raises:

43

ForeignObjectError: If pages are from incompatible PDFs

44

"""

45

46

def add_underlay(self, other: Page) -> None:

47

"""

48

Add another page as an underlay beneath this page.

49

50

The underlay page content is drawn beneath this page's content.

51

Both pages must be from the same PDF or compatible PDFs.

52

53

Parameters:

54

- other (Page): Page to use as underlay

55

56

Raises:

57

ForeignObjectError: If pages are from incompatible PDFs

58

"""

59

60

def parse_contents(self) -> list[ContentStreamInstruction]:

61

"""

62

Parse the page's content stream into individual instructions.

63

64

Returns:

65

list[ContentStreamInstruction]: List of content stream instructions

66

that make up the page content

67

68

Raises:

69

PdfParsingError: If content stream cannot be parsed

70

"""

71

72

@property

73

def mediabox(self) -> Rectangle:

74

"""

75

The page's media box defining the physical page boundaries.

76

77

The media box defines the boundaries of the physical medium

78

on which the page is intended to be displayed or printed.

79

80

Returns:

81

Rectangle: Media box coordinates (llx, lly, urx, ury)

82

"""

83

84

@property

85

def cropbox(self) -> Rectangle:

86

"""

87

The page's crop box defining the visible page region.

88

89

The crop box defines the region to which the contents of the page

90

should be clipped when displayed or printed.

91

92

Returns:

93

Rectangle: Crop box coordinates (llx, lly, urx, ury)

94

"""

95

96

@property

97

def trimbox(self) -> Rectangle:

98

"""

99

The page's trim box defining the intended finished page size.

100

101

Returns:

102

Rectangle: Trim box coordinates (llx, lly, urx, ury)

103

"""

104

105

@property

106

def artbox(self) -> Rectangle:

107

"""

108

The page's art box defining the meaningful content area.

109

110

Returns:

111

Rectangle: Art box coordinates (llx, lly, urx, ury)

112

"""

113

114

@property

115

def bleedbox(self) -> Rectangle:

116

"""

117

The page's bleed box defining the clipping path for production.

118

119

Returns:

120

Rectangle: Bleed box coordinates (llx, lly, urx, ury)

121

"""

122

123

@property

124

def resources(self) -> Dictionary:

125

"""

126

The page's resource dictionary containing fonts, images, etc.

127

128

Returns:

129

Dictionary: Resource dictionary with fonts, XObjects, patterns, etc.

130

"""

131

132

@property

133

def images(self) -> dict[Name, PdfImage]:

134

"""

135

Dictionary of images referenced by this page.

136

137

Returns:

138

dict[Name, PdfImage]: Mapping of image names to PdfImage objects

139

"""

140

141

@property

142

def rotation(self) -> int:

143

"""

144

Current rotation of the page in degrees.

145

146

Returns:

147

int: Rotation angle (0, 90, 180, or 270 degrees)

148

"""

149

150

@property

151

def contents(self) -> Object:

152

"""

153

The page's content stream(s).

154

155

May be a single Stream object or Array of Stream objects.

156

157

Returns:

158

Object: Content stream or array of content streams

159

"""

160

```

161

162

### Rectangle Class

163

164

Geometric rectangle representation for page boundaries and coordinate operations.

165

166

```python { .api }

167

class Rectangle:

168

"""

169

PDF rectangle representing a bounding box with four coordinates.

170

171

Coordinates are specified as (llx, lly, urx, ury) where:

172

- llx, lly: lower-left corner coordinates

173

- urx, ury: upper-right corner coordinates

174

"""

175

176

def __init__(self, llx: float, lly: float, urx: float, ury: float) -> None:

177

"""

178

Create a rectangle with the specified coordinates.

179

180

Parameters:

181

- llx (float): Lower-left X coordinate

182

- lly (float): Lower-left Y coordinate

183

- urx (float): Upper-right X coordinate

184

- ury (float): Upper-right Y coordinate

185

"""

186

187

@property

188

def width(self) -> float:

189

"""

190

Rectangle width (urx - llx).

191

192

Returns:

193

float: Width of the rectangle

194

"""

195

196

@property

197

def height(self) -> float:

198

"""

199

Rectangle height (ury - lly).

200

201

Returns:

202

float: Height of the rectangle

203

"""

204

205

@property

206

def lower_left(self) -> tuple[float, float]:

207

"""

208

Lower-left corner coordinates.

209

210

Returns:

211

tuple[float, float]: (llx, lly) coordinates

212

"""

213

214

@property

215

def upper_right(self) -> tuple[float, float]:

216

"""

217

Upper-right corner coordinates.

218

219

Returns:

220

tuple[float, float]: (urx, ury) coordinates

221

"""

222

223

@property

224

def lower_right(self) -> tuple[float, float]:

225

"""

226

Lower-right corner coordinates.

227

228

Returns:

229

tuple[float, float]: (urx, lly) coordinates

230

"""

231

232

@property

233

def upper_left(self) -> tuple[float, float]:

234

"""

235

Upper-left corner coordinates.

236

237

Returns:

238

tuple[float, float]: (llx, ury) coordinates

239

"""

240

241

def __and__(self, other: Rectangle) -> Rectangle:

242

"""

243

Rectangle intersection (& operator).

244

245

Parameters:

246

- other (Rectangle): Rectangle to intersect with

247

248

Returns:

249

Rectangle: Intersection of the two rectangles

250

"""

251

252

def __le__(self, other: Rectangle) -> bool:

253

"""

254

Test if this rectangle is contained within another (<= operator).

255

256

Parameters:

257

- other (Rectangle): Rectangle to test containment against

258

259

Returns:

260

bool: True if this rectangle is fully contained in other

261

"""

262

263

def __eq__(self, other: Rectangle) -> bool:

264

"""

265

Test rectangle equality.

266

267

Parameters:

268

- other (Rectangle): Rectangle to compare with

269

270

Returns:

271

bool: True if rectangles have same coordinates

272

"""

273

```

274

275

### Content Stream Instructions

276

277

Objects representing parsed content stream instructions for low-level content manipulation.

278

279

```python { .api }

280

class ContentStreamInstruction:

281

"""

282

Parsed content stream instruction representing an operator and its operands.

283

284

Content streams contain sequences of instructions that define what

285

appears on a page (text, graphics, images, etc.).

286

"""

287

288

@property

289

def operands(self) -> list[Object]:

290

"""

291

List of operand objects for this instruction.

292

293

Returns:

294

list[Object]: PDF objects that serve as operands to the operator

295

"""

296

297

@property

298

def operator(self) -> Operator:

299

"""

300

The PDF operator for this instruction.

301

302

Returns:

303

Operator: PDF operator object (e.g., 'Tj' for show text)

304

"""

305

306

class ContentStreamInlineImage:

307

"""

308

Inline image found within a content stream.

309

310

Represents images embedded directly in the content stream

311

rather than referenced as external objects.

312

"""

313

314

@property

315

def operands(self) -> list[Object]:

316

"""

317

Operands associated with the inline image.

318

319

Returns:

320

list[Object]: Image operands

321

"""

322

323

@property

324

def operator(self) -> Operator:

325

"""

326

The operator associated with this inline image.

327

328

Returns:

329

Operator: Usually the 'EI' (end inline image) operator

330

"""

331

332

@property

333

def iimage(self) -> PdfInlineImage:

334

"""

335

The inline image object.

336

337

Returns:

338

PdfInlineImage: Inline image that can be processed or extracted

339

"""

340

```

341

342

## Usage Examples

343

344

### Basic Page Operations

345

346

```python

347

import pikepdf

348

349

# Open a PDF

350

pdf = pikepdf.open('document.pdf')

351

352

# Get the first page

353

page = pdf.pages[0]

354

355

# Rotate page 90 degrees clockwise

356

page.rotate(90, relative=True)

357

358

# Get page dimensions

359

media_box = page.mediabox

360

print(f"Page size: {media_box.width} x {media_box.height} points")

361

362

# Access page rotation

363

current_rotation = page.rotation

364

print(f"Current rotation: {current_rotation} degrees")

365

366

pdf.save('rotated_document.pdf')

367

pdf.close()

368

```

369

370

### Page Overlays and Underlays

371

372

```python

373

import pikepdf

374

375

# Open PDFs

376

main_pdf = pikepdf.open('main_document.pdf')

377

overlay_pdf = pikepdf.open('overlay_content.pdf')

378

379

# Get pages

380

main_page = main_pdf.pages[0]

381

overlay_page = overlay_pdf.pages[0]

382

383

# Copy overlay page to main PDF

384

copied_overlay = main_pdf.copy_foreign(overlay_page)

385

386

# Add as overlay (on top of existing content)

387

main_page.add_overlay(copied_overlay)

388

389

# Or add as underlay (beneath existing content)

390

# main_page.add_underlay(copied_overlay)

391

392

main_pdf.save('document_with_overlay.pdf')

393

main_pdf.close()

394

overlay_pdf.close()

395

```

396

397

### Working with Page Boxes

398

399

```python

400

import pikepdf

401

402

pdf = pikepdf.open('document.pdf')

403

page = pdf.pages[0]

404

405

# Access different page boxes

406

media_box = page.mediabox

407

crop_box = page.cropbox

408

trim_box = page.trimbox

409

art_box = page.artbox

410

bleed_box = page.bleedbox

411

412

print(f"Media box: {media_box.width} x {media_box.height}")

413

print(f"Crop box: {crop_box.width} x {crop_box.height}")

414

415

# Modify crop box to create margins

416

new_crop = pikepdf.Rectangle(

417

media_box.lower_left[0] + 36, # 0.5 inch margin

418

media_box.lower_left[1] + 36,

419

media_box.upper_right[0] - 36,

420

media_box.upper_right[1] - 36

421

)

422

page.cropbox = new_crop

423

424

pdf.save('cropped_document.pdf')

425

pdf.close()

426

```

427

428

### Content Stream Parsing

429

430

```python

431

import pikepdf

432

433

pdf = pikepdf.open('document.pdf')

434

page = pdf.pages[0]

435

436

# Parse page content into instructions

437

instructions = page.parse_contents()

438

439

# Iterate through content stream instructions

440

for instruction in instructions:

441

operator = instruction.operator

442

operands = instruction.operands

443

444

# Look for text showing operations

445

if str(operator) == 'Tj': # Show text

446

text_string = operands[0]

447

print(f"Found text: {text_string}")

448

449

# Look for image placement operations

450

elif str(operator) == 'Do': # Invoke XObject

451

xobject_name = operands[0]

452

print(f"Found XObject reference: {xobject_name}")

453

454

pdf.close()

455

```

456

457

### Page Resource Management

458

459

```python

460

import pikepdf

461

462

pdf = pikepdf.open('document.pdf')

463

page = pdf.pages[0]

464

465

# Access page resources

466

resources = page.resources

467

468

# Check for fonts

469

if '/Font' in resources:

470

fonts = resources['/Font']

471

print(f"Page uses {len(fonts)} fonts:")

472

for font_name, font_obj in fonts.items():

473

print(f" {font_name}: {font_obj.get('/BaseFont', 'Unknown')}")

474

475

# Check for images

476

if '/XObject' in resources:

477

xobjects = resources['/XObject']

478

for name, obj in xobjects.items():

479

if obj.get('/Subtype') == pikepdf.Name.Image:

480

print(f"Found image: {name}")

481

482

# Access images through convenience property

483

page_images = page.images

484

for name, image in page_images.items():

485

print(f"Image {name}: {image.width}x{image.height}, {image.bpc} bpc")

486

487

pdf.close()

488

```

489

490

### Rectangle Operations

491

492

```python

493

import pikepdf

494

495

# Create rectangles

496

page_rect = pikepdf.Rectangle(0, 0, 612, 792) # US Letter

497

margin_rect = pikepdf.Rectangle(36, 36, 576, 756) # 0.5" margins

498

499

# Calculate dimensions

500

print(f"Page dimensions: {page_rect.width} x {page_rect.height}")

501

print(f"Margin area: {margin_rect.width} x {margin_rect.height}")

502

503

# Test containment

504

is_contained = margin_rect <= page_rect

505

print(f"Margin rect fits in page: {is_contained}")

506

507

# Calculate intersection

508

if margin_rect <= page_rect:

509

intersection = page_rect & margin_rect

510

print(f"Intersection: {intersection.width} x {intersection.height}")

511

512

# Access corner coordinates

513

ll = page_rect.lower_left

514

ur = page_rect.upper_right

515

print(f"Lower-left: {ll}, Upper-right: {ur}")

516

```

517

518

### Multiple Page Operations

519

520

```python

521

import pikepdf

522

523

pdf = pikepdf.open('multi_page_document.pdf')

524

525

# Rotate all pages

526

for i, page in enumerate(pdf.pages):

527

if i % 2 == 0: # Even zero-based indices (0, 2, 4...), i.e. pages 1, 3, 5...

528

page.rotate(0) # Relative rotation of 0 degrees: leaves the current rotation unchanged

529

else: # Odd zero-based indices (1, 3, 5...), i.e. pages 2, 4, 6...

530

page.rotate(90) # Relative rotation: adds 90 degrees to the current rotation

531

532

print(f"Page {i+1}: {page.mediabox.width} x {page.mediabox.height}")

533

534

# Extract pages into separate PDFs

535

for i, page in enumerate(pdf.pages):

536

single_page_pdf = pikepdf.new()

537

single_page_pdf.pages.append(page)

538

single_page_pdf.save(f'page_{i+1}.pdf')

539

single_page_pdf.close()

540

541

pdf.close()

542

```