or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

annotations.md, form-fields.md, index.md, metadata.md, page-operations.md, reading-writing.md, text-extraction.md, utilities.md

docs/page-operations.md

0

# Page Operations

1

2

Comprehensive page manipulation capabilities including transformations, merging, cropping, and geometric operations. The PageObject class provides the foundation for all page-level operations in pypdf.

3

4

## Capabilities

5

6

### Page Objects

7

8

PageObject represents individual PDF pages with complete access to page content, properties, and transformation capabilities.

9

10

```python { .api }

11

class PageObject:

12

@staticmethod

13

def create_blank_page(pdf=None, width: float | None = None, height: float | None = None) -> PageObject:

14

"""

15

Create a blank page with specified dimensions.

16

17

Args:

18

width: Page width in points

19

height: Page height in points

20

21

Returns:

22

New blank PageObject

23

"""

24

25

def extract_text(

26

self,

27

extraction_mode: str = "plain",

28

layout_mode_space_vertically: bool = True,

29

layout_mode_scale_weight: float = 1.25,

30

layout_mode_strip_rotated: bool = True,

31

orientations: tuple | int = (0, 90, 180, 270),

32

space_width: float = 200.0,

33

visitor_text=None

34

) -> str:

35

"""

36

Extract text from the page.

37

38

Args:

39

extraction_mode: Text extraction mode ("plain" or "layout", default: "plain")

40

layout_mode_space_vertically: Insert spaces for vertical gaps

41

layout_mode_scale_weight: Weight for layout scaling

42

layout_mode_strip_rotated: Strip rotated text

43

orientations: Text orientations to consider

44

space_width: Width threshold for spaces

45

visitor_text: Custom text visitor function

46

47

Returns:

48

Extracted text as string

49

"""

50

51

def scale(self, sx: float, sy: float) -> PageObject:

52

"""

53

Scale the page by given factors.

54

55

Args:

56

sx: Horizontal scaling factor

57

sy: Vertical scaling factor

58

59

Returns:

60

Self for method chaining

61

"""

62

63

def scale_by(self, factor: float) -> PageObject:

64

"""

65

Scale the page uniformly by a factor.

66

67

Args:

68

factor: Scaling factor

69

70

Returns:

71

Self for method chaining

72

"""

73

74

def scale_to(self, width: float, height: float) -> PageObject:

75

"""

76

Scale the page to specific dimensions.

77

78

Args:

79

width: Target width in points

80

height: Target height in points

81

82

Returns:

83

Self for method chaining

84

"""

85

86

def rotate(self, angle: int) -> PageObject:

87

"""

88

Rotate the page by the given angle.

89

90

Args:

91

angle: Rotation angle in degrees (90, 180, 270, etc.)

92

93

Returns:

94

Self for method chaining

95

"""

96

97

def rotate_clockwise(self, angle: int) -> PageObject:

98

"""

99

Rotate the page clockwise. Legacy PyPDF2-era name that is not usable in current pypdf; call rotate(angle) instead.

100

101

Args:

102

angle: Rotation angle in degrees

103

104

Returns:

105

Self for method chaining

106

"""

107

108

def rotate_counter_clockwise(self, angle: int) -> PageObject:

109

"""

110

Rotate the page counter-clockwise. Legacy name that does not appear to be available in current pypdf; call rotate(-angle) instead.

111

112

Args:

113

angle: Rotation angle in degrees

114

115

Returns:

116

Self for method chaining

117

"""

118

119

def transfer_rotation_to_content(self) -> PageObject:

120

"""

121

Apply the page's rotation to its content and reset rotation to 0.

122

123

Returns:

124

Self for method chaining

125

"""

126

127

def merge_page(self, page2: PageObject) -> None:

128

"""

129

Merge another page's content onto this page.

130

131

Args:

132

page2: PageObject to merge onto this page

133

"""

134

135

def merge_translated_page(self, page2: PageObject, tx: float, ty: float) -> None:

136

"""

137

Merge another page with translation offset.

138

139

Args:

140

page2: PageObject to merge

141

tx: Translation offset in x direction

142

ty: Translation offset in y direction

143

"""

144

145

def merge_rotated_page(self, page2: PageObject, rotation: float) -> None:

146

"""

147

Merge another page with rotation.

148

149

Args:

150

page2: PageObject to merge

151

rotation: Rotation angle in degrees

152

"""

153

154

def merge_scaled_page(self, page2: PageObject, scale: float, expand: bool = False) -> None:

155

"""

156

Merge another page with scaling.

157

158

Args:

159

page2: PageObject to merge

160

scale: Scaling factor

161

expand: Whether to expand the page to fit scaled content

162

"""

163

164

def merge_rotated_scaled_page(

165

self,

166

page2: PageObject,

167

rotation: float,

168

scale: float,

169

expand: bool = False

170

) -> None:

171

"""

172

Merge another page with rotation and scaling.

173

174

Args:

175

page2: PageObject to merge

176

rotation: Rotation angle in degrees

177

scale: Scaling factor

178

expand: Whether to expand the page to fit transformed content

179

"""

180

181

def merge_transformed_page(

182

self,

183

page2: PageObject,

184

ctm,

185

expand: bool = False

186

) -> None:

187

"""

188

Merge another page with custom transformation matrix.

189

190

Args:

191

page2: PageObject to merge

192

ctm: Transformation matrix

193

expand: Whether to expand the page to fit transformed content

194

"""

195

196

def add_transformation(self, ctm) -> None:

197

"""

198

Apply a transformation matrix to the page.

199

200

Args:

201

ctm: Transformation matrix

202

"""

203

204

### Page Box Properties

205

206

Access and modify PDF page boundaries and dimensions through five different box types, each serving specific purposes in the PDF specification.

207

208

```python { .api }

209

# Box Properties (all return RectangleObject)

210

@property

211

def mediabox(self) -> RectangleObject:

212

"""

213

The boundaries of the physical medium on which the page is intended

214

to be displayed or printed. This is the largest box and defines the

215

overall page size.

216

"""

217

218

@property

219

def cropbox(self) -> RectangleObject:

220

"""

221

The visible region of default user space. When displayed or printed,

222

contents outside this box are clipped. Falls back to mediabox if not set.

223

"""

224

225

@property

226

def bleedbox(self) -> RectangleObject:

227

"""

228

The region to which contents should be clipped when output in a

229

production environment. Used for printing with bleed margins.

230

Falls back to cropbox, then mediabox if not set.

231

"""

232

233

@property

234

def trimbox(self) -> RectangleObject:

235

"""

236

The intended dimensions of the finished page after trimming.

237

Falls back to cropbox, then mediabox if not set.

238

"""

239

240

@property

241

def artbox(self) -> RectangleObject:

242

"""

243

The extent of the page's meaningful content as intended by the

244

page's creator. Falls back to cropbox, then mediabox if not set.

245

"""

246

247

# RectangleObject Properties and Methods

248

class RectangleObject:

249

# Individual coordinates (read/write)

250

@property

251

def left(self) -> FloatObject: ...

252

253

@property

254

def bottom(self) -> FloatObject: ...

255

256

@property

257

def right(self) -> FloatObject: ...

258

259

@property

260

def top(self) -> FloatObject: ...

261

262

# Corner positions (read/write)

263

@property

264

def lower_left(self) -> tuple[float, float]: ...

265

266

@property

267

def lower_right(self) -> tuple[float, float]: ...

268

269

@property

270

def upper_left(self) -> tuple[float, float]: ...

271

272

@property

273

def upper_right(self) -> tuple[float, float]: ...

274

275

# Dimensions (read-only)

276

@property

277

def width(self) -> float: ...

278

279

@property

280

def height(self) -> float: ...

281

282

def scale(self, sx: float, sy: float) -> RectangleObject:

283

"""

284

Create a new scaled rectangle.

285

286

Args:

287

sx: Horizontal scale factor

288

sy: Vertical scale factor

289

290

Returns:

291

New scaled RectangleObject

292

"""

293

```

```python { .api }

294

295

@property

296

def rotation(self) -> int:

297

"""Get the page rotation angle in degrees."""

298

299

@property

300

def user_unit(self) -> float:

301

"""Get the user unit scale factor."""

302

303

@property

304

def images(self):

305

"""Get images on the page."""

306

307

@property

308

def page_number(self) -> int | None:

309

"""Get the page number in the document."""

310

311

@property

312

def annotations(self):

313

"""Get page annotations."""

314

315

@property

316

def mediabox(self):

317

"""Get the media box (page boundaries)."""

318

319

@property

320

def cropbox(self):

321

"""Get the crop box (visible page area)."""

322

323

@property

324

def bleedbox(self):

325

"""Get the bleed box (printable area with bleed)."""

326

327

@property

328

def trimbox(self):

329

"""Get the trim box (final trimmed page size)."""

330

331

@property

332

def artbox(self):

333

"""Get the art box (meaningful content area)."""

334

```

335

336

### Transformation Matrix

337

338

The Transformation class provides a convenient interface for creating and combining geometric transformations.

339

340

```python { .api }

341

class Transformation:

342

def __init__(self, ctm=(1, 0, 0, 1, 0, 0)):

343

"""

344

Initialize a transformation matrix.

345

346

Args:

347

ctm: 6-element transformation matrix tuple (a, b, c, d, e, f)

348

"""

349

350

def translate(self, tx: float = 0, ty: float = 0) -> Transformation:

351

"""

352

Add translation to the transformation.

353

354

Args:

355

tx: Translation in x direction

356

ty: Translation in y direction

357

358

Returns:

359

New Transformation with the translation applied (the original object is not modified)

360

"""

361

362

def scale(self, sx: float = 1, sy: float | None = None) -> Transformation:

363

"""

364

Add scaling to the transformation.

365

366

Args:

367

sx: Horizontal scaling factor

368

sy: Vertical scaling factor (defaults to sx)

369

370

Returns:

371

New Transformation with the scaling applied (the original object is not modified)

372

"""

373

374

def rotate(self, rotation: float) -> Transformation:

375

"""

376

Add rotation to the transformation.

377

378

Args:

379

rotation: Rotation angle in degrees

380

381

Returns:

382

New Transformation with the rotation applied (the original object is not modified)

383

"""

384

385

def transform(self, m) -> Transformation:

386

"""

387

Apply another transformation matrix.

388

389

Args:

390

m: Transformation matrix to apply

391

392

Returns:

393

New Transformation combining this one with m (the original object is not modified)

394

"""

395

396

def apply_on(self, pt, as_object: bool = False):

397

"""

398

Apply the transformation to a point.

399

400

Args:

401

pt: Point coordinates

402

as_object: Return as object instead of tuple

403

404

Returns:

405

Transformed point coordinates

406

"""

407

408

@property

409

def matrix(self):

410

"""Get the transformation matrix."""

411

```

412

413

## Usage Examples

414

415

### Basic Page Transformations

416

417

```python

418

from pypdf import PdfReader, PdfWriter

419

420

reader = PdfReader("input.pdf")

421

writer = PdfWriter()

422

423

for page in reader.pages:

424

# Scale page to 150%

425

page.scale_by(1.5)

426

427

# Rotate page 90 degrees clockwise

428

page.rotate(90)

429

430

writer.add_page(page)

431

432

with open("transformed.pdf", "wb") as output:

433

writer.write(output)

434

```

435

436

### Page Merging

437

438

```python

439

from pypdf import PdfReader, PdfWriter

440

441

reader = PdfReader("document.pdf")

442

overlay = PdfReader("watermark.pdf")

443

444

writer = PdfWriter()

445

446

for page in reader.pages:

447

# Merge watermark onto each page

448

page.merge_page(overlay.pages[0])

449

writer.add_page(page)

450

451

with open("watermarked.pdf", "wb") as output:

452

writer.write(output)

453

```

454

455

### Advanced Transformations

456

457

```python

458

from pypdf import PdfReader, PdfWriter, Transformation

459

460

reader = PdfReader("input.pdf")

461

writer = PdfWriter()

462

463

# Create complex transformation

464

transform = Transformation()

465

transform = transform.translate(100, 50) # Move 100 points right, 50 up

466

transform = transform.scale(0.8, 1.2) # Scale 80% horizontally, 120% vertically

467

transform = transform.rotate(15) # Rotate 15 degrees

468

469

for page in reader.pages:

470

# Apply transformation matrix

471

page.add_transformation(transform)

472

writer.add_page(page)

473

474

with open("complex_transform.pdf", "wb") as output:

475

writer.write(output)

476

```

477

478

### Creating Blank Pages

479

480

```python

481

from pypdf import PdfWriter, PageObject, PaperSize

482

483

writer = PdfWriter()

484

485

# Create pages with different sizes

486

letter_page = PageObject.create_blank_page(width=612, height=792) # Letter size

487

a4_page = PageObject.create_blank_page(width=PaperSize.A4.width, height=PaperSize.A4.height) # A4 size

488

489

writer.add_page(letter_page)

490

writer.add_page(a4_page)

491

492

with open("blank_pages.pdf", "wb") as output:

493

writer.write(output)

494

```

495

496

### Page Cropping and Boundaries

497

498

```python

499

from pypdf import PdfReader, PdfWriter

500

501

reader = PdfReader("input.pdf")

502

writer = PdfWriter()

503

504

for page in reader.pages:

505

# Get current page boundaries

506

media_box = page.mediabox

507

508

# Create crop box (crop 50 points from each side)

509

crop_box = [

510

media_box.left + 50,

511

media_box.bottom + 50,

512

media_box.right - 50,

513

media_box.top - 50

514

]

515

516

# Apply crop box

517

page.cropbox = crop_box

518

writer.add_page(page)

519

520

with open("cropped.pdf", "wb") as output:

521

writer.write(output)

522

```

523

524

### Multi-Page Overlay

525

526

```python

527

from pypdf import PdfReader, PdfWriter

528

529

base_doc = PdfReader("base.pdf")

530

overlay_doc = PdfReader("overlay.pdf")

531

532

writer = PdfWriter()

533

534

for i, page in enumerate(base_doc.pages):

535

# Use different overlay pages if available

536

overlay_index = i % len(overlay_doc.pages)

537

overlay_page = overlay_doc.pages[overlay_index]

538

539

# Scale overlay to fit page

540

page_width = float(page.mediabox.width)

541

page_height = float(page.mediabox.height)

542

overlay_width = float(overlay_page.mediabox.width)

543

overlay_height = float(overlay_page.mediabox.height)

544

545

scale_x = page_width / overlay_width

546

scale_y = page_height / overlay_height

547

scale = min(scale_x, scale_y)

548

549

overlay_page.scale_by(scale)

550

page.merge_page(overlay_page)

551

552

writer.add_page(page)

553

554

with open("multi_overlay.pdf", "wb") as output:

555

writer.write(output)

556

```