or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

errors-and-utilities.md index.md page-manipulation.md pdf-merging.md pdf-reading.md pdf-writing.md types-and-objects.md

docs/page-manipulation.md

0

# Page Manipulation

1

2

Transform, scale, rotate, crop, and merge individual PDF pages with precise control over page geometry and content. The PageObject class and Transformation utilities provide comprehensive page manipulation capabilities.

3

4

## Capabilities

5

6

### PageObject Class

7

8

Represents a single PDF page with methods for content extraction, geometric transformations, and page merging.

9

10

```python { .api }

11

class PageObject(DictionaryObject):

12

"""PDF page object with transformation and content capabilities."""

13

14

@property

15

def mediabox(self) -> RectangleObject:

16

"""The page's media box (full page size)."""

17

18

@property

19

def cropbox(self) -> RectangleObject:

20

"""The page's crop box (visible area)."""

21

22

@property

23

def bleedbox(self) -> RectangleObject:

24

"""The page's bleed box (printing area)."""

25

26

@property

27

def trimbox(self) -> RectangleObject:

28

"""The page's trim box (finished page size)."""

29

30

@property

31

def artbox(self) -> RectangleObject:

32

"""The page's art box (meaningful content area)."""

33

34

@property

35

def annotations(self) -> Optional[ArrayObject]:

36

"""Page annotations if present."""

37

38

def extract_text(self, visitor_text=None) -> str:

39

"""

40

Extract text content from the page.

41

42

Args:

43

visitor_text (callable, optional): Custom text visitor function

44

45

Returns:

46

str: Extracted text content

47

"""

48

49

def scale(self, sx: float, sy: float) -> None:

50

"""

51

Scale the page by given factors.

52

53

Args:

54

sx (float): Horizontal scale factor

55

sy (float): Vertical scale factor

56

"""

57

58

def scale_by(self, factor: float) -> None:

59

"""

60

Scale the page uniformly.

61

62

Args:

63

factor (float): Scale factor for both dimensions

64

"""

65

66

def scale_to(self, width: float, height: float) -> None:

67

"""

68

Scale the page to specific dimensions.

69

70

Args:

71

width (float): Target width in points

72

height (float): Target height in points

73

"""

74

75

def rotate(self, angle: int) -> 'PageObject':

76

"""

77

Rotate the page clockwise by the given angle.

78

79

Args:

80

angle (int): Rotation angle in degrees (must be multiple of 90)

81

82

Returns:

83

PageObject: Self for method chaining

84

"""

85

86

def rotate_clockwise(self, angle: int) -> 'PageObject':

87

"""

88

DEPRECATED: Use rotate() instead.

89

Rotate the page clockwise.

90

91

Args:

92

angle (int): Rotation angle in degrees

93

94

Returns:

95

PageObject: Self for method chaining

96

"""

97

98

def rotate_counter_clockwise(self, angle: int) -> 'PageObject':

99

"""

100

DEPRECATED: Use rotate() instead.

101

Rotate the page counter-clockwise.

102

103

Args:

104

angle (int): Rotation angle in degrees

105

106

Returns:

107

PageObject: Self for method chaining

108

"""

109

110

def merge_page(self, page2: 'PageObject') -> None:

111

"""

112

Merge another page's content onto this page.

113

114

Args:

115

page2 (PageObject): Page to merge onto this page

116

"""

117

118

def merge_scaled_page(self, page2: 'PageObject', scale: float, expand: bool = False) -> None:

119

"""

120

Merge a scaled page onto this page.

121

122

Args:

123

page2 (PageObject): Page to merge

124

scale (float): Scale factor for the merged page

125

expand (bool): Whether to expand page size if needed

126

"""

127

128

def merge_rotated_page(self, page2: 'PageObject', rotation: int, expand: bool = False) -> None:

129

"""

130

Merge a rotated page onto this page.

131

132

Args:

133

page2 (PageObject): Page to merge

134

rotation (int): Rotation angle in degrees

135

expand (bool): Whether to expand page size if needed

136

"""

137

138

def merge_scaled_translated_page(

139

self,

140

page2: 'PageObject',

141

scale: float,

142

tx: float,

143

ty: float,

144

expand: bool = False

145

) -> None:

146

"""

147

Merge a scaled and translated page onto this page.

148

149

Args:

150

page2 (PageObject): Page to merge

151

scale (float): Scale factor

152

tx (float): X translation in points

153

ty (float): Y translation in points

154

expand (bool): Whether to expand page size if needed

155

"""

156

157

def merge_rotated_scaled_page(

158

self,

159

page2: 'PageObject',

160

rotation: int,

161

scale: float,

162

expand: bool = False

163

) -> None:

164

"""

165

Merge a rotated and scaled page onto this page.

166

167

Args:

168

page2 (PageObject): Page to merge

169

rotation (int): Rotation angle in degrees

170

scale (float): Scale factor

171

expand (bool): Whether to expand page size if needed

172

"""

173

174

def merge_rotated_scaled_translated_page(

175

self,

176

page2: 'PageObject',

177

rotation: int,

178

scale: float,

179

tx: float,

180

ty: float,

181

expand: bool = False

182

) -> None:

183

"""

184

Merge a page with full transformation onto this page.

185

186

Args:

187

page2 (PageObject): Page to merge

188

rotation (int): Rotation angle in degrees

189

scale (float): Scale factor

190

tx (float): X translation in points

191

ty (float): Y translation in points

192

expand (bool): Whether to expand page size if needed

193

"""

194

195

def merge_transformed_page(

196

self,

197

page2: 'PageObject',

198

ctm: Transformation,

199

expand: bool = False

200

) -> None:

201

"""

202

Merge a page with custom transformation matrix.

203

204

Args:

205

page2 (PageObject): Page to merge

206

ctm (Transformation): Current transformation matrix

207

expand (bool): Whether to expand page size if needed

208

"""

209

210

def add_transformation(self, ctm: Transformation) -> None:

211

"""

212

Add a transformation to the page.

213

214

Args:

215

ctm (Transformation): Transformation matrix to apply

216

"""

217

218

def get_fonts(self) -> Tuple[Set[str], Set[str]]:

219

"""

220

Get fonts used on the page.

221

222

Returns:

223

tuple: (font_names, font_subsets) sets

224

"""

225

226

def get_images(self) -> Dict[str, Any]:

227

"""

228

Get images embedded in the page.

229

230

Returns:

231

dict: Image information by name

232

"""

233

234

@staticmethod

235

def create_blank_page(pdf=None, width: float = 612, height: float = 792) -> 'PageObject':

236

"""

237

Create a blank page.

238

239

Args:

240

pdf: Optional PDF reader reference

241

width (float): Page width in points (default: 612, i.e. 8.5")

242

height (float): Page height in points (default: 792, i.e. 11")

243

244

Returns:

245

PageObject: New blank page

246

"""

247

```

248

249

### Transformation Class

250

251

2D coordinate transformation operations for precise page geometry control.

252

253

```python { .api }

254

class Transformation:

255

"""2D transformation matrix for page operations."""

256

257

def __init__(self, ctm: Tuple[float, float, float, float, float, float] = (1, 0, 0, 1, 0, 0)):

258

"""

259

Initialize transformation matrix.

260

261

Args:

262

ctm: 6-element transformation matrix (a, b, c, d, e, f)

263

"""

264

265

def translate(self, tx: float = 0, ty: float = 0) -> 'Transformation':

266

"""

267

Add translation to the transformation.

268

269

Args:

270

tx (float): X translation in points

271

ty (float): Y translation in points

272

273

Returns:

274

Transformation: New transformation with translation applied

275

"""

276

277

def scale(self, sx: Optional[float] = None, sy: Optional[float] = None) -> 'Transformation':

278

"""

279

Add scaling to the transformation.

280

281

Args:

282

sx: X scale factor (default: same as sy; at least one of sx or sy must be given)

283

sy: Y scale factor (default: same as sx)

284

285

Returns:

286

Transformation: New transformation with scaling applied

287

"""

288

289

def rotate(self, rotation: float) -> 'Transformation':

290

"""

291

Add rotation to the transformation.

292

293

Args:

294

rotation (float): Rotation angle in degrees

295

296

Returns:

297

Transformation: New transformation with rotation applied

298

"""

299

300

```

301

302

### Rectangle Objects

303

304

Geometric rectangle representation for page boundaries and regions.

305

306

```python { .api }

307

class RectangleObject(ArrayObject):

308

"""PDF rectangle object for geometric regions."""

309

310

@property

311

def left(self) -> float:

312

"""Left coordinate."""

313

314

@property

315

def bottom(self) -> float:

316

"""Bottom coordinate."""

317

318

@property

319

def right(self) -> float:

320

"""Right coordinate."""

321

322

@property

323

def top(self) -> float:

324

"""Top coordinate."""

325

326

@property

327

def width(self) -> float:

328

"""Rectangle width."""

329

330

@property

331

def height(self) -> float:

332

"""Rectangle height."""

333

334

def scale(self, sx: float, sy: float) -> 'RectangleObject':

335

"""

336

Scale the rectangle.

337

338

Args:

339

sx (float): X scale factor

340

sy (float): Y scale factor

341

342

Returns:

343

RectangleObject: New scaled rectangle

344

"""

345

346

def normalize(self) -> 'RectangleObject':

347

"""

348

Normalize rectangle coordinates.

349

350

Returns:

351

RectangleObject: Normalized rectangle

352

"""

353

354

def intersect(self, other: 'RectangleObject') -> 'RectangleObject':

355

"""

356

Calculate intersection with another rectangle.

357

358

Args:

359

other (RectangleObject): Rectangle to intersect with

360

361

Returns:

362

RectangleObject: Intersection rectangle

363

"""

364

365

def union(self, other: 'RectangleObject') -> 'RectangleObject':

366

"""

367

Calculate union with another rectangle.

368

369

Args:

370

other (RectangleObject): Rectangle to union with

371

372

Returns:

373

RectangleObject: Union rectangle

374

"""

375

```

376

377

## Usage Examples

378

379

### Basic Page Transformations

380

381

```python

382

from PyPDF2 import PdfReader, PdfWriter

383

384

# Read source PDF

385

reader = PdfReader("source.pdf")

386

writer = PdfWriter()

387

388

# Get first page

389

page = reader.pages[0]

390

391

# Scale the page to 50% size

392

page.scale(0.5, 0.5)

393

394

# Rotate 90 degrees clockwise

395

page.rotate(90)

396

397

# Add to writer

398

writer.add_page(page)

399

400

# Save result

401

with open("transformed.pdf", "wb") as output_file:

402

writer.write(output_file)

403

```

404

405

### Advanced Page Merging

406

407

```python

408

from PyPDF2 import PdfReader, PdfWriter

409

410

# Read source files

411

reader1 = PdfReader("background.pdf")

412

reader2 = PdfReader("overlay.pdf")

413

writer = PdfWriter()

414

415

# Get pages

416

background = reader1.pages[0]

417

overlay = reader2.pages[0]

418

419

# Scale overlay to fit in corner

420

overlay.scale(0.3, 0.3)

421

422

# Merge overlay onto background

423

background.merge_scaled_translated_page(

424

overlay,

425

scale=0.5,

426

tx=400, # Position in bottom-right

427

ty=100,

428

expand=False

429

)

430

431

writer.add_page(background)

432

433

with open("merged_pages.pdf", "wb") as output_file:

434

writer.write(output_file)

435

```

436

437

### Working with Page Dimensions

438

439

```python

440

from PyPDF2 import PdfReader, PdfWriter

441

from PyPDF2.generic import RectangleObject

442

443

reader = PdfReader("document.pdf")

444

writer = PdfWriter()

445

446

for page in reader.pages:

447

# Get current dimensions

448

mediabox = page.mediabox

449

print(f"Page size: {mediabox.width} x {mediabox.height} points")

450

451

# Convert to inches (72 points = 1 inch)

452

width_inches = float(mediabox.width) / 72

453

height_inches = float(mediabox.height) / 72

454

print(f"Page size: {width_inches:.1f}\" x {height_inches:.1f}\"")

455

456

# Crop page to center area

457

crop_margin = 50 # 50 points margin

458

page.cropbox = RectangleObject([

459

float(mediabox.left) + crop_margin,

460

float(mediabox.bottom) + crop_margin,

461

float(mediabox.right) - crop_margin,

462

float(mediabox.top) - crop_margin

463

])

464

465

writer.add_page(page)

466

467

with open("cropped.pdf", "wb") as output_file:

468

writer.write(output_file)

469

```

470

471

### Creating Custom Transformations

472

473

```python

474

from PyPDF2 import PdfReader, PdfWriter, Transformation

475

476

reader = PdfReader("source.pdf")

477

writer = PdfWriter()

478

479

page = reader.pages[0]

480

481

# Create complex transformation

482

transform = (Transformation()

483

.rotate(45) # Rotate 45 degrees

484

.scale(0.8, 1.2) # Scale differently in X and Y

485

.translate(100, 50) # Move to new position

486

)

487

488

# Apply transformation

489

page.add_transformation(transform)

490

writer.add_page(page)

491

492

with open("custom_transform.pdf", "wb") as output_file:

493

writer.write(output_file)

494

```

495

496

### Text and Image Extraction

497

498

```python

499

from PyPDF2 import PdfReader

500

501

reader = PdfReader("document.pdf")

502

503

for page_num, page in enumerate(reader.pages):

504

# Extract text

505

text = page.extract_text()

506

print(f"Page {page_num + 1} text:")

507

print(text[:200] + "..." if len(text) > 200 else text)

508

509

# Get font information

510

font_names, font_subsets = page.get_fonts()

511

print(f"Fonts used: {font_names}")

512

513

# Get images

514

images = page.get_images()

515

print(f"Images found: {len(images)}")

516

for img_name, img_info in images.items():

517

print(f" - {img_name}: {img_info}")

518

```

519

520

### Creating Blank Pages with Content

521

522

```python

523

from PyPDF2 import PdfWriter, PageObject

524

from PyPDF2.generic import RectangleObject

525

526

writer = PdfWriter()

527

528

# Create custom sized blank page (A4: 595 x 842 points)

529

blank_page = PageObject.create_blank_page(width=595, height=842)

530

531

# You can then add content or merge other pages onto it

532

writer.add_page(blank_page)

533

534

# Create US Letter sized page (8.5" x 11" = 612 x 792 points)

535

letter_page = PageObject.create_blank_page(width=612, height=792)

536

writer.add_page(letter_page)

537

538

with open("blank_pages.pdf", "wb") as output_file:

539

writer.write(output_file)

540

```

541

542

## Utility Functions

543

544

### Text Extraction Utilities

545

546

```python { .api }

547

def set_custom_rtl(_min: int, _max: int, specials: List[int]) -> Tuple[int, int, List[int]]:

548

"""

549

Configure right-to-left text parameters for text extraction.

550

551

Args:

552

_min (int): Minimum character code for RTL

553

_max (int): Maximum character code for RTL

554

specials (list): Special character codes to handle as RTL

555

556

Returns:

557

tuple: Configuration tuple with min, max, and specials

558

"""

559

```

560

561

This function helps configure text extraction for right-to-left languages and custom character sets.