or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

advanced.md attachments.md content-streams.md core-operations.md encryption.md forms.md images.md index.md metadata.md objects.md outlines.md pages.md

docs/core-operations.md

0

# Core PDF Operations

1

2

Fundamental PDF document operations providing the essential functionality for opening, creating, saving, and manipulating PDF files. These operations form the foundation of all pikepdf functionality.

3

4

## Capabilities

5

6

### PDF Document Management

7

8

The main Pdf class provides comprehensive document-level operations including file I/O, metadata access, and document structure manipulation.

9

10

```python { .api }

11

class Pdf:

12

"""

13

Main PDF document class representing a complete PDF file.

14

"""

15

16

@staticmethod

17

def open(filename, *, password=None, hex_password=None, ignore_xref_streams=False,

18

suppress_warnings=True, attempt_recovery=True, inherit_page_attributes=True,

19

access_mode=AccessMode.default) -> Pdf:

20

"""

21

Open an existing PDF file.

22

23

Parameters:

24

- filename (str | pathlib.Path | IO): Path to PDF file or file-like object

25

- password (str, optional): Password for encrypted PDFs

26

- hex_password (bool, optional): If True, interpret password as a hex-encoded encryption key rather than a plain-text password

27

- ignore_xref_streams (bool): Ignore cross-reference streams

28

- suppress_warnings (bool): Suppress QPDF warnings

29

- attempt_recovery (bool): Attempt to recover damaged PDFs

30

- inherit_page_attributes (bool): Inherit page attributes from page tree

31

- access_mode (AccessMode): File access mode

32

33

Returns:

34

Pdf: The opened PDF document

35

36

Raises:

37

PdfError: If the file cannot be opened

38

PasswordError: If password is required or incorrect

39

"""

40

41

@staticmethod

42

def new() -> Pdf:

43

"""

44

Create a new empty PDF document.

45

46

Returns:

47

Pdf: A new empty PDF document

48

"""

49

50

def save(self, filename, *, static_id=False, preserve_pdfa=True,

51

min_version=None, force_version=None, fix_metadata_version=True,

52

compress_streams=True, stream_decode_level=None,

53

object_stream_mode=ObjectStreamMode.preserve,

54

normalize_content=False, linearize=False, qdf=False,

55

progress=None, encryption=None, samefile_check=True) -> None:

56

"""

57

Save the PDF to a file.

58

59

Parameters:

60

- filename (str | pathlib.Path | IO): Output path or file-like object

61

- static_id (bool): Use static document ID for reproducible output

62

- preserve_pdfa (bool): Maintain PDF/A compliance

63

- min_version (str, optional): Minimum PDF version (e.g., '1.4')

64

- force_version (str, optional): Force specific PDF version

65

- fix_metadata_version (bool): Update metadata version to match PDF version

66

- compress_streams (bool): Compress stream objects

67

- stream_decode_level (StreamDecodeLevel, optional): Stream decoding level

68

- object_stream_mode (ObjectStreamMode): Object stream handling

69

- normalize_content (bool): Normalize content streams

70

- linearize (bool): Create linearized (fast web view) PDF

71

- qdf (bool): Save in QPDF's inspection format

72

- progress (callable, optional): Progress callback function

73

- encryption (Encryption, optional): Encryption settings

74

- samefile_check (bool): Check if saving to same file

75

76

Raises:

77

PdfError: If the file cannot be saved

78

"""

79

80

def close(self) -> None:

81

"""

82

Close the PDF and release resources.

83

84

The PDF object becomes unusable after closing.

85

"""

86

87

def copy_foreign(self, other_pdf_obj: Object) -> Object:

88

"""

89

Copy an object from another PDF into this PDF.

90

91

Parameters:

92

- other_pdf_obj (Object): Object from another PDF to copy

93

94

Returns:

95

Object: The copied object owned by this PDF

96

97

Raises:

98

ForeignObjectError: If the object cannot be copied

99

"""

100

101

def make_indirect(self, obj: Object) -> Object:

102

"""

103

Convert a direct object to an indirect object.

104

105

Parameters:

106

- obj (Object): Object to make indirect

107

108

Returns:

109

Object: The indirect object

110

"""

111

112

def add_blank_page(self, *, page_size=(612, 792)) -> Page:

113

"""

114

Add a blank page to the PDF.

115

116

Parameters:

117

- page_size (tuple): Page dimensions (width, height) in points

118

119

Returns:

120

Page: The newly created page

121

"""

122

123

@property

124

def Root(self) -> Dictionary:

125

"""

126

The PDF's document catalog (root object).

127

128

Returns:

129

Dictionary: Document catalog containing page tree and other references

130

"""

131

132

@property

133

def pages(self) -> list[Page]:

134

"""

135

List of all pages in the PDF.

136

137

Returns:

138

list[Page]: Pages that can be indexed, sliced, and modified

139

"""

140

141

@property

142

def objects(self) -> dict[tuple[int, int], Object]:

143

"""

144

Mapping of all indirect objects in the PDF.

145

146

Returns:

147

dict: Mapping from (objid, generation) to Object

148

"""

149

150

@property

151

def is_encrypted(self) -> bool:

152

"""

153

Whether the PDF is encrypted.

154

155

Returns:

156

bool: True if the PDF has encryption

157

"""

158

159

@property

160

def pdf_version(self) -> str:

161

"""

162

PDF version string (e.g., '1.4', '1.7').

163

164

Returns:

165

str: PDF version

166

"""

167

168

@property

169

def trailer(self) -> Dictionary:

170

"""

171

The PDF's trailer dictionary.

172

173

Returns:

174

Dictionary: Trailer containing cross-reference information

175

"""

176

177

@property

178

def docinfo(self) -> Dictionary:

179

"""

180

Document information dictionary.

181

182

Returns:

183

Dictionary: Document metadata (title, author, etc.)

184

"""

185

186

def check(self) -> list[str]:

187

"""

188

Check PDF for structural problems.

189

190

Returns:

191

list[str]: List of problems found (empty if no problems)

192

"""

193

```

194

195

### Convenience Functions

196

197

Global functions that provide shortcuts to common PDF operations.

198

199

```python { .api }

200

def open(filename, **kwargs) -> Pdf:

201

"""

202

Open an existing PDF file (alias for Pdf.open).

203

204

Parameters:

205

- filename: Path to PDF file or file-like object

206

- **kwargs: Same arguments as Pdf.open()

207

208

Returns:

209

Pdf: The opened PDF document

210

"""

211

212

def new() -> Pdf:

213

"""

214

Create a new empty PDF document (alias for Pdf.new).

215

216

Returns:

217

Pdf: A new empty PDF document

218

"""

219

```

220

221

### Access Modes

222

223

Control how PDF files are accessed and loaded into memory.

224

225

```python { .api }

226

from enum import Enum

227

228

class AccessMode(Enum):

229

"""File access modes for opening PDFs."""

230

default = ... # Standard file access

231

mmap = ... # Memory-mapped file access when possible

232

mmap_only = ... # Require memory-mapped access

233

stream = ... # Stream-based access for large files

234

```

235

236

### Object Stream Modes

237

238

Control how object streams are handled during save operations.

239

240

```python { .api }

241

class ObjectStreamMode(Enum):

242

"""Object stream handling modes."""

243

disable = ... # Don't use object streams

244

preserve = ... # Keep existing object streams

245

generate = ... # Generate new object streams for compression

246

```

247

248

### Stream Decode Levels

249

250

Control the level of stream decoding performed when reading PDFs.

251

252

```python { .api }

253

class StreamDecodeLevel(Enum):

254

"""Stream decoding levels."""

255

none = ... # No stream decoding

256

generalized = ... # Decode common filters

257

specialized = ... # Decode specialized filters

258

all = ... # Decode all supported filters

259

```

260

261

## Usage Examples

262

263

### Basic PDF Operations

264

265

```python

266

import pikepdf

267

268

# Open and read a PDF

269

with pikepdf.open('document.pdf') as pdf:

270

    print(f"PDF version: {pdf.pdf_version}")

271

    print(f"Number of pages: {len(pdf.pages)}")

272

    print(f"Encrypted: {pdf.is_encrypted}")

273

274

# Create a new PDF with a blank page

275

new_pdf = pikepdf.new()

276

new_pdf.add_blank_page(page_size=(612, 792)) # US Letter

277

new_pdf.save('blank.pdf')

278

new_pdf.close()

279

```

280

281

### Working with Encrypted PDFs

282

283

```python

284

import pikepdf

285

286

# Open password-protected PDF

287

try:

288

    pdf = pikepdf.open('encrypted.pdf', password='secret')

289

    print("Successfully opened encrypted PDF")

290

    pdf.close()

291

except pikepdf.PasswordError:

292

    print("Incorrect password")

293

```

294

295

### Advanced Save Options

296

297

```python

298

import pikepdf

299

from pikepdf import Encryption, Permissions

300

301

# Open and save with compression and linearization

302

pdf = pikepdf.open('input.pdf')

303

304

# Configure encryption

305

encryption = Encryption(

306

owner='owner_password',

307

user='user_password',

308

allow=Permissions(print_highres=True, extract=False)

309

)

310

311

# Save with advanced options

312

pdf.save('output.pdf',

313

linearize=True, # Fast web view

314

compress_streams=True,

315

encryption=encryption,

316

fix_metadata_version=True)

317

318

pdf.close()

319

```

320

321

### Page Management

322

323

```python

324

import pikepdf

325

326

# Combine multiple PDFs

327

pdf1 = pikepdf.open('doc1.pdf')

328

pdf2 = pikepdf.open('doc2.pdf')

329

330

combined = pikepdf.new()

331

332

# Copy all pages from both PDFs

333

for page in pdf1.pages:

334

    combined.pages.append(page)

335

336

for page in pdf2.pages:

337

    combined.pages.append(page)

338

339

combined.save('combined.pdf')

340

341

# Close all PDFs

342

pdf1.close()

343

pdf2.close()

344

combined.close()

345

```