0
# Core PDF Operations
1
2
Fundamental PDF document operations providing the essential functionality for opening, creating, saving, and manipulating PDF files. These operations form the foundation of all pikepdf functionality.
3
4
## Capabilities
5
6
### PDF Document Management
7
8
The main Pdf class provides comprehensive document-level operations including file I/O, metadata access, and document structure manipulation.
9
10
```python { .api }
11
class Pdf:
12
"""
13
Main PDF document class representing a complete PDF file.
14
"""
15
16
@staticmethod
17
def open(filename, *, password='', hex_password=False, ignore_xref_streams=False,
18
suppress_warnings=True, attempt_recovery=True, inherit_page_attributes=True,
19
access_mode=AccessMode.default) -> Pdf:
20
"""
21
Open an existing PDF file.
22
23
Parameters:
24
- filename (str | pathlib.Path | IO): Path to PDF file or file-like object
25
- password (str | bytes): Password for encrypted PDFs (empty string if none)
26
- hex_password (bool): If True, interpret password as a hex-encoded encryption key instead of a user/owner password
27
- ignore_xref_streams (bool): Ignore cross-reference streams
28
- suppress_warnings (bool): Suppress QPDF warnings
29
- attempt_recovery (bool): Attempt to recover damaged PDFs
30
- inherit_page_attributes (bool): Inherit page attributes from page tree
31
- access_mode (AccessMode): File access mode
32
33
Returns:
34
Pdf: The opened PDF document
35
36
Raises:
37
PdfError: If the file cannot be opened
38
PasswordError: If password is required or incorrect
39
"""
40
41
@staticmethod
42
def new() -> Pdf:
43
"""
44
Create a new empty PDF document.
45
46
Returns:
47
Pdf: A new empty PDF document
48
"""
49
50
def save(self, filename, *, static_id=False, preserve_pdfa=True,
51
min_version=None, force_version=None, fix_metadata_version=True,
52
compress_streams=True, stream_decode_level=None,
53
object_stream_mode=ObjectStreamMode.preserve,
54
normalize_content=False, linearize=False, qdf=False,
55
progress=None, encryption=None, samefile_check=True) -> None:
56
"""
57
Save the PDF to a file.
58
59
Parameters:
60
- filename (str | pathlib.Path | IO): Output path or file-like object
61
- static_id (bool): Use a fixed document /ID instead of a generated one, for reproducible output (intended for testing and debugging)
62
- preserve_pdfa (bool): Maintain PDF/A compliance
63
- min_version (str, optional): Minimum PDF version (e.g., '1.4')
64
- force_version (str, optional): Force specific PDF version
65
- fix_metadata_version (bool): Update metadata version to match PDF version
66
- compress_streams (bool): Compress stream objects
67
- stream_decode_level (StreamDecodeLevel, optional): Stream decoding level
68
- object_stream_mode (ObjectStreamMode): Object stream handling
69
- normalize_content (bool): Normalize content streams
70
- linearize (bool): Create linearized (fast web view) PDF
71
- qdf (bool): Save in QPDF's inspection format
72
- progress (callable, optional): Callback invoked while the file is written, receiving an integer percentage (0-100) as its only argument
73
- encryption (Encryption, optional): Encryption settings
74
- samefile_check (bool): Check if saving to same file
75
76
Raises:
77
PdfError: If the file cannot be saved
78
"""
79
80
def close(self) -> None:
81
"""
82
Close the PDF and release resources.
83
84
The PDF object becomes unusable after closing.
85
"""
86
87
def copy_foreign(self, other_pdf_obj: Object) -> Object:
88
"""
89
Copy an object from another PDF into this PDF.
90
91
Parameters:
92
- other_pdf_obj (Object): Object from another PDF to copy
93
94
Returns:
95
Object: The copied object owned by this PDF
96
97
Raises:
98
ForeignObjectError: If the object cannot be copied
99
"""
100
101
def make_indirect(self, obj: Object) -> Object:
102
"""
103
Convert a direct object to an indirect object.
104
105
Parameters:
106
- obj (Object): Object to make indirect
107
108
Returns:
109
Object: The indirect object
110
"""
111
112
def add_blank_page(self, *, page_size=(612, 792)) -> Page:
113
"""
114
Add a blank page to the PDF.
115
116
Parameters:
117
- page_size (tuple): Page dimensions (width, height) in points
118
119
Returns:
120
Page: The newly created page
121
"""
122
123
@property
124
def Root(self) -> Dictionary:
125
"""
126
The PDF's document catalog (root object).
127
128
Returns:
129
Dictionary: Document catalog containing page tree and other references
130
"""
131
132
@property
133
def pages(self) -> list[Page]:
134
"""
135
List of all pages in the PDF.
136
137
Returns:
138
list[Page]: Pages that can be indexed, sliced, and modified
139
"""
140
141
@property
142
def objects(self) -> list[Object]:
143
"""
144
List-like view of all indirect objects in the PDF.
145
146
Returns:
147
list[Object]: Objects in the file, iterable and indexable by position; use Pdf.get_object((objid, generation)) to look up an object by its ID
148
"""
149
150
@property
151
def is_encrypted(self) -> bool:
152
"""
153
Whether the PDF is encrypted.
154
155
Returns:
156
bool: True if the PDF has encryption
157
"""
158
159
@property
160
def pdf_version(self) -> str:
161
"""
162
PDF version string (e.g., '1.4', '1.7').
163
164
Returns:
165
str: PDF version
166
"""
167
168
@property
169
def trailer(self) -> Dictionary:
170
"""
171
The PDF's trailer dictionary.
172
173
Returns:
174
Dictionary: Trailer containing cross-reference information
175
"""
176
177
@property
178
def docinfo(self) -> Dictionary:
179
"""
180
Document information dictionary.
181
182
Returns:
183
Dictionary: Document metadata (title, author, etc.)
184
"""
185
186
def check(self) -> list[str]:
187
"""
188
Check PDF for structural problems.
189
190
Returns:
191
list[str]: List of problems found (empty if no problems)
192
"""
193
```
194
195
### Convenience Functions
196
197
Global functions that provide shortcuts to common PDF operations.
198
199
```python { .api }
200
def open(filename, **kwargs) -> Pdf:
201
"""
202
Open an existing PDF file (alias for Pdf.open).
203
204
Parameters:
205
- filename: Path to PDF file or file-like object
206
- **kwargs: Same arguments as Pdf.open()
207
208
Returns:
209
Pdf: The opened PDF document
210
"""
211
212
def new() -> Pdf:
213
"""
214
Create a new empty PDF document (alias for Pdf.new).
215
216
Returns:
217
Pdf: A new empty PDF document
218
"""
219
```
220
221
### Access Modes
222
223
Control how PDF files are accessed and loaded into memory.
224
225
```python { .api }
226
from enum import Enum
227
228
class AccessMode(Enum):
229
"""File access modes for opening PDFs."""
230
default = ... # Standard file access
231
mmap = ... # Memory-mapped file access when possible
232
mmap_only = ... # Require memory-mapped access
233
stream = ... # Standard buffered stream access; never use memory mapping
234
```
235
236
### Object Stream Modes
237
238
Control how object streams are handled during save operations.
239
240
```python { .api }
241
class ObjectStreamMode(Enum):
242
"""Object stream handling modes."""
243
disable = ... # Don't use object streams
244
preserve = ... # Keep existing object streams
245
generate = ... # Generate new object streams for compression
246
```
247
248
### Stream Decode Levels
249
250
Control which stream filters are decoded when a PDF is written; pass one of these values as the `stream_decode_level` argument of `Pdf.save`.
251
252
```python { .api }
253
class StreamDecodeLevel(Enum):
254
"""Stream decoding levels."""
255
none = ... # No stream decoding
256
generalized = ... # Decode common filters
257
specialized = ... # Decode specialized filters
258
all = ... # Decode all supported filters
259
```
260
261
## Usage Examples
262
263
### Basic PDF Operations
264
265
```python
266
import pikepdf
267
268
# Open and read a PDF
269
with pikepdf.open('document.pdf') as pdf:
270
print(f"PDF version: {pdf.pdf_version}")
271
print(f"Number of pages: {len(pdf.pages)}")
272
print(f"Encrypted: {pdf.is_encrypted}")
273
274
# Create a new PDF with a blank page
275
new_pdf = pikepdf.new()
276
new_pdf.add_blank_page(page_size=(612, 792)) # US Letter
277
new_pdf.save('blank.pdf')
278
new_pdf.close()
279
```
280
281
### Working with Encrypted PDFs
282
283
```python
284
import pikepdf
285
286
# Open password-protected PDF
287
try:
288
pdf = pikepdf.open('encrypted.pdf', password='secret')
289
print("Successfully opened encrypted PDF")
290
pdf.close()
291
except pikepdf.PasswordError:
292
print("Incorrect password")
293
```
294
295
### Advanced Save Options
296
297
```python
298
import pikepdf
299
from pikepdf import Encryption, Permissions
300
301
# Open and save with compression and linearization
302
pdf = pikepdf.open('input.pdf')
303
304
# Configure encryption
305
encryption = Encryption(
306
owner='owner_password',
307
user='user_password',
308
allow=Permissions(print_highres=True, extract=False)
309
)
310
311
# Save with advanced options
312
pdf.save('output.pdf',
313
linearize=True, # Fast web view
314
compress_streams=True,
315
encryption=encryption,
316
fix_metadata_version=True)
317
318
pdf.close()
319
```
320
321
### Page Management
322
323
```python
324
import pikepdf
325
326
# Combine multiple PDFs
327
pdf1 = pikepdf.open('doc1.pdf')
328
pdf2 = pikepdf.open('doc2.pdf')
329
330
combined = pikepdf.new()
331
332
# Copy all pages from both PDFs
333
for page in pdf1.pages:
334
combined.pages.append(page)
335
336
for page in pdf2.pages:
337
combined.pages.append(page)
338
339
combined.save('combined.pdf')
340
341
# Close all PDFs
342
pdf1.close()
343
pdf2.close()
344
combined.close()
345
```