Tessl Tile for pypi/pypdf2@2.12.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

errors-and-utilities.md index.md page-manipulation.md pdf-merging.md pdf-reading.md pdf-writing.md types-and-objects.md

errors-and-utilities.mddocs/

0
# Errors and Utilities
1

2
Exception classes for comprehensive error handling, utility functions for specialized operations, and helper classes that support PyPDF2's core functionality.
3

4
## Capabilities
5

6
### Exception Classes
7

8
Comprehensive exception hierarchy for handling various PDF processing errors.
9

10
```python { .api }
11
class PyPdfError(Exception):
12
    """Base exception class for all PyPDF2 errors."""
13

14
class PdfReadError(PyPdfError):
15
    """Raised when there's an error reading a PDF file."""
16

17
class PdfStreamError(PdfReadError):
18
    """Raised when there's an error processing PDF streams."""
19

20
class PageSizeNotDefinedError(PyPdfError):
21
    """Raised when page size cannot be determined."""
22

23
class ParseError(Exception):
24
    """Raised when there's an error parsing PDF content."""
25

26
class FileNotDecryptedError(PdfReadError):
27
    """Raised when attempting to access encrypted content without decryption."""
28

29
class WrongPasswordError(PdfReadError):
30
    """Raised when an incorrect password is provided for an encrypted PDF."""
31

32
class EmptyFileError(PdfReadError):
33
    """Raised when attempting to read an empty or corrupt PDF file."""
34

35
class DependencyError(Exception):
36
    """Raised when a required dependency is missing."""
37
```
38

39
### Warning Classes
40

41
Warning classes for non-fatal issues during PDF processing.
42

43
```python { .api }
44
class PdfReadWarning(UserWarning):
45
    """Warning issued during PDF reading for recoverable issues."""
46
```
47

48
### Paper Size Utilities
49

50
Standard paper size definitions and utilities.
51

52
```python { .api }
53
class PaperSize:
54
    """Standard paper size constants with dimensions in points."""
55
    
56
    # ISO A-series paper sizes
57
    A0: 'Dimensions'  # 2384 x 3371 points (33.1" x 46.8")
58
    A1: 'Dimensions'  # 1685 x 2384 points (23.4" x 33.1")
59
    A2: 'Dimensions'  # 1190 x 1685 points (16.5" x 23.4")
60
    A3: 'Dimensions'  # 842 x 1190 points (11.7" x 16.5")
61
    A4: 'Dimensions'  # 595 x 842 points (8.3" x 11.7")
62
    A5: 'Dimensions'  # 420 x 595 points (5.8" x 8.3")
63
    A6: 'Dimensions'  # 298 x 420 points (4.1" x 5.8")
64
    A7: 'Dimensions'  # 210 x 298 points (2.9" x 4.1")
65
    A8: 'Dimensions'  # 147 x 210 points (2.0" x 2.9")
66
    
67
    # Envelope sizes
68
    C4: 'Dimensions'  # 649 x 918 points (9.0" x 12.8")
69

70
class Dimensions:
71
    """Represents paper dimensions in points."""
72
    
73
    def __init__(self, width: float, height: float):
74
        """
75
        Initialize dimensions.
76
        
77
        Args:
78
            width (float): Width in points (72 points = 1 inch)
79
            height (float): Height in points (72 points = 1 inch)
80
        """
81
        self.width = width
82
        self.height = height
83
    
84
    @property
85
    def width_inches(self) -> float:
86
        """Width in inches."""
87
        return self.width / 72.0
88
    
89
    @property
90
    def height_inches(self) -> float:
91
        """Height in inches."""
92
        return self.height / 72.0
93
    
94
    @property
95
    def width_mm(self) -> float:
96
        """Width in millimeters."""
97
        return self.width / 72.0 * 25.4
98
    
99
    @property
100
    def height_mm(self) -> float:
101
        """Height in millimeters."""
102
        return self.height / 72.0 * 25.4
103
```
104

105
### PDF Filters
106

107
Compression and encoding filters for PDF content streams.
108

109
```python { .api }
110
class FlateDecode:
111
    """Flate/ZIP compression filter (most common)."""
112
    
113
    @staticmethod
114
    def decode(data: bytes, decode_parms: dict = None) -> bytes:
115
        """
116
        Decode Flate-compressed data.
117
        
118
        Args:
119
            data (bytes): Compressed data
120
            decode_parms (dict, optional): Decode parameters
121
            
122
        Returns:
123
            bytes: Decompressed data
124
        """
125
    
126
    @staticmethod
127
    def encode(data: bytes) -> bytes:
128
        """
129
        Encode data with Flate compression.
130
        
131
        Args:
132
            data (bytes): Data to compress
133
            
134
        Returns:
135
            bytes: Compressed data
136
        """
137

138
class ASCIIHexDecode:
139
    """ASCII hexadecimal encoding filter."""
140
    
141
    @staticmethod
142
    def decode(data: bytes, decode_parms: dict = None) -> bytes:
143
        """
144
        Decode ASCII hex encoded data.
145
        
146
        Args:
147
            data (bytes): Hex-encoded data
148
            decode_parms (dict, optional): Decode parameters
149
            
150
        Returns:
151
            bytes: Decoded data
152
        """
153

154
class LZWDecode:
155
    """LZW compression filter."""
156
    
157
    @staticmethod
158
    def decode(data: bytes, decode_parms: dict = None) -> bytes:
159
        """
160
        Decode LZW compressed data.
161
        
162
        Args:
163
            data (bytes): LZW compressed data
164
            decode_parms (dict, optional): Decode parameters
165
            
166
        Returns:
167
            bytes: Decompressed data
168
        """
169

170
class DCTDecode:
171
    """JPEG compression filter."""
172
    
173
    @staticmethod
174
    def decode(data: bytes, decode_parms: dict = None) -> bytes:
175
        """
176
        Decode JPEG compressed data.
177
        
178
        Args:
179
            data (bytes): JPEG data
180
            decode_parms (dict, optional): Decode parameters
181
            
182
        Returns:
183
            bytes: Image data
184
        """
185

186
class JPXDecode:
187
    """JPEG 2000 compression filter."""
188
    
189
    @staticmethod
190
    def decode(data: bytes, decode_parms: dict = None) -> bytes:
191
        """
192
        Decode JPEG 2000 compressed data.
193
        
194
        Args:
195
            data (bytes): JPEG 2000 data
196
            decode_parms (dict, optional): Decode parameters
197
            
198
        Returns:
199
            bytes: Image data
200
        """
201

202
class CCITTFaxDecode:
203
    """CCITT fax compression filter."""
204
    
205
    @staticmethod
206
    def decode(data: bytes, decode_parms: dict = None) -> bytes:
207
        """
208
        Decode CCITT fax compressed data.
209
        
210
        Args:
211
            data (bytes): CCITT compressed data
212
            decode_parms (dict, optional): Decode parameters with Width, Height, etc.
213
            
214
        Returns:
215
            bytes: Decompressed image data
216
        """
217
```
218

219
### XMP Metadata Support
220

221
Extended metadata support for documents that include XMP information.
222

223
```python { .api }
224
class XmpInformation:
225
    """Handler for XMP (Extensible Metadata Platform) information."""
226
    
227
    def __init__(self, stream):
228
        """
229
        Initialize XMP information from stream.
230
        
231
        Args:
232
            stream: XMP metadata stream
233
        """
234
    
235
    # Methods for accessing XMP metadata
236
    # Implementation varies based on XMP schema and content
237
    # Provides access to Dublin Core, PDF, and custom metadata
238
```
239

240
### Version Information
241

242
```python { .api }
243
__version__: str  # Current PyPDF2 version string, e.g. "2.12.0"
244
```
245

246
### Utility Functions
247

248
General utility functions used throughout the library.
249

250
```python { .api }
251
def parse_filename_page_ranges(args: List[Union[str, PageRange, None]]) -> List[Tuple[str, PageRange]]:
252
    """
253
    Parse command-line style filename and page range arguments.
254
    
255
    Args:
256
        args: Arguments to parse (e.g., ["file1.pdf", "1:5", "file2.pdf", "::2"])
257
        
258
    Returns:
259
        list: List of (filename, page_range) tuples
260
    """
261
```
262

263
## Usage Examples
264

265
### Error Handling
266

267
```python
268
from PyPDF2 import PdfReader, PdfWriter
269
from PyPDF2.errors import (
270
    PdfReadError, WrongPasswordError, FileNotDecryptedError,
271
    EmptyFileError, DependencyError
272
)
273

274
def safe_pdf_operation(filename):
275
    try:
276
        reader = PdfReader(filename)
277
        
278
        if reader.is_encrypted:
279
            # Try to decrypt
280
            reader.decrypt("password")
281
            
282
        # Perform operations
283
        writer = PdfWriter()
284
        for page in reader.pages:
285
            writer.add_page(page)
286
            
287
        return writer
288
        
289
    except EmptyFileError:
290
        print(f"Error: {filename} is empty or corrupted")
291
    except WrongPasswordError:
292
        print(f"Error: Incorrect password for {filename}")
293
    except FileNotDecryptedError:
294
        print(f"Error: {filename} is encrypted and needs a password")
295
    except PdfReadError as e:
296
        print(f"Error reading {filename}: {e}")
297
    except DependencyError as e:
298
        print(f"Missing dependency: {e}")
299
    except Exception as e:
300
        print(f"Unexpected error: {e}")
301
    
302
    return None
303

304
# Usage
305
result = safe_pdf_operation("document.pdf")
306
if result:
307
    with open("processed.pdf", "wb") as output_file:
308
        result.write(output_file)
309
```
310

311
### Working with Paper Sizes
312

313
```python
314
from PyPDF2 import PdfWriter
315
from PyPDF2.papersizes import PaperSize
316

317
writer = PdfWriter()
318

319
# Create pages with different standard sizes
320
sizes_to_create = [
321
    ("A4", PaperSize.A4),
322
    ("A3", PaperSize.A3), 
323
    ("A5", PaperSize.A5),
324
    ("C4 Envelope", PaperSize.C4)
325
]
326

327
for name, size in sizes_to_create:
328
    page = writer.add_blank_page(size.width, size.height)
329
    print(f"{name}: {size.width} x {size.height} points")
330
    print(f"  {size.width_inches:.1f}\" x {size.height_inches:.1f}\"")
331
    print(f"  {size.width_mm:.0f}mm x {size.height_mm:.0f}mm")
332

333
with open("standard_sizes.pdf", "wb") as output_file:
334
    writer.write(output_file)
335
```
336

337
### Custom Paper Size Calculations
338

339
```python
340
from PyPDF2.papersizes import Dimensions
341

342
# Create custom paper sizes
343
us_letter = Dimensions(612, 792)  # 8.5" x 11"
344
us_legal = Dimensions(612, 1008)  # 8.5" x 14"
345
tabloid = Dimensions(792, 1224)   # 11" x 17"
346

347
custom_sizes = [
348
    ("US Letter", us_letter),
349
    ("US Legal", us_legal), 
350
    ("Tabloid", tabloid)
351
]
352

353
for name, size in custom_sizes:
354
    print(f"{name}:")
355
    print(f"  Points: {size.width} x {size.height}")
356
    print(f"  Inches: {size.width_inches:.1f}\" x {size.height_inches:.1f}\"")
357
    print(f"  mm: {size.width_mm:.0f} x {size.height_mm:.0f}")
358
```
359

360
### Filter Usage (Advanced)
361

362
```python
363
from PyPDF2.filters import FlateDecode
364
import zlib
365

366
# Example of manual filter usage (rarely needed)
367
original_data = b"Hello, World! This is test data for compression."
368

369
# Compress data
370
compressed = FlateDecode.encode(original_data)
371
print(f"Original size: {len(original_data)} bytes")
372
print(f"Compressed size: {len(compressed)} bytes")
373
print(f"Compression ratio: {len(compressed)/len(original_data):.2%}")
374

375
# Decompress data
376
decompressed = FlateDecode.decode(compressed)
377
print(f"Decompressed: {decompressed.decode('utf-8')}")
378
print(f"Data integrity: {original_data == decompressed}")
379
```
380

381
### Version Checking
382

383
```python
384
from PyPDF2 import __version__
385
from packaging import version
386

387
print(f"PyPDF2 version: {__version__}")
388

389
# Check if version meets requirements
390
required_version = "2.10.0"
391
if version.parse(__version__) >= version.parse(required_version):
392
    print(f"PyPDF2 version {__version__} meets requirement >= {required_version}")
393
else:
394
    print(f"PyPDF2 version {__version__} is below requirement >= {required_version}")
395
    print("Consider upgrading with: pip install --upgrade PyPDF2")
396
```
397

398
### Parsing Command Line Arguments
399

400
```python
401
from PyPDF2 import parse_filename_page_ranges, PdfMerger
402
import sys
403

404
def merge_from_args(args):
405
    """Merge PDFs based on command line arguments."""
406
    # Parse arguments like: ["file1.pdf", "1:5", "file2.pdf", "::2", "file3.pdf"]
407
    file_ranges = parse_filename_page_ranges(args)
408
    
409
    merger = PdfMerger()
410
    
411
    for filename, page_range in file_ranges:
412
        print(f"Adding {filename} with pages {page_range}")
413
        merger.append(filename, pages=page_range)
414
    
415
    merger.write("merged_output.pdf")
416
    merger.close()
417
    print("Merge completed: merged_output.pdf")
418

419
# Example usage
420
if __name__ == "__main__":
421
    if len(sys.argv) > 1:
422
        merge_from_args(sys.argv[1:])
423
    else:
424
        print("Usage: python script.py file1.pdf 1:5 file2.pdf ::2 file3.pdf")
425
```
426

427
### XMP Metadata Access
428

429
```python
430
from PyPDF2 import PdfReader
431

432
reader = PdfReader("document.pdf")
433

434
# Check for XMP metadata
435
if reader.xmp_metadata:
436
    xmp = reader.xmp_metadata
437
    print("XMP metadata found:")
438
    
439
    # XMP access depends on the specific XMP schema and content
440
    # Common patterns:
441
    try:
442
        print(f"Dublin Core title: {xmp.dc_title}")
443
        print(f"Dublin Core creator: {xmp.dc_creator}")
444
        print(f"Dublin Core subject: {xmp.dc_subject}")
445
    except AttributeError:
446
        print("Standard Dublin Core fields not available")
447
    
448
    # Raw XMP data
449
    print("Raw XMP metadata available for custom parsing")
450
else:
451
    print("No XMP metadata found")
452

453
# Standard (document info) metadata is exposed through reader.metadata; it may be None if the PDF has none
454
if reader.metadata:
455
    print(f"Standard metadata title: {reader.metadata.title}")
456
```
457

458
## Error Recovery Strategies
459

460
### Handling Corrupted PDFs
461

462
```python
463
from PyPDF2 import PdfReader, PdfWriter
464
from PyPDF2.errors import PdfReadError, PdfStreamError
465
import warnings
466

467
def repair_pdf_attempt(filename):
468
    """Attempt to repair/recover a corrupted PDF."""
469
    try:
470
        # Try strict mode first
471
        reader = PdfReader(filename, strict=True)
472
        return reader, "No repair needed"
473
        
474
    except PdfReadError:
475
        try:
476
            # Try non-strict mode for recovery
477
            with warnings.catch_warnings():
478
                warnings.simplefilter("ignore")
479
                reader = PdfReader(filename, strict=False)
480
                return reader, "Recovered in non-strict mode"
481
                
482
        except PdfReadError:
483
            # Try to extract what we can
484
            try:
485
                reader = PdfReader(filename, strict=False)
486
                writer = PdfWriter()
487
                
488
                pages_recovered = 0
489
                for i, page in enumerate(reader.pages):
490
                    try:
491
                        # Test if page is readable
492
                        _ = page.extract_text()
493
                        writer.add_page(page)
494
                        pages_recovered += 1
495
                    except Exception:
496
                        print(f"Skipping corrupted page {i+1}")
497
                        continue
498
                
499
                return writer, f"Partially recovered {pages_recovered} pages"
500
                
501
            except Exception as e:
502
                return None, f"Recovery failed: {e}"
503

504
# Usage
505
pdf_reader, status = repair_pdf_attempt("corrupted.pdf")
506
print(f"Recovery status: {status}")
507

508
if pdf_reader:
509
    if hasattr(pdf_reader, 'write'):  # It's a writer
510
        with open("repaired.pdf", "wb") as output_file:
511
            pdf_reader.write(output_file)
512
    else:  # It's a reader
513
        print(f"Successfully opened PDF with {len(pdf_reader.pages)} pages")
514
```

Version

Tile

Files

errors-and-utilities.md docs/

errors-and-utilities.mddocs/