Tessl Tile for pypi/pikepdf@9.10.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

advanced.md attachments.md content-streams.md core-operations.md encryption.md forms.md images.md index.md metadata.md objects.md outlines.md pages.md

images.mddocs/

0
# Images and Graphics
1

2
Image extraction, manipulation, and graphics operations including support for various formats and color spaces. These capabilities enable comprehensive image handling within PDF documents.
3

4
## Capabilities
5

6
### PdfImage Class
7

8
High-level PDF image handling with extraction and conversion capabilities.
9

10
```python { .api }
11
class PdfImage:
    """
    PDF image object handler for image extraction and manipulation.

    Provides access to image properties, extraction capabilities,
    and conversion to external formats like PIL Image objects.
    """

    def __init__(self, obj: Stream) -> None:
        """
        Wrap an image stream in the high-level image interface.

        Parameters:
        - obj (Stream): Image XObject stream, e.g. a value from page.images
        """

    def extract_to(self, *, stream: BinaryIO | None = None, fileprefix: str = '') -> str:
        """
        Extract the image to a file or stream with automatic format detection.

        The image is saved with an appropriate file extension based on
        its format and compression. Supports PNG, JPEG, TIFF, and other formats.
        The output format is not known before extraction, so callers should
        build the final filename from the returned value.

        Parameters:
        - stream (BinaryIO, optional): Writable binary stream to receive the image data
        - fileprefix (str): Output path without a file extension; any directory
          component is part of this prefix

        Returns:
        str: If fileprefix was given, the path that was written (fileprefix plus
        the selected extension); otherwise only the extension of the data
        written to stream

        Raises:
        UnsupportedImageTypeError: If image format is not supported
        InvalidPdfImageError: If image data is corrupted or invalid
        """

    def as_pil_image(self) -> Any:  # PIL.Image.Image
        """
        Convert the PDF image to a PIL Image object.

        Returns:
        PIL.Image.Image: PIL Image object that can be manipulated or saved

        Raises:
        DependencyError: If PIL (Pillow) is not installed
        UnsupportedImageTypeError: If image cannot be converted
        InvalidPdfImageError: If image data is invalid
        """

    @property
    def width(self) -> int:
        """
        Image width in pixels.

        Returns:
        int: Width of the image in pixels
        """

    @property
    def height(self) -> int:
        """
        Image height in pixels.

        Returns:
        int: Height of the image in pixels
        """

    # NOTE(review): the pikepdf reference lists this property as
    # `bits_per_component`; confirm that a `bpc` alias exists before relying on it.
    @property
    def bpc(self) -> int:
        """
        Bits per component (color depth).

        Returns:
        int: Number of bits per color component (typically 1, 8, or 16)
        """

    @property
    def colorspace(self) -> Name:
        """
        Color space of the image.

        Returns:
        Name: Color space (e.g., Name.DeviceRGB, Name.DeviceCMYK, Name.DeviceGray)
        """

    @property
    def filters(self) -> list[Name]:
        """
        List of filters applied to the image data.

        Returns:
        list[Name]: Compression and encoding filters (e.g., [Name.DCTDecode] for JPEG)
        """

    @property
    def filter_decodeparms(self) -> list[Object]:
        """
        Decode parameters for image filters.

        Returns:
        list[Object]: Parameters for filter decoding
        """

    @property
    def image_mask(self) -> bool:
        """
        Whether this image is used as a mask.

        Returns:
        bool: True if image is a mask (1-bit monochrome used for transparency)
        """

    # NOTE(review): could not confirm a `mask` property in the pikepdf
    # reference; the mask is reachable through the /Mask and /SMask keys of
    # the underlying stream (`obj`). Verify before relying on this attribute.
    @property
    def mask(self) -> Object:
        """
        Mask associated with this image.

        Returns:
        Object: Mask image or soft mask for transparency effects
        """

    @property
    def palette(self) -> Object:
        """
        Color palette for indexed color images.

        Returns:
        Object: Palette data for indexed color space images
        """

    @property
    def size(self) -> tuple[int, int]:
        """
        Image dimensions as a tuple.

        Returns:
        tuple[int, int]: (width, height) in pixels
        """

    @property
    def obj(self) -> Stream:
        """
        The underlying PDF stream object containing image data.

        Returns:
        Stream: PDF stream with image data and metadata
        """
149
```
150

151
### PdfInlineImage Class
152

153
Handler for inline images embedded directly in content streams.
154

155
```python { .api }
156
class PdfInlineImage:
    """
    Inline image embedded directly in a PDF content stream.

    Inline images are embedded directly in the page content stream
    rather than being stored as separate objects with indirect references.
    """

    def as_pil_image(self) -> Any:  # PIL.Image.Image
        """
        Convert the inline image to a PIL Image object.

        Returns:
        PIL.Image.Image: PIL Image object for manipulation or display

        Raises:
        DependencyError: If PIL (Pillow) is not installed
        UnsupportedImageTypeError: If image format is not supported
        InvalidPdfImageError: If image data is corrupted
        """

    @property
    def width(self) -> int:
        """
        Inline image width in pixels.

        Returns:
        int: Width of the inline image
        """

    @property
    def height(self) -> int:
        """
        Inline image height in pixels.

        Returns:
        int: Height of the inline image
        """

    # NOTE(review): the pikepdf reference lists this property as
    # `bits_per_component`; confirm that a `bpc` alias exists before relying on it.
    @property
    def bpc(self) -> int:
        """
        Bits per component for the inline image.

        Returns:
        int: Color depth per component
        """

    @property
    def colorspace(self) -> Object:
        """
        Color space of the inline image.

        Returns:
        Object: Color space specification
        """

    @property
    def filters(self) -> list[Name]:
        """
        Filters applied to the inline image data.

        Returns:
        list[Name]: Compression and encoding filters
        """

    @property
    def size(self) -> tuple[int, int]:
        """
        Inline image dimensions.

        Returns:
        tuple[int, int]: (width, height) in pixels
        """
230
```
231

232
### Image Exception Classes
233

234
Specialized exceptions for image-related operations.
235

236
```python { .api }
237
class UnsupportedImageTypeError(Exception):
    """
    Raised when attempting to process an unsupported image type.

    This occurs when the PDF contains image formats or compression
    methods that pikepdf cannot handle or convert.
    """
244

245
class InvalidPdfImageError(Exception):
    """
    Raised when image data in the PDF is corrupted or invalid.

    This can occur with damaged PDF files or images with
    inconsistent metadata and data.
    """
252

253
class HifiPrintImageNotTranscodableError(Exception):
    """
    Raised when high-fidelity print images cannot be transcoded.

    Some specialized print images use formats that cannot be
    easily converted to standard image formats.
    """
260

261
class ImageDecompressionError(Exception):
    """
    Raised when image decompression fails.

    This occurs when compressed image data cannot be properly
    decompressed due to corruption or unsupported compression parameters.
    """
268
```
269

270
### Matrix Class (for Image Transformations)
271

272
Geometric transformation matrix for image placement and scaling.
273

274
```python { .api }
275
class Matrix:
276
    """
277
    PDF transformation matrix for geometric operations.
278
    
279
    Represents a 2D transformation matrix with 6 elements:
280
    [a b c d e f] representing the transformation:
281
    x' = a*x + c*y + e
282
    y' = b*x + d*y + f
283
    """
284
    
285
    def __init__(self, a: float = 1, b: float = 0, c: float = 0, 
286
                 d: float = 1, e: float = 0, f: float = 0) -> None:
287
        """
288
        Create a transformation matrix.
289
        
290
        Parameters:
291
        - a, b, c, d, e, f (float): Matrix elements
292
        """
293
    
294
    @staticmethod
295
    def identity() -> Matrix:
296
        """
297
        Create an identity matrix (no transformation).
298
        
299
        Returns:
300
        Matrix: Identity matrix [1 0 0 1 0 0]
301
        """
302
    
303
    def translated(self, dx: float, dy: float) -> Matrix:
304
        """
305
        Create a matrix with translation applied.
306
        
307
        Parameters:
308
        - dx (float): Translation in X direction
309
        - dy (float): Translation in Y direction
310
        
311
        Returns:
312
        Matrix: New matrix with translation applied
313
        """
314
    
315
    def scaled(self, sx: float, sy: float = None) -> Matrix:
316
        """
317
        Create a matrix with scaling applied.
318
        
319
        Parameters:
320
        - sx (float): Scale factor in X direction
321
        - sy (float, optional): Scale factor in Y direction (defaults to sx)
322
        
323
        Returns:
324
        Matrix: New matrix with scaling applied
325
        """
326
    
327
    def rotated(self, angle_degrees: float) -> Matrix:
328
        """
329
        Create a matrix with rotation applied.
330
        
331
        Parameters:
332
        - angle_degrees (float): Rotation angle in degrees
333
        
334
        Returns:
335
        Matrix: New matrix with rotation applied
336
        """
337
    
338
    def inverse(self) -> Matrix:
339
        """
340
        Calculate the inverse of this matrix.
341
        
342
        Returns:
343
        Matrix: Inverse transformation matrix
344
        
345
        Raises:
346
        ValueError: If matrix is not invertible (determinant is zero)
347
        """
348
    
349
    def transform(self, point: tuple[float, float]) -> tuple[float, float]:
350
        """
351
        Transform a point using this matrix.
352
        
353
        Parameters:
354
        - point (tuple[float, float]): Point coordinates (x, y)
355
        
356
        Returns:
357
        tuple[float, float]: Transformed point coordinates
358
        """
359
    
360
    @property
361
    def a(self) -> float:
362
        """X-scaling component."""
363
    
364
    @property  
365
    def b(self) -> float:
366
        """Y-skewing component."""
367
    
368
    @property
369
    def c(self) -> float:
370
        """X-skewing component."""
371
    
372
    @property
373
    def d(self) -> float:
374
        """Y-scaling component."""
375
    
376
    @property
377
    def e(self) -> float:
378
        """X-translation component."""
379
    
380
    @property
381
    def f(self) -> float:
382
        """Y-translation component."""
383
```
384

385
## Usage Examples
386

387
### Extracting Images from PDF
388

389
```python
390
from pathlib import Path

import pikepdf

# Make sure the output directory exists before writing into it
output_dir = Path('extracted_images')
output_dir.mkdir(parents=True, exist_ok=True)

image_count = 0

# Open PDF with images; the context manager closes it even if a page fails
with pikepdf.open('document_with_images.pdf') as pdf:
    # Iterate through all pages (numbered from 1 for display)
    for page_num, page in enumerate(pdf.pages, start=1):
        # page.images maps resource names to raw image streams, so each
        # stream is wrapped in PdfImage to get the high-level interface
        for name, raw_image in page.images.items():
            try:
                image = pikepdf.PdfImage(raw_image)

                # fileprefix is the output path without an extension;
                # extract_to() appends the extension it selects and
                # returns the resulting path
                filename = image.extract_to(
                    fileprefix=str(output_dir / f'page{page_num}_image{image_count}')
                )
            except pikepdf.UnsupportedImageTypeError as e:
                print(f"Could not extract image {name}: {e}")
                continue
            except pikepdf.InvalidPdfImageError as e:
                print(f"Invalid image data for {name}: {e}")
                continue

            print(f"Extracted image: {filename}")
            print(f"  Size: {image.width} x {image.height}")
            print(f"  Color depth: {image.bits_per_component} bits per component")
            print(f"  Color space: {image.colorspace}")
            print(f"  Filters: {image.filters}")

            image_count += 1

print(f"Total images extracted: {image_count}")
425
```
426

427
### Converting Images to PIL Format
428

429
```python
430
import pikepdf
from PIL import Image, ImageEnhance

# The context manager closes the PDF even if processing raises
with pikepdf.open('document_with_images.pdf') as pdf:
    for page_num, page in enumerate(pdf.pages, start=1):
        # page.images maps resource names to raw image streams
        for name, raw_image in page.images.items():
            # Resource names start with '/', which would otherwise be
            # interpreted as a directory separator in the output filename
            safe_name = str(name).lstrip('/')

            try:
                # Wrap the raw stream, then convert to a PIL Image
                pil_image = pikepdf.PdfImage(raw_image).as_pil_image()

                # Only RGB images are processed in this example
                if pil_image.mode != 'RGB':
                    continue

                # Enhance brightness by 20%
                enhanced = ImageEnhance.Brightness(pil_image).enhance(1.2)

                # Save processed image
                output_path = f'processed_page{page_num}_{safe_name}.png'
                enhanced.save(output_path)

                print(f"Processed and saved: {output_path}")

            except pikepdf.DependencyError:
                print("PIL (Pillow) not installed - cannot convert to PIL format")
            except Exception as e:
                # Deliberately broad: one bad image should not stop the
                # remaining images from being processed
                print(f"Error processing image {name}: {e}")
461
```
462

463
### Analyzing Image Properties
464

465
```python
466
from collections import Counter

import pikepdf

# Collect image statistics; Counter replaces the manual get()/+1 bookkeeping
image_stats = {
    'total_images': 0,
    'by_colorspace': Counter(),
    'by_filter': Counter(),
    'by_dimensions': Counter(),
    'total_size_bytes': 0,
}

# The context manager closes the PDF even if analysis raises
with pikepdf.open('document_with_images.pdf') as pdf:
    for page in pdf.pages:
        # page.images maps resource names to raw image streams, so each
        # stream is wrapped in PdfImage to get the high-level interface
        for name, raw_image in page.images.items():
            image = pikepdf.PdfImage(raw_image)
            image_stats['total_images'] += 1

            # Color space statistics
            image_stats['by_colorspace'][str(image.colorspace)] += 1

            # Filter statistics
            image_stats['by_filter'].update(str(f) for f in image.filters)

            # Dimension statistics
            image_stats['by_dimensions'][(image.width, image.height)] += 1

            # Size estimation (from the stream's /Length entry, i.e. the
            # encoded size rather than the decoded pixel data)
            image_stats['total_size_bytes'] += int(raw_image.get('/Length', 0))

            # Detailed image info
            print(f"Image {name}:")
            print(f"  Dimensions: {image.width} x {image.height}")
            print(f"  Bits per component: {image.bits_per_component}")
            print(f"  Color space: {image.colorspace}")
            print(f"  Filters: {image.filters}")
            print(f"  Is mask: {image.image_mask}")

            # Masks are referenced from the image dictionary itself
            if '/SMask' in raw_image or '/Mask' in raw_image:
                print("  Has mask/transparency")
            if image.palette:
                print("  Has color palette")

# Print summary statistics
print("\n=== Image Statistics ===")
print(f"Total images: {image_stats['total_images']}")
print(f"Total estimated size: {image_stats['total_size_bytes'] / 1024:.1f} KB")

print("\nColor spaces:")
for cs, count in image_stats['by_colorspace'].items():
    print(f"  {cs}: {count}")

print("\nCompression filters:")
for filter_name, count in image_stats['by_filter'].items():
    print(f"  {filter_name}: {count}")

# Find most common dimensions
if image_stats['by_dimensions']:
    print("\nMost common dimensions:")
    for (width, height), count in image_stats['by_dimensions'].most_common(3):
        print(f"  {width}x{height}: {count} images")
539
```
540

541
### Working with Inline Images
542

543
```python
544
import pikepdf

# The context manager closes the PDF even if processing raises
with pikepdf.open('document.pdf') as pdf:
    for page_num, page in enumerate(pdf.pages, start=1):
        # Parse the page's content stream into a list of instructions;
        # inline images appear as ContentStreamInlineImage entries
        instructions = pikepdf.parse_content_stream(page)

        inline_image_count = 0

        for instruction in instructions:
            if not isinstance(instruction, pikepdf.ContentStreamInlineImage):
                continue

            inline_image = instruction.iimage

            try:
                # Convert inline image to PIL
                pil_image = inline_image.as_pil_image()

                # Save inline image
                filename = f'page{page_num}_inline{inline_image_count}.png'
                pil_image.save(filename)

                print(f"Saved inline image: {filename}")
                print(f"  Size: {inline_image.width} x {inline_image.height}")
                print(f"  Color space: {inline_image.colorspace}")

                inline_image_count += 1

            except Exception as e:
                # Deliberately broad: one bad inline image should not stop
                # the remaining ones from being processed
                print(f"Could not process inline image: {e}")

        if inline_image_count > 0:
            print(f"Page {page_num}: Found {inline_image_count} inline images")
579
```
580

581
### Image Replacement and Manipulation
582

583
```python
584
import io

import pikepdf
from PIL import Image

IMAGE_WIDTH, IMAGE_HEIGHT = 200, 100
IMAGE_NAME = '/NewImage'

# Create a new image to insert and encode it as JPEG in memory. The stream
# below is declared with the DCTDecode filter, so its bytes must be JPEG
# data; storing PNG file bytes under that filter produces a broken image.
new_image = Image.new('RGB', (IMAGE_WIDTH, IMAGE_HEIGHT), color='red')
jpeg_buffer = io.BytesIO()
new_image.save(jpeg_buffer, format='JPEG')

# The context manager closes the PDF even if something below raises
with pikepdf.open('document.pdf') as pdf:
    page = pdf.pages[0]

    # Create image stream; keyword arguments become stream dictionary keys
    image_stream = pikepdf.Stream(
        pdf,
        jpeg_buffer.getvalue(),
        Type=pikepdf.Name.XObject,
        Subtype=pikepdf.Name.Image,
        Width=IMAGE_WIDTH,
        Height=IMAGE_HEIGHT,
        ColorSpace=pikepdf.Name.DeviceRGB,
        BitsPerComponent=8,
        Filter=pikepdf.Name.DCTDecode,  # JPEG compression
    )

    # Add image to page resources under /XObject (created if missing)
    page.add_resource(
        image_stream,
        pikepdf.Name.XObject,
        name=pikepdf.Name(IMAGE_NAME),
    )

    # Content stream to display the image: save graphics state, scale the
    # unit square to the image size and move it to (50, 700), paint, restore
    content = (
        f"q\n"
        f"{IMAGE_WIDTH} 0 0 {IMAGE_HEIGHT} 50 700 cm\n"
        f"{IMAGE_NAME} Do\n"
        f"Q\n"
    ).encode()

    # Append to the page content, whether it is a single stream, an array
    # of streams, or absent
    page.contents_add(content)

    pdf.save('document_with_new_image.pdf')
645
```
646

647
### Advanced Image Processing
648

649
```python
650
import pikepdf
651
from PIL import Image, ImageFilter, ImageOps
652

653
def process_pdf_images(input_pdf, output_pdf):
    """Enhance the RGB and grayscale images in a PDF and save the result.

    RGB images get an unsharp mask followed by auto-contrast; grayscale
    images get auto-contrast only. Images in any other mode are left
    untouched. Each enhanced image is written back into its stream as
    Flate-compressed raw samples, so the saved PDF contains the changes.

    Parameters:
    - input_pdf: Path of the PDF to read
    - output_pdf: Path the processed PDF is saved to
    """
    import zlib  # local import: only this function needs it

    # The context manager closes the PDF even if saving or processing raises
    with pikepdf.open(input_pdf) as pdf:
        # An image XObject can be shared by several pages; remember the
        # ones already handled so that none is enhanced twice
        processed = set()

        for page_num, page in enumerate(pdf.pages, start=1):
            # page.images maps resource names to raw image streams
            for name, raw_image in page.images.items():
                if raw_image.objgen in processed:
                    continue
                processed.add(raw_image.objgen)

                try:
                    # Convert to PIL for processing
                    pil_image = pikepdf.PdfImage(raw_image).as_pil_image()

                    if pil_image.mode == 'RGB':
                        # Apply unsharp mask for clarity
                        enhanced = pil_image.filter(ImageFilter.UnsharpMask(
                            radius=1, percent=150, threshold=3
                        ))
                        # Auto-contrast adjustment
                        enhanced = ImageOps.autocontrast(enhanced, cutoff=1)
                        colorspace = pikepdf.Name.DeviceRGB
                    elif pil_image.mode == 'L':  # Grayscale
                        # Enhance contrast for grayscale images
                        enhanced = ImageOps.autocontrast(pil_image, cutoff=2)
                        colorspace = pikepdf.Name.DeviceGray
                    else:
                        # Skip images we can't enhance
                        continue

                    # Replace the stream data with the enhanced samples and
                    # update the image dictionary to describe the new data
                    raw_image.write(
                        zlib.compress(enhanced.tobytes()),
                        filter=pikepdf.Name.FlateDecode,
                    )
                    raw_image.ColorSpace = colorspace
                    raw_image.BitsPerComponent = 8
                    raw_image.Width, raw_image.Height = enhanced.size

                except Exception as e:
                    # Deliberately broad: one bad image should not stop
                    # the remaining images from being processed
                    print(f"Could not enhance image {name}: {e}")
                    continue

                print(f"Enhanced image {name} on page {page_num}")

        pdf.save(output_pdf)
701

702
# Usage
703
process_pdf_images('input.pdf', 'enhanced_output.pdf')
704
```
705

706
### Image Format Conversion
707

708
```python
709
import pikepdf
710
from PIL import Image
711

712
def convert_pdf_images_to_format(pdf_path, output_format='PNG'):
    """Convert all PDF images to a specific format.

    Each image is written to the current directory as
    ``page<N>_<resource name>.<format>``.

    Parameters:
    - pdf_path: Path of the PDF to read
    - output_format (str): Pillow format name, e.g. 'PNG' or 'JPEG'

    Returns:
    list[str]: Paths of the files that were written; images that could
    not be converted are reported and skipped
    """
    converted_images = []
    extension = output_format.lower()
    to_jpeg = output_format.upper() == 'JPEG'

    # The context manager closes the PDF even if conversion raises
    with pikepdf.open(pdf_path) as pdf:
        for page_num, page in enumerate(pdf.pages, start=1):
            # page.images maps resource names to raw image streams
            for name, raw_image in page.images.items():
                # Resource names start with '/', which would otherwise be
                # interpreted as a directory separator in the filename
                safe_name = str(name).lstrip('/')

                try:
                    # Wrap the raw stream, then convert to PIL
                    pil_image = pikepdf.PdfImage(raw_image).as_pil_image()

                    # Determine output filename
                    output_path = f'page{page_num}_{safe_name}.{extension}'

                    if to_jpeg and pil_image.mode in ('RGBA', 'LA'):
                        # JPEG has no alpha channel: flatten onto white
                        rgba_image = pil_image.convert('RGBA')
                        rgb_image = Image.new('RGB', rgba_image.size, (255, 255, 255))
                        rgb_image.paste(rgba_image, mask=rgba_image.split()[-1])
                        rgb_image.save(output_path, format=output_format, quality=95)
                    elif to_jpeg and pil_image.mode == 'P':
                        # JPEG cannot store palette images: expand to RGB
                        pil_image.convert('RGB').save(output_path, format=output_format)
                    else:
                        pil_image.save(output_path, format=output_format)

                except Exception as e:
                    # Deliberately broad: one bad image should not stop
                    # the remaining images from being converted
                    print(f"Could not convert image {name}: {e}")
                    continue

                converted_images.append(output_path)
                print(f"Converted {name} to {output_path}")

    return converted_images
747

748
# Convert all images to PNG
749
png_files = convert_pdf_images_to_format('document.pdf', 'PNG')
750
print(f"Converted {len(png_files)} images to PNG format")
751
```

Version

Tile

Files

docs/images.md

images.mddocs/