Tessl Tile for pypi/pikepdf@9.10.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

advanced.md attachments.md content-streams.md core-operations.md encryption.md forms.md images.md index.md metadata.md objects.md outlines.md pages.md

objects.md docs/

0
# PDF Objects and Data Types
1

2
PDF object types and data structures that form the foundation of PDF content representation. These classes provide the building blocks for manipulating PDF data at the object level.
3

4
## Capabilities
5

6
### Base Object Class
7

8
The fundamental PDF object type that all other PDF objects inherit from, providing common functionality for object manipulation and ownership.
9

10
```python { .api }
11
class Object:
12
    """
13
    Universal PDF object type representing any PDF data structure.
14
    
15
    All PDF objects (arrays, dictionaries, names, etc.) derive from this class.
16
    """
17
    
18
    def is_owned_by(self, possible_owner: Pdf) -> bool:
19
        """
20
        Check if this object is owned by a specific PDF.
21
        
22
        Parameters:
23
        - possible_owner (Pdf): PDF to check ownership against
24
        
25
        Returns:
26
        bool: True if this object belongs to the specified PDF
27
        """
28
    
29
    def same_owner_as(self, other: Object) -> bool:
30
        """
31
        Check if this object has the same owner as another object.
32
        
33
        Parameters:
34
        - other (Object): Object to compare ownership with
35
        
36
        Returns:
37
        bool: True if both objects have the same owner
38
        """
39
    
40
    def with_same_owner_as(self, other: Object) -> Object:
41
        """
42
        Return a copy of this object owned by the same PDF as another object.
43
        
44
        Parameters:
45
        - other (Object): Object whose owner should be used
46
        
47
        Returns:
48
        Object: Copy of this object with the same owner as other
49
        
50
        Raises:
51
        ForeignObjectError: If objects cannot be made compatible
52
        """
53
    
54
    @staticmethod
55
    def parse(data: str, *, pdf_context: Pdf = None) -> Object:
56
        """
57
        Parse a string representation of PDF data into an Object.
58
        
59
        Parameters:
60
        - data (str): String containing PDF object data
61
        - pdf_context (Pdf, optional): PDF context for parsing
62
        
63
        Returns:
64
        Object: Parsed PDF object
65
        
66
        Raises:
67
        PdfError: If the data cannot be parsed
68
        """
69
    
70
    def unparse(self, *, resolved: bool = False) -> str:
71
        """
72
        Convert the object back to its string representation.
73
        
74
        Parameters:
75
        - resolved (bool): Whether to resolve indirect references
76
        
77
        Returns:
78
        str: String representation of the object
79
        """
80
    
81
    @property
82
    def _type_code(self) -> ObjectType:
83
        """
84
        The object's type code.
85
        
86
        Returns:
87
        ObjectType: Enumeration value indicating the object type
88
        """
89
    
90
    @property
91
    def is_indirect(self) -> bool:
92
        """
93
        Whether this is an indirect object.
94
        
95
        Returns:
96
        bool: True if this is an indirect object reference
97
        """
98
    
99
    @property
100
    def objgen(self) -> tuple[int, int]:
101
        """
102
        Object and generation numbers for indirect objects.
103
        
104
        Returns:
105
        tuple[int, int]: (object_number, generation_number) or (0, 0) for direct objects
106
        """
107
```
108

109
### Array Objects
110

111
PDF arrays represent ordered collections of PDF objects, similar to Python lists.
112

113
```python { .api }
114
class Array(Object):
115
    """
116
    PDF array object representing an ordered list of PDF objects.
117
    
118
    Behaves like a Python list with additional PDF-specific functionality.
119
    """
120
    
121
    def __init__(self, iterable=None) -> None:
122
        """
123
        Create a new PDF array.
124
        
125
        Parameters:
126
        - iterable (optional): Initial objects to populate the array
127
        """
128
    
129
    def __len__(self) -> int:
130
        """Return the number of elements in the array."""
131
    
132
    def __getitem__(self, index: int) -> Object:
133
        """Get an element by index."""
134
    
135
    def __setitem__(self, index: int, value: Object) -> None:
136
        """Set an element at the given index."""
137
    
138
    def append(self, obj: Object) -> None:
139
        """
140
        Add an object to the end of the array.
141
        
142
        Parameters:
143
        - obj (Object): Object to append
144
        """
145
    
146
    def extend(self, iterable) -> None:
147
        """
148
        Extend the array with objects from an iterable.
149
        
150
        Parameters:
151
        - iterable: Objects to add to the array
152
        """
153
    
154
    def insert(self, index: int, obj: Object) -> None:
155
        """
156
        Insert an object at the specified index.
157
        
158
        Parameters:
159
        - index (int): Position to insert at
160
        - obj (Object): Object to insert
161
        """
162
```
163

164
### Dictionary Objects
165

166
PDF dictionaries represent key-value mappings where keys are Name objects and values are any PDF objects.
167

168
```python { .api }
169
class Dictionary(Object):
170
    """
171
    PDF dictionary object representing key-value mappings.
172
    
173
    Keys must be Name objects, values can be any PDF objects.
174
    Behaves like a Python dictionary with PDF-specific enhancements.
175
    """
176
    
177
    def __init__(self, mapping=None, **kwargs) -> None:
178
        """
179
        Create a new PDF dictionary.
180
        
181
        Parameters:
182
        - mapping (optional): Initial key-value pairs
183
        - **kwargs: Additional key-value pairs (keys converted to Names)
184
        """
185
    
186
    def __getitem__(self, key) -> Object:
187
        """Get a value by key (key can be str or Name)."""
188
    
189
    def __setitem__(self, key, value: Object) -> None:
190
        """Set a key-value pair (key converted to Name if needed)."""
191
    
192
    def __contains__(self, key) -> bool:
193
        """Check if key exists in dictionary."""
194
    
195
    def __len__(self) -> int:
196
        """Return number of key-value pairs."""
197
    
198
    def keys(self):
199
        """Return dictionary keys as Name objects."""
200
    
201
    def values(self):
202
        """Return dictionary values."""
203
    
204
    def items(self):
205
        """Return key-value pairs."""
206
    
207
    def get(self, key, default=None) -> Object:
208
        """
209
        Get a value with optional default.
210
        
211
        Parameters:
212
        - key: Dictionary key (str or Name)
213
        - default: Default value if key not found
214
        
215
        Returns:
216
        Object: Value associated with key, or default
217
        """
218
```
219

220
### Name Objects
221

222
PDF names are atomic identifiers used as dictionary keys and various PDF constants.
223

224
```python { .api }
225
class Name(Object):
226
    """
227
    PDF name object representing an immutable identifier.
228
    
229
    Names are used as dictionary keys and PDF constants.
230
    Supports both string construction and attribute-style access.
231
    """
232
    
233
    def __init__(self, name_string: str) -> None:
234
        """
235
        Create a PDF name from a string.
236
        
237
        Parameters:
238
        - name_string (str): String representation of the name
239
        """
240
    
241
    def __str__(self) -> str:
241
        """Return the name as a string, including its leading slash (e.g. '/Type')."""
243
    
244
    def __repr__(self) -> str:
245
        """Return full representation including leading slash."""
246
    
247
    def __eq__(self, other) -> bool:
248
        """Compare names for equality."""
249
    
250
    def __hash__(self) -> int:
251
        """Return hash for use as dictionary key."""
252

253
# Name constants can be accessed as attributes
254
# Example: Name.Type, Name.Font, Name.Contents
255
```
256

257
### String Objects
258

259
PDF strings can contain text or binary data with proper encoding handling.
260

261
```python { .api }
262
class String(Object):
263
    """
264
    PDF string object for text or binary data.
265
    
266
    Handles PDF string encoding including literal strings and hex strings.
267
    """
268
    
269
    def __init__(self, str_or_bytes) -> None:
270
        """
271
        Create a PDF string from text or bytes.
272
        
273
        Parameters:
274
        - str_or_bytes (str | bytes): String content
275
        """
276
    
277
    def __str__(self) -> str:
278
        """Return string content as text."""
279
    
280
    def __bytes__(self) -> bytes:
281
        """Return string content as bytes."""
282
    
283
    def __len__(self) -> int:
284
        """Return length of string content."""
285
    
286
    @property
287
    def for_pdf(self) -> str:
288
        """
289
        String representation suitable for PDF output.
290
        
291
        Returns:
292
        str: Properly escaped string for PDF files
293
        """
294
```
295

296
### Stream Objects
297

298
PDF streams contain both a dictionary of metadata and binary data content.
299

300
```python { .api }
301
class Stream(Object):
302
    """
303
    PDF stream object containing dictionary metadata and binary data.
304
    
305
    Streams are used for page content, images, fonts, and other binary data.
306
    """
307
    
308
    def __init__(self, owner: Pdf, data=None, dict=None, **kwargs) -> None:
309
        """
310
        Create a new PDF stream.
311
        
312
        Parameters:
313
        - owner (Pdf): PDF that will own this stream
314
        - data (bytes, optional): Stream data content
315
        - dict (Dictionary, optional): Stream dictionary
316
        - **kwargs: Additional dictionary entries
317
        """
318
    
319
    @property
320
    def dictionary(self) -> Dictionary:
321
        """
322
        The stream's dictionary containing metadata.
323
        
324
        Returns:
325
        Dictionary: Stream metadata and parameters
326
        """
327
    
328
    def read_bytes(self) -> bytes:
329
        """
330
        Read the stream's data as bytes.
331
        
332
        Returns:
333
        bytes: Decoded stream data
334
        
335
        Raises:
336
        DataDecodingError: If stream cannot be decoded
337
        """
338
    
339
    def read_raw_bytes(self) -> bytes:
340
        """
341
        Read the stream's raw data exactly as stored, without applying its decode filters.
342
        
343
        Returns:
344
        bytes: Raw stream data without decoding filters
345
        """
346
    
347
    def write(self, data: bytes, *, filter=None, decode_parms=None) -> None:
348
        """
349
        Write data to the stream.
350
        
351
        Parameters:
352
        - data (bytes): Data to write
353
        - filter (optional): Filter(s) that `data` is already encoded with (e.g. Name.FlateDecode); the data is stored as given, not compressed for you
354
        - decode_parms (optional): Filter parameters
355
        """
356
```
357

358
### Operator Objects
359

360
PDF operators represent content stream commands and their operands.
361

362
```python { .api }
363
class Operator(Object):
364
    """
365
    PDF content stream operator.
366
    
367
    Represents commands in PDF content streams like 'Tj' (show text) or 'l' (line to).
368
    """
369
    
370
    def __init__(self, name: str) -> None:
371
        """
372
        Create a PDF operator.
373
        
374
        Parameters:
375
        - name (str): Operator name (e.g., 'Tj', 'cm', 'Do')
376
        """
377
    
378
    def __str__(self) -> str:
379
        """Return operator name."""
380
    
381
    def __repr__(self) -> str:
382
        """Return full representation."""
383
```
384

385
### Object Type Enumeration
386

387
Enumeration of all possible PDF object types for type checking and identification.
388

389
```python { .api }
390
from enum import Enum
391

392
class ObjectType(Enum):
393
    """Enumeration of PDF object types."""
394
    uninitialized = ...  # Uninitialized object
395
    reserved = ...  # Reserved type
396
    null = ...  # Null object
397
    boolean = ...  # Boolean true/false
398
    integer = ...  # Integer number
399
    real = ...  # Real (floating-point) number
400
    string = ...  # String object
401
    name_ = ...  # Name object (underscore avoids conflict with 'name')
402
    array = ...  # Array object
403
    dictionary = ...  # Dictionary object
404
    stream = ...  # Stream object
405
    operator = ...  # Content stream operator
406
    inlineimage = ...  # Inline image
407
```
408

409
## Usage Examples
410

411
### Working with Arrays
412

413
```python
414
import pikepdf
415

416
pdf = pikepdf.new()
417

418
# Create an array
419
arr = pikepdf.Array([1, 2, 3])
420

421
# Add elements
422
arr.append(pikepdf.String("hello"))
423
arr.extend([pikepdf.Name.Type, pikepdf.Name.Font])
424

425
# Access elements
426
first = arr[0]  # Integer 1
427
last = arr[-1]  # Name(/Font)
428

429
# Use in dictionary
430
dict_obj = pikepdf.Dictionary({
431
    '/Contents': arr,
432
    '/Type': pikepdf.Name.Page
433
})
434
```
435

436
### Working with Dictionaries
437

438
```python
439
import pikepdf
440

441
# Create a dictionary
442
page_dict = pikepdf.Dictionary({
443
    '/Type': pikepdf.Name.Page,
444
    '/MediaBox': pikepdf.Array([0, 0, 612, 792]),
445
    '/Resources': pikepdf.Dictionary()
446
})
447

448
# Access values
449
page_type = page_dict['/Type']  # Name(/Page)
450
media_box = page_dict['/MediaBox']  # Array
451

452
# Add new entries
453
page_dict['/Rotate'] = 90
454
page_dict['/Contents'] = pikepdf.Array()
455

456
# Check for keys
457
if '/Resources' in page_dict:
458
    resources = page_dict['/Resources']
459
```
460

461
### Working with Names
462

463
```python
464
import pikepdf
465

466
# Create names
467
type_name = pikepdf.Name.Type
468
page_name = pikepdf.Name.Page
469
custom_name = pikepdf.Name('/CustomAttribute')
470

471
# Names can be compared
472
if type_name == pikepdf.Name.Type:
473
    print("Names are equal")
474

475
# Use in dictionaries
476
metadata = {
477
    type_name: page_name,
478
    pikepdf.Name.MediaBox: pikepdf.Array([0, 0, 612, 792])
479
}
480
```
481

482
### Working with Strings
483

484
```python
485
import pikepdf
486

487
# Create strings
488
title = pikepdf.String("Document Title")
489
binary_data = pikepdf.String(b'\x00\x01\x02\x03')
490

491
# Convert between representations
492
text_content = str(title)  # "Document Title"
493
byte_content = bytes(binary_data)  # b'\x00\x01\x02\x03'
494

495
# Use in document info
496
pdf = pikepdf.new()
497
pdf.docinfo['/Title'] = title
498
pdf.docinfo['/Author'] = pikepdf.String("Jane Doe")
499
```
500

501
### Working with Streams
502

503
```python
504
import pikepdf
505

506
pdf = pikepdf.new()
507

508
# Create a stream with text content
509
content_data = b"BT /F1 12 Tf 100 700 Td (Hello World) Tj ET"
510
content_stream = pikepdf.Stream(pdf, content_data)
511

512
# Set stream properties
513
content_stream.dictionary['/Length'] = len(content_data)
514

515
# Read stream data
516
data = content_stream.read_bytes()
517
raw_data = content_stream.read_raw_bytes()
518

519
# Use stream in a page
520
page = pdf.add_blank_page()
521
page['/Contents'] = content_stream
522
```
523

524
### Object Copying and Ownership
525

526
```python
527
import pikepdf
528

529
# Open two PDFs
530
pdf1 = pikepdf.open('source.pdf')
531
pdf2 = pikepdf.new()
532

533
# Copy object from one PDF to another
534
source_obj = pdf1.pages[0]['/Resources']
535
copied_obj = pdf2.copy_foreign(source_obj)
536

537
# Check ownership
538
assert copied_obj.is_owned_by(pdf2)
539
assert not copied_obj.is_owned_by(pdf1)
540

541
# Make object indirect
542
indirect_obj = pdf2.make_indirect(copied_obj)
543
obj_id, generation = indirect_obj.objgen
544
```

Version

Tile

Files

objects.md docs/

objects.md docs/