or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

advanced.md attachments.md content-streams.md core-operations.md encryption.md forms.md images.md index.md metadata.md objects.md outlines.md pages.md

docs/objects.md

0

# PDF Objects and Data Types

1

2

This page describes the PDF object types and data structures that form the foundation of PDF content representation. These classes provide the building blocks for manipulating PDF data at the object level.

3

4

## Capabilities

5

6

### Base Object Class

7

8

`Object` is the fundamental PDF object type from which all other PDF objects inherit; it provides common functionality for object manipulation and ownership.

9

10

```python { .api }

11

class Object:

12

"""

13

Universal PDF object type representing any PDF data structure.

14

15

All PDF objects (arrays, dictionaries, names, etc.) derive from this class.

16

"""

17

18

def is_owned_by(self, possible_owner: Pdf) -> bool:

19

"""

20

Check if this object is owned by a specific PDF.

21

22

Parameters:

23

- possible_owner (Pdf): PDF to check ownership against

24

25

Returns:

26

bool: True if this object belongs to the specified PDF

27

"""

28

29

def same_owner_as(self, other: Object) -> bool:

30

"""

31

Check if this object has the same owner as another object.

32

33

Parameters:

34

- other (Object): Object to compare ownership with

35

36

Returns:

37

bool: True if both objects have the same owner

38

"""

39

40

def with_same_owner_as(self, other: Object) -> Object:

41

"""

42

Return a copy of this object owned by the same PDF as another object.

43

44

Parameters:

45

- other (Object): Object whose owner should be used

46

47

Returns:

48

Object: Copy of this object with the same owner as other

49

50

Raises:

51

ForeignObjectError: If objects cannot be made compatible

52

"""

53

54

@staticmethod

55

def parse(data: str, *, pdf_context: Pdf = None) -> Object:

56

"""

57

Parse a string representation of PDF data into an Object.

58

59

Parameters:

60

- data (str): String containing PDF object data

61

- pdf_context (Pdf, optional): PDF context for parsing

62

63

Returns:

64

Object: Parsed PDF object

65

66

Raises:

67

PdfError: If the data cannot be parsed

68

"""

69

70

def unparse(self, *, resolved: bool = False) -> str:

71

"""

72

Convert the object back to its string representation.

73

74

Parameters:

75

- resolved (bool): Whether to resolve indirect references

76

77

Returns:

78

str: String representation of the object

79

"""

80

81

@property

82

def _type_code(self) -> ObjectType:

83

"""

84

The object's type code.

85

86

Returns:

87

ObjectType: Enumeration value indicating the object type

88

"""

89

90

@property

91

def is_indirect(self) -> bool:

92

"""

93

Whether this is an indirect object.

94

95

Returns:

96

bool: True if this is an indirect object reference

97

"""

98

99

@property

100

def objgen(self) -> tuple[int, int]:

101

"""

102

Object and generation numbers for indirect objects.

103

104

Returns:

105

tuple[int, int]: (object_number, generation_number) or (0, 0) for direct objects

106

"""

107

```

108

109

### Array Objects

110

111

PDF arrays represent ordered collections of PDF objects, similar to Python lists.

112

113

```python { .api }

114

class Array(Object):

115

"""

116

PDF array object representing an ordered list of PDF objects.

117

118

Behaves like a Python list with additional PDF-specific functionality.

119

"""

120

121

def __init__(self, iterable=None) -> None:

122

"""

123

Create a new PDF array.

124

125

Parameters:

126

- iterable (optional): Initial objects to populate the array

127

"""

128

129

def __len__(self) -> int:

130

"""Return the number of elements in the array."""

131

132

def __getitem__(self, index: int) -> Object:

133

"""Get an element by index."""

134

135

def __setitem__(self, index: int, value: Object) -> None:

136

"""Set an element at the given index."""

137

138

def append(self, obj: Object) -> None:

139

"""

140

Add an object to the end of the array.

141

142

Parameters:

143

- obj (Object): Object to append

144

"""

145

146

def extend(self, iterable) -> None:

147

"""

148

Extend the array with objects from an iterable.

149

150

Parameters:

151

- iterable: Objects to add to the array

152

"""

153

154

def insert(self, index: int, obj: Object) -> None:

155

"""

156

Insert an object at the specified index.

157

158

Parameters:

159

- index (int): Position to insert at

160

- obj (Object): Object to insert

161

"""

162

```

163

164

### Dictionary Objects

165

166

PDF dictionaries represent key-value mappings where keys are Name objects and values are any PDF objects.

167

168

```python { .api }

169

class Dictionary(Object):

170

"""

171

PDF dictionary object representing key-value mappings.

172

173

Keys must be Name objects, values can be any PDF objects.

174

Behaves like a Python dictionary with PDF-specific enhancements.

175

"""

176

177

def __init__(self, mapping=None, **kwargs) -> None:

178

"""

179

Create a new PDF dictionary.

180

181

Parameters:

182

- mapping (optional): Initial key-value pairs

183

- **kwargs: Additional key-value pairs (keys converted to Names)

184

"""

185

186

def __getitem__(self, key) -> Object:

187

"""Get a value by key (key can be str or Name)."""

188

189

def __setitem__(self, key, value: Object) -> None:

190

"""Set a key-value pair (key converted to Name if needed)."""

191

192

def __contains__(self, key) -> bool:

193

"""Check if key exists in dictionary."""

194

195

def __len__(self) -> int:

196

"""Return number of key-value pairs."""

197

198

def keys(self):

199

"""Return dictionary keys as Name objects."""

200

201

def values(self):

202

"""Return dictionary values."""

203

204

def items(self):

205

"""Return key-value pairs."""

206

207

def get(self, key, default=None) -> Object:

208

"""

209

Get a value with optional default.

210

211

Parameters:

212

- key: Dictionary key (str or Name)

213

- default: Default value if key not found

214

215

Returns:

216

Object: Value associated with key, or default

217

"""

218

```

219

220

### Name Objects

221

222

PDF names are atomic identifiers used as dictionary keys and as various PDF constants.

223

224

```python { .api }

225

class Name(Object):

226

"""

227

PDF name object representing an immutable identifier.

228

229

Names are used as dictionary keys and PDF constants.

230

Supports both string construction and attribute-style access.

231

"""

232

233

def __init__(self, name_string: str) -> None:

234

"""

235

Create a PDF name from a string.

236

237

Parameters:

238

- name_string (str): String representation of the name

239

"""

240

241

def __str__(self) -> str:

242

"""Return string representation without leading slash."""

243

244

def __repr__(self) -> str:

245

"""Return full representation including leading slash."""

246

247

def __eq__(self, other) -> bool:

248

"""Compare names for equality."""

249

250

def __hash__(self) -> int:

251

"""Return hash for use as dictionary key."""

252

253

# Name constants can be accessed as attributes

254

# Example: Name.Type, Name.Font, Name.Contents

255

```

256

257

### String Objects

258

259

PDF strings can contain text or binary data with proper encoding handling.

260

261

```python { .api }

262

class String(Object):

263

"""

264

PDF string object for text or binary data.

265

266

Handles PDF string encoding including literal strings and hex strings.

267

"""

268

269

def __init__(self, str_or_bytes) -> None:

270

"""

271

Create a PDF string from text or bytes.

272

273

Parameters:

274

- str_or_bytes (str | bytes): String content

275

"""

276

277

def __str__(self) -> str:

278

"""Return string content as text."""

279

280

def __bytes__(self) -> bytes:

281

"""Return string content as bytes."""

282

283

def __len__(self) -> int:

284

"""Return length of string content."""

285

286

@property

287

def for_pdf(self) -> str:

288

"""

289

String representation suitable for PDF output.

290

291

Returns:

292

str: Properly escaped string for PDF files

293

"""

294

```

295

296

### Stream Objects

297

298

PDF streams contain both a dictionary of metadata and binary data content.

299

300

```python { .api }

301

class Stream(Object):

302

"""

303

PDF stream object containing dictionary metadata and binary data.

304

305

Streams are used for page content, images, fonts, and other binary data.

306

"""

307

308

def __init__(self, owner: Pdf, data=None, dict=None, **kwargs) -> None:

309

"""

310

Create a new PDF stream.

311

312

Parameters:

313

- owner (Pdf): PDF that will own this stream

314

- data (bytes, optional): Stream data content

315

- dict (Dictionary, optional): Stream dictionary

316

- **kwargs: Additional dictionary entries

317

"""

318

319

@property

320

def dictionary(self) -> Dictionary:

321

"""

322

The stream's dictionary containing metadata.

323

324

Returns:

325

Dictionary: Stream metadata and parameters

326

"""

327

328

def read_bytes(self) -> bytes:

329

"""

330

Read the stream's data as bytes.

331

332

Returns:

333

bytes: Decoded stream data

334

335

Raises:

336

DataDecodingError: If stream cannot be decoded

337

"""

338

339

def read_raw_bytes(self) -> bytes:

340

"""

341

Read the stream's raw data exactly as stored, without applying any decode filters.

342

343

Returns:

344

bytes: Raw stream data without decoding filters

345

"""

346

347

def write(self, data: bytes, *, filter=None, decode_parms=None) -> None:

348

"""

349

Write data to the stream.

350

351

Parameters:

352

- data (bytes): Data to write

353

- filter (optional): Filter(s) with which the data is already encoded (e.g. Name.FlateDecode); the data is not compressed for you

354

- decode_parms (optional): Filter parameters

355

"""

356

```

357

358

### Operator Objects

359

360

PDF operators represent content stream commands and their operands.

361

362

```python { .api }

363

class Operator(Object):

364

"""

365

PDF content stream operator.

366

367

Represents commands in PDF content streams like 'Tj' (show text) or 'l' (line to).

368

"""

369

370

def __init__(self, name: str) -> None:

371

"""

372

Create a PDF operator.

373

374

Parameters:

375

- name (str): Operator name (e.g., 'Tj', 'cm', 'Do')

376

"""

377

378

def __str__(self) -> str:

379

"""Return operator name."""

380

381

def __repr__(self) -> str:

382

"""Return full representation."""

383

```

384

385

### Object Type Enumeration

386

387

`ObjectType` enumerates all possible PDF object types and is used for type checking and identification.

388

389

```python { .api }

390

from enum import Enum

391

392

class ObjectType(Enum):

393

"""Enumeration of PDF object types."""

394

uninitialized = ... # Uninitialized object

395

reserved = ... # Reserved type

396

null = ... # Null object

397

boolean = ... # Boolean true/false

398

integer = ... # Integer number

399

real = ... # Real (floating-point) number

400

string = ... # String object

401

name_ = ... # Name object (trailing underscore avoids clashing with the enum's built-in 'name' attribute)

402

array = ... # Array object

403

dictionary = ... # Dictionary object

404

stream = ... # Stream object

405

operator = ... # Content stream operator

406

inlineimage = ... # Inline image

407

```

408

409

## Usage Examples

410

411

### Working with Arrays

412

413

```python

414

import pikepdf

415

416

pdf = pikepdf.new()

417

418

# Create an array

419

arr = pikepdf.Array([1, 2, 3])

420

421

# Add elements

422

arr.append(pikepdf.String("hello"))

423

arr.extend([pikepdf.Name.Type, pikepdf.Name.Font])

424

425

# Access elements

426

first = arr[0] # Integer 1

427

last = arr[-1] # Name(/Font)

428

429

# Use in dictionary

430

dict_obj = pikepdf.Dictionary({

431

'/Contents': arr,

432

'/Type': pikepdf.Name.Page

433

})

434

```

435

436

### Working with Dictionaries

437

438

```python

439

import pikepdf

440

441

# Create a dictionary

442

page_dict = pikepdf.Dictionary({

443

'/Type': pikepdf.Name.Page,

444

'/MediaBox': pikepdf.Array([0, 0, 612, 792]),

445

'/Resources': pikepdf.Dictionary()

446

})

447

448

# Access values

449

page_type = page_dict['/Type'] # Name(/Page)

450

media_box = page_dict['/MediaBox'] # Array

451

452

# Add new entries

453

page_dict['/Rotate'] = 90

454

page_dict['/Contents'] = pikepdf.Array()

455

456

# Check for keys

457

if '/Resources' in page_dict:

458

resources = page_dict['/Resources']

459

```

460

461

### Working with Names

462

463

```python

464

import pikepdf

465

466

# Create names

467

type_name = pikepdf.Name.Type

468

page_name = pikepdf.Name.Page

469

custom_name = pikepdf.Name('/CustomAttribute')

470

471

# Names can be compared

472

if type_name == pikepdf.Name.Type:

473

print("Names are equal")

474

475

# Use in dictionaries

476

metadata = {

477

type_name: page_name,

478

pikepdf.Name.MediaBox: pikepdf.Array([0, 0, 612, 792])

479

}

480

```

481

482

### Working with Strings

483

484

```python

485

import pikepdf

486

487

# Create strings

488

title = pikepdf.String("Document Title")

489

binary_data = pikepdf.String(b'\x00\x01\x02\x03')

490

491

# Convert between representations

492

text_content = str(title) # "Document Title"

493

byte_content = bytes(binary_data) # b'\x00\x01\x02\x03'

494

495

# Use in document info

496

pdf = pikepdf.new()

497

pdf.docinfo['/Title'] = title

498

pdf.docinfo['/Author'] = pikepdf.String("Jane Doe")

499

```

500

501

### Working with Streams

502

503

```python

504

import pikepdf

505

506

pdf = pikepdf.new()

507

508

# Create a stream with text content

509

content_data = b"BT /F1 12 Tf 100 700 Td (Hello World) Tj ET"

510

content_stream = pikepdf.Stream(pdf, content_data)

511

512

# Set stream properties

513

content_stream.dictionary['/Length'] = len(content_data)

514

515

# Read stream data

516

data = content_stream.read_bytes()

517

raw_data = content_stream.read_raw_bytes()

518

519

# Use stream in a page

520

page = pdf.add_blank_page()

521

page['/Contents'] = content_stream

522

```

523

524

### Object Copying and Ownership

525

526

```python

527

import pikepdf

528

529

# Open two PDFs

530

pdf1 = pikepdf.open('source.pdf')

531

pdf2 = pikepdf.new()

532

533

# Copy object from one PDF to another

534

source_obj = pdf1.pages[0]['/Resources']

535

copied_obj = pdf2.copy_foreign(source_obj)

536

537

# Check ownership

538

assert copied_obj.is_owned_by(pdf2)

539

assert not copied_obj.is_owned_by(pdf1)

540

541

# Make object indirect

542

indirect_obj = pdf2.make_indirect(copied_obj)

543

obj_id, generation = indirect_obj.objgen

544

```