0
# Low-Level COS Operations
1
2
Direct manipulation of PDF objects using the Carousel Object System (COS) for advanced PDF structure handling, custom object creation, and low-level document analysis.
3
4
## COS Document Structure
5
6
Core document-level COS operations and object management.
7
8
```java { .api }
9
// Constructor and methods in org.apache.pdfbox.cos.COSDocument
10
public COSDocument();
11
public COSDocument(ScratchFile scratchFile);
12
13
// Object management
14
public List<COSObject> getObjects();
15
public void addObject(COSObject object);
16
public COSObject getObjectByType(COSName type);
17
public List<COSObject> getObjectsByType(COSName type);
18
19
// Document operations
20
public void close() throws IOException;
21
public boolean isClosed();
22
public long getHighestXRefObjectNumber();
23
public void setHighestXRefObjectNumber(long highestXRefObjectNumber);
24
```
25
26
## COS Objects and References
27
28
Handle indirect PDF objects and their references.
29
30
```java { .api }
31
// Constructor and methods in org.apache.pdfbox.cos.COSObject
32
public COSObject(COSBase object);
33
34
// Object access
35
public COSBase getObject();
36
public void setObject(COSBase object);
37
public COSBase getDereferenced();
38
39
// Object identification
40
public long getObjectNumber();
41
public void setObjectNumber(long objectNumber);
42
public int getGenerationNumber();
43
public void setGenerationNumber(int generationNumber);
44
45
// State management
46
public boolean isObjectNull();
47
public void setToNull();
48
```
49
50
## Base COS Types
51
52
Common operations for all COS object types.
53
54
```java { .api }
55
// Methods in org.apache.pdfbox.cos.COSBase (abstract base class)
56
public Object accept(ICOSVisitor visitor) throws IOException;
57
public COSBase getCOSObject();
58
59
// Update-tracking and direct/indirect state flags
60
public boolean isNeedToBeUpdated();
61
public void setNeedToBeUpdated(boolean needToBeUpdated);
62
public boolean isDirect();
63
public void setDirect(boolean direct);
64
```
65
66
## COS Names
67
68
Handle PDF name objects (atomic identifiers).
69
70
```java { .api }
71
// Static factory, accessor and constants in org.apache.pdfbox.cos.COSName
72
public static COSName getPDFName(String name);
73
public String getName();
74
75
// Common PDF names (constants)
76
public static final COSName TYPE;
77
public static final COSName SUBTYPE;
78
public static final COSName PARENT;
79
public static final COSName KIDS;
80
public static final COSName COUNT;
81
public static final COSName ROOT;
82
public static final COSName PAGES;
83
public static final COSName PAGE;
84
public static final COSName CONTENTS;
85
public static final COSName RESOURCES;
86
public static final COSName MEDIA_BOX;
87
public static final COSName CROP_BOX;
88
public static final COSName ROTATE;
89
public static final COSName FILTER;
90
public static final COSName LENGTH;
91
public static final COSName WIDTH;
92
public static final COSName HEIGHT;
93
```
94
95
## COS Strings
96
97
Handle PDF string objects with encoding support.
98
99
```java { .api }
100
// Constructors in org.apache.pdfbox.cos.COSString
101
public COSString();
102
public COSString(String str);
103
public COSString(byte[] bytes);
104
105
// String operations
106
public String getString();
107
public void setValue(String value);
108
public byte[] getBytes();
109
public void setBytes(byte[] bytes);
110
111
// Encoding operations
112
public String toHexString();
113
public static COSString parseHex(String hex);
114
public boolean forceHexForm();
115
public void setForceHexForm(boolean forceHexForm);
116
```
117
118
## COS Arrays
119
120
Handle PDF array objects with collection operations.
121
122
```java { .api }
123
// Constructors in org.apache.pdfbox.cos.COSArray
124
public COSArray();
125
public COSArray(List<COSBase> items);
126
127
// Array operations
128
public void add(COSBase object);
129
public void add(int index, COSBase object);
130
public void addAll(Collection<COSBase> objects);
131
public void addAll(COSArray array);
132
public COSBase get(int index);
133
public COSBase getObject(int index);
134
public void set(int index, COSBase object);
135
public void remove(int index);
136
public void remove(COSBase object);
137
public void clear();
138
139
// Array properties
140
public int size();
141
public boolean isEmpty();
142
public Iterator<COSBase> iterator();
143
public List<COSBase> toList();
144
145
// Type-specific getters
146
public String getString(int index);
147
public int getInt(int index);
148
public int getInt(int index, int defaultValue);
149
public float getFloat(int index);
150
public float getFloat(int index, float defaultValue);
151
public COSName getName(int index);
152
public COSName getName(int index, COSName defaultValue);
153
```
154
155
## COS Dictionaries
156
157
Handle PDF dictionary objects with key-value operations.
158
159
```java { .api }
160
// Constructors in org.apache.pdfbox.cos.COSDictionary
161
public COSDictionary();
162
public COSDictionary(Map<COSName, COSBase> map);
163
164
// Dictionary operations
165
public void setItem(COSName key, COSBase value);
166
public void setItem(String key, COSBase value);
167
public COSBase getItem(COSName key);
168
public COSBase getItem(String key);
169
public COSBase getDictionaryObject(COSName key);
170
public COSBase getDictionaryObject(String key);
171
public void removeItem(COSName key);
172
public void removeItem(String key);
173
public boolean containsKey(COSName key);
174
public boolean containsKey(String key);
175
176
// Dictionary properties
177
public Set<COSName> keySet();
178
public Collection<COSBase> getValues();
179
public int size();
180
public boolean isEmpty();
181
public void clear();
182
public void addAll(COSDictionary dictionary);
183
184
// Type-specific getters
185
public String getString(COSName key);
186
public String getString(String key);
187
public String getString(COSName key, String defaultValue);
188
public int getInt(COSName key);
189
public int getInt(String key);
190
public int getInt(COSName key, int defaultValue);
191
public float getFloat(COSName key);
192
public float getFloat(String key);
193
public float getFloat(COSName key, float defaultValue);
194
public boolean getBoolean(COSName key, boolean defaultValue);
195
public COSName getCOSName(COSName key);
196
public COSArray getCOSArray(COSName key);
197
public COSDictionary getCOSDictionary(COSName key);
198
```
199
200
## COS Numbers
201
202
Handle PDF numeric objects (integers and floats).
203
204
```java { .api }
205
// Methods in org.apache.pdfbox.cos.COSInteger
206
public static COSInteger get(int value);
207
public int intValue();
208
public long longValue();
209
public float floatValue();
210
211
// Methods in org.apache.pdfbox.cos.COSFloat
212
public COSFloat(float value);
213
public float floatValue();
214
public double doubleValue();
215
public int intValue();
216
```
217
218
## COS Streams
219
220
Handle PDF stream objects with data and dictionary components.
221
222
```java { .api }
223
// Constructor and methods in org.apache.pdfbox.cos.COSStream
224
public COSStream();
225
public COSStream(COSDictionary dictionary);
226
227
// Stream data operations
228
public InputStream createInputStream() throws IOException;
229
public InputStream createInputStream(DecodeOptions options) throws IOException;
230
public OutputStream createOutputStream() throws IOException;
231
public OutputStream createOutputStream(COSName expectedFilter) throws IOException;
232
233
// Dictionary operations (inherited from COSDictionary)
234
public void setItem(COSName key, COSBase value);
235
public COSBase getItem(COSName key);
236
237
// Stream properties
238
public long getLength();
239
public void setLength(long length);
240
public List<COSName> getFilters();
241
public void setFilters(List<COSName> filters);
242
```
243
244
## Usage Examples
245
246
### Working with COS Dictionaries
247
248
```java
249
// Create a new dictionary
250
COSDictionary dict = new COSDictionary();
251
252
// Add various types of values
253
dict.setItem(COSName.TYPE, COSName.getPDFName("Page"));
254
dict.setItem(COSName.getPDFName("Title"), new COSString("My Title"));
255
dict.setItem(COSName.getPDFName("Count"), COSInteger.get(42));
256
dict.setItem(COSName.getPDFName("Scale"), new COSFloat(1.5f));
257
258
// Read values back
259
COSName type = dict.getCOSName(COSName.TYPE);
260
String title = dict.getString("Title");
261
int count = dict.getInt("Count");
262
float scale = dict.getFloat("Scale");
263
264
System.out.println("Type: " + type.getName());
265
System.out.println("Title: " + title);
266
System.out.println("Count: " + count);
267
System.out.println("Scale: " + scale);
268
```
269
270
### Working with COS Arrays
271
272
```java
273
// Create array with various objects
274
COSArray array = new COSArray();
275
array.add(new COSString("Hello"));
276
array.add(COSInteger.get(123));
277
array.add(new COSFloat(3.14f));
278
array.add(COSName.getPDFName("Test"));
279
280
// Access array elements
281
for (int i = 0; i < array.size(); i++) {
282
COSBase item = array.get(i);
283
284
if (item instanceof COSString) {
285
System.out.println("String: " + ((COSString) item).getString());
286
} else if (item instanceof COSInteger) {
287
System.out.println("Integer: " + ((COSInteger) item).intValue());
288
} else if (item instanceof COSFloat) {
289
System.out.println("Float: " + ((COSFloat) item).floatValue());
290
} else if (item instanceof COSName) {
291
System.out.println("Name: " + ((COSName) item).getName());
292
}
293
}
294
295
// Type-specific access
296
String firstString = array.getString(0);
297
int firstInt = array.getInt(1);
298
float firstFloat = array.getFloat(2);
299
COSName firstName = array.getName(3);
300
```
301
302
### Low-Level Document Analysis
303
304
```java
305
// try-with-resources guarantees the document is closed even if the analysis throws
try (PDDocument document = Loader.loadPDF(new File("document.pdf"))) {
    COSDocument cosDoc = document.getDocument();

    // Analyze all objects in the document
    List<COSObject> objects = cosDoc.getObjects();
    System.out.println("Total objects: " + objects.size());

    // Maps the value of each dictionary's Type entry to the number of occurrences
    Map<String, Integer> typeCount = new HashMap<>();

    for (COSObject cosObject : objects) {
        COSBase object = cosObject.getObject();

        // instanceof is false for null, so unresolved objects are skipped as well
        if (object instanceof COSDictionary) {
            COSDictionary dict = (COSDictionary) object;
            COSName type = dict.getCOSName(COSName.TYPE);

            // Dictionaries without a Type entry are grouped under "Unknown"
            String typeName = (type != null) ? type.getName() : "Unknown";
            typeCount.merge(typeName, 1, Integer::sum);

            System.out.println("Object " + cosObject.getObjectNumber() +
                    ": " + typeName);
        }
    }

    // Print type statistics
    typeCount.forEach((type, count) ->
            System.out.println(type + ": " + count + " objects"));
}
334
```
335
336
### Creating Custom PDF Objects
337
338
```java
339
// try-with-resources closes the document even when building or saving it fails
try (PDDocument document = new PDDocument()) {
    // Create custom dictionary
    COSDictionary customDict = new COSDictionary();
    customDict.setItem(COSName.TYPE, COSName.getPDFName("CustomType"));
    customDict.setItem(COSName.getPDFName("Version"), new COSString("1.0"));
    // createFeatureArray() is not defined in this example; supply your own
    // helper that returns the COSBase (for instance a COSArray) to store here
    customDict.setItem(COSName.getPDFName("Features"), createFeatureArray());

    // Create indirect object
    COSObject indirectObject = new COSObject(customDict);
    document.getDocument().addObject(indirectObject);

    // Reference from page
    PDPage page = new PDPage();
    COSDictionary pageDict = page.getCOSObject();
    pageDict.setItem(COSName.getPDFName("CustomData"), indirectObject);

    document.addPage(page);
    document.save("custom-objects.pdf");
}
359
```
360
361
### Working with COS Streams
362
363
```java
364
// Create a stream with custom data
365
COSStream stream = new COSStream();
366
367
// Set stream dictionary properties
368
stream.setItem(COSName.TYPE, COSName.getPDFName("CustomStream"));
369
stream.setItem(COSName.SUBTYPE, COSName.getPDFName("Text"));
370
371
// Write data to stream
372
try (OutputStream output = stream.createOutputStream()) {
373
String data = "This is custom stream data";
374
output.write(data.getBytes(StandardCharsets.UTF_8));
375
}
376
377
// Read data back from stream
378
try (InputStream input = stream.createInputStream()) {
379
byte[] buffer = new byte[1024];
380
int bytesRead = input.read(buffer);
381
String readData = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8);
382
System.out.println("Stream data: " + readData);
383
}
384
385
// Get stream properties
386
long length = stream.getLength();
387
System.out.println("Stream length: " + length);
388
```
389
390
### Advanced COS Manipulation
391
392
```java
393
public class COSTreeWalker implements ICOSVisitor {
394
private int depth = 0;
395
396
@Override
397
public Object visitFromArray(COSArray array) throws IOException {
398
System.out.println(indent() + "Array [" + array.size() + " items]");
399
depth++;
400
for (COSBase item : array) {
401
item.accept(this);
402
}
403
depth--;
404
return null;
405
}
406
407
@Override
408
public Object visitFromDictionary(COSDictionary dict) throws IOException {
409
System.out.println(indent() + "Dictionary [" + dict.size() + " keys]");
410
depth++;
411
for (COSName key : dict.keySet()) {
412
System.out.println(indent() + "Key: " + key.getName());
413
COSBase value = dict.getItem(key);
414
if (value != null) {
415
value.accept(this);
416
}
417
}
418
depth--;
419
return null;
420
}
421
422
@Override
423
public Object visitFromString(COSString string) throws IOException {
424
System.out.println(indent() + "String: \"" + string.getString() + "\"");
425
return null;
426
}
427
428
// ... implement other visit methods
429
430
private String indent() {
431
return " ".repeat(depth);
432
}
433
}
434
435
// Usage
436
PDDocument document = Loader.loadPDF(new File("document.pdf"));
437
COSDocument cosDoc = document.getDocument();
438
439
COSTreeWalker walker = new COSTreeWalker();
440
List<COSObject> objects = cosDoc.getObjects();
441
442
for (int i = 0; i < Math.min(5, objects.size()); i++) {
443
COSObject obj = objects.get(i);
444
System.out.println("=== Object " + obj.getObjectNumber() + " ===");
445
obj.getObject().accept(walker);
446
}
447
448
document.close();
449
```