The Apache PDFBox library is an open source Java tool for working with PDF documents.
—
Direct manipulation of PDF objects using the Carousel Object System (COS) for advanced PDF structure handling, custom object creation, and low-level document analysis.
Core document-level COS operations and object management.
// Constructor and methods in org.apache.pdfbox.cos.COSDocument
public COSDocument();
public COSDocument(ScratchFile scratchFile);
// Object management
public List<COSObject> getObjects();
public void addObject(COSObject object);
public COSObject getObjectByType(COSName type);
public List<COSObject> getObjectsByType(COSName type);
// Document operations
public void close() throws IOException;
public boolean isClosed();
public long getHighestXRefObjectNumber();
public void setHighestXRefObjectNumber(long highestXRefObjectNumber);

Handle indirect PDF objects and their references.
// Constructor and methods in org.apache.pdfbox.cos.COSObject
public COSObject(COSBase object);
// Object access
public COSBase getObject();
public void setObject(COSBase object);
public COSBase getDereferenced();
// Object identification
public long getObjectNumber();
public void setObjectNumber(long objectNumber);
public int getGenerationNumber();
public void setGenerationNumber(int generationNumber);
// State management
public boolean isObjectNull();
public void setToNull();

Common operations for all COS object types.
// Methods in org.apache.pdfbox.cos.COSBase (abstract base class)
public Object accept(ICOSVisitor visitor) throws IOException;
public COSBase getCOSObject();
// Update and direct-object state flags
public boolean isNeedToBeUpdated();
public void setNeedToBeUpdated(boolean needToBeUpdated);
public boolean isDirect();
public void setDirect(boolean direct);

Handle PDF name objects (atomic identifiers).
// Constructor and methods in org.apache.pdfbox.cos.COSName
public static COSName getPDFName(String name);
public String getName();
// Common PDF names (constants)
public static final COSName TYPE;
public static final COSName SUBTYPE;
public static final COSName PARENT;
public static final COSName KIDS;
public static final COSName COUNT;
public static final COSName ROOT;
public static final COSName PAGES;
public static final COSName PAGE;
public static final COSName CONTENTS;
public static final COSName RESOURCES;
public static final COSName MEDIA_BOX;
public static final COSName CROP_BOX;
public static final COSName ROTATE;
public static final COSName FILTER;
public static final COSName LENGTH;
public static final COSName WIDTH;
public static final COSName HEIGHT;

Handle PDF string objects with encoding support.
// Constructors in org.apache.pdfbox.cos.COSString
public COSString();
public COSString(String str);
public COSString(byte[] bytes);
// String operations
public String getString();
public void setValue(String value);
public byte[] getBytes();
public void setBytes(byte[] bytes);
// Encoding operations
public String toHexString();
public static COSString parseHex(String hex);
public boolean forceHexForm();
public void setForceHexForm(boolean forceHexForm);

Handle PDF array objects with collection operations.
// Constructors in org.apache.pdfbox.cos.COSArray
public COSArray();
public COSArray(List<COSBase> items);
// Array operations
public void add(COSBase object);
public void add(int index, COSBase object);
public void addAll(Collection<COSBase> objects);
public void addAll(COSArray array);
public COSBase get(int index);
public COSBase getObject(int index);
public void set(int index, COSBase object);
public void remove(int index);
public void remove(COSBase object);
public void clear();
// Array properties
public int size();
public boolean isEmpty();
public Iterator<COSBase> iterator();
public List<COSBase> toList();
// Type-specific getters
public String getString(int index);
public int getInt(int index);
public int getInt(int index, int defaultValue);
public float getFloat(int index);
public float getFloat(int index, float defaultValue);
public COSName getName(int index);
public COSName getName(int index, COSName defaultValue);

Handle PDF dictionary objects with key-value operations.
// Constructors in org.apache.pdfbox.cos.COSDictionary
public COSDictionary();
public COSDictionary(Map<COSName, COSBase> map);
// Dictionary operations
public void setItem(COSName key, COSBase value);
public void setItem(String key, COSBase value);
public COSBase getItem(COSName key);
public COSBase getItem(String key);
public COSBase getDictionaryObject(COSName key);
public COSBase getDictionaryObject(String key);
public void removeItem(COSName key);
public void removeItem(String key);
public boolean containsKey(COSName key);
public boolean containsKey(String key);
// Dictionary properties
public Set<COSName> keySet();
public Collection<COSBase> getValues();
public int size();
public boolean isEmpty();
public void clear();
public void addAll(COSDictionary dictionary);
// Type-specific getters
public String getString(COSName key);
public String getString(String key);
public String getString(COSName key, String defaultValue);
public int getInt(COSName key);
public int getInt(String key);
public int getInt(COSName key, int defaultValue);
public float getFloat(COSName key);
public float getFloat(String key);
public float getFloat(COSName key, float defaultValue);
public boolean getBoolean(COSName key, boolean defaultValue);
public COSName getCOSName(COSName key);
public COSArray getCOSArray(COSName key);
public COSDictionary getCOSDictionary(COSName key);

Handle PDF numeric objects (integers and floats).
// Methods in org.apache.pdfbox.cos.COSInteger
public static COSInteger get(int value);
public int intValue();
public long longValue();
public float floatValue();
// Methods in org.apache.pdfbox.cos.COSFloat
public COSFloat(float value);
public float floatValue();
public double doubleValue();
public int intValue();

Handle PDF stream objects with data and dictionary components.
// Constructor and methods in org.apache.pdfbox.cos.COSStream
public COSStream();
public COSStream(COSDictionary dictionary);
// Stream data operations
public InputStream createInputStream() throws IOException;
public InputStream createInputStream(DecodeOptions options) throws IOException;
public OutputStream createOutputStream() throws IOException;
public OutputStream createOutputStream(COSName expectedFilter) throws IOException;
// Dictionary operations (inherited from COSDictionary)
public void setItem(COSName key, COSBase value);
public COSBase getItem(COSName key);
// Stream properties
public long getLength();
public void setLength(long length);
public List<COSName> getFilters();
public void setFilters(List<COSName> filters);

// Create a new dictionary
COSDictionary dict = new COSDictionary();

// Resolve each custom key once and reuse it for both writing and reading
COSName titleKey = COSName.getPDFName("Title");
COSName countKey = COSName.getPDFName("Count");
COSName scaleKey = COSName.getPDFName("Scale");

// Store one entry per value kind: name, string, integer, float
dict.setItem(COSName.TYPE, COSName.getPDFName("Page"));
dict.setItem(titleKey, new COSString("My Title"));
dict.setItem(countKey, COSInteger.get(42));
dict.setItem(scaleKey, new COSFloat(1.5f));

// Fetch each entry back through its typed getter
COSName type = dict.getCOSName(COSName.TYPE);
String title = dict.getString(titleKey);
int count = dict.getInt(countKey);
float scale = dict.getFloat(scaleKey);

// Report what was read
System.out.println("Type: " + type.getName());
System.out.println("Title: " + title);
System.out.println("Count: " + count);
System.out.println("Scale: " + scale);

// Create array with various objects
COSArray array = new COSArray();
array.add(new COSString("Hello"));
array.add(COSInteger.get(123));
array.add(new COSFloat(3.14f));
array.add(COSName.getPDFName("Test"));

// Walk the elements in order and describe each by its concrete COS type;
// elements of any other type produce no output
for (COSBase element : array) {
    String description = null;
    if (element instanceof COSString) {
        description = "String: " + ((COSString) element).getString();
    } else if (element instanceof COSInteger) {
        description = "Integer: " + ((COSInteger) element).intValue();
    } else if (element instanceof COSFloat) {
        description = "Float: " + ((COSFloat) element).floatValue();
    } else if (element instanceof COSName) {
        description = "Name: " + ((COSName) element).getName();
    }
    if (description != null) {
        System.out.println(description);
    }
}

// Typed getters read an element by index without an explicit cast
String firstString = array.getString(0);
int firstInt = array.getInt(1);
float firstFloat = array.getFloat(2);
COSName firstName = array.getName(3);

// Example: count the dictionary objects of an existing document by their /Type.
// try-with-resources closes the document even when the analysis throws.
try (PDDocument document = Loader.loadPDF(new File("document.pdf"))) {
    COSDocument cosDoc = document.getDocument();

    // Analyze all objects in the document
    List<COSObject> objects = cosDoc.getObjects();
    System.out.println("Total objects: " + objects.size());

    Map<String, Integer> typeCount = new HashMap<>();
    for (COSObject cosObject : objects) {
        COSBase object = cosObject.getObject();
        // Only dictionaries can carry a /Type entry; instanceof also rejects null
        if (object instanceof COSDictionary) {
            COSDictionary dict = (COSDictionary) object;
            COSName type = dict.getCOSName(COSName.TYPE);
            String typeName = (type != null) ? type.getName() : "Unknown";
            // merge() inserts 1 for a new type name or adds 1 to the existing tally
            typeCount.merge(typeName, 1, Integer::sum);
            System.out.println("Object " + cosObject.getObjectNumber() + ": " + typeName);
        }
    }

    // Print type statistics
    typeCount.forEach((type, count) ->
            System.out.println(type + ": " + count + " objects"));
}

// Example: attach a custom dictionary to a page of a new document.
// try-with-resources closes the document even when building or saving fails.
try (PDDocument document = new PDDocument()) {
    // Create custom dictionary
    COSDictionary customDict = new COSDictionary();
    customDict.setItem(COSName.TYPE, COSName.getPDFName("CustomType"));
    customDict.setItem(COSName.getPDFName("Version"), new COSString("1.0"));
    customDict.setItem(COSName.getPDFName("Features"), createFeatureArray());

    // Create indirect object
    COSObject indirectObject = new COSObject(customDict);
    document.getDocument().addObject(indirectObject);

    // Reference from page
    PDPage page = new PDPage();
    COSDictionary pageDict = page.getCOSObject();
    pageDict.setItem(COSName.getPDFName("CustomData"), indirectObject);
    document.addPage(page);

    document.save("custom-objects.pdf");
}

// Create a stream with custom data
COSStream stream = new COSStream();

// Set stream dictionary properties
stream.setItem(COSName.TYPE, COSName.getPDFName("CustomStream"));
stream.setItem(COSName.SUBTYPE, COSName.getPDFName("Text"));

// Write data to stream; closing the output stream completes the write
try (OutputStream output = stream.createOutputStream()) {
    String data = "This is custom stream data";
    output.write(data.getBytes(StandardCharsets.UTF_8));
}

// Read data back from stream
try (InputStream input = stream.createInputStream()) {
    // readAllBytes() drains the stream completely. A single read(buffer) may
    // return fewer bytes than are available, silently truncates anything past
    // the buffer size, and returns -1 for an empty stream, which would make
    // new String(buffer, 0, -1, ...) throw.
    String readData = new String(input.readAllBytes(), StandardCharsets.UTF_8);
    System.out.println("Stream data: " + readData);
}

// Get stream properties
long length = stream.getLength();
System.out.println("Stream length: " + length);

/**
 * Example visitor that prints an indented outline of a COS object tree.
 *
 * <p>Only the array, dictionary and string visits are shown here; the remaining
 * {@code ICOSVisitor} methods still have to be implemented before the class compiles.
 *
 * <p>Every array and dictionary is expanded at most once per walker instance.
 * A container that is reached again is reported as "(already visited)" instead of
 * being descended into, so object graphs that contain cycles cannot cause
 * unbounded recursion.
 */
public class COSTreeWalker implements ICOSVisitor {
    /** Containers already expanded, tracked by reference identity rather than equals(). */
    private final java.util.Set<COSBase> visited =
            java.util.Collections.newSetFromMap(new java.util.IdentityHashMap<>());

    /** Current nesting level; controls how far each printed line is indented. */
    private int depth = 0;

    /**
     * Prints the array header and then visits every non-null element one level deeper.
     *
     * @param array the array being visited
     * @return always {@code null}
     * @throws IOException if visiting an element fails
     */
    @Override
    public Object visitFromArray(COSArray array) throws IOException {
        if (!visited.add(array)) {
            System.out.println(indent() + "Array (already visited)");
            return null;
        }
        System.out.println(indent() + "Array [" + array.size() + " items]");
        depth++;
        try {
            for (COSBase item : array) {
                // Same null guard as the dictionary branch
                if (item != null) {
                    item.accept(this);
                }
            }
        } finally {
            // Restore the indent level even when a nested visit throws
            depth--;
        }
        return null;
    }

    /**
     * Prints the dictionary header, then each key followed by its visited value.
     *
     * @param dict the dictionary being visited
     * @return always {@code null}
     * @throws IOException if visiting a value fails
     */
    @Override
    public Object visitFromDictionary(COSDictionary dict) throws IOException {
        if (!visited.add(dict)) {
            System.out.println(indent() + "Dictionary (already visited)");
            return null;
        }
        System.out.println(indent() + "Dictionary [" + dict.size() + " keys]");
        depth++;
        try {
            for (COSName key : dict.keySet()) {
                System.out.println(indent() + "Key: " + key.getName());
                COSBase value = dict.getItem(key);
                if (value != null) {
                    value.accept(this);
                }
            }
        } finally {
            // Restore the indent level even when a nested visit throws
            depth--;
        }
        return null;
    }

    /**
     * Prints the string value in double quotes at the current indent.
     *
     * @param string the string being visited
     * @return always {@code null}
     * @throws IOException declared by the visitor interface; not thrown here
     */
    @Override
    public Object visitFromString(COSString string) throws IOException {
        System.out.println(indent() + "String: \"" + string.getString() + "\"");
        return null;
    }

    // ... implement other visit methods

    /** Returns one space per nesting level. */
    private String indent() {
        return " ".repeat(depth);
    }
}
// Usage: print the tree of the first five objects of a document.
// try-with-resources closes the document even when a visit throws.
try (PDDocument document = Loader.loadPDF(new File("document.pdf"))) {
    COSDocument cosDoc = document.getDocument();
    COSTreeWalker walker = new COSTreeWalker();

    List<COSObject> objects = cosDoc.getObjects();
    int limit = Math.min(5, objects.size());
    for (COSObject obj : objects.subList(0, limit)) {
        System.out.println("=== Object " + obj.getObjectNumber() + " ===");
        // Guard against an indirect object whose referenced value is null
        COSBase referenced = obj.getObject();
        if (referenced != null) {
            referenced.accept(walker);
        }
    }
}

Install with Tessl CLI
npx tessl i tessl/maven-org-apache-pdfbox--pdfbox