CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/maven-org-apache-pdfbox--pdfbox

The Apache PDFBox library is an open-source Java tool for working with PDF documents.

Pending
Overview
Eval results
Files

docs/cos-operations.md

Low-Level COS Operations

Direct manipulation of PDF objects using the Carousel Object System (COS) for advanced PDF structure handling, custom object creation, and low-level document analysis.

COS Document Structure

Core document-level COS operations and object management.

// Constructor and methods in org.apache.pdfbox.cos.COSDocument
public COSDocument();
public COSDocument(ScratchFile scratchFile);

// Object management
// getObjects() is what the usage examples on this page call to enumerate a document's objects.
// NOTE(review): the examples combine getObjects() with Loader.loadPDF(...) — confirm both are
// available in the same PDFBox release before relying on this listing.
public List<COSObject> getObjects();
public void addObject(COSObject object);
// Singular form returns one COSObject, plural form returns a list, both selected by a COSName type.
public COSObject getObjectByType(COSName type);
public List<COSObject> getObjectsByType(COSName type);

// Document operations
public void close() throws IOException;
public boolean isClosed();
public long getHighestXRefObjectNumber();
public void setHighestXRefObjectNumber(long highestXRefObjectNumber);

COS Objects and References

Handle indirect PDF objects and their references.

// Constructor and methods in org.apache.pdfbox.cos.COSObject
// The constructor wraps an existing COSBase value.
public COSObject(COSBase object);

// Object access
// getObject() is the accessor the usage examples on this page call to reach the wrapped value.
public COSBase getObject();
public void setObject(COSBase object);
// NOTE(review): how getDereferenced() differs from getObject() is not shown on this page — confirm in the Javadoc.
public COSBase getDereferenced();

// Object identification (object number is a long, generation number is an int)
public long getObjectNumber();
public void setObjectNumber(long objectNumber);
public int getGenerationNumber();
public void setGenerationNumber(int generationNumber);

// State management
public boolean isObjectNull();
public void setToNull();

Base COS Types

Common operations for all COS object types.

// Methods in org.apache.pdfbox.cos.COSBase (abstract base class)
// accept(...) is the visitor entry point; the COSTreeWalker example calls it on array items and dictionary values.
public Object accept(ICOSVisitor visitor) throws IOException;
public COSBase getCOSObject();

// State flags (getter/setter pairs for the "needs to be updated" and "direct" flags)
public boolean isNeedToBeUpdated();
public void setNeedToBeUpdated(boolean needToBeUpdated);
public boolean isDirect();
public void setDirect(boolean direct);

COS Names

Handle PDF name objects (atomic identifiers).

// Factory and accessor methods in org.apache.pdfbox.cos.COSName
// No public constructor is listed: instances are obtained through the static getPDFName(...) factory
// or through the predefined constants below.
public static COSName getPDFName(String name);
public String getName();

// Common PDF names (constants)
// Shown without initializers because this is a signature listing.
// NOTE(review): each constant presumably maps to the matching PDF key (e.g. TYPE -> "Type") — confirm in the Javadoc.
public static final COSName TYPE;
public static final COSName SUBTYPE;
public static final COSName PARENT;
public static final COSName KIDS;
public static final COSName COUNT;
public static final COSName ROOT;
public static final COSName PAGES;
public static final COSName PAGE;
public static final COSName CONTENTS;
public static final COSName RESOURCES;
public static final COSName MEDIA_BOX;
public static final COSName CROP_BOX;
public static final COSName ROTATE;
public static final COSName FILTER;
public static final COSName LENGTH;
public static final COSName WIDTH;
public static final COSName HEIGHT;

COS Strings

Handle PDF string objects with encoding support.

// Constructors in org.apache.pdfbox.cos.COSString
public COSString();
public COSString(String str);
public COSString(byte[] bytes);

// String operations (text view via getString/setValue, raw-byte view via getBytes/setBytes)
public String getString();
public void setValue(String value);
public byte[] getBytes();
public void setBytes(byte[] bytes);

// Encoding operations
public String toHexString();
// NOTE(review): parseHex may declare a checked exception in the library — confirm against the Javadoc.
public static COSString parseHex(String hex);
// NOTE(review): the getter paired with setForceHexForm(...) may be named getForceHexForm() — confirm against the Javadoc.
public boolean forceHexForm();
public void setForceHexForm(boolean forceHexForm);

COS Arrays

Handle PDF array objects with collection operations.

// Constructors in org.apache.pdfbox.cos.COSArray
public COSArray();
public COSArray(List<COSBase> items);

// Array operations
public void add(COSBase object);
public void add(int index, COSBase object);
public void addAll(Collection<COSBase> objects);
public void addAll(COSArray array);
// NOTE(review): get(int) and getObject(int) have identical signatures here; presumably they differ in
// how indirect references are handled — confirm against the Javadoc.
public COSBase get(int index);
public COSBase getObject(int index);
public void set(int index, COSBase object);
// remove is overloaded: by position (int) and by element (COSBase).
public void remove(int index);
public void remove(COSBase object);
public void clear();

// Array properties
public int size();
public boolean isEmpty();
// iterator() is what allows the for-each loop over a COSArray in the COSTreeWalker example.
public Iterator<COSBase> iterator();
public List<COSBase> toList();

// Type-specific getters
// Each takes an index; the int, float and COSName variants also have an overload with a caller-supplied default value.
public String getString(int index);
public int getInt(int index);
public int getInt(int index, int defaultValue);
public float getFloat(int index);
public float getFloat(int index, float defaultValue);
public COSName getName(int index);
public COSName getName(int index, COSName defaultValue);

COS Dictionaries

Handle PDF dictionary objects with key-value operations.

// Constructors in org.apache.pdfbox.cos.COSDictionary
// NOTE(review): confirm the parameter type of the one-argument constructor against the Javadoc.
public COSDictionary();
public COSDictionary(Map<COSName, COSBase> map);

// Dictionary operations
// Most operations come in two overloads: one keyed by COSName and one keyed by a plain String.
public void setItem(COSName key, COSBase value);
public void setItem(String key, COSBase value);
// NOTE(review): getItem and getDictionaryObject have identical signatures here; presumably they differ in
// how indirect references are handled — confirm against the Javadoc.
public COSBase getItem(COSName key);
public COSBase getItem(String key);
public COSBase getDictionaryObject(COSName key);
public COSBase getDictionaryObject(String key);
public void removeItem(COSName key);
public void removeItem(String key);
public boolean containsKey(COSName key);
public boolean containsKey(String key);

// Dictionary properties
public Set<COSName> keySet();
public Collection<COSBase> getValues();
public int size();
public boolean isEmpty();
public void clear();
public void addAll(COSDictionary dictionary);

// Type-specific getters
// The String-keyed overloads are the ones the "Working with COS Dictionaries" example uses to read values back.
public String getString(COSName key);
public String getString(String key);
public String getString(COSName key, String defaultValue);
public int getInt(COSName key);
public int getInt(String key);
public int getInt(COSName key, int defaultValue);
public float getFloat(COSName key);
public float getFloat(String key);
public float getFloat(COSName key, float defaultValue);
// getBoolean is listed only in the form that takes a default value.
public boolean getBoolean(COSName key, boolean defaultValue);
public COSName getCOSName(COSName key);
public COSArray getCOSArray(COSName key);
public COSDictionary getCOSDictionary(COSName key);

COS Numbers

Handle PDF numeric objects (integers and floats).

// Methods in org.apache.pdfbox.cos.COSInteger
// Instances come from the static get(...) factory; no public constructor is listed.
public static COSInteger get(int value);
public int intValue();
public long longValue();
public float floatValue();

// Methods in org.apache.pdfbox.cos.COSFloat
// Unlike COSInteger, COSFloat is created through a public constructor.
public COSFloat(float value);
public float floatValue();
// NOTE(review): confirm doubleValue() is available in the targeted PDFBox release.
public double doubleValue();
public int intValue();

COS Streams

Handle PDF stream objects with data and dictionary components.

// Constructor and methods in org.apache.pdfbox.cos.COSStream
public COSStream();
public COSStream(COSDictionary dictionary);

// Stream data operations
// All four declare IOException; the "Working with COS Streams" example wraps the returned
// streams in try-with-resources.
public InputStream createInputStream() throws IOException;
public InputStream createInputStream(DecodeOptions options) throws IOException;
public OutputStream createOutputStream() throws IOException;
// NOTE(review): confirm the parameter type of the filter-taking overload against the Javadoc.
public OutputStream createOutputStream(COSName expectedFilter) throws IOException;

// Dictionary operations (inherited from COSDictionary)
public void setItem(COSName key, COSBase value);
public COSBase getItem(COSName key);

// Stream properties
public long getLength();
public void setLength(long length);
// NOTE(review): confirm the filter accessor types (List<COSName>) against the Javadoc.
public List<COSName> getFilters();
public void setFilters(List<COSName> filters);

Usage Examples

Working with COS Dictionaries

// Start from an empty dictionary
final COSDictionary dict = new COSDictionary();

// Populate it with one value of each kind: name, string, integer, float
final COSName pageName = COSName.getPDFName("Page");
dict.setItem(COSName.TYPE, pageName);

final COSName titleKey = COSName.getPDFName("Title");
dict.setItem(titleKey, new COSString("My Title"));

final COSName countKey = COSName.getPDFName("Count");
dict.setItem(countKey, COSInteger.get(42));

final COSName scaleKey = COSName.getPDFName("Scale");
dict.setItem(scaleKey, new COSFloat(1.5f));

// Fetch the values again through the typed getters
// (the COSName-keyed getter for the type, the String-keyed getters for the rest)
final String typeName = dict.getCOSName(COSName.TYPE).getName();
final String titleText = dict.getString("Title");
final int countValue = dict.getInt("Count");
final float scaleValue = dict.getFloat("Scale");

// Report what was read
System.out.println("Type: " + typeName);
System.out.println("Title: " + titleText);
System.out.println("Count: " + countValue);
System.out.println("Scale: " + scaleValue);

Working with COS Arrays

// Build an array holding a string, an integer, a float and a name, in that order
final COSArray array = new COSArray();
final COSString hello = new COSString("Hello");
final COSInteger number = COSInteger.get(123);
final COSFloat pi = new COSFloat(3.14f);
final COSName test = COSName.getPDFName("Test");
array.add(hello);
array.add(number);
array.add(pi);
array.add(test);

// Walk the elements in order and print a line for each recognised type;
// elements of any other type are skipped silently
for (COSBase element : array) {
    final String line;
    if (element instanceof COSString) {
        line = "String: " + ((COSString) element).getString();
    } else if (element instanceof COSInteger) {
        line = "Integer: " + ((COSInteger) element).intValue();
    } else if (element instanceof COSFloat) {
        line = "Float: " + ((COSFloat) element).floatValue();
    } else if (element instanceof COSName) {
        line = "Name: " + ((COSName) element).getName();
    } else {
        continue;
    }
    System.out.println(line);
}

// Typed access by position: index 0 is the string, 1 the integer, 2 the float, 3 the name
final String stringAtZero = array.getString(0);
final int intAtOne = array.getInt(1);
final float floatAtTwo = array.getFloat(2);
final COSName nameAtThree = array.getName(3);

Low-Level Document Analysis

// Open the document in try-with-resources so it is closed even when the analysis throws
try (PDDocument document = Loader.loadPDF(new File("document.pdf"))) {
    COSDocument cosDoc = document.getDocument();

    // Analyze all objects in the document
    List<COSObject> objects = cosDoc.getObjects();
    System.out.println("Total objects: " + objects.size());

    // Number of dictionary objects seen per type name ("Unknown" when the dictionary has no type entry)
    Map<String, Integer> typeCount = new HashMap<>();

    for (COSObject cosObject : objects) {
        COSBase object = cosObject.getObject();

        // Only dictionaries are inspected; instanceof also filters out a null object
        if (object instanceof COSDictionary) {
            COSDictionary dict = (COSDictionary) object;
            COSName type = dict.getCOSName(COSName.TYPE);

            String typeName = (type != null) ? type.getName() : "Unknown";
            // merge() inserts 1 for a new key and adds 1 to an existing count
            typeCount.merge(typeName, 1, Integer::sum);

            System.out.println("Object " + cosObject.getObjectNumber() +
                              ": " + typeName);
        }
    }

    // Print type statistics
    typeCount.forEach((type, count) ->
        System.out.println(type + ": " + count + " objects"));
}

Creating Custom PDF Objects

// try-with-resources closes the new document even if building or saving it throws
try (PDDocument document = new PDDocument()) {
    // Create custom dictionary
    COSDictionary customDict = new COSDictionary();
    customDict.setItem(COSName.TYPE, COSName.getPDFName("CustomType"));
    customDict.setItem(COSName.getPDFName("Version"), new COSString("1.0"));
    // createFeatureArray() is not defined on this page; supply your own helper that returns a COSBase value
    customDict.setItem(COSName.getPDFName("Features"), createFeatureArray());

    // Create indirect object and register it with the underlying COS document
    COSObject indirectObject = new COSObject(customDict);
    document.getDocument().addObject(indirectObject);

    // Reference from page: the page dictionary points at the indirect object, not at a copy
    PDPage page = new PDPage();
    COSDictionary pageDict = page.getCOSObject();
    pageDict.setItem(COSName.getPDFName("CustomData"), indirectObject);

    document.addPage(page);
    document.save("custom-objects.pdf");
}

Working with COS Streams

// Create a stream with custom data
COSStream stream = new COSStream();

// Set stream dictionary properties
stream.setItem(COSName.TYPE, COSName.getPDFName("CustomStream"));
stream.setItem(COSName.SUBTYPE, COSName.getPDFName("Text"));

// Write data to stream; the try-with-resources block closes the output before it is read back
try (OutputStream output = stream.createOutputStream()) {
    String data = "This is custom stream data";
    output.write(data.getBytes(StandardCharsets.UTF_8));
}

// Read data back from stream
try (InputStream input = stream.createInputStream()) {
    // readAllBytes() keeps reading until end of stream. A single read(buffer) call may
    // return fewer bytes than the stream holds, can never return more than the buffer
    // size, and returns -1 for an empty stream, which would make the String constructor throw.
    byte[] content = input.readAllBytes();
    String readData = new String(content, StandardCharsets.UTF_8);
    System.out.println("Stream data: " + readData);
}

// Get stream properties
long length = stream.getLength();
System.out.println("Stream length: " + length);

Advanced COS Manipulation

/**
 * Visitor that prints an indented outline of a COS object graph to standard output.
 *
 * <p>Arrays and dictionaries are expanded recursively, with two spaces of indentation per
 * nesting level. A container that is reached again while it is still being expanded is
 * reported as a cycle and not expanded a second time, so graphs with back-references
 * (for example a child dictionary that points at its parent) do not recurse without end.
 *
 * <p>Instances keep traversal state in fields, so one instance must not be used by several
 * threads at once.
 */
public class COSTreeWalker implements ICOSVisitor {
    private int depth = 0;

    // Containers currently being expanded, i.e. the path from the walk's root to the
    // current node. Membership is by identity so that two distinct but equal containers
    // are both expanded. Fully qualified names avoid requiring extra imports.
    private final java.util.Set<COSBase> inProgress =
            java.util.Collections.newSetFromMap(new java.util.IdentityHashMap<>());

    /**
     * Prints the array's size and then visits each non-null item one level deeper.
     *
     * @param array the array being visited
     * @return always {@code null}
     * @throws IOException if visiting an item fails
     */
    @Override
    public Object visitFromArray(COSArray array) throws IOException {
        if (!inProgress.add(array)) {
            // Already on the current path: expanding it again would never terminate
            System.out.println(indent() + "Array (cycle, not expanded)");
            return null;
        }
        System.out.println(indent() + "Array [" + array.size() + " items]");
        depth++;
        try {
            for (COSBase item : array) {
                // Same null guard as for dictionary values
                if (item != null) {
                    item.accept(this);
                }
            }
        } finally {
            // Restore traversal state even if a nested visit throws
            depth--;
            inProgress.remove(array);
        }
        return null;
    }

    /**
     * Prints the dictionary's size, then each key followed by its non-null value one level deeper.
     *
     * @param dict the dictionary being visited
     * @return always {@code null}
     * @throws IOException if visiting a value fails
     */
    @Override
    public Object visitFromDictionary(COSDictionary dict) throws IOException {
        if (!inProgress.add(dict)) {
            // Already on the current path: expanding it again would never terminate
            System.out.println(indent() + "Dictionary (cycle, not expanded)");
            return null;
        }
        System.out.println(indent() + "Dictionary [" + dict.size() + " keys]");
        depth++;
        try {
            for (COSName key : dict.keySet()) {
                System.out.println(indent() + "Key: " + key.getName());
                // NOTE(review): presumably accept(...) on an indirect reference forwards to
                // the referenced object, which is how back-references re-enter this method — confirm.
                COSBase value = dict.getItem(key);
                if (value != null) {
                    value.accept(this);
                }
            }
        } finally {
            // Restore traversal state even if a nested visit throws
            depth--;
            inProgress.remove(dict);
        }
        return null;
    }

    /**
     * Prints the string's text in double quotes at the current indentation.
     *
     * @param string the string being visited
     * @return always {@code null}
     * @throws IOException declared by the visitor interface; not thrown by this implementation
     */
    @Override
    public Object visitFromString(COSString string) throws IOException {
        System.out.println(indent() + "String: \"" + string.getString() + "\"");
        return null;
    }

    // ... implement other visit methods

    /** Returns two spaces per current nesting level. */
    private String indent() {
        return "  ".repeat(depth);
    }
}

// Usage
// try-with-resources closes the document even if walking an object throws
try (PDDocument document = Loader.loadPDF(new File("document.pdf"))) {
    COSDocument cosDoc = document.getDocument();

    COSTreeWalker walker = new COSTreeWalker();
    List<COSObject> objects = cosDoc.getObjects();

    // At most the first five objects are walked
    int limit = Math.min(5, objects.size());
    for (int i = 0; i < limit; i++) {
        COSObject obj = objects.get(i);
        System.out.println("=== Object " + obj.getObjectNumber() + " ===");

        // Guard the dereference the same way the walker guards dictionary values
        COSBase target = obj.getObject();
        if (target != null) {
            target.accept(walker);
        }
    }
}

Install with Tessl CLI

npx tessl i tessl/maven-org-apache-pdfbox--pdfbox

docs

content-stream-processing.md

cos-operations.md

document-operations.md

index.md

interactive-forms.md

multi-pdf-operations.md

rendering-graphics.md

security-encryption.md

text-operations.md

tile.json