The Apache PDFBox library is an open source Java tool for working with PDF documents.
—
Low-level content stream parsing and generation for advanced PDF content manipulation, custom rendering engines, and detailed content analysis.
Foundation classes for processing PDF content streams.
// Constructor and methods in org.apache.pdfbox.contentstream.PDFStreamEngine
public PDFStreamEngine();
public PDFStreamEngine(ResourceCache resourceCache);
// Main processing methods
public void processPage(PDPage page) throws IOException;
public void processStream(PDContentStream contentStream, PDPage page, PDResources resources) throws IOException;
// Operator handling
protected void processOperator(Operator operator, List<COSBase> operands) throws IOException;
protected void unsupportedOperator(Operator operator, List<COSBase> operands) throws IOException;
// State management
public PDGraphicsState getGraphicsState();
public Matrix getTextMatrix();
public Matrix getTextLineMatrix();
Enhanced stream processing for graphics operations and rendering.
// Constructor in org.apache.pdfbox.contentstream.PDFGraphicsStreamEngine
public PDFGraphicsStreamEngine(PDPage page);
// Abstract graphics methods (must be implemented)
protected abstract void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) throws IOException;
protected abstract void drawImage(PDImage pdImage) throws IOException;
protected abstract void clip(int windingRule) throws IOException;
protected abstract void moveTo(float x, float y) throws IOException;
protected abstract void lineTo(float x, float y) throws IOException;
protected abstract void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) throws IOException;
protected abstract void closePath() throws IOException;
protected abstract void endPath() throws IOException;
protected abstract void strokePath() throws IOException;
protected abstract void fillPath(int windingRule) throws IOException;
protected abstract void fillAndStrokePath(int windingRule) throws IOException;
protected abstract void shadingFill(COSName shadingName) throws IOException;
// Graphics state access
public PDColor getStrokingColor();
public PDColor getNonStrokingColor();
public float getLineWidth();
public int getLineCap();
public int getLineJoin();
public float getMiterLimit();
public float[] getLineDashPattern();
public float getLineDashPhase();
Handle specific PDF operators and their operands.
// Methods for specific operator categories in PDFStreamEngine
protected void processTextPosition(TextPosition text);
protected void showText(byte[] string) throws IOException;
protected void showTextAdjusted(List<Object> array) throws IOException;
// Graphics state operators
protected void saveGraphicsState() throws IOException;
protected void restoreGraphicsState() throws IOException;
protected void concatenate(Matrix matrix) throws IOException;
// Path construction operators
protected void moveToOperator(List<COSBase> operands) throws IOException;
protected void lineToOperator(List<COSBase> operands) throws IOException;
protected void curveToOperator(List<COSBase> operands) throws IOException;
protected void closePathOperator(List<COSBase> operands) throws IOException;
protected void rectangleOperator(List<COSBase> operands) throws IOException;
// Path painting operators
protected void strokeOperator(List<COSBase> operands) throws IOException;
protected void fillOperator(List<COSBase> operands) throws IOException;
protected void fillAndStrokeOperator(List<COSBase> operands) throws IOException;
protected void clipOperator(List<COSBase> operands) throws IOException;
Generate PDF content streams programmatically.
// Methods in org.apache.pdfbox.pdmodel.PDPageContentStream for content generation
public void beginText() throws IOException;
public void endText() throws IOException;
public void setFont(PDFont font, float fontSize) throws IOException;
public void setFontAndSize(PDFont font, float fontSize) throws IOException;
public void newLineAtOffset(float tx, float ty) throws IOException;
public void setTextMatrix(Matrix matrix) throws IOException;
public void showText(String text) throws IOException;
public void showTextWithPositioning(Object[] textWithPositioning) throws IOException;
// Path operations
public void moveTo(float x, float y) throws IOException;
public void lineTo(float x, float y) throws IOException;
public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) throws IOException;
public void addRect(float x, float y, float width, float height) throws IOException;
public void closePath() throws IOException;
// Path painting
public void stroke() throws IOException;
public void fill() throws IOException;
public void fillAndStroke() throws IOException;
public void closeAndStroke() throws IOException;
public void closeAndFillAndStroke() throws IOException;
public void clip() throws IOException;
// Graphics state
public void saveGraphicsState() throws IOException;
public void restoreGraphicsState() throws IOException;
public void transform(Matrix matrix) throws IOException;
public void setStrokingColor(Color color) throws IOException;
public void setStrokingColor(float c) throws IOException;
public void setStrokingColor(float c, float m, float y, float k) throws IOException;
public void setNonStrokingColor(Color color) throws IOException;
public void setNonStrokingColor(float c) throws IOException;
public void setNonStrokingColor(float c, float m, float y, float k) throws IOException;
public void setLineWidth(float lineWidth) throws IOException;
public void setLineCap(int lineCap) throws IOException;
public void setLineJoin(int lineJoin) throws IOException;
public void setMiterLimit(float miterLimit) throws IOException;
public void setLineDashPattern(float[] pattern, float phase) throws IOException;
Work with PDF operators and their operands.
// Methods in org.apache.pdfbox.contentstream.operator.Operator
public String getName();
public List<COSBase> getOperands();
public void setOperands(List<COSBase> operands);
// Static factory methods
public static Operator getOperator(String name);
Access and manage content stream resources.
// Methods in org.apache.pdfbox.pdmodel.PDResources
public PDFont getFont(COSName name) throws IOException;
public PDXObject getXObject(COSName name) throws IOException;
public PDExtendedGraphicsState getExtGState(COSName name);
public PDColorSpace getColorSpace(COSName name) throws IOException;
public PDPattern getPattern(COSName name) throws IOException;
public PDShading getShading(COSName name) throws IOException;
// Resource modification
public void put(COSName name, PDFont font);
public void put(COSName name, PDXObject xobject);
public void put(COSName name, PDExtendedGraphicsState extGState);
public void put(COSName name, PDColorSpace colorSpace);
/**
 * Example content-stream processor that records the Unicode text of every
 * {@code TextPosition} it receives and logs each XObject ("Do") invocation.
 *
 * <p>NOTE(review): {@code processTextPosition} appears to be declared by the
 * engine hierarchy behind {@code PDFTextStripper} rather than by
 * {@code PDFStreamEngine} itself — confirm the intended base class before
 * relying on the {@code @Override} below.
 */
public class CustomContentProcessor extends PDFStreamEngine {
    /** Unicode strings collected so far, in the order they were reported. */
    private final List<String> textContent = new ArrayList<>();

    /**
     * Creates a processor with an empty text buffer.
     *
     * @throws IOException declared for compatibility with callers that already handle it
     */
    public CustomContentProcessor() throws IOException {
        super();
    }

    /**
     * Stores and logs the Unicode value and position of one piece of text.
     *
     * @param text the text fragment reported by the engine
     */
    @Override
    protected void processTextPosition(TextPosition text) {
        textContent.add(text.getUnicode());
        System.out.println("Text: " + text.getUnicode() +
                " at (" + text.getX() + ", " + text.getY() + ")");
    }

    /**
     * Logs the name of any XObject drawn with the "Do" operator, then hands the
     * operator to the base engine for normal processing.
     *
     * @param operator the operator read from the content stream
     * @param operands the operands that preceded the operator
     * @throws IOException if the base engine fails to process the operator
     */
    @Override
    protected void processOperator(Operator operator, List<COSBase> operands) throws IOException {
        // A malformed stream can carry "Do" with no operand or a non-name operand;
        // skip the log line in that case instead of failing before the base engine runs.
        if ("Do".equals(operator.getName())
                && !operands.isEmpty()
                && operands.get(0) instanceof COSName) {
            COSName name = (COSName) operands.get(0);
            System.out.println("Drawing XObject: " + name.getName());
        }
        super.processOperator(operator, operands);
    }

    /**
     * Returns the text collected so far.
     *
     * @return the live list of collected Unicode strings
     */
    public List<String> getTextContent() {
        return textContent;
    }
}
// Usage: feed every page of the document to the processor, then read back the text
PDDocument document = Loader.loadPDF(new File("document.pdf"));
CustomContentProcessor processor = new CustomContentProcessor();
for (PDPage page : document.getPages()) {
    processor.processPage(page);
}
List<String> extractedText = processor.getTextContent();
document.close();
/**
 * Minimal Java2D renderer: accumulates the path built by the content stream
 * and paints it onto a {@link Graphics2D} when a painting operator arrives.
 *
 * <p>The callbacks are {@code public} because {@code PDFGraphicsStreamEngine}
 * declares them as public abstract methods; an override may not narrow that.
 *
 * <p>Simplifications in this example: dash patterns are ignored, the line
 * width is used as-is, and the Graphics2D clip is never rewound when the PDF
 * graphics state is restored.
 */
public class SimpleGraphicsRenderer extends PDFGraphicsStreamEngine {
    /** Sentinel meaning "no clip operator is waiting for the path to end". */
    private static final int NO_PENDING_CLIP = -1;

    private final Graphics2D graphics;
    /** Transform of the target at construction time; not used by the methods below. */
    private final AffineTransform baseTransform;
    /** Path under construction; emptied after every path-ending operator. */
    private final Path2D currentPath = new Path2D.Float();
    /** Winding rule of a clip requested for the current path, or NO_PENDING_CLIP. */
    private int pendingClipRule = NO_PENDING_CLIP;

    /**
     * @param page     the page whose content stream will be rendered
     * @param graphics the drawing surface that receives the output
     */
    public SimpleGraphicsRenderer(PDPage page, Graphics2D graphics) {
        super(page);
        this.graphics = graphics;
        this.baseTransform = graphics.getTransform();
    }

    /** Appends a closed four-corner subpath; painting happens later. */
    @Override
    public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) throws IOException {
        currentPath.moveTo(p0.getX(), p0.getY());
        currentPath.lineTo(p1.getX(), p1.getY());
        currentPath.lineTo(p2.getX(), p2.getY());
        currentPath.lineTo(p3.getX(), p3.getY());
        currentPath.closePath();
    }

    /** Draws an image into the unit square mapped by the current transformation matrix. */
    @Override
    public void drawImage(PDImage pdImage) throws IOException {
        BufferedImage image = pdImage.getImage();
        AffineTransform imageTransform =
                getGraphicsState().getCurrentTransformationMatrix().createAffineTransform();
        // The matrix places a 1x1 square, so pixel coordinates must be scaled down
        // to that square; the y axis is flipped because image rows run top-down.
        imageTransform.scale(1.0 / image.getWidth(), -1.0 / image.getHeight());
        imageTransform.translate(0, -image.getHeight());
        graphics.drawImage(image, imageTransform, null);
    }

    /** Remembers the clip request; it is applied when the current path ends. */
    @Override
    public void clip(int windingRule) throws IOException {
        pendingClipRule = windingRule;
    }

    @Override
    public void moveTo(float x, float y) throws IOException {
        currentPath.moveTo(x, y);
    }

    @Override
    public void lineTo(float x, float y) throws IOException {
        currentPath.lineTo(x, y);
    }

    @Override
    public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) throws IOException {
        currentPath.curveTo(x1, y1, x2, y2, x3, y3);
    }

    /** @return the last point of the path under construction, or null if it is empty */
    @Override
    public Point2D getCurrentPoint() throws IOException {
        return currentPath.getCurrentPoint();
    }

    @Override
    public void closePath() throws IOException {
        currentPath.closePath();
    }

    /** Ends the path without painting it (only a pending clip, if any, takes effect). */
    @Override
    public void endPath() throws IOException {
        finishPath();
    }

    @Override
    public void strokePath() throws IOException {
        strokeCurrentPath();
        finishPath();
    }

    @Override
    public void fillPath(int windingRule) throws IOException {
        fillCurrentPath(windingRule);
        finishPath();
    }

    @Override
    public void fillAndStrokePath(int windingRule) throws IOException {
        fillCurrentPath(windingRule);
        strokeCurrentPath();
        finishPath();
    }

    // shadingFill(COSName) is the one remaining abstract method; it is left out of this excerpt.

    /** Fills the current path with the non-stroking colour using the given winding rule. */
    private void fillCurrentPath(int windingRule) throws IOException {
        currentPath.setWindingRule(windingRule);
        graphics.setColor(new java.awt.Color(getGraphicsState().getNonStrokingColor().toRGB()));
        graphics.fill(currentPath);
    }

    /** Outlines the current path with the stroking colour and the current line style. */
    private void strokeCurrentPath() throws IOException {
        graphics.setStroke(createStroke());
        graphics.setColor(new java.awt.Color(getGraphicsState().getStrokingColor().toRGB()));
        graphics.draw(currentPath);
    }

    /** Builds a Java2D stroke from the line width, cap, join and miter limit of the graphics state. */
    private java.awt.BasicStroke createStroke() {
        float width = Math.max(getGraphicsState().getLineWidth(), 0f);
        float miterLimit = getGraphicsState().getMiterLimit();
        if (miterLimit < 1f) {
            // BasicStroke rejects miter limits below 1; fall back to its own default.
            miterLimit = 10f;
        }
        return new java.awt.BasicStroke(width,
                getGraphicsState().getLineCap(),
                getGraphicsState().getLineJoin(),
                miterLimit);
    }

    /** Applies a pending clip (intersecting with the existing one) and empties the path. */
    private void finishPath() {
        if (pendingClipRule != NO_PENDING_CLIP) {
            currentPath.setWindingRule(pendingClipRule);
            // Clip against a copy so that resetting the path cannot affect the clip.
            graphics.clip(new Path2D.Float(currentPath));
            pendingClipRule = NO_PENDING_CLIP;
        }
        currentPath.reset();
    }
}
PDDocument document = new PDDocument();
PDPage page = new PDPage(PDRectangle.A4);
document.addPage(page);
// try-with-resources closes the content stream before the document is saved,
// even when one of the drawing calls throws.
try (PDPageContentStream contentStream = new PDPageContentStream(document, page)) {
    // Text operations
    contentStream.beginText();
    // NOTE(review): the Loader-based snippets on this page target PDFBox 3.x, where this
    // constant is replaced by new PDType1Font(Standard14Fonts.FontName.HELVETICA) — confirm version.
    contentStream.setFont(PDType1Font.HELVETICA, 12);
    contentStream.newLineAtOffset(100, 700);
    contentStream.showText("Hello World!");
    contentStream.endText();

    // Graphics operations
    contentStream.saveGraphicsState();
    contentStream.setStrokingColor(Color.BLUE);
    contentStream.setLineWidth(2);

    // Draw rectangle
    contentStream.addRect(100, 600, 200, 100);
    contentStream.stroke();

    // Draw circle (approximated with four cubic Bezier curves, one per quadrant)
    float centerX = 200, centerY = 500, radius = 50;
    float kappa = 0.552284749831f; // 4/3 * (sqrt(2) - 1)
    float offset = radius * kappa;
    contentStream.moveTo(centerX, centerY + radius);
    contentStream.curveTo(centerX + offset, centerY + radius, centerX + radius, centerY + offset, centerX + radius, centerY);
    contentStream.curveTo(centerX + radius, centerY - offset, centerX + offset, centerY - radius, centerX, centerY - radius);
    contentStream.curveTo(centerX - offset, centerY - radius, centerX - radius, centerY - offset, centerX - radius, centerY);
    contentStream.curveTo(centerX - radius, centerY + offset, centerX - offset, centerY + radius, centerX, centerY + radius);
    contentStream.fill();
    contentStream.restoreGraphicsState();
}
document.save("custom-content.pdf");
document.close();
/**
 * Stream engine that tallies how often each operator name occurs and logs
 * the operands of the "Tj", "cm" and "Do" operators.
 */
public class OperatorAnalyzer extends PDFStreamEngine {
    /** Number of occurrences seen so far, keyed by operator name. */
    private final Map<String, Integer> operatorCounts = new HashMap<>();

    /**
     * Counts the operator, logs it if it is one of the tracked kinds, then
     * delegates to the base engine.
     *
     * @param operator the operator read from the content stream
     * @param operands the operands that preceded the operator
     * @throws IOException if the base engine fails to process the operator
     */
    @Override
    protected void processOperator(Operator operator, List<COSBase> operands) throws IOException {
        String opName = operator.getName();
        operatorCounts.merge(opName, 1, Integer::sum);

        // Pick the log prefix for the operators of interest; others stay silent.
        String label;
        switch (opName) {
            case "Tj": // Show text
                label = "Text operator: ";
                break;
            case "cm": // Concatenate matrix
                label = "Transform matrix: ";
                break;
            case "Do": // Invoke XObject
                label = "XObject invocation: ";
                break;
            default:
                label = null;
                break;
        }
        if (label != null) {
            System.out.println(label + operands);
        }

        super.processOperator(operator, operands);
    }

    /**
     * @return the live map of operator name to occurrence count
     */
    public Map<String, Integer> getOperatorCounts() {
        return operatorCounts;
    }
}
// Usage: count the operators on the first page.
// try-with-resources closes the document even if processing throws.
try (PDDocument document = Loader.loadPDF(new File("document.pdf"))) {
    OperatorAnalyzer analyzer = new OperatorAnalyzer();
    PDPage page = document.getPage(0);
    analyzer.processPage(page);
    Map<String, Integer> counts = analyzer.getOperatorCounts();
    counts.forEach((op, count) ->
            System.out.println("Operator " + op + ": " + count + " times"));
}
Install with Tessl CLI
npx tessl i tessl/maven-org-apache-pdfbox--pdfbox