CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/maven-org-apache-pdfbox--pdfbox

The Apache PDFBox library is an open source Java tool for working with PDF documents.

Pending
Overview
Eval results
Files

content-stream-processing.mddocs/

Content Stream Processing

Low-level content stream parsing and generation for advanced PDF content manipulation, custom rendering engines, and detailed content analysis.

Base Stream Processing

Foundation classes for processing PDF content streams.

// Constructor and methods in org.apache.pdfbox.contentstream.PDFStreamEngine
public PDFStreamEngine();
public PDFStreamEngine(ResourceCache resourceCache);

// Main processing methods
public void processPage(PDPage page) throws IOException;
public void processStream(PDContentStream contentStream, PDPage page, PDResources resources) throws IOException;

// Operator handling
protected void processOperator(Operator operator, List<COSBase> operands) throws IOException;
protected void unsupportedOperator(Operator operator, List<COSBase> operands) throws IOException;

// State management
public PDGraphicsState getGraphicsState();
public Matrix getTextMatrix();
public Matrix getTextLineMatrix();

Graphics Stream Processing

Enhanced stream processing for graphics operations and rendering.

// Constructor in org.apache.pdfbox.contentstream.PDFGraphicsStreamEngine
public PDFGraphicsStreamEngine(PDPage page);

// Abstract graphics methods (must be implemented; they are declared public, so overrides must be public too)
public abstract void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) throws IOException;
public abstract void drawImage(PDImage pdImage) throws IOException;
public abstract void clip(int windingRule) throws IOException;
public abstract void moveTo(float x, float y) throws IOException;
public abstract void lineTo(float x, float y) throws IOException;
public abstract void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) throws IOException;
public abstract Point2D getCurrentPoint() throws IOException;
public abstract void closePath() throws IOException;
public abstract void endPath() throws IOException;
public abstract void strokePath() throws IOException;
public abstract void fillPath(int windingRule) throws IOException;
public abstract void fillAndStrokePath(int windingRule) throws IOException;
public abstract void shadingFill(COSName shadingName) throws IOException;

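// Graphics state access (methods of PDGraphicsState, obtained from the engine via getGraphicsState())
public PDColor getStrokingColor();
public PDColor getNonStrokingColor();
public float getLineWidth();
public int getLineCap();
public int getLineJoin();
public float getMiterLimit();
public PDLineDashPattern getLineDashPattern();
// The dash array and phase are read from the returned pattern: getDashArray() and getPhase()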

Operator Processing

Handle specific PDF operators and their operands.

// Text-showing hooks in PDFStreamEngine
protected void showText(byte[] string) throws IOException;
public void showTextStrings(COSArray array) throws IOException;

// Per-glyph callback available in org.apache.pdfbox.text.PDFTextStripper (not in PDFStreamEngine itself)
protected void processTextPosition(TextPosition text);

// Graphics state operators
protected void saveGraphicsState() throws IOException;
protected void restoreGraphicsState() throws IOException;
protected void concatenate(Matrix matrix) throws IOException;

// Path construction operators
protected void moveToOperator(List<COSBase> operands) throws IOException;
protected void lineToOperator(List<COSBase> operands) throws IOException;
protected void curveToOperator(List<COSBase> operands) throws IOException;
protected void closePathOperator(List<COSBase> operands) throws IOException;
protected void rectangleOperator(List<COSBase> operands) throws IOException;

// Path painting operators
protected void strokeOperator(List<COSBase> operands) throws IOException;
protected void fillOperator(List<COSBase> operands) throws IOException;
protected void fillAndStrokeOperator(List<COSBase> operands) throws IOException;
protected void clipOperator(List<COSBase> operands) throws IOException;

Content Stream Creation

Generate PDF content streams programmatically.

// Methods in org.apache.pdfbox.pdmodel.PDPageContentStream for content generation
public void beginText() throws IOException;
public void endText() throws IOException;
public void setFont(PDFont font, float fontSize) throws IOException;
public void setFontAndSize(PDFont font, float fontSize) throws IOException;
public void newLineAtOffset(float tx, float ty) throws IOException;
public void setTextMatrix(Matrix matrix) throws IOException;
public void showText(String text) throws IOException;
public void showTextWithPositioning(Object[] textWithPositioning) throws IOException;

// Path operations
public void moveTo(float x, float y) throws IOException;
public void lineTo(float x, float y) throws IOException;
public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) throws IOException;
public void addRect(float x, float y, float width, float height) throws IOException;
public void closePath() throws IOException;

// Path painting
public void stroke() throws IOException;
public void fill() throws IOException;
public void fillAndStroke() throws IOException;
public void closeAndStroke() throws IOException;
public void closeAndFillAndStroke() throws IOException;
public void clip() throws IOException;

// Graphics state
public void saveGraphicsState() throws IOException;
public void restoreGraphicsState() throws IOException;
public void transform(Matrix matrix) throws IOException;
public void setStrokingColor(Color color) throws IOException;
public void setStrokingColor(float c) throws IOException;
public void setStrokingColor(float c, float m, float y, float k) throws IOException;
public void setNonStrokingColor(Color color) throws IOException;
public void setNonStrokingColor(float c) throws IOException;
public void setNonStrokingColor(float c, float m, float y, float k) throws IOException;
public void setLineWidth(float lineWidth) throws IOException;
public void setLineCap(int lineCap) throws IOException;
public void setLineJoin(int lineJoin) throws IOException;
public void setMiterLimit(float miterLimit) throws IOException;
public void setLineDashPattern(float[] pattern, float phase) throws IOException;

Operator Objects

Work with PDF operators and their operands.

// Methods in org.apache.pdfbox.contentstream.operator.Operator
public String getName();
public List<COSBase> getOperands();
public void setOperands(List<COSBase> operands);

// Static factory methods
public static Operator getOperator(String name);

Resource Management

Access and manage content stream resources.

// Methods in org.apache.pdfbox.pdmodel.PDResources
public PDFont getFont(COSName name) throws IOException;
public PDXObject getXObject(COSName name) throws IOException;
public PDExtendedGraphicsState getExtGState(COSName name);
public PDColorSpace getColorSpace(COSName name) throws IOException;
public PDPattern getPattern(COSName name) throws IOException;
public PDShading getShading(COSName name) throws IOException;

// Resource modification
public void put(COSName name, PDFont font);
public void put(COSName name, PDXObject xobject);
public void put(COSName name, PDExtendedGraphicsState extGState);
public void put(COSName name, PDColorSpace colorSpace);

Usage Examples

Custom Content Stream Processor

// Requires org.apache.pdfbox.text.PDFTextStripper and org.apache.pdfbox.text.TextPosition.

/**
 * Content stream processor that records every glyph's Unicode text and logs
 * each XObject invocation ("Do" operator).
 *
 * <p>It extends {@code PDFTextStripper} (itself a {@code PDFStreamEngine})
 * because the {@code processTextPosition} callback is provided by the text
 * extraction engine, not by the bare {@code PDFStreamEngine}.
 */
public class CustomContentProcessor extends PDFTextStripper {
    /** Unicode text of every glyph seen, in content-stream order. */
    private final List<String> textContent = new ArrayList<>();

    public CustomContentProcessor() throws IOException {
        super();
    }

    /**
     * Records the glyph text and its position, then hands the glyph on to the
     * stripper so that its normal text assembly keeps working.
     *
     * @param text the glyph being shown, with its computed position
     */
    @Override
    protected void processTextPosition(TextPosition text) {
        textContent.add(text.getUnicode());
        System.out.println("Text: " + text.getUnicode() +
                          " at (" + text.getX() + ", " + text.getY() + ")");
        super.processTextPosition(text);
    }

    /**
     * Logs XObject invocations before delegating to the default operator handling.
     *
     * @param operator the operator read from the content stream
     * @param operands the operands that preceded the operator
     * @throws IOException if the default handling of the operator fails
     */
    @Override
    protected void processOperator(Operator operator, List<COSBase> operands) throws IOException {
        String operatorName = operator.getName();

        // A malformed stream may supply no operand or a non-name operand for "Do";
        // check before casting instead of failing with an unchecked exception.
        if ("Do".equals(operatorName) && !operands.isEmpty() && operands.get(0) instanceof COSName) {
            // XObject (image or form) placement
            COSName name = (COSName) operands.get(0);
            System.out.println("Drawing XObject: " + name.getName());
        }

        super.processOperator(operator, operands);
    }

    /**
     * @return the glyph texts collected so far (the live internal list)
     */
    public List<String> getTextContent() {
        return textContent;
    }
}

// Usage
// try-with-resources closes the document even when processing throws.
try (PDDocument document = Loader.loadPDF(new File("document.pdf"))) {
    CustomContentProcessor processor = new CustomContentProcessor();

    // getText() runs every page of the document through the engine; the
    // callbacks above fire as a side effect, so the returned string is not needed here.
    processor.getText(document);

    List<String> extractedText = processor.getTextContent();
}

Custom Graphics Renderer

/**
 * Minimal {@code Graphics2D}-backed renderer built on {@code PDFGraphicsStreamEngine}.
 *
 * <p>The engine reports path construction and painting as separate callbacks,
 * so this class accumulates the current path itself and only paints it when a
 * painting callback arrives. All abstract callbacks are declared {@code public}
 * in the base class and are therefore overridden as {@code public} here.
 *
 * <p>Deliberate simplifications: the line width is used as-is (not scaled by the
 * current transformation matrix), dash patterns and shadings are ignored, and a
 * clip applied to the {@code Graphics2D} is not undone when the content stream
 * restores its graphics state.
 */
public class SimpleGraphicsRenderer extends PDFGraphicsStreamEngine {
    /** Marker meaning that no clip operator is waiting to be applied. */
    private static final int NO_CLIP = -1;

    private final Graphics2D graphics;
    /** Path under construction; reset after every painting or end-path callback. */
    private final Path2D.Float currentPath = new Path2D.Float();
    /** Winding rule of a pending clip, or {@link #NO_CLIP}. */
    private int pendingClipRule = NO_CLIP;

    /**
     * @param page     the page whose content stream will be rendered
     * @param graphics the target surface; the caller is expected to have set up
     *                 any page-to-device transform on it beforehand
     */
    public SimpleGraphicsRenderer(PDPage page, Graphics2D graphics) {
        super(page);
        this.graphics = graphics;
    }

    /** Appends a closed four-corner subpath; nothing is painted yet. */
    @Override
    public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) throws IOException {
        currentPath.moveTo(p0.getX(), p0.getY());
        currentPath.lineTo(p1.getX(), p1.getY());
        currentPath.lineTo(p2.getX(), p2.getY());
        currentPath.lineTo(p3.getX(), p3.getY());
        currentPath.closePath();
    }

    /** Draws the image into the unit square mapped by the current transformation matrix. */
    @Override
    public void drawImage(PDImage pdImage) throws IOException {
        BufferedImage image = pdImage.getImage();
        Matrix matrix = getGraphicsState().getCurrentTransformationMatrix();

        // PDF places an image in a 1x1 square with the origin at the bottom-left,
        // while a BufferedImage is width x height pixels with the origin at the
        // top-left: scale the raster down to the unit square and flip it vertically.
        AffineTransform transform = matrix.createAffineTransform();
        transform.scale(1.0 / image.getWidth(), -1.0 / image.getHeight());
        transform.translate(0, -image.getHeight());
        graphics.drawImage(image, transform, null);
    }

    /** Remembers the clip request; it takes effect once the current path is finished. */
    @Override
    public void clip(int windingRule) throws IOException {
        pendingClipRule = windingRule;
    }

    @Override
    public void moveTo(float x, float y) throws IOException {
        currentPath.moveTo(x, y);
    }

    @Override
    public void lineTo(float x, float y) throws IOException {
        currentPath.lineTo(x, y);
    }

    @Override
    public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) throws IOException {
        currentPath.curveTo(x1, y1, x2, y2, x3, y3);
    }

    /** @return the end point of the path under construction, or {@code null} if it is empty */
    @Override
    public Point2D getCurrentPoint() throws IOException {
        return currentPath.getCurrentPoint();
    }

    @Override
    public void closePath() throws IOException {
        currentPath.closePath();
    }

    /** Ends the path without painting it, applying a pending clip if there is one. */
    @Override
    public void endPath() throws IOException {
        finishPath();
    }

    @Override
    public void strokePath() throws IOException {
        strokeCurrentPath();
        finishPath();
    }

    @Override
    public void fillPath(int windingRule) throws IOException {
        fillCurrentPath(windingRule);
        finishPath();
    }

    @Override
    public void fillAndStrokePath(int windingRule) throws IOException {
        fillCurrentPath(windingRule);
        strokeCurrentPath();
        finishPath();
    }

    /** Shadings are not rendered by this simple example. */
    @Override
    public void shadingFill(COSName shadingName) throws IOException {
        // intentionally empty
    }

    /** Strokes the current path with the stroking color and line style of the graphics state. */
    private void strokeCurrentPath() throws IOException {
        // BasicStroke rejects a miter limit below 1, so clamp the value from the PDF.
        float miterLimit = Math.max(1f, getGraphicsState().getMiterLimit());
        // PDF line cap (0..2) and line join (0..2) codes coincide with the BasicStroke constants.
        graphics.setStroke(new BasicStroke(
                getGraphicsState().getLineWidth(),
                getGraphicsState().getLineCap(),
                getGraphicsState().getLineJoin(),
                miterLimit));
        graphics.setColor(new Color(getGraphicsState().getStrokingColor().toRGB()));
        graphics.draw(currentPath);
    }

    /** Fills the current path with the non-stroking color using the given winding rule. */
    private void fillCurrentPath(int windingRule) throws IOException {
        currentPath.setWindingRule(windingRule);
        graphics.setColor(new Color(getGraphicsState().getNonStrokingColor().toRGB()));
        graphics.fill(currentPath);
    }

    /** Applies a pending clip, then clears the path so the next one starts empty. */
    private void finishPath() {
        if (pendingClipRule != NO_CLIP) {
            currentPath.setWindingRule(pendingClipRule);
            graphics.clip(currentPath);
            pendingClipRule = NO_CLIP;
        }
        currentPath.reset();
    }
}

Content Stream Generation

// Builds a one-page A4 document with a line of text, a stroked rectangle and a
// filled circle, then saves it. try-with-resources guarantees that both the
// content stream and the document are closed even if a drawing call throws.
try (PDDocument document = new PDDocument()) {
    PDPage page = new PDPage(PDRectangle.A4);
    document.addPage(page);

    try (PDPageContentStream contentStream = new PDPageContentStream(document, page)) {
        // Text operations
        contentStream.beginText();
        // PDFBox 3.x (the version that provides Loader.loadPDF) creates the
        // standard 14 fonts through Standard14Fonts.FontName.
        contentStream.setFont(new PDType1Font(Standard14Fonts.FontName.HELVETICA), 12);
        contentStream.newLineAtOffset(100, 700);
        contentStream.showText("Hello World!");
        contentStream.endText();

        // Graphics operations
        contentStream.saveGraphicsState();
        contentStream.setStrokingColor(Color.BLUE);
        contentStream.setLineWidth(2);

        // Draw rectangle
        contentStream.addRect(100, 600, 200, 100);
        contentStream.stroke();

        // Draw circle (approximated with four cubic Bezier curves, one per quadrant)
        float centerX = 200, centerY = 500, radius = 50;
        float kappa = 0.552284749831f; // 4/3 * (sqrt(2) - 1)
        float offset = radius * kappa;

        contentStream.moveTo(centerX, centerY + radius);
        contentStream.curveTo(centerX + offset, centerY + radius, centerX + radius, centerY + offset, centerX + radius, centerY);
        contentStream.curveTo(centerX + radius, centerY - offset, centerX + offset, centerY - radius, centerX, centerY - radius);
        contentStream.curveTo(centerX - offset, centerY - radius, centerX - radius, centerY - offset, centerX - radius, centerY);
        contentStream.curveTo(centerX - radius, centerY + offset, centerX - offset, centerY + radius, centerX, centerY + radius);
        // The fill uses the non-stroking color, which was not changed above.
        contentStream.fill();

        contentStream.restoreGraphicsState();
    }

    // The content stream must be closed before saving; the inner try block ensures that.
    document.save("custom-content.pdf");
}

Advanced Operator Processing

/**
 * Stream engine that tallies how often each operator name occurs and prints
 * the operands of a few selected operators.
 */
public class OperatorAnalyzer extends PDFStreamEngine {
    /** Occurrence count per operator name. */
    private final Map<String, Integer> operatorCounts = new HashMap<>();

    /**
     * Counts the operator, logs it when it is one of the tracked kinds, and
     * then delegates to the default handling.
     *
     * @param operator the operator read from the content stream
     * @param operands the operands that preceded the operator
     * @throws IOException if the default handling of the operator fails
     */
    @Override
    protected void processOperator(Operator operator, List<COSBase> operands) throws IOException {
        String opName = operator.getName();
        operatorCounts.merge(opName, 1, Integer::sum);

        // Pick the log prefix for the operators of interest; all others stay silent.
        String prefix;
        switch (opName) {
            case "Tj":  // Show text
                prefix = "Text operator: ";
                break;
            case "cm":  // Concatenate matrix
                prefix = "Transform matrix: ";
                break;
            case "Do":  // Invoke XObject
                prefix = "XObject invocation: ";
                break;
            default:
                prefix = null;
                break;
        }
        if (prefix != null) {
            System.out.println(prefix + operands);
        }

        super.processOperator(operator, operands);
    }

    /**
     * @return the live map from operator name to the number of times it was seen
     */
    public Map<String, Integer> getOperatorCounts() {
        return operatorCounts;
    }
}

// Usage
// try-with-resources closes the document even if processing the page throws.
try (PDDocument document = Loader.loadPDF(new File("document.pdf"))) {
    OperatorAnalyzer analyzer = new OperatorAnalyzer();

    // Analyze only the first page of the document.
    PDPage page = document.getPage(0);
    analyzer.processPage(page);

    Map<String, Integer> counts = analyzer.getOperatorCounts();
    counts.forEach((op, count) ->
        System.out.println("Operator " + op + ": " + count + " times"));
}

Install with Tessl CLI

npx tessl i tessl/maven-org-apache-pdfbox--pdfbox

docs

content-stream-processing.md

cos-operations.md

document-operations.md

index.md

interactive-forms.md

multi-pdf-operations.md

rendering-graphics.md

security-encryption.md

text-operations.md

tile.json