CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/maven-org-apache-pdfbox--pdfbox

The Apache PDFBox library is an open source Java tool for working with PDF documents.

Pending
Overview
Eval results
Files

multi-pdf-operations.mddocs/

Multi-PDF Operations

Utilities for merging multiple PDF documents, splitting documents into separate files, and overlaying content between documents with flexible configuration options.

PDF Merging

Combine multiple PDF documents into a single document with memory usage control.

// Constructor in org.apache.pdfbox.multipdf.PDFMergerUtility
// Typical flow shown in the examples: register sources, choose a destination,
// then call mergeDocuments.
// NOTE(review): these signatures look like the PDFBox 2.x API, while the usage
// examples on this page call Loader.loadPDF (a 3.x entry point) - confirm which
// version this listing targets.
public PDFMergerUtility();

// Source management
public void addSource(File source) throws FileNotFoundException;
public void addSource(InputStream source);
public void addSources(List<InputStream> sourcesList);

// Destination configuration
// NOTE(review): setDestinationDocument could not be matched to the published
// PDFMergerUtility Javadoc - confirm it exists before relying on it.
public void setDestinationFileName(String destinationFileName);
public void setDestinationStream(OutputStream destinationStream);
public void setDestinationDocument(PDDocument destinationDocument);

// Merge execution
public void mergeDocuments() throws IOException;
public void mergeDocuments(MemoryUsageSetting memUsageSetting) throws IOException;

// Page range control
// NOTE(review): page-range setters are listed for Splitter on this page; they do
// not appear to be part of the published PDFMergerUtility API - confirm. If they
// are absent, page ranges must be extracted before merging.
public void setStartPage(int startPage);
public void setEndPage(int endPage);

PDF Splitting

Split a PDF document into separate documents based on various criteria.

// Constructor in org.apache.pdfbox.multipdf.Splitter
public Splitter();

// Configuration methods
// setSplitAtPage controls the split interval (the splitting example uses
// setSplitAtPage(10) to split every 10 pages).
public void setSplitAtPage(int splitAtPage);
public void setStartPage(int startPage);
public void setEndPage(int endPage);
public void setMemoryUsageSetting(MemoryUsageSetting memUsageSetting);

// Splitting execution: returns the resulting parts as separate PDDocument
// instances, which the examples save and close individually.
public List<PDDocument> split(PDDocument document) throws IOException;

// Split decision hook: protected so that a subclass can override it to choose
// where splits occur (see the CustomSplitter example).
protected boolean splitAtPage(int pageNumber);

PDF Overlay

Overlay content from one PDF onto pages of another PDF with flexible positioning and page mapping.

// Constructor in org.apache.pdfbox.multipdf.Overlay
public Overlay();

// Input document configuration
// The base document is supplied either as an already-loaded PDDocument or as a file name.
public void setInputPDF(PDDocument inputPDF);
public void setInputFile(String inputFile) throws IOException;

// Overlay document configuration
// Each overlay slot (default, first page, last page, odd pages, even pages) can be
// filled from a loaded PDDocument or from a file name.
public void setDefaultOverlayPDF(PDDocument defaultOverlayPDF);
public void setDefaultOverlayFile(String defaultOverlayFile) throws IOException;
public void setFirstPageOverlayPDF(PDDocument firstPageOverlayPDF);
public void setFirstPageOverlayFile(String firstPageOverlayFile) throws IOException;
public void setLastPageOverlayPDF(PDDocument lastPageOverlayPDF);
public void setLastPageOverlayFile(String lastPageOverlayFile) throws IOException;
public void setOddPageOverlayPDF(PDDocument oddPageOverlayPDF);
public void setOddPageOverlayFile(String oddPageOverlayFile) throws IOException;
public void setEvenPageOverlayPDF(PDDocument evenPageOverlayPDF);
public void setEvenPageOverlayFile(String evenPageOverlayFile) throws IOException;

// Advanced overlay configuration
// setOverlayPosition takes Overlay.Position.FOREGROUND or Overlay.Position.BACKGROUND.
public void setOverlayPosition(Overlay.Position position);
public void setAllPagesOverlayPDF(PDDocument allPagesOverlayPDF);
public void setAllPagesOverlayFile(String allPagesOverlayFile) throws IOException;

// Execution
// The map associates a page number with the file name of the overlay PDF for that
// page (the examples use entries such as 3 -> "special-page-3-overlay.pdf");
// pass an empty map when no page-specific overlays are needed.
public PDDocument overlay(Map<Integer, String> specificPageOverlayFile) throws IOException;

Memory Management

Control memory usage during multi-PDF operations.

// Factory methods in org.apache.pdfbox.io.MemoryUsageSetting
// The returned setting is passed to PDFMergerUtility.mergeDocuments(MemoryUsageSetting)
// or Splitter.setMemoryUsageSetting(MemoryUsageSetting), as listed on this page.
public static MemoryUsageSetting setupMainMemoryOnly();
public static MemoryUsageSetting setupMainMemoryOnly(long maxMainMemoryBytes);
public static MemoryUsageSetting setupTempFileOnly();
public static MemoryUsageSetting setupMixed(long maxMainMemoryBytes);
public static MemoryUsageSetting setupMixed(long maxMainMemoryBytes, long maxStorageBytes);

Overlay Positioning

Control overlay positioning on target pages.

// Enum values in org.apache.pdfbox.multipdf.Overlay.Position
// Pass one of these to Overlay.setOverlayPosition(Overlay.Position).
public static final Position FOREGROUND;  // Overlay on top
public static final Position BACKGROUND;  // Overlay behind content

Usage Examples

Basic PDF Merging

// Merge three input files into one output file.
PDFMergerUtility merger = new PDFMergerUtility();

// Register each input file with the merger
String[] sourceNames = {"document1.pdf", "document2.pdf", "document3.pdf"};
for (String sourceName : sourceNames) {
    merger.addSource(new File(sourceName));
}

// File that receives the merged result
merger.setDestinationFileName("merged-document.pdf");

// Run the merge using the temp-file-only memory setting
merger.mergeDocuments(MemoryUsageSetting.setupTempFileOnly());

Merging with Page Ranges

PDFMergerUtility merger = new PDFMergerUtility();

// Add first document (pages 1-5)
merger.addSource(new File("document1.pdf"));
merger.setStartPage(1);
merger.setEndPage(5);

// Add second document (pages 10-15)
merger.addSource(new File("document2.pdf"));
merger.setStartPage(10);
merger.setEndPage(15);

merger.setDestinationFileName("partial-merge.pdf");
merger.mergeDocuments();

PDF Splitting

// try-with-resources closes the source document even if splitting or saving fails.
// The source stays open until every part has been saved.
try (PDDocument document = Loader.loadPDF(new File("large-document.pdf"))) {

    Splitter splitter = new Splitter();

    // Split every 10 pages
    splitter.setSplitAtPage(10);

    // Split with memory optimization
    splitter.setMemoryUsageSetting(MemoryUsageSetting.setupTempFileOnly());

    List<PDDocument> splitDocuments = splitter.split(document);

    try {
        // Save split documents as split-part-1.pdf, split-part-2.pdf, ...
        for (int i = 0; i < splitDocuments.size(); i++) {
            splitDocuments.get(i).save("split-part-" + (i + 1) + ".pdf");
        }
    } finally {
        // Close every part, including those not yet saved when a save fails
        for (PDDocument splitDoc : splitDocuments) {
            splitDoc.close();
        }
    }
}

Custom Splitting Logic

/**
 * Splitter whose split points are chosen by a custom rule instead of a fixed
 * interval.
 */
public class CustomSplitter extends Splitter {
    /**
     * Reports a split for page numbers divisible by 5 but not by 10.
     * NOTE(review): the Splitter Javadoc describes pageNumber as zero-based -
     * confirm that this rule matches the intended page boundaries.
     *
     * @param pageNumber the page number being checked
     * @return true when a split should happen at this page
     */
    @Override
    protected boolean splitAtPage(int pageNumber) {
        // Multiples of 10 are excluded first; what remains only needs the 5 check
        if (pageNumber % 10 == 0) {
            return false;
        }
        return pageNumber % 5 == 0;
    }
}

PDDocument document = Loader.loadPDF(new File("document.pdf"));
CustomSplitter splitter = new CustomSplitter();
List<PDDocument> splitDocs = splitter.split(document);
// Handle split documents...

PDF Overlay Operations

// Overlay is used in try-with-resources so that the overlay PDFs it opens from
// file names are released even if an exception is thrown.
try (Overlay overlay = new Overlay()) {

    // Set input document
    overlay.setInputFile("base-document.pdf");

    // Set the default overlay
    // NOTE(review): presumably used for pages that have no more specific overlay - confirm.
    overlay.setDefaultOverlayFile("watermark.pdf");

    // Set specific overlays for first and last pages
    overlay.setFirstPageOverlayFile("cover-overlay.pdf");
    overlay.setLastPageOverlayFile("back-overlay.pdf");

    // Set overlay position
    overlay.setOverlayPosition(Overlay.Position.BACKGROUND);

    // Create specific page overlays (page number -> overlay file name)
    Map<Integer, String> specificOverlays = new HashMap<>();
    specificOverlays.put(3, "special-page-3-overlay.pdf");
    specificOverlays.put(7, "special-page-7-overlay.pdf");

    // Perform overlay; the result is saved before the Overlay itself is closed,
    // and is closed even if saving fails.
    try (PDDocument result = overlay.overlay(specificOverlays)) {
        result.save("overlaid-document.pdf");
    }
}

Advanced Overlay Configuration

Overlay overlay = new Overlay();

// Load documents programmatically
PDDocument inputDoc = Loader.loadPDF(new File("input.pdf"));
PDDocument watermarkDoc = Loader.loadPDF(new File("watermark.pdf"));
PDDocument headerDoc = Loader.loadPDF(new File("header.pdf"));
PDDocument footerDoc = Loader.loadPDF(new File("footer.pdf"));

overlay.setInputPDF(inputDoc);
overlay.setDefaultOverlayPDF(watermarkDoc);
overlay.setOddPageOverlayPDF(headerDoc);
overlay.setEvenPageOverlayPDF(footerDoc);

// Apply overlay
PDDocument result = overlay.overlay(new HashMap<>());
result.save("complex-overlay.pdf");

// Clean up
result.close();
inputDoc.close();
watermarkDoc.close();
headerDoc.close();
footerDoc.close();

Install with Tessl CLI

npx tessl i tessl/maven-org-apache-pdfbox--pdfbox

docs

content-stream-processing.md

cos-operations.md

document-operations.md

index.md

interactive-forms.md

multi-pdf-operations.md

rendering-graphics.md

security-encryption.md

text-operations.md

tile.json