The Apache PDFBox library is an open source Java tool for working with PDF documents.
—
Utilities for merging multiple PDF documents, splitting documents into separate files, and overlaying content between documents with flexible configuration options.
Combine multiple PDF documents into a single document with memory usage control.
// Constructor in org.apache.pdfbox.multipdf.PDFMergerUtility
public PDFMergerUtility();
// Source management
public void addSource(File source) throws FileNotFoundException;
public void addSource(InputStream source);
public void addSources(List<InputStream> sourcesList);
// Destination configuration
public void setDestinationFileName(String destinationFileName);
public void setDestinationStream(OutputStream destinationStream);
public void setDestinationDocument(PDDocument destinationDocument);
// Merge execution
public void mergeDocuments() throws IOException;
public void mergeDocuments(MemoryUsageSetting memUsageSetting) throws IOException;
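// Page range control — NOTE(review): confirm these setters exist on PDFMergerUtility; the identical pair is also listed for Splitter below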
public void setStartPage(int startPage);
public void setEndPage(int endPage);

Split a PDF document into separate documents based on various criteria.
// Constructor in org.apache.pdfbox.multipdf.Splitter
public Splitter();
// Configuration methods
public void setSplitAtPage(int splitAtPage);
public void setStartPage(int startPage);
public void setEndPage(int endPage);
public void setMemoryUsageSetting(MemoryUsageSetting memUsageSetting);
// Splitting execution
public List<PDDocument> split(PDDocument document) throws IOException;
// Page count control
protected boolean splitAtPage(int pageNumber);

Overlay content from one PDF onto pages of another PDF with flexible positioning and page mapping.
// Constructor in org.apache.pdfbox.multipdf.Overlay
public Overlay();
// Input document configuration
public void setInputPDF(PDDocument inputPDF);
public void setInputFile(String inputFile) throws IOException;
// Overlay document configuration
public void setDefaultOverlayPDF(PDDocument defaultOverlayPDF);
public void setDefaultOverlayFile(String defaultOverlayFile) throws IOException;
public void setFirstPageOverlayPDF(PDDocument firstPageOverlayPDF);
public void setFirstPageOverlayFile(String firstPageOverlayFile) throws IOException;
public void setLastPageOverlayPDF(PDDocument lastPageOverlayPDF);
public void setLastPageOverlayFile(String lastPageOverlayFile) throws IOException;
public void setOddPageOverlayPDF(PDDocument oddPageOverlayPDF);
public void setOddPageOverlayFile(String oddPageOverlayFile) throws IOException;
public void setEvenPageOverlayPDF(PDDocument evenPageOverlayPDF);
public void setEvenPageOverlayFile(String evenPageOverlayFile) throws IOException;
// Advanced overlay configuration
public void setOverlayPosition(Overlay.Position position);
public void setAllPagesOverlayPDF(PDDocument allPagesOverlayPDF);
public void setAllPagesOverlayFile(String allPagesOverlayFile) throws IOException;
// Execution
public PDDocument overlay(Map<Integer, String> specificPageOverlayFile) throws IOException;

Control memory usage during multi-PDF operations.
// Factory methods in org.apache.pdfbox.io.MemoryUsageSetting
public static MemoryUsageSetting setupMainMemoryOnly();
public static MemoryUsageSetting setupMainMemoryOnly(long maxMainMemoryBytes);
public static MemoryUsageSetting setupTempFileOnly();
public static MemoryUsageSetting setupMixed(long maxMainMemoryBytes);
public static MemoryUsageSetting setupMixed(long maxMainMemoryBytes, long maxStorageBytes);

Control overlay positioning on target pages.
// Enum values in org.apache.pdfbox.multipdf.Overlay.Position
public static final Position FOREGROUND; // Overlay on top
public static final Position BACKGROUND; // Overlay behind content

PDFMergerUtility merger = new PDFMergerUtility();
// Register the three input files with the merger
merger.addSource(new File("document1.pdf"));
merger.addSource(new File("document2.pdf"));
merger.addSource(new File("document3.pdf"));
// The merged result is written to this file name
merger.setDestinationFileName("merged-document.pdf");
// Build the memory setting handed to mergeDocuments(); setupTempFileOnly()
// is the temp-file-only factory from the MemoryUsageSetting listing above
MemoryUsageSetting memUsage = MemoryUsageSetting.setupTempFileOnly();
merger.mergeDocuments(memUsage);

PDFMergerUtility merger = new PDFMergerUtility();
// NOTE(review): setStartPage/setEndPage take no source argument, so the second
// pair of calls below presumably replaces the first instead of applying a
// separate range to each source. Also confirm that PDFMergerUtility exposes
// these setters at all; the same pair is listed on Splitter. TODO: verify
// against the PDFBox Javadoc before relying on this example.
// Add first document (pages 1-5)
merger.addSource(new File("document1.pdf"));
merger.setStartPage(1);
merger.setEndPage(5);
// Add second document (pages 10-15)
merger.addSource(new File("document2.pdf"));
merger.setStartPage(10);
merger.setEndPage(15);
// The merged result is written to this file name
merger.setDestinationFileName("partial-merge.pdf");
merger.mergeDocuments();

PDDocument document = Loader.loadPDF(new File("large-document.pdf"));
Splitter splitter = new Splitter();
// Split every 10 pages
splitter.setSplitAtPage(10);
// Use the temp-file-only memory setting for the documents the splitter creates
splitter.setMemoryUsageSetting(MemoryUsageSetting.setupTempFileOnly());
List<PDDocument> splitDocuments = splitter.split(document);
// Save the parts as split-part-1.pdf, split-part-2.pdf, ...
for (int i = 0; i < splitDocuments.size(); i++) {
    // try-with-resources closes the part even when save() throws
    try (PDDocument splitDoc = splitDocuments.get(i)) {
        splitDoc.save("split-part-" + (i + 1) + ".pdf");
    }
}
document.close();

/**
 * Splitter variant that overrides the split decision: a split is requested
 * for page numbers divisible by 5 but not by 10.
 */
public class CustomSplitter extends Splitter {
    /**
     * @param pageNumber the page number passed in by the splitting routine
     * @return true when pageNumber is a multiple of 5 and not a multiple of 10
     */
    @Override
    protected boolean splitAtPage(int pageNumber) {
        boolean multipleOfFive = pageNumber % 5 == 0;
        boolean multipleOfTen = pageNumber % 10 == 0;
        return multipleOfFive && !multipleOfTen;
    }
}
// Run the custom split rule over a loaded document
PDDocument document = Loader.loadPDF(new File("document.pdf"));
CustomSplitter splitter = new CustomSplitter();
List<PDDocument> splitDocs = splitter.split(document);
// Handle split documents, then close each of them and the source document,
// as the fixed-interval split example does...

Overlay overlay = new Overlay();
// Set input document
overlay.setInputFile("base-document.pdf");
// Set default overlay for all pages
overlay.setDefaultOverlayFile("watermark.pdf");
// Set specific overlays for first and last pages
overlay.setFirstPageOverlayFile("cover-overlay.pdf");
overlay.setLastPageOverlayFile("back-overlay.pdf");
// Set overlay position
overlay.setOverlayPosition(Overlay.Position.BACKGROUND);
// Create specific page overlays (page number -> overlay file name)
Map<Integer, String> specificOverlays = new HashMap<>();
specificOverlays.put(3, "special-page-3-overlay.pdf");
specificOverlays.put(7, "special-page-7-overlay.pdf");
// Perform overlay; try-with-resources closes the result even if save() throws
try (PDDocument result = overlay.overlay(specificOverlays)) {
    result.save("overlaid-document.pdf");
} finally {
    // Overlay is Closeable: release the PDFs it opened from the file names above
    overlay.close();
}

Overlay overlay = new Overlay();
// Load documents programmatically
PDDocument inputDoc = Loader.loadPDF(new File("input.pdf"));
PDDocument watermarkDoc = Loader.loadPDF(new File("watermark.pdf"));
PDDocument headerDoc = Loader.loadPDF(new File("header.pdf"));
PDDocument footerDoc = Loader.loadPDF(new File("footer.pdf"));
// Hand the already-loaded documents to the overlay: one input, a default
// overlay, and separate overlays for odd and even pages
overlay.setInputPDF(inputDoc);
overlay.setDefaultOverlayPDF(watermarkDoc);
overlay.setOddPageOverlayPDF(headerDoc);
overlay.setEvenPageOverlayPDF(footerDoc);
// Apply overlay; the empty map supplies no page-specific overlay files
PDDocument result = overlay.overlay(new HashMap<>());
result.save("complex-overlay.pdf");
// Clean up
// NOTE(review): result may be the same object as inputDoc (overlay() appears
// to return the modified input document) — confirm; if so, one of the two
// close() calls below is redundant.
result.close();
inputDoc.close();
watermarkDoc.close();
headerDoc.close();
footerDoc.close();

Install with Tessl CLI
npx tessl i tessl/maven-org-apache-pdfbox--pdfbox