The Apache PDFBox library is an open source Java tool for working with PDF documents.
—
Core functionality for loading, creating, saving, and manipulating PDF documents. This includes document lifecycle management, page operations, and basic document properties.
Load PDF documents from various sources with optional password support for encrypted documents.
// Static methods in org.apache.pdfbox.Loader
public static PDDocument loadPDF(File file) throws IOException;
public static PDDocument loadPDF(InputStream input) throws IOException;
public static PDDocument loadPDF(byte[] input) throws IOException;
public static PDDocument loadPDF(RandomAccessRead source) throws IOException;
public static PDDocument loadPDF(File file, String password) throws IOException;
public static PDDocument loadPDF(InputStream input, String password) throws IOException;

Create new documents and manage document lifecycle.
// Constructors in org.apache.pdfbox.pdmodel.PDDocument
public PDDocument();
public PDDocument(COSDocument document);
// Document lifecycle methods
public void close() throws IOException;
public boolean isEncrypted();

Add, remove, and manipulate pages within a PDF document.
// Page management methods in PDDocument
public void addPage(PDPage page);
public void removePage(int pageIndex);
public PDPageTree getPages();
public PDPage getPage(int pageIndex);
public int getNumberOfPages();
public PDPage importPage(PDPage page);

Save documents to various output destinations.
// Save methods in PDDocument
public void save(File file) throws IOException;
public void save(OutputStream stream) throws IOException;
public void save(String fileName) throws IOException;

Individual page operations and properties.
// Constructors in org.apache.pdfbox.pdmodel.PDPage
public PDPage();
public PDPage(PDRectangle mediaBox);
// Page properties
public PDRectangle getMediaBox();
public void setMediaBox(PDRectangle mediaBox);
public PDRectangle getCropBox();
public void setCropBox(PDRectangle cropBox);
public int getRotation();
public void setRotation(int rotation);
// Page content and resources
public PDStream getContents() throws IOException;
public void setContents(PDStream contents) throws IOException;
public PDResources getResources();
public void setResources(PDResources resources);
public List<PDAnnotation> getAnnotations() throws IOException;

Manage document page collections with tree-based operations.
// Methods in org.apache.pdfbox.pdmodel.PDPageTree
public void add(PDPage page);
public void insertAfter(PDPage newPage, PDPage afterPage);
public void insertBefore(PDPage newPage, PDPage beforePage);
public void remove(PDPage page);
public PDPage get(int index);
public int indexOf(PDPage page);
public int getCount();
public Iterator<PDPage> iterator();

Access and modify document metadata and properties.
// Document metadata methods in PDDocument
public PDDocumentInformation getDocumentInformation();
public void setDocumentInformation(PDDocumentInformation info);
public PDDocumentCatalog getDocumentCatalog();
// Methods in org.apache.pdfbox.pdmodel.PDDocumentInformation
public String getTitle();
public void setTitle(String title);
public String getAuthor();
public void setAuthor(String author);
public String getSubject();
public void setSubject(String subject);
public String getKeywords();
public void setKeywords(String keywords);
public String getCreator();
public void setCreator(String creator);
public String getProducer();
public void setProducer(String producer);
public Calendar getCreationDate();
public void setCreationDate(Calendar date);
public Calendar getModificationDate();
public void setModificationDate(Calendar date);

Standard page sizes and rectangle definitions.
// Constants in org.apache.pdfbox.pdmodel.common.PDRectangle
public static final PDRectangle A0;
public static final PDRectangle A1;
public static final PDRectangle A2;
public static final PDRectangle A3;
public static final PDRectangle A4;
public static final PDRectangle A5;
public static final PDRectangle A6;
public static final PDRectangle LETTER;
public static final PDRectangle LEGAL;
// Rectangle methods
public float getWidth();
public float getHeight();
public float getLowerLeftX();
public float getLowerLeftY();
public float getUpperRightX();
public float getUpperRightY();

PDDocument document = new PDDocument();
PDPage page = new PDPage(PDRectangle.A4);
document.addPage(page);
// Set document information
PDDocumentInformation info = new PDDocumentInformation();
info.setTitle("My Document");
info.setAuthor("John Doe");
info.setCreationDate(Calendar.getInstance());
document.setDocumentInformation(info);
document.save("simple-document.pdf");
document.close();

PDDocument document = Loader.loadPDF(new File("example.pdf"));
System.out.println("Number of pages: " + document.getNumberOfPages());
System.out.println("Is encrypted: " + document.isEncrypted());
PDDocumentInformation info = document.getDocumentInformation();
System.out.println("Title: " + info.getTitle());
System.out.println("Author: " + info.getAuthor());
// Inspect first page
PDPage firstPage = document.getPage(0);
PDRectangle mediaBox = firstPage.getMediaBox();
System.out.println("Page size: " + mediaBox.getWidth() + " x " + mediaBox.getHeight());
document.close();

Install with Tessl CLI
npx tessl i tessl/maven-org-apache-pdfbox--pdfbox