PDF generator using HTML and CSS
Core document processing functions for converting HTML and CSS content to PDF documents. These functions provide the main entry points for xhtml2pdf's conversion capabilities, handling everything from simple HTML strings to complex documents with external resources.
`pisaDocument` is the primary function for converting HTML to PDF, with comprehensive configuration options for handling various input sources, output destinations, and processing parameters.
def pisaDocument(
src,
dest=None,
dest_bytes=False,
path="",
link_callback=None,
debug=0,
default_css=None,
xhtml=False,
encoding=None,
xml_output=None,
raise_exception=True,
capacity=100 * 1024,
context_meta=None,
encrypt=None,
signature=None,
**kwargs
):
"""
Convert HTML to PDF with full control over processing options.
Args:
src: HTML source - can be:
- str: HTML content as string
- file-like object: Open file or BytesIO
- filename: Path to HTML file
dest: Output destination - can be:
- file-like object: Open file or BytesIO for writing
- filename: Path for output PDF file
- None: Return PDF content in context
dest_bytes (bool): If True and dest is None, return bytes
path (str): Base path for resolving relative URLs and file paths
link_callback (callable): Custom function to resolve URLs and file paths
Signature: callback(uri, rel) -> resolved_uri
debug (int): Debug level 0-2, higher values provide more logging
default_css (str): Custom default CSS to apply before document CSS
xhtml (bool): Force XHTML parsing mode instead of HTML5
encoding (str): Character encoding for source document
If None, encoding is auto-detected from HTML meta tags
xml_output: XML output configuration options
raise_exception (bool): Raise exceptions on conversion errors
capacity (int): Memory capacity in bytes for temporary files
context_meta (dict): Additional metadata to add to PDF context
encrypt (dict): PDF encryption settings with keys:
- userPassword: User password for PDF
- ownerPassword: Owner password for PDF
- canPrint: Allow printing (bool)
- canModify: Allow modifications (bool)
- canCopy: Allow copying content (bool)
- canAnnotate: Allow annotations (bool)
signature (dict): PDF digital signature settings
**kwargs: Additional processing options
Returns:
pisaContext: Processing context object with attributes:
- err (int): Number of errors encountered
- warn (int): Number of warnings encountered
- log (list): List of log messages
- dest: Output destination (if dest_bytes=True, contains PDF bytes)
"""

Basic HTML string to PDF file:
from xhtml2pdf import pisa
html = "<html><body><h1>Hello World</h1></body></html>"
with open("output.pdf", "wb") as dest:
result = pisa.pisaDocument(html, dest)
if result.err:
print(f"Errors: {result.log}")

Convert with custom CSS and base path:
from xhtml2pdf import pisa
custom_css = """
@page {
size: A4;
margin: 2cm;
}
body { font-family: Arial; }
"""
html = """
<html>
<body>
<h1>Report</h1>
<img src="chart.png" />
</body>
</html>
"""
with open("report.pdf", "wb") as dest:
result = pisa.pisaDocument(
html,
dest,
path="/path/to/resources/", # Base path for resolving chart.png
default_css=custom_css,
debug=1
)

Convert with custom link callback:
from xhtml2pdf import pisa
import os
def link_callback(uri, rel):
"""
Resolve relative URLs to absolute file paths.
"""
if uri.startswith(('http://', 'https://')):
return uri
# Convert relative paths to absolute paths
if not os.path.isabs(uri):
return os.path.join('/path/to/assets/', uri)
return uri
html = '<html><body><img src="images/logo.png" /></body></html>'
with open("output.pdf", "wb") as dest:
result = pisa.pisaDocument(html, dest, link_callback=link_callback)

Return PDF as bytes:
from xhtml2pdf import pisa
import io
html = "<html><body><h1>Document</h1></body></html>"
output = io.BytesIO()
result = pisa.pisaDocument(html, dest=output)
if not result.err:
pdf_bytes = output.getvalue()
# Use pdf_bytes as needed

Lower-level function for creating ReportLab story objects from HTML content, providing more granular control over the conversion process.
def pisaStory(
src,
path="",
link_callback=None,
debug=0,
default_css=None,
xhtml=False,
encoding=None,
context=None,
xml_output=None,
**kwargs
):
"""
Create ReportLab story from HTML source without generating PDF.
This function provides lower-level access to the conversion process,
allowing you to work with the ReportLab story directly before PDF generation.
Args:
src: HTML source (string, file-like object, or filename)
path (str): Base path for relative resource resolution
link_callback (callable): Custom URL/file resolution function
debug (int): Debug level for logging (0-2)
default_css (str): Custom default CSS stylesheet
xhtml (bool): Use XHTML parsing mode
encoding (str): Character encoding for source
context (pisaContext): Existing context to use (creates new if None)
xml_output: XML output options
**kwargs: Additional processing options
Returns:
pisaContext: Processing context with story in context.story attribute
"""

from xhtml2pdf.document import pisaStory
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
html = """
<html>
<body>
<h1>Chapter 1</h1>
<p>Content here...</p>
</body>
</html>
"""
# Create story from HTML
context = pisaStory(html, debug=1)
if not context.err:
# Use the story with ReportLab directly
pdf_canvas = canvas.Canvas("custom.pdf", pagesize=A4)
# ... custom processing with context.story
pdf_canvas.save()

Utility function for generating error documents when conversion fails, providing user-friendly error reporting.
def pisaErrorDocument(dest, c):
"""
Generate a PDF document containing error information.
Args:
dest: Output destination for error PDF
c (pisaContext): Context containing error information
Returns:
pisaContext: Updated context after error document generation
"""

Utility function for creating PDF encryption instances from encryption configuration data.
def get_encrypt_instance(data):
"""
Create PDF encryption instance from configuration data.
Args:
data (dict): Encryption configuration with keys:
- userPassword (str): User password
- ownerPassword (str): Owner password
- canPrint (bool): Allow printing
- canModify (bool): Allow modifications
- canCopy (bool): Allow copying
- canAnnotate (bool): Allow annotations
Returns:
Encryption instance for PDF generation
"""

from xhtml2pdf import pisa
html = "<html><body><h1>Confidential</h1></body></html>"
encrypt_config = {
'userPassword': 'user123',
'ownerPassword': 'owner456',
'canPrint': True,
'canModify': False,
'canCopy': False,
'canAnnotate': False
}
with open("secure.pdf", "wb") as dest:
result = pisa.pisaDocument(html, dest, encrypt=encrypt_config)

The capacity parameter controls memory usage during conversion:
# For large documents
result = pisa.pisaDocument(html, dest, capacity=1024*1024) # 1MB
# For memory-constrained environments
result = pisa.pisaDocument(html, dest, capacity=50*1024) # 50KB

Debug levels provide different amounts of processing information:
result = pisa.pisaDocument(html, dest, debug=2)
for log_entry in result.log:
print(log_entry)

Additional metadata can be embedded in the PDF:
metadata = {
'author': 'John Doe',
'title': 'My Document',
'subject': 'Sample PDF',
'creator': 'My Application'
}
result = pisa.pisaDocument(html, dest, context_meta=metadata)

All document processing functions return a pisaContext object with these key attributes:
- err (int): Number of errors encountered (0 = success)
- warn (int): Number of warnings generated
- log (list): Detailed log messages for debugging
- dest: Output destination or PDF bytes (if dest_bytes=True)

result = pisa.pisaDocument(html, dest)
# Check for success
if result.err:
print(f"Conversion failed with {result.err} errors")
for msg in result.log:
if 'ERROR' in str(msg):
print(f"Error: {msg}")
else:
print("PDF generated successfully")
# Handle warnings
if result.warn:
print(f"Generated with {result.warn} warnings")

The legacy CreatePDF alias is still available for backward compatibility:
CreatePDF = pisaDocument # Backward compatibility alias

from xhtml2pdf.pisa import CreatePDF
# Legacy usage (deprecated but still works)
result = CreatePDF(html, dest)

class pisaContext:
"""
Processing context returned by document processing functions.
Attributes:
err (int): Error count
warn (int): Warning count
log (list): Processing log messages
dest: Output destination or PDF content
story (list): ReportLab story elements (from pisaStory)
cssText (str): Processed CSS content
path (str): Base path for resources
"""

Install with Tessl CLI
npx tessl i tessl/pypi-xhtml2pdf