PDF generator using HTML and CSS
npx @tessl/cli install tessl/pypi-xhtml2pdf@0.2.0

A comprehensive HTML to PDF converter for Python that transforms HTML and CSS content into high-quality PDF documents. Built on the ReportLab Toolkit, html5lib, and pypdf, xhtml2pdf supports HTML5 and CSS 2.1 (with some CSS 3 features) and is written entirely in pure Python for platform independence.
pip install xhtml2pdf
pip install xhtml2pdf[pycairo] (recommended for better graphics)
pip install xhtml2pdf[renderpm] (legacy rendering)

Basic import for main functionality:
from xhtml2pdf import pisa

Complete document processing import:
from xhtml2pdf.document import pisaDocument

Backward compatibility import:
from xhtml2pdf.pisa import CreatePDF # Alias for pisaDocument

Advanced imports for specific features:
from xhtml2pdf.context import pisaContext
from xhtml2pdf.files import getFile, pisaFileObject
from xhtml2pdf.pdf import pisaPDF
from xhtml2pdf.util import getColor, getSize, getBool

from xhtml2pdf import pisa
import io
# HTML content
html_content = """
<html>
<head>
<style>
body { font-family: Arial, sans-serif; }
h1 { color: #333; }
</style>
</head>
<body>
<h1>Hello World</h1>
<p>This is a simple PDF generated from HTML.</p>
</body>
</html>
"""
# Create PDF
output = io.BytesIO()
result = pisa.pisaDocument(html_content, dest=output)
# Check for errors
if result.err:
    print("Error generating PDF")
else:
    # Save or use the PDF
    with open("output.pdf", "wb") as f:
        f.write(output.getvalue())

from xhtml2pdf import pisa
# Convert HTML file to PDF file
with open("input.html", "r") as source:
    with open("output.pdf", "wb") as dest:
        result = pisa.pisaDocument(source, dest)
if not result.err:
    print("PDF generated successfully")

xhtml2pdf operates through a multi-stage processing pipeline.
The library provides both high-level convenience functions and low-level APIs for advanced customization, making it suitable for simple conversions as well as complex document generation systems.
Main conversion functions for transforming HTML to PDF, including the primary pisaDocument function and lower-level story creation capabilities.
def pisaDocument(
    src,
    dest=None,
    dest_bytes=False,
    path="",
    link_callback=None,
    debug=0,
    default_css=None,
    xhtml=False,
    encoding=None,
    xml_output=None,
    raise_exception=True,
    capacity=100 * 1024,
    context_meta=None,
    encrypt=None,
    signature=None,
    **kwargs
):
    """
    Convert HTML to PDF.

    Args:
        src: HTML source (string, file-like object, or filename)
        dest: Output destination (file-like object or filename)
        dest_bytes: Return PDF as bytes if True
        path: Base path for relative resources
        link_callback: Function to resolve URLs and file paths
        debug: Debug level (0-2)
        default_css: Custom default CSS string
        xhtml: Force XHTML parsing
        encoding: Character encoding for source
        xml_output: XML output options
        raise_exception: Raise exceptions on errors
        capacity: Memory capacity for temp files
        context_meta: Additional context metadata
        encrypt: PDF encryption settings
        signature: PDF signature settings

    Returns:
        pisaContext: Processing context with results and errors
    """

Advanced processing context management for controlling fonts, CSS, resources, and conversion behavior throughout the HTML-to-PDF pipeline.
class pisaContext:
    def __init__(self, path="", debug=0, capacity=-1): ...
    def addCSS(self, value): ...
    def parseCSS(self): ...
    def addFrag(self, text="", frag=None): ...
    def getFile(self, name, relative=None): ...
    def getFontName(self, names, default="helvetica"): ...
    def registerFont(self, fontname, alias=None): ...

Comprehensive file and resource management system supporting local files, URLs, data URIs, and various resource types with automatic MIME type detection.
def getFile(*a, **kw): ...

class pisaFileObject:
    def __init__(self, uri, basepath=None, callback=None): ...
    def getFileContent(self): ...
    def getMimeType(self): ...

Advanced CSS parsing, cascade processing, and style application system supporting CSS 2.1 and select CSS 3 features for precise document styling.
class pisaCSSBuilder:
    def atFontFace(self, declarations): ...
    def atPage(self): ...
    def atFrame(self): ...

class pisaCSSParser:
    def parseExternal(self, cssResourceName): ...

Collection of utility functions for size conversion, color handling, coordinate calculation, text processing, and other common operations.
def getColor(value, default=None): ...
def getSize(value, relative=0, base=None, default=0.0): ...
def getBool(s): ...
def getAlign(value, default=TA_LEFT): ...
def arabic_format(text, language): ...

PDF document manipulation, joining, encryption, digital signatures, and watermark capabilities for advanced PDF processing.
class pisaPDF:
    def __init__(self, capacity=-1): ...
    def addFromURI(self, url, basepath=None): ...
    def join(self, file=None): ...

class PDFSignature:
    @staticmethod
    def sign(): ...

Complete command-line interface for batch processing and integration with shell scripts and automated workflows.
def command(): ...
def execute(): ...
def usage(): ...
def showLogging(*, debug=False): ...

WSGI middleware components for integrating PDF generation directly into web applications with automatic HTML-to-PDF conversion.
class PisaMiddleware:
    def __init__(self, app): ...
    def __call__(self, environ, start_response): ...

xhtml2pdf uses a context-based error handling system:
result = pisa.pisaDocument(html_content, dest=output)
# Check for errors
if result.err:
    print(f"Errors occurred during conversion: {result.log}")
# Check for warnings
if result.warn:
    print(f"Warnings: {result.log}")

Common exceptions that may be raised:
- IOError: File access issues when reading HTML files or writing PDF output
- FileNotFoundError: Missing HTML files, CSS files, or image resources
- PermissionError: Insufficient permissions to read/write files
- UnicodeDecodeError: Character encoding problems in HTML/CSS content
- ImportError: Missing optional dependencies (pycairo, renderpm, pyHanko)
- ValueError: Invalid configuration parameters or malformed HTML/CSS
- MemoryError: Insufficient memory for large document processing
- reportlab.platypus.doctemplate.LayoutError: Page layout issues
- reportlab.lib.colors.ColorError: Invalid color specifications

Network-related exceptions (for URL resources):
- urllib.error.URLError: Network connectivity issues
- urllib.error.HTTPError: HTTP errors when fetching remote resources
- ssl.SSLError: SSL certificate issues for HTTPS resources

class pisaContext:
    """
    Main processing context for HTML-to-PDF conversion.

    Attributes:
        err (int): Error count
        warn (int): Warning count
        log (list): Processing log messages
        cssText (str): Accumulated CSS text
        cssParser: CSS parser instance
        fontList (list): Available fonts
        path (str): Base path for resources
    """
class pisaFileObject:
    """
    Unified file object for various URI types.

    Handles local files, URLs, data URIs, and byte streams
    with automatic MIME type detection and content processing.
    """
class pisaTempFile:
    """
    Temporary file handler for PDF generation.

    Manages temporary storage during the conversion process
    with automatic cleanup and memory management.
    """