PDF generator using HTML and CSS
Advanced PDF document manipulation, joining, encryption, digital signatures, and watermark capabilities for enterprise-grade PDF processing and security.
Core PDF manipulation functionality for joining multiple PDFs, managing document structure, and handling complex PDF operations.
class pisaPDF:
"""
PDF document handler for joining and manipulating PDF files.
Provides capabilities for combining multiple PDFs, managing
document structure, and handling PDF-specific operations.
"""
def __init__(self, capacity=-1):
"""
Initialize PDF handler with optional capacity limit.
Args:
capacity (int): Memory capacity limit in bytes (-1 for unlimited)
"""
def addFromURI(self, url, basepath=None):
"""
Add PDF content from URI (file path or URL).
Args:
url (str): Path or URL to PDF file
basepath (str, optional): Base path for relative URLs
"""
def addFromFile(self, f):
"""
Add PDF content from file object.
Args:
f: File-like object containing PDF data
"""
def addFromString(self, data):
"""
Add PDF content from string data.
Args:
data (str): PDF data as string
"""
def addDocument(self, doc):
"""
Add PDF document object.
Args:
doc: PDF document object to add
"""
def join(self, file=None):
"""
Join all added PDFs into single document.
Args:
file (optional): Output file path or file object
Returns:
Combined PDF document
"""

from xhtml2pdf.pdf import pisaPDF
# Create PDF handler
pdf_handler = pisaPDF()
# Add multiple PDF sources
pdf_handler.addFromURI("report_part1.pdf")
pdf_handler.addFromURI("report_part2.pdf")
pdf_handler.addFromURI("appendix.pdf")
# Join into single PDF
with open("combined_report.pdf", "wb") as output:
pdf_handler.join(output)

Comprehensive PDF encryption system supporting password protection, permission controls, and access restrictions.
def get_encrypt_instance(data):
"""
Create PDF encryption instance from configuration data.
Args:
data (dict): Encryption configuration with keys:
- userPassword (str): User password for opening PDF
- ownerPassword (str): Owner password for full access
- canPrint (bool): Allow printing permission
- canModify (bool): Allow modification permission
- canCopy (bool): Allow copying content permission
- canAnnotate (bool): Allow annotation permission
Returns:
Encryption instance for PDF generation
"""

from xhtml2pdf import pisa
html_content = """
<html>
<body>
<h1>Confidential Document</h1>
<p>This document contains sensitive information.</p>
</body>
</html>
"""
# Configure encryption settings
encryption_config = {
'userPassword': 'view123', # Password to open PDF
'ownerPassword': 'admin456', # Password for full access
'canPrint': True, # Allow printing
'canModify': False, # Prevent modifications
'canCopy': False, # Prevent copying text
'canAnnotate': False # Prevent annotations
}
# Generate encrypted PDF
with open("secure_document.pdf", "wb") as dest:
result = pisa.pisaDocument(
html_content,
dest,
encrypt=encryption_config
)
if not result.err:
print("Encrypted PDF generated successfully")

Advanced watermark and background processing capabilities for PDF document branding and visual enhancement.
class WaterMarks:
"""
Watermark and background processing system for PDF documents.
Provides comprehensive watermark capabilities including image overlays,
background patterns, opacity control, and positioning for document branding.
"""
@staticmethod
def process_doc(context, input_doc, output_doc):
"""
Process PDF document with watermarks and backgrounds.
Args:
context (pisaContext): Processing context with background settings
input_doc (bytes): Input PDF document data
output_doc (bytes): Output PDF document data
Returns:
tuple: (processed_pdf_bytes, has_background_flag)
"""
@staticmethod
def get_watermark(context, max_numpage):
"""
Generate watermark iterator for multi-page documents.
Args:
context (pisaContext): Processing context
max_numpage (int): Maximum number of pages
Returns:
Iterator: Watermark data for each page
"""
@staticmethod
def get_size_location(img, context, pagesize, *, is_portrait):
"""
Calculate watermark size and position on page.
Args:
img: Image object for watermark
context (dict): Watermark context with positioning data
pagesize (tuple): Page dimensions (width, height)
is_portrait (bool): Whether page is in portrait orientation
Returns:
tuple: Position and size coordinates (x, y, width, height)
"""
@staticmethod
def get_img_with_opacity(pisafile, context):
"""
Apply opacity settings to watermark image.
Args:
pisafile (pisaFileObject): Image file object
context (dict): Context with opacity settings
Returns:
BytesIO: Processed image with opacity applied
"""

from xhtml2pdf import pisa
# HTML with background/watermark CSS
html_with_watermark = """
<html>
<head>
<style>
@page {
background-image: url('watermark.png');
background-opacity: 0.3;
background-object-position: center center;
}
body { font-family: Arial; padding: 2cm; }
</style>
</head>
<body>
<h1>Confidential Document</h1>
<p>This document has a watermark background.</p>
</body>
</html>
"""
with open("watermarked.pdf", "wb") as dest:
result = pisa.pisaDocument(html_with_watermark, dest)

PDF digital signature support for document authentication, integrity verification, and non-repudiation.
class PDFSignature:
"""
PDF digital signature handler for document authentication.
Provides capabilities for applying digital signatures to PDFs
for legal compliance and document integrity verification.
"""
@staticmethod
def sign(inputfile, output, config):
"""
Apply digital signature to PDF document.
Args:
inputfile: Input PDF file path or file object
output: Output PDF file path or file object
config (dict): Signature configuration with type and parameters
Creates cryptographic signature for document authentication
and integrity verification purposes.
"""
@staticmethod
def simple_sign(inputfile, output, config):
"""
Apply simple digital signature to PDF.
Args:
inputfile: Input PDF file path or file object
output: Output PDF file path or file object
config (dict): Simple signature configuration
"""
@staticmethod
def lta_sign(inputfile, output, config):
"""
Apply Long Term Archive (LTA) signature to PDF.
Args:
inputfile: Input PDF file path or file object
output: Output PDF file path or file object
config (dict): LTA signature configuration with timestamps
"""
@staticmethod
def get_passphrase(config):
"""
Extract passphrase from signature configuration.
Args:
config (dict): Signature configuration
Returns:
bytes: Passphrase for private key access
"""
@staticmethod
def get_signature_meta(config):
"""
Extract signature metadata from configuration.
Args:
config (dict): Signature configuration
Returns:
dict: Signature metadata (reason, location, contact info)
"""

from xhtml2pdf import pisa
html_content = """
<html>
<body>
<h1>Legal Document</h1>
<p>This document requires digital signature.</p>
</body>
</html>
"""
# Signature configuration
signature_config = {
'certificate_path': 'path/to/certificate.p12',
'password': 'cert_password',
'reason': 'Document approval',
'location': 'New York, NY',
'contact_info': 'legal@company.com'
}
# Generate signed PDF
with open("signed_document.pdf", "wb") as dest:
result = pisa.pisaDocument(
html_content,
dest,
signature=signature_config
)

PDF watermark processing for adding background images, text overlays, and document branding.
class WaterMarks:
"""
PDF watermark processing for background elements and overlays.
Handles watermark positioning, sizing, and application
to PDF documents for branding and security purposes.
"""
@staticmethod
def get_size_location():
"""
Calculate watermark size and position parameters.
Returns:
Size and location parameters for watermark placement
"""
@staticmethod
def process_doc():
"""
Process document for watermark application.
Applies watermark elements to PDF document pages
with proper positioning and transparency settings.
"""

from xhtml2pdf import pisa
# HTML with watermark CSS
html_with_watermark = """
<html>
<head>
<style>
@page {
size: A4;
margin: 1in;
background-image: url('watermark.png');
background-repeat: no-repeat;
background-position: center;
background-size: 50%;
}
body {
position: relative;
z-index: 1;
}
.watermark-text {
position: fixed;
top: 50%;
left: 50%;
transform: rotate(-45deg);
font-size: 72pt;
color: rgba(200, 200, 200, 0.3);
z-index: 0;
}
</style>
</head>
<body>
<div class="watermark-text">DRAFT</div>
<h1>Document Title</h1>
<p>Document content here...</p>
</body>
</html>
"""
with open("watermarked.pdf", "wb") as dest:
result = pisa.pisaDocument(html_with_watermark, dest)

Enhanced PDF metadata management for document properties, compliance, and archival requirements.
# PDF metadata configuration
metadata_config = {
'title': 'Annual Financial Report',
'author': 'Finance Department',
'subject': 'Q4 2023 Financial Results',
'keywords': 'finance, report, quarterly, 2023',
'creator': 'xhtml2pdf Financial System',
'producer': 'Company Document Generator',
'creation_date': '2023-12-31',
'modification_date': '2023-12-31',
'trapped': False,
'pdf_version': '1.4'
}
# Apply metadata during conversion
result = pisa.pisaDocument(
html_content,
dest,
context_meta=metadata_config
)

Support for PDF/A standard compliance for long-term document archival and accessibility requirements.
# PDF/A compliance configuration
pdfa_config = {
'pdf_version': '1.4',
'color_profile': 'sRGB',
'embed_fonts': True,
'compress_images': False,
'metadata_xmp': True,
'accessibility': True
}
# Generate PDF/A compliant document
result = pisa.pisaDocument(
html_content,
dest,
pdfa_compliance=pdfa_config
)

PDF manipulation operations can raise various exceptions:
from xhtml2pdf.pdf import pisaPDF
from xhtml2pdf import pisa
try:
# PDF manipulation
pdf_handler = pisaPDF()
pdf_handler.addFromURI("nonexistent.pdf")
except FileNotFoundError:
print("PDF file not found")
except PermissionError:
print("Insufficient permissions to access PDF")
except Exception as e:
print(f"PDF processing error: {e}")
try:
# Encrypted PDF generation
result = pisa.pisaDocument(html, dest, encrypt=encrypt_config)
except ValueError:
print("Invalid encryption configuration")
except ImportError:
print("Encryption libraries not available")

class pisaPDF:
"""
PDF document handler for joining and manipulating PDF files.
Attributes:
capacity (int): Memory capacity limit
documents (list): List of added PDF documents
"""
class PDFSignature:
"""
PDF digital signature handler for document authentication.
Provides static methods for applying cryptographic signatures
to PDF documents for legal compliance and integrity verification.
"""
class WaterMarks:
"""
PDF watermark processing for background elements and overlays.
Handles watermark positioning, sizing, and application
with support for image and text-based watermarks.
"""

Install with Tessl CLI
npx tessl i tessl/pypi-xhtml2pdf