PDF generator using HTML and CSS
WSGI middleware components for integrating PDF generation directly into web applications with automatic HTML-to-PDF conversion, content filtering, and seamless web framework integration.
Complete WSGI middleware system for automatic PDF conversion in web applications with flexible filtering and processing capabilities.
class Filter:
    """Base WSGI filter class for content transformation.

    Provides the foundation for WSGI-based content filtering and
    transformation with a pluggable processing pipeline.

    This listing is an interface summary: only signatures and contracts are
    shown, the method bodies are omitted.
    """

    def __init__(self, app):
        """Initialize the WSGI filter with the wrapped application.

        Args:
            app: WSGI application to wrap.
        """

    def __call__(self, environ, start_response):
        """WSGI application interface for request processing.

        Args:
            environ (dict): WSGI environment dictionary.
            start_response (callable): WSGI ``start_response`` callable.

        Returns:
            Response iterator.
        """

    def should_filter(self):
        """Determine whether filtering applies to the current request.

        Returns:
            bool: True if the request should be filtered.
        """

    def filter(self):
        """Apply the filter transformation to the response content.

        Processes the response content and applies the necessary
        transformations based on the filter configuration.
        """
class HTMLFilter(Filter):
    """HTML-specific WSGI filter for HTML content processing.

    Extends the base ``Filter`` with HTML-specific processing capabilities
    and content type handling.
    """
class PisaMiddleware(HTMLFilter):
"""
WSGI middleware for automatic HTML-to-PDF conversion.
Intercepts HTML responses and converts them to PDF format
based on request parameters and configuration settings.
"""Seamless integration that automatically converts HTML responses to PDF based on URL parameters, headers, or content negotiation.
from xhtml2pdf.wsgi import PisaMiddleware
from your_web_app import app
# Wrap your WSGI application
pdf_enabled_app = PisaMiddleware(app)
# Use with any WSGI server
if __name__ == '__main__':
from wsgiref.simple_server import make_server
server = make_server('localhost', 8080, pdf_enabled_app)
server.serve_forever()Flask Integration:
from flask import Flask, render_template
from xhtml2pdf.wsgi import PisaMiddleware
app = Flask(__name__)
@app.route('/report')
def generate_report():
return render_template('report.html', data=get_report_data())
# Enable PDF conversion middleware
app.wsgi_app = PisaMiddleware(app.wsgi_app)
# Access as PDF: /report?format=pdf
# Or with Accept header: Accept: application/pdfDjango Integration:
# settings.py
MIDDLEWARE = [
# ... other middleware
'xhtml2pdf.wsgi.PisaMiddleware',
]
# Or in wsgi.py
from django.core.wsgi import get_wsgi_application
from xhtml2pdf.wsgi import PisaMiddleware
application = get_wsgi_application()
application = PisaMiddleware(application)Pyramid Integration:
from pyramid.config import Configurator
from xhtml2pdf.wsgi import PisaMiddleware
def main(global_config, **settings):
config = Configurator(settings=settings)
# Configure routes and views
config.add_route('report', '/report')
config.scan()
app = config.make_wsgi_app()
return PisaMiddleware(app)Automatic PDF conversion based on HTTP content negotiation, URL parameters, and custom headers.
# URL parameter-based conversion
# GET /invoice/123?format=pdf
# Accept header-based conversion
# Accept: application/pdf
# Custom header-based conversion
# X-PDF-Convert: true
# File extension-based conversion
# GET /report.pdf

Comprehensive configuration options for PDF conversion behavior, styling, and processing parameters.
class ConfigurablePisaMiddleware(PisaMiddleware):
    """Enhanced WSGI middleware with advanced configuration options."""

    def __init__(self, app, **config):
        """Initialize the middleware with configuration options.

        Args:
            app: WSGI application to wrap.
            **config: Configuration options; any key given here overrides
                the built-in default of the same name:

                - pdf_triggers: List of trigger conditions
                - default_css: Default CSS for PDF conversion
                - base_path: Base path for resource resolution
                - debug: Debug level for PDF processing
                - link_callback: Custom link resolution function
                - page_size: Default page size (A4, Letter, etc.)
                - margins: Page margins specification
                - orientation: Page orientation (portrait/landscape)
        """
        super().__init__(app)
        # Start from the defaults, then let caller-supplied options win.
        self.config = {
            'pdf_triggers': ['format=pdf', 'application/pdf'],
            'default_css': self.get_default_css(),
            'base_path': '',
            'debug': 0,
            'link_callback': None,
            'page_size': 'A4',
            'margins': '1in',
            'orientation': 'portrait',
        }
        self.config.update(config)

    def get_default_css(self):
        """Get the default CSS for PDF conversion.

        Returns:
            str: Default CSS stylesheet for PDF formatting.
        """
        # NOTE(review): the page size and margin below are fixed literals and
        # do not follow the ``page_size`` / ``margins`` options -- confirm
        # whether callers are expected to pass a matching ``default_css``.
        return """
        @page {
            size: A4;
            margin: 1in;
        }
        body {
            font-family: Arial, sans-serif;
            font-size: 11pt;
            line-height: 1.4;
        }
        """
# Usage with configuration
app = ConfigurablePisaMiddleware(
your_app,
pdf_triggers=['format=pdf', '.pdf', 'application/pdf'],
page_size='Letter',
margins='0.5in',
debug=1
)

Advanced customization options for PDF generation including custom styling, headers, footers, and post-processing.
class CustomPisaMiddleware(PisaMiddleware):
"""
Custom WSGI middleware with enhanced PDF processing.
"""
def process_pdf_request(self, environ, html_content):
"""
Custom PDF processing with enhanced features.
Args:
environ (dict): WSGI environment
html_content (str): HTML content to convert
Returns:
bytes: Generated PDF content
"""
# Add custom headers and footers
enhanced_html = self.add_headers_footers(html_content, environ)
# Apply custom CSS based on request
css = self.get_request_css(environ)
# Custom link callback for resource resolution
def custom_link_callback(uri, rel):
return self.resolve_resource(uri, rel, environ)
# Generate PDF with custom options
import io
from xhtml2pdf import pisa
output = io.BytesIO()
result = pisa.pisaDocument(
enhanced_html,
dest=output,
default_css=css,
link_callback=custom_link_callback,
path=self.get_base_path(environ),
debug=self.config.get('debug', 0)
)
if result.err:
raise Exception(f"PDF generation failed: {result.log}")
return output.getvalue()
def add_headers_footers(self, html, environ):
"""
Add custom headers and footers to HTML content.
"""
header = f"""
<div class="pdf-header">
Generated on {datetime.now().strftime('%Y-%m-%d %H:%M')}
| {environ.get('HTTP_HOST', 'localhost')}
</div>
"""
footer = """
<div class="pdf-footer">
Page <pdf:pagenumber> of <pdf:pagecount>
</div>
"""
# Inject header/footer styles and content
return self.inject_pdf_elements(html, header, footer)Comprehensive error handling and logging for WSGI PDF conversion operations.
import logging
from xhtml2pdf.wsgi import PisaMiddleware
class LoggingPisaMiddleware(PisaMiddleware):
    """WSGI middleware with enhanced logging and error handling."""

    def __init__(self, app, logger=None):
        """Wrap ``app`` and choose the logger used for failures.

        Args:
            app: WSGI application to wrap.
            logger: Logger to report failures to; defaults to this module's
                logger.
        """
        super().__init__(app)
        self.logger = logger or logging.getLogger(__name__)

    def __call__(self, environ, start_response):
        """Run the wrapped middleware, turning failures into a 500 page.

        Args:
            environ (dict): WSGI environment dictionary.
            start_response (callable): WSGI ``start_response`` callable.

        Returns:
            The parent middleware's response, or a one-element list holding
            an HTML error page if the parent call raised.
        """
        try:
            return super().__call__(environ, start_response)
        except Exception as e:
            import sys
            from html import escape

            self.logger.error("PDF conversion failed: %s", e, exc_info=True)
            # Return error response. The exception text is escaped so it
            # cannot inject markup into the page.
            error_html = f"""
            <html>
            <body>
            <h1>PDF Conversion Error</h1>
            <p>Unable to generate PDF: {escape(str(e))}</p>
            </body>
            </html>
            """
            # Encode first: Content-Length counts bytes, not characters.
            error_body = error_html.encode('utf-8')
            response_headers = [
                ('Content-Type', 'text/html; charset=utf-8'),
                ('Content-Length', str(len(error_body))),
            ]
            # Passing exc_info lets the server replace headers that were set
            # before the failure, or re-raise if output was already sent.
            start_response(
                '500 Internal Server Error', response_headers, sys.exc_info()
            )
            return [error_body]
# Configure logging
logging.basicConfig(level=logging.INFO)
app = LoggingPisaMiddleware(your_app)

Caching and performance optimization strategies for high-traffic web applications.
import hashlib
from functools import lru_cache
class CachingPisaMiddleware(PisaMiddleware):
    """WSGI middleware with PDF caching capabilities.

    Generated PDFs are kept in a per-instance cache keyed by content hash,
    bounded by ``cache_size`` entries and expired after ``cache_ttl`` seconds.
    """

    def __init__(self, app, cache_size=128, cache_ttl=3600):
        """Wrap ``app`` and create an empty PDF cache.

        Args:
            app: WSGI application to wrap.
            cache_size (int | None): Maximum number of cached PDFs; ``None``
                means unbounded, zero or less disables caching.
            cache_ttl (float): Seconds a cached PDF stays valid.
        """
        from collections import OrderedDict

        super().__init__(app)
        self.cache_size = cache_size
        self.cache_ttl = cache_ttl
        # Ordered so the least recently used entry can be evicted first.
        self.pdf_cache = OrderedDict()

    def generate_cached_pdf(self, content_hash, html_content):
        """Generate a PDF, reusing a cached result for the same hash.

        The cache lives on the instance rather than in a decorator on the
        method, so it honours ``cache_size`` / ``cache_ttl`` and does not keep
        the middleware instance alive through a class-level cache.

        Args:
            content_hash (str): Hash of the HTML content; used as cache key.
            html_content (str): HTML content to convert.

        Returns:
            bytes: Generated PDF content.

        Raises:
            RuntimeError: If the converter reports an error. Failures are
                never cached.
        """
        import io
        import time

        from xhtml2pdf import pisa

        now = time.monotonic()
        entry = self.pdf_cache.get(content_hash)
        if entry is not None:
            expires_at, cached_pdf = entry
            if now < expires_at:
                self.pdf_cache.move_to_end(content_hash)
                return cached_pdf
            # Stale entry: drop it and regenerate below.
            self.pdf_cache.pop(content_hash, None)

        output = io.BytesIO()
        result = pisa.pisaDocument(html_content, dest=output)
        if result.err:
            raise RuntimeError("PDF generation failed")
        pdf_bytes = output.getvalue()

        limit = self.cache_size
        if limit is None or limit > 0:
            self.pdf_cache[content_hash] = (now + self.cache_ttl, pdf_bytes)
            self.pdf_cache.move_to_end(content_hash)
            # Evict least recently used entries beyond the configured size.
            while limit is not None and len(self.pdf_cache) > limit:
                self.pdf_cache.popitem(last=False)
        return pdf_bytes

    def get_content_hash(self, html_content):
        """Generate a hash of the HTML content for use as a cache key.

        Args:
            html_content (str): HTML content to hash.

        Returns:
            str: Hex MD5 digest of the UTF-8 encoded content.
        """
        # MD5 serves only as a cache key here, not as a security measure.
        return hashlib.md5(html_content.encode('utf-8')).hexdigest()

from xhtml2pdf.wsgi import PisaMiddleware
def simple_app(environ, start_response):
    """Simple WSGI application that returns HTML content.

    Args:
        environ (dict): WSGI environment dictionary (not inspected).
        start_response (callable): WSGI ``start_response`` callable.

    Returns:
        list[bytes]: One-element list holding the UTF-8 encoded HTML page.
    """
    html = """
    <html>
    <body>
    <h1>Hello World</h1>
    <p>This can be converted to PDF!</p>
    </body>
    </html>
    """
    # Encode before measuring: Content-Length is a byte count, which differs
    # from the character count as soon as the page has non-ASCII text.
    body = html.encode('utf-8')
    response_headers = [
        ('Content-Type', 'text/html'),
        ('Content-Length', str(len(body))),
    ]
    start_response('200 OK', response_headers)
    return [body]
# Enable PDF conversion
app = PisaMiddleware(simple_app)
# Access as PDF: /?format=pdf

from xhtml2pdf.wsgi import PisaMiddleware
class EnterprisePisaMiddleware(PisaMiddleware):
    """Enterprise-grade WSGI middleware with security and audit features."""

    def __init__(self, app, audit_logger=None, security_config=None):
        """Wrap ``app`` with optional audit logging and security settings.

        Args:
            app: WSGI application to wrap.
            audit_logger: Logger that receives one record per allowed
                request; no audit records are written when omitted.
            security_config (dict | None): Security settings; stored as an
                empty dict when omitted.
        """
        super().__init__(app)
        self.audit_logger = audit_logger
        self.security_config = security_config or {}

    def __call__(self, environ, start_response):
        """Authorize and audit the request, then delegate to the parent.

        Args:
            environ (dict): WSGI environment dictionary.
            start_response (callable): WSGI ``start_response`` callable.

        Returns:
            A 403 body if authorization fails, otherwise the parent
            middleware's response.
        """
        # Security checks
        if not self.authorize_pdf_access(environ):
            denied = b'PDF access denied'
            # A response with a body should say what it is and how long.
            start_response('403 Forbidden', [
                ('Content-Type', 'text/plain; charset=utf-8'),
                ('Content-Length', str(len(denied))),
            ])
            return [denied]
        # Audit logging
        if self.audit_logger is not None:
            # Lazy %-style arguments: formatting is skipped when the record
            # is filtered out by the logger's level.
            self.audit_logger.info(
                "PDF requested: %s by %s",
                environ.get('PATH_INFO'),
                environ.get('REMOTE_ADDR'),
            )
        return super().__call__(environ, start_response)

    def authorize_pdf_access(self, environ):
        """Check if the user is authorized for PDF conversion.

        Args:
            environ (dict): WSGI environment dictionary of the request.

        Returns:
            bool: Always True in this placeholder implementation.
        """
        # Implement your authorization logic
        return True


class Filter:
    """Base WSGI filter class for content transformation.

    Attributes:
        app: Wrapped WSGI application
    """
class HTMLFilter(Filter):
    """HTML-specific WSGI filter for HTML content processing.

    Extends the base ``Filter`` with HTML processing capabilities.
    """
class PisaMiddleware(HTMLFilter):
"""
WSGI middleware for automatic HTML-to-PDF conversion.
Provides seamless PDF generation from HTML responses
with configurable triggers and processing options.
"""Install with Tessl CLI
npx tessl i tessl/pypi-xhtml2pdf