tessl/pypi-xhtml2pdf

PDF generator using HTML and CSS

Overview

Eval results

Files

WSGI Integration

Name: tessl/pypi-xhtml2pdf
Author: tessl

This module provides WSGI middleware components that integrate PDF generation directly into web applications: automatic HTML-to-PDF conversion, content filtering, and integration with WSGI-based web frameworks.

Capabilities

WSGI Middleware Architecture

The package provides a complete WSGI middleware system that converts responses to PDF automatically in web applications, with flexible filtering and processing hooks.

class Filter:
    """
    Base WSGI filter class for content transformation.

    Wraps another WSGI application and exposes two hooks,
    ``should_filter`` and ``filter``, that subclasses override to decide
    whether and how a response is transformed.

    Attributes:
        app: Wrapped WSGI application.

    NOTE(review): this is an API listing, so the hook bodies are empty
    and their signatures may be abbreviated; confirm the exact
    parameters against the xhtml2pdf.wsgi source before overriding.
    """
    def __init__(self, app):
        """
        Initialize WSGI filter with wrapped application.

        Args:
            app: WSGI application to wrap
        """
        # Keep a reference to the wrapped application so the documented
        # ``app`` attribute actually exists on every instance.
        self.app = app

    def __call__(self, environ, start_response):
        """
        WSGI application interface for request processing.

        Args:
            environ (dict): WSGI environment dictionary
            start_response (callable): WSGI start_response callable

        Returns:
            Response iterator
        """

    def should_filter(self):
        """
        Determine if filtering should be applied to current request.

        Returns:
            bool: True if request should be filtered
        """

    def filter(self):
        """
        Apply filter transformation to response content.

        Processes response content and applies necessary
        transformations based on filter configuration.
        """

class HTMLFilter(Filter):
    """
    HTML-specific WSGI filter for HTML content processing.

    Extends base Filter with HTML-specific processing
    capabilities and content type handling. No overrides are shown in
    this listing; it only records the class's place in the hierarchy.

    NOTE(review): presumably this class limits filtering to successful
    ``text/html`` responses - confirm against the xhtml2pdf.wsgi source.
    """

class PisaMiddleware(HTMLFilter):
    """
    WSGI middleware for automatic HTML-to-PDF conversion.

    Intercepts HTML responses and converts them to PDF format
    based on request parameters and configuration settings.

    NOTE(review): the exact trigger is not visible in this listing.
    Upstream xhtml2pdf.wsgi appears to convert only when
    ``environ['pisa.topdf']`` is set (its value becoming the download
    filename) - verify before relying on query-string or Accept-header
    triggers.
    """

Automatic PDF Conversion

The middleware converts HTML responses to PDF automatically when a request matches a trigger such as a URL parameter, a request header, or content negotiation.

Basic WSGI Integration

from xhtml2pdf.wsgi import PisaMiddleware
from your_web_app import app

# Wrap your WSGI application; every response now passes through the
# PDF filter before it reaches the server.
pdf_enabled_app = PisaMiddleware(app)

# Use with any WSGI server
if __name__ == '__main__':
    from wsgiref.simple_server import make_server

    # The server object is a context manager: leaving the block closes
    # the listening socket even when serve_forever() is interrupted.
    with make_server('localhost', 8080, pdf_enabled_app) as server:
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            # Ctrl+C is the expected way to stop this development server;
            # exit quietly instead of printing a traceback.
            pass

Framework-Specific Integration

Flask Integration:

from flask import Flask, render_template
from xhtml2pdf.wsgi import PisaMiddleware

app = Flask(__name__)


@app.route('/report')
def generate_report():
    """Render the report template as HTML for the ``/report`` route."""
    report_data = get_report_data()
    return render_template('report.html', data=report_data)


# Wrap only the inner WSGI callable, so ``app`` itself stays a Flask object
app.wsgi_app = PisaMiddleware(app.wsgi_app)

# Access as PDF: /report?format=pdf
# Or with Accept header: Accept: application/pdf
# NOTE(review): confirm these triggers against the installed
# xhtml2pdf.wsgi.PisaMiddleware; they are not shown by this snippet.

Django Integration:

# settings.py
# Do NOT list 'xhtml2pdf.wsgi.PisaMiddleware' in MIDDLEWARE. Django
# builds each entry as factory(get_response) and then calls the result
# as middleware(request); PisaMiddleware is a WSGI middleware whose
# __call__ takes (environ, start_response), so it would fail with a
# TypeError on the first request.
MIDDLEWARE = [
    # ... other (Django-style) middleware
]

# wsgi.py - wrap the WSGI application object instead
from django.core.wsgi import get_wsgi_application
from xhtml2pdf.wsgi import PisaMiddleware

application = get_wsgi_application()
# Wrapping at the WSGI layer places the filter outside Django's own
# middleware stack, where the (environ, start_response) protocol applies.
application = PisaMiddleware(application)

Pyramid Integration:

from pyramid.config import Configurator
from xhtml2pdf.wsgi import PisaMiddleware

def main(global_config, **settings):
    """
    Build the Pyramid WSGI application and wrap it in PisaMiddleware.

    Args:
        global_config: Global configuration passed by the application
            loader; not used here.
        **settings: Deployment settings forwarded to the Configurator.

    Returns:
        The Pyramid WSGI application wrapped in PisaMiddleware.
    """
    configurator = Configurator(settings=settings)

    # Register the report route, then pick up decorated views
    configurator.add_route('report', '/report')
    configurator.scan()

    return PisaMiddleware(configurator.make_wsgi_app())

Content Negotiation

Automatic PDF conversion based on HTTP content negotiation, URL parameters, and custom headers.

# URL parameter-based conversion
# GET /invoice/123?format=pdf

# Accept header-based conversion  
# Accept: application/pdf

# Custom header-based conversion
# X-PDF-Convert: true

# File extension-based conversion
# GET /report.pdf

Advanced Configuration

Comprehensive configuration options for PDF conversion behavior, styling, and processing parameters.

class ConfigurablePisaMiddleware(PisaMiddleware):
    """
    Enhanced WSGI middleware with advanced configuration options.

    Attributes:
        config (dict): Effective settings - the built-in defaults
            overlaid with the keyword arguments given to the constructor.
    """
    def __init__(self, app, **config):
        """
        Initialize middleware with configuration options.

        Args:
            app: WSGI application to wrap
            **config: Configuration options:
                - pdf_triggers: List of trigger conditions
                - default_css: Default CSS for PDF conversion; when
                  omitted it is generated from page_size, margins and
                  orientation
                - base_path: Base path for resource resolution
                - debug: Debug level for PDF processing
                - link_callback: Custom link resolution function
                - page_size: Default page size (A4, Letter, etc.)
                - margins: Page margins specification
                - orientation: Page orientation (portrait/landscape)
        """
        super().__init__(app)
        self.config = {
            'pdf_triggers': ['format=pdf', 'application/pdf'],
            'default_css': None,  # filled in below unless supplied
            'base_path': '',
            'debug': 0,
            'link_callback': None,
            'page_size': 'A4',
            'margins': '1in',
            'orientation': 'portrait',
        }
        self.config.update(config)
        # Generate the stylesheet only after the caller's overrides are
        # merged, so its @page rule agrees with the effective page
        # settings. An explicitly supplied default_css always wins.
        if 'default_css' not in config:
            self.config['default_css'] = self.get_default_css()

    def get_default_css(self):
        """
        Get default CSS for PDF conversion.

        The @page rule reflects ``page_size``, ``margins`` and
        ``orientation`` from ``self.config`` when it is available, and
        falls back to A4 / 1in / portrait otherwise.

        Returns:
            str: Default CSS stylesheet for PDF formatting
        """
        # config may be missing if a subclass calls this before __init__
        # has finished; fall back to the documented defaults.
        settings = getattr(self, 'config', None) or {}
        page_size = settings.get('page_size', 'A4')
        margins = settings.get('margins', '1in')
        # Portrait is the CSS default, so only landscape needs a keyword.
        if settings.get('orientation') == 'landscape':
            page_size = f'{page_size} landscape'
        return f"""
        @page {{
            size: {page_size};
            margin: {margins};
        }}
        body {{
            font-family: Arial, sans-serif;
            font-size: 11pt;
            line-height: 1.4;
        }}
        """

# Usage with configuration
# (default_css is not given here, so it is generated for Letter / 0.5in)
app = ConfigurablePisaMiddleware(
    your_app,
    pdf_triggers=['format=pdf', '.pdf', 'application/pdf'],
    page_size='Letter',
    margins='0.5in',
    debug=1,
)

Custom PDF Processing

Advanced customization options for PDF generation including custom styling, headers, footers, and post-processing.

class CustomPisaMiddleware(PisaMiddleware):
    """
    Custom WSGI middleware with enhanced PDF processing.

    Relies on helper hooks that this listing does not define and that a
    concrete subclass must supply: ``get_request_css``,
    ``resolve_resource``, ``get_base_path`` and ``inject_pdf_elements``.
    """

    def process_pdf_request(self, environ, html_content):
        """
        Custom PDF processing with enhanced features.

        Args:
            environ (dict): WSGI environment
            html_content (str): HTML content to convert

        Returns:
            bytes: Generated PDF content

        Raises:
            RuntimeError: If xhtml2pdf reports a conversion error.
        """
        import io
        from xhtml2pdf import pisa

        # Add custom headers and footers
        enhanced_html = self.add_headers_footers(html_content, environ)

        # Apply custom CSS based on request
        css = self.get_request_css(environ)

        # Custom link callback for resource resolution
        def custom_link_callback(uri, rel):
            return self.resolve_resource(uri, rel, environ)

        # This class does not set ``config`` itself; fall back to an empty
        # mapping so it also works without a configurable base class.
        config = getattr(self, 'config', None) or {}

        # Generate PDF with custom options
        output = io.BytesIO()
        result = pisa.pisaDocument(
            enhanced_html,
            dest=output,
            default_css=css,
            link_callback=custom_link_callback,
            path=self.get_base_path(environ),
            debug=config.get('debug', 0),
        )

        if result.err:
            # RuntimeError is still caught by callers handling Exception.
            raise RuntimeError(f"PDF generation failed: {result.log}")

        return output.getvalue()

    def add_headers_footers(self, html, environ):
        """
        Add custom headers and footers to HTML content.

        Args:
            html (str): HTML document to decorate
            environ (dict): WSGI environment; ``HTTP_HOST`` is shown in
                the header

        Returns:
            Whatever ``inject_pdf_elements`` returns for the given HTML,
            header and footer.
        """
        from datetime import datetime
        # Aliased because the ``html`` parameter shadows the module name.
        from html import escape as escape_html

        generated_at = datetime.now().strftime('%Y-%m-%d %H:%M')
        # The Host header is client-controlled; escape it before it is
        # embedded in markup.
        host = escape_html(environ.get('HTTP_HOST', 'localhost'))

        header = f"""
        <div class="pdf-header">
            Generated on {generated_at}
            | {host}
        </div>
        """

        footer = """
        <div class="pdf-footer">
            Page <pdf:pagenumber> of <pdf:pagecount>
        </div>
        """

        # Inject header/footer styles and content
        return self.inject_pdf_elements(html, header, footer)

Error Handling and Logging

Comprehensive error handling and logging for WSGI PDF conversion operations.

import logging
from xhtml2pdf.wsgi import PisaMiddleware

class LoggingPisaMiddleware(PisaMiddleware):
    """
    WSGI middleware with enhanced logging and error handling.

    Any exception raised while handling a request is logged with its
    traceback and answered with an HTML 500 page.
    """

    def __init__(self, app, logger=None):
        """
        Args:
            app: WSGI application to wrap
            logger: Logger to report failures to; defaults to this
                module's logger.
        """
        super().__init__(app)
        self.logger = logger or logging.getLogger(__name__)

    def __call__(self, environ, start_response):
        """
        Delegate to the parent middleware, turning failures into a 500.

        Args:
            environ (dict): WSGI environment dictionary
            start_response (callable): WSGI start_response callable

        Returns:
            Response iterator - the parent's on success, otherwise a
            single-chunk HTML error page.
        """
        try:
            return super().__call__(environ, start_response)
        except Exception as e:
            import sys
            from html import escape

            # logger.exception logs at ERROR level and attaches the traceback.
            self.logger.exception("PDF conversion failed: %s", e)

            # Return error response; the message is escaped because it may
            # contain markup taken from the failing document.
            error_html = f"""
            <html>
                <body>
                    <h1>PDF Conversion Error</h1>
                    <p>Unable to generate PDF: {escape(str(e))}</p>
                </body>
            </html>
            """
            # Encode first: Content-Length must count bytes, not characters.
            body = error_html.encode('utf-8')

            response_headers = [
                ('Content-Type', 'text/html; charset=utf-8'),
                ('Content-Length', str(len(body))),
            ]
            # Passing exc_info lets the server replace headers set before
            # the failure, or re-raise if output has already been sent.
            exc_info = sys.exc_info()
            try:
                start_response(
                    '500 Internal Server Error', response_headers, exc_info
                )
            finally:
                # Drop the traceback reference to avoid a reference cycle.
                exc_info = None
            return [body]

# Configure logging
logging.basicConfig(level=logging.INFO)
app = LoggingPisaMiddleware(your_app)

Performance Optimization

Caching and performance optimization strategies for high-traffic web applications.

import hashlib
from functools import lru_cache

class CachingPisaMiddleware(PisaMiddleware):
    """
    WSGI middleware with PDF caching capabilities.

    Generated PDFs are kept in a per-instance cache keyed by content
    hash, bounded by ``cache_size`` entries and ``cache_ttl`` seconds.
    The cache is not synchronized; add a lock if the WSGI server handles
    requests on several threads.
    """

    def __init__(self, app, cache_size=128, cache_ttl=3600):
        """
        Args:
            app: WSGI application to wrap
            cache_size: Maximum number of cached PDFs; ``None`` means
                unbounded and ``0`` disables caching.
            cache_ttl: Lifetime of a cache entry in seconds.
        """
        super().__init__(app)
        self.cache_size = cache_size
        self.cache_ttl = cache_ttl
        # content_hash -> (expiry on the monotonic clock, pdf bytes).
        # Dict insertion order doubles as least-recently-used order.
        self.pdf_cache = {}

    def generate_cached_pdf(self, content_hash, html_content):
        """
        Generate PDF with caching based on content hash.

        Args:
            content_hash (str): Hash of HTML content
            html_content (str): HTML content to convert

        Returns:
            bytes: Generated PDF content

        Raises:
            RuntimeError: If xhtml2pdf reports a conversion error; failed
                conversions are never cached.
        """
        import time

        now = time.monotonic()
        # Pop so that a hit can be re-inserted at the most-recent end and
        # an expired entry is dropped in the same step.
        entry = self.pdf_cache.pop(content_hash, None)
        if entry is not None:
            expires_at, cached_pdf = entry
            if now < expires_at:
                self.pdf_cache[content_hash] = entry
                return cached_pdf

        import io
        from xhtml2pdf import pisa

        output = io.BytesIO()
        result = pisa.pisaDocument(html_content, dest=output)

        if result.err:
            raise RuntimeError("PDF generation failed")

        pdf_bytes = output.getvalue()

        limit = self.cache_size
        if limit is None or limit > 0:
            # Evict least-recently-used entries until there is room.
            while limit is not None and len(self.pdf_cache) >= limit:
                del self.pdf_cache[next(iter(self.pdf_cache))]
            self.pdf_cache[content_hash] = (now + self.cache_ttl, pdf_bytes)

        return pdf_bytes

    def get_content_hash(self, html_content):
        """
        Generate hash of HTML content for caching.

        Args:
            html_content (str): HTML content to fingerprint

        Returns:
            str: Hex MD5 digest of the UTF-8 encoded content. It serves
            only as a cache key, not as a security measure.
        """
        return hashlib.md5(html_content.encode('utf-8')).hexdigest()

Usage Examples

Simple Web Application

from xhtml2pdf.wsgi import PisaMiddleware

def simple_app(environ, start_response):
    """
    Simple WSGI application that returns HTML content.

    Args:
        environ (dict): WSGI environment dictionary (unused)
        start_response (callable): WSGI start_response callable

    Returns:
        list[bytes]: Single-element list holding the UTF-8 encoded page.
    """
    html = """
    <html>
        <body>
            <h1>Hello World</h1>
            <p>This can be converted to PDF!</p>
        </body>
    </html>
    """
    # Encode before measuring: Content-Length counts bytes on the wire,
    # which differs from len(str) as soon as the page is not pure ASCII.
    body = html.encode('utf-8')

    response_headers = [
        ('Content-Type', 'text/html'),
        ('Content-Length', str(len(body))),
    ]
    start_response('200 OK', response_headers)
    return [body]

# Enable PDF conversion by wrapping the plain WSGI callable; ``app`` is
# what the WSGI server should be pointed at.
app = PisaMiddleware(simple_app)

# Access as PDF: /?format=pdf
# NOTE(review): confirm this trigger against the installed
# xhtml2pdf.wsgi.PisaMiddleware; it is not shown by this snippet.

Enterprise Integration

from xhtml2pdf.wsgi import PisaMiddleware

class EnterprisePisaMiddleware(PisaMiddleware):
    """
    Enterprise-grade WSGI middleware with security and audit features.

    Every request is checked with ``authorize_pdf_access`` before it is
    passed on; both granted and denied requests are written to the audit
    logger when one is configured.
    """

    def __init__(self, app, audit_logger=None, security_config=None):
        """
        Args:
            app: WSGI application to wrap
            audit_logger: Logger receiving audit records; auditing is
                skipped when omitted.
            security_config: Optional mapping of security settings;
                stored as given (an empty dict when omitted).
        """
        super().__init__(app)
        self.audit_logger = audit_logger
        self.security_config = security_config or {}

    def __call__(self, environ, start_response):
        """
        Authorize and audit the request, then delegate to the parent.

        Args:
            environ (dict): WSGI environment dictionary
            start_response (callable): WSGI start_response callable

        Returns:
            Response iterator - a plain-text 403 body when access is
            denied, otherwise the parent middleware's response.
        """
        path = environ.get('PATH_INFO')
        client = environ.get('REMOTE_ADDR')

        # Security checks
        if not self.authorize_pdf_access(environ):
            # Denials belong in the audit trail as well.
            if self.audit_logger:
                self.audit_logger.warning(
                    "PDF access denied: %s by %s", path, client
                )
            body = b'PDF access denied'
            # Declare type and length so clients can parse the body.
            start_response('403 Forbidden', [
                ('Content-Type', 'text/plain; charset=utf-8'),
                ('Content-Length', str(len(body))),
            ])
            return [body]

        # Audit logging (lazy %-formatting; the rendered text is unchanged)
        if self.audit_logger:
            self.audit_logger.info("PDF requested: %s by %s", path, client)

        return super().__call__(environ, start_response)

    def authorize_pdf_access(self, environ):
        """
        Check if user is authorized for PDF conversion.

        Args:
            environ (dict): WSGI environment of the current request

        Returns:
            bool: True when the request may proceed. This placeholder
            always allows access.
        """
        # Implement your authorization logic
        return True

Types

class Filter:
    """
    Base WSGI filter class for content transformation.

    Type summary only; no methods are listed in this section.

    Attributes:
        app: Wrapped WSGI application
    """

class HTMLFilter(Filter):
    """
    HTML-specific WSGI filter for HTML content processing.

    Extends base Filter with HTML processing capabilities.
    Type summary only; no methods are listed in this section.
    """

class PisaMiddleware(HTMLFilter):
    """
    WSGI middleware for automatic HTML-to-PDF conversion.

    Provides seamless PDF generation from HTML responses
    with configurable triggers and processing options.

    NOTE(review): the triggers and options are not visible in this
    listing - confirm them against the xhtml2pdf.wsgi source.
    """

Install with Tessl CLI