tessl/pypi-xhtml2pdf

PDF generator using HTML and CSS

Overview

Eval results

Files

Context Management

Name: tessl/pypi-xhtml2pdf
Author: tessl

The context system provides advanced control over the HTML-to-PDF conversion pipeline. It manages fonts, CSS processing, resource handling, fragment management, and the overall conversion state throughout the document processing lifecycle.

Capabilities

Main Processing Context

`pisaContext` is the central context class. It manages all aspects of the HTML-to-PDF conversion process, including CSS accumulation, font management, resource resolution, and content fragment handling.

class pisaContext:
    """
    Processing context for the HTML-to-PDF conversion pipeline.

    Groups CSS accumulation, story/content handling, paragraph and
    fragment management, logging, resource lookup, font management and
    text-direction/identifier helpers.
    """

    def __init__(self, path="", debug=0, capacity=-1):
        """
        Initialize processing context for HTML-to-PDF conversion.

        Args:
            path (str): Base path for resolving relative resources
            debug (int): Debug level (0=none, 1=basic, 2=detailed)
            capacity (int): Memory capacity for temp files (-1=unlimited)
        """

    # CSS Management
    def addCSS(self, value):
        """
        Add CSS content to the context for processing.

        Args:
            value (str): CSS content to add
        """

    def addDefaultCSS(self, value):
        """
        Add default CSS that will be processed before document CSS.

        Args:
            value (str): Default CSS content
        """

    def parseCSS(self):
        """
        Parse all accumulated CSS content and build style cascade.

        Returns:
            bool: True if parsing successful, False otherwise
        """

    # Content Management
    def addStory(self, data):
        """
        Add content data to the ReportLab story.

        Args:
            data: Story data to add (ReportLab flowables)
        """

    def swapStory(self, story=None):
        """
        Replace the current story with new content.

        Args:
            story (list): New story content (None creates empty story)

        Returns:
            list: Previous story content
        """

    # Paragraph and Style Management
    def toParagraphStyle(self, first):
        """
        Convert current fragment style to ReportLab paragraph style.

        Args:
            first (bool): Whether this is the first paragraph

        Returns:
            ParagraphStyle: ReportLab paragraph style object
        """

    def addTOC(self):
        """
        Add table of contents placeholder to story.

        Creates a TOC structure that will be populated with headings
        and page numbers during the multi-pass PDF generation process.
        """

    def addPageCount(self):
        """
        Enable page counting and add page count tracking to context.

        Sets up the context for multi-pass processing to calculate
        total page numbers for page X of Y displays.
        """

    def addPara(self, *, force=False):
        """
        Add current paragraph to story.

        Args:
            force (bool): Force paragraph creation even if empty
        """

    # Fragment Management
    def clearFrag(self):
        """Clear current text fragment."""

    def copyFrag(self, **kw):
        """
        Copy current fragment with optional modifications.

        Args:
            **kw: Fragment attributes to modify

        Returns:
            Fragment: Copied fragment with modifications
        """

    def newFrag(self, **kw):
        """
        Create new text fragment with specified attributes.

        Args:
            **kw: Fragment attributes

        Returns:
            Fragment: New fragment object
        """

    def addFrag(self, text="", frag=None):
        """
        Add text fragment to current paragraph.

        Args:
            text (str): Text content to add
            frag (Fragment): Fragment object (creates new if None)
        """

    def pushFrag(self):
        """Push current fragment onto fragment stack."""

    def pullFrag(self):
        """
        Pull fragment from fragment stack.

        Returns:
            Fragment: Fragment from stack (None if empty)
        """

    # Logging and Error Handling
    def warning(self, msg, *args):
        """
        Log warning message.

        Args:
            msg (str): Warning message format string
            *args: Message formatting arguments
        """

    def error(self, msg, *args):
        """
        Log error message and increment error count.

        Args:
            msg (str): Error message format string
            *args: Message formatting arguments
        """

    # Resource Management
    def getFile(self, name, relative=None):
        """
        Get file resource with path resolution.

        Args:
            name (str): File name or path
            relative (str): Relative path base

        Returns:
            pisaFileObject: File object for resource
        """

    # Font Management
    def getFontName(self, names, default="helvetica"):
        """
        Get available font name from font list.

        Args:
            names (str or list): Font name(s) to look for
            default (str): Default font if none found

        Returns:
            str: Available font name
        """

    def registerFont(self, fontname, alias=None):
        """
        Register font with ReportLab.

        Args:
            fontname (str): Font file name or registered font name
            alias (str): Alias name for font (optional)
        """

    def loadFont(self, names, src, encoding="WinAnsiEncoding", bold=0, italic=0):
        """
        Load font from file and register with context.

        Args:
            names (str or list): Font name(s) to register
            src (str): Font file path
            encoding (str): Font encoding
            bold (int): Bold weight (0=normal, 1=bold)
            italic (int): Italic style (0=normal, 1=italic)
        """

    # Direction and Text Management
    def setDir(self, direction):
        """
        Set text direction for the document (RTL/LTR language support).

        Args:
            direction (str): Text direction ('ltr' or 'rtl')
        """

    def UID(self):
        """
        Generate unique identifier for elements.

        Returns:
            int: Unique ID number
        """

Usage Examples

Basic context setup:

from xhtml2pdf.context import pisaContext

# Build a context rooted at the resource directory, with debug enabled
context = pisaContext(debug=1, path="/path/to/resources")

# Queue the custom stylesheet on the context
page_and_body_css = """
@page { size: A4; margin: 2cm; }
body { font-family: Arial; color: #333; }
"""
context.addCSS(page_and_body_css)

# Parse the accumulated CSS before processing
context.parseCSS()

Font management:

context = pisaContext()

# Load the font file and register it under both names
font_aliases = ["CustomFont", "custom"]
font_file = "/path/to/custom-font.ttf"
context.loadFont(names=font_aliases, src=font_file, encoding="UTF-8")

# Reference the registered family from CSS
heading_rule = "h1 { font-family: CustomFont; }"
context.addCSS(heading_rule)

Fragment manipulation:

context = pisaContext()

# Create a new fragment carrying the base styling and add text with it
base_style = {"fontName": "Arial", "fontSize": 12, "textColor": "blue"}
frag = context.newFrag(**base_style)
context.addFrag("Hello ", frag)

# Copy the current fragment, overriding only the font name
bold_frag = context.copyFrag(fontName="Arial-Bold")
context.addFrag("World!", bold_frag)

# Add the current paragraph to the story
context.addPara()

CSS Builder

Specialized CSS builder for xhtml2pdf that handles PDF-specific CSS extensions and at-rules.

class pisaCSSBuilder:
    """CSS builder handling the PDF-specific at-rules of xhtml2pdf."""

    def atFontFace(self, declarations):
        """
        Handle an ``@font-face`` at-rule.

        Args:
            declarations (list): Declarations taken from the @font-face rule
        """

    def atPage(self):
        """
        Handle an ``@page`` at-rule (page formatting).

        Returns:
            Page configuration object
        """

    def atFrame(self):
        """
        Handle an ``@frame`` at-rule (PDF frame layout).

        Returns:
            Frame configuration object
        """

CSS Parser

CSS parser specialized for xhtml2pdf's needs, handling external CSS files and PDF-specific CSS extensions.

class pisaCSSParser:
    """CSS parser specialized for xhtml2pdf, including external CSS files."""

    def parseExternal(self, cssResourceName):
        """
        Parse an external CSS file and add it to the context.

        Args:
            cssResourceName (str): Path or URL of the CSS file

        Returns:
            bool: True if parsing successful
        """

Specialized Text Handlers

Text handling classes for dynamic content like page numbers and page counts.

class PageNumberText:
    """Text handler that inserts the dynamic page number into PDF content."""

class PageCountText:
    """Text handler that inserts the dynamic page count into PDF content."""

Fragment Management Utilities

Utility functions for working with text fragments and paragraph styles.

def clone(self, **kwargs):
    """
    Return a clone of this paragraph fragment, optionally modified.

    Args:
        **kwargs: Attributes to change on the cloned fragment

    Returns:
        Fragment: Cloned fragment object
    """

def getParaFrag(style):
    """
    Build a paragraph fragment from style information.

    Args:
        style: Style object holding the fragment properties

    Returns:
        Fragment: New paragraph fragment
    """

Path and Directory Utilities

Utility functions for directory-name handling and right-to-left text reversal in the context system.

def getDirName(path):
    """
    Return the directory name of a file path.

    Args:
        path (str): File path

    Returns:
        str: Directory name
    """

def reverse_sentence(sentence):
    """
    Reverse a sentence to support right-to-left languages.

    Args:
        sentence (str): Input sentence

    Returns:
        str: Reversed sentence for RTL display
    """

Advanced Context Usage

Memory Management

Context capacity controls memory usage during processing:

# Default: a capacity of -1 means unlimited memory
context = pisaContext(capacity=-1)

# Limit memory to 2MB for large documents
two_megabytes = 2 * 1024 * 1024
context = pisaContext(capacity=two_megabytes)

Error and Warning Handling

context = pisaContext(debug=1)

# Report the error and warning counters collected during processing
if context.err > 0:
    print(f"Errors: {context.err}")

if context.warn > 0:
    print(f"Warnings: {context.warn}")

# Print every entry of the detailed log
for record in context.log:
    print(record)

CSS Processing Pipeline

context = pisaContext()

# Stylesheets, listed in order of precedence
default_css = """
/* Default styles */
body { margin: 0; padding: 0; }
"""
document_css = """
/* Document styles */  
body { font-family: Arial; }
h1 { color: blue; }
"""

# Default CSS is added first, then the document CSS
context.addDefaultCSS(default_css)
context.addCSS(document_css)

# Parse everything that was accumulated in a single pass
context.parseCSS()

Resource Path Resolution

context = pisaContext(path="/base/path")

# Relative names are resolved against the base path:
#   "images/logo.png" -> /base/path/images/logo.png
file_obj = context.getFile("images/logo.png")

# Absolute names are used as given:
#   "/absolute/path.png" -> /absolute/path.png
file_obj = context.getFile("/absolute/path.png")

Types

class pisaContext:
    """
    Main processing context used during HTML-to-PDF conversion.

    Attributes:
        err (int): Count of errors encountered
        warn (int): Count of warnings generated
        log (list): Messages logged during processing
        cssText (str): CSS content accumulated so far
        cssParser: Instance of the CSS parser
        fontList (list): Names of the available fonts
        path (str): Base path used to resolve resources
        story (list): Elements of the ReportLab story
        fragList (list): Fragments of the current paragraph
        fragStack (list): Stack of fragments for nested elements
        debug (int): Debug level (0-2)
        capacity (int): Memory capacity for temp files
    """

class Fragment:
    """
    A piece of text together with its styling information.

    Attributes:
        fontName (str): Name of the font family
        fontSize (int): Size of the font in points
        textColor: Specification of the text color
        fontWeight (str): Weight of the font ('normal', 'bold')
        fontStyle (str): Style of the font ('normal', 'italic')
        textDecoration (str): Decoration of the text ('none', 'underline')
    """

Install with Tessl CLI