PDF generator using HTML and CSS
Comprehensive file and resource management system for handling various types of content sources including local files, URLs, data URIs, and byte streams. The file handling system provides unified access to resources with automatic MIME type detection, caching, and path resolution.
Main function for getting file objects from various sources with automatic type detection and path resolution.
def getFile(*a, **kw):
"""
Get file object from various sources (paths, URLs, data URIs).
Args:
*a: Positional arguments passed to pisaFileObject
**kw: Keyword arguments passed to pisaFileObject
Returns:
pisaFileObject: Unified file object for resource access
"""

Unified file object that handles different types of URI sources with a consistent interface for content access and MIME type detection.
class pisaFileObject:
def __init__(self, uri, basepath=None, callback=None):
"""
Initialize file object for various URI types.
Args:
uri (str): File URI - can be:
- Local file path: "/path/to/file.jpg"
- HTTP/HTTPS URL: "https://example.com/image.png"
- Data URI: "data:image/png;base64,iVBORw0KGgo..."
- File URI: "file:///path/to/file.css"
basepath (str): Base path for resolving relative paths
callback (callable): Custom URI resolution callback
Signature: callback(uri, rel) -> resolved_uri
"""
def getFileContent(self):
"""
Get raw file content as bytes.
Returns:
bytes: Raw file content
Raises:
IOError: If file cannot be accessed
urllib.error.URLError: If URL cannot be fetched
"""
def getNamedFile(self):
"""
Get named file object for the resource.
Returns:
file-like object: Named file object with read() method
"""
def getData(self):
"""
Get file data with potential processing.
Returns:
bytes or str: Processed file data
"""
def getFile(self):
"""
Get file-like object for reading.
Returns:
file-like object: Object with read(), seek(), tell() methods
"""
def getMimeType(self):
"""
Get MIME type of the file content.
Returns:
str: MIME type (e.g., 'text/css', 'image/png', 'text/html')
"""
def notFound(self):
"""
Handle file not found cases.
Returns:
bool: True if file was not found
"""
def getAbsPath(self):
"""
Get absolute path for the file.
Returns:
str: Absolute file path (empty string for non-file URIs)
"""
def getBytesIO(self):
"""
Get BytesIO object containing file content.
Returns:
io.BytesIO: BytesIO object with file content
"""

Load local file:
from xhtml2pdf.files import pisaFileObject
# Load local CSS file
css_file = pisaFileObject("/path/to/styles.css")
content = css_file.getFileContent().decode('utf-8')
mime_type = css_file.getMimeType() # 'text/css'

Load from URL:
# Load image from URL
img_file = pisaFileObject("https://example.com/logo.png")
if not img_file.notFound():
image_data = img_file.getFileContent()
mime_type = img_file.getMimeType() # 'image/png'

Load data URI:
# Load embedded data
data_uri = "data:text/css;base64,Ym9keSB7IGZvbnQtZmFtaWx5OiBBcmlhbDsgfQ=="
css_file = pisaFileObject(data_uri)
content = css_file.getFileContent().decode('utf-8') # "body { font-family: Arial; }"

Custom callback for path resolution:
def resolve_path(uri, rel):
"""Custom resolution for application-specific paths."""
if uri.startswith('app://'):
return '/app/assets/' + uri[6:] # Convert app:// to local path
return uri
file_obj = pisaFileObject("app://images/logo.png", callback=resolve_path)

Temporary file handler for managing intermediate files during PDF generation, with automatic cleanup and memory management.
class pisaTempFile:
def __init__(self, buffer="", capacity=CAPACITY):
"""
Initialize temporary file for PDF generation.
Args:
buffer (str): Initial buffer content
capacity (int): Maximum memory capacity before switching to disk
"""
def makeTempFile(self):
"""
Create actual temporary file on disk.
Returns:
file object: Temporary file object
"""
def getFileName(self):
"""
Get temporary file name.
Returns:
str: Temporary file path
"""
def fileno(self):
"""
Get file descriptor number.
Returns:
int: File descriptor
"""
def getvalue(self):
"""
Get current file content as bytes.
Returns:
bytes: File content
"""
def write(self, value):
"""
Write data to temporary file.
Args:
value (str or bytes): Data to write
"""

Base classes and specialized handlers for different types of file sources.
class BaseFile:
def __init__(self, path, basepath):
"""
Base class for file handlers.
Args:
path (str): File path or URI
basepath (str): Base path for resolution
"""
class B64InlineURI(BaseFile):
"""Handler for base64-encoded data URIs."""
class LocalProtocolURI(BaseFile):
"""Handler for local protocol URIs (file://)."""
class NetworkFileUri(BaseFile):
"""Handler for network URIs (http://, https://)."""
class LocalFileURI(BaseFile):
"""Handler for local file system paths."""
class BytesFileUri(BaseFile):
"""Handler for byte stream content."""
class LocalTmpFile(BaseFile):
"""Handler for local temporary files."""

Network manager and temporary file system for handling downloads and caching.
class FileNetworkManager:
"""Manager for network file operations and caching."""
class TmpFiles(threading.local):
"""Thread-local temporary files manager with automatic cleanup."""

Utility functions for cleaning up temporary files and resources.
def cleanFiles():
"""
Clean up temporary files created during processing.
This function should be called after PDF generation is complete
to free up disk space and system resources.
"""

from xhtml2pdf.files import cleanFiles
from xhtml2pdf import pisa
try:
# Process multiple documents
for html_file in html_files:
with open(html_file) as source:
with open(f"{html_file}.pdf", "wb") as dest:
pisa.pisaDocument(source, dest)
finally:
# Clean up all temporary files
cleanFiles()

The file handling system automatically detects and processes various file types:
- Text: text/html, text/css, text/plain, text/xml
- Images: image/png, image/jpeg, image/gif, image/bmp, image/svg+xml
- Fonts: font/ttf, font/otf, application/font-woff, font/woff2
- Other: application/pdf, application/octet-stream

The system supports various path formats:
# Absolute paths
file_obj = pisaFileObject("/absolute/path/to/file.css")
# Relative paths (with basepath)
file_obj = pisaFileObject("styles/main.css", basepath="/project/assets")
# URLs
file_obj = pisaFileObject("https://cdn.example.com/font.ttf")
# Data URIs
file_obj = pisaFileObject("data:text/css;charset=utf-8,body{margin:0}")
# File URIs
file_obj = pisaFileObject("file:///local/path/image.png")

File operations include comprehensive error handling:
from xhtml2pdf.files import pisaFileObject
file_obj = pisaFileObject("https://example.com/missing.png")
if file_obj.notFound():
print("File not found, using fallback")
# Handle missing file case
else:
try:
content = file_obj.getFileContent()
# Process file content
except (IOError, urllib.error.URLError) as e:
print(f"Error loading file: {e}")
# Handle network or I/O errors

The file system implements automatic caching for network resources.
Temporary files switch between memory and disk based on size:
# Small files stay in memory (default capacity)
temp_file = pisaTempFile(capacity=64*1024) # 64KB threshold
# Large files use disk immediately
temp_file = pisaTempFile(capacity=1024) # 1KB threshold

class pisaFileObject:
"""
Unified file object for various URI types.
Attributes:
uri (str): Original URI string
basepath (str): Base path for resolution
callback (callable): Custom resolution callback
Handles local files, URLs, data URIs, and byte streams
with automatic MIME type detection and content processing.
"""
class pisaTempFile:
"""
Temporary file handler for PDF generation.
Attributes:
capacity (int): Memory capacity threshold
buffer (str): Current buffer content
Manages temporary storage during conversion process
with automatic cleanup and memory management.
"""

Install with Tessl CLI:
npx tessl i tessl/pypi-xhtml2pdf