Static site generator supporting Markdown and reStructuredText
—
Helper functions and classes for content processing, URL handling, caching, date formatting, and file operations. These utilities support common tasks throughout the Pelican generation pipeline.
Functions for processing and formatting text content, including slug generation and HTML manipulation.
def slugify(
value: str,
regex_subs: Iterable[tuple[str, str]] = (),
preserve_case: bool = False,
use_unicode: bool = False,
) -> str:
"""
Convert text to URL-safe slug.
Parameters:
- value (str): Text to convert to slug
- regex_subs (Iterable[tuple[str, str]], optional): (pattern, replacement) pairs; each pattern is a regular expression, so literal metacharacters such as "+" must be escaped
- preserve_case (bool, optional): Whether to preserve original case (default False)
- use_unicode (bool, optional): Whether to allow unicode characters (default False)
Returns:
str: URL-safe slug; lowercased unless preserve_case is True
"""
def truncate_html_words(s: str, num: int, end_text: str = "…") -> str:
"""
Truncate HTML content to specified number of words.
Parameters:
- s (str): HTML content to truncate
- num (int): Maximum number of words
- end_text (str, optional): Text to append when truncated
Returns:
str: Truncated HTML with proper tag closure
"""
def truncate_html_paragraphs(s: str, num: int) -> str:
"""
Truncate HTML content to specified number of paragraphs.
Parameters:
- s (str): HTML content to truncate
- num (int): Maximum number of paragraphs
Returns:
str: Truncated HTML with complete paragraphs
"""

Functions for file system operations, directory management, and file copying with metadata preservation.
def clean_output_dir(path: str, retention: list = None) -> None:
"""
Clean output directory while preserving specified files.
Parameters:
- path (str): Output directory path to clean
- retention (list, optional): List of files/patterns to preserve
"""
def copy(source: str, destination: str, ignores: list = None) -> None:
"""
Copy files with metadata preservation and optional ignore patterns.
Parameters:
- source (str): Source file or directory path
- destination (str): Destination file or directory path
- ignores (list, optional): List of patterns to ignore during copy
"""
def mkdir_p(path: str) -> None:
"""
Create directory and parent directories as needed (like mkdir -p).
Parameters:
- path (str): Directory path to create
"""

Functions for URL generation, path manipulation, and cross-platform path handling.
def path_to_url(path: str) -> str:
"""
Convert file system path to URL path.
Parameters:
- path (str): File system path
Returns:
str: URL-formatted path with forward slashes
"""
def posixize_path(path: str) -> str:
"""
Convert path to POSIX format (forward slashes).
Parameters:
- path (str): File system path
Returns:
str: POSIX-formatted path
"""
def sanitised_join(base: str, *args: str) -> str:
"""
Safely join path components preventing directory traversal.
Parameters:
- base (str): Base directory path
- *args (str): Path components to join
Returns:
str: Safely joined path within base directory
"""

Classes and functions for date processing, timezone handling, and date formatting.
class SafeDatetime(datetime.datetime):
"""
Extended datetime class with safe operations and string representation.
Provides additional safety checks and consistent formatting for
date operations in content processing.
"""
class DateFormatter:
"""
Date formatting utility for Jinja2 templates.
Provides strftime formatting with locale support and timezone awareness.
"""
def __init__(self, locale: str = None): ...
def __call__(self, date, format_string: str) -> str:
"""
Format date using strftime with locale support.
Parameters:
- date: Date object to format
- format_string (str): strftime format string
Returns:
str: Formatted date string
"""
def set_date_tzinfo(date, default_tz) -> datetime.datetime:
"""
Set timezone info on date object with fallback to default timezone.
Parameters:
- date: Date object (may be naive or timezone-aware)
- default_tz: Default timezone to apply if date is naive
Returns:
datetime.datetime: Timezone-aware datetime object
"""

Functions for sorting, organizing, and processing content collections.
def order_content(content_list: list, order_by: str) -> list:
"""
Sort content list by specified criteria.
Parameters:
- content_list (list): List of content objects to sort
- order_by (str): Sort criteria ('date', 'title', 'basename', etc.)
Returns:
list: Sorted content list
"""
def process_translations(content_list: list, translation_id: str) -> None:
"""
Process and link content translations.
Parameters:
- content_list (list): List of content objects
- translation_id (str): Metadata field used for translation linking
"""
def maybe_pluralize(count: int, singular: str, plural: str = None) -> str:
"""
Return singular or plural form based on count.
Parameters:
- count (int): Number to check for pluralization
- singular (str): Singular form of word
- plural (str, optional): Plural form (defaults to singular + 's')
Returns:
str: Formatted string with count and appropriate word form
"""

Classes for file-based caching to improve generation performance on large sites.
class FileDataCacher:
"""
Base class for file-based data caching.
Provides caching functionality to avoid reprocessing unchanged files.
"""
def __init__(self, cache_path: str, cache_name: str = 'cache'): ...
def get_cached_data(self, path: str, fallback: callable) -> Any:
"""
Get cached data or compute using fallback function.
Parameters:
- path (str): File path for cache key
- fallback (callable): Function to compute data if not cached
Returns:
Any: Cached or computed data
"""
class FileStampDataCacher(FileDataCacher):
"""
File caching with timestamp-based invalidation.
Extends FileDataCacher with file modification time checking
for automatic cache invalidation.
"""
def should_update_cache(self, path: str, cache_key: str) -> bool:
"""
Check if cache should be updated based on file modification time.
Parameters:
- path (str): Source file path
- cache_key (str): Cache entry key
Returns:
bool: True if cache needs updating
"""

Utility decorators and helper classes for common patterns.
class memoized:
"""
Decorator for caching function results (memoization).
Caches function return values based on arguments to avoid
repeated expensive computations.
"""
def __init__(self, func: callable): ...
def __call__(self, *args, **kwargs): ...
def cache_clear(self) -> None:
"""Clear memoization cache."""
def deprecated_attribute(old: str, new: str, since: tuple):
"""
Decorator for marking class attributes as deprecated.
Parameters:
- old (str): Old attribute name
- new (str): New attribute name
- since (tuple): Version tuple when deprecation started
Returns:
property: Property that issues deprecation warning
"""

Utilities for monitoring file changes during development and auto-reload functionality.
class FileChangeFilter:
"""
Filter for file change monitoring.
Filters file system events to relevant changes for site regeneration.
"""
def __init__(self, ignore_patterns: list = None): ...
def should_process(self, path: str, event_type: str) -> bool:
"""
Check if file change should trigger regeneration.
Parameters:
- path (str): Changed file path
- event_type (str): Type of file system event
Returns:
bool: True if change should trigger regeneration
"""
def wait_for_changes(settings_path: str, settings: dict) -> list:
"""
Wait for and detect file changes in content and theme directories.
Parameters:
- settings_path (str): Path to settings file
- settings (dict): Site settings dictionary
Returns:
list: List of changed files with metadata
"""

from pelican.utils import slugify, truncate_html_words, truncate_html_paragraphs
# Generate URL-safe slugs
# NOTE(review): recent Pelican releases apply only the substitutions that are
# passed in (the site-wide defaults live in the SLUG_REGEX_SUBSTITUTIONS
# setting), so these examples pass them explicitly. Confirm against your version.
SLUG_SUBS = [
    (r"[^\w\s-]", ""),  # drop punctuation
    (r"(?u)\A\s*", ""),  # strip leading whitespace
    (r"(?u)\s*\Z", ""),  # strip trailing whitespace
    (r"[-\s]+", "-"),  # collapse whitespace/hyphen runs into a single hyphen
]
title = "My Article Title with Special Characters!"
slug = slugify(title, regex_subs=SLUG_SUBS)  # "my-article-title-with-special-characters"
# Custom substitutions: the keyword is `regex_subs`, and every pattern is a
# regular expression, so the literal "+" characters have to be escaped
slug = slugify("C++ Programming", regex_subs=[(r"C\+\+", "cpp"), *SLUG_SUBS])  # "cpp-programming"
# Truncate HTML content
html_content = "<p>First paragraph.</p><p>Second paragraph with more text.</p>"
short_text = truncate_html_words(html_content, 5)  # Truncates to 5 words
# Truncate by paragraphs
short_paragraphs = truncate_html_paragraphs(html_content, 1) # First paragraph only

from pelican.utils import clean_output_dir, copy, mkdir_p
# Clean output directory but keep certain files
clean_output_dir('output', retention=['.git', 'CNAME', '*.pdf'])
# Copy files with ignore patterns
copy('content/images', 'output/images', ignores=['*.tmp', '.DS_Store'])
# Create directory structure
mkdir_p('output/assets/css')

from pelican.utils import path_to_url, posixize_path, sanitised_join
# Convert file paths to URLs
# NOTE(review): the results shown below assume the code runs on Windows, where
# '\\' is the path separator. Confirm the behaviour on other platforms.
file_path = 'content\\articles\\my-post.md'  # Windows path
url_path = path_to_url(file_path)  # 'content/articles/my-post.md'
# Ensure POSIX format
posix_path = posixize_path(file_path)  # 'content/articles/my-post.md'
# Safe path joining (prevents directory traversal)
safe_path = sanitised_join('/var/www', 'images/logo.png')  # '/var/www/images/logo.png'
# A join that would land outside the base directory is rejected, not rewritten
try:
    sanitised_join('/var/www', '../../../etc/passwd')
except RuntimeError as exc:
    print(f"Rejected unsafe path: {exc}")

from pelican.utils import DateFormatter, set_date_tzinfo, SafeDatetime
from datetime import datetime
import pytz
# Create date formatter
formatter = DateFormatter('en_US')
# Format dates in templates (used automatically by Pelican)
date = datetime.now()
formatted = formatter(date, '%B %d, %Y') # "January 15, 2023"
# Handle timezone-naive dates
naive_date = datetime.now()
utc_tz = pytz.UTC
aware_date = set_date_tzinfo(naive_date, utc_tz)
# Safe datetime operations
safe_date = SafeDatetime.now()
print(safe_date) # Safe string representation

from pelican.utils import order_content, process_translations, maybe_pluralize
# Sort articles by different criteria
articles = [article1, article2, article3]
sorted_by_date = order_content(articles, 'date')
sorted_by_title = order_content(articles, 'title')
sorted_by_reverse_date = order_content(articles, 'reversed-date')
# Process content translations
all_content = articles + pages
process_translations(all_content, 'slug')
# Pluralization helper
article_count = len(articles)
message = maybe_pluralize(article_count, 'article', 'articles')
# "5 articles" or "1 article"

from pelican.utils import FileStampDataCacher
import os
class ContentProcessor(FileStampDataCacher):
    """Content processor that caches per-file results through the inherited cacher."""

    def __init__(self, cache_path):
        super().__init__(cache_path, 'content_cache')

    def process_file(self, file_path):
        """Return the processed content of `file_path`, served from the cache when possible."""

        def expensive_processing():
            # Passed as the fallback: it computes the data when nothing is cached
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
            # ... complex processing ...
            processed_content = content  # placeholder: replace with the real transformation
            return processed_content

        return self.get_cached_data(file_path, expensive_processing)
# Usage
processor = ContentProcessor('cache')
result = processor.process_file('content/article.md')

from pelican.utils import memoized
@memoized
def expensive_computation(x, y):
"""Expensive function that benefits from caching."""
print(f"Computing for {x}, {y}") # Only prints on first call
return x ** y + y ** x
# First call - computes and caches
result1 = expensive_computation(2, 3) # Prints "Computing for 2, 3"
# Second call - uses cached result
result2 = expensive_computation(2, 3) # No print, returns cached result
# Clear cache if needed
expensive_computation.cache_clear()

from pelican.utils import wait_for_changes, FileChangeFilter
# Monitor for file changes
def auto_regenerate(settings_path, settings):
    """Regenerate the site whenever changes are reported, until interrupted with Ctrl-C."""
    while True:
        try:
            # Ask the watcher for the latest set of modified files
            modified = wait_for_changes(settings_path, settings)
            if not modified:
                continue
            names = [entry[1] for entry in modified]
            print(f"Detected changes: {names}")
            # Something changed, so rebuild
            regenerate_site(settings)
        except KeyboardInterrupt:
            break
# File change filtering
filter_obj = FileChangeFilter(ignore_patterns=['*.tmp', '*.swp', '.git/*'])
def should_regenerate(file_path, event_type):
"""Check if file change should trigger regeneration."""
return filter_obj.should_process(file_path, event_type)

from pelican.utils import memoized
import re
class ContentEnhancer:
    """Custom utility for enhancing content."""

    @memoized
    def extract_headings(self, html_content):
        """Extract headings from HTML content with caching.

        Returns a list of {'level': int, 'title': str} dicts in document order.
        """
        headings = []
        # re.DOTALL lets a heading whose markup spans several lines still match
        for match in re.finditer(r'<h([1-6])[^>]*>(.*?)</h\1>', html_content, re.DOTALL):
            level = int(match.group(1))
            title = re.sub(r'<[^>]+>', '', match.group(2))  # Strip HTML tags
            headings.append({'level': level, 'title': title})
        return headings

    def generate_toc(self, html_content):
        """Generate table of contents from headings."""
        # Imported here because this snippet's own imports bring in only
        # `memoized` and `re`; without it the slugify call raises NameError
        from pelican.utils import slugify

        headings = self.extract_headings(html_content)
        items = []
        for heading in headings:
            indent = ' ' * (heading['level'] - 1)
            items.append(
                f'\n{indent}<li><a href="#{slugify(heading["title"])}">{heading["title"]}</a></li>'
            )
        # Assemble once with join instead of growing a string inside the loop
        return '<ul class="toc">' + ''.join(items) + '\n</ul>'
# Usage in plugin or generator
enhancer = ContentEnhancer()
def add_toc_to_articles(generator):
"""Add table of contents to articles."""
for article in generator.articles:
if hasattr(article, 'content'):
article.toc = enhancer.generate_toc(article.content)

Install with Tessl CLI
npx tessl i tessl/pypi-pelican