tessl/pypi-pelican

Static site generator supporting Markdown and reStructuredText

—

Pending

Overview

Eval results

Files

Utilities

Name: tessl/pypi-pelican
Author: tessl

Helper functions and classes for content processing, URL handling, caching, date formatting, and file operations. These utilities support common tasks throughout the Pelican generation pipeline.

Capabilities

Text Processing

Functions for processing and formatting text content, including slug generation and HTML manipulation.

def slugify(
    value: str,
    regex_subs: Iterable[tuple[str, str]] = (),
    preserve_case: bool = False,
    use_unicode: bool = False,
) -> str:
    """
    Convert text to a URL-safe slug.

    Parameters:
    - value (str): Text to convert to a slug
    - regex_subs (Iterable[tuple[str, str]], optional): Regex substitution
      pairs; defaults to an empty tuple
    - preserve_case (bool, optional): Whether to preserve the original case;
      defaults to False
    - use_unicode (bool, optional): Whether to allow unicode characters;
      defaults to False

    Returns:
    str: URL-safe slug; with the default flags it consists of lowercase
    letters, numbers, and hyphens

    NOTE(review): presumably each regex_subs pair is (pattern, replacement)
    and patterns are interpreted as regular expressions, so literal
    metacharacters need escaping -- confirm.
    """

def truncate_html_words(s: str, num: int, end_text: str = "…") -> str:
    """
    Truncate HTML content to a specified number of words.

    Parameters:
    - s (str): HTML content to truncate
    - num (int): Maximum number of words to keep
    - end_text (str, optional): Text to append when the content is truncated;
      defaults to a single ellipsis character ("…")

    Returns:
    str: Truncated HTML with proper tag closure
    """

def truncate_html_paragraphs(s: str, num: int) -> str:
    """
    Truncate HTML content to a specified number of paragraphs.

    Parameters:
    - s (str): HTML content to truncate
    - num (int): Maximum number of paragraphs to keep

    Returns:
    str: Truncated HTML containing only complete paragraphs
    """

File Operations

Functions for file system operations, directory management, and file copying with metadata preservation.

def clean_output_dir(path: str, retention: list = None) -> None:
    """
    Clean the output directory while preserving specified files.

    Parameters:
    - path (str): Output directory path to clean
    - retention (list, optional): List of files/patterns to preserve;
      defaults to None (presumably nothing is preserved -- confirm)
    """

def copy(source: str, destination: str, ignores: list = None) -> None:
    """
    Copy files with metadata preservation and optional ignore patterns.

    Parameters:
    - source (str): Source file or directory path
    - destination (str): Destination file or directory path
    - ignores (list, optional): List of patterns to ignore during the copy;
      defaults to None (presumably nothing is ignored -- confirm)
    """

def mkdir_p(path: str) -> None:
    """
    Create a directory, including any missing parent directories
    (like the shell command `mkdir -p`).

    Parameters:
    - path (str): Directory path to create
    """

URL and Path Utilities

Functions for URL generation, path manipulation, and cross-platform path handling.

def path_to_url(path: str) -> str:
    """
    Convert a file system path to a URL path.

    Parameters:
    - path (str): File system path

    Returns:
    str: URL-formatted path with forward slashes

    NOTE(review): presumably only the running platform's path separator is
    converted -- confirm before relying on it for foreign-platform paths.
    """

def posixize_path(path: str) -> str:
    """
    Convert a path to POSIX format (forward slashes).

    Parameters:
    - path (str): File system path

    Returns:
    str: POSIX-formatted path

    NOTE(review): presumably only the running platform's path separator is
    replaced -- confirm.
    """

def sanitised_join(base: str, *args: str) -> str:
    """
    Safely join path components, preventing directory traversal.

    Parameters:
    - base (str): Base directory path
    - *args (str): Path components to join onto the base

    Returns:
    str: Safely joined path within the base directory

    NOTE(review): this stub does not say what happens when the joined path
    would fall outside `base`; upstream Pelican appears to raise
    RuntimeError in that case rather than rewriting the path -- confirm.
    """

Date and Time Utilities

Classes and functions for date processing, timezone handling, and date formatting.

class SafeDatetime(datetime.datetime):
    """
    Subclass of datetime.datetime with safe operations and string
    representation.

    Provides additional safety checks and consistent formatting for
    date operations in content processing.
    """

class DateFormatter:
    """
    Date formatting utility for Jinja2 templates.

    Instances are callable and provide strftime formatting with locale
    support and timezone awareness.
    """
    # locale defaults to None (presumably the current process locale is
    # used in that case -- confirm).
    def __init__(self, locale: str = None): ...

    def __call__(self, date, format_string: str) -> str:
        """
        Format a date using strftime with locale support.

        Parameters:
        - date: Date object to format
        - format_string (str): strftime format string

        Returns:
        str: Formatted date string
        """

def set_date_tzinfo(date, default_tz) -> datetime.datetime:
    """
    Set timezone info on a date object, falling back to a default timezone.

    Parameters:
    - date: Date object (may be naive or timezone-aware)
    - default_tz: Default timezone to apply if the date is naive

    Returns:
    datetime.datetime: Timezone-aware datetime object

    NOTE(review): the accepted form of `default_tz` (tzinfo object versus
    timezone name string) is not stated here -- confirm.
    """

Content Organization

Functions for sorting, organizing, and processing content collections.

def order_content(content_list: list, order_by: str) -> list:
    """
    Sort a content list by the specified criteria.

    Parameters:
    - content_list (list): List of content objects to sort
    - order_by (str): Sort criteria ('date', 'title', 'basename', etc.)

    Returns:
    list: Sorted content list
    """

def process_translations(content_list: list, translation_id: str) -> None:
    """
    Process and link content translations.

    Parameters:
    - content_list (list): List of content objects
    - translation_id (str): Metadata field used for translation linking

    NOTE(review): the declared return is None; upstream Pelican appears to
    return the originals and the translations as a pair -- confirm.
    """

def maybe_pluralize(count: int, singular: str, plural: str = None) -> str:
    """
    Return the singular or plural form based on a count.

    Parameters:
    - count (int): Number to check for pluralization
    - singular (str): Singular form of the word
    - plural (str, optional): Plural form; the signature default is None,
      in which case singular + 's' is used

    Returns:
    str: Formatted string with the count and the appropriate word form
    """

Caching Utilities

Classes for file-based caching to improve generation performance on large sites.

class FileDataCacher:
    """
    Base class for file-based data caching.

    Provides caching functionality to avoid reprocessing unchanged files.

    NOTE(review): confirm this constructor signature against the installed
    Pelican version.
    """
    # cache_name defaults to 'cache'.
    def __init__(self, cache_path: str, cache_name: str = 'cache'): ...

    def get_cached_data(self, path: str, fallback: callable) -> Any:
        """
        Get cached data, or compute it using the fallback function.

        Parameters:
        - path (str): File path used as the cache key
        - fallback (callable): Function that computes the data if it is
          not cached

        Returns:
        Any: Cached or computed data
        """

class FileStampDataCacher(FileDataCacher):
    """
    File caching with timestamp-based invalidation.

    Extends FileDataCacher with file modification time checking
    for automatic cache invalidation.
    """

    def should_update_cache(self, path: str, cache_key: str) -> bool:
        """
        Check whether the cache should be updated, based on the file
        modification time.

        Parameters:
        - path (str): Source file path
        - cache_key (str): Cache entry key

        Returns:
        bool: True if the cache needs updating
        """

Decorators and Helpers

Utility decorators and helper classes for common patterns.

class memoized:
    """
    Decorator for caching function results (memoization).

    Caches function return values based on their arguments to avoid
    repeated expensive computations.
    """
    # Wraps the function whose results are to be cached.
    def __init__(self, func: callable): ...

    # Invokes the wrapped function through the cache.
    def __call__(self, *args, **kwargs): ...

    def cache_clear(self) -> None:
        """Clear the memoization cache."""

def deprecated_attribute(old: str, new: str, since: tuple):
    """
    Decorator for marking class attributes as deprecated.

    Parameters:
    - old (str): Old attribute name
    - new (str): New attribute name
    - since (tuple): Version tuple when the deprecation started

    Returns:
    property: Property that issues a deprecation warning
    """

File Monitoring

Utilities for monitoring file changes during development and auto-reload functionality.

class FileChangeFilter:
    """
    Filter for file change monitoring.

    Filters file system events down to the changes relevant for site
    regeneration.

    NOTE(review): confirm this interface against the installed Pelican
    version.
    """
    # ignore_patterns defaults to None (presumably no paths are ignored in
    # that case -- confirm).
    def __init__(self, ignore_patterns: list = None): ...

    def should_process(self, path: str, event_type: str) -> bool:
        """
        Check whether a file change should trigger regeneration.

        Parameters:
        - path (str): Changed file path
        - event_type (str): Type of file system event

        Returns:
        bool: True if the change should trigger regeneration
        """

def wait_for_changes(settings_path: str, settings: dict) -> list:
    """
    Wait for and detect file changes in the content and theme directories.

    Parameters:
    - settings_path (str): Path to the settings file
    - settings (dict): Site settings dictionary

    Returns:
    list: List of changed files with metadata

    NOTE(review): the shape of each returned entry is not specified here;
    the usage example indexes entry[1] for the path -- confirm.
    """

Usage Examples

Text Processing Examples

from pelican.utils import slugify, truncate_html_words, truncate_html_paragraphs

# Generate URL-safe slugs
title = "My Article Title with Special Characters!"
slug = slugify(title)  # "my-article-title-with-special-characters"
# NOTE(review): upstream Pelican appears to hyphenate only through the
# supplied regex_subs (see the SLUG_REGEX_SUBSTITUTIONS setting) -- confirm
# the result above for a call without substitutions.

# Custom substitutions: the keyword is `regex_subs`. The patterns are regular
# expressions, so the literal "+" signs have to be escaped. The explicit
# whitespace rule keeps the result hyphenated even if only the supplied
# substitutions are applied.
slug = slugify(
    "C++ Programming",
    regex_subs=((r'C\+\+', 'cpp'), (r'\s+', '-')),
)  # "cpp-programming"

# Truncate HTML content
html_content = "<p>First paragraph.</p><p>Second paragraph with more text.</p>"
short_text = truncate_html_words(html_content, 5)  # Truncates to 5 words

# Truncate by paragraphs
short_paragraphs = truncate_html_paragraphs(html_content, 1)  # First paragraph only

File Operations Examples

from pelican.utils import clean_output_dir, copy, mkdir_p

# Wipe the output directory, sparing the entries named in `retention`
preserved_entries = ['.git', 'CNAME', '*.pdf']
clean_output_dir('output', retention=preserved_entries)

# Copy the image tree, skipping anything that matches an ignore pattern
skipped_patterns = ['*.tmp', '.DS_Store']
copy('content/images', 'output/images', ignores=skipped_patterns)

# Make sure the nested asset directory exists
mkdir_p('output/assets/css')

URL and Path Examples

from pelican.utils import path_to_url, posixize_path, sanitised_join

# Convert file paths to URLs
# NOTE(review): the separator conversion presumably depends on the running
# platform's path separator, so the results below are what a Windows host
# would produce -- confirm.
file_path = 'content\\articles\\my-post.md'  # Windows path
url_path = path_to_url(file_path)  # 'content/articles/my-post.md'

# Ensure POSIX format
posix_path = posixize_path(file_path)  # 'content/articles/my-post.md'

# Safe path joining: ordinary components stay inside the base directory
safe_path = sanitised_join('/var/www', 'static/site.css')  # '/var/www/static/site.css'

# A traversal attempt must not be treated as a usable path. Upstream Pelican
# appears to reject it with RuntimeError (confirm), so guard the call rather
# than assuming the components are silently re-rooted under the base.
try:
    safe_path = sanitised_join('/var/www', '../../../etc/passwd')
except RuntimeError:
    safe_path = None  # rejected: the target lies outside '/var/www'

Date Formatting Examples

from pelican.utils import DateFormatter, set_date_tzinfo, SafeDatetime
from datetime import datetime
import pytz

# Create date formatter
formatter = DateFormatter('en_US')

# Format dates in templates (used automatically by Pelican).
# A fixed date is used so that the documented output is reproducible.
date = datetime(2023, 1, 15)
formatted = formatter(date, '%B %d, %Y')  # "January 15, 2023"

# Handle timezone-naive dates
# NOTE(review): confirm whether set_date_tzinfo expects a tzinfo object (as
# passed here) or a timezone name such as 'UTC' in the installed version.
naive_date = datetime.now()
utc_tz = pytz.UTC
aware_date = set_date_tzinfo(naive_date, utc_tz)

# Safe datetime operations
safe_date = SafeDatetime.now()
print(safe_date)  # Safe string representation

Content Organization Examples

from pelican.utils import order_content, process_translations, maybe_pluralize

# Sort articles by different criteria.
# article1..article3 are placeholders for content objects defined elsewhere.
articles = [article1, article2, article3]
sorted_by_date = order_content(articles, 'date')
sorted_by_title = order_content(articles, 'title')
sorted_by_reverse_date = order_content(articles, 'reversed-date')

# Process content translations, linking items through their 'slug' metadata.
# `pages` is a placeholder for a list of page objects defined elsewhere.
all_content = articles + pages
process_translations(all_content, 'slug')

# Pluralization helper
article_count = len(articles)
message = maybe_pluralize(article_count, 'article', 'articles')
# "5 articles" or "1 article"

Caching Examples

from pelican.utils import FileStampDataCacher
import os

class ContentProcessor(FileStampDataCacher):
    """Content processor that caches its per-file results."""

    def __init__(self, cache_path):
        # Store entries under the 'content_cache' cache name.
        super().__init__(cache_path, 'content_cache')

    def process_file(self, file_path):
        """
        Process a file, reusing the cached result when one is available.

        Parameters:
        - file_path (str): Path of the file to process

        Returns:
        The cached data for file_path, or the freshly processed content.
        """
        def expensive_processing():
            # Expensive content processing. Read with an explicit encoding so
            # the result does not depend on the platform default.
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
            # ... complex processing ...
            # Placeholder so the example runs as written: replace this
            # assignment with the real transformation of `content`.
            processed_content = content
            return processed_content

        return self.get_cached_data(file_path, expensive_processing)

# Usage
processor = ContentProcessor('cache')
result = processor.process_file('content/article.md')

Memoization Examples

from pelican.utils import memoized

@memoized
def expensive_computation(x, y):
    """Stand-in for a costly calculation whose results are worth caching."""
    print(f"Computing for {x}, {y}")  # Emitted only when the body really runs
    forward, backward = x ** y, y ** x
    return forward + backward

# Initial invocation: the body executes and the value is stored
result1 = expensive_computation(2, 3)  # Prints "Computing for 2, 3"

# Repeat invocation with the same arguments: served from the cache
result2 = expensive_computation(2, 3)  # No print, returns cached result

# Drop all stored results when they are no longer wanted
expensive_computation.cache_clear()

File Monitoring Examples

from pelican.utils import wait_for_changes, FileChangeFilter

# Watch the site sources and rebuild whenever something changes
def auto_regenerate(settings_path, settings):
    """Rebuild the site on every detected change until interrupted."""
    try:
        while True:
            # Blocks until the watcher reports something
            changed_files = wait_for_changes(settings_path, settings)

            if not changed_files:
                continue

            print(f"Detected changes: {[f[1] for f in changed_files]}")
            # regenerate_site is a placeholder for the project's build step
            regenerate_site(settings)
    except KeyboardInterrupt:
        # Ctrl-C ends the watch loop quietly
        return

# Filter that disregards temporary, swap, and VCS files
filter_obj = FileChangeFilter(ignore_patterns=['*.tmp', '*.swp', '.git/*'])

def should_regenerate(file_path, event_type):
    """Report whether this change is relevant enough to rebuild the site."""
    verdict = filter_obj.should_process(file_path, event_type)
    return verdict

Custom Utility Development

from pelican.utils import memoized, slugify
import re

class ContentEnhancer:
    """Custom utility for enhancing content."""

    @memoized
    def extract_headings(self, html_content):
        """
        Extract headings from HTML content, with caching.

        Parameters:
        - html_content (str): HTML to scan for <h1>..<h6> elements

        Returns:
        list: One dict per heading with 'level' (int) and 'title' (str,
        inner tags stripped), in document order
        """
        headings = []
        # \1 ties the closing tag to the same level as the opening tag.
        for match in re.finditer(r'<h([1-6])[^>]*>(.*?)</h\1>', html_content):
            level = int(match.group(1))
            title = re.sub(r'<[^>]+>', '', match.group(2))  # Strip HTML tags
            headings.append({'level': level, 'title': title})
        return headings

    def generate_toc(self, html_content):
        """
        Generate a table of contents from the headings in html_content.

        Parameters:
        - html_content (str): HTML to build the table of contents for

        Returns:
        str: A <ul class="toc"> list with one anchor link per heading
        """
        headings = self.extract_headings(html_content)

        items = []
        for heading in headings:
            # Two spaces of indentation per heading level below <h1>.
            indent = '  ' * (heading['level'] - 1)
            anchor = slugify(heading['title'])
            items.append(
                f'\n{indent}<li><a href="#{anchor}">{heading["title"]}</a></li>'
            )

        # Assemble once with join instead of growing a string in the loop.
        return '<ul class="toc">' + ''.join(items) + '\n</ul>'

# Usage in plugin or generator
enhancer = ContentEnhancer()

def add_toc_to_articles(generator):
    """Add a table of contents to every article that has content."""
    for article in generator.articles:
        if hasattr(article, 'content'):
            article.toc = enhancer.generate_toc(article.content)

Install with Tessl CLI