tessl/pypi-foliant

Modular, Markdown-based documentation generator that makes pdf, docx, html, and more.

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview

Eval results

Files

Preprocessor System

Name: tessl/pypi-foliant
Author: tessl

Foliant's preprocessor system provides content transformation capabilities for modifying Markdown before backend processing. Preprocessors use tag-based content processing to enable features like includes, diagram generation, conditional content, and custom transformations.

Capabilities

Base Preprocessor Class

Foundation class for all content preprocessors providing tag parsing, option handling, and common functionality.

class BasePreprocessor:
    """Base class that every preprocessor must inherit from.

    This listing documents the public interface only; method bodies are
    omitted.
    """

    # Option values used when neither the project config nor a tag
    # attribute supplies one; subclasses override this.
    defaults: dict = {}
    # Names of the tags a subclass handles; empty for the base class.
    # NOTE(review): presumably used to build the tag-matching pattern —
    # confirm against the implementation.
    tags: tuple = ()

    # NOTE(review): ``options={}`` is a mutable default argument; the same
    # dict object is shared by every call that omits ``options``.
    def __init__(self, context: dict, logger: Logger, quiet=False, debug=False, options={}):
        """
        Initialize the preprocessor with the build context and its options.

        Parameters:
        - context (dict): Build context containing project_path, config, target, backend
        - logger (Logger): Logger instance for processing messages
        - quiet (bool): Suppress output messages
        - debug (bool): Enable debug logging
        - options (dict): Preprocessor-specific configuration options
        """

    @staticmethod
    def get_options(options_string: str) -> Dict[str, OptionValue]:
        """
        Parse a string of XML-style attributes into a typed options dictionary.

        Parameters:
        - options_string (str): String of XML-style attributes

        Returns:
        Dict[str, OptionValue]: Parsed options; values are converted to
        int, float, bool or str as appropriate

        Example:
        'width="800" height="600" visible="true"' ->
        {'width': 800, 'height': 600, 'visible': True}
        """

    def apply(self):
        """
        Run the preprocessor against the project content.

        Must be implemented by each preprocessor subclass.

        Raises:
        NotImplementedError: If not implemented by subclass
        """

Unescape Preprocessor

Built-in preprocessor that unescapes previously escaped tags, for scenarios where tags are nested.

class Preprocessor(BasePreprocessor):
    """
    Internal preprocessor that unescapes escaped tags.

    Removes the leading < from escaped tag definitions.
    NOTE(review): presumably an escaped tag is written with a doubled
    opening bracket (e.g. ``<<tag>``) — confirm against the implementation.
    """

    def process_escaped_tags(self, content: str) -> str:
        """
        Remove escape sequences from tag definitions.

        Parameters:
        - content (str): Markdown content with escaped tags

        Returns:
        str: Content with tags unescaped
        """

    def apply(self):
        """Unescape tags in all .md files in the working directory."""

Type Definitions

# Types a parsed tag option value may have.
# NOTE: the ``X | Y`` union syntax on classes needs Python 3.10+ at runtime.
OptionValue = int | float | bool | str

# Preprocessor context structure
# Illustrative shape only: a plain dict mapping each context key to the type
# of its value. ``Path`` must be in scope for this to evaluate.
PreprocessorContext = {
    'project_path': Path,    # Path to project directory
    'config': dict,          # Parsed configuration
    'target': str,           # Target format
    'backend': str           # Backend name
}

# Tag pattern structure for regex matching
# Illustrative shape only: the named groups read from a tag match
# (``match.group('tag')``, ``'options'``, ``'body'``) and their types.
TagPattern = {
    'tag': str,              # Tag name
    'options': str,          # Options string
    'body': str              # Tag content body
}

Usage Examples

Custom Preprocessor Implementation

from foliant.preprocessors.base import BasePreprocessor
import re

class CustomPreprocessor(BasePreprocessor):
    """Example preprocessor that rewrites ``<custom>`` and ``<transform>`` tags.

    ``<custom>`` bodies are wrapped in an HTML ``<div>`` (format ``html``) or
    prefixed with an upper-cased style label (any other format).
    ``<transform>`` bodies are upper-cased when the ``uppercase`` option is
    truthy and left unchanged otherwise.

    Relies on ``self.working_dir``, ``self.pattern``, ``self.logger`` and
    ``self.options``, which are expected to come from the base class.
    """

    # Lowest-priority option values; overridden by the preprocessor's
    # configured options and then by per-tag attributes.
    defaults = {
        'format': 'html',
        'style': 'default'
    }
    # Tag names this preprocessor handles.
    tags = ('custom', 'transform')

    def apply(self):
        """Rewrite the supported tags in every ``*.md`` file under the working directory."""
        for markdown_file in self.working_dir.rglob('*.md'):
            self.logger.debug(f'Processing {markdown_file}')

            with open(markdown_file, 'r', encoding='utf8') as f:
                content = f.read()

            # Process tags using inherited pattern
            content = self.pattern.sub(self._process_tag, content)

            with open(markdown_file, 'w', encoding='utf8') as f:
                f.write(content)

    def _process_tag(self, match):
        """Return the replacement text for one matched tag.

        Parameters:
        - match: Regex match exposing the named groups ``tag``, ``options``
          and ``body``

        Returns:
        str: Transformed body, or the original matched text when the tag
        name is not one this class handles
        """
        tag = match.group('tag')
        options_str = match.group('options') or ''
        body = match.group('body')

        # Precedence, lowest to highest: class defaults, configured options,
        # attributes written on the tag itself.
        options = self.get_options(options_str)
        final_options = {**self.defaults, **self.options, **options}

        if tag == 'custom':
            return self._transform_custom(body, final_options)
        if tag == 'transform':
            return self._transform_content(body, final_options)

        return match.group(0)  # Return unchanged if not handled

    def _transform_custom(self, content, options):
        """Render the body of a ``<custom>`` tag.

        Parameters:
        - content (str): Tag body
        - options (dict): Merged options; must contain ``format`` and ``style``

        Returns:
        str: ``<div class="custom-STYLE">…</div>`` for format ``html``,
        otherwise ``[STYLE]: …`` with the style upper-cased
        """
        format_type = options['format']
        # get_options() yields typed values, so style="2024" arrives as an
        # int and style="true" as a bool; coerce before calling str methods.
        style = str(options['style'])

        if format_type == 'html':
            return f'<div class="custom-{style}">{content}</div>'
        return f'[{style.upper()}]: {content}'

    def _transform_content(self, content, options):
        """Upper-case the body of a ``<transform>`` tag when ``uppercase`` is truthy.

        Parameters:
        - content (str): Tag body
        - options (dict): Merged options; ``uppercase`` is optional

        Returns:
        str: Upper-cased body, or the body unchanged
        """
        return content.upper() if options.get('uppercase') else content

Tag-based Content Processing

Example Markdown with custom tags:

# My Document

<custom format="html" style="highlight">
Important content here
</custom>

<transform uppercase="true">
This text will be uppercase
</transform>

<custom style="callout">
This is a callout box
</custom>

Preprocessor usage:

from pathlib import Path
import logging

# Build context handed to the preprocessor: project location, parsed
# config, and the target/backend being built.
context = dict(
    project_path=Path('./project'),
    config=dict(title='Test'),
    target='html',
    backend='mkdocs',
)

# Instantiate with explicit options (these override the class defaults),
# then run it over the project.
preprocessor = CustomPreprocessor(
    options={'format': 'html', 'style': 'modern'},
    logger=logging.getLogger(),
    context=context,
)
preprocessor.apply()

Option Parsing

from foliant.preprocessors.base import BasePreprocessor

# get_options is a static method, so no preprocessor instance is needed.
options_string = 'width="800" height="600" visible="true" title="My Chart"'
options = BasePreprocessor.get_options(options_string)
print(options)
# Output: {'width': 800, 'height': 600, 'visible': True, 'title': 'My Chart'}

# An empty attribute string parses to an empty dict.
empty_options = BasePreprocessor.get_options('')
print(empty_options)  # Output: {}

Complex Preprocessor with File Operations

import hashlib
import os
import subprocess
from pathlib import Path

from foliant.preprocessors.base import BasePreprocessor

class DiagramPreprocessor(BasePreprocessor):
    """Example preprocessor that renders ``<plantuml>`` and ``<mermaid>`` tags to images.

    Each tag body is rendered to a file in the configured output directory and
    the tag is replaced by a Markdown image reference to that file.

    Relies on ``self.working_dir``, ``self.pattern`` and ``self.options``,
    which are expected to come from the base class.
    """

    # Lowest-priority option values; overridden by the preprocessor's
    # configured options and then by per-tag attributes.
    defaults = {
        'format': 'png',
        'theme': 'default',
        'output_dir': 'images'
    }
    # Tag names this preprocessor handles.
    tags = ('plantuml', 'mermaid')

    def apply(self):
        """Render diagram tags in every ``*.md`` file under the working directory."""
        # Merge the class defaults explicitly so a missing 'output_dir' in the
        # configured options cannot raise KeyError (same merge as per-tag options).
        settings = {**self.defaults, **self.options}
        output_dir = self.working_dir / settings['output_dir']
        # parents=True: a nested output_dir such as 'assets/images' must work.
        output_dir.mkdir(parents=True, exist_ok=True)

        for markdown_file in self.working_dir.rglob('*.md'):
            content = self._process_file(markdown_file, output_dir)

            with open(markdown_file, 'w', encoding='utf8') as f:
                f.write(content)

    def _process_file(self, file_path, output_dir):
        """Return the content of one Markdown file with diagram tags replaced.

        Parameters:
        - file_path (Path): Markdown file to read
        - output_dir (Path): Directory that receives the generated images

        Returns:
        str: File content with each diagram tag replaced by an image reference
        """
        with open(file_path, 'r', encoding='utf8') as f:
            content = f.read()

        return self.pattern.sub(
            lambda m: self._process_diagram(
                m, output_dir, file_path.stem, file_path.parent
            ),
            content
        )

    def _process_diagram(self, match, output_dir, file_stem, markdown_dir=None):
        """Render one diagram tag and return its Markdown image reference.

        Parameters:
        - match: Regex match exposing the named groups ``tag``, ``options``
          and ``body``
        - output_dir (Path): Directory that receives the generated image
        - file_stem (str): Stem of the Markdown file, used in the image name
        - markdown_dir (Path | None): Directory of the Markdown file; the link
          is made relative to it. Defaults to the working directory.

        Returns:
        str: ``![Diagram](relative/path)``, or the original matched text when
        the tag name is not one this class handles

        Raises:
        subprocess.CalledProcessError: If the rendering tool exits non-zero
        """
        tag = match.group('tag')
        options_str = match.group('options') or ''
        body = match.group('body')

        options = {**self.defaults, **self.options, **self.get_options(options_str)}

        renderers = {
            'plantuml': self._generate_plantuml,
            'mermaid': self._generate_mermaid,
        }
        render = renderers.get(tag)
        if render is None:
            # Never emit a link to an image that was not generated.
            return match.group(0)

        # Built-in hash() is salted per process for strings, so it would give
        # a different filename on every build; use a content digest instead.
        fingerprint = hashlib.sha256(
            (body + repr(sorted(options.items()))).encode('utf8')
        ).hexdigest()[:16]
        filename = f"{file_stem}_{tag}_{fingerprint}.{options['format']}"
        output_path = output_dir / filename

        render(body, output_path, options)

        # Link relative to the Markdown file's own directory so references
        # from nested files resolve; always use forward slashes in Markdown.
        base_dir = self.working_dir if markdown_dir is None else markdown_dir
        link = Path(os.path.relpath(output_path, base_dir)).as_posix()
        return f"![Diagram]({link})"

    def _generate_plantuml(self, source, output_path, options):
        """Render PlantUML source to ``output_path``.

        Parameters:
        - source (str): PlantUML diagram text
        - output_path (Path): File to write the rendered image to
        - options (dict): Merged options; ``format`` selects the image type

        Raises:
        subprocess.CalledProcessError: If ``plantuml`` exits non-zero
        """
        # -pipe makes PlantUML read the source from stdin and write the image
        # to stdout; the output is binary, so exchange bytes and write the
        # file ourselves to control its name.
        result = subprocess.run(
            [
                'plantuml',
                f"-t{options['format']}",
                '-charset', 'UTF-8',
                '-pipe',
            ],
            input=source.encode('utf8'),
            stdout=subprocess.PIPE,
            check=True
        )
        output_path.write_bytes(result.stdout)

    def _generate_mermaid(self, source, output_path, options):
        """Render Mermaid source to ``output_path`` using the ``mmdc`` CLI.

        Parameters:
        - source (str): Mermaid diagram text
        - output_path (Path): File the CLI writes the rendered image to
        - options (dict): Merged options; ``theme`` selects the Mermaid theme

        Raises:
        subprocess.CalledProcessError: If ``mmdc`` exits non-zero
        """
        subprocess.run(
            [
                'mmdc',
                '-i', '-',
                '-o', str(output_path),
                # Parsed option values may not be str; argv entries must be.
                '-t', str(options['theme']),
            ],
            input=source,
            text=True,
            encoding='utf8',  # do not depend on the locale's default encoding
            check=True
        )

Preprocessor Configuration

Example foliant.yml preprocessor configuration:

title: My Project

preprocessors:
  - includes
  - plantuml:
      format: svg
      theme: dark
      server_url: http://localhost:8080
  - custom:
      style: modern
      format: html
      uppercase: false

Conditional Preprocessor

class ConditionalPreprocessor(BasePreprocessor):
    """Keeps or drops tagged content depending on the current build target.

    Handles three tags, each reading a ``target`` attribute (``'all'`` when
    the attribute is absent):

    - ``<if>``: body kept when the attribute is ``'all'`` or equals the
      build target.
    - ``<unless>``: body kept when the attribute differs from the build target.
    - ``<target>``: body kept only when the attribute equals the build target.
    """

    defaults = {'target': 'all'}
    tags = ('if', 'unless', 'target')

    def apply(self):
        """Rewrite every ``*.md`` file under the working directory in place."""
        build_target = self.context['target']

        for md_path in self.working_dir.rglob('*.md'):
            with open(md_path, 'r', encoding='utf8') as source:
                original = source.read()

            rewritten = self._process_conditionals(original, build_target)

            with open(md_path, 'w', encoding='utf8') as sink:
                sink.write(rewritten)

    def _process_conditionals(self, content, current_target):
        """Return ``content`` with each conditional tag resolved.

        A kept tag is replaced by its body and a dropped tag by an empty
        string. A match whose tag name is none of the three handled names is
        left as it was.
        """
        def resolve(match):
            tag_name = match.group('tag')
            raw_attributes = match.group('options') or ''
            inner = match.group('body')

            wanted = self.get_options(raw_attributes).get('target', 'all')

            if tag_name == 'if':
                keep = wanted == 'all' or wanted == current_target
            elif tag_name == 'unless':
                keep = wanted != current_target
            elif tag_name == 'target':
                keep = wanted == current_target
            else:
                return match.group(0)

            return inner if keep else ''

        return self.pattern.sub(resolve, content)

Usage in Markdown:

# Documentation

<if target="html">
This content only appears in HTML builds.
</if>

<unless target="pdf">
This content appears in all formats except PDF.
</unless>

<target target="pdf">
PDF-specific content here.
</target>

Install with Tessl CLI