Modular, Markdown-based documentation generator that makes pdf, docx, html, and more.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Foliant's preprocessor system provides content transformation capabilities for modifying Markdown before backend processing. Preprocessors use tag-based content processing to enable features like includes, diagram generation, conditional content, and custom transformations.
Foundation class for all content preprocessors providing tag parsing, option handling, and common functionality.
class BasePreprocessor:
"""Base preprocessor class that all preprocessors must inherit from."""
defaults: dict = {}
tags: tuple = ()
def __init__(self, context: dict, logger: Logger, quiet=False, debug=False, options={}):
"""
Initialize preprocessor with build context and options.
Parameters:
- context (dict): Build context containing project_path, config, target, backend
- logger (Logger): Logger instance for processing messages
- quiet (bool): Suppress output messages
- debug (bool): Enable debug logging
- options (dict): Preprocessor-specific configuration options
"""
@staticmethod
def get_options(options_string: str) -> Dict[str, OptionValue]:
"""
Parse XML attribute string into typed options dictionary.
Parameters:
- options_string (str): String of XML-style attributes
Returns:
Dict[str, OptionValue]: Parsed options with proper types
Example:
'width="800" height="600" visible="true"' ->
{'width': 800, 'height': 600, 'visible': True}
"""
def apply(self):
"""
Run preprocessor against project content.
Must be implemented by each preprocessor.
Raises:
NotImplementedError: If not implemented by subclass
"""

Built-in preprocessor that handles escaped tag processing for nested tag scenarios.
class Preprocessor(BasePreprocessor):
"""
Internal preprocessor for unescaping escaped tags.
Removes leading < from escaped tag definitions.
"""
def process_escaped_tags(self, content: str) -> str:
"""
Remove escape sequences from tag definitions.
Parameters:
- content (str): Markdown content with escaped tags
Returns:
str: Content with tags unescaped
"""
def apply(self):
"""Process all .md files in working directory to unescape tags."""

OptionValue = int | float | bool | str
# Preprocessor context structure
PreprocessorContext = {
'project_path': Path, # Path to project directory
'config': dict, # Parsed configuration
'target': str, # Target format
'backend': str # Backend name
}
# Tag pattern structure for regex matching
TagPattern = {
'tag': str, # Tag name
'options': str, # Options string
'body': str # Tag content body
}

from foliant.preprocessors.base import BasePreprocessor
import re
class CustomPreprocessor(BasePreprocessor):
"""Custom preprocessor for special content transformation."""
defaults = {
'format': 'html',
'style': 'default'
}
tags = ('custom', 'transform')
def apply(self):
    """Rewrite every Markdown file under the working directory in place.

    Each ``*.md`` file found recursively below ``self.working_dir`` is read,
    every match of ``self.pattern`` is replaced with the result of
    ``self._process_tag``, and the file is written back as UTF-8.
    """
    for md_path in self.working_dir.rglob('*.md'):
        self.logger.debug(f'Processing {md_path}')
        original_text = md_path.read_text(encoding='utf8')
        # Substitute tags through the pattern inherited from the base class.
        rendered_text = self.pattern.sub(self._process_tag, original_text)
        md_path.write_text(rendered_text, encoding='utf8')
def _process_tag(self, match):
"""Process individual tag occurrence."""
tag = match.group('tag')
options_str = match.group('options') or ''
body = match.group('body')
# Parse options
options = self.get_options(options_str)
final_options = {**self.defaults, **self.options, **options}
# Transform content based on tag and options
if tag == 'custom':
return self._transform_custom(body, final_options)
elif tag == 'transform':
return self._transform_content(body, final_options)
return match.group(0) # Return unchanged if not handled
def _transform_custom(self, content, options):
"""Transform custom tag content."""
format_type = options['format']
style = options['style']
if format_type == 'html':
return f'<div class="custom-{style}">{content}</div>'
else:
return f'[{style.upper()}]: {content}'
def _transform_content(self, content, options):
"""Transform generic content."""
return content.upper() if options.get('uppercase') else contentExample Markdown with custom tags:
# My Document
<custom format="html" style="highlight">
Important content here
</custom>
<transform uppercase="true">
This text will be uppercase
</transform>
<custom style="callout">
This is a callout box
</custom>

Preprocessor usage:
from pathlib import Path
import logging

# Build context handed to the preprocessor.
context = dict(
    project_path=Path('./project'),
    config={'title': 'Test'},
    target='html',
    backend='mkdocs',
)

# Preprocessor-specific options; these are passed as ``options``.
custom_options = {'format': 'html', 'style': 'modern'}

# Create the preprocessor, then run it.
preprocessor = CustomPreprocessor(
    context=context,
    logger=logging.getLogger(),
    options=custom_options,
)
preprocessor.apply()

from foliant.preprocessors.base import BasePreprocessor
# Parse an XML-style attribute string into a typed dictionary.
parse_options = BasePreprocessor.get_options

options_string = 'width="800" height="600" visible="true" title="My Chart"'
options = parse_options(options_string)
print(options)
# Output: {'width': 800, 'height': 600, 'visible': True, 'title': 'My Chart'}

# An empty attribute string yields an empty dictionary.
empty_options = parse_options('')
print(empty_options)  # Output: {}

from foliant.preprocessors.base import BasePreprocessor
import subprocess
from pathlib import Path
class DiagramPreprocessor(BasePreprocessor):
"""Preprocessor for generating diagrams from text."""
defaults = {
'format': 'png',
'theme': 'default',
'output_dir': 'images'
}
tags = ('plantuml', 'mermaid')
def apply(self):
    """Process diagram tags in all Markdown files.

    Creates the image output directory named by the ``output_dir`` option
    below ``self.working_dir``, then rewrites every ``*.md`` file found
    recursively under the working directory with the text returned by
    ``self._process_file``.
    """
    output_dir = self.working_dir / self.options['output_dir']
    # parents=True so a nested setting such as 'assets/images' does not
    # raise FileNotFoundError when the intermediate directory is missing.
    output_dir.mkdir(parents=True, exist_ok=True)

    for markdown_file in self.working_dir.rglob('*.md'):
        content = self._process_file(markdown_file, output_dir)
        with open(markdown_file, 'w', encoding='utf8') as f:
            f.write(content)
def _process_file(self, file_path, output_dir):
"""Process single markdown file."""
with open(file_path, 'r', encoding='utf8') as f:
content = f.read()
return self.pattern.sub(
lambda m: self._process_diagram(m, output_dir, file_path.stem),
content
)
def _process_diagram(self, match, output_dir, file_stem):
"""Process individual diagram tag."""
tag = match.group('tag')
options_str = match.group('options') or ''
body = match.group('body')
options = {**self.defaults, **self.options, **self.get_options(options_str)}
# Generate unique filename
diagram_hash = hash(body + str(options))
filename = f"{file_stem}_{tag}_{abs(diagram_hash)}.{options['format']}"
output_path = output_dir / filename
# Generate diagram
if tag == 'plantuml':
self._generate_plantuml(body, output_path, options)
elif tag == 'mermaid':
self._generate_mermaid(body, output_path, options)
# Return markdown image reference
return f"})"
def _generate_plantuml(self, source, output_path, options):
    """Render PlantUML source to ``output_path``.

    Parameters:
    - source: PlantUML diagram text.
    - output_path: File the rendered image is written to.
    - options: Merged options; ``format`` selects the output type
      (passed to PlantUML as ``-t<format>``).

    Raises:
    subprocess.CalledProcessError: If ``plantuml`` exits with a non-zero
    status. No output file is written in that case.
    """
    # PlantUML reads the diagram from stdin only in -pipe mode, and then
    # writes the image to stdout. Capture it and store it under the exact
    # filename the caller will reference.
    result = subprocess.run(
        [
            'plantuml',
            '-pipe',
            '-charset', 'UTF-8',
            '-t' + options['format'],
        ],
        input=source.encode('utf8'),
        stdout=subprocess.PIPE,
        check=True,
    )
    with open(output_path, 'wb') as image_file:
        image_file.write(result.stdout)
def _generate_mermaid(self, source, output_path, options):
    """Render Mermaid source to ``output_path`` with the ``mmdc`` CLI.

    The diagram text is fed on stdin (``-i -``) and the ``theme`` option is
    passed as ``-t``. Raises ``subprocess.CalledProcessError`` when ``mmdc``
    exits with a non-zero status.
    """
    command = [
        'mmdc',
        '-i', '-',
        '-o', str(output_path),
        '-t', options['theme'],
    ]
    subprocess.run(command, input=source, text=True, check=True)


# Example foliant.yml preprocessor configuration:
title: My Project
preprocessors:
- includes
- plantuml:
format: svg
theme: dark
server_url: http://localhost:8080
- custom:
style: modern
format: html
uppercase: false

class ConditionalPreprocessor(BasePreprocessor):
"""Preprocessor for conditional content inclusion."""
defaults = {'target': 'all'}
tags = ('if', 'unless', 'target')
def apply(self):
    """Filter conditional content in every Markdown file, in place.

    Reads the build target from ``self.context['target']`` and rewrites each
    ``*.md`` file found recursively under ``self.working_dir`` with the
    result of ``self._process_conditionals``.
    """
    current_target = self.context['target']
    for md_path in self.working_dir.rglob('*.md'):
        source_text = md_path.read_text(encoding='utf8')
        # Keep or drop tagged sections according to the current target.
        filtered_text = self._process_conditionals(source_text, current_target)
        md_path.write_text(filtered_text, encoding='utf8')
def _process_conditionals(self, content, current_target):
"""Process conditional tags based on current build target."""
def process_tag(match):
tag = match.group('tag')
options_str = match.group('options') or ''
body = match.group('body')
options = self.get_options(options_str)
target_condition = options.get('target', 'all')
if tag == 'if':
# Include content if target matches
if target_condition == 'all' or target_condition == current_target:
return body
else:
return ''
elif tag == 'unless':
# Include content unless target matches
if target_condition != current_target:
return body
else:
return ''
elif tag == 'target':
# Include only for specific target
if target_condition == current_target:
return body
else:
return ''
return match.group(0)
return self.pattern.sub(process_tag, content)Usage in Markdown:
# Documentation
<if target="html">
This content only appears in HTML builds.
</if>
<unless target="pdf">
This content appears in all formats except PDF.
</unless>
<target target="pdf">
PDF-specific content here.
</target>

Install with Tessl CLI
npx tessl i tessl/pypi-foliant