Static site generator supporting Markdown and reStructuredText
—
Reader classes for parsing different markup formats including Markdown, reStructuredText, and HTML. Readers extract metadata, process content, and convert markup to HTML for theme rendering.
Central reader manager that coordinates different format readers and provides caching functionality for improved performance.
class Readers(FileStampDataCacher):
"""
Content reader manager with caching support.
Parameters:
- settings (dict): Site configuration dictionary
- cache_name (str, optional): Cache identifier for file caching
"""
def __init__(self, settings: dict, cache_name: str = ""): ...
def read_file(
self,
base_path: str,
path: str,
content_class=Content,
fmt: str = None
) -> Content:
"""
Read and parse a content file.
Parameters:
- base_path (str): Base directory path
- path (str): Relative file path
- content_class (class, optional): Content class to instantiate (default: Content)
- fmt (str, optional): Force specific format reader
Returns:
Content: Parsed content object with metadata and HTML content
"""
# Available readers (populated from settings)
readers: dict[str, BaseReader] # Format -> Reader mapping
Foundation class for all content format readers providing common functionality for metadata extraction and content processing.
class BaseReader:
"""
Base class for content format readers.
Parameters:
- settings (dict): Site configuration dictionary
"""
def __init__(self, settings: dict): ...
enabled: bool = True # Whether this reader is enabled
file_extensions: list[str] # Supported file extensions
def read(self, source_path: str) -> tuple[str, dict]:
"""
Read and parse content file.
Parameters:
- source_path (str): Path to content file
Returns:
tuple: (HTML content string, metadata dictionary)
"""
def process_metadata(self, name: str, value: str) -> tuple[str, Any]:
"""
Process individual metadata field.
Parameters:
- name (str): Metadata field name
- value (str): Raw metadata value
Returns:
tuple: (processed name, processed value)
"""
Reader for reStructuredText (.rst) files using the docutils library for parsing and HTML generation.
class RstReader(BaseReader):
"""
reStructuredText content reader.
Supports:
- Standard reStructuredText syntax
- Custom Pelican directives (code highlighting, etc.)
- Metadata extraction from docutils meta fields
- Math rendering via MathJax
- Custom role and directive registration
"""
file_extensions: list[str] = ['rst']
def read(self, source_path: str) -> tuple[str, dict]:
"""
Parse reStructuredText file and extract content/metadata.
Uses docutils for parsing with Pelican-specific settings and directives.
Supports custom roles and directives for enhanced functionality.
"""
Reader for Markdown (.md, .markdown, .mkd) files using the Python-Markdown library with configurable extensions.
class MarkdownReader(BaseReader):
"""
Markdown content reader.
Supports:
- Standard Markdown syntax
- Configurable Python-Markdown extensions
- Metadata extraction from YAML front matter or meta extension
- Code highlighting via Pygments
- Table support, footnotes, and other extensions
"""
file_extensions: list[str] = ['md', 'markdown', 'mkd']
def read(self, source_path: str) -> tuple[str, dict]:
"""
Parse Markdown file and extract content/metadata.
Uses Python-Markdown with configurable extensions.
Metadata can be extracted from YAML front matter or meta extension.
"""
Reader for HTML (.html, .htm) files that extracts metadata from HTML meta tags and preserves HTML content.
class HTMLReader(BaseReader):
"""
HTML content reader.
Supports:
- Raw HTML content preservation
- Metadata extraction from HTML meta tags
- Title extraction from <title> tag
- Custom metadata via <meta> tags
"""
file_extensions: list[str] = ['html', 'htm']
def read(self, source_path: str) -> tuple[str, dict]:
"""
Parse HTML file and extract content/metadata.
Extracts metadata from HTML meta tags and preserves HTML content as-is.
Useful for importing existing HTML content or custom layouts.
"""
Configure Markdown reader behavior in settings:
# In pelicanconf.py
MARKDOWN = {
'extension_configs': {
'markdown.extensions.codehilite': {'css_class': 'highlight'},
'markdown.extensions.extra': {},
'markdown.extensions.meta': {},
'markdown.extensions.toc': {'permalink': True},
},
'output_format': 'html5',
}
Configure reStructuredText reader behavior:
# In pelicanconf.py
DOCUTILS_SETTINGS = {
'smart_quotes': True,
'initial_header_level': 2,
'syntax_highlight': 'short',
'input_encoding': 'utf-8',
'math_output': 'MathJax',
}
Register custom readers for additional formats:
# In pelicanconf.py
READERS = {
'txt': 'path.to.custom.TextReader',
'org': 'path.to.custom.OrgModeReader',
}
All readers process these standard metadata fields:
- title: Content title
- date: Publication date (ISO format or custom format)
- modified: Last modification date
- category: Content category (articles only)
- tags: Comma-separated tags (articles only)
- slug: URL slug (auto-generated if not provided)
- author: Author name
- authors: Multiple authors (comma-separated)
- summary: Content summary/description
- lang: Content language code
- status: Content status (published, draft, hidden)
- template: Custom template name
- save_as: Custom output file path
- url: Custom URL path
---
title: My Article Title
date: 2023-01-15 10:30
category: Python
tags: tutorial, programming
author: John Doe
summary: A comprehensive guide to Python programming.
---
# Article Content
Content goes here...
Title: My Article Title
Date: 2023-01-15 10:30
Category: Python
Tags: tutorial, programming
Author: John Doe
Summary: A comprehensive guide to Python programming.
# Article Content
Content goes here...
My Article Title
================
:date: 2023-01-15 10:30
:category: Python
:tags: tutorial, programming
:author: John Doe
:summary: A comprehensive guide to Python programming.
Article Content
---------------
Content goes here...
<html>
<head>
<title>My Article Title</title>
<meta name="date" content="2023-01-15 10:30">
<meta name="category" content="Python">
<meta name="tags" content="tutorial, programming">
<meta name="author" content="John Doe">
<meta name="summary" content="A comprehensive guide to Python programming.">
</head>
<body>
<h1>Article Content</h1>
<p>Content goes here...</p>
</body>
</html>
from pelican.readers import Readers
from pelican.settings import read_settings
# Load settings and create readers
settings = read_settings('pelicanconf.py')
readers = Readers(settings)
# Read a Markdown file
content = readers.read_file(
base_path='content',
path='articles/my-post.md',
content_class=Article
)
print(content.title) # Article title
print(content.content) # HTML content
print(content.metadata) # Raw metadata dictionary
from pelican.readers import BaseReader
import json
class JsonReader(BaseReader):
    """
    Custom reader for JSON content files.
    The JSON document is read as an object: its 'content' key holds the body
    and every other key is treated as a metadata field.
    """

    file_extensions = ['json']

    def read(self, source_path):
        """
        Read JSON file and extract content/metadata.
        Parameters:
        - source_path (str): Path to the JSON content file
        Returns:
        tuple: (content string, processed metadata dictionary)
        """
        with open(source_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # The body lives under 'content'; a missing key yields an empty body
        content = data.get('content', '')

        processed_metadata = {}
        for raw_name, raw_value in data.items():
            if raw_name == 'content':
                continue
            # Metadata is handed to process_metadata as text. A JSON array
            # (e.g. tags or authors) is joined into comma-separated form;
            # str() on a list would produce its repr, "['a', 'b']", instead.
            if isinstance(raw_value, list):
                text = ', '.join(str(item) for item in raw_value)
            else:
                text = str(raw_value)
            # NOTE(review): unpacking follows the (name, value) return shown
            # for BaseReader.process_metadata on this page - confirm against
            # the installed Pelican version.
            name, value = self.process_metadata(raw_name, text)
            processed_metadata[name] = value
        return content, processed_metadata
# Register custom reader
# In pelicanconf.py:
# READERS = {'json': 'path.to.JsonReader'}
from pelican.generators import Generator
class CustomGenerator(Generator):
    """Generator that uses readers to process content."""

    def generate_context(self):
        """
        Generate content using readers.
        Each path returned by get_content_files() is parsed with
        self.readers.read_file() and the result is passed to process_content().
        """
        # Article is not imported at module level in this example, so bring
        # it into scope here to avoid a NameError.
        from pelican.contents import Article

        for content_file in self.get_content_files():
            # Use readers to parse file
            content = self.readers.read_file(
                base_path=self.path,
                path=content_file,
                content_class=Article,
            )
            # Process content
            self.process_content(content)

    def get_content_files(self):
        """Get list of content files to process."""
        # Implementation depends on file discovery strategy
        return []

    def process_content(self, content):
        """Process parsed content."""
        # Add to context or perform custom processing
        pass


from pelican.readers import BaseReader
from datetime import datetime
class CustomReader(BaseReader):
"""Reader with custom metadata processing."""
def process_metadata(self, name, value):
"""Custom metadata processing logic."""
name, value = super().process_metadata(name, value)
# Custom date parsing
if name == 'date':
if isinstance(value, str):
try:
value = datetime.strptime(value, '%Y-%m-%d %H:%M')
except ValueError:
value = datetime.strptime(value, '%Y-%m-%d')
# Custom tag processing
elif name == 'tags':
if isinstance(value, str):
value = [tag.strip() for tag in value.split(',')]
return name, value
Install with Tessl CLI
npx tessl i tessl/pypi-pelican