tessl/pypi-pelican

Static site generator supporting Markdown and reStructuredText

—

Pending

Overview

Eval results

Files

Content Reading

Name: tessl/pypi-pelican
Author: tessl

Reader classes for parsing different markup formats including Markdown, reStructuredText, and HTML. Readers extract metadata, process content, and convert markup to HTML for theme rendering.

Capabilities

Readers Manager

Central reader manager that coordinates different format readers and provides caching functionality for improved performance.

class Readers(FileStampDataCacher):
    """
    Content reader manager with caching support.

    Keeps the per-format reader objects in ``readers`` and exposes
    ``read_file`` as the single entry point for parsing a content file.

    Parameters:
    - settings (dict): Site configuration dictionary
    - cache_name (str, optional): Cache identifier for file caching
    """
    def __init__(self, settings: dict, cache_name: str = ""): ...

    def read_file(
        self,
        base_path: str,
        path: str,
        content_class=Content,
        fmt: str | None = None
    ) -> Content:
        """
        Read and parse a content file.

        Parameters:
        - base_path (str): Base directory path
        - path (str): File path relative to base_path
        - content_class (class, optional): Content class to instantiate (default: Content)
        - fmt (str | None, optional): Force a specific format reader;
          None (the default) leaves the format unforced

        Returns:
        Content: Parsed content object with metadata and HTML content
        """

    # Available readers (populated from settings)
    readers: dict[str, BaseReader]  # Format -> Reader mapping

Base Reader Class

Foundation class for all content format readers providing common functionality for metadata extraction and content processing.

class BaseReader:
    """
    Base class for content format readers.

    Format-specific readers subclass this, declare ``file_extensions`` and
    override ``read``.

    Parameters:
    - settings (dict): Site configuration dictionary
    """
    def __init__(self, settings: dict): ...

    enabled: bool = True  # Whether this reader is enabled
    file_extensions: list[str]  # Supported file extensions

    def read(self, source_path: str) -> tuple[str, dict]:
        """
        Read and parse content file.

        Parameters:
        - source_path (str): Path to content file

        Returns:
        tuple: (HTML content string, metadata dictionary)
        """

    def process_metadata(self, name: str, value: str) -> Any:
        """
        Process individual metadata field.

        Parameters:
        - name (str): Metadata field name (selects which processing applies)
        - value (str): Raw metadata value

        Returns:
        Any: The processed value only. The field name is not returned, so
        callers store the result under ``name`` themselves, e.g.
        ``metadata[name] = self.process_metadata(name, value)``.
        """

reStructuredText Reader

Reader for reStructuredText (.rst) files using the docutils library for parsing and HTML generation.

class RstReader(BaseReader):
    """
    Reader that converts reStructuredText sources to HTML.

    Features:
    - Regular reStructuredText markup
    - Pelican-specific directives (for example code highlighting)
    - Metadata taken from docutils meta fields
    - MathJax-based math rendering
    - Registration of custom roles and directives
    """

    file_extensions: list[str] = ["rst"]

    def read(self, source_path: str) -> tuple[str, dict]:
        """
        Parse a reStructuredText file into content and metadata.

        Parsing is done by docutils, configured with Pelican-specific
        settings and directives. Custom roles and directives are supported
        for enhanced functionality.
        """

Markdown Reader

Reader for Markdown (.md, .markdown, .mkd) files using the Python-Markdown library with configurable extensions.

class MarkdownReader(BaseReader):
    """
    Reader that converts Markdown sources to HTML.

    Features:
    - Regular Markdown markup
    - Python-Markdown extensions, selectable through configuration
    - Metadata read from YAML front matter or via the meta extension
    - Pygments-based code highlighting
    - Tables, footnotes, and other extension-provided syntax
    """

    file_extensions: list[str] = ["md", "markdown", "mkd"]

    def read(self, source_path: str) -> tuple[str, dict]:
        """
        Parse a Markdown file into content and metadata.

        Rendering is done by Python-Markdown with the configured extensions.
        Metadata comes from YAML front matter or from the meta extension.
        """

HTML Reader

Reader for HTML (.html, .htm) files that extracts metadata from HTML meta tags and preserves HTML content.

class HTMLReader(BaseReader):
    """
    Reader for content that is already written as HTML.

    Features:
    - HTML content is kept as-is
    - Metadata taken from HTML meta tags
    - Title taken from the <title> tag
    - Custom metadata supplied through <meta> tags
    """

    file_extensions: list[str] = ["html", "htm"]

    def read(self, source_path: str) -> tuple[str, dict]:
        """
        Parse an HTML file into content and metadata.

        Metadata is pulled from the HTML meta tags while the HTML content
        itself is preserved unchanged. Handy for importing existing HTML
        content or custom layouts.
        """

Reader Configuration

Markdown Configuration

Configure Markdown reader behavior in settings:

# pelicanconf.py -- options for the Markdown reader
MARKDOWN = {
    # One entry per Python-Markdown extension: dotted name -> its options.
    "extension_configs": {
        "markdown.extensions.codehilite": {
            "css_class": "highlight",
        },
        "markdown.extensions.extra": {},
        "markdown.extensions.meta": {},
        "markdown.extensions.toc": {
            "permalink": True,
        },
    },
    "output_format": "html5",
}

reStructuredText Configuration

Configure reStructuredText reader behavior:

# pelicanconf.py -- options for the reStructuredText reader
DOCUTILS_SETTINGS = {
    "smart_quotes": True,
    "initial_header_level": 2,
    "syntax_highlight": "short",
    "input_encoding": "utf-8",
    "math_output": "MathJax",
}

Custom Readers

# In pelicanconf.py
# READERS maps a file extension to the reader *class* itself. Pelican
# instantiates the value directly, so a dotted-path string does not work.
# Mapping an extension to None disables processing of that extension.
from path.to.custom import OrgModeReader, TextReader

READERS = {
    'txt': TextReader,
    'org': OrgModeReader,
}

Metadata Processing

Common Metadata Fields

All readers process these standard metadata fields:

title: Content title
date: Publication date (ISO format or custom format)
modified: Last modification date
category: Content category (articles only)
tags: Comma-separated tags (articles only)
slug: URL slug (auto-generated if not provided)
author: Author name
authors: Multiple authors (comma-separated)
summary: Content summary/description
lang: Content language code
status: Content status (published, draft, hidden)
template: Custom template name
save_as: Custom output file path
url: Custom URL path

Metadata Format Examples

Markdown with YAML Front Matter

---
title: My Article Title
date: 2023-01-15 10:30
category: Python
tags: tutorial, programming
author: John Doe
summary: A comprehensive guide to Python programming.
---

# Article Content

Content goes here...

Markdown with Meta Extension

Title: My Article Title
Date: 2023-01-15 10:30
Category: Python
Tags: tutorial, programming
Author: John Doe
Summary: A comprehensive guide to Python programming.

# Article Content

Content goes here...

reStructuredText

My Article Title
================

:date: 2023-01-15 10:30
:category: Python
:tags: tutorial, programming
:author: John Doe
:summary: A comprehensive guide to Python programming.

Article Content
---------------

Content goes here...

HTML

<html>
<head>
    <title>My Article Title</title>
    <meta name="date" content="2023-01-15 10:30">
    <meta name="category" content="Python">
    <meta name="tags" content="tutorial, programming">
    <meta name="author" content="John Doe">
    <meta name="summary" content="A comprehensive guide to Python programming.">
</head>
<body>
    <h1>Article Content</h1>
    <p>Content goes here...</p>
</body>
</html>

Usage Examples

Using Readers Directly

from pelican.contents import Article  # content class passed to read_file below
from pelican.readers import Readers
from pelican.settings import read_settings

# Load settings and create readers
settings = read_settings('pelicanconf.py')
readers = Readers(settings)

# Read a Markdown file; `path` is relative to `base_path`
content = readers.read_file(
    base_path='content',
    path='articles/my-post.md',
    content_class=Article
)

print(content.title)     # Article title
print(content.content)   # HTML content
print(content.metadata)  # Raw metadata dictionary

Custom Reader Implementation

from pelican.readers import BaseReader
import json

class JsonReader(BaseReader):
    """Custom reader for JSON content files.

    Expects each file to hold one JSON object: the ``content`` key is the
    body and every other key is treated as a metadata field.
    """

    file_extensions = ['json']

    def read(self, source_path):
        """Read JSON file and extract content/metadata.

        Parameters:
        - source_path (str): Path to the JSON file

        Returns:
        tuple: (content string, processed metadata dictionary)
        """
        with open(source_path, encoding='utf-8') as f:
            data = json.load(f)

        # Body text; a file without a 'content' key yields an empty body
        content = data.get('content', '')

        # Everything except the body is metadata. process_metadata() returns
        # only the processed value, so the field name stays the dict key.
        # Values are passed as strings, matching the raw form it expects.
        processed_metadata = {
            name: self.process_metadata(name, str(value))
            for name, value in data.items()
            if name != 'content'
        }

        return content, processed_metadata

# Register custom reader
# In pelicanconf.py (READERS maps extensions to reader classes, not dotted-path strings):
# from path.to import JsonReader
# READERS = {'json': JsonReader}

Reader Integration with Generators

from pelican.contents import Article  # content class used in generate_context
from pelican.generators import Generator

class CustomGenerator(Generator):
    """Generator that uses readers to process content."""

    def generate_context(self):
        """Parse every discovered content file and hand it to process_content."""
        for content_file in self.get_content_files():
            # Use readers to parse file; content_file is relative to self.path
            content = self.readers.read_file(
                base_path=self.path,
                path=content_file,
                content_class=Article
            )

            # Process content
            self.process_content(content)

    def get_content_files(self):
        """Get list of content files to process.

        Returns:
        list: File paths to parse (empty in this skeleton)
        """
        # Implementation depends on file discovery strategy
        return []

    def process_content(self, content):
        """Process parsed content.

        Parameters:
        - content: Object returned by readers.read_file
        """
        # Add to context or perform custom processing
        pass

Metadata Processing Customization

from pelican.readers import BaseReader
from datetime import datetime

class CustomReader(BaseReader):
    """Reader with custom metadata processing."""

    def process_metadata(self, name, value):
        """Custom metadata processing logic.

        Parameters:
        - name (str): Metadata field name
        - value (str): Raw metadata value

        Returns:
        The processed value only (not a (name, value) pair), matching
        BaseReader.process_metadata so the result can be stored directly
        under ``name``.

        Raises:
        ValueError: If a string 'date' matches neither supported format
        """
        value = super().process_metadata(name, value)

        # The base class may already have converted the value; only values
        # that are still plain strings get the custom handling below.
        if not isinstance(value, str):
            return value

        # Custom date parsing: try date + time first, then date only
        if name == 'date':
            try:
                return datetime.strptime(value, '%Y-%m-%d %H:%M')
            except ValueError:
                return datetime.strptime(value, '%Y-%m-%d')

        # Custom tag processing: split on commas and drop empty entries
        # left by trailing or doubled commas
        if name == 'tags':
            return [tag.strip() for tag in value.split(',') if tag.strip()]

        return value

Install with Tessl CLI