A sane and fast Markdown parser with useful plugins and renderers
—
Output format renderers that convert parsed Markdown tokens into various formats including HTML, reStructuredText, and normalized Markdown. Renderers provide the final output stage of the parsing pipeline and can be customized or extended for specific output requirements.
Converts parsed Markdown tokens to HTML output with security features, customizable escaping, and protocol filtering.
class HTMLRenderer(BaseRenderer):
"""
A renderer for converting Markdown to HTML.
Attributes:
- NAME: ClassVar[Literal["html"]] = "html"
- HARMFUL_PROTOCOLS: ClassVar[Tuple[str, ...]] - Blocked URL protocols
- GOOD_DATA_PROTOCOLS: ClassVar[Tuple[str, ...]] - Allowed data URLs
"""
def __init__(
self,
escape: bool = True,
allow_harmful_protocols: Optional[bool] = None
):
"""
Initialize HTML renderer.
Parameters:
- escape: Whether to escape HTML in rendered output
- allow_harmful_protocols: Whether to allow potentially harmful URL protocols
"""
def safe_url(self, url: str) -> str:
"""
Ensure the given URL is safe by filtering harmful protocols.
Parameters:
- url: URL to validate
Returns:
Safe URL or empty string if harmful
"""

Usage example:
from mistune import HTMLRenderer, Markdown
# Default HTML renderer with escaping
renderer = HTMLRenderer(escape=True)
md = Markdown(renderer=renderer)
html = md('**Bold** and <script>alert("xss")</script>')
# Output: '<p><strong>Bold</strong> and &lt;script&gt;alert(&quot;xss&quot;)&lt;/script&gt;</p>\n'
# HTML renderer without escaping
renderer = HTMLRenderer(escape=False)
md = Markdown(renderer=renderer)
html = md('**Bold** and <em>italic</em>')
# Output: '<p><strong>Bold</strong> and <em>italic</em></p>\n'

Converts Markdown tokens to reStructuredText format for documentation systems and Sphinx integration.
class RSTRenderer(BaseRenderer):
"""
A renderer for converting Markdown to reStructuredText format.
Attributes:
- NAME: ClassVar[Literal["rst"]] = "rst"
"""
def __init__(self):
"""Initialize reStructuredText renderer."""

Usage example:
from mistune import RSTRenderer, Markdown
renderer = RSTRenderer()
md = Markdown(renderer=renderer)
rst = md('''
# Main Heading
This is a paragraph with **bold** and *italic* text.
- List item 1
- List item 2
``inline code``
''')
print(rst)
# Output:
# Main Heading
# ============
#
# This is a paragraph with **bold** and *italic* text.
#
# - List item 1
# - List item 2
#
# ``inline code``

Normalizes and reformats Markdown input, useful for Markdown formatting and standardization.
class MarkdownRenderer(BaseRenderer):
"""
A renderer for converting parsed tokens back to normalized Markdown.
Attributes:
- NAME: ClassVar[Literal["markdown"]] = "markdown"
"""
def __init__(self):
"""Initialize Markdown renderer."""

Usage example:
from mistune import MarkdownRenderer, Markdown
renderer = MarkdownRenderer()
md = Markdown(renderer=renderer)
# Normalize inconsistent Markdown formatting
normalized = md('''
# Inconsistent Heading
This is a paragraph with multiple spaces.
* List item 1
* List item 2
''')
print(normalized)
# Output: Clean, normalized Markdown

Abstract base class for creating custom renderers with common rendering functionality.
class BaseRenderer:
"""
Base class for all renderers providing common rendering functionality.
Attributes:
- NAME: ClassVar[str] - Unique identifier for the renderer
"""
def render_tokens(
self,
tokens: List[Dict[str, Any]],
state: BlockState
) -> str:
"""
Render a list of tokens to string output.
Parameters:
- tokens: List of parsed tokens to render
- state: Current block parsing state
Returns:
Rendered string output
"""
def render_token(
self,
token: Dict[str, Any],
state: BlockState
) -> str:
"""
Render a single token to string output.
Parameters:
- token: Token to render
- state: Current block parsing state
Returns:
Rendered string representation of token
"""
def finalize_data(self, data: str) -> str:
"""
Finalize rendered data before returning.
Parameters:
- data: Rendered data string
Returns:
Finalized output string
"""

Pattern for creating custom renderers by extending BaseRenderer:
from mistune import BaseRenderer, BlockState
class CustomRenderer(BaseRenderer):
NAME = "custom"
def render_heading(self, text: str, level: int) -> str:
# Custom heading rendering
return f"{'#' * level} {text}\n\n"
def render_paragraph(self, text: str) -> str:
# Custom paragraph rendering
return f"<p class='custom'>{text}</p>\n"
def render_strong(self, text: str) -> str:
# Custom bold rendering
return f"<b class='strong'>{text}</b>"
# Use custom renderer
from mistune import Markdown
md = Markdown(renderer=CustomRenderer())
output = md('# Heading\n\nThis is **bold** text.')

Renderers integrate with the parser through the renderer parameter:
# String shortcuts
md = mistune.create_markdown(renderer='html') # HTMLRenderer
md = mistune.create_markdown(renderer='ast') # No renderer (AST output)
# Renderer instances
from mistune import HTMLRenderer, RSTRenderer
md = mistune.create_markdown(renderer=HTMLRenderer(escape=False))
md = mistune.create_markdown(renderer=RSTRenderer())
# Custom renderer
md = mistune.create_markdown(renderer=CustomRenderer())

When renderer='ast' is passed (or a Markdown instance is created with renderer=None), mistune returns the parsed Abstract Syntax Tree instead of rendered output:
import mistune
# Get AST tokens instead of rendered output
tokens = mistune.markdown('**bold** text', renderer='ast')
# Returns: [{'type': 'paragraph', 'children': [
# {'type': 'strong', 'children': [{'type': 'text', 'raw': 'bold'}]},
# {'type': 'text', 'raw': ' text'}
# ]}]
# AST is useful for analysis and custom processing
def analyze_tokens(tokens):
for token in tokens:
print(f"Token type: {token['type']}")
if 'children' in token:
analyze_tokens(token['children'])
analyze_tokens(tokens)

Install with Tessl CLI
npx tessl i tessl/pypi-mistune