Python parser for the CommonMark Markdown spec
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Comprehensive parsing functionality with full control over the Abstract Syntax Tree (AST). The Parser class converts Markdown text into a tree of Node objects, which can then be manipulated programmatically.
Main parsing class that converts CommonMark Markdown text into an Abstract Syntax Tree representation.
class Parser:
def __init__(self, options={}):
"""
Initialize a new Parser instance.
Args:
options (dict): Configuration options for parsing behavior
"""
def parse(self, my_input):
"""
Parse CommonMark text into an AST.
Args:
my_input (str): CommonMark Markdown text to parse
Returns:
Node: Root node of the parsed AST
"""
Represents individual nodes in the Abstract Syntax Tree with methods for tree manipulation and traversal.
class Node:
def __init__(self, node_type, sourcepos):
"""
Create a new Node.
Args:
node_type (str): Type of the node (e.g., 'document', 'paragraph', 'text', 'heading')
sourcepos (SourcePos): Source position information [[start_line, start_col], [end_line, end_col]]
"""
def walker(self):
"""
Create a NodeWalker for traversing this node and its descendants.
Returns:
NodeWalker: Iterator for tree traversal
"""
def append_child(self, child):
"""
Append a child node to this node.
Args:
child (Node): Node to append as child
"""
def prepend_child(self, child):
"""
Prepend a child node to this node.
Args:
child (Node): Node to prepend as child
"""
def unlink(self):
"""
Remove this node from its parent, unlinking it from the tree.
"""
def insert_after(self, sibling):
"""
Insert this node after the specified sibling node.
Args:
sibling (Node): Node after which to insert this node
"""
def insert_before(self, sibling):
"""
Insert this node before the specified sibling node.
Args:
sibling (Node): Node before which to insert this node
"""
def pretty(self):
"""
Print pretty-printed representation of this node to stdout.
Uses pprint to display the node's internal dictionary structure.
Returns:
None: Prints to stdout rather than returning a value
"""
def normalize(self):
"""
Normalize the node by combining adjacent text nodes.
"""
def is_container(self):
"""
Check if this node can contain other nodes.
Returns:
bool: True if node can contain children, False otherwise
"""
Iterator for traversing AST nodes in document order, providing fine-grained control over tree traversal.
class NodeWalker:
def __init__(self, root):
"""
Create a NodeWalker starting at the specified root node.
Args:
root (Node): Root node to start traversal from
"""
def nxt(self):
"""
Get the next node in the traversal.
Returns:
WalkEvent or None: Dictionary with 'node' (Node) and 'entering' (bool) keys,
or None if traversal is complete
"""
def resume_at(self, node, entering):
"""
Resume traversal at a specific node and entering state.
Args:
node (Node): Node to resume at
entering (bool): Whether we're entering or exiting the node
"""
from commonmark import Parser
parser = Parser()
markdown = """
# Hello World
This is a paragraph with **bold** text.
"""
ast = parser.parse(markdown)
ast.pretty() # Print AST structure to stdout
from commonmark import Parser
from commonmark.node import Node
parser = Parser()
ast = parser.parse("# Original Title")
# Create a new text node
new_text = Node('text', [[1, 1], [1, 9]])
new_text.literal = "New Title"
# Replace the title text
title_node = ast.first_child # Heading node
old_text = title_node.first_child # Original text
title_node.append_child(new_text)
old_text.unlink()
from commonmark import Parser
parser = Parser()
ast = parser.parse("""
# Title
Some text with **bold** and *italic*.
""")
walker = ast.walker()
event = walker.nxt()
while event:
    node, entering = event['node'], event['entering']
    if entering:
        print(f"Entering: {node.t}")
        if hasattr(node, 'literal') and node.literal:
            print(f" Content: '{node.literal}'")
    event = walker.nxt()
# Source position format: [[start_line, start_col], [end_line, end_col]]
SourcePos = list[list[int]]
# Node types
NodeType = str # 'document', 'paragraph', 'text', 'strong', 'emph', 'heading', etc.
# Walking event structure
WalkEvent = dict[str, Node | bool] # {'node': Node, 'entering': bool} or None
Common node properties that can be accessed:
- node.t: Node type (e.g., 'document', 'paragraph', 'text', 'strong', 'emph', 'heading')
- node.literal: Text content for text nodes (str or None)
- node.first_child: First child node (Node or None)
- node.last_child: Last child node (Node or None)
- node.parent: Parent node (Node or None)
- node.nxt: Next sibling node (Node or None)
- node.prv: Previous sibling node (Node or None)
- node.sourcepos: Source position information [[start_line, start_col], [end_line, end_col]]
- node.string_content: String content for container nodes (str)
- node.info: Info string for code blocks (str or None)
- node.destination: URL for links and images (str or None)
- node.title: Title for links and images (str or None)
- node.level: Heading level 1-6 for heading nodes (int or None)
- node.list_data: List metadata dictionary for list and list item nodes (dict)
Install with Tessl CLI
npx tessl i tessl/pypi-commonmark