Python port of markdown-it providing CommonMark-compliant markdown parsing with configurable syntax and pluggable architecture
—
Structured representation of parsed markdown elements with metadata, attributes, and hierarchical relationships for advanced processing and custom rendering.
Core data structure representing parsed markdown elements.
class Token:
    """Represents a parsed markdown element with metadata and attributes."""

    # Core properties
    type: str  # Token type (e.g., "paragraph_open")
    tag: str  # HTML tag name (e.g., "p")
    nesting: int  # Level change: 1 (opening), 0 (self-closing), -1 (closing)
    attrs: dict[str, str | int | float]  # HTML attributes
    map: list[int] | None  # Source map [line_begin, line_end]
    level: int  # Nesting level
    children: list[Token] | None  # Child tokens (for inline and img tokens)
    content: str  # Inner content
    markup: str  # Markup characters ('*', '_', fence string, etc.)
    info: str  # Additional info (fence language, autolink flag, etc.)
    meta: dict[Any, Any]  # Plugin storage
    block: bool  # True for block-level tokens
    hidden: bool  # Skip when rendering (tight lists)

# Create tokens programmatically or from dictionaries.
def __init__(
    self,
    type: str,
    tag: str,
    nesting: int,
    attrs: dict | None = None,
    map: list[int] | None = None,
    level: int = 0,
    children: list[Token] | None = None,
    content: str = "",
    markup: str = "",
    info: str = "",
    meta: dict | None = None,
    block: bool = False,
    hidden: bool = False
) -> None:
    """Initialize a new token.

    Parameters:
    - type: token type name (e.g., "paragraph_open")
    - tag: HTML tag name (e.g., "p")
    - nesting: level change: 1 (opening), 0 (self-closing), -1 (closing)
    - attrs: HTML attributes
    - map: source map [line_begin, line_end]
    - level: nesting level
    - children: child tokens (for inline and img tokens)
    - content: inner content
    - markup: markup characters ('*', '_', fence string, etc.)
    - info: additional info (fence language, autolink flag, etc.)
    - meta: plugin storage
    - block: True for block-level tokens
    - hidden: skip when rendering (tight lists)
    """
@classmethod
def from_dict(cls, dct: dict[str, Any]) -> Token:
    """
    Create token from dictionary representation.

    Parameters:
    - dct: dictionary with token data

    Returns:
    - Token: new token instance
"""Usage Example:
from markdown_it.token import Token
# Create token manually
token = Token(
type="paragraph_open",
tag="p",
nesting=1,
attrs={"class": "custom"},
level=0,
block=True
)
# Create from dictionary
token_dict = {
"type": "strong_open",
"tag": "strong",
"nesting": 1,
"markup": "**"
}
token = Token.from_dict(token_dict)

Methods for managing HTML attributes on tokens.
def attrItems(self) -> list[tuple[str, str | int | float]]:
    """
    Get (key, value) list of attributes.

    Returns:
    - list: attribute key-value pairs
    """
def attrPush(self, attrData: tuple[str, str | int | float]) -> None:
    """
    Add [name, value] attribute to list.

    Parameters:
    - attrData: (name, value) tuple to add
    """
def attrSet(self, name: str, value: str | int | float) -> None:
    """
    Set attribute value, overriding if exists.

    Parameters:
    - name: attribute name
    - value: attribute value
    """
def attrGet(self, name: str) -> str | int | float | None:
    """
    Get attribute value.

    Parameters:
    - name: attribute name

    Returns:
    - str | int | float | None: attribute value or None if not found
    """
def attrJoin(self, name: str, value: str) -> None:
    """
    Join value to existing attribute via space, or create new.

    Useful for space-separated attribute values such as CSS classes.

    Parameters:
    - name: attribute name
    - value: value to join
"""Usage Example:
from markdown_it.token import Token
token = Token("div_open", "div", 1)
# Set attributes
token.attrSet("class", "container")
token.attrSet("id", "main")
# Join to existing attribute (useful for CSS classes)
token.attrJoin("class", "highlight")
# Get attribute value
class_value = token.attrGet("class") # "container highlight"
# List all attributes
attrs = token.attrItems()  # [("class", "container highlight"), ("id", "main")]

Methods for copying and converting tokens.
def copy(self, **changes: Any) -> Token:
    """
    Create shallow copy with optional changes.

    Parameters:
    - changes: keyword arguments for properties to change

    Returns:
    - Token: new token instance with changes applied
    """
def as_dict(
    self,
    *,
    children: bool = True,
    as_upstream: bool = True,
    meta_serializer: callable = None,
    filter: callable = None,
    dict_factory: callable = dict,
) -> dict[str, Any]:
    """
    Convert token to dictionary representation.

    Parameters:
    - children: also convert children to dicts
    - as_upstream: ensure compatibility with markdown-it format
    - meta_serializer: hook for serializing Token.meta
    - filter: callable to filter attributes
    - dict_factory: function to create dictionaries

    Returns:
    - dict: token as dictionary
"""Usage Example:
from markdown_it.token import Token
# Original token
token = Token("paragraph_open", "p", 1, level=0)
# Create modified copy
modified = token.copy(
attrs={"class": "highlight"},
level=1
)
# Convert to dictionary
token_dict = token.as_dict()
print(token_dict)
# Convert with filtering
def filter_func(key, value):
return key in ['type', 'tag', 'attrs']
filtered_dict = token.as_dict(filter=filter_func)

Standard token types produced by markdown-it-py:
# Structural block elements
"paragraph_open" / "paragraph_close" # <p> tags
"heading_open" / "heading_close" # <h1>-<h6> tags
"blockquote_open" / "blockquote_close" # <blockquote> tags
"list_item_open" / "list_item_close" # <li> tags
"bullet_list_open" / "bullet_list_close" # <ul> tags
"ordered_list_open" / "ordered_list_close" # <ol> tags
# Content blocks
"code_block" # <pre><code> blocks
"fence" # Fenced code blocks
"hr" # <hr> horizontal rules
"html_block" # Raw HTML blocks
"table_open" / "table_close" # <table> tags
"tr_open" / "tr_close" # <tr> tags
"td_open" / "td_close" # <td> tags
"th_open" / "th_close" # <th> tags# Text formatting
"inline" # Container for inline content
"text" # Plain text
"code_inline" # `code` spans
"em_open" / "em_close" # <em> emphasis
"strong_open" / "strong_close" # <strong> tags
"s_open" / "s_close" # <s> strikethrough
# Links and media
"link_open" / "link_close" # <a> links
"image" # <img> images
"autolink_open" / "autolink_close" # Auto-detected links
# Special
"softbreak" # Soft line breaks
"hardbreak" # Hard line breaks <br>
"html_inline" # Inline HTML
"entity" # HTML entitiesfrom markdown_it import MarkdownIt
md = MarkdownIt()
tokens = md.parse("""
# Heading
Paragraph with **bold** and *italic* text.
- List item 1
- List item 2
""")
# Inspect token structure
for i, token in enumerate(tokens):
print(f"{i}: {token.type} | {token.tag} | level={token.level}")
if token.children:
for j, child in enumerate(token.children):
print(f" {j}: {child.type} | content='{child.content}'")from markdown_it import MarkdownIt
def add_custom_class(tokens):
    """Tag every paragraph-opening token with a custom CSS class.

    Mutates the tokens in place and returns the same list.
    """
    for tok in tokens:
        if tok.type != "paragraph_open":
            continue
        tok.attrSet("class", "custom-paragraph")
    return tokens
md = MarkdownIt()
tokens = md.parse("# Title\n\nParagraph text.")
modified_tokens = add_custom_class(tokens)
html = md.renderer.render(modified_tokens, md.options, {})

def filter_html_tokens(tokens):
    """Remove raw HTML tokens for security.

    Drops every token whose type starts with 'html_'
    (i.e. "html_block" and "html_inline") and returns a new list;
    the input list is left unmodified.
    """
    return [token for token in tokens
            if not token.type.startswith('html_')]
def extract_headings(tokens):
    """Extract heading text from token stream.

    Parameters:
    - tokens: sequence of tokens as produced by MarkdownIt.parse

    Returns:
    - list of {'level': int, 'text': str} dicts, one per "heading_open"
      token that is immediately followed by an "inline" token.
    """
    headings = []
    for i, token in enumerate(tokens):
        if token.type != "heading_open":
            continue
        # The inline token carrying the heading text immediately follows the
        # open token.  enumerate avoids tokens.index(token), which was O(n)
        # per heading, mispaired heading_open tokens that compare equal, and
        # raised IndexError when heading_open was the final token.
        if i + 1 < len(tokens) and tokens[i + 1].type == "inline":
            headings.append({
                'level': int(token.tag[1]),  # h1->1, h2->2, etc.
                'text': tokens[i + 1].content
            })
    return headings

def create_custom_block(content, css_class=None):
    """Create custom block with wrapper div.

    Parameters:
    - content: inline text placed inside the wrapped paragraph
    - css_class: optional CSS class set on the wrapper div

    Returns:
    - list[Token]: [div_open, paragraph_open, inline, paragraph_close, div_close]
    """
    tokens = []
    # Opening div
    div_open = Token("div_open", "div", 1)
    if css_class:
        div_open.attrSet("class", css_class)
    tokens.append(div_open)
    # Content paragraph
    p_open = Token("paragraph_open", "p", 1, level=1)
    inline = Token("inline", "", 0, content=content, level=1)
    p_close = Token("paragraph_close", "p", -1, level=1)
    tokens.extend([p_open, inline, p_close])
    # Closing div
    div_close = Token("div_close", "div", -1)
    tokens.append(div_close)
    return tokens

# Install with Tessl CLI
npx tessl i tessl/pypi-markdown-it-py