0
# Token System
1
2
Structured representation of parsed markdown elements with metadata, attributes, and hierarchical relationships for advanced processing and custom rendering.
3
4
## Capabilities
5
6
### Token Class
7
8
Core data structure representing parsed markdown elements.
9
10
```python { .api }
11
class Token:
    """Represents a parsed markdown element with metadata and attributes."""

    # Core properties
    type: str  # Token type (e.g., "paragraph_open")
    tag: str  # HTML tag name (e.g., "p")
    nesting: int  # Level change: 1 (opening), 0 (self-closing), -1 (closing)
    attrs: dict[str, str | int | float]  # HTML attributes
    map: list[int] | None  # Source map [line_begin, line_end]
    level: int  # Nesting level
    children: list[Token] | None  # Child tokens (for "inline" and "image" tokens)
    content: str  # Inner content
    markup: str  # Markup characters ('*', '_', fence string, etc.)
    info: str  # Additional info (fence language, autolink flag, etc.)
    meta: dict[Any, Any]  # Plugin storage
    block: bool  # True for block-level tokens
    hidden: bool  # Skip when rendering (tight lists)
28
```
29
30
### Token Creation
31
32
Create tokens programmatically or from dictionaries.
33
34
```python { .api }
35
def __init__(
    self,
    type: str,
    tag: str,
    nesting: int,
    attrs: dict[str, str | int | float] | None = None,
    map: list[int] | None = None,
    level: int = 0,
    children: list[Token] | None = None,
    content: str = "",
    markup: str = "",
    info: str = "",
    meta: dict[Any, Any] | None = None,
    block: bool = False,
    hidden: bool = False,
):
    """
    Initialize a new token.

    Parameters:
    - type: token type (e.g., "paragraph_open")
    - tag: HTML tag name (e.g., "p")
    - nesting: level change: 1 (opening), 0 (self-closing), -1 (closing)
    - attrs: HTML attributes
    - map: source map [line_begin, line_end]
    - level: nesting level
    - children: child tokens (for "inline" and "image" tokens)
    - content: inner content
    - markup: markup characters ('*', '_', fence string, etc.)
    - info: additional info (fence language, autolink flag, etc.)
    - meta: plugin storage
    - block: True for block-level tokens
    - hidden: skip when rendering (tight lists)
    """
52
53
@classmethod
def from_dict(cls, dct: dict[str, Any]) -> Token:
    """
    Create token from dictionary representation.

    Parameters:
    - dct: dictionary with token data, keyed by token property name
      (e.g., "type", "tag", "nesting", "markup")

    Returns:
    - Token: new token instance
    """
64
```
65
66
**Usage Example:**
67
68
```python
69
from markdown_it.token import Token
70
71
# Create token manually
token = Token(
    type="paragraph_open",
    tag="p",
    nesting=1,
    attrs={"class": "custom"},
    level=0,
    block=True,
)

# Create from dictionary (keys are token property names)
token_dict = {
    "type": "strong_open",
    "tag": "strong",
    "nesting": 1,
    "markup": "**",
}
token = Token.from_dict(token_dict)
89
```
90
91
### Attribute Management
92
93
Methods for managing HTML attributes on tokens.
94
95
```python { .api }
96
def attrItems(self) -> list[tuple[str, str | int | float]]:
    """
    Get (key, value) list of attributes.

    Returns:
    - list: attribute key-value pairs
    """
103
104
def attrPush(self, attrData: tuple[str, str | int | float]) -> None:
    """
    Add [name, value] attribute to list.

    Parameters:
    - attrData: (name, value) tuple to add
    """
111
112
def attrSet(self, name: str, value: str | int | float) -> None:
    """
    Set attribute value, overriding if exists.

    Parameters:
    - name: attribute name
    - value: attribute value
    """
120
121
def attrGet(self, name: str) -> str | int | float | None:
    """
    Get attribute value.

    Parameters:
    - name: attribute name

    Returns:
    - str | int | float | None: attribute value or None if not found
    """
131
132
def attrJoin(self, name: str, value: str) -> None:
    """
    Join value to existing attribute via space, or create new.

    Parameters:
    - name: attribute name
    - value: value to join
    """
140
```
141
142
**Usage Example:**
143
144
```python
145
from markdown_it.token import Token
146
147
token = Token("div_open", "div", 1)

# Set attributes
token.attrSet("class", "container")
token.attrSet("id", "main")

# Join to existing attribute (useful for CSS classes)
token.attrJoin("class", "highlight")

# Get attribute value
class_value = token.attrGet("class")  # "container highlight"

# List all attributes
attrs = token.attrItems()  # [("class", "container highlight"), ("id", "main")]
161
```
162
163
### Token Manipulation
164
165
Methods for copying and converting tokens.
166
167
```python { .api }
168
def copy(self, **changes: Any) -> Token:
    """
    Create shallow copy with optional changes.

    Parameters:
    - changes: keyword arguments for properties to change

    Returns:
    - Token: new token instance with changes applied
    """
178
179
def as_dict(
    self,
    *,
    children: bool = True,
    as_upstream: bool = True,
    meta_serializer: Callable[[dict[Any, Any]], Any] | None = None,
    filter: Callable[[str, Any], bool] | None = None,
    dict_factory: Callable[..., dict[str, Any]] = dict,
) -> dict[str, Any]:
    """
    Convert token to dictionary representation.

    All options are keyword-only.

    Parameters:
    - children: also convert children to dicts
    - as_upstream: ensure compatibility with markdown-it format
    - meta_serializer: hook for serializing Token.meta
    - filter: callable to filter attributes; called as filter(key, value)
    - dict_factory: function to create dictionaries

    Returns:
    - dict: token as dictionary
    """
201
```
202
203
**Usage Example:**
204
205
```python
206
from markdown_it.token import Token
207
208
# Original token
token = Token("paragraph_open", "p", 1, level=0)

# Create modified copy (the original token is left as it was)
modified = token.copy(
    attrs={"class": "highlight"},
    level=1,
)

# Convert to dictionary
token_dict = token.as_dict()
print(token_dict)
220
221
# Convert with filtering
222
def filter_func(key, value):
    """Return True only for the 'type', 'tag' and 'attrs' keys; value is ignored."""
    return key in ("type", "tag", "attrs")
224
225
filtered_dict = token.as_dict(filter=filter_func)
226
```
227
228
## Common Token Types
229
230
Standard token types produced by markdown-it-py:
231
232
### Block Tokens
233
234
```python
235
# Structural block elements
236
"paragraph_open" / "paragraph_close" # <p> tags
237
"heading_open" / "heading_close" # <h1>-<h6> tags
238
"blockquote_open" / "blockquote_close" # <blockquote> tags
239
"list_item_open" / "list_item_close" # <li> tags
240
"bullet_list_open" / "bullet_list_close" # <ul> tags
241
"ordered_list_open" / "ordered_list_close" # <ol> tags
242
243
# Content blocks
244
"code_block" # <pre><code> blocks
245
"fence" # Fenced code blocks
246
"hr" # <hr> horizontal rules
247
"html_block" # Raw HTML blocks
248
"table_open" / "table_close" # <table> tags
249
"tr_open" / "tr_close" # <tr> tags
250
"td_open" / "td_close" # <td> tags
251
"th_open" / "th_close" # <th> tags
252
```
253
254
### Inline Tokens
255
256
```python
257
# Text formatting
258
"inline" # Container for inline content
259
"text" # Plain text
260
"code_inline" # `code` spans
261
"em_open" / "em_close" # <em> emphasis
262
"strong_open" / "strong_close" # <strong> tags
263
"s_open" / "s_close" # <s> strikethrough
264
265
# Links and media
"link_open" / "link_close" # <a> links; autolinks (<https://...>) use these too, with markup="autolink" and info="auto"
"image" # <img> images
269
270
# Special
"softbreak" # Soft line breaks
"hardbreak" # Hard line breaks <br>
"html_inline" # Inline HTML
"text_special" # Entities and escapes (info="entity" / "escape"); normally merged into "text" by the text_join rule
275
```
276
277
### Token Inspection
278
279
```python
280
from markdown_it import MarkdownIt
281
282
md = MarkdownIt()
tokens = md.parse("""
# Heading

Paragraph with **bold** and *italic* text.

- List item 1
- List item 2
""")

# Inspect token structure
for i, token in enumerate(tokens):
    print(f"{i}: {token.type} | {token.tag} | level={token.level}")
    # children is None for tokens that carry no nested inline tokens
    if token.children:
        for j, child in enumerate(token.children):
            print(f" {j}: {child.type} | content='{child.content}'")
298
```
299
300
## Advanced Token Processing
301
302
### Modifying Token Stream
303
304
```python
305
from markdown_it import MarkdownIt
306
307
def add_custom_class(tokens):
    """
    Add custom CSS class to all paragraph tokens.

    Tokens are modified in place; attrSet overrides any class already set
    on a paragraph.

    Parameters:
    - tokens: token stream to update

    Returns:
    - the same tokens object that was passed in
    """
    for token in tokens:
        if token.type == "paragraph_open":
            token.attrSet("class", "custom-paragraph")
    return tokens
313
314
md = MarkdownIt()
tokens = md.parse("# Title\n\nParagraph text.")
modified_tokens = add_custom_class(tokens)
# Render the edited stream directly; the empty dict is the render environment.
html = md.renderer.render(modified_tokens, md.options, {})
318
```
319
320
### Token Filtering
321
322
```python
323
def filter_html_tokens(tokens):
    """
    Remove HTML tokens for security.

    Drops every token whose type starts with "html_". Inline HTML lives in
    the children of "inline" tokens, so children are filtered recursively;
    a top-level-only filter would let "html_inline" through.

    Parameters:
    - tokens: token stream (not modified)

    Returns:
    - list: new token list; tokens with children are replaced by copies
      holding the filtered children
    """
    kept = []
    for token in tokens:
        if token.type.startswith("html_"):
            continue
        if token.children:
            # Copy instead of mutating so the caller's tokens stay intact.
            token = token.copy(children=filter_html_tokens(token.children))
        kept.append(token)
    return kept
327
328
def extract_headings(tokens):
    """
    Extract heading text from token stream.

    Parameters:
    - tokens: list of tokens

    Returns:
    - list: one {'level': int, 'text': str} dict per heading, in order
    """
    headings = []
    # Walk adjacent pairs rather than calling tokens.index(token): index()
    # rescans the list and matches by equality, so it can return an earlier
    # look-alike token, and "+ 1" overruns when heading_open is last.
    for token, next_token in zip(tokens, tokens[1:]):
        # The token after heading_open should be the inline heading content.
        if token.type == "heading_open" and next_token.type == "inline":
            headings.append({
                'level': int(token.tag[1:]),  # h1->1, h2->2, etc.
                'text': next_token.content,
            })
    return headings
341
```
342
343
### Custom Token Creation
344
345
```python
346
def create_custom_block(content, css_class=None):
    """
    Create custom block with wrapper div.

    Parameters:
    - content: paragraph text; added as a single text token, not parsed
      as markdown
    - css_class: optional CSS class for the wrapper div; skipped when
      empty or None

    Returns:
    - list: div_open, paragraph_open, inline, paragraph_close, div_close
    """
    # Local import keeps this snippet self-contained.
    from markdown_it.token import Token

    # Opening div
    div_open = Token("div_open", "div", 1, block=True)
    if css_class:
        div_open.attrSet("class", css_class)

    # Content paragraph, nested one level inside the div
    p_open = Token("paragraph_open", "p", 1, level=1, block=True)
    # The renderer outputs an inline token's children, not its content, so
    # the text has to be supplied as a child "text" token. The inline
    # container sits one level below its paragraph.
    inline = Token(
        "inline",
        "",
        0,
        level=2,
        content=content,
        block=True,
        children=[Token("text", "", 0, content=content)],
    )
    p_close = Token("paragraph_close", "p", -1, level=1, block=True)

    # Closing div
    div_close = Token("div_close", "div", -1, block=True)

    return [div_open, p_open, inline, p_close, div_close]
368
```