Tessl Tile for pypi/markdown-it-py@4.0.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

cli.md configuration.md core-parsing.md index.md link-processing.md rendering.md syntax-tree.md token-system.md

syntax-tree.md (docs/)

0
# Syntax Tree Processing
1

2
Tree representation utilities for converting linear token streams into hierarchical structures for advanced document analysis and manipulation. This module is unique to the Python implementation and not part of the original JavaScript markdown-it.
3

4
## Capabilities
5

6
### SyntaxTreeNode Class
7

8
Hierarchical representation of markdown document structure.
9

10
```python { .api }
11
from collections.abc import Sequence


class SyntaxTreeNode:
    """
    A node of the Markdown syntax tree.

    A node is exactly one of:
    - the root of the document
    - a single unnested token
    - an opening/closing token pair together with the content nested in it
    """

    def __init__(self, tokens: Sequence[Token] = (), *, create_root: bool = True) -> None:
        """
        Initialize a syntax tree from a token stream.

        Parameters:
        - tokens: token stream to convert to a tree; annotated as a sequence
          because the default is an (immutable) empty tuple, not a list
        - create_root: whether to create a root node for the document
          (keyword-only)
        """

    # Properties
    token: Token | None                        # Associated token (for leaf nodes)
    nester_tokens: tuple[Token, Token] | None  # Opening/closing token pair (for containers)
    parent: SyntaxTreeNode | None              # Parent node
    children: list[SyntaxTreeNode]             # Child nodes
33
```
34

35
### Tree Construction
36

37
Build tree structures from token streams:
38

39
```python { .api }
40
# Class methods for tree creation
41
@classmethod
def from_tokens(cls, tokens: list[Token]) -> SyntaxTreeNode:
    """
    Alternate constructor: build a syntax tree out of a list of tokens.

    Parameters:
    - tokens: the tokens to convert into a tree

    Returns:
    - SyntaxTreeNode: the root node of the tree that was built

    NOTE(review): confirm this classmethod exists in markdown_it.tree for the
    documented version; the usage example below builds the tree by calling
    SyntaxTreeNode(tokens) directly.
    """
52
```
53

54
**Usage Example:**
55

56
```python
57
from markdown_it import MarkdownIt
58
from markdown_it.tree import SyntaxTreeNode
59

60
# Markdown source used for the demonstration
source = """
# Heading

Paragraph with **bold** text.

- Item 1
- Item 2
"""

md = MarkdownIt()
tokens = md.parse(source)

# Turn the flat token stream into a tree
tree = SyntaxTreeNode(tokens)

# Inspect the top level of the tree
print(f"Root has {len(tree.children)} children")
for child in tree.children:
    print(f"Child type: {child.token.type if child.token else 'container'}")
77
```
78

79
### Tree Traversal
80

81
Navigate and inspect tree structure:
82

83
```python { .api }
84
from collections.abc import Callable, Generator


def walk(
    self,
    filter: Callable[[SyntaxTreeNode], bool] | None = None,
) -> Generator[SyntaxTreeNode, None, None]:
    """
    Walk the tree depth-first, yielding nodes.

    Parameters:
    - filter: optional function used to filter nodes; it is called with a
      node (presumably nodes for which it returns a falsy value are
      skipped -- TODO confirm). Defaults to None.

    Yields:
    - SyntaxTreeNode: tree nodes in depth-first order

    NOTE(review): verify the `filter` parameter against markdown_it.tree;
    upstream `walk` appears to accept only a keyword-only `include_self` flag.
    """
94

95
@property
def is_root(self) -> bool:
    """Whether this node is the root of the tree."""
98

99
@property
def is_leaf(self) -> bool:
    """Whether this node has no child nodes.

    NOTE(review): confirm this property exists in markdown_it.tree.
    """
102

103
@property
def is_container(self) -> bool:
    """Whether this node stands for an opening/closing token pair.

    NOTE(review): confirm the property name against markdown_it.tree;
    upstream appears to call this `is_nested`.
    """
106
```
107

108
**Usage Example:**
109

110
```python
111
from markdown_it.tree import SyntaxTreeNode
112

113
def opening_type(node):
    """Return the type of the token that introduces *node*.

    Leaf nodes carry their token in ``node.token``; containers carry an
    opening/closing pair in ``node.nester_tokens`` and have no ``token``.
    Returns None for a node with neither (the root).
    """
    if node.token:
        return node.token.type
    if node.nester_tokens:
        return node.nester_tokens[0].type
    return None


# Tree traversal
for node in tree.walk():
    # Headings are containers, so their "heading_open" token lives in
    # nester_tokens rather than in node.token.
    if opening_type(node) == "heading_open":
        level = int(node.nester_tokens[0].tag[1:])  # h1->1, h2->2, etc.
        print(f"Found heading level {level}")

# Filter specific node types
def is_paragraph(node):
    return opening_type(node) == "paragraph_open"

# The builtin filter() works on any iterable of nodes, so this does not
# depend on walk() accepting a predicate.
for para_node in filter(is_paragraph, tree.walk()):
    print("Found paragraph")

# Check node types
for node in tree.children:
    if node.nester_tokens:
        print(f"Container with {len(node.children)} children")
    elif node.token:
        # Only dereference the token when the node actually has one.
        print(f"Leaf node: {node.token.type}")
132
```
133

134
### Tree Manipulation
135

136
Modify tree structure and content:
137

138
```python { .api }
139
def remove_child(self, child: SyntaxTreeNode) -> None:
    """
    Detach a child node from this node.

    Parameters:
    - child: the child node that should be removed

    NOTE(review): confirm this method exists in markdown_it.tree.
    """
146

147
def add_child(self, child: SyntaxTreeNode) -> None:
    """
    Attach a child node to this node.

    Parameters:
    - child: the child node that should be added

    NOTE(review): confirm this method exists in markdown_it.tree.
    """
154

155
def replace_child(self, old_child: SyntaxTreeNode, new_child: SyntaxTreeNode) -> None:
    """
    Swap one of this node's children for another node.

    Parameters:
    - old_child: the existing child that is replaced
    - new_child: the node that takes its place

    NOTE(review): confirm this method exists in markdown_it.tree.
    """
163
```
164

165
**Usage Example:**
166

167
```python
168
from markdown_it.tree import SyntaxTreeNode
169
from markdown_it.token import Token
170

171
# Create a new, empty <div> container. It is built from a balanced
# open/close token pair so the token stream stays well-formed when the tree
# is converted back to tokens (a lone "div_open" would never be closed).
div_tokens = [Token("div_open", "div", 1), Token("div_close", "div", -1)]
new_node = SyntaxTreeNode(div_tokens, create_root=False)

# Add to tree: append to the parent's child list and link the parent back
tree.children.append(new_node)
new_node.parent = tree

# Remove nodes
for node in list(tree.children):  # Copy list since we're modifying
    if node.token and node.token.type == "hr":
        tree.children.remove(node)
183
```
184

185
### Tree Conversion
186

187
Convert between tree and token representations:
188

189
```python { .api }
190
def to_tokens(self) -> list[Token]:
    """
    Flatten the tree into a linear token stream again.

    Returns:
    - list[Token]: the tokens of the tree in linear order
    """
197

198
def to_pretty(self, *, indent: int = 2, show_text: bool = False) -> str:
    """
    Produce a pretty-printed representation of the tree.

    Parameters:
    - indent: number of spaces of indentation per level
    - show_text: whether text content is shown

    Returns:
    - str: the formatted tree structure

    NOTE(review): confirm the method name against markdown_it.tree; upstream
    appears to expose it as `pretty`.
    """
209
```
210

211
**Usage Example:**
212

213
```python
214
from markdown_it import MarkdownIt
215
from markdown_it.tree import SyntaxTreeNode
216

217
md = MarkdownIt()
tokens = md.parse("# Title\n\nParagraph text.")

# Round trip: token stream -> tree -> token stream
tree = SyntaxTreeNode(tokens)
reconstructed_tokens = tree.to_tokens()

# Both token streams must render to the same HTML
render = md.renderer.render
original_html = render(tokens, md.options, {})
reconstructed_html = render(reconstructed_tokens, md.options, {})
assert original_html == reconstructed_html

# Pretty print tree structure
# NOTE(review): confirm the method name; upstream appears to call it pretty().
print(tree.to_pretty(show_text=True))
231
```
232

233
## Advanced Tree Operations
234

235
### Content Extraction
236

237
Extract specific content from tree structure:
238

239
```python
240
def extract_headings(tree):
    """Extract all headings with their levels and text.

    Parameters:
    - tree: node whose ``walk()`` yields the nodes to inspect

    Returns:
    - list of dicts with keys ``'level'`` (int taken from the ``hN`` tag),
      ``'text'`` (content of the heading's first inline child, or ``""``
      when there is none) and ``'node'`` (the heading node itself)
    """
    headings = []

    for node in tree.walk():
        # A heading is a container: its opening token is the first nester
        # token. Checking nester_tokens alone is sufficient to identify it.
        nesters = node.nester_tokens
        if not nesters or nesters[0].type != "heading_open":
            continue

        # Tag is "h1".."h6"; everything after the "h" is the level.
        level = int(nesters[0].tag[1:])

        # The heading text is the content of its first inline child.
        text = next(
            (
                child.token.content
                for child in node.children
                if child.token and child.token.type == "inline"
            ),
            "",
        )

        headings.append({
            'level': level,
            'text': text,
            'node': node,
        })

    return headings
265

266
def _collect_text(node):
    """Return the concatenated content of every text-bearing token at or below *node*."""
    token = node.token
    if token is not None and token.type in ("text", "code_inline"):
        return token.content
    return "".join(_collect_text(child) for child in node.children)


def extract_links(tree):
    """Extract all links with URLs and text.

    Parameters:
    - tree: node whose ``walk()`` yields the nodes to inspect

    Returns:
    - list of dicts with keys ``'url'`` (the ``href`` attribute of the
      opening link token), ``'text'`` (the link's visible text, including
      text nested inside emphasis or inline code) and ``'node'`` (the link
      node itself)
    """
    links = []

    for node in tree.walk():
        # A link is a container whose opening nester token is "link_open".
        nesters = node.nester_tokens
        if not nesters or nesters[0].type != "link_open":
            continue

        links.append({
            'url': nesters[0].attrGet("href"),
            # Gather text from all descendants so formatted link text such as
            # "[click **here**](url)" is not cut off at the first child.
            'text': _collect_text(node),
            'node': node,
        })

    return links
291
```
292

293
### Tree Transformation
294

295
Transform tree structure for custom processing:
296

297
```python
298
def wrap_paragraphs_in_divs(tree):
    """Wrap all top-level paragraphs of *tree* in div containers.

    Each direct child of ``tree`` that is a paragraph is replaced, in place,
    by a ``<div class="paragraph-wrapper">`` container holding that
    paragraph. Paragraphs nested deeper (e.g. inside lists) are not touched.

    Parameters:
    - tree: node whose direct children are inspected and modified

    Returns:
    - None; ``tree.children`` is modified in place
    """
    from markdown_it.token import Token
    from markdown_it.tree import SyntaxTreeNode

    # Iterate over a snapshot; every replacement is one-for-one, so the
    # indices of the snapshot stay valid for the live list.
    for index, node in enumerate(list(tree.children)):
        nesters = node.nester_tokens
        if not nesters or nesters[0].type != "paragraph_open":
            continue

        # Create wrapper div tokens
        div_open = Token("div_open", "div", 1)
        div_open.attrSet("class", "paragraph-wrapper")
        div_close = Token("div_close", "div", -1)

        # Build the container through the constructor (rather than patching
        # attributes onto an empty root node) so its nester tokens, children
        # and parent links are set up consistently.
        wrapper_node = SyntaxTreeNode(
            [div_open, *node.to_tokens(), div_close],
            create_root=False,
        )

        # Replace in tree
        wrapper_node.parent = tree
        tree.children[index] = wrapper_node
323

324
def add_table_of_contents(tree):
    """Add a table of contents based on the document's headings.

    Builds a ``<div class="table-of-contents">`` holding an "h2" title and a
    bullet list with one link per heading, and inserts it as the first child
    of *tree*. Does nothing when the document has no headings.

    Parameters:
    - tree: root node of the document; modified in place

    Returns:
    - None
    """
    from markdown_it.token import Token
    from markdown_it.tree import SyntaxTreeNode

    headings = extract_headings(tree)

    if not headings:
        return

    def text_token(content):
        return Token("text", "", 0, content=content)

    def inline_token(content, children):
        # Inline content is rendered from the inline token's children, so an
        # inline token needs child tokens for its text to show up.
        return Token("inline", "", 0, content=content, children=children)

    # Create TOC tokens
    title = "Table of Contents"
    toc_tokens = [
        Token("div_open", "div", 1, attrs={"class": "table-of-contents"}),
        Token("heading_open", "h2", 1),
        inline_token(title, [text_token(title)]),
        Token("heading_close", "h2", -1),
        Token("bullet_list_open", "ul", 1),
    ]

    for heading in headings:
        label = heading['text']
        href = f"#{label.lower().replace(' ', '-')}"
        toc_tokens.extend([
            Token("list_item_open", "li", 1),
            Token("paragraph_open", "p", 1),
            # Link tokens are inline-level: they belong in the children of
            # the paragraph's inline token, not in the block token stream.
            inline_token(label, [
                Token("link_open", "a", 1, attrs={"href": href}),
                text_token(label),
                Token("link_close", "a", -1),
            ]),
            Token("paragraph_close", "p", -1),
            Token("list_item_close", "li", -1),
        ])

    toc_tokens.extend([
        Token("bullet_list_close", "ul", -1),
        Token("div_close", "div", -1),
    ])

    # Create TOC tree node (a container, not a second root)
    toc_tree = SyntaxTreeNode(toc_tokens, create_root=False)

    # Insert at beginning
    tree.children.insert(0, toc_tree)
    toc_tree.parent = tree
362
```
363

364
### Tree Analysis
365

366
Analyze document structure using tree representation:
367

368
```python
369
def analyze_document_structure(tree):
    """Analyze document structure and return statistics.

    Parameters:
    - tree: root node; its descendants (not the root itself) are counted

    Returns:
    - dict with keys ``'total_nodes'``, ``'headings'`` (list of heading
      levels in document order), ``'paragraphs'``, ``'lists'``,
      ``'code_blocks'``, ``'links'``, ``'images'`` and
      ``'max_nesting_level'`` (0 for direct children of the root)
    """
    stats = {
        'total_nodes': 0,
        'headings': [],
        'paragraphs': 0,
        'lists': 0,
        'code_blocks': 0,
        'links': 0,
        'images': 0,
        'max_nesting_level': 0
    }

    def opening_token(node):
        # Leaf nodes keep their token in node.token; containers (headings,
        # paragraphs, lists, links) keep an open/close pair in nester_tokens
        # and have no node.token, so both places must be consulted.
        if node.token:
            return node.token
        if node.nester_tokens:
            return node.nester_tokens[0]
        return None

    def analyze_node(node, level=0):
        stats['total_nodes'] += 1
        stats['max_nesting_level'] = max(stats['max_nesting_level'], level)

        token = opening_token(node)
        if token is not None:
            token_type = token.type
            if token_type == "heading_open":
                # Tag is "h1".."h6"; everything after the "h" is the level.
                stats['headings'].append(int(token.tag[1:]))
            elif token_type == "paragraph_open":
                stats['paragraphs'] += 1
            elif token_type in ("bullet_list_open", "ordered_list_open"):
                stats['lists'] += 1
            elif token_type in ("code_block", "fence"):
                stats['code_blocks'] += 1
            elif token_type == "link_open":
                stats['links'] += 1
            elif token_type == "image":
                stats['images'] += 1

        for child in node.children:
            analyze_node(child, level + 1)

    for child in tree.children:
        analyze_node(child)

    return stats
408

409
# Usage: collect the statistics once, then report the figures of interest
stats = analyze_document_structure(tree)

# Number of paragraphs found in the document
print(f"Document has {stats['paragraphs']} paragraphs")
# Distinct heading levels (duplicates collapse in the set)
print(f"Heading levels: {set(stats['headings'])}")
# Deepest nesting level reached below the root
print(f"Maximum nesting: {stats['max_nesting_level']}")
414
```

Version

Tile

Files

syntax-tree.md (docs/)

syntax-tree.mddocs/