or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

cli.md configuration.md core-parsing.md index.md link-processing.md rendering.md syntax-tree.md token-system.md

docs/syntax-tree.md

0

# Syntax Tree Processing

1

2

Tree representation utilities for converting linear token streams into hierarchical structures for advanced document analysis and manipulation. This module is unique to the Python implementation and not part of the original JavaScript markdown-it.

3

4

## Capabilities

5

6

### SyntaxTreeNode Class

7

8

Hierarchical representation of markdown document structure.

9

10

```python { .api }

11

class SyntaxTreeNode:

12

"""

13

A Markdown syntax tree node representing either:

14

- Root of the document

15

- Single unnested token

16

- Token pair (open/close) with nested content

17

"""

18

19

def __init__(self, tokens: Sequence[Token] = (), *, create_root: bool = True):

20

"""

21

Initialize syntax tree from token stream.

22

23

Parameters:

24

- tokens: token stream to convert to tree

25

- create_root: whether to create a root node for the document

26

"""

27

28

# Properties

29

token: Token | None # Associated token (for leaf nodes)

30

nester_tokens: tuple[Token, Token] | None # Opening/closing token pair (for containers)

31

parent: SyntaxTreeNode | None # Parent node

32

children: list[SyntaxTreeNode] # Child nodes

33

```

34

35

### Tree Construction

36

37

Build tree structures from token streams:

38

39

```python { .api }

40

# Class methods for tree creation

41

@classmethod

42

def from_tokens(cls, tokens: list[Token]) -> SyntaxTreeNode:

43

"""

44

Create syntax tree from token list.

45

46

Parameters:

47

- tokens: list of tokens to convert

48

49

Returns:

50

- SyntaxTreeNode: root node of constructed tree

51

"""

52

```

53

54

**Usage Example:**

55

56

```python

57

from markdown_it import MarkdownIt

58

from markdown_it.tree import SyntaxTreeNode

59

60

md = MarkdownIt()

61

tokens = md.parse("""

62

# Heading

63

64

Paragraph with **bold** text.

65

66

- Item 1

67

- Item 2

68

""")

69

70

# Create syntax tree

71

tree = SyntaxTreeNode(tokens)

72

73

# Access tree structure

74

print(f"Root has {len(tree.children)} children")

75

for child in tree.children:

76

print(f"Child type: {child.token.type if child.token else 'container'}")

77

```

78

79

### Tree Traversal

80

81

Navigate and inspect tree structure:

82

83

```python { .api }

84

def walk(self, filter: Callable[[SyntaxTreeNode], bool] | None = None) -> Generator[SyntaxTreeNode, None, None]:

85

"""

86

Walk the tree depth-first, yielding nodes.

87

88

Parameters:

89

- filter: optional function to filter nodes

90

91

Yields:

92

- SyntaxTreeNode: tree nodes in depth-first order

93

"""

94

95

@property

96

def is_root(self) -> bool:

97

"""True if this is the root node."""

98

99

@property

100

def is_leaf(self) -> bool:

101

"""True if this node has no children."""

102

103

@property

104

def is_container(self) -> bool:

105

"""True if this node represents a token pair container."""

106

```

107

108

**Usage Example:**

109

110

```python

111

from markdown_it.tree import SyntaxTreeNode

112

113

# Tree traversal

114

for node in tree.walk():

115

if node.token and node.token.type == "heading_open":

116

level = int(node.token.tag[1]) # h1->1, h2->2, etc.

117

print(f"Found heading level {level}")

118

119

# Filter specific node types

120

def is_paragraph(node):

121

return node.token and node.token.type == "paragraph_open"

122

123

for para_node in tree.walk(filter=is_paragraph):

124

print("Found paragraph")

125

126

# Check node types

127

for node in tree.children:

128

if node.is_container:

129

print(f"Container with {len(node.children)} children")

130

elif node.is_leaf:

131

print(f"Leaf node: {node.token.type}")

132

```

133

134

### Tree Manipulation

135

136

Modify tree structure and content:

137

138

```python { .api }

139

def remove_child(self, child: SyntaxTreeNode) -> None:

140

"""

141

Remove child node from this node.

142

143

Parameters:

144

- child: child node to remove

145

"""

146

147

def add_child(self, child: SyntaxTreeNode) -> None:

148

"""

149

Add child node to this node.

150

151

Parameters:

152

- child: child node to add

153

"""

154

155

def replace_child(self, old_child: SyntaxTreeNode, new_child: SyntaxTreeNode) -> None:

156

"""

157

Replace existing child with new child.

158

159

Parameters:

160

- old_child: child to replace

161

- new_child: replacement child

162

"""

163

```

164

165

**Usage Example:**

166

167

```python

168

from markdown_it.tree import SyntaxTreeNode

169

from markdown_it.token import Token

170

171

# Create new nodes

172

new_token = Token("div_open", "div", 1)

173

new_node = SyntaxTreeNode()

174

new_node.token = new_token

175

176

# Add to tree

177

tree.add_child(new_node)

178

179

# Remove nodes

180

for node in list(tree.children): # Copy list since we're modifying

181

if node.token and node.token.type == "hr":

182

tree.remove_child(node)

183

```

184

185

### Tree Conversion

186

187

Convert between tree and token representations:

188

189

```python { .api }

190

def to_tokens(self) -> list[Token]:

191

"""

192

Convert tree back to linear token stream.

193

194

Returns:

195

- list[Token]: linearized token representation

196

"""

197

198

def to_pretty(self, *, indent: int = 2, show_text: bool = False) -> str:

199

"""

200

Generate pretty-printed tree representation.

201

202

Parameters:

203

- indent: indentation spaces per level

204

- show_text: whether to show text content

205

206

Returns:

207

- str: formatted tree structure

208

"""

209

```

210

211

**Usage Example:**

212

213

```python

214

from markdown_it import MarkdownIt

215

from markdown_it.tree import SyntaxTreeNode

216

217

md = MarkdownIt()

218

tokens = md.parse("# Title\n\nParagraph text.")

219

220

# Token stream -> Tree -> Token stream

221

tree = SyntaxTreeNode(tokens)

222

reconstructed_tokens = tree.to_tokens()

223

224

# Verify round-trip consistency

225

original_html = md.renderer.render(tokens, md.options, {})

226

reconstructed_html = md.renderer.render(reconstructed_tokens, md.options, {})

227

assert original_html == reconstructed_html

228

229

# Pretty print tree structure

230

print(tree.to_pretty(show_text=True))

231

```

232

233

## Advanced Tree Operations

234

235

### Content Extraction

236

237

Extract specific content from tree structure:

238

239

```python

240

def extract_headings(tree):

241

"""Extract all headings with their levels and text."""

242

headings = []

243

244

for node in tree.walk():

245

if (node.is_container and

246

node.nester_tokens and

247

node.nester_tokens[0].type == "heading_open"):

248

249

level = int(node.nester_tokens[0].tag[1])

250

251

# Find text content in children

252

text = ""

253

for child in node.children:

254

if child.token and child.token.type == "inline":

255

text = child.token.content

256

break

257

258

headings.append({

259

'level': level,

260

'text': text,

261

'node': node

262

})

263

264

return headings

265

266

def extract_links(tree):

267

"""Extract all links with URLs and text."""

268

links = []

269

270

for node in tree.walk():

271

if (node.is_container and

272

node.nester_tokens and

273

node.nester_tokens[0].type == "link_open"):

274

275

href = node.nester_tokens[0].attrGet("href")

276

277

# Extract link text

278

text = ""

279

for child in node.children:

280

if child.token and child.token.type == "text":

281

text = child.token.content

282

break

283

284

links.append({

285

'url': href,

286

'text': text,

287

'node': node

288

})

289

290

return links

291

```

292

293

### Tree Transformation

294

295

Transform tree structure for custom processing:

296

297

```python

298

def wrap_paragraphs_in_divs(tree):

299

"""Wrap all paragraphs in div containers."""

300

from markdown_it.token import Token

301

302

for node in list(tree.children): # Copy since we're modifying

303

if (node.is_container and

304

node.nester_tokens and

305

node.nester_tokens[0].type == "paragraph_open"):

306

307

# Create wrapper div

308

div_open = Token("div_open", "div", 1)

309

div_open.attrSet("class", "paragraph-wrapper")

310

div_close = Token("div_close", "div", -1)

311

312

# Create new container node

313

wrapper_node = SyntaxTreeNode()

314

wrapper_node.parent = tree

315

wrapper_node.nester_tokens = (div_open, div_close)

316

wrapper_node.children = [node]

317

318

# Update parent relationships

319

node.parent = wrapper_node

320

321

# Replace in tree

322

tree.replace_child(node, wrapper_node)

323

324

def add_table_of_contents(tree):

325

"""Add table of contents based on headings."""

326

headings = extract_headings(tree)

327

328

if not headings:

329

return

330

331

# Create TOC tokens

332

toc_tokens = [

333

Token("div_open", "div", 1, attrs={"class": "table-of-contents"}),

334

Token("heading_open", "h2", 1),

335

Token("inline", "", 0, content="Table of Contents"),

336

Token("heading_close", "h2", -1),

337

Token("bullet_list_open", "ul", 1)

338

]

339

340

for heading in headings:

341

toc_tokens.extend([

342

Token("list_item_open", "li", 1),

343

Token("paragraph_open", "p", 1),

344

Token("link_open", "a", 1, attrs={"href": f"#{heading['text'].lower().replace(' ', '-')}"}),

345

Token("inline", "", 0, content=heading['text']),

346

Token("link_close", "a", -1),

347

Token("paragraph_close", "p", -1),

348

Token("list_item_close", "li", -1)

349

])

350

351

toc_tokens.extend([

352

Token("bullet_list_close", "ul", -1),

353

Token("div_close", "div", -1)

354

])

355

356

# Create TOC tree node

357

toc_tree = SyntaxTreeNode(toc_tokens, create_root=False)

358

359

# Insert at beginning

360

tree.children.insert(0, toc_tree)

361

toc_tree.parent = tree

362

```

363

364

### Tree Analysis

365

366

Analyze document structure using tree representation:

367

368

```python

369

def analyze_document_structure(tree):

370

"""Analyze document structure and return statistics."""

371

stats = {

372

'total_nodes': 0,

373

'headings': [],

374

'paragraphs': 0,

375

'lists': 0,

376

'code_blocks': 0,

377

'links': 0,

378

'images': 0,

379

'max_nesting_level': 0

380

}

381

382

def analyze_node(node, level=0):

383

stats['total_nodes'] += 1

384

stats['max_nesting_level'] = max(stats['max_nesting_level'], level)

385

386

if node.token:

387

token_type = node.token.type

388

if token_type == "heading_open":

389

stats['headings'].append(int(node.token.tag[1]))

390

elif token_type == "paragraph_open":

391

stats['paragraphs'] += 1

392

elif token_type in ["bullet_list_open", "ordered_list_open"]:

393

stats['lists'] += 1

394

elif token_type in ["code_block", "fence"]:

395

stats['code_blocks'] += 1

396

elif token_type == "link_open":

397

stats['links'] += 1

398

elif token_type == "image":

399

stats['images'] += 1

400

401

for child in node.children:

402

analyze_node(child, level + 1)

403

404

for child in tree.children:

405

analyze_node(child)

406

407

return stats

408

409

# Usage

410

stats = analyze_document_structure(tree)

411

print(f"Document has {stats['paragraphs']} paragraphs")

412

print(f"Heading levels: {set(stats['headings'])}")

413

print(f"Maximum nesting: {stats['max_nesting_level']}")

414

```