or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

core-parsing.md directives.md index.md parsing.md plugins.md renderers.md utilities.md

docs/utilities.md

0

# Utilities and Helpers

1

2

Utility functions and helper modules that provide text processing, URL handling, HTML escaping, table of contents generation, and other common Markdown processing tasks. These utilities support both internal mistune operations and external use cases.

3

4

## Capabilities

5

6

### Text Processing Utilities

7

8

Core text processing functions for HTML escaping, URL handling, and text manipulation.

9

10

```python { .api }

11

def escape(s: str, quote: bool = True) -> str:

12

"""

13

Escape HTML characters in text for safe HTML output.

14

15

Parameters:

16

- s: String to escape

17

- quote: Whether to escape quote characters

18

19

Returns:

20

HTML-escaped string

21

"""

22

23

def escape_url(link: str) -> str:

24

"""

25

Escape URL for safe use in HTML attributes.

26

27

Parameters:

28

- link: URL to escape

29

30

Returns:

31

URL-encoded string safe for HTML attributes

32

"""

33

34

def safe_entity(s: str) -> str:

35

"""

36

Unescape any HTML entities in the text, then re-escape the result so it is safe for HTML output.

37

38

Parameters:

39

- s: String containing HTML entities

40

41

Returns:

42

String with entities normalized and HTML special characters escaped

43

"""

44

45

def unikey(s: str) -> str:

46

"""

47

Generate a normalized key (whitespace collapsed, case-folded) used to match link and footnote labels.

48

49

Parameters:

50

- s: String to convert

51

52

Returns:

53

Unicode-safe identifier string

54

"""

55

```

56

57

Usage examples:

58

59

```python

60

from mistune import escape, escape_url, safe_entity, unikey

61

62

# HTML escaping

63

text = '<script>alert("xss")</script>'

64

safe_text = escape(text)

65

# Output: '&lt;script&gt;alert(&quot;xss&quot;)&lt;/script&gt;'

66

67

# URL escaping

68

url = 'https://example.com/path with spaces?q=test&r=2'

69

safe_url = escape_url(url)

70

# Output: 'https://example.com/path%20with%20spaces?q=test&amp;r=2'

71

72

# Entity conversion

73

entity_text = '&amp; &lt; &gt; &quot;'

74

unicode_text = safe_entity(entity_text)

75

# Output: '&amp; &lt; &gt; &quot;' (entities are unescaped, then re-escaped for safe HTML output)

76

77

# Unicode key generation

78

key = unikey('Hello 世界')

79

# Output: 'HELLO 世界' (whitespace collapsed, case normalized)

80

```

81

82

### Additional Text Utilities

83

84

Extended text processing functions for advanced text manipulation.

85

86

```python { .api }

87

def expand_leading_tab(text: str, width: int = 4) -> str:

88

"""

89

Expand leading tabs to spaces.

90

91

Parameters:

92

- text: Text with potential leading tabs

93

- width: Number of spaces per tab

94

95

Returns:

96

Text with leading tabs expanded to spaces

97

"""

98

99

def expand_tab(text: str, space: str = "    ") -> str:

100

"""

101

Expand all tabs to specified space string.

102

103

Parameters:

104

- text: Text containing tabs

105

- space: String to replace tabs with

106

107

Returns:

108

Text with tabs expanded

109

"""

110

111

def unescape(s: str) -> str:

112

"""

113

Unescape HTML entities back to original characters.

114

115

Parameters:

116

- s: HTML-escaped string

117

118

Returns:

119

Unescaped string with original characters

120

"""

121

122

def striptags(s: str) -> str:

123

"""

124

Remove HTML tags from string, keeping only text content.

125

126

Parameters:

127

- s: String containing HTML tags

128

129

Returns:

130

Plain text with HTML tags removed

131

"""

132

133

def strip_end(src: str) -> str:

134

"""

135

Strip trailing whitespace and newlines from text.

136

137

Parameters:

138

- src: Source text

139

140

Returns:

141

Text with trailing whitespace removed

142

"""

143

```

144

145

Usage examples:

146

147

```python

148

from mistune.util import expand_leading_tab, expand_tab, unescape, striptags, strip_end

149

150

# Tab expansion

151

code = "\tif True:\n\t\tprint('hello')"

152

expanded = expand_leading_tab(code, 2)

153

# Output: "  if True:\n  \tprint('hello')" (only the first leading tab on each line is expanded)

154

155

# HTML tag stripping

156

html = '<p>Hello <strong>world</strong>!</p>'

157

text = striptags(html)

158

# Output: 'Hello world!'

159

160

# Unescaping

161

escaped = '&lt;div&gt;content&lt;/div&gt;'

162

original = unescape(escaped)

163

# Output: '<div>content</div>'

164

```

165

166

## Table of Contents Utilities

167

168

Utilities for generating and managing table of contents from document structure.

169

170

### TOC Hook System

171

172

Functions for adding table of contents generation to Markdown parsers.

173

174

```python { .api }

175

def add_toc_hook(

176

md: Markdown,

177

min_level: int = 1,

178

max_level: int = 3,

179

heading_id: Optional[Callable[[Dict[str, Any], int], str]] = None

180

) -> None:

181

"""

182

Add a hook to save TOC items into state.env for later use.

183

184

Parameters:

185

- md: Markdown instance to add hook to

186

- min_level: Minimum heading level to include

187

- max_level: Maximum heading level to include

188

- heading_id: Function to generate heading IDs

189

"""

190

191

def render_toc_ul(toc: Iterable[Tuple[int, str, str]]) -> str:

192

"""

193

Render TOC items as HTML unordered list.

194

195

Parameters:

196

- toc: Iterable of (level, id, text) tuples

197

198

Returns:

199

HTML unordered list representing the TOC

200

"""

201

```

202

203

### TOC Usage Examples

204

205

```python

206

import mistune

207

from mistune.toc import add_toc_hook, render_toc_ul

208

209

# Create parser with TOC hook

210

md = mistune.create_markdown()

211

add_toc_hook(md, min_level=1, max_level=3)

212

213

markdown_text = """

214

# Chapter 1: Introduction

215

216

## 1.1 Overview

217

Some content here.

218

219

## 1.2 Getting Started

220

More content.

221

222

### 1.2.1 Installation

223

Installation instructions.

224

225

# Chapter 2: Advanced Topics

226

227

## 2.1 Configuration

228

Configuration details.

229

"""

230

231

# Parse and extract TOC

232

html, state = md.parse(markdown_text)

233

toc_items = state.env.get('toc_items', [])

234

235

# Generate TOC HTML

236

toc_html = render_toc_ul(toc_items)

237

print("Table of Contents:")

238

print(toc_html)

239

240

print("\nMain Content:")

241

print(html)

242

```

243

244

### Custom Heading ID Generation

245

246

```python

247

import mistune
from mistune.toc import add_toc_hook

248

import re

249

250

def custom_heading_id(token, index):

251

"""Generate custom heading IDs based on heading text."""

252

heading_text = ''.join(

253

child.get('raw', '') for child in token.get('children', [])

254

if child.get('type') == 'text'

255

)

256

257

# Convert to slug format

258

slug = re.sub(r'[^\w\s-]', '', heading_text.lower())

259

slug = re.sub(r'[-\s]+', '-', slug).strip('-')

260

return f"section-{slug}"

261

262

md = mistune.create_markdown()

263

add_toc_hook(md, heading_id=custom_heading_id)

264

265

result, state = md.parse('''

266

# Getting Started Guide

267

## Installation Steps

268

## Configuration Options

269

''')

270

271

toc_items = state.env['toc_items']

272

for level, heading_id, title in toc_items:

273

    print(f"Level {level}: {title} (ID: {heading_id})")

274

# Output:

275

# Level 1: Getting Started Guide (ID: section-getting-started-guide)

276

# Level 2: Installation Steps (ID: section-installation-steps)

277

# Level 2: Configuration Options (ID: section-configuration-options)

278

```

279

280

## Helper Functions

281

282

283

## Integration Examples

284

285

### Custom Text Processing Pipeline

286

287

```python

288

from mistune import create_markdown, escape
from mistune.util import striptags

289

from mistune.toc import add_toc_hook, render_toc_ul

290

291

def process_markdown_with_toc(text):

292

"""Process Markdown with TOC generation and text utilities."""

293

294

# Create parser with TOC support

295

md = create_markdown()

296

add_toc_hook(md, min_level=2, max_level=4)

297

298

# Parse content

299

html, state = md.parse(text)

300

301

# Generate TOC

302

toc_items = state.env.get('toc_items', [])

303

toc_html = render_toc_ul(toc_items) if toc_items else ''

304

305

# Extract plain text summary (first paragraph)

306

summary = striptags(html.split('</p>')[0] + '</p>') if '<p>' in html else ''

307

308

return {

309

'html': html,

310

'toc': toc_html,

311

'summary': summary,

312

'toc_items': toc_items

313

}

314

315

# Usage

316

result = process_markdown_with_toc("""

317

This is the introduction paragraph.

318

319

## Section 1

320

Content for section 1.

321

322

### Subsection 1.1

323

Detailed content.

324

325

## Section 2

326

Content for section 2.

327

""")

328

329

print("Summary:", result['summary'])

330

print("TOC:", result['toc'])

331

print("Full HTML:", result['html'])

332

```

333

334

These utilities provide the building blocks for advanced Markdown processing workflows, content analysis, and integration with documentation systems while maintaining the performance and modularity that characterize the mistune architecture.