CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-mistune

A sane and fast Markdown parser with useful plugins and renderers

Pending
Overview
Eval results
Files

docs/utilities.md

Utilities and Helpers

Utility functions and helper modules that provide text processing, URL handling, HTML escaping, table of contents generation, and other common Markdown processing tasks. These utilities support both internal mistune operations and external use cases.

Capabilities

Text Processing Utilities

Core text processing functions for HTML escaping, URL handling, and text manipulation.

def escape(s: str, quote: bool = True) -> str:
    """
    Escape HTML-special characters in text so it can be embedded in HTML output.

    Parameters:
    - s: String to escape
    - quote: Whether to also escape double-quote characters (default: True)

    Returns:
    HTML-escaped string

    Example:
    escape('<script>alert("xss")</script>') returns
    '&lt;script&gt;alert(&quot;xss&quot;)&lt;/script&gt;'
    """

def escape_url(link: str) -> str:
    """
    Escape a URL for safe use in HTML attributes such as href or src.

    Parameters:
    - link: URL to escape

    Returns:
    URL-encoded string safe for HTML attributes

    Example:
    escape_url('https://example.com/path with spaces?q=test&r=2') returns
    'https://example.com/path%20with%20spaces?q=test&amp;r=2'
    (spaces are percent-encoded and '&' is HTML-escaped).
    """

def safe_entity(s: str) -> str:
    """
    Convert HTML entities to safe Unicode characters.

    Parameters:
    - s: String containing HTML entities

    Returns:
    String with entities converted to Unicode

    NOTE(review): mistune's implementation appears to be escape(unescape(s)),
    i.e. entities are decoded and the result is HTML-escaped again, so the
    return value would still contain entities such as '&amp;'. Confirm against
    mistune.util before relying on the description above.
    """

def unikey(s: str) -> str:
    """
    Generate a normalized key from a string for internal lookups.

    Parameters:
    - s: String to convert

    Returns:
    Normalized identifier string

    NOTE(review): presumably used to match link/footnote labels by collapsing
    whitespace and normalizing case; the exact normalization is not shown
    here. Confirm against mistune.util.
    """

Usage examples:

from mistune import escape, escape_url, safe_entity, unikey

# HTML escaping: markup characters and quotes become entities
text = '<script>alert("xss")</script>'
safe_text = escape(text)
# Output: '&lt;script&gt;alert(&quot;xss&quot;)&lt;/script&gt;'

# URL escaping: spaces are percent-encoded and '&' is HTML-escaped
url = 'https://example.com/path with spaces?q=test&r=2'
safe_url = escape_url(url)
# Output: 'https://example.com/path%20with%20spaces?q=test&amp;r=2'

# Entity conversion
entity_text = '&amp; &lt; &gt; &quot;'
unicode_text = safe_entity(entity_text) 
# Output: '& < > "'
# NOTE(review): mistune's safe_entity appears to be escape(unescape(s)), which
# would return '&amp; &lt; &gt; &quot;' for this input. Confirm the output above.

# Unicode key generation
key = unikey('Hello 世界')
# Output: Safe identifier string
# NOTE(review): presumably 'HELLO 世界' (whitespace collapsed, case
# normalized). Confirm against mistune.util.unikey.

Additional Text Utilities

Extended text processing functions for advanced text manipulation.

def expand_leading_tab(text: str, width: int = 4) -> str:
    """
    Expand leading tabs to spaces.

    Parameters:
    - text: Text with potential leading tabs
    - width: Number of spaces per tab (default: 4)

    Returns:
    Text with leading tabs expanded to spaces

    NOTE(review): mistune's implementation appears to expand only the first
    tab at the start of each line (after at most three spaces), not every
    leading tab. Confirm before relying on multi-tab indentation.
    """

def expand_tab(text: str, space: str = "    ") -> str:
    """
    Expand tabs to the specified space string.

    Parameters:
    - text: Text containing tabs
    - space: String to replace each expanded tab with (default: four spaces)

    Returns:
    Text with tabs expanded

    NOTE(review): the original description says "all tabs"; mistune's
    implementation appears to replace only a tab at the start of a line.
    Confirm against mistune.util.
    """

def unescape(s: str) -> str:
    """
    Unescape HTML entities back to their original characters.

    Parameters:
    - s: HTML-escaped string

    Returns:
    Unescaped string with original characters

    Example:
    unescape('&lt;div&gt;content&lt;/div&gt;') returns '<div>content</div>'
    """

def striptags(s: str) -> str:
    """
    Remove HTML tags from a string, keeping only the text content.

    Parameters:
    - s: String containing HTML tags

    Returns:
    Plain text with HTML tags removed

    Example:
    striptags('<p>Hello <strong>world</strong>!</p>') returns 'Hello world!'
    """

def strip_end(src: str) -> str:
    """
    Strip trailing whitespace and newlines from text.

    Parameters:
    - src: Source text

    Returns:
    Text with trailing whitespace removed

    NOTE(review): mistune's implementation appears to collapse a trailing
    run of newline-plus-whitespace into a single newline rather than removing
    it entirely. Confirm against mistune.util.
    """

Usage examples:

from mistune.util import expand_leading_tab, expand_tab, unescape, striptags, strip_end

# Tab expansion (each leading tab becomes `width` spaces; here width=2)
code = "\tif True:\n\t\tprint('hello')"
expanded = expand_leading_tab(code, 2)
# Output: "  if True:\n    print('hello')"
# NOTE(review): if only the first tab of each line is expanded, the second
# line would be "  \tprint('hello')". Confirm the output above.

# HTML tag stripping: tags are removed, text content is kept
html = '<p>Hello <strong>world</strong>!</p>'
text = striptags(html)
# Output: 'Hello world!'

# Unescaping: entities are turned back into the characters they represent
escaped = '&lt;div&gt;content&lt;/div&gt;'
original = unescape(escaped)
# Output: '<div>content</div>'

Table of Contents Utilities

Utilities for generating and managing table of contents from document structure.

TOC Hook System

Functions for adding table of contents generation to Markdown parsers.

def add_toc_hook(
    md: Markdown,
    min_level: int = 1, 
    max_level: int = 3,
    heading_id: Optional[Callable[[Dict[str, Any], int], str]] = None
) -> None:
    """
    Add a hook to save TOC items into state.env for later use.

    After md.parse() the collected items are read from
    state.env['toc_items'].

    Parameters:
    - md: Markdown instance to add hook to
    - min_level: Minimum heading level to include (default: 1)
    - max_level: Maximum heading level to include (default: 3)
    - heading_id: Optional callable taking (heading token dict, index) and
      returning the ID string for that heading; None selects the library
      default (NOTE(review): default ID format is not shown here; confirm)

    Returns:
    None; the Markdown instance is modified in place.
    """

def render_toc_ul(toc: Iterable[Tuple[int, str, str]]) -> str:
    """
    Render TOC items as an HTML unordered list.

    Parameters:
    - toc: Iterable of (level, id, text) tuples, e.g. the value stored in
      state.env['toc_items'] by add_toc_hook

    Returns:
    HTML unordered list representing the TOC
    """

TOC Usage Examples

import mistune
from mistune.toc import add_toc_hook, render_toc_ul

# Create parser with TOC hook (headings of level 1-3 are collected)
md = mistune.create_markdown()
add_toc_hook(md, min_level=1, max_level=3)

markdown_text = """
# Chapter 1: Introduction

## 1.1 Overview
Some content here.

## 1.2 Getting Started  
More content.

### 1.2.1 Installation
Installation instructions.

# Chapter 2: Advanced Topics

## 2.1 Configuration
Configuration details.
"""

# Parse and extract TOC: md.parse() returns the rendered HTML together with
# the parser state; the hook stores the collected items in state.env
html, state = md.parse(markdown_text)
toc_items = state.env.get('toc_items', [])

# Generate TOC HTML from the (level, id, text) items
toc_html = render_toc_ul(toc_items)
print("Table of Contents:")
print(toc_html)

print("\nMain Content:")  
print(html)

Custom Heading ID Generation

import re

import mistune
from mistune.toc import add_toc_hook

def custom_heading_id(token, index):
    """Generate a slug-style heading ID based on the heading text.

    Parameters:
    - token: Heading token dict. The heading text is taken from the 'raw'
      value of its 'text' children when the token has been inline-parsed,
      otherwise from the token's own 'text' value (the form heading tokens
      have when the TOC hook runs, before rendering).
    - index: Zero-based position of the heading among the collected
      headings; used only as a fallback when no slug can be derived.

    Returns:
    "section-<slug>", or "section-<index + 1>" when the heading text
    contains no slug-able characters (avoids empty, colliding IDs).
    """
    children = token.get('children')
    if children:
        heading_text = ''.join(
            child.get('raw', '') for child in children
            if child.get('type') == 'text'
        )
    else:
        # Not inline-parsed yet: the raw heading source lives under 'text'.
        heading_text = token.get('text', '')

    # Convert to slug format: drop punctuation, then join words with '-'
    slug = re.sub(r'[^\w\s-]', '', heading_text.lower())
    slug = re.sub(r'[-\s]+', '-', slug).strip('-')
    if not slug:
        return f"section-{index + 1}"
    return f"section-{slug}"

# Register the custom ID generator; it is called once per collected heading.
md = mistune.create_markdown()
add_toc_hook(md, heading_id=custom_heading_id)

result, state = md.parse('''
# Getting Started Guide
## Installation Steps  
## Configuration Options
''')

# Each TOC item is a (level, id, text) tuple, so unpack it instead of
# indexing by string key (which raises TypeError on a tuple).
toc_items = state.env['toc_items']
for level, heading_id, title in toc_items:
    print(f"Level {level}: {title} (ID: {heading_id})")
# Output:
# Level 1: Getting Started Guide (ID: section-getting-started-guide)
# Level 2: Installation Steps (ID: section-installation-steps)
# Level 2: Configuration Options (ID: section-configuration-options)

Helper Functions

Integration Examples

Custom Text Processing Pipeline

import re

from mistune import create_markdown, escape
from mistune.toc import add_toc_hook, render_toc_ul
from mistune.util import striptags  # not exported from the top-level package


def process_markdown_with_toc(text):
    """Process Markdown with TOC generation and text utilities.

    Parameters:
    - text: Markdown source to render

    Returns:
    Dict with keys:
    - 'html': rendered HTML
    - 'toc': TOC as an HTML unordered list ('' when no headings qualify)
    - 'summary': plain text of the first paragraph ('' when there is none)
    - 'toc_items': the (level, id, text) tuples collected by the TOC hook
    """
    # Create parser with TOC support (levels 2-4; the level-1 title is skipped)
    md = create_markdown()
    add_toc_hook(md, min_level=2, max_level=4)

    # Parse content
    html, state = md.parse(text)

    # Generate TOC
    toc_items = state.env.get('toc_items', [])
    toc_html = render_toc_ul(toc_items) if toc_items else ''

    # Extract plain text summary from the first <p>...</p> only. Taking
    # everything before the first '</p>' would also pull in any heading
    # text rendered ahead of that paragraph.
    first_paragraph = re.search(r'<p>.*?</p>', html, re.DOTALL)
    summary = striptags(first_paragraph.group(0)) if first_paragraph else ''

    return {
        'html': html,
        'toc': toc_html,
        'summary': summary,
        'toc_items': toc_items,
    }

# Usage
# The hook inside process_markdown_with_toc collects heading levels 2-4, so
# "Section 1", "Subsection 1.1" and "Section 2" all appear in the TOC.
result = process_markdown_with_toc("""
This is the introduction paragraph.

## Section 1
Content for section 1.

### Subsection 1.1  
Detailed content.

## Section 2
Content for section 2.
""")

# 'summary' is the plain text of the first paragraph; 'toc' and 'html' are HTML strings
print("Summary:", result['summary'])
print("TOC:", result['toc'])
print("Full HTML:", result['html'])

These utilities provide the building blocks for advanced Markdown processing workflows, content analysis, and integration with documentation systems, while maintaining the performance and modularity that characterize the mistune architecture.

Install with Tessl CLI

npx tessl i tessl/pypi-mistune

docs

core-parsing.md

directives.md

index.md

parsing.md

plugins.md

renderers.md

utilities.md

tile.json