A pure Python markup converter supporting creole2html, html2creole, html2ReSt, and html2textile conversions
—
Low-level classes for advanced parsing and emission control, enabling custom conversion workflows and specialized markup processing. These classes provide the foundation for all conversion functions and allow fine-grained control over the parsing and output generation process.
Parse Creole markup into document tree structure for processing.
class CreoleParser:
def __init__(self, markup_string: str, block_rules: tuple = None,
blog_line_breaks: bool = True, debug: bool = False): ...
def parse(self) -> DocNode: ...

Parameters:
- markup_string: Creole markup text to parse
- block_rules: Custom block-level parsing rules
- blog_line_breaks: Use blog-style (True) vs wiki-style (False) line breaks
- debug: Enable debug output

Usage Examples:
from creole.parser.creol2html_parser import CreoleParser
# Basic parsing
parser = CreoleParser("This is **bold** text")
document = parser.parse()
# Custom block rules
from creole.parser.creol2html_rules import BlockRules
custom_rules = BlockRules()
parser = CreoleParser(markup, block_rules=custom_rules)
document = parser.parse()
# Debug mode
parser = CreoleParser(markup, debug=True)
document = parser.parse()
if debug:
document.debug() # Print document tree structure

Parse HTML markup into document tree structure for conversion to other formats.
class HtmlParser:
def __init__(self, debug: bool = False): ...
def feed(self, html_string: str) -> DocNode: ...
def debug(self): ...

Parameters:
- debug: Enable debug output and tree visualization

Usage Examples:
from creole.parser.html_parser import HtmlParser
# Basic HTML parsing
parser = HtmlParser()
document = parser.feed('<p>Hello <strong>world</strong></p>')
# Debug mode
parser = HtmlParser(debug=True)
document = parser.feed(html_content)
parser.debug() # Print parsing debug information

Convert document tree to HTML output with macro support and formatting options.
class HtmlEmitter:
def __init__(self, document: DocNode, macros: dict = None,
verbose: int = None, stderr = None, strict: bool = False): ...
def emit(self) -> str: ...

Parameters:
- document: Document tree to convert
- macros: Dictionary of macro functions
- verbose: Verbosity level for output
- stderr: Error output stream
- strict: Enable strict Creole 1.0 compliance

Usage Examples:
from creole.emitter.creol2html_emitter import HtmlEmitter
from creole.parser.creol2html_parser import CreoleParser
# Parse and emit HTML
parser = CreoleParser("**bold** text")
document = parser.parse()
emitter = HtmlEmitter(document)
html = emitter.emit()
# With macros
def code_macro(ext, text):
return f'<pre><code class="{ext}">{text}</code></pre>'
macros = {'code': code_macro}
emitter = HtmlEmitter(document, macros=macros)
html = emitter.emit()
# Strict mode
emitter = HtmlEmitter(document, strict=True)
html = emitter.emit()

Convert document tree to Creole markup output with unknown tag handling.
class CreoleEmitter:
def __init__(self, document: DocNode, debug: bool = False,
unknown_emit = None, strict: bool = False): ...
def emit(self) -> str: ...

Parameters:
- document: Document tree to convert
- debug: Enable debug output
- unknown_emit: Handler function for unknown HTML tags
- strict: Enable strict Creole output mode

Usage Examples:
from creole.emitter.html2creole_emitter import CreoleEmitter
from creole.parser.html_parser import HtmlParser
from creole.shared.unknown_tags import transparent_unknown_nodes
# Parse HTML and emit Creole
parser = HtmlParser()
document = parser.feed('<p><strong>bold</strong> text</p>')
emitter = CreoleEmitter(document)
creole = emitter.emit()
# Handle unknown tags
emitter = CreoleEmitter(document, unknown_emit=transparent_unknown_nodes)
creole = emitter.emit()
# Debug mode
emitter = CreoleEmitter(document, debug=True)
creole = emitter.emit()

Convert document tree to ReStructuredText markup with reference link handling.
class ReStructuredTextEmitter:
def __init__(self, document: DocNode, debug: bool = False,
unknown_emit = None): ...
def emit(self) -> str: ...

Parameters:
- document: Document tree to convert
- debug: Enable debug output
- unknown_emit: Handler function for unknown HTML tags

Usage Examples:
from creole.emitter.html2rest_emitter import ReStructuredTextEmitter
from creole.parser.html_parser import HtmlParser
# Parse HTML and emit ReStructuredText
parser = HtmlParser()
document = parser.feed('<h1>Title</h1><p>Content with <a href="http://example.com">link</a></p>')
emitter = ReStructuredTextEmitter(document)
rest = emitter.emit()
# Returns ReStructuredText with proper heading underlines and reference links

Convert document tree to Textile markup format.
class TextileEmitter:
def __init__(self, document: DocNode, debug: bool = False,
unknown_emit = None): ...
def emit(self) -> str: ...

Parameters:
- document: Document tree to convert
- debug: Enable debug output
- unknown_emit: Handler function for unknown HTML tags

Usage Examples:
from creole.emitter.html2textile_emitter import TextileEmitter
from creole.parser.html_parser import HtmlParser
# Parse HTML and emit Textile
parser = HtmlParser()
document = parser.feed('<p><strong>bold</strong> and <em>italic</em></p>')
emitter = TextileEmitter(document)
textile = emitter.emit()
# Returns: '*bold* and __italic__'

The document tree node that represents markup elements and hierarchy.
class DocNode:
def __init__(self, kind: str = None, parent = None): ...
def debug(self): ...
def append(self, child): ...
def get_text(self) -> str: ...

Properties:
- kind: Node type (e.g., 'document', 'paragraph', 'strong', 'link')
- parent: Parent node reference
- children: List of child nodes
- content: Text content for leaf nodes
- attrs: Dictionary of node attributes

Usage Examples:
from creole.shared.document_tree import DocNode
# Create document structure
doc = DocNode('document')
para = DocNode('paragraph', parent=doc)
doc.append(para)
bold = DocNode('strong', parent=para)
bold.content = 'bold text'
para.append(bold)
# Debug tree structure
doc.debug()

from creole.parser.creol2html_parser import CreoleParser
from creole.emitter.html2rest_emitter import ReStructuredTextEmitter
# Parse Creole and emit ReStructuredText directly
parser = CreoleParser("= Heading =\n\nThis is **bold** text")
document = parser.parse()
emitter = ReStructuredTextEmitter(document)
rest_output = emitter.emit()

# Parse, modify, and emit
parser = CreoleParser("Original text")
document = parser.parse()
# Modify document tree
for node in document.children:
if node.kind == 'strong':
node.kind = 'emphasis' # Change bold to italic
emitter = HtmlEmitter(document)
modified_html = emitter.emit()

Utility class for converting HTML entities to Unicode characters.
class Deentity:
def __init__(self): ...
def replace_all(self, content: str) -> str: ...
def replace_number(self, text: str) -> str: ...
def replace_hex(self, text: str) -> str: ...
def replace_named(self, text: str) -> str: ...

Usage Examples:
from creole.html_tools.deentity import Deentity
# Create decoder instance
decoder = Deentity()
# Convert all types of HTML entities
html_text = "&lt;p&gt;Hello &amp; welcome &#8212; &#x2014; &nbsp;"
clean_text = decoder.replace_all(html_text)
# Returns: '<p>Hello & welcome — — \xa0'
# Convert specific entity types
decoder.replace_number("62") # Returns: '>'
decoder.replace_hex("3E") # Returns: '>'
decoder.replace_named("amp") # Returns: '&'

Remove unnecessary whitespace from HTML while preserving structure.
def strip_html(html_code: str) -> str: ...

Usage Examples:
from creole.html_tools.strip_html import strip_html
# Clean up HTML whitespace
messy_html = ' <p> one \n two </p>'
clean_html = strip_html(messy_html)
# Returns: '<p>one two</p>'
# Preserves important spacing around inline elements
html = 'one <i>two \n <strong> \n three \n </strong></i>'
clean = strip_html(html)
# Returns: 'one <i>two <strong>three</strong> </i>'

Functions for handling unknown HTML tags during conversion.
def raise_unknown_node(emitter, node): ...
def use_html_macro(emitter, node): ...
def preformat_unknown_nodes(emitter, node): ...
def escape_unknown_nodes(emitter, node): ...
def transparent_unknown_nodes(emitter, node): ...

Usage Examples:
from creole.shared.unknown_tags import (
transparent_unknown_nodes, escape_unknown_nodes,
raise_unknown_node, use_html_macro
)
from creole import html2creole
# Different ways to handle unknown tags
html = '<p>Text with <unknown>content</unknown></p>'
# Remove tags, keep content (default)
creole = html2creole(html, unknown_emit=transparent_unknown_nodes)
# Returns: 'Text with content'
# Escape unknown tags as text
creole = html2creole(html, unknown_emit=escape_unknown_nodes)
# Returns: 'Text with &lt;unknown&gt;content&lt;/unknown&gt;'
# Raise error on unknown tags
try:
creole = html2creole(html, unknown_emit=raise_unknown_node)
except NotImplementedError:
print("Unknown tag encountered")
# Wrap in HTML macro
creole = html2creole(html, unknown_emit=use_html_macro)
# Returns: 'Text with <<html>><unknown>content</unknown><</html>>'

All parser and emitter classes support debug mode for troubleshooting:
# Enable debugging
parser = CreoleParser(markup, debug=True)
document = parser.parse()
document.debug() # Print tree structure
emitter = HtmlEmitter(document, verbose=2)
html = emitter.emit() # Verbose output during emission

Install with Tessl CLI
npx tessl i tessl/pypi-python-creole