CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-mammoth

Convert Word documents from docx to simple and clean HTML and Markdown

Pending
Overview
Eval results
Files

docs/writers.md

Writers and Output Generation

Writer system for generating HTML and Markdown output from internal document representations. The writer system provides a flexible interface for creating different output formats and custom rendering logic.

Capabilities

Writer Factory

Create writer instances for different output formats.

def writer(output_format=None):
    """
    Create a writer instance for the specified output format.

    Parameters:
    - output_format: str, "html" or "markdown"; when omitted, "html" is used

    Returns:
    HtmlWriter or MarkdownWriter instance, matching the requested format
    """

def formats():
    """
    Get the keys of the available output formats.

    Returns:
    List of supported format strings ("html" and "markdown"), i.e. the values
    that can be passed as output_format when creating a writer
    """

Usage example:

from mammoth.writers import writer, formats

# Get available formats. sorted() turns whatever iterable formats() returns
# into a list with a deterministic order, so the printed value is stable.
print(sorted(formats()))  # ['html', 'markdown']

# Create HTML writer
html_writer = writer("html")

# Create Markdown writer
md_writer = writer("markdown")

HTML Writer

Writer class for generating HTML output with full control over element creation and attributes.

class HtmlWriter:
    """
    HTML writer for generating HTML output.

    Output is built up through successive method calls and retrieved with
    as_string().
    """
    
    def text(self, text):
        """
        Write text content with proper escaping.
        
        Parameters:
        - text: str, text content to write
        """
    
    def start(self, name, attributes=None):
        """
        Write an opening HTML tag, e.g. <div class="container">.
        
        Parameters:
        - name: str, HTML element name
        - attributes: dict of attribute names to values (optional)
        """
    
    def end(self, name):
        """
        Write a closing HTML tag, e.g. </div>.
        
        Parameters:
        - name: str, HTML element name
        """
    
    def self_closing(self, name, attributes=None):
        """
        Write a self-closing HTML tag, e.g. <img ... />.
        
        Parameters:
        - name: str, HTML element name
        - attributes: dict of attribute names to values (optional)
        """
    
    def append(self, html):
        """
        Append raw HTML content.
        
        Parameters:
        - html: str, HTML content to append
        """
    
    def as_string(self):
        """
        Get the final HTML output.
        
        Returns:
        str, the complete HTML content written so far
        """

Usage example:

from mammoth.writers import writer

html_writer = writer("html")

# Create HTML structure
html_writer.start("div", {"class": "container"})
html_writer.start("h1")
html_writer.text("Hello World")
html_writer.end("h1")
html_writer.start("p")
html_writer.text("This is a paragraph.")
html_writer.end("p")
# Attributes are listed alphabetically so the output shown below does not
# depend on whether the writer keeps insertion order or sorts attribute names.
html_writer.self_closing("img", {"alt": "Image", "src": "image.jpg"})
html_writer.end("div")

# Get final HTML
output = html_writer.as_string()
print(output)  # <div class="container"><h1>Hello World</h1><p>This is a paragraph.</p><img alt="Image" src="image.jpg" /></div>

Markdown Writer

Writer class for generating Markdown output with the same interface as HtmlWriter.

class MarkdownWriter:
    """
    Markdown writer for generating Markdown output.

    Exposes the same methods as HtmlWriter; callers describe content in terms
    of HTML element names and the writer produces Markdown.
    """
    
    def text(self, text):
        """
        Write text content with Markdown escaping.

        Parameters:
        - text: str, text content to write
        """
    
    def start(self, name, attributes=None):
        """
        Convert HTML opening tag to Markdown equivalent.

        Parameters:
        - name: str, HTML element name
        - attributes: dict, HTML attributes (optional)
        """
    
    def end(self, name):
        """
        Convert HTML closing tag to Markdown equivalent.

        Parameters:
        - name: str, HTML element name
        """
    
    def self_closing(self, name, attributes=None):
        """
        Convert self-closing HTML tag to Markdown equivalent.

        Parameters:
        - name: str, HTML element name
        - attributes: dict, HTML attributes (optional)
        """
    
    def append(self, html):
        """
        Append content, converting HTML to Markdown.

        Parameters:
        - html: str, content to append
        """
    
    def as_string(self):
        """
        Get final Markdown output.

        Returns:
        str, the Markdown content written so far
        """

Usage example:

from mammoth.writers import writer

md_writer = writer("markdown")

# Create Markdown structure using the same start/text/end calls as for HTML
md_writer.start("h1")
md_writer.text("Hello World")
md_writer.end("h1")
md_writer.start("p")
md_writer.text("This is a paragraph.")
md_writer.end("p")

# Get final Markdown
# NOTE(review): the comment below shows the visible content only; whether the
# output ends with trailing newlines is not shown here — confirm.
output = md_writer.as_string()
print(output)  # # Hello World\n\nThis is a paragraph.

Writer Interface

Abstract base class defining the writer interface.

class Writer:
    """
    Base interface shared by all writers.

    Every method here raises NotImplementedError; concrete writers are
    expected to override all of them.
    """

    def text(self, text):
        """
        Write text content.

        Parameters:
        - text: str, text content to write
        """
        raise NotImplementedError

    def start(self, name, attributes=None):
        """
        Write an opening tag.

        Parameters:
        - name: str, element name
        - attributes: dict, element attributes (optional)
        """
        raise NotImplementedError

    def end(self, name):
        """
        Write a closing tag.

        Parameters:
        - name: str, element name
        """
        raise NotImplementedError

    def self_closing(self, name, attributes=None):
        """
        Write a self-closing tag.

        Parameters:
        - name: str, element name
        - attributes: dict, element attributes (optional)
        """
        raise NotImplementedError

    def append(self, html):
        """
        Append raw content.

        Parameters:
        - html: str, content to append
        """
        raise NotImplementedError

    def as_string(self):
        """
        Get the final output.

        Returns:
        The accumulated output, as defined by the concrete writer
        """
        raise NotImplementedError

HTML Generation Functions

Low-level HTML generation utilities for creating HTML node trees.

def text(value):
    """
    Create a text node holding the given text content.

    Parameters:
    - value: str, text content

    Returns:
    TextNode instance
    """

def element(tag_names, attributes=None, children=None, 
           collapsible=None, separator=None):
    """
    Create HTML element nodes.

    Parameters:
    - tag_names: str or list, HTML element name(s)
    - attributes: dict, HTML attributes (optional)
    - children: list, child nodes (optional)
    - collapsible: bool, whether element can be collapsed (optional)
    - separator: str, separator for multiple elements (optional)

    Returns:
    Element instance
    """

def collapsible_element(tag_names, attributes=None, children=None):
    """
    Create collapsible HTML elements.

    Takes the same tag_names, attributes and children parameters as
    element(), without the collapsible and separator parameters.

    Parameters:
    - tag_names: str or list, HTML element name(s)
    - attributes: dict, HTML attributes (optional)
    - children: list, child nodes (optional)

    Returns:
    Element instance that can be collapsed if empty
    """

def strip_empty(nodes):
    """
    Remove empty elements from a node tree.

    Parameters:
    - nodes: list, HTML nodes

    Returns:
    List of nodes with empty elements removed
    """

def collapse(nodes):
    """
    Collapse adjacent matching elements in a list of nodes.

    Parameters:
    - nodes: list, HTML nodes

    Returns:
    List of nodes with adjacent elements collapsed
    """

def write(writer, nodes):
    """
    Write HTML nodes using a writer.

    The result is read back afterwards from the writer itself, via
    writer.as_string().

    Parameters:
    - writer: Writer instance
    - nodes: list, HTML nodes to write
    """

Force Write Constant

Special constant for forcing elements to be written even if empty.

# Shared ForceWrite instance, created once and imported by callers.
force_write = ForceWrite()  # Forces element to be written even if empty

Usage example:

from mammoth.html import element, text, force_write, write
from mammoth.writers import writer

# Create HTML node tree
nodes = [
    element("div", {"class": "container"}, [
        element("h1", None, [text("Title")]),
        element("p", None, [text("Content")]),
        # force_write goes in the children list. The fourth positional
        # parameter of element() is `collapsible` (a bool), so passing
        # force_write there would not mark the element as force-written.
        element("div", {"class": "empty"}, [force_write]),  # Force empty div
    ])
]

# Write to HTML
html_writer = writer("html")
write(html_writer, nodes)
output = html_writer.as_string()

Custom Writer Examples

Custom XML Writer

from mammoth.writers.abc import Writer

class XmlWriter(Writer):
    """
    Writer that produces XML markup.

    Fragments are collected in a list and joined by as_string(). Text content
    and attribute values are escaped; content passed to append() is added
    unchanged.
    """

    def __init__(self):
        self._content = []

    @staticmethod
    def _escape_text(value):
        """Escape &, < and > for use in XML character data."""
        # "&" is replaced first so the entities introduced by the later
        # replacements are not escaped a second time.
        return value.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

    @classmethod
    def _render_tag(cls, name, attributes, closer):
        """
        Build a tag string shared by start() and self_closing().

        Parameters:
        - name: str, element name
        - attributes: dict or None, attribute names mapped to values
        - closer: str, the characters ending the tag (">" or "/>")
        """
        parts = [f"<{name}"]
        for key, value in (attributes or {}).items():
            # Values sit inside double quotes, so '"' must be escaped as well
            # as the characters that are special in text content.
            escaped = cls._escape_text(str(value)).replace('"', "&quot;")
            parts.append(f' {key}="{escaped}"')
        parts.append(closer)
        return "".join(parts)

    def text(self, text):
        """Write text content with XML special characters escaped."""
        self._content.append(self._escape_text(text))

    def start(self, name, attributes=None):
        """Write an opening tag, with optional attributes."""
        self._content.append(self._render_tag(name, attributes, ">"))

    def end(self, name):
        """Write a closing tag."""
        self._content.append(f"</{name}>")

    def self_closing(self, name, attributes=None):
        """Write a self-closing tag, with optional attributes."""
        self._content.append(self._render_tag(name, attributes, "/>"))

    def append(self, content):
        """Append content as-is, without escaping."""
        self._content.append(content)

    def as_string(self):
        """Return everything written so far as a single string."""
        return "".join(self._content)

# Use custom writer: the custom class is instantiated directly and driven
# through start/text/end calls.
xml_writer = XmlWriter()
xml_writer.start("document")
xml_writer.start("title")
xml_writer.text("My Document")
xml_writer.end("title")
xml_writer.end("document")

# as_string() joins the accumulated fragments in call order
print(xml_writer.as_string())  # <document><title>My Document</title></document>

Install with Tessl CLI

npx tessl i tessl/pypi-mammoth

docs

conversion.md

images.md

index.md

styles.md

transforms.md

writers.md

tile.json