tessl/pypi-panflute

Pythonic Pandoc filters library for programmatic document manipulation and transformation

—

Pending

Overview

Eval results

Files

Document I/O

Name: tessl/pypi-panflute
Author: tessl

Core functions for reading and writing Pandoc JSON documents, running filter functions, and managing document processing workflows. These functions handle the fundamental operations of loading documents from Pandoc, processing them with filters, and outputting results.

Capabilities

Document Loading

Load Pandoc JSON documents from input streams and convert them to panflute Doc objects.

def load(input_stream=None) -> Doc:
    """
    Load a JSON-encoded Pandoc document and return it as a Doc element.

    Parameters:
    - input_stream: text stream to read the JSON from; when omitted (None),
      sys.stdin is used

    Returns:
    Doc: parsed document with format and API version set

    Example:
    import panflute as pf

    # Load from stdin (typical filter usage)
    doc = pf.load()

    # Load from a file
    with open('document.json', encoding='utf-8') as f:
        doc = pf.load(f)

    # Load from a string, by wrapping it in a text stream
    import io
    json_str = '{"pandoc-api-version":[1,23],"meta":{},"blocks":[]}'
    doc = pf.load(io.StringIO(json_str))
    """

Document Output

Convert panflute Doc objects to Pandoc JSON format and write to output streams.

def dump(doc: Doc, output_stream=None):
    """
    Serialize a Doc object as JSON-encoded text and write it to a stream.

    To obtain the JSON as a Python string, dump into an io.StringIO and
    read it back with getvalue(), as in the last example below.

    Parameters:
    - doc: document to serialize
    - output_stream: text stream to write the JSON to; when omitted (None),
      sys.stdout is used

    Example:
    import panflute as pf

    doc = pf.Doc(pf.Para(pf.Str('Hello world')))

    # Dump to stdout (typical filter usage)
    pf.dump(doc)

    # Dump to a file
    with open('output.json', 'w', encoding='utf-8') as f:
        pf.dump(doc, f)

    # Dump to a string, via an in-memory text stream
    import io
    with io.StringIO() as f:
        pf.dump(doc, f)
        json_output = f.getvalue()
    """

Single Filter Execution

Run a single filter function on a document with optional preprocessing and postprocessing.

def run_filter(action: callable,
               prepare: callable = None,
               finalize: callable = None,
               input_stream = None,
               output_stream = None,
               doc: Doc = None,
               stop_if: callable = None,
               **kwargs):
    """
    Apply a single filter function to each element in a document.

    Parameters:
    - action: function taking (element, doc) that processes elements
    - prepare: function executed once before filtering (receives doc)
    - finalize: function executed once after filtering (receives doc)
    - input_stream: input source (default stdin)
    - output_stream: output destination (default stdout)
    - doc: existing Doc to process instead of loading from stream
    - stop_if: function taking (element) to stop traversal early
    - **kwargs: additional arguments passed to action function

    Returns:
    Doc: processed document if doc parameter provided, otherwise None

    Example:
    import panflute as pf

    def emphasize_words(elem, doc):
        if isinstance(elem, pf.Str) and 'important' in elem.text:
            # Keep the counter reported by finalize_doc() in sync
            doc.emphasis_count += 1
            return pf.Emph(elem)

    def prepare_doc(doc):
        doc.emphasis_count = 0

    def finalize_doc(doc):
        pf.debug(f"Added {doc.emphasis_count} emphasis elements")

    if __name__ == '__main__':
        pf.run_filter(emphasize_words, prepare=prepare_doc, finalize=finalize_doc)
    """

Multiple Filter Execution

Run multiple filter functions sequentially on a document.

def run_filters(actions: list,
                prepare: callable = None,
                finalize: callable = None,
                input_stream = None,
                output_stream = None,
                doc: Doc = None,
                stop_if: callable = None,
                **kwargs):
    """
    Apply multiple filter functions to a document, one after another.

    Parameters:
    - actions: list of functions, each taking (element, doc); they are
      applied sequentially, in list order
    - prepare: function executed before filtering
    - finalize: function executed after all filtering
    - input_stream: input source (default stdin)
    - output_stream: output destination (default stdout)
    - doc: existing Doc to process instead of loading from stream
    - stop_if: function taking (element) to stop traversal early
    - **kwargs: additional arguments passed to all action functions

    Returns:
    Doc: processed document if doc parameter provided, otherwise None

    Example:
    import panflute as pf

    def convert_quotes(elem, doc):
        # Normalize curly double quotes to plain straight ones
        if isinstance(elem, pf.Str):
            return pf.Str(elem.text.replace('“', '"').replace('”', '"'))

    def add_emphasis(elem, doc):
        if isinstance(elem, pf.Str) and elem.text.isupper():
            return pf.Strong(elem)

    filters = [convert_quotes, add_emphasis]

    if __name__ == '__main__':
        pf.run_filters(filters)
    """

Legacy Compatibility Functions

Wrapper functions providing backward compatibility with pandocfilters.

def toJSONFilter(*args, **kwargs):
    """Wrapper for run_filter(), kept for backward compatibility with pandocfilters.

    Takes the arguments of run_filter(); new code can call run_filter()
    directly.
    """

def toJSONFilters(*args, **kwargs):
    """Wrapper for run_filters(), kept for backward compatibility with pandocfilters.

    Takes the arguments of run_filters(); new code can call run_filters()
    directly.
    """

Usage Examples

Basic Filter Pipeline

import panflute as pf

def remove_emphasis(elem, doc):
    """Unwrap emphasis: replace each Emph element with its own children."""
    if not isinstance(elem, pf.Emph):
        # Anything that is not emphasis is left as it is
        return None
    # Hand back the children as a list so they take the Emph's place
    return list(elem.content)

def count_words(elem, doc):
    """Add the number of words in each Str element to doc.word_count."""
    if not isinstance(elem, pf.Str):
        return None
    # Fall back to 0 in case no counter has been set on the document yet
    running_total = getattr(doc, 'word_count', 0)
    words = elem.text.split()
    doc.word_count = running_total + len(words)
    return None

def prepare(doc):
    """Reset the word counter and log that processing is starting."""
    setattr(doc, 'word_count', 0)
    start_message = "Starting document processing..."
    pf.debug(start_message)

def finalize(doc):
    """Log the final word count once all filters have run."""
    summary = f"Processing complete. Word count: {doc.word_count}"
    pf.debug(summary)

if __name__ == '__main__':
    # Filters run in list order: unwrap emphasis first, then count words
    pipeline = [remove_emphasis, count_words]
    pf.run_filters(pipeline, prepare=prepare, finalize=finalize)

Programmatic Document Processing

import panflute as pf
import io

# Create a document programmatically
# Build the individual blocks first, then assemble the document from them
title = pf.Header(pf.Str('Sample Document'), level=1)
body = pf.Para(pf.Str('This is a '), pf.Emph(pf.Str('sample')), pf.Str(' document.'))
doc = pf.Doc(title, body, metadata={'author': pf.MetaString('John Doe')})

def uppercase_filter(elem, doc):
    """Replace every Str element with an upper-cased copy; skip everything else."""
    if not isinstance(elem, pf.Str):
        return None
    shouted = elem.text.upper()
    return pf.Str(shouted)

# Process the document
processed_doc = pf.run_filters([uppercase_filter], doc=doc)

# Output as JSON
with io.StringIO() as output:
    pf.dump(processed_doc, output)
    json_result = output.getvalue()
    print(json_result)

Install with Tessl CLI