Pythonic Pandoc filters library for programmatic document manipulation and transformation
—
Core functions for reading and writing Pandoc JSON documents, running filter functions, and managing document processing workflows. These functions handle the fundamental operations of loading documents from Pandoc, processing them with filters, and outputting results.
Load Pandoc JSON documents from input streams and convert them to panflute Doc objects.
def load(input_stream=None) -> Doc:
    """
    Load a JSON-encoded document and return it as a Doc element.

    Parameters:
    - input_stream: text stream used as input (default is sys.stdin)

    Returns:
    Doc: Parsed document with format and API version set

    Example:
        import panflute as pf

        # Load from stdin (typical filter usage)
        doc = pf.load()

        # Load from file
        with open('document.json', encoding='utf-8') as f:
            doc = pf.load(f)

        # Load from string
        import io
        json_str = '{"pandoc-api-version":[1,23],"meta":{},"blocks":[]}'
        doc = pf.load(io.StringIO(json_str))
    """

Convert panflute Doc objects to Pandoc JSON format and write to output streams.
def dump(doc: Doc, output_stream=None):
    """
    Serialize a Doc object as JSON-encoded text and write it to a stream.

    Parameters:
    - doc: Document to serialize
    - output_stream: text stream used as output (default is sys.stdout)

    Example:
        import panflute as pf

        doc = pf.Doc(pf.Para(pf.Str('Hello world')))

        # Dump to stdout (typical filter usage)
        pf.dump(doc)

        # Dump to file
        with open('output.json', 'w', encoding='utf-8') as f:
            pf.dump(doc, f)

        # Dump to string (read the buffer while the StringIO is still open)
        import io
        with io.StringIO() as f:
            pf.dump(doc, f)
            json_output = f.getvalue()
    """

Run a single filter function on a document with optional preprocessing and postprocessing.
def run_filter(action: callable,
               prepare: callable = None,
               finalize: callable = None,
               input_stream = None,
               output_stream = None,
               doc: Doc = None,
               stop_if: callable = None,
               **kwargs):
    """
    Apply a filter function to each element in a document.

    Parameters:
    - action: function taking (element, doc) that processes elements
    - prepare: function executed before filtering (receives doc)
    - finalize: function executed after filtering (receives doc)
    - input_stream: input source (default stdin)
    - output_stream: output destination (default stdout)
    - doc: existing Doc to process instead of loading from stream
    - stop_if: function taking (element) to stop traversal early
    - **kwargs: additional arguments passed to action function

    Returns:
    Doc: processed document if doc parameter provided, otherwise None

    Example:
        import panflute as pf

        def emphasize_words(elem, doc):
            if isinstance(elem, pf.Str) and 'important' in elem.text:
                # Keep the counter reported by finalize_doc in sync.
                doc.emphasis_count += 1
                return pf.Emph(elem)

        def prepare_doc(doc):
            doc.emphasis_count = 0

        def finalize_doc(doc):
            pf.debug(f"Added {doc.emphasis_count} emphasis elements")

        if __name__ == '__main__':
            pf.run_filter(emphasize_words, prepare=prepare_doc, finalize=finalize_doc)
    """

Run multiple filter functions sequentially on a document.
def run_filters(actions: list,
                prepare: callable = None,
                finalize: callable = None,
                input_stream = None,
                output_stream = None,
                doc: Doc = None,
                stop_if: callable = None,
                **kwargs):
    """
    Apply multiple filter functions sequentially to a document.

    Parameters:
    - actions: list of functions, each taking (element, doc)
    - prepare: function executed before filtering
    - finalize: function executed after all filtering
    - input_stream: input source (default stdin)
    - output_stream: output destination (default stdout)
    - doc: existing Doc to process instead of loading from stream
    - stop_if: function taking (element) to stop traversal early
    - **kwargs: additional arguments passed to all action functions

    Returns:
    Doc: processed document if doc parameter provided, otherwise None

    Example:
        import panflute as pf

        def convert_quotes(elem, doc):
            if isinstance(elem, pf.Str):
                # NOTE(review): as written, both replace() calls map '"' to
                # '"', so the text is returned unchanged. The intended target
                # characters (presumably typographic quotes) are not
                # recoverable from this page - confirm before relying on it.
                return pf.Str(elem.text.replace('"', '"').replace('"', '"'))

        def add_emphasis(elem, doc):
            if isinstance(elem, pf.Str) and elem.text.isupper():
                return pf.Strong(elem)

        filters = [convert_quotes, add_emphasis]

        if __name__ == '__main__':
            pf.run_filters(filters)
    """

Wrapper functions providing backward compatibility with pandocfilters.
def toJSONFilter(*args, **kwargs):
    """
    Wrapper for run_filter() - backward compatibility with pandocfilters.

    Parameters:
    - *args, **kwargs: presumably forwarded unchanged to run_filter(); the
      body is not shown in this reference - confirm against the library source.
    """
def toJSONFilters(*args, **kwargs):
    """
    Wrapper for run_filters() - backward compatibility with pandocfilters.

    Parameters:
    - *args, **kwargs: presumably forwarded unchanged to run_filters(); the
      body is not shown in this reference - confirm against the library source.
    """


import panflute as pf
def remove_emphasis(elem, doc):
    """
    Strip emphasis while keeping the emphasized content.

    Parameters:
    - elem: element currently being visited
    - doc: document being processed (unused here)

    Returns:
    list: the children of elem when elem is a pf.Emph, otherwise None
    """
    if not isinstance(elem, pf.Emph):
        return None
    # Hand back the children themselves so they stand in for the wrapper.
    return [*elem.content]
def count_words(elem, doc):
    """
    Add the number of whitespace-separated words in a Str to doc.word_count.

    Parameters:
    - elem: element currently being visited
    - doc: document carrying the running word_count attribute

    Returns:
    None in every case; only doc.word_count is updated.
    """
    if not isinstance(elem, pf.Str):
        return None
    # Fall back to 0 when no counter has been attached to the document yet.
    running_total = getattr(doc, 'word_count', 0)
    words_here = len(elem.text.split())
    doc.word_count = running_total + words_here
def prepare(doc):
    """
    Set up the document before any filter runs.

    Resets doc.word_count to zero and emits a start message via pf.debug.
    """
    doc.word_count = 0
    start_message = "Starting document processing..."
    pf.debug(start_message)
def finalize(doc):
    """
    Wrap up after all filters have run.

    Reports the final value of doc.word_count via pf.debug.
    """
    summary = f"Processing complete. Word count: {doc.word_count}"
    pf.debug(summary)
# Script entry point: only runs the filters when executed directly.
if __name__ == '__main__':
    # The filters are applied sequentially in list order: remove_emphasis
    # first, then count_words. prepare runs before filtering, finalize after.
    pf.run_filters([remove_emphasis, count_words],
                   prepare=prepare, finalize=finalize)


import panflute as pf
import io
# Create a document programmatically:
# a level-1 header, one paragraph containing an emphasized word,
# and an 'author' metadata entry.
doc = pf.Doc(
    pf.Header(pf.Str('Sample Document'), level=1),
    pf.Para(pf.Str('This is a '), pf.Emph(pf.Str('sample')), pf.Str(' document.')),
    metadata={'author': pf.MetaString('John Doe')}
)
def uppercase_filter(elem, doc):
    """
    Upper-case the text of Str elements.

    Parameters:
    - elem: element currently being visited
    - doc: document being processed (unused here)

    Returns:
    pf.Str: a new Str holding the upper-cased text when elem is a pf.Str,
    otherwise None
    """
    if not isinstance(elem, pf.Str):
        return None
    shouted = elem.text.upper()
    return pf.Str(shouted)
# Process the document
# Passing doc= makes run_filters work on the existing in-memory Doc and
# return it, instead of loading from an input stream.
processed_doc = pf.run_filters([uppercase_filter], doc=doc)

# Output as JSON
with io.StringIO() as output:
    pf.dump(processed_doc, output)
    # Read the buffer while the StringIO is still open.
    json_result = output.getvalue()
    print(json_result)

Install with Tessl CLI
npx tessl i tessl/pypi-panflute