Pythonic Pandoc filters library for programmatic document manipulation and transformation
—
Complete set of Pandoc AST element classes for building and manipulating documents. These classes represent all the structural and content elements that can appear in Pandoc documents, organized in a hierarchy that mirrors Pandoc's internal representation.
Root document class that contains all content and metadata.
class Doc(Element):
"""
Pandoc document container with metadata and content blocks.
Parameters:
- *args: Block elements contained in the document
- metadata: document metadata (dict)
- format: output format such as 'html', 'latex', 'markdown'
- api_version: Pandoc API version tuple (default: (1, 23))
Properties:
- content: ListContainer of Block elements
- metadata: MetaMap of document metadata
- format: str, output format
- api_version: tuple, Pandoc API version
- pandoc_version: tuple, runtime Pandoc version
- pandoc_reader_options: dict, Pandoc reader options
Methods:
- get_metadata(key='', default=None, builtin=True): retrieve nested metadata
"""
def __init__(self, *args, metadata={}, format='html', api_version=(1, 23)): ...
def get_metadata(self, key='', default=None, builtin=True): ...
Block-level elements that form the main document structure.
class Plain(Block):
"""Plain text block without paragraph formatting."""
def __init__(self, *args): ...
class Para(Block):
"""Paragraph block containing inline elements."""
def __init__(self, *args): ...
class BlockQuote(Block):
"""Block quotation containing other blocks."""
def __init__(self, *args): ...
class Header(Block):
"""
Section header with level and inline content.
Parameters:
- *args: Inline elements for header text
- level: header level (1-6, default: 1)
- identifier: element ID string
- classes: list of CSS class names
- attributes: dict of additional attributes
"""
def __init__(self, *args, level=1, identifier='', classes=[], attributes={}): ...
class Div(Block):
"""
Generic block container with attributes.
Parameters:
- *args: Block elements contained in the div
- identifier: element ID string
- classes: list of CSS class names
- attributes: dict of additional attributes
"""
def __init__(self, *args, identifier='', classes=[], attributes={}): ...
class CodeBlock(Block):
"""
Code block with syntax highlighting support.
Parameters:
- text: code content string
- identifier: element ID string
- classes: list of language/class names for highlighting
- attributes: dict of additional attributes
"""
def __init__(self, text, identifier='', classes=[], attributes={}): ...
class RawBlock(Block):
"""
Raw content block for specific output formats.
Parameters:
- text: raw content string
- format: target format (e.g., 'html', 'latex', 'tex')
"""
def __init__(self, text, format): ...
class BulletList(Block):
"""Unordered list containing ListItem elements."""
def __init__(self, *args): ...
class OrderedList(Block):
"""
Ordered list with numbering style.
Parameters:
- *args: ListItem elements
- start: starting number (default: 1)
- style: numbering style ('Decimal', 'LowerRoman', 'UpperRoman', 'LowerAlpha', 'UpperAlpha')
- delimiter: delimiter style ('Period', 'OneParen', 'TwoParens')
"""
def __init__(self, *args, start=1, style='Decimal', delimiter='Period'): ...
class DefinitionList(Block):
"""Definition list containing DefinitionItem elements."""
def __init__(self, *args): ...
class LineBlock(Block):
"""Line block preserving line breaks."""
def __init__(self, *args): ...
class HorizontalRule(Block):
"""Horizontal rule/separator."""
def __init__(self): ...
class Figure(Block):
"""
Figure block (Pandoc 3.0+).
Parameters:
- *args: Block elements in figure body
- caption: Caption element (optional)
- identifier: element ID string
- classes: list of CSS class names
- attributes: dict of additional attributes
"""
def __init__(self, *args, caption=None, identifier='', classes=[], attributes={}): ...
class Null(Block):
"""Null block (deprecated in Pandoc 3.0+)."""
def __init__(self): ...
Inline elements that appear within block elements.
class Str(Inline):
"""
String of text.
Parameters:
- text: text content string
"""
def __init__(self, text): ...
class Space(Inline):
"""Inter-word space."""
def __init__(self): ...
class SoftBreak(Inline):
"""Soft line break (converted to space in most formats)."""
def __init__(self): ...
class LineBreak(Inline):
"""Hard line break."""
def __init__(self): ...
class Emph(Inline):
"""Emphasis/italic text containing inline elements."""
def __init__(self, *args): ...
class Strong(Inline):
"""Strong/bold text containing inline elements."""
def __init__(self, *args): ...
class Underline(Inline):
"""Underlined text containing inline elements."""
def __init__(self, *args): ...
class Strikeout(Inline):
"""Strikethrough text containing inline elements."""
def __init__(self, *args): ...
class Superscript(Inline):
"""Superscript text containing inline elements."""
def __init__(self, *args): ...
class Subscript(Inline):
"""Subscript text containing inline elements."""
def __init__(self, *args): ...
class SmallCaps(Inline):
"""Small capitals text containing inline elements."""
def __init__(self, *args): ...
class Span(Inline):
"""
Generic inline container with attributes.
Parameters:
- *args: Inline elements contained in the span
- identifier: element ID string
- classes: list of CSS class names
- attributes: dict of additional attributes
"""
def __init__(self, *args, identifier='', classes=[], attributes={}): ...
class Code(Inline):
"""
Inline code.
Parameters:
- text: code content string
- identifier: element ID string
- classes: list of language/class names
- attributes: dict of additional attributes
"""
def __init__(self, text, identifier='', classes=[], attributes={}): ...
class Math(Inline):
"""
Mathematical expression.
Parameters:
- text: LaTeX math expression string
- format: math format ('InlineMath' or 'DisplayMath')
"""
def __init__(self, text, format='InlineMath'): ...
class RawInline(Inline):
"""
Raw inline content for specific formats.
Parameters:
- text: raw content string
- format: target format (e.g., 'html', 'latex', 'tex')
"""
def __init__(self, text, format): ...
class Link(Inline):
"""
Hyperlink element.
Parameters:
- *args: Inline elements for link text
- url: link URL string
- title: link title string (tooltip)
- identifier: element ID string
- classes: list of CSS class names
- attributes: dict of additional attributes
"""
def __init__(self, *args, url='', title='', identifier='', classes=[], attributes={}): ...
class Image(Inline):
"""
Image element.
Parameters:
- *args: Inline elements for alt text
- url: image URL string
- title: image title string
- identifier: element ID string
- classes: list of CSS class names
- attributes: dict of additional attributes
"""
def __init__(self, *args, url='', title='', identifier='', classes=[], attributes={}): ...
class Note(Inline):
"""Footnote containing block elements."""
def __init__(self, *args): ...
class Quoted(Inline):
"""
Quoted text with quote type.
Parameters:
- *args: Inline elements for quoted content
- quote_type: quote style ('SingleQuote' or 'DoubleQuote')
"""
def __init__(self, *args, quote_type='DoubleQuote'): ...
class Cite(Inline):
"""
Citation with citation objects.
Parameters:
- *args: Inline elements for citation text
- citations: list of Citation objects
"""
def __init__(self, *args, citations=[]): ...
Elements for creating and structuring tables with headers, bodies, rows, and cells.
class Table(Block):
"""
Table container with header, body, footer, and metadata.
Parameters:
- *args: TableBody elements
- head: TableHead element (optional)
- foot: TableFoot element (optional)
- caption: Caption element (optional)
- colspec: list of (alignment, width) tuples for columns
"""
def __init__(self, *args, head=None, foot=None, caption=None, colspec=[]): ...
class TableHead(Block):
"""Table header section containing TableRow elements."""
def __init__(self, *args): ...
class TableFoot(Block):
"""Table footer section containing TableRow elements."""
def __init__(self, *args): ...
class TableBody(Block):
"""Table body section containing TableRow elements."""
def __init__(self, *args): ...
class TableRow(Element):
"""Table row containing TableCell elements."""
def __init__(self, *args): ...
class TableCell(Element):
"""
Individual table cell containing block elements.
Parameters:
- *args: Block elements in the cell
- alignment: cell alignment ('AlignLeft', 'AlignRight', 'AlignCenter', 'AlignDefault')
- rowspan: number of rows the cell spans (default: 1)
- colspan: number of columns the cell spans (default: 1)
"""
def __init__(self, *args, alignment='AlignDefault', rowspan=1, colspan=1): ...
class Caption(Element):
"""Table caption containing block elements."""
def __init__(self, *args): ...
Elements that form parts of list structures.
class ListItem(Element):
"""Individual list item containing block elements."""
def __init__(self, *args): ...
class Definition(Element):
"""Definition in definition list containing block elements."""
def __init__(self, *args): ...
class DefinitionItem(Element):
"""
Term-definition pair in definition list.
Parameters:
- term: list of Inline elements for the term
- definitions: list of Definition elements
"""
def __init__(self, term, definitions): ...
class LineItem(Element):
"""Line in line block containing inline elements."""
def __init__(self, *args): ...
class Citation(Element):
"""
Citation reference for academic citations.
Parameters:
- id: citation identifier string
- mode: citation mode ('NormalCitation', 'AuthorInText', 'SuppressAuthor')
- prefix: list of Inline elements before citation
- suffix: list of Inline elements after citation
- note_num: note number (int)
- hash: citation hash (int)
"""
def __init__(self, id, mode='NormalCitation', prefix=[], suffix=[], note_num=0, hash=0): ...
Elements for document frontmatter metadata.
class MetaList(MetaValue):
"""List of metadata values."""
def __init__(self, *args): ...
class MetaMap(MetaValue):
"""Map/dictionary of metadata key-value pairs."""
def __init__(self, *args, **kwargs): ...
class MetaInlines(MetaValue):
"""Inline content as metadata."""
def __init__(self, *args): ...
class MetaBlocks(MetaValue):
"""Block content as metadata."""
def __init__(self, *args): ...
class MetaString(MetaValue):
"""String metadata value."""
def __init__(self, text): ...
class MetaBool(MetaValue):
"""Boolean metadata value."""
def __init__(self, boolean): ...
import panflute as pf
# Create a complex document structure
doc = pf.Doc(
# Title and metadata
pf.Header(pf.Str('My Research Paper'), level=1),
# Abstract
pf.Header(pf.Str('Abstract'), level=2),
pf.Para(
pf.Str('This paper discusses '),
pf.Emph(pf.Str('important findings')),
pf.Str(' in the field of computational linguistics.')
),
# Introduction with citation
pf.Header(pf.Str('Introduction'), level=2),
pf.Para(
pf.Str('Previous work by '),
pf.Cite(
pf.Str('Smith et al.'),
citations=[pf.Citation('smith2020', mode='NormalCitation')]
),
pf.Str(' has shown that...')
),
# Code example
pf.Header(pf.Str('Implementation'), level=2),
pf.Para(pf.Str('The following code demonstrates our approach:')),
pf.CodeBlock(
'def process_text(text):\n return text.upper()',
classes=['python']
),
# Results table
pf.Header(pf.Str('Results'), level=2),
pf.Table(
pf.TableBody(
pf.TableRow(
pf.TableCell(pf.Plain(pf.Str('Metric'))),
pf.TableCell(pf.Plain(pf.Str('Before'))),
pf.TableCell(pf.Plain(pf.Str('After')))
),
pf.TableRow(
pf.TableCell(pf.Plain(pf.Str('Accuracy'))),
pf.TableCell(pf.Plain(pf.Str('85%'))),
pf.TableCell(pf.Plain(pf.Str('100%')))
),
pf.TableRow(
pf.TableCell(pf.Plain(pf.Str('Speed'))),
pf.TableCell(pf.Plain(pf.Str('100ms'))),
pf.TableCell(pf.Plain(pf.Str('70ms')))
)
),
caption=pf.Caption(pf.Plain(pf.Str('Performance Comparison')))
),
# List of results
pf.Header(pf.Str('Additional Results'), level=2),
pf.BulletList(
pf.ListItem(pf.Plain(pf.Str('Accuracy improved by 15%'))),
pf.ListItem(pf.Plain(pf.Str('Processing time reduced by 30%'))),
pf.ListItem(pf.Plain(pf.Str('Memory usage optimized')))
),
# Metadata
metadata={
'author': pf.MetaString('Dr. Jane Researcher'),
'date': pf.MetaString('2024-01-15'),
'keywords': pf.MetaList(
pf.MetaString('NLP'),
pf.MetaString('machine learning'),
pf.MetaString('text processing')
)
}
)
import panflute as pf
def enhance_links(elem, doc):
    """Make external hyperlinks open in a new browser tab.

    Written as a panflute filter action (signature ``action(elem, doc)``).

    Parameters:
    - elem: the element currently being visited
    - doc: the enclosing document (not used by this action)

    Returns:
    - elem; when it is a Link whose URL starts with an http(s) scheme, its
      ``target`` and ``rel`` attributes have been set first.
    """
    if isinstance(elem, pf.Link):
        # Match the complete scheme prefix rather than the bare text 'http':
        # a relative target such as 'http-basics.html' is an internal link.
        # URL schemes are case-insensitive, hence the lower().
        if elem.url.lower().startswith(('http://', 'https://')):
            elem.attributes['target'] = '_blank'
            # Stops the newly opened page from reaching back via window.opener.
            elem.attributes['rel'] = 'noopener noreferrer'
    return elem
def collect_headers(elem, doc):
    """Record each Header on ``doc.headers`` for a table of contents.

    Parameters:
    - elem: the element currently being visited
    - doc: the enclosing document; a ``headers`` list is created on it the
      first time a header is encountered

    Returns nothing; the visited element is not modified.
    """
    # Anything that is not a header is of no interest here.
    if not isinstance(elem, pf.Header):
        return

    # Create the accumulator lazily on first use.
    if not hasattr(doc, 'headers'):
        doc.headers = []

    entry = {
        'level': elem.level,
        'text': pf.stringify(elem),
        'identifier': elem.identifier,
    }
    doc.headers.append(entry)
# Process a document
if __name__ == '__main__':
pf.run_filters([enhance_links, collect_headers])
Install with Tessl CLI
npx tessl i tessl/pypi-panflute