TatSu takes a grammar in a variation of EBNF as input, and outputs a memoizing PEG/Packrat parser in Python.
npx @tessl/cli install tessl/pypi-tatsu@5.13.0

TatSu is a Python parser generator that compiles Extended Backus-Naur Form (EBNF) grammars into memoizing Parsing Expression Grammar (PEG) parsers. It generates efficient Packrat parsers with left-recursion support, provides both runtime parsing and static parser generation, and includes comprehensive AST building and semantic action capabilities for building compilers, interpreters, and domain-specific languages.
pip install TatSu

import tatsu

For specific functionality:
from tatsu import compile, parse, to_python_sourcecode, to_python_model
from tatsu.exceptions import ParseException
from tatsu.semantics import ModelBuilderSemantics

import tatsu
# Define a simple grammar
grammar = '''
expr = term (("+" | "-") term)*;
term = factor (("*" | "/") factor)*;
factor = "(" expr ")" | number;
number = /\d+/;
'''
# Compile the grammar into a parser model
model = tatsu.compile(grammar)
# Parse some input
result = model.parse("2 + 3 * 4")
print(result) # Outputs the AST
# Or parse directly in one step
result = tatsu.parse(grammar, "2 + 3 * 4")
# Generate Python parser code
parser_code = tatsu.to_python_sourcecode(grammar, name="Calculator")
# Generate object model classes
model_code = tatsu.to_python_model(grammar, name="Calculator")

TatSu follows a multi-stage architecture for parser generation and execution:
This design enables both interactive grammar development and production parser deployment, with support for advanced features like left-recursion, packrat memoization, and extensible semantic processing.
The primary interface for compiling grammars and parsing input text, providing both one-step parsing and separate compilation for reuse.
def compile(grammar, name=None, semantics=None, asmodel=False, config=None, **settings):
"""
Compile an EBNF grammar into a parser model.
Parameters:
- grammar: str, EBNF grammar definition
- name: str, optional name for the parser
- semantics: semantic actions object
- asmodel: bool, use ModelBuilderSemantics if True
- config: ParserConfig, parser configuration
- **settings: additional parser settings
Returns:
Model object that can parse input text
"""
def parse(grammar, input, start=None, name=None, semantics=None, asmodel=False, config=None, **settings):
"""
Parse input text using the provided grammar.
Parameters:
- grammar: str, EBNF grammar definition
- input: str, text to parse
- start: str, optional start rule name
- name: str, optional parser name
- semantics: semantic actions object
- asmodel: bool, use ModelBuilderSemantics if True
- config: ParserConfig, parser configuration
- **settings: additional parser settings
Returns:
Parsed AST or semantic action result
"""

Generate static Python parser code and object model classes from grammars for deployment and distribution.
def to_python_sourcecode(grammar, name=None, filename=None, config=None, **settings):
"""
Generate Python parser source code from grammar.
Parameters:
- grammar: str, EBNF grammar definition
- name: str, optional parser class name
- filename: str, optional source filename for error reporting
- config: ParserConfig, parser configuration
- **settings: additional generation settings
Returns:
str, Python source code for the parser
"""
def to_python_model(grammar, name=None, filename=None, base_type=None, config=None, **settings):
"""
Generate Python object model classes from grammar.
Parameters:
- grammar: str, EBNF grammar definition
- name: str, optional model class prefix
- filename: str, optional source filename
- base_type: type, base class for generated model classes
- config: ParserConfig, parser configuration
- **settings: additional generation settings
Returns:
str, Python source code for object model classes
"""

Comprehensive exception hierarchy for handling grammar compilation errors, parse failures, and semantic processing issues.
class ParseException(Exception):
"""Base exception for all TatSu parsing errors."""
class GrammarError(ParseException):
"""Grammar definition and compilation errors."""
class FailedParse(ParseException):
"""Base parse failure with position information."""
class FailedToken(FailedParse):
"""Expected token not found."""
class FailedPattern(FailedParse):
"""Regular expression pattern match failed."""

Build custom semantic actions to transform parse results, construct object models, and implement domain-specific processing during parsing.
class ModelBuilderSemantics:
"""Object model building semantics with type registration."""
def __init__(self, context=None, base_type=None, types=None):
"""
Initialize model builder semantics.
Parameters:
- context: parsing context
- base_type: base class for generated nodes (default: Node)
- types: dict of rule name to type mappings
"""
class ASTSemantics:
"""Basic AST building semantics for parse tree construction."""

Configure parser behavior, manage parsing state, and access parse position and rule information.
class ParserConfig:
"""Parser configuration with settings for parsing behavior."""
class ParseInfo:
"""Parse position and rule information with line tracking."""
class LineInfo:
"""Source line information with position data."""

Work with abstract syntax trees and structured parse results, including node creation, traversal, and manipulation.
class AST(dict):
"""Abstract syntax tree node, dictionary-based with parse info."""
class Node:
"""Base parse tree node with parent/child relationships."""
def children(self):
"""Get all child nodes."""
def text_lines(self):
"""Get source text lines for this node."""

Traverse and transform parse trees using visitor patterns with pre-order, depth-first, and context-aware walking strategies.
class NodeWalker:
"""Base tree walker with method dispatch."""
def walk(self, node):
"""Walk a parse tree starting from the given node."""
class DepthFirstWalker(NodeWalker):
"""Depth-first tree traversal walker."""
class ContextWalker(NodeWalker):
"""Context-aware tree walking with stack management."""

from typing import Any, Dict, List, Optional, Union, Callable
# Core types
GrammarType = str
InputType = str
ASTType = Union[Dict[str, Any], List[Any], str, None]
SemanticActionType = Callable[[Any], Any]
# Configuration types
ParserSettings = Dict[str, Any]
GenerationSettings = Dict[str, Any]
# Model types
class Model:
"""Compiled grammar model that can parse input."""
def parse(self, input: str, **kwargs) -> ASTType:
"""Parse input text and return AST."""
def pretty(self) -> str:
"""Return pretty-printed grammar."""