TatSu takes a grammar in a variation of EBNF as input, and outputs a memoizing PEG/Packrat parser in Python.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Work with abstract syntax trees and structured parse results, including node creation, traversal, manipulation, and conversion to custom object models. TatSu provides flexible AST representation and object model generation capabilities.
Foundation classes for representing parse results as structured data with position information and manipulation methods.
class AST(dict):
    """
    Abstract syntax tree node, dictionary-based with parse info.

    Features:
    - Dictionary-based storage for flexible data access
    - Parse position information tracking
    - JSON serialization support
    - Immutable freezing for optimization
    - Automatic string representation

    This is an API outline: every member below has a docstring-only body.
    """

    @property
    def frozen(self):
        """Check if AST is frozen (immutable)."""

    @property
    def parseinfo(self):
        """Get parse position information for this node."""

    def copy(self):
        """
        Create a deep copy of the AST node.

        Returns:
            AST: Deep copy of the node with all child nodes copied
        """

    def asjson(self):
        """
        Convert AST to JSON-serializable representation.

        Returns:
            dict: JSON-serializable dictionary representation
        """

    def set_parseinfo(self, parseinfo):
        """
        Set parse position information for this node.

        Parameters:
        - parseinfo (ParseInfo): Parse position and context information
        """

    def _set(self, key, value):
        """Set a value, handling frozen state."""

    def _setlist(self, key, values):
        """Set a list value, handling frozen state."""


# Usage example:
import tatsu
from tatsu.ast import AST

# Raw string: the regex escape ``\d`` must reach TatSu unchanged, and in a
# normal string literal it is an invalid escape sequence that Python warns about.
# NOTE(review): TatSu's documented closure syntax is ``{ e }*`` and an AST is
# documented as being produced only for rules with named elements
# (``name:e``) - confirm this grammar compiles and that ``result`` really is
# an AST before relying on the calls below.
grammar = r'''
expr = term ("+" term)*;
term = number;
number = /\d+/;
'''

model = tatsu.compile(grammar)
result = model.parse("1 + 2 + 3")

# AST is a dictionary-like structure
print(isinstance(result, AST))  # True
print(result.keys())  # Access keys like a dictionary
print(result.asjson())  # Convert to JSON

# Access parse information if enabled
if hasattr(result, 'parseinfo') and result.parseinfo:
    info = result.parseinfo
    print(f"Parsed rule: {info.rule}")
    print(f"Position: {info.pos}-{info.endpos}")

# Structured node classes with parent-child relationships, position tracking, and typed representations.
class Node:
    """
    Base parse tree node with parent/child relationships.

    Features:
    - Parent/child relationship tracking
    - Source position and line information
    - Text content and comment preservation
    - Tree traversal and manipulation methods
    - JSON serialization support

    This is an API outline: every member below has a docstring-only body.
    """

    @property
    def parent(self):
        """Get parent node in the parse tree."""

    @property
    def line(self):
        """Get starting line number (1-based)."""

    @property
    def endline(self):
        """Get ending line number (1-based)."""

    @property
    def col(self):
        """Get starting column number (1-based)."""

    @property
    def context(self):
        """Get parsing context information."""

    @property
    def text(self):
        """Get source text for this node."""

    @property
    def comments(self):
        """Get associated comments."""

    def children(self):
        """
        Get all child nodes in the parse tree.

        Returns:
            list: List of direct child nodes
        """

    def children_list(self):
        """
        Get flattened list of all child nodes.

        Returns:
            list: Flattened list including nested children
        """

    def children_set(self):
        """
        Get set of all child nodes (no duplicates).

        Returns:
            set: Set of all child nodes
        """

    def text_lines(self):
        """
        Get source text lines for this node.

        Returns:
            list: List of source text lines covered by this node
        """

    def line_index(self):
        """
        Get line index information for this node.

        Returns:
            LineInfo: Detailed line position information
        """

    def asjson(self):
        """
        Convert node to JSON-serializable representation.

        Returns:
            dict: JSON representation of the node and its children
        """


# Usage example:
import tatsu
from tatsu.semantics import ModelBuilderSemantics
from tatsu.objectmodel import Node

# Raw string: keeps the regex escape ``\d`` intact without relying on Python
# passing an invalid escape sequence through.
# NOTE(review): TatSu's documented closure syntax is ``{ e }*``, and
# ModelBuilderSemantics is documented as building typed nodes for rules that
# declare a class name (``rule::ClassName``) - confirm that this grammar
# compiles and that ``result`` is a Node.
grammar = r'''
program = statement*;
statement = assignment | expression;
assignment = identifier "=" expression;
expression = identifier | number;
identifier = /[a-zA-Z][a-zA-Z0-9]*/;
number = /\d+/;
'''

# Use ModelBuilderSemantics to create Node objects
model = tatsu.compile(grammar)
result = model.parse("x = 42", semantics=ModelBuilderSemantics())

# Work with Node objects
print(isinstance(result, Node))  # True
print(result.children())  # Get child nodes
print(result.text)  # Get source text
# Tree traversal
def print_tree(node, depth=0):
    """Print ``node`` and, recursively, its ``Node`` children.

    Each line has the form ``<ClassName>: <node>`` and is prefixed with one
    space per nesting level.

    Parameters:
    - node: object exposing ``children()``
    - depth: nesting level of ``node`` (0 for the root)
    """
    indent = " " * depth
    print(f"{indent}{node.__class__.__name__}: {node}")
    for child in node.children():
        # Non-Node children (e.g. plain values) are skipped, not printed
        if isinstance(child, Node):
            print_tree(child, depth + 1)
print_tree(result)

# Create domain-specific object models with typed nodes and custom behavior.
# Custom node base class
class CustomNode(Node):
    """Custom base node with additional functionality.

    All keyword arguments are forwarded to ``Node``; afterwards
    ``_validate()`` is invoked so subclasses can hook in checks.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Runs inside this constructor, i.e. before any statements a subclass
        # places after its own ``super().__init__()`` call.
        self._validate()

    def _validate(self):
        """Override to add validation logic."""

    def __repr__(self):
        """Return ``ClassName({...})`` showing the public instance attributes."""
        # A Node is not a mapping, so ``dict(self)`` cannot be used here; show
        # the non-underscore instance attributes instead.
        public = {
            key: value
            for key, value in vars(self).items()
            if not key.startswith('_')
        }
        return f"{self.__class__.__name__}({public})"
# Example domain-specific nodes
class Expression(CustomNode):
    """Base class for all expressions."""
class BinaryOperation(Expression):
    """Binary operation with left/right operands."""

    def __init__(self, left=None, operator=None, right=None, **kwargs):
        super().__init__(**kwargs)
        self.left = left
        self.operator = operator
        self.right = right

    def evaluate(self, context=None):
        """Evaluate the binary operation.

        Operands that expose ``evaluate`` are evaluated with ``context``;
        any other operand is used as-is.

        Returns:
            The sum (``'+'``) or product (``'*'``) of the operand values.

        Raises:
            ValueError: if ``self.operator`` is not a supported operator.
        """
        left_val = self._operand_value(self.left, context)
        right_val = self._operand_value(self.right, context)
        if self.operator == '+':
            return left_val + right_val
        if self.operator == '*':
            return left_val * right_val
        # Add more operators as needed
        # Fail loudly instead of silently returning None for unknown operators.
        raise ValueError(f"Unsupported operator: {self.operator!r}")

    @staticmethod
    def _operand_value(operand, context):
        """Return ``operand.evaluate(context)`` when available, else ``operand``."""
        if hasattr(operand, 'evaluate'):
            return operand.evaluate(context)
        return operand
class Literal(Expression):
    """Literal value expression."""

    def __init__(self, value=None, **kwargs):
        super().__init__(**kwargs)
        self.value = value

    def evaluate(self, context=None):
        """Return the literal value (``context`` is not used)."""
        return self.value


# Transform and modify AST structures for optimization, analysis, and code generation.
class ASTTransformer:
    """Base class for AST transformation operations.

    Subclasses add ``visit_<ClassName>`` methods; ``visit`` dispatches on the
    class name of the node and falls back to ``generic_visit``.
    """

    def transform(self, ast):
        """
        Transform an AST node and its children.

        Parameters:
        - ast: AST node to transform

        Returns:
            Transformed AST node
        """
        # Delegate to the visitor dispatch so the transformed node is returned
        # (a docstring-only body would return None).
        return self.visit(ast)

    def visit(self, node):
        """Visit a single node for transformation."""
        method_name = f'visit_{node.__class__.__name__}'
        visitor = getattr(self, method_name, self.generic_visit)
        return visitor(node)

    def generic_visit(self, node):
        """Default visitor that processes children and returns ``node``."""
        if isinstance(node, Node):
            for child in node.children():
                # The result of visiting a child is not stored back on the
                # parent; visitors that replace children must reassign them.
                self.visit(child)
        return node
# Example transformations
class ConstantFolding(ASTTransformer):
    """Fold constant expressions at compile time."""

    def visit_BinaryOperation(self, node):
        """Fold ``node`` into a ``Literal`` when both operands are literals.

        Only ``'+'`` and ``'*'`` are folded; any other operator, or any
        non-literal operand, leaves ``node`` in place (with its operands
        already transformed).
        """
        # First transform children
        node.left = self.visit(node.left)
        node.right = self.visit(node.right)

        # If both operands are literals, fold the operation
        if isinstance(node.left, Literal) and isinstance(node.right, Literal):
            if node.operator == '+':
                return Literal(value=node.left.value + node.right.value)
            if node.operator == '*':
                return Literal(value=node.left.value * node.right.value)
        return node
class DeadCodeElimination(ASTTransformer):
    """Remove unreachable code."""

    def visit_ConditionalStatement(self, node):
        """Replace a conditional whose condition is a ``Literal`` by the live branch.

        Returns:
            The transformed ``then_branch`` when the literal is truthy, the
            transformed ``else_branch`` (or ``None`` when it is falsy/absent)
            otherwise; ``node`` itself when the condition is not a literal.
        """
        node.condition = self.visit(node.condition)

        # If condition is a constant, eliminate dead branch
        if not isinstance(node.condition, Literal):
            return node
        if node.condition.value:
            return self.visit(node.then_branch)
        if node.else_branch:
            return self.visit(node.else_branch)
        return None


# Analyze AST structures for semantic correctness, type checking, and code quality.
class ASTAnalyzer:
    """Base class for AST analysis operations.

    Findings are collected in ``self.errors`` and ``self.warnings``; subclasses
    add ``visit_<ClassName>`` methods that append to them.
    """

    def __init__(self):
        self.errors = []
        self.warnings = []

    def analyze(self, ast):
        """
        Analyze an AST for various properties.

        Parameters:
        - ast: AST node to analyze

        Returns:
            AnalysisResult: Results of the analysis
        """
        self.visit(ast)
        # The result receives this analyzer's own lists, which are not reset
        # between calls to analyze().
        return AnalysisResult(self.errors, self.warnings)

    def visit(self, node):
        """Visit a node for analysis."""
        method_name = f'visit_{node.__class__.__name__}'
        visitor = getattr(self, method_name, self.generic_visit)
        return visitor(node)

    def generic_visit(self, node):
        """Default visitor that processes children."""
        if isinstance(node, Node):
            for child in node.children():
                self.visit(child)
class TypeChecker(ASTAnalyzer):
    """Type checking analyzer."""

    def __init__(self):
        super().__init__()
        self.symbol_table = {}

    def visit_BinaryOperation(self, node):
        """Record an error when the operand types of ``node`` differ."""
        # Descend first so binary operations nested in the operands are
        # checked as well.
        self.visit(node.left)
        self.visit(node.right)

        left_type = self.get_type(node.left)
        right_type = self.get_type(node.right)
        if left_type != right_type:
            self.errors.append(f"Type mismatch: {left_type} {node.operator} {right_type}")

    def get_type(self, node):
        """Infer the type of a node.

        Returns:
            str: the Python type name of a ``Literal``'s value, otherwise
            ``'unknown'``.
        """
        if isinstance(node, Literal):
            return type(node.value).__name__
        # Add more type inference logic
        return 'unknown'
class AnalysisResult:
    """Results of AST analysis.

    Parameters:
    - errors: collection of error messages
    - warnings: collection of warning messages
    """

    def __init__(self, errors, warnings):
        self.errors = errors
        self.warnings = warnings

    @property
    def is_valid(self):
        """Check if analysis found no errors."""
        return not self.errors


# Backward compatibility alias
ParseModel = Node


# Legacy AST conversion
def ast_to_model(ast_node):
    """Convert legacy AST to Node-based model.

    An ``AST`` becomes a ``Node`` carrying one attribute per key, a list is
    converted element-wise, and any other value is returned unchanged.
    """
    if isinstance(ast_node, AST):
        # Convert AST dict to Node object
        node = Node()
        for key, value in ast_node.items():
            # The recursive call already returns non-AST, non-list values
            # untouched, so no separate branch is needed for them.
            setattr(node, key, ast_to_model(value))
        return node
    if isinstance(ast_node, list):
        return [ast_to_model(item) for item in ast_node]
    return ast_node
def model_to_ast(node):
    """Convert Node-based model to legacy AST.

    A ``Node`` becomes an ``AST`` holding its instance attributes whose names
    do not start with an underscore, a list is converted element-wise, and
    any other value is returned unchanged.
    """
    if isinstance(node, Node):
        result = AST()
        for key, value in vars(node).items():
            if not key.startswith('_'):
                result[key] = model_to_ast(value)
        return result
    if isinstance(node, list):
        return [model_to_ast(item) for item in node]
    return node


class ASTBuildingSemantics:
    """Build custom AST during parsing."""

    def binary_expr(self, ast):
        """Fold ``ast`` into a left-nested chain of ``BinaryOperation`` nodes.

        ``ast[0]`` is the first operand and ``ast[1]`` an iterable of
        ``(operator, operand)`` pairs.
        """
        left, ops = ast[0], ast[1]
        result = left
        for op, right in ops:
            result = BinaryOperation(
                left=result,
                operator=op,
                right=right,
            )
        return result

    def number(self, ast):
        """Wrap the matched text, converted with ``int``, in a ``Literal``."""
        return Literal(value=int(ast))
# Usage
model = tatsu.compile(grammar)
result = model.parse("2 + 3 * 4", semantics=ASTBuildingSemantics())

# Transform and analyze
transformer = ConstantFolding()
optimized = transformer.transform(result)
analyzer = TypeChecker()
analysis = analyzer.analyze(optimized)
if analysis.is_valid:
    print("AST is valid")
    print(f"Result: {optimized.evaluate()}")

# Install with Tessl CLI
npx tessl i tessl/pypi-tatsu