A modern general-purpose parsing library for Python that can parse any context-free grammar efficiently
—
Parse tree representation and processing including tree nodes, visitor patterns for traversal, and transformer classes for modification and data extraction.
The Tree class represents parse tree nodes containing rule data and child elements.
class Tree:
"""
Parse tree node containing rule data and children.
"""
def __init__(self, data: str, children: list, meta=None):
"""
Initialize tree node.
Parameters:
- data: Rule name or alias
- children: List of child nodes/tokens
- meta: Position metadata (line, column, etc.)
"""
def pretty(self, indent_str: str = ' ') -> str:
"""
Return indented string representation for debugging.
Parameters:
- indent_str: String used for indentation
Returns:
str: Pretty-printed tree representation
"""
def iter_subtrees(self) -> Iterator['Tree']:
"""
Depth-first iteration over all subtrees including self.
Returns:
Iterator[Tree]: Subtree iterator
"""
def iter_subtrees_topdown(self) -> Iterator['Tree']:
"""
Breadth-first iteration over subtrees.
Returns:
Iterator[Tree]: Subtree iterator (top-down)
"""
def find_pred(self, pred: Callable[['Tree'], bool]) -> Iterator['Tree']:
"""
Find all nodes matching predicate function.
Parameters:
- pred: Predicate function taking Tree and returning bool
Returns:
Iterator[Tree]: Matching nodes
"""
def find_data(self, data: str) -> Iterator['Tree']:
"""
Find all nodes with specific data value.
Parameters:
- data: Rule name to search for
Returns:
Iterator[Tree]: Nodes with matching data
"""
def scan_values(self, pred: Callable[[Any], bool]) -> Iterator[Any]:
"""
Find all values (non-Tree children) matching predicate.
Parameters:
- pred: Predicate function for values
Returns:
Iterator[Any]: Matching values
"""
def expand_kids_by_index(self, *indices: int) -> None:
"""
Expand children at specified indices into parent's children list.
Parameters:
- *indices: Indices of children to expand
"""
def expand_kids_by_data(self, *data_values: str) -> None:
"""
Expand children with specified data values.
Parameters:
- *data_values: Data values of children to expand
"""
def copy(self) -> 'Tree':
"""
Create shallow copy of tree.
Returns:
Tree: Copied tree
"""
def set(self, data: str, children: list) -> 'Tree':
"""
Create new tree with specified data and children.
Parameters:
- data: New rule name
- children: New children list
Returns:
Tree: New tree instance
"""
# Properties
data: str # Rule name or alias
children: list # Child nodes and tokens
meta: Meta # Position metadata
# Position properties (deprecated, use meta)
line: int # Line number
column: int # Column number
end_line: int # End line number
end_column: int # End column numberSlotted tree class for memory efficiency in large parse trees.
class SlottedTree(Tree):
"""
Memory-optimized tree with __slots__.
"""
__slots__ = ('data', 'children', 'rule', '_meta')Container for position information when propagate_positions is enabled.
class Meta:
"""
Container for position metadata.
"""
def __init__(self):
self.empty = True
# Position attributes (set by parser when propagate_positions=True)
line: int # Start line (1-based)
column: int # Start column (1-based)
start_pos: int # Start position in input
end_line: int # End line (1-based)
end_column: int # End column (1-based)
end_pos: int # End position in input
empty: bool # Whether metadata is emptyClasses for transforming parse trees into other data structures or modified trees.
class Transformer:
"""
Transforms trees bottom-up according to node data.
Calls methods named after rules to transform nodes.
"""
def __init__(self, visit_tokens: bool = True):
"""
Initialize transformer.
Parameters:
- visit_tokens: Whether to visit token nodes
"""
def transform(self, tree: Tree) -> Any:
"""
Transform tree and return result.
Parameters:
- tree: Tree to transform
Returns:
Any: Transformation result
"""
def __mul__(self, other: 'Transformer') -> 'TransformerChain':
"""
Chain transformers using * operator.
Parameters:
- other: Transformer to chain
Returns:
TransformerChain: Chained transformers
"""
def __default__(self, data: str, children: list, meta) -> Any:
"""
Default transformation when no method found.
Parameters:
- data: Rule name
- children: Transformed children
- meta: Position metadata
Returns:
Any: Default transformation (usually Tree)
"""
def __default_token__(self, token: Token) -> Any:
"""
Default token transformation.
Parameters:
- token: Token to transform
Returns:
Any: Token transformation result
"""
__visit_tokens__: bool # Class attribute for token visitingTransformers that modify trees in-place for memory efficiency.
class Transformer_InPlace(Transformer):
"""
Non-recursive transformer that modifies tree in-place.
"""
class Transformer_InPlaceRecursive(Transformer):
"""
Recursive transformer that modifies tree in-place.
"""
class Transformer_NonRecursive(Transformer):
"""
Non-recursive transformer for processing huge trees.
Avoids stack overflow on very deep trees.
"""Utility for combining multiple transformers.
class TransformerChain:
"""
Chains multiple transformers together.
"""
def __init__(self, *transformers: Transformer):
"""
Initialize transformer chain.
Parameters:
- *transformers: Transformers to chain
"""
def transform(self, tree: Tree) -> Any:
"""
Apply all transformers in sequence.
Parameters:
- tree: Input tree
Returns:
Any: Final transformation result
"""
def __mul__(self, other: Transformer) -> 'TransformerChain':
"""
Add transformer to chain.
Parameters:
- other: Transformer to add
Returns:
TransformerChain: Extended chain
"""Classes for traversing trees without modification, useful for analysis and extraction.
class Visitor:
"""
Non-recursive tree visitor for analysis without modification.
"""
def visit(self, tree: Tree) -> None:
"""
Visit tree bottom-up.
Parameters:
- tree: Tree to visit
"""
def visit_topdown(self, tree: Tree) -> None:
"""
Visit tree top-down.
Parameters:
- tree: Tree to visit
"""
def __default__(self, tree: Tree) -> None:
"""
Default visit method when no specific method found.
Parameters:
- tree: Tree node being visited
"""
class Visitor_Recursive:
"""
Recursive tree visitor, slightly faster than non-recursive.
"""Advanced visitor with explicit visit control for complex tree processing.
class Interpreter:
"""
Tree interpreter with explicit visit control.
"""
def visit(self, tree: Tree) -> Any:
"""
Visit single node without visiting children.
Parameters:
- tree: Tree node to visit
Returns:
Any: Visit result
"""
def visit_children(self, tree: Tree) -> list:
"""
Visit all children of node.
Parameters:
- tree: Parent tree node
Returns:
list: Results from visiting children
"""
def __default__(self, tree: Tree) -> Any:
"""
Default behavior when no method found.
Parameters:
- tree: Tree node
Returns:
Any: Default result
"""Decorators and functions for customizing visitor/transformer behavior.
def v_args(inline: bool = False, meta: bool = False, tree: bool = False,
wrapper: Callable = None):
"""
Decorator to modify visitor/transformer method arguments.
Parameters:
- inline: Pass children as *args instead of list
- meta: Include meta information as parameter
- tree: Pass entire tree instead of just children
- wrapper: Custom wrapper function
Returns:
Callable: Decorator function
"""
def inline_args(f):
"""
Deprecated decorator. Use v_args(inline=True) instead.
"""
def visit_children_decor(func):
"""
Decorator for interpreter methods to auto-visit children.
Parameters:
- func: Method to decorate
Returns:
Callable: Decorated method
"""
def merge_transformers(*transformers, **kwargs) -> Transformer:
"""
Merge multiple transformers into combined namespaces.
Parameters:
- *transformers: Transformer instances to merge
- **kwargs: Additional options
Returns:
Transformer: Merged transformer
"""Transformer for processing ambiguous parse results.
class CollapseAmbiguities(Transformer):
"""
Transforms ambiguous trees into lists of unambiguous alternatives.
Converts _ambig nodes into lists containing all possible interpretations.
"""Exception used to remove nodes during transformation.
class Discard(Exception):
"""
When raised in transformer callback, discards node from parent.
The node will not appear in the transformed result.
"""from lark import Lark, Tree
parser = Lark(grammar)
tree = parser.parse("2 + 3 * 4")
# Pretty print tree
print(tree.pretty())
# Find specific nodes
additions = list(tree.find_data('add'))
numbers = list(tree.find_data('number'))
# Iterate over all subtrees
for subtree in tree.iter_subtrees():
print(f"Rule: {subtree.data}, Children: {len(subtree.children)}")from lark import Transformer, v_args
class Calculator(Transformer):
"""Transform parse tree into calculated result."""
@v_args(inline=True)
def add(self, left, right):
return left + right
@v_args(inline=True)
def mul(self, left, right):
return left * right
def number(self, children):
return int(children[0])
# Apply transformer
calc = Calculator()
result = calc.transform(tree)
print(f"Result: {result}")from lark import Transformer, Token
class MyTransformer(Transformer):
def __init__(self):
super().__init__(visit_tokens=True)
def IDENTIFIER(self, token):
# Transform identifier tokens
return token.value.upper()
def __default_token__(self, token):
# Default token handling
return token.valuefrom lark import Visitor
class VariableCollector(Visitor):
def __init__(self):
self.variables = set()
def identifier(self, tree):
self.variables.add(tree.children[0].value)
# Collect variables
collector = VariableCollector()
collector.visit(tree)
print(f"Variables found: {collector.variables}")from lark import Transformer
class FirstTransform(Transformer):
def rule1(self, children):
return "first_" + str(children[0])
class SecondTransform(Transformer):
def rule1(self, children):
return children[0] + "_second"
# Chain transformers
chained = FirstTransform() * SecondTransform()
result = chained.transform(tree)from lark import Interpreter
class MyInterpreter(Interpreter):
def expression(self, tree):
# Manually control child visiting
left = self.visit(tree.children[0])
op = tree.children[1].value
right = self.visit(tree.children[2])
if op == '+':
return left + right
elif op == '*':
return left * right
def number(self, tree):
return int(tree.children[0].value)from lark import Lark, CollapseAmbiguities
# Parse with ambiguity="explicit"
parser = Lark(ambiguous_grammar, ambiguity="explicit")
tree = parser.parse(text)
# Collapse ambiguities into lists
collapse = CollapseAmbiguities()
alternatives = collapse.transform(tree)
# Process each alternative
for alt in alternatives:
process_interpretation(alt)Install with Tessl CLI
npx tessl i tessl/pypi-lark-parser