CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-lark-parser

A modern general-purpose parsing library for Python that can parse any context-free grammar efficiently

Pending
Overview
Eval results
Files

tree-processing.mddocs/

Tree Processing

Parse tree representation and processing including tree nodes, visitor patterns for traversal, and transformer classes for modification and data extraction.

Capabilities

Tree Representation

The Tree class represents parse tree nodes containing rule data and child elements.

class Tree:
    """
    Parse tree node containing rule data and children.
    """
    
    def __init__(self, data: str, children: list, meta=None):
        """
        Initialize tree node.
        
        Parameters:
        - data: Rule name or alias
        - children: List of child nodes/tokens
        - meta: Position metadata (line, column, etc.)
        """
    
    def pretty(self, indent_str: str = '  ') -> str:
        """
        Return indented string representation for debugging.
        
        Parameters:
        - indent_str: String used for indentation
        
        Returns:
        str: Pretty-printed tree representation
        """
    
    def iter_subtrees(self) -> Iterator['Tree']:
        """
        Depth-first iteration over all subtrees including self.
        
        Returns:
        Iterator[Tree]: Subtree iterator
        """
    
    def iter_subtrees_topdown(self) -> Iterator['Tree']:
        """
        Breadth-first iteration over subtrees.
        
        Returns:
        Iterator[Tree]: Subtree iterator (top-down)
        """
    
    def find_pred(self, pred: Callable[['Tree'], bool]) -> Iterator['Tree']:
        """
        Find all nodes matching predicate function.
        
        Parameters:
        - pred: Predicate function taking Tree and returning bool
        
        Returns:
        Iterator[Tree]: Matching nodes
        """
    
    def find_data(self, data: str) -> Iterator['Tree']:
        """
        Find all nodes with specific data value.
        
        Parameters:
        - data: Rule name to search for
        
        Returns:
        Iterator[Tree]: Nodes with matching data
        """
    
    def scan_values(self, pred: Callable[[Any], bool]) -> Iterator[Any]:
        """
        Find all values (non-Tree children) matching predicate.
        
        Parameters:
        - pred: Predicate function for values
        
        Returns:
        Iterator[Any]: Matching values
        """
    
    def expand_kids_by_index(self, *indices: int) -> None:
        """
        Expand children at specified indices into parent's children list.
        
        Parameters:
        - *indices: Indices of children to expand
        """
    
    def expand_kids_by_data(self, *data_values: str) -> None:
        """
        Expand children with specified data values.
        
        Parameters:
        - *data_values: Data values of children to expand
        """
    
    def copy(self) -> 'Tree':
        """
        Create shallow copy of tree.
        
        Returns:
        Tree: Copied tree
        """
    
    def set(self, data: str, children: list) -> 'Tree':
        """
        Create new tree with specified data and children.
        
        Parameters:
        - data: New rule name
        - children: New children list
        
        Returns:
        Tree: New tree instance
        """
    
    # Properties
    data: str           # Rule name or alias
    children: list      # Child nodes and tokens
    meta: Meta          # Position metadata
    
    # Position properties (deprecated, use meta)
    line: int           # Line number
    column: int         # Column number  
    end_line: int       # End line number
    end_column: int     # End column number

Memory-Optimized Tree

Slotted tree class for memory efficiency in large parse trees.

class SlottedTree(Tree):
    """
    Memory-optimized tree with __slots__.
    """
    __slots__ = ('data', 'children', 'rule', '_meta')

Position Metadata

Container for position information when propagate_positions is enabled.

class Meta:
    """
    Container for position metadata.
    """
    
    def __init__(self):
        self.empty = True
    
    # Position attributes (set by parser when propagate_positions=True)
    line: int           # Start line (1-based)
    column: int         # Start column (1-based)
    start_pos: int      # Start position in input
    end_line: int       # End line (1-based)
    end_column: int     # End column (1-based)
    end_pos: int        # End position in input
    empty: bool         # Whether metadata is empty

Tree Transformers

Classes for transforming parse trees into other data structures or modified trees.

class Transformer:
    """
    Transforms trees bottom-up according to node data.
    Calls methods named after rules to transform nodes.
    """
    
    def __init__(self, visit_tokens: bool = True):
        """
        Initialize transformer.
        
        Parameters:
        - visit_tokens: Whether to visit token nodes
        """
    
    def transform(self, tree: Tree) -> Any:
        """
        Transform tree and return result.
        
        Parameters:
        - tree: Tree to transform
        
        Returns:
        Any: Transformation result
        """
    
    def __mul__(self, other: 'Transformer') -> 'TransformerChain':
        """
        Chain transformers using * operator.
        
        Parameters:  
        - other: Transformer to chain
        
        Returns:
        TransformerChain: Chained transformers
        """
    
    def __default__(self, data: str, children: list, meta) -> Any:
        """
        Default transformation when no method found.
        
        Parameters:
        - data: Rule name
        - children: Transformed children
        - meta: Position metadata
        
        Returns:
        Any: Default transformation (usually Tree)
        """
    
    def __default_token__(self, token: Token) -> Any:
        """
        Default token transformation.
        
        Parameters:
        - token: Token to transform
        
        Returns:
        Any: Token transformation result
        """
    
    __visit_tokens__: bool  # Class attribute for token visiting

In-Place Transformers

Transformers that modify trees in-place for memory efficiency.

class Transformer_InPlace(Transformer):
    """
    Non-recursive transformer that modifies tree in-place.
    """

class Transformer_InPlaceRecursive(Transformer):
    """
    Recursive transformer that modifies tree in-place.
    """

class Transformer_NonRecursive(Transformer):
    """
    Non-recursive transformer for processing huge trees.
    Avoids stack overflow on very deep trees.
    """

Transformer Chaining

Utility for combining multiple transformers.

class TransformerChain:
    """
    Chains multiple transformers together.
    """
    
    def __init__(self, *transformers: Transformer):
        """
        Initialize transformer chain.
        
        Parameters:
        - *transformers: Transformers to chain
        """
    
    def transform(self, tree: Tree) -> Any:
        """
        Apply all transformers in sequence.
        
        Parameters:
        - tree: Input tree
        
        Returns:
        Any: Final transformation result
        """
    
    def __mul__(self, other: Transformer) -> 'TransformerChain':
        """
        Add transformer to chain.
        
        Parameters:
        - other: Transformer to add
        
        Returns:
        TransformerChain: Extended chain
        """

Tree Visitors

Classes for traversing trees without modification, useful for analysis and extraction.

class Visitor:
    """
    Non-recursive tree visitor for analysis without modification.
    """
    
    def visit(self, tree: Tree) -> None:
        """
        Visit tree bottom-up.
        
        Parameters:
        - tree: Tree to visit
        """
    
    def visit_topdown(self, tree: Tree) -> None:
        """
        Visit tree top-down.
        
        Parameters:
        - tree: Tree to visit
        """
    
    def __default__(self, tree: Tree) -> None:
        """
        Default visit method when no specific method found.
        
        Parameters:
        - tree: Tree node being visited
        """

class Visitor_Recursive:
    """
    Recursive tree visitor, slightly faster than non-recursive.
    """

Tree Interpreter

Advanced visitor with explicit visit control for complex tree processing.

class Interpreter:
    """
    Tree interpreter with explicit visit control.
    """
    
    def visit(self, tree: Tree) -> Any:
        """
        Visit single node without visiting children.
        
        Parameters:
        - tree: Tree node to visit
        
        Returns:
        Any: Visit result
        """
    
    def visit_children(self, tree: Tree) -> list:
        """
        Visit all children of node.
        
        Parameters:
        - tree: Parent tree node
        
        Returns:
        list: Results from visiting children
        """
    
    def __default__(self, tree: Tree) -> Any:
        """
        Default behavior when no method found.
        
        Parameters:
        - tree: Tree node
        
        Returns:
        Any: Default result
        """

Visitor Decorators and Utilities

Decorators and functions for customizing visitor/transformer behavior.

def v_args(inline: bool = False, meta: bool = False, tree: bool = False, 
          wrapper: Callable = None):
    """
    Decorator to modify visitor/transformer method arguments.
    
    Parameters:
    - inline: Pass children as *args instead of list
    - meta: Include meta information as parameter
    - tree: Pass entire tree instead of just children
    - wrapper: Custom wrapper function
    
    Returns:
    Callable: Decorator function
    """

def inline_args(f):
    """
    Deprecated decorator. Use v_args(inline=True) instead.
    """

def visit_children_decor(func):
    """
    Decorator for interpreter methods to auto-visit children.
    
    Parameters:
    - func: Method to decorate
    
    Returns:
    Callable: Decorated method
    """

def merge_transformers(*transformers, **kwargs) -> Transformer:
    """
    Merge multiple transformers into combined namespaces.
    
    Parameters:
    - *transformers: Transformer instances to merge
    - **kwargs: Additional options
    
    Returns:
    Transformer: Merged transformer
    """

Ambiguity Handling

Transformer for processing ambiguous parse results.

class CollapseAmbiguities(Transformer):
    """
    Transforms ambiguous trees into lists of unambiguous alternatives.
    Converts _ambig nodes into lists containing all possible interpretations.
    """

Exception for Node Removal

Exception used to remove nodes during transformation.

class Discard(Exception):
    """
    When raised in transformer callback, discards node from parent.
    The node will not appear in the transformed result.
    """

Usage Examples

Basic Tree Processing

from lark import Lark, Tree

parser = Lark(grammar)
tree = parser.parse("2 + 3 * 4")

# Pretty print tree
print(tree.pretty())

# Find specific nodes
additions = list(tree.find_data('add'))
numbers = list(tree.find_data('number'))

# Iterate over all subtrees
for subtree in tree.iter_subtrees():
    print(f"Rule: {subtree.data}, Children: {len(subtree.children)}")

Creating a Transformer

from lark import Transformer, v_args

class Calculator(Transformer):
    """Transform parse tree into calculated result."""
    
    @v_args(inline=True)
    def add(self, left, right):
        return left + right
    
    @v_args(inline=True) 
    def mul(self, left, right):
        return left * right
    
    def number(self, children):
        return int(children[0])

# Apply transformer
calc = Calculator()
result = calc.transform(tree)
print(f"Result: {result}")

Transformer with Token Handling

from lark import Transformer, Token

class MyTransformer(Transformer):
    def __init__(self):
        super().__init__(visit_tokens=True)
    
    def IDENTIFIER(self, token):
        # Transform identifier tokens
        return token.value.upper()
    
    def __default_token__(self, token):
        # Default token handling
        return token.value

Visitor for Analysis

from lark import Visitor

class VariableCollector(Visitor):
    def __init__(self):
        self.variables = set()
    
    def identifier(self, tree):
        self.variables.add(tree.children[0].value)

# Collect variables
collector = VariableCollector()
collector.visit(tree)
print(f"Variables found: {collector.variables}")

Chaining Transformers

from lark import Transformer

class FirstTransform(Transformer):
    def rule1(self, children):
        return "first_" + str(children[0])

class SecondTransform(Transformer):  
    def rule1(self, children):
        return children[0] + "_second"

# Chain transformers
chained = FirstTransform() * SecondTransform()
result = chained.transform(tree)

Using Interpreter

from lark import Interpreter

class MyInterpreter(Interpreter):
    def expression(self, tree):
        # Manually control child visiting
        left = self.visit(tree.children[0])
        op = tree.children[1].value
        right = self.visit(tree.children[2])
        
        if op == '+':
            return left + right
        elif op == '*':
            return left * right
    
    def number(self, tree):
        return int(tree.children[0].value)

Handling Ambiguous Results

from lark import Lark, CollapseAmbiguities

# Parse with ambiguity="explicit"
parser = Lark(ambiguous_grammar, ambiguity="explicit")
tree = parser.parse(text)

# Collapse ambiguities into lists
collapse = CollapseAmbiguities()
alternatives = collapse.transform(tree)

# Process each alternative
for alt in alternatives:
    process_interpretation(alt)

Install with Tessl CLI

npx tessl i tessl/pypi-lark-parser

docs

core-parsing.md

exceptions.md

index.md

tokens-lexing.md

tree-processing.md

utilities.md

tile.json