CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-lark-parser

A modern general-purpose parsing library for Python that can parse any context-free grammar efficiently

Pending
Overview
Eval results
Files

docs/utilities.md

Utilities and Tools

Additional utilities including AST generation helpers, tree reconstruction, standalone parser generation, serialization, visualization tools, and various helper functions.

Capabilities

Grammar Building Components

Classes for programmatically building and manipulating grammar definitions.

class Symbol:
    """
    Base class for grammar symbols.
    """
    
    def __init__(self, name: str):
        """
        Initialize symbol.
        
        Parameters:
        - name: Symbol name
        """
    
    name: str
    is_term: bool

class Terminal(Symbol):
    """
    Terminal symbol in grammar definitions.
    """
    
    def __init__(self, name: str, filter_out: bool = False):
        """
        Initialize terminal symbol.
        
        Parameters:
        - name: Terminal name
        - filter_out: Whether to filter out this terminal from parse trees
        """
    
    filter_out: bool
    is_term = True

class NonTerminal(Symbol):
    """
    Non-terminal symbol in grammar definitions.
    """
    
    is_term = False

class Rule:
    """
    Grammar rule definition containing origin, expansion, and options.
    """
    
    def __init__(self, origin: NonTerminal, expansion: List[Symbol], 
                 order: int = 0, alias: str = None, options: 'RuleOptions' = None):
        """
        Initialize grammar rule.
        
        Parameters:
        - origin: Non-terminal that this rule defines
        - expansion: List of symbols that make up the rule
        - order: Rule priority order
        - alias: Alternative name for the rule
        - options: Rule configuration options
        """
    
    origin: NonTerminal
    expansion: List[Symbol]
    alias: str
    order: int
    options: 'RuleOptions'

class RuleOptions:
    """
    Configuration options for grammar rules.
    """
    
    def __init__(self, keep_all_tokens: bool = False, expand1: bool = False,
                 priority: int = None, template_source: str = None,
                 empty_indices: Tuple = ()):
        """
        Initialize rule options.
        
        Parameters:
        - keep_all_tokens: Preserve all tokens in parse tree
        - expand1: Expand single-child rules
        - priority: Rule priority for disambiguation
        - template_source: Template source information
        - empty_indices: Indices of empty rule positions
        """
    
    keep_all_tokens: bool
    expand1: bool
    priority: int
    template_source: str
    empty_indices: Tuple

Configuration Classes

Configuration objects for lexer and parser behavior.

class LexerConf:
    """
    Lexer configuration containing terminals and options.
    """
    
    def __init__(self, terminals: List['TerminalDef'], re_module, 
                 ignore: Tuple = (), postlex=None, callbacks: Dict = None,
                 g_regex_flags: int = 0, skip_validation: bool = False,
                 use_bytes: bool = False):
        """
        Initialize lexer configuration.
        
        Parameters:
        - terminals: List of terminal definitions
        - re_module: Regular expression module (re or regex)
        - ignore: Terminals to ignore in parsing
        - postlex: Post-lexing processor
        - callbacks: Lexer callback functions
        - g_regex_flags: Global regex flags
        - skip_validation: Skip terminal validation
        - use_bytes: Process bytes instead of strings
        """
    
    terminals: List['TerminalDef']
    terminals_by_name: Dict[str, 'TerminalDef']
    ignore: Tuple
    postlex: 'PostLex'
    callbacks: Dict
    g_regex_flags: int
    re_module: Any
    skip_validation: bool
    use_bytes: bool

class ParserConf:
    """
    Parser configuration containing rules and start symbols.
    """
    
    def __init__(self, rules: List[Rule], callbacks: Dict, start: List[str]):
        """
        Initialize parser configuration.
        
        Parameters:
        - rules: Grammar rules
        - callbacks: Parser callback functions
        - start: Start symbol(s)
        """
    
    rules: List[Rule]
    callbacks: Dict
    start: List[str]

AST Generation Utilities

Helper classes and functions for creating custom Abstract Syntax Tree (AST) classes from parse trees.

class Ast:
    """
    Abstract base class for custom AST node classes.
    Provides foundation for creating domain-specific AST representations.
    """
    
    @classmethod
    def from_lark_tree(cls, tree: Tree) -> 'Ast':
        """
        Create AST instance from Lark parse tree.
        
        Parameters:
        - tree: Lark Tree instance
        
        Returns:
        Ast: AST node instance
        """

class AsList(Ast):
    """
    AST node that stores parse results as a single list.
    Useful for collecting multiple items into a flat structure.
    """

def create_transformer(ast_module, transformer: Transformer = None) -> Transformer:
    """
    Create transformer from module containing AST classes.
    Automatically maps grammar rules to AST classes based on naming.
    
    Parameters:
    - ast_module: Module containing AST class definitions
    - transformer: Base transformer class (optional)
    
    Returns:
    Transformer: Configured transformer for AST generation
    """

def camel_to_snake(name: str) -> str:
    """
    Convert CamelCase names to snake_case.
    
    Parameters:
    - name: CamelCase string
    
    Returns:
    str: snake_case version
    """

def inline(f):
    """
    Decorator to mark AST classes as inline.
    Indicates that the AST class should receive children as separate arguments.
    
    Parameters:
    - f: AST class to mark as inline
    
    Returns:
    Callable: Decorated class
    """

Text Reconstruction

Classes for reconstructing original text from parse trees, useful for pretty-printing and code generation.

class Reconstructor:
    """
    Reconstructs text from parse trees by writing tokens in order.
    """
    
    def __init__(self, parser: Lark, term_subs: Dict[str, Callable] = None):
        """
        Initialize reconstructor.
        
        Parameters:
        - parser: Lark parser instance used to create trees
        - term_subs: Terminal substitution functions
        """
    
    def reconstruct(self, tree: Tree, postproc: Callable = None, 
                   insert_spaces: bool = True) -> str:
        """
        Reconstruct text from parse tree.
        
        Parameters:
        - tree: Parse tree to reconstruct
        - postproc: Post-processing function for final text
        - insert_spaces: Whether to insert spaces between tokens
        
        Returns:
        str: Reconstructed text
        """

class WriteTokensTransformer(Transformer):
    """
    Transformer that reconstructs text by writing tokens.
    Used internally by Reconstructor for token-level reconstruction.
    """
    
    def __init__(self, tokens: Dict[str, str], term_subs: Dict[str, Callable]):
        """
        Initialize token writer.
        
        Parameters:
        - tokens: Mapping of token types to values
        - term_subs: Terminal substitution functions
        """

Standalone Parser Generation

Tools for generating standalone parsers that don't require the Lark library at runtime.

def gen_standalone(lark_instance: Lark, out=None, compress: bool = False) -> str:
    """
    Generate standalone parser code from Lark instance.
    Creates self-contained Python code that can parse without Lark dependency.
    Only works with LALR parser mode.
    
    Parameters:
    - lark_instance: Lark parser instance to convert (must use parser='lalr')
    - out: Output file object (optional)
    - compress: Whether to compress the generated code
    
    Returns:
    str: Generated standalone parser code
    
    Example:
    >>> parser = Lark(grammar, parser='lalr')
    >>> standalone_code = gen_standalone(parser)
    >>> with open('standalone_parser.py', 'w') as f:
    ...     f.write(standalone_code)
    """

def build_lalr(grammar_text: str, **options) -> Lark:
    """
    Build an LALR parser from a grammar string (helper used by Lark's command-line tools).
    
    Parameters:
    - grammar_text: Grammar definition string
    - **options: Parser configuration options
    
    Returns:
    Lark: Configured LALR parser instance
    """

def make_warnings_comments():
    """
    Configure warnings to appear as comments in generated output.
    Useful for command-line tools that generate code.
    """

Parser Serialization

Functions for saving and loading parser instances to avoid repeated grammar compilation.

def serialize(lark_instance: Lark, f) -> None:
    """
    Serialize Lark parser instance to file for caching.
    
    Parameters:
    - lark_instance: Lark parser to serialize
    - f: File object to write serialized data
    """

Tree Visualization

Functions for creating visual representations of parse trees using graphing libraries.

def pydot__tree_to_png(tree: Tree, filename: str, rankdir: str = "LR", **kwargs) -> None:
    """
    Create PNG image of parse tree using pydot.
    
    Parameters:
    - tree: Parse tree to visualize
    - filename: Output PNG filename
    - rankdir: Graph direction ("LR", "TB", etc.)
    - **kwargs: Additional pydot options
    """

def pydot__tree_to_dot(tree: Tree, filename: str, rankdir: str = "LR", **kwargs) -> None:
    """
    Create DOT file representation of parse tree.
    
    Parameters:
    - tree: Parse tree to convert
    - filename: Output DOT filename
    - rankdir: Graph direction
    - **kwargs: Additional pydot options
    """

def pydot__tree_to_graph(tree: Tree, rankdir: str = "LR", **kwargs):
    """
    Create pydot graph object from parse tree.
    
    Parameters:
    - tree: Parse tree to convert
    - rankdir: Graph direction
    - **kwargs: Additional pydot options
    
    Returns:
    pydot.Dot: Graph object
    """

Command-Line Tools

Utilities for building command-line interfaces and processing grammar files.

def build_lalr(grammar_text: str, **options) -> Lark:
    """
    Build an LALR parser from a grammar string (helper used by Lark's command-line tools).
    
    Parameters:
    - grammar_text: Grammar definition
    - **options: Parser configuration options
    
    Returns:
    Lark: Configured LALR parser
    """

def make_warnings_comments() -> None:
    """
    Configure warnings to appear as comments in generated output.
    Useful for command-line tools that generate code.
    """

Logger Configuration

Logging utilities for debugging and development.

logger: logging.Logger
"""
Lark's logging instance for debug output and development information.
Use logger.setLevel() to control verbosity.
"""

Internal Utilities

Various helper classes and functions used internally by Lark components.

class Serialize:
    """
    Mixin class providing serialization capabilities.
    """
    
    def serialize(self, memo: Dict = None) -> Any:
        """
        Serialize object to transferable format.
        
        Parameters:
        - memo: Memoization dictionary for circular references
        
        Returns:
        Any: Serialized representation
        """

class SerializeMemoizer:
    """
    Helper for memoizing object serialization.
    """
    
    def __init__(self):
        self.memo = {}
    
    def serialize(self, obj: Any) -> Any:
        """
        Serialize object with memoization.
        
        Parameters:
        - obj: Object to serialize
        
        Returns:
        Any: Serialized object
        """

File System Utilities

Cross-platform file system operation helpers.

class FS:
    """
    File system utilities for cross-platform operations.
    """
    
    @staticmethod
    def open(filename: str, mode: str = 'r', **kwargs):
        """
        Open file with proper encoding handling.
        
        Parameters:
        - filename: File path
        - mode: File open mode
        - **kwargs: Additional open() arguments
        
        Returns:
        File object
        """
    
    @staticmethod
    def exists(path: str) -> bool:
        """
        Check if path exists.
        
        Parameters:
        - path: File or directory path
        
        Returns:
        bool: True if path exists
        """

String and Type Utilities

Helper functions for string processing and type checking.

def isascii(s: str) -> bool:
    """
    Check if string contains only ASCII characters.
    
    Parameters:
    - s: String to check
    
    Returns:
    bool: True if string is ASCII-only
    """

def is_id_continue(c: str) -> bool:
    """
    Check if character can continue a Unicode identifier.
    
    Parameters:
    - c: Character to check
    
    Returns:
    bool: True if character can continue identifier
    """

def is_id_start(c: str) -> bool:
    """
    Check if character can start a Unicode identifier.
    
    Parameters:
    - c: Character to check
    
    Returns:
    bool: True if character can start identifier
    """

def combine_alternatives(lists: List[List[Any]]) -> List[Any]:
    """
    Combine alternative rule definitions.
    
    Parameters:
    - lists: List of alternative rule lists
    
    Returns:
    List[Any]: Combined alternatives
    """

def classify(seq: Sequence[Any], key: Callable = None, value: Callable = None) -> Dict:
    """
    Classify sequence elements into dictionary by key function.
    
    Parameters:
    - seq: Sequence to classify
    - key: Function to extract keys
    - value: Function to extract values
    
    Returns:
    Dict: Classified elements
    """

def get_regexp_width(regexp: str) -> Tuple[int, int]:
    """
    Analyze regular expression to determine min/max match width.
    
    Parameters:
    - regexp: Regular expression string
    
    Returns:
    Tuple[int, int]: (min_width, max_width)
    """

STRING_TYPE: type  # String type for version compatibility
"""Type object representing string type across Python versions."""

ABC: type  # Abstract base class type
"""Abstract base class type for creating abstract classes."""

def abstractmethod(func: Callable) -> Callable:
    """
    Decorator marking method as abstract.
    
    Parameters:
    - func: Method to mark as abstract
    
    Returns:
    Callable: Decorated method
    """

Smart Decorators

Advanced decorator utilities for flexible function modification.

def smart_decorator(decorator: Callable, **decorator_kwargs) -> Callable:
    """
    Create smart decorator that can handle various function signatures.
    
    Parameters:
    - decorator: Base decorator function
    - **decorator_kwargs: Default decorator arguments
    
    Returns:
    Callable: Smart decorator function
    """

def combine_alternatives(lists: List[List[Any]]) -> List[Any]:
    """
    Enumerate all possible concatenations of alternative rule lists.
    Note: this is the same utility documented under "String and Type
    Utilities" above; it is listed here again for completeness.
    
    Parameters:
    - lists: List of alternative rule lists
    
    Returns:
    List[Any]: All combined alternatives
    """

Usage Examples

Creating Custom AST Classes

from lark import Lark, Tree
from lark.ast_utils import Ast, create_transformer, inline

# Define AST classes
class Expression(Ast):
    pass

class BinaryOp(Expression):
    def __init__(self, left, op, right):
        self.left = left
        self.op = op
        self.right = right

@inline
class Number(Expression):
    def __init__(self, value):
        self.value = int(value)

# Create module with AST classes
import sys
ast_module = sys.modules[__name__]

# Generate transformer
transformer = create_transformer(ast_module)

# Use with parser: parse first, then apply the transformer to the tree
parser = Lark(grammar)
tree = parser.parse("2 + 3 * 4")
ast = transformer.transform(tree)

print(f"AST root type: {type(ast)}")
print(f"Left operand: {ast.left}")

Text Reconstruction

from lark import Lark
from lark.reconstruct import Reconstructor

# Parse text
parser = Lark(grammar)
tree = parser.parse("x = 42 + y")

# Reconstruct original text
reconstructor = Reconstructor(parser)
reconstructed = reconstructor.reconstruct(tree)
print(f"Reconstructed: {reconstructed}")

# Reconstruct with custom formatting
def format_postproc(text):
    return text.replace('+', ' + ').replace('=', ' = ')

formatted = reconstructor.reconstruct(tree, postproc=format_postproc)
print(f"Formatted: {formatted}")

Generating Standalone Parser

from lark import Lark
from lark.tools.standalone import gen_standalone

# Create parser
parser = Lark(grammar, parser='lalr')  # Only LALR supports standalone

# Generate standalone code
standalone_code = gen_standalone(parser)

# Save to file
with open('my_parser.py', 'w') as f:
    f.write(standalone_code)

# The generated file can be used without Lark:
# from my_parser import Lark_StandAlone
# parser = Lark_StandAlone()
# result = parser.parse(text)

Parser Serialization and Caching

from lark import Lark

# Create parser (caching requires the LALR parser)
parser = Lark(grammar, parser='lalr')

# Save the compiled parser to a cache file using Lark's native save()
with open('parser.cache', 'wb') as f:
    parser.save(f)

# Load the cached parser with Lark.load (not pickle.load, which does not
# match the serialized format)
with open('parser.cache', 'rb') as f:
    cached_parser = Lark.load(f)

# Use cached parser
result = cached_parser.parse(text)

Tree Visualization

from lark import Lark
from lark.tree import pydot__tree_to_png

# Parse text
parser = Lark(grammar)
tree = parser.parse("complex expression")

# Create PNG visualization
pydot__tree_to_png(tree, 'parse_tree.png', rankdir='TB')

# Create DOT file
from lark.tree import pydot__tree_to_dot
pydot__tree_to_dot(tree, 'parse_tree.dot')

Command-Line Tool Integration

from lark.tools import build_lalr, make_warnings_comments
import argparse

def main():
    parser = argparse.ArgumentParser(description='Grammar processor')
    parser.add_argument('grammar_file', help='Grammar file path')
    parser.add_argument('input_file', help='Input file to parse')
    parser.add_argument('--debug', action='store_true')
    
    args = parser.parse_args()
    
    # Configure warnings as comments
    make_warnings_comments()
    
    # Read grammar
    with open(args.grammar_file) as f:
        grammar = f.read()
    
    # Build parser
    lark_parser = build_lalr(grammar, debug=args.debug)
    
    # Parse input
    with open(args.input_file) as f:
        text = f.read()
    
    result = lark_parser.parse(text)
    print(result.pretty())

if __name__ == '__main__':
    main()

Custom Logger Configuration

from lark.utils import logger
import logging

# Configure Lark logging
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
))
logger.addHandler(handler)

# Now Lark will output debug information
parser = Lark(grammar, debug=True)
tree = parser.parse(text)  # Will show debug output

Advanced AST Transformation

from lark import Lark, Transformer
from lark.ast_utils import camel_to_snake

class AstGenerator(Transformer):
    """Generate AST nodes with converted names."""
    
    def __init__(self, ast_classes):
        super().__init__()
        self.ast_classes = ast_classes
    
    def __default__(self, data, children, meta):
        # Convert rule name to class name
        class_name = data.title().replace('_', '')
        
        if class_name in self.ast_classes:
            ast_class = self.ast_classes[class_name]
            return ast_class(*children)
        
        # Fallback to generic AST node
        return super().__default__(data, children, meta)

# Define AST classes
class Expression:
    pass

class BinaryExpr(Expression):
    def __init__(self, left, op, right):
        self.left = left
        self.op = op  
        self.right = right

ast_classes = {
    'BinaryExpr': BinaryExpr,
    'Expression': Expression
}

# Use custom AST generator
transformer = AstGenerator(ast_classes)
parser = Lark(grammar, transformer=transformer)

File System Operations

from lark.utils import FS
import os

# Cross-platform file operations
grammar_file = 'grammar.lark'

if FS.exists(grammar_file):
    with FS.open(grammar_file, 'r', encoding='utf-8') as f:
        grammar = f.read()
    
    parser = Lark(grammar)
else:
    print(f"Grammar file {grammar_file} not found")

Smart Decorator Usage

from lark.utils import smart_decorator

def timing_decorator(func, log_time=True):
    """Decorator that measures function execution time."""
    import time
    
    def wrapper(*args, **kwargs):
        start = time.time()
        result = func(*args, **kwargs)
        end = time.time()
        
        if log_time:
            print(f"{func.__name__} took {end - start:.4f} seconds")
        
        return result
    
    return wrapper

# Create smart timing decorator
timed = smart_decorator(timing_decorator, log_time=True)

# Use with functions
@timed
def parse_large_file(filename):
    parser = Lark(grammar)
    with open(filename) as f:
        return parser.parse(f.read())

# Function will automatically log execution time
result = parse_large_file('large_input.txt')

Install with Tessl CLI

npx tessl i tessl/pypi-lark-parser

docs

core-parsing.md

exceptions.md

index.md

tokens-lexing.md

tree-processing.md

utilities.md

tile.json