A modern general-purpose parsing library for Python that can parse any context-free grammar efficiently
—
Additional utilities including AST generation helpers, tree reconstruction, standalone parser generation, serialization, visualization tools, and various helper functions.
Classes for programmatically building and manipulating grammar definitions.
class Symbol:
"""
Base class for grammar symbols.
"""
def __init__(self, name: str):
"""
Initialize symbol.
Parameters:
- name: Symbol name
"""
name: str  # symbol name as written in the grammar
is_term: bool  # True on terminal symbols, False on non-terminals
class Terminal(Symbol):
"""
Terminal symbol in grammar definitions.
"""
def __init__(self, name: str, filter_out: bool = False):
"""
Initialize terminal symbol.
Parameters:
- name: Terminal name
- filter_out: Whether to filter out this terminal from parse trees
"""
filter_out: bool  # when True, matching tokens are omitted from parse trees
is_term = True  # class-level flag: this symbol kind is a terminal
class NonTerminal(Symbol):
"""
Non-terminal symbol in grammar definitions.
"""
is_term = False  # class-level flag: this symbol kind is not a terminal
class Rule:
"""
Grammar rule definition containing origin, expansion, and options.
"""
def __init__(self, origin: NonTerminal, expansion: List[Symbol],
order: int = 0, alias: str = None, options: 'RuleOptions' = None):
"""
Initialize grammar rule.
Parameters:
- origin: Non-terminal that this rule defines
- expansion: List of symbols that make up the rule
- order: Rule priority order
- alias: Alternative name for the rule
- options: Rule configuration options
"""
origin: NonTerminal  # left-hand side: the non-terminal being defined
expansion: List[Symbol]  # right-hand side: ordered symbols the rule expands to
alias: str  # alternative rule name, or None
order: int  # priority order among alternatives
options: 'RuleOptions'  # extra rule configuration, or None
class RuleOptions:
"""
Configuration options for grammar rules.
"""
def __init__(self, keep_all_tokens: bool = False, expand1: bool = False,
priority: int = None, template_source: str = None,
empty_indices: Tuple = ()):
"""
Initialize rule options.
Parameters:
- keep_all_tokens: Preserve all tokens in parse tree
- expand1: Expand single-child rules
- priority: Rule priority for disambiguation
- template_source: Template source information
- empty_indices: Indices of empty rule positions
"""
keep_all_tokens: bool  # keep otherwise-filtered tokens in the parse tree
expand1: bool  # inline rules that have exactly one child
priority: int  # disambiguation priority, or None
template_source: str  # originating template information, or None
empty_indices: Tuple
Configuration objects for lexer and parser behavior.
class LexerConf:
"""
Lexer configuration containing terminals and options.
"""
def __init__(self, terminals: List['TerminalDef'], re_module,
ignore: Tuple = (), postlex=None, callbacks: Dict = None,
g_regex_flags: int = 0, skip_validation: bool = False,
use_bytes: bool = False):
"""
Initialize lexer configuration.
Parameters:
- terminals: List of terminal definitions
- re_module: Regular expression module (re or regex)
- ignore: Terminals to ignore in parsing
- postlex: Post-lexing processor
- callbacks: Lexer callback functions
- g_regex_flags: Global regex flags
- skip_validation: Skip terminal validation
- use_bytes: Process bytes instead of strings
"""
terminals: List['TerminalDef']  # terminal definitions handed to the lexer
terminals_by_name: Dict[str, 'TerminalDef']  # lookup index over `terminals` by name
ignore: Tuple  # terminal names to discard during lexing
postlex: 'PostLex'  # post-lexing processor, or None
callbacks: Dict  # terminal-name -> callback mapping
g_regex_flags: int  # flags applied to every terminal regex
re_module: Any  # regex engine in use (stdlib `re` or third-party `regex`)
skip_validation: bool  # when True, terminal validation is skipped
use_bytes: bool  # when True, lexer operates on bytes instead of str
class ParserConf:
"""
Parser configuration containing rules and start symbols.
"""
def __init__(self, rules: List[Rule], callbacks: Dict, start: List[str]):
"""
Initialize parser configuration.
Parameters:
- rules: Grammar rules
- callbacks: Parser callback functions
- start: Start symbol(s)
"""
rules: List[Rule]  # grammar rules the parser is built from
callbacks: Dict  # rule -> callback mapping used to build the tree
start: List[str]
Helper classes and functions for creating custom Abstract Syntax Tree (AST) classes from parse trees.
class Ast:
"""
Abstract base class for custom AST node classes.
Provides foundation for creating domain-specific AST representations.
"""
# Alternate constructor: builds an AST node out of a Lark Tree.
@classmethod
def from_lark_tree(cls, tree: Tree) -> 'Ast':
"""
Create AST instance from Lark parse tree.
Parameters:
- tree: Lark Tree instance
Returns:
Ast: AST node instance
"""
# Marker subclass: create_transformer treats AsList subclasses specially.
class AsList(Ast):
"""
AST node that stores parse results as a single list.
Useful for collecting multiple items into a flat structure.
"""
def create_transformer(ast_module, transformer: Transformer = None) -> Transformer:
"""
Create transformer from module containing AST classes.
Automatically maps grammar rules to AST classes based on naming.
Parameters:
- ast_module: Module containing AST class definitions
- transformer: Base transformer class (optional)
Returns:
Transformer: Configured transformer for AST generation
"""
# NOTE(review): the rule/class mapping presumably uses camel_to_snake (defined
# in this module) to match CamelCase class names to snake_case rule names —
# verify against the lark.ast_utils implementation.
def camel_to_snake(name: str) -> str:
"""
Convert CamelCase names to snake_case.
Parameters:
- name: CamelCase string
Returns:
str: snake_case version
"""
# Used to match AST class names against grammar rule names.
# Marker decorator consumed by create_transformer when mapping classes.
def inline(f):
"""
Decorator to mark AST classes as inline.
Indicates that the AST class should receive children as separate arguments.
Parameters:
- f: AST class to mark as inline
Returns:
Callable: Decorated class
"""
Classes for reconstructing original text from parse trees, useful for pretty-printing and code generation.
class Reconstructor:
"""
Reconstructs text from parse trees by writing tokens in order.
"""
def __init__(self, parser: Lark, term_subs: Dict[str, Callable] = None):
"""
Initialize reconstructor.
Parameters:
- parser: Lark parser instance used to create trees
- term_subs: Terminal substitution functions
"""
def reconstruct(self, tree: Tree, postproc: Callable = None,
insert_spaces: bool = True) -> str:
"""
Reconstruct text from parse tree.
Parameters:
- tree: Parse tree to reconstruct
- postproc: Post-processing function for final text
- insert_spaces: Whether to insert spaces between tokens
Returns:
str: Reconstructed text
"""
# Internal helper of Reconstructor; not intended for direct use.
class WriteTokensTransformer(Transformer):
"""
Transformer that reconstructs text by writing tokens.
Used internally by Reconstructor for token-level reconstruction.
"""
def __init__(self, tokens: Dict[str, str], term_subs: Dict[str, Callable]):
"""
Initialize token writer.
Parameters:
- tokens: Mapping of token types to values
- term_subs: Terminal substitution functions
"""
Tools for generating standalone parsers that don't require the Lark library at runtime.
def gen_standalone(lark_instance: Lark, out=None, compress: bool = False) -> str:
"""
Generate standalone parser code from Lark instance.
Creates self-contained Python code that can parse without Lark dependency.
Only works with LALR parser mode.
Parameters:
- lark_instance: Lark parser instance to convert (must use parser='lalr')
- out: Output file object (optional)
- compress: Whether to compress the generated code
Returns:
str: Generated standalone parser code
Example:
>>> parser = Lark(grammar, parser='lalr')
>>> standalone_code = gen_standalone(parser)
>>> with open('standalone_parser.py', 'w') as f:
...     f.write(standalone_code)
"""
# NOTE(review): this function is declared again later in this document
# (command-line utilities section) with an identical signature — deduplicate.
def build_lalr(grammar_text: str, **options) -> Lark:
"""
Build LALR parser from command-line style arguments.
Parameters:
- grammar_text: Grammar definition string
- **options: Parser configuration options
Returns:
Lark: Configured LALR parser instance
"""
# NOTE(review): declared again later in this document with `-> None` — deduplicate.
def make_warnings_comments():
"""
Configure warnings to appear as comments in generated output.
Useful for command-line tools that generate code.
"""
Functions for saving and loading parser instances to avoid repeated grammar compilation.
# Writes a cached representation of a compiled parser; see also the
# serialization example later in this document.
def serialize(lark_instance: Lark, f) -> None:
"""
Serialize Lark parser instance to file for caching.
Parameters:
- lark_instance: Lark parser to serialize
- f: File object to write serialized data
"""
Functions for creating visual representations of parse trees using graphing libraries.
# Requires the optional third-party `pydot` package at runtime.
def pydot__tree_to_png(tree: Tree, filename: str, rankdir: str = "LR", **kwargs) -> None:
"""
Create PNG image of parse tree using pydot.
Parameters:
- tree: Parse tree to visualize
- filename: Output PNG filename
- rankdir: Graph direction ("LR", "TB", etc.)
- **kwargs: Additional pydot options
"""
# Same as pydot__tree_to_png but emits the textual Graphviz DOT format.
def pydot__tree_to_dot(tree: Tree, filename: str, rankdir: str = "LR", **kwargs) -> None:
"""
Create DOT file representation of parse tree.
Parameters:
- tree: Parse tree to convert
- filename: Output DOT filename
- rankdir: Graph direction
- **kwargs: Additional pydot options
"""
# Lowest-level visualization helper: returns the graph object without writing a file.
def pydot__tree_to_graph(tree: Tree, rankdir: str = "LR", **kwargs):
"""
Create pydot graph object from parse tree.
Parameters:
- tree: Parse tree to convert
- rankdir: Graph direction
- **kwargs: Additional pydot options
Returns:
pydot.Dot: Graph object
"""
Utilities for building command-line interfaces and processing grammar files.
# NOTE(review): duplicate declaration — build_lalr is already documented in the
# standalone-tools section above with the same signature; deduplicate.
def build_lalr(grammar_text: str, **options) -> Lark:
"""
Build LALR parser from command-line arguments.
Parameters:
- grammar_text: Grammar definition
- **options: Parser configuration options
Returns:
Lark: Configured LALR parser
"""
# NOTE(review): duplicate declaration — documented earlier without the explicit
# `-> None`; the two entries should be merged.
def make_warnings_comments() -> None:
"""
Configure warnings to appear as comments in generated output.
Useful for command-line tools that generate code.
"""
Logging utilities for debugging and development.
# Module-level logger shared by Lark internals (see the logging example below).
logger: logging.Logger
"""
Lark's logging instance for debug output and development information.
Use logger.setLevel() to control verbosity.
"""
Various helper classes and functions used internally by Lark components.
class Serialize:
"""
Mixin class providing serialization capabilities.
"""
def serialize(self, memo: Dict = None) -> Any:
"""
Serialize object to transferable format.
Parameters:
- memo: Memoization dictionary for circular references
Returns:
Any: Serialized representation
"""
class SerializeMemoizer:
"""
Helper for memoizing object serialization.
"""
def __init__(self):
self.memo = {}  # cache of already-serialized objects
def serialize(self, obj: Any) -> Any:
"""
Serialize object with memoization.
Parameters:
- obj: Object to serialize
Returns:
Any: Serialized object
"""
Cross-platform file system operation helpers.
class FS:
"""
File system utilities for cross-platform operations.
"""
@staticmethod
def open(filename: str, mode: str = 'r', **kwargs):
"""
Open file with proper encoding handling.
Parameters:
- filename: File path
- mode: File open mode
- **kwargs: Additional open() arguments
Returns:
File object
"""
@staticmethod
def exists(path: str) -> bool:
"""
Check if path exists.
Parameters:
- path: File or directory path
Returns:
bool: True if path exists
"""
Helper functions for string processing and type checking.
# Compatibility helper; on Python 3.7+ equivalent to str.isascii().
def isascii(s: str) -> bool:
"""
Check if string contains only ASCII characters.
Parameters:
- s: String to check
Returns:
bool: True if string is ASCII-only
"""
# Identifier-character test used when validating grammar symbol names.
def is_id_continue(c: str) -> bool:
"""
Check if character can continue a Unicode identifier.
Parameters:
- c: Character to check
Returns:
bool: True if character can continue identifier
"""
# Companion to is_id_continue: tests the first character of an identifier.
def is_id_start(c: str) -> bool:
"""
Check if character can start a Unicode identifier.
Parameters:
- c: Character to check
Returns:
bool: True if character can start identifier
"""
# NOTE(review): a later section declares combine_alternatives with an
# incompatible signature (`*alternatives -> Callable`); these two entries
# contradict each other — verify against lark.utils and reconcile.
def combine_alternatives(lists: List[List[Any]]) -> List[Any]:
"""
Combine alternative rule definitions.
Parameters:
- lists: List of alternative rule lists
Returns:
List[Any]: Combined alternatives
"""
# Groups elements into a dict, similar in spirit to collections.defaultdict grouping.
def classify(seq: Sequence[Any], key: Callable = None, value: Callable = None) -> Dict:
"""
Classify sequence elements into dictionary by key function.
Parameters:
- seq: Sequence to classify
- key: Function to extract keys
- value: Function to extract values
Returns:
Dict: Classified elements
"""
# Used by the lexer to reason about how many characters a terminal can match.
def get_regexp_width(regexp: str) -> Tuple[int, int]:
"""
Analyze regular expression to determine min/max match width.
Parameters:
- regexp: Regular expression string
Returns:
Tuple[int, int]: (min_width, max_width)
"""
# Legacy compatibility aliases (kept for code written against older Lark/Python).
STRING_TYPE: type # String type for version compatibility
"""Type object representing string type across Python versions."""
ABC: type # Abstract base class type
"""Abstract base class type for creating abstract classes."""
# Re-export of the abc.abstractmethod decorator for compatibility.
def abstractmethod(func: Callable) -> Callable:
"""
Decorator marking method as abstract.
Parameters:
- func: Method to mark as abstract
Returns:
Callable: Decorated method
"""
Advanced decorator utilities for flexible function modification.
def smart_decorator(decorator: Callable, **decorator_kwargs) -> Callable:
"""
Create smart decorator that can handle various function signatures.
Parameters:
- decorator: Base decorator function
- **decorator_kwargs: Default decorator arguments
Returns:
Callable: Smart decorator function
"""
# See the smart_decorator usage example at the end of this document.
# NOTE(review): conflicts with the earlier combine_alternatives declaration
# (`lists: List[List[Any]] -> List[Any]`) — only one signature can be correct;
# verify against lark.utils and remove the wrong entry.
def combine_alternatives(*alternatives) -> Callable:
"""
Combine multiple alternative implementations into single function.
Parameters:
- *alternatives: Alternative function implementations
Returns:
Callable: Combined function
"""
from lark import Lark, Tree
# Example: build a custom AST from parse trees via lark.ast_utils.
# Assumes `grammar` is a grammar string defined elsewhere — TODO confirm.
from lark.ast_utils import Ast, create_transformer, inline
# Define AST classes
class Expression(Ast):
pass
class BinaryOp(Expression):
def __init__(self, left, op, right):
self.left = left
self.op = op
self.right = right
@inline
class Number(Expression):
def __init__(self, value):
self.value = int(value)
# Create module with AST classes
import sys
ast_module = sys.modules[__name__]  # pass the current module so its classes are discovered
# Generate transformer
transformer = create_transformer(ast_module)
# Use with parser
parser = Lark(grammar, transformer=transformer)
ast = parser.parse("2 + 3 * 4")
print(f"AST root type: {type(ast)}")
print(f"Left operand: {ast.left}")
from lark import Lark
# Example: round-trip text through a parse tree with Reconstructor.
from lark.reconstruct import Reconstructor
# Parse text
parser = Lark(grammar)  # assumes `grammar` is defined elsewhere — TODO confirm
tree = parser.parse("x = 42 + y")
# Reconstruct original text
reconstructor = Reconstructor(parser)
reconstructed = reconstructor.reconstruct(tree)
print(f"Reconstructed: {reconstructed}")
# Reconstruct with custom formatting
def format_postproc(text):
return text.replace('+', ' + ').replace('=', ' = ')
formatted = reconstructor.reconstruct(tree, postproc=format_postproc)
print(f"Formatted: {formatted}")
print(f"Formatted: {formatted}")
from lark import Lark
# Example: emit a self-contained parser module (LALR mode only).
from lark.tools.standalone import gen_standalone
# Create parser
parser = Lark(grammar, parser='lalr') # Only LALR supports standalone
# Generate standalone code
standalone_code = gen_standalone(parser)
# Save to file
with open('my_parser.py', 'w') as f:
f.write(standalone_code)
# The generated file can be used without Lark:
# from my_parser import Lark_StandAlone
# parser = Lark_StandAlone()
# result = parser.parse(text)
from lark import Lark
# Example: cache a compiled parser to disk to skip grammar recompilation.
from lark.tools.serialize import serialize
import pickle
# Create parser
parser = Lark(grammar)  # assumes `grammar` is defined elsewhere — TODO confirm
# Serialize parser
with open('parser.cache', 'wb') as f:
serialize(parser, f)
# Load serialized parser
# NOTE(review): serialize() writes Lark's own cache format; reading it back
# with pickle.load may not round-trip — verify against the lark serialize docs.
with open('parser.cache', 'rb') as f:
cached_parser = pickle.load(f)
# Use cached parser
result = cached_parser.parse(text)
from lark import Lark
# Example: render parse trees via pydot (requires the pydot package).
from lark.tree import pydot__tree_to_png
# Parse text
parser = Lark(grammar)  # assumes `grammar` is defined elsewhere — TODO confirm
tree = parser.parse("complex expression")
# Create PNG visualization
pydot__tree_to_png(tree, 'parse_tree.png', rankdir='TB')
# Create DOT file
from lark.tree import pydot__tree_to_dot
pydot__tree_to_dot(tree, 'parse_tree.dot')
from lark.tools import build_lalr, make_warnings_comments
import argparse
def main():
"""Parse a grammar file and an input file given on the command line."""
parser = argparse.ArgumentParser(description='Grammar processor')
parser.add_argument('grammar_file', help='Grammar file path')
parser.add_argument('input_file', help='Input file to parse')
parser.add_argument('--debug', action='store_true')
args = parser.parse_args()
# Configure warnings as comments
make_warnings_comments()
# Read grammar
with open(args.grammar_file) as f:
grammar = f.read()
# Build parser
lark_parser = build_lalr(grammar, debug=args.debug)
# Parse input
with open(args.input_file) as f:
text = f.read()
result = lark_parser.parse(text)
print(result.pretty())
if __name__ == '__main__':
main()
from lark.utils import logger
import logging
# Configure Lark logging
# Example: enable Lark's internal debug output via its module-level logger.
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s'
))
logger.addHandler(handler)
# Now Lark will output debug information
parser = Lark(grammar, debug=True)  # assumes `grammar` is defined elsewhere — TODO confirm
tree = parser.parse(text) # Will show debug output
from lark import Lark, Transformer
# Example: hand-rolled Transformer that maps rule names to AST classes.
# NOTE(review): camel_to_snake is imported but never used in this example.
from lark.ast_utils import camel_to_snake
class AstGenerator(Transformer):
"""Generate AST nodes with converted names."""
def __init__(self, ast_classes):
super().__init__()
self.ast_classes = ast_classes  # mapping of CamelCase class name -> class
def __default__(self, data, children, meta):
# Convert rule name to class name
# e.g. 'binary_expr'.title() -> 'Binary_Expr' -> 'BinaryExpr'
class_name = data.title().replace('_', '')
if class_name in self.ast_classes:
ast_class = self.ast_classes[class_name]
return ast_class(*children)
# Fallback to generic AST node
return super().__default__(data, children, meta)
# Define AST classes
class Expression:
pass
class BinaryExpr(Expression):
def __init__(self, left, op, right):
self.left = left
self.op = op
self.right = right
ast_classes = {
'BinaryExpr': BinaryExpr,
'Expression': Expression
}
# Use custom AST generator
transformer = AstGenerator(ast_classes)
parser = Lark(grammar, transformer=transformer)
from lark.utils import FS
import os
# Cross-platform file operations
# Example: load a grammar file using Lark's FS helper.
grammar_file = 'grammar.lark'
if FS.exists(grammar_file):
with FS.open(grammar_file, 'r', encoding='utf-8') as f:
grammar = f.read()
parser = Lark(grammar)
else:
print(f"Grammar file {grammar_file} not found")
from lark.utils import smart_decorator
# Example: wrap a plain decorator with smart_decorator to handle varied signatures.
def timing_decorator(func, log_time=True):
"""Decorator that measures function execution time."""
import time
def wrapper(*args, **kwargs):
start = time.time()
result = func(*args, **kwargs)
end = time.time()
if log_time:
print(f"{func.__name__} took {end - start:.4f} seconds")
return result
return wrapper
# Create smart timing decorator
timed = smart_decorator(timing_decorator, log_time=True)
# Use with functions
@timed
def parse_large_file(filename):
parser = Lark(grammar)  # assumes `grammar` is defined elsewhere — TODO confirm
with open(filename) as f:
return parser.parse(f.read())
# Function will automatically log execution time
result = parse_large_file('large_input.txt')
Install with Tessl CLI:
npx tessl i tessl/pypi-lark-parser