A modern general-purpose parsing library for Python that can parse any context-free grammar efficiently.
—
Main parsing functionality providing the primary interface for creating parsers, configuring parsing behavior, and parsing text according to grammar definitions.
The Lark class serves as the primary interface for the parsing library, coordinating grammar loading, lexer configuration, and parse tree generation.
class Lark:
    """
    Main interface for the parsing library.

    Coordinates grammar loading, lexer configuration, and parse tree
    generation. Configure behavior via **options (see LarkOptions).
    """

    def __init__(self, grammar: str, **options):
        """
        Initialize parser with grammar and options.
        Parameters:
        - grammar: EBNF grammar string or file path
        - **options: Configuration options (see LarkOptions)
        """

    def parse(self, text: str, start: Optional[str] = None, on_error=None) -> Tree:
        """
        Parse text according to grammar.
        Parameters:
        - text: Input text to parse
        - start: Starting rule (overrides grammar start)
        - on_error: Error callback function
        Returns:
        Tree: Parse tree root
        """

    def parse_interactive(self, text: Optional[str] = None, start: Optional[str] = None):
        """
        Start interactive parsing session for error recovery.
        Parameters:
        - text: Input text (optional for incremental parsing)
        - start: Starting rule
        Returns:
        InteractiveParser: Interactive parser instance
        """

    def lex(self, text: str, dont_ignore: bool = False) -> Iterator[Token]:
        """
        Tokenize text without parsing.
        Parameters:
        - text: Input text to tokenize
        - dont_ignore: Include normally ignored tokens
        Returns:
        Iterator[Token]: Token stream
        """

    def get_terminal(self, name: str):
        """
        Get terminal definition by name.
        Parameters:
        - name: Terminal name
        Returns:
        Terminal definition
        """

    def save(self, f):
        """
        Save parser instance to file for caching.
        Parameters:
        - f: File object to write to
        """

    @classmethod
    def load(cls, f):
        """
        Load parser instance from file.
        Parameters:
        - f: File object to read from
        Returns:
        Lark: Loaded parser instance
        """

    @classmethod
    def open(cls, grammar_filename: str, rel_to: Optional[str] = None, **options):
        """
        Create parser from grammar file.
        Parameters:
        - grammar_filename: Path to grammar file
        - rel_to: Base path for relative imports
        - **options: Parser options
        Returns:
        Lark: Parser instance
        """

    @classmethod
    def open_from_package(cls, package: str, grammar_path: str,
                          search_paths: Tuple[str, ...] = ("",), **options):
        """
        Load grammar from Python package.
        Parameters:
        - package: Package name
        - grammar_path: Path within package
        - search_paths: Search paths for imports
        - **options: Parser options
        Returns:
        Lark: Parser instance
        """

    # Properties
    source_path: Optional[str]    # Grammar source file path
    source_grammar: str           # Original grammar string
    grammar: Grammar              # Compiled grammar object
    options: LarkOptions          # Parser configuration
    terminals: List[TerminalDef]  # Terminal definitions
    rules: List[Rule]             # Grammar rules

Configuration options controlling parsing behavior, algorithm selection, and feature enablement.
class LarkOptions:
    """
    Configuration options for Lark parser.
    """

    # General Options
    start: Union[str, List[str]]                # Start symbol(s)
    debug: bool                                 # Enable debug output
    transformer: Optional[Transformer]          # Auto-apply transformer
    propagate_positions: Union[bool, Callable]  # Position propagation
    maybe_placeholders: bool                    # [] operator behavior
    cache: Union[bool, str]                     # Cache grammar analysis
    regex: bool                                 # Use regex module
    g_regex_flags: int                          # Global regex flags
    keep_all_tokens: bool                       # Keep punctuation tokens
    tree_class: type                            # Custom tree class

    # Algorithm Options
    parser: str     # "earley", "lalr", "cyk"
    lexer: str      # Lexer type
    ambiguity: str  # Ambiguity handling

    # Lexer types:
    # - "auto": Choose based on parser
    # - "standard": Standard lexer
    # - "contextual": Context-sensitive (LALR only)
    # - "dynamic": Flexible (Earley only)
    # - "dynamic_complete": All tokenization variants

    # Ambiguity handling (Earley only):
    # - "resolve": Automatic resolution
    # - "explicit": Wrap in _ambig nodes
    # - "forest": Return shared packed parse forest

    # Domain Specific Options
    postlex: Optional[PostLex]            # Lexer post-processing
    priority: str                         # Priority evaluation
    lexer_callbacks: Dict[str, Callable]  # Token callbacks
    use_bytes: bool                       # Accept bytes input
    edit_terminals: Optional[Callable]    # Terminal editing callback

Step-by-step parsing with error recovery and incremental input processing.
class InteractiveParser:
    """
    Interactive parser for step-by-step parsing and error recovery.
    Provides advanced control over parsing and error handling with LALR.
    """

    def feed_token(self, token: Token):
        """
        Feed parser with a token and advance to next state.
        Parameters:
        - token: Token instance to process
        Note: token must be an instance of Token class
        """

    def exhaust_lexer(self) -> None:
        """
        Feed remaining lexer state into interactive parser.
        Modifies instance in place, does not feed '$END' token.
        """

    def feed_eof(self, last_token: Optional[Token] = None):
        """
        Feed '$END' token to parser.
        Parameters:
        - last_token: Token to borrow position from (optional)
        """

    def accepts(self) -> Set[str]:
        """
        Get set of token types that will advance parser to valid state.
        Returns:
        Set[str]: Set of acceptable token type names
        """

    def choices(self) -> Dict[str, Any]:
        """
        Get dictionary of token types matched to parser actions.
        Only returns token types accepted by current state.
        Returns:
        Dict[str, Any]: Token types and their actions
        """

    def resume_parse(self):
        """
        Resume automated parsing from current state.
        Returns:
        Parse result from current position
        """

    def copy(self) -> 'InteractiveParser':
        """
        Create new interactive parser with separate state.
        Returns:
        InteractiveParser: Independent copy
        """

    def as_immutable(self) -> 'ImmutableInteractiveParser':
        """
        Convert to immutable interactive parser.
        Returns:
        ImmutableInteractiveParser: Immutable version
        """

    def pretty(self) -> str:
        """
        Print parser choices in readable format.
        Returns:
        str: Formatted choices and stack information
        """
class ImmutableInteractiveParser(InteractiveParser):
"""
Immutable version of InteractiveParser.
Operations create new instances instead of modifying in-place.
"""
result: Any # Parse result when parsing completes
def feed_token(self, token: Token) -> 'ImmutableInteractiveParser':
"""
Feed token and return new parser instance with updated state.
Parameters:
- token: Token to process
Returns:
ImmutableInteractiveParser: New parser instance
"""
def exhaust_lexer(self) -> 'ImmutableInteractiveParser':
"""
Feed remaining lexer state and return new parser instance.
Returns:
ImmutableInteractiveParser: New parser instance
"""
def as_mutable(self) -> InteractiveParser:
"""
Convert to mutable InteractiveParser.
Returns:
InteractiveParser: Mutable version
"""Abstract base class for lexer post-processing, such as indentation handling.
class PostLex:
    """
    Abstract base class for lexer post-processing.
    """

    def process(self, stream: Iterator[Token]) -> Iterator[Token]:
        """
        Process token stream after lexing.
        Parameters:
        - stream: Input token stream
        Returns:
        Iterator[Token]: Processed token stream
        """
    always_accept: Tuple[str, ...]  # Token types to always accept

Functions and classes for loading and processing grammar definitions.
class FromPackageLoader:
    """
    Loader for grammars stored in Python packages.
    """

    def __init__(self, package_root: str = ""):
        """
        Initialize package loader.
        Parameters:
        - package_root: Root package path
        """

    def __call__(self, base_path: str, grammar_path: str) -> Tuple[str, str]:
        """
        Load grammar from package.
        Parameters:
        - base_path: Base import path
        - grammar_path: Grammar file path
        Returns:
        Tuple[str, str]: (grammar_text, full_path)
        """

from lark import Lark
# Simple arithmetic grammar
grammar = """
?start: sum
?sum: product
| sum "+" product -> add
| sum "-" product -> sub
?product: atom
| product "*" atom -> mul
| product "/" atom -> div
?atom: NUMBER -> number
| "-" atom -> neg
| "(" sum ")"
%import common.NUMBER
%import common.WS_INLINE
%ignore WS_INLINE
"""
parser = Lark(grammar)
result = parser.parse("3 + 4 * 2")
print(result.pretty())

from lark import Lark
# Configure parser with specific options
parser = Lark(
    grammar,
    parser='lalr',              # Use LALR parser
    lexer='standard',           # Standard lexer
    start='expression',         # Custom start rule
    debug=True,                 # Enable debug output
    keep_all_tokens=True,       # Keep all tokens
    propagate_positions=True,   # Track positions
)

from lark import Lark
# Load grammar from file
parser = Lark.open('my_grammar.lark', rel_to=__file__)

# Load from package
parser = Lark.open_from_package(
    'my_package.grammars',
    'grammar.lark',
    search_paths=('common',),
)

from lark import Lark
from lark import UnexpectedToken  # required for the except clause below

parser = Lark(grammar)
interactive = parser.parse_interactive()
# Feed tokens incrementally
for token in parser.lex("1 + 2"):
    try:
        interactive.feed_token(token)
    except UnexpectedToken:
        # Handle error, possibly recover
        acceptable = interactive.accepts()
        print(f"Expected one of: {acceptable}")

from lark import Lark
# Cache to temporary file
parser = Lark(grammar, cache=True)
# Cache to specific file
parser = Lark(grammar, cache='my_grammar.cache')
# Manual save/load
parser.save(open('parser.cache', 'wb'))
cached_parser = Lark.load(open('parser.cache', 'rb'))Install with Tessl CLI
npx tessl i tessl/pypi-lark-parser