A modern general-purpose parsing library for Python that can parse any context-free grammar efficiently
—
Comprehensive error handling including parse errors, lexical errors, grammar errors, and unexpected input handling with context information and error recovery capabilities.
Foundation exception classes providing the error hierarchy for Lark parsing operations.
class LarkError(Exception):
"""
Base class for all Lark-specific exceptions.
"""
class ParseError(LarkError):
"""
Base class for all parsing-related errors.
Raised when parsing fails due to invalid input structure.
"""
class LexError(LarkError):
"""
Base class for all lexing-related errors.
Raised when lexer cannot tokenize input text.
"""Errors related to parser setup and grammar definition problems.
class GrammarError(LarkError):
"""
Raised when grammar definition contains errors.
Indicates problems in grammar syntax, rule definitions, or terminal patterns.
"""
class ConfigurationError(LarkError):
"""
Raised when invalid configuration options are provided.
Indicates incompatible parser options or invalid parameter values.
"""Base class and specific implementations for handling unexpected input during parsing.
class UnexpectedInput(ParseError):
"""
Base class for unexpected input exceptions.
Provides context information and error recovery utilities.
"""
def get_context(self, text: str, span: int = 40) -> str:
"""
Get formatted error context showing position of error in input.
Parameters:
- text: Original input text
- span: Number of characters to show around error
Returns:
str: Formatted context with error pointer
"""
def match_examples(self, parse_fn: Callable, examples: Dict[str, str],
token_type_match_fallback: bool = False,
use_accepts: bool = False) -> str:
"""
Match error against example error patterns.
Parameters:
- parse_fn: Function to parse examples
- examples: Dict mapping example names to example text
- token_type_match_fallback: Use token type matching as fallback
- use_accepts: Consider acceptable tokens in matching
Returns:
str: Name of best matching example
"""
# Attributes
line: int # Line number where error occurred
column: int # Column number where error occurred
pos_in_stream: int # Position in token stream
state: Any # Parser state at error
_terminals_by_name: Dict # Terminal definitions by name
Errors when parser expects more input but reaches end of text.
class UnexpectedEOF(UnexpectedInput):
"""
Raised when parser expected more tokens but input ended.
Indicates incomplete input that could be valid with additional content.
"""
def __init__(self, expected: List[str], state: Any = None,
terminals_by_name: Dict = None):
"""
Initialize unexpected EOF error.
Parameters:
- expected: List of expected token types
- state: Parser state when EOF encountered
- terminals_by_name: Terminal definitions
"""
expected: List[str] # Expected token types
Errors when lexer cannot match input characters to any terminal pattern.
class UnexpectedCharacters(LexError, UnexpectedInput):
"""
Raised when lexer cannot match input characters to terminals.
Indicates characters that don't form valid tokens according to grammar.
"""
def __init__(self, seq: str, lex_pos: int, line: int, column: int,
allowed: Set[str] = None, considered_rules: Set = None,
state: Any = None, token_history: List = None,
terminals_by_name: Dict = None):
"""
Initialize unexpected characters error.
Parameters:
- seq: Input sequence containing error
- lex_pos: Position in sequence where error occurred
- line: Line number of error
- column: Column number of error
- allowed: Set of characters/patterns that were expected
- considered_rules: Rules that were considered during lexing
- state: Lexer state at error
- token_history: Previous tokens
- terminals_by_name: Terminal definitions
"""
allowed: Set[str] # Expected characters/patterns
considered_rules: Set # Rules considered during lexing
token_history: List[Token] # Previous tokens for context
Errors when parser receives valid tokens in invalid combinations.
class UnexpectedToken(UnexpectedInput):
"""
Raised when parser receives a token it didn't expect.
The token is valid lexically but appears in wrong context syntactically.
"""
def __init__(self, token: Token, expected: Set[str] = None,
considered_rules: Set = None, state: Any = None,
interactive_parser = None, terminals_by_name: Dict = None,
token_history: List = None):
"""
Initialize unexpected token error.
Parameters:
- token: The unexpected token
- expected: Set of expected token types
- considered_rules: Rules considered during parsing
- state: Parser state at error
- interactive_parser: Interactive parser instance (if available)
- terminals_by_name: Terminal definitions
- token_history: Previous tokens for context
"""
token: Token # The unexpected token
accepts: Set[str] # Set of acceptable token types
interactive_parser: Any # Parser instance at failure point
considered_rules: Set # Rules considered during parsing
token_history: List[Token] # Previous tokens for context
Errors that occur during tree processing operations.
class VisitError(LarkError):
"""
Raised when visitors or transformers are interrupted by an exception.
Wraps the original exception with context about where it occurred.
"""
def __init__(self, rule: str, tree: Tree, orig_exc: Exception):
"""
Initialize visit error.
Parameters:
- rule: Rule name where error occurred
- tree: Tree node being processed when error occurred
- orig_exc: Original exception that caused the error
"""
rule: str # Rule name where error occurred
tree: Tree # Tree node being processed
orig_exc: Exception # Original exception
Exception used for controlling tree transformation flow.
class Discard(Exception):
"""
When raised in transformer callback, discards the node from parent tree.
Used to remove nodes during transformation without causing errors.
"""from lark import Lark, ParseError, LexError, UnexpectedToken
parser = Lark(grammar)
try:
tree = parser.parse(text)
except ParseError as e:
print(f"Parse error: {e}")
except LexError as e:
print(f"Lex error: {e}")from lark import Lark, UnexpectedToken, UnexpectedCharacters
parser = Lark(grammar)
try:
result = parser.parse("invalid input")
except UnexpectedToken as e:
print(f"Unexpected token '{e.token.value}' of type {e.token.type}")
print(f"Expected one of: {e.accepts}")
print(f"At line {e.line}, column {e.column}")
# Get context
context = e.get_context(text)
print(f"Context:\\n{context}")
except UnexpectedCharacters as e:
print(f"Unexpected character at position {e.pos_in_stream}")
print(f"At line {e.line}, column {e.column}")
print(f"Expected one of: {e.allowed}")from lark import Lark, UnexpectedToken
parser = Lark(grammar, parser='lalr') # Required for interactive parsing
try:
result = parser.parse(text)
except UnexpectedToken as e:
if e.interactive_parser:
# Use interactive parser for recovery
interactive = e.interactive_parser
# See what tokens are acceptable
acceptable = interactive.accepts()
print(f"Acceptable tokens: {acceptable}")
# Try to recover by feeding a valid token
if 'SEMICOLON' in acceptable:
from lark import Token
recovery_token = Token('SEMICOLON', ';')
interactive.feed_token(recovery_token)
# Continue parsing
try:
result = interactive.resume_parse()
print("Successfully recovered!")
except Exception as recovery_error:
print(f"Recovery failed: {recovery_error}")from lark import Lark, UnexpectedToken
# Define error examples for better error messages
error_examples = {
"missing_semicolon": "x = 1", # Missing semicolon
"unclosed_paren": "f(x", # Unclosed parenthesis
"invalid_operator": "x + + y", # Double operator
}
def parse_with_examples(parser, text):
try:
return parser.parse(text)
except UnexpectedToken as e:
# Try to match against examples
example_name = e.match_examples(
parser.parse,
error_examples,
use_accepts=True
)
if example_name:
print(f"Error type: {example_name}")
if example_name == "missing_semicolon":
print("Hint: Add a semicolon at the end of the statement")
elif example_name == "unclosed_paren":
print("Hint: Check for unmatched parentheses")
raise # Re-raise the original exception
parser = Lark(grammar)
result = parse_with_examples(parser, "x = 1") # Will trigger missing_semicolon
from lark import Transformer, VisitError
class MyTransformer(Transformer):
def some_rule(self, children):
# This might raise an exception
result = risky_operation(children[0])
return result
transformer = MyTransformer()
try:
result = transformer.transform(tree)
except VisitError as e:
print(f"Error in rule '{e.rule}': {e.orig_exc}")
print(f"Tree node: {e.tree}")
# Handle specific original exception types
if isinstance(e.orig_exc, ValueError):
print("Value error during transformation")
elif isinstance(e.orig_exc, KeyError):
print("Key error during transformation")from lark import Transformer, Discard
class FilterTransformer(Transformer):
def comment(self, children):
# Remove comment nodes from tree
raise Discard()
def empty_statement(self, children):
# Remove empty statements
if not children or all(c.strip() == '' for c in children):
raise Discard()
return children
# Apply transformer to remove unwanted nodes
filter_transformer = FilterTransformer()
cleaned_tree = filter_transformer.transform(original_tree)
from lark import Lark, GrammarError, ConfigurationError
try:
# Invalid grammar syntax
parser = Lark("""
start: expr
expr: NUMBER + # Invalid rule syntax
""")
except GrammarError as e:
print(f"Grammar error: {e}")
try:
# Invalid configuration
parser = Lark(grammar, parser='invalid_parser')
except ConfigurationError as e:
print(f"Configuration error: {e}")from lark import (Lark, LarkError, ParseError, LexError, GrammarError,
ConfigurationError, UnexpectedInput, UnexpectedToken,
UnexpectedCharacters, UnexpectedEOF, VisitError)
def safe_parse(grammar_text, input_text):
"""Safely parse with comprehensive error handling."""
try:
# Create parser
parser = Lark(grammar_text)
# Parse input
tree = parser.parse(input_text)
return tree, None
except GrammarError as e:
return None, f"Grammar definition error: {e}"
except ConfigurationError as e:
return None, f"Parser configuration error: {e}"
except UnexpectedEOF as e:
return None, f"Unexpected end of input. Expected: {e.expected}"
except UnexpectedCharacters as e:
context = e.get_context(input_text)
return None, f"Unexpected characters at line {e.line}:\n{context}"
except UnexpectedToken as e:
context = e.get_context(input_text)
return None, f"Unexpected token '{e.token.value}' at line {e.line}. Expected: {e.accepts}\n{context}"
except VisitError as e:
return None, f"Error processing rule '{e.rule}': {e.orig_exc}"
except ParseError as e:
return None, f"Parse error: {e}"
except LexError as e:
return None, f"Lexical error: {e}"
except LarkError as e:
return None, f"Lark error: {e}"
# Usage
tree, error = safe_parse(my_grammar, my_input)
if error:
print(f"Error: {error}")
else:
print("Parsing successful!")
print(tree.pretty())
from lark import UnexpectedInput
def format_error_context(error: UnexpectedInput, text: str, span: int = 60):
"""Format error with enhanced context information."""
context = error.get_context(text, span)
# Add line numbers to context
lines = context.split('\n')
formatted_lines = []
for i, line in enumerate(lines):
line_num = error.line + i - 1 # Adjust for context
if '^' in line: # Error pointer line
formatted_lines.append(f" {line}")
else:
formatted_lines.append(f"{line_num:4d}: {line}")
return '\n'.join(formatted_lines)
# Usage with any UnexpectedInput exception
try:
parser.parse(text)
except UnexpectedInput as e:
formatted_context = format_error_context(e, text)
print(f"Parse error at line {e.line}, column {e.column}:")
print(formatted_context)
Install with Tessl CLI
npx tessl i tessl/pypi-lark-parser