A modern general-purpose parsing library for Python that can parse any context-free grammar efficiently
—
Comprehensive error handling including parse errors, lexical errors, grammar errors, and unexpected input handling with context information and error recovery capabilities.
Foundation exception classes providing the error hierarchy for Lark parsing operations.
class LarkError(Exception):
"""
Base class for all Lark-specific exceptions.
"""
class ParseError(LarkError):
"""
Base class for all parsing-related errors.
Raised when parsing fails due to invalid input structure.
"""
class LexError(LarkError):
"""
Base class for all lexing-related errors.
Raised when lexer cannot tokenize input text.
"""Errors related to parser setup and grammar definition problems.
class GrammarError(LarkError):
"""
Raised when grammar definition contains errors.
Indicates problems in grammar syntax, rule definitions, or terminal patterns.
"""
class ConfigurationError(LarkError):
"""
Raised when invalid configuration options are provided.
Indicates incompatible parser options or invalid parameter values.
"""Base class and specific implementations for handling unexpected input during parsing.
class UnexpectedInput(ParseError):
"""
Base class for unexpected input exceptions.
Provides context information and error recovery utilities.
"""
def get_context(self, text: str, span: int = 40) -> str:
"""
Get formatted error context showing position of error in input.
Parameters:
- text: Original input text
- span: Number of characters to show around error
Returns:
str: Formatted context with error pointer
"""
def match_examples(self, parse_fn: Callable, examples: Dict[str, str],
token_type_match_fallback: bool = False,
use_accepts: bool = False) -> str:
"""
Match error against example error patterns.
Parameters:
- parse_fn: Function to parse examples
- examples: Dict mapping example names to example text
- token_type_match_fallback: Use token type matching as fallback
- use_accepts: Consider acceptable tokens in matching
Returns:
str: Name of best matching example
"""
# Attributes
line: int # Line number where error occurred
column: int # Column number where error occurred
pos_in_stream: int # Position in token stream
state: Any # Parser state at error
_terminals_by_name: Dict # Terminal definitions by name
Errors when parser expects more input but reaches end of text.
class UnexpectedEOF(UnexpectedInput):
"""
Raised when parser expected more tokens but input ended.
Indicates incomplete input that could be valid with additional content.
"""
def __init__(self, expected: List[str], state: Any = None,
terminals_by_name: Dict = None):
"""
Initialize unexpected EOF error.
Parameters:
- expected: List of expected token types
- state: Parser state when EOF encountered
- terminals_by_name: Terminal definitions
"""
expected: List[str] # Expected token types
Errors when lexer cannot match input characters to any terminal pattern.
class UnexpectedCharacters(LexError, UnexpectedInput):
"""
Raised when lexer cannot match input characters to terminals.
Indicates characters that don't form valid tokens according to grammar.
"""
def __init__(self, seq: str, lex_pos: int, line: int, column: int,
allowed: Set[str] = None, considered_rules: Set = None,
state: Any = None, token_history: List = None,
terminals_by_name: Dict = None):
"""
Initialize unexpected characters error.
Parameters:
- seq: Input sequence containing error
- lex_pos: Position in sequence where error occurred
- line: Line number of error
- column: Column number of error
- allowed: Set of characters/patterns that were expected
- considered_rules: Rules that were considered during lexing
- state: Lexer state at error
- token_history: Previous tokens
- terminals_by_name: Terminal definitions
"""
allowed: Set[str] # Expected characters/patterns
considered_rules: Set # Rules considered during lexing
token_history: List[Token] # Previous tokens for context
Errors when parser receives valid tokens in invalid combinations.
class UnexpectedToken(UnexpectedInput):
"""
Raised when parser receives a token it didn't expect.
The token is valid lexically but appears in wrong context syntactically.
"""
def __init__(self, token: Token, expected: Set[str] = None,
considered_rules: Set = None, state: Any = None,
interactive_parser = None, terminals_by_name: Dict = None,
token_history: List = None):
"""
Initialize unexpected token error.
Parameters:
- token: The unexpected token
- expected: Set of expected token types
- considered_rules: Rules considered during parsing
- state: Parser state at error
- interactive_parser: Interactive parser instance (if available)
- terminals_by_name: Terminal definitions
- token_history: Previous tokens for context
"""
token: Token # The unexpected token
accepts: Set[str] # Set of acceptable token types
interactive_parser: Any # Parser instance at failure point
considered_rules: Set # Rules considered during parsing
token_history: List[Token] # Previous tokens for context
Errors that occur during tree processing operations.
class VisitError(LarkError):
"""
Raised when visitors or transformers are interrupted by an exception.
Wraps the original exception with context about where it occurred.
"""
def __init__(self, rule: str, tree: Tree, orig_exc: Exception):
"""
Initialize visit error.
Parameters:
- rule: Rule name where error occurred
- tree: Tree node being processed when error occurred
- orig_exc: Original exception that caused the error
"""
rule: str # Rule name where error occurred
tree: Tree # Tree node being processed
orig_exc: Exception # Original exception
Exception used for controlling tree transformation flow.
class Discard(Exception):
"""
When raised in transformer callback, discards the node from parent tree.
Used to remove nodes during transformation without causing errors.
"""from lark import Lark, ParseError, LexError, UnexpectedToken
parser = Lark(grammar)
try:
tree = parser.parse(text)
except ParseError as e:
print(f"Parse error: {e}")
except LexError as e:
print(f"Lex error: {e}")from lark import Lark, UnexpectedToken, UnexpectedCharacters
parser = Lark(grammar)
try:
result = parser.parse("invalid input")
except UnexpectedToken as e:
print(f"Unexpected token '{e.token.value}' of type {e.token.type}")
print(f"Expected one of: {e.accepts}")
print(f"At line {e.line}, column {e.column}")
# Get context
context = e.get_context(text)
print(f"Context:\\n{context}")
except UnexpectedCharacters as e:
print(f"Unexpected character at position {e.pos_in_stream}")
print(f"At line {e.line}, column {e.column}")
print(f"Expected one of: {e.allowed}")from lark import Lark, UnexpectedToken
parser = Lark(grammar, parser='lalr') # Required for interactive parsing
try:
result = parser.parse(text)
except UnexpectedToken as e:
if e.interactive_parser:
# Use interactive parser for recovery
interactive = e.interactive_parser
# See what tokens are acceptable
acceptable = interactive.accepts()
print(f"Acceptable tokens: {acceptable}")
# Try to recover by feeding a valid token
if 'SEMICOLON' in acceptable:
from lark import Token
recovery_token = Token('SEMICOLON', ';')
interactive.feed_token(recovery_token)
# Continue parsing
try:
result = interactive.resume_parse()
print("Successfully recovered!")
except Exception as recovery_error:
print(f"Recovery failed: {recovery_error}")from lark import Lark, UnexpectedToken
# Define error examples for better error messages
error_examples = {
"missing_semicolon": "x = 1", # Missing semicolon
"unclosed_paren": "f(x", # Unclosed parenthesis
"invalid_operator": "x + + y", # Double operator
}
def parse_with_examples(parser, text):
try:
return parser.parse(text)
except UnexpectedToken as e:
# Try to match against examples
example_name = e.match_examples(
parser.parse,
error_examples,
use_accepts=True
)
if example_name:
print(f"Error type: {example_name}")
if example_name == "missing_semicolon":
print("Hint: Add a semicolon at the end of the statement")
elif example_name == "unclosed_paren":
print("Hint: Check for unmatched parentheses")
raise # Re-raise the original exception
parser = Lark(grammar)
result = parse_with_examples(parser, "x = 1") # Will trigger missing_semicolon
from lark import Transformer, VisitError
class MyTransformer(Transformer):
def some_rule(self, children):
# This might raise an exception
result = risky_operation(children[0])
return result
transformer = MyTransformer()
try:
result = transformer.transform(tree)
except VisitError as e:
print(f"Error in rule '{e.rule}': {e.orig_exc}")
print(f"Tree node: {e.tree}")
# Handle specific original exception types
if isinstance(e.orig_exc, ValueError):
print("Value error during transformation")
elif isinstance(e.orig_exc, KeyError):
print("Key error during transformation")from lark import Transformer, Discard
class FilterTransformer(Transformer):
def comment(self, children):
# Remove comment nodes from tree
raise Discard()
def empty_statement(self, children):
# Remove empty statements
if not children or all(c.strip() == '' for c in children):
raise Discard()
return children
# Apply transformer to remove unwanted nodes
filter_transformer = FilterTransformer()
cleaned_tree = filter_transformer.transform(original_tree)
from lark import Lark, GrammarError, ConfigurationError
try:
# Invalid grammar syntax
parser = Lark("""
start: expr
expr: NUMBER + # Invalid rule syntax
""")
except GrammarError as e:
print(f"Grammar error: {e}")
try:
# Invalid configuration
parser = Lark(grammar, parser='invalid_parser')
except ConfigurationError as e:
print(f"Configuration error: {e}")from lark import (Lark, LarkError, ParseError, LexError, GrammarError,
ConfigurationError, UnexpectedInput, UnexpectedToken,
UnexpectedCharacters, UnexpectedEOF, VisitError)
def safe_parse(grammar_text, input_text):
"""Safely parse with comprehensive error handling."""
try:
# Create parser
parser = Lark(grammar_text)
# Parse input
tree = parser.parse(input_text)
return tree, None
except GrammarError as e:
return None, f"Grammar definition error: {e}"
except ConfigurationError as e:
return None, f"Parser configuration error: {e}"
except UnexpectedEOF as e:
return None, f"Unexpected end of input. Expected: {e.expected}"
except UnexpectedCharacters as e:
context = e.get_context(input_text)
return None, f"Unexpected characters at line {e.line}:\n{context}"
except UnexpectedToken as e:
context = e.get_context(input_text)
return None, f"Unexpected token '{e.token.value}' at line {e.line}. Expected: {e.accepts}\n{context}"
except VisitError as e:
return None, f"Error processing rule '{e.rule}': {e.orig_exc}"
except ParseError as e:
return None, f"Parse error: {e}"
except LexError as e:
return None, f"Lexical error: {e}"
except LarkError as e:
return None, f"Lark error: {e}"
# Usage
tree, error = safe_parse(my_grammar, my_input)
if error:
print(f"Error: {error}")
else:
print("Parsing successful!")
print(tree.pretty())
from lark import UnexpectedInput
def format_error_context(error: UnexpectedInput, text: str, span: int = 60):
"""Format error with enhanced context information."""
context = error.get_context(text, span)
# Add line numbers to context
lines = context.split('\n')
formatted_lines = []
for i, line in enumerate(lines):
line_num = error.line + i - 1 # Adjust for context
if '^' in line: # Error pointer line
formatted_lines.append(f" {line}")
else:
formatted_lines.append(f"{line_num:4d}: {line}")
return '\n'.join(formatted_lines)
# Usage with any UnexpectedInput exception
try:
parser.parse(text)
except UnexpectedInput as e:
formatted_context = format_error_context(e, text)
print(f"Parse error at line {e.line}, column {e.column}:")
print(formatted_context)
Install with Tessl CLI
npx tessl i tessl/pypi-lark-parser