CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-parso

A Python parser that supports error recovery and round-trip parsing for different Python versions

Pending
Overview
Eval results
Files

docs/grammar-system.md

Grammar System

The grammar system in parso provides fine-grained control over Python parsing, including version-specific grammars, error detection, code refactoring, and caching mechanisms. This is the foundation layer that powers the high-level parsing functions.

Capabilities

Grammar Classes

Core grammar classes that handle the parsing logic and provide access to advanced parsing features.

class Grammar:
    """
    Generic grammar class for parsing languages.

    The usage examples in this document obtain instances through
    ``parso.load_grammar()`` and then call ``parse``, ``iter_errors`` and
    ``refactor`` on the returned object.

    Attributes:
        version_info (PythonVersionInfo): Python version information
    """
    
    def parse(self, code=None, *, error_recovery=True, path=None, start_symbol=None, 
              cache=False, diff_cache=False, cache_path=None, file_io=None):
        """
        Parse code using this grammar.

        Either ``code`` or ``path`` must be supplied; every option after
        ``code`` is keyword-only.

        Args:
            code (str | bytes, optional): Source code to parse
            error_recovery (bool): Enable error recovery (default: True).
                When False, invalid code raises ParserSyntaxError instead of
                producing a tree.
            path (str | Path, optional): File to parse when ``code`` is not
                given; also the file path used for caching
            start_symbol (str, optional): Grammar start symbol (default: 'file_input').
                A non-default value requires ``error_recovery=False``.
            cache (bool): Enable pickle caching (default: False)
            diff_cache (bool): Enable differential caching (default: False)
            cache_path (str | Path, optional): Custom cache directory
            file_io (FileIO, optional): File I/O handler

        Returns:
            NodeOrLeaf: Parsed syntax tree (typically Module)

        Raises:
            TypeError: If neither code nor path provided
            NotImplementedError: If error_recovery used with non-default start_symbol
            ParserSyntaxError: If parsing fails and error_recovery is False
        """
    
    def iter_errors(self, node):
        """
        Find syntax and semantic errors in a parsed tree.

        Args:
            node (NodeOrLeaf): Root node to check for errors

        Yields:
            Issue: Error objects with position and message information

        Raises:
            ValueError: If no error normalizer configured for this grammar
        """
    
    def refactor(self, base_node, node_to_str_map):
        """
        Refactor code by replacing nodes with new strings.

        Args:
            base_node (NodeOrLeaf): Root node to refactor
            node_to_str_map (dict): Mapping of nodes to replacement strings

        Returns:
            str: Refactored code
        """
class PythonGrammar(Grammar):
    """
    Python-specific grammar implementation with tokenization and error detection.

    Subclass of ``Grammar``, bound to one Python version.

    Attributes:
        version_info (PythonVersionInfo): Python version for this grammar
    """
    
    def __init__(self, version_info, bnf_text):
        """
        Initialize Python grammar.

        Args:
            version_info (PythonVersionInfo): Python version information
            bnf_text (str): BNF grammar definition
        """

Usage Examples

import parso

# Load and use grammar directly
grammar = parso.load_grammar(version="3.9")

# Parse with advanced options
module = grammar.parse(
    'def example(): return 42',
    error_recovery=True,
    cache=True,
    diff_cache=True
)

# Parse a single expression with a custom start symbol.
# A non-default start_symbol is only supported with error_recovery=False;
# combining it with error recovery raises NotImplementedError, so the strict
# flag is passed explicitly and no retry is needed.
expr = grammar.parse(
    '1 + 2 * 3',
    error_recovery=False,
    start_symbol='expr'
)

# Check version information
print(f"Grammar version: {grammar.version_info.major}.{grammar.version_info.minor}")

Error Detection

Advanced error detection and analysis capabilities for finding syntax and semantic issues.

def iter_errors(self, node):
    """
    Generator yielding error objects for syntax and semantic issues.

    This is the ``iter_errors`` method of ``Grammar``; call it as
    ``grammar.iter_errors(tree)`` on a tree produced by ``grammar.parse``.

    Args:
        node (NodeOrLeaf): Parsed tree to analyze

    Yields:
        Issue: Error objects with message, code, and position information

    Raises:
        ValueError: If no error normalizer configured for this grammar
    """

Usage Examples

import parso

grammar = parso.load_grammar()

# Source containing several distinct problems
code = '''
def function(:  # Missing parameter name
    x = 1 +     # Incomplete expression
    return x

continue        # Continue outside loop
'''

# Error recovery is on by default, so parsing succeeds despite the problems
module = grammar.parse(code)
errors = [*grammar.iter_errors(module)]

# Report each issue: line number, message, numeric code, full position
for issue in errors:
    position = issue.start_pos
    print(f"Line {position[0]}: {issue.message}")
    print(f"Error code: {issue.code}")
    print(f"At position: {position}")

# Pick out particular kinds of issue by inspecting the message text
syntax_errors = [issue for issue in errors if 'SyntaxError' in issue.message]
semantic_errors = [issue for issue in errors if 'continue' in issue.message.lower()]

Code Refactoring

Refactor parsed code by replacing specific nodes with new content while preserving formatting.

def refactor(self, base_node, node_to_str_map):
    """
    Apply refactoring transformations to code.

    This is the ``refactor`` method of ``Grammar``; call it as
    ``grammar.refactor(tree, mapping)``.

    Args:
        base_node (NodeOrLeaf): Root node containing code to refactor
        node_to_str_map (dict): Mapping from nodes to replacement strings

    Returns:
        str: Refactored source code with replacements applied
    """

Usage Examples

import parso

grammar = parso.load_grammar()
module = grammar.parse('''
def old_function_name():
    old_variable = 42
    return old_variable
''')

# Locate the function definition and the leaf holding its name
function_node = module.children[0]  # Function definition
func_name = function_node.name      # Function name

# Collect every occurrence of the variable (its definition and its usages)
# so the rename stays consistent throughout the function body
old_var_nodes = list(module.get_used_names()['old_variable'])

# Create refactoring map.
# The replacement string stands in for the whole leaf, including its prefix
# (the whitespace/comments in front of it), so the prefix is re-attached;
# otherwise "def old_function_name" would become "defnew_function_name".
refactor_map = {func_name: func_name.prefix + 'new_function_name'}
refactor_map.update(
    {name_node: name_node.prefix + 'new_variable' for name_node in old_var_nodes}
)

# Apply refactoring
refactored_code = grammar.refactor(module, refactor_map)
print(refactored_code)

Grammar Options and Configuration

Advanced parsing options for specific use cases and performance tuning.

Cache Configuration

import parso
from pathlib import Path

grammar = parso.load_grammar()

# Keep the cache in a dedicated directory under the user's home folder
custom_cache = Path.home().joinpath('.my_parso_cache')
module = grammar.parse(path='script.py', cache=True, cache_path=custom_cache)

# Differential caching for incremental parsing:
# diff_cache=True only re-parses changed sections
module = grammar.parse(path='large_file.py', cache=True, diff_cache=True)

Start Symbol Parsing

Parse specific grammar constructs instead of full modules:

import parso

grammar = parso.load_grammar()

# A custom start symbol requires strict parsing, so every call below
# disables error recovery.

# A bare expression
expr = grammar.parse('x + y * z', start_symbol='expr', error_recovery=False)
print(type(expr).__name__)  # Should be expression node type

# A single statement
stmt = grammar.parse('x = 42', start_symbol='stmt', error_recovery=False)

# A function definition
func_source = 'def example(a, b=None): return a + b'
func = grammar.parse(func_source, start_symbol='funcdef', error_recovery=False)

Error Recovery vs Strict Parsing

Understanding when to use error recovery and when to require valid syntax.

Error Recovery Mode (Default)

import parso

grammar = parso.load_grammar()

# Deliberately broken source: error recovery still produces a tree for it
broken_code = '''
def function_with_syntax_error(:
    pass

class MissingColon
    pass

for item in   # Missing iterable
    print(item)
'''

# Parsing succeeds; the returned tree contains error nodes
module = grammar.parse(broken_code, error_recovery=True)
top_level_count = len(module.children)
print(f"Parsed {top_level_count} top-level items")

# Collect the issues found in the recovered tree
errors = [*grammar.iter_errors(module)]
error_count = len(errors)
print(f"Found {error_count} errors")

Strict Parsing Mode

import parso

grammar = parso.load_grammar()

# With error_recovery=False a syntax error raises instead of being recovered
invalid_source = 'def invalid(: pass'
try:
    module = grammar.parse(invalid_source, error_recovery=False)
except parso.ParserSyntaxError as exc:
    failing_leaf = exc.error_leaf
    print(f"Parse failed: {exc.message}")
    print(f"Error at: {failing_leaf.start_pos}")
    
# Use strict mode for validation
def validate_python_code(code):
    """Report whether ``code`` parses without syntax errors.

    Returns:
        tuple: ``(True, None)`` when strict parsing succeeds, otherwise
        ``(False, message)`` where ``message`` is the string form of the
        ``ParserSyntaxError`` that was raised.
    """
    try:
        parso.load_grammar().parse(code, error_recovery=False)
    except parso.ParserSyntaxError as exc:
        return False, str(exc)
    return True, None

# Well-formed source: expect a True flag and no message
valid_source = 'def hello(): return "world"'
is_valid, error_msg = validate_python_code(valid_source)
print(f"Valid: {is_valid}")  # True

# Malformed source: expect a False flag plus the parser's message
broken_source = 'def broken(: pass'
is_valid, error_msg = validate_python_code(broken_source)
print(f"Valid: {is_valid}, Error: {error_msg}")  # False, error message

Version-Specific Grammar Features

Working with different Python versions and their specific grammar features.

import parso

# Python 3.8 - walrus operator and positional-only parameters
source38 = '''
def func(pos_only, /, normal, *, kw_only):
    if (result := expensive_operation()) is not None:
        return result
'''
grammar38 = parso.load_grammar(version="3.8")
module = grammar38.parse(source38)

# Python 3.10 - match statements and union types
source310 = '''
def process(value: int | str) -> str:
    match value:
        case int() if value > 0:
            return "positive integer"
        case str() if value:
            return "non-empty string"
        case _:
            return "other"
'''
grammar310 = parso.load_grammar(version="3.10")
module = grammar310.parse(source310)

# Version compatibility checking
def parse_with_fallback(code, preferred_version="3.10"):
    """Parse code, falling back to older versions if needed.

    Strict parsing is attempted with ``preferred_version`` and then with each
    older version in the known list. If none accepts the code, it is parsed
    once more with error recovery enabled.

    Args:
        code (str): Source code to parse.
        preferred_version (str): First version to try. A value that is not in
            the known list starts the search at the newest known version.

    Returns:
        tuple: ``(tree, label)`` where ``label`` is the version that parsed
        the code strictly, or ``"<version>-recovery"`` when error recovery
        was needed.
    """
    versions = ["3.10", "3.9", "3.8", "3.7", "3.6"]
    if preferred_version in versions:
        candidates = versions[versions.index(preferred_version):]
    else:
        candidates = versions

    for version in candidates:
        try:
            grammar = parso.load_grammar(version=version)
            return grammar.parse(code, error_recovery=False), version
        except (parso.ParserSyntaxError, NotImplementedError):
            # ParserSyntaxError: this version rejects the code.
            # NotImplementedError: presumably an unsupported grammar
            # version - confirm against parso.load_grammar.
            continue

    # Nothing parsed strictly: recover with the newest candidate, so a caller
    # who asked for an older version is never silently moved to a newer
    # grammar than the one requested.
    recovery_version = candidates[0]
    grammar = parso.load_grammar(version=recovery_version)
    return grammar.parse(code, error_recovery=True), f"{recovery_version}-recovery"

# Usage
# A match statement needs its case block on separate, indented lines;
# the one-line form "match x: case 1: pass" is not valid Python.
result, version_used = parse_with_fallback('match x:\n    case 1:\n        pass\n')
print(f"Parsed with Python {version_used}")

Advanced Integration Patterns

Grammar Caching and Reuse

import parso

class ParserManager:
    """Hand out one shared grammar object per Python version."""

    def __init__(self):
        # version string -> grammar loaded for that version
        self._grammars = {}

    def get_grammar(self, version="3.9"):
        """Return the grammar for ``version``, loading it on first request."""
        grammar = self._grammars.get(version)
        if grammar is None:
            grammar = parso.load_grammar(version=version)
            self._grammars[version] = grammar
        return grammar

    def parse_file(self, path, version="3.9", **kwargs):
        """Parse the file at ``path`` with the shared grammar for ``version``.

        Extra keyword arguments are forwarded unchanged to ``grammar.parse``.
        """
        return self.get_grammar(version).parse(path=path, **kwargs)

# Usage
manager = ParserManager()
# Both files use the default version, so the second parse reuses the grammar
module1, module2 = (
    manager.parse_file(source_path, cache=True)
    for source_path in ("file1.py", "file2.py")
)

Custom Error Handling

import parso

def detailed_error_analysis(code, version="3.9"):
    """Parse ``code`` and sort the reported issues into categories.

    Each issue is classified by its lower-cased message: messages mentioning
    "indent" go to ``'indentation'``, otherwise those mentioning "syntax" go
    to ``'syntax'``, and everything else goes to ``'semantic'``.

    Args:
        code (str): Source code to analyse.
        version (str): Python version of the grammar to load.

    Returns:
        tuple: ``(categorized, module)`` - the dict of issue lists keyed by
        category, and the parsed tree.
    """
    grammar = parso.load_grammar(version=version)
    module = grammar.parse(code)

    categorized = {'syntax': [], 'indentation': [], 'semantic': []}

    # NOTE(review): parso appears to prefix its messages with "SyntaxError:"
    # or "IndentationError:", in which case 'semantic' stays empty - confirm.
    for issue in list(grammar.iter_errors(module)):
        text = issue.message.lower()
        if 'indent' in text:  # also matches "indentation"
            bucket = 'indentation'
        elif 'syntax' in text:
            bucket = 'syntax'
        else:
            bucket = 'semantic'
        categorized[bucket].append(issue)

    return categorized, module

# Usage
errors, tree = detailed_error_analysis('''
def function():
pass  # Wrong indentation
continue  # Semantic error
def invalid(: pass  # Syntax error
''')

# Print a summary for every category that collected at least one issue
for category, error_list in errors.items():
    if not error_list:
        continue
    print(f"{category.title()} errors: {len(error_list)}")
    for issue in error_list:
        print(f"  Line {issue.start_pos[0]}: {issue.message}")

Install with Tessl CLI

npx tessl i tessl/pypi-parso

docs

core-parsing.md

error-handling.md

grammar-system.md

index.md

python-elements.md

tokenization.md

tree-navigation.md

utilities.md

tile.json