A Python parser that supports error recovery and round-trip parsing for different Python versions
—
The grammar system in parso provides fine-grained control over Python parsing, including version-specific grammars, error detection, code refactoring, and caching mechanisms. This is the foundation layer that powers the high-level parsing functions.
Core grammar classes that handle the parsing logic and provide access to advanced parsing features.
class Grammar:
    """
    Generic grammar class for parsing languages.

    Attributes:
        version_info (PythonVersionInfo): Python version information
    """

    def parse(self, code=None, *, error_recovery=True, path=None, start_symbol=None,
              cache=False, diff_cache=False, cache_path=None, file_io=None):
        """
        Parse code using this grammar.

        Args:
            code (str | bytes, optional): Source code to parse
            error_recovery (bool): Enable error recovery (default: True)
            path (str | Path, optional): Path of the source file; used when
                ``code`` is not given and for caching
            start_symbol (str, optional): Grammar start symbol (default: 'file_input')
            cache (bool): Enable pickle caching (default: False)
            diff_cache (bool): Enable differential caching (default: False)
            cache_path (str | Path, optional): Custom cache directory
            file_io (FileIO, optional): File I/O handler

        Returns:
            NodeOrLeaf: Parsed syntax tree (typically Module)

        Raises:
            TypeError: If neither code nor path provided
            NotImplementedError: If error_recovery used with non-default start_symbol
            ParserSyntaxError: If parsing fails and error_recovery is False
        """

    def iter_errors(self, node):
        """
        Find syntax and semantic errors in a parsed tree.

        Args:
            node (NodeOrLeaf): Root node to check for errors

        Yields:
            Issue: Error objects with position and message information

        Raises:
            ValueError: If no error normalizer configured for this grammar
        """

    def refactor(self, base_node, node_to_str_map):
        """
        Refactor code by replacing nodes with new strings.

        Args:
            base_node (NodeOrLeaf): Root node to refactor
            node_to_str_map (dict): Mapping of nodes to replacement strings

        Returns:
            str: Refactored code
        """


class PythonGrammar(Grammar):
    """
    Python-specific grammar implementation with tokenization and error detection.

    Attributes:
        version_info (PythonVersionInfo): Python version for this grammar
    """

    def __init__(self, version_info, bnf_text):
        """
        Initialize Python grammar.

        Args:
            version_info (PythonVersionInfo): Python version information
            bnf_text (str): BNF grammar definition
        """


import parso
# Load and use grammar directly
grammar = parso.load_grammar(version="3.9")

# Parse with advanced options
module = grammar.parse(
    'def example(): return 42',
    error_recovery=True,
    cache=True,
    diff_cache=True
)

# Parse a code string with a custom start symbol.
# Note: a non-default start_symbol only works with error_recovery=False;
# combining it with error_recovery=True raises NotImplementedError.
expr = grammar.parse(
    '1 + 2 * 3',
    error_recovery=False,
    start_symbol='expr'
)

# Check version information
print(f"Grammar version: {grammar.version_info.major}.{grammar.version_info.minor}")

Advanced error detection and analysis capabilities for finding syntax and semantic issues.
def iter_errors(self, node):
    """
    Generator yielding error objects for syntax and semantic issues.

    Args:
        node (NodeOrLeaf): Parsed tree to analyze

    Yields:
        Issue: Error objects with message, code, and position information
    """


import parso
grammar = parso.load_grammar()

# Parse code with multiple errors
code = '''
def function(: # Missing parameter name
    x = 1 + # Incomplete expression
    return x
continue # Continue outside loop
'''

module = grammar.parse(code)
errors = list(grammar.iter_errors(module))

# Report every issue found in the tree
for error in errors:
    print(f"Line {error.start_pos[0]}: {error.message}")
    print(f"Error code: {error.code}")
    print(f"At position: {error.start_pos}")

# Handle specific error types
syntax_errors = [e for e in errors if 'SyntaxError' in e.message]
semantic_errors = [e for e in errors if 'continue' in e.message.lower()]

Refactor parsed code by replacing specific nodes with new content while preserving formatting.
def refactor(self, base_node, node_to_str_map):
    """
    Apply refactoring transformations to code.

    Args:
        base_node (NodeOrLeaf): Root node containing code to refactor
        node_to_str_map (dict): Mapping from nodes to replacement strings

    Returns:
        str: Refactored source code with replacements applied
    """


import parso
grammar = parso.load_grammar()
module = grammar.parse('''
def old_function_name():
    old_variable = 42
    return old_variable
''')

# Find nodes to replace
function_node = module.children[0]  # Function definition
func_name = function_node.name  # Function name

# Create refactoring map: rename the function ...
refactor_map = {
    func_name: 'new_function_name',
}
# ... and every occurrence of the variable (definition and usages), so the
# renamed code stays consistent.
for name_node in module.get_used_names()['old_variable']:
    refactor_map[name_node] = 'new_variable'

# Apply refactoring
refactored_code = grammar.refactor(module, refactor_map)
print(refactored_code)

Advanced parsing options for specific use cases and performance tuning.
import parso
from pathlib import Path
grammar = parso.load_grammar()
# Custom cache directory
custom_cache = Path.home() / '.my_parso_cache'
module = grammar.parse(
path='script.py',
cache=True,
cache_path=custom_cache
)
# Differential caching for incremental parsing
module = grammar.parse(
path='large_file.py',
cache=True,
diff_cache=True # Only re-parse changed sections
)Parse specific grammar constructs instead of full modules:
import parso
grammar = parso.load_grammar()
# Parse just an expression (requires error_recovery=False)
expr = grammar.parse('x + y * z', error_recovery=False, start_symbol='expr')
print(type(expr).__name__) # Should be expression node type
# Parse a statement
stmt = grammar.parse('x = 42', error_recovery=False, start_symbol='stmt')
# Parse function definition
func = grammar.parse(
'def example(a, b=None): return a + b',
error_recovery=False,
start_symbol='funcdef'
)Understanding when to use error recovery and when to require valid syntax.
import parso

grammar = parso.load_grammar()

# Error recovery allows parsing of broken code
broken_code = '''
def function_with_syntax_error(:
    pass
class MissingColon
    pass
for item in # Missing iterable
    print(item)
'''

# This succeeds and returns a tree with error nodes
module = grammar.parse(broken_code, error_recovery=True)
print(f"Parsed {len(module.children)} top-level items")

# Check for errors
errors = list(grammar.iter_errors(module))
print(f"Found {len(errors)} errors")

import parso
grammar = parso.load_grammar()

# Strict mode raises exceptions on syntax errors
try:
    module = grammar.parse('def invalid(: pass', error_recovery=False)
except parso.ParserSyntaxError as e:
    # The exception carries the message and the leaf where parsing failed
    print(f"Parse failed: {e.message}")
    print(f"Error at: {e.error_leaf.start_pos}")
# Use strict mode for validation
def validate_python_code(code):
    """Check if Python code is syntactically valid.

    Args:
        code (str): Source code to validate.

    Returns:
        tuple: ``(True, None)`` when the code parses in strict mode,
        otherwise ``(False, message)`` with the parser's error message.
    """
    grammar = parso.load_grammar()
    try:
        # Only the parse call can raise ParserSyntaxError
        grammar.parse(code, error_recovery=False)
    except parso.ParserSyntaxError as e:
        # Use the message attribute rather than str(e), which would render
        # the raw exception arguments
        return False, e.message
    return True, None


is_valid, error_msg = validate_python_code('def hello(): return "world"')
print(f"Valid: {is_valid}")  # True

is_valid, error_msg = validate_python_code('def broken(: pass')
print(f"Valid: {is_valid}, Error: {error_msg}")  # False, error message

Working with different Python versions and their specific grammar features.
import parso

# Python 3.8 - walrus operator and positional-only parameters
grammar38 = parso.load_grammar(version="3.8")
module = grammar38.parse('''
def func(pos_only, /, normal, *, kw_only):
    if (result := expensive_operation()) is not None:
        return result
''')

# Python 3.10 - match statements and union types
grammar310 = parso.load_grammar(version="3.10")
module = grammar310.parse('''
def process(value: int | str) -> str:
    match value:
        case int() if value > 0:
            return "positive integer"
        case str() if value:
            return "non-empty string"
        case _:
            return "other"
''')
# Version compatibility checking
def parse_with_fallback(code, preferred_version="3.10"):
    """Parse code, falling back to older versions if needed.

    Args:
        code (str): Source code to parse.
        preferred_version (str): First grammar version to try; when it is not
            one of the known versions, the search starts at the newest one.

    Returns:
        tuple: ``(tree, version)`` for the first version that parses the code
        in strict mode, or ``(tree, "3.10-recovery")`` when none does and the
        code is parsed with error recovery instead.
    """
    versions = ["3.10", "3.9", "3.8", "3.7", "3.6"]
    start_idx = versions.index(preferred_version) if preferred_version in versions else 0

    for version in versions[start_idx:]:
        try:
            grammar = parso.load_grammar(version=version)
            return grammar.parse(code, error_recovery=False), version
        except (parso.ParserSyntaxError, NotImplementedError):
            continue

    # Fall back to error recovery mode with latest version
    grammar = parso.load_grammar(version="3.10")
    return grammar.parse(code, error_recovery=True), "3.10-recovery"


# Usage
result, version_used = parse_with_fallback('match x: case 1: pass')
print(f"Parsed with Python {version_used}")

import parso
class ParserManager:
    """Manage multiple grammars efficiently.

    Grammar objects are kept in a per-instance dict keyed by version string,
    so each version is loaded at most once per manager.
    """

    def __init__(self):
        # Maps version string -> loaded grammar
        self._grammars = {}

    def get_grammar(self, version="3.9"):
        """Get cached grammar instance, loading it on first request."""
        if version not in self._grammars:
            self._grammars[version] = parso.load_grammar(version=version)
        return self._grammars[version]

    def parse_file(self, path, version="3.9", **kwargs):
        """Parse file with cached grammar.

        Extra keyword arguments are forwarded to the grammar's ``parse``.
        """
        grammar = self.get_grammar(version)
        return grammar.parse(path=path, **kwargs)
# Usage: parse two files through one manager (both calls pass cache=True)
manager = ParserManager()
module1 = manager.parse_file("file1.py", cache=True)
module2 = manager.parse_file("file2.py", cache=True)  # Reuses grammar

import parso
def detailed_error_analysis(code, version="3.9"):
    """Comprehensive error analysis with categorization.

    Args:
        code (str): Source code to analyze.
        version (str): Grammar version to load.

    Returns:
        tuple: ``(categorized, module)`` where ``categorized`` maps
        'syntax', 'indentation' and 'semantic' to lists of issues and
        ``module`` is the parsed tree.
    """
    grammar = parso.load_grammar(version=version)
    module = grammar.parse(code)
    errors = list(grammar.iter_errors(module))

    categorized = {
        'syntax': [],
        'indentation': [],
        'semantic': []
    }

    for error in errors:
        message = error.message.lower()
        # Indentation is checked first, so a message mentioning both
        # "indent" and "syntax" is filed under 'indentation'.
        if 'indentation' in message or 'indent' in message:
            categorized['indentation'].append(error)
        elif 'syntax' in message:
            categorized['syntax'].append(error)
        else:
            categorized['semantic'].append(error)

    return categorized, module
# Usage
errors, tree = detailed_error_analysis('''
def function():
pass # Wrong indentation
continue # Semantic error
def invalid(: pass # Syntax error
''')
for category, error_list in errors.items():
if error_list:
print(f"{category.title()} errors: {len(error_list)}")
for error in error_list:
print(f" Line {error.start_pos[0]}: {error.message}")Install with Tessl CLI
npx tessl i tessl/pypi-parso