A Python parser that supports error recovery and round-trip parsing for different Python versions
—
Comprehensive error detection, syntax-error reporting, and code-quality analysis for Python code, including PEP 8 normalization and a custom rule system.
Core exception types for parser and syntax errors.
class ParserSyntaxError(Exception):
    """Exception raised when parsing fails.

    Attributes:
        message (str): Error description.
        error_leaf (ErrorLeaf): Token where the error occurred.
    """

    def __init__(self, message, error_leaf):
        """Initialize the syntax error.

        Args:
            message (str): Error message.
            error_leaf (ErrorLeaf): Problematic token.
        """
class InternalParseError(Exception):
    """Internal parser error indicating parser failure.

    Attributes:
        msg (str): Error description.
        type: Token type where the error occurred.
        value (str): Token value.
        start_pos (tuple[int, int]): Error position.
    """


# Advanced error finder for syntax and semantic analysis.
class ErrorFinder:
    """Finds and reports syntax and semantic errors in Python code."""

    def find_errors(self, node):
        """Find all errors in a syntax tree.

        Args:
            node (NodeOrLeaf): Root node to analyze.

        Returns:
            list[Issue]: List of error issues found.
        """
class ErrorFinderConfig:
    """Configuration for error detection behavior."""

    def create_normalizer(self, grammar):
        """Create the error finder normalizer.

        Args:
            grammar (Grammar): Grammar instance.

        Returns:
            ErrorFinder: Configured error finder.
        """


# Code normalization and transformation system with rule-based architecture.
class Normalizer:
    """Base class for code normalization and analysis.

    Attributes:
        issues (list[Issue]): Issues found during normalization.
    """

    def __init__(self, grammar, config):
        """Initialize the normalizer.

        Args:
            grammar (Grammar): Grammar instance.
            config (NormalizerConfig): Configuration.
        """

    def walk(self, node):
        """Walk the syntax tree and apply normalization.

        Args:
            node (NodeOrLeaf): Root node to process.

        Returns:
            str: Normalized code.
        """

    def add_issue(self, node, code, message):
        """Add an issue to the issue list.

        Args:
            node (NodeOrLeaf): Node where the issue occurred.
            code (str): Issue code identifier.
            message (str): Issue description.

        Returns:
            bool: True if the issue was added.
        """
class NormalizerConfig:
    """Base configuration for normalizers."""

    def create_normalizer(self, grammar):
        """Create a configured normalizer instance.

        Args:
            grammar: Grammar instance the normalizer is created for.
        """
class RefactoringNormalizer(Normalizer):
    """Normalizer for code refactoring transformations."""

    def __init__(self, node_to_str_map):
        """Initialize the refactoring normalizer.

        Args:
            node_to_str_map (dict): Mapping from nodes to replacement strings.
        """
class Issue:
    """Represents a code issue or error.

    Attributes:
        node (NodeOrLeaf): Node where the issue occurs.
        code (str): Issue code.
        message (str): Issue description.
        start_pos (tuple[int, int]): Issue position.
    """


import parso
# Parse code with errors
code = '''
def function(: # Missing parameter name
    x = 1 + # Incomplete expression
    return x
continue # Continue outside loop
class MissingColon # Missing colon
    pass
'''
grammar = parso.load_grammar()
module = grammar.parse(code)

# Find syntax and semantic errors
errors = list(grammar.iter_errors(module))
print(f"Found {len(errors)} errors:")
for error in errors:
    print(f" Line {error.start_pos[0]}: {error.message}")
    print(f" Code: {error.code}")
    print(f" Position: {error.start_pos}")

# Handle parser exceptions in strict mode
try:
    # With error recovery disabled the parser raises instead of building
    # a tree that contains error nodes.
    strict_module = grammar.parse(code, error_recovery=False)
except parso.ParserSyntaxError as e:
    print(f"Parse failed: {e.message}")
    print(f"Error at: {e.error_leaf.start_pos}")
    print(f"Error token: '{e.error_leaf.value}'")


# Python PEP 8 style guide enforcement and formatting.
class PEP8Normalizer(Normalizer):
    """PEP 8 style guide normalizer."""

    def normalize(self, code):
        """Apply PEP 8 formatting to code.

        Args:
            code (str): Input Python code.

        Returns:
            str: PEP 8 formatted code.
        """
class PEP8NormalizerConfig(NormalizerConfig):
    """Configuration for PEP 8 normalization."""


import parso
from parso.python import pep8

# Code with PEP 8 violations
messy_code = '''
def bad_spacing(x,y):
    return x+y
class BadClass:
    def method(self):
        pass
'''
grammar = parso.load_grammar()
module = grammar.parse(messy_code)

# Get PEP 8 normalizer
config = pep8.PEP8NormalizerConfig()
normalizer = config.create_normalizer(grammar)

# Apply normalization
normalized_code = normalizer.walk(module)
print("Normalized code:")
print(normalized_code)

# Check for PEP 8 issues (collected on the normalizer during the walk above)
pep8_issues = normalizer.issues
print(f"Found {len(pep8_issues)} PEP 8 issues:")
for issue in pep8_issues:
    print(f" {issue.code}: {issue.message} at {issue.start_pos}")


# Creating custom rules for code analysis and transformation.
class Rule:
    """Base class for normalization rules.

    Attributes:
        error_code (int): Rule error code.
    """

    def __init__(self, normalizer):
        """Initialize the rule.

        Args:
            normalizer (Normalizer): Parent normalizer.
        """

    def feed_node(self, node):
        """Process a node with this rule.

        Args:
            node (NodeOrLeaf): Node to process.
        """
@Normalizer.register_rule(type='funcdef')
class CustomFunctionRule(Rule):
    """Example custom rule for function analysis."""

    error_code = 1001

    def feed_node(self, node):
        """Report function definitions whose name is shorter than 3 characters.

        Args:
            node (NodeOrLeaf): Node to process; its ``name.value`` is read,
                so it is expected to be a function definition node.
        """
        # Custom logic for function nodes
        # NOTE(review): assumes the base Rule exposes its parent normalizer as
        # ``self.normalizer`` -- confirm against the installed parso version.
        if len(node.name.value) < 3:
            self.normalizer.add_issue(
                node,
                f"C{self.error_code}",
                "Function name should be at least 3 characters"
            )


def categorize_errors(errors):
    """Categorize errors by type.

    Each error lands in exactly one bucket, chosen from the lower-cased text
    of its ``message`` attribute.

    Args:
        errors: Iterable of objects exposing a ``message`` string.

    Returns:
        dict[str, list]: Buckets keyed by 'syntax', 'indentation',
        'semantic' and 'other', each holding the matching error objects in
        input order.
    """
    semantic_keywords = ('continue', 'break', 'return', 'yield')
    categories = {
        'syntax': [],
        'indentation': [],
        'semantic': [],
        'other': [],
    }
    for error in errors:
        message = error.message.lower()
        # Keyword checks must come before the generic 'syntax' check:
        # control-flow problems are reported with messages such as
        # "SyntaxError: 'continue' not properly in loop", so testing
        # 'syntax' first would leave the 'semantic' bucket unreachable.
        if any(word in message for word in semantic_keywords):
            bucket = 'semantic'
        elif 'syntax' in message:
            bucket = 'syntax'
        elif 'indent' in message:
            bucket = 'indentation'
        else:
            bucket = 'other'
        categories[bucket].append(error)
    return categories
# Usage
grammar = parso.load_grammar()
module = grammar.parse('''
def func():
pass # Wrong indentation
continue # Continue not in loop
def invalid(: pass # Syntax error
''')
errors = list(grammar.iter_errors(module))
categorized = categorize_errors(errors)
for category, error_list in categorized.items():
    if error_list:
        print(f"{category.title()} errors ({len(error_list)}):")
        for error in error_list:
            print(f" Line {error.start_pos[0]}: {error.message}")


def analyze_error_recovery(code):
    """Analyze how well error recovery handles broken code.

    Args:
        code (str): Source text to parse with error recovery enabled.

    Returns:
        dict: ``errors_found`` (number of reported errors), ``error_nodes``
        and ``error_leaves`` (tree nodes of type 'error_node' and
        'error_leaf'), ``total_nodes`` (all visited nodes) and
        ``recovery_success`` (True when at least one error was reported and
        the tree is non-empty).
    """
    grammar = parso.load_grammar()

    # Parse with error recovery
    module = grammar.parse(code, error_recovery=True)
    errors = list(grammar.iter_errors(module))

    # Count different node types
    def count_nodes(node, counts=None):
        """Recursively tally all nodes, error nodes and error leaves."""
        if counts is None:
            counts = {'total': 0, 'error_nodes': 0, 'error_leaves': 0}
        counts['total'] += 1
        if hasattr(node, 'type'):
            if node.type == 'error_node':
                counts['error_nodes'] += 1
            elif node.type == 'error_leaf':
                counts['error_leaves'] += 1
        # Only nodes with a ``children`` attribute are descended into.
        if hasattr(node, 'children'):
            for child in node.children:
                count_nodes(child, counts)
        return counts

    node_counts = count_nodes(module)
    return {
        'errors_found': len(errors),
        'error_nodes': node_counts['error_nodes'],
        'error_leaves': node_counts['error_leaves'],
        'total_nodes': node_counts['total'],
        'recovery_success': len(errors) > 0 and node_counts['total'] > 0
    }
# Test error recovery
broken_code = '''
def func1():
    pass
def broken_func(missing_param:
    x = incomplete +
    return x
def func2():
    return "ok"
'''
analysis = analyze_error_recovery(broken_code)
print("Error recovery analysis:", analysis)


def validate_python_syntax(code, version="3.9"):
    """Validate Python code syntax.

    Args:
        code (str): Source text to check.
        version (str): Python version whose grammar is loaded.

    Returns:
        tuple[bool, str]: ``(True, "Valid syntax")`` when strict parsing
        succeeds, otherwise ``(False, <description of the failure>)``.
    """
    try:
        grammar = parso.load_grammar(version=version)
        # Strict mode: the parser raises instead of recovering.
        grammar.parse(code, error_recovery=False)
        return True, "Valid syntax"
    except parso.ParserSyntaxError as e:
        return False, f"Syntax error: {e.message} at {e.error_leaf.start_pos}"
    except Exception as e:
        # Deliberate catch-all: any other failure is reported to the caller
        # as a parse error rather than propagated.
        return False, f"Parse error: {str(e)}"
def check_code_quality(code, version="3.9"):
    """Run a comprehensive code quality check.

    Args:
        code (str): Source text to analyze.
        version (str): Python version whose grammar is loaded.

    Returns:
        dict: ``syntax_valid`` (True when no errors were reported),
        ``syntax_errors`` (the reported errors), ``pep8_issues`` (issues
        collected by the PEP 8 normalizer) and ``total_issues`` (the sum of
        both counts).
    """
    grammar = parso.load_grammar(version=version)
    module = grammar.parse(code)
    errors = list(grammar.iter_errors(module))

    # Get PEP 8 issues
    from parso.python import pep8
    config = pep8.PEP8NormalizerConfig()
    normalizer = config.create_normalizer(grammar)
    # The walk result is discarded; only the issues it collects are used.
    normalizer.walk(module)
    pep8_issues = normalizer.issues

    return {
        'syntax_valid': not errors,
        'syntax_errors': errors,
        'pep8_issues': pep8_issues,
        'total_issues': len(errors) + len(pep8_issues)
    }
# Usage
code_quality = check_code_quality('''
def example():
    x=1+2 # PEP 8: spaces around operators
    return x
''')
print(f"Syntax valid: {code_quality['syntax_valid']}")
print(f"Total issues: {code_quality['total_issues']}")

# Install with Tessl CLI:
#     npx tessl i tessl/pypi-parso