A Python parser that supports error recovery and round-trip parsing for different Python versions
—
Utility functions for text processing, version handling, encoding detection, file I/O operations, and caching that support the parsing infrastructure and provide convenient helpers for working with Python code.
Utilities for handling Python source code text with proper encoding and line splitting.
def split_lines(string, keepends=False):
    """
    Split Python code into lines, handling form feeds correctly.

    Unlike str.splitlines(), this treats form feeds as normal characters
    and only splits on \\n and \\r\\n, which matches Python's behavior.

    Args:
        string (str): Text to split
        keepends (bool): Keep line endings in result (default: False)

    Returns:
        list[str]: List of lines, always returns at least [''] for empty input
    """
def python_bytes_to_unicode(source, encoding='utf-8', errors='strict'):
    """
    Convert bytes to unicode with Python encoding detection.

    Handles Unicode BOMs and PEP 263 encoding declarations automatically.

    Args:
        source (str | bytes): Source code as string or bytes
        encoding (str): Default encoding if none detected (default: 'utf-8')
        errors (str): Error handling strategy ('strict', 'replace', 'ignore')

    Returns:
        str: Unicode string representation of source code

    Raises:
        LookupError: If detected encoding is invalid and errors='strict'
    """


from parso.utils import split_lines, python_bytes_to_unicode
# Line splitting - Python-aware
code = "line1\nline2\r\nline3\f\nline4" # Form feed at the end of line3, right before its newline
lines = split_lines(code)
print("Lines:", lines) # ['line1', 'line2', 'line3\x0c', 'line4'] - the form feed stays inside line3
# With line endings preserved
lines_with_ends = split_lines(code, keepends=True)
print("With endings:", lines_with_ends)
# Encoding detection from bytes (the PEP 263 declaration on the first line selects latin-1)
latin1_code = b'# -*- coding: latin-1 -*-\ntext = "caf\xe9"'
unicode_code = python_bytes_to_unicode(latin1_code)
print("Detected and converted:", repr(unicode_code))
# UTF-8 BOM handling
utf8_bom = b'\xef\xbb\xbfprint("hello world")'
clean_code = python_bytes_to_unicode(utf8_bom)
print("BOM removed:", repr(clean_code))
# Error handling options (errors='replace' avoids raising on undecodable bytes)
invalid_bytes = b'\xff\xfe invalid encoding'
safe_code = python_bytes_to_unicode(invalid_bytes, errors='replace')
print("With replacements:", repr(safe_code))

Classes and functions for working with Python version information.
class Version:
    """
    Parso version information.

    Attributes:
        major (int): Major version number
        minor (int): Minor version number
        micro (int): Micro version number
    """
class PythonVersionInfo:
    """
    Python version information for grammar selection.

    Attributes:
        major (int): Python major version (e.g., 3)
        minor (int): Python minor version (e.g., 9)
    """

    def __gt__(self, other):
        """Compare versions (supports tuples)."""

    def __eq__(self, other):
        """Check version equality (supports tuples)."""
def parse_version_string(version=None):
    """
    Parse Python version string into version info.

    Args:
        version (str, optional): Version string like '3.8' or '3.10.1'.
            Defaults to current Python version

    Returns:
        PythonVersionInfo: Parsed version information

    Raises:
        ValueError: If version format is invalid
        TypeError: If version is not a string
    """
def version_info():
    """
    Get parso library version information.

    Returns:
        Version: Parso version as named tuple
    """


from parso.utils import parse_version_string, version_info, PythonVersionInfo
# Parse version strings
py38 = parse_version_string("3.8")
py310 = parse_version_string("3.10.5")  # Micro version ignored
current = parse_version_string()  # Uses sys.version_info

print(f"Python 3.8: {py38.major}.{py38.minor}")
print(f"Python 3.10: {py310.major}.{py310.minor}")
print(f"Current: {current.major}.{current.minor}")

# Version comparisons
if py310 > py38:
    print("3.10 is newer than 3.8")

if py38 == (3, 8):  # Compare with tuple
    print("Version matches tuple")

# Get parso version
parso_version = version_info()
print(f"Parso version: {parso_version.major}.{parso_version.minor}.{parso_version.micro}")
# Version-specific feature detection
def supports_walrus_operator(version_info):
    """Check if Python version supports walrus operator.

    Args:
        version_info: A version object that compares against tuples,
            e.g. the result of ``parse_version_string`` or a plain
            ``(major, minor)`` tuple.

    Returns:
        bool: True when the version is at least 3.8.
    """
    return version_info >= (3, 8)
def supports_match_statements(version_info):
    """Check if Python version supports match statements.

    Args:
        version_info: A version object that compares against tuples,
            e.g. the result of ``parse_version_string`` or a plain
            ``(major, minor)`` tuple.

    Returns:
        bool: True when the version is at least 3.10.
    """
    return version_info >= (3, 10)
# Apply the feature checks to a parsed version; 3.9 sits between the
# 3.8 (walrus) and 3.10 (match) thresholds used by the helpers above.
py_version = parse_version_string("3.9")
print(f"3.9 supports walrus: {supports_walrus_operator(py_version)}")
print(f"3.9 supports match: {supports_match_statements(py_version)}")

File handling abstractions that support caching and content management.
class FileIO:
    """
    File I/O abstraction for reading Python source files.

    Attributes:
        path (Path): File path as pathlib.Path object
    """

    def __init__(self, path):
        """
        Initialize file I/O handler.

        Args:
            path (str | Path): File path to read
        """

    def read(self):
        """
        Read file contents as bytes.

        Returns:
            bytes: Raw file contents
        """

    def get_last_modified(self):
        """
        Get file modification timestamp.

        Returns:
            float | None: Timestamp or None if file doesn't exist
        """
class KnownContentFileIO(FileIO):
    """
    File I/O wrapper for content that's already known.

    Useful for parsing strings while maintaining file-like interface.
    """

    def __init__(self, path, content):
        """
        Initialize with known content.

        Args:
            path (str | Path): File path (can be None)
            content (str | bytes): Known file content
        """

    def read(self):
        """
        Return the known content.

        Returns:
            str | bytes: The provided content
        """


from parso.file_io import FileIO, KnownContentFileIO
import parso
# Read from actual file
file_io = FileIO("/path/to/script.py")
content = file_io.read()
last_modified = file_io.get_last_modified()

# Parse using FileIO
grammar = parso.load_grammar()
module = grammar.parse(file_io=file_io, cache=True)

# Use known content (useful for in-memory parsing)
# The function body inside the literal must be indented for the sample
# to be valid Python source.
code = '''
def example():
    return "hello world"
'''
known_io = KnownContentFileIO("virtual_file.py", code)
module = grammar.parse(file_io=known_io)
# File I/O with caching
def parse_file_with_caching(file_path):
    """Parse file with automatic caching.

    Args:
        file_path (str | Path): Path of the Python file to parse.

    Returns:
        The module returned by ``grammar.parse`` for the file.

    Raises:
        FileNotFoundError: If ``FileIO.get_last_modified`` reports no
            modification time for the path (i.e. returns None).
    """
    file_io = FileIO(file_path)

    # Check if file exists and get modification time
    mod_time = file_io.get_last_modified()
    if mod_time is None:
        raise FileNotFoundError(f"File not found: {file_path}")

    grammar = parso.load_grammar()
    return grammar.parse(file_io=file_io, cache=True)
# Virtual file for testing
def create_test_module(code_string, filename="test.py"):
    """Create module from string with virtual filename.

    Args:
        code_string (str): Source code to parse.
        filename (str): Name attached to the in-memory content
            (default: "test.py").

    Returns:
        The module returned by ``grammar.parse`` for the given content.
    """
    file_io = KnownContentFileIO(filename, code_string)
    grammar = parso.load_grammar()
    return grammar.parse(file_io=file_io)
test_module = create_test_module('x = 42')

Functions for managing parso's parser cache system.
def load_module(hashed_grammar, file_io, cache_path=None):
    """
    Load cached parsed module.

    Args:
        hashed_grammar (str): Grammar hash identifier
        file_io (FileIO): File I/O handler
        cache_path (Path, optional): Custom cache directory

    Returns:
        NodeOrLeaf | None: Cached module or None if not cached/outdated
    """
def try_to_save_module(hashed_grammar, file_io, module, lines, pickling=True, cache_path=None):
    """
    Save parsed module to cache.

    Args:
        hashed_grammar (str): Grammar hash
        file_io (FileIO): File I/O handler
        module (NodeOrLeaf): Parsed module to cache
        lines (list[str]): Source code lines
        pickling (bool): Enable disk caching (default: True)
        cache_path (Path, optional): Custom cache directory
    """
def clear_cache(cache_path=None):
    """
    Clear all cached files and in-memory cache.

    Args:
        cache_path (Path, optional): Cache directory to clear
    """
def clear_inactive_cache(cache_path=None, inactivity_threshold=2592000):
    """
    Clear cached files that haven't been accessed recently.

    Args:
        cache_path (Path, optional): Cache directory
        inactivity_threshold (int): Seconds of inactivity before removal
            (default: 2592000, i.e. 30 days)

    Returns:
        bool: True if cleanup completed successfully
    """


import parso
import parso.cache
from pathlib import Path
# Manual cache management
def process_files_with_caching(file_paths):
    """Process multiple files with shared cache.

    Every file is parsed with the same grammar object; a failure on one
    file is reported and does not stop the remaining files.

    Args:
        file_paths: Iterable of paths to Python files.
    """
    grammar = parso.load_grammar()

    for file_path in file_paths:
        try:
            # Parse with caching enabled
            module = grammar.parse(path=file_path, cache=True)
            print(f"Processed {file_path}: {len(module.children)} statements")
        except Exception as e:
            # Deliberate best-effort: report and continue with the next file.
            print(f"Error processing {file_path}: {e}")
# Cache statistics
def get_cache_stats():
    """Get information about current cache state.

    Returns:
        dict: ``grammars_cached`` (number of top-level entries in
        ``parso.cache.parser_cache``), ``files_cached`` (total number of
        entries across all of them) and ``cache_keys`` (the top-level keys).
    """
    cache = parso.cache.parser_cache

    total_grammars = len(cache)
    total_files = sum(len(files) for files in cache.values())

    return {
        'grammars_cached': total_grammars,
        'files_cached': total_files,
        'cache_keys': list(cache),
    }


stats = get_cache_stats()
print("Cache statistics:", stats)
# Periodic cache cleanup
def cleanup_old_cache():
    """Clean up old cache files.

    Calls ``parso.cache.clear_inactive_cache()`` with its default
    arguments and prints whether it reported success.
    """
    print("Clearing inactive cache files...")
    success = parso.cache.clear_inactive_cache()
    if success:
        print("Cache cleanup completed")
    else:
        print("Cache cleanup had issues")
# Custom cache directory
custom_cache = Path.home() / '.my_parso_cache'
grammar = parso.load_grammar()
module = grammar.parse(
    path="example.py",
    cache=True,
    cache_path=custom_cache
)

# Clear specific cache directory
parso.cache.clear_cache(cache_path=custom_cache)

from parso.utils import python_bytes_to_unicode
from parso.file_io import FileIO
import parso
def safe_parse_file(file_path):
    """Safely parse file handling encoding issues.

    Args:
        file_path (str | Path): Path of the Python file to parse.

    Returns:
        The parsed module, or None if reading, decoding or parsing raised
        an exception (the error is printed).
    """
    try:
        # Read as bytes first
        with open(file_path, 'rb') as f:
            raw_content = f.read()

        # Convert to unicode with encoding detection; errors='replace'
        # keeps undecodable bytes from aborting the parse.
        unicode_content = python_bytes_to_unicode(raw_content, errors='replace')

        # Parse the content
        grammar = parso.load_grammar()
        return grammar.parse(unicode_content)

    except Exception as e:
        # Deliberate best-effort: report the problem and signal failure
        # with None instead of propagating.
        print(f"Error parsing {file_path}: {e}")
        return None
# Process directory of Python files
def process_python_directory(directory):
    """Process all Python files in directory safely.

    Recursively finds ``*.py`` files below ``directory``, parses each one
    with ``safe_parse_file`` and prints whether it succeeded.

    Args:
        directory (str | Path): Root directory to search.
    """
    from pathlib import Path

    python_files = Path(directory).glob("**/*.py")

    for py_file in python_files:
        module = safe_parse_file(py_file)
        # safe_parse_file signals failure with None; test for that
        # explicitly rather than relying on the module's truthiness.
        if module is not None:
            print(f"Successfully parsed: {py_file}")
        else:
            print(f"Failed to parse: {py_file}")


from parso.utils import parse_version_string
import parso
def parse_with_version_detection(code):
    """Parse code with automatic version detection.

    Args:
        code (str): Source code to inspect and parse.

    Returns:
        tuple: ``(module, detected_version)`` where ``detected_version``
        is the version string chosen by the heuristic below.
    """

    # Try to detect version from code features
    def detect_version_features(code):
        """Detect Python version from code features.

        These are plain substring heuristics. The checks run from the
        newest feature to the oldest so that code using several features
        is assigned the highest version it requires.
        """
        if 'match ' in code and 'case ' in code:  # Match statements
            return "3.10"
        if '|' in code and 'Union' not in code:  # Union types
            return "3.10"
        if ':=' in code:  # Walrus operator
            return "3.8"
        return "3.6"  # Safe default

    detected_version = detect_version_features(code)
    version_info = parse_version_string(detected_version)
    grammar = parso.load_grammar(version=f"{version_info.major}.{version_info.minor}")

    return grammar.parse(code), detected_version
# Usage
# The `case` line inside the last sample must be indented for the
# snippet to be a valid match statement.
code_samples = [
    'x = 42',  # Basic
    'if (n := len(items)) > 5: pass',  # Python 3.8 walrus
    '''match value:
    case 1: print("one")''',  # Python 3.10 match
]

for code in code_samples:
    module, version = parse_with_version_detection(code)
    print(f"Parsed with Python {version}: {code[:30]}...")

import parso
from parso.cache import clear_inactive_cache
import time
class HighPerformanceParser:
    """Optimized parser for processing many files.

    Attributes:
        grammar: Grammar loaded once and reused for every file.
        last_cleanup (float): ``time.time()`` value of the last cache cleanup.
        cleanup_interval (int): Seconds between cache cleanups.
        files_processed (int): Number of files parsed successfully.
    """

    def __init__(self, cache_cleanup_interval=3600):  # 1 hour
        """Load the grammar and initialise the cleanup bookkeeping.

        Args:
            cache_cleanup_interval (int): Seconds between calls to
                ``clear_inactive_cache`` (default: 3600).
        """
        self.grammar = parso.load_grammar()
        self.last_cleanup = time.time()
        self.cleanup_interval = cache_cleanup_interval
        self.files_processed = 0

    def parse_file(self, file_path):
        """Parse single file with optimizations.

        Args:
            file_path: Path of the file to parse.

        Returns:
            The parsed module, or None if any exception was raised
            (the error is printed).
        """
        try:
            # Use caching and differential parsing for performance
            module = self.grammar.parse(
                path=file_path,
                cache=True,
                diff_cache=True
            )

            self.files_processed += 1

            # Periodic cache cleanup
            if time.time() - self.last_cleanup > self.cleanup_interval:
                clear_inactive_cache()
                self.last_cleanup = time.time()
                print(f"Cleaned cache after processing {self.files_processed} files")

            return module

        except Exception as e:
            # Deliberate best-effort: one bad file must not abort a batch.
            print(f"Error parsing {file_path}: {e}")
            return None

    def batch_parse(self, file_paths):
        """Parse multiple files efficiently.

        Args:
            file_paths: Iterable of file paths.

        Returns:
            list[tuple]: ``(file_path, module)`` pairs for the files that
            parsed successfully; failed files are omitted.
        """
        results = []

        for file_path in file_paths:
            result = self.parse_file(file_path)
            # parse_file signals failure with None.
            if result is not None:
                results.append((file_path, result))

        return results
# Usage
parser = HighPerformanceParser()
file_paths = ["file1.py", "file2.py", "file3.py"]
# batch_parse returns (file_path, module) pairs for the files that parsed
# successfully; files whose parse failed are left out of the list.
results = parser.batch_parse(file_paths)
print(f"Successfully parsed {len(results)} files")

Install with Tessl CLI
npx tessl i tessl/pypi-parso