CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-parso

A Python parser that supports error recovery and round-trip parsing for different Python versions

Pending
Overview
Eval results
Files

docs/utilities.md

Utilities and Helpers

Utility functions for text processing, version handling, encoding detection, file I/O operations, and caching that support the parsing infrastructure and provide convenient helpers for working with Python code.

Capabilities

Text Processing

Utilities for handling Python source code text with proper encoding and line splitting.

def split_lines(string: str, keepends: bool = False) -> "list[str]":
    """
    Split Python source text into lines without breaking on form feeds.

    Unlike str.splitlines(), form feeds (\\f) are kept as ordinary
    characters inside a line; \\n and \\r\\n are the line breaks
    documented here, which matches how Python itself reads source lines.

    NOTE(review): upstream parso also appears to treat a lone \\r as a
    line break -- confirm against the installed parso version.

    Args:
        string (str): Text to split
        keepends (bool): If True, every returned line keeps its trailing
            line ending (default: False)

    Returns:
        list[str]: The lines of ``string``; never an empty list -- empty
        input yields ['']
    """

def python_bytes_to_unicode(source: "str | bytes", encoding: str = 'utf-8', errors: str = 'strict') -> str:
    """
    Decode Python source bytes to text using Python's encoding rules.

    The encoding is detected from a Unicode BOM or a PEP 263 declaration
    (e.g. ``# -*- coding: latin-1 -*-``); ``encoding`` is only the fallback
    used when neither is present.

    Args:
        source (str | bytes): Source code as string or bytes
        encoding (str): Fallback encoding if none is detected
            (default: 'utf-8')
        errors (str): Decoding error strategy passed to the decoder
            ('strict', 'replace', 'ignore')

    Returns:
        str: Unicode string representation of source code

    Raises:
        LookupError: If detected encoding is invalid and errors='strict'
    """

Usage Examples

from parso.utils import split_lines, python_bytes_to_unicode

# Line splitting - Python-aware
# The form feed (\f) sits in the middle of one logical line, so it must not
# produce a split; only the \n and \r\n sequences do.
code = "line1\nline2\r\nline3\fline4"  # Form feed between line3 and line4
lines = split_lines(code)
print("Lines:", lines)  # ['line1', 'line2', 'line3\x0cline4']  (\x0c == \f)

# With line endings preserved
lines_with_ends = split_lines(code, keepends=True)
print("With endings:", lines_with_ends)  # ['line1\n', 'line2\r\n', 'line3\x0cline4']

# Encoding detection from bytes: the PEP 263 comment selects latin-1,
# so the byte \xe9 decodes to "é"
latin1_code = b'# -*- coding: latin-1 -*-\ntext = "caf\xe9"'
unicode_code = python_bytes_to_unicode(latin1_code)
print("Detected and converted:", repr(unicode_code))

# UTF-8 BOM handling: the leading \xef\xbb\xbf marks the bytes as UTF-8
# NOTE(review): confirm whether the decoded text still starts with U+FEFF;
# upstream parso appears to decode with plain 'utf-8' and leave BOM
# stripping to the tokenizer.
utf8_bom = b'\xef\xbb\xbfprint("hello world")'
clean_code = python_bytes_to_unicode(utf8_bom)
print("BOM removed:", repr(clean_code))

# Error handling options: undecodable bytes become U+FFFD instead of raising
invalid_bytes = b'\xff\xfe invalid encoding'
safe_code = python_bytes_to_unicode(invalid_bytes, errors='replace')
print("With replacements:", repr(safe_code))

Version Handling

Classes and functions for working with Python version information.

class Version:
    """
    Version of the parso library itself, as returned by version_info().

    This is distinct from PythonVersionInfo, which describes the Python
    version a grammar targets.

    Attributes:
        major (int): Major version number
        minor (int): Minor version number
        micro (int): Micro version number
    """

class PythonVersionInfo:
    """
    Python version information for grammar selection.

    Only major and minor are tracked; the micro version is not part of
    this type.

    Attributes:
        major (int): Python major version (e.g., 3)
        minor (int): Python minor version (e.g., 9)
    """

    def __gt__(self, other) -> bool:
        """
        Return True if this version is newer than ``other``.

        ``other`` may be another PythonVersionInfo or a plain tuple such
        as (3, 8).
        """

    def __eq__(self, other) -> bool:
        """
        Return True if this version equals ``other``.

        ``other`` may be another PythonVersionInfo or a plain tuple such
        as (3, 8).
        """

def parse_version_string(version: "str | None" = None) -> "PythonVersionInfo":
    """
    Parse a Python version string into a PythonVersionInfo.

    A micro component, if present, is accepted but ignored: '3.10.1'
    yields major 3, minor 10.

    Args:
        version (str, optional): Version string like '3.8' or '3.10.1'.
            Defaults to the version of the running Python interpreter.

    Returns:
        PythonVersionInfo: Parsed version information

    Raises:
        ValueError: If version format is invalid
        TypeError: If version is not a string
    """

def version_info() -> "Version":
    """
    Return the version of the installed parso library.

    This is parso's own version, not the Python version being parsed.

    Returns:
        Version: Parso version as named tuple (major, minor, micro)
    """

Usage Examples

from parso.utils import parse_version_string, version_info, PythonVersionInfo

# Parse version strings into PythonVersionInfo objects
py38 = parse_version_string("3.8")
py310 = parse_version_string("3.10.5")  # Micro version (.5) is ignored
current = parse_version_string()  # No argument: uses sys.version_info

# Only major and minor are available on the parsed result
print(f"Python 3.8: {py38.major}.{py38.minor}")
print(f"Python 3.10: {py310.major}.{py310.minor}")
print(f"Current: {current.major}.{current.minor}")

# Version comparisons between two PythonVersionInfo objects
if py310 > py38:
    print("3.10 is newer than 3.8")

if py38 == (3, 8):  # Compare with a plain (major, minor) tuple
    print("Version matches tuple")

# Get parso's own version (major, minor and micro), not a Python version
parso_version = version_info()
print(f"Parso version: {parso_version.major}.{parso_version.minor}.{parso_version.micro}")

# Version-specific feature detection
def supports_walrus_operator(version_info):
    """Report whether the given Python version has the ``:=`` operator."""
    # Assignment expressions are available from Python 3.8 onwards.
    walrus_introduced_in = (3, 8)
    return version_info >= walrus_introduced_in

def supports_match_statements(version_info):
    """Report whether the given Python version has ``match`` statements."""
    # Structural pattern matching is available from Python 3.10 onwards.
    match_introduced_in = (3, 10)
    return version_info >= match_introduced_in

# 3.9 is >= (3, 8) but < (3, 10), so the checks print True and False
py_version = parse_version_string("3.9")
print(f"3.9 supports walrus: {supports_walrus_operator(py_version)}")  # True
print(f"3.9 supports match: {supports_match_statements(py_version)}")  # False

File I/O Classes

File handling abstractions that support caching and content management.

class FileIO:
    """
    File I/O abstraction for reading Python source files.

    An instance can be handed to ``grammar.parse(file_io=...)`` in place
    of a path or a code string.

    Attributes:
        path (Path): File path as pathlib.Path object
    """

    def __init__(self, path: "str | Path"):
        """
        Initialize file I/O handler.

        Args:
            path (str | Path): File path to read
        """

    def read(self) -> bytes:
        """
        Read file contents as bytes.

        Returns:
            bytes: Raw, undecoded file contents
        """

    def get_last_modified(self) -> "float | None":
        """
        Get file modification timestamp.

        Returns:
            float | None: Timestamp or None if file doesn't exist
        """

class KnownContentFileIO(FileIO):
    """
    File I/O wrapper for content that's already known.

    Useful for parsing strings while maintaining file-like interface:
    read() hands back the stored content.
    """

    def __init__(self, path: "str | Path", content: "str | bytes"):
        """
        Initialize with known content.

        Args:
            path (str | Path): File path (can be None)
            content (str | bytes): Known file content
        """

    def read(self) -> "str | bytes":
        """
        Return the known content.

        Returns:
            str | bytes: The content passed to the constructor
        """

Usage Examples

from parso.file_io import FileIO, KnownContentFileIO
import parso

# Read from actual file
file_io = FileIO("/path/to/script.py")
content = file_io.read()  # raw bytes
last_modified = file_io.get_last_modified()  # None if the file doesn't exist

# Parse using FileIO; cache=True lets parso reuse a previously cached tree
grammar = parso.load_grammar()
module = grammar.parse(file_io=file_io, cache=True)

# Use known content (useful for in-memory parsing)
code = '''
def example():
    return "hello world"
'''

# The path only labels the content; read() returns `code` itself
known_io = KnownContentFileIO("virtual_file.py", code)
module = grammar.parse(file_io=known_io)

# File I/O with caching
def parse_file_with_caching(file_path):
    """
    Parse ``file_path`` through a FileIO handler with parso's cache enabled.

    Raises:
        FileNotFoundError: If the file has no modification time, i.e. it
            does not exist.
    """
    source_io = FileIO(file_path)

    # A missing file reports no modification time; fail before parsing.
    if source_io.get_last_modified() is None:
        raise FileNotFoundError(f"File not found: {file_path}")

    return parso.load_grammar().parse(file_io=source_io, cache=True)

# Virtual file for testing
def create_test_module(code_string, filename="test.py"):
    """
    Parse ``code_string`` as if it had been read from ``filename``.

    Nothing is read from disk: the content is served from memory by a
    KnownContentFileIO.
    """
    virtual_io = KnownContentFileIO(filename, code_string)
    return parso.load_grammar().parse(file_io=virtual_io)

test_module = create_test_module('x = 42')

Cache Management

Functions for managing parso's parser cache system.

def load_module(hashed_grammar: str, file_io: "FileIO", cache_path: "Path | None" = None) -> "NodeOrLeaf | None":
    """
    Load a previously cached parse tree for a file.

    Args:
        hashed_grammar (str): Grammar hash identifier
        file_io (FileIO): File I/O handler identifying the file
        cache_path (Path, optional): Custom cache directory

    Returns:
        NodeOrLeaf | None: Cached module or None if not cached/outdated
    """

def try_to_save_module(hashed_grammar: str, file_io: "FileIO", module: "NodeOrLeaf", lines: "list[str]", pickling: bool = True, cache_path: "Path | None" = None) -> None:
    """
    Save a parsed module to the cache.

    Args:
        hashed_grammar (str): Grammar hash
        file_io (FileIO): File I/O handler identifying the file
        module (NodeOrLeaf): Parsed module to cache
        lines (list[str]): Source code lines
        pickling (bool): Enable disk caching (default: True)
        cache_path (Path, optional): Custom cache directory
    """

def clear_cache(cache_path: "Path | None" = None) -> None:
    """
    Clear all cached files and the in-memory cache.

    Args:
        cache_path (Path, optional): Cache directory to clear
    """

def clear_inactive_cache(cache_path: "Path | None" = None, inactivity_threshold: int = 2592000) -> bool:
    """
    Clear cached files that haven't been accessed recently.

    Args:
        cache_path (Path, optional): Cache directory
        inactivity_threshold (int): Seconds of inactivity before removal
            (default: 2592000, i.e. 30 days)

    Returns:
        bool: True if cleanup completed successfully
        NOTE(review): presumably False means the cache directory could not
        be processed (e.g. it does not exist) -- confirm against parso.
    """

Usage Examples

import parso
import parso.cache
from pathlib import Path

# Manual cache management
def process_files_with_caching(file_paths):
    """
    Parse every path in ``file_paths`` with one grammar and caching enabled.

    Failures are reported per file and do not stop the loop.
    """
    shared_grammar = parso.load_grammar()

    for source_path in file_paths:
        try:
            # cache=True lets unchanged files be served from parso's cache
            tree = shared_grammar.parse(path=source_path, cache=True)
            child_count = len(tree.children)
            print(f"Processed {source_path}: {child_count} statements")
        except Exception as exc:
            print(f"Error processing {source_path}: {exc}")

# Cache statistics
def get_cache_stats():
    """
    Summarize parso's in-memory parser cache.

    Returns:
        dict: 'grammars_cached' (number of top-level cache entries),
        'files_cached' (total entries across all of them) and
        'cache_keys' (the top-level keys).
    """
    by_grammar = parso.cache.parser_cache

    # The cache is keyed by grammar; each value holds that grammar's files.
    file_count = 0
    for entries in by_grammar.values():
        file_count += len(entries)

    return {
        'grammars_cached': len(by_grammar),
        'files_cached': file_count,
        'cache_keys': list(by_grammar),
    }

stats = get_cache_stats()
print("Cache statistics:", stats)

# Periodic cache cleanup
def cleanup_old_cache():
    """Purge inactive cache files and print whether the purge succeeded."""
    print("Clearing inactive cache files...")

    # clear_inactive_cache() reports success as a boolean.
    outcome = (
        "Cache cleanup completed"
        if parso.cache.clear_inactive_cache()
        else "Cache cleanup had issues"
    )
    print(outcome)

# Custom cache directory: keep parso's cache files under the user's home
custom_cache = Path.home() / '.my_parso_cache'
grammar = parso.load_grammar()
module = grammar.parse(
    path="example.py",
    cache=True,
    cache_path=custom_cache
)

# Clear only that cache directory
parso.cache.clear_cache(cache_path=custom_cache)

Integration Patterns

Encoding-Safe File Processing

from parso.utils import python_bytes_to_unicode
from parso.file_io import FileIO
import parso

def safe_parse_file(file_path):
    """
    Parse ``file_path`` without raising on I/O or encoding problems.

    The file is read as bytes and decoded with encoding detection, using
    errors='replace' so undecodable bytes cannot abort the parse.

    Returns:
        The parsed module, or None if anything went wrong (the error is
        printed).
    """
    try:
        # Read raw bytes so that encoding detection decides how to decode
        with open(file_path, 'rb') as handle:
            decoded = python_bytes_to_unicode(handle.read(), errors='replace')

        return parso.load_grammar().parse(decoded)

    except Exception as exc:
        print(f"Error parsing {file_path}: {exc}")
        return None

# Process directory of Python files
def process_python_directory(directory):
    """
    Parse every ``*.py`` file below ``directory`` and report the outcome.

    Files are discovered recursively. Each one is parsed with
    safe_parse_file(), which returns None on failure, so one bad file
    never stops the walk.

    Args:
        directory (str | Path): Root directory to search.
    """
    from pathlib import Path

    python_files = Path(directory).glob("**/*.py")

    for py_file in python_files:
        module = safe_parse_file(py_file)
        # None is the failure sentinel; test for it explicitly rather than
        # relying on the truthiness of the parsed module.
        if module is not None:
            print(f"Successfully parsed: {py_file}")
        else:
            print(f"Failed to parse: {py_file}")

Version-Aware Parsing

from parso.utils import parse_version_string
import parso

def parse_with_version_detection(code):
    """
    Parse ``code`` with a grammar chosen from the syntax features it uses.

    Detection is a plain substring heuristic: it does not tokenize, so
    markers inside strings or comments count as well.

    Args:
        code (str): Source code to parse.

    Returns:
        tuple: ``(module, detected_version)`` where ``detected_version`` is
        the version string ("3.6", "3.8" or "3.10") used for the grammar.
    """

    def detect_version_features(code):
        """Return the newest Python version required by the detected features."""
        # Check the newest features first: code that uses both a walrus and
        # a match statement needs the 3.10 grammar, not the 3.8 one.
        if 'match ' in code and 'case ' in code:  # Match statements
            return "3.10"
        if '|' in code and 'Union' not in code:  # Union types
            return "3.10"
        if ':=' in code:  # Walrus operator
            return "3.8"
        return "3.6"  # Safe default

    detected_version = detect_version_features(code)
    # Round-trip through parse_version_string to validate and normalize.
    version_info = parse_version_string(detected_version)

    grammar = parso.load_grammar(version=f"{version_info.major}.{version_info.minor}")
    return grammar.parse(code), detected_version

# Usage: each sample should be detected as the version named in its comment
code_samples = [
    'x = 42',  # Basic -> falls back to the "3.6" default
    'if (n := len(items)) > 5: pass',  # Python 3.8 walrus
    '''match value:
    case 1: print("one")''',  # Python 3.10 match
]

for code in code_samples:
    module, version = parse_with_version_detection(code)
    # Only the first 30 characters of each sample are echoed
    print(f"Parsed with Python {version}: {code[:30]}...")

High-Performance Parsing

import parso
from parso.cache import clear_inactive_cache
import time

class HighPerformanceParser:
    """
    Parser wrapper for processing many files with one shared grammar.

    The grammar is loaded once in the constructor and reused for every
    file. Parsing uses parso's cache and diff cache, and inactive cache
    entries are purged at most once per ``cleanup_interval`` seconds.

    Attributes:
        grammar: Grammar returned by ``parso.load_grammar()``.
        last_cleanup (float): ``time.time()`` of the last cache cleanup
            (initially the construction time).
        cleanup_interval (int | float): Minimum seconds between cleanups.
        files_processed (int): Number of files parsed successfully so far.
    """

    def __init__(self, cache_cleanup_interval=3600):  # 1 hour
        """
        Load the grammar and start the cleanup timer.

        Args:
            cache_cleanup_interval (int | float): Seconds between cache
                cleanups (default: 3600).
        """
        self.grammar = parso.load_grammar()
        self.last_cleanup = time.time()
        self.cleanup_interval = cache_cleanup_interval
        self.files_processed = 0

    def parse_file(self, file_path):
        """
        Parse a single file with caching and differential parsing.

        Args:
            file_path (str | Path): File to parse.

        Returns:
            The parsed module, or None if parsing (or the periodic cache
            cleanup that follows it) raised; the error is printed.
        """
        try:
            # Use caching and differential parsing for performance
            module = self.grammar.parse(
                path=file_path,
                cache=True,
                diff_cache=True
            )

            self.files_processed += 1

            # Periodic cache cleanup, at most once per cleanup_interval
            if time.time() - self.last_cleanup > self.cleanup_interval:
                clear_inactive_cache()
                self.last_cleanup = time.time()
                print(f"Cleaned cache after processing {self.files_processed} files")

            return module

        except Exception as e:
            print(f"Error parsing {file_path}: {e}")
            return None

    def batch_parse(self, file_paths):
        """
        Parse several files, keeping only the ones that succeeded.

        Args:
            file_paths (Iterable[str | Path]): Files to parse, in order.

        Returns:
            list[tuple]: ``(file_path, module)`` pairs in input order for
            every file whose parse did not fail.
        """
        parsed = ((file_path, self.parse_file(file_path)) for file_path in file_paths)
        # parse_file() signals failure with None; compare against None
        # explicitly so a falsy but valid result is never discarded.
        return [(file_path, module) for file_path, module in parsed if module is not None]

# Usage: one parser instance shares its grammar across the whole batch
parser = HighPerformanceParser()
file_paths = ["file1.py", "file2.py", "file3.py"]
results = parser.batch_parse(file_paths)  # failed files are left out
print(f"Successfully parsed {len(results)} files")

Install with Tessl CLI

npx tessl i tessl/pypi-parso

docs

core-parsing.md

error-handling.md

grammar-system.md

index.md

python-elements.md

tokenization.md

tree-navigation.md

utilities.md

tile.json