tessl/pypi-tatsu

TatSu takes a grammar in a variation of EBNF as input, and outputs a memoizing PEG/Packrat parser in Python.

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview

Eval results

Files

Configuration and Context

Name: tessl/pypi-tatsu
Author: tessl

Configure parser behavior, manage parsing state, and access parse position and rule information. TatSu provides comprehensive configuration options and context management for fine-tuning parser behavior and accessing detailed parsing information.

Capabilities

Parser Configuration

Centralized configuration object for controlling all aspects of parser behavior and generation.

class ParserConfig:
    """
    Parser configuration with settings for parsing behavior.
    
    Attributes:
    - name: str, parser name for error reporting
    - filename: str, source filename for error context
    - encoding: str, input text encoding (default: 'utf-8')
    - start: str, start rule name (default: first rule)
    - semantics: object, semantic actions object
    - trace: bool, enable verbose parsing output
    - colorize: bool, use colored output in traces
    - ignorecase: bool, case-insensitive parsing
    - left_recursion: bool, enable left-recursion support
    - nameguard: bool, prevent tokens that are prefixes of others
    - whitespace: str, characters to skip during parsing
      (NOTE(review): the usage examples in this document pass a regular
      expression for this setting; confirm whether a character set or a
      pattern is expected)
    - parseinfo: bool, include parse position information in AST
    """
    
    def new(self, **kwargs):
        """
        Create new configuration with updated settings.
        
        Parameters:
        - **kwargs: settings for the new configuration (presumably named
          after the attributes listed on the class; confirm)
        
        Returns:
        ParserConfig: New configuration object with merged settings
        """
    
    def replace(self, **kwargs):
        """
        Replace specific configuration values.
        
        Parameters:
        - **kwargs: values to replace (presumably named after the
          attributes listed on the class; confirm)
        
        Returns:
        ParserConfig: New configuration with replaced values
        """
    
    def merge(self, other_config):
        """
        Merge with another configuration object.
        
        Parameters:
        - other_config (ParserConfig): Configuration to merge
        
        Returns:
        ParserConfig: Merged configuration
        """
    
    def asdict(self):
        """
        Convert configuration to dictionary.
        
        Returns:
        dict: Configuration as key-value pairs
        """
    
    def effective_rule_name(self, rule_name):
        """
        Get effective rule name with configuration context.
        
        Parameters:
        - rule_name: rule name to resolve (presumably a str; confirm)
        
        Returns:
        str: Effective rule name for given input
        """

Usage example:

import tatsu
from tatsu.infos import ParserConfig

# Create custom configuration
config = ParserConfig(
    name="MyParser",
    trace=True,
    colorize=True,
    left_recursion=True,
    whitespace=r'\s+',
    parseinfo=True
)

# Use configuration with parsing
# (`grammar` and `input_text` are placeholders supplied by the caller)
model = tatsu.compile(grammar, config=config)
result = model.parse(input_text, config=config)

# Create derived configurations: replace() returns a new configuration
# with the given values replaced.
debug_config = config.replace(trace=True, colorize=True)
production_config = config.replace(trace=False, colorize=False)

Parse Information

Access detailed information about parse position, rules, and context during and after parsing.

class ParseInfo:
    """
    Parse position and rule information with line tracking.
    
    Attributes:
    - tokenizer: Tokenizer, input tokenizer object
    - rule: str, current rule name
    - pos: int, character position in input
    - endpos: int, end position after parsing
    - line: int, line number (1-based)
      (NOTE(review): confirm the line base against the library; not
      verifiable from this document)
    - endline: int, end line number
    - alerts: list, parse alerts and warnings
    """
    
    def text_lines(self):
        """
        Get source text lines for this parse info.
        
        Returns:
        list: List of source lines covered by this parse
        """
    
    def line_index(self):
        """
        Get line index information.
        
        Returns:
        LineInfo: Detailed line position information
        (NOTE(review): confirm whether a single object or a list of
        per-line entries is returned)
        """

Usage example:

import tatsu

# Raw string: the regex below contains a backslash (\d) that must reach
# TatSu unchanged rather than be read as a Python escape sequence.
grammar = r'''
    @@parseinfo :: True
    
    expr = term ("+" term)*;
    term = factor ("*" factor)*;
    factor = number;
    number = /\d+/;
'''

# Compile the grammar into a model, then parse a sample expression with it
model = tatsu.compile(grammar)
result = model.parse("2 + 3 * 4")

# Access parse information
def print_parse_info(node, depth=0):
    """Recursively print rule, position and line span for nodes carrying parse info.

    Parameters:
    - node: value to inspect; lists are walked element by element, and
      objects with a ``__dict__`` are walked through their attribute values
    - depth (int): nesting level, rendered as two spaces of indent per level
    """
    info = getattr(node, 'parseinfo', None)
    if info:
        indent = "  " * depth
        print(f"{indent}Rule: {info.rule}")
        print(f"{indent}Position: {info.pos}-{info.endpos}")
        print(f"{indent}Lines: {info.line}-{info.endline}")

    # Pick the children to descend into; anything else is a leaf.
    if isinstance(node, list):
        children = node
    elif hasattr(node, '__dict__'):
        children = node.__dict__.values()
    else:
        children = ()

    for child in children:
        print_parse_info(child, depth + 1)

# Walk the parse result from its root and print the parse info of every node that carries one
print_parse_info(result)

Line Information

Detailed source line and position information for error reporting and IDE integration.

class LineInfo:
    """
    Source line information with position data.
    
    Attributes:
    - filename: str, source filename
    - line: int, line number (1-based)
    - col: int, column number (1-based)
      (NOTE(review): confirm the base of line and col against the
      library; not verifiable from this document)
    - start: int, start position in line
    - end: int, end position in line
    - text: str, line text content
    """

Comment Information

Handle and preserve comments during parsing for documentation generation and code analysis.

class CommentInfo:
    """
    Comment information for inline and end-of-line comments.
    
    Attributes:
    - inline: list, inline comments within expressions
    - eol: list, end-of-line comments
    """
    
    def new_comment(self, text, position, comment_type='inline'):
        """
        Create new comment information.
        
        Parameters:
        - text (str): Comment text content
        - position (int): Character position of comment
        - comment_type (str): Type of comment ('inline', 'eol', 'block');
          defaults to 'inline'
          (NOTE(review): the class lists only inline and eol attributes;
          confirm how 'block' comments are stored)
        
        Returns:
        CommentInfo: New comment information object
        """

Parse Alerts and Warnings

System for collecting and reporting parse warnings, deprecation notices, and other non-fatal issues.

class Alert:
    """
    Parse alert/warning with severity level.
    
    Attributes:
    - level: str, alert level ('info', 'warning', 'error')
      (NOTE(review): confirm the type of level against the library)
    - message: str, alert message text
    """

Rule Information

Metadata about grammar rules including parameters, flags, and analysis results.

class RuleInfo:
    """
    Rule metadata including analysis results.
    
    Describes one grammar rule: its name and implementation, the analysis
    flags, and the rule's positional and keyword parameters.
    
    Attributes:
    - name: str, rule name
    - impl: object, rule implementation object
    - is_leftrec: bool, true if rule is left-recursive
    - is_memoizable: bool, true if rule can be memoized
    - is_name: bool, true if rule produces named results
    - params: list, rule parameters
    - kwparams: dict, keyword parameters
    """

Parse State Management

Internal parsing state tracking for advanced use cases and debugging.

class ParseState:
    """
    Parser state with position and AST information.
    
    Internal state tracking, documented for advanced use cases and debugging.
    
    Attributes:
    - pos: int, current parse position
    - ast: object, current AST being built
    - cst: object, concrete syntax tree
    - alerts: list, accumulated alerts
    """

class RuleResult:
    """
    Rule parse result with position and state.
    
    Bundles the node produced by a rule with the position and parse state
    that apply after the rule has been parsed.
    
    Attributes:
    - node: object, parsed AST node
    - newpos: int, new position after parsing
    - newstate: ParseState, updated parse state
    """

class MemoKey:
    """
    Memoization key for caching parse results.
    
    A cached result is identified by the combination of parse position,
    rule name and parse state identifier.
    
    Attributes:
    - pos: int, parse position
    - rule: str, rule name
    - state: object, parse state identifier
    """

Advanced Configuration

Custom Configuration Classes

from tatsu.infos import ParserConfig

class ProjectConfig(ParserConfig):
    """Project-specific parser configuration.

    Starts from a fixed set of project defaults; keyword arguments passed
    to the constructor take precedence over those defaults.
    """

    def __init__(self, **kwargs):
        """Initialize with the project defaults overlaid by ``kwargs``."""
        defaults = {
            'trace': False,
            'left_recursion': True,
            'parseinfo': True,
            'whitespace': r'\s+',
            'nameguard': True
        }
        # Caller-supplied settings win over the project defaults.
        defaults.update(kwargs)
        super().__init__(**defaults)

    def debug_mode(self):
        """Return a new configuration with the debug settings applied.

        The result has tracing, colorized output and parse info switched on.
        """
        return self.replace(
            trace=True,
            colorize=True,
            parseinfo=True
        )

    def production_mode(self):
        """Return a new configuration with the production settings applied.

        The result has tracing, colorized output and parse info switched off.
        """
        return self.replace(
            trace=False,
            colorize=False,
            parseinfo=False
        )

Environment-Based Configuration

import os
from tatsu.infos import ParserConfig

def create_config_from_env():
    """Build a ParserConfig whose settings come from TATSU_* environment variables.

    Boolean settings are on only when the variable's value equals 'true',
    ignoring case; variables that are not set use the fallbacks given below.

    Returns:
    ParserConfig: configuration assembled from the environment
    """
    def flag(variable, fallback):
        # A flag is on only for the literal value 'true' (any letter case).
        return os.getenv(variable, fallback).lower() == 'true'

    settings = {
        'trace': flag('TATSU_TRACE', 'false'),
        'colorize': flag('TATSU_COLOR', 'true'),
        'left_recursion': flag('TATSU_LEFT_REC', 'true'),
        'whitespace': os.getenv('TATSU_WHITESPACE', r'\s+'),
        'parseinfo': flag('TATSU_PARSEINFO', 'false'),
    }
    return ParserConfig(**settings)

# Usage: build the configuration from the environment and compile with it
# (`grammar` is a placeholder supplied by the caller)
config = create_config_from_env()
model = tatsu.compile(grammar, config=config)

Configuration Validation

class ValidatedConfig(ParserConfig):
    """Configuration with validation rules.

    Validation runs once, right after the base class has been initialized.
    """

    def __init__(self, **kwargs):
        """Initialize the configuration from ``kwargs`` and validate it."""
        super().__init__(**kwargs)
        self._validate()

    def _validate(self):
        """Validate configuration settings.

        Emits a warning for setting combinations that have little or no
        effect, and rejects a ``whitespace`` value that does not compile as
        a regular expression.

        Raises:
        ValueError: if ``whitespace`` is not None and is not a valid
            regular expression; the underlying ``re.error`` is chained as
            the cause.
        """
        import re
        import warnings

        if self.trace and not self.parseinfo:
            warnings.warn("Trace mode is more useful with parseinfo enabled")

        if self.colorize and not self.trace:
            warnings.warn("Colorize has no effect without trace mode")

        if self.whitespace is not None:
            try:
                re.compile(self.whitespace)
            except re.error as e:
                # Chain the original error so the regex diagnostics stay visible.
                raise ValueError(f"Invalid whitespace regex: {e}") from e

# Usage with validation: an invalid whitespace pattern is reported as a
# ValueError when the configuration is constructed
try:
    config = ValidatedConfig(
        whitespace="[invalid regex",  # This will raise ValueError
        trace=True,
        parseinfo=True
    )
except ValueError as e:
    print(f"Configuration error: {e}")

Runtime Configuration Updates

class DynamicConfig:
    """Wraps a base configuration together with a mutable set of overrides."""

    def __init__(self, base_config):
        """Remember ``base_config`` and start with no overrides."""
        self.overrides = {}
        self.base_config = base_config

    def with_override(self, **kwargs):
        """Apply ``kwargs`` as overrides and return a context manager that undoes them.

        The overrides take effect as soon as this method is called; the
        returned ConfigContext restores the previous overrides on exit.
        """
        snapshot = self.overrides.copy()
        self.overrides.update(kwargs)
        return ConfigContext(self, snapshot)

    def get_effective_config(self):
        """Build a ParserConfig from the base settings with the current overrides on top."""
        merged = self.base_config.asdict()
        merged.update(self.overrides)
        return ParserConfig(**merged)

class ConfigContext:
    """Context manager that restores a dynamic configuration's earlier overrides on exit."""

    def __init__(self, dynamic_config, old_overrides):
        """Keep the dynamic configuration and the overrides to restore later."""
        self.old_overrides = old_overrides
        self.config = dynamic_config

    def __enter__(self):
        """Return the effective configuration computed by the dynamic configuration."""
        effective = self.config.get_effective_config()
        return effective

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Put the saved overrides back; exceptions from the block are not suppressed."""
        previous = self.old_overrides
        self.config.overrides = previous

# Usage (`base_config`, `model` and `input_text` are placeholders supplied by the caller)
dynamic_config = DynamicConfig(base_config)

# Temporarily enable tracing for debugging; the previous overrides are
# restored when the with-block exits
with dynamic_config.with_override(trace=True, colorize=True) as config:
    result = model.parse(input_text, config=config)

Install with Tessl CLI