TatSu takes a grammar in a variation of EBNF as input, and outputs a memoizing PEG/Packrat parser in Python.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Configure parser behavior, manage parsing state, and access parse position and rule information. TatSu provides comprehensive configuration options and context management for fine-tuning parser behavior and accessing detailed parsing information.
Centralized configuration object for controlling all aspects of parser behavior and generation.
class ParserConfig:
    """
    Parser configuration with settings for parsing behavior.

    Attributes:
    - name: str, parser name for error reporting
    - filename: str, source filename for error context
    - encoding: str, input text encoding (default: 'utf-8')
    - start: str, start rule name (default: first rule)
    - semantics: object, semantic actions object
    - trace: bool, enable verbose parsing output
    - colorize: bool, use colored output in traces
    - ignorecase: bool, case-insensitive parsing
    - left_recursion: bool, enable left-recursion support
    - nameguard: bool, prevent tokens that are prefixes of others
    - whitespace: str, characters to skip during parsing
      (NOTE: the examples in this document pass a regular expression
      here and validate it with re.compile - confirm which form the
      installed TatSu version expects)
    - parseinfo: bool, include parse position information in AST
    """

    def new(self, **kwargs):
        """
        Create new configuration with updated settings.

        Parameters:
        - **kwargs: setting names mapped to the values to use in the
          new configuration (e.g. trace=True)

        Returns:
        ParserConfig: New configuration object with merged settings
        """

    def replace(self, **kwargs):
        """
        Replace specific configuration values.

        Parameters:
        - **kwargs: setting names mapped to their replacement values

        Returns:
        ParserConfig: New configuration with replaced values
        """

    def merge(self, other_config):
        """
        Merge with another configuration object.

        Parameters:
        - other_config (ParserConfig): Configuration to merge

        Returns:
        ParserConfig: Merged configuration
        """

    def asdict(self):
        """
        Convert configuration to dictionary.

        Returns:
        dict: Configuration as key-value pairs
        """

    def effective_rule_name(self, rule_name):
        """
        Get effective rule name with configuration context.

        Parameters:
        - rule_name (str): Rule name to resolve

        Returns:
        str: Effective rule name for given input
        """


# Usage example:
import tatsu
from tatsu.infos import ParserConfig
# Create custom configuration
config = ParserConfig(
    name="MyParser",
    trace=True,
    colorize=True,
    left_recursion=True,
    whitespace=r'\s+',
    parseinfo=True,
)

# Use configuration with parsing
# (`grammar` and `input_text` are not defined in this snippet; supply your own)
model = tatsu.compile(grammar, config=config)
result = model.parse(input_text, config=config)

# Create derived configurations
debug_config = config.new(trace=True, colorize=True)
production_config = config.new(trace=False, colorize=False)

# Access detailed information about parse position, rules, and context during and after parsing.
class ParseInfo:
    """
    Parse position and rule information with line tracking.

    Attributes:
    - tokenizer: Tokenizer, input tokenizer object
    - rule: str, current rule name
    - pos: int, character position in input
    - endpos: int, end position after parsing
    - line: int, line number (1-based)
    - endline: int, end line number
    - alerts: list, parse alerts and warnings
    """

    def text_lines(self):
        """
        Get source text lines for this parse info.

        Returns:
        list: List of source lines covered by this parse
        """

    def line_index(self):
        """
        Get line index information.

        Returns:
        LineInfo: Detailed line position information
        """


# Usage example:
import tatsu
# Raw string literal: the grammar contains the regex escape \d, which is not
# a valid Python string escape and makes a regular literal emit an
# invalid-escape warning. The resulting text is unchanged.
grammar = r'''
@@parseinfo :: True
expr = term ("+" term)*;
term = factor ("*" factor)*;
factor = number;
number = /\d+/;
'''

model = tatsu.compile(grammar)
result = model.parse("2 + 3 * 4")
# Access parse information
def print_parse_info(node, depth=0):
    """
    Recursively print the parse information attached to a parse result.

    For every visited node whose `parseinfo` attribute is truthy, the rule
    name, character span and line span are printed, prefixed by one indent
    unit per nesting level. Children are then visited one level deeper:
    the items of a list, the values of a dict (or dict subclass), or
    otherwise the values of the node's `__dict__` when it has one.

    Parameters:
    - node: parse result (or any nested part of it) to inspect
    - depth (int): current nesting level; controls the indent width

    Returns:
    None. All output goes through print().
    """
    indent = " " * depth

    info = getattr(node, 'parseinfo', None)
    if info:
        print(f"{indent}Rule: {info.rule}")
        print(f"{indent}Position: {info.pos}-{info.endpos}")
        print(f"{indent}Lines: {info.line}-{info.endline}")

    # Pick the children to descend into. Mappings are checked before the
    # generic __dict__ fallback because a dict subclass keeps its entries
    # in the mapping itself, not in its instance attributes.
    if isinstance(node, list):
        children = node
    elif isinstance(node, dict):
        children = node.values()
    elif hasattr(node, '__dict__'):
        children = vars(node).values()
    else:
        children = ()

    for child in children:
        print_parse_info(child, depth + 1)
print_parse_info(result)

# Detailed source line and position information for error reporting and IDE integration.
class LineInfo:
    """
    Source line information with position data.

    Attributes:
    - filename: str, source filename
    - line: int, line number (1-based)
    - col: int, column number (1-based)
    - start: int, start position in line
    - end: int, end position in line
    - text: str, line text content
    """


# Handle and preserve comments during parsing for documentation generation and code analysis.
class CommentInfo:
    """
    Comment information for inline and end-of-line comments.

    Attributes:
    - inline: list, inline comments within expressions
    - eol: list, end-of-line comments
    """

    def new_comment(self, text, position, comment_type='inline'):
        """
        Create new comment information.

        Parameters:
        - text (str): Comment text content
        - position (int): Character position of comment
        - comment_type (str): Type of comment ('inline', 'eol', 'block')

        Returns:
        CommentInfo: New comment information object
        """


# System for collecting and reporting parse warnings, deprecation notices, and other non-fatal issues.
class Alert:
    """
    Parse alert/warning with severity level.

    Attributes:
    - level: str, alert level ('info', 'warning', 'error')
    - message: str, alert message text
    """


# Metadata about grammar rules including parameters, flags, and analysis results.
class RuleInfo:
    """
    Rule metadata including analysis results.

    Attributes:
    - name: str, rule name
    - impl: object, rule implementation object
    - is_leftrec: bool, true if rule is left-recursive
    - is_memoizable: bool, true if rule can be memoized
    - is_name: bool, true if rule produces named results
    - params: list, rule parameters
    - kwparams: dict, keyword parameters
    """


# Internal parsing state tracking for advanced use cases and debugging.
class ParseState:
    """
    Parser state with position and AST information.

    Attributes:
    - pos: int, current parse position
    - ast: object, current AST being built
    - cst: object, concrete syntax tree
    - alerts: list, accumulated alerts
    """
class RuleResult:
    """
    Rule parse result with position and state.

    Attributes:
    - node: object, parsed AST node
    - newpos: int, new position after parsing
    - newstate: ParseState, updated parse state
    """
class MemoKey:
    """
    Memoization key for caching parse results.

    Attributes:
    - pos: int, parse position
    - rule: str, rule name
    - state: object, parse state identifier
    """


from tatsu.infos import ParserConfig
class ProjectConfig(ParserConfig):
    """Project-specific parser configuration.

    Starts from a fixed set of project defaults; any keyword argument passed
    to the constructor takes precedence over the matching default.
    """

    def __init__(self, **kwargs):
        """
        Build the configuration from project defaults plus overrides.

        Parameters:
        - **kwargs: settings forwarded to ParserConfig; they replace the
          project default of the same name
        """
        defaults = {
            'trace': False,
            'left_recursion': True,
            'parseinfo': True,
            'whitespace': r'\s+',
            'nameguard': True,
        }
        # Caller-supplied settings win over the project defaults.
        defaults.update(kwargs)
        super().__init__(**defaults)

    def debug_mode(self):
        """
        Derive a debug configuration.

        Returns:
        The result of self.new() with trace, colorize and parseinfo
        switched on; this object is passed no mutating call here.
        """
        return self.new(
            trace=True,
            colorize=True,
            parseinfo=True,
        )

    def production_mode(self):
        """
        Derive a production configuration.

        Returns:
        The result of self.new() with trace, colorize and parseinfo
        switched off; this object is passed no mutating call here.
        """
        return self.new(
            trace=False,
            colorize=False,
            parseinfo=False,
        )


import os
from tatsu.infos import ParserConfig
def create_config_from_env():
    """
    Create parser configuration from environment variables.

    Variables read (value used when the variable is unset):
    - TATSU_TRACE: boolean flag for `trace` (off)
    - TATSU_COLOR: boolean flag for `colorize` (on)
    - TATSU_LEFT_REC: boolean flag for `left_recursion` (on)
    - TATSU_WHITESPACE: passed through unchanged as `whitespace`
      (a whitespace-run regex)
    - TATSU_PARSEINFO: boolean flag for `parseinfo` (off)

    Boolean flags are on when the value, ignoring case and surrounding
    whitespace, is one of 'true', '1', 'yes' or 'on'; any other value
    turns the flag off.

    Returns:
    ParserConfig: configuration built from the values above
    """
    truthy = ('true', '1', 'yes', 'on')

    def env_flag(name, default):
        # One place for the flag rule so all four settings parse alike.
        return os.getenv(name, default).strip().lower() in truthy

    return ParserConfig(
        trace=env_flag('TATSU_TRACE', 'false'),
        colorize=env_flag('TATSU_COLOR', 'true'),
        left_recursion=env_flag('TATSU_LEFT_REC', 'true'),
        whitespace=os.getenv('TATSU_WHITESPACE', r'\s+'),
        parseinfo=env_flag('TATSU_PARSEINFO', 'false'),
    )
# Usage
config = create_config_from_env()
model = tatsu.compile(grammar, config=config)


class ValidatedConfig(ParserConfig):
    """Configuration with validation rules."""

    def __init__(self, **kwargs):
        """Initialise the base configuration, then validate the result."""
        super().__init__(**kwargs)
        self._validate()

    def _validate(self):
        """
        Validate configuration settings.

        Emits a warning when trace is on without parseinfo, and when
        colorize is on without trace. When whitespace is not None it must
        compile as a regular expression.

        Raises:
        ValueError: if whitespace is not a valid regular expression
        """
        import re
        import warnings

        # stacklevel=3 attributes the warning to the code that constructed
        # the configuration (caller -> __init__ -> _validate).
        if self.trace and not self.parseinfo:
            warnings.warn(
                "Trace mode is more useful with parseinfo enabled",
                stacklevel=3,
            )
        if self.colorize and not self.trace:
            warnings.warn(
                "Colorize has no effect without trace mode",
                stacklevel=3,
            )
        if self.whitespace is not None:
            try:
                re.compile(self.whitespace)
            except re.error as e:
                # Chain the original error so the regex diagnostics survive.
                raise ValueError(f"Invalid whitespace regex: {e}") from e


# Usage with validation
try:
    config = ValidatedConfig(
        whitespace="[invalid regex",  # This will raise ValueError
        trace=True,
        parseinfo=True,
    )
except ValueError as e:
    print(f"Configuration error: {e}")


class DynamicConfig:
    """Configuration that can be updated during parsing."""

    def __init__(self, base_config):
        """
        Parameters:
        - base_config: configuration object providing asdict(); its values
          are the baseline that overrides are layered on
        """
        self.base_config = base_config
        self.overrides = {}

    def with_override(self, **kwargs):
        """
        Temporarily override configuration values.

        The overrides take effect when the returned context manager is
        entered and are rolled back when it exits. Calling this method
        without entering the result leaves the configuration untouched.

        Parameters:
        - **kwargs: setting names mapped to their temporary values

        Returns:
        ConfigContext: context manager yielding the effective configuration
        """
        return ConfigContext(self, self.overrides.copy(), kwargs)

    def get_effective_config(self):
        """
        Get configuration with current overrides applied.

        Returns:
        ParserConfig: built from base_config.asdict() updated with the
        currently active overrides
        """
        config_dict = self.base_config.asdict()
        config_dict.update(self.overrides)
        return ParserConfig(**config_dict)


class ConfigContext:
    """Context manager for temporary configuration overrides."""

    def __init__(self, dynamic_config, old_overrides, new_overrides=None):
        """
        Parameters:
        - dynamic_config (DynamicConfig): configuration being overridden
        - old_overrides (dict): overrides to restore on exit
        - new_overrides (dict, optional): overrides to apply on entry;
          when omitted, entering changes nothing and exit still restores
          old_overrides
        """
        self.config = dynamic_config
        self.old_overrides = old_overrides
        self.new_overrides = dict(new_overrides or {})

    def __enter__(self):
        """Apply the pending overrides and return the effective configuration."""
        if self.new_overrides:
            # Snapshot at entry time so exit restores exactly what was
            # active when the block started, even if other overrides were
            # created or applied between construction and entry.
            self.old_overrides = self.config.overrides.copy()
            self.config.overrides.update(self.new_overrides)
        return self.config.get_effective_config()

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Restore the previous overrides; exceptions are not suppressed."""
        self.config.overrides = self.old_overrides
# Usage
# (`base_config`, `model` and `input_text` are not defined in this snippet)
dynamic_config = DynamicConfig(base_config)

# Temporarily enable tracing for debugging
with dynamic_config.with_override(trace=True, colorize=True) as config:
    result = model.parse(input_text, config=config)

# Install with Tessl CLI:
#     npx tessl i tessl/pypi-tatsu