or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

index.md · lexical-analysis.md · syntax-parsing.md
tile.json

tessl/pypi-ply

Python implementation of lex and yacc parsing tools with LALR(1) algorithm and zero dependencies

Workspace
tessl
Visibility
Public
Created
Last updated
Describes
pypi
pkg:pypi/ply@2022.10.x

To install, run

npx @tessl/cli install tessl/pypi-ply@2022.10.0

index.md · docs/

PLY (Python Lex-Yacc)

PLY is a pure Python implementation of the popular Unix parsing tools lex and yacc. It provides a complete framework for building lexical analyzers and parsers using the LALR(1) parsing algorithm, designed for creating compilers, interpreters, protocol decoders, and other language processing tools.

Package Information

  • Package Name: ply
  • Language: Python
  • Installation: Copy directly from GitHub (no longer distributed via PyPI)
  • Repository: https://github.com/dabeaz/ply
  • Version: 2022.10.27

Core Imports

import ply.lex as lex
import ply.yacc as yacc

Alternative import patterns:

from ply import lex
from ply import yacc

Basic Usage

import ply.lex as lex
import ply.yacc as yacc

# Define tokens for lexical analysis.
# PLY discovers rules by naming convention (see the Architecture section):
# this `tokens` tuple declares every token name, and yacc reads the same
# tuple to learn the grammar's terminals.
tokens = (
    'NAME',
    'NUMBER',
    'PLUS',
    'MINUS',
    'TIMES',
    'DIVIDE',
    'LPAREN',
    'RPAREN',
)

# Token rules: a module-level string named t_<TOKEN> is that token's regex.
t_PLUS     = r'\+'
t_MINUS    = r'-'
t_TIMES    = r'\*'
t_DIVIDE   = r'/'
t_LPAREN   = r'\('
t_RPAREN   = r'\)'
t_ignore   = ' \t'  # characters silently skipped between tokens (space, tab)

def t_NAME(tok):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    # The docstring above is the token's regex (PLY convention).
    # Identifiers are passed through with their matched text unchanged.
    return tok

def t_NUMBER(tok):
    r'\d+'
    # The docstring above is the token's regex (PLY convention).
    # Convert the matched digit string to an int so parser actions do math.
    tok.value = int(tok.value)
    return tok

def t_newline(tok):
    r'\n+'
    # Keep the lexer's line counter accurate; returning nothing
    # discards the newline run instead of emitting a token.
    tok.lexer.lineno += len(tok.value)

def t_error(t):
    # Report the character the lexer could not match, then resynchronize
    # by skipping a single character and continuing.
    bad = t.value[0]
    print(f"Illegal character '{bad}'")
    t.lexer.skip(1)

# Build the lexer from the t_* rules defined in this module's namespace.
lexer = lex.lex()

# Define grammar rules for parsing
def p_expression_binop(p):
    '''expression : expression PLUS term
                  | expression MINUS term'''
    # The docstring above is the grammar rule (PLY convention).
    # p[1..3] are the right-hand-side values; p[0] receives the result.
    left, op, right = p[1], p[2], p[3]
    if op == '+':
        p[0] = left + right
    elif op == '-':
        p[0] = left - right

def p_expression_term(prod):
    '''expression : term'''
    # A lone term is already an expression; pass its value up unchanged.
    prod[0] = prod[1]

def p_term_binop(prod):
    '''term : term TIMES factor
            | term DIVIDE factor'''
    # The docstring above is the grammar rule (PLY convention).
    op = prod[2]
    if op == '*':
        prod[0] = prod[1] * prod[3]
    elif op == '/':
        prod[0] = prod[1] / prod[3]

def p_term_factor(prod):
    '''term : factor'''
    # A lone factor is already a term; forward its value.
    prod[0] = prod[1]

def p_factor_num(prod):
    '''factor : NUMBER'''
    # The NUMBER token's value (an int, set by t_NUMBER) becomes the factor.
    prod[0] = prod[1]

def p_factor_expr(prod):
    '''factor : LPAREN expression RPAREN'''
    # Parentheses only group: the inner expression's value (slot 2) is kept.
    prod[0] = prod[2]

def p_error(p):
    # PLY calls this with the offending token, or with a falsy value
    # (None) when the error occurs at end of input.
    if not p:
        print("Syntax error at EOF")
        return
    print(f"Syntax error at token {p.type}")

# Build the LALR(1) parser from the p_* rules defined in this module.
parser = yacc.yacc()

# Parse input; precedence is encoded structurally (term binds tighter
# than expression), so 4 * 2 is reduced before the addition.
result = parser.parse("3 + 4 * 2", lexer=lexer)
print(f"Result: {result}")  # Output: Result: 11

Architecture

PLY follows the traditional Unix lex/yacc design with two separate but coordinated phases:

  • Lexical Analysis (lex): Converts raw text into tokens using regular expressions and state machines
  • Syntax Analysis (yacc): Parses token streams into structured data using LALR(1) grammar rules
  • Convention-based API: Uses function/variable naming patterns for automatic rule discovery
  • Error Recovery: Comprehensive error handling and recovery mechanisms for both phases

The design emphasizes simplicity and educational value while providing production-ready parsing capabilities.

Capabilities

Lexical Analysis

Tokenizes input text using regular expressions and finite state machines. Supports multiple lexer states, line tracking, error handling, and flexible token rules defined through naming conventions.

# ply.lex public API — signature stubs only; implementations live in the package.
# lex() builds a Lexer from the t_* rules found in `module`/`object` (or the
# caller's namespace, per PLY's convention-based discovery).
def lex(*, module=None, object=None, debug=False, reflags=int(re.VERBOSE), debuglog=None, errorlog=None): ...
# Decorator attaching regex pattern `r` to a token function — TODO confirm semantics.
def TOKEN(r): ...
# Convenience driver that tokenizes `data` with `lexer` — TODO confirm output format.
def runmain(lexer=None, data=None): ...

class Lexer:
    """Tokenizer built by lex(); iterable, yielding LexToken objects."""
    def input(self, s): ...            # supply the string to tokenize
    def token(self): ...               # return the next LexToken
    def clone(self, object=None): ...  # copy this lexer, optionally rebound to `object`
    def begin(self, state): ...        # switch to another lexer state
    def push_state(self, state): ...   # enter `state`, remembering the current one
    def pop_state(self): ...           # return to the previously pushed state
    def current_state(self): ...       # name of the active lexer state
    def skip(self, n): ...             # advance n input characters without tokenizing
    def __iter__(self): ...
    def __next__(self): ...
    lineno: int   # current line number (maintained by user rules such as t_newline)
    lexpos: int   # current character offset within the input

class LexToken:
    """A single token produced by the lexer."""
    type: str     # token name, one of the entries in the `tokens` tuple
    value: any    # matched text, or whatever a t_* rule stored (e.g. int for NUMBER)
    lineno: int   # line on which the token starts
    lexpos: int   # character offset of the token within the input

Lexical Analysis

Syntax Parsing

Parses token streams using LALR(1) algorithm with grammar rules defined in function docstrings. Supports precedence rules, error recovery, debugging, and ambiguity resolution.

# ply.yacc public API — signature stubs only; implementations live in the package.
# yacc() builds an LRParser from the p_* rules found in `module` (or the
# caller's namespace); `debugfile` names the grammar/debug report.
def yacc(*, debug=False, module=None, start=None, check_recursion=True, optimize=False, debugfile='parser.out', debuglog=None, errorlog=None): ...
# Formatting helpers used in debug/trace output — TODO confirm exact use.
def format_result(r): ...
def format_stack_entry(r): ...

class LRParser:
    """LALR(1) parser built by yacc()."""
    def parse(self, input=None, lexer=None, debug=False, tracking=False): ...  # parse `input`, pulling tokens from `lexer`
    def errok(self): ...    # clear the error state during error recovery
    def restart(self): ...  # reset the parser to its starting state
    def set_defaulted_states(self): ...      # enable default-state optimization — TODO confirm
    def disable_defaulted_states(self): ...  # disable default-state optimization — TODO confirm

class YaccProduction:
    """The `p` object passed to p_* rule functions.

    Indexing mirrors the grammar rule: p[0] is the result slot and
    p[1..n] hold the right-hand-side symbol values (see Basic Usage).
    """
    def lineno(self, n): ...               # line number of symbol n
    def set_lineno(self, n, lineno): ...   # override symbol n's line number
    def linespan(self, n): ...             # (start, end) line span of symbol n
    def lexpos(self, n): ...               # lexing position of symbol n
    def set_lexpos(self, n, lexpos): ...   # override symbol n's lexing position
    def lexspan(self, n): ...              # (start, end) position span of symbol n
    def error(self): ...                   # signal a syntax error from a rule action
    def __getitem__(self, n): ...
    def __setitem__(self, n, v): ...
    def __len__(self): ...
    slice: list     # grammar symbols of the rule being reduced
    stack: list     # parser state stack
    lexer: object   # the active lexer
    parser: object  # the active parser

Syntax Parsing

Types

# Exception hierarchy raised by ply.lex / ply.yacc.
class LexError(Exception):
    """Exception raised for lexical analysis errors"""
    text: str  # presumably the remaining input at the failure point — TODO confirm

class YaccError(Exception):
    """Base exception for parser errors"""

class GrammarError(YaccError):
    """Exception for grammar specification errors"""

class LALRError(YaccError):
    """Exception for LALR parsing algorithm errors"""

class PlyLogger:
    """Logging utility for PLY operations (logging-module-style interface)."""
    def critical(self, msg, *args, **kwargs): ...
    def warning(self, msg, *args, **kwargs): ...
    def error(self, msg, *args, **kwargs): ...
    def info(self, msg, *args, **kwargs): ...
    def debug(self, msg, *args, **kwargs): ...

class NullLogger:
    """Null logging implementation — same interface as PlyLogger, discards messages."""
    def debug(self, msg, *args, **kwargs): ...
    def warning(self, msg, *args, **kwargs): ...
    def error(self, msg, *args, **kwargs): ...
    def info(self, msg, *args, **kwargs): ...
    def critical(self, msg, *args, **kwargs): ...

class YaccSymbol:
    """Internal parser symbol representation (grammar symbols on the parse stack)."""
    def __str__(self): ...
    def __repr__(self): ...

# Configuration constants
yaccdebug: bool = False         # when True, emit parser debugging output — TODO confirm
debug_file: str = 'parser.out'  # default name of yacc's debug report file
error_count: int = 3            # error-recovery threshold — TODO confirm exact semantics
resultlimit: int = 40           # truncation limit for values in debug traces — TODO confirm
MAXINT: int
StringTypes: tuple = (str, bytes)

# Package version
__version__: str = '2022.10.27'