CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-parsec

A universal Python parser combinator library inspired by the Parsec library of Haskell

Pending
Overview
Eval results
Files

docs/parser-generation.md

Parser Generation

Powerful declarative syntax using Python generators to build complex parsers with natural control flow, variable binding, and conditional logic. The generator approach provides an intuitive way to express complex parsing logic while maintaining the functional parser combinator foundation.

Capabilities

Generator Decorator

The core decorator that transforms Python generator functions into parser combinators with full access to intermediate parsing results.

def generate(fn):
    """
    Build a parser out of a generator function.

    Args:
        fn (function or str): Either the generator function itself, or a
            description string to use for errors.

    Returns:
        Parser: The parser constructed from the generator.

    Usage patterns:
        # Plain decorator
        @generate
        def my_parser():
            result = yield some_parser
            return final_result

        # Decorator called with a description for errors
        @generate("description for errors")
        def my_parser():
            # parser logic

    Note:
        The generator yields Parser objects and returns the final result.
        Each intermediate result is handed back through generator.send().
    """

Usage Examples

Basic Generator Parsing

from parsec import generate, string, many1, letter, digit, spaces, ParseError

# Simple generator parser
@generate
def greeting():
    """Parse "hello", optional whitespace and a name; return "hello <name>"."""
    hello = yield string("hello")
    yield spaces()
    name = yield many1(letter())
    # many1(letter()) produces a list of characters, so join them back up
    return f"{hello} {''.join(name)}"

result = greeting.parse("hello   alice")  # Returns "hello alice"

# Generator with error description
@generate("greeting parser")
def greeting_with_desc():
    """Parse "hi", optional whitespace and a name; return the name."""
    yield string("hi")
    yield spaces()
    name = yield many1(letter())
    return "".join(name)

# ParseError must be imported (see the import line above) to be caught here
try:
    result = greeting_with_desc.parse("bye alice")
except ParseError as e:
    print(e.expected)  # "greeting parser"

Conditional Parsing

from parsec import generate, string, many1, digit, letter

# Decide the result's sign from the prefix that was parsed
@generate
def conditional_number():
    """Parse an integer with an optional leading "+" or "-" and return it."""
    # string("") always succeeds, which makes the sign optional
    sign_parser = string("+") ^ string("-") ^ string("")
    sign = yield sign_parser
    digit_chars = yield many1(digit())
    magnitude = int("".join(digit_chars))
    return -magnitude if sign == "-" else magnitude

result = conditional_number.parse("-123")  # Returns -123
result = conditional_number.parse("+456")  # Returns 456
result = conditional_number.parse("789")   # Returns 789

# Pick the payload parser from the leading type marker
@generate
def typed_value():
    """Parse "i:<digits>", "s:<letters>" or "f:<digits>.<digits>".

    Returns an int, a str or a float depending on the marker.
    """
    marker = yield string("i:") ^ string("s:") ^ string("f:")

    if marker == "s:":
        letters = yield many1(letter())
        return "".join(letters)

    # Both numeric forms start with a run of digits
    leading = yield many1(digit())
    if marker == "i:":
        return int("".join(leading))

    # Only "f:" is left: a dot and the fractional digits must follow
    yield string(".")
    trailing = yield many1(digit())
    return float("".join(leading) + "." + "".join(trailing))

result = typed_value.parse("i:123")    # Returns 123 (int)
result = typed_value.parse("s:hello")  # Returns "hello" (str)
result = typed_value.parse("f:12.34")  # Returns 12.34 (float)

Complex Data Structure Parsing

from parsec import generate, string, many, many1, letter, digit, spaces, one_of, none_of

# Building blocks for JSON-like values
@generate
def json_string():
    """Parse a double-quoted string (no escapes) and return its contents."""
    quote = string('"')
    yield quote
    body = yield many(none_of('"'))
    yield quote
    return "".join(body)

@generate
def json_number():
    """Parse an optionally negative number; return int, or float if it has a fraction."""
    from parsec import Parser, Value

    # Succeeds without consuming input; stands in for an absent fraction
    no_fraction = Parser(lambda text, index: Value.success(index, []))

    sign = yield string("-") ^ string("")
    whole_digits = yield many1(digit())
    fraction_digits = yield (string(".") >> many1(digit())) ^ no_fraction

    literal = sign + "".join(whole_digits)
    if not fraction_digits:
        return int(literal)
    return float(literal + "." + "".join(fraction_digits))

@generate
def json_array():
    """Parse a bracketed, comma-separated list of JSON-like values.

    Whitespace is accepted after "[", on both sides of each "," and
    before "]".

    Returns:
        list: The parsed elements in order, or [] for an empty array.
    """
    yield string("[")
    yield spaces()

    # Handle empty array: string("") always succeeds, so "]" is optional here
    empty_check = yield string("]") ^ string("")
    if empty_check == "]":
        return []

    # Parse first element
    first = yield json_value
    elements = [first]

    # Parse remaining elements. Whitespace is skipped before the comma as
    # well as after it, so input such as '[1 , 2]' is accepted too.
    rest = yield many(spaces() >> string(",") >> spaces() >> json_value)
    elements.extend(rest)

    yield spaces()
    yield string("]")
    return elements

@generate
def json_value():
    """Parse any supported value: a string, a number or a nested array."""
    alternatives = json_string ^ json_number ^ json_array
    return (yield alternatives)

# Usage
result = json_array.parse('["hello", 123, -45.6]')
# Returns ["hello", 123, -45.6]

Stateful Parsing

from parsec import generate, string, many, many1, letter, digit

# Parser that maintains state across operations
@generate
def calculator():
    """Evaluate a chain of integer operations strictly left to right.

    The input is a number followed by any number of (operator, number)
    pairs, where the operator is one of "+", "-", "*" or "/". There is no
    operator precedence: each operation is applied to the running result
    in the order it appears. "/" is integer (floor) division, so a zero
    divisor raises ZeroDivisionError.

    Returns:
        int: The accumulated result.
    """
    number = many1(digit()).parsecmap(lambda d: int("".join(d)))
    op_symbol = string("+") ^ string("-") ^ string("*") ^ string("/")

    result = yield number

    # `+` joins the two parsers, so each item is an (operator, operand) pair
    operations = yield many(op_symbol + number)

    for op, operand in operations:
        if op == "+":
            result += operand
        elif op == "-":
            result -= operand
        elif op == "*":
            result *= operand
        elif op == "/":
            result //= operand  # Integer division

    return result

# Left to right: ((10 + 5) * 2) - 3
result = calculator.parse("10+5*2-3")  # Returns 27

# Counter example: collect words and count them
@generate
def word_counter():
    """Parse whitespace-separated words and report them with their count.

    Returns:
        dict: {"words": [<word>, ...], "count": <number of words>}
    """
    # Function-scope import: the import line of this example does not
    # bring `spaces` into scope.
    from parsec import spaces

    # One word is a run of letters, joined into a single string
    word = many1(letter()).parsecmap("".join)

    # Optional whitespace after each word; many() stops at the first
    # position where no further word can be parsed.
    words = yield many(word << spaces())

    return {"words": words, "count": len(words)}

# Note: a parser that fails does not raise an exception inside the
# generator, so wrapping `yield` in try/except cannot be used to end a loop.
# Repetition is expressed with many() instead.

Recursive Parsing with Generators

from parsec import generate, string, many, many1, letter, digit, spaces, one_of

# Forward declaration for recursive grammar
expr = None

@generate
def factor():
    """Parse an unsigned integer literal and return it as an int."""
    # Numbers only; parenthesized expressions are handled by factor_or_paren
    number = yield many1(digit()).parsecmap(lambda d: int("".join(d)))
    return number

@generate
def factor_or_paren():
    """Parse either a plain number or an expression wrapped in parentheses."""
    # `expr` is looked up when the parser runs, after the forward
    # reference has been assigned.
    parenthesized = string("(") >> expr << string(")")
    return (yield factor ^ parenthesized)

@generate
def term():
    """Parse factors combined with "*" and "/", folding them left to right."""
    value = yield factor_or_paren

    # Each item is an (operator, operand) pair
    tail = yield many(one_of("*/") + factor_or_paren)

    for symbol, operand in tail:
        if symbol == "*":
            value *= operand
        else:  # symbol == "/"
            value //= operand

    return value

@generate
def expression():
    """Parse terms combined with "+" and "-", folding them left to right."""
    total = yield term

    # Each item is an (operator, operand) pair
    tail = yield many(one_of("+-") + term)

    for symbol, operand in tail:
        if symbol == "+":
            total += operand
        else:  # symbol == "-"
            total -= operand

    return total

# Resolve the forward reference now that `expression` exists
expr = expression

# Usage
result = expression.parse("2+3*4")      # Returns 14
result = expression.parse("(2+3)*4")    # Returns 20

Error Handling in Generators

from parsec import generate, string, many1, letter, digit, one_of, ParseError

# Generator with a description that is used when parsing fails
@generate("email address")
def email_parser():
    """Parse a simple "<username>@<domain>.<tld>" address.

    The username may contain letters, digits, "." and "_". The domain is
    one or more dot-separated labels of letters, digits and "-". The tld is
    the final run of letters.

    Returns:
        dict: {"username": ..., "domain": ..., "tld": ...}
    """
    username = yield many1(letter() ^ digit() ^ one_of("._"))
    yield string("@")

    # A domain label must be followed by "."; the last segment has no
    # trailing dot, so it is left for the tld parser. Allowing "." inside a
    # label would swallow the tld and the address could never match.
    label = many1(letter() ^ digit() ^ one_of("-")).parsecmap("".join)
    labels = yield many1(label << string("."))
    tld = yield many1(letter())

    return {
        "username": "".join(username),
        "domain": ".".join(labels),
        "tld": "".join(tld)
    }

# A failed parser is not raised as an exception inside the generator, so
# custom error handling belongs around the parse() call.
try:
    result = email_parser.parse("not an email")
except ParseError as e:
    print(e)

# Error recovery is expressed with an alternative parser, not try/except
@generate
def robust_number():
    """Parse a run of digits as an int, or the word "unknown" as None."""
    # Try to parse a number; if that fails, try to parse "unknown"
    digits = yield many1(digit()) ^ string("unknown").parsecmap(lambda _: None)
    if digits is None:
        return None
    return int("".join(digits))

result = robust_number.parse("123")      # Returns 123
result = robust_number.parse("unknown")  # Returns None

Advanced Patterns

Generator Composition

from parsec import generate, string, spaces, many, many1, letter, digit, one_of

# Compose generators for modularity
@generate
def parse_header():
    """Parse the "BEGIN <name>" line and return the name."""
    yield string("BEGIN")
    yield spaces()
    name = yield many1(letter())
    yield string("\n")
    return "".join(name)

@generate
def parse_body():
    """Parse newline-terminated lines of letters, digits, spaces and tabs.

    Returns:
        list: One string per line, with trailing whitespace removed.
    """
    # Only horizontal whitespace is allowed inside a line: a parser that
    # also matched "\n" would run across line boundaries.
    line_char = letter() ^ digit() ^ one_of(" \t")
    # `<<` consumes the terminating newline so the next line starts cleanly.
    # A final line without a newline (the footer) is left unconsumed.
    lines = yield many(many1(line_char) << string("\n"))
    return ["".join(line).rstrip() for line in lines]

@generate
def parse_footer():
    """Parse the closing "END" marker."""
    yield string("END")
    return None

@generate
def parse_document():
    """Parse header, body and footer; return the title and content."""
    header = yield parse_header
    body = yield parse_body
    # The footer carries no data; it only has to be present
    yield parse_footer

    return {
        "title": header,
        "content": body
    }

# Usage
doc_text = """BEGIN MyDocument
line one
line two  
END"""

result = parse_document.parse(doc_text)
# Returns {"title": "MyDocument", "content": ["line one", "line two"]}

Install with Tessl CLI

npx tessl i tessl/pypi-parsec

docs

character-parsing.md

combinators.md

core-primitives.md

index.md

parser-generation.md

parser-operators.md

tile.json