A universal Python parser combinator library inspired by Haskell's Parsec library.
—
The `generate` decorator offers a powerful declarative syntax that uses Python generators to build complex parsers with natural control flow, variable binding, and conditional logic. The generator approach provides an intuitive way to express complex parsing logic while maintaining the functional parser combinator foundation.
`generate` is the core decorator: it transforms Python generator functions into parser combinators with full access to intermediate parsing results.
def generate(fn):
    """
    Create a parser from a generator function.

    Args:
        fn (function or str): Generator function or description string

    Returns:
        Parser: Parser built from the generator

    Usage patterns:
        @generate
        def my_parser():
            result = yield some_parser
            return final_result

        @generate("description for errors")
        def my_parser():
            # parser logic

    Note:
        Generator should yield Parser objects and return final result.
        Intermediate results are sent back via generator.send().
    """


# ParseError is imported as well because the example below catches it.
from parsec import generate, string, many1, letter, digit, spaces, ParseError
# Simple generator parser
@generate
def greeting():
    """Parse "hello", optional whitespace and a name; return them joined by one space."""
    hello = yield string("hello")
    yield spaces()
    name = yield many1(letter())
    return f"{hello} {''.join(name)}"


result = greeting.parse("hello alice")  # Returns "hello alice"


# Generator with error description
@generate("greeting parser")
def greeting_with_desc():
    """Parse "hi", optional whitespace and a name; return the name."""
    yield string("hi")
    yield spaces()
    name = yield many1(letter())
    return "".join(name)


try:
    result = greeting_with_desc.parse("bye alice")
except ParseError as e:
    print(e.expected)  # "greeting parser"

from parsec import generate, string, many1, digit, letter
# Conditional logic based on parsed values
@generate
def conditional_number():
    """Parse an integer with an optional leading "+" or "-" sign."""
    # The trailing string("") alternative is what makes the sign optional
    # (see the "789" example below).
    sign = yield string("+") ^ string("-") ^ string("")
    digits = yield many1(digit())
    number = int("".join(digits))
    return -number if sign == "-" else number


result = conditional_number.parse("-123")  # Returns -123
result = conditional_number.parse("+456")  # Returns 456
result = conditional_number.parse("789")   # Returns 789


# More complex conditional parsing
@generate
def typed_value():
    """Parse a value whose type is chosen by an "i:", "s:" or "f:" prefix."""
    type_marker = yield string("i:") ^ string("s:") ^ string("f:")
    if type_marker == "i:":
        digits = yield many1(digit())
        return int("".join(digits))
    if type_marker == "s:":
        chars = yield many1(letter())
        return "".join(chars)
    # Only "f:" is left: digits, a dot, then more digits.
    whole = yield many1(digit())
    yield string(".")
    decimal = yield many1(digit())
    return float("".join(whole) + "." + "".join(decimal))


result = typed_value.parse("i:123")    # Returns 123 (int)
result = typed_value.parse("s:hello")  # Returns "hello" (str)
result = typed_value.parse("f:12.34")  # Returns 12.34 (float)

from parsec import generate, string, many, many1, letter, digit, spaces, one_of, none_of
# Parse JSON-like objects
from parsec import Parser, Value


@generate
def json_string():
    """Parse a double-quoted string (no escape sequences) and return its content."""
    yield string('"')
    chars = yield many(none_of('"'))
    yield string('"')
    return "".join(chars)


@generate
def json_number():
    """Parse an optionally negative number; return float if it has a fraction, else int."""
    sign = yield string("-") ^ string("")
    digits = yield many1(digit())
    # Fall back to a parser that consumes nothing and yields [] when there
    # is no fractional part.
    decimal = yield (string(".") >> many1(digit())) ^ Parser(lambda text, index: Value.success(index, []))
    number_str = sign + "".join(digits)
    if decimal:
        number_str += "." + "".join(decimal)
        return float(number_str)
    return int(number_str)


@generate
def json_array():
    """Parse a bracketed, comma-separated list of JSON-like values."""
    yield string("[")
    yield spaces()
    # Handle empty array
    empty_check = yield string("]") ^ string("")
    if empty_check == "]":
        return []
    # Parse first element
    first = yield json_value
    elements = [first]
    # Parse remaining elements
    rest = yield many(string(",") >> spaces() >> json_value)
    elements.extend(rest)
    yield spaces()
    yield string("]")
    return elements


@generate
def json_value():
    """Parse a string, a number or a nested array."""
    # json_array refers to json_value by name; the name is only looked up
    # while parsing, so defining it after json_array is fine.
    value = yield json_string ^ json_number ^ json_array
    return value


# Usage
result = json_array.parse('["hello", 123, -45.6]')
# Returns ["hello", 123, -45.6]

# spaces is imported as well because word_counter below uses it.
from parsec import generate, string, many, many1, letter, digit, spaces
# Parser that maintains state across operations
@generate
def calculator():
    """Evaluate an integer expression strictly left to right.

    There is no operator precedence: every operator is applied to the
    running result in the order it appears in the input.
    """
    number = many1(digit()).parsecmap(lambda d: int("".join(d)))
    operator = string("+") ^ string("-") ^ string("*") ^ string("/")

    result = yield number
    # Each item is an (operator, operand) tuple produced by the + combinator.
    operations = yield many(operator + number)
    for op, operand in operations:
        if op == "+":
            result += operand
        elif op == "-":
            result -= operand
        elif op == "*":
            result *= operand
        elif op == "/":
            result //= operand  # Integer division
    return result


result = calculator.parse("10+5*2-3")  # Returns 27, i.e. ((10 + 5) * 2) - 3
# Counter example that collects every word in the input
@generate
def word_counter():
    """Parse whitespace-separated words; return them together with their count."""
    # A failing parser is not raised as an exception inside the generator:
    # the generated parser simply stops and reports the failure. A
    # try/except around `yield` therefore cannot detect "no more words".
    # `many` handles that case by stopping at the first non-match.
    words = yield many(many1(letter()).parsecmap(lambda chars: "".join(chars)) << spaces())
    return {"words": words, "count": len(words)}


result = word_counter.parse("hello big world")
# Returns {"words": ["hello", "big", "world"], "count": 3}

# digit is imported as well because the expression grammar below uses it.
from parsec import generate, string, many, many1, letter, digit, spaces, one_of
# Forward declaration for recursive grammar
expr = None


@generate
def factor():
    """Parse an unsigned integer literal."""
    number = yield many1(digit()).parsecmap(lambda d: int("".join(d)))
    return number


@generate
def factor_or_paren():
    """Parse a number or a parenthesized expression."""
    # `expr` is read while parsing, by which time it has been bound to
    # `expression` below.
    result = yield factor ^ (string("(") >> expr << string(")"))
    return result


@generate
def term():
    """Parse factors joined by "*" or "/" and fold them left to right."""
    left = yield factor_or_paren
    ops = yield many(one_of("*/") + factor_or_paren)
    result = left
    for op, right in ops:
        if op == "*":
            result *= right
        else:  # op == "/"
            result //= right
    return result


@generate
def expression():
    """Parse terms joined by "+" or "-" and fold them left to right."""
    left = yield term
    ops = yield many(one_of("+-") + term)
    result = left
    for op, right in ops:
        if op == "+":
            result += right
        else:  # op == "-"
            result -= right
    return result


# Set the forward reference
expr = expression

# Usage
result = expression.parse("2+3*4")    # Returns 14
result = expression.parse("(2+3)*4")  # Returns 20

# digit and one_of are imported as well because the examples below use them.
from parsec import generate, string, many1, letter, digit, one_of, ParseError
# Generator with a descriptive error message
@generate("email address")
def email_parser():
    """Parse ``username@domain.tld`` into a dict of its three parts.

    The last dot-separated label after "@" is reported as the TLD and the
    labels before it form the domain. Labels may contain letters, digits
    and "-".
    """
    # A failing parser is not raised inside the generator, so wrapping the
    # yields in try/except ParseError would never trigger. Catch ParseError
    # around the .parse() call instead.
    username = yield many1(letter() ^ digit() ^ one_of("._"))
    yield string("@")
    # Labels must not contain "."; otherwise the greedy many1 would swallow
    # the separator and the TLD, and the parse could never succeed.
    label = many1(letter() ^ digit() ^ one_of("-")).parsecmap(lambda chars: "".join(chars))
    first = yield label
    rest = yield many1(string(".") >> label)
    *subdomains, tld = rest
    return {
        "username": "".join(username),
        "domain": ".".join([first, *subdomains]),
        "tld": tld
    }


result = email_parser.parse("john.doe@example.com")
# Returns {"username": "john.doe", "domain": "example", "tld": "com"}
# Generator that falls back to an alternative parser for error recovery
@generate
def robust_number():
    """Parse an unsigned integer, or the literal "unknown" (returned as None)."""
    # Fallbacks must be written with a choice combinator: a failing parser
    # is not raised inside the generator, so try/except around `yield`
    # would never reach the alternative.
    value = yield (
        many1(digit()).parsecmap(lambda d: int("".join(d)))
        ^ string("unknown").parsecmap(lambda _: None)
    )
    return value


result = robust_number.parse("123")      # Returns 123
result = robust_number.parse("unknown")  # Returns None

# The document example below needs these names in addition to generate.
from parsec import generate, string, spaces, space, many, many1, letter, digit
# Compose generators for modularity
@generate
def parse_header():
    """Parse the "BEGIN <name>" line and return the name."""
    yield string("BEGIN")
    yield spaces()
    name = yield many1(letter())
    yield string("\n")
    return "".join(name)


@generate
def parse_body():
    """Parse newline-terminated lines of letters, digits and blanks."""
    # Only a literal blank is allowed inside a line: space() also matches
    # "\n" and would swallow the line terminators. `<<` is used because it
    # consumes the terminating newline, whereas `<` only checks for it.
    # The final "END" has no trailing newline, so it is left for the footer.
    lines = yield many(many1(letter() ^ digit() ^ string(" ")) << string("\n"))
    return ["".join(line) for line in lines]


@generate
def parse_footer():
    """Parse the closing "END" marker."""
    yield string("END")
    return None


@generate
def parse_document():
    """Parse header, body and footer; return the title and content lines."""
    header = yield parse_header
    body = yield parse_body
    yield parse_footer
    return {
        "title": header,
        "content": body
    }


# Usage
doc_text = """BEGIN MyDocument
line one
line two
END"""
result = parse_document.parse(doc_text)
# Returns {"title": "MyDocument", "content": ["line one", "line two"]}

Install with Tessl CLI:

npx tessl i tessl/pypi-parsec