Easy-to-use parser combinators for parsing in pure Python
—
Higher-order functions that combine multiple parsers into more complex parsing logic. These combinators enable compositional parser construction and advanced parsing patterns.
Try multiple parsers in sequence until one succeeds.
def alt(*parsers):
"""
Try alternative parsers in order until one succeeds.
Args:
*parsers: Variable number of Parser objects to try
Returns:
Parser: Parser that succeeds with first successful alternative
"""Parse multiple parsers in sequence and collect their results.
def seq(*parsers, **kw_parsers):
"""
Parse parsers in sequence and collect results.
Args:
*parsers: Parsers to execute in order (returns list)
**kw_parsers: Named parsers to execute (returns dict, Python 3.6+ only)
Returns:
Parser: Parser returning list of results or dict of named results
Note:
Cannot mix positional and keyword arguments.
Keyword arguments only available in Python 3.6+.
"""Use Python generator syntax for complex parser composition with imperative-style code.
def generate(fn):
    """
    Create parser using generator syntax for complex parsing logic.

    Args:
        fn: Generator function that yields parsers and returns final result,
            or a description string (see Usage)

    Returns:
        Parser: Parser that executes the generator-based parsing logic

    Usage:
        Can be used as decorator or called with generator function.
        If called with string, returns decorator that adds description.
    """

from parsy import alt, string, regex
# Simple alternatives
sign = alt(string('+'), string('-'), string(''))
result = sign.parse('+')  # Returns '+'

# Complex alternatives with different result types.
# alt() commits to the first alternative that succeeds, so the float pattern
# must come before the integer pattern: otherwise r'\d+' would match only the
# "3" of "3.14" and parse() would fail on the unconsumed ".14".
value = alt(
    regex(r'\d+\.\d+').map(float),  # Float
    regex(r'\d+').map(int),  # Integer
    regex(r'"[^"]*"').map(lambda s: s[1:-1]),  # String
)
result = value.parse('42')  # Returns 42 (int)
result = value.parse('3.14')  # Returns 3.14 (float)
result = value.parse('"hello"')  # Returns 'hello' (str)

from parsy import seq, string, regex
# Basic sequence returning list
greeting = seq(
    string('Hello'),
    regex(r'\s+'),
    regex(r'\w+'),
)
result = greeting.parse('Hello world')  # Returns ['Hello', ' ', 'world']

# Sequence with transformation
greeting_formatted = seq(
    string('Hello'),
    regex(r'\s+'),
    regex(r'\w+'),
).combine(lambda hello, space, name: f"{hello} {name}!")
result = greeting_formatted.parse('Hello world')  # Returns 'Hello world!'

# Named sequence (Python 3.6+)
# The name pattern must not match digits: r'\w+' would consume all of
# 'Alice25' and leave nothing for the age parser, so the parse would fail.
person = seq(
    name=regex(r'[A-Za-z]+'),
    age=regex(r'\d+').map(int),
)
result = person.parse('Alice25')  # Returns {'name': 'Alice', 'age': 25}

from parsy import generate, string, regex
# Simple generator parser
@generate
def simple_assignment():
    """Parse `<identifier>=<integer>` into an (identifier, int) tuple."""
    identifier = yield regex(r'\w+')
    yield string('=')  # separator; its result is not needed
    number = yield regex(r'\d+').map(int)
    return identifier, number


result = simple_assignment.parse('x=42')  # Returns ('x', 42)
# Complex nested parsing with conditionals
@generate
def conditional_expression():
    """Parse `<cond>?<a>:<b>` and return <a> when <cond> is 'true', else <b>."""
    condition = yield regex(r'\w+')
    yield string('?')
    when_true = yield regex(r'\w+')
    yield string(':')
    when_false = yield regex(r'\w+')

    # Can include Python logic
    return when_true if condition == 'true' else when_false


result = conditional_expression.parse('true?yes:no')  # Returns 'yes'
# Generator with error handling
@generate
def validated_number():
    """Parse an optionally signed integer and reject values above 1000."""
    sign = yield alt(string('+'), string('-')).optional()
    digits = yield regex(r'\d+')

    # Validation logic
    magnitude = int(digits)
    number = -magnitude if sign == '-' else magnitude
    if number > 1000:
        # Can raise custom errors or return failure
        raise ValueError("Number too large")
    return number


result = validated_number.parse('+42')  # Returns 42
# Generator with description
@generate('mathematical expression')
def math_expr():
    """Parse `<int><op><int>` and return the result of applying the operator."""
    left = yield regex(r'\d+').map(int)
    # The character class admits exactly the four operators handled below,
    # so the if/elif chain is exhaustive.
    op = yield regex(r'[+\-*/]')
    right = yield regex(r'\d+').map(int)
    if op == '+':
        return left + right
    elif op == '-':
        return left - right
    elif op == '*':
        return left * right
    elif op == '/':
        return left / right  # true division: the result is a float


result = math_expr.parse('5*3')  # Returns 15

from parsy import alt, seq, generate, string, regex
# Recursive parsing with forward declaration
from parsy import forward_declaration

# Placeholder so that `atom` can refer to the full grammar before it is built
expr = forward_declaration()

# Define atomic expressions
number = regex(r'\d+').map(int)
variable = regex(r'[a-z]+')
parenthesized = string('(') >> expr << string(')')
atom = alt(number, variable, parenthesized)

# Define operations
mul_op = alt(string('*'), string('/'))
add_op = alt(string('+'), string('-'))
term = atom.sep_by(mul_op, min=1)
expression = term.sep_by(add_op, min=1)

# Complete the forward declaration
expr.become(expression)
# Complex nested structure parsing with forward declaration
json_value = forward_declaration()


@generate
def json_object():
    """Parse a `{...}` object of key/value pairs into a dict."""
    yield string('{')
    yield regex(r'\s*')

    # Handle empty object
    closed_immediately = yield string('}').optional()
    if closed_immediately:
        return {}

    # Parse key-value pairs; json_pair is looked up when the parser runs,
    # so it may be defined after this function.
    entries = yield json_pair.sep_by(regex(r'\s*,\s*'))
    yield regex(r'\s*')
    yield string('}')
    return {key: value for key, value in entries}
@generate
def json_pair():
    """Parse one `"key": value` entry into a (key, value) tuple."""
    # group=1 yields the text between the quotes rather than the whole match
    pair_key = yield regex(r'"([^"]*)"', group=1)
    yield regex(r'\s*:\s*')
    pair_value = yield json_value
    return pair_key, pair_value
# Define json_value after json_object is defined
json_string = regex(r'"([^"]*)"', group=1)  # String
json_number = regex(r'\d+').map(int)  # Number
json_value.become(alt(
    json_string,
    json_number,
    json_object,  # Nested object
))

result = json_object.parse('{"name": "Alice", "age": 30}')
# Returns {'name': 'Alice', 'age': 30}

Install with Tessl CLI

npx tessl i tessl/pypi-parsy