A universal Python parser combinator library inspired by Haskell's Parsec library.
—
Higher-order functions for combining and repeating parsers. These combinators enable building complex parsers from simple building blocks through composition and repetition patterns, forming the foundation of parser combinator methodology.
Control how many times a parser should be applied, with various bounds and greedy matching behavior.
def times(p, mint, maxt=None):
"""
Repeat a parser between mint and maxt times (greedy).
Args:
p (Parser): Parser to repeat
mint (int): Minimum number of repetitions
maxt (int, optional): Maximum repetitions. Defaults to mint if not specified
Returns:
Parser: Parser that returns list of values from p
Note:
Does as much matching as possible within bounds.
"""
def count(p, n):
"""
Parse exactly n occurrences of parser p.
Args:
p (Parser): Parser to repeat
n (int): Exact number of repetitions required
Returns:
Parser: Parser that returns list of exactly n values
Note:
If n <= 0, returns parser that succeeds with empty list.
"""
def many(p):
"""
Repeat a parser 0 to infinity times (greedy).
Args:
p (Parser): Parser to repeat
Returns:
Parser: Parser that returns list of values (possibly empty)
Note:
Always succeeds, returns empty list if no matches.
"""
def many1(p):
"""
Repeat a parser 1 to infinity times (greedy).
Args:
p (Parser): Parser to repeat
Returns:
Parser: Parser that returns non-empty list of values
Raises:
ParseError: When the returned parser is run and p fails on its first attempt
"""
from parsec import times, count, many, many1, letter, digit, string, ParseError
# Parse exactly 3 letters
three_letters = count(letter(), 3)
result = three_letters.parse("abcdef") # Returns ['a', 'b', 'c']
# Parse 2-4 letters
some_letters = times(letter(), 2, 4)
result = some_letters.parse("abcdef") # Returns ['a', 'b', 'c', 'd']
# Parse zero or more digits
digits = many(digit())
result = digits.parse("123abc") # Returns ['1', '2', '3']
result = digits.parse("abc") # Returns [] (empty list)
# Parse one or more letters
letters = many1(letter())
result = letters.parse("abc123") # Returns ['a', 'b', 'c']
try:
result = letters.parse("123") # Raises ParseError
except ParseError:
print("No letters found")
from parsec import times, many, many1, string, letter, digit, generate
# Parse word followed by number
word_num = many1(letter()) + many1(digit())
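result = word_num.parse("abc123") # Returns the results of both parsers combined by `+` (lists of characters, since many1 returns a list), not the string "abc123"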
# Parse 3 letters followed by a digit
pattern = times(letter(), 3) >> digit()
result = pattern.parse("xyz1abc") # Returns '1'
# Parse comma-separated letters
@generate
def csv_letters():
first = yield letter()
rest = yield many(string(",") >> letter())
return [first] + rest
result = csv_letters.parse("a,b,c") # Returns ['a', 'b', 'c']
from parsec import times, letter
# Minimum only - parse at least 2 letters
at_least_two = times(letter(), 2, float('inf'))
result = at_least_two.parse("abcdefgh") # Returns ['a','b','c','d','e','f','g','h']
# Zero repetitions allowed
zero_or_more = times(letter(), 0, 3)
result = zero_or_more.parse("12345") # Returns [] (empty list)
result = zero_or_more.parse("ab12") # Returns ['a', 'b']
# Fixed range
bounded = times(letter(), 2, 4)
result = bounded.parse("a") # ParseError - not enough matches
result = bounded.parse("abcd") # Returns ['a', 'b', 'c', 'd']
result = bounded.parse("abcdefg") # Returns ['a', 'b', 'c', 'd'] (stops at max)
from parsec import many, letter, digit, string
# Greedy matching - takes as much as possible
parser = many(letter()) >> digit()
# This works - many() stops when it can't match more letters
result = parser.parse("abc1") # Returns '1'
# This also works - many() consumes all letters it can
result = parser.parse("abcdefg1") # Returns '1'
# many() is greedy and does NOT backtrack: a following parser cannot reclaim input that many() already consumed
parser = many(string("ab")) >> string("ab")
result = parser.parse("ababab") # Raises ParseError - many() consumes all three "ab", leaving nothing for the final parser
Install with Tessl CLI
npx tessl i tessl/pypi-parsec