A universal Python parser combinator library inspired by the Parsec library of Haskell
—
Monadic and compositional operators for sequencing, choice, transformation, and control flow between parsers. These operators enable both infix notation and functional composition patterns, providing the core building blocks for parser combination.
Core monadic operations that enable sequential composition and value transformation in parser chains.
def bind(self, fn):
"""
Monadic bind operation (>>=).
Passes the result of this parser to fn, continues with returned parser.
Args:
fn (function): Function that takes parsed value and returns Parser
Returns:
Parser: Combined parser
Note:
If this parser fails, fn is not called and failure is returned.
"""
def compose(self, other):
"""
Sequential composition (>>).
Runs this parser, then other parser, returns result of other.
Args:
other (Parser): Parser to run after this one
Returns:
Parser: Combined parser returning other's result
Note:
If this parser fails, other is not run.
"""
def parsecmap(self, fn):
"""
Transform the result of this parser with a function.
Args:
fn (function): Function to transform the parsed value
Returns:
Parser: Parser that returns fn(parsed_value)
Note:
Only applies fn if parsing succeeds.
"""

Alternative selection between parsers with different backtracking behaviors.
def choice(self, other):
"""
Choice without backtrack (|).
Try this parser first. If it fails without consuming input, try other.
Args:
other (Parser): Alternative parser to try
Returns:
Parser: Parser that returns result of first successful parser
Note:
No backtracking - if this parser consumes input then fails,
other is not tried.
"""
def try_choice(self, other):
"""
Choice with backtrack (^).
Try this parser first. If it fails, try other regardless of input consumption.
Args:
other (Parser): Alternative parser to try
Returns:
Parser: Parser that returns result of first successful parser
Note:
Full backtracking - other is tried even if this parser consumed input.
"""

Operations for combining parser results and controlling parser termination.
def joint(self, other):
"""
Joint operation (+).
Combine results from two sequential parsers.
Args:
other (Parser): Parser to run after this one
Returns:
Parser: Parser that returns combined/concatenated results
Note:
Results are aggregated using Value.aggregate().
"""
def skip(self, other):
"""
Skip operation (<).
Run this parser, then other parser, return this parser's result.
Other parser consumes its input.
Args:
other (Parser): Parser to run and consume after this one
Returns:
Parser: Parser that returns this parser's result
"""
def ends_with(self, other):
"""
Ends with operation (<<).
Run this parser, then other parser, return this parser's result.
Other parser does not consume input.
Args:
other (Parser): Parser to check but not consume after this one
Returns:
Parser: Parser that returns this parser's result
Note:
other parser must succeed but doesn't advance position.
"""

Operations for adding metadata and error handling to parsers.
def mark(self):
"""
Add position information to parser result.
Returns:
Parser: Parser that returns (start_pos, value, end_pos)
Note:
Positions are (line, column) tuples with 0-based indexing.
"""
def desc(self, description):
"""
Add description for better error messages.
Args:
description (str): Description of what this parser expects
Returns:
Parser: Parser with improved error reporting
Note:
If parser fails, error will show this description.
"""

Python operator overloads for convenient infix syntax.
# Infix operators available on Parser instances:
parser1 | parser2 # choice (calls choice())
parser1 ^ parser2 # try_choice (calls try_choice())
parser1 + parser2 # joint (calls joint())
parser1 >> parser2 # compose (calls compose())
parser1 >>= fn # bind (calls bind())
parser1 << parser2 # ends_with (calls ends_with())
parser1 < parser2 # skip (calls skip())

Standalone functions that mirror the Parser methods for functional programming style.
def bind(p, fn):
"""Functional version of Parser.bind()"""
def compose(pa, pb):
"""Functional version of Parser.compose()"""
def joint(pa, pb):
"""Functional version of Parser.joint()"""
def choice(pa, pb):
"""Functional version of Parser.choice()"""
def try_choice(pa, pb):
"""Functional version of Parser.try_choice()"""
def parsecmap(p, fn):
"""
Functional version of Parser.parsecmap().
Note:
This function has a bug in the original implementation.
It calls p.map(fn) but should call p.parsecmap(fn).
"""
def mark(p):
"""Functional version of Parser.mark()"""
def parse(p, text, index):
"""
Utility function to parse with a parser.
Args:
p (Parser): Parser to use
text (str): Text to parse
index (int): Starting position
Returns:
Parsed result
Note:
This function appears to have a bug in the original implementation.
It calls p.parse(text, index) but Parser.parse() only takes the text parameter.
"""

from parsec import string, letter, digit, many
# Compose - run first parser, then second, return second result
parser = string("hello") >> string("world")
result = parser.parse("helloworld") # Returns "world"
# Joint - combine results from both parsers
parser = string("hello") + string("world")
result = parser.parse("helloworld") # Returns "helloworld"
# Skip - use first result, but consume second parser's input
parser = many(letter()) < string(".")
result = parser.parse("hello.") # Returns ['h','e','l','l','o']
# Ends with - first result, second must match but doesn't consume
parser = many(letter()) << string("123")
result = parser.parse("hello123more") # Returns ['h','e','l','l','o'], "123more" remains

from parsec import string, letter
# Regular choice - no backtrack
parser = string("ab") | string("ac")
result = parser.parse("ab") # Returns "ab"
# parser.parse("ac") would fail because string("ab") consumed 'a' but failed on 'c'
# Try choice - with backtrack
parser = string("ab") ^ string("ac")
result = parser.parse("ab") # Returns "ab"
result = parser.parse("ac") # Returns "ac" (backtracked after "ab" failed)
# Multiple choices
parser = string("cat") ^ string("car") ^ string("card")
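result = parser.parse("card") # Returns "car": string("car") succeeds first, so string("card") is never tried; list longer alternatives first to match "card"

from parsec import many1, digit, string, generate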
# Transform results
number_parser = many1(digit()).parsecmap(lambda digits: int("".join(digits)))
result = number_parser.parse("123") # Returns 123 (integer)
# Monadic binding for conditional parsing
@generate
def conditional_parser():
op = yield string("+") ^ string("-")
num1 = yield number_parser
# many1(digit()) is greedy, so the two numbers need a separator between them
yield string(",")
num2 = yield number_parser
if op == "+":
return num1 + num2
else:
return num1 - num2
result = conditional_parser.parse("+123,456") # Returns 579
# Using bind directly
def make_repeat_parser(char):
return many1(string(char))
parser = string("a").bind(make_repeat_parser)
result = parser.parse("aaaa") # Returns ['a', 'a', 'a'] (the first 'a' is consumed by string("a"))

from parsec import mark, many, many1, letter, string, generate
# Mark parser positions
marked_word = mark(many1(letter()))
start_pos, word, end_pos = marked_word.parse("hello")
# start_pos: (0, 0), word: ['h','e','l','l','o'], end_pos: (0, 5)
# Mark multiple elements
@generate
def marked_lines():
lines = yield many(mark(many(letter())) < string("\n"))
return lines
result = marked_lines.parse("hello\nworld\n")
# Returns [((0,0), ['h','e','l','l','o'], (0,5)), ((1,0), ['w','o','r','l','d'], (1,5))]

from parsec import string, letter, many1, ParseError
# Add descriptive error messages
identifier = many1(letter()).desc("identifier")
keyword = string("def").desc("def keyword")
parser = keyword >> identifier
try:
result = parser.parse("class foo") # Fails at the first character, so the "def keyword" description is reported
except ParseError as e:
print(e.expected) # "def keyword"
# Chain descriptions
complex_parser = (
string("function").desc("function keyword") >>
many1(letter()).desc("function name") >>
string("(").desc("opening parenthesis")
)

from parsec import bind, compose, choice, many1, letter, digit, parsecmap, string
# Functional composition
def to_int(digits):
return int("".join(digits))
number_parser = parsecmap(many1(digit()), to_int)
# Functional choice and composition
word_or_number = choice(
parsecmap(many1(letter()), "".join),
number_parser
)
# Functional binding
def repeat_char_parser(char):
return many1(string(char))
parser = bind(letter(), repeat_char_parser)

Install with Tessl CLI
npx tessl i tessl/pypi-parsec