A universal Python parser combinator library inspired by Haskell's Parsec library.
—
Specialized parsers for character-level and string-level text processing. These parsers handle literal string matching, regular expression patterns, character class matching, whitespace processing, and end-of-input detection.
Parse literal strings and regular expression patterns with full control over matching behavior.
def string(s):
"""
Parse a literal string.
Args:
s (str): The exact string to match
Returns:
Parser: Parser that returns the matched string on success
Note:
Fails if the input doesn't exactly match the string s.
"""
def regex(exp, flags=0):
"""
Parse according to a regular expression pattern.
Args:
exp (str or compiled regex): Regular expression pattern
flags (int, optional): Regex flags (re.IGNORECASE, etc.)
Returns:
Parser: Parser that returns the matched text
Note:
Uses re.match() which anchors to current position.
Returns the full matched text (group 0).
"""

Parse individual characters based on membership in character sets or character properties.
def one_of(s):
"""
Parse a character that appears in the given string.
Args:
s (str): String containing allowed characters
Returns:
Parser: Parser that returns the matched character
Example:
one_of("abc") matches 'a', 'b', or 'c'
"""
def none_of(s):
"""
Parse a character that does NOT appear in the given string.
Args:
s (str): String containing forbidden characters
Returns:
Parser: Parser that returns the matched character
Example:
none_of("abc") matches any character except 'a', 'b', or 'c'
"""
def letter():
"""
Parse an alphabetic character.
Returns:
Parser: Parser that returns the matched letter
Note:
Uses Python's str.isalpha() method.
"""
def digit():
"""
Parse a numeric digit character.
Returns:
Parser: Parser that returns the matched digit character
Note:
Uses Python's str.isdigit() method.
"""

Handle whitespace characters and common whitespace patterns in text processing.
def space():
"""
Parse a single whitespace character.
Returns:
Parser: Parser that returns the matched whitespace character
Note:
Uses Python's str.isspace() method (spaces, tabs, newlines, etc.).
"""
def spaces():
"""
Parse zero or more whitespace characters.
Returns:
Parser: Parser that returns list of whitespace characters
Note:
Always succeeds, returns empty list if no whitespace found.
Equivalent to many(space()).
"""

Detect when the parser has reached the end of the input text.
def eof():
"""
Parse end-of-file (end of input string).
Returns:
Parser: Parser that returns None when at end of input
Note:
Succeeds only when no more characters remain to be parsed.
Useful for ensuring entire input is consumed.
"""

from parsec import string, regex, ParseError
# Literal string matching
hello = string("hello")
result = hello.parse("hello world") # Returns "hello"
# Case-sensitive matching
parser = string("Hello")
try:
    result = parser.parse("hello") # Raises ParseError
except ParseError:
    print("Case mismatch")
# Regular expression matching
number = regex(r'\d+')
result = number.parse("123abc") # Returns "123"
# Regex with flags
import re
word = regex(r'[a-z]+', re.IGNORECASE)
result = word.parse("Hello") # Returns "Hello"

from parsec import one_of, none_of, letter, digit
# Character from set
vowel = one_of("aeiou")
result = vowel.parse("apple") # Returns 'a'
# Character not in set
consonant = none_of("aeiou")
result = consonant.parse("hello") # Returns 'h'
# Alphabetic characters
char = letter()
result = char.parse("abc123") # Returns 'a'
# Numeric digits
num = digit()
result = num.parse("123abc") # Returns '1'

from parsec import space, spaces, string, generate, many1, letter
# Single whitespace
ws = space()
result = ws.parse(" hello") # Returns ' '
# Multiple whitespace (optional)
ws_many = spaces()
result = ws_many.parse("   hello") # Returns [' ', ' ', ' ']
result = ws_many.parse("hello") # Returns [] (empty list)
# Parse words separated by whitespace
@generate
def spaced_words():
    word1 = yield many1(letter())
    yield spaces()
    word2 = yield many1(letter())
    return ("".join(word1), "".join(word2))
result = spaced_words.parse("hello world") # Returns ("hello", "world")

from parsec import regex, string, many, many1, letter, digit, none_of, generate
# Email-like pattern
email = regex(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')
result = email.parse("user@example.com") # Returns "user@example.com"
# Quoted strings
@generate
def quoted_string():
    yield string('"')
    content = yield many(none_of('"'))
    yield string('"')
    return "".join(content)
result = quoted_string.parse('"hello world"') # Returns "hello world"
# Numbers with optional decimal
from parsec import Parser, Value
@generate
def decimal_number():
    integer_part = yield many1(digit())
    decimal_part = yield (string('.') >> many1(digit())) ^ Parser(lambda text, index: Value.success(index, []))
    if decimal_part:
        return float("".join(integer_part) + "." + "".join(decimal_part))
    else:
        return int("".join(integer_part))
result = decimal_number.parse("123.45") # Returns 123.45
result = decimal_number.parse("123") # Returns 123

from parsec import eof, many1, letter, generate, ParseError
# Ensure entire input is letters
letters_only = many1(letter()) << eof()
result = letters_only.parse("hello") # Returns ['h','e','l','l','o']
try:
    result = letters_only.parse("hello123") # Raises ParseError
except ParseError:
    print("Non-letters found")
# Parse complete tokens
@generate
def complete_word():
    chars = yield many1(letter())
    yield eof() # Ensure nothing follows
    return "".join(chars)
result = complete_word.parse("hello") # Returns "hello"

Install with Tessl CLI
npx tessl i tessl/pypi-parsec