Easy-to-use parser combinators for parsing in pure Python
—
Ready-to-use parser constants for common parsing tasks. These parsers handle frequent parsing scenarios like whitespace, character classes, and special positions.
Parse common character categories.
any_char: Parser
"""Parse any single character."""
letter: Parser
"""Parse any alphabetic character (using str.isalpha())."""
digit: Parser
"""Parse any digit character (using str.isdigit())."""
decimal_digit: Parser
"""Parse decimal digits 0-9 specifically."""

Parse whitespace characters using regular expressions.
whitespace: Parser
"""Parse one or more whitespace characters (regex r'\\s+')."""

Handle special parsing positions and states.
eof: Parser
"""Parse end of input - succeeds only when no more input remains."""
index: Parser
"""Get the current parse position as an integer."""
line_info: Parser
"""Get current line and column information as (line, column) tuple."""

from parsy import any_char, letter, digit, decimal_digit
# Parse any character
result = any_char.parse('x') # Returns 'x'
result = any_char.parse('5') # Returns '5'
result = any_char.parse('@') # Returns '@'
# Parse letters only
result = letter.parse('a') # Returns 'a'
result = letter.parse('Z') # Returns 'Z'
# letter.parse('5') # Raises ParseError
# Parse digits
result = digit.parse('7') # Returns '7'
result = decimal_digit.parse('3') # Returns '3'

from parsy import whitespace, string, regex, generate
# Parse whitespace
result = whitespace.parse(' ') # Returns ' '
result = whitespace.parse('\t\n ') # Returns '\t\n '
# Common pattern: optional whitespace
optional_ws = whitespace.optional()
# Lexeme pattern: parse something followed by optional whitespace
def lexeme(parser):
    return parser << optional_ws
# Parse tokens with automatic whitespace handling
number = lexeme(regex(r'\d+').map(int))
plus = lexeme(string('+'))
# Parse "123 + 456" with automatic whitespace handling
@generate
def addition():
    left = yield number
    yield plus
    right = yield number
    return left + right

result = addition.parse('123 + 456') # Returns 579

from parsy import index, line_info, string, regex, generate
# Track parse position
@generate
def positioned_parse():
    start_pos = yield index
    content = yield string('hello')
    end_pos = yield index
    return (start_pos, content, end_pos)

result = positioned_parse.parse('hello') # Returns (0, 'hello', 5)
# Track line and column information
@generate
def line_aware_parse():
    start_line_info = yield line_info
    content = yield regex(r'[^\n]+')
    end_line_info = yield line_info
    return {
        'content': content,
        'start': start_line_info,
        'end': end_line_info
    }

multiline_input = "line1\nline2\nline3"
# Position after "line1\n"
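result = line_aware_parse.parse_partial(multiline_input[6:])
# parse_partial returns (result, remaining_input); positions are relative to the sliced string:
# ({'content': 'line2', 'start': (0, 0), 'end': (0, 5)}, '\nline3')

from parsy import eof, string, regex, generate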
# Ensure complete parsing
complete_number = regex(r'\d+').map(int) << eof
result = complete_number.parse('123') # Returns 123
# complete_number.parse('123abc') # Raises ParseError - input not fully consumed
# Parse complete expressions
@generate
def complete_expression():
    expr = yield regex(r'[^;]+').desc('expression')
    yield string(';')
    yield eof
    return expr.strip()

result = complete_expression.parse('x = 5 + 3;') # Returns 'x = 5 + 3'
# complete_expression.parse('x = 5 + 3; extra') # Raises ParseError

from parsy import letter, digit, any_char, whitespace, generate, string, eof, alt
# Build identifier parser
@generate
def identifier():
    first = yield letter
    rest = yield (letter | digit).many()
    return first + ''.join(rest)

result = identifier.parse('var123') # Returns 'var123'
# Build word parser (letters separated by whitespace)
word = letter.at_least(1).concat()
words = word.sep_by(whitespace.at_least(1))
result = words.parse('hello world python') # Returns ['hello', 'world', 'python']
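# Build line parser
@generate
def line_with_ending():
    # any_char.many() alone is greedy and would swallow the newline (and everything
    # after it), so reject a newline before consuming each character.
    content = yield (string('\n').should_fail('newline') >> any_char).many().concat()
    yield alt(string('\n'), eof)
    return content
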
# Parse quoted string with escape sequences
@generate
def quoted_string():
    yield string('"')
    chars = yield (
        (string('\\') >> any_char) |  # Escaped character
        # should_fail() takes a description, not a character to exclude:
        # assert that the next char is not a quote, then consume it.
        (string('"').should_fail('closing quote') >> any_char)  # Any char except quote
    ).many().concat()
    yield string('"')
    return chars

result = quoted_string.parse('"hello \\"world\\""') # Returns 'hello "world"'

from parsy import line_info_at, ParseError
# Get line info for any position
text = "line1\nline2\nline3"
line_col = line_info_at(text, 8) # Position after "line1\nli"
print(line_col) # Returns (1, 2) - line 1, column 2 (0-indexed)
# Error handling with position info
def parse_with_position_info(parser, text):
    try:
        return parser.parse(text)
    except ParseError as e:
        line, col = line_info_at(text, e.index)
        print(f"Parse error at line {line}, column {col}: {e}")
        raise

Install with Tessl CLI
npx tessl i tessl/pypi-parsy