CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-parsy

Easy-to-use parser combinators for parsing in pure Python

Pending
Overview
Eval results
Files

predefined-parsers.mddocs/

Pre-defined Parsers

Ready-to-use parser constants for common parsing tasks. These parsers handle frequent parsing scenarios like whitespace, character classes, and special positions.

Capabilities

Character Class Parsers

Parse common character categories.

any_char: Parser
"""Parse any single character."""

letter: Parser
"""Parse any alphabetic character (using str.isalpha())."""

digit: Parser
"""Parse any digit character (using str.isdigit())."""

decimal_digit: Parser
"""Parse decimal digits 0-9 specifically."""

Whitespace Parsing

Parse whitespace characters using regular expressions.

whitespace: Parser
"""Parse one or more whitespace characters (regex r'\s+')."""

Position and Control Parsers

Handle special parsing positions and states.

eof: Parser
"""Parse end of input - succeeds only when no more input remains."""

index: Parser
"""Get the current parse position as an integer."""

line_info: Parser
"""Get the current line and column as a 0-indexed (line, column) tuple."""

Usage Examples

Basic Character Parsing

from parsy import any_char, letter, digit, decimal_digit

# Parse any character
# Each of these parsers consumes exactly one character, and .parse() requires
# the whole input to be consumed, so the inputs below are single characters.
result = any_char.parse('x')  # Returns 'x'
result = any_char.parse('5')  # Returns '5'
result = any_char.parse('@')  # Returns '@'

# Parse letters only
result = letter.parse('a')  # Returns 'a'
result = letter.parse('Z')  # Returns 'Z'
# letter.parse('5')  # Raises ParseError

# Parse digits
# digit relies on str.isdigit(), so it also accepts non-ASCII digit characters;
# decimal_digit accepts only the ten characters 0-9.
result = digit.parse('7')  # Returns '7'
result = decimal_digit.parse('3')  # Returns '3'

Whitespace Handling

# generate is needed for the @generate-decorated parser in this example
from parsy import generate, regex, string, whitespace

# Parse whitespace
result = whitespace.parse('   ')  # Returns '   '
result = whitespace.parse('\t\n ')  # Returns '\t\n '

# Common pattern: optional whitespace
optional_ws = whitespace.optional()

# Lexeme pattern: parse something followed by optional whitespace
def lexeme(parser):
    """Wrap *parser* so that any whitespace following its match is skipped.

    The returned parser yields the value produced by *parser*; the result of
    the trailing optional-whitespace parser is discarded by the << operator.
    """
    token_then_ws = parser << optional_ws
    return token_then_ws

# Parse tokens with automatic whitespace handling
number = lexeme(regex(r'\d+').map(int))
plus = lexeme(string('+'))

# Parse "123 + 456" with automatic whitespace handling
@generate
def addition():
    """Parse `<number> + <number>` and return the sum of the two operands."""
    lhs = yield number
    yield plus  # the operator token itself carries no value we need
    rhs = yield number
    return lhs + rhs

result = addition.parse('123 + 456')  # Returns 579

Position Tracking

from parsy import index, line_info, string, regex, generate

# Track parse position
@generate
def positioned_parse():
    """Parse the literal 'hello' and report the offsets on either side of it.

    Returns a (start, text, end) tuple, where start and end are the values
    produced by the index parser before and after the literal is consumed.
    """
    begin = yield index
    matched = yield string('hello')
    finish = yield index
    return begin, matched, finish

result = positioned_parse.parse('hello')  # Returns (0, 'hello', 5)

# Track line and column information
@generate
def line_aware_parse():
    """Parse the rest of the current line and record line info around it.

    Returns a dict with the matched text under 'content' and the line_info
    values captured before and after the match under 'start' and 'end'.
    """
    before = yield line_info
    text = yield regex(r'[^\n]+')
    after = yield line_info
    return dict(content=text, start=before, end=after)

multiline_input = "line1\nline2\nline3"
# Skip "line1\n" inside the parser rather than slicing the string: line_info is
# computed relative to the text handed to the parser, so slicing the input
# would make the reported position start again at (0, 0).
result = (regex(r'[^\n]*\n') >> line_aware_parse).parse_partial(multiline_input)
# parse_partial returns (value, remainder):
# ({'content': 'line2', 'start': (1, 0), 'end': (1, 5)}, '\nline3')

End-of-Input Validation

# generate is needed for the @generate-decorated parser in this example
from parsy import eof, generate, regex, string

# Ensure complete parsing
complete_number = regex(r'\d+').map(int) << eof
result = complete_number.parse('123')  # Returns 123
# complete_number.parse('123abc')  # Raises ParseError - input not fully consumed

# Parse complete expressions
@generate
def complete_expression():
    """Parse one ';'-terminated expression that must span the entire input.

    Returns the expression text with surrounding whitespace stripped.
    """
    body = yield regex(r'[^;]+').desc('expression')
    yield string(';')
    yield eof  # nothing may follow the terminating semicolon
    return body.strip()

result = complete_expression.parse('x = 5 + 3;')  # Returns 'x = 5 + 3'
# complete_expression.parse('x = 5 + 3; extra')  # Raises ParseError

Combining Pre-defined Parsers

# alt, eof and string are used by the line and quoted-string parsers in this example
from parsy import alt, any_char, digit, eof, generate, letter, string, whitespace

# Build identifier parser
@generate
def identifier():
    """Parse an identifier: one letter followed by any number of letters or digits.

    Returns the matched identifier as a single string.
    """
    head = yield letter
    tail = yield (letter | digit).many().concat()
    return head + tail

result = identifier.parse('var123')  # Returns 'var123'

# Build word parser (words made of letters, separated by whitespace)
word = letter.at_least(1).concat()
# whitespace already matches one or more characters, so it can be used as the
# separator directly without an at_least(1) wrapper
words = word.sep_by(whitespace)
result = words.parse('hello world python')  # Returns ['hello', 'world', 'python']

# Build line parser
@generate
def line_with_ending():
    """Parse a single line, terminated by a newline or by the end of input.

    Returns the line's text without the terminating newline.
    """
    # A bare any_char.many() is greedy and would consume the newline (and every
    # following line) as well, so only accept characters that are not a newline.
    not_newline = string('\n').should_fail('end of line') >> any_char
    content = yield not_newline.many().concat()
    yield alt(string('\n'), eof)
    return content

# Parse quoted string with escape sequences
@generate
def quoted_string():
    """Parse a double-quoted string in which a backslash escapes the next character.

    Returns the string's contents without the surrounding quotes and with
    each escaping backslash removed.
    """
    yield string('"')
    # should_fail() negates the parser it is called on and consumes no input;
    # its argument is only a description. To express "any char except a quote"
    # we therefore negate the quote parser and then consume one character.
    chars = yield (
        string('\\') >> any_char |                              # Escaped character
        string('"').should_fail('closing quote') >> any_char    # Any char except quote
    ).many().concat()
    yield string('"')
    return chars

result = quoted_string.parse('"hello \\"world\\""')  # Returns 'hello "world"'

Utility Functions

# ParseError is caught by the error-handling helper in this example
from parsy import ParseError, line_info_at

# Get line info for any position
text = "line1\nline2\nline3"
line_col = line_info_at(text, 8)  # Index 8 is just past "line1\nli"
print(line_col)  # Prints (1, 2) - line 1, column 2 (both 0-indexed)

# Error handling with position info
def parse_with_position_info(parser, text):
    """Run *parser* over *text*, reporting the failure position on error.

    Returns whatever ``parser.parse(text)`` returns. If a ParseError is
    raised, a message containing the line and column computed by
    line_info_at for the error's index is printed, and the same exception
    is re-raised.
    """
    try:
        parsed = parser.parse(text)
    except ParseError as e:
        line, col = line_info_at(text, e.index)
        print(f"Parse error at line {line}, column {col}: {e}")
        raise
    else:
        return parsed

Install with Tessl CLI

npx tessl i tessl/pypi-parsy

docs

basic-parsers.md

combinators.md

core-parser.md

index.md

predefined-parsers.md

utilities.md

tile.json