CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-parsec

A universal Python parser combinator library inspired by Haskell's Parsec library

Pending
Overview
Eval results
Files

docs/character-parsing.md

Character and String Parsing

Specialized parsers for character-level and string-level text processing. These parsers handle literal string matching, regular expression patterns, character class matching, whitespace processing, and end-of-input detection.

Capabilities

String Parsing

Parse literal strings and regular expression patterns with full control over matching behavior.

def string(s):
    """
    Parse a literal string.

    Matching is exact and case-sensitive, and is attempted at the
    current position of the input.

    Args:
        s (str): The exact string to match

    Returns:
        Parser: Parser that returns the matched string on success

    Note:
        Fails if the input at the current position does not start with
        exactly the string s. Input following the match does not cause
        a failure.

    Example:
        string("hello").parse("hello world") returns "hello"
        string("Hello").parse("hello") raises ParseError
    """

def regex(exp, flags=0):
    """
    Parse text that matches a regular expression pattern.

    Args:
        exp (str or compiled regex): Regular expression pattern
        flags (int, optional): Regex flags (re.IGNORECASE, etc.).
            Defaults to 0, i.e. no flags.

    Returns:
        Parser: Parser that returns the full matched text (group 0)

    Note:
        Uses re.match(), so the pattern must match starting at the
        current position; it is not searched for later in the input.

    Example:
        regex("[0-9]+").parse("123abc") returns "123"
        regex("[a-z]+", re.IGNORECASE).parse("Hello") returns "Hello"
    """

Character Class Parsing

Parse individual characters based on membership in character sets or character properties.

def one_of(s):
    """
    Parse a single character that appears in the given string.

    Args:
        s (str): String containing the allowed characters

    Returns:
        Parser: Parser that returns the matched character

    Note:
        Fails if the next character is not one of the characters in s.

    Example:
        one_of("abc") matches 'a', 'b', or 'c'
        one_of("aeiou").parse("apple") returns 'a'
    """

def none_of(s):
    """
    Parse a single character that does NOT appear in the given string.

    Args:
        s (str): String containing the forbidden characters

    Returns:
        Parser: Parser that returns the matched character

    Note:
        Fails if the next character is one of the characters in s.

    Example:
        none_of("abc") matches any character except 'a', 'b', or 'c'
        none_of("aeiou").parse("hello") returns 'h'
    """

def letter():
    """
    Parse a single alphabetic character.

    Returns:
        Parser: Parser that returns the matched letter

    Note:
        Uses Python's str.isalpha() method, so any character that
        str.isalpha() accepts is a letter (this includes non-ASCII
        alphabetic characters).

    Example:
        letter().parse("abc123") returns 'a'
    """

def digit():
    """
    Parse a single numeric digit character.

    Returns:
        Parser: Parser that returns the matched digit character
            (a str, not an int)

    Note:
        Uses Python's str.isdigit() method, so any character that
        str.isdigit() accepts is a digit (not only ASCII '0'-'9').

    Example:
        digit().parse("123abc") returns '1'
    """

Whitespace Parsing

Handle whitespace characters and common whitespace patterns in text processing.

def space():
    """
    Parse exactly one whitespace character.

    Returns:
        Parser: Parser that returns the matched whitespace character

    Note:
        Uses Python's str.isspace() method (spaces, tabs, newlines, etc.).
        For zero or more whitespace characters use spaces().

    Example:
        space().parse(" hello") returns ' '
    """

def spaces():
    """
    Parse zero or more whitespace characters.

    Returns:
        Parser: Parser that returns a list of the matched whitespace
            characters, one list element per character

    Note:
        Always succeeds, returns empty list if no whitespace found.
        Equivalent to many(space()).

    Example:
        spaces().parse("   hello") returns [' ', ' ', ' ']
        spaces().parse("hello") returns []
    """

End-of-Input Detection

Detect when the parser has reached the end of the input text.

def eof():
    """
    Parse end-of-file (end of the input string).

    Returns:
        Parser: Parser that returns None when at end of input

    Note:
        Succeeds only when no more characters remain to be parsed.
        Useful for ensuring the entire input is consumed, typically as
        the last step of a parser.

    Example:
        (many1(letter()) << eof()).parse("hello") succeeds
        (many1(letter()) << eof()).parse("hello123") raises ParseError
    """

Usage Examples

String Matching

import re

from parsec import ParseError, regex, string

# Literal string matching: the match is returned, trailing input is ignored
hello = string("hello")
result = hello.parse("hello world")  # Returns "hello"

# Matching is case-sensitive; a failed parse raises ParseError
parser = string("Hello")
try:
    result = parser.parse("hello")  # Raises ParseError
except ParseError:
    print("Case mismatch")

# Regular expression matching (anchored at the current position)
number = regex(r'\d+')
result = number.parse("123abc")  # Returns "123"

# Regex with flags
word = regex(r'[a-z]+', re.IGNORECASE)
result = word.parse("Hello")  # Returns "Hello"

Character Class Matching

from parsec import one_of, none_of, letter, digit

# one_of: succeeds when the next character is in the given set
vowel = one_of("aeiou")
result = vowel.parse("apple")  # Returns 'a'

# none_of: succeeds when the next character is NOT in the given set
consonant = none_of("aeiou")
result = consonant.parse("hello")  # Returns 'h'

# letter: a single alphabetic character (str.isalpha())
char = letter()
result = char.parse("abc123")  # Returns 'a'

# digit: a single digit character (str.isdigit()); the result is a str
num = digit()
result = num.parse("123abc")  # Returns '1'

Whitespace Handling

from parsec import generate, letter, many1, space, spaces, string

# Single whitespace character (exactly one is required)
ws = space()
result = ws.parse(" hello")  # Returns ' '

# Zero or more whitespace characters (never fails)
ws_many = spaces()
result = ws_many.parse("   hello")  # Returns [' ', ' ', ' ']
result = ws_many.parse("hello")     # Returns [] (empty list)


# Parse two words separated by optional whitespace
@generate
def spaced_words():
    """Parse two runs of letters with any amount of whitespace between them."""
    word1 = yield many1(letter())
    yield spaces()  # Result discarded; only the words are kept
    word2 = yield many1(letter())
    # many1(letter()) yields a list of characters, so join each into a str
    return ("".join(word1), "".join(word2))


result = spaced_words.parse("hello   world")  # Returns ("hello", "world")

Complex String Patterns

from parsec import (
    Parser,
    Value,
    digit,
    generate,
    letter,
    many,
    many1,
    none_of,
    regex,
    string,
)

# Email-like pattern
email = regex(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')
result = email.parse("user@example.com")  # Returns "user@example.com"


# Quoted strings
@generate
def quoted_string():
    """Parse a double-quoted string and return its content without the quotes."""
    yield string('"')
    content = yield many(none_of('"'))  # Everything up to the closing quote
    yield string('"')
    return "".join(content)


result = quoted_string.parse('"hello world"')  # Returns "hello world"


# Numbers with optional decimal
@generate
def decimal_number():
    """Parse digits with an optional fractional part; return a float or an int."""
    integer_part = yield many1(digit())
    # Try ".digits"; if that does not match, fall back to a parser that
    # succeeds at the same index without consuming input and yields [].
    decimal_part = yield (string('.') >> many1(digit())) ^ Parser(lambda text, index: Value.success(index, []))
    if decimal_part:
        return float("".join(integer_part) + "." + "".join(decimal_part))
    else:
        return int("".join(integer_part))


result = decimal_number.parse("123.45")  # Returns 123.45
result = decimal_number.parse("123")     # Returns 123

End-of-Input Validation

from parsec import ParseError, eof, generate, letter, many1

# Ensure entire input is letters: eof() fails if anything is left over
letters_only = many1(letter()) << eof()
result = letters_only.parse("hello")    # Returns ['h','e','l','l','o']
try:
    result = letters_only.parse("hello123")  # Raises ParseError
except ParseError:
    print("Non-letters found")


# Parse complete tokens
@generate
def complete_word():
    """Parse a word made only of letters that spans the whole input."""
    chars = yield many1(letter())
    yield eof()  # Ensure nothing follows
    return "".join(chars)


result = complete_word.parse("hello")  # Returns "hello"

Install with Tessl CLI

npx tessl i tessl/pypi-parsec

docs

character-parsing.md

combinators.md

core-primitives.md

index.md

parser-generation.md

parser-operators.md

tile.json