A Python parsing module providing an alternative approach to creating and executing simple grammars
—
High-level helper functions for common parsing patterns. These utilities simplify the creation of complex parsers by providing pre-built patterns for frequently encountered parsing scenarios like delimited lists, nested expressions, and markup parsing.
Required imports for type annotations:
from typing import Union, Optional, Iterable, Callable
from pyparsing import ParserElement, ParseExpression, ParseResults

Functions for parsing various list and array structures.
def delimited_list(expr: ParserElement,
delim: str = ",",
combine: bool = False) -> ParserElement:
"""Create parser for delimited lists."""
class DelimitedList(ParseExpression):
"""Parse delimited lists with customizable delimiters."""
def __init__(self,
expr: ParserElement,
delim: str = ",",
combine: bool = False): ...

def counted_array(expr: ParserElement,
int_expr: ParserElement = None) -> ParserElement:
"""Create parser for counted arrays (count followed by elements)."""

Usage examples:
# Parse comma-separated values
csv_row = delimited_list(Word(alphanums))
# Matches: "apple,banana,cherry" -> ['apple', 'banana', 'cherry']
# Parse counted array
items = counted_array(Word(alphas))
# Matches: "3 red green blue" -> ['red', 'green', 'blue']
# Custom delimiter
pipe_list = delimited_list(Word(alphas), delim="|")
# Matches: "one|two|three" -> ['one', 'two', 'three']

Functions for creating choice expressions from strings.
def one_of(strs: Union[Iterable[str], str],
caseless: bool = False,
use_regex: bool = True,
as_keyword: bool = False,
*,
# Backward compatibility parameters
useRegex: bool = True,
asKeyword: bool = False) -> ParserElement:
"""Create MatchFirst expression from string of alternatives."""

Usage examples:
# Simple string alternatives
boolean = one_of("true false")
# Matches either "true" or "false"
# Case-insensitive matching
direction = one_of("North South East West", caseless=True)
# Matches "north", "SOUTH", "East", etc.
# Keyword matching (with word boundaries)
operator = one_of("and or not", asKeyword=True)
# Matches "and" but not "band"

Functions for parsing nested structures with delimiters.
def nested_expr(opener: str = "(",
closer: str = ")",
content: ParserElement = None,
ignoreExpr: ParserElement = None) -> ParserElement:
"""Create parser for nested expressions with delimiters."""

Usage examples:
# Parse nested parentheses
nested_parens = nested_expr("(", ")")
# Matches: "(a (b c) d)" -> [['a', ['b', 'c'], 'd']]
# Parse nested brackets with specific content
bracket_list = nested_expr("[", "]", content=delimited_list(Word(alphas)))
# Matches: "[apple, [banana, cherry], date]"
# Parse nested braces ignoring comments
code_block = nested_expr("{", "}", ignoreExpr=c_style_comment)

Functions for parsing markup languages.
def make_html_tags(tagStr: str) -> tuple:
"""Create opening and closing HTML tag parsers."""
def make_xml_tags(tagStr: str) -> tuple:
"""Create opening and closing XML tag parsers."""

def replace_html_entity(tokens: ParseResults) -> str:
"""Replace HTML entities with their character equivalents."""

Usage examples:
# Create HTML tag parsers
div_start, div_end = make_html_tags("div")
div_content = div_start + SkipTo(div_end) + div_end
# Parse XML with attributes
para_start, para_end = make_xml_tags("para")
para_with_attrs = para_start + SkipTo(para_end) + para_end
# Handle HTML entities
entity_parser = common_html_entity.set_parse_action(replace_html_entity)

Functions for parsing dictionary-like structures.
def dict_of(key: ParserElement, value: ParserElement) -> ParserElement:
"""Create parser for dictionary-like structures."""

Usage examples:
# Parse key-value pairs
config_item = dict_of(Word(alphas), QuotedString('"'))
# Matches: 'name "John"' -> {'name': 'John'}
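# Parse multiple key-value pairs: dict_of already builds
# Dict(OneOrMore(Group(key + value))), so no extra wrapping is needed
config_dict = config_item
# Matches: 'name "John" city "Paris"' -> {'name': 'John', 'city': 'Paris'}

Function for parsing infix mathematical and logical expressions.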
def infix_notation(baseExpr: ParserElement,
opList: list,
lpar: str = "(",
rpar: str = ")") -> ParserElement:
"""Create parser for infix notation expressions."""
class OpAssoc:
"""Enumeration for operator associativity."""
LEFT = object()
RIGHT = object()
NONE = object()

Usage example:
# Parse arithmetic expressions
number = Word(nums)
arith_expr = infix_notation(number, [
    # Each tuple is (operator, number of operands, associativity).
    # Precedence comes from list order: highest-precedence level first.
    ('^', 2, OpAssoc.RIGHT),           # Exponentiation, binary, right associative
    (one_of('* /'), 2, OpAssoc.LEFT),  # Multiplication and division, binary
    (one_of('+ -'), 2, OpAssoc.LEFT),  # Addition and subtraction, binary
])
# Parses: "2 + 3 * 4" -> [['2', '+', ['3', '*', '4']]]

Functions for matching previously parsed content.
def match_previous_literal(expr: ParserElement) -> ParserElement:
"""Create parser that matches a previously parsed literal."""
def match_previous_expr(expr: ParserElement) -> ParserElement:
"""Create parser that matches a previously parsed expression."""

Usage examples:
# Match repeated literals
first_word = Word(alphas)
repeat_word = match_previous_literal(first_word)
pattern = first_word + ":" + repeat_word
# Matches: "hello:hello" but not "hello:world"
# Match repeated expressions
tag_name = Word(alphas)
open_tag = "<" + tag_name + ">"
close_tag = "</" + match_previous_expr(tag_name) + ">"
xml_element = open_tag + SkipTo(close_tag) + close_tag

Functions for transforming parsed text.
def original_text_for(expr: ParserElement, asString: bool = True) -> ParserElement:
"""Return original text instead of parsed tokens."""
def ungroup(expr: ParserElement) -> ParserElement:
"""Remove grouping from expression results."""

Usage examples:
# Get original text of complex expression
date_pattern = Word(nums) + "/" + Word(nums) + "/" + Word(nums)
date_text = original_text_for(date_pattern)
# Returns "12/25/2023" instead of ['12', '/', '25', '/', '2023']
# Remove unwanted grouping
grouped_items = Group(Word(alphas) + Word(nums))
flat_items = ungroup(grouped_items)

Functions for creating parse actions.
def replace_with(replStr: str) -> callable:
"""Create parse action that replaces tokens with specified string."""
def remove_quotes(s: str, loc: int, tokens: ParseResults) -> str:
"""Parse action to remove surrounding quotes."""
def with_attribute(**attrDict) -> callable:
"""Create parse action for matching HTML/XML attributes."""
def with_class(classname: str) -> callable:
"""Create parse action for matching HTML class attributes."""

Usage examples:
# Replace matched tokens
placeholder = Literal("TBD").set_parse_action(replace_with("To Be Determined"))
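# Remove quotes from strings (dbl_quoted_string keeps its surrounding quotes;
# QuotedString already strips them, so remove_quotes is not needed with it)
unquoted_string = dbl_quoted_string.copy().set_parse_action(remove_quotes)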
# Match HTML elements with specific attributes
div_with_id = any_open_tag.set_parse_action(with_attribute(id="main"))
# Match elements with CSS class
highlighted = any_open_tag.set_parse_action(with_class("highlight"))

Pre-built parser expressions for common patterns.
# Comment parsers
c_style_comment: ParserElement # /* comment */
html_comment: ParserElement # <!-- comment -->
rest_of_line: ParserElement # Everything to end of line
dbl_slash_comment: ParserElement # // comment
cpp_style_comment: ParserElement # C++ style comments
java_style_comment: ParserElement # Java style comments
python_style_comment: ParserElement # # comment
# HTML/XML parsers
any_open_tag: ParserElement # Any opening HTML/XML tag
any_close_tag: ParserElement # Any closing HTML/XML tag
common_html_entity: ParserElement # Common HTML entities (&amp;, &lt;, etc.)
# String parsers
dbl_quoted_string: ParserElement # "double quoted string"
sgl_quoted_string: ParserElement # 'single quoted string'
quoted_string: ParserElement # Either single or double quoted
unicode_string: ParserElement # Unicode string literals

Specialized utilities for complex parsing scenarios.
def condition_as_parse_action(condition: callable,
message: str = "failed user-defined condition") -> callable:
"""Convert boolean condition to parse action."""
def token_map(func: callable, *args) -> callable:
"""Create parse action that maps function over tokens."""
def autoname_elements() -> None:
"""Automatically assign names to parser elements for debugging."""

Usage examples:
# Conditional parsing
positive_int = Word(nums).set_parse_action(
condition_as_parse_action(lambda t: int(t[0]) > 0, "must be positive")
)
# Transform all tokens
uppercase_words = OneOrMore(Word(alphas)).set_parse_action(token_map(str.upper))
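# Enable automatic naming for debugging; call it after the expressions are
# assigned, because names are taken from the variables that already exist
parser = Word(alphas) + Word(nums)
autoname_elements() # parser is now named "parser"

Specialized utility functions for advanced parsing scenarios.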
def col(loc: int, strg: str) -> int:
"""Return column number of location in string."""
def line(loc: int, strg: str) -> str:
"""Return the line of text containing the location in string."""
def lineno(loc: int, strg: str) -> int:
"""Return line number of location in string."""
def match_only_at_col(n: int) -> callable:
"""Create parse action that only allows a match at the specified column."""
def srange(s: str) -> str:
"""Expand character range expression."""

Usage examples:
# Column-specific matching (match_only_at_col returns a parse action)
first_col_word = Word(alphas).set_parse_action(match_only_at_col(1)) # Match only at column 1
code_line = first_col_word + rest_of_line
# Character range expansion
vowels = srange("[aeiouAEIOU]") # Expands to "aeiouAEIOU"
consonants = srange("[b-df-hj-np-tv-zB-DF-HJ-NP-TV-Z]")
# Position utilities (used in parse actions)
def report_position(s, loc, tokens):
    print(f"Found at line {lineno(loc, s)}, column {col(loc, s)}")
    return tokens
parser = Word(alphas).set_parse_action(report_position)

Install with Tessl CLI
npx tessl i tessl/pypi-pyparsing