tessl/pypi-pyparsing

A Python parsing module providing an alternative approach to creating and executing simple grammars

—

Pending

Overview

Eval results

Files

Helper Functions and Utilities

Name: tessl/pypi-pyparsing
Author: tessl

High-level helper functions for common parsing patterns. These utilities simplify the creation of complex parsers by providing pre-built patterns for frequently encountered parsing scenarios like delimited lists, nested expressions, and markup parsing.

Required imports for type annotations:

from enum import Enum
from typing import Union, Optional, Iterable, Callable

from pyparsing import ParserElement, ParseExpression, ParseResults

Capabilities

List and Array Parsing

Functions for parsing various list and array structures.

def delimited_list(expr: ParserElement, 
                  delim: str = ",", 
                  combine: bool = False) -> ParserElement:
    """Create a parser for a list of ``expr`` items separated by ``delim``.

    Args:
        expr: Expression that matches a single list item.
        delim: Separator expected between items; a comma by default.
        combine: NOTE(review): presumably returns the items and delimiters
            joined into one token when True -- confirm against the
            pyparsing reference.

    Returns:
        A parser whose results hold the matched items without the
        delimiters, e.g. ``"apple,banana,cherry"`` ->
        ``['apple', 'banana', 'cherry']``.
    """

class DelimitedList(ParseExpression):
    """Parse delimited lists with customizable delimiters.

    Class form of ``delimited_list``; it takes the same ``expr``, ``delim``
    and ``combine`` arguments.
    """
    
    # expr:    expression that matches a single list item
    # delim:   separator expected between items (a comma by default)
    # combine: NOTE(review): presumably joins items and delimiters into one
    #          token when True -- confirm against the pyparsing reference
    def __init__(self, 
                 expr: ParserElement, 
                 delim: str = ",", 
                 combine: bool = False): ...

def counted_array(expr: ParserElement, 
                 int_expr: Optional[ParserElement] = None) -> ParserElement:
    """Create parser for counted arrays (count followed by elements).

    The input starts with an integer count, followed by that many matches
    of ``expr``; the count itself is not part of the results, e.g.
    ``"3 red green blue"`` -> ``['red', 'green', 'blue']``.

    Args:
        expr: Expression that matches one array element.
        int_expr: NOTE(review): presumably an alternative expression for
            parsing the leading count; ``None`` selects the default
            integer parser -- confirm against the pyparsing reference.
    """

Usage examples:

# Parse comma-separated values (the default delimiter is ",")
csv_row = delimited_list(Word(alphanums))
# Matches: "apple,banana,cherry" -> ['apple', 'banana', 'cherry']

# Parse counted array: a leading count, then that many elements
items = counted_array(Word(alphas))
# Matches: "3 red green blue" -> ['red', 'green', 'blue']

# Custom delimiter, passed through the `delim` argument
pipe_list = delimited_list(Word(alphas), delim="|")
# Matches: "one|two|three" -> ['one', 'two', 'three']

String Choice and Alternatives

Functions for creating choice expressions from strings.

def one_of(strs: Union[Iterable[str], str], 
          caseless: bool = False, 
          use_regex: bool = True,
          as_keyword: bool = False,
          *,
          # Backward compatibility parameters
          useRegex: bool = True,
          asKeyword: bool = False) -> ParserElement:
    """Create MatchFirst expression from string of alternatives.

    Args:
        strs: The alternatives, either as one whitespace-separated string
            (``"true false"``) or as an iterable of strings.
        caseless: Match the alternatives without regard to case.
        use_regex: NOTE(review): presumably allows the alternatives to be
            compiled into a single regular expression -- confirm against
            the pyparsing reference.
        as_keyword: Match each alternative only as a whole word, so that
            ``"and"`` matches but ``"band"`` does not.
        useRegex: Keyword-only, backward-compatible spelling of
            ``use_regex``.
        asKeyword: Keyword-only, backward-compatible spelling of
            ``as_keyword``.
    """

Usage examples:

# Simple string alternatives, given as one whitespace-separated string
boolean = one_of("true false")
# Matches either "true" or "false"

# Case-insensitive matching
direction = one_of("North South East West", caseless=True)
# Matches "north", "SOUTH", "East", etc.

# Keyword matching (with word boundaries); asKeyword is the
# backward-compatible spelling of as_keyword
operator = one_of("and or not", asKeyword=True)
# Matches "and" but not "band"

Nested Expression Parsing

Functions for parsing nested structures with delimiters.

def nested_expr(opener: str = "(", 
               closer: str = ")", 
               content: Optional[ParserElement] = None,
               ignoreExpr: Optional[ParserElement] = None) -> ParserElement:
    """Create parser for nested expressions with delimiters.

    Each ``opener`` ... ``closer`` pair becomes one nested list in the
    results, e.g. ``"(a (b c) d)"`` -> ``[['a', ['b', 'c'], 'd']]``.

    Args:
        opener: Text that starts a nested group.
        closer: Text that ends a nested group.
        content: Expression for the items between the delimiters.
            NOTE(review): with ``None`` the items appear to be
            whitespace-separated words -- confirm the exact default.
        ignoreExpr: Expression, such as a comment parser, whose matches
            are skipped while scanning the nested content.
    """

Usage examples:

# Parse nested parentheses
nested_parens = nested_expr("(", ")")
# Matches: "(a (b c) d)" -> [['a', ['b', 'c'], 'd']]

# Parse nested brackets with specific content
bracket_list = nested_expr("[", "]", content=delimited_list(Word(alphas)))
# Matches: "[apple, banana [cherry, date]]"
# NOTE(review): a comma directly before or after a nested list, as in
# "[apple, [banana, cherry], date]", is not consumed by this content
# expression, so that input does not appear to parse -- confirm.

# Parse nested braces ignoring comments
code_block = nested_expr("{", "}", ignoreExpr=c_style_comment)

HTML/XML Parsing Utilities

Functions for parsing markup languages.

def make_html_tags(tagStr: str) -> tuple:
    """Create opening and closing HTML tag parsers.

    Args:
        tagStr: Tag name without angle brackets, e.g. ``"div"``.

    Returns:
        A 2-tuple ``(opening_tag_parser, closing_tag_parser)``.
    """

def make_xml_tags(tagStr: str) -> tuple:
    """Create opening and closing XML tag parsers.

    Args:
        tagStr: Tag name without angle brackets, e.g. ``"para"``.

    Returns:
        A 2-tuple ``(opening_tag_parser, closing_tag_parser)``.
    """

def replace_html_entity(tokens: ParseResults) -> str:
    """Replace HTML entities with their character equivalents.

    Intended for use as a parse action on an entity expression such as
    ``common_html_entity``.

    Args:
        tokens: The tokens matched by the entity expression.

    Returns:
        The character that the matched entity stands for.
    """

Usage examples:

# Create HTML tag parsers; the helper returns an (opening, closing) pair
div_start, div_end = make_html_tags("div")
# SkipTo(div_end) takes everything up to the closing tag as the body
div_content = div_start + SkipTo(div_end) + div_end

# Parse XML with attributes
para_start, para_end = make_xml_tags("para")
para_with_attrs = para_start + SkipTo(para_end) + para_end

# Handle HTML entities: convert each matched entity to its character
entity_parser = common_html_entity.set_parse_action(replace_html_entity)

Dictionary and Key-Value Parsing

Functions for parsing dictionary-like structures.

def dict_of(key: ParserElement, value: ParserElement) -> ParserElement:
    """Create parser for dictionary-like structures.

    Args:
        key: Expression that matches a key.
        value: Expression that matches the value following a key.

    Returns:
        A parser whose results can be read by key, e.g. with a word key
        and a quoted-string value, ``'name "John"'`` ->
        ``{'name': 'John'}``.
    """

Usage examples:

# Parse key-value pairs  
config_item = dict_of(Word(alphas), QuotedString('"'))
# Matches: 'name "John"' -> {'name': 'John'}

# Parse multiple key-value pairs
# NOTE(review): dict_of() appears to accept repeated key-value pairs on its
# own, which would make this extra Dict(OneOrMore(...)) wrapper redundant
# -- confirm against the pyparsing reference.
config_dict = Dict(OneOrMore(config_item))

Infix Notation Parsing

Function for parsing infix mathematical and logical expressions.

def infix_notation(baseExpr: ParserElement, 
                  opList: list,
                  lpar: str = "(",
                  rpar: str = ")") -> ParserElement:
    """Create parser for infix notation expressions.

    Args:
        baseExpr: Expression for the operands, e.g. a number parser.
        opList: Operator definitions, one tuple per precedence level, in
            the form ``(operator, operand_count, associativity)`` where
            ``operand_count`` is 1 (unary), 2 (binary) or 3 (ternary) and
            ``associativity`` is an ``OpAssoc`` member.  Levels are listed
            from highest precedence (binds tightest) to lowest.
        lpar: Opening delimiter for grouping sub-expressions.
        rpar: Closing delimiter for grouping sub-expressions.

    Returns:
        A parser that nests its results according to operator precedence.
    """

class OpAssoc(Enum):
    """Enumeration for operator associativity.

    Members are passed as the associativity field of the operator tuples
    given to ``infix_notation``.  As enum members they are singletons, so
    identity and equality comparisons keep working, and they also provide
    a readable ``repr``, a ``name``, and iteration over the class.
    """

    # Operators of equal precedence group from the left: a - b - c == (a - b) - c
    LEFT = 1
    # Operators of equal precedence group from the right: a ^ b ^ c == a ^ (b ^ c)
    RIGHT = 2
    # NOTE(review): upstream pyparsing documents only LEFT and RIGHT; NONE is
    # retained here for backward compatibility -- confirm before relying on it.
    NONE = 3

Usage example:

# Parse arithmetic expressions
number = Word(nums)
# Each entry is (operator, number of operands, associativity).  The second
# field is the operand count (2 = binary), not a precedence value; precedence
# comes from list order, highest (binds tightest) first.
arith_expr = infix_notation(number, [
    ('^', 2, OpAssoc.RIGHT),            # Exponentiation, right associative
    (one_of('* /'), 2, OpAssoc.LEFT),   # Multiplication and division
    (one_of('+ -'), 2, OpAssoc.LEFT),   # Addition and subtraction
])
# Parses: "2 + 3 * 4" -> [['2', '+', ['3', '*', '4']]]
# (Word(nums) yields strings; attach a parse action to convert them to int)

Previous Match Functions

Functions for matching previously parsed content.

def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Create parser that matches a previously parsed literal.

    The returned parser matches only a repeat of the text that ``expr``
    matched earlier in the same parse, e.g. ``"hello:hello"`` but not
    ``"hello:world"``.

    Args:
        expr: The expression whose earlier match must be repeated.
    """

def match_previous_expr(expr: ParserElement) -> ParserElement:
    """Create parser that matches a previously parsed expression.

    Args:
        expr: The expression whose earlier match must be repeated.

    NOTE(review): presumably differs from ``match_previous_literal`` by
    re-parsing with ``expr`` and comparing the resulting tokens rather than
    matching the earlier text literally -- confirm against the pyparsing
    reference.
    """

Usage examples:

# Match repeated literals: the second word must equal the first
first_word = Word(alphas)
repeat_word = match_previous_literal(first_word)
pattern = first_word + ":" + repeat_word
# Matches: "hello:hello" but not "hello:world"

# Match repeated expressions: the closing tag must name the same element
# as the opening tag
tag_name = Word(alphas)
open_tag = "<" + tag_name + ">"
close_tag = "</" + match_previous_expr(tag_name) + ">"
xml_element = open_tag + SkipTo(close_tag) + close_tag

Text Transformation Utilities

Functions for transforming parsed text.

def original_text_for(expr: ParserElement, asString: bool = True) -> ParserElement:
    """Return original text instead of parsed tokens.

    Wraps ``expr`` so that a match yields the matched slice of the input,
    e.g. ``"12/25/2023"`` instead of ``['12', '/', '25', '/', '2023']``.

    Args:
        expr: The expression to wrap.
        asString: NOTE(review): presumably selects a plain string result
            when True -- confirm what is returned when False.
    """

def ungroup(expr: ParserElement) -> ParserElement:
    """Remove grouping from expression results.

    Args:
        expr: An expression whose results are grouped, e.g. one built
            with ``Group(...)``.

    Returns:
        A parser that matches the same input as ``expr`` but returns the
        tokens without the enclosing group.
    """

Usage examples:

# Get original text of complex expression
date_pattern = Word(nums) + "/" + Word(nums) + "/" + Word(nums)
date_text = original_text_for(date_pattern)
# Returns "12/25/2023" instead of ['12', '/', '25', '/', '2023']

# Remove unwanted grouping: flat_items matches the same input as
# grouped_items but returns the tokens without the enclosing group
grouped_items = Group(Word(alphas) + Word(nums))
flat_items = ungroup(grouped_items)

Action Creation Functions

Functions for creating parse actions.

def replace_with(replStr: str) -> Callable:
    """Create parse action that replaces tokens with specified string.

    Args:
        replStr: The value to return in place of the matched tokens.

    Returns:
        A parse action suitable for ``set_parse_action``.
    """

def remove_quotes(s: str, loc: int, tokens: ParseResults) -> str:
    """Parse action to remove surrounding quotes.

    Strips the first and last character of the first token, so it should
    only be attached to expressions whose token still includes its quote
    characters.

    Args:
        s: The original string being parsed.
        loc: Location in ``s`` where the match started.
        tokens: The matched tokens; the first one is the quoted string.

    Returns:
        The first token without its enclosing quote characters.
    """

def with_attribute(**attrDict) -> Callable:
    """Create parse action for matching HTML/XML attributes.

    Args:
        **attrDict: Required attribute names and values, e.g.
            ``id="main"``.

    Returns:
        A parse action for a tag expression; presumably it rejects tags
        whose attributes do not match -- confirm against the pyparsing
        reference.
    """

def with_class(classname: str) -> Callable:
    """Create parse action for matching HTML class attributes.

    Args:
        classname: The required value of the tag's ``class`` attribute.

    Returns:
        A parse action for a tag expression.
    """

Usage examples:

# Replace matched tokens
placeholder = Literal("TBD").set_parse_action(replace_with("To Be Determined"))

# Remove quotes from strings.  dbl_quoted_string keeps the surrounding quote
# characters in its token, and remove_quotes strips them.  QuotedString
# already returns the unquoted text, so remove_quotes must not be applied to
# it: it would drop the first and last characters of the content.
quoted_string = dbl_quoted_string.copy().set_parse_action(remove_quotes)

# Match HTML elements with specific attributes.  set_parse_action() modifies
# and returns the expression it is called on, so each variant works on its
# own copy of the shared any_open_tag expression; otherwise the second call
# would overwrite the first and both names would refer to the same parser.
div_with_id = any_open_tag.copy().set_parse_action(with_attribute(id="main"))

# Match elements with CSS class
highlighted = any_open_tag.copy().set_parse_action(with_class("highlight"))

Built-in Helper Expressions

Pre-built parser expressions for common patterns.

# Comment parsers
c_style_comment: ParserElement        # /* comment */
html_comment: ParserElement           # <!-- comment -->
rest_of_line: ParserElement          # Everything up to the end of the current line
dbl_slash_comment: ParserElement     # // comment
cpp_style_comment: ParserElement     # C++ style comments: /* ... */ or // ...
java_style_comment: ParserElement    # Java style comments (same forms as C++)
python_style_comment: ParserElement  # # comment

# HTML/XML parsers
any_open_tag: ParserElement          # Any opening HTML/XML tag
any_close_tag: ParserElement         # Any closing HTML/XML tag
common_html_entity: ParserElement    # Common HTML entities (&amp;, &lt;, etc.)

# String parsers  
dbl_quoted_string: ParserElement     # "double quoted string"
sgl_quoted_string: ParserElement     # 'single quoted string'
quoted_string: ParserElement         # Either single or double quoted
unicode_string: ParserElement        # Unicode string literals, e.g. u'text'

Advanced Parsing Utilities

Specialized utilities for complex parsing scenarios.

def condition_as_parse_action(condition: Callable, 
                             message: str = "failed user-defined condition") -> Callable:
    """Convert boolean condition to parse action.

    Args:
        condition: Callable that receives the parsed tokens and returns a
            truthy value when the match is acceptable.
        message: Error text reported when ``condition`` is not satisfied.

    Returns:
        A parse action suitable for ``set_parse_action``.
    """

def token_map(func: Callable, *args) -> Callable:
    """Create parse action that maps function over tokens.

    Args:
        func: Callable applied to each matched token.
        *args: NOTE(review): presumably extra positional arguments passed
            to ``func`` after the token -- confirm against the pyparsing
            reference.

    Returns:
        A parse action that returns the transformed tokens.
    """

def autoname_elements() -> None:
    """Automatically assign names to parser elements for debugging.

    Names each parser element that is bound to a variable in the calling
    scope after that variable, so it must be called after the expressions
    have been assigned.
    """

Usage examples:

# Conditional parsing
positive_int = Word(nums).set_parse_action(
    condition_as_parse_action(lambda t: int(t[0]) > 0, "must be positive")
)

# Transform all tokens
uppercase_words = OneOrMore(Word(alphas)).set_parse_action(token_map(str.upper))

# Enable automatic naming for debugging.  autoname_elements() names the parser
# elements that already exist as variables in the calling scope, so it has to
# be called after the expressions are defined.
parser = Word(alphas) + Word(nums)
autoname_elements()  # `parser` (and the expressions above) get auto-named

Additional Utility Functions

Specialized utility functions for advanced parsing scenarios.

def col(loc: int, strg: str) -> int:
    """Return column number of location in string.

    Args:
        loc: Character offset into ``strg``.
        strg: The string being parsed.

    Returns:
        The column of ``loc`` within its line; the first column is 1.
    """

def line(loc: int, strg: str) -> str:
    """Return the line of text that contains the location in string.

    Unlike ``lineno``, which returns the line *number*, this returns the
    text of the line itself.

    Args:
        loc: Character offset into ``strg``.
        strg: The string being parsed.
    """

def lineno(loc: int, strg: str) -> int:
    """Return line number of location in string.

    Args:
        loc: Character offset into ``strg``.
        strg: The string being parsed.

    Returns:
        The number of the line containing ``loc``; the first line is 1.
    """

def match_only_at_col(n: int) -> Callable:
    """Create parse action that accepts a match only at the specified column.

    The result is a parse action, not a parser expression: attach it to an
    expression with ``add_parse_action`` / ``set_parse_action``.

    Args:
        n: The column at which the match must start (first column is 1).
    """

def srange(s: str) -> str:
    """Expand character range expression.

    Args:
        s: A regex-style character set in square brackets, e.g.
            ``"[a-z0-9]"``; ranges such as ``a-z`` are expanded.

    Returns:
        A string of all the characters in the set, e.g.
        ``srange("[aeiouAEIOU]")`` -> ``"aeiouAEIOU"``.
    """

Usage examples:

# Column-specific matching.  match_only_at_col() returns a parse action, not
# an expression, so it is attached to an expression rather than combined with
# one using "+".  A copy is used so the shared rest_of_line is not modified.
code_line = rest_of_line.copy().add_parse_action(match_only_at_col(1))  # Match only at column 1

# Character range expansion
vowels = srange("[aeiouAEIOU]")  # Expands to "aeiouAEIOU"
consonants = srange("[b-df-hj-np-tv-zB-DF-HJ-NP-TV-Z]")

# Position utilities (used in parse actions)
def report_position(s, loc, tokens):
    """Print where the match starts, then pass the tokens through unchanged."""
    message = f"Found at line {lineno(loc, s)}, column {col(loc, s)}"
    print(message)
    return tokens

# Attach the reporter so it runs on every successful match of the word
word_expr = Word(alphas)
parser = word_expr.set_parse_action(report_position)

Install with Tessl CLI