0
# Core Parser Elements
1
2
Fundamental building blocks for creating parsing expressions. These classes form the foundation of all pyparsing grammars, providing terminal elements that match specific text patterns and expression combinators that define how elements relate to each other.
3
4
**Required imports for type annotations:**
5
6
```python
7
from typing import Union, Optional, Generator
8
from pyparsing import ParserElement, ParseResults, Token, ParseExpression
9
```
10
11
## Capabilities
12
13
### Base Classes
14
15
Abstract base classes that define the core parsing interface and provide common functionality for all parser elements.
16
17
```python { .api }
18
class ParserElement:
19
"""Abstract base class for all parsing elements."""
20
21
def parse_string(self, instring: str, parse_all: bool = False) -> ParseResults:
22
"""Parse a string and return results."""
23
24
def parse_file(self, file_or_filename, parse_all: bool = False) -> ParseResults:
25
"""Parse contents of a file."""
26
27
def search_string(self, instring: str, maxMatches: int = None) -> list:
28
"""Search for matches within a string."""
29
30
def scan_string(self, instring: str, maxMatches: int = None) -> Generator:
31
"""Generator that yields matches and locations."""
32
33
def transform_string(self, instring: str) -> str:
34
"""Transform string by replacing matches."""
35
36
def set_results_name(self, name: str) -> ParserElement:
37
"""Assign a name to parsed results."""
38
39
def set_parse_action(self, *fns) -> ParserElement:
40
"""Assign parse actions to be called when successfully parsed."""
41
42
def add_parse_action(self, *fns) -> ParserElement:
43
"""Add parse actions to existing ones."""
44
45
def set_fail_action(self, fn) -> ParserElement:
46
"""Set action to be called if parsing fails."""
47
48
def set_debug(self, flag: bool = True) -> ParserElement:
49
"""Enable/disable debug output for this element."""
50
51
def copy(self) -> ParserElement:
52
"""Create a copy of this parser element."""
53
54
def __add__(self, other) -> ParserElement:
55
"""Implement + operator for And combinations."""
56
57
def __or__(self, other) -> ParserElement:
58
"""Implement | operator for MatchFirst combinations."""
59
60
def __xor__(self, other) -> ParserElement:
61
"""Implement ^ operator for Or combinations."""
62
63
def __and__(self, other) -> ParserElement:
64
"""Implement & operator for Each combinations."""
65
66
def __mul__(self, other) -> ParserElement:
67
"""Implement * operator for repetition."""
68
69
def __getitem__(self, key) -> ParserElement:
70
"""Implement expr[...] repetition notation (e.g. expr[1, ...] for OneOrMore)."""
71
72
def __invert__(self) -> ParserElement:
73
"""Implement ~ operator for NotAny."""
74
```
75
76
```python { .api }
77
class ParseExpression(ParserElement):
78
"""Base class for parser expressions that contain other parser elements."""
79
80
def __init__(self, exprs: list, savelist: bool = False): ...
81
82
def append(self, other: ParserElement) -> ParserElement:
83
"""Add another element to this expression."""
84
85
def ignore(self, other: ParserElement) -> ParserElement:
86
"""Ignore the specified expression while parsing."""
87
```
88
89
```python { .api }
90
class Token(ParserElement):
91
"""Base class for terminal parsing elements."""
92
93
def __init__(self): ...
94
```
95
96
### Terminal Elements
97
98
Parser elements that match specific text patterns and consume input directly.
99
100
```python { .api }
101
class Literal(Token):
102
"""Match an exact literal string."""
103
104
def __init__(self, matchString: str): ...
105
106
class CaselessLiteral(Literal):
107
"""Match a literal string, ignoring case."""
108
109
def __init__(self, matchString: str): ...
110
```
111
112
```python { .api }
113
class Word(Token):
114
"""Match words composed of specified character sets."""
115
116
def __init__(self,
117
init_chars: str = "",
118
body_chars: str = None,
119
min: int = 1,
120
max: int = 0,
121
exact: int = 0,
122
as_keyword: bool = False,
123
exclude_chars: str = None,
124
*,
125
# Backward compatibility parameters
126
initChars: str = "",
127
bodyChars: str = None,
128
asKeyword: bool = False,
129
excludeChars: str = None): ...
130
```
131
132
**Usage example:**
133
```python
134
# Match Python identifiers
135
identifier = Word(alphas + "_", alphanums + "_")
136
137
# Match integers
138
integer = Word(nums)
139
140
# Match exactly 3 digits
141
three_digits = Word(nums, exact=3)
142
143
# Using keyword parameters
144
float_num = Word(nums, body_chars=nums + ".", min=1)
145
```
146
147
```python { .api }
148
class Char(Token):
149
"""Match a single character from specified set."""
150
151
def __init__(self, charset: str, asKeyword: bool = False, excludeChars: str = None): ...
152
```
153
154
```python { .api }
155
class Keyword(Token):
156
"""Match a specific keyword with word boundaries."""
157
158
def __init__(self, matchString: str, ident_chars: str = None, caseless: bool = False): ...
159
160
class CaselessKeyword(Keyword):
161
"""Match a keyword ignoring case."""
162
163
def __init__(self, matchString: str, ident_chars: str = None): ...
164
```
165
166
```python { .api }
167
class Regex(Token):
168
"""Match using regular expressions."""
169
170
def __init__(self, pattern: str, flags: int = 0, asGroupList: bool = False, asMatch: bool = False): ...
171
```
172
173
```python { .api }
174
class QuotedString(Token):
175
"""Match quoted strings with escape character handling."""
176
177
def __init__(self,
178
quoteChar: str,
179
escChar: str = None,
180
escQuote: str = None,
181
multiline: bool = False,
182
unquoteResults: bool = True,
183
endQuoteChar: str = None,
184
convertWhitespaceEscapes: bool = True): ...
185
```
186
187
```python { .api }
188
class CharsNotIn(Token):
189
"""Match characters not in the specified set."""
190
191
def __init__(self, notChars: str, min: int = 1, max: int = 0, exact: int = 0): ...
192
```
193
194
```python { .api }
195
class White(Token):
196
"""Match whitespace characters."""
197
198
def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0): ...
199
```
200
201
```python { .api }
202
class Empty(Token):
203
"""Always matches without consuming input."""
204
205
def __init__(self): ...
206
```
207
208
### Expression Combinators
209
210
Classes that combine multiple parser elements to create complex parsing expressions.
211
212
```python { .api }
213
class And(ParseExpression):
214
"""Match all expressions in sequence."""
215
216
def __init__(self, exprs: list, savelist: bool = True): ...
217
218
class Or(ParseExpression):
219
"""Match any expression, longest match wins."""
220
221
def __init__(self, exprs: list, savelist: bool = False): ...
222
223
class MatchFirst(ParseExpression):
224
"""Match first successful expression."""
225
226
def __init__(self, exprs: list, savelist: bool = False): ...
227
228
class Each(ParseExpression):
229
"""Match all expressions in any order."""
230
231
def __init__(self, exprs: list, savelist: bool = True): ...
232
```
233
234
**Usage example:**
235
```python
236
# Sequential matching with And (or + operator)
237
greet = Word(alphas) + "," + Word(alphas) + "!"
238
239
# Alternative matching with MatchFirst (or | operator)
240
number_word = Literal("one") | "two" | "three" | Word(nums)
241
242
# Longest match with Or (or ^ operator)
243
floating_point = Regex(r'\d+\.\d+') ^ Word(nums)
244
245
# All expressions in any order with Each (or & operator)
246
attrs = (Literal("width") + "=" + Word(nums)) & (Literal("height") + "=" + Word(nums))
247
```
248
249
### Position-Based Elements
250
251
Elements that match based on position within the input rather than consuming text.
252
253
```python { .api }
254
class PositionToken(Token):
255
"""Base class for position-based matching."""
256
257
def __init__(self): ...
258
259
class LineStart(PositionToken):
260
"""Match at start of line."""
261
262
def __init__(self): ...
263
264
class LineEnd(PositionToken):
265
"""Match at end of line."""
266
267
def __init__(self): ...
268
269
class StringStart(PositionToken):
270
"""Match at start of string."""
271
272
def __init__(self): ...
273
274
class StringEnd(PositionToken):
275
"""Match at end of string."""
276
277
def __init__(self): ...
278
279
class WordStart(PositionToken):
280
"""Match at start of word boundary."""
281
282
def __init__(self): ...
283
284
class WordEnd(PositionToken):
285
"""Match at end of word boundary."""
286
287
def __init__(self): ...
288
289
class AtLineStart(ParseElementEnhance):
290
"""Match the wrapped expression only if it begins at the start of a line."""
291
292
def __init__(self, expr: Union[ParserElement, str]): ...
293
294
class AtStringStart(ParseElementEnhance):
295
"""Match the wrapped expression only if it begins at the start of the string."""
296
297
def __init__(self, expr: Union[ParserElement, str]): ...
298
299
class GoToColumn(PositionToken):
300
"""Match at specific column position."""
301
302
def __init__(self, colno: int): ...
303
```
304
305
### Special Elements
306
307
Specialized elements for advanced parsing scenarios.
308
309
```python { .api }
310
class Forward(ParserElement):
311
"""Forward declaration for recursive grammars."""
312
313
def __init__(self, other: ParserElement = None): ...
314
315
def __lshift__(self, other: ParserElement) -> ParserElement:
316
"""Set the forward reference using << operator."""
317
318
class SkipTo(ParseElementEnhance):
319
"""Skip to specified expression."""
320
321
def __init__(self,
322
other: ParserElement,
323
include: bool = False,
324
ignore: ParserElement = None,
325
failOn: ParserElement = None): ...
326
327
class NoMatch(Token):
328
"""Never matches - useful for debugging."""
329
330
def __init__(self): ...
331
```
332
333
**Usage example:**
334
```python
335
# Forward reference for recursive grammar
336
expr = Forward()
337
term = Word(nums) | "(" + expr + ")"
338
expr <<= term + ZeroOrMore((Literal("+") | "-") + term)
339
340
# Skip to closing tag
341
content = SkipTo("</body>")
342
```