0
# Parser Generation
1
2
The `generate` decorator provides a powerful declarative syntax that uses Python generators to build complex parsers with natural control flow, variable binding, and conditional logic. The generator approach offers an intuitive way to express complex parsing logic while retaining the functional parser-combinator foundation.
3
4
## Capabilities
5
6
### Generator Decorator
7
8
The core decorator that transforms Python generator functions into parser combinators with full access to intermediate parsing results.
9
10
```python { .api }
11
def generate(fn):
12
"""
13
Create a parser from a generator function.
14
15
Args:
16
fn (function or str): Generator function or description string
17
18
Returns:
19
Parser: Parser built from the generator
20
21
Usage patterns:
22
@generate
23
def my_parser():
24
result = yield some_parser
25
return final_result
26
27
@generate("description for errors")
28
def my_parser():
29
# parser logic
30
31
Note:
32
Generator should yield Parser objects and return final result.
33
Intermediate results are sent back via generator.send().
34
"""
35
```
36
37
## Usage Examples
38
39
### Basic Generator Parsing
40
41
```python
42
from parsec import generate, string, many1, letter, digit, spaces, ParseError
43
44
# Simple generator parser
45
@generate
46
def greeting():
47
hello = yield string("hello")
48
yield spaces()
49
name = yield many1(letter())
50
return f"{hello} {''.join(name)}"
51
52
result = greeting.parse("hello alice") # Returns "hello alice"
53
54
# Generator with error description
55
@generate("greeting parser")
56
def greeting_with_desc():
57
yield string("hi")
58
yield spaces()
59
name = yield many1(letter())
60
return "".join(name)
61
62
try:
63
result = greeting_with_desc.parse("bye alice")
64
except ParseError as e:
65
print(e.expected) # "greeting parser"
66
```
67
68
### Conditional Parsing
69
70
```python
71
from parsec import generate, string, many1, digit, letter
72
73
# Conditional logic based on parsed values
74
@generate
75
def conditional_number():
76
sign = yield string("+") ^ string("-") ^ string("")
77
digits = yield many1(digit())
78
number = int("".join(digits))
79
80
if sign == "-":
81
return -number
82
else:
83
return number
84
85
result = conditional_number.parse("-123") # Returns -123
86
result = conditional_number.parse("+456") # Returns 456
87
result = conditional_number.parse("789") # Returns 789
88
89
# More complex conditional parsing
90
@generate
91
def typed_value():
92
type_marker = yield string("i:") ^ string("s:") ^ string("f:")
93
94
if type_marker == "i:":
95
digits = yield many1(digit())
96
return int("".join(digits))
97
elif type_marker == "s:":
98
chars = yield many1(letter())
99
return "".join(chars)
100
else: # "f:"
101
whole = yield many1(digit())
102
yield string(".")
103
decimal = yield many1(digit())
104
return float("".join(whole) + "." + "".join(decimal))
105
106
result = typed_value.parse("i:123") # Returns 123 (int)
107
result = typed_value.parse("s:hello") # Returns "hello" (str)
108
result = typed_value.parse("f:12.34") # Returns 12.34 (float)
109
```
110
111
### Complex Data Structure Parsing
112
113
```python
114
from parsec import generate, string, many, many1, letter, digit, spaces, one_of, none_of
115
116
# Parse JSON-like objects
117
@generate
118
def json_string():
119
yield string('"')
120
chars = yield many(none_of('"'))
121
yield string('"')
122
return "".join(chars)
123
124
@generate
125
def json_number():
126
from parsec import Parser, Value
127
sign = yield string("-") ^ string("")
128
digits = yield many1(digit())
129
decimal = yield (string(".") >> many1(digit())) ^ Parser(lambda text, index: Value.success(index, []))
130
131
number_str = sign + "".join(digits)
132
if decimal:
133
number_str += "." + "".join(decimal)
134
return float(number_str)
135
else:
136
return int(number_str)
137
138
@generate
139
def json_array():
140
yield string("[")
141
yield spaces()
142
143
# Handle empty array
144
empty_check = yield string("]") ^ string("")
145
if empty_check == "]":
146
return []
147
148
# Parse first element
149
first = yield json_value
150
elements = [first]
151
152
# Parse remaining elements
153
rest = yield many(string(",") >> spaces() >> json_value)
154
elements.extend(rest)
155
156
yield spaces()
157
yield string("]")
158
return elements
159
160
@generate
161
def json_value():
162
value = yield json_string ^ json_number ^ json_array
163
return value
164
165
# Usage
166
result = json_array.parse('["hello", 123, -45.6]')
167
# Returns ["hello", 123, -45.6]
168
```
169
170
### Stateful Parsing
171
172
```python
173
from parsec import generate, string, many, many1, letter, digit, spaces
174
175
# Parser that maintains state across operations
176
@generate
177
def calculator():
178
result = yield many1(digit()).parsecmap(lambda d: int("".join(d)))
179
180
operations = yield many(
181
(string("+") ^ string("-") ^ string("*") ^ string("/")) +
182
many1(digit()).parsecmap(lambda d: int("".join(d)))
183
)
184
185
for op, operand in operations:
186
if op == "+":
187
result += operand
188
elif op == "-":
189
result -= operand
190
elif op == "*":
191
result *= operand
192
elif op == "/":
193
result //= operand # Integer division
194
195
return result
196
197
result = calculator.parse("10+5*2-3") # Returns 27 (operations are applied strictly left to right, without operator precedence)
198
199
# Counter example with internal state
200
@generate
201
def word_counter():
202
words = []
203
count = 0
204
205
while True:
206
# Try to parse another word
207
try:
208
word_chars = yield many1(letter())
209
word = "".join(word_chars)
210
words.append(word)
211
count += 1
212
213
# Optional whitespace between words
214
yield spaces()
215
216
except:
217
break
218
219
return {"words": words, "count": count}
220
221
# This won't work as shown: generate never raises a parse failure inside the generator (a failed yield simply fails the whole parser),
222
# but demonstrates the concept of stateful parsing
223
```
224
225
### Recursive Parsing with Generators
226
227
```python
228
from parsec import generate, string, many, many1, letter, digit, spaces, one_of
229
230
# Forward declaration for recursive grammar
231
expr = None
232
233
@generate
234
def factor():
235
# Number only; parenthesized expressions are handled by factor_or_paren below
236
number = yield many1(digit()).parsecmap(lambda d: int("".join(d)))
237
return number
238
239
@generate
240
def factor_or_paren():
241
result = yield factor ^ (string("(") >> expr << string(")"))
242
return result
243
244
@generate
245
def term():
246
left = yield factor_or_paren
247
248
ops = yield many((one_of("*/") + factor_or_paren))
249
250
result = left
251
for op, right in ops:
252
if op == "*":
253
result *= right
254
else: # op == "/"
255
result //= right
256
257
return result
258
259
@generate
260
def expression():
261
left = yield term
262
263
ops = yield many((one_of("+-") + term))
264
265
result = left
266
for op, right in ops:
267
if op == "+":
268
result += right
269
else: # op == "-"
270
result -= right
271
272
return result
273
274
# Set the forward reference
275
expr = expression
276
277
# Usage
278
result = expression.parse("2+3*4") # Returns 14
279
result = expression.parse("(2+3)*4") # Returns 20
280
```
281
282
### Error Handling in Generators
283
284
```python
285
from parsec import generate, string, many1, letter, digit, one_of, ParseError
286
287
# Generator with custom error handling
288
@generate("email address")
289
def email_parser():
290
try:
291
username = yield many1(letter() ^ digit() ^ one_of("._"))
292
yield string("@")
293
domain = yield many1(letter() ^ digit() ^ one_of("-"))  # "." excluded so the following string(".") can still match
294
yield string(".")
295
tld = yield many1(letter())
296
297
return {
298
"username": "".join(username),
299
"domain": "".join(domain),
300
"tld": "".join(tld)
301
}
302
except ParseError:
303
# Could add custom error handling here
304
raise
305
306
# Generator that returns alternative parsers for error recovery
307
@generate
308
def robust_number():
309
try:
310
# Try to parse a number
311
digits = yield many1(digit())
312
return int("".join(digits))
313
except:
314
# If that fails, try to parse "unknown"
315
yield string("unknown")
316
return None
317
318
result = robust_number.parse("123") # Returns 123
319
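result = robust_number.parse("unknown") # Intended to return None, but actually raises ParseError: generate does not raise failures inside the generator, so the except branch never runs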
320
```
321
322
## Advanced Patterns
323
324
### Generator Composition
325
326
```python
327
from parsec import generate, string, many, many1, letter, digit, space, spaces
328
329
# Compose generators for modularity
330
@generate
331
def parse_header():
332
yield string("BEGIN")
333
yield spaces()
334
name = yield many1(letter())
335
yield string("\n")
336
return "".join(name)
337
338
@generate
339
def parse_body():
340
lines = yield many(many1(letter() ^ digit() ^ string(" ")) << string("\n"))
341
return ["".join(line) for line in lines]
342
343
@generate
344
def parse_footer():
345
yield string("END")
346
return None
347
348
@generate
349
def parse_document():
350
header = yield parse_header
351
body = yield parse_body
352
footer = yield parse_footer
353
354
return {
355
"title": header,
356
"content": body
357
}
358
359
# Usage
360
doc_text = """BEGIN MyDocument
361
line one
362
line two
363
END"""
364
365
result = parse_document.parse(doc_text)
366
# Returns {"title": "MyDocument", "content": ["line one", "line two"]}
367
```