0
# Parser Combinators
1
2
Higher-order functions that combine multiple parsers into more complex parsing logic. These combinators enable compositional parser construction and advanced parsing patterns.
3
4
## Capabilities
5
6
### Alternative Parsing
7
8
Try multiple parsers in order until one succeeds. The first parser that succeeds wins, so list more specific alternatives before more general ones.
9
10
```python { .api }
11
def alt(*parsers):
12
"""
13
Try alternative parsers in order until one succeeds.
14
15
Args:
16
*parsers: Variable number of Parser objects to try
17
18
Returns:
19
Parser: Parser that succeeds with first successful alternative
20
"""
21
```
22
23
### Sequential Parsing
24
25
Run multiple parsers in sequence and collect their results.
26
27
```python { .api }
28
def seq(*parsers, **kw_parsers):
29
"""
30
Parse parsers in sequence and collect results.
31
32
Args:
33
*parsers: Parsers to execute in order (returns list)
34
**kw_parsers: Named parsers to execute (returns dict, Python 3.6+ only)
35
36
Returns:
37
Parser: Parser returning list of results or dict of named results
38
39
Note:
40
Cannot mix positional and keyword arguments.
41
Keyword arguments only available in Python 3.6+.
42
"""
43
```
44
45
### Generator-based Parsing
46
47
Use Python generator syntax for complex parser composition with imperative-style code.
48
49
```python { .api }
50
def generate(fn):
51
"""
52
Create parser using generator syntax for complex parsing logic.
53
54
Args:
55
fn: Generator function that yields parsers and returns final result
56
57
Returns:
58
Parser: Parser that executes the generator-based parsing logic
59
60
Usage:
61
Can be used as decorator or called with generator function.
62
If called with string, returns decorator that adds description.
63
"""
64
```
65
66
## Usage Examples
67
68
### Alternative Parsing
69
70
```python
71
from parsy import alt, string, regex
72
73
# Simple alternatives
74
sign = alt(string('+'), string('-'), string(''))
75
result = sign.parse('+') # Returns '+'
76
77
# Complex alternatives with different result types
78
value = alt(
79
regex(r'\d+\.\d+').map(float), # Float (listed before int: alt commits to the first match)
79
regex(r'\d+').map(int), # Integer
81
regex(r'"[^"]*"').map(lambda s: s[1:-1]) # String
82
)
83
result = value.parse('42') # Returns 42 (int)
84
result = value.parse('3.14') # Returns 3.14 (float)
85
result = value.parse('"hello"') # Returns 'hello' (str)
86
```
87
88
### Sequential Parsing
89
90
```python
91
from parsy import seq, string, regex
92
93
# Basic sequence returning list
94
greeting = seq(
95
string('Hello'),
96
regex(r'\s+'),
97
regex(r'\w+')
98
)
99
result = greeting.parse('Hello world') # Returns ['Hello', ' ', 'world']
100
101
# Sequence with transformation
102
greeting_formatted = seq(
103
string('Hello'),
104
regex(r'\s+'),
105
regex(r'\w+')
106
).combine(lambda hello, space, name: f"{hello} {name}!")
107
result = greeting_formatted.parse('Hello world') # Returns 'Hello world!'
108
109
# Named sequence (Python 3.6+)
110
person = seq(
111
name=regex(r'[A-Za-z]+'),
112
age=regex(r'\d+').map(int)
113
)
114
result = person.parse('Alice25') # Returns {'name': 'Alice', 'age': 25}
115
```
116
117
### Generator-based Parsing
118
119
```python
120
from parsy import generate, string, regex
121
122
# Simple generator parser
123
@generate
124
def simple_assignment():
125
name = yield regex(r'\w+')
126
yield string('=')
127
value = yield regex(r'\d+').map(int)
128
return (name, value)
129
130
result = simple_assignment.parse('x=42') # Returns ('x', 42)
131
132
# Complex nested parsing with conditionals
133
@generate
134
def conditional_expression():
135
condition = yield regex(r'\w+')
136
yield string('?')
137
true_value = yield regex(r'\w+')
138
yield string(':')
139
false_value = yield regex(r'\w+')
140
141
# Can include Python logic
142
if condition == 'true':
143
return true_value
144
else:
145
return false_value
146
147
result = conditional_expression.parse('true?yes:no') # Returns 'yes'
148
149
# Generator with error handling
150
@generate
151
def validated_number():
152
sign = yield alt(string('+'), string('-')).optional()
153
digits = yield regex(r'\d+')
154
155
# Validation logic
156
number = int(digits)
157
if sign == '-':
158
number = -number
159
160
if number > 1000:
161
# Raising propagates an ordinary Python exception out of parse(); to signal a parse failure instead, yield parsy's fail('...')
162
raise ValueError("Number too large")
163
164
return number
165
166
result = validated_number.parse('+42') # Returns 42
167
168
# Generator with description
169
@generate('mathematical expression')
170
def math_expr():
171
left = yield regex(r'\d+').map(int)
172
op = yield regex(r'[+\-*/]')
173
right = yield regex(r'\d+').map(int)
174
175
if op == '+':
176
return left + right
177
elif op == '-':
178
return left - right
179
elif op == '*':
180
return left * right
181
elif op == '/':
182
return left / right
183
184
result = math_expr.parse('5*3') # Returns 15
185
```
186
187
### Advanced Combination Patterns
188
189
```python
190
from parsy import alt, seq, generate, string, regex
191
192
# Recursive parsing with forward declaration
193
from parsy import forward_declaration
194
195
expr = forward_declaration()
196
197
# Define atomic expressions
198
number = regex(r'\d+').map(int)
199
variable = regex(r'[a-z]+')
200
atom = alt(number, variable, string('(') >> expr << string(')'))
201
202
# Define operations
203
term = atom.sep_by(alt(string('*'), string('/')), min=1)
204
expression = term.sep_by(alt(string('+'), string('-')), min=1)
205
206
# Complete the forward declaration
207
expr.become(expression)
208
209
# Complex nested structure parsing with forward declaration
210
json_value = forward_declaration()
211
212
@generate
213
def json_object():
214
yield string('{')
215
yield regex(r'\s*')
216
217
# Handle empty object
218
empty_end = yield string('}').optional()
219
if empty_end:
220
return {}
221
222
# Parse key-value pairs
223
pairs = yield json_pair.sep_by(regex(r'\s*,\s*'))
224
yield regex(r'\s*')
225
yield string('}')
226
227
return dict(pairs)
228
229
@generate
230
def json_pair():
231
key = yield regex(r'"([^"]*)"', group=1)
232
yield regex(r'\s*:\s*')
233
value = yield json_value
234
return (key, value)
235
236
# Define json_value after json_object is defined
237
json_value.become(alt(
238
regex(r'"([^"]*)"', group=1), # String
239
regex(r'\d+').map(int), # Number
240
json_object # Nested object
241
))
242
243
result = json_object.parse('{"name": "Alice", "age": 30}')
244
# Returns {'name': 'Alice', 'age': 30}
245
```