or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

character-parsing.md combinators.md core-primitives.md index.md parser-generation.md parser-operators.md

parser-generation.mddocs/

0

# Parser Generation

1

2

Parser generation offers a powerful declarative syntax that uses Python generators to build complex parsers with natural control flow, variable binding, and conditional logic. The generator approach provides an intuitive way to express complex parsing logic while maintaining the functional parser-combinator foundation.

3

4

## Capabilities

5

6

### Generator Decorator

7

8

`generate` is the core decorator: it transforms Python generator functions into parser combinators, giving the function body full access to intermediate parsing results.

9

10

```python { .api }

11

def generate(fn):

12

"""

13

Create a parser from a generator function.

14

15

Args:

16

fn (function or str): Generator function or description string

17

18

Returns:

19

Parser: Parser built from the generator

20

21

Usage patterns:

22

@generate

23

def my_parser():

24

result = yield some_parser

25

return final_result

26

27

@generate("description for errors")

28

def my_parser():

29

# parser logic

30

31

Note:

32

The generator should yield Parser objects and return the final result.

33

Intermediate results are sent back via generator.send().

34

"""

35

```

36

37

## Usage Examples

38

39

### Basic Generator Parsing

40

41

```python

42

from parsec import generate, string, many1, letter, digit, spaces, ParseError

43

44

# Simple generator parser

45

@generate

46

def greeting():

47

hello = yield string("hello")

48

yield spaces()

49

name = yield many1(letter())

50

return f"{hello} {''.join(name)}"

51

52

result = greeting.parse("hello alice") # Returns "hello alice"

53

54

# Generator with error description

55

@generate("greeting parser")

56

def greeting_with_desc():

57

yield string("hi")

58

yield spaces()

59

name = yield many1(letter())

60

return "".join(name)

61

62

try:

63

result = greeting_with_desc.parse("bye alice")

64

except ParseError as e:

65

print(e.expected) # "greeting parser"

66

```

67

68

### Conditional Parsing

69

70

```python

71

from parsec import generate, string, many1, digit, letter

72

73

# Conditional logic based on parsed values

74

@generate

75

def conditional_number():

76

sign = yield string("+") ^ string("-") ^ string("")

77

digits = yield many1(digit())

78

number = int("".join(digits))

79

80

if sign == "-":

81

return -number

82

else:

83

return number

84

85

result = conditional_number.parse("-123") # Returns -123

86

result = conditional_number.parse("+456") # Returns 456

87

result = conditional_number.parse("789") # Returns 789

88

89

# More complex conditional parsing

90

@generate

91

def typed_value():

92

type_marker = yield string("i:") ^ string("s:") ^ string("f:")

93

94

if type_marker == "i:":

95

digits = yield many1(digit())

96

return int("".join(digits))

97

elif type_marker == "s:":

98

chars = yield many1(letter())

99

return "".join(chars)

100

else: # "f:"

101

whole = yield many1(digit())

102

yield string(".")

103

decimal = yield many1(digit())

104

return float("".join(whole) + "." + "".join(decimal))

105

106

result = typed_value.parse("i:123") # Returns 123 (int)

107

result = typed_value.parse("s:hello") # Returns "hello" (str)

108

result = typed_value.parse("f:12.34") # Returns 12.34 (float)

109

```

110

111

### Complex Data Structure Parsing

112

113

```python

114

from parsec import generate, string, many, many1, letter, digit, spaces, one_of, none_of

115

116

# Parse JSON-like objects

117

@generate

118

def json_string():

119

yield string('"')

120

chars = yield many(none_of('"'))

121

yield string('"')

122

return "".join(chars)

123

124

@generate

125

def json_number():

126

from parsec import Parser, Value

127

sign = yield string("-") ^ string("")

128

digits = yield many1(digit())

129

decimal = yield (string(".") >> many1(digit())) ^ Parser(lambda text, index: Value.success(index, []))

130

131

number_str = sign + "".join(digits)

132

if decimal:

133

number_str += "." + "".join(decimal)

134

return float(number_str)

135

else:

136

return int(number_str)

137

138

@generate

139

def json_array():

140

yield string("[")

141

yield spaces()

142

143

# Handle empty array

144

empty_check = yield string("]") ^ string("")

145

if empty_check == "]":

146

return []

147

148

# Parse first element

149

first = yield json_value

150

elements = [first]

151

152

# Parse remaining elements

153

rest = yield many(string(",") >> spaces() >> json_value)

154

elements.extend(rest)

155

156

yield spaces()

157

yield string("]")

158

return elements

159

160

@generate

161

def json_value():

162

value = yield json_string ^ json_number ^ json_array

163

return value

164

165

# Usage

166

result = json_array.parse('["hello", 123, -45.6]')

167

# Returns ["hello", 123, -45.6]

168

```

169

170

### Stateful Parsing

171

172

```python

173

from parsec import generate, string, many, many1, letter, digit, spaces

174

175

# Parser that maintains state across operations

176

@generate

177

def calculator():

178

result = yield many1(digit()).parsecmap(lambda d: int("".join(d)))

179

180

operations = yield many(

181

(string("+") ^ string("-") ^ string("*") ^ string("/")) +

182

many1(digit()).parsecmap(lambda d: int("".join(d)))

183

)

184

185

for op, operand in operations:

186

if op == "+":

187

result += operand

188

elif op == "-":

189

result -= operand

190

elif op == "*":

191

result *= operand

192

elif op == "/":

193

result //= operand # Integer division

194

195

return result

196

197

result = calculator.parse("10+5*2-3") # Returns 27: operations are applied left to right, with no operator precedence

198

199

# Counter example with internal state

200

@generate

201

def word_counter():

202

words = []

203

count = 0

204

205

while True:

206

# Try to parse another word

207

try:

208

word_chars = yield many1(letter())

209

word = "".join(word_chars)

210

words.append(word)

211

count += 1

212

213

# Optional whitespace between words

214

yield spaces()

215

216

except:

217

break

218

219

return {"words": words, "count": count}

220

221

# This won't work exactly as shown due to exception handling,

222

# but demonstrates the concept of stateful parsing

223

```

224

225

### Recursive Parsing with Generators

226

227

```python

228

from parsec import generate, string, many, many1, letter, digit, spaces, one_of

229

230

# Forward declaration for recursive grammar

231

expr = None

232

233

@generate

234

def factor():

235

# A plain number (parenthesized expressions are handled by factor_or_paren)

236

number = yield many1(digit()).parsecmap(lambda d: int("".join(d)))

237

return number

238

239

@generate

240

def factor_or_paren():

241

result = yield factor ^ (string("(") >> expr << string(")"))

242

return result

243

244

@generate

245

def term():

246

left = yield factor_or_paren

247

248

ops = yield many((one_of("*/") + factor_or_paren))

249

250

result = left

251

for op, right in ops:

252

if op == "*":

253

result *= right

254

else: # op == "/"

255

result //= right

256

257

return result

258

259

@generate

260

def expression():

261

left = yield term

262

263

ops = yield many((one_of("+-") + term))

264

265

result = left

266

for op, right in ops:

267

if op == "+":

268

result += right

269

else: # op == "-"

270

result -= right

271

272

return result

273

274

# Set the forward reference

275

expr = expression

276

277

# Usage

278

result = expression.parse("2+3*4") # Returns 14

279

result = expression.parse("(2+3)*4") # Returns 20

280

```

281

282

### Error Handling in Generators

283

284

```python

285

from parsec import generate, string, many1, letter, digit, one_of, ParseError

286

287

# Generator with custom error handling

288

@generate("email address")

289

def email_parser():

290

try:

291

username = yield many1(letter() ^ digit() ^ one_of("._"))

292

yield string("@")

293

domain = yield many1(letter() ^ digit() ^ one_of(".-"))

294

yield string(".")

295

tld = yield many1(letter())

296

297

return {

298

"username": "".join(username),

299

"domain": "".join(domain),

300

"tld": "".join(tld)

301

}

302

except ParseError:

303

# Could add custom error handling here

304

raise

305

306

# Generator that returns alternative parsers for error recovery

307

@generate

308

def robust_number():

309

try:

310

# Try to parse a number

311

digits = yield many1(digit())

312

return int("".join(digits))

313

except:

314

# If that fails, try to parse "unknown"

315

yield string("unknown")

316

return None

317

318

result = robust_number.parse("123") # Returns 123

319

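result = robust_number.parse("unknown") # Intended to return None; note that a failed parser is not raised inside the generator, so in practice this raises ParseError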

320

```

321

322

## Advanced Patterns

323

324

### Generator Composition

325

326

```python

327

from parsec import generate, string, spaces, space, many, many1, letter, digit

328

329

# Compose generators for modularity

330

@generate

331

def parse_header():

332

yield string("BEGIN")

333

yield spaces()

334

name = yield many1(letter())

335

yield string("\n")

336

return "".join(name)

337

338

@generate

339

def parse_body():

340

lines = yield many(many1(letter() ^ digit() ^ space()) < string("\n"))

341

return ["".join(line) for line in lines]

342

343

@generate

344

def parse_footer():

345

yield string("END")

346

return None

347

348

@generate

349

def parse_document():

350

header = yield parse_header

351

body = yield parse_body

352

footer = yield parse_footer

353

354

return {

355

"title": header,

356

"content": body

357

}

358

359

# Usage

360

doc_text = """BEGIN MyDocument

361

line one

362

line two

363

END"""

364

365

result = parse_document.parse(doc_text)

366

# Returns {"title": "MyDocument", "content": ["line one", "line two"]}

367

```