or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

core-parsing.md, exceptions.md, index.md, tokens-lexing.md, tree-processing.md, utilities.md

docs/exceptions.md

0

# Exception Handling

1

2

Comprehensive error handling covering parse errors, lexical errors, grammar errors, and unexpected input, with context information and error-recovery capabilities.

3

4

## Capabilities

5

6

### Base Exception Classes

7

8

Foundation exception classes providing the error hierarchy for Lark parsing operations.

9

10

```python { .api }

11

class LarkError(Exception):

12

"""

13

Base class for all Lark-specific exceptions.

14

"""

15

16

class ParseError(LarkError):

17

"""

18

Base class for all parsing-related errors.

19

Raised when parsing fails due to invalid input structure.

20

"""

21

22

class LexError(LarkError):

23

"""

24

Base class for all lexing-related errors.

25

Raised when lexer cannot tokenize input text.

26

"""

27

```

28

29

### Configuration and Grammar Errors

30

31

Errors related to parser setup and grammar definition problems.

32

33

```python { .api }

34

class GrammarError(LarkError):

35

"""

36

Raised when grammar definition contains errors.

37

Indicates problems in grammar syntax, rule definitions, or terminal patterns.

38

"""

39

40

class ConfigurationError(LarkError):

41

"""

42

Raised when invalid configuration options are provided.

43

Indicates incompatible parser options or invalid parameter values.

44

"""

45

```

46

47

### Unexpected Input Handling

48

49

Base class and specific implementations for handling unexpected input during parsing.

50

51

```python { .api }

52

class UnexpectedInput(ParseError):

53

"""

54

Base class for unexpected input exceptions.

55

Provides context information and error recovery utilities.

56

"""

57

58

def get_context(self, text: str, span: int = 40) -> str:

59

"""

60

Get formatted error context showing position of error in input.

61

62

Parameters:

63

- text: Original input text

64

- span: Number of characters to show around error

65

66

Returns:

67

str: Formatted context with error pointer

68

"""

69

70

def match_examples(self, parse_fn: Callable, examples: Dict[str, str],

71

token_type_match_fallback: bool = False,

72

use_accepts: bool = False) -> str:

73

"""

74

Match error against example error patterns.

75

76

Parameters:

77

- parse_fn: Function to parse examples

78

- examples: Dict mapping example names to example text

79

- token_type_match_fallback: Use token type matching as fallback

80

- use_accepts: Consider acceptable tokens in matching

81

82

Returns:

83

str: Name of best matching example

84

"""

85

86

# Attributes

87

line: int # Line number where error occurred

88

column: int # Column number where error occurred

89

pos_in_stream: int # Position in token stream

90

state: Any # Parser state at error

91

_terminals_by_name: Dict # Terminal definitions by name

92

```

93

94

### End-of-File Errors

95

96

Errors when parser expects more input but reaches end of text.

97

98

```python { .api }

99

class UnexpectedEOF(UnexpectedInput):

100

"""

101

Raised when parser expected more tokens but input ended.

102

Indicates incomplete input that could be valid with additional content.

103

"""

104

105

def __init__(self, expected: List[str], state: Any = None,

106

terminals_by_name: Dict = None):

107

"""

108

Initialize unexpected EOF error.

109

110

Parameters:

111

- expected: List of expected token types

112

- state: Parser state when EOF encountered

113

- terminals_by_name: Terminal definitions

114

"""

115

116

expected: List[str] # Expected token types

117

```

118

119

### Character-Level Lexing Errors

120

121

Errors when lexer cannot match input characters to any terminal pattern.

122

123

```python { .api }

124

class UnexpectedCharacters(LexError, UnexpectedInput):

125

"""

126

Raised when lexer cannot match input characters to terminals.

127

Indicates characters that don't form valid tokens according to grammar.

128

"""

129

130

def __init__(self, seq: str, lex_pos: int, line: int, column: int,

131

allowed: Set[str] = None, considered_rules: Set = None,

132

state: Any = None, token_history: List = None,

133

terminals_by_name: Dict = None):

134

"""

135

Initialize unexpected characters error.

136

137

Parameters:

138

- seq: Input sequence containing error

139

- lex_pos: Position in sequence where error occurred

140

- line: Line number of error

141

- column: Column number of error

142

- allowed: Set of characters/patterns that were expected

143

- considered_rules: Rules that were considered during lexing

144

- state: Lexer state at error

145

- token_history: Previous tokens

146

- terminals_by_name: Terminal definitions

147

"""

148

149

allowed: Set[str] # Expected characters/patterns

150

considered_rules: Set # Rules considered during lexing

151

token_history: List[Token] # Previous tokens for context

152

```

153

154

### Token-Level Parsing Errors

155

156

Errors when parser receives valid tokens in invalid combinations.

157

158

```python { .api }

159

class UnexpectedToken(UnexpectedInput):

160

"""

161

Raised when parser receives a token it didn't expect.

162

The token is valid lexically but appears in wrong context syntactically.

163

"""

164

165

def __init__(self, token: Token, expected: Set[str] = None,

166

considered_rules: Set = None, state: Any = None,

167

interactive_parser = None, terminals_by_name: Dict = None,

168

token_history: List = None):

169

"""

170

Initialize unexpected token error.

171

172

Parameters:

173

- token: The unexpected token

174

- expected: Set of expected token types

175

- considered_rules: Rules considered during parsing

176

- state: Parser state at error

177

- interactive_parser: Interactive parser instance (if available)

178

- terminals_by_name: Terminal definitions

179

- token_history: Previous tokens for context

180

"""

181

182

token: Token # The unexpected token

183

accepts: Set[str] # Set of acceptable token types

184

interactive_parser: Any # Parser instance at failure point

185

considered_rules: Set # Rules considered during parsing

186

token_history: List[Token] # Previous tokens for context

187

```

188

189

### Visitor and Transformer Errors

190

191

Errors that occur during tree processing operations.

192

193

```python { .api }

194

class VisitError(LarkError):

195

"""

196

Raised when visitors or transformers are interrupted by an exception.

197

Wraps the original exception with context about where it occurred.

198

"""

199

200

def __init__(self, rule: str, tree: Tree, orig_exc: Exception):

201

"""

202

Initialize visit error.

203

204

Parameters:

205

- rule: Rule name where error occurred

206

- tree: Tree node being processed when error occurred

207

- orig_exc: Original exception that caused the error

208

"""

209

210

rule: str # Rule name where error occurred

211

tree: Tree # Tree node being processed

212

orig_exc: Exception # Original exception

213

```

214

215

### Tree Processing Control

216

217

Exception used for controlling tree transformation flow.

218

219

```python { .api }

220

class Discard(Exception):

221

"""

222

When raised in transformer callback, discards the node from parent tree.

223

Used to remove nodes during transformation without causing errors.

224

"""

225

```

226

227

## Usage Examples

228

229

### Basic Error Handling

230

231

```python

232

from lark import Lark, ParseError, LexError, UnexpectedToken

233

234

parser = Lark(grammar)

235

236

try:

237

tree = parser.parse(text)

238

except ParseError as e:

239

print(f"Parse error: {e}")

240

except LexError as e:

241

print(f"Lex error: {e}")

242

```

243

244

### Detailed Error Information

245

246

```python

247

from lark import Lark, UnexpectedToken, UnexpectedCharacters

248

249

parser = Lark(grammar)

250

251

try:

252

result = parser.parse("invalid input")

253

except UnexpectedToken as e:

254

print(f"Unexpected token '{e.token.value}' of type {e.token.type}")

255

print(f"Expected one of: {e.accepts}")

256

print(f"At line {e.line}, column {e.column}")

257

258

# Get context

259

context = e.get_context(text)

260

print(f"Context:\n{context}")

261

262

except UnexpectedCharacters as e:

263

print(f"Unexpected character at position {e.pos_in_stream}")

264

print(f"At line {e.line}, column {e.column}")

265

print(f"Expected one of: {e.allowed}")

266

```

267

268

### Error Recovery with Interactive Parser

269

270

```python

271

from lark import Lark, UnexpectedToken

272

273

parser = Lark(grammar, parser='lalr') # Required for interactive parsing

274

275

try:

276

result = parser.parse(text)

277

except UnexpectedToken as e:

278

if e.interactive_parser:

279

# Use interactive parser for recovery

280

interactive = e.interactive_parser

281

282

# See what tokens are acceptable

283

acceptable = interactive.accepts()

284

print(f"Acceptable tokens: {acceptable}")

285

286

# Try to recover by feeding a valid token

287

if 'SEMICOLON' in acceptable:

288

from lark import Token

289

recovery_token = Token('SEMICOLON', ';')

290

interactive.feed_token(recovery_token)

291

292

# Continue parsing

293

try:

294

result = interactive.resume_parse()

295

print("Successfully recovered!")

296

except Exception as recovery_error:

297

print(f"Recovery failed: {recovery_error}")

298

```

299

300

### Custom Error Messages with Examples

301

302

```python

303

from lark import Lark, UnexpectedToken

304

305

# Define error examples for better error messages

306

error_examples = {

307

"missing_semicolon": "x = 1", # Missing semicolon

308

"unclosed_paren": "f(x", # Unclosed parenthesis

309

"invalid_operator": "x + + y", # Double operator

310

}

311

312

def parse_with_examples(parser, text):

313

try:

314

return parser.parse(text)

315

except UnexpectedToken as e:

316

# Try to match against examples

317

example_name = e.match_examples(

318

parser.parse,

319

error_examples,

320

use_accepts=True

321

)

322

323

if example_name:

324

print(f"Error type: {example_name}")

325

if example_name == "missing_semicolon":

326

print("Hint: Add a semicolon at the end of the statement")

327

elif example_name == "unclosed_paren":

328

print("Hint: Check for unmatched parentheses")

329

330

raise # Re-raise the original exception

331

332

parser = Lark(grammar)

333

result = parse_with_examples(parser, "x = 1") # Will trigger missing_semicolon

334

```

335

336

### Handling Visitor Errors

337

338

```python

339

from lark import Transformer, VisitError

340

341

class MyTransformer(Transformer):

342

def some_rule(self, children):

343

# This might raise an exception

344

result = risky_operation(children[0])

345

return result

346

347

transformer = MyTransformer()

348

349

try:

350

result = transformer.transform(tree)

351

except VisitError as e:

352

print(f"Error in rule '{e.rule}': {e.orig_exc}")

353

print(f"Tree node: {e.tree}")

354

355

# Handle specific original exception types

356

if isinstance(e.orig_exc, ValueError):

357

print("Value error during transformation")

358

elif isinstance(e.orig_exc, KeyError):

359

print("Key error during transformation")

360

```

361

362

### Using Discard for Node Removal

363

364

```python

365

from lark import Transformer, Discard

366

367

class FilterTransformer(Transformer):

368

def comment(self, children):

369

# Remove comment nodes from tree

370

raise Discard()

371

372

def empty_statement(self, children):

373

# Remove empty statements

374

if not children or all(c.strip() == '' for c in children):

375

raise Discard()

376

return children

377

378

# Apply transformer to remove unwanted nodes

379

filter_transformer = FilterTransformer()

380

cleaned_tree = filter_transformer.transform(original_tree)

381

```

382

383

### Grammar Error Handling

384

385

```python

386

from lark import Lark, GrammarError, ConfigurationError

387

388

try:

389

# Invalid grammar syntax

390

parser = Lark("""

391

start: expr

392

expr: NUMBER + # Invalid rule syntax

393

""")

394

except GrammarError as e:

395

print(f"Grammar error: {e}")

396

397

try:

398

# Invalid configuration

399

parser = Lark(grammar, parser='invalid_parser')

400

except ConfigurationError as e:

401

print(f"Configuration error: {e}")

402

```

403

404

### Comprehensive Error Handling

405

406

```python

407

from lark import (Lark, LarkError, ParseError, LexError, GrammarError,

408

ConfigurationError, UnexpectedInput, UnexpectedToken,

409

UnexpectedCharacters, UnexpectedEOF, VisitError)

410

411

def safe_parse(grammar_text, input_text):

412

"""Safely parse with comprehensive error handling."""

413

414

try:

415

# Create parser

416

parser = Lark(grammar_text)

417

418

# Parse input

419

tree = parser.parse(input_text)

420

421

return tree, None

422

423

except GrammarError as e:

424

return None, f"Grammar definition error: {e}"

425

426

except ConfigurationError as e:

427

return None, f"Parser configuration error: {e}"

428

429

except UnexpectedEOF as e:

430

return None, f"Unexpected end of input. Expected: {e.expected}"

431

432

except UnexpectedCharacters as e:

433

context = e.get_context(input_text)

434

return None, f"Unexpected characters at line {e.line}:\n{context}"

435

436

except UnexpectedToken as e:

437

context = e.get_context(input_text)

438

return None, f"Unexpected token '{e.token.value}' at line {e.line}. Expected: {e.accepts}\n{context}"

439

440

except VisitError as e:

441

return None, f"Error processing rule '{e.rule}': {e.orig_exc}"

442

443

except ParseError as e:

444

return None, f"Parse error: {e}"

445

446

except LexError as e:

447

return None, f"Lexical error: {e}"

448

449

except LarkError as e:

450

return None, f"Lark error: {e}"

451

452

# Usage

453

tree, error = safe_parse(my_grammar, my_input)

454

if error:

455

print(f"Error: {error}")

456

else:

457

print("Parsing successful!")

458

print(tree.pretty())

459

```

460

461

### Error Context Formatting

462

463

```python

464

from lark import UnexpectedInput

465

466

def format_error_context(error: UnexpectedInput, text: str, span: int = 60):

467

"""Format error with enhanced context information."""

468

469

context = error.get_context(text, span)

470

471

# Add line numbers to context

472

lines = context.split('\n')

473

formatted_lines = []

474

475

for i, line in enumerate(lines):

476

line_num = error.line + i - 1 # Adjust for context

477

if '^' in line: # Error pointer line

478

formatted_lines.append(f" {line}")

479

else:

480

formatted_lines.append(f"{line_num:4d}: {line}")

481

482

return '\n'.join(formatted_lines)

483

484

# Usage with any UnexpectedInput exception

485

try:

486

parser.parse(text)

487

except UnexpectedInput as e:

488

formatted_context = format_error_context(e, text)

489

print(f"Parse error at line {e.line}, column {e.column}:")

490

print(formatted_context)

491

```