or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

core-parsing.md · exceptions.md · index.md · tokens-lexing.md · tree-processing.md · utilities.md

docs/core-parsing.md

# Core Parsing

Main parsing functionality providing the primary interface for creating parsers, configuring parsing behavior, and parsing text according to grammar definitions.

## Capabilities

### Main Parser Interface

The Lark class serves as the primary interface for the parsing library, coordinating grammar loading, lexer configuration, and parse tree generation.

```python { .api }
class Lark:
    def __init__(self, grammar: str, **options):
        """
        Initialize parser with grammar and options.

        Parameters:
        - grammar: EBNF grammar string or file path
        - **options: Configuration options (see LarkOptions)
        """

    def parse(self, text: str, start: str = None, on_error=None) -> Tree:
        """
        Parse text according to grammar.

        Parameters:
        - text: Input text to parse
        - start: Starting rule (overrides grammar start)
        - on_error: Error callback function

        Returns:
        Tree: Parse tree root
        """

    def parse_interactive(self, text: str = None, start: str = None):
        """
        Start interactive parsing session for error recovery.

        Parameters:
        - text: Input text (optional for incremental parsing)
        - start: Starting rule

        Returns:
        InteractiveParser: Interactive parser instance
        """

    def lex(self, text: str, dont_ignore: bool = False) -> Iterator[Token]:
        """
        Tokenize text without parsing.

        Parameters:
        - text: Input text to tokenize
        - dont_ignore: Include normally ignored tokens

        Returns:
        Iterator[Token]: Token stream
        """

    def get_terminal(self, name: str):
        """
        Get terminal definition by name.

        Parameters:
        - name: Terminal name

        Returns:
        Terminal definition
        """

    def save(self, f):
        """
        Save parser instance to file for caching.

        Parameters:
        - f: File object to write to
        """

    @classmethod
    def load(cls, f):
        """
        Load parser instance from file.

        Parameters:
        - f: File object to read from

        Returns:
        Lark: Loaded parser instance
        """

    @classmethod
    def open(cls, grammar_filename: str, rel_to: str = None, **options):
        """
        Create parser from grammar file.

        Parameters:
        - grammar_filename: Path to grammar file
        - rel_to: Base path for relative imports
        - **options: Parser options

        Returns:
        Lark: Parser instance
        """

    @classmethod
    def open_from_package(cls, package: str, grammar_path: str,
                          search_paths: Tuple[str, ...] = ("",), **options):
        """
        Load grammar from Python package.

        Parameters:
        - package: Package name
        - grammar_path: Path within package
        - search_paths: Search paths for imports
        - **options: Parser options

        Returns:
        Lark: Parser instance
        """

    # Properties
    source_path: Optional[str]    # Grammar source file path
    source_grammar: str           # Original grammar string
    grammar: Grammar              # Compiled grammar object
    options: LarkOptions          # Parser configuration
    terminals: List[TerminalDef]  # Terminal definitions
    rules: List[Rule]             # Grammar rules
```

### Parser Configuration

Configuration options controlling parsing behavior, algorithm selection, and feature enablement.

```python { .api }
class LarkOptions:
    """
    Configuration options for Lark parser.
    """

    # General Options
    start: Union[str, List[str]]                # Start symbol(s)
    debug: bool                                 # Enable debug output
    transformer: Optional[Transformer]          # Auto-apply transformer
    propagate_positions: Union[bool, Callable]  # Position propagation
    maybe_placeholders: bool                    # [] operator behavior
    cache: Union[bool, str]                     # Cache grammar analysis
    regex: bool                                 # Use regex module
    g_regex_flags: int                          # Global regex flags
    keep_all_tokens: bool                       # Keep punctuation tokens
    tree_class: type                            # Custom tree class

    # Algorithm Options
    parser: str     # "earley", "lalr", "cyk"
    lexer: str      # Lexer type
    ambiguity: str  # Ambiguity handling

    # Lexer types:
    # - "auto": Choose based on parser
    # - "standard": Standard lexer
    # - "contextual": Context-sensitive (LALR only)
    # - "dynamic": Flexible (Earley only)
    # - "dynamic_complete": All tokenization variants

    # Ambiguity handling (Earley only):
    # - "resolve": Automatic resolution
    # - "explicit": Wrap in _ambig nodes
    # - "forest": Return shared packed parse forest

    # Domain Specific Options
    postlex: Optional[PostLex]            # Lexer post-processing
    priority: str                         # Priority evaluation
    lexer_callbacks: Dict[str, Callable]  # Token callbacks
    use_bytes: bool                       # Accept bytes input
    edit_terminals: Optional[Callable]    # Terminal editing callback
```

### Interactive Parsing

Step-by-step parsing with error recovery and incremental input processing.

```python { .api }
class InteractiveParser:
    """
    Interactive parser for step-by-step parsing and error recovery.
    Provides advanced control over parsing and error handling with LALR.
    """

    def feed_token(self, token: Token):
        """
        Feed parser with a token and advance to next state.

        Parameters:
        - token: Token instance to process

        Note: token must be an instance of Token class
        """

    def exhaust_lexer(self) -> None:
        """
        Feed remaining lexer state into interactive parser.
        Modifies instance in place, does not feed '$END' token.
        """

    def feed_eof(self, last_token: Token = None):
        """
        Feed '$END' token to parser.

        Parameters:
        - last_token: Token to borrow position from (optional)
        """

    def accepts(self) -> Set[str]:
        """
        Get set of token types that will advance parser to valid state.

        Returns:
        Set[str]: Set of acceptable token type names
        """

    def choices(self) -> Dict[str, Any]:
        """
        Get dictionary of token types matched to parser actions.
        Only returns token types accepted by current state.

        Returns:
        Dict[str, Any]: Token types and their actions
        """

    def resume_parse(self):
        """
        Resume automated parsing from current state.

        Returns:
        Parse result from current position
        """

    def copy(self) -> 'InteractiveParser':
        """
        Create new interactive parser with separate state.

        Returns:
        InteractiveParser: Independent copy
        """

    def as_immutable(self) -> 'ImmutableInteractiveParser':
        """
        Convert to immutable interactive parser.

        Returns:
        ImmutableInteractiveParser: Immutable version
        """

    def pretty(self) -> str:
        """
        Print parser choices in readable format.

        Returns:
        str: Formatted choices and stack information
        """

class ImmutableInteractiveParser(InteractiveParser):
    """
    Immutable version of InteractiveParser.
    Operations create new instances instead of modifying in-place.
    """

    result: Any  # Parse result when parsing completes

    def feed_token(self, token: Token) -> 'ImmutableInteractiveParser':
        """
        Feed token and return new parser instance with updated state.

        Parameters:
        - token: Token to process

        Returns:
        ImmutableInteractiveParser: New parser instance
        """

    def exhaust_lexer(self) -> 'ImmutableInteractiveParser':
        """
        Feed remaining lexer state and return new parser instance.

        Returns:
        ImmutableInteractiveParser: New parser instance
        """

    def as_mutable(self) -> InteractiveParser:
        """
        Convert to mutable InteractiveParser.

        Returns:
        InteractiveParser: Mutable version
        """
```

### Post-Lexer Processing

Abstract base class for lexer post-processing, such as indentation handling.

```python { .api }
class PostLex:
    """
    Abstract base class for lexer post-processing.
    """

    def process(self, stream: Iterator[Token]) -> Iterator[Token]:
        """
        Process token stream after lexing.

        Parameters:
        - stream: Input token stream

        Returns:
        Iterator[Token]: Processed token stream
        """

    always_accept: Tuple[str, ...]  # Token types to always accept
```

### Grammar Loading

Functions and classes for loading and processing grammar definitions.

```python { .api }
class FromPackageLoader:
    """
    Loader for grammars stored in Python packages.
    """

    def __init__(self, package_root: str = ""):
        """
        Initialize package loader.

        Parameters:
        - package_root: Root package path
        """

    def __call__(self, base_path: str, grammar_path: str) -> Tuple[str, str]:
        """
        Load grammar from package.

        Parameters:
        - base_path: Base import path
        - grammar_path: Grammar file path

        Returns:
        Tuple[str, str]: (grammar_text, full_path)
        """
```

## Usage Examples

### Basic Grammar Definition

```python
from lark import Lark

# Simple arithmetic grammar
grammar = """
?start: sum

?sum: product
    | sum "+" product -> add
    | sum "-" product -> sub

?product: atom
    | product "*" atom -> mul
    | product "/" atom -> div

?atom: NUMBER -> number
    | "-" atom -> neg
    | "(" sum ")"

%import common.NUMBER
%import common.WS_INLINE
%ignore WS_INLINE
"""

parser = Lark(grammar)
result = parser.parse("3 + 4 * 2")
print(result.pretty())
```

### Parser Configuration

```python
from lark import Lark

# Configure parser with specific options
parser = Lark(
    grammar,
    parser='lalr',            # Use LALR parser
    lexer='standard',         # Standard lexer
    start='expression',       # Custom start rule
    debug=True,               # Enable debug output
    keep_all_tokens=True,     # Keep all tokens
    propagate_positions=True  # Track positions
)
```

### Grammar from File

```python
from lark import Lark

# Load grammar from file
parser = Lark.open('my_grammar.lark', rel_to=__file__)

# Load from package
parser = Lark.open_from_package(
    'my_package.grammars',
    'grammar.lark',
    search_paths=('common',)
)
```

### Interactive Parsing

```python
from lark import Lark
from lark.exceptions import UnexpectedToken

parser = Lark(grammar)
interactive = parser.parse_interactive()

# Feed tokens incrementally
for token in parser.lex("1 + 2"):
    try:
        interactive.feed_token(token)
    except UnexpectedToken:
        # Handle error, possibly recover
        acceptable = interactive.accepts()
        print(f"Expected one of: {acceptable}")
```

### Caching for Performance

```python
from lark import Lark

# Cache to temporary file
parser = Lark(grammar, cache=True)

# Cache to specific file
parser = Lark(grammar, cache='my_grammar.cache')

# Manual save/load
parser.save(open('parser.cache', 'wb'))
cached_parser = Lark.load(open('parser.cache', 'rb'))
```