or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

core-parsing.md, error-handling.md, grammar-system.md, index.md, python-elements.md, tokenization.md, tree-navigation.md, utilities.md

docs/grammar-system.md

0

# Grammar System

1

2

The grammar system in parso provides fine-grained control over Python parsing, including version-specific grammars, error detection, code refactoring, and caching mechanisms. This is the foundation layer that powers the high-level parsing functions.

3

4

## Capabilities

5

6

### Grammar Classes

7

8

Core grammar classes that handle the parsing logic and provide access to advanced parsing features.

9

10

```python { .api }

11

class Grammar:

12

"""

13

Generic grammar class for parsing languages.

14

15

Attributes:

16

version_info (PythonVersionInfo): Python version information

17

"""

18

19

def parse(self, code=None, *, error_recovery=True, path=None, start_symbol=None,

20

cache=False, diff_cache=False, cache_path=None, file_io=None):

21

"""

22

Parse code using this grammar.

23

24

Args:

25

code (str | bytes, optional): Source code to parse

26

error_recovery (bool): Enable error recovery (default: True)

27

path (str | Path, optional): Path of the file to parse when code is not given; also used for caching

28

start_symbol (str, optional): Grammar start symbol (default: 'file_input')

29

cache (bool): Enable pickle caching (default: False)

30

diff_cache (bool): Enable differential caching (default: False)

31

cache_path (str | Path, optional): Custom cache directory

32

file_io (FileIO, optional): File I/O handler

33

34

Returns:

35

NodeOrLeaf: Parsed syntax tree (typically Module)

36

37

Raises:

38

TypeError: If neither code nor path is provided

39

NotImplementedError: If error_recovery is used with a non-default start_symbol

40

ParserSyntaxError: If parsing fails and error_recovery is False

41

"""

42

43

def iter_errors(self, node):

44

"""

45

Find syntax and semantic errors in a parsed tree.

46

47

Args:

48

node (NodeOrLeaf): Root node to check for errors

49

50

Yields:

51

Issue: Error objects with position and message information

52

53

Raises:

54

ValueError: If no error normalizer is configured for this grammar

55

"""

56

57

def refactor(self, base_node, node_to_str_map):

58

"""

59

Refactor code by replacing nodes with new strings.

60

61

Args:

62

base_node (NodeOrLeaf): Root node to refactor

63

node_to_str_map (dict): Mapping of nodes to replacement strings

64

65

Returns:

66

str: Refactored code

67

"""

68

```

69

70

```python { .api }

71

class PythonGrammar(Grammar):

72

"""

73

Python-specific grammar implementation with tokenization and error detection.

74

75

Attributes:

76

version_info (PythonVersionInfo): Python version for this grammar

77

"""

78

79

def __init__(self, version_info, bnf_text):

80

"""

81

Initialize Python grammar.

82

83

Args:

84

version_info (PythonVersionInfo): Python version information

85

bnf_text (str): BNF grammar definition

86

"""

87

```

88

89

#### Usage Examples

90

91

```python

92

import parso

93

94

# Load and use grammar directly

95

grammar = parso.load_grammar(version="3.9")

96

97

# Parse with advanced options

98

module = grammar.parse(

99

'def example(): return 42',

100

error_recovery=True,

101

cache=True,

102

diff_cache=True

103

)

104

105

# Parse a code string with a custom start symbol

106

# Note: a non-default start_symbol only works with error_recovery=False;
# leaving error_recovery at its default (True) raises NotImplementedError

107

try:

108

expr = grammar.parse(

109

'1 + 2 * 3',

110

error_recovery=False,

111

start_symbol='expr'

112

)

113

except NotImplementedError:

114

# start_symbol requires error_recovery=False

115

expr = grammar.parse('1 + 2 * 3', error_recovery=False, start_symbol='expr')

116

117

# Check version information

118

print(f"Grammar version: {grammar.version_info.major}.{grammar.version_info.minor}")

119

```

120

121

### Error Detection

122

123

Advanced error detection and analysis capabilities for finding syntax and semantic issues.

124

125

```python { .api }

126

def iter_errors(self, node):

127

"""

128

Generator yielding error objects for syntax and semantic issues.

129

130

Args:

131

node (NodeOrLeaf): Parsed tree to analyze

132

133

Yields:

134

Issue: Error objects with message, code, and position information

135

"""

136

```

137

138

#### Usage Examples

139

140

```python

141

import parso

142

143

grammar = parso.load_grammar()

144

145

# Parse code with multiple errors

146

code = '''

147

def function(: # Missing parameter name

148

x = 1 + # Incomplete expression

149

return x

150

151

continue # Continue outside loop

152

'''

153

154

module = grammar.parse(code)

155

errors = list(grammar.iter_errors(module))

156

157

for error in errors:

158

print(f"Line {error.start_pos[0]}: {error.message}")

159

print(f"Error code: {error.code}")

160

print(f"At position: {error.start_pos}")

161

162

# Handle specific error types

163

syntax_errors = [e for e in errors if 'SyntaxError' in e.message]

164

semantic_errors = [e for e in errors if 'continue' in e.message.lower()]

165

```

166

167

### Code Refactoring

168

169

Refactor parsed code by replacing specific nodes with new content while preserving formatting.

170

171

```python { .api }

172

def refactor(self, base_node, node_to_str_map):

173

"""

174

Apply refactoring transformations to code.

175

176

Args:

177

base_node (NodeOrLeaf): Root node containing code to refactor

178

node_to_str_map (dict): Mapping from nodes to replacement strings

179

180

Returns:

181

str: Refactored source code with replacements applied

182

"""

183

```

184

185

#### Usage Examples

186

187

```python

188

import parso

189

190

grammar = parso.load_grammar()

191

module = grammar.parse('''

192

def old_function_name():

193

old_variable = 42

194

return old_variable

195

''')

196

197

# Find nodes to replace

198

function_node = module.children[0] # Function definition

199

func_name = function_node.name # Function name

200

suite = function_node.get_suite()

201

202

# Find variable nodes within the function

203

old_var_nodes = []

204

for name_node in module.get_used_names()['old_variable']:

205

if name_node.get_definition(): # Only definition, not usage

206

old_var_nodes.append(name_node)

207

208

# Create refactoring map

209

refactor_map = {

210

func_name: 'new_function_name',

211

}

212

213

# Apply refactoring

214

refactored_code = grammar.refactor(module, refactor_map)

215

print(refactored_code)

216

```

217

218

### Grammar Options and Configuration

219

220

Advanced parsing options for specific use cases and performance tuning.

221

222

#### Cache Configuration

223

224

```python

225

import parso

226

from pathlib import Path

227

228

grammar = parso.load_grammar()

229

230

# Custom cache directory

231

custom_cache = Path.home() / '.my_parso_cache'

232

module = grammar.parse(

233

path='script.py',

234

cache=True,

235

cache_path=custom_cache

236

)

237

238

# Differential caching for incremental parsing

239

module = grammar.parse(

240

path='large_file.py',

241

cache=True,

242

diff_cache=True # Only re-parse changed sections

243

)

244

```

245

246

#### Start Symbol Parsing

247

248

Parse specific grammar constructs instead of full modules:

249

250

```python

251

import parso

252

253

grammar = parso.load_grammar()

254

255

# Parse just an expression (requires error_recovery=False)

256

expr = grammar.parse('x + y * z', error_recovery=False, start_symbol='expr')

257

print(type(expr).__name__) # Should be expression node type

258

259

# Parse a statement

260

stmt = grammar.parse('x = 42', error_recovery=False, start_symbol='stmt')

261

262

# Parse function definition

263

func = grammar.parse(

264

'def example(a, b=None): return a + b',

265

error_recovery=False,

266

start_symbol='funcdef'

267

)

268

```

269

270

### Error Recovery vs Strict Parsing

271

272

Understanding when to use error recovery and when to require valid syntax.

273

274

#### Error Recovery Mode (Default)

275

276

```python

277

import parso

278

279

grammar = parso.load_grammar()

280

281

# Error recovery allows parsing of broken code

282

broken_code = '''

283

def function_with_syntax_error(:

284

pass

285

286

class MissingColon

287

pass

288

289

for item in # Missing iterable

290

print(item)

291

'''

292

293

# This succeeds and returns a tree with error nodes

294

module = grammar.parse(broken_code, error_recovery=True)

295

print(f"Parsed {len(module.children)} top-level items")

296

297

# Check for errors

298

errors = list(grammar.iter_errors(module))

299

print(f"Found {len(errors)} errors")

300

```

301

302

#### Strict Parsing Mode

303

304

```python

305

import parso

306

307

grammar = parso.load_grammar()

308

309

# Strict mode raises exceptions on syntax errors

310

try:

311

module = grammar.parse('def invalid(: pass', error_recovery=False)

312

except parso.ParserSyntaxError as e:

313

print(f"Parse failed: {e.message}")

314

print(f"Error at: {e.error_leaf.start_pos}")

315

316

# Use strict mode for validation

317

def validate_python_code(code):

318

"""Check if Python code is syntactically valid."""

319

try:

320

grammar = parso.load_grammar()

321

grammar.parse(code, error_recovery=False)

322

return True, None

323

except parso.ParserSyntaxError as e:

324

return False, str(e)

325

326

is_valid, error_msg = validate_python_code('def hello(): return "world"')

327

print(f"Valid: {is_valid}") # True

328

329

is_valid, error_msg = validate_python_code('def broken(: pass')

330

print(f"Valid: {is_valid}, Error: {error_msg}") # False, error message

331

```

332

333

### Version-Specific Grammar Features

334

335

Working with different Python versions and their specific grammar features.

336

337

```python

338

import parso

339

340

# Python 3.8 - walrus operator and positional-only parameters

341

grammar38 = parso.load_grammar(version="3.8")

342

module = grammar38.parse('''

343

def func(pos_only, /, normal, *, kw_only):

344

if (result := expensive_operation()) is not None:

345

return result

346

''')

347

348

# Python 3.10 - match statements and union types

349

grammar310 = parso.load_grammar(version="3.10")

350

module = grammar310.parse('''

351

def process(value: int | str) -> str:

352

match value:

353

case int() if value > 0:

354

return "positive integer"

355

case str() if value:

356

return "non-empty string"

357

case _:

358

return "other"

359

''')

360

361

# Version compatibility checking

362

def parse_with_fallback(code, preferred_version="3.10"):

363

"""Parse code, falling back to older versions if needed."""

364

versions = ["3.10", "3.9", "3.8", "3.7", "3.6"]

365

start_idx = versions.index(preferred_version) if preferred_version in versions else 0

366

367

for version in versions[start_idx:]:

368

try:

369

grammar = parso.load_grammar(version=version)

370

return grammar.parse(code, error_recovery=False), version

371

except (parso.ParserSyntaxError, NotImplementedError):

372

continue

373

374

# Fall back to error recovery mode with latest version

375

grammar = parso.load_grammar(version="3.10")

376

return grammar.parse(code, error_recovery=True), "3.10-recovery"

377

378

# Usage

379

result, version_used = parse_with_fallback('match x: case 1: pass')

380

print(f"Parsed with Python {version_used}")

381

```

382

383

## Advanced Integration Patterns

384

385

### Grammar Caching and Reuse

386

387

```python

388

import parso

389

390

class ParserManager:

391

"""Manage multiple grammars efficiently."""

392

393

def __init__(self):

394

self._grammars = {}

395

396

def get_grammar(self, version="3.9"):

397

"""Get cached grammar instance."""

398

if version not in self._grammars:

399

self._grammars[version] = parso.load_grammar(version=version)

400

return self._grammars[version]

401

402

def parse_file(self, path, version="3.9", **kwargs):

403

"""Parse file with cached grammar."""

404

grammar = self.get_grammar(version)

405

return grammar.parse(path=path, **kwargs)

406

407

# Usage

408

manager = ParserManager()

409

module1 = manager.parse_file("file1.py", cache=True)

410

module2 = manager.parse_file("file2.py", cache=True) # Reuses grammar

411

```

412

413

### Custom Error Handling

414

415

```python

416

import parso

417

418

def detailed_error_analysis(code, version="3.9"):

419

"""Comprehensive error analysis with categorization."""

420

grammar = parso.load_grammar(version=version)

421

module = grammar.parse(code)

422

errors = list(grammar.iter_errors(module))

423

424

categorized = {

425

'syntax': [],

426

'indentation': [],

427

'semantic': []

428

}

429

430

for error in errors:

431

message = error.message.lower()

432

if 'indentation' in message or 'indent' in message:

433

categorized['indentation'].append(error)

434

elif 'syntax' in message:

435

categorized['syntax'].append(error)

436

else:

437

categorized['semantic'].append(error)

438

439

return categorized, module

440

441

# Usage

442

errors, tree = detailed_error_analysis('''

443

def function():

444

pass # Wrong indentation

445

continue # Semantic error

446

def invalid(: pass # Syntax error

447

''')

448

449

for category, error_list in errors.items():

450

if error_list:

451

print(f"{category.title()} errors: {len(error_list)}")

452

for error in error_list:

453

print(f" Line {error.start_pos[0]}: {error.message}")

454

```