0
# Grammar System
1
2
The grammar system in parso provides fine-grained control over Python parsing, including version-specific grammars, error detection, code refactoring, and caching mechanisms. This is the foundation layer that powers the high-level parsing functions.
3
4
## Capabilities
5
6
### Grammar Classes
7
8
Core grammar classes that handle the parsing logic and provide access to advanced parsing features.
9
10
```python { .api }
11
class Grammar:
12
"""
13
Generic grammar class for parsing languages.
14
15
Attributes:
16
version_info (PythonVersionInfo): Python version information
17
"""
18
19
def parse(self, code=None, *, error_recovery=True, path=None, start_symbol=None,
20
cache=False, diff_cache=False, cache_path=None, file_io=None):
21
"""
22
Parse code using this grammar.
23
24
Args:
25
code (str | bytes, optional): Source code to parse
26
error_recovery (bool): Enable error recovery (default: True)
27
path (str | Path, optional): Path of the file to parse when code is not given; also needed for caching
28
start_symbol (str, optional): Grammar start symbol (default: 'file_input')
29
cache (bool): Enable pickle caching (default: False)
30
diff_cache (bool): Enable differential caching (default: False)
31
cache_path (str | Path, optional): Custom cache directory
32
file_io (FileIO, optional): File I/O handler
33
34
Returns:
35
NodeOrLeaf: Parsed syntax tree (typically Module)
36
37
Raises:
38
TypeError: If none of code, path, or file_io is provided
39
NotImplementedError: If error_recovery used with non-default start_symbol
40
ParserSyntaxError: If parsing fails and error_recovery is False
41
"""
42
43
def iter_errors(self, node):
44
"""
45
Find syntax and semantic errors in a parsed tree.
46
47
Args:
48
node (NodeOrLeaf): Root node to check for errors
49
50
Yields:
51
Issue: Error objects with position and message information
52
53
Raises:
54
ValueError: If no error normalizer is configured for this grammar
55
"""
56
57
def refactor(self, base_node, node_to_str_map):
58
"""
59
Refactor code by replacing nodes with new strings.
60
61
Args:
62
base_node (NodeOrLeaf): Root node to refactor
63
node_to_str_map (dict): Mapping of nodes to replacement strings
64
65
Returns:
66
str: Refactored code
67
"""
68
```
69
70
```python { .api }
71
class PythonGrammar(Grammar):
72
"""
73
Python-specific grammar implementation with tokenization and error detection.
74
75
Attributes:
76
version_info (PythonVersionInfo): Python version for this grammar
77
"""
78
79
def __init__(self, version_info, bnf_text):
80
"""
81
Initialize Python grammar.
82
83
Args:
84
version_info (PythonVersionInfo): Python version information
85
bnf_text (str): BNF grammar definition
86
"""
87
```
88
89
#### Usage Examples
90
91
```python
92
import parso
93
94
# Load and use grammar directly
95
grammar = parso.load_grammar(version="3.9")
96
97
# Parse with advanced options
98
module = grammar.parse(
99
'def example(): return 42',
100
error_recovery=True,
101
cache=True,
102
diff_cache=True
103
)
104
105
# Parse a code fragment with a custom start symbol
106
# Note: start_symbol only works with error_recovery=False
107
try:
108
expr = grammar.parse(
109
'1 + 2 * 3',
110
error_recovery=False,
111
start_symbol='expr'
112
)
113
except NotImplementedError:
114
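# Not reached with error_recovery=False; NotImplementedError is raised only when a non-default start_symbol is combined with error_recovery=True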
115
expr = grammar.parse('1 + 2 * 3', error_recovery=False, start_symbol='expr')
116
117
# Check version information
118
print(f"Grammar version: {grammar.version_info.major}.{grammar.version_info.minor}")
119
```
120
121
### Error Detection
122
123
Advanced error detection and analysis capabilities for finding syntax and semantic issues.
124
125
```python { .api }
126
def iter_errors(self, node):
127
"""
128
Generator yielding error objects for syntax and semantic issues.
129
130
Args:
131
node (NodeOrLeaf): Parsed tree to analyze
132
133
Yields:
134
Issue: Error objects with message, code, and position information
135
"""
136
```
137
138
#### Usage Examples
139
140
```python
141
import parso
142
143
grammar = parso.load_grammar()
144
145
# Parse code with multiple errors
146
code = '''
147
def function(: # Missing parameter name
148
x = 1 + # Incomplete expression
149
return x
150
151
continue # Continue outside loop
152
'''
153
154
module = grammar.parse(code)
155
errors = list(grammar.iter_errors(module))
156
157
for error in errors:
158
print(f"Line {error.start_pos[0]}: {error.message}")
159
print(f"Error code: {error.code}")
160
print(f"At position: {error.start_pos}")
161
162
# Filter errors by the text of their messages
163
syntax_errors = [e for e in errors if 'SyntaxError' in e.message]
164
semantic_errors = [e for e in errors if 'continue' in e.message.lower()]
165
```
166
167
### Code Refactoring
168
169
Refactor parsed code by replacing specific nodes with new content while preserving formatting.
170
171
```python { .api }
172
def refactor(self, base_node, node_to_str_map):
173
"""
174
Apply refactoring transformations to code.
175
176
Args:
177
base_node (NodeOrLeaf): Root node containing code to refactor
178
node_to_str_map (dict): Mapping from nodes to replacement strings
179
180
Returns:
181
str: Refactored source code with replacements applied
182
"""
183
```
184
185
#### Usage Examples
186
187
```python
188
import parso
189
190
grammar = parso.load_grammar()
191
module = grammar.parse('''
192
def old_function_name():
193
old_variable = 42
194
return old_variable
195
''')
196
197
# Find nodes to replace
198
function_node = module.children[0] # Function definition
199
func_name = function_node.name # Function name
200
suite = function_node.get_suite()
201
202
# Find variable nodes within the function
203
old_var_nodes = []
204
for name_node in module.get_used_names()['old_variable']:
205
if name_node.get_definition(): # Only definition, not usage
206
old_var_nodes.append(name_node)
207
208
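# Create refactoring map (only the function name is replaced here; add the nodes in old_var_nodes as keys to rename the variable as well)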
209
refactor_map = {
210
func_name: 'new_function_name',
211
}
212
213
# Apply refactoring
214
refactored_code = grammar.refactor(module, refactor_map)
215
print(refactored_code)
216
```
217
218
### Grammar Options and Configuration
219
220
Advanced parsing options for specific use cases and performance tuning.
221
222
#### Cache Configuration
223
224
```python
225
import parso
226
from pathlib import Path
227
228
grammar = parso.load_grammar()
229
230
# Custom cache directory
231
custom_cache = Path.home() / '.my_parso_cache'
232
module = grammar.parse(
233
path='script.py',
234
cache=True,
235
cache_path=custom_cache
236
)
237
238
# Differential caching for incremental parsing
239
module = grammar.parse(
240
path='large_file.py',
241
cache=True,
242
diff_cache=True # Only re-parse changed sections
243
)
244
```
245
246
#### Start Symbol Parsing
247
248
Parse specific grammar constructs instead of full modules:
249
250
```python
251
import parso
252
253
grammar = parso.load_grammar()
254
255
# Parse just an expression (requires error_recovery=False)
256
expr = grammar.parse('x + y * z', error_recovery=False, start_symbol='expr')
257
print(type(expr).__name__) # Should be expression node type
258
259
# Parse a statement
260
stmt = grammar.parse('x = 42', error_recovery=False, start_symbol='stmt')
261
262
# Parse function definition
263
func = grammar.parse(
264
'def example(a, b=None): return a + b',
265
error_recovery=False,
266
start_symbol='funcdef'
267
)
268
```
269
270
### Error Recovery vs Strict Parsing
271
272
Understanding when to use error recovery and when to require valid syntax.
273
274
#### Error Recovery Mode (Default)
275
276
```python
277
import parso
278
279
grammar = parso.load_grammar()
280
281
# Error recovery allows parsing of broken code
282
broken_code = '''
283
def function_with_syntax_error(:
284
pass
285
286
class MissingColon
287
pass
288
289
for item in # Missing iterable
290
print(item)
291
'''
292
293
# This succeeds and returns a tree with error nodes
294
module = grammar.parse(broken_code, error_recovery=True)
295
print(f"Parsed {len(module.children)} top-level items")
296
297
# Check for errors
298
errors = list(grammar.iter_errors(module))
299
print(f"Found {len(errors)} errors")
300
```
301
302
#### Strict Parsing Mode
303
304
```python
305
import parso
306
307
grammar = parso.load_grammar()
308
309
# Strict mode raises exceptions on syntax errors
310
try:
311
module = grammar.parse('def invalid(: pass', error_recovery=False)
312
except parso.ParserSyntaxError as e:
313
print(f"Parse failed: {e.message}")
314
print(f"Error at: {e.error_leaf.start_pos}")
315
316
# Use strict mode for validation
317
def validate_python_code(code):
318
"""Check if Python code is syntactically valid."""
319
try:
320
grammar = parso.load_grammar()
321
grammar.parse(code, error_recovery=False)
322
return True, None
323
except parso.ParserSyntaxError as e:
324
return False, str(e)
325
326
is_valid, error_msg = validate_python_code('def hello(): return "world"')
327
print(f"Valid: {is_valid}") # True
328
329
is_valid, error_msg = validate_python_code('def broken(: pass')
330
print(f"Valid: {is_valid}, Error: {error_msg}") # False, error message
331
```
332
333
### Version-Specific Grammar Features
334
335
Working with different Python versions and their specific grammar features.
336
337
```python
338
import parso
339
340
# Python 3.8 - walrus operator and positional-only parameters
341
grammar38 = parso.load_grammar(version="3.8")
342
module = grammar38.parse('''
343
def func(pos_only, /, normal, *, kw_only):
344
if (result := expensive_operation()) is not None:
345
return result
346
''')
347
348
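# Python 3.10 - match statements and union types (NOTE: confirm that your parso release's 3.10 grammar includes `match`; unsupported syntax is parsed into error nodes under error recovery)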
349
grammar310 = parso.load_grammar(version="3.10")
350
module = grammar310.parse('''
351
def process(value: int | str) -> str:
352
match value:
353
case int() if value > 0:
354
return "positive integer"
355
case str() if value:
356
return "non-empty string"
357
case _:
358
return "other"
359
''')
360
361
# Version compatibility checking
362
def parse_with_fallback(code, preferred_version="3.10"):
363
"""Parse code, falling back to older versions if needed."""
364
versions = ["3.10", "3.9", "3.8", "3.7", "3.6"]
365
start_idx = versions.index(preferred_version) if preferred_version in versions else 0
366
367
for version in versions[start_idx:]:
368
try:
369
grammar = parso.load_grammar(version=version)
370
return grammar.parse(code, error_recovery=False), version
371
except (parso.ParserSyntaxError, NotImplementedError):
372
continue
373
374
# No version parsed cleanly: fall back to error recovery mode with the 3.10 grammar
375
grammar = parso.load_grammar(version="3.10")
376
return grammar.parse(code, error_recovery=True), "3.10-recovery"
377
378
# Usage
379
result, version_used = parse_with_fallback('match x: case 1: pass')
380
print(f"Parsed with Python {version_used}")
381
```
382
383
## Advanced Integration Patterns
384
385
### Grammar Caching and Reuse
386
387
```python
388
import parso
389
390
class ParserManager:
391
"""Manage multiple grammars efficiently."""
392
393
def __init__(self):
394
self._grammars = {}
395
396
def get_grammar(self, version="3.9"):
397
"""Get cached grammar instance."""
398
if version not in self._grammars:
399
self._grammars[version] = parso.load_grammar(version=version)
400
return self._grammars[version]
401
402
def parse_file(self, path, version="3.9", **kwargs):
403
"""Parse file with cached grammar."""
404
grammar = self.get_grammar(version)
405
return grammar.parse(path=path, **kwargs)
406
407
# Usage
408
manager = ParserManager()
409
module1 = manager.parse_file("file1.py", cache=True)
410
module2 = manager.parse_file("file2.py", cache=True) # Reuses grammar
411
```
412
413
### Custom Error Handling
414
415
```python
416
import parso
417
418
def detailed_error_analysis(code, version="3.9"):
419
"""Comprehensive error analysis with categorization."""
420
grammar = parso.load_grammar(version=version)
421
module = grammar.parse(code)
422
errors = list(grammar.iter_errors(module))
423
424
categorized = {
425
'syntax': [],
426
'indentation': [],
427
'semantic': []
428
}
429
430
for error in errors:
431
message = error.message.lower()
432
if 'indentation' in message or 'indent' in message:
433
categorized['indentation'].append(error)
434
elif 'syntax' in message:
435
categorized['syntax'].append(error)
436
else:
437
categorized['semantic'].append(error)
438
439
return categorized, module
440
441
# Usage
442
errors, tree = detailed_error_analysis('''
443
def function():
444
pass # Wrong indentation
445
continue # Semantic error
446
def invalid(: pass # Syntax error
447
''')
448
449
for category, error_list in errors.items():
450
if error_list:
451
print(f"{category.title()} errors: {len(error_list)}")
452
for error in error_list:
453
print(f" Line {error.start_pos[0]}: {error.message}")
454
```