# AST and Object Models

Work with abstract syntax trees and structured parse results, including node creation, traversal, manipulation, and conversion to custom object models. TatSu provides flexible AST representation and object model generation capabilities.

## Capabilities

### AST Node Classes

Foundation classes for representing parse results as structured data with position information and manipulation methods.

```python { .api }
class AST(dict):
    """
    Dictionary-backed abstract syntax tree node that carries parse info.

    Features:
    - Values are stored and read as in a plain dictionary
    - Tracks parse position information
    - Can be converted to a JSON-serializable form
    - Can be frozen (made immutable) for optimization
    - Provides an automatic string representation
    """

    @property
    def frozen(self):
        """Whether this AST is frozen (immutable)."""

    @property
    def parseinfo(self):
        """Parse position information attached to this node."""

    # NOTE(review): the "deep copy" wording is taken from the existing docs;
    # confirm against tatsu.ast.AST.copy that nested nodes are really copied.
    def copy(self):
        """
        Return a deep copy of this AST node.

        Returns:
            AST: Deep copy of the node, child nodes included
        """

    def asjson(self):
        """
        Produce a JSON-serializable representation of this AST.

        Returns:
            dict: Dictionary that can be serialized to JSON
        """

    def set_parseinfo(self, parseinfo):
        """
        Attach parse position information to this node.

        Parameters:
        - parseinfo (ParseInfo): Parse position and context information
        """

    def _set(self, key, value):
        """Store a value under ``key``, taking the frozen state into account."""

    def _setlist(self, key, values):
        """Store a list value under ``key``, taking the frozen state into account."""
```

Usage example:

```python
import tatsu
from tatsu.ast import AST

# Raw string: the regex escape ``\d`` must reach TatSu unchanged, and in a
# normal string literal ``\d`` is an invalid escape sequence for Python.
grammar = r'''
    expr = term ("+" term)*;
    term = number;
    number = /\d+/;
'''

model = tatsu.compile(grammar)
result = model.parse("1 + 2 + 3")

# AST is a dictionary-like structure
# NOTE(review): this grammar has no named elements; confirm that the parse
# result really is an AST (and not a plain list/tuple) before relying on it.
print(isinstance(result, AST))  # True
print(result.keys())            # Access keys like a dictionary
print(result.asjson())          # Convert to JSON

# Access parse information if enabled
info = getattr(result, 'parseinfo', None)
if info:
    print(f"Parsed rule: {info.rule}")
    print(f"Position: {info.pos}-{info.endpos}")
```

### Node-Based Object Model

Structured node classes with parent-child relationships, position tracking, and typed representations.

```python { .api }
class Node:
    """
    Base node of a parse tree, linked to its parent and children.

    Features:
    - Keeps track of parent/child relationships
    - Exposes source position and line information
    - Preserves text content and comments
    - Offers tree traversal and manipulation methods
    - Can be converted to a JSON-serializable form
    """

    @property
    def parent(self):
        """Parent node in the parse tree."""

    @property
    def line(self):
        """Starting line number (1-based)."""

    @property
    def endline(self):
        """Ending line number (1-based)."""

    @property
    def col(self):
        """Starting column number (1-based)."""

    @property
    def context(self):
        """Parsing context information."""

    @property
    def text(self):
        """Source text covered by this node."""

    @property
    def comments(self):
        """Comments associated with this node."""

    def children(self):
        """
        Return the child nodes in the parse tree.

        Returns:
            list: List of direct child nodes
        """

    def children_list(self):
        """
        Return the child nodes as one flattened list.

        Returns:
            list: Flattened list including nested children
        """

    def children_set(self):
        """
        Return the child nodes as a set (no duplicates).

        Returns:
            set: Set of all child nodes
        """

    def text_lines(self):
        """
        Return the source text lines for this node.

        Returns:
            list: List of source text lines covered by this node
        """

    def line_index(self):
        """
        Return line index information for this node.

        Returns:
            LineInfo: Detailed line position information
        """

    def asjson(self):
        """
        Produce a JSON-serializable representation of this node.

        Returns:
            dict: JSON representation of the node and its children
        """
```

Usage example:

```python
import tatsu
from tatsu.semantics import ModelBuilderSemantics
from tatsu.objectmodel import Node

# Raw string: the regex escape ``\d`` must reach TatSu unchanged, and in a
# normal string literal ``\d`` is an invalid escape sequence for Python.
grammar = r'''
    program = statement*;
    statement = assignment | expression;
    assignment = identifier "=" expression;
    expression = identifier | number;
    identifier = /[a-zA-Z][a-zA-Z0-9]*/;
    number = /\d+/;
'''

# Use ModelBuilderSemantics to create Node objects
# NOTE(review): ModelBuilderSemantics appears to build typed nodes only for
# rules annotated with a class name (``rule::ClassName``); this grammar has
# none, so confirm that ``result`` really is a Node.
model = tatsu.compile(grammar)
result = model.parse("x = 42", semantics=ModelBuilderSemantics())

# Work with Node objects
print(isinstance(result, Node))  # True
print(result.children())         # Get child nodes
print(result.text)               # Get source text


# Tree traversal
def print_tree(node, depth=0):
    """Print ``node`` and, recursively, each child that is a Node.

    Parameters:
    - node: Node to print; its ``children()`` are walked
    - depth (int): Nesting level, used as the number of indent characters
    """
    indent = " " * depth
    print(f"{indent}{node.__class__.__name__}: {node}")
    for child in node.children():
        # Non-Node children are skipped rather than printed.
        if isinstance(child, Node):
            print_tree(child, depth + 1)


print_tree(result)
```

### Custom Object Models

Create domain-specific object models with typed nodes and custom behavior.

```python { .api }
# Custom node base class
class CustomNode(Node):
    """Custom base node with additional functionality.

    Runs ``_validate()`` right after construction and renders instances as
    ``ClassName({...public attributes...})``.
    """

    def __init__(self, **kwargs):
        """Initialize through ``Node.__init__`` and then validate.

        Parameters:
        - **kwargs: Forwarded unchanged to ``Node.__init__``
        """
        super().__init__(**kwargs)
        # NOTE(review): subclasses in this document assign their own fields
        # after calling super().__init__(), so _validate() runs before those
        # fields exist on the instance.
        self._validate()

    def _validate(self):
        """Override to add validation logic; the default accepts everything."""

    def __repr__(self):
        """Return ``ClassName({...})`` listing the public instance attributes."""
        # dict(self) requires a mapping or an iterable of pairs, which Node is
        # not shown to be; use the instance attributes instead, skipping
        # underscore-prefixed ones the same way model_to_ast does.
        public = {
            name: value
            for name, value in vars(self).items()
            if not name.startswith('_')
        }
        return f"{self.__class__.__name__}({public})"

# Example domain-specific nodes
class Expression(CustomNode):
    """Common ancestor of every expression node; adds no behavior itself."""

class BinaryOperation(Expression):
    """Binary operation with left/right operands."""

    def __init__(self, left=None, operator=None, right=None, **kwargs):
        """Create the operation node.

        Parameters:
        - left: Left operand (a node or a plain value)
        - operator: Operator symbol; ``evaluate`` handles '+' and '*'
        - right: Right operand (a node or a plain value)
        - **kwargs: Forwarded to the parent initializer
        """
        super().__init__(**kwargs)
        self.left = left
        self.operator = operator
        self.right = right

    def evaluate(self, context=None):
        """Evaluate the binary operation.

        An operand that has an ``evaluate`` attribute is evaluated with the
        same ``context``; any other operand is used as-is.

        Parameters:
        - context: Passed through to the operands' ``evaluate`` calls

        Returns:
            Result of applying the operator to both operand values

        Raises:
            ValueError: If the operator is not supported
        """
        left_val = self.left.evaluate(context) if hasattr(self.left, 'evaluate') else self.left
        right_val = self.right.evaluate(context) if hasattr(self.right, 'evaluate') else self.right

        if self.operator == '+':
            return left_val + right_val
        if self.operator == '*':
            return left_val * right_val
        # Add more operators as needed

        # Fail loudly instead of falling through and returning None, which
        # would only surface later as an unrelated error in the caller.
        raise ValueError(f"Unsupported operator: {self.operator!r}")

class Literal(Expression):
    """Expression node that wraps a constant value."""

    def __init__(self, value=None, **kwargs):
        """Store ``value``; remaining keyword arguments go to the parent."""
        super().__init__(**kwargs)
        self.value = value

    def evaluate(self, context=None):
        """Return the wrapped value; ``context`` is not used."""
        return self.value
275
```
276
277
### AST Transformation and Manipulation
278
279
Transform and modify AST structures for optimization, analysis, and code generation.
280
281
```python { .api }
282
class ASTTransformer:
    """Base class for AST transformation operations.

    Subclasses add ``visit_<ClassName>`` methods; ``visit`` dispatches on the
    class name of the node and falls back to ``generic_visit``.
    """

    def transform(self, ast):
        """
        Transform an AST node and its children.

        Parameters:
        - ast: AST node to transform

        Returns:
            Transformed AST node
        """
        # Return the visitor's result so callers receive the transformed
        # tree instead of None.
        return self.visit(ast)

    def visit(self, node):
        """Visit a single node for transformation.

        Parameters:
        - node: Node to dispatch on

        Returns:
            Whatever the selected ``visit_*`` method (or ``generic_visit``)
            returns
        """
        method_name = f'visit_{node.__class__.__name__}'
        visitor = getattr(self, method_name, self.generic_visit)
        return visitor(node)

    def generic_visit(self, node):
        """Default visitor that processes children.

        Visits every child of a ``Node``; the children's results are
        discarded and ``node`` itself is returned unchanged.
        """
        if isinstance(node, Node):
            for child in node.children():
                self.visit(child)
        return node
308
309
# Example transformations
310
class ConstantFolding(ASTTransformer):
    """Replace operations on two literals with the computed literal."""

    def visit_BinaryOperation(self, node):
        """Fold ``node`` when both operands are literals.

        The operands are transformed first and written back onto ``node``.
        For '+' and '*' with two ``Literal`` operands a new ``Literal``
        holding the result is returned; otherwise ``node`` is returned.
        """
        # Operands first, so nested constant expressions collapse bottom-up
        node.left = self.visit(node.left)
        node.right = self.visit(node.right)

        lhs, rhs = node.left, node.right
        if not (isinstance(lhs, Literal) and isinstance(rhs, Literal)):
            return node

        if node.operator == '+':
            return Literal(value=lhs.value + rhs.value)
        if node.operator == '*':
            return Literal(value=lhs.value * rhs.value)
        return node

class DeadCodeElimination(ASTTransformer):
    """Drop branches that can never execute."""

    def visit_ConditionalStatement(self, node):
        """Replace a conditional with a constant condition by its live branch.

        Returns the transformed ``then_branch`` for a truthy literal
        condition, the transformed ``else_branch`` (or ``None`` when there
        is none) for a falsy one, and ``node`` itself otherwise.
        """
        node.condition = self.visit(node.condition)
        condition = node.condition

        # Only a constant condition lets us pick a branch statically
        if not isinstance(condition, Literal):
            return node

        if condition.value:
            return self.visit(node.then_branch)
        if node.else_branch:
            return self.visit(node.else_branch)
        return None
341
```
342
343
### AST Analysis and Validation
344
345
Analyze AST structures for semantic correctness, type checking, and code quality.
346
347
```python { .api }
348
class ASTAnalyzer:
    """Common base for analyzers that walk an AST and collect findings."""

    def __init__(self):
        """Start with empty error and warning lists."""
        self.errors = []
        self.warnings = []

    def analyze(self, ast):
        """
        Walk an AST and report what was found.

        Parameters:
        - ast: AST node to analyze

        Returns:
            AnalysisResult: Built from this analyzer's errors and warnings
        """
        self.visit(ast)
        return AnalysisResult(self.errors, self.warnings)

    def visit(self, node):
        """Dispatch ``node`` to ``visit_<ClassName>`` or to ``generic_visit``."""
        handler_name = 'visit_' + node.__class__.__name__
        handler = getattr(self, handler_name, self.generic_visit)
        return handler(node)

    def generic_visit(self, node):
        """Fallback visitor: visit every child of a ``Node``, ignore the rest."""
        if not isinstance(node, Node):
            return
        for child in node.children():
            self.visit(child)

class TypeChecker(ASTAnalyzer):
    """Analyzer that reports operand type mismatches."""

    def __init__(self):
        """Set up the base analyzer state plus an empty symbol table."""
        super().__init__()
        self.symbol_table = {}

    def visit_BinaryOperation(self, node):
        """Record an error when the operand types of ``node`` differ.

        Only the two direct operands are inspected; this method does not
        visit them, so nested operations are not descended into from here.
        """
        left_type = self.get_type(node.left)
        right_type = self.get_type(node.right)

        if left_type == right_type:
            return
        self.errors.append(f"Type mismatch: {left_type} {node.operator} {right_type}")

    def get_type(self, node):
        """Return the type name of a ``Literal``'s value, else ``'unknown'``."""
        # Add more type inference logic
        return type(node.value).__name__ if isinstance(node, Literal) else 'unknown'

class AnalysisResult:
    """Outcome of an AST analysis: the collected errors and warnings."""

    def __init__(self, errors, warnings):
        """Keep references to the given ``errors`` and ``warnings``."""
        self.warnings = warnings
        self.errors = errors

    @property
    def is_valid(self):
        """True when the analysis recorded no errors (warnings are ignored)."""
        return not len(self.errors)
412
```
413
414
### Compatibility and Legacy Support
415
416
```python { .api }
417
# Backward compatibility alias
418
ParseModel = Node  # second name bound to the very same class object as Node
419
420
# Legacy AST conversion
421
def ast_to_model(ast_node):
    """Convert a legacy AST into a Node-based model.

    An ``AST`` becomes a fresh ``Node`` whose attributes mirror the AST's
    items, a list is converted element by element, and any other value is
    returned unchanged.
    """
    if isinstance(ast_node, AST):
        # Convert AST dict to Node object
        model = Node()
        for name, item in ast_node.items():
            # The recursive call hands back non-AST, non-list values as-is,
            # so every item can go through it.
            setattr(model, name, ast_to_model(item))
        return model

    if isinstance(ast_node, list):
        return [ast_to_model(element) for element in ast_node]

    return ast_node

def model_to_ast(node):
    """Convert a Node-based model into a legacy AST.

    A ``Node`` becomes an ``AST`` holding its instance attributes whose
    names do not start with an underscore, a list is converted element by
    element, and any other value is returned unchanged.
    """
    if isinstance(node, Node):
        converted = AST()
        for name, value in vars(node).items():
            if name.startswith('_'):
                continue
            converted[name] = model_to_ast(value)
        return converted

    if isinstance(node, list):
        return [model_to_ast(element) for element in node]

    return node
449
```
450
451
## Integration Examples
452
453
### Using AST with Semantic Actions
454
455
```python
456
class ASTBuildingSemantics:
    """Semantic actions that assemble a custom AST while parsing."""

    def binary_expr(self, ast):
        """Chain ``(operator, operand)`` pairs into nested operations.

        ``ast[0]`` is the first operand and ``ast[1]`` the pairs that follow.
        Each pair wraps the tree built so far as the left side of a new
        ``BinaryOperation``; with no pairs the first operand is returned.
        """
        tree = ast[0]
        pairs = ast[1]

        for operator, operand in pairs:
            tree = BinaryOperation(left=tree, operator=operator, right=operand)
        return tree

    def number(self, ast):
        """Wrap the parsed text, converted with ``int``, in a ``Literal``."""
        return Literal(value=int(ast))
473
474
# Usage
475
# NOTE(review): `grammar` is not defined in this snippet; presumably it is a
# grammar with `binary_expr` and `number` rules defined elsewhere — confirm.
model = tatsu.compile(grammar)
semantics = ASTBuildingSemantics()
result = model.parse("2 + 3 * 4", semantics=semantics)

# Transform and analyze
transformer = ConstantFolding()
optimized = transformer.transform(result)

analyzer = TypeChecker()
analysis = analyzer.analyze(optimized)

# Only evaluate a tree that passed analysis
if analysis.is_valid:
    print("AST is valid")
    print(f"Result: {optimized.evaluate()}")
488
```