0
# Core AST Processing
1
2
This section covers the main classes for annotating abstract syntax trees (ASTs) with source code positions and for extracting the source text of AST nodes. These classes provide the primary functionality for mapping between AST structures and their corresponding source code.
3
4
## Capabilities
5
6
### ASTTokens Class
7
8
The primary class for working with tokenized source code and AST nodes. It maintains source code in multiple forms and marks AST nodes with token information.
9
10
```python { .api }
11
class ASTTokens:
12
def __init__(self, source_text, parse=False, tree=None, filename='<unknown>', tokens=None):
13
"""
14
Initialize ASTTokens with source code.
15
16
Parameters:
17
- source_text (str): Unicode or UTF8-encoded source code
18
- parse (bool): If True, parses source_text with ast.parse()
19
- tree (ast.Module, optional): AST tree to annotate with tokens
20
- filename (str): Filename for error reporting
21
- tokens (Iterable, optional): Pre-generated tokens
22
"""
23
24
@property
25
def text(self) -> str:
26
"""The source code passed to constructor."""
27
28
@property
29
def tokens(self) -> List[Token]:
30
"""List of tokens from source code."""
31
32
@property
33
def tree(self) -> Optional[ast.Module]:
34
"""Root AST tree (parsed or provided)."""
35
36
@property
37
def filename(self) -> str:
38
"""Filename that was parsed."""
39
40
def mark_tokens(self, root_node):
41
"""
42
Mark AST nodes with .first_token and .last_token attributes.
43
44
Parameters:
45
- root_node (ast.Module): Root of AST tree to mark
46
"""
47
48
def get_text(self, node, padded=True) -> str:
49
"""
50
Get source text corresponding to AST node.
51
52
Parameters:
53
- node (ast.AST): AST node to get text for
54
- padded (bool): Include leading whitespace for multiline statements
55
56
Returns:
57
str: Source text for the node; an empty string for nodes (such as Load) that do not correspond to any source text
58
"""
59
60
def get_text_range(self, node, padded=True) -> Tuple[int, int]:
61
"""
62
Get character positions in source text for AST node.
63
64
Parameters:
65
- node (ast.AST): AST node to get range for
66
- padded (bool): Include leading whitespace for multiline statements
67
68
Returns:
69
Tuple[int, int]: (startpos, endpos) character offsets
70
"""
71
72
def get_text_positions(self, node, padded) -> Tuple[Tuple[int, int], Tuple[int, int]]:
73
"""
74
Get line/column positions for AST node.
75
76
Parameters:
77
- node (ast.AST): AST node to get positions for
78
- padded (bool): Include leading whitespace for multiline statements
79
80
Returns:
81
Tuple[Tuple[int, int], Tuple[int, int]]: ((start_line, start_col), (end_line, end_col))
82
"""
83
```
84
85
#### Usage Example
86
87
```python
88
import asttokens
89
import ast
90
91
# Parse and annotate source code
92
source = "def hello(name):\n return f'Hello, {name}!'"
93
atok = asttokens.ASTTokens(source, parse=True)
94
95
# Find function definition node
96
func_node = atok.tree.body[0]
97
print(atok.get_text(func_node)) # Gets full function text
98
99
# Get specific parts
100
name_param = func_node.args.args[0]
101
print(atok.get_text(name_param)) # Gets just 'name'
102
103
# Access token attributes added by mark_tokens
104
print(func_node.first_token.string) # 'def'
105
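print(func_node.last_token.string) # "'" on Python 3.12+ (the f-string's closing-quote token); the whole f-string literal on earlier versions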
106
```
107
108
### ASTText Class
109
110
Performance-optimized alternative that uses AST position information when available, falling back to full tokenization only when necessary.
111
112
```python { .api }
113
class ASTText:
114
def __init__(self, source_text, tree=None, filename='<unknown>'):
115
"""
116
Initialize ASTText with source code.
117
118
Parameters:
119
- source_text (str): Source code to analyze
120
- tree (ast.Module, optional): Parsed AST for source_text; if not provided, it is parsed from source_text
121
- filename (str): Filename for error reporting
122
"""
123
124
@property
125
def tree(self) -> ast.Module:
126
"""AST tree (parsed if not provided)."""
127
128
@property
129
def asttokens(self) -> ASTTokens:
130
"""Fallback ASTTokens instance (lazy-loaded)."""
131
132
def get_text(self, node, padded=True) -> str:
133
"""
134
Get source text for AST node (same interface as ASTTokens).
135
136
Parameters:
137
- node (ast.AST): AST node to get text for
138
- padded (bool): Include leading whitespace for multiline statements
139
140
Returns:
141
str: Source text for the node
142
"""
143
144
def get_text_range(self, node, padded=True) -> Tuple[int, int]:
145
"""
146
Get character positions for AST node (same interface as ASTTokens).
147
148
Parameters:
149
- node (ast.AST): AST node to get range for
150
- padded (bool): Include leading whitespace for multiline statements
151
152
Returns:
153
Tuple[int, int]: (startpos, endpos) character offsets
154
"""
155
156
def get_text_positions(self, node, padded) -> Tuple[Tuple[int, int], Tuple[int, int]]:
157
"""
158
Get line/column positions for AST node (same interface as ASTTokens).
159
160
Parameters:
161
- node (ast.AST): AST node to get positions for
162
- padded (bool): Include leading whitespace for multiline statements
163
164
Returns:
165
Tuple[Tuple[int, int], Tuple[int, int]]: ((start_line, start_col), (end_line, end_col))
166
"""
167
```
168
169
#### Usage Example
170
171
```python
172
import asttokens
173
import ast
174
175
source = "x = [1, 2, 3]"
176
tree = ast.parse(source)
177
178
# Use ASTText for better performance when supported
179
astext = asttokens.ASTText(source, tree=tree)
180
181
# Same interface as ASTTokens
182
assign_node = tree.body[0]
183
print(astext.get_text(assign_node)) # 'x = [1, 2, 3]'
184
185
# Falls back to full tokenization when needed
186
if not asttokens.supports_tokenless(assign_node):
187
    # Automatically uses .asttokens fallback
187
    text = astext.get_text(assign_node)
189
```
190
191