# Position Utilities

Utilities for converting between different position representations (line/column vs character offsets) and working with source code positions. These utilities handle the complexities of Unicode text and provide compatibility across different position systems.

## Capabilities

### LineNumbers Class

Utility class for converting between character offsets and (line, column) positions in source text.

```python { .api }
class LineNumbers:
    def __init__(self, text):
        """
        Initialize with source text for position calculations.

        Parameters:
        - text (str): Source text to analyze
        """

    def line_to_offset(self, line, column) -> int:
        """
        Convert a line and column position to a character offset.

        Parameters:
        - line (int): Line number (1-based)
        - column (int): Column position (0-based)

        Returns:
        int: Character offset in source text
        """

    def offset_to_line(self, offset) -> Tuple[int, int]:
        """
        Convert a character offset to a line and column position.

        Parameters:
        - offset (int): Character offset in source text

        Returns:
        Tuple[int, int]: (line, column) where line is 1-based, column is 0-based
        """

    def from_utf8_col(self, line, utf8_column) -> int:
        """
        Convert a UTF-8 byte column to a Unicode character column.

        Parameters:
        - line (int): Line number (1-based)
        - utf8_column (int): Column position in UTF-8 bytes

        Returns:
        int: Column position in Unicode characters
        """
```

#### Usage Example

```python
import asttokens

source = "hello = 'world'\nprint(hello)"
line_numbers = asttokens.LineNumbers(source)

# Convert position to offset
offset = line_numbers.line_to_offset(1, 6)  # Line 1, column 6
print(source[offset])  # '=' (character at that position)

# Convert offset to position
line, col = line_numbers.offset_to_line(16)  # Character 16
print(f"Line {line}, Column {col}")  # Line 2, Column 0

# Handle UTF-8 encoding differences
source_utf8 = "café = 'délicious'"
line_numbers_utf8 = asttokens.LineNumbers(source_utf8)
unicode_col = line_numbers_utf8.from_utf8_col(1, 5)  # UTF-8 byte column 5
print(unicode_col)  # 4 ('é' is two bytes, so byte column 5 is character column 4)
```
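
These conversions matter in practice because CPython's `ast` reports `col_offset` as a UTF-8 byte offset, while Python strings index by character. A minimal sketch of bridging the two (the variable names are illustrative; the methods are the ones documented above):

```python
import ast
import asttokens

source = "café = 'value'"
tree = ast.parse(source)
line_numbers = asttokens.LineNumbers(source)

# ast's col_offset counts UTF-8 bytes: the string literal starts at byte
# column 8, although it is character column 7 ('é' occupies two bytes)
value_node = tree.body[0].value
char_col = line_numbers.from_utf8_col(value_node.lineno, value_node.col_offset)
offset = line_numbers.line_to_offset(value_node.lineno, char_col)
print(source[offset:])  # "'value'"
```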

### Tokenless Support Detection

Function to determine whether a node or the running Python version supports faster tokenless operations.

```python { .api }
def supports_tokenless(node=None) -> bool:
    """
    Check if the node or Python version supports tokenless operation.

    Parameters:
    - node (ast.AST, optional): Specific AST node to check

    Returns:
    bool: True if tokenless operation is supported
    """
```

#### Usage Example

```python
import asttokens
import ast

source = "x = [1, 2, 3]"
tree = ast.parse(source)

# Check general tokenless support
if asttokens.supports_tokenless():
    print("Python version supports tokenless operations")

# Check specific node support
assign_node = tree.body[0]
if asttokens.supports_tokenless(assign_node):
    print("This node supports tokenless operations")
    # Use ASTText for better performance
    astext = asttokens.ASTText(source, tree=tree)
    text = astext.get_text(assign_node)
else:
    print("Node requires full tokenization")
    # Use ASTTokens
    atok = asttokens.ASTTokens(source, tree=tree)
    text = atok.get_text(assign_node)
```
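
Continuing the example above: explicit branching like this is often unnecessary, since `ASTText` is designed to fall back to full tokenization internally when a node doesn't support tokenless operation. A sketch of relying on that behavior:

```python
# ASTText performs the tokenless/tokenized dispatch itself; get_text()
# works either way, paying the tokenization cost only when required
astext = asttokens.ASTText(source, tree=tree)
print(astext.get_text(assign_node))  # x = [1, 2, 3]
```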

### Token Utility Functions

Helper functions for working with tokens are available through the `asttokens.util` module. These provide token matching, type checking, and generation capabilities.

```python { .api }
# Available through asttokens.util module
from asttokens.util import (
    token_repr, match_token, expect_token, is_non_coding_token,
    generate_tokens, patched_generate_tokens
)

def token_repr(tok_type, string) -> str:
    """
    Create a human-readable representation of a token.

    Parameters:
    - tok_type (int): Token type from the token module
    - string (str): Token string content

    Returns:
    str: Human-friendly token representation
    """

def match_token(token, tok_type, tok_str=None) -> bool:
    """
    Check if a token matches the specified type and, optionally, string.

    Parameters:
    - token (Token): Token to check
    - tok_type (int): Expected token type
    - tok_str (str, optional): Expected token string

    Returns:
    bool: True if the token matches the criteria
    """

def expect_token(token, tok_type, tok_str=None):
    """
    Validate that a token matches the expected type/string; raise if not.

    Parameters:
    - token (Token): Token to validate
    - tok_type (int): Expected token type
    - tok_str (str, optional): Expected token string

    Raises:
    ValueError: If the token doesn't match expectations
    """

def is_non_coding_token(token_type) -> bool:
    """
    Check if a token type represents non-coding content.

    Parameters:
    - token_type (int): Token type to check

    Returns:
    bool: True for comments, newlines, and encoding declarations
    """

def generate_tokens(text) -> Iterator[tokenize.TokenInfo]:
    """
    Generate standard library tokens from source text.

    Parameters:
    - text (str): Source code to tokenize

    Yields:
    tokenize.TokenInfo: Token with position information
    """

def patched_generate_tokens(original_tokens) -> Iterator[tokenize.TokenInfo]:
    """
    Yield corrected tokens from an existing token stream, working around
    tokenizer issues with non-ASCII identifiers.

    Parameters:
    - original_tokens (Iterable[tokenize.TokenInfo]): Original token stream

    Yields:
    tokenize.TokenInfo: Corrected tokens
    """
```

#### Usage Example

```python
import asttokens
import asttokens.util
import token

source = "name = 'value' # comment"
atok = asttokens.ASTTokens(source, parse=True)

# Get first token
first_token = atok.tokens[0]

# Check token matching
if asttokens.util.match_token(first_token, token.NAME, 'name'):
    print("Found 'name' token")

# Create readable representation
repr_str = asttokens.util.token_repr(first_token.type, first_token.string)
print(repr_str)  # "NAME:'name'"

# Check for non-coding tokens
for tok in atok.tokens:
    if asttokens.util.is_non_coding_token(tok.type):
        print(f"Non-coding token: {tok.string}")

# Generate tokens manually
tokens = list(asttokens.util.generate_tokens("x = 1"))
print([f"{t.type}:{t.string}" for t in tokens])
```
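
The example above doesn't exercise `expect_token` or `patched_generate_tokens`; here is a short sketch of both (the non-ASCII source is purely illustrative):

```python
import io
import token
import tokenize
import asttokens.util

first = list(asttokens.util.generate_tokens("x = 1"))[0]

# expect_token passes silently on a match and raises ValueError otherwise
asttokens.util.expect_token(first, token.NAME, 'x')
try:
    asttokens.util.expect_token(first, token.OP)
except ValueError as err:
    print(f"Mismatch reported: {err}")

# patched_generate_tokens wraps an existing token stream, e.g. one produced
# by the standard tokenize module for code with non-ASCII identifiers
raw = tokenize.generate_tokens(io.StringIO("café = 1").readline)
fixed = list(asttokens.util.patched_generate_tokens(raw))
print(asttokens.util.token_repr(fixed[0].type, fixed[0].string))  # NAME:'café'
```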

### Enhanced Token Class

The Token class provides rich position information for each token.

```python { .api }
class Token:
    """
    Enhanced token representation with comprehensive position information.

    Attributes:
    - type (int): Token type from the token module
    - string (str): Token text content
    - start (Tuple[int, int]): Starting (row, column) position
    - end (Tuple[int, int]): Ending (row, column) position
    - line (str): Complete line text containing this token
    - index (int): Token index in the token list
    - startpos (int): Starting character offset
    - endpos (int): Ending character offset
    """

    def __str__(self) -> str:
        """
        Human-readable token representation.

        Returns:
        str: String representation of the token
        """
```

#### Usage Example

```python
import asttokens

source = "def func():\n pass"
atok = asttokens.ASTTokens(source, parse=True)

# Examine token details
def_token = atok.tokens[0]
print(f"Type: {def_token.type}")           # Token type number
print(f"String: {def_token.string}")       # 'def'
print(f"Start: {def_token.start}")         # (1, 0) - line 1, column 0
print(f"End: {def_token.end}")             # (1, 3) - line 1, column 3
print(f"Line: {def_token.line!r}")         # 'def func():\n' - full line, including the newline
print(f"Index: {def_token.index}")         # 0 - first token
print(f"Start pos: {def_token.startpos}")  # 0 - character offset 0
print(f"End pos: {def_token.endpos}")      # 3 - character offset 3
print(f"Repr: {def_token}")                # Repr: NAME:'def'

# Use position information
text_slice = source[def_token.startpos:def_token.endpos]
print(text_slice)  # 'def' - exact token text
```
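
Because `startpos` and `endpos` are plain character offsets into the source string, they make precise, formatting-preserving edits straightforward. A small sketch (the token index and replacement name are illustrative):

```python
import asttokens

source = "def func():\n pass"
atok = asttokens.ASTTokens(source, parse=True)

# atok.tokens[1] is the NAME token for 'func' in this source
name_token = atok.tokens[1]

# Slice the original text around the token's character offsets
edited = source[:name_token.startpos] + "renamed" + source[name_token.endpos:]
print(edited)  # "def renamed():\n pass"
```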