# Position Utilities

Utilities for converting between different position representations (line/column vs character offsets) and working with source code positions. These utilities handle the complexities of Unicode text and provide compatibility across different position systems.

## Capabilities

### LineNumbers Class

Utility class for converting between character offsets and (line, column) positions in source text.

```python { .api }
class LineNumbers:
    def __init__(self, text):
        """
        Initialize with source text for position calculations.

        Parameters:
        - text (str): Source text to analyze
        """

    def line_to_offset(self, line, column) -> int:
        """
        Convert a line and column position to a character offset.

        Parameters:
        - line (int): Line number (1-based)
        - column (int): Column position (0-based)

        Returns:
        int: Character offset in source text
        """

    def offset_to_line(self, offset) -> Tuple[int, int]:
        """
        Convert a character offset to a line and column position.

        Parameters:
        - offset (int): Character offset in source text

        Returns:
        Tuple[int, int]: (line, column) where line is 1-based, column is 0-based
        """

    def from_utf8_col(self, line, utf8_column) -> int:
        """
        Convert a UTF-8 byte column to a Unicode character column.

        Parameters:
        - line (int): Line number (1-based)
        - utf8_column (int): Column position in UTF-8 bytes

        Returns:
        int: Column position in Unicode characters
        """
```

#### Usage Example

```python
import asttokens

source = "hello = 'world'\nprint(hello)"
line_numbers = asttokens.LineNumbers(source)

# Convert position to offset
offset = line_numbers.line_to_offset(1, 6)  # Line 1, column 6
print(source[offset])  # '=' (character at that position)

# Convert offset to position
line, col = line_numbers.offset_to_line(16)  # Character 16
print(f"Line {line}, Column {col}")  # Line 2, Column 0

# Handle UTF-8 encoding differences
source_utf8 = "café = 'délicious'"
line_numbers_utf8 = asttokens.LineNumbers(source_utf8)
unicode_col = line_numbers_utf8.from_utf8_col(1, 5)  # UTF-8 byte column 5
print(unicode_col)  # 4 ('é' is two bytes, so byte column 5 is character column 4)
```
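
These conversions matter in practice because CPython's `ast` reports `col_offset` as a UTF-8 byte offset, while Python strings index by character. A minimal sketch of bridging the two (the variable names are illustrative; the methods are the ones documented above):

```python
import ast
import asttokens

source = "café = 'value'"
tree = ast.parse(source)
line_numbers = asttokens.LineNumbers(source)

# ast's col_offset counts UTF-8 bytes: the string literal starts at byte
# column 8, although it is character column 7 ('é' occupies two bytes)
value_node = tree.body[0].value
char_col = line_numbers.from_utf8_col(value_node.lineno, value_node.col_offset)
offset = line_numbers.line_to_offset(value_node.lineno, char_col)
print(source[offset:])  # "'value'"
```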

### Tokenless Support Detection

Function to determine whether a node or the running Python version supports faster tokenless operations.

```python { .api }
def supports_tokenless(node=None) -> bool:
    """
    Check if the node or Python version supports tokenless operation.

    Parameters:
    - node (ast.AST, optional): Specific AST node to check

    Returns:
    bool: True if tokenless operation is supported
    """
```

#### Usage Example

```python
import asttokens
import ast

source = "x = [1, 2, 3]"
tree = ast.parse(source)

# Check general tokenless support
if asttokens.supports_tokenless():
    print("Python version supports tokenless operations")

# Check specific node support
assign_node = tree.body[0]
if asttokens.supports_tokenless(assign_node):
    print("This node supports tokenless operations")
    # Use ASTText for better performance
    astext = asttokens.ASTText(source, tree=tree)
    text = astext.get_text(assign_node)
else:
    print("Node requires full tokenization")
    # Use ASTTokens
    atok = asttokens.ASTTokens(source, tree=tree)
    text = atok.get_text(assign_node)
```
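
Continuing the example above: explicit branching like this is often unnecessary, since `ASTText` is designed to fall back to full tokenization internally when a node doesn't support tokenless operation. A sketch of relying on that behavior:

```python
# ASTText performs the tokenless/tokenized dispatch itself; get_text()
# works either way, paying the tokenization cost only when required
astext = asttokens.ASTText(source, tree=tree)
print(astext.get_text(assign_node))  # x = [1, 2, 3]
```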

### Token Utility Functions

Helper functions for working with tokens are available through the `asttokens.util` module. These provide token matching, type checking, and generation capabilities.

```python { .api }
# Available through asttokens.util module
from asttokens.util import (
    token_repr, match_token, expect_token, is_non_coding_token,
    generate_tokens, patched_generate_tokens
)

def token_repr(tok_type, string) -> str:
    """
    Create a human-readable representation of a token.

    Parameters:
    - tok_type (int): Token type from the token module
    - string (str): Token string content

    Returns:
    str: Human-friendly token representation
    """

def match_token(token, tok_type, tok_str=None) -> bool:
    """
    Check if a token matches the specified type and, optionally, string.

    Parameters:
    - token (Token): Token to check
    - tok_type (int): Expected token type
    - tok_str (str, optional): Expected token string

    Returns:
    bool: True if the token matches the criteria
    """

def expect_token(token, tok_type, tok_str=None):
    """
    Validate that a token matches the expected type/string; raise if not.

    Parameters:
    - token (Token): Token to validate
    - tok_type (int): Expected token type
    - tok_str (str, optional): Expected token string

    Raises:
    ValueError: If the token doesn't match expectations
    """

def is_non_coding_token(token_type) -> bool:
    """
    Check if a token type represents non-coding content.

    Parameters:
    - token_type (int): Token type to check

    Returns:
    bool: True for comments, newlines, and encoding declarations
    """

def generate_tokens(text) -> Iterator[tokenize.TokenInfo]:
    """
    Generate standard library tokens from source text.

    Parameters:
    - text (str): Source code to tokenize

    Yields:
    tokenize.TokenInfo: Token with position information
    """

def patched_generate_tokens(original_tokens) -> Iterator[tokenize.TokenInfo]:
    """
    Yield corrected tokens from an existing token stream, working around
    tokenizer issues with non-ASCII identifiers.

    Parameters:
    - original_tokens (Iterable[tokenize.TokenInfo]): Original token stream

    Yields:
    tokenize.TokenInfo: Corrected tokens
    """
```

#### Usage Example

```python
import asttokens
import asttokens.util
import token

source = "name = 'value' # comment"
atok = asttokens.ASTTokens(source, parse=True)

# Get first token
first_token = atok.tokens[0]

# Check token matching
if asttokens.util.match_token(first_token, token.NAME, 'name'):
    print("Found 'name' token")

# Create readable representation
repr_str = asttokens.util.token_repr(first_token.type, first_token.string)
print(repr_str)  # "NAME:'name'"

# Check for non-coding tokens
for tok in atok.tokens:
    if asttokens.util.is_non_coding_token(tok.type):
        print(f"Non-coding token: {tok.string}")

# Generate tokens manually
tokens = list(asttokens.util.generate_tokens("x = 1"))
print([f"{t.type}:{t.string}" for t in tokens])
```
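
The example above doesn't exercise `expect_token` or `patched_generate_tokens`; here is a short sketch of both (the non-ASCII source is purely illustrative):

```python
import io
import token
import tokenize
import asttokens.util

first = list(asttokens.util.generate_tokens("x = 1"))[0]

# expect_token passes silently on a match and raises ValueError otherwise
asttokens.util.expect_token(first, token.NAME, 'x')
try:
    asttokens.util.expect_token(first, token.OP)
except ValueError as err:
    print(f"Mismatch reported: {err}")

# patched_generate_tokens wraps an existing token stream, e.g. one produced
# by the standard tokenize module for code with non-ASCII identifiers
raw = tokenize.generate_tokens(io.StringIO("café = 1").readline)
fixed = list(asttokens.util.patched_generate_tokens(raw))
print(asttokens.util.token_repr(fixed[0].type, fixed[0].string))  # NAME:'café'
```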

### Enhanced Token Class

The Token class provides rich position information for each token.

```python { .api }
class Token:
    """
    Enhanced token representation with comprehensive position information.

    Attributes:
    - type (int): Token type from the token module
    - string (str): Token text content
    - start (Tuple[int, int]): Starting (row, column) position
    - end (Tuple[int, int]): Ending (row, column) position
    - line (str): Complete line text containing this token
    - index (int): Token index in the token list
    - startpos (int): Starting character offset
    - endpos (int): Ending character offset
    """

    def __str__(self) -> str:
        """
        Human-readable token representation.

        Returns:
        str: String representation of the token
        """
```

#### Usage Example

```python
import asttokens

source = "def func():\n pass"
atok = asttokens.ASTTokens(source, parse=True)

# Examine token details
def_token = atok.tokens[0]
print(f"Type: {def_token.type}")           # Token type number
print(f"String: {def_token.string}")       # 'def'
print(f"Start: {def_token.start}")         # (1, 0) - line 1, column 0
print(f"End: {def_token.end}")             # (1, 3) - line 1, column 3
print(f"Line: {def_token.line!r}")         # 'def func():\n' - full line, including the newline
print(f"Index: {def_token.index}")         # 0 - first token
print(f"Start pos: {def_token.startpos}")  # 0 - character offset 0
print(f"End pos: {def_token.endpos}")      # 3 - character offset 3
print(f"Repr: {def_token}")                # Repr: NAME:'def'

# Use position information
text_slice = source[def_token.startpos:def_token.endpos]
print(text_slice)  # 'def' - exact token text
```
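
Because `startpos` and `endpos` are plain character offsets into the source string, they make precise, formatting-preserving edits straightforward. A small sketch (the token index and replacement name are illustrative):

```python
import asttokens

source = "def func():\n pass"
atok = asttokens.ASTTokens(source, parse=True)

# atok.tokens[1] is the NAME token for 'func' in this source
name_token = atok.tokens[1]

# Slice the original text around the token's character offsets
edited = source[:name_token.startpos] + "renamed" + source[name_token.endpos:]
print(edited)  # "def renamed():\n pass"
```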